skill-seekers 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1462 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Code Analyzer for GitHub Repositories
|
|
4
|
+
|
|
5
|
+
Extracts code signatures at configurable depth levels:
|
|
6
|
+
- surface: File tree only (existing behavior)
|
|
7
|
+
- deep: Parse files for signatures, parameters, types
|
|
8
|
+
- full: Complete AST analysis (future enhancement)
|
|
9
|
+
|
|
10
|
+
Supports 9 programming languages with language-specific parsers:
|
|
11
|
+
- Python (AST-based, production quality)
|
|
12
|
+
- JavaScript/TypeScript (regex-based)
|
|
13
|
+
- C/C++ (regex-based)
|
|
14
|
+
- C# (regex-based, inspired by Microsoft C# spec)
|
|
15
|
+
- Go (regex-based, Go language spec)
|
|
16
|
+
- Rust (regex-based, Rust reference)
|
|
17
|
+
- Java (regex-based, Oracle Java spec)
|
|
18
|
+
- Ruby (regex-based, Ruby documentation)
|
|
19
|
+
- PHP (regex-based, PHP reference)
|
|
20
|
+
|
|
21
|
+
Note: Regex-based parsers are simplified implementations. For production use,
|
|
22
|
+
consider using dedicated parsers (tree-sitter, language-specific AST libraries).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import ast
|
|
26
|
+
import contextlib
|
|
27
|
+
import logging
|
|
28
|
+
import re
|
|
29
|
+
from dataclasses import asdict, dataclass
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
logging.basicConfig(level=logging.INFO)
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class Parameter:
    """Represents a function parameter.

    Plain data holder; only ``name`` is required.
    """

    # Parameter name as it appears in the parsed signature.
    name: str
    # Type annotation rendered as text, or None when none was found.
    type_hint: str | None = None
    # Default value rendered as text, or None when none was found.
    default: str | None = None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class FunctionSignature:
    """Signature details captured for a single function or method."""

    name: str
    parameters: list[Parameter]
    return_type: str | None = None
    docstring: str | None = None
    line_number: int | None = None
    is_async: bool = False
    is_method: bool = False
    decorators: list[str] = None

    def __post_init__(self):
        # None is only a placeholder default: swap it for a fresh list so
        # instances never share one mutable decorators list.
        self.decorators = [] if self.decorators is None else self.decorators
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class ClassSignature:
    """Represents a class signature.

    Plain data holder; ``name``, ``base_classes`` and ``methods`` are required.
    """

    # Class name as it appears in the parsed source.
    name: str
    # Base-class names rendered as text.
    base_classes: list[str]
    # Signatures of the methods found in the class body.
    methods: list[FunctionSignature]
    # Class docstring, or None when none was found.
    docstring: str | None = None
    # Line on which the class definition starts, or None when unknown.
    line_number: int | None = None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class CodeAnalyzer:
|
|
75
|
+
"""
|
|
76
|
+
Analyzes code at different depth levels.
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
def __init__(self, depth: str = "surface") -> None:
    """
    Initialize code analyzer.

    Args:
        depth: Analysis depth ('surface', 'deep', 'full'). The value is
            stored as given; it is not validated here.
    """
    # Kept on the instance so later calls can read the configured depth.
    self.depth = depth
|
|
87
|
+
|
|
88
|
+
def analyze_file(self, file_path: str, content: str, language: str) -> dict[str, Any]:
    """
    Route one file to the analyzer that matches its language.

    Args:
        file_path: Path to file in repository (used for logging and passed on)
        content: File content as string
        language: Programming language (Python, JavaScript, C#, Go, Rust, Java, Ruby, PHP, etc.)

    Returns:
        Dict containing extracted signatures. Empty when depth is "surface",
        when no analyzer exists for the language, or when the analyzer raises.
    """
    # Surface depth never inspects individual files.
    if self.depth == "surface":
        return {}

    logger.debug(f"Analyzing {file_path} (language: {language}, depth: {self.depth})")

    # (language names, analyzer method name) pairs, checked in this order.
    routing = (
        (("Python",), "_analyze_python"),
        (("JavaScript", "TypeScript"), "_analyze_javascript"),
        (("C", "C++"), "_analyze_cpp"),
        (("C#",), "_analyze_csharp"),
        (("Go",), "_analyze_go"),
        (("Rust",), "_analyze_rust"),
        (("Java",), "_analyze_java"),
        (("Ruby",), "_analyze_ruby"),
        (("PHP",), "_analyze_php"),
    )

    try:
        for names, handler_name in routing:
            if language in names:
                return getattr(self, handler_name)(content, file_path)

        logger.debug(f"No analyzer for language: {language}")
        return {}
    except Exception as e:
        # Best effort: a failing analyzer must not abort the whole scan.
        logger.warning(f"Error analyzing {file_path}: {e}")
        return {}
|
|
130
|
+
|
|
131
|
+
def _analyze_python(self, content: str, file_path: str) -> dict[str, Any]:
|
|
132
|
+
"""Analyze Python file using AST."""
|
|
133
|
+
try:
|
|
134
|
+
tree = ast.parse(content)
|
|
135
|
+
except SyntaxError as e:
|
|
136
|
+
logger.debug(f"Syntax error in {file_path}: {e}")
|
|
137
|
+
return {}
|
|
138
|
+
|
|
139
|
+
classes = []
|
|
140
|
+
functions = []
|
|
141
|
+
|
|
142
|
+
for node in ast.walk(tree):
|
|
143
|
+
if isinstance(node, ast.ClassDef):
|
|
144
|
+
class_sig = self._extract_python_class(node)
|
|
145
|
+
classes.append(asdict(class_sig))
|
|
146
|
+
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
147
|
+
# Only top-level functions (not methods)
|
|
148
|
+
# Fix AST parser to check isinstance(parent.body, list) before 'in' operator
|
|
149
|
+
is_method = False
|
|
150
|
+
try:
|
|
151
|
+
is_method = any(
|
|
152
|
+
isinstance(parent, ast.ClassDef)
|
|
153
|
+
for parent in ast.walk(tree)
|
|
154
|
+
if hasattr(parent, "body")
|
|
155
|
+
and isinstance(parent.body, list)
|
|
156
|
+
and node in parent.body
|
|
157
|
+
)
|
|
158
|
+
except (TypeError, AttributeError):
|
|
159
|
+
# If body is not iterable or check fails, assume it's a top-level function
|
|
160
|
+
is_method = False
|
|
161
|
+
|
|
162
|
+
if not is_method:
|
|
163
|
+
func_sig = self._extract_python_function(node)
|
|
164
|
+
functions.append(asdict(func_sig))
|
|
165
|
+
|
|
166
|
+
# Extract comments
|
|
167
|
+
comments = self._extract_python_comments(content)
|
|
168
|
+
|
|
169
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
170
|
+
|
|
171
|
+
def _extract_python_class(self, node: ast.ClassDef) -> ClassSignature:
    """Extract class signature from AST node.

    Args:
        node: Class definition node to describe.

    Returns:
        ClassSignature with the class name, its base expressions rendered as
        text, the signatures of functions defined directly in the class body,
        the docstring and the definition line.
    """
    # Extract base classes
    bases = []
    for base in node.bases:
        if isinstance(base, ast.Name):
            bases.append(base.id)
            continue
        try:
            # Render the whole expression so subscripted bases (Generic[T])
            # and deeply dotted ones (pkg.mod.Base) are kept intact.
            bases.append(ast.unparse(base))
        except Exception:
            # Fallback when unparse is unavailable or fails: dotted names only.
            if isinstance(base, ast.Attribute):
                bases.append(
                    f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr
                )

    # Extract methods (only functions defined directly in the class body)
    methods = [
        self._extract_python_function(item, is_method=True)
        for item in node.body
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]

    return ClassSignature(
        name=node.name,
        base_classes=bases,
        methods=methods,
        docstring=ast.get_docstring(node),
        line_number=node.lineno,
    )
|
|
200
|
+
|
|
201
|
+
def _extract_python_function(self, node, is_method: bool = False) -> FunctionSignature:
    """Extract function signature from AST node.

    Args:
        node: An ``ast.FunctionDef`` or ``ast.AsyncFunctionDef`` node.
        is_method: Value stored in the result's ``is_method`` field.

    Returns:
        FunctionSignature whose parameters follow declaration order:
        positional-only, regular, ``*args``, keyword-only, ``**kwargs``.
        Variadic parameter names carry their ``*`` / ``**`` prefix.
    """
    can_unparse = hasattr(ast, "unparse")

    def make_param(arg, prefix=""):
        # Build a Parameter from an ast.arg, rendering its annotation as text.
        type_hint = None
        if arg.annotation:
            type_hint = ast.unparse(arg.annotation) if can_unparse else None
        return Parameter(name=f"{prefix}{arg.arg}", type_hint=type_hint)

    def render_default(expr):
        # Render a default expression as text; "..." marks an unrenderable one.
        try:
            return ast.unparse(expr) if can_unparse else str(expr)
        except Exception:
            return "..."

    args = node.args

    # Positional-only and regular parameters share one defaults list, which
    # is aligned to the END of their combined sequence.
    params = [make_param(arg) for arg in [*getattr(args, "posonlyargs", []), *args.args]]
    for param, default in zip(reversed(params), reversed(args.defaults)):
        param.default = render_default(default)

    if args.vararg:
        params.append(make_param(args.vararg, "*"))

    # kw_defaults is parallel to kwonlyargs; None means "no default".
    for arg, default in zip(args.kwonlyargs, args.kw_defaults):
        param = make_param(arg)
        if default is not None:
            param.default = render_default(default)
        params.append(param)

    if args.kwarg:
        params.append(make_param(args.kwarg, "**"))

    # Extract return type
    return_type = None
    if node.returns:
        with contextlib.suppress(Exception):
            return_type = ast.unparse(node.returns) if can_unparse else None

    # Extract decorators (best effort: one that cannot be rendered is skipped)
    decorators = []
    for decorator in node.decorator_list:
        with contextlib.suppress(Exception):
            if can_unparse:
                decorators.append(ast.unparse(decorator))
            elif isinstance(decorator, ast.Name):
                decorators.append(decorator.id)

    return FunctionSignature(
        name=node.name,
        parameters=params,
        return_type=return_type,
        docstring=ast.get_docstring(node),
        line_number=node.lineno,
        is_async=isinstance(node, ast.AsyncFunctionDef),
        is_method=is_method,
        decorators=decorators,
    )
|
|
257
|
+
|
|
258
|
+
def _analyze_javascript(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
259
|
+
"""
|
|
260
|
+
Analyze JavaScript/TypeScript file using regex patterns.
|
|
261
|
+
|
|
262
|
+
Note: This is a simplified approach. For production, consider using
|
|
263
|
+
a proper JS/TS parser like esprima or ts-morph.
|
|
264
|
+
"""
|
|
265
|
+
classes = []
|
|
266
|
+
functions = []
|
|
267
|
+
|
|
268
|
+
# Extract class definitions
|
|
269
|
+
class_pattern = r"class\s+(\w+)(?:\s+extends\s+(\w+))?\s*\{"
|
|
270
|
+
for match in re.finditer(class_pattern, content):
|
|
271
|
+
class_name = match.group(1)
|
|
272
|
+
base_class = match.group(2) if match.group(2) else None
|
|
273
|
+
|
|
274
|
+
# Try to extract methods (simplified)
|
|
275
|
+
class_block_start = match.end()
|
|
276
|
+
# This is a simplification - proper parsing would track braces
|
|
277
|
+
class_block_end = content.find("}", class_block_start)
|
|
278
|
+
if class_block_end != -1:
|
|
279
|
+
class_body = content[class_block_start:class_block_end]
|
|
280
|
+
methods = self._extract_js_methods(class_body)
|
|
281
|
+
else:
|
|
282
|
+
methods = []
|
|
283
|
+
|
|
284
|
+
classes.append(
|
|
285
|
+
{
|
|
286
|
+
"name": class_name,
|
|
287
|
+
"base_classes": [base_class] if base_class else [],
|
|
288
|
+
"methods": methods,
|
|
289
|
+
"docstring": None,
|
|
290
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
291
|
+
}
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
# Extract top-level functions
|
|
295
|
+
func_pattern = r"(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)"
|
|
296
|
+
for match in re.finditer(func_pattern, content):
|
|
297
|
+
func_name = match.group(1)
|
|
298
|
+
params_str = match.group(2)
|
|
299
|
+
is_async = "async" in match.group(0)
|
|
300
|
+
|
|
301
|
+
params = self._parse_js_parameters(params_str)
|
|
302
|
+
|
|
303
|
+
functions.append(
|
|
304
|
+
{
|
|
305
|
+
"name": func_name,
|
|
306
|
+
"parameters": params,
|
|
307
|
+
"return_type": None, # JS doesn't have type annotations (unless TS)
|
|
308
|
+
"docstring": None,
|
|
309
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
310
|
+
"is_async": is_async,
|
|
311
|
+
"is_method": False,
|
|
312
|
+
"decorators": [],
|
|
313
|
+
}
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
# Extract arrow functions assigned to const/let
|
|
317
|
+
arrow_pattern = r"(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*=>"
|
|
318
|
+
for match in re.finditer(arrow_pattern, content):
|
|
319
|
+
func_name = match.group(1)
|
|
320
|
+
params_str = match.group(2)
|
|
321
|
+
is_async = "async" in match.group(0)
|
|
322
|
+
|
|
323
|
+
params = self._parse_js_parameters(params_str)
|
|
324
|
+
|
|
325
|
+
functions.append(
|
|
326
|
+
{
|
|
327
|
+
"name": func_name,
|
|
328
|
+
"parameters": params,
|
|
329
|
+
"return_type": None,
|
|
330
|
+
"docstring": None,
|
|
331
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
332
|
+
"is_async": is_async,
|
|
333
|
+
"is_method": False,
|
|
334
|
+
"decorators": [],
|
|
335
|
+
}
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
# Extract comments
|
|
339
|
+
comments = self._extract_js_comments(content)
|
|
340
|
+
|
|
341
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
342
|
+
|
|
343
|
+
def _extract_js_methods(self, class_body: str) -> list[dict]:
|
|
344
|
+
"""Extract method signatures from class body."""
|
|
345
|
+
methods = []
|
|
346
|
+
|
|
347
|
+
# Match method definitions
|
|
348
|
+
method_pattern = r"(?:async\s+)?(\w+)\s*\(([^)]*)\)"
|
|
349
|
+
for match in re.finditer(method_pattern, class_body):
|
|
350
|
+
method_name = match.group(1)
|
|
351
|
+
params_str = match.group(2)
|
|
352
|
+
is_async = "async" in match.group(0)
|
|
353
|
+
|
|
354
|
+
# Skip constructor keyword detection
|
|
355
|
+
if method_name in ["if", "for", "while", "switch"]:
|
|
356
|
+
continue
|
|
357
|
+
|
|
358
|
+
params = self._parse_js_parameters(params_str)
|
|
359
|
+
|
|
360
|
+
methods.append(
|
|
361
|
+
{
|
|
362
|
+
"name": method_name,
|
|
363
|
+
"parameters": params,
|
|
364
|
+
"return_type": None,
|
|
365
|
+
"docstring": None,
|
|
366
|
+
"line_number": None,
|
|
367
|
+
"is_async": is_async,
|
|
368
|
+
"is_method": True,
|
|
369
|
+
"decorators": [],
|
|
370
|
+
}
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
return methods
|
|
374
|
+
|
|
375
|
+
def _parse_js_parameters(self, params_str: str) -> list[dict]:
|
|
376
|
+
"""Parse JavaScript parameter string."""
|
|
377
|
+
params = []
|
|
378
|
+
|
|
379
|
+
if not params_str.strip():
|
|
380
|
+
return params
|
|
381
|
+
|
|
382
|
+
# Split by comma (simplified - doesn't handle complex default values)
|
|
383
|
+
param_list = [p.strip() for p in params_str.split(",")]
|
|
384
|
+
|
|
385
|
+
for param in param_list:
|
|
386
|
+
if not param:
|
|
387
|
+
continue
|
|
388
|
+
|
|
389
|
+
# Check for default value
|
|
390
|
+
if "=" in param:
|
|
391
|
+
name, default = param.split("=", 1)
|
|
392
|
+
name = name.strip()
|
|
393
|
+
default = default.strip()
|
|
394
|
+
else:
|
|
395
|
+
name = param
|
|
396
|
+
default = None
|
|
397
|
+
|
|
398
|
+
# Check for type annotation (TypeScript)
|
|
399
|
+
type_hint = None
|
|
400
|
+
if ":" in name:
|
|
401
|
+
name, type_hint = name.split(":", 1)
|
|
402
|
+
name = name.strip()
|
|
403
|
+
type_hint = type_hint.strip()
|
|
404
|
+
|
|
405
|
+
params.append({"name": name, "type_hint": type_hint, "default": default})
|
|
406
|
+
|
|
407
|
+
return params
|
|
408
|
+
|
|
409
|
+
def _analyze_cpp(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
410
|
+
"""
|
|
411
|
+
Analyze C/C++ header file using regex patterns.
|
|
412
|
+
|
|
413
|
+
Note: This is a simplified approach focusing on header files.
|
|
414
|
+
For production, consider using libclang or similar.
|
|
415
|
+
"""
|
|
416
|
+
classes = []
|
|
417
|
+
functions = []
|
|
418
|
+
|
|
419
|
+
# Extract class definitions (simplified - doesn't handle nested classes)
|
|
420
|
+
class_pattern = r"class\s+(\w+)(?:\s*:\s*public\s+(\w+))?\s*\{"
|
|
421
|
+
for match in re.finditer(class_pattern, content):
|
|
422
|
+
class_name = match.group(1)
|
|
423
|
+
base_class = match.group(2) if match.group(2) else None
|
|
424
|
+
|
|
425
|
+
classes.append(
|
|
426
|
+
{
|
|
427
|
+
"name": class_name,
|
|
428
|
+
"base_classes": [base_class] if base_class else [],
|
|
429
|
+
"methods": [], # Simplified - would need to parse class body
|
|
430
|
+
"docstring": None,
|
|
431
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
432
|
+
}
|
|
433
|
+
)
|
|
434
|
+
|
|
435
|
+
# Extract function declarations
|
|
436
|
+
func_pattern = r"(\w+(?:\s*\*|\s*&)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
437
|
+
for match in re.finditer(func_pattern, content):
|
|
438
|
+
return_type = match.group(1).strip()
|
|
439
|
+
func_name = match.group(2)
|
|
440
|
+
params_str = match.group(3)
|
|
441
|
+
|
|
442
|
+
# Skip common keywords
|
|
443
|
+
if func_name in ["if", "for", "while", "switch", "return"]:
|
|
444
|
+
continue
|
|
445
|
+
|
|
446
|
+
params = self._parse_cpp_parameters(params_str)
|
|
447
|
+
|
|
448
|
+
functions.append(
|
|
449
|
+
{
|
|
450
|
+
"name": func_name,
|
|
451
|
+
"parameters": params,
|
|
452
|
+
"return_type": return_type,
|
|
453
|
+
"docstring": None,
|
|
454
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
455
|
+
"is_async": False,
|
|
456
|
+
"is_method": False,
|
|
457
|
+
"decorators": [],
|
|
458
|
+
}
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
# Extract comments
|
|
462
|
+
comments = self._extract_cpp_comments(content)
|
|
463
|
+
|
|
464
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
465
|
+
|
|
466
|
+
def _parse_cpp_parameters(self, params_str: str) -> list[dict]:
|
|
467
|
+
"""Parse C++ parameter string."""
|
|
468
|
+
params = []
|
|
469
|
+
|
|
470
|
+
if not params_str.strip() or params_str.strip() == "void":
|
|
471
|
+
return params
|
|
472
|
+
|
|
473
|
+
# Split by comma (simplified)
|
|
474
|
+
param_list = [p.strip() for p in params_str.split(",")]
|
|
475
|
+
|
|
476
|
+
for param in param_list:
|
|
477
|
+
if not param:
|
|
478
|
+
continue
|
|
479
|
+
|
|
480
|
+
# Check for default value
|
|
481
|
+
default = None
|
|
482
|
+
if "=" in param:
|
|
483
|
+
param, default = param.rsplit("=", 1)
|
|
484
|
+
param = param.strip()
|
|
485
|
+
default = default.strip()
|
|
486
|
+
|
|
487
|
+
# Extract type and name (simplified)
|
|
488
|
+
# Format: "type name" or "type* name" or "type& name"
|
|
489
|
+
parts = param.split()
|
|
490
|
+
if len(parts) >= 2:
|
|
491
|
+
param_type = " ".join(parts[:-1])
|
|
492
|
+
param_name = parts[-1]
|
|
493
|
+
else:
|
|
494
|
+
param_type = param
|
|
495
|
+
param_name = "unknown"
|
|
496
|
+
|
|
497
|
+
params.append({"name": param_name, "type_hint": param_type, "default": default})
|
|
498
|
+
|
|
499
|
+
return params
|
|
500
|
+
|
|
501
|
+
def _extract_python_comments(self, content: str) -> list[dict]:
|
|
502
|
+
"""
|
|
503
|
+
Extract Python comments (# style).
|
|
504
|
+
|
|
505
|
+
Returns list of comment dictionaries with line number, text, and type.
|
|
506
|
+
"""
|
|
507
|
+
comments = []
|
|
508
|
+
|
|
509
|
+
for i, line in enumerate(content.splitlines(), 1):
|
|
510
|
+
stripped = line.strip()
|
|
511
|
+
|
|
512
|
+
# Skip shebang and encoding declarations
|
|
513
|
+
if stripped.startswith("#!") or stripped.startswith("#") and "coding" in stripped:
|
|
514
|
+
continue
|
|
515
|
+
|
|
516
|
+
# Extract regular comments
|
|
517
|
+
if stripped.startswith("#"):
|
|
518
|
+
comment_text = stripped[1:].strip()
|
|
519
|
+
comments.append({"line": i, "text": comment_text, "type": "inline"})
|
|
520
|
+
|
|
521
|
+
return comments
|
|
522
|
+
|
|
523
|
+
def _extract_js_comments(self, content: str) -> list[dict]:
|
|
524
|
+
"""
|
|
525
|
+
Extract JavaScript/TypeScript comments (// and /* */ styles).
|
|
526
|
+
|
|
527
|
+
Returns list of comment dictionaries with line number, text, and type.
|
|
528
|
+
"""
|
|
529
|
+
comments = []
|
|
530
|
+
|
|
531
|
+
# Extract single-line comments (//)
|
|
532
|
+
for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
|
|
533
|
+
line_num = content[: match.start()].count("\n") + 1
|
|
534
|
+
comment_text = match.group(1).strip()
|
|
535
|
+
|
|
536
|
+
comments.append({"line": line_num, "text": comment_text, "type": "inline"})
|
|
537
|
+
|
|
538
|
+
# Extract multi-line comments (/* */)
|
|
539
|
+
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
|
|
540
|
+
start_line = content[: match.start()].count("\n") + 1
|
|
541
|
+
comment_text = match.group(1).strip()
|
|
542
|
+
|
|
543
|
+
comments.append({"line": start_line, "text": comment_text, "type": "block"})
|
|
544
|
+
|
|
545
|
+
return comments
|
|
546
|
+
|
|
547
|
+
def _extract_cpp_comments(self, content: str) -> list[dict]:
    """
    Extract C++ comments (// and /* */ styles, same as JavaScript).

    Args:
        content: Source text to scan.

    Returns:
        Whatever ``_extract_js_comments`` returns for the same text: a list
        of comment dictionaries with line number, text, and type.
    """
    # C++ uses the same comment syntax as JavaScript, so the JS extractor is reused.
    return self._extract_js_comments(content)
|
|
555
|
+
|
|
556
|
+
def _analyze_csharp(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
557
|
+
"""
|
|
558
|
+
Analyze C# file using regex patterns.
|
|
559
|
+
|
|
560
|
+
Note: This is a simplified regex-based approach. For production use with Unity/ASP.NET,
|
|
561
|
+
consider using tree-sitter-c-sharp or Roslyn via pythonnet for more accurate parsing.
|
|
562
|
+
|
|
563
|
+
Regex patterns inspired by C# language specification:
|
|
564
|
+
https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/
|
|
565
|
+
"""
|
|
566
|
+
classes = []
|
|
567
|
+
functions = []
|
|
568
|
+
|
|
569
|
+
# Extract class definitions
|
|
570
|
+
# Matches: [modifiers] class ClassName [: BaseClass] [, Interface]
|
|
571
|
+
class_pattern = r"(?:public|private|internal|protected)?\s*(?:static|abstract|sealed)?\s*class\s+(\w+)(?:\s*:\s*([\w\s,<>]+))?\s*\{"
|
|
572
|
+
for match in re.finditer(class_pattern, content):
|
|
573
|
+
class_name = match.group(1)
|
|
574
|
+
bases_str = match.group(2) if match.group(2) else ""
|
|
575
|
+
|
|
576
|
+
# Parse base classes and interfaces
|
|
577
|
+
base_classes = []
|
|
578
|
+
if bases_str:
|
|
579
|
+
base_classes = [b.strip() for b in bases_str.split(",")]
|
|
580
|
+
|
|
581
|
+
# Try to extract methods (simplified)
|
|
582
|
+
class_block_start = match.end()
|
|
583
|
+
# Find matching closing brace (simplified - doesn't handle nested classes perfectly)
|
|
584
|
+
brace_count = 1
|
|
585
|
+
class_block_end = class_block_start
|
|
586
|
+
for i, char in enumerate(content[class_block_start:], class_block_start):
|
|
587
|
+
if char == "{":
|
|
588
|
+
brace_count += 1
|
|
589
|
+
elif char == "}":
|
|
590
|
+
brace_count -= 1
|
|
591
|
+
if brace_count == 0:
|
|
592
|
+
class_block_end = i
|
|
593
|
+
break
|
|
594
|
+
|
|
595
|
+
if class_block_end > class_block_start:
|
|
596
|
+
class_body = content[class_block_start:class_block_end]
|
|
597
|
+
methods = self._extract_csharp_methods(class_body)
|
|
598
|
+
else:
|
|
599
|
+
methods = []
|
|
600
|
+
|
|
601
|
+
classes.append(
|
|
602
|
+
{
|
|
603
|
+
"name": class_name,
|
|
604
|
+
"base_classes": base_classes,
|
|
605
|
+
"methods": methods,
|
|
606
|
+
"docstring": None, # Would need to extract XML doc comments
|
|
607
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
608
|
+
}
|
|
609
|
+
)
|
|
610
|
+
|
|
611
|
+
# Extract top-level functions/methods
|
|
612
|
+
# Matches: [modifiers] [async] ReturnType MethodName(params)
|
|
613
|
+
func_pattern = r"(?:public|private|internal|protected)?\s*(?:static|virtual|override|abstract)?\s*(?:async\s+)?(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
614
|
+
for match in re.finditer(func_pattern, content):
|
|
615
|
+
return_type = match.group(1).strip()
|
|
616
|
+
func_name = match.group(2)
|
|
617
|
+
params_str = match.group(3)
|
|
618
|
+
is_async = "async" in match.group(0)
|
|
619
|
+
|
|
620
|
+
# Skip common keywords
|
|
621
|
+
if func_name in ["if", "for", "while", "switch", "return", "using", "namespace"]:
|
|
622
|
+
continue
|
|
623
|
+
|
|
624
|
+
params = self._parse_csharp_parameters(params_str)
|
|
625
|
+
|
|
626
|
+
functions.append(
|
|
627
|
+
{
|
|
628
|
+
"name": func_name,
|
|
629
|
+
"parameters": params,
|
|
630
|
+
"return_type": return_type,
|
|
631
|
+
"docstring": None,
|
|
632
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
633
|
+
"is_async": is_async,
|
|
634
|
+
"is_method": False,
|
|
635
|
+
"decorators": [],
|
|
636
|
+
}
|
|
637
|
+
)
|
|
638
|
+
|
|
639
|
+
# Extract comments
|
|
640
|
+
comments = self._extract_csharp_comments(content)
|
|
641
|
+
|
|
642
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
643
|
+
|
|
644
|
+
def _extract_csharp_methods(self, class_body: str) -> list[dict]:
|
|
645
|
+
"""Extract C# method signatures from class body."""
|
|
646
|
+
methods = []
|
|
647
|
+
|
|
648
|
+
# Match method definitions
|
|
649
|
+
method_pattern = r"(?:public|private|internal|protected)?\s*(?:static|virtual|override|abstract)?\s*(?:async\s+)?(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
650
|
+
for match in re.finditer(method_pattern, class_body):
|
|
651
|
+
return_type = match.group(1).strip()
|
|
652
|
+
method_name = match.group(2)
|
|
653
|
+
params_str = match.group(3)
|
|
654
|
+
is_async = "async" in match.group(0)
|
|
655
|
+
|
|
656
|
+
# Skip keywords
|
|
657
|
+
if method_name in ["if", "for", "while", "switch", "get", "set"]:
|
|
658
|
+
continue
|
|
659
|
+
|
|
660
|
+
params = self._parse_csharp_parameters(params_str)
|
|
661
|
+
|
|
662
|
+
methods.append(
|
|
663
|
+
{
|
|
664
|
+
"name": method_name,
|
|
665
|
+
"parameters": params,
|
|
666
|
+
"return_type": return_type,
|
|
667
|
+
"docstring": None,
|
|
668
|
+
"line_number": None,
|
|
669
|
+
"is_async": is_async,
|
|
670
|
+
"is_method": True,
|
|
671
|
+
"decorators": [],
|
|
672
|
+
}
|
|
673
|
+
)
|
|
674
|
+
|
|
675
|
+
return methods
|
|
676
|
+
|
|
677
|
+
def _parse_csharp_parameters(self, params_str: str) -> list[dict]:
|
|
678
|
+
"""Parse C# parameter string."""
|
|
679
|
+
params = []
|
|
680
|
+
|
|
681
|
+
if not params_str.strip():
|
|
682
|
+
return params
|
|
683
|
+
|
|
684
|
+
# Split by comma (simplified)
|
|
685
|
+
param_list = [p.strip() for p in params_str.split(",")]
|
|
686
|
+
|
|
687
|
+
for param in param_list:
|
|
688
|
+
if not param:
|
|
689
|
+
continue
|
|
690
|
+
|
|
691
|
+
# Check for default value
|
|
692
|
+
default = None
|
|
693
|
+
if "=" in param:
|
|
694
|
+
param, default = param.split("=", 1)
|
|
695
|
+
param = param.strip()
|
|
696
|
+
default = default.strip()
|
|
697
|
+
|
|
698
|
+
# Parse: [ref/out] Type name
|
|
699
|
+
parts = param.split()
|
|
700
|
+
if len(parts) >= 2:
|
|
701
|
+
# Remove ref/out modifiers
|
|
702
|
+
if parts[0] in ["ref", "out", "in", "params"]:
|
|
703
|
+
parts = parts[1:]
|
|
704
|
+
|
|
705
|
+
if len(parts) >= 2:
|
|
706
|
+
param_type = parts[0]
|
|
707
|
+
param_name = parts[1]
|
|
708
|
+
else:
|
|
709
|
+
param_type = parts[0]
|
|
710
|
+
param_name = "unknown"
|
|
711
|
+
else:
|
|
712
|
+
param_type = None
|
|
713
|
+
param_name = param
|
|
714
|
+
|
|
715
|
+
params.append({"name": param_name, "type_hint": param_type, "default": default})
|
|
716
|
+
|
|
717
|
+
return params
|
|
718
|
+
|
|
719
|
+
def _extract_csharp_comments(self, content: str) -> list[dict]:
|
|
720
|
+
"""Extract C# comments (// and /* */ and /// XML docs)."""
|
|
721
|
+
comments = []
|
|
722
|
+
|
|
723
|
+
# Single-line comments (//)
|
|
724
|
+
for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
|
|
725
|
+
line_num = content[: match.start()].count("\n") + 1
|
|
726
|
+
comment_text = match.group(1).strip()
|
|
727
|
+
|
|
728
|
+
# Distinguish XML doc comments (///)
|
|
729
|
+
comment_type = "doc" if match.group(1).startswith("/") else "inline"
|
|
730
|
+
|
|
731
|
+
comments.append(
|
|
732
|
+
{"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type}
|
|
733
|
+
)
|
|
734
|
+
|
|
735
|
+
# Multi-line comments (/* */)
|
|
736
|
+
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
|
|
737
|
+
start_line = content[: match.start()].count("\n") + 1
|
|
738
|
+
comment_text = match.group(1).strip()
|
|
739
|
+
|
|
740
|
+
comments.append({"line": start_line, "text": comment_text, "type": "block"})
|
|
741
|
+
|
|
742
|
+
return comments
|
|
743
|
+
|
|
744
|
+
def _analyze_go(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
745
|
+
"""
|
|
746
|
+
Analyze Go file using regex patterns.
|
|
747
|
+
|
|
748
|
+
Note: This is a simplified regex-based approach. For production,
|
|
749
|
+
consider using go/parser from the Go standard library via subprocess.
|
|
750
|
+
|
|
751
|
+
Regex patterns based on Go language specification:
|
|
752
|
+
https://go.dev/ref/spec
|
|
753
|
+
"""
|
|
754
|
+
classes = [] # Go doesn't have classes, but we'll extract structs
|
|
755
|
+
functions = []
|
|
756
|
+
|
|
757
|
+
# Extract struct definitions (Go's equivalent of classes)
|
|
758
|
+
struct_pattern = r"type\s+(\w+)\s+struct\s*\{"
|
|
759
|
+
for match in re.finditer(struct_pattern, content):
|
|
760
|
+
struct_name = match.group(1)
|
|
761
|
+
|
|
762
|
+
classes.append(
|
|
763
|
+
{
|
|
764
|
+
"name": struct_name,
|
|
765
|
+
"base_classes": [], # Go uses embedding, not inheritance
|
|
766
|
+
"methods": [], # Methods extracted separately
|
|
767
|
+
"docstring": None,
|
|
768
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
769
|
+
}
|
|
770
|
+
)
|
|
771
|
+
|
|
772
|
+
# Extract function definitions
|
|
773
|
+
# Matches: func [receiver] name(params) [returns]
|
|
774
|
+
func_pattern = r"func\s+(?:\((\w+)\s+\*?(\w+)\)\s+)?(\w+)\s*\(([^)]*)\)(?:\s+\(([^)]+)\)|(?:\s+(\w+(?:\[.*?\])?(?:,\s*\w+)*)))?"
|
|
775
|
+
for match in re.finditer(func_pattern, content):
|
|
776
|
+
_receiver_var = match.group(1)
|
|
777
|
+
receiver_type = match.group(2)
|
|
778
|
+
func_name = match.group(3)
|
|
779
|
+
params_str = match.group(4)
|
|
780
|
+
returns_multi = match.group(5) # Multiple returns in parentheses
|
|
781
|
+
returns_single = match.group(6) # Single return without parentheses
|
|
782
|
+
|
|
783
|
+
# Determine if it's a method (has receiver)
|
|
784
|
+
is_method = bool(receiver_type)
|
|
785
|
+
|
|
786
|
+
# Parse return type
|
|
787
|
+
return_type = None
|
|
788
|
+
if returns_multi:
|
|
789
|
+
return_type = f"({returns_multi})"
|
|
790
|
+
elif returns_single:
|
|
791
|
+
return_type = returns_single
|
|
792
|
+
|
|
793
|
+
params = self._parse_go_parameters(params_str)
|
|
794
|
+
|
|
795
|
+
functions.append(
|
|
796
|
+
{
|
|
797
|
+
"name": func_name,
|
|
798
|
+
"parameters": params,
|
|
799
|
+
"return_type": return_type,
|
|
800
|
+
"docstring": None,
|
|
801
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
802
|
+
"is_async": False, # Go uses goroutines differently
|
|
803
|
+
"is_method": is_method,
|
|
804
|
+
"decorators": [],
|
|
805
|
+
}
|
|
806
|
+
)
|
|
807
|
+
|
|
808
|
+
# Extract comments
|
|
809
|
+
comments = self._extract_go_comments(content)
|
|
810
|
+
|
|
811
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
812
|
+
|
|
813
|
+
def _parse_go_parameters(self, params_str: str) -> list[dict]:
|
|
814
|
+
"""Parse Go parameter string."""
|
|
815
|
+
params = []
|
|
816
|
+
|
|
817
|
+
if not params_str.strip():
|
|
818
|
+
return params
|
|
819
|
+
|
|
820
|
+
# Split by comma
|
|
821
|
+
param_list = [p.strip() for p in params_str.split(",")]
|
|
822
|
+
|
|
823
|
+
for param in param_list:
|
|
824
|
+
if not param:
|
|
825
|
+
continue
|
|
826
|
+
|
|
827
|
+
# Go format: name type or name1, name2 type
|
|
828
|
+
# Simplified parsing
|
|
829
|
+
parts = param.split()
|
|
830
|
+
if len(parts) >= 2:
|
|
831
|
+
# Last part is type
|
|
832
|
+
param_type = parts[-1]
|
|
833
|
+
param_name = " ".join(parts[:-1])
|
|
834
|
+
else:
|
|
835
|
+
param_type = param
|
|
836
|
+
param_name = "unknown"
|
|
837
|
+
|
|
838
|
+
params.append(
|
|
839
|
+
{
|
|
840
|
+
"name": param_name,
|
|
841
|
+
"type_hint": param_type,
|
|
842
|
+
"default": None, # Go doesn't support default parameters
|
|
843
|
+
}
|
|
844
|
+
)
|
|
845
|
+
|
|
846
|
+
return params
|
|
847
|
+
|
|
848
|
+
def _extract_go_comments(self, content: str) -> list[dict]:
|
|
849
|
+
"""Extract Go comments (// and /* */ styles)."""
|
|
850
|
+
# Go uses C-style comments
|
|
851
|
+
return self._extract_js_comments(content)
|
|
852
|
+
|
|
853
|
+
def _analyze_rust(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
854
|
+
"""
|
|
855
|
+
Analyze Rust file using regex patterns.
|
|
856
|
+
|
|
857
|
+
Note: This is a simplified regex-based approach. For production,
|
|
858
|
+
consider using syn crate via subprocess or tree-sitter-rust.
|
|
859
|
+
|
|
860
|
+
Regex patterns based on Rust language reference:
|
|
861
|
+
https://doc.rust-lang.org/reference/
|
|
862
|
+
"""
|
|
863
|
+
classes = [] # Rust uses structs/enums/traits
|
|
864
|
+
functions = []
|
|
865
|
+
|
|
866
|
+
# Extract struct definitions
|
|
867
|
+
struct_pattern = r"(?:pub\s+)?struct\s+(\w+)(?:<[^>]+>)?\s*\{"
|
|
868
|
+
for match in re.finditer(struct_pattern, content):
|
|
869
|
+
struct_name = match.group(1)
|
|
870
|
+
|
|
871
|
+
classes.append(
|
|
872
|
+
{
|
|
873
|
+
"name": struct_name,
|
|
874
|
+
"base_classes": [], # Rust uses traits, not inheritance
|
|
875
|
+
"methods": [],
|
|
876
|
+
"docstring": None,
|
|
877
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
878
|
+
}
|
|
879
|
+
)
|
|
880
|
+
|
|
881
|
+
# Extract function definitions
|
|
882
|
+
# Matches: [pub] [async] [unsafe] [const] fn name<generics>(params) -> ReturnType
|
|
883
|
+
func_pattern = r"(?:pub\s+)?(?:async\s+)?(?:unsafe\s+)?(?:const\s+)?fn\s+(\w+)(?:<[^>]+>)?\s*\(([^)]*)\)(?:\s*->\s*([^{;]+))?"
|
|
884
|
+
for match in re.finditer(func_pattern, content):
|
|
885
|
+
func_name = match.group(1)
|
|
886
|
+
params_str = match.group(2)
|
|
887
|
+
return_type = match.group(3).strip() if match.group(3) else None
|
|
888
|
+
is_async = "async" in match.group(0)
|
|
889
|
+
|
|
890
|
+
params = self._parse_rust_parameters(params_str)
|
|
891
|
+
|
|
892
|
+
functions.append(
|
|
893
|
+
{
|
|
894
|
+
"name": func_name,
|
|
895
|
+
"parameters": params,
|
|
896
|
+
"return_type": return_type,
|
|
897
|
+
"docstring": None,
|
|
898
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
899
|
+
"is_async": is_async,
|
|
900
|
+
"is_method": False,
|
|
901
|
+
"decorators": [],
|
|
902
|
+
}
|
|
903
|
+
)
|
|
904
|
+
|
|
905
|
+
# Extract comments
|
|
906
|
+
comments = self._extract_rust_comments(content)
|
|
907
|
+
|
|
908
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
909
|
+
|
|
910
|
+
def _parse_rust_parameters(self, params_str: str) -> list[dict]:
|
|
911
|
+
"""Parse Rust parameter string."""
|
|
912
|
+
params = []
|
|
913
|
+
|
|
914
|
+
if not params_str.strip():
|
|
915
|
+
return params
|
|
916
|
+
|
|
917
|
+
# Split by comma
|
|
918
|
+
param_list = [p.strip() for p in params_str.split(",")]
|
|
919
|
+
|
|
920
|
+
for param in param_list:
|
|
921
|
+
if not param:
|
|
922
|
+
continue
|
|
923
|
+
|
|
924
|
+
# Rust format: name: type or &self
|
|
925
|
+
if ":" in param:
|
|
926
|
+
name, param_type = param.split(":", 1)
|
|
927
|
+
name = name.strip()
|
|
928
|
+
param_type = param_type.strip()
|
|
929
|
+
else:
|
|
930
|
+
# Handle &self, &mut self, self
|
|
931
|
+
name = param
|
|
932
|
+
param_type = None
|
|
933
|
+
|
|
934
|
+
params.append(
|
|
935
|
+
{
|
|
936
|
+
"name": name,
|
|
937
|
+
"type_hint": param_type,
|
|
938
|
+
"default": None, # Rust doesn't support default parameters
|
|
939
|
+
}
|
|
940
|
+
)
|
|
941
|
+
|
|
942
|
+
return params
|
|
943
|
+
|
|
944
|
+
def _extract_rust_comments(self, content: str) -> list[dict]:
|
|
945
|
+
"""Extract Rust comments (// and /* */ and /// doc comments)."""
|
|
946
|
+
comments = []
|
|
947
|
+
|
|
948
|
+
# Single-line comments (//)
|
|
949
|
+
for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
|
|
950
|
+
line_num = content[: match.start()].count("\n") + 1
|
|
951
|
+
comment_text = match.group(1).strip()
|
|
952
|
+
|
|
953
|
+
# Distinguish doc comments (/// or //!)
|
|
954
|
+
if comment_text.startswith("/") or comment_text.startswith("!"):
|
|
955
|
+
comment_type = "doc"
|
|
956
|
+
comment_text = comment_text.lstrip("/!").strip()
|
|
957
|
+
else:
|
|
958
|
+
comment_type = "inline"
|
|
959
|
+
|
|
960
|
+
comments.append({"line": line_num, "text": comment_text, "type": comment_type})
|
|
961
|
+
|
|
962
|
+
# Multi-line comments (/* */)
|
|
963
|
+
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
|
|
964
|
+
start_line = content[: match.start()].count("\n") + 1
|
|
965
|
+
comment_text = match.group(1).strip()
|
|
966
|
+
|
|
967
|
+
comments.append({"line": start_line, "text": comment_text, "type": "block"})
|
|
968
|
+
|
|
969
|
+
return comments
|
|
970
|
+
|
|
971
|
+
def _analyze_java(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
972
|
+
"""
|
|
973
|
+
Analyze Java file using regex patterns.
|
|
974
|
+
|
|
975
|
+
Note: This is a simplified regex-based approach. For production,
|
|
976
|
+
consider using Eclipse JDT or JavaParser library.
|
|
977
|
+
|
|
978
|
+
Regex patterns based on Java language specification:
|
|
979
|
+
https://docs.oracle.com/javase/specs/
|
|
980
|
+
"""
|
|
981
|
+
classes = []
|
|
982
|
+
functions = []
|
|
983
|
+
|
|
984
|
+
# Extract class definitions
|
|
985
|
+
# Matches: [modifiers] class ClassName [extends Base] [implements Interfaces]
|
|
986
|
+
class_pattern = r"(?:public|private|protected)?\s*(?:static|final|abstract)?\s*class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?\s*\{"
|
|
987
|
+
for match in re.finditer(class_pattern, content):
|
|
988
|
+
class_name = match.group(1)
|
|
989
|
+
base_class = match.group(2)
|
|
990
|
+
interfaces_str = match.group(3)
|
|
991
|
+
|
|
992
|
+
base_classes = []
|
|
993
|
+
if base_class:
|
|
994
|
+
base_classes.append(base_class)
|
|
995
|
+
if interfaces_str:
|
|
996
|
+
base_classes.extend([i.strip() for i in interfaces_str.split(",")])
|
|
997
|
+
|
|
998
|
+
# Extract methods (simplified)
|
|
999
|
+
class_block_start = match.end()
|
|
1000
|
+
brace_count = 1
|
|
1001
|
+
class_block_end = class_block_start
|
|
1002
|
+
for i, char in enumerate(content[class_block_start:], class_block_start):
|
|
1003
|
+
if char == "{":
|
|
1004
|
+
brace_count += 1
|
|
1005
|
+
elif char == "}":
|
|
1006
|
+
brace_count -= 1
|
|
1007
|
+
if brace_count == 0:
|
|
1008
|
+
class_block_end = i
|
|
1009
|
+
break
|
|
1010
|
+
|
|
1011
|
+
if class_block_end > class_block_start:
|
|
1012
|
+
class_body = content[class_block_start:class_block_end]
|
|
1013
|
+
methods = self._extract_java_methods(class_body)
|
|
1014
|
+
else:
|
|
1015
|
+
methods = []
|
|
1016
|
+
|
|
1017
|
+
classes.append(
|
|
1018
|
+
{
|
|
1019
|
+
"name": class_name,
|
|
1020
|
+
"base_classes": base_classes,
|
|
1021
|
+
"methods": methods,
|
|
1022
|
+
"docstring": None,
|
|
1023
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
1024
|
+
}
|
|
1025
|
+
)
|
|
1026
|
+
|
|
1027
|
+
# Extract top-level functions (rare in Java, but static methods)
|
|
1028
|
+
func_pattern = r"(?:public|private|protected)?\s*(?:static|final|synchronized)?\s*(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
1029
|
+
for match in re.finditer(func_pattern, content):
|
|
1030
|
+
return_type = match.group(1).strip()
|
|
1031
|
+
func_name = match.group(2)
|
|
1032
|
+
params_str = match.group(3)
|
|
1033
|
+
|
|
1034
|
+
# Skip keywords
|
|
1035
|
+
if func_name in ["if", "for", "while", "switch", "return", "class", "void"]:
|
|
1036
|
+
continue
|
|
1037
|
+
|
|
1038
|
+
params = self._parse_java_parameters(params_str)
|
|
1039
|
+
|
|
1040
|
+
functions.append(
|
|
1041
|
+
{
|
|
1042
|
+
"name": func_name,
|
|
1043
|
+
"parameters": params,
|
|
1044
|
+
"return_type": return_type,
|
|
1045
|
+
"docstring": None,
|
|
1046
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
1047
|
+
"is_async": False,
|
|
1048
|
+
"is_method": False,
|
|
1049
|
+
"decorators": [],
|
|
1050
|
+
}
|
|
1051
|
+
)
|
|
1052
|
+
|
|
1053
|
+
# Extract comments
|
|
1054
|
+
comments = self._extract_java_comments(content)
|
|
1055
|
+
|
|
1056
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
1057
|
+
|
|
1058
|
+
def _extract_java_methods(self, class_body: str) -> list[dict]:
|
|
1059
|
+
"""Extract Java method signatures from class body."""
|
|
1060
|
+
methods = []
|
|
1061
|
+
|
|
1062
|
+
method_pattern = r"(?:public|private|protected)?\s*(?:static|final|synchronized)?\s*(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
1063
|
+
for match in re.finditer(method_pattern, class_body):
|
|
1064
|
+
return_type = match.group(1).strip()
|
|
1065
|
+
method_name = match.group(2)
|
|
1066
|
+
params_str = match.group(3)
|
|
1067
|
+
|
|
1068
|
+
# Skip keywords
|
|
1069
|
+
if method_name in ["if", "for", "while", "switch"]:
|
|
1070
|
+
continue
|
|
1071
|
+
|
|
1072
|
+
params = self._parse_java_parameters(params_str)
|
|
1073
|
+
|
|
1074
|
+
methods.append(
|
|
1075
|
+
{
|
|
1076
|
+
"name": method_name,
|
|
1077
|
+
"parameters": params,
|
|
1078
|
+
"return_type": return_type,
|
|
1079
|
+
"docstring": None,
|
|
1080
|
+
"line_number": None,
|
|
1081
|
+
"is_async": False,
|
|
1082
|
+
"is_method": True,
|
|
1083
|
+
"decorators": [],
|
|
1084
|
+
}
|
|
1085
|
+
)
|
|
1086
|
+
|
|
1087
|
+
return methods
|
|
1088
|
+
|
|
1089
|
+
def _parse_java_parameters(self, params_str: str) -> list[dict]:
|
|
1090
|
+
"""Parse Java parameter string."""
|
|
1091
|
+
params = []
|
|
1092
|
+
|
|
1093
|
+
if not params_str.strip():
|
|
1094
|
+
return params
|
|
1095
|
+
|
|
1096
|
+
# Split by comma
|
|
1097
|
+
param_list = [p.strip() for p in params_str.split(",")]
|
|
1098
|
+
|
|
1099
|
+
for param in param_list:
|
|
1100
|
+
if not param:
|
|
1101
|
+
continue
|
|
1102
|
+
|
|
1103
|
+
# Java format: Type name or final Type name
|
|
1104
|
+
parts = param.split()
|
|
1105
|
+
if len(parts) >= 2:
|
|
1106
|
+
# Remove 'final' if present
|
|
1107
|
+
if parts[0] == "final":
|
|
1108
|
+
parts = parts[1:]
|
|
1109
|
+
|
|
1110
|
+
if len(parts) >= 2:
|
|
1111
|
+
param_type = parts[0]
|
|
1112
|
+
param_name = parts[1]
|
|
1113
|
+
else:
|
|
1114
|
+
param_type = parts[0]
|
|
1115
|
+
param_name = "unknown"
|
|
1116
|
+
else:
|
|
1117
|
+
param_type = param
|
|
1118
|
+
param_name = "unknown"
|
|
1119
|
+
|
|
1120
|
+
params.append(
|
|
1121
|
+
{
|
|
1122
|
+
"name": param_name,
|
|
1123
|
+
"type_hint": param_type,
|
|
1124
|
+
"default": None, # Java doesn't support default parameters
|
|
1125
|
+
}
|
|
1126
|
+
)
|
|
1127
|
+
|
|
1128
|
+
return params
|
|
1129
|
+
|
|
1130
|
+
def _extract_java_comments(self, content: str) -> list[dict]:
|
|
1131
|
+
"""Extract Java comments (// and /* */ and /** JavaDoc */)."""
|
|
1132
|
+
comments = []
|
|
1133
|
+
|
|
1134
|
+
# Single-line comments (//)
|
|
1135
|
+
for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
|
|
1136
|
+
line_num = content[: match.start()].count("\n") + 1
|
|
1137
|
+
comment_text = match.group(1).strip()
|
|
1138
|
+
|
|
1139
|
+
comments.append({"line": line_num, "text": comment_text, "type": "inline"})
|
|
1140
|
+
|
|
1141
|
+
# Multi-line and JavaDoc comments (/* */ and /** */)
|
|
1142
|
+
for match in re.finditer(r"/\*\*?(.+?)\*/", content, re.DOTALL):
|
|
1143
|
+
start_line = content[: match.start()].count("\n") + 1
|
|
1144
|
+
comment_text = match.group(1).strip()
|
|
1145
|
+
|
|
1146
|
+
# Distinguish JavaDoc (starts with **)
|
|
1147
|
+
comment_type = "doc" if match.group(0).startswith("/**") else "block"
|
|
1148
|
+
|
|
1149
|
+
comments.append({"line": start_line, "text": comment_text, "type": comment_type})
|
|
1150
|
+
|
|
1151
|
+
return comments
|
|
1152
|
+
|
|
1153
|
+
def _analyze_ruby(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
1154
|
+
"""
|
|
1155
|
+
Analyze Ruby file using regex patterns.
|
|
1156
|
+
|
|
1157
|
+
Note: This is a simplified regex-based approach. For production,
|
|
1158
|
+
consider using parser gem or tree-sitter-ruby.
|
|
1159
|
+
|
|
1160
|
+
Regex patterns based on Ruby language documentation:
|
|
1161
|
+
https://ruby-doc.org/
|
|
1162
|
+
"""
|
|
1163
|
+
classes = []
|
|
1164
|
+
functions = []
|
|
1165
|
+
|
|
1166
|
+
# Extract class definitions
|
|
1167
|
+
class_pattern = r"class\s+(\w+)(?:\s*<\s*(\w+))?\s*$"
|
|
1168
|
+
for match in re.finditer(class_pattern, content, re.MULTILINE):
|
|
1169
|
+
class_name = match.group(1)
|
|
1170
|
+
base_class = match.group(2)
|
|
1171
|
+
|
|
1172
|
+
base_classes = [base_class] if base_class else []
|
|
1173
|
+
|
|
1174
|
+
classes.append(
|
|
1175
|
+
{
|
|
1176
|
+
"name": class_name,
|
|
1177
|
+
"base_classes": base_classes,
|
|
1178
|
+
"methods": [], # Would need to parse class body
|
|
1179
|
+
"docstring": None,
|
|
1180
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
1181
|
+
}
|
|
1182
|
+
)
|
|
1183
|
+
|
|
1184
|
+
# Extract method/function definitions
|
|
1185
|
+
# Matches: def method_name(params)
|
|
1186
|
+
func_pattern = r"def\s+(?:self\.)?(\w+[?!]?)\s*(?:\(([^)]*)\))?"
|
|
1187
|
+
for match in re.finditer(func_pattern, content):
|
|
1188
|
+
func_name = match.group(1)
|
|
1189
|
+
params_str = match.group(2) if match.group(2) else ""
|
|
1190
|
+
|
|
1191
|
+
params = self._parse_ruby_parameters(params_str)
|
|
1192
|
+
|
|
1193
|
+
functions.append(
|
|
1194
|
+
{
|
|
1195
|
+
"name": func_name,
|
|
1196
|
+
"parameters": params,
|
|
1197
|
+
"return_type": None, # Ruby has no type annotations (usually)
|
|
1198
|
+
"docstring": None,
|
|
1199
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
1200
|
+
"is_async": False,
|
|
1201
|
+
"is_method": False,
|
|
1202
|
+
"decorators": [],
|
|
1203
|
+
}
|
|
1204
|
+
)
|
|
1205
|
+
|
|
1206
|
+
# Extract comments
|
|
1207
|
+
comments = self._extract_ruby_comments(content)
|
|
1208
|
+
|
|
1209
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
1210
|
+
|
|
1211
|
+
def _parse_ruby_parameters(self, params_str: str) -> list[dict]:
|
|
1212
|
+
"""Parse Ruby parameter string."""
|
|
1213
|
+
params = []
|
|
1214
|
+
|
|
1215
|
+
if not params_str.strip():
|
|
1216
|
+
return params
|
|
1217
|
+
|
|
1218
|
+
# Split by comma
|
|
1219
|
+
param_list = [p.strip() for p in params_str.split(",")]
|
|
1220
|
+
|
|
1221
|
+
for param in param_list:
|
|
1222
|
+
if not param:
|
|
1223
|
+
continue
|
|
1224
|
+
|
|
1225
|
+
# Check for default value
|
|
1226
|
+
default = None
|
|
1227
|
+
if "=" in param:
|
|
1228
|
+
name, default = param.split("=", 1)
|
|
1229
|
+
name = name.strip()
|
|
1230
|
+
default = default.strip()
|
|
1231
|
+
else:
|
|
1232
|
+
name = param
|
|
1233
|
+
|
|
1234
|
+
# Ruby doesn't have type hints in method signatures
|
|
1235
|
+
params.append({"name": name, "type_hint": None, "default": default})
|
|
1236
|
+
|
|
1237
|
+
return params
|
|
1238
|
+
|
|
1239
|
+
def _extract_ruby_comments(self, content: str) -> list[dict]:
|
|
1240
|
+
"""Extract Ruby comments (# style)."""
|
|
1241
|
+
comments = []
|
|
1242
|
+
|
|
1243
|
+
for i, line in enumerate(content.splitlines(), 1):
|
|
1244
|
+
stripped = line.strip()
|
|
1245
|
+
|
|
1246
|
+
# Ruby comments start with #
|
|
1247
|
+
if stripped.startswith("#"):
|
|
1248
|
+
comment_text = stripped[1:].strip()
|
|
1249
|
+
comments.append({"line": i, "text": comment_text, "type": "inline"})
|
|
1250
|
+
|
|
1251
|
+
return comments
|
|
1252
|
+
|
|
1253
|
+
def _analyze_php(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
1254
|
+
"""
|
|
1255
|
+
Analyze PHP file using regex patterns.
|
|
1256
|
+
|
|
1257
|
+
Note: This is a simplified regex-based approach. For production,
|
|
1258
|
+
consider using nikic/PHP-Parser via subprocess or tree-sitter-php.
|
|
1259
|
+
|
|
1260
|
+
Regex patterns based on PHP language reference:
|
|
1261
|
+
https://www.php.net/manual/en/langref.php
|
|
1262
|
+
"""
|
|
1263
|
+
classes = []
|
|
1264
|
+
functions = []
|
|
1265
|
+
|
|
1266
|
+
# Extract class definitions
|
|
1267
|
+
class_pattern = r"(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?\s*\{"
|
|
1268
|
+
for match in re.finditer(class_pattern, content):
|
|
1269
|
+
class_name = match.group(1)
|
|
1270
|
+
base_class = match.group(2)
|
|
1271
|
+
interfaces_str = match.group(3)
|
|
1272
|
+
|
|
1273
|
+
base_classes = []
|
|
1274
|
+
if base_class:
|
|
1275
|
+
base_classes.append(base_class)
|
|
1276
|
+
if interfaces_str:
|
|
1277
|
+
base_classes.extend([i.strip() for i in interfaces_str.split(",")])
|
|
1278
|
+
|
|
1279
|
+
# Extract methods (simplified)
|
|
1280
|
+
class_block_start = match.end()
|
|
1281
|
+
brace_count = 1
|
|
1282
|
+
class_block_end = class_block_start
|
|
1283
|
+
for i, char in enumerate(content[class_block_start:], class_block_start):
|
|
1284
|
+
if char == "{":
|
|
1285
|
+
brace_count += 1
|
|
1286
|
+
elif char == "}":
|
|
1287
|
+
brace_count -= 1
|
|
1288
|
+
if brace_count == 0:
|
|
1289
|
+
class_block_end = i
|
|
1290
|
+
break
|
|
1291
|
+
|
|
1292
|
+
if class_block_end > class_block_start:
|
|
1293
|
+
class_body = content[class_block_start:class_block_end]
|
|
1294
|
+
methods = self._extract_php_methods(class_body)
|
|
1295
|
+
else:
|
|
1296
|
+
methods = []
|
|
1297
|
+
|
|
1298
|
+
classes.append(
|
|
1299
|
+
{
|
|
1300
|
+
"name": class_name,
|
|
1301
|
+
"base_classes": base_classes,
|
|
1302
|
+
"methods": methods,
|
|
1303
|
+
"docstring": None,
|
|
1304
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
1305
|
+
}
|
|
1306
|
+
)
|
|
1307
|
+
|
|
1308
|
+
# Extract function definitions
|
|
1309
|
+
func_pattern = r"function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
|
|
1310
|
+
for match in re.finditer(func_pattern, content):
|
|
1311
|
+
func_name = match.group(1)
|
|
1312
|
+
params_str = match.group(2)
|
|
1313
|
+
return_type = match.group(3)
|
|
1314
|
+
|
|
1315
|
+
params = self._parse_php_parameters(params_str)
|
|
1316
|
+
|
|
1317
|
+
functions.append(
|
|
1318
|
+
{
|
|
1319
|
+
"name": func_name,
|
|
1320
|
+
"parameters": params,
|
|
1321
|
+
"return_type": return_type,
|
|
1322
|
+
"docstring": None,
|
|
1323
|
+
"line_number": content[: match.start()].count("\n") + 1,
|
|
1324
|
+
"is_async": False,
|
|
1325
|
+
"is_method": False,
|
|
1326
|
+
"decorators": [],
|
|
1327
|
+
}
|
|
1328
|
+
)
|
|
1329
|
+
|
|
1330
|
+
# Extract comments
|
|
1331
|
+
comments = self._extract_php_comments(content)
|
|
1332
|
+
|
|
1333
|
+
return {"classes": classes, "functions": functions, "comments": comments}
|
|
1334
|
+
|
|
1335
|
+
def _extract_php_methods(self, class_body: str) -> list[dict]:
|
|
1336
|
+
"""Extract PHP method signatures from class body."""
|
|
1337
|
+
methods = []
|
|
1338
|
+
|
|
1339
|
+
method_pattern = r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
|
|
1340
|
+
for match in re.finditer(method_pattern, class_body):
|
|
1341
|
+
method_name = match.group(1)
|
|
1342
|
+
params_str = match.group(2)
|
|
1343
|
+
return_type = match.group(3)
|
|
1344
|
+
|
|
1345
|
+
params = self._parse_php_parameters(params_str)
|
|
1346
|
+
|
|
1347
|
+
methods.append(
|
|
1348
|
+
{
|
|
1349
|
+
"name": method_name,
|
|
1350
|
+
"parameters": params,
|
|
1351
|
+
"return_type": return_type,
|
|
1352
|
+
"docstring": None,
|
|
1353
|
+
"line_number": None,
|
|
1354
|
+
"is_async": False,
|
|
1355
|
+
"is_method": True,
|
|
1356
|
+
"decorators": [],
|
|
1357
|
+
}
|
|
1358
|
+
)
|
|
1359
|
+
|
|
1360
|
+
return methods
|
|
1361
|
+
|
|
1362
|
+
def _parse_php_parameters(self, params_str: str) -> list[dict]:
|
|
1363
|
+
"""Parse PHP parameter string."""
|
|
1364
|
+
params = []
|
|
1365
|
+
|
|
1366
|
+
if not params_str.strip():
|
|
1367
|
+
return params
|
|
1368
|
+
|
|
1369
|
+
# Split by comma
|
|
1370
|
+
param_list = [p.strip() for p in params_str.split(",")]
|
|
1371
|
+
|
|
1372
|
+
for param in param_list:
|
|
1373
|
+
if not param:
|
|
1374
|
+
continue
|
|
1375
|
+
|
|
1376
|
+
# Check for default value
|
|
1377
|
+
default = None
|
|
1378
|
+
if "=" in param:
|
|
1379
|
+
param, default = param.split("=", 1)
|
|
1380
|
+
param = param.strip()
|
|
1381
|
+
default = default.strip()
|
|
1382
|
+
|
|
1383
|
+
# PHP format: Type $name or just $name
|
|
1384
|
+
parts = param.split()
|
|
1385
|
+
if len(parts) >= 2:
|
|
1386
|
+
param_type = parts[0]
|
|
1387
|
+
param_name = parts[1]
|
|
1388
|
+
else:
|
|
1389
|
+
param_type = None
|
|
1390
|
+
param_name = parts[0] if parts else "unknown"
|
|
1391
|
+
|
|
1392
|
+
# Remove $ from variable name
|
|
1393
|
+
if param_name.startswith("$"):
|
|
1394
|
+
param_name = param_name[1:]
|
|
1395
|
+
|
|
1396
|
+
params.append({"name": param_name, "type_hint": param_type, "default": default})
|
|
1397
|
+
|
|
1398
|
+
return params
|
|
1399
|
+
|
|
1400
|
+
def _extract_php_comments(self, content: str) -> list[dict]:
|
|
1401
|
+
"""Extract PHP comments (// and /* */ and # and /** PHPDoc */)."""
|
|
1402
|
+
comments = []
|
|
1403
|
+
|
|
1404
|
+
# Single-line comments (// and #)
|
|
1405
|
+
for match in re.finditer(r"(?://|#)(.+)$", content, re.MULTILINE):
|
|
1406
|
+
line_num = content[: match.start()].count("\n") + 1
|
|
1407
|
+
comment_text = match.group(1).strip()
|
|
1408
|
+
|
|
1409
|
+
comments.append({"line": line_num, "text": comment_text, "type": "inline"})
|
|
1410
|
+
|
|
1411
|
+
# Multi-line and PHPDoc comments (/* */ and /** */)
|
|
1412
|
+
for match in re.finditer(r"/\*\*?(.+?)\*/", content, re.DOTALL):
|
|
1413
|
+
start_line = content[: match.start()].count("\n") + 1
|
|
1414
|
+
comment_text = match.group(1).strip()
|
|
1415
|
+
|
|
1416
|
+
# Distinguish PHPDoc (starts with **)
|
|
1417
|
+
comment_type = "doc" if match.group(0).startswith("/**") else "block"
|
|
1418
|
+
|
|
1419
|
+
comments.append({"line": start_line, "text": comment_text, "type": comment_type})
|
|
1420
|
+
|
|
1421
|
+
return comments
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
if __name__ == "__main__":
    # Manual smoke test: analyze a small in-memory Python sample and print
    # a summary of what the analyzer found.
    python_code = '''
class Node2D:
    """Base class for 2D nodes."""

    def move_local_x(self, delta: float, snap: bool = False) -> None:
        """Move node along local X axis."""
        pass

    async def tween_position(self, target: tuple, duration: float = 1.0):
        """Animate position to target."""
        pass

def create_sprite(texture: str) -> Node2D:
    """Create a new sprite node."""
    return Node2D()
'''

    # "test.py" is only a label here; the source text is passed in directly.
    analyzer = CodeAnalyzer(depth="deep")
    result = analyzer.analyze_file("test.py", python_code, "Python")

    print("Analysis Result:")
    print(f"Classes: {len(result.get('classes', []))}")
    print(f"Functions: {len(result.get('functions', []))}")

    # Show the first class in detail, one line per method signature.
    if result.get("classes"):
        cls = result["classes"][0]
        print(f"\nClass: {cls['name']}")
        print(f" Methods: {len(cls['methods'])}")
        for method in cls["methods"]:
            # Render each parameter as "name: type" with " = default"
            # appended only when a truthy default was recorded.
            params = ", ".join(
                [
                    f"{p['name']}: {p['type_hint']}"
                    + (f" = {p['default']}" if p.get("default") else "")
                    for p in method["parameters"]
                ]
            )
            print(f" {method['name']}({params}) -> {method['return_type']}")
|