skill_seekers-2.7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1462 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Code Analyzer for GitHub Repositories
4
+
5
+ Extracts code signatures at configurable depth levels:
6
+ - surface: File tree only (existing behavior)
7
+ - deep: Parse files for signatures, parameters, types
8
+ - full: Complete AST analysis (future enhancement)
9
+
10
+ Supports 9 programming languages with language-specific parsers:
11
+ - Python (AST-based, production quality)
12
+ - JavaScript/TypeScript (regex-based)
13
+ - C/C++ (regex-based)
14
+ - C# (regex-based, inspired by Microsoft C# spec)
15
+ - Go (regex-based, Go language spec)
16
+ - Rust (regex-based, Rust reference)
17
+ - Java (regex-based, Oracle Java spec)
18
+ - Ruby (regex-based, Ruby documentation)
19
+ - PHP (regex-based, PHP reference)
20
+
21
+ Note: Regex-based parsers are simplified implementations. For production use,
22
+ consider using dedicated parsers (tree-sitter, language-specific AST libraries).
23
+ """
24
+
25
+ import ast
26
+ import contextlib
27
+ import logging
28
+ import re
29
+ from dataclasses import asdict, dataclass
30
+ from typing import Any
31
+
32
+ logging.basicConfig(level=logging.INFO)
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ @dataclass
37
+ class Parameter:
38
+ """Represents a function parameter."""
39
+
40
+ name: str
41
+ type_hint: str | None = None
42
+ default: str | None = None
43
+
44
+
45
+ @dataclass
46
+ class FunctionSignature:
47
+ """Represents a function/method signature."""
48
+
49
+ name: str
50
+ parameters: list[Parameter]
51
+ return_type: str | None = None
52
+ docstring: str | None = None
53
+ line_number: int | None = None
54
+ is_async: bool = False
55
+ is_method: bool = False
56
+ decorators: list[str] | None = None
57
+
58
+ def __post_init__(self):
59
+ if self.decorators is None:
60
+ self.decorators = []
61
+
62
+
63
+ @dataclass
64
+ class ClassSignature:
65
+ """Represents a class signature."""
66
+
67
+ name: str
68
+ base_classes: list[str]
69
+ methods: list[FunctionSignature]
70
+ docstring: str | None = None
71
+ line_number: int | None = None
72
+
73
+
74
+ class CodeAnalyzer:
75
+ """
76
+ Analyzes code at different depth levels.
77
+ """
78
+
79
+ def __init__(self, depth: str = "surface"):
80
+ """
81
+ Initialize code analyzer.
82
+
83
+ Args:
84
+ depth: Analysis depth ('surface', 'deep', 'full')
85
+ """
86
+ self.depth = depth
87
+
88
+ def analyze_file(self, file_path: str, content: str, language: str) -> dict[str, Any]:
89
+ """
90
+ Analyze a single file based on depth level.
91
+
92
+ Args:
93
+ file_path: Path to file in repository
94
+ content: File content as string
95
+ language: Programming language (Python, JavaScript, C#, Go, Rust, Java, Ruby, PHP, etc.)
96
+
97
+ Returns:
98
+ Dict containing extracted signatures
99
+ """
100
+ if self.depth == "surface":
101
+ return {} # Surface level doesn't analyze individual files
102
+
103
+ logger.debug(f"Analyzing {file_path} (language: {language}, depth: {self.depth})")
104
+
105
+ try:
106
+ if language == "Python":
107
+ return self._analyze_python(content, file_path)
108
+ elif language in ["JavaScript", "TypeScript"]:
109
+ return self._analyze_javascript(content, file_path)
110
+ elif language in ["C", "C++"]:
111
+ return self._analyze_cpp(content, file_path)
112
+ elif language == "C#":
113
+ return self._analyze_csharp(content, file_path)
114
+ elif language == "Go":
115
+ return self._analyze_go(content, file_path)
116
+ elif language == "Rust":
117
+ return self._analyze_rust(content, file_path)
118
+ elif language == "Java":
119
+ return self._analyze_java(content, file_path)
120
+ elif language == "Ruby":
121
+ return self._analyze_ruby(content, file_path)
122
+ elif language == "PHP":
123
+ return self._analyze_php(content, file_path)
124
+ else:
125
+ logger.debug(f"No analyzer for language: {language}")
126
+ return {}
127
+ except Exception as e:
128
+ logger.warning(f"Error analyzing {file_path}: {e}")
129
+ return {}
130
+
131
+ def _analyze_python(self, content: str, file_path: str) -> dict[str, Any]:
132
+ """Analyze Python file using AST."""
133
+ try:
134
+ tree = ast.parse(content)
135
+ except SyntaxError as e:
136
+ logger.debug(f"Syntax error in {file_path}: {e}")
137
+ return {}
138
+
139
+ classes = []
140
+ functions = []
141
+
142
+ for node in ast.walk(tree):
143
+ if isinstance(node, ast.ClassDef):
144
+ class_sig = self._extract_python_class(node)
145
+ classes.append(asdict(class_sig))
146
+ elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
147
+ # Only top-level functions (not methods)
148
+ # Walk the tree and check whether any ClassDef body contains this node
149
+ is_method = False
150
+ try:
151
+ is_method = any(
152
+ isinstance(parent, ast.ClassDef)
153
+ for parent in ast.walk(tree)
154
+ if hasattr(parent, "body")
155
+ and isinstance(parent.body, list)
156
+ and node in parent.body
157
+ )
158
+ except (TypeError, AttributeError):
159
+ # If body is not iterable or check fails, assume it's a top-level function
160
+ is_method = False
161
+
162
+ if not is_method:
163
+ func_sig = self._extract_python_function(node)
164
+ functions.append(asdict(func_sig))
165
+
166
+ # Extract comments
167
+ comments = self._extract_python_comments(content)
168
+
169
+ return {"classes": classes, "functions": functions, "comments": comments}
170
+
171
+ def _extract_python_class(self, node: ast.ClassDef) -> ClassSignature:
172
+ """Extract class signature from AST node."""
173
+ # Extract base classes
174
+ bases = []
175
+ for base in node.bases:
176
+ if isinstance(base, ast.Name):
177
+ bases.append(base.id)
178
+ elif isinstance(base, ast.Attribute):
179
+ bases.append(
180
+ f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr
181
+ )
182
+
183
+ # Extract methods
184
+ methods = []
185
+ for item in node.body:
186
+ if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
187
+ method_sig = self._extract_python_function(item, is_method=True)
188
+ methods.append(method_sig)
189
+
190
+ # Extract docstring
191
+ docstring = ast.get_docstring(node)
192
+
193
+ return ClassSignature(
194
+ name=node.name,
195
+ base_classes=bases,
196
+ methods=methods,
197
+ docstring=docstring,
198
+ line_number=node.lineno,
199
+ )
200
+
201
+ def _extract_python_function(self, node, is_method: bool = False) -> FunctionSignature:
202
+ """Extract function signature from AST node."""
203
+ # Extract parameters
204
+ params = []
205
+ for arg in node.args.args:
206
+ param_type = None
207
+ if arg.annotation:
208
+ param_type = ast.unparse(arg.annotation) if hasattr(ast, "unparse") else None
209
+
210
+ params.append(Parameter(name=arg.arg, type_hint=param_type))
211
+
212
+ # Extract defaults
213
+ defaults = node.args.defaults
214
+ if defaults:
215
+ # Defaults are aligned to the end of params
216
+ num_no_default = len(params) - len(defaults)
217
+ for i, default in enumerate(defaults):
218
+ param_idx = num_no_default + i
219
+ if param_idx < len(params):
220
+ try:
221
+ params[param_idx].default = (
222
+ ast.unparse(default) if hasattr(ast, "unparse") else str(default)
223
+ )
224
+ except Exception:
225
+ params[param_idx].default = "..."
226
+
227
+ # Extract return type
228
+ return_type = None
229
+ if node.returns:
230
+ with contextlib.suppress(Exception):
231
+ return_type = ast.unparse(node.returns) if hasattr(ast, "unparse") else None
232
+
233
+ # Extract decorators
234
+ decorators = []
235
+ for decorator in node.decorator_list:
236
+ try:
237
+ if hasattr(ast, "unparse"):
238
+ decorators.append(ast.unparse(decorator))
239
+ elif isinstance(decorator, ast.Name):
240
+ decorators.append(decorator.id)
241
+ except Exception:
242
+ pass
243
+
244
+ # Extract docstring
245
+ docstring = ast.get_docstring(node)
246
+
247
+ return FunctionSignature(
248
+ name=node.name,
249
+ parameters=params,
250
+ return_type=return_type,
251
+ docstring=docstring,
252
+ line_number=node.lineno,
253
+ is_async=isinstance(node, ast.AsyncFunctionDef),
254
+ is_method=is_method,
255
+ decorators=decorators,
256
+ )
257
+
258
+ def _analyze_javascript(self, content: str, _file_path: str) -> dict[str, Any]:
259
+ """
260
+ Analyze JavaScript/TypeScript file using regex patterns.
261
+
262
+ Note: This is a simplified approach. For production, consider using
263
+ a proper JS/TS parser like esprima or ts-morph.
264
+ """
265
+ classes = []
266
+ functions = []
267
+
268
+ # Extract class definitions
269
+ class_pattern = r"class\s+(\w+)(?:\s+extends\s+(\w+))?\s*\{"
270
+ for match in re.finditer(class_pattern, content):
271
+ class_name = match.group(1)
272
+ base_class = match.group(2) if match.group(2) else None
273
+
274
+ # Try to extract methods (simplified)
275
+ class_block_start = match.end()
276
+ # This is a simplification - proper parsing would track braces
277
+ class_block_end = content.find("}", class_block_start)
278
+ if class_block_end != -1:
279
+ class_body = content[class_block_start:class_block_end]
280
+ methods = self._extract_js_methods(class_body)
281
+ else:
282
+ methods = []
283
+
284
+ classes.append(
285
+ {
286
+ "name": class_name,
287
+ "base_classes": [base_class] if base_class else [],
288
+ "methods": methods,
289
+ "docstring": None,
290
+ "line_number": content[: match.start()].count("\n") + 1,
291
+ }
292
+ )
293
+
294
+ # Extract top-level functions
295
+ func_pattern = r"(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)"
296
+ for match in re.finditer(func_pattern, content):
297
+ func_name = match.group(1)
298
+ params_str = match.group(2)
299
+ is_async = "async" in match.group(0)
300
+
301
+ params = self._parse_js_parameters(params_str)
302
+
303
+ functions.append(
304
+ {
305
+ "name": func_name,
306
+ "parameters": params,
307
+ "return_type": None, # JS doesn't have type annotations (unless TS)
308
+ "docstring": None,
309
+ "line_number": content[: match.start()].count("\n") + 1,
310
+ "is_async": is_async,
311
+ "is_method": False,
312
+ "decorators": [],
313
+ }
314
+ )
315
+
316
+ # Extract arrow functions assigned to const/let/var
317
+ arrow_pattern = r"(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*=>"
318
+ for match in re.finditer(arrow_pattern, content):
319
+ func_name = match.group(1)
320
+ params_str = match.group(2)
321
+ is_async = "async" in match.group(0)
322
+
323
+ params = self._parse_js_parameters(params_str)
324
+
325
+ functions.append(
326
+ {
327
+ "name": func_name,
328
+ "parameters": params,
329
+ "return_type": None,
330
+ "docstring": None,
331
+ "line_number": content[: match.start()].count("\n") + 1,
332
+ "is_async": is_async,
333
+ "is_method": False,
334
+ "decorators": [],
335
+ }
336
+ )
337
+
338
+ # Extract comments
339
+ comments = self._extract_js_comments(content)
340
+
341
+ return {"classes": classes, "functions": functions, "comments": comments}
342
+
343
+ def _extract_js_methods(self, class_body: str) -> list[dict]:
344
+ """Extract method signatures from class body."""
345
+ methods = []
346
+
347
+ # Match method definitions
348
+ method_pattern = r"(?:async\s+)?(\w+)\s*\(([^)]*)\)"
349
+ for match in re.finditer(method_pattern, class_body):
350
+ method_name = match.group(1)
351
+ params_str = match.group(2)
352
+ is_async = "async" in match.group(0)
353
+
354
+ # Skip control-flow keywords that the loose method regex also matches
355
+ if method_name in ["if", "for", "while", "switch"]:
356
+ continue
357
+
358
+ params = self._parse_js_parameters(params_str)
359
+
360
+ methods.append(
361
+ {
362
+ "name": method_name,
363
+ "parameters": params,
364
+ "return_type": None,
365
+ "docstring": None,
366
+ "line_number": None,
367
+ "is_async": is_async,
368
+ "is_method": True,
369
+ "decorators": [],
370
+ }
371
+ )
372
+
373
+ return methods
374
+
375
+ def _parse_js_parameters(self, params_str: str) -> list[dict]:
376
+ """Parse JavaScript parameter string."""
377
+ params = []
378
+
379
+ if not params_str.strip():
380
+ return params
381
+
382
+ # Split by comma (simplified - doesn't handle complex default values)
383
+ param_list = [p.strip() for p in params_str.split(",")]
384
+
385
+ for param in param_list:
386
+ if not param:
387
+ continue
388
+
389
+ # Check for default value
390
+ if "=" in param:
391
+ name, default = param.split("=", 1)
392
+ name = name.strip()
393
+ default = default.strip()
394
+ else:
395
+ name = param
396
+ default = None
397
+
398
+ # Check for type annotation (TypeScript)
399
+ type_hint = None
400
+ if ":" in name:
401
+ name, type_hint = name.split(":", 1)
402
+ name = name.strip()
403
+ type_hint = type_hint.strip()
404
+
405
+ params.append({"name": name, "type_hint": type_hint, "default": default})
406
+
407
+ return params
408
+
409
+ def _analyze_cpp(self, content: str, _file_path: str) -> dict[str, Any]:
410
+ """
411
+ Analyze C/C++ header file using regex patterns.
412
+
413
+ Note: This is a simplified approach focusing on header files.
414
+ For production, consider using libclang or similar.
415
+ """
416
+ classes = []
417
+ functions = []
418
+
419
+ # Extract class definitions (simplified - doesn't handle nested classes)
420
+ class_pattern = r"class\s+(\w+)(?:\s*:\s*public\s+(\w+))?\s*\{"
421
+ for match in re.finditer(class_pattern, content):
422
+ class_name = match.group(1)
423
+ base_class = match.group(2) if match.group(2) else None
424
+
425
+ classes.append(
426
+ {
427
+ "name": class_name,
428
+ "base_classes": [base_class] if base_class else [],
429
+ "methods": [], # Simplified - would need to parse class body
430
+ "docstring": None,
431
+ "line_number": content[: match.start()].count("\n") + 1,
432
+ }
433
+ )
434
+
435
+ # Extract function declarations
436
+ func_pattern = r"(\w+(?:\s*\*|\s*&)?)\s+(\w+)\s*\(([^)]*)\)"
437
+ for match in re.finditer(func_pattern, content):
438
+ return_type = match.group(1).strip()
439
+ func_name = match.group(2)
440
+ params_str = match.group(3)
441
+
442
+ # Skip common keywords
443
+ if func_name in ["if", "for", "while", "switch", "return"]:
444
+ continue
445
+
446
+ params = self._parse_cpp_parameters(params_str)
447
+
448
+ functions.append(
449
+ {
450
+ "name": func_name,
451
+ "parameters": params,
452
+ "return_type": return_type,
453
+ "docstring": None,
454
+ "line_number": content[: match.start()].count("\n") + 1,
455
+ "is_async": False,
456
+ "is_method": False,
457
+ "decorators": [],
458
+ }
459
+ )
460
+
461
+ # Extract comments
462
+ comments = self._extract_cpp_comments(content)
463
+
464
+ return {"classes": classes, "functions": functions, "comments": comments}
465
+
466
+ def _parse_cpp_parameters(self, params_str: str) -> list[dict]:
467
+ """Parse C++ parameter string."""
468
+ params = []
469
+
470
+ if not params_str.strip() or params_str.strip() == "void":
471
+ return params
472
+
473
+ # Split by comma (simplified)
474
+ param_list = [p.strip() for p in params_str.split(",")]
475
+
476
+ for param in param_list:
477
+ if not param:
478
+ continue
479
+
480
+ # Check for default value
481
+ default = None
482
+ if "=" in param:
483
+ param, default = param.rsplit("=", 1)
484
+ param = param.strip()
485
+ default = default.strip()
486
+
487
+ # Extract type and name (simplified)
488
+ # Format: "type name" or "type* name" or "type& name"
489
+ parts = param.split()
490
+ if len(parts) >= 2:
491
+ param_type = " ".join(parts[:-1])
492
+ param_name = parts[-1]
493
+ else:
494
+ param_type = param
495
+ param_name = "unknown"
496
+
497
+ params.append({"name": param_name, "type_hint": param_type, "default": default})
498
+
499
+ return params
500
+
501
+ def _extract_python_comments(self, content: str) -> list[dict]:
502
+ """
503
+ Extract Python comments (# style).
504
+
505
+ Returns list of comment dictionaries with line number, text, and type.
506
+ """
507
+ comments = []
508
+
509
+ for i, line in enumerate(content.splitlines(), 1):
510
+ stripped = line.strip()
511
+
512
+ # Skip shebang and encoding declarations
513
+ if stripped.startswith("#!") or (stripped.startswith("#") and "coding" in stripped):
514
+ continue
515
+
516
+ # Extract regular comments
517
+ if stripped.startswith("#"):
518
+ comment_text = stripped[1:].strip()
519
+ comments.append({"line": i, "text": comment_text, "type": "inline"})
520
+
521
+ return comments
522
+
523
+ def _extract_js_comments(self, content: str) -> list[dict]:
524
+ """
525
+ Extract JavaScript/TypeScript comments (// and /* */ styles).
526
+
527
+ Returns list of comment dictionaries with line number, text, and type.
528
+ """
529
+ comments = []
530
+
531
+ # Extract single-line comments (//)
532
+ for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
533
+ line_num = content[: match.start()].count("\n") + 1
534
+ comment_text = match.group(1).strip()
535
+
536
+ comments.append({"line": line_num, "text": comment_text, "type": "inline"})
537
+
538
+ # Extract multi-line comments (/* */)
539
+ for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
540
+ start_line = content[: match.start()].count("\n") + 1
541
+ comment_text = match.group(1).strip()
542
+
543
+ comments.append({"line": start_line, "text": comment_text, "type": "block"})
544
+
545
+ return comments
546
+
547
+ def _extract_cpp_comments(self, content: str) -> list[dict]:
548
+ """
549
+ Extract C++ comments (// and /* */ styles, same as JavaScript).
550
+
551
+ Returns list of comment dictionaries with line number, text, and type.
552
+ """
553
+ # C++ uses the same comment syntax as JavaScript
554
+ return self._extract_js_comments(content)
555
+
556
+ def _analyze_csharp(self, content: str, _file_path: str) -> dict[str, Any]:
557
+ """
558
+ Analyze C# file using regex patterns.
559
+
560
+ Note: This is a simplified regex-based approach. For production use with Unity/ASP.NET,
561
+ consider using tree-sitter-c-sharp or Roslyn via pythonnet for more accurate parsing.
562
+
563
+ Regex patterns inspired by C# language specification:
564
+ https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/
565
+ """
566
+ classes = []
567
+ functions = []
568
+
569
+ # Extract class definitions
570
+ # Matches: [modifiers] class ClassName [: BaseClass] [, Interface]
571
+ class_pattern = r"(?:public|private|internal|protected)?\s*(?:static|abstract|sealed)?\s*class\s+(\w+)(?:\s*:\s*([\w\s,<>]+))?\s*\{"
572
+ for match in re.finditer(class_pattern, content):
573
+ class_name = match.group(1)
574
+ bases_str = match.group(2) if match.group(2) else ""
575
+
576
+ # Parse base classes and interfaces
577
+ base_classes = []
578
+ if bases_str:
579
+ base_classes = [b.strip() for b in bases_str.split(",")]
580
+
581
+ # Try to extract methods (simplified)
582
+ class_block_start = match.end()
583
+ # Find matching closing brace (simplified - doesn't handle nested classes perfectly)
584
+ brace_count = 1
585
+ class_block_end = class_block_start
586
+ for i, char in enumerate(content[class_block_start:], class_block_start):
587
+ if char == "{":
588
+ brace_count += 1
589
+ elif char == "}":
590
+ brace_count -= 1
591
+ if brace_count == 0:
592
+ class_block_end = i
593
+ break
594
+
595
+ if class_block_end > class_block_start:
596
+ class_body = content[class_block_start:class_block_end]
597
+ methods = self._extract_csharp_methods(class_body)
598
+ else:
599
+ methods = []
600
+
601
+ classes.append(
602
+ {
603
+ "name": class_name,
604
+ "base_classes": base_classes,
605
+ "methods": methods,
606
+ "docstring": None, # Would need to extract XML doc comments
607
+ "line_number": content[: match.start()].count("\n") + 1,
608
+ }
609
+ )
610
+
611
+ # Extract top-level functions/methods
612
+ # Matches: [modifiers] [async] ReturnType MethodName(params)
613
+ func_pattern = r"(?:public|private|internal|protected)?\s*(?:static|virtual|override|abstract)?\s*(?:async\s+)?(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
614
+ for match in re.finditer(func_pattern, content):
615
+ return_type = match.group(1).strip()
616
+ func_name = match.group(2)
617
+ params_str = match.group(3)
618
+ is_async = "async" in match.group(0)
619
+
620
+ # Skip common keywords
621
+ if func_name in ["if", "for", "while", "switch", "return", "using", "namespace"]:
622
+ continue
623
+
624
+ params = self._parse_csharp_parameters(params_str)
625
+
626
+ functions.append(
627
+ {
628
+ "name": func_name,
629
+ "parameters": params,
630
+ "return_type": return_type,
631
+ "docstring": None,
632
+ "line_number": content[: match.start()].count("\n") + 1,
633
+ "is_async": is_async,
634
+ "is_method": False,
635
+ "decorators": [],
636
+ }
637
+ )
638
+
639
+ # Extract comments
640
+ comments = self._extract_csharp_comments(content)
641
+
642
+ return {"classes": classes, "functions": functions, "comments": comments}
643
+
644
+ def _extract_csharp_methods(self, class_body: str) -> list[dict]:
645
+ """Extract C# method signatures from class body."""
646
+ methods = []
647
+
648
+ # Match method definitions
649
+ method_pattern = r"(?:public|private|internal|protected)?\s*(?:static|virtual|override|abstract)?\s*(?:async\s+)?(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
650
+ for match in re.finditer(method_pattern, class_body):
651
+ return_type = match.group(1).strip()
652
+ method_name = match.group(2)
653
+ params_str = match.group(3)
654
+ is_async = "async" in match.group(0)
655
+
656
+ # Skip keywords
657
+ if method_name in ["if", "for", "while", "switch", "get", "set"]:
658
+ continue
659
+
660
+ params = self._parse_csharp_parameters(params_str)
661
+
662
+ methods.append(
663
+ {
664
+ "name": method_name,
665
+ "parameters": params,
666
+ "return_type": return_type,
667
+ "docstring": None,
668
+ "line_number": None,
669
+ "is_async": is_async,
670
+ "is_method": True,
671
+ "decorators": [],
672
+ }
673
+ )
674
+
675
+ return methods
676
+
677
+ def _parse_csharp_parameters(self, params_str: str) -> list[dict]:
678
+ """Parse C# parameter string."""
679
+ params = []
680
+
681
+ if not params_str.strip():
682
+ return params
683
+
684
+ # Split by comma (simplified)
685
+ param_list = [p.strip() for p in params_str.split(",")]
686
+
687
+ for param in param_list:
688
+ if not param:
689
+ continue
690
+
691
+ # Check for default value
692
+ default = None
693
+ if "=" in param:
694
+ param, default = param.split("=", 1)
695
+ param = param.strip()
696
+ default = default.strip()
697
+
698
+ # Parse: [ref/out] Type name
699
+ parts = param.split()
700
+ if len(parts) >= 2:
701
+ # Remove ref/out modifiers
702
+ if parts[0] in ["ref", "out", "in", "params"]:
703
+ parts = parts[1:]
704
+
705
+ if len(parts) >= 2:
706
+ param_type = parts[0]
707
+ param_name = parts[1]
708
+ else:
709
+ param_type = parts[0]
710
+ param_name = "unknown"
711
+ else:
712
+ param_type = None
713
+ param_name = param
714
+
715
+ params.append({"name": param_name, "type_hint": param_type, "default": default})
716
+
717
+ return params
718
+
719
+ def _extract_csharp_comments(self, content: str) -> list[dict]:
720
+ """Extract C# comments (// and /* */ and /// XML docs)."""
721
+ comments = []
722
+
723
+ # Single-line comments (//)
724
+ for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
725
+ line_num = content[: match.start()].count("\n") + 1
726
+ comment_text = match.group(1).strip()
727
+
728
+ # Distinguish XML doc comments (///)
729
+ comment_type = "doc" if match.group(1).startswith("/") else "inline"
730
+
731
+ comments.append(
732
+ {"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type}
733
+ )
734
+
735
+ # Multi-line comments (/* */)
736
+ for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
737
+ start_line = content[: match.start()].count("\n") + 1
738
+ comment_text = match.group(1).strip()
739
+
740
+ comments.append({"line": start_line, "text": comment_text, "type": "block"})
741
+
742
+ return comments
743
+
744
+ def _analyze_go(self, content: str, _file_path: str) -> dict[str, Any]:
745
+ """
746
+ Analyze Go file using regex patterns.
747
+
748
+ Note: This is a simplified regex-based approach. For production,
749
+ consider using go/parser from the Go standard library via subprocess.
750
+
751
+ Regex patterns based on Go language specification:
752
+ https://go.dev/ref/spec
753
+ """
754
+ classes = [] # Go doesn't have classes, but we'll extract structs
755
+ functions = []
756
+
757
+ # Extract struct definitions (Go's equivalent of classes)
758
+ struct_pattern = r"type\s+(\w+)\s+struct\s*\{"
759
+ for match in re.finditer(struct_pattern, content):
760
+ struct_name = match.group(1)
761
+
762
+ classes.append(
763
+ {
764
+ "name": struct_name,
765
+ "base_classes": [], # Go uses embedding, not inheritance
766
+ "methods": [], # Methods extracted separately
767
+ "docstring": None,
768
+ "line_number": content[: match.start()].count("\n") + 1,
769
+ }
770
+ )
771
+
772
+ # Extract function definitions
773
+ # Matches: func [receiver] name(params) [returns]
774
+ func_pattern = r"func\s+(?:\((\w+)\s+\*?(\w+)\)\s+)?(\w+)\s*\(([^)]*)\)(?:\s+\(([^)]+)\)|(?:\s+(\w+(?:\[.*?\])?(?:,\s*\w+)*)))?"
775
+ for match in re.finditer(func_pattern, content):
776
+ _receiver_var = match.group(1)
777
+ receiver_type = match.group(2)
778
+ func_name = match.group(3)
779
+ params_str = match.group(4)
780
+ returns_multi = match.group(5) # Multiple returns in parentheses
781
+ returns_single = match.group(6) # Single return without parentheses
782
+
783
+ # Determine if it's a method (has receiver)
784
+ is_method = bool(receiver_type)
785
+
786
+ # Parse return type
787
+ return_type = None
788
+ if returns_multi:
789
+ return_type = f"({returns_multi})"
790
+ elif returns_single:
791
+ return_type = returns_single
792
+
793
+ params = self._parse_go_parameters(params_str)
794
+
795
+ functions.append(
796
+ {
797
+ "name": func_name,
798
+ "parameters": params,
799
+ "return_type": return_type,
800
+ "docstring": None,
801
+ "line_number": content[: match.start()].count("\n") + 1,
802
+ "is_async": False, # Go uses goroutines differently
803
+ "is_method": is_method,
804
+ "decorators": [],
805
+ }
806
+ )
807
+
808
+ # Extract comments
809
+ comments = self._extract_go_comments(content)
810
+
811
+ return {"classes": classes, "functions": functions, "comments": comments}
812
+
813
+ def _parse_go_parameters(self, params_str: str) -> list[dict]:
814
+ """Parse Go parameter string."""
815
+ params = []
816
+
817
+ if not params_str.strip():
818
+ return params
819
+
820
+ # Split by comma
821
+ param_list = [p.strip() for p in params_str.split(",")]
822
+
823
+ for param in param_list:
824
+ if not param:
825
+ continue
826
+
827
+ # Go format: name type or name1, name2 type
828
+ # Simplified parsing
829
+ parts = param.split()
830
+ if len(parts) >= 2:
831
+ # Last part is type
832
+ param_type = parts[-1]
833
+ param_name = " ".join(parts[:-1])
834
+ else:
835
+ param_type = param
836
+ param_name = "unknown"
837
+
838
+ params.append(
839
+ {
840
+ "name": param_name,
841
+ "type_hint": param_type,
842
+ "default": None, # Go doesn't support default parameters
843
+ }
844
+ )
845
+
846
+ return params
847
+
848
+ def _extract_go_comments(self, content: str) -> list[dict]:
849
+ """Extract Go comments (// and /* */ styles)."""
850
+ # Go uses C-style comments
851
+ return self._extract_js_comments(content)
852
+
853
+ def _analyze_rust(self, content: str, _file_path: str) -> dict[str, Any]:
854
+ """
855
+ Analyze Rust file using regex patterns.
856
+
857
+ Note: This is a simplified regex-based approach. For production,
858
+ consider using syn crate via subprocess or tree-sitter-rust.
859
+
860
+ Regex patterns based on Rust language reference:
861
+ https://doc.rust-lang.org/reference/
862
+ """
863
+ classes = [] # Rust uses structs/enums/traits
864
+ functions = []
865
+
866
+ # Extract struct definitions
867
+ struct_pattern = r"(?:pub\s+)?struct\s+(\w+)(?:<[^>]+>)?\s*\{"
868
+ for match in re.finditer(struct_pattern, content):
869
+ struct_name = match.group(1)
870
+
871
+ classes.append(
872
+ {
873
+ "name": struct_name,
874
+ "base_classes": [], # Rust uses traits, not inheritance
875
+ "methods": [],
876
+ "docstring": None,
877
+ "line_number": content[: match.start()].count("\n") + 1,
878
+ }
879
+ )
880
+
881
+ # Extract function definitions
882
+ # Matches: [pub] [async] [unsafe] [const] fn name<generics>(params) -> ReturnType
883
+ func_pattern = r"(?:pub\s+)?(?:async\s+)?(?:unsafe\s+)?(?:const\s+)?fn\s+(\w+)(?:<[^>]+>)?\s*\(([^)]*)\)(?:\s*->\s*([^{;]+))?"
884
+ for match in re.finditer(func_pattern, content):
885
+ func_name = match.group(1)
886
+ params_str = match.group(2)
887
+ return_type = match.group(3).strip() if match.group(3) else None
888
+ is_async = "async" in match.group(0)
889
+
890
+ params = self._parse_rust_parameters(params_str)
891
+
892
+ functions.append(
893
+ {
894
+ "name": func_name,
895
+ "parameters": params,
896
+ "return_type": return_type,
897
+ "docstring": None,
898
+ "line_number": content[: match.start()].count("\n") + 1,
899
+ "is_async": is_async,
900
+ "is_method": False,
901
+ "decorators": [],
902
+ }
903
+ )
904
+
905
+ # Extract comments
906
+ comments = self._extract_rust_comments(content)
907
+
908
+ return {"classes": classes, "functions": functions, "comments": comments}
909
+
910
+ def _parse_rust_parameters(self, params_str: str) -> list[dict]:
911
+ """Parse Rust parameter string."""
912
+ params = []
913
+
914
+ if not params_str.strip():
915
+ return params
916
+
917
+ # Split by comma
918
+ param_list = [p.strip() for p in params_str.split(",")]
919
+
920
+ for param in param_list:
921
+ if not param:
922
+ continue
923
+
924
+ # Rust format: name: type or &self
925
+ if ":" in param:
926
+ name, param_type = param.split(":", 1)
927
+ name = name.strip()
928
+ param_type = param_type.strip()
929
+ else:
930
+ # Handle &self, &mut self, self
931
+ name = param
932
+ param_type = None
933
+
934
+ params.append(
935
+ {
936
+ "name": name,
937
+ "type_hint": param_type,
938
+ "default": None, # Rust doesn't support default parameters
939
+ }
940
+ )
941
+
942
+ return params
943
+
944
+ def _extract_rust_comments(self, content: str) -> list[dict]:
945
+ """Extract Rust comments (// and /* */ and /// doc comments)."""
946
+ comments = []
947
+
948
+ # Single-line comments (//)
949
+ for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
950
+ line_num = content[: match.start()].count("\n") + 1
951
+ comment_text = match.group(1).strip()
952
+
953
+ # Distinguish doc comments (/// or //!)
954
+ if comment_text.startswith("/") or comment_text.startswith("!"):
955
+ comment_type = "doc"
956
+ comment_text = comment_text.lstrip("/!").strip()
957
+ else:
958
+ comment_type = "inline"
959
+
960
+ comments.append({"line": line_num, "text": comment_text, "type": comment_type})
961
+
962
+ # Multi-line comments (/* */)
963
+ for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
964
+ start_line = content[: match.start()].count("\n") + 1
965
+ comment_text = match.group(1).strip()
966
+
967
+ comments.append({"line": start_line, "text": comment_text, "type": "block"})
968
+
969
+ return comments
970
+
971
+ def _analyze_java(self, content: str, _file_path: str) -> dict[str, Any]:
972
+ """
973
+ Analyze Java file using regex patterns.
974
+
975
+ Note: This is a simplified regex-based approach. For production,
976
+ consider using Eclipse JDT or JavaParser library.
977
+
978
+ Regex patterns based on Java language specification:
979
+ https://docs.oracle.com/javase/specs/
980
+ """
981
+ classes = []
982
+ functions = []
983
+
984
+ # Extract class definitions
985
+ # Matches: [modifiers] class ClassName [extends Base] [implements Interfaces]
986
+ class_pattern = r"(?:public|private|protected)?\s*(?:static|final|abstract)?\s*class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?\s*\{"
987
+ for match in re.finditer(class_pattern, content):
988
+ class_name = match.group(1)
989
+ base_class = match.group(2)
990
+ interfaces_str = match.group(3)
991
+
992
+ base_classes = []
993
+ if base_class:
994
+ base_classes.append(base_class)
995
+ if interfaces_str:
996
+ base_classes.extend([i.strip() for i in interfaces_str.split(",")])
997
+
998
+ # Extract methods (simplified)
999
+ class_block_start = match.end()
1000
+ brace_count = 1
1001
+ class_block_end = class_block_start
1002
+ for i, char in enumerate(content[class_block_start:], class_block_start):
1003
+ if char == "{":
1004
+ brace_count += 1
1005
+ elif char == "}":
1006
+ brace_count -= 1
1007
+ if brace_count == 0:
1008
+ class_block_end = i
1009
+ break
1010
+
1011
+ if class_block_end > class_block_start:
1012
+ class_body = content[class_block_start:class_block_end]
1013
+ methods = self._extract_java_methods(class_body)
1014
+ else:
1015
+ methods = []
1016
+
1017
+ classes.append(
1018
+ {
1019
+ "name": class_name,
1020
+ "base_classes": base_classes,
1021
+ "methods": methods,
1022
+ "docstring": None,
1023
+ "line_number": content[: match.start()].count("\n") + 1,
1024
+ }
1025
+ )
1026
+
1027
+ # Extract remaining method declarations (Java has no top-level functions; this mainly catches static methods)
1028
+ func_pattern = r"(?:public|private|protected)?\s*(?:static|final|synchronized)?\s*(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
1029
+ for match in re.finditer(func_pattern, content):
1030
+ return_type = match.group(1).strip()
1031
+ func_name = match.group(2)
1032
+ params_str = match.group(3)
1033
+
1034
+ # Skip keywords
1035
+ if func_name in ["if", "for", "while", "switch", "return", "class", "void"]:
1036
+ continue
1037
+
1038
+ params = self._parse_java_parameters(params_str)
1039
+
1040
+ functions.append(
1041
+ {
1042
+ "name": func_name,
1043
+ "parameters": params,
1044
+ "return_type": return_type,
1045
+ "docstring": None,
1046
+ "line_number": content[: match.start()].count("\n") + 1,
1047
+ "is_async": False,
1048
+ "is_method": False,
1049
+ "decorators": [],
1050
+ }
1051
+ )
1052
+
1053
+ # Extract comments
1054
+ comments = self._extract_java_comments(content)
1055
+
1056
+ return {"classes": classes, "functions": functions, "comments": comments}
1057
+
1058
+ def _extract_java_methods(self, class_body: str) -> list[dict]:
1059
+ """Extract Java method signatures from class body."""
1060
+ methods = []
1061
+
1062
+ method_pattern = r"(?:public|private|protected)?\s*(?:static|final|synchronized)?\s*(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
1063
+ for match in re.finditer(method_pattern, class_body):
1064
+ return_type = match.group(1).strip()
1065
+ method_name = match.group(2)
1066
+ params_str = match.group(3)
1067
+
1068
+ # Skip keywords
1069
+ if method_name in ["if", "for", "while", "switch"]:
1070
+ continue
1071
+
1072
+ params = self._parse_java_parameters(params_str)
1073
+
1074
+ methods.append(
1075
+ {
1076
+ "name": method_name,
1077
+ "parameters": params,
1078
+ "return_type": return_type,
1079
+ "docstring": None,
1080
+ "line_number": None,
1081
+ "is_async": False,
1082
+ "is_method": True,
1083
+ "decorators": [],
1084
+ }
1085
+ )
1086
+
1087
+ return methods
1088
+
1089
+ def _parse_java_parameters(self, params_str: str) -> list[dict]:
1090
+ """Parse Java parameter string."""
1091
+ params = []
1092
+
1093
+ if not params_str.strip():
1094
+ return params
1095
+
1096
+ # Split by comma
1097
+ param_list = [p.strip() for p in params_str.split(",")]
1098
+
1099
+ for param in param_list:
1100
+ if not param:
1101
+ continue
1102
+
1103
+ # Java format: Type name or final Type name
1104
+ parts = param.split()
1105
+ if len(parts) >= 2:
1106
+ # Remove 'final' if present
1107
+ if parts[0] == "final":
1108
+ parts = parts[1:]
1109
+
1110
+ if len(parts) >= 2:
1111
+ param_type = parts[0]
1112
+ param_name = parts[1]
1113
+ else:
1114
+ param_type = parts[0]
1115
+ param_name = "unknown"
1116
+ else:
1117
+ param_type = param
1118
+ param_name = "unknown"
1119
+
1120
+ params.append(
1121
+ {
1122
+ "name": param_name,
1123
+ "type_hint": param_type,
1124
+ "default": None, # Java doesn't support default parameters
1125
+ }
1126
+ )
1127
+
1128
+ return params
1129
+
1130
+ def _extract_java_comments(self, content: str) -> list[dict]:
1131
+ """Extract Java comments (// and /* */ and /** JavaDoc */)."""
1132
+ comments = []
1133
+
1134
+ # Single-line comments (//)
1135
+ for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
1136
+ line_num = content[: match.start()].count("\n") + 1
1137
+ comment_text = match.group(1).strip()
1138
+
1139
+ comments.append({"line": line_num, "text": comment_text, "type": "inline"})
1140
+
1141
+ # Multi-line and JavaDoc comments (/* */ and /** */)
1142
+ for match in re.finditer(r"/\*\*?(.+?)\*/", content, re.DOTALL):
1143
+ start_line = content[: match.start()].count("\n") + 1
1144
+ comment_text = match.group(1).strip()
1145
+
1146
+ # Distinguish JavaDoc (starts with **)
1147
+ comment_type = "doc" if match.group(0).startswith("/**") else "block"
1148
+
1149
+ comments.append({"line": start_line, "text": comment_text, "type": comment_type})
1150
+
1151
+ return comments
1152
+
1153
+ def _analyze_ruby(self, content: str, _file_path: str) -> dict[str, Any]:
1154
+ """
1155
+ Analyze Ruby file using regex patterns.
1156
+
1157
+ Note: This is a simplified regex-based approach. For production,
1158
+ consider using parser gem or tree-sitter-ruby.
1159
+
1160
+ Regex patterns based on Ruby language documentation:
1161
+ https://ruby-doc.org/
1162
+ """
1163
+ classes = []
1164
+ functions = []
1165
+
1166
+ # Extract class definitions
1167
+ class_pattern = r"class\s+(\w+)(?:\s*<\s*(\w+))?\s*$"
1168
+ for match in re.finditer(class_pattern, content, re.MULTILINE):
1169
+ class_name = match.group(1)
1170
+ base_class = match.group(2)
1171
+
1172
+ base_classes = [base_class] if base_class else []
1173
+
1174
+ classes.append(
1175
+ {
1176
+ "name": class_name,
1177
+ "base_classes": base_classes,
1178
+ "methods": [], # Would need to parse class body
1179
+ "docstring": None,
1180
+ "line_number": content[: match.start()].count("\n") + 1,
1181
+ }
1182
+ )
1183
+
1184
+ # Extract method/function definitions
1185
+ # Matches: def method_name(params)
1186
+ func_pattern = r"def\s+(?:self\.)?(\w+[?!]?)\s*(?:\(([^)]*)\))?"
1187
+ for match in re.finditer(func_pattern, content):
1188
+ func_name = match.group(1)
1189
+ params_str = match.group(2) if match.group(2) else ""
1190
+
1191
+ params = self._parse_ruby_parameters(params_str)
1192
+
1193
+ functions.append(
1194
+ {
1195
+ "name": func_name,
1196
+ "parameters": params,
1197
+ "return_type": None, # Ruby has no type annotations (usually)
1198
+ "docstring": None,
1199
+ "line_number": content[: match.start()].count("\n") + 1,
1200
+ "is_async": False,
1201
+ "is_method": False,
1202
+ "decorators": [],
1203
+ }
1204
+ )
1205
+
1206
+ # Extract comments
1207
+ comments = self._extract_ruby_comments(content)
1208
+
1209
+ return {"classes": classes, "functions": functions, "comments": comments}
1210
+
1211
+ def _parse_ruby_parameters(self, params_str: str) -> list[dict]:
1212
+ """Parse Ruby parameter string."""
1213
+ params = []
1214
+
1215
+ if not params_str.strip():
1216
+ return params
1217
+
1218
+ # Split by comma
1219
+ param_list = [p.strip() for p in params_str.split(",")]
1220
+
1221
+ for param in param_list:
1222
+ if not param:
1223
+ continue
1224
+
1225
+ # Check for default value
1226
+ default = None
1227
+ if "=" in param:
1228
+ name, default = param.split("=", 1)
1229
+ name = name.strip()
1230
+ default = default.strip()
1231
+ else:
1232
+ name = param
1233
+
1234
+ # Ruby doesn't have type hints in method signatures
1235
+ params.append({"name": name, "type_hint": None, "default": default})
1236
+
1237
+ return params
1238
+
1239
+ def _extract_ruby_comments(self, content: str) -> list[dict]:
1240
+ """Extract Ruby comments (# style)."""
1241
+ comments = []
1242
+
1243
+ for i, line in enumerate(content.splitlines(), 1):
1244
+ stripped = line.strip()
1245
+
1246
+ # Ruby comments start with #
1247
+ if stripped.startswith("#"):
1248
+ comment_text = stripped[1:].strip()
1249
+ comments.append({"line": i, "text": comment_text, "type": "inline"})
1250
+
1251
+ return comments
1252
+
1253
+ def _analyze_php(self, content: str, _file_path: str) -> dict[str, Any]:
1254
+ """
1255
+ Analyze PHP file using regex patterns.
1256
+
1257
+ Note: This is a simplified regex-based approach. For production,
1258
+ consider using nikic/PHP-Parser via subprocess or tree-sitter-php.
1259
+
1260
+ Regex patterns based on PHP language reference:
1261
+ https://www.php.net/manual/en/langref.php
1262
+ """
1263
+ classes = []
1264
+ functions = []
1265
+
1266
+ # Extract class definitions
1267
+ class_pattern = r"(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?\s*\{"
1268
+ for match in re.finditer(class_pattern, content):
1269
+ class_name = match.group(1)
1270
+ base_class = match.group(2)
1271
+ interfaces_str = match.group(3)
1272
+
1273
+ base_classes = []
1274
+ if base_class:
1275
+ base_classes.append(base_class)
1276
+ if interfaces_str:
1277
+ base_classes.extend([i.strip() for i in interfaces_str.split(",")])
1278
+
1279
+ # Extract methods (simplified)
1280
+ class_block_start = match.end()
1281
+ brace_count = 1
1282
+ class_block_end = class_block_start
1283
+ for i, char in enumerate(content[class_block_start:], class_block_start):
1284
+ if char == "{":
1285
+ brace_count += 1
1286
+ elif char == "}":
1287
+ brace_count -= 1
1288
+ if brace_count == 0:
1289
+ class_block_end = i
1290
+ break
1291
+
1292
+ if class_block_end > class_block_start:
1293
+ class_body = content[class_block_start:class_block_end]
1294
+ methods = self._extract_php_methods(class_body)
1295
+ else:
1296
+ methods = []
1297
+
1298
+ classes.append(
1299
+ {
1300
+ "name": class_name,
1301
+ "base_classes": base_classes,
1302
+ "methods": methods,
1303
+ "docstring": None,
1304
+ "line_number": content[: match.start()].count("\n") + 1,
1305
+ }
1306
+ )
1307
+
1308
+ # Extract function definitions
1309
+ func_pattern = r"function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
1310
+ for match in re.finditer(func_pattern, content):
1311
+ func_name = match.group(1)
1312
+ params_str = match.group(2)
1313
+ return_type = match.group(3)
1314
+
1315
+ params = self._parse_php_parameters(params_str)
1316
+
1317
+ functions.append(
1318
+ {
1319
+ "name": func_name,
1320
+ "parameters": params,
1321
+ "return_type": return_type,
1322
+ "docstring": None,
1323
+ "line_number": content[: match.start()].count("\n") + 1,
1324
+ "is_async": False,
1325
+ "is_method": False,
1326
+ "decorators": [],
1327
+ }
1328
+ )
1329
+
1330
+ # Extract comments
1331
+ comments = self._extract_php_comments(content)
1332
+
1333
+ return {"classes": classes, "functions": functions, "comments": comments}
1334
+
1335
+ def _extract_php_methods(self, class_body: str) -> list[dict]:
1336
+ """Extract PHP method signatures from class body."""
1337
+ methods = []
1338
+
1339
+ method_pattern = r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
1340
+ for match in re.finditer(method_pattern, class_body):
1341
+ method_name = match.group(1)
1342
+ params_str = match.group(2)
1343
+ return_type = match.group(3)
1344
+
1345
+ params = self._parse_php_parameters(params_str)
1346
+
1347
+ methods.append(
1348
+ {
1349
+ "name": method_name,
1350
+ "parameters": params,
1351
+ "return_type": return_type,
1352
+ "docstring": None,
1353
+ "line_number": None,
1354
+ "is_async": False,
1355
+ "is_method": True,
1356
+ "decorators": [],
1357
+ }
1358
+ )
1359
+
1360
+ return methods
1361
+
1362
+ def _parse_php_parameters(self, params_str: str) -> list[dict]:
1363
+ """Parse PHP parameter string."""
1364
+ params = []
1365
+
1366
+ if not params_str.strip():
1367
+ return params
1368
+
1369
+ # Split by comma
1370
+ param_list = [p.strip() for p in params_str.split(",")]
1371
+
1372
+ for param in param_list:
1373
+ if not param:
1374
+ continue
1375
+
1376
+ # Check for default value
1377
+ default = None
1378
+ if "=" in param:
1379
+ param, default = param.split("=", 1)
1380
+ param = param.strip()
1381
+ default = default.strip()
1382
+
1383
+ # PHP format: Type $name or just $name
1384
+ parts = param.split()
1385
+ if len(parts) >= 2:
1386
+ param_type = parts[0]
1387
+ param_name = parts[1]
1388
+ else:
1389
+ param_type = None
1390
+ param_name = parts[0] if parts else "unknown"
1391
+
1392
+ # Remove $ from variable name
1393
+ if param_name.startswith("$"):
1394
+ param_name = param_name[1:]
1395
+
1396
+ params.append({"name": param_name, "type_hint": param_type, "default": default})
1397
+
1398
+ return params
1399
+
1400
+ def _extract_php_comments(self, content: str) -> list[dict]:
1401
+ """Extract PHP comments (// and /* */ and # and /** PHPDoc */)."""
1402
+ comments = []
1403
+
1404
+ # Single-line comments (// and #)
1405
+ for match in re.finditer(r"(?://|#)(.+)$", content, re.MULTILINE):
1406
+ line_num = content[: match.start()].count("\n") + 1
1407
+ comment_text = match.group(1).strip()
1408
+
1409
+ comments.append({"line": line_num, "text": comment_text, "type": "inline"})
1410
+
1411
+ # Multi-line and PHPDoc comments (/* */ and /** */)
1412
+ for match in re.finditer(r"/\*\*?(.+?)\*/", content, re.DOTALL):
1413
+ start_line = content[: match.start()].count("\n") + 1
1414
+ comment_text = match.group(1).strip()
1415
+
1416
+ # Distinguish PHPDoc (starts with **)
1417
+ comment_type = "doc" if match.group(0).startswith("/**") else "block"
1418
+
1419
+ comments.append({"line": start_line, "text": comment_text, "type": comment_type})
1420
+
1421
+ return comments
1422
+
1423
+
1424
+ if __name__ == "__main__":
1425
+ # Test the analyzer
1426
+ python_code = '''
1427
+ class Node2D:
1428
+ """Base class for 2D nodes."""
1429
+
1430
+ def move_local_x(self, delta: float, snap: bool = False) -> None:
1431
+ """Move node along local X axis."""
1432
+ pass
1433
+
1434
+ async def tween_position(self, target: tuple, duration: float = 1.0):
1435
+ """Animate position to target."""
1436
+ pass
1437
+
1438
+ def create_sprite(texture: str) -> Node2D:
1439
+ """Create a new sprite node."""
1440
+ return Node2D()
1441
+ '''
1442
+
1443
+ analyzer = CodeAnalyzer(depth="deep")
1444
+ result = analyzer.analyze_file("test.py", python_code, "Python")
1445
+
1446
+ print("Analysis Result:")
1447
+ print(f"Classes: {len(result.get('classes', []))}")
1448
+ print(f"Functions: {len(result.get('functions', []))}")
1449
+
1450
+ if result.get("classes"):
1451
+ cls = result["classes"][0]
1452
+ print(f"\nClass: {cls['name']}")
1453
+ print(f" Methods: {len(cls['methods'])}")
1454
+ for method in cls["methods"]:
1455
+ params = ", ".join(
1456
+ [
1457
+ f"{p['name']}: {p['type_hint']}"
1458
+ + (f" = {p['default']}" if p.get("default") else "")
1459
+ for p in method["parameters"]
1460
+ ]
1461
+ )
1462
+ print(f" {method['name']}({params}) -> {method['return_type']}")