code2llm 0.5.102__tar.gz → 0.5.104__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. {code2llm-0.5.102 → code2llm-0.5.104}/PKG-INFO +18 -11
  2. {code2llm-0.5.102 → code2llm-0.5.104}/README.md +7 -10
  3. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/__init__.py +1 -1
  4. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_commands.py +1 -1
  5. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/base.py +24 -5
  6. code2llm-0.5.104/code2llm/core/lang/cpp.py +35 -0
  7. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/csharp.py +1 -1
  8. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/go_lang.py +27 -12
  9. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/java.py +1 -1
  10. code2llm-0.5.104/code2llm/core/lang/php.py +66 -0
  11. code2llm-0.5.104/code2llm/core/lang/ts_extractors.py +180 -0
  12. code2llm-0.5.104/code2llm/core/lang/ts_parser.py +158 -0
  13. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/typescript.py +18 -20
  14. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/refactoring.py +1 -1
  15. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/__init__.py +1 -1
  16. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/PKG-INFO +18 -11
  17. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/SOURCES.txt +2 -0
  18. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/requires.txt +10 -0
  19. {code2llm-0.5.102 → code2llm-0.5.104}/pyproject.toml +11 -1
  20. {code2llm-0.5.102 → code2llm-0.5.104}/setup.py +1 -1
  21. code2llm-0.5.102/code2llm/core/lang/cpp.py +0 -42
  22. code2llm-0.5.102/code2llm/core/lang/php.py +0 -106
  23. {code2llm-0.5.102 → code2llm-0.5.104}/LICENSE +0 -0
  24. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/__main__.py +0 -0
  25. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/__init__.py +0 -0
  26. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/call_graph.py +0 -0
  27. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/cfg.py +0 -0
  28. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/coupling.py +0 -0
  29. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/data_analysis.py +0 -0
  30. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/dfg.py +0 -0
  31. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/pipeline_detector.py +0 -0
  32. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/side_effects.py +0 -0
  33. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/smells.py +0 -0
  34. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/type_inference.py +0 -0
  35. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/utils/__init__.py +0 -0
  36. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/utils/ast_helpers.py +0 -0
  37. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/api.py +0 -0
  38. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli.py +0 -0
  39. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_analysis.py +0 -0
  40. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/__init__.py +0 -0
  41. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/code2logic.py +0 -0
  42. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/formats.py +0 -0
  43. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/orchestrator.py +0 -0
  44. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/prompt.py +0 -0
  45. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_parser.py +0 -0
  46. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/__init__.py +0 -0
  47. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/analyzer.py +0 -0
  48. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/ast_registry.py +0 -0
  49. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/config.py +0 -0
  50. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/export_pipeline.py +0 -0
  51. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/file_analyzer.py +0 -0
  52. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/file_cache.py +0 -0
  53. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/file_filter.py +0 -0
  54. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/gitignore.py +0 -0
  55. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/incremental.py +0 -0
  56. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/__init__.py +0 -0
  57. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/generic.py +0 -0
  58. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/ruby.py +0 -0
  59. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/rust.py +0 -0
  60. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/large_repo.py +0 -0
  61. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/models.py +0 -0
  62. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/repo_files.py +0 -0
  63. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/__init__.py +0 -0
  64. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/cache.py +0 -0
  65. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/incremental.py +0 -0
  66. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/prioritizer.py +0 -0
  67. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/scanner.py +0 -0
  68. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/strategies.py +0 -0
  69. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming_analyzer.py +0 -0
  70. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/toon_size_manager.py +0 -0
  71. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/__init__.py +0 -0
  72. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/article_view.py +0 -0
  73. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/base.py +0 -0
  74. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/context_exporter.py +0 -0
  75. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/context_view.py +0 -0
  76. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/evolution_exporter.py +0 -0
  77. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/flow_constants.py +0 -0
  78. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/flow_exporter.py +0 -0
  79. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/flow_renderer.py +0 -0
  80. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/html_dashboard.py +0 -0
  81. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/index_generator.py +0 -0
  82. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/json_exporter.py +0 -0
  83. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/llm_exporter.py +0 -0
  84. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/map_exporter.py +0 -0
  85. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/mermaid_exporter.py +0 -0
  86. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/mermaid_flow_helpers.py +0 -0
  87. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/project_yaml_exporter.py +0 -0
  88. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/readme_exporter.py +0 -0
  89. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/report_generators.py +0 -0
  90. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/__init__.py +0 -0
  91. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/helpers.py +0 -0
  92. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/metrics.py +0 -0
  93. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/module_detail.py +0 -0
  94. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/renderer.py +0 -0
  95. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon.py +0 -0
  96. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon_view.py +0 -0
  97. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/validate_project.py +0 -0
  98. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/yaml_exporter.py +0 -0
  99. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/generators/__init__.py +0 -0
  100. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/generators/llm_flow.py +0 -0
  101. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/generators/llm_task.py +0 -0
  102. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/generators/mermaid.py +0 -0
  103. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/config.py +0 -0
  104. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/entity_resolution.py +0 -0
  105. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/intent_matching.py +0 -0
  106. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/normalization.py +0 -0
  107. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/pipeline.py +0 -0
  108. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/patterns/__init__.py +0 -0
  109. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/patterns/detector.py +0 -0
  110. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/refactor/__init__.py +0 -0
  111. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/refactor/prompt_engine.py +0 -0
  112. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/dependency_links.txt +0 -0
  113. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/entry_points.txt +0 -0
  114. {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/top_level.txt +0 -0
  115. {code2llm-0.5.102 → code2llm-0.5.104}/setup.cfg +0 -0
  116. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_advanced_analysis.py +0 -0
  117. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_analyzer.py +0 -0
  118. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_deep_analysis.py +0 -0
  119. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_edge_cases.py +0 -0
  120. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_flow_exporter.py +0 -0
  121. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_format_quality.py +0 -0
  122. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_multilanguage_e2e.py +0 -0
  123. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_nlp_pipeline.py +0 -0
  124. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_nonpython_cc_calls.py +0 -0
  125. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_pipeline_detector.py +0 -0
  126. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_project_toon_export.py +0 -0
  127. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_prompt_engine.py +0 -0
  128. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_prompt_txt.py +0 -0
  129. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_refactoring_engine.py +0 -0
  130. {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_toon_v2.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code2llm
3
- Version: 0.5.102
3
+ Version: 0.5.104
4
4
  Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
5
5
  Home-page: https://github.com/wronai/stts
6
6
  Author: STTS Project
@@ -37,6 +37,16 @@ Requires-Dist: vulture>=2.10
37
37
  Requires-Dist: tiktoken>=0.5
38
38
  Requires-Dist: tree-sitter>=0.21
39
39
  Requires-Dist: tree-sitter-python>=0.21
40
+ Requires-Dist: tree-sitter-javascript>=0.21
41
+ Requires-Dist: tree-sitter-typescript>=0.21
42
+ Requires-Dist: tree-sitter-go>=0.21
43
+ Requires-Dist: tree-sitter-rust>=0.21
44
+ Requires-Dist: tree-sitter-java>=0.21
45
+ Requires-Dist: tree-sitter-c>=0.21
46
+ Requires-Dist: tree-sitter-cpp>=0.22
47
+ Requires-Dist: tree-sitter-c-sharp>=0.21
48
+ Requires-Dist: tree-sitter-php>=0.22
49
+ Requires-Dist: tree-sitter-ruby>=0.21
40
50
  Provides-Extra: dev
41
51
  Requires-Dist: pytest>=6.2; extra == "dev"
42
52
  Requires-Dist: pytest-cov>=2.12; extra == "dev"
@@ -53,16 +63,13 @@ Dynamic: requires-python
53
63
 
54
64
  # code2llm - Generated Analysis Files
55
65
 
56
-
57
66
  ## AI Cost Tracking
58
67
 
59
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.102-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
60
- ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-51.2h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
68
+ ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-yellow) ![AI Model](https://img.shields.io/badge/AI%20Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
61
69
 
62
- - 🤖 **LLM usage:** $7.5000 (148 commits)
63
- - 👤 **Human dev:** ~$5123 (51.2h @ $100/h, 30min dedup)
70
+ This project uses AI-generated code. Total cost: **$7.5000** with **153** AI commits.
64
71
 
65
- Generated on 2026-03-31 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/qwen/qwen3-coder-next)
72
+ Generated on 2026-04-09 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/models/openrouter/qwen/qwen3-coder-next)
66
73
 
67
74
  ---
68
75
 
@@ -390,10 +397,10 @@ code2llm ./ -f yaml --separate-orphans
390
397
  ---
391
398
 
392
399
  **Generated by**: `code2llm ./ -f all --readme`
393
- **Analysis Date**: 2026-03-31
394
- **Total Functions**: 934
395
- **Total Classes**: 106
396
- **Modules**: 122
400
+ **Analysis Date**: 2026-04-09
401
+ **Total Functions**: 1011
402
+ **Total Classes**: 111
403
+ **Modules**: 131
397
404
 
398
405
  For more information about code2llm, visit: https://github.com/tom-sapletta/code2llm
399
406
 
@@ -1,15 +1,12 @@
1
1
  # code2llm - Generated Analysis Files
2
2
 
3
-
4
3
  ## AI Cost Tracking
5
4
 
6
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.5.102-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
7
- ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-51.2h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
5
+ ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-yellow) ![AI Model](https://img.shields.io/badge/AI%20Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
8
6
 
9
- - 🤖 **LLM usage:** $7.5000 (148 commits)
10
- - 👤 **Human dev:** ~$5123 (51.2h @ $100/h, 30min dedup)
7
+ This project uses AI-generated code. Total cost: **$7.5000** with **153** AI commits.
11
8
 
12
- Generated on 2026-03-31 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/qwen/qwen3-coder-next)
9
+ Generated on 2026-04-09 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/models/openrouter/qwen/qwen3-coder-next)
13
10
 
14
11
  ---
15
12
 
@@ -337,10 +334,10 @@ code2llm ./ -f yaml --separate-orphans
337
334
  ---
338
335
 
339
336
  **Generated by**: `code2llm ./ -f all --readme`
340
- **Analysis Date**: 2026-03-31
341
- **Total Functions**: 934
342
- **Total Classes**: 106
343
- **Modules**: 122
337
+ **Analysis Date**: 2026-04-09
338
+ **Total Functions**: 1011
339
+ **Total Classes**: 111
340
+ **Modules**: 131
344
341
 
345
342
  For more information about code2llm, visit: https://github.com/tom-sapletta/code2llm
346
343
 
@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
8
8
  and entity resolution with multilingual support.
9
9
  """
10
10
 
11
- __version__ = "0.5.102"
11
+ __version__ = "0.5.104"
12
12
  __author__ = "STTS Project"
13
13
 
14
14
  # Core analysis components (lightweight, always needed)
@@ -128,7 +128,7 @@ def validate_chunked_output(output_dir: Path, args) -> bool:
128
128
  print(f"✗ No chunk directories found in: {output_dir}", file=sys.stderr)
129
129
  return False
130
130
 
131
- required_files = ['analysis.toon', 'context.md', 'evolution.toon.yaml']
131
+ required_files = ['analysis.toon.yaml', 'context.md', 'evolution.toon.yaml']
132
132
  issues = []
133
133
  valid_chunks = []
134
134
 
@@ -420,15 +420,34 @@ def analyze_c_family(
420
420
  patterns: Dict,
421
421
  lang_config: Dict,
422
422
  cc_lang: str = 'c_family',
423
+ ext: str = '',
423
424
  ) -> Dict:
424
425
  """Shared analyzer for C-family languages (Java, C#, C++, etc.).
425
426
 
426
- Reduces boilerplate duplication across Java/C#/C++ analyzers.
427
+ Uses tree-sitter when available (10× faster), falls back to regex.
427
428
  """
428
- result = _extract_declarations(
429
- content, file_path, module_name,
430
- patterns, stats, lang_config,
431
- )
429
+ result = None
430
+
431
+ # Try tree-sitter first (much faster)
432
+ if ext:
433
+ try:
434
+ from .ts_parser import parse_source
435
+ from .ts_extractors import extract_declarations_ts
436
+ tree = parse_source(content, ext)
437
+ if tree:
438
+ result = extract_declarations_ts(
439
+ tree, content.encode('utf-8'), ext, file_path, module_name
440
+ )
441
+ except ImportError:
442
+ pass # tree-sitter not installed
443
+
444
+ # Fallback to regex
445
+ if result is None:
446
+ result = _extract_declarations(
447
+ content, file_path, module_name,
448
+ patterns, stats, lang_config,
449
+ )
450
+
432
451
  calculate_complexity_regex(content, result, lang=cc_lang)
433
452
  extract_calls_regex(content, module_name, result)
434
453
  stats['files_processed'] += 1
@@ -0,0 +1,35 @@
1
+ """C++ analyzer (regex-based, with tree-sitter support)."""
2
+
3
+ import re
4
+ from typing import Dict
5
+
6
+ from code2llm.core.lang.base import analyze_c_family
7
+
8
+ # C++-specific patterns
9
+ _CPP_PATTERNS = {
10
+ 'import': re.compile(r'^\s*#include\s*["<]([^">]+)[">]'),
11
+ 'class': re.compile(
12
+ r'^\s*(?:class|struct)\s+(\w+)'
13
+ r'(?:\s*:\s*(?:public|private|protected)\s+(\w+))?'
14
+ ),
15
+ 'function': re.compile(
16
+ r'^\s*(?:virtual\s+|static\s+|inline\s+)?'
17
+ r'(?:[\w:*&<>\s]+\s+)?'
18
+ r'(\w+)\s*\([^)]*\)'
19
+ ),
20
+ }
21
+
22
+ _CPP_CONFIG = {
23
+ 'index_files': (),
24
+ 'brace_track': True,
25
+ 'reserved': {'if', 'for', 'while', 'switch', 'return', 'catch', 'class'},
26
+ }
27
+
28
+
29
def analyze_cpp(content: str, file_path: str, module_name: str,
                ext: str, stats: Dict) -> Dict:
    """Run the shared C-family analyzer with the C++ patterns and config.

    ``ext`` is forwarded so the shared analyzer can attempt its tree-sitter
    path before falling back to the regexes in ``_CPP_PATTERNS``.  The return
    value is whatever ``analyze_c_family`` produces for this file.
    """
    analysis = analyze_c_family(
        content,
        file_path,
        module_name,
        stats,
        patterns=_CPP_PATTERNS,
        lang_config=_CPP_CONFIG,
        ext=ext,
    )
    return analysis
@@ -38,5 +38,5 @@ def analyze_csharp(content: str, file_path: str, module_name: str,
38
38
  """Analyze C# files using shared C-family extraction."""
39
39
  return analyze_c_family(
40
40
  content, file_path, module_name, stats,
41
- _CSHARP_PATTERNS, _CSHARP_CONFIG,
41
+ _CSHARP_PATTERNS, _CSHARP_CONFIG, ext=ext,
42
42
  )
@@ -1,16 +1,14 @@
1
- """Go analyzer (regex-based)."""
1
+ """Go analyzer (regex-based, with tree-sitter support)."""
2
2
 
3
3
  import re
4
- from pathlib import Path
5
4
  from typing import Dict
6
5
 
7
6
  from code2llm.core.models import ClassInfo, FunctionInfo, ModuleInfo
8
7
  from code2llm.core.lang.base import calculate_complexity_regex, extract_calls_regex
9
8
 
10
9
 
11
- def analyze_go(content: str, file_path: str, module_name: str,
12
- ext: str, stats: Dict) -> Dict:
13
- """Analyze Go files using regex-based parsing."""
10
+ def _analyze_go_regex(content: str, file_path: str, module_name: str, stats: Dict) -> Dict:
11
+ """Regex fallback for Go analysis."""
14
12
  result = {
15
13
  'module': ModuleInfo(name=module_name, file=file_path, is_package=False),
16
14
  'functions': {},
@@ -20,7 +18,6 @@ def analyze_go(content: str, file_path: str, module_name: str,
20
18
  }
21
19
 
22
20
  lines = content.split('\n')
23
-
24
21
  import_pattern = re.compile(r'^\s*import\s+(?:\(\s*["\']([^"\']+)["\']|["\']([^"\']+)["\'])')
25
22
  func_pattern = re.compile(r'^\s*func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(')
26
23
  struct_pattern = re.compile(r'^\s*type\s+(\w+)\s+struct')
@@ -31,14 +28,12 @@ def analyze_go(content: str, file_path: str, module_name: str,
31
28
  if not line or line.startswith('//'):
32
29
  continue
33
30
 
34
- # Imports
35
31
  import_match = import_pattern.match(line)
36
32
  if import_match:
37
33
  imp = import_match.group(1) or import_match.group(2)
38
34
  if imp:
39
35
  result['module'].imports.append(imp)
40
36
 
41
- # Functions
42
37
  func_match = func_pattern.match(line)
43
38
  if func_match:
44
39
  func_name = func_match.group(1)
@@ -53,7 +48,6 @@ def analyze_go(content: str, file_path: str, module_name: str,
53
48
  result['module'].functions.append(qualified_name)
54
49
  stats['functions_found'] += 1
55
50
 
56
- # Structs (treated as classes)
57
51
  struct_match = struct_pattern.match(line)
58
52
  if struct_match:
59
53
  class_name = struct_match.group(1)
@@ -66,7 +60,6 @@ def analyze_go(content: str, file_path: str, module_name: str,
66
60
  result['module'].classes.append(qualified_name)
67
61
  stats['classes_found'] += 1
68
62
 
69
- # Interfaces
70
63
  interface_match = interface_pattern.match(line)
71
64
  if interface_match:
72
65
  class_name = interface_match.group(1)
@@ -79,9 +72,31 @@ def analyze_go(content: str, file_path: str, module_name: str,
79
72
  result['module'].classes.append(qualified_name)
80
73
  stats['classes_found'] += 1
81
74
 
82
- # Regex-based complexity estimation and call extraction
75
+ return result
76
+
77
+
78
def analyze_go(content: str, file_path: str, module_name: str,
               ext: str, stats: Dict) -> Dict:
    """Analyze a Go source file.

    Declarations are taken from tree-sitter when the optional parser modules
    import cleanly and yield a tree; otherwise the regex fallback
    (``_analyze_go_regex``) is used.  Complexity estimation and call
    extraction are regex-based in both cases.

    Args:
        content: Full text of the Go file.
        file_path: Path recorded on the extracted declarations.
        module_name: Prefix used for qualified names.
        ext: File extension, used to select the tree-sitter grammar.
        stats: Mutable counters; ``functions_found``, ``classes_found`` and
            ``files_processed`` are updated in place.

    Returns:
        The declaration dict produced by whichever extractor ran.
    """
    result = None

    # Try tree-sitter first.
    try:
        from .ts_parser import parse_source
        from .ts_extractors import extract_declarations_ts
        tree = parse_source(content, ext)
        if tree:
            result = extract_declarations_ts(
                tree, content.encode('utf-8'), ext, file_path, module_name
            )
    except ImportError:
        pass  # tree-sitter support is not importable; use the regex path

    if result is None:
        # The regex fallback updates the found-counters itself as it scans.
        result = _analyze_go_regex(content, file_path, module_name, stats)
    else:
        # The tree-sitter extractor has no access to ``stats``; count here so
        # both paths report the same counters.
        stats['functions_found'] += len(result['functions'])
        stats['classes_found'] += len(result['classes'])

    calculate_complexity_regex(content, result, lang='go')
    extract_calls_regex(content, module_name, result)
    stats['files_processed'] += 1
    return result
@@ -39,5 +39,5 @@ def analyze_java(content: str, file_path: str, module_name: str,
39
39
  """Analyze Java files using shared C-family extraction."""
40
40
  return analyze_c_family(
41
41
  content, file_path, module_name, stats,
42
- _JAVA_PATTERNS, _JAVA_CONFIG,
42
+ _JAVA_PATTERNS, _JAVA_CONFIG, ext=ext,
43
43
  )
@@ -0,0 +1,66 @@
1
+ import re
2
+ from typing import Dict, Optional, Tuple
3
+ from code2llm.core.models import ClassInfo, FunctionInfo, ModuleInfo
4
+ from code2llm.core.lang.base import calculate_complexity_regex, extract_calls_regex, _extract_declarations
5
+
6
+ def _parse_php_metadata(content: str, module_name: str, result: Dict) -> Tuple[Optional[str], bool]:
7
+ lines = content.split('\n')
8
+ current_namespace = None
9
+ in_php = False
10
+ for line in lines:
11
+ line = line.strip()
12
+ if line.startswith('<?php') or line.startswith('<?'):
13
+ in_php = True
14
+ continue
15
+ if line == '?>':
16
+ in_php = False
17
+ continue
18
+ if not in_php: continue
19
+ ns_match = re.match(r'^namespace\s+([\\\w]+)', line)
20
+ if ns_match:
21
+ current_namespace = ns_match.group(1)
22
+ continue
23
+ use_match = re.match(r'^use\s+([\\\w]+)', line)
24
+ if use_match:
25
+ result['module'].imports.append(use_match.group(1))
26
+ return current_namespace, in_php
27
+
28
+ def _adjust_qualified_names(result: Dict, module_name: str, namespace: str) -> None:
29
+ ns_prefix = f".{namespace}"
30
+ for key in ['classes', 'functions']:
31
+ new_items = {}
32
+ for qname, item in list(result[key].items()):
33
+ new_qname = qname.replace(f"{module_name}.", f"{module_name}{ns_prefix}.", 1)
34
+ item.qualified_name = new_qname
35
+ new_items[new_qname] = item
36
+ result[key] = new_items
37
+ result['module'].__setattr__(key, list(new_items.keys()))
38
+
39
def _extract_php_traits(content: str, file_path: str, module_name: str, namespace: Optional[str], result: Dict, stats: Dict) -> None:
    """Register every ``trait Name`` declaration as a class entry.

    Each line is stripped and matched against a ``trait <identifier>`` pattern.
    For every hit a ``ClassInfo`` is stored in ``result['classes']`` under
    ``<module>.[<namespace>.]<trait>``, the qualified name is appended to
    ``result['module'].classes`` and ``stats['classes_found']`` is incremented.
    """
    trait_re = re.compile(r'^\s*trait\s+(\w+)')
    namespace_part = f"{namespace}." if namespace else ""

    for number, text in enumerate(content.split('\n'), 1):
        found = trait_re.match(text.strip())
        if found is None:
            continue

        trait_name = found.group(1)
        qualified = f"{module_name}.{namespace_part}{trait_name}"
        result['classes'][qualified] = ClassInfo(
            name=trait_name,
            qualified_name=qualified,
            file=file_path,
            line=number,
            module=module_name,
            bases=[],
            methods=[],
            docstring="",
        )
        result['module'].classes.append(qualified)
        stats['classes_found'] += 1
49
+
50
def analyze_php(content: str, file_path: str, module_name: str, ext: str, stats: Dict) -> Dict:
    """Analyze a PHP file with the regex-based declaration extractor.

    Steps, in order: extract declarations with the PHP patterns; read the
    namespace and ``use`` imports; prefix qualified names with the namespace
    when one was declared; add traits as classes; estimate complexity and
    extract calls; bump ``stats['files_processed']``.  ``ext`` is accepted for
    signature parity with the other analyzers and is not used here.
    """
    php_patterns = {
        'import': re.compile(r'^(?:include|require|include_once|require_once)\s*["\']([^"\']+)["\']'),
        'class': re.compile(r'(?:abstract\s+|final\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w,\s\\]+))?'),
        'interface': re.compile(r'interface\s+(\w+)'),
        'function': re.compile(r'(?:public\s+|private\s+|protected\s+)?(?:static\s+)?function\s+(\w+)\s*\('),
    }
    php_config = {
        'index_files': (),
        'brace_track': True,
        'reserved': {'if', 'for', 'while', 'switch', 'return', 'catch', 'echo', 'print'},
    }

    declarations = _extract_declarations(
        content, file_path, module_name, php_patterns, stats, php_config
    )

    # Only the namespace is needed; the trailing in-PHP flag is ignored.
    php_namespace = _parse_php_metadata(content, module_name, declarations)[0]
    if php_namespace:
        _adjust_qualified_names(declarations, module_name, php_namespace)

    # Traits are added after the rename because they are created with the
    # namespace already included in their qualified name.
    _extract_php_traits(content, file_path, module_name, php_namespace, declarations, stats)

    calculate_complexity_regex(content, declarations, lang='c_family')
    extract_calls_regex(content, module_name, declarations)
    stats['files_processed'] += 1
    return declarations
@@ -0,0 +1,180 @@
1
+ """Tree-sitter based declaration extractors — fast CST traversal.
2
+
3
+ Each language has specific node types for functions, classes, methods.
4
+ This module provides unified extraction using tree-sitter queries.
5
+ """
6
+
7
+ from typing import Dict, List, Optional, Any
8
+ from pathlib import Path
9
+
10
+ from code2llm.core.models import ClassInfo, FunctionInfo, ModuleInfo
11
+
12
+
13
+ # Node type mappings per language
14
# Syntax-tree node types treated as function/method definitions, per language.
FUNCTION_TYPES = {
    # NOTE(review): 'async_function_definition' may not exist in current
    # tree-sitter-python grammars - confirm; harmless if it never matches.
    'python': ('function_definition', 'async_function_definition'),
    'javascript': ('function_declaration', 'function_expression', 'arrow_function', 'method_definition'),
    'typescript': ('function_declaration', 'function_expression', 'arrow_function', 'method_definition'),
    'go': ('function_declaration', 'method_declaration'),
    # An `impl` block is a container, not a function.  Listing 'impl_item'
    # here registered one bogus function per impl block, named after the
    # implemented type.  Functions inside an impl are still picked up as
    # 'function_item' with the impl as class context, because 'impl_item'
    # stays in CLASS_TYPES.
    'rust': ('function_item',),
    'java': ('method_declaration', 'constructor_declaration'),
    'c': ('function_definition',),
    # NOTE(review): 'template_function' looks like a name/expression node in
    # tree-sitter-cpp rather than a definition - confirm before relying on it.
    'cpp': ('function_definition', 'template_function'),
    'csharp': ('method_declaration', 'constructor_declaration'),
    'php': ('function_definition', 'method_declaration'),
    'ruby': ('method', 'singleton_method'),
}

# Syntax-tree node types treated as class-like containers, per language.
CLASS_TYPES = {
    'python': ('class_definition',),
    'javascript': ('class_declaration', 'class_expression'),
    'typescript': ('class_declaration', 'class_expression', 'interface_declaration'),
    'go': ('type_declaration',),
    'rust': ('struct_item', 'enum_item', 'impl_item', 'trait_item'),
    'java': ('class_declaration', 'interface_declaration', 'enum_declaration'),
    'c': ('struct_specifier',),
    'cpp': ('class_specifier', 'struct_specifier'),
    'csharp': ('class_declaration', 'interface_declaration', 'struct_declaration'),
    'php': ('class_declaration', 'interface_declaration', 'trait_declaration'),
    'ruby': ('class', 'module'),
}

# File extension (with leading dot) -> key used in the two tables above.
EXT_TO_LANG = {
    '.py': 'python',
    '.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
    '.ts': 'typescript', '.tsx': 'typescript',
    '.go': 'go',
    '.rs': 'rust',
    '.java': 'java',
    '.c': 'c', '.h': 'c',
    '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp', '.hpp': 'cpp', '.hxx': 'cpp',
    '.cs': 'csharp',
    '.php': 'php',
    '.rb': 'ruby',
}
55
+
56
+
57
+ def _get_node_text(node, source_bytes: bytes) -> str:
58
+ """Extract text content of a node."""
59
+ return source_bytes[node.start_byte:node.end_byte].decode('utf-8', errors='replace')
60
+
61
+
62
+ def _find_name_node(node) -> Optional[Any]:
63
+ """Find the name/identifier child of a node."""
64
+ for child in node.children:
65
+ if child.type in ('identifier', 'name', 'property_identifier', 'type_identifier'):
66
+ return child
67
+ # For method definitions, look for property_identifier
68
+ if child.type == 'property_identifier':
69
+ return child
70
+ # Fallback: look in named children
71
+ for child in node.children:
72
+ if 'name' in child.type or 'identifier' in child.type:
73
+ return child
74
+ return None
75
+
76
+
77
def _extract_functions_ts(tree, source_bytes: bytes, lang: str,
                          module_name: str, file_path: str) -> Dict[str, FunctionInfo]:
    """Collect function and method definitions from a parsed syntax tree.

    The tree is walked in document (pre-)order.  A node whose type is listed
    in ``FUNCTION_TYPES[lang]`` and for which a name can be found yields a
    ``FunctionInfo`` keyed by ``<module>[.<class>].<name>``; a later definition
    with the same qualified name overwrites an earlier one.  The class part is
    the name of the nearest enclosing node whose type is in
    ``CLASS_TYPES[lang]``.

    The walk uses an explicit stack instead of recursion so that a deeply
    nested tree cannot exceed Python's recursion limit.

    Args:
        tree: Parsed tree exposing ``root_node``.
        source_bytes: The encoded source the tree was parsed from.
        lang: Key into ``FUNCTION_TYPES`` / ``CLASS_TYPES``; unknown languages
            yield an empty result.
        module_name: Prefix for qualified names.
        file_path: Path recorded on each ``FunctionInfo``.

    Returns:
        Mapping of qualified name to ``FunctionInfo``.
    """
    functions: Dict[str, FunctionInfo] = {}
    func_types = FUNCTION_TYPES.get(lang, ())
    class_types = CLASS_TYPES.get(lang, ())  # looked up once, not per node

    pending = [(tree.root_node, None)]
    while pending:
        node, class_context = pending.pop()

        if node.type in func_types:
            name_node = _find_name_node(node)
            if name_node:
                name = _get_node_text(name_node, source_bytes)
                if class_context:
                    qname = f"{module_name}.{class_context}.{name}"
                else:
                    qname = f"{module_name}.{name}"

                # tree-sitter rows are 0-based; reported lines are 1-based.
                start_line = node.start_point[0] + 1
                end_line = node.end_point[0] + 1

                functions[qname] = FunctionInfo(
                    name=name,
                    qualified_name=qname,
                    file=file_path,
                    line=start_line,
                    end_line=end_line,
                    line_count=end_line - start_line + 1,
                    is_method=class_context is not None,
                    class_name=class_context,
                )

        # A class-like node becomes the context for everything below it.
        new_class = None
        if node.type in class_types:
            name_node = _find_name_node(node)
            if name_node:
                new_class = _get_node_text(name_node, source_bytes)
        child_context = new_class or class_context

        # Push children reversed so they are popped in source order.
        pending.extend((child, child_context) for child in reversed(node.children))

    return functions
122
+
123
+
124
def _extract_classes_ts(tree, source_bytes: bytes, lang: str,
                        module_name: str, file_path: str) -> Dict[str, ClassInfo]:
    """Collect class-like declarations from a parsed syntax tree.

    Every node whose type is listed in ``CLASS_TYPES[lang]`` and for which a
    name can be found yields a ``ClassInfo`` keyed by ``<module>.<name>``.
    Nested classes are keyed the same way (no outer-class prefix), and a later
    declaration with the same key overwrites an earlier one.

    The walk visits nodes in document order using an explicit stack, so a
    deeply nested tree cannot exceed Python's recursion limit.

    Args:
        tree: Parsed tree exposing ``root_node``.
        source_bytes: The encoded source the tree was parsed from.
        lang: Key into ``CLASS_TYPES``; unknown languages yield an empty result.
        module_name: Prefix for qualified names.
        file_path: Path recorded on each ``ClassInfo``.

    Returns:
        Mapping of qualified name to ``ClassInfo``.
    """
    classes: Dict[str, ClassInfo] = {}
    class_types = CLASS_TYPES.get(lang, ())

    pending = [tree.root_node]
    while pending:
        node = pending.pop()

        if node.type in class_types:
            name_node = _find_name_node(node)
            if name_node:
                name = _get_node_text(name_node, source_bytes)
                qname = f"{module_name}.{name}"
                classes[qname] = ClassInfo(
                    name=name,
                    qualified_name=qname,
                    file=file_path,
                    # tree-sitter rows are 0-based; reported lines are 1-based.
                    line=node.start_point[0] + 1,
                    end_line=node.end_point[0] + 1,
                )

        # Push children reversed so they are popped in source order.
        pending.extend(reversed(node.children))

    return classes
152
+
153
+
154
def extract_declarations_ts(
    tree,
    source_bytes: bytes,
    ext: str,
    file_path: str,
    module_name: str,
) -> Dict:
    """Extract all declarations from a tree-sitter tree.

    Args:
        tree: Parsed tree for the file.
        source_bytes: The encoded source the tree was parsed from.
        ext: File extension (with leading dot) used to pick the language
            tables; unknown extensions map to ``'generic'``, for which no
            node types are registered and nothing is extracted.
        file_path: Path recorded on the module and its declarations.
        module_name: Prefix for qualified names.

    Returns:
        Dict with keys ``module``, ``functions``, ``classes``, ``nodes`` and
        ``edges``.  As in the regex analyzers, ``module.functions`` and
        ``module.classes`` list the qualified names of the extracted
        declarations.  Imports are not extracted on this path, and no
        statistics counters are touched.
    """
    lang = EXT_TO_LANG.get(ext, 'generic')

    functions = _extract_functions_ts(tree, source_bytes, lang, module_name, file_path)
    classes = _extract_classes_ts(tree, source_bytes, lang, module_name, file_path)

    package_markers = ('__init__.py', 'index.js', 'index.ts', 'mod.rs', 'lib.rs')
    module = ModuleInfo(
        name=module_name,
        file=file_path,
        is_package=Path(file_path).name in package_markers,
    )
    # The regex analyzers append every qualified name to the module record;
    # mirror that so consumers of module.functions / module.classes see the
    # same data whichever extractor ran.
    module.functions.extend(functions)
    module.classes.extend(classes)

    return {
        'module': module,
        'functions': functions,
        'classes': classes,
        'nodes': {},
        'edges': [],
    }