code2llm 0.5.102__tar.gz → 0.5.104__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code2llm-0.5.102 → code2llm-0.5.104}/PKG-INFO +18 -11
- {code2llm-0.5.102 → code2llm-0.5.104}/README.md +7 -10
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/__init__.py +1 -1
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_commands.py +1 -1
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/base.py +24 -5
- code2llm-0.5.104/code2llm/core/lang/cpp.py +35 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/csharp.py +1 -1
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/go_lang.py +27 -12
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/java.py +1 -1
- code2llm-0.5.104/code2llm/core/lang/php.py +66 -0
- code2llm-0.5.104/code2llm/core/lang/ts_extractors.py +180 -0
- code2llm-0.5.104/code2llm/core/lang/ts_parser.py +158 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/typescript.py +18 -20
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/refactoring.py +1 -1
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/__init__.py +1 -1
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/PKG-INFO +18 -11
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/SOURCES.txt +2 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/requires.txt +10 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/pyproject.toml +11 -1
- {code2llm-0.5.102 → code2llm-0.5.104}/setup.py +1 -1
- code2llm-0.5.102/code2llm/core/lang/cpp.py +0 -42
- code2llm-0.5.102/code2llm/core/lang/php.py +0 -106
- {code2llm-0.5.102 → code2llm-0.5.104}/LICENSE +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/__main__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/call_graph.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/cfg.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/coupling.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/data_analysis.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/dfg.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/pipeline_detector.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/side_effects.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/smells.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/type_inference.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/utils/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/analysis/utils/ast_helpers.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/api.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_analysis.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/code2logic.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/formats.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/orchestrator.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_exports/prompt.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/cli_parser.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/analyzer.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/ast_registry.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/config.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/export_pipeline.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/file_analyzer.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/file_cache.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/file_filter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/gitignore.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/incremental.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/generic.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/ruby.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/lang/rust.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/large_repo.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/models.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/repo_files.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/cache.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/incremental.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/prioritizer.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/scanner.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming/strategies.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/streaming_analyzer.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/core/toon_size_manager.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/article_view.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/base.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/context_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/context_view.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/evolution_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/flow_constants.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/flow_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/flow_renderer.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/html_dashboard.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/index_generator.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/json_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/llm_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/map_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/mermaid_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/mermaid_flow_helpers.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/project_yaml_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/readme_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/report_generators.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/helpers.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/metrics.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/module_detail.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon/renderer.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/toon_view.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/validate_project.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/exporters/yaml_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/generators/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/generators/llm_flow.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/generators/llm_task.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/generators/mermaid.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/config.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/entity_resolution.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/intent_matching.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/normalization.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/nlp/pipeline.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/patterns/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/patterns/detector.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/refactor/__init__.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm/refactor/prompt_engine.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/dependency_links.txt +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/entry_points.txt +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/code2llm.egg-info/top_level.txt +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/setup.cfg +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_advanced_analysis.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_analyzer.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_deep_analysis.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_edge_cases.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_flow_exporter.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_format_quality.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_multilanguage_e2e.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_nlp_pipeline.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_nonpython_cc_calls.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_pipeline_detector.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_project_toon_export.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_prompt_engine.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_prompt_txt.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_refactoring_engine.py +0 -0
- {code2llm-0.5.102 → code2llm-0.5.104}/tests/test_toon_v2.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code2llm
|
|
3
|
-
Version: 0.5.102
|
|
3
|
+
Version: 0.5.104
|
|
4
4
|
Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
|
|
5
5
|
Home-page: https://github.com/wronai/stts
|
|
6
6
|
Author: STTS Project
|
|
@@ -37,6 +37,16 @@ Requires-Dist: vulture>=2.10
|
|
|
37
37
|
Requires-Dist: tiktoken>=0.5
|
|
38
38
|
Requires-Dist: tree-sitter>=0.21
|
|
39
39
|
Requires-Dist: tree-sitter-python>=0.21
|
|
40
|
+
Requires-Dist: tree-sitter-javascript>=0.21
|
|
41
|
+
Requires-Dist: tree-sitter-typescript>=0.21
|
|
42
|
+
Requires-Dist: tree-sitter-go>=0.21
|
|
43
|
+
Requires-Dist: tree-sitter-rust>=0.21
|
|
44
|
+
Requires-Dist: tree-sitter-java>=0.21
|
|
45
|
+
Requires-Dist: tree-sitter-c>=0.21
|
|
46
|
+
Requires-Dist: tree-sitter-cpp>=0.22
|
|
47
|
+
Requires-Dist: tree-sitter-c-sharp>=0.21
|
|
48
|
+
Requires-Dist: tree-sitter-php>=0.22
|
|
49
|
+
Requires-Dist: tree-sitter-ruby>=0.21
|
|
40
50
|
Provides-Extra: dev
|
|
41
51
|
Requires-Dist: pytest>=6.2; extra == "dev"
|
|
42
52
|
Requires-Dist: pytest-cov>=2.12; extra == "dev"
|
|
@@ -53,16 +63,13 @@ Dynamic: requires-python
|
|
|
53
63
|
|
|
54
64
|
# code2llm - Generated Analysis Files
|
|
55
65
|
|
|
56
|
-
|
|
57
66
|
## AI Cost Tracking
|
|
58
67
|
|
|
59
|
-
  
|
|
68
|
+
 
|
|
61
69
|
|
|
62
|
-
-
|
|
63
|
-
- 👤 **Human dev:** ~$5123 (51.2h @ $100/h, 30min dedup)
|
|
70
|
+
This project uses AI-generated code. Total cost: **$7.5000** with **153** AI commits.
|
|
64
71
|
|
|
65
|
-
Generated on 2026-
|
|
72
|
+
Generated on 2026-04-09 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/models/openrouter/qwen/qwen3-coder-next)
|
|
66
73
|
|
|
67
74
|
---
|
|
68
75
|
|
|
@@ -390,10 +397,10 @@ code2llm ./ -f yaml --separate-orphans
|
|
|
390
397
|
---
|
|
391
398
|
|
|
392
399
|
**Generated by**: `code2llm ./ -f all --readme`
|
|
393
|
-
**Analysis Date**: 2026-
|
|
394
|
-
**Total Functions**:
|
|
395
|
-
**Total Classes**:
|
|
396
|
-
**Modules**:
|
|
400
|
+
**Analysis Date**: 2026-04-09
|
|
401
|
+
**Total Functions**: 1011
|
|
402
|
+
**Total Classes**: 111
|
|
403
|
+
**Modules**: 131
|
|
397
404
|
|
|
398
405
|
For more information about code2llm, visit: https://github.com/tom-sapletta/code2llm
|
|
399
406
|
|
|
@@ -1,15 +1,12 @@
|
|
|
1
1
|
# code2llm - Generated Analysis Files
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
## AI Cost Tracking
|
|
5
4
|
|
|
6
|
-
  
|
|
5
|
+
 
|
|
8
6
|
|
|
9
|
-
-
|
|
10
|
-
- 👤 **Human dev:** ~$5123 (51.2h @ $100/h, 30min dedup)
|
|
7
|
+
This project uses AI-generated code. Total cost: **$7.5000** with **153** AI commits.
|
|
11
8
|
|
|
12
|
-
Generated on 2026-
|
|
9
|
+
Generated on 2026-04-09 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/models/openrouter/qwen/qwen3-coder-next)
|
|
13
10
|
|
|
14
11
|
---
|
|
15
12
|
|
|
@@ -337,10 +334,10 @@ code2llm ./ -f yaml --separate-orphans
|
|
|
337
334
|
---
|
|
338
335
|
|
|
339
336
|
**Generated by**: `code2llm ./ -f all --readme`
|
|
340
|
-
**Analysis Date**: 2026-
|
|
341
|
-
**Total Functions**:
|
|
342
|
-
**Total Classes**:
|
|
343
|
-
**Modules**:
|
|
337
|
+
**Analysis Date**: 2026-04-09
|
|
338
|
+
**Total Functions**: 1011
|
|
339
|
+
**Total Classes**: 111
|
|
340
|
+
**Modules**: 131
|
|
344
341
|
|
|
345
342
|
For more information about code2llm, visit: https://github.com/tom-sapletta/code2llm
|
|
346
343
|
|
|
@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
|
|
|
8
8
|
and entity resolution with multilingual support.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
__version__ = "0.5.
|
|
11
|
+
__version__ = "0.5.104"
|
|
12
12
|
__author__ = "STTS Project"
|
|
13
13
|
|
|
14
14
|
# Core analysis components (lightweight, always needed)
|
|
@@ -128,7 +128,7 @@ def validate_chunked_output(output_dir: Path, args) -> bool:
|
|
|
128
128
|
print(f"✗ No chunk directories found in: {output_dir}", file=sys.stderr)
|
|
129
129
|
return False
|
|
130
130
|
|
|
131
|
-
required_files = ['analysis.toon', 'context.md', 'evolution.toon.yaml']
|
|
131
|
+
required_files = ['analysis.toon.yaml', 'context.md', 'evolution.toon.yaml']
|
|
132
132
|
issues = []
|
|
133
133
|
valid_chunks = []
|
|
134
134
|
|
|
@@ -420,15 +420,34 @@ def analyze_c_family(
|
|
|
420
420
|
patterns: Dict,
|
|
421
421
|
lang_config: Dict,
|
|
422
422
|
cc_lang: str = 'c_family',
|
|
423
|
+
ext: str = '',
|
|
423
424
|
) -> Dict:
|
|
424
425
|
"""Shared analyzer for C-family languages (Java, C#, C++, etc.).
|
|
425
426
|
|
|
426
|
-
|
|
427
|
+
Uses tree-sitter when available (10× faster), falls back to regex.
|
|
427
428
|
"""
|
|
428
|
-
result =
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
429
|
+
result = None
|
|
430
|
+
|
|
431
|
+
# Try tree-sitter first (much faster)
|
|
432
|
+
if ext:
|
|
433
|
+
try:
|
|
434
|
+
from .ts_parser import parse_source
|
|
435
|
+
from .ts_extractors import extract_declarations_ts
|
|
436
|
+
tree = parse_source(content, ext)
|
|
437
|
+
if tree:
|
|
438
|
+
result = extract_declarations_ts(
|
|
439
|
+
tree, content.encode('utf-8'), ext, file_path, module_name
|
|
440
|
+
)
|
|
441
|
+
except ImportError:
|
|
442
|
+
pass # tree-sitter not installed
|
|
443
|
+
|
|
444
|
+
# Fallback to regex
|
|
445
|
+
if result is None:
|
|
446
|
+
result = _extract_declarations(
|
|
447
|
+
content, file_path, module_name,
|
|
448
|
+
patterns, stats, lang_config,
|
|
449
|
+
)
|
|
450
|
+
|
|
432
451
|
calculate_complexity_regex(content, result, lang=cc_lang)
|
|
433
452
|
extract_calls_regex(content, module_name, result)
|
|
434
453
|
stats['files_processed'] += 1
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""C++ analyzer (regex-based, with tree-sitter support)."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
from code2llm.core.lang.base import analyze_c_family
|
|
7
|
+
|
|
8
|
+
# C++-specific patterns
|
|
9
|
+
_CPP_PATTERNS = {
|
|
10
|
+
'import': re.compile(r'^\s*#include\s*["<]([^">]+)[">]'),
|
|
11
|
+
'class': re.compile(
|
|
12
|
+
r'^\s*(?:class|struct)\s+(\w+)'
|
|
13
|
+
r'(?:\s*:\s*(?:public|private|protected)\s+(\w+))?'
|
|
14
|
+
),
|
|
15
|
+
'function': re.compile(
|
|
16
|
+
r'^\s*(?:virtual\s+|static\s+|inline\s+)?'
|
|
17
|
+
r'(?:[\w:*&<>\s]+\s+)?'
|
|
18
|
+
r'(\w+)\s*\([^)]*\)'
|
|
19
|
+
),
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
_CPP_CONFIG = {
|
|
23
|
+
'index_files': (),
|
|
24
|
+
'brace_track': True,
|
|
25
|
+
'reserved': {'if', 'for', 'while', 'switch', 'return', 'catch', 'class'},
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def analyze_cpp(content: str, file_path: str, module_name: str,
                ext: str, stats: Dict) -> Dict:
    """Run the shared C-family analyzer with the C++ patterns and config.

    The file extension is forwarded as ``ext`` so the shared analyzer can
    attempt its tree-sitter path before falling back to the regexes.
    """
    analysis = analyze_c_family(
        content, file_path, module_name, stats,
        patterns=_CPP_PATTERNS,
        lang_config=_CPP_CONFIG,
        ext=ext,
    )
    return analysis
|
|
@@ -38,5 +38,5 @@ def analyze_csharp(content: str, file_path: str, module_name: str,
|
|
|
38
38
|
"""Analyze C# files using shared C-family extraction."""
|
|
39
39
|
return analyze_c_family(
|
|
40
40
|
content, file_path, module_name, stats,
|
|
41
|
-
_CSHARP_PATTERNS, _CSHARP_CONFIG,
|
|
41
|
+
_CSHARP_PATTERNS, _CSHARP_CONFIG, ext=ext,
|
|
42
42
|
)
|
|
@@ -1,16 +1,14 @@
|
|
|
1
|
-
"""Go analyzer (regex-based)."""
|
|
1
|
+
"""Go analyzer (regex-based, with tree-sitter support)."""
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
-
from pathlib import Path
|
|
5
4
|
from typing import Dict
|
|
6
5
|
|
|
7
6
|
from code2llm.core.models import ClassInfo, FunctionInfo, ModuleInfo
|
|
8
7
|
from code2llm.core.lang.base import calculate_complexity_regex, extract_calls_regex
|
|
9
8
|
|
|
10
9
|
|
|
11
|
-
def
|
|
12
|
-
|
|
13
|
-
"""Analyze Go files using regex-based parsing."""
|
|
10
|
+
def _analyze_go_regex(content: str, file_path: str, module_name: str, stats: Dict) -> Dict:
|
|
11
|
+
"""Regex fallback for Go analysis."""
|
|
14
12
|
result = {
|
|
15
13
|
'module': ModuleInfo(name=module_name, file=file_path, is_package=False),
|
|
16
14
|
'functions': {},
|
|
@@ -20,7 +18,6 @@ def analyze_go(content: str, file_path: str, module_name: str,
|
|
|
20
18
|
}
|
|
21
19
|
|
|
22
20
|
lines = content.split('\n')
|
|
23
|
-
|
|
24
21
|
import_pattern = re.compile(r'^\s*import\s+(?:\(\s*["\']([^"\']+)["\']|["\']([^"\']+)["\'])')
|
|
25
22
|
func_pattern = re.compile(r'^\s*func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(')
|
|
26
23
|
struct_pattern = re.compile(r'^\s*type\s+(\w+)\s+struct')
|
|
@@ -31,14 +28,12 @@ def analyze_go(content: str, file_path: str, module_name: str,
|
|
|
31
28
|
if not line or line.startswith('//'):
|
|
32
29
|
continue
|
|
33
30
|
|
|
34
|
-
# Imports
|
|
35
31
|
import_match = import_pattern.match(line)
|
|
36
32
|
if import_match:
|
|
37
33
|
imp = import_match.group(1) or import_match.group(2)
|
|
38
34
|
if imp:
|
|
39
35
|
result['module'].imports.append(imp)
|
|
40
36
|
|
|
41
|
-
# Functions
|
|
42
37
|
func_match = func_pattern.match(line)
|
|
43
38
|
if func_match:
|
|
44
39
|
func_name = func_match.group(1)
|
|
@@ -53,7 +48,6 @@ def analyze_go(content: str, file_path: str, module_name: str,
|
|
|
53
48
|
result['module'].functions.append(qualified_name)
|
|
54
49
|
stats['functions_found'] += 1
|
|
55
50
|
|
|
56
|
-
# Structs (treated as classes)
|
|
57
51
|
struct_match = struct_pattern.match(line)
|
|
58
52
|
if struct_match:
|
|
59
53
|
class_name = struct_match.group(1)
|
|
@@ -66,7 +60,6 @@ def analyze_go(content: str, file_path: str, module_name: str,
|
|
|
66
60
|
result['module'].classes.append(qualified_name)
|
|
67
61
|
stats['classes_found'] += 1
|
|
68
62
|
|
|
69
|
-
# Interfaces
|
|
70
63
|
interface_match = interface_pattern.match(line)
|
|
71
64
|
if interface_match:
|
|
72
65
|
class_name = interface_match.group(1)
|
|
@@ -79,9 +72,31 @@ def analyze_go(content: str, file_path: str, module_name: str,
|
|
|
79
72
|
result['module'].classes.append(qualified_name)
|
|
80
73
|
stats['classes_found'] += 1
|
|
81
74
|
|
|
82
|
-
|
|
75
|
+
return result
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def analyze_go(content: str, file_path: str, module_name: str,
               ext: str, stats: Dict) -> Dict:
    """Analyze a Go source file.

    Declarations are extracted with tree-sitter when the parser modules can
    be imported and a tree is produced; otherwise the regex extractor is
    used. Complexity and call extraction always run on the regex helpers.

    Args:
        content: Source text of the file.
        file_path: Path recorded on the extracted declarations.
        module_name: Prefix used for qualified names.
        ext: File extension, used to select the tree-sitter grammar.
        stats: Mutable counters; ``functions_found``, ``classes_found`` and
            ``files_processed`` are incremented.

    Returns:
        The declaration dict produced by whichever extractor ran.
    """
    result = None

    # Try tree-sitter first. ImportError covers both the optional modules
    # themselves and any lazy grammar import performed while parsing.
    try:
        from .ts_parser import parse_source
        from .ts_extractors import extract_declarations_ts
        tree = parse_source(content, ext)
        if tree:
            result = extract_declarations_ts(
                tree, content.encode('utf-8'), ext, file_path, module_name
            )
    except ImportError:
        result = None

    if result is None:
        # The regex fallback maintains the counters itself.
        result = _analyze_go_regex(content, file_path, module_name, stats)
    else:
        # The tree-sitter extractor receives no stats object, so mirror the
        # bookkeeping the regex path performs per declaration.
        stats['functions_found'] += len(result['functions'])
        stats['classes_found'] += len(result['classes'])

    calculate_complexity_regex(content, result, lang='go')
    extract_calls_regex(content, module_name, result)
    stats['files_processed'] += 1
    return result
|
|
@@ -39,5 +39,5 @@ def analyze_java(content: str, file_path: str, module_name: str,
|
|
|
39
39
|
"""Analyze Java files using shared C-family extraction."""
|
|
40
40
|
return analyze_c_family(
|
|
41
41
|
content, file_path, module_name, stats,
|
|
42
|
-
_JAVA_PATTERNS, _JAVA_CONFIG,
|
|
42
|
+
_JAVA_PATTERNS, _JAVA_CONFIG, ext=ext,
|
|
43
43
|
)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, Optional, Tuple
|
|
3
|
+
from code2llm.core.models import ClassInfo, FunctionInfo, ModuleInfo
|
|
4
|
+
from code2llm.core.lang.base import calculate_complexity_regex, extract_calls_regex, _extract_declarations
|
|
5
|
+
|
|
6
|
+
def _parse_php_metadata(content: str, module_name: str, result: Dict) -> Tuple[Optional[str], bool]:
|
|
7
|
+
lines = content.split('\n')
|
|
8
|
+
current_namespace = None
|
|
9
|
+
in_php = False
|
|
10
|
+
for line in lines:
|
|
11
|
+
line = line.strip()
|
|
12
|
+
if line.startswith('<?php') or line.startswith('<?'):
|
|
13
|
+
in_php = True
|
|
14
|
+
continue
|
|
15
|
+
if line == '?>':
|
|
16
|
+
in_php = False
|
|
17
|
+
continue
|
|
18
|
+
if not in_php: continue
|
|
19
|
+
ns_match = re.match(r'^namespace\s+([\\\w]+)', line)
|
|
20
|
+
if ns_match:
|
|
21
|
+
current_namespace = ns_match.group(1)
|
|
22
|
+
continue
|
|
23
|
+
use_match = re.match(r'^use\s+([\\\w]+)', line)
|
|
24
|
+
if use_match:
|
|
25
|
+
result['module'].imports.append(use_match.group(1))
|
|
26
|
+
return current_namespace, in_php
|
|
27
|
+
|
|
28
|
+
def _adjust_qualified_names(result: Dict, module_name: str, namespace: str) -> None:
|
|
29
|
+
ns_prefix = f".{namespace}"
|
|
30
|
+
for key in ['classes', 'functions']:
|
|
31
|
+
new_items = {}
|
|
32
|
+
for qname, item in list(result[key].items()):
|
|
33
|
+
new_qname = qname.replace(f"{module_name}.", f"{module_name}{ns_prefix}.", 1)
|
|
34
|
+
item.qualified_name = new_qname
|
|
35
|
+
new_items[new_qname] = item
|
|
36
|
+
result[key] = new_items
|
|
37
|
+
result['module'].__setattr__(key, list(new_items.keys()))
|
|
38
|
+
|
|
39
|
+
def _extract_php_traits(content: str, file_path: str, module_name: str, namespace: Optional[str], result: Dict, stats: Dict) -> None:
    """Register every PHP ``trait`` declaration as a class entry.

    Each stripped line that begins with ``trait <Name>`` adds a ``ClassInfo``
    to ``result['classes']``, appends its qualified name to
    ``result['module'].classes`` and bumps ``stats['classes_found']``.
    """
    trait_re = re.compile(r'^\s*trait\s+(\w+)')
    # Namespace segment of the qualified name; empty when no namespace.
    ns_segment = f"{namespace}." if namespace else ''

    for line_no, raw_line in enumerate(content.split('\n'), 1):
        found = trait_re.match(raw_line.strip())
        if not found:
            continue

        trait_name = found.group(1)
        qualified = f"{module_name}.{ns_segment}{trait_name}"
        result['classes'][qualified] = ClassInfo(
            name=trait_name,
            qualified_name=qualified,
            file=file_path,
            line=line_no,
            module=module_name,
            bases=[],
            methods=[],
            docstring="",
        )
        result['module'].classes.append(qualified)
        stats['classes_found'] += 1
|
|
49
|
+
|
|
50
|
+
def analyze_php(content: str, file_path: str, module_name: str, ext: str, stats: Dict) -> Dict:
    """Analyze a PHP file with the regex declaration extractor.

    Steps, in order: extract declarations, read namespace/``use`` metadata,
    prefix qualified names with the namespace when one was found, register
    traits, then compute complexity and calls. ``ext`` is accepted but not
    used by this function.
    """
    php_patterns = {
        'import': re.compile(r'^(?:include|require|include_once|require_once)\s*["\']([^"\']+)["\']'),
        'class': re.compile(r'(?:abstract\s+|final\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w,\s\\]+))?'),
        'interface': re.compile(r'interface\s+(\w+)'),
        'function': re.compile(r'(?:public\s+|private\s+|protected\s+)?(?:static\s+)?function\s+(\w+)\s*\('),
    }
    php_config = {
        'index_files': (),
        'brace_track': True,
        'reserved': {'if', 'for', 'while', 'switch', 'return', 'catch', 'echo', 'print'},
    }

    result = _extract_declarations(
        content, file_path, module_name, php_patterns, stats, php_config
    )

    namespace = _parse_php_metadata(content, module_name, result)[0]
    if namespace:
        _adjust_qualified_names(result, module_name, namespace)

    # Traits are added after the rename and build their own namespaced name.
    _extract_php_traits(content, file_path, module_name, namespace, result, stats)

    calculate_complexity_regex(content, result, lang='c_family')
    extract_calls_regex(content, module_name, result)
    stats['files_processed'] += 1
    return result
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Tree-sitter based declaration extractors — fast CST traversal.
|
|
2
|
+
|
|
3
|
+
Each language has specific node types for functions, classes, methods.
|
|
4
|
+
This module provides unified extraction using tree-sitter queries.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, List, Optional, Any
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from code2llm.core.models import ClassInfo, FunctionInfo, ModuleInfo
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Node type mappings per language.
# Tree-sitter node types that count as function/method definitions.
# Only definition nodes belong here: Rust `impl_item` (an impl block, listed
# in CLASS_TYPES) and C++ `template_function` (a templated name as used in
# expressions such as `foo<int>(x)`) are deliberately left out, because
# listing them records non-functions as functions.
FUNCTION_TYPES = {
    'python': ('function_definition', 'async_function_definition'),
    'javascript': ('function_declaration', 'function_expression', 'arrow_function', 'method_definition'),
    'typescript': ('function_declaration', 'function_expression', 'arrow_function', 'method_definition'),
    'go': ('function_declaration', 'method_declaration'),
    'rust': ('function_item',),
    'java': ('method_declaration', 'constructor_declaration'),
    'c': ('function_definition',),
    'cpp': ('function_definition',),
    'csharp': ('method_declaration', 'constructor_declaration'),
    'php': ('function_definition', 'method_declaration'),
    'ruby': ('method', 'singleton_method'),
}
|
|
27
|
+
|
|
28
|
+
# Tree-sitter node types that count as class-like declarations.
# Go uses `type_spec` rather than `type_declaration`: the declared name is a
# child of the nested `type_spec`, so a lookup on `type_declaration` itself
# finds no name and no Go type would be recorded. NOTE(review): `type_spec`
# also covers non-struct types (e.g. `type MyInt int`) — confirm that is
# acceptable for callers.
CLASS_TYPES = {
    'python': ('class_definition',),
    'javascript': ('class_declaration', 'class_expression'),
    'typescript': ('class_declaration', 'class_expression', 'interface_declaration'),
    'go': ('type_spec',),
    'rust': ('struct_item', 'enum_item', 'impl_item', 'trait_item'),
    'java': ('class_declaration', 'interface_declaration', 'enum_declaration'),
    'c': ('struct_specifier',),
    'cpp': ('class_specifier', 'struct_specifier'),
    'csharp': ('class_declaration', 'interface_declaration', 'struct_declaration'),
    'php': ('class_declaration', 'interface_declaration', 'trait_declaration'),
    'ruby': ('class', 'module'),
}
|
|
41
|
+
|
|
42
|
+
# Language key -> file extensions handled by that language's grammar.
_LANG_EXTENSIONS = (
    ('python', ('.py',)),
    ('javascript', ('.js', '.jsx', '.mjs', '.cjs')),
    ('typescript', ('.ts', '.tsx')),
    ('go', ('.go',)),
    ('rust', ('.rs',)),
    ('java', ('.java',)),
    ('c', ('.c', '.h')),
    ('cpp', ('.cpp', '.cc', '.cxx', '.hpp', '.hxx')),
    ('csharp', ('.cs',)),
    ('php', ('.php',)),
    ('ruby', ('.rb',)),
)

# Flattened lookup: file extension (with leading dot) -> language key.
EXT_TO_LANG = {
    extension: language
    for language, extensions in _LANG_EXTENSIONS
    for extension in extensions
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _get_node_text(node, source_bytes: bytes) -> str:
|
|
58
|
+
"""Extract text content of a node."""
|
|
59
|
+
return source_bytes[node.start_byte:node.end_byte].decode('utf-8', errors='replace')
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _find_name_node(node) -> Optional[Any]:
|
|
63
|
+
"""Find the name/identifier child of a node."""
|
|
64
|
+
for child in node.children:
|
|
65
|
+
if child.type in ('identifier', 'name', 'property_identifier', 'type_identifier'):
|
|
66
|
+
return child
|
|
67
|
+
# For method definitions, look for property_identifier
|
|
68
|
+
if child.type == 'property_identifier':
|
|
69
|
+
return child
|
|
70
|
+
# Fallback: look in named children
|
|
71
|
+
for child in node.children:
|
|
72
|
+
if 'name' in child.type or 'identifier' in child.type:
|
|
73
|
+
return child
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _extract_functions_ts(tree, source_bytes: bytes, lang: str,
                          module_name: str, file_path: str) -> Dict[str, FunctionInfo]:
    """Collect function and method declarations from a tree-sitter tree.

    Nodes are visited in document (pre-order) sequence. A node whose type is
    listed in ``FUNCTION_TYPES[lang]`` and that has a resolvable name yields
    a ``FunctionInfo``; when it sits below a node listed in
    ``CLASS_TYPES[lang]``, the nearest such ancestor's name is used as the
    class context and becomes part of the qualified name.

    Args:
        tree: Parsed tree exposing ``root_node``.
        source_bytes: The encoded source the tree was parsed from.
        lang: Language key into ``FUNCTION_TYPES`` / ``CLASS_TYPES``.
        module_name: Prefix for qualified names.
        file_path: Path stored on each ``FunctionInfo``.

    Returns:
        Mapping of qualified name to ``FunctionInfo``; a later declaration
        with the same qualified name replaces an earlier one.
    """
    functions = {}
    func_types = FUNCTION_TYPES.get(lang, ())
    class_types = CLASS_TYPES.get(lang, ())

    # An explicit stack replaces recursion so deeply nested syntax trees
    # cannot exhaust Python's recursion limit.
    pending = [(tree.root_node, None)]
    while pending:
        node, class_context = pending.pop()

        if node.type in func_types:
            name_node = _find_name_node(node)
            if name_node:
                name = _get_node_text(name_node, source_bytes)
                if class_context:
                    qname = f"{module_name}.{class_context}.{name}"
                else:
                    qname = f"{module_name}.{name}"

                # Tree-sitter rows are 0-based; stored lines are 1-based.
                start_line = node.start_point[0] + 1
                end_line = node.end_point[0] + 1

                functions[qname] = FunctionInfo(
                    name=name,
                    qualified_name=qname,
                    file=file_path,
                    line=start_line,
                    end_line=end_line,
                    line_count=end_line - start_line + 1,
                    is_method=class_context is not None,
                    class_name=class_context,
                )

        # Children of a class-like node inherit its name as their context.
        child_context = class_context
        if node.type in class_types:
            class_name_node = _find_name_node(node)
            if class_name_node:
                child_context = (
                    _get_node_text(class_name_node, source_bytes) or class_context
                )

        # Push in reverse so children are popped in source order.
        for child in reversed(node.children):
            pending.append((child, child_context))

    return functions
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _extract_classes_ts(tree, source_bytes: bytes, lang: str,
                        module_name: str, file_path: str) -> Dict[str, ClassInfo]:
    """Collect class-like declarations from a tree-sitter tree.

    Every node whose type is listed in ``CLASS_TYPES[lang]`` and that has a
    resolvable name yields a ``ClassInfo`` keyed by
    ``"<module_name>.<name>"``. Nested declarations are not prefixed with
    their enclosing class, and a later declaration with the same key
    replaces an earlier one.

    Args:
        tree: Parsed tree exposing ``root_node``.
        source_bytes: The encoded source the tree was parsed from.
        lang: Language key into ``CLASS_TYPES``.
        module_name: Prefix for qualified names.
        file_path: Path stored on each ``ClassInfo``.

    Returns:
        Mapping of qualified name to ``ClassInfo``.
    """
    classes = {}
    class_types = CLASS_TYPES.get(lang, ())

    # An explicit stack replaces recursion so deeply nested syntax trees
    # cannot exhaust Python's recursion limit.
    pending = [tree.root_node]
    while pending:
        node = pending.pop()

        if node.type in class_types:
            name_node = _find_name_node(node)
            if name_node:
                name = _get_node_text(name_node, source_bytes)
                qname = f"{module_name}.{name}"
                classes[qname] = ClassInfo(
                    name=name,
                    qualified_name=qname,
                    file=file_path,
                    # Tree-sitter rows are 0-based; stored lines are 1-based.
                    line=node.start_point[0] + 1,
                    end_line=node.end_point[0] + 1,
                )

        # Push in reverse so children are popped in source order.
        pending.extend(reversed(node.children))

    return classes
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def extract_declarations_ts(
    tree,
    source_bytes: bytes,
    ext: str,
    file_path: str,
    module_name: str,
) -> Dict:
    """Extract all declarations from a tree-sitter tree.

    The language is chosen from ``ext`` via ``EXT_TO_LANG`` (``'generic'``
    when the extension is unknown, in which case no node types match and
    both mappings come back empty).

    Args:
        tree: Parsed tree exposing ``root_node``.
        source_bytes: The encoded source the tree was parsed from.
        ext: File extension including the leading dot.
        file_path: Path stored on the module and on each declaration.
        module_name: Module name and qualified-name prefix.

    Returns:
        Dict with keys ``'module'``, ``'functions'``, ``'classes'``,
        ``'nodes'`` and ``'edges'``, the shape the regex-based extractor is
        documented to return.
    """
    lang = EXT_TO_LANG.get(ext, 'generic')

    functions = _extract_functions_ts(tree, source_bytes, lang, module_name, file_path)
    classes = _extract_classes_ts(tree, source_bytes, lang, module_name, file_path)

    module = ModuleInfo(
        name=module_name,
        file=file_path,
        is_package=Path(file_path).name in ('__init__.py', 'index.js', 'index.ts', 'mod.rs', 'lib.rs'),
    )
    # The regex analyzers record each qualified name on the module as they
    # find it; do the same so both paths yield an equally populated module.
    module.functions.extend(functions)
    module.classes.extend(classes)

    return {
        'module': module,
        'functions': functions,
        'classes': classes,
        'nodes': {},
        'edges': [],
    }
|