code2logic 1.0.43__tar.gz → 1.0.45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code2logic-1.0.43 → code2logic-1.0.45}/PKG-INFO +50 -37
- {code2logic-1.0.43 → code2logic-1.0.45}/README.md +49 -36
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/__init__.py +1 -1
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/analyzer.py +45 -7
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/cli.py +21 -59
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/similarity.py +32 -6
- {code2logic-1.0.43 → code2logic-1.0.45}/pyproject.toml +1 -1
- {code2logic-1.0.43 → code2logic-1.0.45}/LICENSE +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/__main__.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/adaptive.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/base.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/base_generator.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/benchmark.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/benchmarks/__init__.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/benchmarks/common.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/benchmarks/results.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/benchmarks/runner.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/chunked_reproduction.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/code_review.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/config.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/core/__init__.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/dependency.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/errors.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/file_formats.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/formats/__init__.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/function_logic.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/generators.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/gherkin.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/integrations/__init__.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/intent.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/llm/__init__.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/llm.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/llm_clients.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/llm_profiler.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/logicml.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/markdown_format.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/mcp_server.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/metrics.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/models.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/parsers.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/project_comparison.md +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/project_reproducer.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/prompts.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/py.typed +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/quality.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/refactor.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/reproducer.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/reproduction.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/schemas/__init__.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/schemas/json_schema.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/schemas/logicml_schema.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/schemas/markdown_schema.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/schemas/yaml_schema.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/shared_utils.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/terminal.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/tools/__init__.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/toon_format.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/universal.py +0 -0
- {code2logic-1.0.43 → code2logic-1.0.45}/code2logic/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code2logic
|
|
3
|
-
Version: 1.0.43
|
|
3
|
+
Version: 1.0.45
|
|
4
4
|
Summary: Code2Logic - Source code to logical representation converter for LLM analysis, featuring Tree-sitter parsing, dependency graph analysis, and multi-language support.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -94,10 +94,18 @@ pip install code2logic[nlp] # Enhanced intents
|
|
|
94
94
|
|
|
95
95
|
## 📖 Quick Start
|
|
96
96
|
```bash
|
|
97
|
-
|
|
98
|
-
code2logic ./ -f toon --
|
|
99
|
-
|
|
100
|
-
|
|
97
|
+
# TOON compact (best token efficiency — 5.9x smaller than JSON)
|
|
98
|
+
code2logic ./ -f toon --compact --name project -o ./
|
|
99
|
+
|
|
100
|
+
# TOON with function-logic + structural context
|
|
101
|
+
code2logic ./ -f toon --compact --no-repeat-module \
|
|
102
|
+
--function-logic function.toon --function-logic-context minimal --name project -o ./
|
|
103
|
+
|
|
104
|
+
# TOON-Hybrid (project structure + function details for hub modules)
|
|
105
|
+
code2logic ./ -f toon --hybrid --no-repeat-module --name project -o ./
|
|
106
|
+
|
|
107
|
+
# YAML compact (human-readable, good compromise)
|
|
108
|
+
code2logic ./ -f yaml --compact --name project -o ./
|
|
101
109
|
```
|
|
102
110
|
|
|
103
111
|
### Command Line
|
|
@@ -312,14 +320,20 @@ Similar Functions:
|
|
|
312
320
|
|
|
313
321
|
```text
|
|
314
322
|
code2logic/
|
|
315
|
-
├── analyzer.py
|
|
316
|
-
├── parsers.py
|
|
317
|
-
├── dependency.py
|
|
318
|
-
├── similarity.py
|
|
319
|
-
├── intent.py
|
|
320
|
-
├── generators.py
|
|
321
|
-
├──
|
|
322
|
-
|
|
323
|
+
├── analyzer.py # Main orchestrator
|
|
324
|
+
├── parsers.py # Tree-sitter + fallback parser
|
|
325
|
+
├── dependency.py # NetworkX dependency analysis
|
|
326
|
+
├── similarity.py # Rapidfuzz similar detection
|
|
327
|
+
├── intent.py # NLP intent generation
|
|
328
|
+
├── generators.py # Output generators (MD/Compact/JSON/YAML/CSV)
|
|
329
|
+
├── toon_format.py # TOON generator (compact, hybrid)
|
|
330
|
+
├── logicml.py # LogicML generator (typed signatures)
|
|
331
|
+
├── function_logic.py # Function-logic TOON with structural context
|
|
332
|
+
├── metrics.py # AST-based quality metrics
|
|
333
|
+
├── models.py # Data structures
|
|
334
|
+
├── cli.py # Command-line interface
|
|
335
|
+
├── benchmarks/ # Benchmark runner, results, common utils
|
|
336
|
+
└── llm_clients.py # Unified LLM client (OpenRouter/Ollama/LiteLLM)
|
|
323
337
|
```
|
|
324
338
|
|
|
325
339
|
## 🔌 Integration Examples
|
|
@@ -415,40 +429,39 @@ Compact format is ~10-15x smaller than Markdown.
|
|
|
415
429
|
|
|
416
430
|
## 🔬 Code Reproduction Benchmarks
|
|
417
431
|
|
|
418
|
-
|
|
432
|
+
Benchmark results (20 files, model: `arcee-ai/trinity-large-preview`, 2026-02-25):
|
|
419
433
|
|
|
420
|
-
### Format Comparison
|
|
434
|
+
### Project Benchmark — Format Comparison
|
|
421
435
|
|
|
422
|
-
| Format | Score |
|
|
423
|
-
|
|
424
|
-
| **
|
|
425
|
-
|
|
|
426
|
-
|
|
|
427
|
-
|
|
|
436
|
+
| Format | Score | Syntax OK | Runs OK | ~Tokens | Efficiency (p/kT) |
|
|
437
|
+
|--------|------:|----------:|--------:|--------:|---------:|
|
|
438
|
+
| **toon** | **63,8%** | 100% | 60% | 17 875 | **3,57** |
|
|
439
|
+
| json | 62,9% | 100% | 60% | 104 914 | 0,60 |
|
|
440
|
+
| markdown | 62,5% | 100% | 55% | 36 851 | 1,70 |
|
|
441
|
+
| yaml | 62,4% | 100% | 55% | 68 651 | 0,91 |
|
|
442
|
+
| logicml | 60,4% | 100% | 55% | ~30 000 | ~2,01 |
|
|
443
|
+
| csv | 53,0% | 100% | 40% | 80 779 | 0,66 |
|
|
444
|
+
| function.toon | 49,3% | 95% | 35% | 29 271 | 1,68 |
|
|
445
|
+
| gherkin | 38,6% | 95% | 30% | ~25 000 | ~1,54 |
|
|
446
|
+
|
|
447
|
+
**Behavioral benchmark:** 85,7% (6/7 functions passed).
|
|
428
448
|
|
|
429
449
|
### Key Findings
|
|
430
450
|
|
|
431
|
-
- **
|
|
432
|
-
- **
|
|
433
|
-
- **
|
|
434
|
-
- **
|
|
451
|
+
- **TOON wins on efficiency** — best score (63,8%) at 5,9x fewer tokens than JSON
|
|
452
|
+
- **Syntax OK = 100%** for all major formats — LLM always generates valid syntax
|
|
453
|
+
- **function.toon paradox** — worse than project.toon despite larger file, due to missing class/module context (fixed in v1.0.43 with `--function-logic-context`)
|
|
454
|
+
- **gherkin/csv** — poor fit for code description, their structure doesn't map to programming constructs
|
|
435
455
|
|
|
436
456
|
### Run Benchmarks
|
|
437
457
|
|
|
438
458
|
```bash
|
|
439
|
-
#
|
|
440
|
-
python examples/11_token_benchmark.py --folder tests/samples/ --no-llm
|
|
441
|
-
|
|
442
|
-
# Async multi-format benchmark
|
|
443
|
-
python examples/09_async_benchmark.py --folder tests/samples/ --no-llm
|
|
444
|
-
|
|
445
|
-
# Function-level reproduction
|
|
446
|
-
python examples/10_function_reproduction.py --file tests/samples/sample_functions.py --no-llm
|
|
447
|
-
|
|
448
|
-
python examples/15_unified_benchmark.py --folder tests/samples/ --no-llm
|
|
459
|
+
make benchmark # Full benchmark suite (requires OPENROUTER_API_KEY)
|
|
449
460
|
|
|
450
|
-
#
|
|
451
|
-
python examples/
|
|
461
|
+
# Or individually:
|
|
462
|
+
python examples/15_unified_benchmark.py --type format --folder tests/samples/ --limit 20
|
|
463
|
+
python examples/15_unified_benchmark.py --type project --folder tests/samples/ --limit 20
|
|
464
|
+
python examples/15_unified_benchmark.py --type function --file tests/samples/sample_functions.py
|
|
452
465
|
```
|
|
453
466
|
|
|
454
467
|
## 🤝 Contributing
|
|
@@ -48,10 +48,18 @@ pip install code2logic[nlp] # Enhanced intents
|
|
|
48
48
|
|
|
49
49
|
## 📖 Quick Start
|
|
50
50
|
```bash
|
|
51
|
-
|
|
52
|
-
code2logic ./ -f toon --
|
|
53
|
-
|
|
54
|
-
|
|
51
|
+
# TOON compact (best token efficiency — 5.9x smaller than JSON)
|
|
52
|
+
code2logic ./ -f toon --compact --name project -o ./
|
|
53
|
+
|
|
54
|
+
# TOON with function-logic + structural context
|
|
55
|
+
code2logic ./ -f toon --compact --no-repeat-module \
|
|
56
|
+
--function-logic function.toon --function-logic-context minimal --name project -o ./
|
|
57
|
+
|
|
58
|
+
# TOON-Hybrid (project structure + function details for hub modules)
|
|
59
|
+
code2logic ./ -f toon --hybrid --no-repeat-module --name project -o ./
|
|
60
|
+
|
|
61
|
+
# YAML compact (human-readable, good compromise)
|
|
62
|
+
code2logic ./ -f yaml --compact --name project -o ./
|
|
55
63
|
```
|
|
56
64
|
|
|
57
65
|
### Command Line
|
|
@@ -266,14 +274,20 @@ Similar Functions:
|
|
|
266
274
|
|
|
267
275
|
```text
|
|
268
276
|
code2logic/
|
|
269
|
-
├── analyzer.py
|
|
270
|
-
├── parsers.py
|
|
271
|
-
├── dependency.py
|
|
272
|
-
├── similarity.py
|
|
273
|
-
├── intent.py
|
|
274
|
-
├── generators.py
|
|
275
|
-
├──
|
|
276
|
-
|
|
277
|
+
├── analyzer.py # Main orchestrator
|
|
278
|
+
├── parsers.py # Tree-sitter + fallback parser
|
|
279
|
+
├── dependency.py # NetworkX dependency analysis
|
|
280
|
+
├── similarity.py # Rapidfuzz similar detection
|
|
281
|
+
├── intent.py # NLP intent generation
|
|
282
|
+
├── generators.py # Output generators (MD/Compact/JSON/YAML/CSV)
|
|
283
|
+
├── toon_format.py # TOON generator (compact, hybrid)
|
|
284
|
+
├── logicml.py # LogicML generator (typed signatures)
|
|
285
|
+
├── function_logic.py # Function-logic TOON with structural context
|
|
286
|
+
├── metrics.py # AST-based quality metrics
|
|
287
|
+
├── models.py # Data structures
|
|
288
|
+
├── cli.py # Command-line interface
|
|
289
|
+
├── benchmarks/ # Benchmark runner, results, common utils
|
|
290
|
+
└── llm_clients.py # Unified LLM client (OpenRouter/Ollama/LiteLLM)
|
|
277
291
|
```
|
|
278
292
|
|
|
279
293
|
## 🔌 Integration Examples
|
|
@@ -369,40 +383,39 @@ Compact format is ~10-15x smaller than Markdown.
|
|
|
369
383
|
|
|
370
384
|
## 🔬 Code Reproduction Benchmarks
|
|
371
385
|
|
|
372
|
-
|
|
386
|
+
Benchmark results (20 files, model: `arcee-ai/trinity-large-preview`, 2026-02-25):
|
|
373
387
|
|
|
374
|
-
### Format Comparison
|
|
388
|
+
### Project Benchmark — Format Comparison
|
|
375
389
|
|
|
376
|
-
| Format | Score |
|
|
377
|
-
|
|
378
|
-
| **
|
|
379
|
-
|
|
|
380
|
-
|
|
|
381
|
-
|
|
|
390
|
+
| Format | Score | Syntax OK | Runs OK | ~Tokens | Efficiency (p/kT) |
|
|
391
|
+
|--------|------:|----------:|--------:|--------:|---------:|
|
|
392
|
+
| **toon** | **63,8%** | 100% | 60% | 17 875 | **3,57** |
|
|
393
|
+
| json | 62,9% | 100% | 60% | 104 914 | 0,60 |
|
|
394
|
+
| markdown | 62,5% | 100% | 55% | 36 851 | 1,70 |
|
|
395
|
+
| yaml | 62,4% | 100% | 55% | 68 651 | 0,91 |
|
|
396
|
+
| logicml | 60,4% | 100% | 55% | ~30 000 | ~2,01 |
|
|
397
|
+
| csv | 53,0% | 100% | 40% | 80 779 | 0,66 |
|
|
398
|
+
| function.toon | 49,3% | 95% | 35% | 29 271 | 1,68 |
|
|
399
|
+
| gherkin | 38,6% | 95% | 30% | ~25 000 | ~1,54 |
|
|
400
|
+
|
|
401
|
+
**Behavioral benchmark:** 85,7% (6/7 functions passed).
|
|
382
402
|
|
|
383
403
|
### Key Findings
|
|
384
404
|
|
|
385
|
-
- **
|
|
386
|
-
- **
|
|
387
|
-
- **
|
|
388
|
-
- **
|
|
405
|
+
- **TOON wins on efficiency** — best score (63,8%) at 5,9x fewer tokens than JSON
|
|
406
|
+
- **Syntax OK = 100%** for all major formats — LLM always generates valid syntax
|
|
407
|
+
- **function.toon paradox** — worse than project.toon despite larger file, due to missing class/module context (fixed in v1.0.43 with `--function-logic-context`)
|
|
408
|
+
- **gherkin/csv** — poor fit for code description, their structure doesn't map to programming constructs
|
|
389
409
|
|
|
390
410
|
### Run Benchmarks
|
|
391
411
|
|
|
392
412
|
```bash
|
|
393
|
-
#
|
|
394
|
-
python examples/11_token_benchmark.py --folder tests/samples/ --no-llm
|
|
395
|
-
|
|
396
|
-
# Async multi-format benchmark
|
|
397
|
-
python examples/09_async_benchmark.py --folder tests/samples/ --no-llm
|
|
398
|
-
|
|
399
|
-
# Function-level reproduction
|
|
400
|
-
python examples/10_function_reproduction.py --file tests/samples/sample_functions.py --no-llm
|
|
401
|
-
|
|
402
|
-
python examples/15_unified_benchmark.py --folder tests/samples/ --no-llm
|
|
413
|
+
make benchmark # Full benchmark suite (requires OPENROUTER_API_KEY)
|
|
403
414
|
|
|
404
|
-
#
|
|
405
|
-
python examples/
|
|
415
|
+
# Or individually:
|
|
416
|
+
python examples/15_unified_benchmark.py --type format --folder tests/samples/ --limit 20
|
|
417
|
+
python examples/15_unified_benchmark.py --type project --folder tests/samples/ --limit 20
|
|
418
|
+
python examples/15_unified_benchmark.py --type function --file tests/samples/sample_functions.py
|
|
406
419
|
```
|
|
407
420
|
|
|
408
421
|
## 🤝 Contributing
|
|
@@ -4,7 +4,9 @@ Main project analyzer orchestrating all analysis components.
|
|
|
4
4
|
Provides the high-level API for analyzing codebases.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import logging
|
|
7
8
|
import sys
|
|
9
|
+
import time
|
|
8
10
|
from collections import defaultdict
|
|
9
11
|
from datetime import datetime
|
|
10
12
|
from pathlib import Path
|
|
@@ -16,6 +18,8 @@ from .models import ModuleInfo, ProjectInfo
|
|
|
16
18
|
from .parsers import TREE_SITTER_AVAILABLE, TreeSitterParser, UniversalParser
|
|
17
19
|
from .similarity import RAPIDFUZZ_AVAILABLE, SimilarityDetector
|
|
18
20
|
|
|
21
|
+
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
19
23
|
|
|
20
24
|
class ProjectAnalyzer:
|
|
21
25
|
"""
|
|
@@ -103,6 +107,7 @@ class ProjectAnalyzer:
|
|
|
103
107
|
use_treesitter: bool = True,
|
|
104
108
|
verbose: bool = False,
|
|
105
109
|
include_private: bool = False,
|
|
110
|
+
enable_similarity: bool = True,
|
|
106
111
|
):
|
|
107
112
|
"""
|
|
108
113
|
Initialize the project analyzer.
|
|
@@ -112,10 +117,12 @@ class ProjectAnalyzer:
|
|
|
112
117
|
use_treesitter: Whether to use Tree-sitter for parsing
|
|
113
118
|
verbose: Whether to print status messages
|
|
114
119
|
include_private: Whether to include private functions/classes
|
|
120
|
+
enable_similarity: Whether to enable similarity detection
|
|
115
121
|
"""
|
|
116
122
|
self.root_path = Path(root_path).resolve()
|
|
117
123
|
self.verbose = verbose
|
|
118
124
|
self.include_private = include_private
|
|
125
|
+
self.enable_similarity = enable_similarity
|
|
119
126
|
self.modules: List[ModuleInfo] = []
|
|
120
127
|
self.languages: Dict[str, int] = defaultdict(int)
|
|
121
128
|
|
|
@@ -137,10 +144,10 @@ class ProjectAnalyzer:
|
|
|
137
144
|
def _print_status(self):
|
|
138
145
|
"""Print library availability status."""
|
|
139
146
|
parts = []
|
|
140
|
-
parts.append("TS
|
|
141
|
-
parts.append("NX
|
|
142
|
-
parts.append("RF
|
|
143
|
-
parts.append("NLP
|
|
147
|
+
parts.append("TS" if TREE_SITTER_AVAILABLE else "TS")
|
|
148
|
+
parts.append("NX" if NETWORKX_AVAILABLE else "NX")
|
|
149
|
+
parts.append("RF" if RAPIDFUZZ_AVAILABLE else "RF")
|
|
150
|
+
parts.append("NLP" if (SPACY_AVAILABLE or NLTK_AVAILABLE) else "NLP")
|
|
144
151
|
print(f"Libs: {' '.join(parts)}", file=sys.stderr)
|
|
145
152
|
|
|
146
153
|
def analyze(self) -> ProjectInfo:
|
|
@@ -150,18 +157,49 @@ class ProjectAnalyzer:
|
|
|
150
157
|
Returns:
|
|
151
158
|
ProjectInfo with complete analysis results
|
|
152
159
|
"""
|
|
160
|
+
analyze_start = time.time()
|
|
161
|
+
|
|
153
162
|
# Scan and parse files
|
|
163
|
+
t0 = time.time()
|
|
154
164
|
self._scan_files()
|
|
165
|
+
t_scan = time.time() - t0
|
|
166
|
+
if self.verbose:
|
|
167
|
+
log.info(
|
|
168
|
+
"Scan complete: modules=%d languages=%s time=%.2fs",
|
|
169
|
+
len(self.modules),
|
|
170
|
+
dict(self.languages),
|
|
171
|
+
t_scan,
|
|
172
|
+
)
|
|
155
173
|
|
|
156
174
|
# Build dependency graph
|
|
175
|
+
t0 = time.time()
|
|
157
176
|
dep_graph = self.dep_analyzer.build_graph(self.modules)
|
|
158
177
|
dep_metrics = self.dep_analyzer.analyze_metrics()
|
|
178
|
+
t_dep = time.time() - t0
|
|
179
|
+
if self.verbose:
|
|
180
|
+
log.info("Dependency analysis complete: nodes=%d time=%.2fs", len(dep_graph or {}), t_dep)
|
|
159
181
|
|
|
160
182
|
# Detect entry points
|
|
183
|
+
t0 = time.time()
|
|
161
184
|
entrypoints = self._detect_entrypoints()
|
|
185
|
+
t_ep = time.time() - t0
|
|
186
|
+
if self.verbose:
|
|
187
|
+
log.info("Entrypoint detection complete: entrypoints=%d time=%.2fs", len(entrypoints), t_ep)
|
|
162
188
|
|
|
163
189
|
# Find similar functions
|
|
164
|
-
similar =
|
|
190
|
+
similar: Dict[str, List[str]] = {}
|
|
191
|
+
if self.enable_similarity:
|
|
192
|
+
t0 = time.time()
|
|
193
|
+
similar = self.sim_detector.find_similar_functions(self.modules)
|
|
194
|
+
t_sim = time.time() - t0
|
|
195
|
+
if self.verbose:
|
|
196
|
+
log.info("Similarity detection complete: matches=%d time=%.2fs", len(similar), t_sim)
|
|
197
|
+
else:
|
|
198
|
+
if self.verbose:
|
|
199
|
+
log.info("Similarity detection skipped (--no-similarity)")
|
|
200
|
+
|
|
201
|
+
if self.verbose:
|
|
202
|
+
log.info("Total analysis time: %.2fs", time.time() - analyze_start)
|
|
165
203
|
|
|
166
204
|
return ProjectInfo(
|
|
167
205
|
name=self.root_path.name,
|
|
@@ -221,14 +259,14 @@ class ProjectAnalyzer:
|
|
|
221
259
|
module = self.ts_parser.parse(rel_path, content, language)
|
|
222
260
|
except Exception as e:
|
|
223
261
|
if self.verbose:
|
|
224
|
-
|
|
262
|
+
log.debug("Tree-sitter parser failed for %s: %s", rel_path, e)
|
|
225
263
|
|
|
226
264
|
if module is None:
|
|
227
265
|
try:
|
|
228
266
|
module = self.fallback_parser.parse(rel_path, content, language)
|
|
229
267
|
except Exception as e:
|
|
230
268
|
if self.verbose:
|
|
231
|
-
|
|
269
|
+
log.debug("Fallback parser failed for %s: %s", rel_path, e)
|
|
232
270
|
continue
|
|
233
271
|
|
|
234
272
|
if module:
|
|
@@ -10,6 +10,7 @@ Usage:
|
|
|
10
10
|
|
|
11
11
|
import argparse
|
|
12
12
|
import json
|
|
13
|
+
import logging
|
|
13
14
|
import os
|
|
14
15
|
import signal
|
|
15
16
|
import subprocess
|
|
@@ -508,24 +509,12 @@ def _code2logic_llm_cli(argv: list[str]) -> None:
|
|
|
508
509
|
return
|
|
509
510
|
|
|
510
511
|
|
|
511
|
-
def main():
|
|
512
|
-
"""Main CLI entry point."""
|
|
513
|
-
cli_start = time.time()
|
|
514
|
-
|
|
515
|
-
try:
|
|
516
|
-
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
|
|
517
|
-
except Exception:
|
|
518
|
-
pass
|
|
519
|
-
|
|
520
|
-
if len(sys.argv) > 1 and sys.argv[1] == 'llm':
|
|
521
|
-
_code2logic_llm_cli(sys.argv[2:])
|
|
522
|
-
return
|
|
523
|
-
|
|
512
|
+
def main(argv=None):
|
|
524
513
|
parser = argparse.ArgumentParser(
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
514
|
+
description='Analyze source code and generate logical representations',
|
|
515
|
+
formatter_class=argparse.RawDescriptionHelpFormatter
|
|
516
|
+
)
|
|
517
|
+
epilog='''
|
|
529
518
|
Examples:
|
|
530
519
|
code2logic /path/to/project # Standard Markdown
|
|
531
520
|
code2logic /path/to/project -f csv # CSV (best for LLM, ~50% smaller)
|
|
@@ -551,41 +540,6 @@ Detail levels (columns in csv/json/yaml):
|
|
|
551
540
|
standard - + intent, category, domain, imports (8 columns)
|
|
552
541
|
full - + calls, lines, complexity, hash (16 columns)
|
|
553
542
|
'''
|
|
554
|
-
)
|
|
555
|
-
|
|
556
|
-
def _maybe_print_pretty_help() -> bool:
|
|
557
|
-
"""Print colorized help as markdown when appropriate.
|
|
558
|
-
|
|
559
|
-
Returns True if help was printed and the CLI should exit early.
|
|
560
|
-
"""
|
|
561
|
-
force_pretty = os.environ.get("CODE2LOGIC_PRETTY_HELP") == "1" or bool(os.environ.get("FORCE_COLOR"))
|
|
562
|
-
if not force_pretty:
|
|
563
|
-
if not hasattr(sys.stdout, "isatty") or not sys.stdout.isatty():
|
|
564
|
-
return False
|
|
565
|
-
try:
|
|
566
|
-
from .terminal import render
|
|
567
|
-
except Exception:
|
|
568
|
-
return False
|
|
569
|
-
|
|
570
|
-
help_md = f"""# code2logic
|
|
571
|
-
|
|
572
|
-
Convert source code to logical representation for LLM analysis.
|
|
573
|
-
|
|
574
|
-
## Usage
|
|
575
|
-
|
|
576
|
-
```bash
|
|
577
|
-
code2logic [path] [options]
|
|
578
|
-
```
|
|
579
|
-
|
|
580
|
-
## Help
|
|
581
|
-
|
|
582
|
-
```text
|
|
583
|
-
{parser.format_help().rstrip()}
|
|
584
|
-
```
|
|
585
|
-
"""
|
|
586
|
-
render.markdown(help_md)
|
|
587
|
-
return True
|
|
588
|
-
|
|
589
543
|
parser.add_argument(
|
|
590
544
|
'path',
|
|
591
545
|
nargs='?',
|
|
@@ -690,6 +644,11 @@ code2logic [path] [options]
|
|
|
690
644
|
action='store_true',
|
|
691
645
|
help='Disable Tree-sitter (use fallback parser)'
|
|
692
646
|
)
|
|
647
|
+
parser.add_argument(
|
|
648
|
+
'--no-similarity',
|
|
649
|
+
action='store_true',
|
|
650
|
+
help='Disable similarity detection (RapidFuzz) to speed up analysis on large projects'
|
|
651
|
+
)
|
|
693
652
|
parser.add_argument(
|
|
694
653
|
'-v', '--verbose',
|
|
695
654
|
action='store_true',
|
|
@@ -732,11 +691,10 @@ code2logic [path] [options]
|
|
|
732
691
|
)
|
|
733
692
|
|
|
734
693
|
if len(sys.argv) == 1 or any(a in ("-h", "--help") for a in sys.argv[1:]):
|
|
735
|
-
|
|
736
|
-
parser.print_help()
|
|
694
|
+
parser.print_help()
|
|
737
695
|
return
|
|
738
696
|
|
|
739
|
-
args = parser.parse_args()
|
|
697
|
+
args = parser.parse_args(argv)
|
|
740
698
|
|
|
741
699
|
if not args.no_install and os.environ.get("CODE2LOGIC_NO_INSTALL") in ("1", "true", "True", "yes", "YES"):
|
|
742
700
|
args.no_install = True
|
|
@@ -750,6 +708,11 @@ code2logic [path] [options]
|
|
|
750
708
|
# Initialize logger
|
|
751
709
|
log = Logger(verbose=args.verbose, debug=args.debug)
|
|
752
710
|
|
|
711
|
+
logging.basicConfig(
|
|
712
|
+
level=(logging.DEBUG if args.debug else (logging.INFO if args.verbose else logging.WARNING)),
|
|
713
|
+
format='[%(levelname)s] %(message)s',
|
|
714
|
+
)
|
|
715
|
+
|
|
753
716
|
if args.verbose and not args.quiet:
|
|
754
717
|
log.header("CODE2LOGIC")
|
|
755
718
|
log.detail(f"Version: {__version__}")
|
|
@@ -842,9 +805,7 @@ code2logic [path] [options]
|
|
|
842
805
|
|
|
843
806
|
# Path is required for analysis
|
|
844
807
|
if args.path is None:
|
|
845
|
-
|
|
846
|
-
if not _maybe_print_pretty_help():
|
|
847
|
-
parser.print_help()
|
|
808
|
+
parser.print_help()
|
|
848
809
|
return
|
|
849
810
|
|
|
850
811
|
# Validate path
|
|
@@ -865,7 +826,8 @@ code2logic [path] [options]
|
|
|
865
826
|
analyzer = ProjectAnalyzer(
|
|
866
827
|
args.path,
|
|
867
828
|
use_treesitter=not args.no_treesitter,
|
|
868
|
-
verbose=args.debug
|
|
829
|
+
verbose=args.verbose or args.debug,
|
|
830
|
+
enable_similarity=not args.no_similarity,
|
|
869
831
|
)
|
|
870
832
|
project = analyzer.analyze()
|
|
871
833
|
analyze_time = time.time() - analyze_start
|
|
@@ -4,11 +4,15 @@ Similarity detector using Rapidfuzz.
|
|
|
4
4
|
Detects similar functions across modules to identify
|
|
5
5
|
potential duplicates and refactoring opportunities.
|
|
6
6
|
"""
|
|
7
|
-
|
|
7
|
+
import logging
|
|
8
|
+
import time
|
|
9
|
+
from collections import defaultdict
|
|
8
10
|
from typing import Dict, List
|
|
9
11
|
|
|
10
12
|
from .models import ModuleInfo
|
|
11
13
|
|
|
14
|
+
log = logging.getLogger(__name__)
|
|
15
|
+
|
|
12
16
|
# Optional Rapidfuzz import
|
|
13
17
|
RAPIDFUZZ_AVAILABLE = False
|
|
14
18
|
try:
|
|
@@ -43,6 +47,8 @@ class SimilarityDetector:
|
|
|
43
47
|
threshold: Minimum similarity score (0-100) to consider as similar
|
|
44
48
|
"""
|
|
45
49
|
self.threshold = threshold
|
|
50
|
+
self.max_functions = 8000
|
|
51
|
+
self.progress_every = 250
|
|
46
52
|
|
|
47
53
|
def find_similar_functions(self, modules: List[ModuleInfo]) -> Dict[str, List[str]]:
|
|
48
54
|
"""
|
|
@@ -58,6 +64,8 @@ class SimilarityDetector:
|
|
|
58
64
|
if not RAPIDFUZZ_AVAILABLE:
|
|
59
65
|
return {}
|
|
60
66
|
|
|
67
|
+
start = time.time()
|
|
68
|
+
|
|
61
69
|
# Collect all functions
|
|
62
70
|
all_funcs: List[dict] = []
|
|
63
71
|
for m in modules:
|
|
@@ -76,15 +84,35 @@ class SimilarityDetector:
|
|
|
76
84
|
if len(all_funcs) < 2:
|
|
77
85
|
return {}
|
|
78
86
|
|
|
87
|
+
if len(all_funcs) > self.max_functions:
|
|
88
|
+
log.warning(
|
|
89
|
+
"Skipping similarity detection: too many functions (%d > %d). Use --no-similarity to silence this.",
|
|
90
|
+
len(all_funcs),
|
|
91
|
+
self.max_functions,
|
|
92
|
+
)
|
|
93
|
+
return {}
|
|
94
|
+
|
|
79
95
|
# Find similar functions
|
|
80
96
|
similar: Dict[str, List[str]] = {}
|
|
81
97
|
names = [f['name'] for f in all_funcs]
|
|
82
98
|
|
|
99
|
+
name_to_fulls: Dict[str, List[str]] = defaultdict(list)
|
|
100
|
+
for f in all_funcs:
|
|
101
|
+
name_to_fulls[f['name']].append(f['full'])
|
|
102
|
+
|
|
83
103
|
for i, func in enumerate(all_funcs):
|
|
84
104
|
# Skip common names that would produce false positives
|
|
85
105
|
if func['name'] in ('__init__', 'constructor', 'toString', 'valueOf'):
|
|
86
106
|
continue
|
|
87
107
|
|
|
108
|
+
if i > 0 and (i % self.progress_every) == 0:
|
|
109
|
+
log.debug(
|
|
110
|
+
"Similarity progress: %d/%d (%.2fs)",
|
|
111
|
+
i,
|
|
112
|
+
len(all_funcs),
|
|
113
|
+
time.time() - start,
|
|
114
|
+
)
|
|
115
|
+
|
|
88
116
|
matches = process.extract(
|
|
89
117
|
func['name'],
|
|
90
118
|
names[:i] + names[i+1:],
|
|
@@ -95,15 +123,13 @@ class SimilarityDetector:
|
|
|
95
123
|
sim_list = []
|
|
96
124
|
for match_name, score, _ in matches:
|
|
97
125
|
if score >= self.threshold and match_name != func['name']:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
if other['name'] == match_name:
|
|
101
|
-
sim_list.append(f"{other['full']} ({score}%)")
|
|
102
|
-
break
|
|
126
|
+
for full in name_to_fulls.get(match_name, [])[:3]:
|
|
127
|
+
sim_list.append(f"{full} ({score}%)")
|
|
103
128
|
|
|
104
129
|
if sim_list:
|
|
105
130
|
similar[func['full']] = sim_list
|
|
106
131
|
|
|
132
|
+
log.debug("Similarity finished: funcs=%d matches=%d time=%.2fs", len(all_funcs), len(similar), time.time() - start)
|
|
107
133
|
return similar
|
|
108
134
|
|
|
109
135
|
def find_duplicate_signatures(self, modules: List[ModuleInfo]) -> Dict[str, List[str]]:
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "code2logic"
|
|
7
|
-
version = "1.0.43"
|
|
7
|
+
version = "1.0.45"
|
|
8
8
|
description = "Code2Logic - Source code to logical representation converter for LLM analysis, featuring Tree-sitter parsing, dependency graph analysis, and multi-language support."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|