code2logic 1.0.44__tar.gz → 1.0.46__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code2logic-1.0.44 → code2logic-1.0.46}/PKG-INFO +1 -1
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/__init__.py +1 -1
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/analyzer.py +117 -52
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/cli.py +21 -58
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/similarity.py +32 -6
- {code2logic-1.0.44 → code2logic-1.0.46}/pyproject.toml +1 -1
- {code2logic-1.0.44 → code2logic-1.0.46}/LICENSE +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/README.md +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/__main__.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/adaptive.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/base.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/base_generator.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/benchmark.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/benchmarks/__init__.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/benchmarks/common.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/benchmarks/results.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/benchmarks/runner.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/chunked_reproduction.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/code_review.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/config.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/core/__init__.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/dependency.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/errors.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/file_formats.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/formats/__init__.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/function_logic.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/generators.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/gherkin.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/integrations/__init__.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/intent.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/llm/__init__.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/llm.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/llm_clients.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/llm_profiler.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/logicml.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/markdown_format.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/mcp_server.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/metrics.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/models.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/parsers.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/project_comparison.md +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/project_reproducer.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/prompts.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/py.typed +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/quality.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/refactor.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/reproducer.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/reproduction.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/schemas/__init__.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/schemas/json_schema.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/schemas/logicml_schema.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/schemas/markdown_schema.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/schemas/yaml_schema.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/shared_utils.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/terminal.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/tools/__init__.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/toon_format.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/universal.py +0 -0
- {code2logic-1.0.44 → code2logic-1.0.46}/code2logic/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code2logic
|
|
3
|
-
Version: 1.0.44
|
|
3
|
+
Version: 1.0.46
|
|
4
4
|
Summary: Code2Logic - Source code to logical representation converter for LLM analysis, featuring Tree-sitter parsing, dependency graph analysis, and multi-language support.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -4,7 +4,10 @@ Main project analyzer orchestrating all analysis components.
|
|
|
4
4
|
Provides the high-level API for analyzing codebases.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
7
9
|
import sys
|
|
10
|
+
import time
|
|
8
11
|
from collections import defaultdict
|
|
9
12
|
from datetime import datetime
|
|
10
13
|
from pathlib import Path
|
|
@@ -16,6 +19,8 @@ from .models import ModuleInfo, ProjectInfo
|
|
|
16
19
|
from .parsers import TREE_SITTER_AVAILABLE, TreeSitterParser, UniversalParser
|
|
17
20
|
from .similarity import RAPIDFUZZ_AVAILABLE, SimilarityDetector
|
|
18
21
|
|
|
22
|
+
log = logging.getLogger(__name__)
|
|
23
|
+
|
|
19
24
|
|
|
20
25
|
class ProjectAnalyzer:
|
|
21
26
|
"""
|
|
@@ -103,6 +108,7 @@ class ProjectAnalyzer:
|
|
|
103
108
|
use_treesitter: bool = True,
|
|
104
109
|
verbose: bool = False,
|
|
105
110
|
include_private: bool = False,
|
|
111
|
+
enable_similarity: bool = True,
|
|
106
112
|
):
|
|
107
113
|
"""
|
|
108
114
|
Initialize the project analyzer.
|
|
@@ -112,10 +118,12 @@ class ProjectAnalyzer:
|
|
|
112
118
|
use_treesitter: Whether to use Tree-sitter for parsing
|
|
113
119
|
verbose: Whether to print status messages
|
|
114
120
|
include_private: Whether to include private functions/classes
|
|
121
|
+
enable_similarity: Whether to enable similarity detection
|
|
115
122
|
"""
|
|
116
123
|
self.root_path = Path(root_path).resolve()
|
|
117
124
|
self.verbose = verbose
|
|
118
125
|
self.include_private = include_private
|
|
126
|
+
self.enable_similarity = enable_similarity
|
|
119
127
|
self.modules: List[ModuleInfo] = []
|
|
120
128
|
self.languages: Dict[str, int] = defaultdict(int)
|
|
121
129
|
|
|
@@ -137,10 +145,10 @@ class ProjectAnalyzer:
|
|
|
137
145
|
def _print_status(self):
|
|
138
146
|
"""Print library availability status."""
|
|
139
147
|
parts = []
|
|
140
|
-
parts.append("TS
|
|
141
|
-
parts.append("NX
|
|
142
|
-
parts.append("RF
|
|
143
|
-
parts.append("NLP
|
|
148
|
+
parts.append("TS" if TREE_SITTER_AVAILABLE else "TS")
|
|
149
|
+
parts.append("NX" if NETWORKX_AVAILABLE else "NX")
|
|
150
|
+
parts.append("RF" if RAPIDFUZZ_AVAILABLE else "RF")
|
|
151
|
+
parts.append("NLP" if (SPACY_AVAILABLE or NLTK_AVAILABLE) else "NLP")
|
|
144
152
|
print(f"Libs: {' '.join(parts)}", file=sys.stderr)
|
|
145
153
|
|
|
146
154
|
def analyze(self) -> ProjectInfo:
|
|
@@ -150,18 +158,49 @@ class ProjectAnalyzer:
|
|
|
150
158
|
Returns:
|
|
151
159
|
ProjectInfo with complete analysis results
|
|
152
160
|
"""
|
|
161
|
+
analyze_start = time.time()
|
|
162
|
+
|
|
153
163
|
# Scan and parse files
|
|
164
|
+
t0 = time.time()
|
|
154
165
|
self._scan_files()
|
|
166
|
+
t_scan = time.time() - t0
|
|
167
|
+
if self.verbose:
|
|
168
|
+
log.info(
|
|
169
|
+
"Scan complete: modules=%d languages=%s time=%.2fs",
|
|
170
|
+
len(self.modules),
|
|
171
|
+
dict(self.languages),
|
|
172
|
+
t_scan,
|
|
173
|
+
)
|
|
155
174
|
|
|
156
175
|
# Build dependency graph
|
|
176
|
+
t0 = time.time()
|
|
157
177
|
dep_graph = self.dep_analyzer.build_graph(self.modules)
|
|
158
178
|
dep_metrics = self.dep_analyzer.analyze_metrics()
|
|
179
|
+
t_dep = time.time() - t0
|
|
180
|
+
if self.verbose:
|
|
181
|
+
log.info("Dependency analysis complete: nodes=%d time=%.2fs", len(dep_graph or {}), t_dep)
|
|
159
182
|
|
|
160
183
|
# Detect entry points
|
|
184
|
+
t0 = time.time()
|
|
161
185
|
entrypoints = self._detect_entrypoints()
|
|
186
|
+
t_ep = time.time() - t0
|
|
187
|
+
if self.verbose:
|
|
188
|
+
log.info("Entrypoint detection complete: entrypoints=%d time=%.2fs", len(entrypoints), t_ep)
|
|
162
189
|
|
|
163
190
|
# Find similar functions
|
|
164
|
-
similar =
|
|
191
|
+
similar: Dict[str, List[str]] = {}
|
|
192
|
+
if self.enable_similarity:
|
|
193
|
+
t0 = time.time()
|
|
194
|
+
similar = self.sim_detector.find_similar_functions(self.modules)
|
|
195
|
+
t_sim = time.time() - t0
|
|
196
|
+
if self.verbose:
|
|
197
|
+
log.info("Similarity detection complete: matches=%d time=%.2fs", len(similar), t_sim)
|
|
198
|
+
else:
|
|
199
|
+
if self.verbose:
|
|
200
|
+
log.info("Similarity detection skipped (--no-similarity)")
|
|
201
|
+
|
|
202
|
+
if self.verbose:
|
|
203
|
+
log.info("Total analysis time: %.2fs", time.time() - analyze_start)
|
|
165
204
|
|
|
166
205
|
return ProjectInfo(
|
|
167
206
|
name=self.root_path.name,
|
|
@@ -180,63 +219,89 @@ class ProjectAnalyzer:
|
|
|
180
219
|
|
|
181
220
|
def _scan_files(self):
|
|
182
221
|
"""Scan and parse all source files."""
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
if language is None and ext == '':
|
|
198
|
-
try:
|
|
199
|
-
with fp.open('r', encoding='utf-8', errors='ignore') as f:
|
|
200
|
-
language = self._language_from_shebang(f.readline())
|
|
201
|
-
except Exception:
|
|
202
|
-
language = None
|
|
203
|
-
|
|
204
|
-
if language is None:
|
|
205
|
-
continue
|
|
222
|
+
scan_start = time.time()
|
|
223
|
+
files_seen = 0
|
|
224
|
+
files_parsed = 0
|
|
225
|
+
files_matched = 0
|
|
226
|
+
scan_progress_every = 500
|
|
227
|
+
|
|
228
|
+
for root, dirnames, filenames in os.walk(self.root_path):
|
|
229
|
+
dirnames[:] = [d for d in dirnames if d not in self.IGNORE_DIRS]
|
|
230
|
+
for filename in filenames:
|
|
231
|
+
files_seen += 1
|
|
232
|
+
fp = Path(root) / filename
|
|
233
|
+
|
|
234
|
+
if filename in self.IGNORE_FILES:
|
|
235
|
+
continue
|
|
206
236
|
|
|
207
|
-
|
|
237
|
+
ext = fp.suffix.lower()
|
|
238
|
+
language = self.LANGUAGE_EXTENSIONS.get(ext)
|
|
239
|
+
if language is None and ext == '':
|
|
240
|
+
try:
|
|
241
|
+
with fp.open('r', encoding='utf-8', errors='ignore') as f:
|
|
242
|
+
language = self._language_from_shebang(f.readline())
|
|
243
|
+
except Exception:
|
|
244
|
+
language = None
|
|
208
245
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
content = fp.read_text(encoding='utf-8', errors='ignore')
|
|
212
|
-
except Exception:
|
|
213
|
-
continue
|
|
246
|
+
if language is None:
|
|
247
|
+
continue
|
|
214
248
|
|
|
215
|
-
|
|
249
|
+
files_matched += 1
|
|
250
|
+
self.languages[language] += 1
|
|
216
251
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
252
|
+
if self.verbose and files_seen > 0 and (files_seen % scan_progress_every) == 0:
|
|
253
|
+
log.info(
|
|
254
|
+
"Scan progress: seen=%d matched=%d parsed=%d modules=%d time=%.2fs",
|
|
255
|
+
files_seen,
|
|
256
|
+
files_matched,
|
|
257
|
+
files_parsed,
|
|
258
|
+
len(self.modules),
|
|
259
|
+
time.time() - scan_start,
|
|
260
|
+
)
|
|
225
261
|
|
|
226
|
-
if module is None:
|
|
227
262
|
try:
|
|
228
|
-
|
|
229
|
-
except Exception
|
|
230
|
-
if self.verbose:
|
|
231
|
-
print(f"Fallback parser failed for {rel_path}: {e}", file=sys.stderr)
|
|
263
|
+
content = fp.read_text(encoding='utf-8', errors='ignore')
|
|
264
|
+
except Exception:
|
|
232
265
|
continue
|
|
233
266
|
|
|
234
|
-
if module:
|
|
235
267
|
try:
|
|
236
|
-
|
|
268
|
+
rel_path = str(fp.relative_to(self.root_path))
|
|
237
269
|
except Exception:
|
|
238
|
-
|
|
239
|
-
|
|
270
|
+
rel_path = str(fp)
|
|
271
|
+
|
|
272
|
+
module = None
|
|
273
|
+
try:
|
|
274
|
+
if self.ts_parser and self.ts_parser.is_available(language):
|
|
275
|
+
module = self.ts_parser.parse(rel_path, content, language)
|
|
276
|
+
except Exception as e:
|
|
277
|
+
if self.verbose:
|
|
278
|
+
log.debug("Tree-sitter parser failed for %s: %s", rel_path, e)
|
|
279
|
+
|
|
280
|
+
if module is None:
|
|
281
|
+
try:
|
|
282
|
+
module = self.fallback_parser.parse(rel_path, content, language)
|
|
283
|
+
except Exception as e:
|
|
284
|
+
if self.verbose:
|
|
285
|
+
log.debug("Fallback parser failed for %s: %s", rel_path, e)
|
|
286
|
+
continue
|
|
287
|
+
|
|
288
|
+
if module:
|
|
289
|
+
files_parsed += 1
|
|
290
|
+
try:
|
|
291
|
+
module.file_bytes = fp.stat().st_size
|
|
292
|
+
except Exception:
|
|
293
|
+
module.file_bytes = len(content.encode('utf-8', errors='ignore'))
|
|
294
|
+
self.modules.append(module)
|
|
295
|
+
|
|
296
|
+
if self.verbose:
|
|
297
|
+
log.info(
|
|
298
|
+
"Scan finished: seen=%d matched=%d parsed=%d modules=%d time=%.2fs",
|
|
299
|
+
files_seen,
|
|
300
|
+
files_matched,
|
|
301
|
+
files_parsed,
|
|
302
|
+
len(self.modules),
|
|
303
|
+
time.time() - scan_start,
|
|
304
|
+
)
|
|
240
305
|
|
|
241
306
|
def _detect_entrypoints(self) -> List[str]:
|
|
242
307
|
"""Detect project entry points."""
|
|
@@ -10,6 +10,7 @@ Usage:
|
|
|
10
10
|
|
|
11
11
|
import argparse
|
|
12
12
|
import json
|
|
13
|
+
import logging
|
|
13
14
|
import os
|
|
14
15
|
import signal
|
|
15
16
|
import subprocess
|
|
@@ -508,24 +509,13 @@ def _code2logic_llm_cli(argv: list[str]) -> None:
|
|
|
508
509
|
return
|
|
509
510
|
|
|
510
511
|
|
|
511
|
-
def main():
|
|
512
|
-
"""Main CLI entry point."""
|
|
512
|
+
def main(argv=None):
|
|
513
513
|
cli_start = time.time()
|
|
514
|
-
|
|
515
|
-
try:
|
|
516
|
-
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
|
|
517
|
-
except Exception:
|
|
518
|
-
pass
|
|
519
|
-
|
|
520
|
-
if len(sys.argv) > 1 and sys.argv[1] == 'llm':
|
|
521
|
-
_code2logic_llm_cli(sys.argv[2:])
|
|
522
|
-
return
|
|
523
|
-
|
|
524
514
|
parser = argparse.ArgumentParser(
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
515
|
+
description='Analyze source code and generate logical representations',
|
|
516
|
+
formatter_class=argparse.RawDescriptionHelpFormatter
|
|
517
|
+
)
|
|
518
|
+
epilog='''
|
|
529
519
|
Examples:
|
|
530
520
|
code2logic /path/to/project # Standard Markdown
|
|
531
521
|
code2logic /path/to/project -f csv # CSV (best for LLM, ~50% smaller)
|
|
@@ -551,41 +541,6 @@ Detail levels (columns in csv/json/yaml):
|
|
|
551
541
|
standard - + intent, category, domain, imports (8 columns)
|
|
552
542
|
full - + calls, lines, complexity, hash (16 columns)
|
|
553
543
|
'''
|
|
554
|
-
)
|
|
555
|
-
|
|
556
|
-
def _maybe_print_pretty_help() -> bool:
|
|
557
|
-
"""Print colorized help as markdown when appropriate.
|
|
558
|
-
|
|
559
|
-
Returns True if help was printed and the CLI should exit early.
|
|
560
|
-
"""
|
|
561
|
-
force_pretty = os.environ.get("CODE2LOGIC_PRETTY_HELP") == "1" or bool(os.environ.get("FORCE_COLOR"))
|
|
562
|
-
if not force_pretty:
|
|
563
|
-
if not hasattr(sys.stdout, "isatty") or not sys.stdout.isatty():
|
|
564
|
-
return False
|
|
565
|
-
try:
|
|
566
|
-
from .terminal import render
|
|
567
|
-
except Exception:
|
|
568
|
-
return False
|
|
569
|
-
|
|
570
|
-
help_md = f"""# code2logic
|
|
571
|
-
|
|
572
|
-
Convert source code to logical representation for LLM analysis.
|
|
573
|
-
|
|
574
|
-
## Usage
|
|
575
|
-
|
|
576
|
-
```bash
|
|
577
|
-
code2logic [path] [options]
|
|
578
|
-
```
|
|
579
|
-
|
|
580
|
-
## Help
|
|
581
|
-
|
|
582
|
-
```text
|
|
583
|
-
{parser.format_help().rstrip()}
|
|
584
|
-
```
|
|
585
|
-
"""
|
|
586
|
-
render.markdown(help_md)
|
|
587
|
-
return True
|
|
588
|
-
|
|
589
544
|
parser.add_argument(
|
|
590
545
|
'path',
|
|
591
546
|
nargs='?',
|
|
@@ -690,6 +645,11 @@ code2logic [path] [options]
|
|
|
690
645
|
action='store_true',
|
|
691
646
|
help='Disable Tree-sitter (use fallback parser)'
|
|
692
647
|
)
|
|
648
|
+
parser.add_argument(
|
|
649
|
+
'--no-similarity',
|
|
650
|
+
action='store_true',
|
|
651
|
+
help='Disable similarity detection (RapidFuzz) to speed up analysis on large projects'
|
|
652
|
+
)
|
|
693
653
|
parser.add_argument(
|
|
694
654
|
'-v', '--verbose',
|
|
695
655
|
action='store_true',
|
|
@@ -732,11 +692,10 @@ code2logic [path] [options]
|
|
|
732
692
|
)
|
|
733
693
|
|
|
734
694
|
if len(sys.argv) == 1 or any(a in ("-h", "--help") for a in sys.argv[1:]):
|
|
735
|
-
|
|
736
|
-
parser.print_help()
|
|
695
|
+
parser.print_help()
|
|
737
696
|
return
|
|
738
697
|
|
|
739
|
-
args = parser.parse_args()
|
|
698
|
+
args = parser.parse_args(argv)
|
|
740
699
|
|
|
741
700
|
if not args.no_install and os.environ.get("CODE2LOGIC_NO_INSTALL") in ("1", "true", "True", "yes", "YES"):
|
|
742
701
|
args.no_install = True
|
|
@@ -750,6 +709,11 @@ code2logic [path] [options]
|
|
|
750
709
|
# Initialize logger
|
|
751
710
|
log = Logger(verbose=args.verbose, debug=args.debug)
|
|
752
711
|
|
|
712
|
+
logging.basicConfig(
|
|
713
|
+
level=(logging.DEBUG if args.debug else (logging.INFO if args.verbose else logging.WARNING)),
|
|
714
|
+
format='[%(levelname)s] %(message)s',
|
|
715
|
+
)
|
|
716
|
+
|
|
753
717
|
if args.verbose and not args.quiet:
|
|
754
718
|
log.header("CODE2LOGIC")
|
|
755
719
|
log.detail(f"Version: {__version__}")
|
|
@@ -842,9 +806,7 @@ code2logic [path] [options]
|
|
|
842
806
|
|
|
843
807
|
# Path is required for analysis
|
|
844
808
|
if args.path is None:
|
|
845
|
-
|
|
846
|
-
if not _maybe_print_pretty_help():
|
|
847
|
-
parser.print_help()
|
|
809
|
+
parser.print_help()
|
|
848
810
|
return
|
|
849
811
|
|
|
850
812
|
# Validate path
|
|
@@ -865,7 +827,8 @@ code2logic [path] [options]
|
|
|
865
827
|
analyzer = ProjectAnalyzer(
|
|
866
828
|
args.path,
|
|
867
829
|
use_treesitter=not args.no_treesitter,
|
|
868
|
-
verbose=args.debug
|
|
830
|
+
verbose=args.verbose or args.debug,
|
|
831
|
+
enable_similarity=not args.no_similarity,
|
|
869
832
|
)
|
|
870
833
|
project = analyzer.analyze()
|
|
871
834
|
analyze_time = time.time() - analyze_start
|
|
@@ -4,11 +4,15 @@ Similarity detector using Rapidfuzz.
|
|
|
4
4
|
Detects similar functions across modules to identify
|
|
5
5
|
potential duplicates and refactoring opportunities.
|
|
6
6
|
"""
|
|
7
|
-
|
|
7
|
+
import logging
|
|
8
|
+
import time
|
|
9
|
+
from collections import defaultdict
|
|
8
10
|
from typing import Dict, List
|
|
9
11
|
|
|
10
12
|
from .models import ModuleInfo
|
|
11
13
|
|
|
14
|
+
log = logging.getLogger(__name__)
|
|
15
|
+
|
|
12
16
|
# Optional Rapidfuzz import
|
|
13
17
|
RAPIDFUZZ_AVAILABLE = False
|
|
14
18
|
try:
|
|
@@ -43,6 +47,8 @@ class SimilarityDetector:
|
|
|
43
47
|
threshold: Minimum similarity score (0-100) to consider as similar
|
|
44
48
|
"""
|
|
45
49
|
self.threshold = threshold
|
|
50
|
+
self.max_functions = 8000
|
|
51
|
+
self.progress_every = 250
|
|
46
52
|
|
|
47
53
|
def find_similar_functions(self, modules: List[ModuleInfo]) -> Dict[str, List[str]]:
|
|
48
54
|
"""
|
|
@@ -58,6 +64,8 @@ class SimilarityDetector:
|
|
|
58
64
|
if not RAPIDFUZZ_AVAILABLE:
|
|
59
65
|
return {}
|
|
60
66
|
|
|
67
|
+
start = time.time()
|
|
68
|
+
|
|
61
69
|
# Collect all functions
|
|
62
70
|
all_funcs: List[dict] = []
|
|
63
71
|
for m in modules:
|
|
@@ -76,15 +84,35 @@ class SimilarityDetector:
|
|
|
76
84
|
if len(all_funcs) < 2:
|
|
77
85
|
return {}
|
|
78
86
|
|
|
87
|
+
if len(all_funcs) > self.max_functions:
|
|
88
|
+
log.warning(
|
|
89
|
+
"Skipping similarity detection: too many functions (%d > %d). Use --no-similarity to silence this.",
|
|
90
|
+
len(all_funcs),
|
|
91
|
+
self.max_functions,
|
|
92
|
+
)
|
|
93
|
+
return {}
|
|
94
|
+
|
|
79
95
|
# Find similar functions
|
|
80
96
|
similar: Dict[str, List[str]] = {}
|
|
81
97
|
names = [f['name'] for f in all_funcs]
|
|
82
98
|
|
|
99
|
+
name_to_fulls: Dict[str, List[str]] = defaultdict(list)
|
|
100
|
+
for f in all_funcs:
|
|
101
|
+
name_to_fulls[f['name']].append(f['full'])
|
|
102
|
+
|
|
83
103
|
for i, func in enumerate(all_funcs):
|
|
84
104
|
# Skip common names that would produce false positives
|
|
85
105
|
if func['name'] in ('__init__', 'constructor', 'toString', 'valueOf'):
|
|
86
106
|
continue
|
|
87
107
|
|
|
108
|
+
if i > 0 and (i % self.progress_every) == 0:
|
|
109
|
+
log.debug(
|
|
110
|
+
"Similarity progress: %d/%d (%.2fs)",
|
|
111
|
+
i,
|
|
112
|
+
len(all_funcs),
|
|
113
|
+
time.time() - start,
|
|
114
|
+
)
|
|
115
|
+
|
|
88
116
|
matches = process.extract(
|
|
89
117
|
func['name'],
|
|
90
118
|
names[:i] + names[i+1:],
|
|
@@ -95,15 +123,13 @@ class SimilarityDetector:
|
|
|
95
123
|
sim_list = []
|
|
96
124
|
for match_name, score, _ in matches:
|
|
97
125
|
if score >= self.threshold and match_name != func['name']:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
if other['name'] == match_name:
|
|
101
|
-
sim_list.append(f"{other['full']} ({score}%)")
|
|
102
|
-
break
|
|
126
|
+
for full in name_to_fulls.get(match_name, [])[:3]:
|
|
127
|
+
sim_list.append(f"{full} ({score}%)")
|
|
103
128
|
|
|
104
129
|
if sim_list:
|
|
105
130
|
similar[func['full']] = sim_list
|
|
106
131
|
|
|
132
|
+
log.debug("Similarity finished: funcs=%d matches=%d time=%.2fs", len(all_funcs), len(similar), time.time() - start)
|
|
107
133
|
return similar
|
|
108
134
|
|
|
109
135
|
def find_duplicate_signatures(self, modules: List[ModuleInfo]) -> Dict[str, List[str]]:
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "code2logic"
|
|
7
|
-
version = "1.0.44"
|
|
7
|
+
version = "1.0.46"
|
|
8
8
|
description = "Code2Logic - Source code to logical representation converter for LLM analysis, featuring Tree-sitter parsing, dependency graph analysis, and multi-language support."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|