code2logic 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
code2logic/__init__.py ADDED
@@ -0,0 +1,88 @@
1
"""
Code2Logic - Convert source code to logical representation for LLM analysis.

Package entry point: re-exports the public API of the sub-modules so that
callers can import everything from ``code2logic`` directly.

The library analyzes codebases and generates compact, LLM-friendly
representations with semantic understanding using NLP and AST parsing.

Features:
    - Multi-language support (Python, JavaScript, TypeScript, Java, Go, Rust, etc.)
    - Tree-sitter AST parsing for 99% accuracy
    - NetworkX dependency graph analysis with PageRank
    - Rapidfuzz similarity detection for duplicate functions
    - NLP-powered intent extraction from function names and docstrings

Example:
    >>> from code2logic import analyze_project, MarkdownGenerator
    >>> project = analyze_project("/path/to/project")
    >>> output = MarkdownGenerator().generate(project)
    >>> print(output)
"""

# Package metadata
__version__ = "1.0.0"
__author__ = "Softreck"
__email__ = "info@softreck.dev"
__license__ = "MIT"

# Re-exports. The import order below is kept exactly as in the original
# module so that any import-time side effects happen in the same sequence.
from .analyzer import ProjectAnalyzer, analyze_project
from .models import (
    FunctionInfo, ClassInfo, TypeInfo,
    ModuleInfo, DependencyNode, ProjectInfo,
)
from .generators import (
    MarkdownGenerator, CompactGenerator,
    JSONGenerator, YAMLGenerator, CSVGenerator,
)
from .gherkin import (
    GherkinGenerator, StepDefinitionGenerator, CucumberYAMLGenerator,
    csv_to_gherkin, gherkin_to_test_data,
)
from .intent import EnhancedIntentGenerator
from .parsers import TreeSitterParser, UniversalParser
from .dependency import DependencyAnalyzer
from .similarity import SimilarityDetector

# Names exported by ``from code2logic import *``, grouped by purpose.
__all__ = [
    # Version
    "__version__",
    # Main API
    "analyze_project", "ProjectAnalyzer",
    # Models
    "FunctionInfo", "ClassInfo", "TypeInfo",
    "ModuleInfo", "DependencyNode", "ProjectInfo",
    # Generators
    "MarkdownGenerator", "CompactGenerator",
    "JSONGenerator", "YAMLGenerator", "CSVGenerator",
    # Gherkin/BDD
    "GherkinGenerator", "StepDefinitionGenerator", "CucumberYAMLGenerator",
    "csv_to_gherkin", "gherkin_to_test_data",
    # Components
    "EnhancedIntentGenerator",
    "TreeSitterParser", "UniversalParser",
    "DependencyAnalyzer", "SimilarityDetector",
]
code2logic/analyzer.py ADDED
@@ -0,0 +1,286 @@
1
+ """
2
+ Main project analyzer orchestrating all analysis components.
3
+
4
+ Provides the high-level API for analyzing codebases.
5
+ """
6
+
7
+ import sys
8
+ from pathlib import Path
9
+ from datetime import datetime
10
+ from collections import defaultdict
11
+ from typing import Optional, List, Dict
12
+
13
+ from .models import ProjectInfo, ModuleInfo
14
+ from .parsers import TreeSitterParser, UniversalParser, TREE_SITTER_AVAILABLE
15
+ from .dependency import DependencyAnalyzer, NETWORKX_AVAILABLE
16
+ from .similarity import SimilarityDetector, RAPIDFUZZ_AVAILABLE
17
+ from .intent import NLTK_AVAILABLE, SPACY_AVAILABLE
18
+
19
+
20
class ProjectAnalyzer:
    """
    Main class for analyzing software projects.

    Orchestrates:
    - File scanning and language detection (by file extension)
    - AST parsing (Tree-sitter when enabled and available, else the fallback)
    - Dependency graph building and analysis
    - Similar function detection
    - Entry point identification

    Example:
        >>> analyzer = ProjectAnalyzer("/path/to/project")
        >>> project = analyzer.analyze()
        >>> print(f"Found {project.total_files} files")

    With options:
        >>> analyzer = ProjectAnalyzer(
        ...     "/path/to/project",
        ...     use_treesitter=True,
        ...     verbose=True
        ... )
    """

    # Language extension mapping (lower-cased suffix -> language name)
    LANGUAGE_EXTENSIONS: Dict[str, str] = {
        '.py': 'python',
        '.js': 'javascript',
        '.jsx': 'javascript',
        '.ts': 'typescript',
        '.tsx': 'typescript',
        '.java': 'java',
        '.go': 'go',
        '.rs': 'rust',
        '.c': 'cpp',
        '.cpp': 'cpp',
        '.cc': 'cpp',
        '.h': 'cpp',
        '.hpp': 'cpp',
        '.php': 'php',
        '.rb': 'ruby',
        '.kt': 'kotlin',
        '.swift': 'swift',
    }

    # Directories to ignore (matched by name, only below the project root)
    IGNORE_DIRS: set = {
        '.git', '.svn', '.hg',
        'node_modules', '__pycache__', '.venv', 'venv', 'env',
        'target', 'build', 'dist', 'out', '.next',
        '.idea', '.vscode', '.pytest_cache',
        'vendor', 'packages', '.tox', 'coverage',
        '.mypy_cache', '.ruff_cache', '.cache',
    }

    # Files to ignore (matched by exact file name)
    IGNORE_FILES: set = {
        '.gitignore', '.dockerignore',
        'package-lock.json', 'yarn.lock',
        'Pipfile.lock', 'poetry.lock',
        'Cargo.lock', 'pnpm-lock.yaml',
    }

    # File names treated as entry points wherever they appear in the tree
    ENTRYPOINT_FILES: set = {
        'main.py', 'app.py', 'server.py', '__main__.py', 'run.py',
        'main.js', 'app.js', 'server.js', 'index.js',
        'main.ts', 'app.ts', 'server.ts', 'index.ts',
        'main.go', 'main.rs', 'Main.java',
    }

    # Upper bound on the number of entry points reported
    MAX_ENTRYPOINTS: int = 10

    def __init__(
        self,
        root_path: str,
        use_treesitter: bool = True,
        verbose: bool = False,
        include_private: bool = False,
    ):
        """
        Initialize the project analyzer.

        Args:
            root_path: Path to the project root directory
            use_treesitter: Whether to use Tree-sitter for parsing
            verbose: Whether to print status messages
            include_private: Whether to include private functions/classes
                (stored on the instance; not consulted inside this class)
        """
        self.root_path = Path(root_path).resolve()
        self.verbose = verbose
        self.include_private = include_private
        self.modules: List[ModuleInfo] = []
        self.languages: Dict[str, int] = defaultdict(int)

        # Initialize parsers; Tree-sitter is optional, the fallback always exists
        self.ts_parser = (
            TreeSitterParser()
            if use_treesitter and TREE_SITTER_AVAILABLE
            else None
        )
        self.fallback_parser = UniversalParser()

        # Initialize analyzers
        self.dep_analyzer = DependencyAnalyzer()
        self.sim_detector = SimilarityDetector()

        if verbose:
            self._print_status()

    def _print_status(self):
        """Print library availability status to stderr."""
        parts = [
            "TS✓" if TREE_SITTER_AVAILABLE else "TS✗",
            "NX✓" if NETWORKX_AVAILABLE else "NX✗",
            "RF✓" if RAPIDFUZZ_AVAILABLE else "RF✗",
            "NLP✓" if (SPACY_AVAILABLE or NLTK_AVAILABLE) else "NLP✗",
        ]
        print(f"Libs: {' '.join(parts)}", file=sys.stderr)

    def analyze(self) -> "ProjectInfo":
        """
        Analyze the project.

        Each call rescans the project from scratch, so calling it repeatedly
        does not accumulate duplicate modules.

        Returns:
            ProjectInfo with complete analysis results
        """
        # Scan and parse files
        self._scan_files()

        # Build dependency graph
        dep_graph = self.dep_analyzer.build_graph(self.modules)
        dep_metrics = self.dep_analyzer.analyze_metrics()

        # Detect entry points
        entrypoints = self._detect_entrypoints()

        # Find similar functions
        similar = self.sim_detector.find_similar_functions(self.modules)

        return ProjectInfo(
            name=self.root_path.name,
            root_path=str(self.root_path),
            languages=dict(self.languages),
            modules=self.modules,
            dependency_graph=dep_graph,
            dependency_metrics=dep_metrics,
            entrypoints=entrypoints,
            similar_functions=similar,
            total_files=len(self.modules),
            total_lines=sum(m.lines_total for m in self.modules),
            generated_at=datetime.now().isoformat()
        )

    def _iter_candidate_files(self):
        """
        Yield every regular file below the root in a deterministic order.

        Ignored directories are pruned, i.e. never descended into. Only
        directory names *below* the root are compared against IGNORE_DIRS,
        so a project that itself lives under e.g. ``build/`` or ``env/`` is
        still scanned. Symlinked directories are not followed.
        """
        pending = [self.root_path]
        while pending:
            directory = pending.pop()
            try:
                entries = sorted(directory.iterdir())
            except OSError:
                # Unreadable directory: skip it (best-effort, like file reads)
                continue

            subdirs = []
            for entry in entries:
                if entry.is_dir():
                    if entry.name not in self.IGNORE_DIRS and not entry.is_symlink():
                        subdirs.append(entry)
                elif entry.is_file():
                    yield entry
            # Reverse so that pop() visits sub-directories in sorted order
            pending.extend(reversed(subdirs))

    def _scan_files(self):
        """
        Scan and parse all source files below the project root.

        Populates ``self.modules`` and ``self.languages``. Both are rebuilt
        on every call; new objects are bound (rather than cleared in place)
        so results handed out by an earlier ``analyze()`` stay untouched.
        """
        self.modules = []
        self.languages = defaultdict(int)

        for fp in self._iter_candidate_files():
            # Skip ignored files
            if fp.name in self.IGNORE_FILES:
                continue

            # Check extension
            language = self.LANGUAGE_EXTENSIONS.get(fp.suffix.lower())
            if language is None:
                continue

            # Read file; undecodable bytes are dropped by errors='ignore',
            # so only I/O failures can occur here
            try:
                content = fp.read_text(encoding='utf-8', errors='ignore')
            except OSError as exc:
                if self.verbose:
                    print(f"Skipping unreadable file {fp}: {exc}", file=sys.stderr)
                continue

            rel_path = str(fp.relative_to(self.root_path))

            # Try Tree-sitter first, then fallback
            module = None
            if self.ts_parser and self.ts_parser.is_available(language):
                module = self.ts_parser.parse(rel_path, content, language)

            if module is None:
                module = self.fallback_parser.parse(rel_path, content, language)

            if module:
                self.modules.append(module)
                # Count only files that produced a module, so the language
                # totals always add up to the number of analyzed files
                self.languages[language] += 1

    def _detect_entrypoints(self) -> List[str]:
        """
        Detect project entry points.

        Combines the entry points reported by the dependency analyzer (when
        it has a graph) with modules whose file name is a well-known entry
        point name, without duplicates.

        Returns:
            At most MAX_ENTRYPOINTS module paths
        """
        eps = []

        # From dependency analyzer (nodes with no incoming edges)
        if self.dep_analyzer.graph is not None:
            eps.extend(self.dep_analyzer.get_entrypoints())

        # Set of already-listed paths keeps the duplicate check O(1)
        seen = set(eps)
        for m in self.modules:
            if Path(m.path).name in self.ENTRYPOINT_FILES and m.path not in seen:
                eps.append(m.path)
                seen.add(m.path)

        return eps[:self.MAX_ENTRYPOINTS]

    def get_statistics(self) -> Dict:
        """
        Get analysis statistics for the modules collected so far.

        Returns:
            Dict with the keys total_files, total_lines, total_code_lines,
            languages, total_classes and total_functions
        """
        return {
            'total_files': len(self.modules),
            'total_lines': sum(m.lines_total for m in self.modules),
            'total_code_lines': sum(m.lines_code for m in self.modules),
            'languages': dict(self.languages),
            'total_classes': sum(len(m.classes) for m in self.modules),
            'total_functions': sum(len(m.functions) for m in self.modules),
        }
246
+
247
+
248
def analyze_project(
    path: str,
    use_treesitter: bool = True,
    verbose: bool = False,
) -> ProjectInfo:
    """
    Analyze a project in one call.

    Shorthand for building a ProjectAnalyzer with the given options and
    running its ``analyze()`` method.

    Args:
        path: Path to the project directory
        use_treesitter: Whether to use Tree-sitter for parsing
        verbose: Whether to print status messages

    Returns:
        ProjectInfo with analysis results

    Example:
        >>> from code2logic import analyze_project
        >>> project = analyze_project("/path/to/project")
        >>> print(f"Analyzed {project.total_files} files")
    """
    return ProjectAnalyzer(
        path,
        use_treesitter=use_treesitter,
        verbose=verbose,
    ).analyze()
271
+
272
+
273
def get_library_status() -> Dict[str, bool]:
    """
    Report which optional libraries are available.

    The values are the module-level availability flags imported from the
    parser, dependency, similarity and intent modules.

    Returns:
        Dict mapping library name to availability status
    """
    return dict(
        tree_sitter=TREE_SITTER_AVAILABLE,
        networkx=NETWORKX_AVAILABLE,
        rapidfuzz=RAPIDFUZZ_AVAILABLE,
        nltk=NLTK_AVAILABLE,
        spacy=SPACY_AVAILABLE,
    )
code2logic/cli.py ADDED
@@ -0,0 +1,222 @@
1
+ """
2
+ Command-line interface for Code2Logic.
3
+
4
+ Usage:
5
+ code2logic /path/to/project
6
+ code2logic /path/to/project -f csv -o output.csv
7
+ code2logic /path/to/project -f yaml
8
+ code2logic /path/to/project -f json --flat
9
+ """
10
+
11
+ import argparse
12
+ import os
13
+ import sys
14
+ import subprocess
15
+
16
+ from . import __version__
17
+
18
+
19
def ensure_dependencies():
    """
    Auto-install optional dependencies for best results.

    Tries to import each optional package; any that fail to import are
    installed with ``pip`` using the running interpreter. The install is
    attempted first with ``--break-system-packages`` and, if that fails,
    once more without it. Failure is never fatal: a warning with the manual
    install command is printed instead.

    All messages go to stderr so that stdout stays reserved for the
    generated output.
    """
    # Local import: importlib is not among this module's top-level imports.
    import importlib

    # pip distribution name -> importable module name
    packages = {
        'tree-sitter': 'tree_sitter',
        'tree-sitter-python': 'tree_sitter_python',
        'tree-sitter-javascript': 'tree_sitter_javascript',
        'tree-sitter-typescript': 'tree_sitter_typescript',
        'networkx': 'networkx',
        'rapidfuzz': 'rapidfuzz',
        'pyyaml': 'yaml',
    }

    missing = []
    for pkg_name, import_name in packages.items():
        try:
            importlib.import_module(import_name)
        except ImportError:
            missing.append(pkg_name)

    if not missing:
        return

    print(f"Installing dependencies for best results: {', '.join(missing)}", file=sys.stderr)

    pip_install = [sys.executable, '-m', 'pip', 'install', '-q']
    # Same order as before: with the flag first, then without it.
    for extra_flags in (['--break-system-packages'], []):
        try:
            subprocess.check_call(
                [*pip_install, *extra_flags, *missing],
                stderr=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
            )
        except (subprocess.CalledProcessError, OSError):
            # pip failed or could not be started; try the next variant
            continue

        # The packages were created after the interpreter started, so the
        # import system's finder caches must be refreshed before the caller
        # tries to import them in this same process.
        importlib.invalidate_caches()
        print("Dependencies installed successfully!", file=sys.stderr)
        return

    print(f"Warning: Could not install some dependencies. "
          f"Install manually: pip install {' '.join(missing)}", file=sys.stderr)
56
+
57
+
58
def main():
    """
    Main CLI entry point.

    Parses the command line, optionally installs missing optional
    dependencies, analyzes the given project directory and emits the result
    in the requested format to stdout or to the file given with ``-o``.
    Exits with status 1 on a missing or invalid path and with status 0
    after ``--status``.
    """
    parser = argparse.ArgumentParser(
        prog='code2logic',
        description='Convert source code to logical representation for LLM analysis',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
code2logic /path/to/project # Standard Markdown
code2logic /path/to/project -f csv # CSV (best for LLM, ~50%% smaller)
code2logic /path/to/project -f yaml # YAML (human-readable)
code2logic /path/to/project -f json --flat # Flat JSON (for comparisons)
code2logic /path/to/project -f compact # Ultra-compact text

Output formats (token efficiency):
csv - Best for LLM (~20K tokens/100 files) - flat table
compact - Good for LLM (~25K tokens/100 files) - minimal text
json - Standard (~35K tokens/100 files) - nested/flat
yaml - Readable (~35K tokens/100 files) - nested/flat
markdown - Documentation (~55K tokens/100 files)

Detail levels (columns in csv/json/yaml):
minimal - path, type, name, signature (4 columns)
standard - + intent, category, domain, imports (8 columns)
full - + calls, lines, complexity, hash (16 columns)
'''
    )

    # Arguments are registered in the same order as they appear in --help.
    parser.add_argument('path', nargs='?', default=None,
                        help='Path to the project directory')
    parser.add_argument('-f', '--format',
                        choices=['markdown', 'compact', 'json', 'yaml', 'csv', 'gherkin'],
                        default='markdown',
                        help='Output format (default: markdown)')
    parser.add_argument('-d', '--detail',
                        choices=['minimal', 'standard', 'full'],
                        default='standard',
                        help='Detail level - columns to include (default: standard)')
    parser.add_argument('-o', '--output',
                        help='Output file path (default: stdout)')
    parser.add_argument('--flat', action='store_true',
                        help='Use flat structure (for json/yaml) - better for comparisons')
    parser.add_argument('--no-install', action='store_true',
                        help='Skip auto-installation of dependencies')
    parser.add_argument('--no-treesitter', action='store_true',
                        help='Disable Tree-sitter (use fallback parser)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Verbose output')
    parser.add_argument('--version', action='version',
                        version=f'%(prog)s {__version__}')
    parser.add_argument('--status', action='store_true',
                        help='Show library availability status and exit')

    args = parser.parse_args()

    # Auto-install dependencies unless disabled (never for a status query)
    if not (args.no_install or args.status):
        ensure_dependencies()

    # Imported here, not at module level, so they run after a potential
    # installation of the optional packages.
    from .analyzer import ProjectAnalyzer, get_library_status
    from .generators import (
        MarkdownGenerator, CompactGenerator, JSONGenerator,
        YAMLGenerator, CSVGenerator
    )
    from .gherkin import GherkinGenerator

    # Status check: report and leave
    if args.status:
        print("Library Status:")
        for lib, available in get_library_status().items():
            print(f" {lib}: {'✓' if available else '✗'}")
        sys.exit(0)

    # Path is required for analysis
    if args.path is None:
        print("Error: path is required", file=sys.stderr)
        parser.print_help()
        sys.exit(1)

    # Validate path
    if not os.path.exists(args.path):
        print(f"Error: Path does not exist: {args.path}", file=sys.stderr)
        sys.exit(1)
    if not os.path.isdir(args.path):
        print(f"Error: Path is not a directory: {args.path}", file=sys.stderr)
        sys.exit(1)

    # Analyze
    be_verbose = args.verbose
    if be_verbose:
        print(f"Analyzing project: {args.path}", file=sys.stderr)

    project = ProjectAnalyzer(
        args.path,
        use_treesitter=not args.no_treesitter,
        verbose=be_verbose
    ).analyze()

    if be_verbose:
        print(f"Found {project.total_files} files, {project.total_lines} lines", file=sys.stderr)

    # Generate output. Each entry is a thunk so that only the generator for
    # the selected format is ever instantiated.
    renderers = {
        'markdown': lambda: MarkdownGenerator().generate(project, args.detail),
        'compact': lambda: CompactGenerator().generate(project),
        'json': lambda: JSONGenerator().generate(project, flat=args.flat, detail=args.detail),
        'yaml': lambda: YAMLGenerator().generate(project, flat=args.flat, detail=args.detail),
        'csv': lambda: CSVGenerator().generate(project, detail=args.detail),
        'gherkin': lambda: GherkinGenerator().generate(project, detail=args.detail),
    }
    output = renderers[args.format]()

    # Write output
    if not args.output:
        print(output)
        return

    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(output)
    if be_verbose:
        print(f"Output written to: {args.output}", file=sys.stderr)
219
+
220
+
221
# Run the CLI when this module is executed directly rather than imported.
if __name__ == '__main__':
    main()