code2logic 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
code2logic/intent.py ADDED
@@ -0,0 +1,246 @@
1
+ """
2
+ Enhanced Intent Generator with NLP support.
3
+
4
+ Uses lemmatization, pattern matching, and docstring extraction
5
+ to generate human-readable intent descriptions for functions.
6
+ """
7
+
8
+ import re
9
+ from typing import Optional, List, Tuple
10
+
11
+ # Optional NLP imports with graceful degradation
12
+ try:
13
+ import nltk
14
+ from nltk.stem import WordNetLemmatizer
15
+ NLTK_AVAILABLE = True
16
+ except ImportError:
17
+ NLTK_AVAILABLE = False
18
+
19
+ try:
20
+ import spacy
21
+ SPACY_AVAILABLE = True
22
+ except ImportError:
23
+ SPACY_AVAILABLE = False
24
+
25
+
26
class EnhancedIntentGenerator:
    """
    Intent generator with NLP support: lemmatization, pattern matching,
    and docstring extraction.

    Supports both English and Polish intent generation.
    Falls back gracefully if NLP libraries are not available.

    Example:
        >>> gen = EnhancedIntentGenerator(lang='en')
        >>> gen.generate("getUserById")
        'retrieves user by id'
        >>> gen.generate("validateEmail")
        'validates email'
    """

    # Extended verb patterns mapping leading name words to (Polish, English)
    # intent verbs. Iterated in insertion order, so a word must appear in
    # exactly one tuple — earlier groups would shadow later ones.
    VERB_PATTERNS: dict[tuple[str, ...], tuple[str, str]] = {
        # CRUD operations
        ('get', 'fetch', 'retrieve', 'load', 'find', 'query', 'read', 'select'):
            ('pobiera', 'retrieves'),
        ('set', 'update', 'modify', 'change', 'edit', 'put', 'patch'):
            ('aktualizuje', 'updates'),
        # NOTE: 'init' removed here — it belongs to the Lifecycle group below;
        # having it in both made the "initializes" mapping unreachable.
        ('create', 'make', 'build', 'generate', 'new', 'add', 'insert', 'post'):
            ('tworzy', 'creates'),
        ('delete', 'remove', 'clear', 'destroy', 'drop', 'erase'):
            ('usuwa', 'deletes'),

        # Validation
        ('is', 'has', 'can', 'should', 'check', 'test', 'assert'):
            ('sprawdza', 'checks'),
        ('validate', 'verify', 'confirm', 'authenticate'):
            ('waliduje', 'validates'),

        # Transformation
        ('convert', 'transform', 'map', 'translate', 'cast', 'to'):
            ('konwertuje', 'converts'),
        ('parse', 'extract', 'decode', 'deserialize'):
            ('parsuje', 'parses'),
        ('format', 'render', 'serialize', 'encode', 'stringify'):
            ('formatuje', 'formats'),

        # Communication
        ('send', 'emit', 'dispatch', 'publish', 'notify', 'push'):
            ('wysyła', 'sends'),
        ('receive', 'listen', 'subscribe', 'on', 'handle'):
            ('obsługuje', 'handles'),

        # Lifecycle
        ('init', 'initialize', 'setup', 'configure', 'bootstrap'):
            ('inicjalizuje', 'initializes'),
        ('start', 'run', 'execute', 'launch', 'begin', 'open'):
            ('uruchamia', 'starts'),
        ('stop', 'end', 'finish', 'close', 'shutdown', 'terminate'):
            ('kończy', 'stops'),

        # Data operations
        ('process', 'compute', 'calculate', 'evaluate', 'analyze'):
            ('przetwarza', 'processes'),
        ('filter', 'search', 'match', 'lookup'):
            ('filtruje', 'filters'),
        ('sort', 'order', 'arrange', 'rank'):
            ('sortuje', 'sorts'),
        ('merge', 'combine', 'join', 'concat'):
            ('łączy', 'merges'),
        ('split', 'divide', 'separate', 'partition'):
            ('dzieli', 'splits'),

        # Logging
        ('log', 'print', 'write', 'output', 'display'):
            ('loguje', 'logs'),

        # Registration
        ('register', 'bind', 'attach', 'connect', 'hook'):
            ('rejestruje', 'registers'),

        # Caching
        ('cache', 'memoize', 'store', 'save', 'persist'):
            ('cachuje', 'caches'),
    }

    def __init__(self, lang: str = 'en'):
        """
        Initialize the intent generator.

        Args:
            lang: Language for intent output ('en' or 'pl')
        """
        self.lang = lang
        self.lemmatizer = None  # NLTK WordNetLemmatizer, if available
        self.nlp = None         # spaCy pipeline, if available

        # Initialize NLTK lemmatizer if available; download the wordnet
        # corpus on first use, swallowing failures (offline environments).
        if NLTK_AVAILABLE:
            try:
                nltk.data.find('corpora/wordnet')
                self.lemmatizer = WordNetLemmatizer()
            except LookupError:
                try:
                    nltk.download('wordnet', quiet=True)
                    self.lemmatizer = WordNetLemmatizer()
                except Exception:
                    pass

        # Initialize spaCy if available (for more advanced NLP).
        # Falls back to the English model when the requested one is missing.
        if SPACY_AVAILABLE:
            try:
                model = 'pl_core_news_sm' if lang == 'pl' else 'en_core_web_sm'
                self.nlp = spacy.load(model)
            except OSError:
                try:
                    self.nlp = spacy.load('en_core_web_sm')
                except OSError:
                    pass

    def generate(self, name: str, docstring: Optional[str] = None) -> str:
        """
        Generate intent from function name and optional docstring.

        A usable docstring (first line of 10+ characters) takes precedence
        over name-based pattern matching.

        Args:
            name: Function or method name
            docstring: Optional docstring to extract intent from

        Returns:
            Human-readable intent description (at most 80 characters when
            extracted from a docstring)

        Example:
            >>> gen = EnhancedIntentGenerator()
            >>> gen.generate("calculateTotalPrice")
            'processes total price'
        """
        # Try docstring first
        if docstring:
            intent = self._extract_from_docstring(docstring)
            if intent and len(intent) >= 10:
                return intent[:80]

        # Parse function name
        words = self._split_name(name)
        if not words:
            return name

        first_word = words[0].lower()
        rest = ' '.join(words[1:]).lower() if len(words) > 1 else ''

        # Lemmatize as a verb so e.g. "fetches" matches the "fetch" pattern
        if self.lemmatizer:
            try:
                first_word = self.lemmatizer.lemmatize(first_word, pos='v')
            except Exception:
                pass

        # Match against verb patterns
        intent_idx = 0 if self.lang == 'pl' else 1
        for verbs, intents in self.VERB_PATTERNS.items():
            if first_word in verbs:
                intent = intents[intent_idx]
                return f"{intent} {rest}" if rest else intent

        # Fallback - join words
        return ' '.join(words).lower()

    def _extract_from_docstring(self, docstring: str) -> Optional[str]:
        """Extract intent from docstring's first line.

        Strips a leading filler word ("Returns", "The", ...) and truncates
        the result to 80 characters. Returns None for empty input.
        """
        if not docstring:
            return None

        first_line = docstring.split('\n')[0].strip()

        # Remove common prefixes (first match only)
        prefixes = [
            'Returns', 'Return', 'Gets', 'Get', 'Sets', 'Set',
            'Creates', 'Create', 'Deletes', 'Delete',
            'The', 'A', 'An'
        ]
        for prefix in prefixes:
            if first_line.startswith(prefix + ' '):
                first_line = first_line[len(prefix)+1:]
                break

        return first_line[:80] if first_line else None

    def _split_name(self, name: str) -> List[str]:
        """
        Split function name into words.

        Handles:
        - camelCase
        - PascalCase
        - snake_case
        - kebab-case
        - ACRONYMS (e.g., XMLParser -> XML Parser)
        """
        # Remove private prefixes (Python `_` and JS `#` conventions)
        name = name.lstrip('_#')

        # Handle kebab-case by normalizing to snake_case
        name = name.replace('-', '_')

        # snake_case
        if '_' in name:
            return [w for w in name.split('_') if w]

        # camelCase/PascalCase with acronym support
        # XMLParser -> XML Parser, parseXML -> parse XML
        words = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', name)
        words = re.sub(r'([a-z\d])([A-Z])', r'\1 \2', words)

        return [w.strip() for w in words.split() if w.strip()]

    @classmethod
    def get_available_features(cls) -> dict[str, bool]:
        """
        Get dictionary of available NLP features.

        Returns:
            Dict with feature names and availability status
        """
        return {
            'nltk_lemmatizer': NLTK_AVAILABLE,
            'spacy': SPACY_AVAILABLE,
        }
code2logic/llm.py ADDED
@@ -0,0 +1,449 @@
1
+ """
2
+ LLM Integration for Code2Logic
3
+
4
+ Provides integration with local Ollama and LiteLLM for:
5
+ - Code generation from CSV analysis
6
+ - Refactoring suggestions
7
+ - Duplicate detection with semantic analysis
8
+ - Code translation between languages
9
+
10
+ Usage:
11
+ from code2logic.llm import CodeAnalyzer
12
+
13
+ analyzer = CodeAnalyzer(model="qwen2.5-coder:7b")
14
+ suggestions = analyzer.suggest_refactoring(project_info)
15
+ """
16
+
17
+ import json
18
+ from typing import Optional, List, Dict, Any
19
+ from dataclasses import dataclass
20
+
21
+ # Optional imports
22
+ try:
23
+ import httpx
24
+ HTTPX_AVAILABLE = True
25
+ except ImportError:
26
+ HTTPX_AVAILABLE = False
27
+
28
+ try:
29
+ from litellm import completion
30
+ LITELLM_AVAILABLE = True
31
+ except ImportError:
32
+ LITELLM_AVAILABLE = False
33
+
34
+
35
@dataclass
class LLMConfig:
    """Connection and sampling configuration for an LLM backend."""

    # Backend selection: "ollama" talks to a local Ollama server directly,
    # "litellm" routes through the LiteLLM unified API.
    provider: str = "ollama"
    # Model identifier understood by the chosen backend.
    model: str = "qwen2.5-coder:7b"
    # Base URL of the backend API.
    base_url: str = "http://localhost:11434"
    # API key, if the backend requires one.
    api_key: Optional[str] = None
    # Request timeout in seconds.
    timeout: int = 120
    # Sampling temperature for generation.
    temperature: float = 0.7
    # Maximum number of tokens to generate per request.
    max_tokens: int = 2000
45
+
46
+
47
class OllamaClient:
    """Direct client for the Ollama HTTP API."""

    def __init__(self, config: LLMConfig):
        # Fail fast when the optional HTTP dependency is missing.
        if not HTTPX_AVAILABLE:
            raise ImportError("httpx required: pip install httpx")
        self.config = config
        self.client = httpx.Client(timeout=config.timeout)

    def _sampling_options(self) -> Dict[str, Any]:
        """Sampling options shared by the generate and chat payloads."""
        return {
            "temperature": self.config.temperature,
            "num_predict": self.config.max_tokens,
        }

    def _post_json(self, endpoint: str, payload: Dict[str, Any]) -> Any:
        """POST *payload* to *endpoint* and return the decoded JSON body."""
        resp = self.client.post(f"{self.config.base_url}{endpoint}", json=payload)
        resp.raise_for_status()
        return resp.json()

    def generate(self, prompt: str, system: Optional[str] = None) -> str:
        """Generate completion from Ollama."""
        body: Dict[str, Any] = {
            "model": self.config.model,
            "prompt": prompt,
            "stream": False,
            "options": self._sampling_options(),
        }
        if system:
            body["system"] = system
        return self._post_json("/api/generate", body).get("response", "")

    def chat(self, messages: List[Dict[str, str]]) -> str:
        """Chat completion from Ollama."""
        body: Dict[str, Any] = {
            "model": self.config.model,
            "messages": messages,
            "stream": False,
            "options": self._sampling_options(),
        }
        data = self._post_json("/api/chat", body)
        return data.get("message", {}).get("content", "")

    def is_available(self) -> bool:
        """Check if Ollama is running."""
        try:
            probe = self.client.get(f"{self.config.base_url}/api/tags")
            return probe.status_code == 200
        except Exception:
            return False

    def list_models(self) -> List[str]:
        """List available models."""
        try:
            listing = self.client.get(f"{self.config.base_url}/api/tags").json()
            return [entry["name"] for entry in listing.get("models", [])]
        except Exception:
            return []
113
+
114
+
115
class LiteLLMClient:
    """Client that routes requests through the LiteLLM unified API."""

    def __init__(self, config: LLMConfig):
        # Fail fast when the optional dependency is missing.
        if not LITELLM_AVAILABLE:
            raise ImportError("litellm required: pip install litellm")
        self.config = config

    def generate(self, prompt: str, system: Optional[str] = None) -> str:
        """Generate completion via LiteLLM."""
        conversation: List[Dict[str, str]] = []
        if system:
            conversation.append({"role": "system", "content": system})
        conversation.append({"role": "user", "content": prompt})
        return self.chat(conversation)

    def chat(self, messages: List[Dict[str, str]]) -> str:
        """Chat completion via LiteLLM."""
        # Non-litellm providers are addressed through the ollama/ prefix.
        if self.config.provider == "litellm":
            model = self.config.model
        else:
            model = f"ollama/{self.config.model}"

        response = completion(
            model=model,
            messages=messages,
            api_base=self.config.base_url,
            temperature=self.config.temperature,
            max_tokens=self.config.max_tokens,
        )
        return response.choices[0].message.content

    def is_available(self) -> bool:
        """Check if LiteLLM backend is available by issuing a probe request."""
        try:
            self.chat([{"role": "user", "content": "test"}])
        except Exception:
            return False
        return True
154
+
155
+
156
class CodeAnalyzer:
    """
    LLM-powered code analysis for Code2Logic.

    Example:
        >>> from code2logic import analyze_project
        >>> from code2logic.llm import CodeAnalyzer
        >>>
        >>> project = analyze_project("/path/to/project")
        >>> analyzer = CodeAnalyzer()
        >>>
        >>> # Get refactoring suggestions
        >>> suggestions = analyzer.suggest_refactoring(project)
        >>>
        >>> # Generate code in another language
        >>> code = analyzer.generate_code(project, target_lang="typescript")
    """

    SYSTEM_PROMPT = """You are an expert software architect and code analyst.
You analyze code structure and provide actionable suggestions for:
- Refactoring and code improvement
- Duplicate detection and consolidation
- Code generation and translation
- Architecture optimization

Be specific, practical, and provide code examples when helpful."""

    def __init__(
        self,
        model: str = "qwen2.5-coder:7b",
        provider: str = "ollama",
        base_url: str = "http://localhost:11434",
        **kwargs
    ):
        """
        Initialize CodeAnalyzer.

        Args:
            model: Model name (e.g., "qwen2.5-coder:7b")
            provider: "ollama" or "litellm"
            base_url: API base URL
            **kwargs: Extra LLMConfig fields (timeout, temperature, ...)
        """
        self.config = LLMConfig(
            provider=provider,
            model=model,
            base_url=base_url,
            **kwargs
        )

        # "ollama" talks to the local server directly; anything else is
        # routed through the LiteLLM client.
        if provider == "ollama":
            self.client = OllamaClient(self.config)
        else:
            self.client = LiteLLMClient(self.config)

    def is_available(self) -> bool:
        """Check if LLM backend is available."""
        return self.client.is_available()

    @staticmethod
    def _parse_json_array(response: str) -> Optional[List[Dict[str, Any]]]:
        """Extract and parse the first-to-last JSON array span in *response*.

        LLM replies often wrap JSON in prose; this takes the substring from
        the first '[' to the last ']' and attempts to parse it.

        Returns:
            The parsed array, or None when no parsable array is present.
        """
        start = response.find('[')
        end = response.rfind(']') + 1
        if start >= 0 and end > start:
            try:
                return json.loads(response[start:end])
            except json.JSONDecodeError:
                return None
        return None

    def suggest_refactoring(self, project) -> List[Dict[str, Any]]:
        """
        Analyze project and suggest refactoring improvements.

        Args:
            project: ProjectInfo from code2logic analysis

        Returns:
            List of refactoring suggestions with details; when the LLM reply
            cannot be parsed as JSON, a single-element list wrapping the raw
            response is returned.
        """
        from .generators import CSVGenerator

        # Generate compact representation
        csv_gen = CSVGenerator()
        csv_data = csv_gen.generate(project, detail='full')

        # Truncate if too long to keep the prompt within context limits
        if len(csv_data) > 8000:
            lines = csv_data.split('\n')
            csv_data = '\n'.join(lines[:100]) + f"\n... ({len(lines)-100} more lines)"

        prompt = f"""Analyze this codebase and suggest refactoring improvements:

```csv
{csv_data}
```

For each suggestion, provide:
1. Issue type (complexity, duplication, naming, structure)
2. Specific location (path, function name)
3. Problem description
4. Recommended fix with code example if applicable
5. Priority (high/medium/low)

Format as JSON array."""

        response = self.client.generate(prompt, system=self.SYSTEM_PROMPT)

        parsed = self._parse_json_array(response)
        if parsed is not None:
            return parsed

        # Return raw response if JSON parsing fails
        return [{"raw_response": response}]

    def find_semantic_duplicates(self, project) -> List[Dict[str, Any]]:
        """
        Find semantically similar functions using LLM.

        Args:
            project: ProjectInfo from code2logic analysis

        Returns:
            List of duplicate groups with similarity analysis; when the LLM
            reply cannot be parsed as JSON, a single-element list wrapping
            the raw response is returned.
        """
        # Collect all functions and methods with their intents
        functions = []
        for m in project.modules:
            for f in m.functions:
                functions.append({
                    'path': m.path,
                    'name': f.name,
                    'signature': self._build_signature(f),
                    'intent': f.intent or '',
                })
            for c in m.classes:
                for method in c.methods:
                    functions.append({
                        'path': m.path,
                        'name': f"{c.name}.{method.name}",
                        'signature': self._build_signature(method),
                        'intent': method.intent or '',
                    })

        # Cap the listing to keep the prompt small
        if len(functions) > 50:
            functions = functions[:50]

        prompt = f"""Analyze these functions and find semantic duplicates:

{json.dumps(functions, indent=2)}

Group functions that:
1. Do the same thing (even with different names)
2. Have similar logic patterns
3. Could be consolidated into shared utilities

For each group, explain:
- Why they are duplicates
- How to consolidate them
- Suggested shared function name

Format as JSON array of groups."""

        response = self.client.generate(prompt, system=self.SYSTEM_PROMPT)

        parsed = self._parse_json_array(response)
        if parsed is not None:
            return parsed

        return [{"raw_response": response}]

    def generate_code(
        self,
        project,
        target_lang: str,
        module_filter: Optional[str] = None
    ) -> Dict[str, str]:
        """
        Generate code in target language from project analysis.

        Args:
            project: ProjectInfo from code2logic analysis
            target_lang: Target language (typescript, python, go, rust, etc.)
            module_filter: Optional filter for specific module paths

        Returns:
            Dict mapping original path to generated code
        """
        results = {}

        modules = project.modules
        if module_filter:
            modules = [m for m in modules if module_filter in m.path]

        for module in modules[:5]:  # Limit to 5 modules
            # Build a compact specification of the module's public surface
            spec_lines = [f"Module: {module.path}"]
            spec_lines.append(f"Language: {module.language}")
            spec_lines.append(f"Lines: {module.lines_code}")

            if module.imports:
                spec_lines.append(f"Imports: {', '.join(module.imports[:10])}")

            if module.classes:
                spec_lines.append("\nClasses:")
                for c in module.classes[:5]:
                    spec_lines.append(f"  class {c.name}({', '.join(c.bases)})")
                    for m in c.methods[:10]:
                        spec_lines.append(f"    - {m.name}{self._build_signature(m)}: {m.intent}")

            if module.functions:
                spec_lines.append("\nFunctions:")
                for f in module.functions[:10]:
                    spec_lines.append(f"  - {f.name}{self._build_signature(f)}: {f.intent}")

            spec = '\n'.join(spec_lines)

            prompt = f"""Generate {target_lang} code from this specification:

{spec}

Requirements:
1. Idiomatic {target_lang} code
2. Full type annotations
3. Docstrings/comments
4. Error handling
5. Maintain the same public API

Output only the code."""

            response = self.client.generate(prompt, system=self.SYSTEM_PROMPT)
            results[module.path] = response

        return results

    def translate_function(
        self,
        name: str,
        signature: str,
        intent: str,
        source_lang: str,
        target_lang: str
    ) -> str:
        """
        Translate a single function to another language.

        Args:
            name: Function name
            signature: Function signature
            intent: What the function does
            source_lang: Source language
            target_lang: Target language

        Returns:
            Generated code in target language
        """
        prompt = f"""Translate this {source_lang} function to {target_lang}:

Function: {name}
Signature: {signature}
Purpose: {intent}

Generate idiomatic {target_lang} code with:
1. Proper type annotations
2. Error handling
3. Documentation

Output only the code."""

        return self.client.generate(prompt, system=self.SYSTEM_PROMPT)

    def _build_signature(self, f) -> str:
        """Build compact signature string like ``(a,b)->int``.

        Shows at most four parameter names (then '...') and the return type
        when one is recorded.
        """
        params = ','.join(f.params[:4])
        if len(f.params) > 4:
            params += '...'
        ret = f"->{f.return_type}" if f.return_type else ""
        return f"({params}){ret}"
432
+
433
+
434
def get_available_backends() -> Dict[str, bool]:
    """Get availability status of LLM backends."""
    availability = {
        'httpx': HTTPX_AVAILABLE,
        'litellm': LITELLM_AVAILABLE,
        'ollama': False,
    }

    # Only probe the Ollama server when the HTTP client exists;
    # any connection failure simply leaves the flag False.
    if HTTPX_AVAILABLE:
        try:
            availability['ollama'] = OllamaClient(LLMConfig()).is_available()
        except Exception:
            pass

    return availability