code2logic 1.0.41__tar.gz → 1.0.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {code2logic-1.0.41 → code2logic-1.0.43}/PKG-INFO +1 -1
  2. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/__init__.py +1 -1
  3. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/benchmarks/common.py +107 -33
  4. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/benchmarks/results.py +23 -7
  5. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/benchmarks/runner.py +47 -19
  6. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/cli.py +16 -1
  7. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/function_logic.py +32 -1
  8. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/logicml.py +44 -19
  9. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/metrics.py +65 -24
  10. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/toon_format.py +86 -0
  11. {code2logic-1.0.41 → code2logic-1.0.43}/pyproject.toml +1 -1
  12. {code2logic-1.0.41 → code2logic-1.0.43}/LICENSE +0 -0
  13. {code2logic-1.0.41 → code2logic-1.0.43}/README.md +0 -0
  14. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/__main__.py +0 -0
  15. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/adaptive.py +0 -0
  16. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/analyzer.py +0 -0
  17. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/base.py +0 -0
  18. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/base_generator.py +0 -0
  19. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/benchmark.py +0 -0
  20. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/benchmarks/__init__.py +0 -0
  21. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/chunked_reproduction.py +0 -0
  22. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/code_review.py +0 -0
  23. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/config.py +0 -0
  24. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/core/__init__.py +0 -0
  25. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/dependency.py +0 -0
  26. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/errors.py +0 -0
  27. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/file_formats.py +0 -0
  28. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/formats/__init__.py +0 -0
  29. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/generators.py +0 -0
  30. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/gherkin.py +0 -0
  31. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/integrations/__init__.py +0 -0
  32. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/intent.py +0 -0
  33. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/llm/__init__.py +0 -0
  34. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/llm.py +0 -0
  35. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/llm_clients.py +0 -0
  36. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/llm_profiler.py +0 -0
  37. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/markdown_format.py +0 -0
  38. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/mcp_server.py +0 -0
  39. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/models.py +0 -0
  40. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/parsers.py +0 -0
  41. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/project_comparison.md +0 -0
  42. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/project_reproducer.py +0 -0
  43. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/prompts.py +0 -0
  44. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/py.typed +0 -0
  45. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/quality.py +0 -0
  46. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/refactor.py +0 -0
  47. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/reproducer.py +0 -0
  48. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/reproduction.py +0 -0
  49. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/schemas/__init__.py +0 -0
  50. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/schemas/json_schema.py +0 -0
  51. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/schemas/logicml_schema.py +0 -0
  52. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/schemas/markdown_schema.py +0 -0
  53. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/schemas/yaml_schema.py +0 -0
  54. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/shared_utils.py +0 -0
  55. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/similarity.py +0 -0
  56. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/terminal.py +0 -0
  57. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/tools/__init__.py +0 -0
  58. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/universal.py +0 -0
  59. {code2logic-1.0.41 → code2logic-1.0.43}/code2logic/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code2logic
3
- Version: 1.0.41
3
+ Version: 1.0.43
4
4
  Summary: Code2Logic - Source code to logical representation converter for LLM analysis, featuring Tree-sitter parsing, dependency graph analysis, and multi-language support.
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -18,7 +18,7 @@ Example:
18
18
  >>> print(output)
19
19
  """
20
20
 
21
- __version__ = "1.0.41"
21
+ __version__ = "1.0.43"
22
22
  __author__ = "Softreck"
23
23
  __email__ = "info@softreck.dev"
24
24
  __license__ = "MIT"
@@ -41,6 +41,7 @@ def generate_spec(project: ProjectInfo, fmt: str) -> str:
41
41
  no_repeat_name=True,
42
42
  no_repeat_details=True,
43
43
  include_does=True,
44
+ context="minimal",
44
45
  )
45
46
  if fmt == "csv":
46
47
  gen = CSVGenerator()
@@ -189,22 +190,66 @@ Name the test class Test<ClassName> or TestFunctions."""
189
190
 
190
191
  def get_token_reproduction_prompt(spec: str, fmt: str, file_name: str, language: str = "python") -> str:
191
192
  format_hints = {
192
- "json": "Parse the JSON structure and implement all classes and functions with exact signatures.",
193
+ "json": """Parse the JSON structure carefully:
194
+ - 'modules' array contains file-level info with 'classes' and 'functions'
195
+ - Each class has 'name', 'bases', 'methods' with full signatures
196
+ - Each function has 'name', 'params', 'returns', 'doc'
197
+ - Implement ALL classes with their methods and ALL standalone functions
198
+ - Use the 'doc' field to implement actual logic, not just stubs
199
+ CRITICAL: Match every class/function name and signature exactly.""",
193
200
  "json_compact": "Parse the compact JSON and implement all elements with exact signatures.",
194
- "yaml": "Parse the YAML structure and implement all classes and functions with exact signatures.",
195
- "gherkin": """Parse Gherkin/BDD scenarios and implement them as working code:
196
- - Each Feature maps to a class or module
197
- - Each Scenario maps to a function
198
- - Given/When/Then steps describe the logic flow
199
- - Implement actual logic, not just stubs
200
- Focus on the described behavior and implement it directly.""",
201
- "markdown": "Parse embedded Gherkin (behaviors) and YAML (structures). Implement all described classes and functions.",
202
- "logicml": """Parse LogicML and generate VALID code:
203
- - 'sig:' lines describe function signatures (translate to the target language)
204
- - 'type: re-export' means this module primarily re-exports symbols
205
- - 'attrs:' = instance attributes to set in constructor
201
+ "yaml": """Parse the YAML structure carefully:
202
+ - Top-level keys describe modules with classes and functions
203
+ - Each class has 'bases', 'properties', 'methods' with signatures
204
+ - Each function has params, return type, and docstring/intent
205
+ - Implement ALL classes, methods, and standalone functions
206
+ - Use intent/docstring to write actual logic, not placeholders
207
+ CRITICAL: Match every name and signature exactly as specified.""",
208
+ "gherkin": """Parse Gherkin/BDD specification and reconstruct the ORIGINAL source code:
209
+ - 'Feature:' = a class or module (use the name after Feature)
210
+ - 'Scenario:' = a function or method to implement
211
+ - 'Given' steps = setup / preconditions / imports needed
212
+ - 'When' steps = the core action / logic to implement
213
+ - 'Then' steps = expected outcomes / return values / assertions
214
+ - 'And' continues the previous step type
215
+ - '@tag' annotations may indicate decorators or categories
216
+
217
+ IMPORTANT RULES:
218
+ 1. Each Scenario becomes a real function with actual logic (NOT test code)
219
+ 2. Given/When/Then describe behavior, translate them to implementation
220
+ 3. Include all imports mentioned in Given steps
221
+ 4. Use type hints based on parameter descriptions
222
+ 5. Implement real logic based on When/Then steps, not just stubs
223
+ 6. If a Feature has multiple Scenarios, they are methods of the same class""",
224
+ "markdown": """Parse the Markdown specification to reconstruct source code:
225
+ - '## Module' or '### Class' headings define code structure
226
+ - Embedded YAML blocks describe attributes, methods, signatures
227
+ - Embedded Gherkin blocks describe behaviors to implement
228
+ - Code blocks show example usage or signatures
229
+ - Tables may list functions with their parameters and return types
230
+
231
+ IMPORTANT RULES:
232
+ 1. Extract class names, method signatures, and function signatures from headings and YAML
233
+ 2. Implement all listed methods with actual logic based on descriptions
234
+ 3. Include all imports mentioned anywhere in the document
235
+ 4. Use type hints from signatures or parameter descriptions
236
+ 5. Docstrings should come from the description text""",
237
+ "logicml": """Parse LogicML and generate VALID, complete code:
238
+ - 'module:' = file to generate
239
+ - 'sig:' lines = EXACT function signatures (translate to target language)
240
+ - 'does:' = function intent/docstring — use this to implement real logic
241
+ - 'type: re-export' = module primarily re-exports symbols from imports
242
+ - 'attrs:' = instance attributes to initialize in __init__/constructor
206
243
  - 'bases:' = parent classes to inherit from
207
- CRITICAL: Ensure valid syntax - balanced brackets, proper indentation, no undefined variables.""",
244
+ - 'decorators:' = decorators to apply
245
+ - 'calls:' = other functions this function calls (implement the call chain)
246
+ - 'raises:' = exceptions this function may raise
247
+
248
+ CRITICAL RULES:
249
+ 1. Translate EVERY 'sig:' line into a real function with actual logic
250
+ 2. Use 'does:' text to implement meaningful function bodies
251
+ 3. Ensure valid syntax - balanced brackets, proper indentation
252
+ 4. Include ALL imports listed in the module""",
208
253
  "toon": """Parse TOON (Token-Oriented Object Notation) format carefully:
209
254
 
210
255
  STRUCTURE:
@@ -222,19 +267,43 @@ DECORATORS:
222
267
  - 'decorators: @property' = add @property decorator
223
268
  - 'decorators: @staticmethod|@cache' = multiple decorators
224
269
 
225
- CRITICAL: Use imports[], function_docs, and exact signatures to reproduce code accurately.""",
226
- "csv": """Parse the CSV table where each row describes a code element:
227
- - Columns: path, type (class/method/function), name, signature, language, intent, category, domain, imports
228
- - 'method' rows belong to the class in the preceding 'class' row
229
- - Implement all elements with the exact signatures shown
230
- Generate complete code with all classes, methods, and functions.""",
231
- "function.toon": """Parse the function-logic TOON format:
232
- - 'modules[N]{path,lang,items}:' lists files
233
- - 'function_details:' contains per-module function listings
234
- - Each function has: line number, name, signature, description
235
- - 'ClassName.method_name' = method of that class
236
- - 'cc:N' after name = cyclomatic complexity
237
- Implement all listed functions with matching signatures and described behavior.""",
270
+ CRITICAL RULES:
271
+ 1. Use imports[] to generate all import statements
272
+ 2. Use function_docs to write real function bodies (not stubs)
273
+ 3. Match exact signatures from sig: fields
274
+ 4. Include ALL classes with their methods and ALL standalone functions
275
+ 5. Preserve async functions (marked with 'async: true')""",
276
+ "csv": """Parse the CSV table to reconstruct source code:
277
+ - Columns: path, type, name, signature, language, intent, category, domain, imports
278
+ - 'type=class' rows define classes (look at 'bases' if present)
279
+ - 'type=method' rows are methods of the preceding class
280
+ - 'type=function' rows are standalone functions
281
+ - 'signature' column has the exact function signature to use
282
+ - 'intent' column describes what the function does use it to implement real logic
283
+ - 'imports' column lists required imports
284
+
285
+ IMPORTANT RULES:
286
+ 1. Group methods under their parent class
287
+ 2. Include all imports from the 'imports' column
288
+ 3. Match signatures exactly as shown
289
+ 4. Use 'intent' to implement actual logic, not just stubs
290
+ 5. Add type hints based on signature information""",
291
+ "function.toon": """Parse the function-logic TOON format to reconstruct source code:
292
+ - 'modules[N]{path,lang,items}:' lists source files and their function count
293
+ - 'function_details:' contains per-module function listings as tables
294
+ - Table columns: line, name, sig[, does, decorators, calls, raises]
295
+ - 'ClassName.method_name' = this is a method of ClassName (create the class)
296
+ - '~function_name' = async function (add async keyword)
297
+ - 'cc:N' suffix on name = cyclomatic complexity hint (more complex logic needed)
298
+ - 'sig' column has exact signature: (params)->ReturnType
299
+
300
+ CRITICAL RULES:
301
+ 1. Create classes for any ClassName that appears as prefix in 'ClassName.method'
302
+ 2. Translate EVERY listed function into real code with actual logic
303
+ 3. Use 'does' column text to implement meaningful function bodies
304
+ 4. Match signatures EXACTLY from the 'sig' column
305
+ 5. Include imports needed for the types and calls referenced
306
+ 6. Preserve method grouping under their classes""",
238
307
  }
239
308
 
240
309
  # Language-specific guidance appended to prompt
@@ -248,7 +317,7 @@ Implement all listed functions with matching signatures and described behavior."
248
317
  "sql": "Use standard SQL: CREATE TABLE/VIEW/FUNCTION, proper column types, constraints.",
249
318
  }
250
319
 
251
- max_spec = 8000
320
+ max_spec = 12000
252
321
  spec_truncated = spec[:max_spec] if len(spec) > max_spec else spec
253
322
 
254
323
  language_norm = (language or "python").strip().lower()
@@ -267,15 +336,20 @@ Implement all listed functions with matching signatures and described behavior."
267
336
  lang_hint = lang_hints.get(language_norm, '')
268
337
  lang_hint_line = f"\n{lang_hint}" if lang_hint else ''
269
338
 
270
- prompt = f"""Generate {lang_label} code from this {fmt.upper()} specification.
339
+ prompt = f"""Generate complete {lang_label} source code from this {fmt.upper()} specification.
271
340
  {format_hints.get(fmt, '')}{lang_hint_line}
272
341
 
342
+ SPECIFICATION:
273
343
  {spec_truncated}
274
344
 
275
- Requirements:
276
- - Complete, working {lang_label} code for {file_name}
277
- - Include imports and type hints
278
- - Implement all functions with actual logic
345
+ REQUIREMENTS:
346
+ - Output complete, working {lang_label} code for {file_name}
347
+ - Include ALL imports at the top
348
+ - Implement ALL classes, methods, and functions listed in the specification
349
+ - Use type hints throughout
350
+ - Write real logic based on descriptions/intents, NOT placeholder stubs
351
+ - Match function signatures EXACTLY as specified
352
+ - Output ONLY the code, no explanations
279
353
 
280
354
  ```{language_norm}
281
355
  """
@@ -118,6 +118,7 @@ class BenchmarkResult:
118
118
  avg_similarity: float = 0.0
119
119
  syntax_ok_rate: float = 0.0
120
120
  runs_ok_rate: float = 0.0
121
+ failure_rate: float = 0.0
121
122
 
122
123
  # Best format (for format comparisons)
123
124
  best_format: str = ""
@@ -144,17 +145,18 @@ class BenchmarkResult:
144
145
 
145
146
  def calculate_aggregates(self):
146
147
  """Calculate aggregate metrics from detailed results."""
147
- # File results
148
+ # File results – include ALL scores (zeros count as failures)
148
149
  if self.file_results:
149
- scores = [r.score for r in self.file_results if r.score > 0]
150
- self.avg_score = sum(scores) / len(scores) if scores else 0
150
+ all_scores = [r.score for r in self.file_results]
151
+ self.avg_score = sum(all_scores) / len(all_scores) if all_scores else 0
152
+ self.failure_rate = sum(1 for s in all_scores if s == 0) / len(all_scores) * 100
151
153
  self.syntax_ok_rate = sum(1 for r in self.file_results if r.syntax_ok) / len(self.file_results) * 100
152
154
  self.runs_ok_rate = sum(1 for r in self.file_results if r.runs_ok) / len(self.file_results) * 100
153
155
 
154
- # Function results
156
+ # Function results – include ALL similarities
155
157
  if self.function_results:
156
- sims = [r.similarity for r in self.function_results if r.similarity > 0]
157
- self.avg_similarity = sum(sims) / len(sims) if sims else 0
158
+ all_sims = [r.similarity for r in self.function_results]
159
+ self.avg_similarity = sum(all_sims) / len(all_sims) if all_sims else 0
158
160
 
159
161
  # Format results
160
162
  if self.format_results:
@@ -187,10 +189,24 @@ class BenchmarkResult:
187
189
  """Load result from JSON file."""
188
190
  data = json.loads(Path(path).read_text())
189
191
  # Reconstruct nested objects
190
- file_results = [FileResult(**r) for r in data.pop('file_results', [])]
192
+ raw_file_results = data.pop('file_results', [])
193
+ file_results = []
194
+ for r in raw_file_results:
195
+ fmt_results_raw = r.pop('format_results', {})
196
+ fr = FileResult(**r)
197
+ fr.format_results = {
198
+ k: FormatResult(**v) if isinstance(v, dict) else v
199
+ for k, v in fmt_results_raw.items()
200
+ }
201
+ file_results.append(fr)
191
202
  function_results = [FunctionResult(**r) for r in data.pop('function_results', [])]
192
203
  format_results = [FormatResult(**r) for r in data.pop('format_results', [])]
193
204
 
205
+ # Remove unknown fields that may not be in the dataclass
206
+ import dataclasses
207
+ known_fields = {f.name for f in dataclasses.fields(cls)}
208
+ data = {k: v for k, v in data.items() if k in known_fields}
209
+
194
210
  result = cls(**data)
195
211
  result.file_results = file_results
196
212
  result.function_results = function_results
@@ -147,11 +147,15 @@ def _structural_score(original: str, generated: str, language: str) -> float:
147
147
  if not o:
148
148
  return 0.0
149
149
  keys = list(o.keys())
150
- matches = 0
150
+ total = 0.0
151
151
  for k in keys:
152
- if o.get(k, 0) == g.get(k, 0):
153
- matches += 1
154
- return matches / max(len(keys), 1) * 100
152
+ ov = o.get(k, 0)
153
+ gv = g.get(k, 0)
154
+ if ov == 0 and gv == 0:
155
+ total += 1.0
156
+ elif max(ov, gv) > 0:
157
+ total += min(ov, gv) / max(ov, gv)
158
+ return total / max(len(keys), 1) * 100
155
159
 
156
160
 
157
161
  def _extract_code(response: str) -> str:
@@ -159,8 +163,16 @@ def _extract_code(response: str) -> str:
159
163
  if not response:
160
164
  return ""
161
165
 
162
- # Try to find code block
163
- for marker in ['```python', '```py', '```']:
166
+ # Try to find code block — check language-specific markers first, then generic
167
+ markers = [
168
+ '```python', '```py',
169
+ '```javascript', '```js', '```typescript', '```ts',
170
+ '```go', '```rust', '```rs',
171
+ '```java', '```csharp', '```cs', '```c#',
172
+ '```sql',
173
+ '```',
174
+ ]
175
+ for marker in markers:
164
176
  if marker in response:
165
177
  start = response.find(marker) + len(marker)
166
178
  if start < len(response) and response[start] == '\n':
@@ -469,12 +481,12 @@ class {cls}:
469
481
 
470
482
  result.total_time = time.time() - start_time
471
483
 
472
- # Calculate format aggregates
484
+ # Calculate format aggregates – include ALL scores (zeros = failures)
473
485
  for fmt in formats:
474
486
  scores = [
475
487
  fr.format_results[fmt].score
476
488
  for fr in result.file_results
477
- if fmt in fr.format_results and fr.format_results[fmt].score > 0
489
+ if fmt in fr.format_results
478
490
  ]
479
491
  if scores:
480
492
  result.format_scores[fmt] = sum(scores) / len(scores)
@@ -762,24 +774,34 @@ class {cls}:
762
774
 
763
775
  result.original_code = '\n'.join(lines[start:end])
764
776
 
765
- # Create spec
777
+ # Create spec with richer context
778
+ calls_str = ', '.join(getattr(func, 'calls', []) or []) or 'None'
779
+ raises_str = ', '.join(getattr(func, 'raises', []) or []) or 'None'
780
+ cc = getattr(func, 'complexity', 1) or 1
766
781
  spec = f"""Function: {func.name}
767
782
  Language: {language}
768
783
  Signature: {func.name}({', '.join(func.params)}) -> {func.return_type or 'None'}
769
784
  Description: {func.intent or func.docstring or 'No description'}
770
785
  Is Async: {func.is_async}
771
786
  Decorators: {', '.join(func.decorators) if func.decorators else 'None'}
787
+ Calls: {calls_str}
788
+ Raises: {raises_str}
789
+ Complexity: {cc}
772
790
  Lines: {func.lines}
773
791
  """
774
792
 
775
- prompt = f"""Generate ONLY the function code based on this specification:
793
+ prompt = f"""Generate ONLY the complete function code based on this specification:
776
794
 
777
795
  {spec}
778
796
 
779
- Requirements:
780
- - Generate complete, working {language} function
781
- - Match the signature exactly
782
- - Output ONLY the function code
797
+ REQUIREMENTS:
798
+ - Generate a complete, working {language} function with REAL logic (not a stub)
799
+ - Match the signature EXACTLY: {func.name}({', '.join(func.params)}) -> {func.return_type or 'None'}
800
+ - Use the Description to implement actual behavior
801
+ - Include decorators if specified
802
+ - The function should be ~{func.lines} lines long
803
+ - Include proper error handling if Raises is specified
804
+ - Output ONLY the function code, no explanations
783
805
 
784
806
  ```{language}
785
807
  """
@@ -793,7 +815,7 @@ Requirements:
793
815
  result.gen_time = 0.0
794
816
  else:
795
817
  start_time = time.time()
796
- response = client.generate(prompt, max_tokens=2000)
818
+ response = client.generate(prompt, max_tokens=3000)
797
819
  result.gen_time = time.time() - start_time
798
820
  result.reproduced_code = _extract_code(response)
799
821
 
@@ -942,17 +964,23 @@ Requirements:
942
964
 
943
965
  result.total_time = time.time() - start_time
944
966
 
945
- # Calculate format aggregates
967
+ # Calculate format aggregates – include ALL scores (zeros = failures)
946
968
  for fmt in formats:
947
969
  scores = []
948
970
  for fr in result.file_results:
949
971
  if fmt in fr.format_results:
950
- score = fr.format_results[fmt].score
951
- if score > 0:
952
- scores.append(score)
972
+ scores.append(fr.format_results[fmt].score)
953
973
  if scores:
954
974
  result.format_scores[fmt] = sum(scores) / len(scores)
955
975
 
976
+ # Recalculate each file's score as average across all its formats
977
+ for fr in result.file_results:
978
+ if fr.format_results:
979
+ fmt_scores = [r.score for r in fr.format_results.values()]
980
+ fr.score = sum(fmt_scores) / len(fmt_scores)
981
+ fr.syntax_ok = all(r.syntax_ok for r in fr.format_results.values())
982
+ fr.runs_ok = any(r.runs_ok for r in fr.format_results.values())
983
+
956
984
  result.calculate_aggregates()
957
985
 
958
986
  return result
@@ -673,6 +673,13 @@ code2logic [path] [options]
673
673
  action='store_true',
674
674
  help='Include the does/intent column in function-logic TOON output. Without this flag, the does column is omitted to save tokens.'
675
675
  )
676
+ parser.add_argument(
677
+ '--function-logic-context',
678
+ choices=['none', 'minimal', 'full'],
679
+ default='none',
680
+ dest='function_logic_context',
681
+ help='Structural context in function-logic TOON: none (flat list), minimal (class headers with bases), full (classes + properties + imports). Default: none.'
682
+ )
676
683
  parser.add_argument(
677
684
  '--no-install',
678
685
  action='store_true',
@@ -971,11 +978,18 @@ code2logic [path] [options]
971
978
  # For TOON, --compact means ultra-compact format
972
979
  compact = args.compact if hasattr(args, 'compact') else False
973
980
  ultra_compact = args.ultra_compact if hasattr(args, 'ultra_compact') else False
981
+ use_hybrid = args.hybrid if hasattr(args, 'hybrid') else False
974
982
 
975
983
  # Use compact or ultra_compact flag (compact takes precedence for TOON)
976
984
  use_ultra_compact = ultra_compact or compact
977
985
 
978
- if use_ultra_compact:
986
+ if use_hybrid:
987
+ output = generator.generate_hybrid(
988
+ project,
989
+ detail='full',
990
+ no_repeat_name=args.no_repeat_module,
991
+ )
992
+ elif use_ultra_compact:
979
993
  output = generator.generate_ultra_compact(project)
980
994
  else:
981
995
  detail_map = {
@@ -1044,6 +1058,7 @@ code2logic [path] [options]
1044
1058
  no_repeat_name=args.no_repeat_module,
1045
1059
  no_repeat_details=args.no_repeat_details,
1046
1060
  include_does=args.does,
1061
+ context=getattr(args, 'function_logic_context', 'none') or 'none',
1047
1062
  )
1048
1063
  else:
1049
1064
  logic_out = logic_gen.generate(project, detail=args.detail)
@@ -66,7 +66,16 @@ class FunctionLogicGenerator:
66
66
  no_repeat_name: bool = False,
67
67
  no_repeat_details: bool = False,
68
68
  include_does: bool = False,
69
+ context: str = 'none',
69
70
  ) -> str:
71
+ """Generate function-logic in TOON format.
72
+
73
+ Args:
74
+ context: Structural context level:
75
+ 'none' - flat function list (original behavior)
76
+ 'minimal' - class headers (name, bases) before methods
77
+ 'full' - class headers + properties + module imports
78
+ """
70
79
  if detail == 'detailed':
71
80
  detail = 'full'
72
81
  toon = TOONGenerator()
@@ -81,8 +90,11 @@ class FunctionLogicGenerator:
81
90
  lines: List[str] = []
82
91
 
83
92
  # Format header — helps LLM understand the structure
84
- lines.append(f"# {project.name} function-logic | {len(modules_with_items)} modules")
93
+ ctx_label = f" | context:{context}" if context != 'none' else ""
94
+ lines.append(f"# {project.name} function-logic | {len(modules_with_items)} modules{ctx_label}")
85
95
  lines.append("# Convention: name with . = method, ~name = async, cc:N shown only when >1")
96
+ if context != 'none':
97
+ lines.append("# CLASS: header before methods gives structural context (bases, props)")
86
98
 
87
99
  lines.append(f"project: {toon._quote(project.name)}")
88
100
  if getattr(project, 'generated_at', None):
@@ -110,6 +122,25 @@ class FunctionLogicGenerator:
110
122
  details_key = m.path
111
123
  lines.append(f" {toon._quote(details_key)}:")
112
124
 
125
+ # Emit module imports for 'full' context
126
+ if context == 'full' and getattr(m, 'imports', None):
127
+ imports = [i for i in m.imports if i][:20]
128
+ if imports:
129
+ lines.append(f" imports[{len(imports)}]: {','.join(imports)}")
130
+
131
+ # Emit class context headers before function table
132
+ if context != 'none':
133
+ classes = getattr(m, 'classes', []) or []
134
+ if classes:
135
+ for cls in classes:
136
+ bases = ','.join(getattr(cls, 'bases', []) or []) or '-'
137
+ cls_line = f" CLASS {toon._quote(cls.name)}({bases})"
138
+ if context == 'full':
139
+ props = getattr(cls, 'properties', []) or []
140
+ if props:
141
+ cls_line += f" props:[{','.join(props[:15])}]"
142
+ lines.append(cls_line)
143
+
113
144
  header = f"line{dm}name{dm}sig"
114
145
  if include_does and detail in ('standard', 'full'):
115
146
  header += f"{dm}does"
@@ -65,7 +65,7 @@ class LogicMLGenerator:
65
65
  """
66
66
 
67
67
  FORMAT_NAME: str = "logicml"
68
- FILE_EXTENSION: str = ".logicml"
68
+ FILE_EXTENSION: str = ".logicml.yaml"
69
69
  TOKEN_EFFICIENCY: float = 1.4 # 40% better than YAML
70
70
  REPRODUCTION_FIDELITY: float = 0.97
71
71
 
@@ -80,14 +80,22 @@ class LogicMLGenerator:
80
80
  def __init__(self, verbose: bool = False) -> None:
81
81
  self.verbose = verbose
82
82
 
83
- def generate(self, project: ProjectInfo, detail: str = 'standard') -> LogicMLSpec:
84
- """Generate LogicML specification for a project."""
83
+ def generate(self, project: ProjectInfo, detail: str = 'standard', level: str = 'typed') -> LogicMLSpec:
84
+ """Generate LogicML specification for a project.
85
+
86
+ Args:
87
+ detail: Content detail ('minimal', 'standard', 'full')
88
+ level: Signature richness level:
89
+ 'compact' - short params (6 max), minimal types
90
+ 'typed' - full params with types (10 max), return types always shown
91
+ 'full' - typed + calls/raises always shown
92
+ """
85
93
  parts: List[str] = []
86
94
  total_classes = 0
87
95
  total_functions = 0
88
96
 
89
97
  for module in project.modules:
90
- module_spec = self._generate_module(module, detail)
98
+ module_spec = self._generate_module(module, detail, level)
91
99
  if module_spec.strip():
92
100
  parts.append(module_spec)
93
101
  total_classes += len(module.classes)
@@ -104,7 +112,7 @@ class LogicMLGenerator:
104
112
  function_count=total_functions,
105
113
  )
106
114
 
107
- def _generate_module(self, module: ModuleInfo, detail: str) -> str:
115
+ def _generate_module(self, module: ModuleInfo, detail: str, level: str = 'typed') -> str:
108
116
  """Generate LogicML for a single module."""
109
117
  lines: List[str] = []
110
118
  path = Path(module.path)
@@ -158,12 +166,12 @@ class LogicMLGenerator:
158
166
 
159
167
  # Classes
160
168
  for cls in module.classes:
161
- class_yaml = self._generate_class(cls, detail)
169
+ class_yaml = self._generate_class(cls, detail, level)
162
170
  lines.append(class_yaml)
163
171
 
164
172
  # Top-level functions
165
173
  if module.functions:
166
- funcs_yaml = self._generate_functions(module.functions, detail)
174
+ funcs_yaml = self._generate_functions(module.functions, detail, level)
167
175
  lines.append(funcs_yaml)
168
176
 
169
177
  return '\n'.join(lines)
@@ -196,7 +204,7 @@ class LogicMLGenerator:
196
204
 
197
205
  return '\n'.join(lines) if len(lines) > 1 else ''
198
206
 
199
- def _generate_class(self, cls: ClassInfo, detail: str) -> str:
207
+ def _generate_class(self, cls: ClassInfo, detail: str, level: str = 'typed') -> str:
200
208
  """Generate LogicML for a class."""
201
209
  lines: List[str] = [f'\n{cls.name}:']
202
210
 
@@ -245,22 +253,27 @@ class LogicMLGenerator:
245
253
  if cls.methods:
246
254
  lines.append(' methods:')
247
255
  for method in cls.methods[:20]:
248
- method_yaml = self._generate_method(method, detail, indent=4)
256
+ method_yaml = self._generate_method(method, detail, level, indent=4)
249
257
  lines.append(method_yaml)
250
258
 
251
259
  return '\n'.join(lines)
252
260
 
253
- def _generate_method(self, method: FunctionInfo, detail: str, indent: int = 2) -> str:
254
- """Generate LogicML for a method."""
261
+ def _generate_method(self, method: FunctionInfo, detail: str, level: str = 'typed', indent: int = 2) -> str:
262
+ """Generate LogicML for a method.
263
+
264
+ Args:
265
+ level: 'compact' (6 params), 'typed' (10 params, full types), 'full' (typed + calls/raises)
266
+ """
255
267
  prefix = ' ' * indent
256
268
  lines: List[str] = [f'{prefix}{method.name}:']
257
269
 
258
270
  # Check for property decorator
259
271
  is_property = 'property' in method.decorators
260
272
 
261
- # Signature - remove self/cls for compactness
262
- clean_params = remove_self_from_params(method.params[:7])
263
- params = ', '.join(clean_params[:6])
273
+ # Signature - param count depends on level
274
+ max_params = 6 if level == 'compact' else 10
275
+ clean_params = remove_self_from_params(method.params[:max_params + 1])
276
+ params = ', '.join(clean_params[:max_params])
264
277
  ret = method.return_type or 'None'
265
278
 
266
279
  sig = f'({params}) -> {ret}'
@@ -271,19 +284,31 @@ class LogicMLGenerator:
271
284
 
272
285
  lines.append(f'{prefix} sig: {sig}')
273
286
 
274
- # Intent/docstring as "does" - truncated for efficiency
287
+ # Intent/docstring as "does" - longer for typed/full levels
288
+ does_max = 80 if level in ('typed', 'full') else 60
275
289
  if method.docstring:
276
- does = truncate_docstring(method.docstring, max_length=60)
290
+ does = truncate_docstring(method.docstring, max_length=does_max)
277
291
  if does:
278
292
  lines.append(f'{prefix} does: "{does}"')
279
293
  elif method.intent:
280
- intent = method.intent[:60].replace('\n', ' ').replace('"', "'")
294
+ intent = method.intent[:does_max].replace('\n', ' ').replace('"', "'")
281
295
  lines.append(f'{prefix} does: "{intent}"')
282
296
 
283
297
  # Edge cases (from raises)
284
298
  if method.raises and detail in ('standard', 'full'):
285
299
  for exc in method.raises[:2]:
286
300
  lines.append(f'{prefix} edge: "error → raise {exc}"')
301
+ # In 'full' level, also emit raises as list for LLM reconstruction
302
+ if level == 'full':
303
+ raises_str = ", ".join(method.raises[:5])
304
+ lines.append(f'{prefix} raises: [{raises_str}]')
305
+
306
+ # Calls (only in 'full' level or detail='full')
307
+ if level == 'full' and getattr(method, 'calls', None):
308
+ calls = (method.calls or [])[:10]
309
+ if calls:
310
+ calls_str = ", ".join(calls)
311
+ lines.append(f'{prefix} calls: [{calls_str}]')
287
312
 
288
313
  # Side effects
289
314
  side_effects = self._detect_side_effects(method)
@@ -298,12 +323,12 @@ class LogicMLGenerator:
298
323
 
299
324
  return '\n'.join(lines)
300
325
 
301
def _generate_functions(self, functions: List[FunctionInfo], detail: str, level: str = 'typed') -> str:
    """Render the ``functions:`` section for a module's top-level functions.

    At most the first 20 functions are rendered. Each one is delegated to
    ``_generate_method`` with ``indent=2``; the rendered fragments are joined
    with newlines underneath a leading ``functions:`` header line.

    Args:
        functions: Top-level functions of the module.
        detail: Detail setting, forwarded unchanged to ``_generate_method``.
        level: Level setting, forwarded unchanged to ``_generate_method``.

    Returns:
        The section text, starting with a newline followed by ``functions:``.
    """
    rendered = [
        self._generate_method(fn, detail, level, indent=2)
        for fn in functions[:20]
    ]
    return '\n'.join(['\nfunctions:', *rendered])
@@ -303,23 +303,11 @@ class ReproductionMetrics:
303
303
  return (dot_product / (magnitude1 * magnitude2)) * 100
304
304
 
305
305
  def _compute_structural_metrics(self, original: str, generated: str) -> StructuralMetrics:
306
- """Compute structural metrics."""
306
+ """Compute structural metrics using AST when possible, regex as fallback."""
307
307
  metrics = StructuralMetrics()
308
308
 
309
- # Count elements
310
- def count_elements(code: str) -> Dict[str, int]:
311
- return {
312
- 'classes': len(re.findall(r'^class\s+\w+', code, re.MULTILINE)),
313
- 'functions': len(re.findall(r'^(?:async\s+)?def\s+\w+', code, re.MULTILINE)),
314
- 'methods': len(re.findall(r'^\s+(?:async\s+)?def\s+\w+', code, re.MULTILINE)),
315
- 'imports': len(re.findall(r'^(?:from|import)\s+', code, re.MULTILINE)),
316
- # Capture both annotated attributes and simple assignments.
317
- # This is still heuristic, but avoids undercounting common code.
318
- 'attributes': len(re.findall(r'^\s+\w+\s*(?::\s*[^=\n]+)?\s*=', code, re.MULTILINE)),
319
- }
320
-
321
- orig = count_elements(original)
322
- gen = count_elements(generated)
309
+ orig = self._count_elements_ast(original)
310
+ gen = self._count_elements_ast(generated)
323
311
 
324
312
  metrics.classes_original = orig['classes']
325
313
  metrics.classes_generated = gen['classes']
@@ -341,15 +329,15 @@ class ReproductionMetrics:
341
329
  metrics.attributes_generated = gen['attributes']
342
330
  metrics.attributes_match = orig['attributes'] == gen['attributes']
343
331
 
344
- # Structural score
345
- matches = sum([
346
- metrics.classes_match,
347
- metrics.functions_match,
348
- metrics.methods_match,
349
- metrics.imports_match,
350
- metrics.attributes_match,
351
- ])
352
- metrics.structural_score = (matches / 5) * 100
332
+ # Ratio-based structural score (partial credit instead of binary)
333
+ total = 0.0
334
+ for key in ('classes', 'functions', 'methods', 'imports', 'attributes'):
335
+ ov, gv = orig[key], gen[key]
336
+ if ov == 0 and gv == 0:
337
+ total += 1.0
338
+ elif max(ov, gv) > 0:
339
+ total += min(ov, gv) / max(ov, gv)
340
+ metrics.structural_score = (total / 5) * 100
353
341
 
354
342
  # Element coverage
355
343
  total_orig = sum(orig.values())
@@ -359,6 +347,59 @@ class ReproductionMetrics:
359
347
 
360
348
  return metrics
361
349
 
350
+ @staticmethod
351
+ def _count_elements_ast(code: str) -> Dict[str, int]:
352
+ """Count structural elements using Python AST, with regex fallback."""
353
+ import ast as _ast
354
+
355
+ try:
356
+ tree = _ast.parse(code)
357
+ except SyntaxError:
358
+ # Fallback to regex for unparseable code
359
+ return {
360
+ 'classes': len(re.findall(r'^class\s+\w+', code, re.MULTILINE)),
361
+ 'functions': len(re.findall(r'^(?:async\s+)?def\s+\w+', code, re.MULTILINE)),
362
+ 'methods': len(re.findall(r'^\s+(?:async\s+)?def\s+\w+', code, re.MULTILINE)),
363
+ 'imports': len(re.findall(r'^(?:from|import)\s+', code, re.MULTILINE)),
364
+ 'attributes': len(re.findall(r'^\s+\w+\s*(?::\s*[^=\n]+)?\s*=', code, re.MULTILINE)),
365
+ }
366
+
367
+ classes = 0
368
+ functions = 0
369
+ methods = 0
370
+ imports = 0
371
+ attributes = 0
372
+
373
+ for node in _ast.walk(tree):
374
+ if isinstance(node, _ast.ClassDef):
375
+ classes += 1
376
+ # Count methods inside classes
377
+ for item in node.body:
378
+ if isinstance(item, (_ast.FunctionDef, _ast.AsyncFunctionDef)):
379
+ methods += 1
380
+ # Count class-level attributes (annotated or assigned)
381
+ elif isinstance(item, (_ast.Assign, _ast.AnnAssign)):
382
+ attributes += 1
383
+ elif isinstance(node, (_ast.FunctionDef, _ast.AsyncFunctionDef)):
384
+ # Only count as top-level function if not inside a class
385
+ # (methods already counted above)
386
+ pass
387
+ elif isinstance(node, (_ast.Import, _ast.ImportFrom)):
388
+ imports += 1
389
+
390
+ # Count top-level functions (not methods)
391
+ for node in _ast.iter_child_nodes(tree):
392
+ if isinstance(node, (_ast.FunctionDef, _ast.AsyncFunctionDef)):
393
+ functions += 1
394
+
395
+ return {
396
+ 'classes': classes,
397
+ 'functions': functions,
398
+ 'methods': methods,
399
+ 'imports': imports,
400
+ 'attributes': attributes,
401
+ }
402
+
362
403
  def _compute_semantic_metrics(self, original: str, generated: str) -> SemanticMetrics:
363
404
  """Compute semantic preservation metrics."""
364
405
  metrics = SemanticMetrics()
@@ -132,6 +132,92 @@ class TOONGenerator:
132
132
 
133
133
  return '\n'.join(lines)
134
134
 
135
def generate_hybrid(
    self,
    project: ProjectInfo,
    detail: str = 'full',
    no_repeat_name: bool = True,
    hub_top_n: int = 5,
    hub_functions_detail: str = 'full',
) -> str:
    """Generate TOON-Hybrid: project structure + function-logic for hub modules.

    The base project TOON is produced by ``self.generate``. A function table
    is then appended for each selected "hub" module that has at least one
    item. Hub modules are the ``hub_top_n`` entries of
    ``project.dependency_metrics`` with the highest ``pagerank`` when that
    mapping is present and non-empty; otherwise they are the ``hub_top_n``
    modules with the most functions plus methods.

    Args:
        project: Analyzed project info.
        detail: Detail level for project structure (forwarded to ``generate``).
        no_repeat_name: Compress repeated directory prefixes (forwarded to
            ``generate``).
        hub_top_n: Number of top modules to include function details for.
            Zero or a negative value selects no hub modules.
        hub_functions_detail: Accepted for API compatibility; this method
            does not currently read it.

    Returns:
        The hybrid TOON string, or just the base project TOON when no hub
        module contributes any function rows.
    """
    from .function_logic import FunctionLogicGenerator

    # Generate base project TOON
    base = self.generate(project, detail=detail, no_repeat_name=no_repeat_name)

    # Guard explicitly: a negative count used as a slice bound would mean
    # "all but the last |n|" instead of "none".
    if hub_top_n <= 0:
        return base

    # Identify hub modules: use dependency_metrics if available, otherwise
    # rank by total functions + methods.
    dep_metrics = getattr(project, 'dependency_metrics', {}) or {}
    if dep_metrics:
        ranked = sorted(
            dep_metrics.items(),
            key=lambda entry: getattr(entry[1], 'pagerank', 0),
            reverse=True,
        )
        hub_paths = {path for path, _ in ranked[:hub_top_n]}
    else:
        def _item_count(m):
            return len(getattr(m, 'functions', []) or []) + sum(
                len(getattr(c, 'methods', []) or []) for c in (getattr(m, 'classes', []) or [])
            )
        ranked_modules = sorted(project.modules, key=_item_count, reverse=True)
        hub_paths = {m.path for m in ranked_modules[:hub_top_n]}

    if not hub_paths:
        return base

    # Hub modules are emitted in project order, not in rank order.
    hub_modules = [m for m in project.modules if m.path in hub_paths]
    if not hub_modules:
        return base

    logic_gen = FunctionLogicGenerator()
    section: list = []

    # NOTE(review): every emitted line below carries the same one-space
    # indent as written in these literals - confirm the intended nesting
    # depth for module / class / table / row lines.
    for m in hub_modules:
        items = logic_gen._module_items(m)
        if not items:
            continue
        section.append(f" {self._quote(m.path)}:")

        # Emit class context
        for cls in getattr(m, 'classes', []) or []:
            bases = ','.join(getattr(cls, 'bases', []) or []) or '-'
            section.append(f" CLASS {self._quote(cls.name)}({bases})")

        # Emit function table
        header = f"line{self.delim_marker}name{self.delim_marker}sig{self.delim_marker}does"
        section.append(f" functions[{len(items)}]{{{header}}}:")

        for _kind, qname, func in items:
            sig = logic_gen._build_sig(func, include_async_prefix=False, language=m.language)
            start_line = str(getattr(func, 'start_line', 0) or 0)
            display_name = qname
            if getattr(func, 'is_async', False):
                # Async functions are marked with a leading tilde.
                display_name = f"~{qname}"
            cc = getattr(func, 'complexity', 1) or 1
            if cc > 1:
                display_name = f"{display_name} cc:{cc}"
            does = logic_gen._build_does(func)
            row = [start_line, self._quote(display_name), self._quote(sig), self._quote(does)]
            section.append(f" {self.delimiter.join(row)}")

    # Avoid a dangling section header when no hub module had any items.
    if not section:
        return base

    return '\n'.join([base, "", "# === Hub Module Function Details ===", *section])
220
+
135
221
  def _generate_modules(self, modules: List[ModuleInfo], detail: str, no_repeat_name: bool = False) -> List[str]:
136
222
  """Generate modules section."""
137
223
  lines = []
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "code2logic"
7
- version = "1.0.41"
7
+ version = "1.0.43"
8
8
  description = "Code2Logic - Source code to logical representation converter for LLM analysis, featuring Tree-sitter parsing, dependency graph analysis, and multi-language support."
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
File without changes
File without changes