PyPI - code2logic - Versions diffs - 1.0.1__tar.gz → 1.0.2__tar.gz - Mend

code2logic 1.0.1.tar.gz → 1.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

{code2logic-1.0.1 → code2logic-1.0.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code2logic
-Version: 1.0.1
+Version: 1.0.2
 Summary: Convert source code to logical representation for LLM analysis
 Project-URL: Homepage, https://github.com/softreck/code2logic
 Project-URL: Documentation, https://code2logic.readthedocs.io
@@ -31,6 +31,7 @@ Requires-Python: >=3.9
 Provides-Extra: dev
 Requires-Dist: black>=23.0; extra == 'dev'
 Requires-Dist: build>=1.0.0; extra == 'dev'
+Requires-Dist: bumpver>=2023.1129; extra == 'dev'
 Requires-Dist: mypy>=1.0; extra == 'dev'
 Requires-Dist: pre-commit>=3.0; extra == 'dev'
 Requires-Dist: pytest-cov>=4.0; extra == 'dev'
@@ -351,7 +352,20 @@ MIT License - see [LICENSE](LICENSE) for details.
 ## 📚 Documentation
-- [API Documentation](DOCS.md) - Complete API reference
+- [Docs Index](docs/index.md) - Documentation home (start here)
+- [Getting Started](docs/getting-started.md) - Install and first steps
+- [Configuration](docs/configuration.md) - API keys, environment setup
+- [CLI Reference](docs/cli-reference.md) - Command-line usage
+- [Python API](docs/python-api.md) - Programmatic usage
+- [Output Formats](docs/output-formats.md) - Format comparison and usage
+- [Benchmarking](docs/benchmark.md) - Benchmark methodology and results
+- [Repeatability](docs/repeatability.md) - Repeatability testing
+- [LLM Integration](docs/llm-integration.md) - OpenRouter/Ollama/LiteLLM
+- [LLM Comparison Report](docs/llm-comparison-report.md) - Provider/model comparison
+- [Architecture](docs/architecture.md) - System design and components
+- [Examples](docs/examples.md) - Usage workflows and examples
+- [Format Analysis](docs/FORMAT_ANALYSIS.md) - Deeper format evaluation
+- [API Documentation (legacy)](DOCS.md) - Repo-level API reference
 - [Refactoring Plan](TODO.md) - Development roadmap
 ## 🔗 Links

{code2logic-1.0.1 → code2logic-1.0.2}/README.md RENAMED Viewed

@@ -283,7 +283,20 @@ MIT License - see [LICENSE](LICENSE) for details.
 ## 📚 Documentation
-- [API Documentation](DOCS.md) - Complete API reference
+- [Docs Index](docs/index.md) - Documentation home (start here)
+- [Getting Started](docs/getting-started.md) - Install and first steps
+- [Configuration](docs/configuration.md) - API keys, environment setup
+- [CLI Reference](docs/cli-reference.md) - Command-line usage
+- [Python API](docs/python-api.md) - Programmatic usage
+- [Output Formats](docs/output-formats.md) - Format comparison and usage
+- [Benchmarking](docs/benchmark.md) - Benchmark methodology and results
+- [Repeatability](docs/repeatability.md) - Repeatability testing
+- [LLM Integration](docs/llm-integration.md) - OpenRouter/Ollama/LiteLLM
+- [LLM Comparison Report](docs/llm-comparison-report.md) - Provider/model comparison
+- [Architecture](docs/architecture.md) - System design and components
+- [Examples](docs/examples.md) - Usage workflows and examples
+- [Format Analysis](docs/FORMAT_ANALYSIS.md) - Deeper format evaluation
+- [API Documentation (legacy)](DOCS.md) - Repo-level API reference
 - [Refactoring Plan](TODO.md) - Development roadmap
 ## 🔗 Links

{code2logic-1.0.1 → code2logic-1.0.2}/code2logic/__init__.py RENAMED Viewed

@@ -18,7 +18,7 @@ Example:
     >>> print(output)
 """
-__version__ = "1.0.1"
+__version__ = "1.0.2"
 __author__ = "Softreck"
 __email__ = "info@softreck.dev"
 __license__ = "MIT"
@@ -146,6 +146,24 @@ from .prompts import (
     get_review_prompt,
     get_fix_prompt,
 )
+from .schemas import (
+    validate_yaml,
+    validate_logicml,
+    validate_markdown,
+    validate_json,
+    YAMLSchema,
+    LogicMLSchema,
+    MarkdownSchema,
+    JSONSchema,
+)
+from .quality import (
+    QualityAnalyzer,
+    QualityReport,
+    QualityIssue,
+    analyze_quality,
+    get_quality_summary,
+)
+from .similarity import get_refactoring_suggestions
 from .chunked_reproduction import (
     ChunkedReproducer,
     ChunkedResult,

code2logic-1.0.2/code2logic/benchmarks/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+from .common import (
+    create_single_project,
+    generate_spec,
+    generate_spec_token,
+    get_async_reproduction_prompt,
+    get_token_reproduction_prompt,
+    get_simple_reproduction_prompt,
+)

code2logic-1.0.2/code2logic/benchmarks/common.py ADDED Viewed

@@ -0,0 +1,236 @@
+from __future__ import annotations
+from datetime import datetime
+from pathlib import Path
+import json
+from ..gherkin import GherkinGenerator
+from ..generators import JSONGenerator, YAMLGenerator
+from ..logicml import LogicMLGenerator
+from ..markdown_format import MarkdownHybridGenerator
+from ..models import ProjectInfo
+def create_single_project(module_info, file_path: Path) -> ProjectInfo:
+    return ProjectInfo(
+        name=file_path.name,
+        root_path=str(file_path.parent),
+        languages={"python": 1},
+        modules=[module_info],
+        dependency_graph={},
+        dependency_metrics={},
+        entrypoints=[],
+        similar_functions={},
+        total_files=1,
+        total_lines=module_info.lines_total,
+        generated_at=datetime.now().isoformat(),
+    )
+def generate_spec(project: ProjectInfo, fmt: str) -> str:
+    if fmt == "gherkin":
+        gen = GherkinGenerator()
+        return gen.generate(project)
+    if fmt == "yaml":
+        gen = YAMLGenerator()
+        return gen.generate(project, detail="full")
+    if fmt == "markdown":
+        gen = MarkdownHybridGenerator()
+        spec = gen.generate(project)
+        return spec.content
+    if fmt == "json":
+        gen = JSONGenerator()
+        return gen.generate(project, detail="full")
+    if fmt == "logicml":
+        gen = LogicMLGenerator()
+        spec = gen.generate(project)
+        return spec.content
+    return ""
+def _generate_token_json(project: ProjectInfo) -> str:
+    """Generate compact, token-friendly JSON spec (used by examples/11_token_benchmark.py)."""
+    data = {
+        "project": project.name,
+        "files": project.total_files,
+        "lines": project.total_lines,
+        "modules": [],
+    }
+    for m in project.modules:
+        module: dict = {
+            "path": m.path,
+            "language": m.language,
+            "imports": m.imports[:10],
+            "exports": m.exports[:10],
+        }
+        if m.classes:
+            module["classes"] = []
+            for c in m.classes[:20]:
+                cls = {
+                    "name": c.name,
+                    "bases": c.bases,
+                    "doc": (c.docstring[:80] if c.docstring else ""),
+                    "properties": c.properties[:15],
+                    "methods": [
+                        {
+                            "name": method.name,
+                            "params": method.params[:5],
+                            "returns": method.return_type or "None",
+                            "doc": (method.intent[:50] if method.intent else ""),
+                            "async": method.is_async,
+                        }
+                        for method in c.methods[:15]
+                    ],
+                }
+                module["classes"].append(cls)
+        if m.functions:
+            module["functions"] = [
+                {
+                    "name": f.name,
+                    "params": f.params[:6],
+                    "returns": f.return_type or "None",
+                    "doc": (f.intent[:60] if f.intent else ""),
+                    "async": f.is_async,
+                    "lines": f.lines,
+                }
+                for f in m.functions[:20]
+            ]
+        data["modules"].append(module)
+    return json.dumps(data, indent=2)
+def _generate_token_json_compact(project: ProjectInfo) -> str:
+    data = json.loads(_generate_token_json(project))
+    return json.dumps(data, separators=(",", ":"))
+def generate_spec_token(project: ProjectInfo, fmt: str) -> str:
+    """Generate spec optimized for token benchmark (keeps historical behavior).
+    Notes:
+    - json/json_compact use the token-friendly JSON representation.
+    - other formats delegate to generate_spec.
+    """
+    if fmt == "json":
+        return _generate_token_json(project)
+    if fmt == "json_compact":
+        return _generate_token_json_compact(project)
+    return generate_spec(project, fmt)
+def get_async_reproduction_prompt(spec: str, fmt: str, file_name: str, with_tests: bool = False) -> str:
+    base_prompts = {
+        "gherkin": f"""Generate Python code from this Gherkin/BDD specification.
+Implement all scenarios as working, production-ready code.
+{spec[:6000]}
+Requirements:
+- Generate complete, working Python code for {file_name}
+- Include all imports
+- Use type hints
+- Add docstrings""",
+        "yaml": f"""Generate Python code from this YAML specification.
+Match the structure exactly with all classes and functions.
+{spec[:6000]}
+Requirements:
+- Generate complete, working Python code for {file_name}
+- Include all imports
+- Use type hints
+- Implement all methods with actual logic""",
+        "markdown": f"""Generate Python code from this Markdown specification.
+It contains embedded Gherkin (behaviors) and YAML (structures).
+{spec[:6000]}
+Requirements:
+- Generate complete, working Python code for {file_name}
+- Include all imports
+- Implement all classes and functions
+- Use type hints throughout""",
+    }
+    prompt = base_prompts.get(fmt, base_prompts["yaml"])
+    if with_tests:
+        prompt += """
+IMPORTANT: Also generate a unittest test class at the end of the file.
+Include tests for each function/method with at least 2 test cases each.
+Use unittest.TestCase as base class.
+Name the test class Test<ClassName> or TestFunctions."""
+    return prompt
+def get_token_reproduction_prompt(spec: str, fmt: str, file_name: str) -> str:
+    format_hints = {
+        "json": "Parse the JSON structure and implement all classes and functions.",
+        "json_compact": "Parse the compact JSON and implement all elements.",
+        "yaml": "Parse the YAML structure and implement all classes and functions with exact signatures.",
+        "gherkin": "Implement scenarios as SIMPLE, MINIMAL Python code. NO extra error classes, NO over-engineering. Keep code short and direct.",
+        "markdown": "Parse embedded Gherkin (behaviors) and YAML (structures).",
+        "logicml": """Parse LogicML and generate VALID Python code:
+- 'sig: (params) -> Type' = def func(params) -> Type
+- 'sig: async (params)' = async def func(params)
+- 'sig: @property (self)' = @property decorator
+- 'bases: [BaseModel]' = class X(BaseModel) with Field()
+- 'type: re-export' = from .module import X
+CRITICAL: Ensure valid syntax - balanced brackets, proper indentation, no undefined variables.""",
+    }
+    max_spec = 5000
+    spec_truncated = spec[:max_spec] if len(spec) > max_spec else spec
+    prompt = f"""Generate Python code from this {fmt.upper()} specification.
+{format_hints.get(fmt, '')}
+{spec_truncated}
+Requirements:
+- Complete, working Python code for {file_name}
+- Include imports and type hints
+- Implement all functions with actual logic
+```python
+"""
+    return prompt
+def get_simple_reproduction_prompt(spec: str, fmt: str, file_name: str) -> str:
+    prompts = {
+        "gherkin": f"""Generate Python code from this Gherkin/BDD specification.
+Implement all scenarios as working code.
+{spec[:5000]}
+Generate complete Python code for {file_name}:""",
+        "yaml": f"""Generate Python code from this YAML specification.
+Match the structure exactly.
+{spec[:5000]}
+Generate complete Python code for {file_name}:""",
+        "markdown": f"""Generate Python code from this Markdown specification.
+It contains embedded Gherkin and YAML sections.
+{spec[:5000]}
+Generate complete Python code for {file_name}:""",
+        "logicml": f"""Generate Python code from this LogicML specification.
+'sig:' = EXACT function signature, 'does:' = docstring, 'attrs:' = class attributes.
+Match signatures EXACTLY.
+{spec[:5000]}
+Generate complete Python code for {file_name}:""",
+    }
+    return prompts.get(fmt, prompts["yaml"])

{code2logic-1.0.1 → code2logic-1.0.2}/code2logic/chunked_reproduction.py RENAMED Viewed

@@ -17,6 +17,7 @@ from typing import Dict, List, Optional, Tuple
 from pathlib import Path
 from .models import ProjectInfo, ModuleInfo, FunctionInfo, ClassInfo
+from .utils import estimate_tokens
 # LLM context limits (approximate)
@@ -67,11 +68,6 @@ class ChunkedResult:
     errors: List[str]
-def estimate_tokens(text: str) -> int:
-    """Estimate token count."""
-    return len(text) // 4
 def get_llm_limit(model_name: str) -> int:
     """Get context limit for LLM model."""
     model_lower = model_name.lower()

{code2logic-1.0.1 → code2logic-1.0.2}/code2logic/generators.py RENAMED Viewed

@@ -373,6 +373,22 @@ class JSONGenerator:
         if flat:
             return self._generate_flat(project, detail)
         return self._generate_nested(project, detail)
+    def generate_from_module(self, module: ModuleInfo, detail: str = 'full') -> str:
+        project = ProjectInfo(
+            name=Path(module.path).name,
+            root_path=str(Path(module.path).parent),
+            languages={module.language: 1},
+            modules=[module],
+            dependency_graph={},
+            dependency_metrics={},
+            entrypoints=[],
+            similar_functions={},
+            total_files=1,
+            total_lines=module.lines_total,
+            generated_at="",
+        )
+        return self.generate(project, flat=False, detail=detail)
     def _generate_nested(self, project: ProjectInfo, detail: str) -> str:
         """Generate nested JSON structure."""
@@ -584,6 +600,22 @@ class YAMLGenerator:
         return yaml.dump(data, default_flow_style=False, allow_unicode=True,
                         sort_keys=False, width=120)
+    def generate_from_module(self, module: ModuleInfo, detail: str = 'full') -> str:
+        project = ProjectInfo(
+            name=Path(module.path).name,
+            root_path=str(Path(module.path).parent),
+            languages={module.language: 1},
+            modules=[module],
+            dependency_graph={},
+            dependency_metrics={},
+            entrypoints=[],
+            similar_functions={},
+            total_files=1,
+            total_lines=module.lines_total,
+            generated_at="",
+        )
+        return self.generate(project, flat=False, detail=detail)
     def _build_flat_data(self, project: ProjectInfo, detail: str) -> dict:
         """Build flat data structure optimized for comparisons."""
@@ -742,12 +774,17 @@ class YAMLGenerator:
     def _function_to_dict(self, f: FunctionInfo, detail: str) -> dict:
         """Convert function to dict for nested output."""
+        # Clean function name (remove any newlines or special chars)
+        name = f.name.replace('\n', '').strip() if f.name else ''
         data = {
-            'name': f.name,
+            'name': name,
             'signature': self._build_signature(f),
         }
         if detail in ('standard', 'full'):
-            data['intent'] = f.intent
+            # Clean intent - remove newlines and limit length
+            intent = f.intent.replace('\n', ' ').strip()[:100] if f.intent else ''
+            data['intent'] = intent
         if detail == 'full':
             data['lines'] = f.lines
             data['is_async'] = f.is_async
@@ -759,9 +796,17 @@ class YAMLGenerator:
     def _build_signature(self, f: FunctionInfo) -> str:
         """Build compact signature string."""
-        params = ','.join(f.params[:4])
-        if len(f.params) > 4:
-            params += f'...+{len(f.params)-4}'
+        # Clean params - remove newlines and extra spaces
+        clean_params = []
+        for p in f.params[:6]:
+            p_clean = p.replace('\n', ' ').replace('  ', ' ').strip()
+            if p_clean:
+                clean_params.append(p_clean)
+        params = ','.join(clean_params)
+        if len(f.params) > 6:
+            params += f'...+{len(f.params)-6}'
         ret = f"->{f.return_type}" if f.return_type else ""
         return f"({params}){ret}"

{code2logic-1.0.1 → code2logic-1.0.2}/code2logic/logicml.py RENAMED Viewed

@@ -119,8 +119,12 @@ class LogicMLGenerator:
         header_parts.append(f"{module.lines_total} lines")
         lines.append(' | '.join(header_parts))
-        # Handle re-export modules (no classes/functions, only imports)
-        if not module.classes and not module.functions and module.imports:
+        # Handle re-export modules (primarily __init__.py or export-like modules)
+        # Some parsers may classify import-only files as having "classes" (e.g., Enum)
+        # so we also special-case __init__.py.
+        if (path.name == "__init__.py" and module.imports) or (
+            not module.classes and not module.functions and module.imports
+        ):
             lines.append(f"# Re-export module")
             lines.append("type: re-export")
             lines.append("exports:")
@@ -194,8 +198,11 @@ class LogicMLGenerator:
             first_line = doc_lines[0].strip()[:80].replace('"', "'")
             lines.append(f'  doc: "{first_line}"')
-            # Include Attributes section if present
+            # Include Example section if present (important for usage)
             for i, doc_line in enumerate(doc_lines):
+                if 'Example:' in doc_line:
+                    lines.append('  # Example usage in docstring')
+                    break
                 if 'Attributes:' in doc_line or 'Args:' in doc_line:
                     for attr_line in doc_lines[i+1:i+5]:
                         attr_line = attr_line.strip()
@@ -203,9 +210,15 @@ class LogicMLGenerator:
                             lines.append(f'  # {attr_line}')
                     break
-        # Bases
+        # Bases - important for Pydantic/dataclass
         if cls.bases:
-            lines.append(f'  bases: [{", ".join(cls.bases)}]')
+            bases_str = ", ".join(cls.bases)
+            lines.append(f'  bases: [{bases_str}]')
+            # Add hint for special base classes
+            if 'BaseModel' in bases_str:
+                lines.append('  # Pydantic model - use Field() for attributes')
+            elif 'Enum' in bases_str:
+                lines.append('  # Enum class')
         # Type markers
         if cls.is_abstract:
@@ -251,6 +264,9 @@ class LogicMLGenerator:
         prefix = ' ' * indent
         lines: List[str] = [f'{prefix}{method.name}:']
+        # Check for property decorator
+        is_property = 'property' in method.decorators
         # Signature
         params = ', '.join(method.params[:6])
         ret = method.return_type or 'None'
@@ -258,6 +274,8 @@ class LogicMLGenerator:
         sig = f'({params}) -> {ret}'
         if method.is_async:
             sig = f'async {sig}'
+        if is_property:
+            sig = f'@property {sig}'
         lines.append(f'{prefix}  sig: {sig}')

{code2logic-1.0.1 → code2logic-1.0.2}/code2logic/parsers.py RENAMED Viewed

@@ -122,7 +122,7 @@ class TreeSitterParser:
             # Imports
             elif node_type == 'import_statement':
                 imports.extend(self._extract_py_import(child, content))
-            elif node_type == 'import_from_statement':
+            elif node_type in ('import_from_statement', 'from_import_statement', 'import_from'):
                 imports.extend(self._extract_py_from_import(child, content))
             # Functions
@@ -304,18 +304,36 @@ class TreeSitterParser:
     def _extract_py_from_import(self, node, content: str) -> List[str]:
         """Extract from ... import ... statement."""
         imports = []
-        module = None
+        module_parts = []
+        seen_import_kw = False
         for c in node.children:
-            if c.type in ('dotted_name', 'import_prefix'):
-                module = self._text(c, content)
-        if module:
-            for c in node.children:
-                if c.type == 'identifier':
-                    imports.append(f"{module}.{self._text(c, content)}")
-                elif c.type == 'aliased_import':
-                    n = self._find_child(c, 'identifier')
-                    if n:
-                        imports.append(f"{module}.{self._text(n, content)}")
+            if c.type == 'import':
+                seen_import_kw = True
+                continue
+            if not seen_import_kw:
+                if c.type == 'import_prefix':
+                    module_parts.append(self._text(c, content))
+                elif c.type in ('relative_import', 'relative_import_statement'):
+                    module_parts.append(self._text(c, content))
+                elif c.type == 'dotted_name':
+                    module_parts.append(self._text(c, content))
+        module = ''.join(module_parts).strip().lstrip('.')
+        for c in node.children:
+            if c.type == 'identifier':
+                name = self._text(c, content)
+                imports.append(f"{module}.{name}" if module else name)
+            elif c.type == 'dotted_name':
+                name = self._text(c, content)
+                if seen_import_kw:
+                    imports.append(f"{module}.{name}" if module else name)
+            elif c.type == 'aliased_import':
+                n = self._find_child(c, 'identifier')
+                if n:
+                    name = self._text(n, content)
+                    imports.append(f"{module}.{name}" if module else name)
         return imports
     def _extract_py_constant(self, node, content: str) -> Optional[str]:
@@ -673,6 +691,10 @@ class UniversalParser:
         Returns:
             ModuleInfo if parsing succeeds, None otherwise
         """
+        if isinstance(filepath, str) and isinstance(content, str):
+            if "\n" in filepath and "\n" not in content:
+                filepath, content = content, filepath
         if language == 'python':
             return self._parse_python(filepath, content)
         elif language in ('javascript', 'typescript'):

{code2logic-1.0.1 → code2logic-1.0.2}/code2logic/prompts.py RENAMED Viewed

@@ -18,16 +18,22 @@ FORMAT_HINTS: Dict[str, str] = {
 - 'functions' with 'signature' and 'intent'
 Implement all classes and functions with exact signatures.""",
-    'logicml': """Parse LogicML spec and generate COMPLETE code:
-- 'sig: async (...)' = async def function
-- 'sig: (...)' = regular def function
-- 'attrs:' = instance attributes to set in __init__
-- 'does:' = use as docstring
-- 'type: re-export' = module that re-exports from submodules (use 'from .module import X' or 'export * from')
-- 'type: index' = index file with 'export * from ./submodule' pattern
-- 'exports:' = list of names to export/re-export
-- Include ALL imports from 'imports:' section
-CRITICAL: Generate complete working code. For re-export modules, generate proper import/export statements.""",
+    'logicml': """Generate VALID, RUNNABLE Python code from LogicML spec.
+SYNTAX RULES:
+- 'sig: (params) -> Type' = def method(params) -> Type:
+- 'sig: async (params)' = async def method(params):
+- 'sig: @property (self)' = @property decorator above method
+- 'bases: [BaseModel]' = Pydantic: class X(BaseModel): with Field()
+- 'attrs:' = self.attr = value in __init__
+- 'type: re-export' = from .submodule import Name
+CRITICAL REQUIREMENTS:
+1. ALL brackets/parentheses MUST be balanced
+2. ALL imports MUST be at file top
+3. NO undefined variables
+4. Proper 4-space indentation
+5. Each class/function MUST be complete""",
     'gherkin': """Implement scenarios as SIMPLE, MINIMAL Python code:
 - NO extra error classes or exception hierarchies

code2logic 1.0.1__tar.gz → 1.0.2__tar.gz

code2logic 1.0.1.tar.gz → 1.0.2.tar.gz