code2logic 1.0.1__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code2logic-1.0.1 → code2logic-1.0.2}/PKG-INFO +16 -2
- {code2logic-1.0.1 → code2logic-1.0.2}/README.md +14 -1
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/__init__.py +19 -1
- code2logic-1.0.2/code2logic/benchmarks/__init__.py +8 -0
- code2logic-1.0.2/code2logic/benchmarks/common.py +236 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/chunked_reproduction.py +1 -5
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/generators.py +50 -5
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/logicml.py +23 -5
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/parsers.py +34 -12
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/prompts.py +16 -10
- code2logic-1.0.2/code2logic/quality.py +264 -0
- code2logic-1.0.2/code2logic/schemas/__init__.py +29 -0
- code2logic-1.0.2/code2logic/schemas/json_schema.py +257 -0
- code2logic-1.0.2/code2logic/schemas/logicml_schema.py +250 -0
- code2logic-1.0.2/code2logic/schemas/markdown_schema.py +174 -0
- code2logic-1.0.2/code2logic/schemas/yaml_schema.py +179 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/similarity.py +73 -0
- code2logic-1.0.2/code2logic/utils.py +24 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/pyproject.toml +16 -2
- code2logic-1.0.2/tests/samples/sample_enum.py +95 -0
- code2logic-1.0.2/tests/samples/sample_pydantic.py +78 -0
- code2logic-1.0.2/tests/samples/sample_reexport/__init__.py +21 -0
- code2logic-1.0.2/tests/samples/sample_reexport/exceptions.py +11 -0
- code2logic-1.0.2/tests/samples/sample_reexport/models.py +28 -0
- code2logic-1.0.2/tests/samples/sample_reexport/utils.py +13 -0
- code2logic-1.0.2/tests/test_all_formats.py +641 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/.gitignore +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/CHANGELOG.md +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/LICENSE +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/__main__.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/adaptive.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/analyzer.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/base.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/benchmark.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/cli.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/code_review.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/config.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/dependency.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/file_formats.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/gherkin.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/intent.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/llm.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/llm_clients.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/markdown_format.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/mcp_server.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/metrics.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/models.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/project_reproducer.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/py.typed +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/refactor.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/reproduction.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/code2logic/universal.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/__init__.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/conftest.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_algorithms.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_api.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_async.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_class.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_dataclasses.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_functions.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_go.go +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_javascript.js +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_rust.rs +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_sql.sql +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_sql_dsl.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/samples/sample_typescript.ts +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/test_analyzer.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/test_format_specifics.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/test_generators.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/test_intent.py +0 -0
- {code2logic-1.0.1 → code2logic-1.0.2}/tests/test_reproduction.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code2logic
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Convert source code to logical representation for LLM analysis
|
|
5
5
|
Project-URL: Homepage, https://github.com/softreck/code2logic
|
|
6
6
|
Project-URL: Documentation, https://code2logic.readthedocs.io
|
|
@@ -31,6 +31,7 @@ Requires-Python: >=3.9
|
|
|
31
31
|
Provides-Extra: dev
|
|
32
32
|
Requires-Dist: black>=23.0; extra == 'dev'
|
|
33
33
|
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: bumpver>=2023.1129; extra == 'dev'
|
|
34
35
|
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
35
36
|
Requires-Dist: pre-commit>=3.0; extra == 'dev'
|
|
36
37
|
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
@@ -351,7 +352,20 @@ MIT License - see [LICENSE](LICENSE) for details.
|
|
|
351
352
|
|
|
352
353
|
## 📚 Documentation
|
|
353
354
|
|
|
354
|
-
- [
|
|
355
|
+
- [Docs Index](docs/index.md) - Documentation home (start here)
|
|
356
|
+
- [Getting Started](docs/getting-started.md) - Install and first steps
|
|
357
|
+
- [Configuration](docs/configuration.md) - API keys, environment setup
|
|
358
|
+
- [CLI Reference](docs/cli-reference.md) - Command-line usage
|
|
359
|
+
- [Python API](docs/python-api.md) - Programmatic usage
|
|
360
|
+
- [Output Formats](docs/output-formats.md) - Format comparison and usage
|
|
361
|
+
- [Benchmarking](docs/benchmark.md) - Benchmark methodology and results
|
|
362
|
+
- [Repeatability](docs/repeatability.md) - Repeatability testing
|
|
363
|
+
- [LLM Integration](docs/llm-integration.md) - OpenRouter/Ollama/LiteLLM
|
|
364
|
+
- [LLM Comparison Report](docs/llm-comparison-report.md) - Provider/model comparison
|
|
365
|
+
- [Architecture](docs/architecture.md) - System design and components
|
|
366
|
+
- [Examples](docs/examples.md) - Usage workflows and examples
|
|
367
|
+
- [Format Analysis](docs/FORMAT_ANALYSIS.md) - Deeper format evaluation
|
|
368
|
+
- [API Documentation (legacy)](DOCS.md) - Repo-level API reference
|
|
355
369
|
- [Refactoring Plan](TODO.md) - Development roadmap
|
|
356
370
|
|
|
357
371
|
## 🔗 Links
|
|
@@ -283,7 +283,20 @@ MIT License - see [LICENSE](LICENSE) for details.
|
|
|
283
283
|
|
|
284
284
|
## 📚 Documentation
|
|
285
285
|
|
|
286
|
-
- [
|
|
286
|
+
- [Docs Index](docs/index.md) - Documentation home (start here)
|
|
287
|
+
- [Getting Started](docs/getting-started.md) - Install and first steps
|
|
288
|
+
- [Configuration](docs/configuration.md) - API keys, environment setup
|
|
289
|
+
- [CLI Reference](docs/cli-reference.md) - Command-line usage
|
|
290
|
+
- [Python API](docs/python-api.md) - Programmatic usage
|
|
291
|
+
- [Output Formats](docs/output-formats.md) - Format comparison and usage
|
|
292
|
+
- [Benchmarking](docs/benchmark.md) - Benchmark methodology and results
|
|
293
|
+
- [Repeatability](docs/repeatability.md) - Repeatability testing
|
|
294
|
+
- [LLM Integration](docs/llm-integration.md) - OpenRouter/Ollama/LiteLLM
|
|
295
|
+
- [LLM Comparison Report](docs/llm-comparison-report.md) - Provider/model comparison
|
|
296
|
+
- [Architecture](docs/architecture.md) - System design and components
|
|
297
|
+
- [Examples](docs/examples.md) - Usage workflows and examples
|
|
298
|
+
- [Format Analysis](docs/FORMAT_ANALYSIS.md) - Deeper format evaluation
|
|
299
|
+
- [API Documentation (legacy)](DOCS.md) - Repo-level API reference
|
|
287
300
|
- [Refactoring Plan](TODO.md) - Development roadmap
|
|
288
301
|
|
|
289
302
|
## 🔗 Links
|
|
@@ -18,7 +18,7 @@ Example:
|
|
|
18
18
|
>>> print(output)
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
-
__version__ = "1.0.
|
|
21
|
+
__version__ = "1.0.2"
|
|
22
22
|
__author__ = "Softreck"
|
|
23
23
|
__email__ = "info@softreck.dev"
|
|
24
24
|
__license__ = "MIT"
|
|
@@ -146,6 +146,24 @@ from .prompts import (
|
|
|
146
146
|
get_review_prompt,
|
|
147
147
|
get_fix_prompt,
|
|
148
148
|
)
|
|
149
|
+
from .schemas import (
|
|
150
|
+
validate_yaml,
|
|
151
|
+
validate_logicml,
|
|
152
|
+
validate_markdown,
|
|
153
|
+
validate_json,
|
|
154
|
+
YAMLSchema,
|
|
155
|
+
LogicMLSchema,
|
|
156
|
+
MarkdownSchema,
|
|
157
|
+
JSONSchema,
|
|
158
|
+
)
|
|
159
|
+
from .quality import (
|
|
160
|
+
QualityAnalyzer,
|
|
161
|
+
QualityReport,
|
|
162
|
+
QualityIssue,
|
|
163
|
+
analyze_quality,
|
|
164
|
+
get_quality_summary,
|
|
165
|
+
)
|
|
166
|
+
from .similarity import get_refactoring_suggestions
|
|
149
167
|
from .chunked_reproduction import (
|
|
150
168
|
ChunkedReproducer,
|
|
151
169
|
ChunkedResult,
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
from ..gherkin import GherkinGenerator
|
|
8
|
+
from ..generators import JSONGenerator, YAMLGenerator
|
|
9
|
+
from ..logicml import LogicMLGenerator
|
|
10
|
+
from ..markdown_format import MarkdownHybridGenerator
|
|
11
|
+
from ..models import ProjectInfo
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_single_project(module_info, file_path: Path) -> ProjectInfo:
|
|
15
|
+
return ProjectInfo(
|
|
16
|
+
name=file_path.name,
|
|
17
|
+
root_path=str(file_path.parent),
|
|
18
|
+
languages={"python": 1},
|
|
19
|
+
modules=[module_info],
|
|
20
|
+
dependency_graph={},
|
|
21
|
+
dependency_metrics={},
|
|
22
|
+
entrypoints=[],
|
|
23
|
+
similar_functions={},
|
|
24
|
+
total_files=1,
|
|
25
|
+
total_lines=module_info.lines_total,
|
|
26
|
+
generated_at=datetime.now().isoformat(),
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def generate_spec(project: ProjectInfo, fmt: str) -> str:
|
|
31
|
+
if fmt == "gherkin":
|
|
32
|
+
gen = GherkinGenerator()
|
|
33
|
+
return gen.generate(project)
|
|
34
|
+
if fmt == "yaml":
|
|
35
|
+
gen = YAMLGenerator()
|
|
36
|
+
return gen.generate(project, detail="full")
|
|
37
|
+
if fmt == "markdown":
|
|
38
|
+
gen = MarkdownHybridGenerator()
|
|
39
|
+
spec = gen.generate(project)
|
|
40
|
+
return spec.content
|
|
41
|
+
if fmt == "json":
|
|
42
|
+
gen = JSONGenerator()
|
|
43
|
+
return gen.generate(project, detail="full")
|
|
44
|
+
if fmt == "logicml":
|
|
45
|
+
gen = LogicMLGenerator()
|
|
46
|
+
spec = gen.generate(project)
|
|
47
|
+
return spec.content
|
|
48
|
+
return ""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _generate_token_json(project: ProjectInfo) -> str:
|
|
52
|
+
"""Generate compact, token-friendly JSON spec (used by examples/11_token_benchmark.py)."""
|
|
53
|
+
data = {
|
|
54
|
+
"project": project.name,
|
|
55
|
+
"files": project.total_files,
|
|
56
|
+
"lines": project.total_lines,
|
|
57
|
+
"modules": [],
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
for m in project.modules:
|
|
61
|
+
module: dict = {
|
|
62
|
+
"path": m.path,
|
|
63
|
+
"language": m.language,
|
|
64
|
+
"imports": m.imports[:10],
|
|
65
|
+
"exports": m.exports[:10],
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if m.classes:
|
|
69
|
+
module["classes"] = []
|
|
70
|
+
for c in m.classes[:20]:
|
|
71
|
+
cls = {
|
|
72
|
+
"name": c.name,
|
|
73
|
+
"bases": c.bases,
|
|
74
|
+
"doc": (c.docstring[:80] if c.docstring else ""),
|
|
75
|
+
"properties": c.properties[:15],
|
|
76
|
+
"methods": [
|
|
77
|
+
{
|
|
78
|
+
"name": method.name,
|
|
79
|
+
"params": method.params[:5],
|
|
80
|
+
"returns": method.return_type or "None",
|
|
81
|
+
"doc": (method.intent[:50] if method.intent else ""),
|
|
82
|
+
"async": method.is_async,
|
|
83
|
+
}
|
|
84
|
+
for method in c.methods[:15]
|
|
85
|
+
],
|
|
86
|
+
}
|
|
87
|
+
module["classes"].append(cls)
|
|
88
|
+
|
|
89
|
+
if m.functions:
|
|
90
|
+
module["functions"] = [
|
|
91
|
+
{
|
|
92
|
+
"name": f.name,
|
|
93
|
+
"params": f.params[:6],
|
|
94
|
+
"returns": f.return_type or "None",
|
|
95
|
+
"doc": (f.intent[:60] if f.intent else ""),
|
|
96
|
+
"async": f.is_async,
|
|
97
|
+
"lines": f.lines,
|
|
98
|
+
}
|
|
99
|
+
for f in m.functions[:20]
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
data["modules"].append(module)
|
|
103
|
+
|
|
104
|
+
return json.dumps(data, indent=2)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _generate_token_json_compact(project: ProjectInfo) -> str:
|
|
108
|
+
data = json.loads(_generate_token_json(project))
|
|
109
|
+
return json.dumps(data, separators=(",", ":"))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def generate_spec_token(project: ProjectInfo, fmt: str) -> str:
|
|
113
|
+
"""Generate spec optimized for token benchmark (keeps historical behavior).
|
|
114
|
+
|
|
115
|
+
Notes:
|
|
116
|
+
- json/json_compact use the token-friendly JSON representation.
|
|
117
|
+
- other formats delegate to generate_spec.
|
|
118
|
+
"""
|
|
119
|
+
if fmt == "json":
|
|
120
|
+
return _generate_token_json(project)
|
|
121
|
+
if fmt == "json_compact":
|
|
122
|
+
return _generate_token_json_compact(project)
|
|
123
|
+
return generate_spec(project, fmt)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def get_async_reproduction_prompt(spec: str, fmt: str, file_name: str, with_tests: bool = False) -> str:
|
|
127
|
+
base_prompts = {
|
|
128
|
+
"gherkin": f"""Generate Python code from this Gherkin/BDD specification.
|
|
129
|
+
Implement all scenarios as working, production-ready code.
|
|
130
|
+
|
|
131
|
+
{spec[:6000]}
|
|
132
|
+
|
|
133
|
+
Requirements:
|
|
134
|
+
- Generate complete, working Python code for {file_name}
|
|
135
|
+
- Include all imports
|
|
136
|
+
- Use type hints
|
|
137
|
+
- Add docstrings""",
|
|
138
|
+
"yaml": f"""Generate Python code from this YAML specification.
|
|
139
|
+
Match the structure exactly with all classes and functions.
|
|
140
|
+
|
|
141
|
+
{spec[:6000]}
|
|
142
|
+
|
|
143
|
+
Requirements:
|
|
144
|
+
- Generate complete, working Python code for {file_name}
|
|
145
|
+
- Include all imports
|
|
146
|
+
- Use type hints
|
|
147
|
+
- Implement all methods with actual logic""",
|
|
148
|
+
"markdown": f"""Generate Python code from this Markdown specification.
|
|
149
|
+
It contains embedded Gherkin (behaviors) and YAML (structures).
|
|
150
|
+
|
|
151
|
+
{spec[:6000]}
|
|
152
|
+
|
|
153
|
+
Requirements:
|
|
154
|
+
- Generate complete, working Python code for {file_name}
|
|
155
|
+
- Include all imports
|
|
156
|
+
- Implement all classes and functions
|
|
157
|
+
- Use type hints throughout""",
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
prompt = base_prompts.get(fmt, base_prompts["yaml"])
|
|
161
|
+
|
|
162
|
+
if with_tests:
|
|
163
|
+
prompt += """
|
|
164
|
+
|
|
165
|
+
IMPORTANT: Also generate a unittest test class at the end of the file.
|
|
166
|
+
Include tests for each function/method with at least 2 test cases each.
|
|
167
|
+
Use unittest.TestCase as base class.
|
|
168
|
+
Name the test class Test<ClassName> or TestFunctions."""
|
|
169
|
+
|
|
170
|
+
return prompt
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def get_token_reproduction_prompt(spec: str, fmt: str, file_name: str) -> str:
|
|
174
|
+
format_hints = {
|
|
175
|
+
"json": "Parse the JSON structure and implement all classes and functions.",
|
|
176
|
+
"json_compact": "Parse the compact JSON and implement all elements.",
|
|
177
|
+
"yaml": "Parse the YAML structure and implement all classes and functions with exact signatures.",
|
|
178
|
+
"gherkin": "Implement scenarios as SIMPLE, MINIMAL Python code. NO extra error classes, NO over-engineering. Keep code short and direct.",
|
|
179
|
+
"markdown": "Parse embedded Gherkin (behaviors) and YAML (structures).",
|
|
180
|
+
"logicml": """Parse LogicML and generate VALID Python code:
|
|
181
|
+
- 'sig: (params) -> Type' = def func(params) -> Type
|
|
182
|
+
- 'sig: async (params)' = async def func(params)
|
|
183
|
+
- 'sig: @property (self)' = @property decorator
|
|
184
|
+
- 'bases: [BaseModel]' = class X(BaseModel) with Field()
|
|
185
|
+
- 'type: re-export' = from .module import X
|
|
186
|
+
CRITICAL: Ensure valid syntax - balanced brackets, proper indentation, no undefined variables.""",
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
max_spec = 5000
|
|
190
|
+
spec_truncated = spec[:max_spec] if len(spec) > max_spec else spec
|
|
191
|
+
|
|
192
|
+
prompt = f"""Generate Python code from this {fmt.upper()} specification.
|
|
193
|
+
{format_hints.get(fmt, '')}
|
|
194
|
+
|
|
195
|
+
{spec_truncated}
|
|
196
|
+
|
|
197
|
+
Requirements:
|
|
198
|
+
- Complete, working Python code for {file_name}
|
|
199
|
+
- Include imports and type hints
|
|
200
|
+
- Implement all functions with actual logic
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
"""
|
|
204
|
+
return prompt
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def get_simple_reproduction_prompt(spec: str, fmt: str, file_name: str) -> str:
|
|
208
|
+
prompts = {
|
|
209
|
+
"gherkin": f"""Generate Python code from this Gherkin/BDD specification.
|
|
210
|
+
Implement all scenarios as working code.
|
|
211
|
+
|
|
212
|
+
{spec[:5000]}
|
|
213
|
+
|
|
214
|
+
Generate complete Python code for {file_name}:""",
|
|
215
|
+
"yaml": f"""Generate Python code from this YAML specification.
|
|
216
|
+
Match the structure exactly.
|
|
217
|
+
|
|
218
|
+
{spec[:5000]}
|
|
219
|
+
|
|
220
|
+
Generate complete Python code for {file_name}:""",
|
|
221
|
+
"markdown": f"""Generate Python code from this Markdown specification.
|
|
222
|
+
It contains embedded Gherkin and YAML sections.
|
|
223
|
+
|
|
224
|
+
{spec[:5000]}
|
|
225
|
+
|
|
226
|
+
Generate complete Python code for {file_name}:""",
|
|
227
|
+
"logicml": f"""Generate Python code from this LogicML specification.
|
|
228
|
+
'sig:' = EXACT function signature, 'does:' = docstring, 'attrs:' = class attributes.
|
|
229
|
+
Match signatures EXACTLY.
|
|
230
|
+
|
|
231
|
+
{spec[:5000]}
|
|
232
|
+
|
|
233
|
+
Generate complete Python code for {file_name}:""",
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
return prompts.get(fmt, prompts["yaml"])
|
|
@@ -17,6 +17,7 @@ from typing import Dict, List, Optional, Tuple
|
|
|
17
17
|
from pathlib import Path
|
|
18
18
|
|
|
19
19
|
from .models import ProjectInfo, ModuleInfo, FunctionInfo, ClassInfo
|
|
20
|
+
from .utils import estimate_tokens
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
# LLM context limits (approximate)
|
|
@@ -67,11 +68,6 @@ class ChunkedResult:
|
|
|
67
68
|
errors: List[str]
|
|
68
69
|
|
|
69
70
|
|
|
70
|
-
def estimate_tokens(text: str) -> int:
|
|
71
|
-
"""Estimate token count."""
|
|
72
|
-
return len(text) // 4
|
|
73
|
-
|
|
74
|
-
|
|
75
71
|
def get_llm_limit(model_name: str) -> int:
|
|
76
72
|
"""Get context limit for LLM model."""
|
|
77
73
|
model_lower = model_name.lower()
|
|
@@ -373,6 +373,22 @@ class JSONGenerator:
|
|
|
373
373
|
if flat:
|
|
374
374
|
return self._generate_flat(project, detail)
|
|
375
375
|
return self._generate_nested(project, detail)
|
|
376
|
+
|
|
377
|
+
def generate_from_module(self, module: ModuleInfo, detail: str = 'full') -> str:
|
|
378
|
+
project = ProjectInfo(
|
|
379
|
+
name=Path(module.path).name,
|
|
380
|
+
root_path=str(Path(module.path).parent),
|
|
381
|
+
languages={module.language: 1},
|
|
382
|
+
modules=[module],
|
|
383
|
+
dependency_graph={},
|
|
384
|
+
dependency_metrics={},
|
|
385
|
+
entrypoints=[],
|
|
386
|
+
similar_functions={},
|
|
387
|
+
total_files=1,
|
|
388
|
+
total_lines=module.lines_total,
|
|
389
|
+
generated_at="",
|
|
390
|
+
)
|
|
391
|
+
return self.generate(project, flat=False, detail=detail)
|
|
376
392
|
|
|
377
393
|
def _generate_nested(self, project: ProjectInfo, detail: str) -> str:
|
|
378
394
|
"""Generate nested JSON structure."""
|
|
@@ -584,6 +600,22 @@ class YAMLGenerator:
|
|
|
584
600
|
|
|
585
601
|
return yaml.dump(data, default_flow_style=False, allow_unicode=True,
|
|
586
602
|
sort_keys=False, width=120)
|
|
603
|
+
|
|
604
|
+
def generate_from_module(self, module: ModuleInfo, detail: str = 'full') -> str:
|
|
605
|
+
project = ProjectInfo(
|
|
606
|
+
name=Path(module.path).name,
|
|
607
|
+
root_path=str(Path(module.path).parent),
|
|
608
|
+
languages={module.language: 1},
|
|
609
|
+
modules=[module],
|
|
610
|
+
dependency_graph={},
|
|
611
|
+
dependency_metrics={},
|
|
612
|
+
entrypoints=[],
|
|
613
|
+
similar_functions={},
|
|
614
|
+
total_files=1,
|
|
615
|
+
total_lines=module.lines_total,
|
|
616
|
+
generated_at="",
|
|
617
|
+
)
|
|
618
|
+
return self.generate(project, flat=False, detail=detail)
|
|
587
619
|
|
|
588
620
|
def _build_flat_data(self, project: ProjectInfo, detail: str) -> dict:
|
|
589
621
|
"""Build flat data structure optimized for comparisons."""
|
|
@@ -742,12 +774,17 @@ class YAMLGenerator:
|
|
|
742
774
|
|
|
743
775
|
def _function_to_dict(self, f: FunctionInfo, detail: str) -> dict:
|
|
744
776
|
"""Convert function to dict for nested output."""
|
|
777
|
+
# Clean function name (remove any newlines or special chars)
|
|
778
|
+
name = f.name.replace('\n', '').strip() if f.name else ''
|
|
779
|
+
|
|
745
780
|
data = {
|
|
746
|
-
'name':
|
|
781
|
+
'name': name,
|
|
747
782
|
'signature': self._build_signature(f),
|
|
748
783
|
}
|
|
749
784
|
if detail in ('standard', 'full'):
|
|
750
|
-
|
|
785
|
+
# Clean intent - remove newlines and limit length
|
|
786
|
+
intent = f.intent.replace('\n', ' ').strip()[:100] if f.intent else ''
|
|
787
|
+
data['intent'] = intent
|
|
751
788
|
if detail == 'full':
|
|
752
789
|
data['lines'] = f.lines
|
|
753
790
|
data['is_async'] = f.is_async
|
|
@@ -759,9 +796,17 @@ class YAMLGenerator:
|
|
|
759
796
|
|
|
760
797
|
def _build_signature(self, f: FunctionInfo) -> str:
|
|
761
798
|
"""Build compact signature string."""
|
|
762
|
-
params
|
|
763
|
-
|
|
764
|
-
|
|
799
|
+
# Clean params - remove newlines and extra spaces
|
|
800
|
+
clean_params = []
|
|
801
|
+
for p in f.params[:6]:
|
|
802
|
+
p_clean = p.replace('\n', ' ').replace(' ', ' ').strip()
|
|
803
|
+
if p_clean:
|
|
804
|
+
clean_params.append(p_clean)
|
|
805
|
+
|
|
806
|
+
params = ','.join(clean_params)
|
|
807
|
+
if len(f.params) > 6:
|
|
808
|
+
params += f'...+{len(f.params)-6}'
|
|
809
|
+
|
|
765
810
|
ret = f"->{f.return_type}" if f.return_type else ""
|
|
766
811
|
return f"({params}){ret}"
|
|
767
812
|
|
|
@@ -119,8 +119,12 @@ class LogicMLGenerator:
|
|
|
119
119
|
header_parts.append(f"{module.lines_total} lines")
|
|
120
120
|
lines.append(' | '.join(header_parts))
|
|
121
121
|
|
|
122
|
-
# Handle re-export modules (
|
|
123
|
-
|
|
122
|
+
# Handle re-export modules (primarily __init__.py or export-like modules)
|
|
123
|
+
# Some parsers may classify import-only files as having "classes" (e.g., Enum)
|
|
124
|
+
# so we also special-case __init__.py.
|
|
125
|
+
if (path.name == "__init__.py" and module.imports) or (
|
|
126
|
+
not module.classes and not module.functions and module.imports
|
|
127
|
+
):
|
|
124
128
|
lines.append(f"# Re-export module")
|
|
125
129
|
lines.append("type: re-export")
|
|
126
130
|
lines.append("exports:")
|
|
@@ -194,8 +198,11 @@ class LogicMLGenerator:
|
|
|
194
198
|
first_line = doc_lines[0].strip()[:80].replace('"', "'")
|
|
195
199
|
lines.append(f' doc: "{first_line}"')
|
|
196
200
|
|
|
197
|
-
# Include
|
|
201
|
+
# Include Example section if present (important for usage)
|
|
198
202
|
for i, doc_line in enumerate(doc_lines):
|
|
203
|
+
if 'Example:' in doc_line:
|
|
204
|
+
lines.append(' # Example usage in docstring')
|
|
205
|
+
break
|
|
199
206
|
if 'Attributes:' in doc_line or 'Args:' in doc_line:
|
|
200
207
|
for attr_line in doc_lines[i+1:i+5]:
|
|
201
208
|
attr_line = attr_line.strip()
|
|
@@ -203,9 +210,15 @@ class LogicMLGenerator:
|
|
|
203
210
|
lines.append(f' # {attr_line}')
|
|
204
211
|
break
|
|
205
212
|
|
|
206
|
-
# Bases
|
|
213
|
+
# Bases - important for Pydantic/dataclass
|
|
207
214
|
if cls.bases:
|
|
208
|
-
|
|
215
|
+
bases_str = ", ".join(cls.bases)
|
|
216
|
+
lines.append(f' bases: [{bases_str}]')
|
|
217
|
+
# Add hint for special base classes
|
|
218
|
+
if 'BaseModel' in bases_str:
|
|
219
|
+
lines.append(' # Pydantic model - use Field() for attributes')
|
|
220
|
+
elif 'Enum' in bases_str:
|
|
221
|
+
lines.append(' # Enum class')
|
|
209
222
|
|
|
210
223
|
# Type markers
|
|
211
224
|
if cls.is_abstract:
|
|
@@ -251,6 +264,9 @@ class LogicMLGenerator:
|
|
|
251
264
|
prefix = ' ' * indent
|
|
252
265
|
lines: List[str] = [f'{prefix}{method.name}:']
|
|
253
266
|
|
|
267
|
+
# Check for property decorator
|
|
268
|
+
is_property = 'property' in method.decorators
|
|
269
|
+
|
|
254
270
|
# Signature
|
|
255
271
|
params = ', '.join(method.params[:6])
|
|
256
272
|
ret = method.return_type or 'None'
|
|
@@ -258,6 +274,8 @@ class LogicMLGenerator:
|
|
|
258
274
|
sig = f'({params}) -> {ret}'
|
|
259
275
|
if method.is_async:
|
|
260
276
|
sig = f'async {sig}'
|
|
277
|
+
if is_property:
|
|
278
|
+
sig = f'@property {sig}'
|
|
261
279
|
|
|
262
280
|
lines.append(f'{prefix} sig: {sig}')
|
|
263
281
|
|
|
@@ -122,7 +122,7 @@ class TreeSitterParser:
|
|
|
122
122
|
# Imports
|
|
123
123
|
elif node_type == 'import_statement':
|
|
124
124
|
imports.extend(self._extract_py_import(child, content))
|
|
125
|
-
elif node_type
|
|
125
|
+
elif node_type in ('import_from_statement', 'from_import_statement', 'import_from'):
|
|
126
126
|
imports.extend(self._extract_py_from_import(child, content))
|
|
127
127
|
|
|
128
128
|
# Functions
|
|
@@ -304,18 +304,36 @@ class TreeSitterParser:
|
|
|
304
304
|
def _extract_py_from_import(self, node, content: str) -> List[str]:
|
|
305
305
|
"""Extract from ... import ... statement."""
|
|
306
306
|
imports = []
|
|
307
|
-
|
|
307
|
+
module_parts = []
|
|
308
|
+
seen_import_kw = False
|
|
308
309
|
for c in node.children:
|
|
309
|
-
if c.type
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
310
|
+
if c.type == 'import':
|
|
311
|
+
seen_import_kw = True
|
|
312
|
+
continue
|
|
313
|
+
|
|
314
|
+
if not seen_import_kw:
|
|
315
|
+
if c.type == 'import_prefix':
|
|
316
|
+
module_parts.append(self._text(c, content))
|
|
317
|
+
elif c.type in ('relative_import', 'relative_import_statement'):
|
|
318
|
+
module_parts.append(self._text(c, content))
|
|
319
|
+
elif c.type == 'dotted_name':
|
|
320
|
+
module_parts.append(self._text(c, content))
|
|
321
|
+
|
|
322
|
+
module = ''.join(module_parts).strip().lstrip('.')
|
|
323
|
+
|
|
324
|
+
for c in node.children:
|
|
325
|
+
if c.type == 'identifier':
|
|
326
|
+
name = self._text(c, content)
|
|
327
|
+
imports.append(f"{module}.{name}" if module else name)
|
|
328
|
+
elif c.type == 'dotted_name':
|
|
329
|
+
name = self._text(c, content)
|
|
330
|
+
if seen_import_kw:
|
|
331
|
+
imports.append(f"{module}.{name}" if module else name)
|
|
332
|
+
elif c.type == 'aliased_import':
|
|
333
|
+
n = self._find_child(c, 'identifier')
|
|
334
|
+
if n:
|
|
335
|
+
name = self._text(n, content)
|
|
336
|
+
imports.append(f"{module}.{name}" if module else name)
|
|
319
337
|
return imports
|
|
320
338
|
|
|
321
339
|
def _extract_py_constant(self, node, content: str) -> Optional[str]:
|
|
@@ -673,6 +691,10 @@ class UniversalParser:
|
|
|
673
691
|
Returns:
|
|
674
692
|
ModuleInfo if parsing succeeds, None otherwise
|
|
675
693
|
"""
|
|
694
|
+
if isinstance(filepath, str) and isinstance(content, str):
|
|
695
|
+
if "\n" in filepath and "\n" not in content:
|
|
696
|
+
filepath, content = content, filepath
|
|
697
|
+
|
|
676
698
|
if language == 'python':
|
|
677
699
|
return self._parse_python(filepath, content)
|
|
678
700
|
elif language in ('javascript', 'typescript'):
|
|
@@ -18,16 +18,22 @@ FORMAT_HINTS: Dict[str, str] = {
|
|
|
18
18
|
- 'functions' with 'signature' and 'intent'
|
|
19
19
|
Implement all classes and functions with exact signatures.""",
|
|
20
20
|
|
|
21
|
-
'logicml': """
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
- '
|
|
25
|
-
- '
|
|
26
|
-
- '
|
|
27
|
-
- '
|
|
28
|
-
- '
|
|
29
|
-
-
|
|
30
|
-
|
|
21
|
+
'logicml': """Generate VALID, RUNNABLE Python code from LogicML spec.
|
|
22
|
+
|
|
23
|
+
SYNTAX RULES:
|
|
24
|
+
- 'sig: (params) -> Type' = def method(params) -> Type:
|
|
25
|
+
- 'sig: async (params)' = async def method(params):
|
|
26
|
+
- 'sig: @property (self)' = @property decorator above method
|
|
27
|
+
- 'bases: [BaseModel]' = Pydantic: class X(BaseModel): with Field()
|
|
28
|
+
- 'attrs:' = self.attr = value in __init__
|
|
29
|
+
- 'type: re-export' = from .submodule import Name
|
|
30
|
+
|
|
31
|
+
CRITICAL REQUIREMENTS:
|
|
32
|
+
1. ALL brackets/parentheses MUST be balanced
|
|
33
|
+
2. ALL imports MUST be at file top
|
|
34
|
+
3. NO undefined variables
|
|
35
|
+
4. Proper 4-space indentation
|
|
36
|
+
5. Each class/function MUST be complete""",
|
|
31
37
|
|
|
32
38
|
'gherkin': """Implement scenarios as SIMPLE, MINIMAL Python code:
|
|
33
39
|
- NO extra error classes or exception hierarchies
|