codecrate 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codecrate/__init__.py +0 -0
- codecrate/_version.py +34 -0
- codecrate/cli.py +250 -0
- codecrate/config.py +98 -0
- codecrate/diffgen.py +110 -0
- codecrate/discover.py +113 -0
- codecrate/ids.py +17 -0
- codecrate/manifest.py +31 -0
- codecrate/markdown.py +457 -0
- codecrate/mdparse.py +145 -0
- codecrate/model.py +51 -0
- codecrate/packer.py +108 -0
- codecrate/parse.py +133 -0
- codecrate/stubber.py +82 -0
- codecrate/token_budget.py +388 -0
- codecrate/udiff.py +187 -0
- codecrate/unpacker.py +149 -0
- codecrate/validate.py +120 -0
- codecrate-0.1.0.dist-info/METADATA +357 -0
- codecrate-0.1.0.dist-info/RECORD +24 -0
- codecrate-0.1.0.dist-info/WHEEL +5 -0
- codecrate-0.1.0.dist-info/entry_points.txt +2 -0
- codecrate-0.1.0.dist-info/licenses/LICENSE +21 -0
- codecrate-0.1.0.dist-info/top_level.txt +1 -0
codecrate/unpacker.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import re
|
|
5
|
+
import warnings
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .mdparse import parse_packed_markdown
|
|
9
|
+
from .udiff import ensure_parent_dir
|
|
10
|
+
|
|
11
|
+
# Matches a stub marker of the form ``FUNC:<8 hex digits>`` anywhere in a line;
# group 1 captures the hex id (upper- or lower-case digits are accepted).
_MARK_RE = re.compile(r"FUNC:([0-9A-Fa-f]{8})")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _ws_len(s: str) -> int:
|
|
15
|
+
return len(s) - len(s.lstrip(" \t"))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _apply_canonical_into_stub(
    stub: str, defs: list[dict], canonical: dict[str, str]
) -> str:
    """
    Reconstruct original by locating FUNC:<id> markers in the stub and replacing the
    surrounding def region (decorators + def + stubbed body/docstring) with the
    canonical code. Does not rely on line-number alignment.

    Marker semantics:
    - New packs use local_id in stub markers (unique per occurrence).
    - Canonical code is still fetched by id (deduped across identical bodies).
    - For backwards compatibility, we also accept markers keyed by id.

    Args:
        stub: Stubbed file text containing ``FUNC:<8 hex>`` markers.
        defs: Manifest entries for the file; only the ``"id"`` and
            ``"local_id"`` keys are read here.
        canonical: Mapping from id to the canonical source text of a def.

    Returns:
        The stub text with every resolvable def region replaced. A def is left
        untouched when it has no id, no matching marker, no canonical source,
        or when no ``def``/``async def`` line is found at or above its marker.
    """
    lines = stub.splitlines(keepends=True)

    # Allow multiple occurrences of the same marker id (older dedupe packs).
    marker_lines_for: dict[str, list[int]] = {}
    for i, ln in enumerate(lines):
        m = _MARK_RE.search(ln)
        if m:
            marker_lines_for.setdefault(m.group(1).upper(), []).append(i)

    # Pair each def with one marker line; replacements are applied later.
    work: list[tuple[int, dict, str]] = []
    for d in defs:
        cid = d.get("id") or d.get("local_id")
        if not cid:
            continue

        # Prefer locating the marker by local_id (unique), but fall back to cid for
        # older packs.
        marker_key = d.get("local_id") or cid
        idxs = marker_lines_for.get(str(marker_key).upper())
        if not idxs and str(cid).upper() != str(marker_key).upper():
            idxs = marker_lines_for.get(str(cid).upper())

        if not idxs:
            continue

        mi = idxs.pop()  # consume the bottom-most occurrence
        work.append((mi, d, str(cid)))

    # Apply bottom-up so indices of not-yet-processed markers remain stable.
    work.sort(key=lambda t: t[0], reverse=True)

    for mi, d, cid in work:
        # Fetch canonical by cid first, then fall back to local_id. Exact keys
        # are tried before upper-cased ones so existing hits are unchanged;
        # the upper-cased retry mirrors the case-insensitive marker matching
        # above instead of silently leaving the stub in place.
        alt = d.get("local_id")
        exact_keys = [cid] + ([str(alt)] if alt else [])
        candidates = exact_keys + [k.upper() for k in exact_keys]
        code = next(
            (src for src in map(canonical.get, candidates) if src is not None),
            None,
        )
        if code is None:
            continue

        # Find def line above (supports single-line defs where marker is on def line).
        def_i = mi
        while def_i >= 0:
            s = lines[def_i].lstrip(" \t")
            if s.startswith(("def ", "async def ")):
                break
            def_i -= 1
        if def_i < 0:
            continue

        def_indent = _ws_len(lines[def_i])

        # Include decorators directly above the def: contiguous lines at the
        # same indentation whose first non-blank character is "@".
        start_i = def_i
        j = def_i - 1
        while j >= 0:
            if _ws_len(lines[j]) == def_indent and lines[j].lstrip(" \t").startswith(
                "@"
            ):
                start_i = j
                j -= 1
                continue
            break

        # Replace through the marker line. For single-line defs the marker is
        # on the def line itself, so this is also just the def line.
        end_i = mi + 1

        repl = code.splitlines(keepends=True)
        if repl and not repl[-1].endswith("\n"):
            # Keep the following stub line from being glued onto the last
            # line of the inserted code.
            repl[-1] = repl[-1] + "\n"
        lines[start_i:end_i] = repl

    return "".join(lines)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def unpack_to_dir(markdown_text: str, out_dir: Path) -> None:
    """Reconstruct the files described by a packed Markdown into ``out_dir``.

    For every manifest file entry the stubbed text is expanded with
    ``_apply_canonical_into_stub`` and written as UTF-8 beneath ``out_dir``.

    Args:
        markdown_text: Text of the packed Markdown document.
        out_dir: Destination directory; it is resolved before use.

    Raises:
        ValueError: If the manifest format is not ``"codecrate.v4"``, or if a
            manifest path does not resolve to a location strictly inside
            ``out_dir``.
        KeyError: If a manifest file entry has no ``"path"`` key.

    Warns:
        RuntimeWarning: When a reconstructed file's SHA-256 differs from the
            manifest's ``sha256_original``, and once at the end if any file had
            no (or an unexpectedly empty) stubbed block.
    """
    packed = parse_packed_markdown(markdown_text)
    manifest = packed.manifest
    if manifest.get("format") != "codecrate.v4":
        raise ValueError(f"Unsupported format: {manifest.get('format')}")

    out_dir = out_dir.resolve()
    missing: list[str] = []
    for f in manifest.get("files", []):
        rel = f["path"]
        stub = packed.stubbed_files.get(rel)
        exp = f.get("line_count")
        exp_n = int(exp) if exp is not None else None
        # A blank stub is only acceptable when the manifest does not claim the
        # file has lines.
        if stub is None or (exp_n and exp_n > 0 and not stub.strip()):
            missing.append(rel)
            continue

        defs = f.get("defs", [])
        reconstructed = _apply_canonical_into_stub(stub, defs, packed.canonical_sources)

        exp_sha = f.get("sha256_original")
        if exp_sha:
            got_sha = hashlib.sha256(reconstructed.encode("utf-8")).hexdigest()
            if got_sha != exp_sha:
                warnings.warn(
                    f"SHA256 mismatch for {rel}: expected {exp_sha}, got {got_sha}",
                    RuntimeWarning,
                    stacklevel=2,
                )

        # Prevent path traversal / writing outside out_dir. The target must be
        # strictly below out_dir: a path that resolves to out_dir itself (for
        # example ".") is not a file location and is refused as well.
        target = (out_dir / rel).resolve()
        if out_dir not in target.parents:
            raise ValueError(f"Refusing to write outside out_dir: {rel}")
        ensure_parent_dir(target)
        target.write_text(reconstructed, encoding="utf-8")

    if missing:
        # List at most ten paths to keep the warning readable.
        files_str = ", ".join(missing[:10])
        if len(missing) > 10:
            files_str += "..."
        msg = f"Missing stubbed file blocks for {len(missing)} file(s): {files_str}"
        warnings.warn(msg, RuntimeWarning, stacklevel=2)
|
codecrate/validate.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .mdparse import parse_packed_markdown
|
|
9
|
+
from .udiff import normalize_newlines
|
|
10
|
+
from .unpacker import _apply_canonical_into_stub
|
|
11
|
+
|
|
12
|
+
# Matches a stub marker of the form ``FUNC:<8 hex digits>``; group 1 captures
# the hex id. Used below to collect the marker ids present in a stubbed file.
_MARK_RE = re.compile(r"FUNC:([0-9A-Fa-f]{8})")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _sha256_text(text: str) -> str:
|
|
16
|
+
return hashlib.sha256(text.encode("utf-8")).hexdigest()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class ValidationReport:
    """Result of validating a packed Markdown document."""

    # Messages for failed checks (e.g. missing stub blocks, sha mismatches).
    errors: list[str]
    # Messages for findings reported as warnings (e.g. marker collisions,
    # on-disk differences).
    warnings: list[str]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def validate_pack_markdown(
    markdown_text: str, *, root: Path | None = None
) -> ValidationReport:
    """Validate a packed Codecrate Markdown for internal consistency.

    Checks (pack-only):
    - Every manifest file has a corresponding stubbed code block.
    - sha256_stubbed matches the stubbed code block (normalized newlines).
    - Every def in manifest has a canonical body in the function library.
    - Reconstructing each file from stub+canonical reproduces sha256_original.
    - Marker collisions / missing markers are reported as warnings.

    Optional root:
    - If provided, compares reconstructed 'original' text against files on disk.

    Args:
        markdown_text: Text of the packed Markdown document.
        root: Optional directory whose files are compared with the
            reconstructed text; differences are reported as warnings.

    Returns:
        A ``ValidationReport`` holding the collected errors and warnings.
    """
    from collections import Counter

    packed = parse_packed_markdown(markdown_text)
    manifest = packed.manifest

    errors: list[str] = []
    warnings: list[str] = []

    root_resolved = root.resolve() if root is not None else None

    for f in manifest.get("files") or []:
        rel = f.get("path")
        if not rel:
            errors.append("Manifest entry missing 'path'")
            continue

        stub = packed.stubbed_files.get(rel)
        if stub is None:
            errors.append(f"Missing stubbed file block for {rel}")
            continue

        stub_norm = normalize_newlines(stub)
        exp_stub = f.get("sha256_stubbed")
        got_stub = _sha256_text(stub_norm)
        if exp_stub and got_stub != exp_stub:
            errors.append(
                f"Stub sha mismatch for {rel}: expected {exp_stub}, got {got_stub}"
            )

        # Count marker ids once; the Counter also serves as an O(1) membership
        # test for the per-def marker check below.
        marker_counts = Counter(
            m.group(1).upper() for m in _MARK_RE.finditer(stub_norm)
        )
        dup = sorted(k for k, v in marker_counts.items() if v > 1)
        if dup:
            warnings.append(f"Marker collision in {rel}: {', '.join(dup)}")

        defs = f.get("defs") or []
        for d in defs:
            cid = str(d.get("id") or "").upper()
            lid = str(d.get("local_id") or "").upper()
            if cid and cid not in packed.canonical_sources:
                errors.append(
                    f"Missing canonical source for {rel}:{d.get('qualname')} id={cid}"
                )

            # local_id marker is preferred; fall back to id for older packs.
            # Warn when none of the ids this def actually carries appears as a
            # marker, including defs that carry only one of the two ids.
            marker_keys = [key for key in (lid, cid) if key]
            if marker_keys and not any(key in marker_counts for key in marker_keys):
                warnings.append(
                    f"Missing FUNC marker in stub for {rel}:{d.get('qualname')} "
                    f"(local_id={lid or '∅'}, id={cid or '∅'})"
                )

        try:
            reconstructed = _apply_canonical_into_stub(
                stub_norm, defs, packed.canonical_sources
            )
            reconstructed = normalize_newlines(reconstructed)
        except Exception as e:  # pragma: no cover
            # Report the failure for this file and keep validating the rest.
            errors.append(f"Failed to reconstruct {rel}: {e}")
            continue

        exp_orig = f.get("sha256_original")
        got_orig = _sha256_text(reconstructed)
        if exp_orig and got_orig != exp_orig:
            errors.append(
                f"Original sha mismatch for {rel}: expected {exp_orig}, got {got_orig}"
            )

        if root_resolved is not None:
            disk_path = root_resolved / rel
            # is_file() rather than exists(): a directory at this path cannot
            # be read as text and is reported like a missing file.
            if not disk_path.is_file():
                warnings.append(f"On-disk file missing under root: {rel}")
            else:
                disk_text = normalize_newlines(
                    disk_path.read_text(encoding="utf-8", errors="replace")
                )
                if _sha256_text(disk_text) != got_orig:
                    warnings.append(f"On-disk file differs from pack for {rel}")

    return ValidationReport(errors=errors, warnings=warnings)
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codecrate
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pack Python codebases into Markdown optimized for LLM context delivery (pack/unpack/patch/apply)
|
|
5
|
+
Author-email: Holger Nahrstaedt <nahrstaedt@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Holger Nahrstaedt
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/holgern/codecrate
|
|
29
|
+
Keywords: code,llm,markdown
|
|
30
|
+
Classifier: Intended Audience :: Developers
|
|
31
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
32
|
+
Classifier: Operating System :: OS Independent
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
38
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
39
|
+
Requires-Python: >3.9.0
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Requires-Dist: pathspec
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
45
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
|
|
48
|
+
[PyPI](https://pypi.org/project/codecrate/)
|
|
49
|
+

|
|
50
|
+

|
|
51
|
+
[Codecov](https://codecov.io/gh/holgern/codecrate)
|
|
52
|
+
|
|
53
|
+
# codecrate
|
|
54
|
+
|
|
55
|
+
`codecrate` turns a Python repository into a Markdown "context pack" optimized for LLM consumption, with full round-trip support:
|
|
56
|
+
|
|
57
|
+
- `pack`: repo → context.md
|
|
58
|
+
- `unpack`: context.md → reconstructed files
|
|
59
|
+
- `patch`: old context.md + current repo → diff-only patch.md
|
|
60
|
+
- `apply`: patch.md → apply changes to repo
|
|
61
|
+
|
|
62
|
+
## Features
|
|
63
|
+
|
|
64
|
+
- **Markdown-native output**: Generates self-contained Markdown files with syntax highlighting
|
|
65
|
+
- **Symbol index**: Quick navigation to functions and classes
|
|
66
|
+
- **Deduplication**: Optionally deduplicate identical function bodies to save tokens
|
|
67
|
+
- **Two layout modes**:
|
|
68
|
+
- `stubs`: Compact file stubs with function bodies in a separate "Function Library"
|
|
69
|
+
- `full`: Complete file contents (no stubbing)
|
|
70
|
+
- **Round-trip support**: Reconstruct original files exactly from Markdown packs
|
|
71
|
+
- **Diff generation**: Create minimal patch Markdown files showing only changed code
|
|
72
|
+
- **Gitignore support**: Respect `.gitignore` when scanning files
|
|
73
|
+
|
|
74
|
+
## Installation
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install -e .
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Or for development:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pip install -e ".[dev]"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Quick Start
|
|
87
|
+
|
|
88
|
+
### Pack a Repository
|
|
89
|
+
|
|
90
|
+
Pack your current directory into `context.md`:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
codecrate pack .
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Pack with specific output file:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
codecrate pack . -o my_project.md
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Unpack to Reconstruct Files
|
|
103
|
+
|
|
104
|
+
Reconstruct files from a packed Markdown:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
codecrate unpack context.md -o reconstructed/
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Generate and Apply Patches
|
|
111
|
+
|
|
112
|
+
1. Pack your repository as a baseline:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
codecrate pack . -o baseline.md
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
2. Make changes to your code
|
|
119
|
+
|
|
120
|
+
3. Generate a patch:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
codecrate patch baseline.md . -o changes.md
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
4. Apply the patch:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
codecrate apply changes.md .
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Configuration
|
|
133
|
+
|
|
134
|
+
Create a `codecrate.toml` file in your repository root:
|
|
135
|
+
|
|
136
|
+
```toml
|
|
137
|
+
[codecrate]
|
|
138
|
+
# File patterns to include (default: ["**/*.py"])
|
|
139
|
+
include = ["**/*.py"]
|
|
140
|
+
|
|
141
|
+
# File patterns to exclude
|
|
142
|
+
exclude = ["**/test_*.py", "**/tests/**"]
|
|
143
|
+
|
|
144
|
+
# Deduplicate identical function bodies (default: false)
|
|
145
|
+
dedupe = true
|
|
146
|
+
|
|
147
|
+
# Keep docstrings in stubbed file view (default: true)
|
|
148
|
+
keep_docstrings = true
|
|
149
|
+
|
|
150
|
+
# Respect .gitignore when scanning (default: true)
|
|
151
|
+
respect_gitignore = true
|
|
152
|
+
|
|
153
|
+
# Output layout: "auto", "stubs", or "full" (default: "auto")
|
|
154
|
+
# - auto: use stubs only if dedupe collapses something
|
|
155
|
+
# - stubs: always use stubs + Function Library
|
|
156
|
+
# - full: emit complete file contents
|
|
157
|
+
layout = "auto"
|
|
158
|
+
|
|
159
|
+
# Split output into multiple files if char count exceeds this (0 = no split)
|
|
160
|
+
split_max_chars = 0
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Command Reference
|
|
164
|
+
|
|
165
|
+
### `pack` - Pack Repository to Markdown
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
codecrate pack <root> [OPTIONS]
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Options:**
|
|
172
|
+
|
|
173
|
+
- `-o, --output PATH`: Output markdown path (default: `context.md`)
|
|
174
|
+
- `--dedupe`: Deduplicate identical function bodies
|
|
175
|
+
- `--layout {auto,stubs,full}`: Output layout mode
|
|
176
|
+
- `--keep-docstrings` / `--no-keep-docstrings`: Keep docstrings in stubs
|
|
177
|
+
- `--respect-gitignore` / `--no-respect-gitignore`: Respect `.gitignore`
|
|
178
|
+
- `--include GLOB`: Include glob pattern (repeatable)
|
|
179
|
+
- `--exclude GLOB`: Exclude glob pattern (repeatable)
|
|
180
|
+
- `--split-max-chars N`: Split output into `.partN.md` files
|
|
181
|
+
|
|
182
|
+
### `unpack` - Reconstruct Files from Markdown
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
codecrate unpack <markdown> -o <out-dir>
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
**Options:**
|
|
189
|
+
|
|
190
|
+
- `-o, --out-dir PATH`: Output directory for reconstructed files (required)
|
|
191
|
+
|
|
192
|
+
### `patch` - Generate Diff-Only Patch
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
codecrate patch <old_markdown> <root> [OPTIONS]
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**Options:**
|
|
199
|
+
|
|
200
|
+
- `-o, --output PATH`: Output patch markdown (default: `patch.md`)
|
|
201
|
+
|
|
202
|
+
### `apply` - Apply Patch to Repository
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
codecrate apply <patch_markdown> <root>
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### `validate-pack` - Validate Pack
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
codecrate validate-pack <markdown> [--root PATH]
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
**Options:**
|
|
215
|
+
|
|
216
|
+
- `--root PATH`: Optional repo root to compare reconstructed files against
|
|
217
|
+
|
|
218
|
+
## Layout Modes
|
|
219
|
+
|
|
220
|
+
### Stubs Mode (Default for `auto` when dedupe is effective)
|
|
221
|
+
|
|
222
|
+
Creates compact file stubs with function bodies replaced by markers:
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
def f(x):
|
|
226
|
+
... # ↪ FUNC:0F203CE2
|
|
227
|
+
|
|
228
|
+
class C:
|
|
229
|
+
def m(self):
|
|
230
|
+
... # ↪ FUNC:6F8ECF73
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
Function bodies are stored in a separate "Function Library" section:
|
|
234
|
+
|
|
235
|
+
````markdown
|
|
236
|
+
## Function Library
|
|
237
|
+
|
|
238
|
+
### 0F203CE2 — `a.f` (a.py:L1–L2)
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
def f(x):
|
|
242
|
+
return x + 1
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
### 6F8ECF73 — `a.C.m` (a.py:L5–L6)
|
|
247
|
+
|
|
248
|
+
```python
|
|
249
|
+
def m(self):
|
|
250
|
+
return 42
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
````
|
|
254
|
+
|
|
255
|
+
This is ideal for:
|
|
256
|
+
- LLMs with limited context windows
|
|
257
|
+
- Repositories with duplicate code (when using `--dedupe`)
|
|
258
|
+
- Code review and analysis workflows
|
|
259
|
+
|
|
260
|
+
### Full Mode
|
|
261
|
+
|
|
262
|
+
Emits complete file contents without stubbing:
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
def f(x):
|
|
266
|
+
return x + 1
|
|
267
|
+
|
|
268
|
+
class C:
|
|
269
|
+
def m(self):
|
|
270
|
+
return 42
|
|
271
|
+
````
|
|
272
|
+
|
|
273
|
+
This is ideal for:
|
|
274
|
+
|
|
275
|
+
- Repositories without much duplicate code
|
|
276
|
+
- When you need complete context in one place
|
|
277
|
+
- When token limits are not a concern
|
|
278
|
+
|
|
279
|
+
## Workflow Example
|
|
280
|
+
|
|
281
|
+
### Initial Pack
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
# Create a baseline pack of your repository
|
|
285
|
+
codecrate pack . -o baseline.md
|
|
286
|
+
|
|
287
|
+
# Send baseline.md to an LLM for analysis
|
|
288
|
+
# LLM can navigate using the Symbol Index
|
|
289
|
+
# and read full code in the Files section
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### Iterate with LLM
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
# After the LLM suggests changes, generate a patch
|
|
296
|
+
codecrate patch baseline.md . -o iteration1.md
|
|
297
|
+
|
|
298
|
+
# Send iteration1.md to the LLM (much smaller than full pack)
|
|
299
|
+
# Apply the LLM's changes
|
|
300
|
+
codecrate apply iteration1.md .
|
|
301
|
+
|
|
302
|
+
# Create new baseline for next iteration
|
|
303
|
+
codecrate pack . -o baseline.md
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
## Advanced Usage
|
|
307
|
+
|
|
308
|
+
### Packing Multiple Projects
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
# Pack different directories separately
|
|
312
|
+
codecrate pack src/backend -o backend.md
|
|
313
|
+
codecrate pack src/frontend -o frontend.md
|
|
314
|
+
|
|
315
|
+
# Or pack with specific include patterns
|
|
316
|
+
codecrate pack . --include "**/*.py" --exclude "**/migrations/**"
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
### Handling Large Contexts
|
|
320
|
+
|
|
321
|
+
```bash
|
|
322
|
+
# Split into multiple files to fit context windows
|
|
323
|
+
codecrate pack . --split-max-chars 50000
|
|
324
|
+
|
|
325
|
+
# This creates context.md, context.part1.md, context.part2.md, etc.
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### Deduplication
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
# Enable deduplication to save tokens on duplicate code
|
|
332
|
+
codecrate pack . --dedupe
|
|
333
|
+
|
|
334
|
+
# Deduplication is most effective when you have:
|
|
335
|
+
# - Copy-pasted functions
|
|
336
|
+
# - Boilerplate code
|
|
337
|
+
# - Similar utility functions across modules
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
## How It Works
|
|
341
|
+
|
|
342
|
+
1. **Discovery**: Scans files according to include/exclude patterns
|
|
343
|
+
2. **Parsing**: Extracts symbol information (functions, classes) using Python's AST
|
|
344
|
+
3. **Packing**: Creates a structured manifest and canonical function definitions
|
|
345
|
+
4. **Rendering**: Generates Markdown with directory tree, symbol index, and file contents
|
|
346
|
+
5. **Validation**: Ensures round-trip consistency with SHA256 checksums
|
|
347
|
+
|
|
348
|
+
The Markdown format is designed to be:
|
|
349
|
+
|
|
350
|
+
- **Self-contained**: All necessary information in one file
|
|
351
|
+
- **Navigable**: Symbol index with jump links
|
|
352
|
+
- **Reversible**: Can reconstruct original files exactly
|
|
353
|
+
- **Diff-friendly**: Easy to generate minimal patches
|
|
354
|
+
|
|
355
|
+
## License
|
|
356
|
+
|
|
357
|
+
MIT
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
codecrate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
codecrate/_version.py,sha256=5jwwVncvCiTnhOedfkzzxmxsggwmTBORdFL_4wq0ZeY,704
|
|
3
|
+
codecrate/cli.py,sha256=4dH2IYVTXQ33GHpomnViKbrlVGdcJUK_3h263nCWOtc,8294
|
|
4
|
+
codecrate/config.py,sha256=EuOMR0rSq2T5ZuorjfIYrhsX3j1taAsZ_3AsYPGakYQ,3147
|
|
5
|
+
codecrate/diffgen.py,sha256=xMH-wJeMox_xoMLrGck2o7RP5mjlCJjndSHPHql9CJg,3432
|
|
6
|
+
codecrate/discover.py,sha256=eWebPPbPHy5WrtbCX-RA1hKiPyQau6buUm5OcjpVi9U,2847
|
|
7
|
+
codecrate/ids.py,sha256=fO_PAaskT7hLXgvJN7Hv4HD27zPIFnSziB0k2AUpONo,517
|
|
8
|
+
codecrate/manifest.py,sha256=z7fHqZS5vHanMSla-_083e_A9KXuYnWccZyu-wQNl2s,1074
|
|
9
|
+
codecrate/markdown.py,sha256=2ruEmsCF9v-aPpiKEOGusi4T13wEzugNn7JSN6T7KGg,16670
|
|
10
|
+
codecrate/mdparse.py,sha256=Lu9aN3rMP5qetwLapVrRvVRohPXv8L0hmjG0GUG7jys,4746
|
|
11
|
+
codecrate/model.py,sha256=Px9xL4odcvlDgEnSiwd5SMi9_k1Xck4R8DOlahN9Z-4,897
|
|
12
|
+
codecrate/packer.py,sha256=an-E1ZilIOY2hdjF4Gt3J5APOCTRRpiinmvfBiw29Ds,3398
|
|
13
|
+
codecrate/parse.py,sha256=Huu2PjHPBE1cCYI_GsYhgOqXoifP076asg5Tlsfpf8E,4437
|
|
14
|
+
codecrate/stubber.py,sha256=9WxpQs6FTCT0OUX4Si8GdD63VyUJQnNqRcwTm_IPuKc,2857
|
|
15
|
+
codecrate/token_budget.py,sha256=0vG9h7l5yNn909QQEItrYQJIjMT2h5LcM9iZf4zu-3c,12937
|
|
16
|
+
codecrate/udiff.py,sha256=bMvwhgqXZw2pIW5VcRRATUoDspM2XGQU3nInD_A7WWo,6249
|
|
17
|
+
codecrate/unpacker.py,sha256=soh6qwRE6sxoBAbr3UbXk1O5aiwdnfUhkqfFdSvU0_I,5063
|
|
18
|
+
codecrate/validate.py,sha256=-KVU7RQ8zJG-YJoD8kLCYgE0OWWdy-yYzl0gyp-3mWo,4250
|
|
19
|
+
codecrate-0.1.0.dist-info/licenses/LICENSE,sha256=O6yVC2oL8vUoAA3oPEvLSbvxzmwtOOPiY7dOZzgrxi0,1074
|
|
20
|
+
codecrate-0.1.0.dist-info/METADATA,sha256=fwuc8JDMxUx1h9JRTr5vd0JgJ34uZxk7CkEfZqEAR-M,9394
|
|
21
|
+
codecrate-0.1.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
22
|
+
codecrate-0.1.0.dist-info/entry_points.txt,sha256=DcY9tib-PzdLebck4B2RYJ0CGH6cqAJMCHPU3MdA0Dk,49
|
|
23
|
+
codecrate-0.1.0.dist-info/top_level.txt,sha256=-jD2a_aH1iQN4atRhGw7ZhEYnOWe88nfmkz6sPZ6WEg,10
|
|
24
|
+
codecrate-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Holger Nahrstaedt
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
codecrate
|