codecrate 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codecrate/__init__.py +0 -0
- codecrate/_version.py +34 -0
- codecrate/cli.py +250 -0
- codecrate/config.py +98 -0
- codecrate/diffgen.py +110 -0
- codecrate/discover.py +113 -0
- codecrate/ids.py +17 -0
- codecrate/manifest.py +31 -0
- codecrate/markdown.py +457 -0
- codecrate/mdparse.py +145 -0
- codecrate/model.py +51 -0
- codecrate/packer.py +108 -0
- codecrate/parse.py +133 -0
- codecrate/stubber.py +82 -0
- codecrate/token_budget.py +388 -0
- codecrate/udiff.py +187 -0
- codecrate/unpacker.py +149 -0
- codecrate/validate.py +120 -0
- codecrate-0.1.0.dist-info/METADATA +357 -0
- codecrate-0.1.0.dist-info/RECORD +24 -0
- codecrate-0.1.0.dist-info/WHEEL +5 -0
- codecrate-0.1.0.dist-info/entry_points.txt +2 -0
- codecrate-0.1.0.dist-info/licenses/LICENSE +21 -0
- codecrate-0.1.0.dist-info/top_level.txt +1 -0
codecrate/packer.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import replace
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .ids import stable_body_hash
|
|
7
|
+
from .model import ClassRef, DefRef, FilePack, PackResult
|
|
8
|
+
from .parse import module_name_for, parse_symbols
|
|
9
|
+
from .stubber import stub_file_text
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _extract_canonical_source(text: str, d: DefRef) -> str:
|
|
13
|
+
lines = text.splitlines(keepends=True)
|
|
14
|
+
i0 = max(0, d.decorator_start - 1)
|
|
15
|
+
i1 = min(len(lines), d.end_line)
|
|
16
|
+
return "".join(lines[i0:i1]).rstrip() + "\n"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _line_count(text: str) -> int:
|
|
20
|
+
return text.count("\n") + 1 if text else 0
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def pack_repo(
    root: Path,
    files: list[Path],
    keep_docstrings: bool = True,
    dedupe: bool = False,
) -> tuple[PackResult, dict[str, str]]:
    """Parse and stub ``files`` and collect the canonical source of each def.

    Args:
        root: Directory passed to ``parse_symbols`` and ``module_name_for``.
        files: Files to pack. Files whose suffix is ``.py`` (compared
            case-insensitively) are parsed and stubbed; every other file is
            carried over verbatim with no symbols.
        keep_docstrings: Forwarded to ``stub_file_text``.
        dedupe: When true, defs whose canonical source produces the same
            ``stable_body_hash`` all receive the ``local_id`` of the first
            such def as their ``id``.

    Returns:
        ``(pack, canonical_sources)``. ``canonical_sources`` maps a def id to
        its extracted source: one entry per ``local_id`` without ``dedupe``,
        one entry per distinct body hash with it.
    """
    packed_files: list[FilePack] = []
    collected_defs: list[DefRef] = []
    collected_classes: list[ClassRef] = []

    # Canonical source text of every def, keyed by its local id.
    source_by_local_id: dict[str, str] = {}

    for path in files:
        text = path.read_text(encoding="utf-8", errors="replace")
        is_python = path.suffix.lower() == ".py"

        if not is_python:
            # Non-Python files are included verbatim (no symbol parsing / stubbing).
            classes, defs = [], []
            file_module = ""
            stubbed = text
        else:
            classes, defs = parse_symbols(path=path, root=root, text=text)
            file_module = module_name_for(path, root)
            source_by_local_id.update(
                (d.local_id, _extract_canonical_source(text, d)) for d in defs
            )
            stubbed = stub_file_text(text, defs, keep_docstrings=keep_docstrings)

        packed_files.append(
            FilePack(
                path=path,
                module=file_module,
                original_text=text,
                stubbed_text=stubbed,
                line_count=_line_count(text),
                classes=classes,
                defs=defs,
            )
        )
        collected_defs.extend(defs)
        collected_classes.extend(classes)

    canonical_sources: dict[str, str] = {}
    if dedupe:
        # The first def seen with a given body hash becomes the canonical one;
        # later defs with the same hash are re-pointed at its id.
        first_id_for_hash: dict[str, str] = {}
        remapped: list[DefRef] = []
        for d in collected_defs:
            code = source_by_local_id[d.local_id]
            digest = stable_body_hash(code)
            canonical_id = first_id_for_hash.get(digest)
            if canonical_id is None:
                canonical_id = d.local_id
                first_id_for_hash[digest] = canonical_id
                canonical_sources[canonical_id] = code
            remapped.append(replace(d, id=canonical_id))
        collected_defs = remapped
    else:
        for d in collected_defs:
            canonical_sources[d.local_id] = source_by_local_id[d.local_id]

    # Rebuild every FilePack from the (possibly remapped) defs of its path.
    grouped: dict[Path, list[DefRef]] = {}
    for d in collected_defs:
        if d.path not in grouped:
            grouped[d.path] = []
        grouped[d.path].append(d)

    rebuilt: list[FilePack] = []
    for fp in packed_files:
        file_defs = grouped.get(fp.path, [])
        rebuilt.append(
            FilePack(
                path=fp.path,
                module=fp.module,
                original_text=fp.original_text,
                stubbed_text=stub_file_text(
                    fp.original_text, file_defs, keep_docstrings=keep_docstrings
                ),
                line_count=fp.line_count,
                classes=fp.classes,
                defs=file_defs,
            )
        )

    pack = PackResult(
        root=root, files=rebuilt, classes=collected_classes, defs=collected_defs
    )
    return pack, canonical_sources
|
codecrate/parse.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .ids import stable_location_id
|
|
7
|
+
from .model import ClassRef, DefRef
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def module_name_for(path: Path, root: Path) -> str:
    """Return the dotted module name of ``path`` relative to ``root``.

    Both paths are resolved first. A leading ``src`` directory is dropped, a
    trailing ``.py`` extension is removed from the file name, and a final
    ``__init__`` component is dropped so a package's ``__init__.py`` maps to
    the package itself.

    Args:
        path: File whose module name is wanted.
        root: Directory the name is computed against.

    Returns:
        The remaining path components joined with dots (possibly empty).

    Raises:
        ValueError: If the resolved ``path`` is not located under the resolved
            ``root`` (raised by ``Path.relative_to``).
    """
    rel = path.resolve().relative_to(root.resolve())
    parts = list(rel.parts)
    if parts and parts[0] == "src":
        parts = parts[1:]
    # Compare the extension case-insensitively: files are selected as Python
    # by a lowercased suffix, so "TOOL.PY" must lose its extension here too.
    if parts and parts[-1].lower().endswith(".py"):
        parts[-1] = parts[-1][:-3]
    if parts and parts[-1] == "__init__":
        parts = parts[:-1]
    return ".".join(parts)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class _Visitor(ast.NodeVisitor):
|
|
23
|
+
def __init__(self, path: Path, root: Path):
|
|
24
|
+
self.path = path
|
|
25
|
+
self.root = root
|
|
26
|
+
self.module = module_name_for(path, root)
|
|
27
|
+
self.stack: list[str] = []
|
|
28
|
+
self.defs: list[DefRef] = []
|
|
29
|
+
self.classes: list[ClassRef] = []
|
|
30
|
+
|
|
31
|
+
def visit_ClassDef(self, node: ast.ClassDef):
|
|
32
|
+
self._add_class(node)
|
|
33
|
+
self.stack.append(node.name)
|
|
34
|
+
self.generic_visit(node)
|
|
35
|
+
self.stack.pop()
|
|
36
|
+
|
|
37
|
+
def visit_FunctionDef(self, node: ast.FunctionDef):
|
|
38
|
+
self._add_def(node, kind="function")
|
|
39
|
+
self.stack.append(node.name)
|
|
40
|
+
self.generic_visit(node)
|
|
41
|
+
self.stack.pop()
|
|
42
|
+
|
|
43
|
+
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
|
|
44
|
+
self._add_def(node, kind="async_function")
|
|
45
|
+
self.stack.append(node.name)
|
|
46
|
+
self.generic_visit(node)
|
|
47
|
+
self.stack.pop()
|
|
48
|
+
|
|
49
|
+
def _decorator_start(self, node: ast.AST, default_line: int) -> int:
|
|
50
|
+
start = default_line
|
|
51
|
+
for d in getattr(node, "decorator_list", []) or []:
|
|
52
|
+
if hasattr(d, "lineno"):
|
|
53
|
+
start = min(start, int(d.lineno))
|
|
54
|
+
return start
|
|
55
|
+
|
|
56
|
+
def _add_class(self, node: ast.ClassDef) -> None:
|
|
57
|
+
qual = ".".join(self.stack + [node.name]) if self.stack else node.name
|
|
58
|
+
class_line = int(getattr(node, "lineno", 1))
|
|
59
|
+
end_line = int(getattr(node, "end_lineno", class_line))
|
|
60
|
+
decorator_start = self._decorator_start(node, class_line)
|
|
61
|
+
|
|
62
|
+
rel_path = self.path.resolve().relative_to(self.root.resolve())
|
|
63
|
+
cid = stable_location_id(rel_path, f"class:{qual}", class_line)
|
|
64
|
+
|
|
65
|
+
self.classes.append(
|
|
66
|
+
ClassRef(
|
|
67
|
+
path=self.path,
|
|
68
|
+
module=self.module,
|
|
69
|
+
qualname=qual,
|
|
70
|
+
id=cid,
|
|
71
|
+
decorator_start=decorator_start,
|
|
72
|
+
class_line=class_line,
|
|
73
|
+
end_line=end_line,
|
|
74
|
+
)
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
def _add_def(self, node: ast.AST, kind: str) -> None:
|
|
78
|
+
name = getattr(node, "name", "<anon>")
|
|
79
|
+
qual = ".".join(self.stack + [name]) if self.stack else name
|
|
80
|
+
|
|
81
|
+
def_line = int(getattr(node, "lineno", 1))
|
|
82
|
+
end_line = int(getattr(node, "end_lineno", def_line))
|
|
83
|
+
decorator_start = self._decorator_start(node, def_line)
|
|
84
|
+
|
|
85
|
+
body = getattr(node, "body", []) or []
|
|
86
|
+
body_start = def_line
|
|
87
|
+
doc_start: int | None = None
|
|
88
|
+
doc_end: int | None = None
|
|
89
|
+
|
|
90
|
+
if body:
|
|
91
|
+
body_start = int(getattr(body[0], "lineno", def_line))
|
|
92
|
+
if (
|
|
93
|
+
isinstance(body[0], ast.Expr)
|
|
94
|
+
and isinstance(getattr(body[0], "value", None), ast.Constant)
|
|
95
|
+
and isinstance(getattr(body[0].value, "value", None), str)
|
|
96
|
+
):
|
|
97
|
+
doc_start = int(getattr(body[0], "lineno", body_start))
|
|
98
|
+
doc_end = int(getattr(body[0], "end_lineno", doc_start))
|
|
99
|
+
else:
|
|
100
|
+
body_start = end_line
|
|
101
|
+
|
|
102
|
+
is_single_line = def_line == end_line
|
|
103
|
+
|
|
104
|
+
rel_path = self.path.resolve().relative_to(self.root.resolve())
|
|
105
|
+
local_id = stable_location_id(rel_path, qual, def_line)
|
|
106
|
+
canonical_id = local_id
|
|
107
|
+
|
|
108
|
+
self.defs.append(
|
|
109
|
+
DefRef(
|
|
110
|
+
path=self.path,
|
|
111
|
+
module=self.module,
|
|
112
|
+
qualname=qual,
|
|
113
|
+
id=canonical_id,
|
|
114
|
+
local_id=local_id,
|
|
115
|
+
kind=kind,
|
|
116
|
+
decorator_start=decorator_start,
|
|
117
|
+
def_line=def_line,
|
|
118
|
+
body_start=body_start,
|
|
119
|
+
end_line=end_line,
|
|
120
|
+
doc_start=doc_start,
|
|
121
|
+
doc_end=doc_end,
|
|
122
|
+
is_single_line=is_single_line,
|
|
123
|
+
)
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def parse_symbols(
    path: Path, root: Path, text: str
) -> tuple[list[ClassRef], list[DefRef]]:
    """Parse Python source ``text`` and return the symbols defined in it.

    Args:
        path: File the text came from; recorded on every returned record.
        root: Directory handed to the visitor together with ``path``.
        text: Python source code to parse.

    Returns:
        ``(classes, defs)`` as collected by ``_Visitor``.

    Raises:
        SyntaxError: If ``text`` is not valid Python. The error's ``filename``
            is ``str(path)``.
    """
    # Passing the filename makes a SyntaxError name the offending file instead
    # of the default "<unknown>".
    tree = ast.parse(text, filename=str(path))
    v = _Visitor(path=path, root=root)
    v.visit(tree)
    return v.classes, v.defs
|
codecrate/stubber.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import tokenize
|
|
5
|
+
|
|
6
|
+
from .model import DefRef
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _indent_of(line: str) -> str:
|
|
10
|
+
return line[: len(line) - len(line.lstrip(" \t"))]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _rewrite_single_line_def(line: str, marker: str) -> list[str]:
|
|
14
|
+
src = line if line.endswith("\n") else line + "\n"
|
|
15
|
+
tokens = list(tokenize.generate_tokens(io.StringIO(src).readline))
|
|
16
|
+
colon_col = None
|
|
17
|
+
for tok in tokens:
|
|
18
|
+
if tok.type == tokenize.OP and tok.string == ":":
|
|
19
|
+
colon_col = tok.end[1]
|
|
20
|
+
if colon_col is None:
|
|
21
|
+
return [line]
|
|
22
|
+
head = line[:colon_col].rstrip()
|
|
23
|
+
return [f"{head} ... {marker}\n"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _replacement_lines(indent: str, marker: str) -> list[str]:
|
|
27
|
+
# Compact stub: single placeholder line.
|
|
28
|
+
return [f"{indent}... {marker}\n"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def stub_file_text(text: str, defs: list[DefRef], keep_docstrings: bool = True) -> str:
    """Return ``text`` with the body of each outermost def replaced by a stub.

    Every stub carries a ``# ↪ FUNC:<local_id>`` marker. One-line defs are
    rewritten in place; multi-line bodies collapse to a single placeholder
    line. A def that has nothing after its kept docstring gets the marker
    appended to the docstring's closing line instead.

    Args:
        text: Source text of the file.
        defs: Definitions found in ``text``; line numbers are 1-based.
        keep_docstrings: When true, a def's docstring is left in place and
            only the lines after it are replaced.

    Returns:
        The stubbed source text.
    """
    lines = text.splitlines(keepends=True)

    # IMPORTANT: defs nested inside other defs are not stubbed. With compact
    # stubs, collapsing an inner def first would shift line positions and let
    # the later outer replacement overrun into the code that follows.
    outermost: list[DefRef] = []
    open_ends: list[int] = []  # end lines of the defs that may still enclose
    by_position = sorted(defs, key=lambda item: (item.decorator_start, -item.end_line))
    for d in by_position:
        while open_ends and open_ends[-1] < d.decorator_start:
            open_ends.pop()
        if not open_ends or d.end_line > open_ends[-1]:
            outermost.append(d)
            open_ends.append(d.end_line)

    # Work bottom-up so a replacement never moves lines still to be processed.
    bottom_up = sorted(
        outermost,
        key=lambda item: (item.def_line, item.body_start, item.end_line),
        reverse=True,
    )

    for d in bottom_up:
        marker = f"# ↪ FUNC:{d.local_id}"

        if d.is_single_line:
            def_idx = d.def_line - 1
            if 0 <= def_idx < len(lines):
                lines[def_idx : def_idx + 1] = _rewrite_single_line_def(
                    lines[def_idx], marker
                )
            continue

        keep_doc = keep_docstrings and d.doc_end is not None
        first_body_line = d.doc_end + 1 if keep_doc else d.body_start

        begin = max(0, first_body_line - 1)
        end = min(len(lines), d.end_line)

        if begin < end:
            # Reuse the indentation of the first replaced line for the stub.
            sample = lines[begin]
            indent = _indent_of(sample) if sample else " " * 4
            lines[begin:end] = _replacement_lines(indent, marker)
            continue

        # No body lines to replace.
        # If we kept the docstring, annotate the docstring closing line instead.
        if keep_doc:
            doc_idx = d.doc_end - 1
            if 0 <= doc_idx < len(lines):
                closing = lines[doc_idx]
                base = closing[:-1] if closing.endswith("\n") else closing
                if marker not in base:
                    lines[doc_idx] = base + f" {marker}\n"

    return "".join(lines)
|