codecrate 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codecrate/packer.py ADDED
@@ -0,0 +1,108 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import replace
4
+ from pathlib import Path
5
+
6
+ from .ids import stable_body_hash
7
+ from .model import ClassRef, DefRef, FilePack, PackResult
8
+ from .parse import module_name_for, parse_symbols
9
+ from .stubber import stub_file_text
10
+
11
+
12
+ def _extract_canonical_source(text: str, d: DefRef) -> str:
13
+ lines = text.splitlines(keepends=True)
14
+ i0 = max(0, d.decorator_start - 1)
15
+ i1 = min(len(lines), d.end_line)
16
+ return "".join(lines[i0:i1]).rstrip() + "\n"
17
+
18
+
19
+ def _line_count(text: str) -> int:
20
+ return text.count("\n") + 1 if text else 0
21
+
22
+
23
def pack_repo(
    root: Path,
    files: list[Path],
    keep_docstrings: bool = True,
    dedupe: bool = False,
) -> tuple[PackResult, dict[str, str]]:
    """Read *files*, stub their Python definitions and bundle the result.

    Files whose suffix is ``.py`` (case-insensitive) are parsed for symbols
    and stubbed; every other file is carried over verbatim with no symbols.

    Returns the ``PackResult`` together with a mapping from definition id to
    the definition's canonical source text. Without *dedupe* the mapping has
    one entry per def, keyed by ``local_id``. With *dedupe*, defs whose source
    yields the same ``stable_body_hash`` share the ``local_id`` of the first
    such def (in input order) as their ``id``, and only that first source is
    stored.
    """
    packed: list[FilePack] = []
    every_def: list[DefRef] = []
    every_class: list[ClassRef] = []

    # Canonical source of each def, keyed by its per-location id.
    source_by_local_id: dict[str, str] = {}

    for file_path in files:
        content = file_path.read_text(encoding="utf-8", errors="replace")

        if file_path.suffix.lower() != ".py":
            # Non-Python files are included verbatim (no symbol parsing / stubbing).
            file_classes, file_defs = [], []
            module = ""
            stub_text = content
        else:
            file_classes, file_defs = parse_symbols(
                path=file_path, root=root, text=content
            )
            module = module_name_for(file_path, root)
            source_by_local_id.update(
                (ref.local_id, _extract_canonical_source(content, ref))
                for ref in file_defs
            )
            stub_text = stub_file_text(
                content, file_defs, keep_docstrings=keep_docstrings
            )

        packed.append(
            FilePack(
                path=file_path,
                module=module,
                original_text=content,
                stubbed_text=stub_text,
                line_count=_line_count(content),
                classes=file_classes,
                defs=file_defs,
            )
        )
        every_def += file_defs
        every_class += file_classes

    canonical_sources: dict[str, str] = {}
    if dedupe:
        # First def seen for each body hash becomes the canonical one.
        first_id_for_hash: dict[str, str] = {}
        remapped: list[DefRef] = []
        for ref in every_def:
            source = source_by_local_id[ref.local_id]
            digest = stable_body_hash(source)
            if digest not in first_id_for_hash:
                first_id_for_hash[digest] = ref.local_id
                canonical_sources[ref.local_id] = source
            remapped.append(replace(ref, id=first_id_for_hash[digest]))
        every_def = remapped

        # Regroup the remapped defs per file and rebuild each FilePack so the
        # packs carry the defs with their canonical ids.
        grouped: dict[Path, list[DefRef]] = {}
        for ref in every_def:
            if ref.path not in grouped:
                grouped[ref.path] = []
            grouped[ref.path].append(ref)

        rebuilt: list[FilePack] = []
        for old in packed:
            file_defs = grouped.get(old.path, [])
            restubbed = stub_file_text(
                old.original_text, file_defs, keep_docstrings=keep_docstrings
            )
            rebuilt.append(
                FilePack(
                    path=old.path,
                    module=old.module,
                    original_text=old.original_text,
                    stubbed_text=restubbed,
                    line_count=old.line_count,
                    classes=old.classes,
                    defs=file_defs,
                )
            )
        packed = rebuilt
    else:
        canonical_sources = {
            ref.local_id: source_by_local_id[ref.local_id] for ref in every_def
        }

    result = PackResult(
        root=root, files=packed, classes=every_class, defs=every_def
    )
    return result, canonical_sources
codecrate/parse.py ADDED
@@ -0,0 +1,133 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from pathlib import Path
5
+
6
+ from .ids import stable_location_id
7
+ from .model import ClassRef, DefRef
8
+
9
+
10
def module_name_for(path: Path, root: Path) -> str:
    """Return the dotted module name of *path* relative to *root*.

    A leading ``src`` directory is dropped, a trailing ``.py`` is removed from
    the last component, and a final ``__init__`` component is omitted so a
    package's ``__init__.py`` maps to the package name itself.

    Raises ``ValueError`` (from ``Path.relative_to``) when *path* is not
    located under *root*.
    """
    relative = path.resolve().relative_to(root.resolve())
    segments = list(relative.parts)

    if segments[:1] == ["src"]:
        del segments[0]

    if segments:
        leaf = segments[-1]
        if leaf.endswith(".py"):
            leaf = leaf[: -len(".py")]
            segments[-1] = leaf
        if leaf == "__init__":
            segments.pop()

    return ".".join(segments)
20
+
21
+
22
class _Visitor(ast.NodeVisitor):
    """Collect a ``ClassRef``/``DefRef`` for every class and function in a module.

    ``stack`` holds the names of the enclosing classes and functions during the
    walk, so qualified names are dotted paths such as ``Outer.method`` or
    ``outer.inner``. Results accumulate in ``classes`` and ``defs`` in visit
    order.
    """

    def __init__(self, path: Path, root: Path):
        self.path = path
        self.root = root
        self.module = module_name_for(path, root)
        # Computed once: resolving paths consults the filesystem and the
        # result is identical for every symbol found in this file.
        self._rel_path = path.resolve().relative_to(root.resolve())
        self.stack: list[str] = []
        self.defs: list[DefRef] = []
        self.classes: list[ClassRef] = []

    def visit_ClassDef(self, node: ast.ClassDef):
        self._add_class(node)
        self._visit_children(node)

    def visit_FunctionDef(self, node: ast.FunctionDef):
        self._add_def(node, kind="function")
        self._visit_children(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        self._add_def(node, kind="async_function")
        self._visit_children(node)

    def _visit_children(self, node) -> None:
        """Visit the children of *node* with its name pushed on the scope stack."""
        self.stack.append(node.name)
        self.generic_visit(node)
        self.stack.pop()

    def _decorator_start(self, node: ast.AST, default_line: int) -> int:
        """Return the smallest line among *node*'s decorators and *default_line*."""
        start = default_line
        for dec in getattr(node, "decorator_list", []) or []:
            if hasattr(dec, "lineno"):
                start = min(start, int(dec.lineno))
        return start

    def _add_class(self, node: ast.ClassDef) -> None:
        """Record a ``ClassRef`` describing *node*."""
        qual = ".".join([*self.stack, node.name])
        class_line = int(getattr(node, "lineno", 1))
        end_line = int(getattr(node, "end_lineno", class_line))
        decorator_start = self._decorator_start(node, class_line)

        cid = stable_location_id(self._rel_path, f"class:{qual}", class_line)

        self.classes.append(
            ClassRef(
                path=self.path,
                module=self.module,
                qualname=qual,
                id=cid,
                decorator_start=decorator_start,
                class_line=class_line,
                end_line=end_line,
            )
        )

    def _add_def(self, node: ast.AST, kind: str) -> None:
        """Record a ``DefRef`` describing the function *node*.

        All line numbers are the 1-based values reported by ``ast``.
        ``doc_start``/``doc_end`` are set only when the first body statement
        is a string-constant expression (a docstring).
        """
        name = getattr(node, "name", "<anon>")
        qual = ".".join([*self.stack, name])

        def_line = int(getattr(node, "lineno", 1))
        end_line = int(getattr(node, "end_lineno", def_line))
        decorator_start = self._decorator_start(node, def_line)

        body = getattr(node, "body", []) or []
        body_start = def_line
        doc_start: int | None = None
        doc_end: int | None = None

        if body:
            first = body[0]
            body_start = int(getattr(first, "lineno", def_line))
            if (
                isinstance(first, ast.Expr)
                and isinstance(getattr(first, "value", None), ast.Constant)
                and isinstance(getattr(first.value, "value", None), str)
            ):
                doc_start = int(getattr(first, "lineno", body_start))
                doc_end = int(getattr(first, "end_lineno", doc_start))
        else:
            body_start = end_line

        is_single_line = def_line == end_line

        local_id = stable_location_id(self._rel_path, qual, def_line)
        # The canonical id starts out equal to the location id.
        canonical_id = local_id

        self.defs.append(
            DefRef(
                path=self.path,
                module=self.module,
                qualname=qual,
                id=canonical_id,
                local_id=local_id,
                kind=kind,
                decorator_start=decorator_start,
                def_line=def_line,
                body_start=body_start,
                end_line=end_line,
                doc_start=doc_start,
                doc_end=doc_end,
                is_single_line=is_single_line,
            )
        )
125
+
126
+
127
def parse_symbols(
    path: Path, root: Path, text: str
) -> tuple[list[ClassRef], list[DefRef]]:
    """Parse *text* as Python source and return its ``(classes, defs)``.

    *path* and *root* identify the file the text came from; they are used to
    build module names and ids for the collected symbols.

    Raises ``SyntaxError`` when *text* is not valid Python.
    """
    # Pass the real file name so a SyntaxError names the offending file
    # instead of the default "<unknown>".
    tree = ast.parse(text, filename=str(path))
    v = _Visitor(path=path, root=root)
    v.visit(tree)
    return v.classes, v.defs
codecrate/stubber.py ADDED
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import tokenize
5
+
6
+ from .model import DefRef
7
+
8
+
9
+ def _indent_of(line: str) -> str:
10
+ return line[: len(line) - len(line.lstrip(" \t"))]
11
+
12
+
13
+ def _rewrite_single_line_def(line: str, marker: str) -> list[str]:
14
+ src = line if line.endswith("\n") else line + "\n"
15
+ tokens = list(tokenize.generate_tokens(io.StringIO(src).readline))
16
+ colon_col = None
17
+ for tok in tokens:
18
+ if tok.type == tokenize.OP and tok.string == ":":
19
+ colon_col = tok.end[1]
20
+ if colon_col is None:
21
+ return [line]
22
+ head = line[:colon_col].rstrip()
23
+ return [f"{head} ... {marker}\n"]
24
+
25
+
26
+ def _replacement_lines(indent: str, marker: str) -> list[str]:
27
+ # Compact stub: single placeholder line.
28
+ return [f"{indent}... {marker}\n"]
29
+
30
+
31
def stub_file_text(text: str, defs: list[DefRef], keep_docstrings: bool = True) -> str:
    """Return *text* with the body of each outermost def replaced by a stub.

    Every stubbed def is tagged with the comment ``# ↪ FUNC:<local_id>``:

    * a one-line def has its inline body replaced by ``... <marker>``;
    * a multi-line def has its body lines replaced by a single
      ``... <marker>`` line at the body's indentation;
    * when *keep_docstrings* is true the docstring is left in place and only
      the lines after it are replaced; if the docstring is the whole body, the
      marker is appended to the docstring's last line instead.

    Defs nested inside another def are not stubbed separately. Line numbers in
    *defs* are 1-based positions in *text*.
    """
    # Split on "\n", "\r" and "\r\n" only. ``str.splitlines`` additionally
    # breaks on form feeds, U+2028 and similar characters, which the Python
    # parser does not treat as line ends; a file containing one of them would
    # make the line numbers carried by *defs* point at the wrong lines.
    lines = io.StringIO(text, newline="").readlines()

    # IMPORTANT: Do not stub defs that are nested inside other defs.
    # When using compact stubs, stubbing an inner def first can shift line
    # positions and cause later outer-def replacements to overrun and truncate
    # subsequent code (a common issue with nested helper functions).
    outer_defs: list[DefRef] = []
    stack: list[int] = []  # end lines of the defs currently enclosing us
    for d in sorted(defs, key=lambda d: (d.decorator_start, -d.end_line)):
        while stack and d.decorator_start > stack[-1]:
            stack.pop()
        if stack and d.end_line <= stack[-1]:
            continue
        outer_defs.append(d)
        stack.append(d.end_line)

    # Work bottom-up so replacing one def never shifts the line numbers of a
    # def that is still waiting to be processed.
    defs_sorted = sorted(
        outer_defs, key=lambda d: (d.def_line, d.body_start, d.end_line), reverse=True
    )

    for d in defs_sorted:
        marker = f"# ↪ FUNC:{d.local_id}"

        if d.is_single_line:
            i = d.def_line - 1
            if 0 <= i < len(lines):
                lines[i : i + 1] = _rewrite_single_line_def(lines[i], marker)
            continue

        start_line = d.body_start
        if keep_docstrings and d.doc_end is not None:
            start_line = d.doc_end + 1

        i0 = max(0, start_line - 1)
        i1 = min(len(lines), d.end_line)
        if i0 >= i1:
            # No body lines to replace.
            # If we kept the docstring, annotate the docstring closing line instead.
            if keep_docstrings and d.doc_end is not None:
                idx = d.doc_end - 1
                if 0 <= idx < len(lines):
                    ln = lines[idx]
                    base = ln[:-1] if ln.endswith("\n") else ln
                    if marker not in base:
                        lines[idx] = base + f" {marker}\n"
            continue

        sample = lines[i0] if 0 <= i0 < len(lines) else ""
        indent = _indent_of(sample) if sample else " " * 4
        lines[i0:i1] = _replacement_lines(indent, marker)

    return "".join(lines)