PyPI - codecrate - Versions diffs - 0.1.0__py3-none-any.whl - Mend

codecrate 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

codecrate/__init__.py +0 -0
codecrate/_version.py +34 -0
codecrate/cli.py +250 -0
codecrate/config.py +98 -0
codecrate/diffgen.py +110 -0
codecrate/discover.py +113 -0
codecrate/ids.py +17 -0
codecrate/manifest.py +31 -0
codecrate/markdown.py +457 -0
codecrate/mdparse.py +145 -0
codecrate/model.py +51 -0
codecrate/packer.py +108 -0
codecrate/parse.py +133 -0
codecrate/stubber.py +82 -0
codecrate/token_budget.py +388 -0
codecrate/udiff.py +187 -0
codecrate/unpacker.py +149 -0
codecrate/validate.py +120 -0
codecrate-0.1.0.dist-info/METADATA +357 -0
codecrate-0.1.0.dist-info/RECORD +24 -0
codecrate-0.1.0.dist-info/WHEEL +5 -0
codecrate-0.1.0.dist-info/entry_points.txt +2 -0
codecrate-0.1.0.dist-info/licenses/LICENSE +21 -0
codecrate-0.1.0.dist-info/top_level.txt +1 -0

codecrate/__init__.py ADDED Viewed

File without changes

codecrate/_version.py ADDED Viewed

@@ -0,0 +1,34 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+# Names exported by `from ... import *`.
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+# Hard-coded to False, so the `typing` imports below never run at runtime;
+# presumably static type checkers treat this name as true and read that branch.
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
+else:
+    # Runtime placeholders so the module-level annotations below still evaluate.
+    VERSION_TUPLE = object
+    COMMIT_ID = object
+# Annotation-only declarations; the values are bound at the bottom of the file.
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
+# Each value is bound under both its dunder name and a plain alias.
+__version__ = version = '0.1.0'
+__version_tuple__ = version_tuple = (0, 1, 0)
+__commit_id__ = commit_id = None

codecrate/cli.py ADDED Viewed

@@ -0,0 +1,250 @@
+from __future__ import annotations
+import argparse
+from pathlib import Path
+from .config import load_config
+from .diffgen import generate_patch_markdown
+from .discover import discover_files
+from .markdown import render_markdown
+from .packer import pack_repo
+from .token_budget import split_by_max_chars
+from .udiff import apply_file_diffs, parse_unified_diff
+from .unpacker import unpack_to_dir
+from .validate import validate_pack_markdown
+def _add_pack_command(commands: argparse._SubParsersAction) -> None:
+    """Register the `pack` subcommand; unset options stay None so config can win."""
+    cmd = commands.add_parser(
+        "pack", help="Pack a repository/directory into Markdown."
+    )
+    cmd.add_argument("root", type=Path, help="Root directory to scan")
+    cmd.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        default=None,
+        help="Output markdown path (default: config 'output' or context.md)",
+    )
+    cmd.add_argument(
+        "--dedupe", action="store_true", help="Deduplicate identical function bodies"
+    )
+    cmd.add_argument(
+        "--layout",
+        choices=["auto", "stubs", "full"],
+        default=None,
+        help="Output layout: auto|stubs|full (default: auto via config)",
+    )
+    # Tri-state switches: --flag / --no-flag / omitted (None).
+    toggles = (
+        (
+            "--keep-docstrings",
+            "Keep docstrings in stubbed file view (default: true via config)",
+        ),
+        ("--respect-gitignore", "Respect .gitignore (default: true via config)"),
+        ("--manifest", "Include Manifest section (default: true via config)"),
+    )
+    for flag, text in toggles:
+        cmd.add_argument(
+            flag, action=argparse.BooleanOptionalAction, default=None, help=text
+        )
+    # Repeatable glob lists.
+    globs = (
+        ("--include", "Include glob (repeatable)"),
+        ("--exclude", "Exclude glob (repeatable)"),
+    )
+    for flag, text in globs:
+        cmd.add_argument(flag, action="append", default=None, help=text)
+    cmd.add_argument(
+        "--split-max-chars",
+        type=int,
+        default=None,
+        help="Split output into .partN.md files",
+    )
+def _add_unpack_command(commands: argparse._SubParsersAction) -> None:
+    """Register the `unpack` subcommand."""
+    cmd = commands.add_parser(
+        "unpack", help="Reconstruct files from a packed context Markdown."
+    )
+    cmd.add_argument("markdown", type=Path, help="Packed Markdown file from `pack`")
+    cmd.add_argument(
+        "-o",
+        "--out-dir",
+        type=Path,
+        required=True,
+        help="Output directory for reconstructed files",
+    )
+def _add_patch_command(commands: argparse._SubParsersAction) -> None:
+    """Register the `patch` subcommand."""
+    cmd = commands.add_parser(
+        "patch",
+        help="Generate a diff-only patch Markdown from old pack + current repo.",
+    )
+    cmd.add_argument(
+        "old_markdown", type=Path, help="Older packed Markdown (baseline)"
+    )
+    cmd.add_argument("root", type=Path, help="Current repo root to compare against")
+    cmd.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        default=Path("patch.md"),
+        help="Output patch markdown",
+    )
+def _add_apply_command(commands: argparse._SubParsersAction) -> None:
+    """Register the `apply` subcommand."""
+    cmd = commands.add_parser(
+        "apply", help="Apply a diff-only patch Markdown to a repo."
+    )
+    cmd.add_argument(
+        "patch_markdown", type=Path, help="Patch Markdown containing ```diff blocks"
+    )
+    cmd.add_argument("root", type=Path, help="Repo root to apply patch to")
+def _add_validate_pack_command(commands: argparse._SubParsersAction) -> None:
+    """Register the `validate-pack` subcommand."""
+    cmd = commands.add_parser(
+        "validate-pack",
+        help="Validate a packed context Markdown (sha/markers/canonical consistency).",
+    )
+    cmd.add_argument("markdown", type=Path, help="Packed Markdown to validate")
+    cmd.add_argument(
+        "--root",
+        type=Path,
+        default=None,
+        help="Optional repo root to compare reconstructed files against",
+    )
+def build_parser() -> argparse.ArgumentParser:
+    """Build the `codecrate` argument parser.
+    The chosen subcommand is stored in `args.cmd` and is mandatory. Subcommands
+    are registered in the order pack, unpack, patch, apply, validate-pack.
+    """
+    parser = argparse.ArgumentParser(
+        prog="codecrate",
+        description="Pack/unpack/patch/apply for repositories  (Python + text files).",
+    )
+    commands = parser.add_subparsers(dest="cmd", required=True)
+    for register in (
+        _add_pack_command,
+        _add_unpack_command,
+        _add_patch_command,
+        _add_apply_command,
+        _add_validate_pack_command,
+    ):
+        register(commands)
+    return parser
+def _extract_diff_blocks(md_text: str) -> str:
+    """Concatenate the contents of every ```diff fence in *md_text*.
+    Args:
+        md_text: Markdown text that may contain ```diff fenced blocks.
+    Returns:
+        The fenced lines joined with newlines plus one trailing newline. When
+        no fence is present the result is a single newline.
+    A fence is closed only by a ``` line carrying the same leading whitespace
+    as the line that opened it. Diff context lines are prefixed with a space,
+    so a context line whose content is itself a Markdown fence (" ```") must
+    not be mistaken for the end of the block. An unterminated fence runs to
+    the end of the text.
+    """
+    lines = md_text.splitlines()
+    total = len(lines)
+    out: list[str] = []
+    i = 0
+    while i < total:
+        opener = lines[i]
+        if opener.strip() == "```diff":
+            # Remember the opener's indentation; the closer must match it.
+            indent = opener[: len(opener) - len(opener.lstrip())]
+            closer = indent + "```"
+            i += 1
+            while i < total and lines[i].rstrip() != closer:
+                out.append(lines[i])
+                i += 1
+        i += 1
+    return "\n".join(out) + "\n"
+def _run_pack(args: argparse.Namespace) -> None:
+    """Handle `pack`: discover files, pack them and write the Markdown output.
+    CLI options that were given override the loaded config; `--dedupe` is
+    OR-ed with the config flag.
+    """
+    root: Path = args.root.resolve()
+    cfg = load_config(root)
+    # None on the CLI side means "not given", so the config value is used.
+    include = cfg.include if args.include is None else args.include
+    exclude = cfg.exclude if args.exclude is None else args.exclude
+    if args.keep_docstrings is None:
+        keep_docstrings = cfg.keep_docstrings
+    else:
+        keep_docstrings = bool(args.keep_docstrings)
+    if args.manifest is None:
+        include_manifest = cfg.manifest
+    else:
+        include_manifest = bool(args.manifest)
+    if args.respect_gitignore is None:
+        respect_gitignore = cfg.respect_gitignore
+    else:
+        respect_gitignore = bool(args.respect_gitignore)
+    dedupe = bool(cfg.dedupe) or bool(args.dedupe)
+    if args.split_max_chars is None:
+        split_max_chars = cfg.split_max_chars
+    else:
+        split_max_chars = int(args.split_max_chars or 0)
+    if args.layout is None:
+        layout_source = getattr(cfg, "layout", "auto")
+    else:
+        layout_source = args.layout
+    layout = str(layout_source).strip().lower()
+    if args.output is None:
+        out_path = Path(getattr(cfg, "output", "context.md"))
+    else:
+        out_path = args.output
+    disc = discover_files(
+        root=root,
+        include=include,
+        exclude=exclude,
+        respect_gitignore=respect_gitignore,
+    )
+    pack, canonical = pack_repo(
+        disc.root, disc.files, keep_docstrings=keep_docstrings, dedupe=dedupe
+    )
+    md = render_markdown(
+        pack, canonical, layout=layout, include_manifest=include_manifest
+    )
+    # The unsplit pack is always written; it is the file meant for machine
+    # parsing (unpack/validate).
+    out_path.write_text(md, encoding="utf-8")
+    # Split parts are written in addition, never instead of, the full pack.
+    parts = split_by_max_chars(md, out_path, split_max_chars)
+    extra = [p for p in parts if p.path != out_path]
+    for part in extra:
+        part.path.write_text(part.content, encoding="utf-8")
+    if not extra:
+        print(f"Wrote {out_path}.")
+    else:
+        print(f"Wrote {out_path} and {len(extra)} split part file(s).")
+def _run_unpack(args: argparse.Namespace) -> None:
+    """Handle `unpack`: rebuild files from a packed Markdown into --out-dir."""
+    packed_text = args.markdown.read_text(encoding="utf-8", errors="replace")
+    unpack_to_dir(packed_text, args.out_dir)
+    print(f"Unpacked into {args.out_dir}")
+def _run_patch(args: argparse.Namespace) -> None:
+    """Handle `patch`: diff an old pack against the repo and write the result."""
+    baseline = args.old_markdown.read_text(encoding="utf-8", errors="replace")
+    cfg = load_config(args.root)
+    patch_text = generate_patch_markdown(
+        baseline,
+        args.root,
+        include=cfg.include,
+        exclude=cfg.exclude,
+        respect_gitignore=cfg.respect_gitignore,
+    )
+    args.output.write_text(patch_text, encoding="utf-8")
+    print(f"Wrote {args.output}")
+def _run_validate_pack(args: argparse.Namespace) -> None:
+    """Handle `validate-pack`: print warnings/errors; exit status 1 on errors."""
+    packed_text = args.markdown.read_text(encoding="utf-8", errors="replace")
+    report = validate_pack_markdown(packed_text, root=args.root)
+    if report.warnings:
+        print("Warnings:")
+        for warning in report.warnings:
+            print(f"- {warning}")
+    if report.errors:
+        print("Errors:")
+        for error in report.errors:
+            print(f"- {error}")
+        raise SystemExit(1)
+    print("OK: pack is internally consistent.")
+def _run_apply(args: argparse.Namespace) -> None:
+    """Handle `apply`: extract the diff fences and apply them under the root."""
+    patch_text = args.patch_markdown.read_text(encoding="utf-8", errors="replace")
+    file_diffs = parse_unified_diff(_extract_diff_blocks(patch_text))
+    changed = apply_file_diffs(file_diffs, args.root)
+    print(f"Applied patch to {len(changed)} file(s).")
+def main(argv: list[str] | None = None) -> None:
+    """Parse *argv* (argparse falls back to sys.argv when None) and dispatch."""
+    args = build_parser().parse_args(argv)
+    handlers = {
+        "pack": _run_pack,
+        "unpack": _run_unpack,
+        "patch": _run_patch,
+        "validate-pack": _run_validate_pack,
+        "apply": _run_apply,
+    }
+    handler = handlers.get(args.cmd)
+    if handler is not None:
+        handler(args)
+if __name__ == "__main__":
+    main()

codecrate/config.py ADDED Viewed

@@ -0,0 +1,98 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Literal
+try:
+    import tomllib  # py311+
+except ModuleNotFoundError:  # pragma: no cover
+    import tomli as tomllib  # type: ignore
+# Config file names probed in the root directory, in this order; the first
+# one that exists is used.
+CONFIG_FILENAMES: tuple[str, ...] = (".codecrate.toml", "codecrate.toml")
+# Glob patterns used as the default for Config.include (copied per instance).
+DEFAULT_INCLUDES: list[str] = [
+    "**/*.py",
+    # Common packaging + repo metadata
+    "pyproject.toml",
+    "project.toml",
+    "setup.cfg",
+    "README*",
+    "LICENSE*",
+    # Docs
+    "docs/**/*.rst",
+    "docs/**/*.md",
+]
+@dataclass
+class Config:
+    """Settings for codecrate; the defaults apply wherever no override is given."""
+    # Default output path for `codecrate pack` when CLI does not specify -o/--output
+    output: str = "context.md"
+    keep_docstrings: bool = True
+    dedupe: bool = False
+    respect_gitignore: bool = True
+    # Each instance gets its own copy so the module-level default is not shared.
+    include: list[str] = field(default_factory=lambda: DEFAULT_INCLUDES.copy())
+    exclude: list[str] = field(default_factory=list)
+    split_max_chars: int = 0  # 0 means no splitting
+    # Emit the `## Manifest` section (required for unpack/patch/validate-pack).
+    # Disable only for LLM-only packs to save tokens.
+    manifest: bool = True
+    # Output layout:
+    # - "stubs": always emit stubbed files + Function Library (current format)
+    # - "full":  emit full file contents (no Function Library)
+    # - "auto":  use "stubs" only if dedupe actually collapses something,
+    #            otherwise use "full" (best token efficiency when no duplicates)
+    layout: Literal["auto", "stubs", "full"] = "auto"
+def _find_config_path(root: Path) -> Path | None:
+    """Return the first existing CONFIG_FILENAMES entry under *root*, else None."""
+    base = root.resolve()
+    candidates = (base / name for name in CONFIG_FILENAMES)
+    # Lazy generator: probing stops at the first candidate that exists.
+    return next((path for path in candidates if path.exists()), None)
+def load_config(root: Path) -> Config:
+    """Load the `[codecrate]` table from a config file under *root*.
+    Args:
+        root: Directory searched for one of CONFIG_FILENAMES.
+    Returns:
+        A Config with defaults overridden by the values found. Defaults are
+        returned unchanged when no config file exists, or when the file has
+        no `codecrate` table (including when that key is not a table).
+    Raises:
+        tomllib.TOMLDecodeError: If the config file is not valid TOML.
+    Values of the wrong type for `output`, `layout`, `include`, `exclude` and
+    `split_max_chars` are ignored and the default is kept.
+    """
+    cfg = Config()
+    cfg_path = _find_config_path(root)
+    if cfg_path is None:
+        return cfg
+    data = tomllib.loads(cfg_path.read_text(encoding="utf-8"))
+    section: Any = data.get("codecrate") if isinstance(data, dict) else None
+    if not isinstance(section, dict):
+        # e.g. `codecrate = "x"` at top level: there is nothing to read from.
+        return cfg
+    out = section.get("output", cfg.output)
+    if isinstance(out, str) and out.strip():
+        cfg.output = out.strip()
+    cfg.keep_docstrings = bool(section.get("keep_docstrings", cfg.keep_docstrings))
+    cfg.dedupe = bool(section.get("dedupe", cfg.dedupe))
+    cfg.respect_gitignore = bool(
+        section.get("respect_gitignore", cfg.respect_gitignore)
+    )
+    # `manifest` takes precedence; `include_manifest` is accepted as an alias.
+    man = section.get("manifest", section.get("include_manifest", cfg.manifest))
+    cfg.manifest = bool(man)
+    layout = section.get("layout", cfg.layout)
+    if isinstance(layout, str):
+        layout = layout.strip().lower()
+        if layout in {"auto", "stubs", "full"}:
+            cfg.layout = layout  # type: ignore[assignment]
+    inc = section.get("include", cfg.include)
+    exc = section.get("exclude", cfg.exclude)
+    if isinstance(inc, list):
+        cfg.include = [str(x) for x in inc]
+    if isinstance(exc, list):
+        cfg.exclude = [str(x) for x in exc]
+    split = section.get("split_max_chars", cfg.split_max_chars)
+    try:
+        cfg.split_max_chars = int(split)
+    except (TypeError, ValueError, OverflowError):
+        # Not convertible to int (wrong type, non-numeric string, nan/inf):
+        # deliberately keep the default instead of failing the whole load.
+        pass
+    return cfg

codecrate/diffgen.py ADDED Viewed

@@ -0,0 +1,110 @@
+from __future__ import annotations
+import difflib
+from pathlib import Path
+from .config import DEFAULT_INCLUDES
+from .discover import discover_files
+from .mdparse import parse_packed_markdown
+from .udiff import normalize_newlines
+from .unpacker import _apply_canonical_into_stub
+def _render_diff_section(
+    rel: str,
+    old_lines: list[str],
+    new_lines: list[str],
+    *,
+    fromfile: str,
+    tofile: str,
+) -> str:
+    """Return a heading plus ```diff fence for one file, or an empty string."""
+    diff_lines = list(
+        difflib.unified_diff(
+            old_lines, new_lines, fromfile=fromfile, tofile=tofile, lineterm=""
+        )
+    )
+    if not diff_lines:
+        return ""
+    return f"## `{rel}`\n\n```diff\n" + "\n".join(diff_lines) + "\n```\n\n"
+def generate_patch_markdown(
+    old_pack_md: str,
+    root: Path,
+    *,
+    include: list[str] | None = None,
+    exclude: list[str] | None = None,
+    respect_gitignore: bool = True,
+) -> str:
+    """Build a patch Markdown: baseline pack versus the files under *root*.
+    Args:
+        old_pack_md: Text of the older packed Markdown (the baseline).
+        root: Current repository root to compare against.
+        include: Include globs for finding added files; DEFAULT_INCLUDES if None.
+        exclude: Exclude globs for finding added files; empty if None.
+        respect_gitignore: Passed through to discover_files.
+    Returns:
+        Markdown with one ```diff section per changed, deleted or added file,
+        or a "_No changes detected._" note when nothing differs.
+    Manifest entries without a "path", or without a stubbed file in the pack,
+    are skipped. A manifest path that is not a regular file under *root* is
+    reported as deleted.
+    """
+    # If caller doesn't pass include/exclude, use the same defaults as Config.
+    include = DEFAULT_INCLUDES.copy() if include is None else list(include)
+    exclude = [] if exclude is None else list(exclude)
+    packed = parse_packed_markdown(old_pack_md)
+    manifest_files = packed.manifest.get("files", [])
+    root = root.resolve()
+    header = [
+        "# Codecrate Patch\n\n",
+        # Do not leak absolute local paths; keep the header root stable + relative.
+        "Root: `.`\n\n",
+        "This file contains unified diffs inside ```diff code fences.\n\n",
+    ]
+    sections: list[str] = []
+    old_paths = {f["path"] for f in manifest_files if "path" in f}
+    for f in manifest_files:
+        if "path" not in f:
+            # Same tolerance as old_paths above: ignore entries without a path.
+            continue
+        rel = f["path"]
+        stub = packed.stubbed_files.get(rel)
+        if stub is None:
+            continue
+        old_text = normalize_newlines(
+            _apply_canonical_into_stub(
+                stub, f.get("defs", []), packed.canonical_sources
+            )
+        )
+        cur_path = root / rel
+        if cur_path.is_file():
+            new_text = normalize_newlines(
+                cur_path.read_text(encoding="utf-8", errors="replace")
+            )
+            new_lines = new_text.splitlines()
+            tofile = f"b/{rel}"
+        else:
+            # Missing (or no longer a regular file): treat as deleted.
+            new_lines = []
+            tofile = "/dev/null"
+        section = _render_diff_section(
+            rel, old_text.splitlines(), new_lines, fromfile=f"a/{rel}", tofile=tofile
+        )
+        if section:
+            sections.append(section)
+    # Added files (present in current repo, not in baseline manifest)
+    disc = discover_files(
+        root,
+        include=include,
+        exclude=exclude,
+        respect_gitignore=respect_gitignore,
+    )
+    for p in disc.files:
+        rel = p.relative_to(root).as_posix()
+        if rel in old_paths:
+            continue
+        new_text = normalize_newlines(p.read_text(encoding="utf-8", errors="replace"))
+        section = _render_diff_section(
+            rel, [], new_text.splitlines(), fromfile="/dev/null", tofile=f"b/{rel}"
+        )
+        if section:
+            sections.append(section)
+    if not sections:
+        sections.append("_No changes detected._\n")
+    return "".join(header + sections)

codecrate/discover.py ADDED Viewed

@@ -0,0 +1,113 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+import pathspec
+# Glob patterns that are always excluded; caller-supplied exclude patterns
+# are appended to this list rather than replacing it.
+DEFAULT_EXCLUDES = [
+    "**/__pycache__/**",
+    "**/*.pyc",
+    "**/.git/**",
+    "**/.venv/**",
+    "**/venv/**",
+    "**/.tox/**",
+    "**/.pytest_cache/**",
+    "**/build/**",
+    "**/dist/**",
+    "**/_version.py",
+]
+@dataclass(frozen=True)
+class Discovery:
+    """Result of a discovery scan: the matched files and the root scanned."""
+    # Sorted list of matching paths. frozen=True only blocks rebinding the
+    # attribute; the list itself can still be mutated.
+    files: list[Path]
+    # The resolved root directory that the scan walked.
+    root: Path
+def _load_gitignore(root: Path) -> pathspec.PathSpec:
+    """Build a gitwildmatch PathSpec from `<root>/.gitignore`.
+    Args:
+        root: Directory whose top-level .gitignore is read.
+    Returns:
+        A spec built from the file's lines, or an empty spec when there is no
+        regular file named .gitignore.
+    Undecodable bytes are replaced rather than raising, matching how the rest
+    of the package reads text files, so a mis-encoded .gitignore cannot abort
+    discovery.
+    """
+    gi = root / ".gitignore"
+    lines: list[str] = []
+    if gi.is_file():
+        lines = gi.read_text(encoding="utf-8", errors="replace").splitlines()
+    return pathspec.PathSpec.from_lines("gitwildmatch", lines)
+def discover_files(
+    root: Path,
+    include: list[str] | None,
+    exclude: list[str] | None,
+    respect_gitignore: bool = True,
+) -> Discovery:
+    """Collect every regular file under *root* that passes the filters.
+    Walks all files (not only *.py, unlike discover_python_files), which makes
+    it suitable for metadata and docs such as pyproject.toml or *.rst.
+    A file is kept when it is not gitignored (if *respect_gitignore*), matches
+    an include pattern (default "**/*.py") and matches no exclude pattern
+    (DEFAULT_EXCLUDES plus *exclude*). The result is sorted.
+    """
+    root = root.resolve()
+    if respect_gitignore:
+        ignore_spec = _load_gitignore(root)
+    else:
+        ignore_spec = pathspec.PathSpec.from_lines("gitwildmatch", [])
+    include_spec = pathspec.PathSpec.from_lines(
+        "gitwildmatch", include or ["**/*.py"]
+    )
+    exclude_spec = pathspec.PathSpec.from_lines(
+        "gitwildmatch", DEFAULT_EXCLUDES + (exclude or [])
+    )
+    def _selected(rel_posix: str) -> bool:
+        # Checked in the order: gitignore, include, exclude.
+        if respect_gitignore and ignore_spec.match_file(rel_posix):
+            return False
+        if not include_spec.match_file(rel_posix):
+            return False
+        return not exclude_spec.match_file(rel_posix)
+    matches = sorted(
+        candidate
+        for candidate in root.rglob("*")
+        if candidate.is_file() and _selected(candidate.relative_to(root).as_posix())
+    )
+    return Discovery(files=matches, root=root)
+def discover_python_files(
+    root: Path,
+    include: list[str] | None,
+    exclude: list[str] | None,
+    respect_gitignore: bool = True,
+) -> Discovery:
+    """Discover `*.py` files under *root* matching include/exclude patterns.
+    Args:
+        root: Directory to scan; resolved before walking.
+        include: Include globs; defaults to ["**/*.py"] when empty or None.
+        exclude: Extra exclude globs, added on top of DEFAULT_EXCLUDES.
+        respect_gitignore: Skip paths matched by the root .gitignore.
+    Returns:
+        A Discovery holding the sorted matching files and the resolved root.
+    Only regular files are returned, as in discover_files: a directory whose
+    name happens to end in ".py" is skipped instead of being reported as a
+    source file.
+    """
+    root = root.resolve()
+    gi = (
+        _load_gitignore(root)
+        if respect_gitignore
+        else pathspec.PathSpec.from_lines("gitwildmatch", [])
+    )
+    inc = pathspec.PathSpec.from_lines("gitwildmatch", include or ["**/*.py"])
+    effective_exclude = DEFAULT_EXCLUDES + (exclude or [])
+    exc = pathspec.PathSpec.from_lines("gitwildmatch", effective_exclude)
+    out: list[Path] = []
+    for p in root.rglob("*.py"):
+        if not p.is_file():
+            continue
+        rel_s = p.relative_to(root).as_posix()
+        if respect_gitignore and gi.match_file(rel_s):
+            continue
+        if not inc.match_file(rel_s):
+            continue
+        if exc.match_file(rel_s):
+            continue
+        out.append(p)
+    out.sort()
+    return Discovery(files=out, root=root)

codecrate/ids.py ADDED Viewed

@@ -0,0 +1,17 @@
+from __future__ import annotations
+import hashlib
+from pathlib import Path
+def stable_location_id(path: Path, qualname: str, lineno: int) -> str:
+    """Return an 8-character uppercase hex id for a (path, qualname, line) triple.
+    The id is the first 8 hex digits of the SHA-1 of
+    "<posix path>::<qualname>::<lineno>".
+    """
+    key = "::".join((path.as_posix(), qualname, str(lineno)))
+    digest = hashlib.sha1(key.encode()).hexdigest()
+    return digest[:8].upper()
+def stable_body_hash(code: str) -> str:
+    """Return the uppercase SHA-1 hex digest of whitespace-normalized *code*.
+    Normalization: CRLF and lone CR become LF, trailing whitespace is removed
+    from every line, and leading/trailing whitespace of the whole text is
+    stripped before hashing the UTF-8 bytes.
+    """
+    unified = code.replace("\r\n", "\n").replace("\r", "\n")
+    trimmed = [line.rstrip() for line in unified.split("\n")]
+    canonical = "\n".join(trimmed).strip()
+    digest = hashlib.sha1(canonical.encode("utf-8"))
+    return digest.hexdigest().upper()

codecrate/manifest.py ADDED Viewed

@@ -0,0 +1,31 @@
+from __future__ import annotations
+import hashlib
+from dataclasses import asdict
+from typing import Any
+from .model import PackResult
+def to_manifest(pack: PackResult, *, minimal: bool = False) -> dict[str, Any]:
+    """Build the manifest dictionary for *pack*.
+    Every file entry carries its root-relative POSIX path, line count and the
+    SHA-256 of its original text. Unless *minimal* is set, the module name,
+    the SHA-256 of the stubbed text and the class/def records (each tagged
+    with the file path) are added as well.
+    """
+    def _digest(text: str) -> str:
+        return hashlib.sha256(text.encode("utf-8")).hexdigest()
+    def _entry(fp: Any) -> dict[str, Any]:
+        rel = fp.path.relative_to(pack.root).as_posix()
+        record: dict[str, Any] = {
+            "path": rel,
+            "line_count": fp.line_count,
+            "sha256_original": _digest(fp.original_text),
+        }
+        if minimal:
+            return record
+        record["module"] = fp.module
+        record["sha256_stubbed"] = _digest(fp.stubbed_text)
+        record["classes"] = [{**asdict(c), "path": rel} for c in fp.classes]
+        record["defs"] = [{**asdict(d), "path": rel} for d in fp.defs]
+        return record
+    # Root is already shown at the top of the pack; keep manifest root stable + short.
+    return {
+        "format": "codecrate.v4",
+        "root": ".",
+        "files": [_entry(fp) for fp in pack.files],
+    }