PyPI - research-git - Versions diffs - 0.0.2__tar.gz → 0.0.3__tar.gz - Mend

research-git 0.0.2.tar.gz → 0.0.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

{research_git-0.0.2/src/research_git.egg-info → research_git-0.0.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: research-git
-Version: 0.0.2
+Version: 0.0.3
 Summary: A memory system that captures code ideas as semantic capsules you can regenerate onto today's codebase
 Author: Stepzero Lab
 License-Expression: MIT

{research_git-0.0.2 → research_git-0.0.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "research-git"
-version = "0.0.2"
+version = "0.0.3"
 description = "A memory system that captures code ideas as semantic capsules you can regenerate onto today's codebase"
 readme = "README.md"
 license = "MIT"

{research_git-0.0.2 → research_git-0.0.3/src/research_git.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: research-git
-Version: 0.0.2
+Version: 0.0.3
 Summary: A memory system that captures code ideas as semantic capsules you can regenerate onto today's codebase
 Author: Stepzero Lab
 License-Expression: MIT

research_git-0.0.3/src/rgit/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.0.3"

{research_git-0.0.2 → research_git-0.0.3}/src/rgit/_plugin/.claude-plugin/plugin.json RENAMED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "research-git",
   "description": "A memory system for the code you're exploring: capture each idea as a semantic Feature Capsule, recall it, and regenerate it onto today's codebase. Segmentation/regeneration run on natively-dispatched subagents (your subscription) — no pay-per-use API. MCP serves the graph read-only for sharing.",
-  "version": "0.0.2",
+  "version": "0.0.3",
   "author": { "name": "Stepzero Lab" },
   "license": "MIT",
   "keywords": [

{research_git-0.0.2 → research_git-0.0.3}/src/rgit/astmap.py RENAMED Viewed

@@ -6,8 +6,9 @@ from typing import Optional
 import libcst as cst
 from libcst.metadata import MetadataWrapper, PositionProvider
+from .gitutil import _within, parse_git_diff_header
 _HUNK = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", re.M)
-_FILE = re.compile(r"^\+\+\+ b/(.+)$", re.M)
 def _read_python_source(path: Path) -> str:
@@ -17,21 +18,61 @@ def _read_python_source(path: Path) -> str:
     return path.read_text(encoding="utf-8-sig")
+def _python_source_path(repo: Path, file: str) -> Optional[Path]:
+    """Repo-contained regular Python file, without following external symlinks."""
+    path = repo / file
+    if path.suffix != ".py" or not _within(repo, path):
+        return None
+    try:
+        return path if path.is_file() else None
+    except OSError:
+        return None
 def _changed_line_ranges(diff: str) -> dict[str, list[tuple[int, int]]]:
-    """file -> list of (start, end) line ranges touched on the new side."""
+    """file -> list of (start, end) ranges of *actually changed* new-side lines.
+    Only added lines — plus the new-side anchor of a deletion — count; unified-diff
+    context lines are walked to advance the new-side line counter but never recorded.
+    Using the whole hunk span (header length) would treat untouched neighbouring
+    symbols that merely appear as context as changed (issue #10).
+    """
     result: dict[str, list[tuple[int, int]]] = {}
     current: Optional[str] = None
+    in_hunk = False
+    new_line = 0
+    hunk_start = 0
     for line in diff.splitlines():
-        m = _FILE.match(line)
-        if m:
-            current = m.group(1)
-            result.setdefault(current, [])
+        matched, path = parse_git_diff_header(line, "+++")
+        if matched:
+            current = path
+            in_hunk = False
+            if current is not None:
+                result.setdefault(current, [])
             continue
         h = _HUNK.match(line)
-        if h and current:
-            start = int(h.group(1))
-            length = int(h.group(2) or "1")
-            result[current].append((start, start + max(length, 1) - 1))
+        if h:
+            new_line = hunk_start = int(h.group(1))
+            in_hunk = current is not None
+            continue
+        if not in_hunk:
+            continue
+        if not line:                      # empty context line
+            new_line += 1
+            continue
+        tag = line[0]
+        if tag == "+":                    # added line -> genuinely changed
+            result[current].append((new_line, new_line))
+            new_line += 1
+        elif tag == "-":                  # deletion -> anchor to the surviving line
+            anchor = new_line - 1 if new_line > hunk_start else new_line
+            result[current].append((anchor, anchor))
+        elif tag == " ":                  # context -> advance, do not record
+            new_line += 1
+        elif tag == "\\":                 # "\ No newline at end of file"
+            continue
+        else:                             # non-body line ends the hunk (e.g. next `diff --git`)
+            in_hunk = False
     return result
@@ -62,12 +103,12 @@ def changed_symbols(diff: str, repo: Path) -> list[dict]:
     """[{file, symbol}] for each top-level def/class overlapping a diff hunk."""
     out: list[dict] = []
     for file, ranges in _changed_line_ranges(diff).items():
-        path = repo / file
-        if not path.suffix == ".py" or not path.exists() or not ranges:
+        path = _python_source_path(repo, file)
+        if path is None or not ranges:
             continue
         try:
             wrapper = MetadataWrapper(cst.parse_module(_read_python_source(path)))
-        except cst.ParserSyntaxError:
+        except (cst.ParserSyntaxError, UnicodeDecodeError):
             continue
         finder = _SymbolFinder(ranges)
         wrapper.visit(finder)
@@ -78,12 +119,12 @@ def changed_symbols(diff: str, repo: Path) -> list[dict]:
 def read_symbol_source(repo: Path, file: str, symbol: str) -> Optional[str]:
     """Current source text of a top-level def/class, or None if absent."""
-    path = repo / file
-    if not path.exists():
+    path = _python_source_path(repo, file)
+    if path is None:
         return None
     try:
         module = cst.parse_module(_read_python_source(path))
-    except cst.ParserSyntaxError:
+    except (cst.ParserSyntaxError, UnicodeDecodeError):
         return None
     for stmt in module.body:
         if isinstance(stmt, (cst.FunctionDef, cst.ClassDef)) and stmt.name.value == symbol:
@@ -93,12 +134,12 @@ def read_symbol_source(repo: Path, file: str, symbol: str) -> Optional[str]:
 def symbol_at_line(repo: Path, file: str, line: int) -> Optional[str]:
     """Name of the top-level def/class enclosing `line` (1-based), or None."""
-    path = repo / file
-    if path.suffix != ".py" or not path.exists():
+    path = _python_source_path(repo, file)
+    if path is None:
         return None
     try:
         wrapper = MetadataWrapper(cst.parse_module(_read_python_source(path)))
-    except cst.ParserSyntaxError:
+    except (cst.ParserSyntaxError, UnicodeDecodeError):
         return None
     finder = _SymbolFinder([(line, line)])
     wrapper.visit(finder)

{research_git-0.0.2 → research_git-0.0.3}/src/rgit/cli.py RENAMED Viewed

@@ -60,6 +60,60 @@ def _now() -> str:
     return datetime.datetime.now().isoformat(timespec="seconds")
+def _brief(text: str, limit: int = 1200) -> str:
+    text = (text or "").strip()
+    if len(text) <= limit:
+        return text
+    return text[-limit:]
+def _run_exit_code(returncode: int) -> int:
+    return returncode if returncode > 0 else 1
+def _diff_text(store: Store, diff_ref: Optional[str]) -> str:
+    return store.objects.get(diff_ref).decode(errors="replace") if diff_ref else ""
+def _skip_notices(diff: str) -> list[str]:
+    return [line for line in diff.splitlines()
+            if line.startswith("research-git: skipped ")]
+def _print_skip_summary(diff: str, indent: str = "") -> None:
+    notices = _skip_notices(diff)
+    if not notices:
+        return
+    print(f"{indent}warning: skipped {len(notices)} file(s); "
+          "run `rgit pending --json` for details")
+def _print_run_result(result, store: Store) -> None:
+    prop_id = result.proposal_id
+    if prop_id is None:
+        print(f"run {result.run_id} recorded; no code changes to capture")
+    else:
+        prop = store.get_proposal(prop_id)
+        print(f"run {result.run_id} recorded; proposal {prop_id} awaiting review")
+        _print_skip_summary(_diff_text(store, prop.diff_ref), indent="  ")
+        if not prop.candidates:
+            print("  note: proposal has 0 candidates; run `rgit pending --json`, "
+                  "then `rgit resegment <proposal_id> --from-json <path>`")
+    if result.metrics:
+        metrics = ", ".join(f"{k}={v}" for k, v in result.metrics.items())
+        print(f"  metrics: {metrics}")
+    if result.returncode != 0:
+        print(f"  command exited with status {result.returncode}")
+        err = _brief(result.stderr)
+        out = _brief(result.stdout)
+        if err:
+            print("  stderr:")
+            print(err)
+        if out:
+            print("  stdout:")
+            print(out)
 def build_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(prog="rgit")
     sub = parser.add_subparsers(dest="cmd", required=True)
@@ -161,7 +215,23 @@ def build_parser() -> argparse.ArgumentParser:
     return parser
+def _force_utf8_stdio() -> None:
+    """Make stdout/stderr UTF-8 so non-ASCII output can't raise UnicodeEncodeError.
+    On Windows the console/pipe defaults to the locale codepage (e.g. cp936),
+    which can't encode glyphs we emit (•, box-drawing, arrows) or arbitrary
+    unicode in capsule names/intents. Kept in its own function so it does not
+    depend on `main`'s local `import sys`.
+    """
+    for stream in (sys.stdout, sys.stderr):
+        try:
+            stream.reconfigure(encoding="utf-8")
+        except (AttributeError, ValueError):
+            pass
 def main(argv: Optional[list[str]] = None) -> int:
+    _force_utf8_stdio()
     parser = build_parser()
     args = parser.parse_args(argv)
@@ -215,6 +285,9 @@ def main(argv: Optional[list[str]] = None) -> int:
     if args.cmd == "run":
         cmd = args.rest[1:] if args.rest and args.rest[0] == "--" else args.rest
+        if not cmd:
+            print("no command provided; use `rgit run -- <command>`")
+            return 1
         active = None
         if args.active:
             # accept repeated --with and comma-separated names/ids; resolve to ids
@@ -224,36 +297,63 @@ def main(argv: Optional[list[str]] = None) -> int:
             except KeyError as e:
                 print(str(e).strip('"'))
                 return 1
-        run_id, prop_id = run_experiment(store, cmd, _segmenter(), now=_now(),
-                                         from_features=args.from_features,
-                                         active=active)
+        result = run_experiment(store, cmd, _segmenter(), now=_now(),
+                                from_features=args.from_features,
+                                active=active)
         if args.refresh_guide_file and args.from_features:
             from pathlib import Path
             guide = Path(args.refresh_guide_file).read_text(encoding="utf-8")
             for src in args.from_features:
                 store.update_capsule(src, resurrection_guide=guide)
-        print(f"run {run_id} recorded; proposal {prop_id} awaiting review")
+        _print_run_result(result, store)
         if args.from_features:
             print(f"  linked as variant_of: {', '.join(args.from_features)}")
-        return 0
+        return 0 if result.returncode == 0 else _run_exit_code(result.returncode)
     if args.cmd == "capture":
         pid = segment_diff(store, args.trigger, _segmenter(), run_id=None, now=_now())
+        if pid is None:
+            print("nothing to capture (working tree has no diff)")
+            return 0
+        prop = store.get_proposal(pid)
         print(f"proposal {pid} created")
+        _print_skip_summary(_diff_text(store, prop.diff_ref))
+        if not prop.candidates:
+            print("note: proposal has 0 candidates; run `rgit pending --json`, "
+                  "then `rgit resegment <proposal_id> --from-json <path>`")
         return 0
     if args.cmd == "review":
         if args.dismiss:
-            dismiss(store, args.dismiss)
+            try:
+                dismiss(store, args.dismiss)
+            except (KeyError, ValueError) as e:
+                print(str(e))
+                return 1
             print(f"dismissed {args.dismiss}")
             return 0
         if args.approve:
-            fid = approve(store, args.approve, args.index, args.name)
+            try:
+                fid = approve(store, args.approve, args.index, args.name)
+            except (KeyError, ValueError) as e:
+                print(str(e))
+                print("hint: inspect with `rgit pending --json`; if there are "
+                      "0 candidates, resegment before approving.")
+                return 1
             print(f"approved -> feature {fid}")
             return 0
-        for p in store.list_proposals("open"):
+        proposals = store.list_proposals("open")
+        if not proposals:
+            print("no pending proposals")
+            return 0
+        for p in proposals:
             names = ", ".join(c["name"] for c in p.candidates)
-            print(f"{p.id}  [{p.trigger}]  candidates: {names}")
+            if names:
+                print(f"{p.id}  [{p.trigger}]  candidates: {names}")
+            else:
+                print(f"{p.id}  [{p.trigger}]  0 candidate(s); "
+                      "resegment before approving")
+            _print_skip_summary(_diff_text(store, p.diff_ref), indent="  ")
         return 0
     if args.cmd == "features":
@@ -289,23 +389,44 @@ def main(argv: Optional[list[str]] = None) -> int:
     if args.cmd == "pending":
         items = []
         for p in store.list_proposals("open"):
-            diff = store.objects.get(p.diff_ref).decode() if p.diff_ref else ""
+            diff = _diff_text(store, p.diff_ref)
             items.append({"proposal_id": p.id, "trigger": p.trigger,
                           "diff": diff, "candidates": p.candidates})
         if args.json:
             print(json.dumps(items, indent=2, ensure_ascii=False))
         else:
+            if not items:
+                print("no pending proposals")
+                return 0
             for it in items:
                 print(f"{it['proposal_id']}  [{it['trigger']}]  "
                       f"{len(it['candidates'])} candidate(s)")
+                _print_skip_summary(it["diff"], indent="  ")
         return 0
     if args.cmd == "resegment":
         import sys
         from pathlib import Path
-        raw = sys.stdin.read() if args.from_json == "-" else Path(args.from_json).read_text(encoding="utf-8")
-        candidates = json.loads(raw)
-        store.set_proposal_candidates(args.proposal_id, candidates)
+        if args.from_json == "-":
+            # Read stdin as bytes and decode UTF-8: the host agent pipes UTF-8
+            # JSON, but sys.stdin.read() would decode with the locale codepage
+            # (cp936 on Windows), corrupting non-ASCII intents/names. Fall back to
+            # sys.stdin.read() when there is no binary buffer (e.g. patched stdin).
+            _buf = getattr(sys.stdin, "buffer", None)
+            raw = _buf.read().decode("utf-8") if _buf is not None else sys.stdin.read()
+        else:
+            raw = Path(args.from_json).read_text(encoding="utf-8")
+        from .curation import validate_candidates
+        try:
+            candidates = json.loads(raw)
+            validate_candidates(candidates)
+            store.set_proposal_candidates(args.proposal_id, candidates)
+        except json.JSONDecodeError as e:
+            print(f"invalid JSON: {e}")
+            return 1
+        except (KeyError, ValueError) as e:
+            print(str(e))
+            return 1
         print(f"resegmented {args.proposal_id}: {len(candidates)} candidate(s)")
         return 0
@@ -314,7 +435,15 @@ def main(argv: Optional[list[str]] = None) -> int:
         if args.once:
             snap = watchmod.snapshot(store)
             _, pid = watchmod.tick(store, snap, _now())
-            print(f"staged proposal {pid}" if pid else "nothing to capture")
+            if pid:
+                prop = store.get_proposal(pid)
+                print(f"staged proposal {pid}")
+                _print_skip_summary(_diff_text(store, prop.diff_ref))
+                if not prop.candidates:
+                    print("note: proposal has 0 candidates; run `rgit pending --json`, "
+                          "then `rgit resegment <proposal_id> --from-json <path>`")
+            else:
+                print("nothing to capture")
             return 0
         watchmod.loop(store, interval=args.interval, idle=args.idle, now_fn=_now)
         return 0
@@ -413,5 +542,6 @@ def _find_root():
     import subprocess
     from pathlib import Path
     out = subprocess.run(["git", "rev-parse", "--show-toplevel"],
-                         capture_output=True, text=True, check=True)
+                         capture_output=True, text=True, check=True,
+                         encoding="utf-8", errors="replace")
     return Path(out.stdout.strip())

research_git-0.0.3/src/rgit/curation.py ADDED Viewed

@@ -0,0 +1,98 @@
+from __future__ import annotations
+from typing import Optional
+from .gitutil import current_commit
+from .store.models import Capsule, CodeSlice
+from .store.store import Store
+def approve(store: Store, proposal_id: str, candidate_index: int = 0,
+            name: Optional[str] = None) -> str:
+    """Turn one candidate into an approved Capsule; link it to the run.
+    When `name` matches a candidate's own name, that candidate is selected by
+    name (and `candidate_index` is ignored). This is the robust path for a
+    proposal with several candidates: `--approve <pid> --name <candidate-name>`
+    always picks the right one, so a forgotten `--index` can't silently approve
+    (and mislabel) the wrong candidate. Otherwise `candidate_index` is used.
+    """
+    prop = store.get_proposal(proposal_id)
+    if prop.status != "open":
+        raise ValueError(
+            f"proposal {proposal_id!r} is {prop.status}, not open; cannot approve "
+            f"(re-approving would create a duplicate capsule)")
+    if not prop.candidates:
+        raise ValueError(f"proposal {proposal_id!r} has no candidates to approve")
+    by_name = [i for i, c in enumerate(prop.candidates) if c.get("name") == name]
+    if name is not None and not by_name:
+        # A typo must fail loudly, not silently approve (and mislabel) candidate 0.
+        available = [c.get("name") for c in prop.candidates]
+        raise ValueError(
+            f"no candidate named {name!r} in proposal {proposal_id!r}; "
+            f"available: {available}")
+    idx = by_name[0] if name is not None else candidate_index
+    if idx < 0 or idx >= len(prop.candidates):
+        raise ValueError(
+            f"candidate index {idx} out of range for proposal {proposal_id!r} "
+            f"with {len(prop.candidates)} candidate(s)")
+    cand = prop.candidates[idx]
+    cap = Capsule(
+        id="", name=name or cand["name"], intent=cand["intent"],
+        status="approved", base_commit=current_commit(store.root),
+        knobs=cand.get("knobs", {}), data_assumptions=cand.get("data_assumptions"),
+        resurrection_guide=cand.get("resurrection_guide"), result_summary=None,
+        payload_hash=None,
+        code_slices=[CodeSlice(**c) for c in cand["code_slices"]])
+    fid = store.add_feature(cap)
+    for slice_ in cap.code_slices:                       # touches edges
+        store.add_edge(fid, f"module:{slice_.file}", "touches")
+    if prop.run_id:                                      # produced edge
+        store.add_edge(fid, prop.run_id, "produced")
+    for src in (prop.from_features or []):               # regenerated from -> variant_of
+        store.add_edge(fid, src, "variant_of")
+    store.set_proposal_status(proposal_id, "resolved")
+    return fid
+def dismiss(store: Store, proposal_id: str) -> None:
+    prop = store.get_proposal(proposal_id)
+    if prop.status != "open":
+        raise ValueError(
+            f"proposal {proposal_id!r} is {prop.status}, not open; cannot dismiss")
+    store.set_proposal_status(proposal_id, "dismissed")
+_CODE_SLICE_FIELDS = {"file", "symbol", "anchor", "code", "kind"}
+def validate_candidates(candidates: object) -> None:
+    """Reject malformed candidate input before it is stored.
+    `resegment` accepts arbitrary JSON from the host agent; without this a
+    missing/extra field only surfaces later as an uncaught KeyError/TypeError in
+    `approve()` or the `review` listing. Raises ValueError with a clear message.
+    An empty list is valid (a deliberate 0-candidate proposal).
+    """
+    if not isinstance(candidates, list):
+        raise ValueError("candidates must be a JSON list of candidate objects")
+    for i, c in enumerate(candidates):
+        where = f"candidate {i}"
+        if not isinstance(c, dict):
+            raise ValueError(f"{where} must be a JSON object")
+        for field in ("name", "intent"):
+            if not isinstance(c.get(field), str) or not c[field].strip():
+                raise ValueError(f"{where} is missing a non-empty {field!r}")
+        slices = c.get("code_slices")
+        if not isinstance(slices, list):
+            raise ValueError(f"{where} must have a 'code_slices' list")
+        for j, s in enumerate(slices):
+            if not isinstance(s, dict):
+                raise ValueError(f"{where} code_slices[{j}] must be a JSON object")
+            missing = _CODE_SLICE_FIELDS - set(s)
+            if missing:
+                raise ValueError(f"{where} code_slices[{j}] missing field(s): "
+                                 f"{', '.join(sorted(missing))}")
+            extra = set(s) - _CODE_SLICE_FIELDS
+            if extra:
+                raise ValueError(f"{where} code_slices[{j}] has unknown field(s): "
+                                 f"{', '.join(sorted(extra))}")

research-git 0.0.2__tar.gz → 0.0.3__tar.gz

research-git 0.0.2.tar.gz → 0.0.3.tar.gz