research-git 0.0.2__tar.gz → 0.0.3__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (88) hide show
  1. {research_git-0.0.2/src/research_git.egg-info → research_git-0.0.3}/PKG-INFO +1 -1
  2. {research_git-0.0.2 → research_git-0.0.3}/pyproject.toml +1 -1
  3. {research_git-0.0.2 → research_git-0.0.3/src/research_git.egg-info}/PKG-INFO +1 -1
  4. research_git-0.0.3/src/rgit/__init__.py +1 -0
  5. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/_plugin/.claude-plugin/plugin.json +1 -1
  6. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/astmap.py +60 -19
  7. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/cli.py +145 -15
  8. research_git-0.0.3/src/rgit/curation.py +98 -0
  9. research_git-0.0.3/src/rgit/gitutil.py +444 -0
  10. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/hooks.py +5 -3
  11. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/installer.py +8 -2
  12. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/provenance.py +21 -5
  13. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/runner.py +37 -11
  14. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/segmenter.py +14 -5
  15. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/store/db.py +0 -2
  16. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/store/store.py +8 -3
  17. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/toggles.py +8 -4
  18. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/watch.py +2 -2
  19. research_git-0.0.3/tests/test_astmap.py +167 -0
  20. {research_git-0.0.2 → research_git-0.0.3}/tests/test_cli.py +139 -3
  21. {research_git-0.0.2 → research_git-0.0.3}/tests/test_compare.py +18 -0
  22. {research_git-0.0.2 → research_git-0.0.3}/tests/test_curation.py +51 -0
  23. research_git-0.0.3/tests/test_gitutil.py +392 -0
  24. {research_git-0.0.2 → research_git-0.0.3}/tests/test_hooks.py +12 -0
  25. {research_git-0.0.2 → research_git-0.0.3}/tests/test_installer.py +26 -0
  26. {research_git-0.0.2 → research_git-0.0.3}/tests/test_runner.py +28 -4
  27. research_git-0.0.3/tests/test_segmenter.py +197 -0
  28. {research_git-0.0.2 → research_git-0.0.3}/tests/test_store.py +14 -0
  29. {research_git-0.0.2 → research_git-0.0.3}/tests/test_toggles.py +44 -0
  30. research_git-0.0.3/tests/test_watch.py +76 -0
  31. research_git-0.0.2/src/rgit/__init__.py +0 -1
  32. research_git-0.0.2/src/rgit/curation.py +0 -48
  33. research_git-0.0.2/src/rgit/gitutil.py +0 -112
  34. research_git-0.0.2/tests/test_astmap.py +0 -57
  35. research_git-0.0.2/tests/test_gitutil.py +0 -96
  36. research_git-0.0.2/tests/test_segmenter.py +0 -71
  37. research_git-0.0.2/tests/test_watch.py +0 -37
  38. {research_git-0.0.2 → research_git-0.0.3}/LICENSE +0 -0
  39. {research_git-0.0.2 → research_git-0.0.3}/README.md +0 -0
  40. {research_git-0.0.2 → research_git-0.0.3}/setup.cfg +0 -0
  41. {research_git-0.0.2 → research_git-0.0.3}/src/research_git.egg-info/SOURCES.txt +0 -0
  42. {research_git-0.0.2 → research_git-0.0.3}/src/research_git.egg-info/dependency_links.txt +0 -0
  43. {research_git-0.0.2 → research_git-0.0.3}/src/research_git.egg-info/entry_points.txt +0 -0
  44. {research_git-0.0.2 → research_git-0.0.3}/src/research_git.egg-info/requires.txt +0 -0
  45. {research_git-0.0.2 → research_git-0.0.3}/src/research_git.egg-info/top_level.txt +0 -0
  46. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/_plugin/.claude-plugin/marketplace.json +0 -0
  47. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/_plugin/agents/capsule-regenerator.md +0 -0
  48. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/_plugin/agents/capsule-segmenter.md +0 -0
  49. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/_plugin/agents/edge-judge.md +0 -0
  50. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/_plugin/skills/rgit-capture/SKILL.md +0 -0
  51. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/_plugin/skills/rgit-recall/SKILL.md +0 -0
  52. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/ablation.py +0 -0
  53. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/agent_guidance.py +0 -0
  54. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/agent_platforms.py +0 -0
  55. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/compare.py +0 -0
  56. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/compose.py +0 -0
  57. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/edges.py +0 -0
  58. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/graphview.py +0 -0
  59. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/mcp_server.py +0 -0
  60. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/metricdir.py +0 -0
  61. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/metrics.py +0 -0
  62. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/ranking.py +0 -0
  63. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/recall.py +0 -0
  64. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/store/__init__.py +0 -0
  65. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/store/ids.py +0 -0
  66. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/store/models.py +0 -0
  67. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/store/objects.py +0 -0
  68. {research_git-0.0.2 → research_git-0.0.3}/src/rgit/tables.py +0 -0
  69. {research_git-0.0.2 → research_git-0.0.3}/tests/test_ablation.py +0 -0
  70. {research_git-0.0.2 → research_git-0.0.3}/tests/test_active_edges.py +0 -0
  71. {research_git-0.0.2 → research_git-0.0.3}/tests/test_agent_guidance.py +0 -0
  72. {research_git-0.0.2 → research_git-0.0.3}/tests/test_compose.py +0 -0
  73. {research_git-0.0.2 → research_git-0.0.3}/tests/test_db.py +0 -0
  74. {research_git-0.0.2 → research_git-0.0.3}/tests/test_e2e.py +0 -0
  75. {research_git-0.0.2 → research_git-0.0.3}/tests/test_edges.py +0 -0
  76. {research_git-0.0.2 → research_git-0.0.3}/tests/test_graphview.py +0 -0
  77. {research_git-0.0.2 → research_git-0.0.3}/tests/test_guidance_coupling.py +0 -0
  78. {research_git-0.0.2 → research_git-0.0.3}/tests/test_mcp_server.py +0 -0
  79. {research_git-0.0.2 → research_git-0.0.3}/tests/test_metricdir.py +0 -0
  80. {research_git-0.0.2 → research_git-0.0.3}/tests/test_metricdir_store.py +0 -0
  81. {research_git-0.0.2 → research_git-0.0.3}/tests/test_metrics.py +0 -0
  82. {research_git-0.0.2 → research_git-0.0.3}/tests/test_models.py +0 -0
  83. {research_git-0.0.2 → research_git-0.0.3}/tests/test_objects.py +0 -0
  84. {research_git-0.0.2 → research_git-0.0.3}/tests/test_provenance.py +0 -0
  85. {research_git-0.0.2 → research_git-0.0.3}/tests/test_ranking.py +0 -0
  86. {research_git-0.0.2 → research_git-0.0.3}/tests/test_recall.py +0 -0
  87. {research_git-0.0.2 → research_git-0.0.3}/tests/test_review_fixes.py +0 -0
  88. {research_git-0.0.2 → research_git-0.0.3}/tests/test_tables.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: research-git
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: A memory system that captures code ideas as semantic capsules you can regenerate onto today's codebase
5
5
  Author: Stepzero Lab
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "research-git"
3
- version = "0.0.2"
3
+ version = "0.0.3"
4
4
  description = "A memory system that captures code ideas as semantic capsules you can regenerate onto today's codebase"
5
5
  readme = "README.md"
6
6
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: research-git
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: A memory system that captures code ideas as semantic capsules you can regenerate onto today's codebase
5
5
  Author: Stepzero Lab
6
6
  License-Expression: MIT
@@ -0,0 +1 @@
1
+ __version__ = "0.0.3"
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "research-git",
3
3
  "description": "A memory system for the code you're exploring: capture each idea as a semantic Feature Capsule, recall it, and regenerate it onto today's codebase. Segmentation/regeneration run on natively-dispatched subagents (your subscription) — no pay-per-use API. MCP serves the graph read-only for sharing.",
4
- "version": "0.0.2",
4
+ "version": "0.0.3",
5
5
  "author": { "name": "Stepzero Lab" },
6
6
  "license": "MIT",
7
7
  "keywords": [
@@ -6,8 +6,9 @@ from typing import Optional
6
6
  import libcst as cst
7
7
  from libcst.metadata import MetadataWrapper, PositionProvider
8
8
 
9
+ from .gitutil import _within, parse_git_diff_header
10
+
9
11
  _HUNK = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", re.M)
10
- _FILE = re.compile(r"^\+\+\+ b/(.+)$", re.M)
11
12
 
12
13
 
13
14
  def _read_python_source(path: Path) -> str:
@@ -17,21 +18,61 @@ def _read_python_source(path: Path) -> str:
17
18
  return path.read_text(encoding="utf-8-sig")
18
19
 
19
20
 
21
+ def _python_source_path(repo: Path, file: str) -> Optional[Path]:
22
+ """Repo-contained regular Python file, without following external symlinks."""
23
+ path = repo / file
24
+ if path.suffix != ".py" or not _within(repo, path):
25
+ return None
26
+ try:
27
+ return path if path.is_file() else None
28
+ except OSError:
29
+ return None
30
+
31
+
20
32
  def _changed_line_ranges(diff: str) -> dict[str, list[tuple[int, int]]]:
21
- """file -> list of (start, end) line ranges touched on the new side."""
33
+ """file -> list of (start, end) ranges of *actually changed* new-side lines.
34
+
35
+ Only added lines — plus the new-side anchor of a deletion — count; unified-diff
36
+ context lines are walked to advance the new-side line counter but never recorded.
37
+ Using the whole hunk span (header length) would treat untouched neighbouring
38
+ symbols that merely appear as context as changed (issue #10).
39
+ """
22
40
  result: dict[str, list[tuple[int, int]]] = {}
23
41
  current: Optional[str] = None
42
+ in_hunk = False
43
+ new_line = 0
44
+ hunk_start = 0
24
45
  for line in diff.splitlines():
25
- m = _FILE.match(line)
26
- if m:
27
- current = m.group(1)
28
- result.setdefault(current, [])
46
+ matched, path = parse_git_diff_header(line, "+++")
47
+ if matched:
48
+ current = path
49
+ in_hunk = False
50
+ if current is not None:
51
+ result.setdefault(current, [])
29
52
  continue
30
53
  h = _HUNK.match(line)
31
- if h and current:
32
- start = int(h.group(1))
33
- length = int(h.group(2) or "1")
34
- result[current].append((start, start + max(length, 1) - 1))
54
+ if h:
55
+ new_line = hunk_start = int(h.group(1))
56
+ in_hunk = current is not None
57
+ continue
58
+ if not in_hunk:
59
+ continue
60
+ if not line: # empty context line
61
+ new_line += 1
62
+ continue
63
+ tag = line[0]
64
+ if tag == "+": # added line -> genuinely changed
65
+ result[current].append((new_line, new_line))
66
+ new_line += 1
67
+ elif tag == "-": # deletion -> anchor to the surviving line
68
+ anchor = new_line - 1 if new_line > hunk_start else new_line
69
+ result[current].append((anchor, anchor))
70
+ elif tag == " ": # context -> advance, do not record
71
+ new_line += 1
72
+ elif tag == "\\": # "\ No newline at end of file" marker
73
+ continue
74
+ else: # non-body line ends the hunk (e.g. next `diff --git`)
75
+ in_hunk = False
35
76
  return result
36
77
 
37
78
 
@@ -62,12 +103,12 @@ def changed_symbols(diff: str, repo: Path) -> list[dict]:
62
103
  """[{file, symbol}] for each top-level def/class overlapping a diff hunk."""
63
104
  out: list[dict] = []
64
105
  for file, ranges in _changed_line_ranges(diff).items():
65
- path = repo / file
66
- if not path.suffix == ".py" or not path.exists() or not ranges:
106
+ path = _python_source_path(repo, file)
107
+ if path is None or not ranges:
67
108
  continue
68
109
  try:
69
110
  wrapper = MetadataWrapper(cst.parse_module(_read_python_source(path)))
70
- except cst.ParserSyntaxError:
111
+ except (cst.ParserSyntaxError, UnicodeDecodeError):
71
112
  continue
72
113
  finder = _SymbolFinder(ranges)
73
114
  wrapper.visit(finder)
@@ -78,12 +119,12 @@ def changed_symbols(diff: str, repo: Path) -> list[dict]:
78
119
 
79
120
  def read_symbol_source(repo: Path, file: str, symbol: str) -> Optional[str]:
80
121
  """Current source text of a top-level def/class, or None if absent."""
81
- path = repo / file
82
- if not path.exists():
122
+ path = _python_source_path(repo, file)
123
+ if path is None:
83
124
  return None
84
125
  try:
85
126
  module = cst.parse_module(_read_python_source(path))
86
- except cst.ParserSyntaxError:
127
+ except (cst.ParserSyntaxError, UnicodeDecodeError):
87
128
  return None
88
129
  for stmt in module.body:
89
130
  if isinstance(stmt, (cst.FunctionDef, cst.ClassDef)) and stmt.name.value == symbol:
@@ -93,12 +134,12 @@ def read_symbol_source(repo: Path, file: str, symbol: str) -> Optional[str]:
93
134
 
94
135
  def symbol_at_line(repo: Path, file: str, line: int) -> Optional[str]:
95
136
  """Name of the top-level def/class enclosing `line` (1-based), or None."""
96
- path = repo / file
97
- if path.suffix != ".py" or not path.exists():
137
+ path = _python_source_path(repo, file)
138
+ if path is None:
98
139
  return None
99
140
  try:
100
141
  wrapper = MetadataWrapper(cst.parse_module(_read_python_source(path)))
101
- except cst.ParserSyntaxError:
142
+ except (cst.ParserSyntaxError, UnicodeDecodeError):
102
143
  return None
103
144
  finder = _SymbolFinder([(line, line)])
104
145
  wrapper.visit(finder)
@@ -60,6 +60,60 @@ def _now() -> str:
60
60
  return datetime.datetime.now().isoformat(timespec="seconds")
61
61
 
62
62
 
63
+ def _brief(text: str, limit: int = 1200) -> str:
64
+ text = (text or "").strip()
65
+ if len(text) <= limit:
66
+ return text
67
+ return text[-limit:]
68
+
69
+
70
+ def _run_exit_code(returncode: int) -> int:
71
+ return returncode if returncode > 0 else 1
72
+
73
+
74
+ def _diff_text(store: Store, diff_ref: Optional[str]) -> str:
75
+ return store.objects.get(diff_ref).decode(errors="replace") if diff_ref else ""
76
+
77
+
78
+ def _skip_notices(diff: str) -> list[str]:
79
+ return [line for line in diff.splitlines()
80
+ if line.startswith("research-git: skipped ")]
81
+
82
+
83
+ def _print_skip_summary(diff: str, indent: str = "") -> None:
84
+ notices = _skip_notices(diff)
85
+ if not notices:
86
+ return
87
+ print(f"{indent}warning: skipped {len(notices)} file(s); "
88
+ "run `rgit pending --json` for details")
89
+
90
+
91
+ def _print_run_result(result, store: Store) -> None:
92
+ prop_id = result.proposal_id
93
+ if prop_id is None:
94
+ print(f"run {result.run_id} recorded; no code changes to capture")
95
+ else:
96
+ prop = store.get_proposal(prop_id)
97
+ print(f"run {result.run_id} recorded; proposal {prop_id} awaiting review")
98
+ _print_skip_summary(_diff_text(store, prop.diff_ref), indent=" ")
99
+ if not prop.candidates:
100
+ print(" note: proposal has 0 candidates; run `rgit pending --json`, "
101
+ "then `rgit resegment <proposal_id> --from-json <path>`")
102
+ if result.metrics:
103
+ metrics = ", ".join(f"{k}={v}" for k, v in result.metrics.items())
104
+ print(f" metrics: {metrics}")
105
+ if result.returncode != 0:
106
+ print(f" command exited with status {result.returncode}")
107
+ err = _brief(result.stderr)
108
+ out = _brief(result.stdout)
109
+ if err:
110
+ print(" stderr:")
111
+ print(err)
112
+ if out:
113
+ print(" stdout:")
114
+ print(out)
115
+
116
+
63
117
  def build_parser() -> argparse.ArgumentParser:
64
118
  parser = argparse.ArgumentParser(prog="rgit")
65
119
  sub = parser.add_subparsers(dest="cmd", required=True)
@@ -161,7 +215,23 @@ def build_parser() -> argparse.ArgumentParser:
161
215
  return parser
162
216
 
163
217
 
218
+ def _force_utf8_stdio() -> None:
219
+ """Make stdout/stderr UTF-8 so non-ASCII output can't raise UnicodeEncodeError.
220
+
221
+ On Windows the console/pipe defaults to the locale codepage (e.g. cp936),
222
+ which can't encode glyphs we emit (•, box-drawing, arrows) or arbitrary
223
+ unicode in capsule names/intents. Kept in its own function so it does not
224
+ depend on `main`'s local `import sys`.
225
+ """
226
+ for stream in (sys.stdout, sys.stderr):
227
+ try:
228
+ stream.reconfigure(encoding="utf-8")
229
+ except (AttributeError, ValueError):
230
+ pass
231
+
232
+
164
233
  def main(argv: Optional[list[str]] = None) -> int:
234
+ _force_utf8_stdio()
165
235
  parser = build_parser()
166
236
  args = parser.parse_args(argv)
167
237
 
@@ -215,6 +285,9 @@ def main(argv: Optional[list[str]] = None) -> int:
215
285
 
216
286
  if args.cmd == "run":
217
287
  cmd = args.rest[1:] if args.rest and args.rest[0] == "--" else args.rest
288
+ if not cmd:
289
+ print("no command provided; use `rgit run -- <command>`")
290
+ return 1
218
291
  active = None
219
292
  if args.active:
220
293
  # accept repeated --with and comma-separated names/ids; resolve to ids
@@ -224,36 +297,63 @@ def main(argv: Optional[list[str]] = None) -> int:
224
297
  except KeyError as e:
225
298
  print(str(e).strip('"'))
226
299
  return 1
227
- run_id, prop_id = run_experiment(store, cmd, _segmenter(), now=_now(),
228
- from_features=args.from_features,
229
- active=active)
300
+ result = run_experiment(store, cmd, _segmenter(), now=_now(),
301
+ from_features=args.from_features,
302
+ active=active)
230
303
  if args.refresh_guide_file and args.from_features:
231
304
  from pathlib import Path
232
305
  guide = Path(args.refresh_guide_file).read_text(encoding="utf-8")
233
306
  for src in args.from_features:
234
307
  store.update_capsule(src, resurrection_guide=guide)
235
- print(f"run {run_id} recorded; proposal {prop_id} awaiting review")
308
+ _print_run_result(result, store)
236
309
  if args.from_features:
237
310
  print(f" linked as variant_of: {', '.join(args.from_features)}")
238
- return 0
311
+ return 0 if result.returncode == 0 else _run_exit_code(result.returncode)
239
312
 
240
313
  if args.cmd == "capture":
241
314
  pid = segment_diff(store, args.trigger, _segmenter(), run_id=None, now=_now())
315
+ if pid is None:
316
+ print("nothing to capture (working tree has no diff)")
317
+ return 0
318
+ prop = store.get_proposal(pid)
242
319
  print(f"proposal {pid} created")
320
+ _print_skip_summary(_diff_text(store, prop.diff_ref))
321
+ if not prop.candidates:
322
+ print("note: proposal has 0 candidates; run `rgit pending --json`, "
323
+ "then `rgit resegment <proposal_id> --from-json <path>`")
243
324
  return 0
244
325
 
245
326
  if args.cmd == "review":
246
327
  if args.dismiss:
247
- dismiss(store, args.dismiss)
328
+ try:
329
+ dismiss(store, args.dismiss)
330
+ except (KeyError, ValueError) as e:
331
+ print(str(e))
332
+ return 1
248
333
  print(f"dismissed {args.dismiss}")
249
334
  return 0
250
335
  if args.approve:
251
- fid = approve(store, args.approve, args.index, args.name)
336
+ try:
337
+ fid = approve(store, args.approve, args.index, args.name)
338
+ except (KeyError, ValueError) as e:
339
+ print(str(e))
340
+ print("hint: inspect with `rgit pending --json`; if there are "
341
+ "0 candidates, resegment before approving.")
342
+ return 1
252
343
  print(f"approved -> feature {fid}")
253
344
  return 0
254
- for p in store.list_proposals("open"):
345
+ proposals = store.list_proposals("open")
346
+ if not proposals:
347
+ print("no pending proposals")
348
+ return 0
349
+ for p in proposals:
255
350
  names = ", ".join(c["name"] for c in p.candidates)
256
- print(f"{p.id} [{p.trigger}] candidates: {names}")
351
+ if names:
352
+ print(f"{p.id} [{p.trigger}] candidates: {names}")
353
+ else:
354
+ print(f"{p.id} [{p.trigger}] 0 candidate(s); "
355
+ "resegment before approving")
356
+ _print_skip_summary(_diff_text(store, p.diff_ref), indent=" ")
257
357
  return 0
258
358
 
259
359
  if args.cmd == "features":
@@ -289,23 +389,44 @@ def main(argv: Optional[list[str]] = None) -> int:
289
389
  if args.cmd == "pending":
290
390
  items = []
291
391
  for p in store.list_proposals("open"):
292
- diff = store.objects.get(p.diff_ref).decode() if p.diff_ref else ""
392
+ diff = _diff_text(store, p.diff_ref)
293
393
  items.append({"proposal_id": p.id, "trigger": p.trigger,
294
394
  "diff": diff, "candidates": p.candidates})
295
395
  if args.json:
296
396
  print(json.dumps(items, indent=2, ensure_ascii=False))
297
397
  else:
398
+ if not items:
399
+ print("no pending proposals")
400
+ return 0
298
401
  for it in items:
299
402
  print(f"{it['proposal_id']} [{it['trigger']}] "
300
403
  f"{len(it['candidates'])} candidate(s)")
404
+ _print_skip_summary(it["diff"], indent=" ")
301
405
  return 0
302
406
 
303
407
  if args.cmd == "resegment":
304
408
  import sys
305
409
  from pathlib import Path
306
- raw = sys.stdin.read() if args.from_json == "-" else Path(args.from_json).read_text(encoding="utf-8")
307
- candidates = json.loads(raw)
308
- store.set_proposal_candidates(args.proposal_id, candidates)
410
+ if args.from_json == "-":
411
+ # Read stdin as bytes and decode UTF-8: the host agent pipes UTF-8
412
+ # JSON, but sys.stdin.read() would decode with the locale codepage
413
+ # (cp936 on Windows), corrupting non-ASCII intents/names. Fall back to
414
+ # sys.stdin.read() when there is no binary buffer (e.g. patched stdin).
415
+ _buf = getattr(sys.stdin, "buffer", None)
416
+ raw = _buf.read().decode("utf-8") if _buf is not None else sys.stdin.read()
417
+ else:
418
+ raw = Path(args.from_json).read_text(encoding="utf-8")
419
+ from .curation import validate_candidates
420
+ try:
421
+ candidates = json.loads(raw)
422
+ validate_candidates(candidates)
423
+ store.set_proposal_candidates(args.proposal_id, candidates)
424
+ except json.JSONDecodeError as e:
425
+ print(f"invalid JSON: {e}")
426
+ return 1
427
+ except (KeyError, ValueError) as e:
428
+ print(str(e))
429
+ return 1
309
430
  print(f"resegmented {args.proposal_id}: {len(candidates)} candidate(s)")
310
431
  return 0
311
432
 
@@ -314,7 +435,15 @@ def main(argv: Optional[list[str]] = None) -> int:
314
435
  if args.once:
315
436
  snap = watchmod.snapshot(store)
316
437
  _, pid = watchmod.tick(store, snap, _now())
317
- print(f"staged proposal {pid}" if pid else "nothing to capture")
438
+ if pid:
439
+ prop = store.get_proposal(pid)
440
+ print(f"staged proposal {pid}")
441
+ _print_skip_summary(_diff_text(store, prop.diff_ref))
442
+ if not prop.candidates:
443
+ print("note: proposal has 0 candidates; run `rgit pending --json`, "
444
+ "then `rgit resegment <proposal_id> --from-json <path>`")
445
+ else:
446
+ print("nothing to capture")
318
447
  return 0
319
448
  watchmod.loop(store, interval=args.interval, idle=args.idle, now_fn=_now)
320
449
  return 0
@@ -413,5 +542,6 @@ def _find_root():
413
542
  import subprocess
414
543
  from pathlib import Path
415
544
  out = subprocess.run(["git", "rev-parse", "--show-toplevel"],
416
- capture_output=True, text=True, check=True)
545
+ capture_output=True, text=True, check=True,
546
+ encoding="utf-8", errors="replace")
417
547
  return Path(out.stdout.strip())
@@ -0,0 +1,98 @@
1
+ from __future__ import annotations
2
+ from typing import Optional
3
+
4
+ from .gitutil import current_commit
5
+ from .store.models import Capsule, CodeSlice
6
+ from .store.store import Store
7
+
8
+
9
+ def approve(store: Store, proposal_id: str, candidate_index: int = 0,
10
+ name: Optional[str] = None) -> str:
11
+ """Turn one candidate into an approved Capsule; link it to the run.
12
+
13
+ When `name` matches a candidate's own name, that candidate is selected by
14
+ name (and `candidate_index` is ignored). This is the robust path for a
15
+ proposal with several candidates: `--approve <pid> --name <candidate-name>`
16
+ always picks the right one, so a forgotten `--index` can't silently approve
17
+ (and mislabel) the wrong candidate. Otherwise `candidate_index` is used.
18
+ """
19
+ prop = store.get_proposal(proposal_id)
20
+ if prop.status != "open":
21
+ raise ValueError(
22
+ f"proposal {proposal_id!r} is {prop.status}, not open; cannot approve "
23
+ f"(re-approving would create a duplicate capsule)")
24
+ if not prop.candidates:
25
+ raise ValueError(f"proposal {proposal_id!r} has no candidates to approve")
26
+ by_name = [i for i, c in enumerate(prop.candidates) if c.get("name") == name]
27
+ if name is not None and not by_name:
28
+ # A typo must fail loudly, not silently approve (and mislabel) candidate 0.
29
+ available = [c.get("name") for c in prop.candidates]
30
+ raise ValueError(
31
+ f"no candidate named {name!r} in proposal {proposal_id!r}; "
32
+ f"available: {available}")
33
+ idx = by_name[0] if name is not None else candidate_index
34
+ if idx < 0 or idx >= len(prop.candidates):
35
+ raise ValueError(
36
+ f"candidate index {idx} out of range for proposal {proposal_id!r} "
37
+ f"with {len(prop.candidates)} candidate(s)")
38
+ cand = prop.candidates[idx]
39
+ cap = Capsule(
40
+ id="", name=name or cand["name"], intent=cand["intent"],
41
+ status="approved", base_commit=current_commit(store.root),
42
+ knobs=cand.get("knobs", {}), data_assumptions=cand.get("data_assumptions"),
43
+ resurrection_guide=cand.get("resurrection_guide"), result_summary=None,
44
+ payload_hash=None,
45
+ code_slices=[CodeSlice(**c) for c in cand["code_slices"]])
46
+ fid = store.add_feature(cap)
47
+ for slice_ in cap.code_slices: # touches edges
48
+ store.add_edge(fid, f"module:{slice_.file}", "touches")
49
+ if prop.run_id: # produced edge
50
+ store.add_edge(fid, prop.run_id, "produced")
51
+ for src in (prop.from_features or []): # regenerated from -> variant_of
52
+ store.add_edge(fid, src, "variant_of")
53
+ store.set_proposal_status(proposal_id, "resolved")
54
+ return fid
55
+
56
+
57
+ def dismiss(store: Store, proposal_id: str) -> None:
58
+ prop = store.get_proposal(proposal_id)
59
+ if prop.status != "open":
60
+ raise ValueError(
61
+ f"proposal {proposal_id!r} is {prop.status}, not open; cannot dismiss")
62
+ store.set_proposal_status(proposal_id, "dismissed")
63
+
64
+
65
+ _CODE_SLICE_FIELDS = {"file", "symbol", "anchor", "code", "kind"}
66
+
67
+
68
+ def validate_candidates(candidates: object) -> None:
69
+ """Reject malformed candidate input before it is stored.
70
+
71
+ `resegment` accepts arbitrary JSON from the host agent; without this a
72
+ missing/extra field only surfaces later as an uncaught KeyError/TypeError in
73
+ `approve()` or the `review` listing. Raises ValueError with a clear message.
74
+ An empty list is valid (a deliberate 0-candidate proposal).
75
+ """
76
+ if not isinstance(candidates, list):
77
+ raise ValueError("candidates must be a JSON list of candidate objects")
78
+ for i, c in enumerate(candidates):
79
+ where = f"candidate {i}"
80
+ if not isinstance(c, dict):
81
+ raise ValueError(f"{where} must be a JSON object")
82
+ for field in ("name", "intent"):
83
+ if not isinstance(c.get(field), str) or not c[field].strip():
84
+ raise ValueError(f"{where} is missing a non-empty {field!r}")
85
+ slices = c.get("code_slices")
86
+ if not isinstance(slices, list):
87
+ raise ValueError(f"{where} must have a 'code_slices' list")
88
+ for j, s in enumerate(slices):
89
+ if not isinstance(s, dict):
90
+ raise ValueError(f"{where} code_slices[{j}] must be a JSON object")
91
+ missing = _CODE_SLICE_FIELDS - set(s)
92
+ if missing:
93
+ raise ValueError(f"{where} code_slices[{j}] missing field(s): "
94
+ f"{', '.join(sorted(missing))}")
95
+ extra = set(s) - _CODE_SLICE_FIELDS
96
+ if extra:
97
+ raise ValueError(f"{where} code_slices[{j}] has unknown field(s): "
98
+ f"{', '.join(sorted(extra))}")