ai-push-hooks 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,308 @@
+from __future__ import annotations
+
+import json
+import os
+import pathlib
+import re
+import shutil
+from dataclasses import dataclass
+from typing import Any
+
+from ..types import HookError, RuntimeContext, StepConfig
+from .exec import ensure_dir, extract_pr_url, resolve_storage_path, run_command
+
+
+@dataclass
+class OpenCodeRunResult:
+    output_text: str
+    session_id: str | None
+    stdout: str
+    stderr: str
+    return_code: int
+
+
+def sanitize_filename_component(value: str) -> str:
+    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "-", value.strip())
+    return cleaned.strip("-") or "value"
+
+
+def prefer_opencode_cli_candidate(candidate: str) -> str:
+    path = pathlib.Path(candidate)
+    if path.name != "opencode":
+        return candidate
+    sibling = path.with_name("opencode-cli")
+    if sibling.exists() and os.access(sibling, os.X_OK):
+        return str(sibling)
+    return candidate
+
+
+def resolve_opencode_executable() -> str:
+    cli_path = shutil.which("opencode-cli")
+    if cli_path:
+        return cli_path
+    opencode_path = shutil.which("opencode")
+    if opencode_path:
+        return prefer_opencode_cli_candidate(opencode_path)
+    raise HookError("opencode is required but not installed")
+
+
+def parse_opencode_json_run_output(raw: str) -> tuple[str | None, str]:
+    session_id: str | None = None
+    parts: list[str] = []
+    for line in raw.splitlines():
+        payload = line.strip()
+        if not payload:
+            continue
+        try:
+            event = json.loads(payload)
+        except json.JSONDecodeError:
+            continue
+        if not isinstance(event, dict):
+            continue
+        if session_id is None and isinstance(event.get("sessionID"), str):
+            session_id = str(event["sessionID"]).strip()
+        if event.get("type") != "text":
+            continue
+        part = event.get("part")
+        if isinstance(part, dict) and isinstance(part.get("text"), str) and part["text"].strip():
+            parts.append(part["text"])
+    return session_id, "\n".join(parts).strip()
+
+
+def extract_json_array(text: str) -> list[Any]:
+    start = text.find("[")
+    end = text.rfind("]")
+    if start < 0 or end < start:
+        raise HookError("Could not find JSON array in model output")
+    try:
+        payload = json.loads(text[start : end + 1])
+    except json.JSONDecodeError as exc:
+        raise HookError(f"Failed to parse JSON array from model output: {exc}") from exc
+    if not isinstance(payload, list):
+        raise HookError("Model output JSON is not an array")
+    return payload
+
+
+def extract_json_object(text: str) -> dict[str, Any]:
+    start = text.find("{")
+    end = text.rfind("}")
+    if start < 0 or end < start:
+        raise HookError("Could not find JSON object in model output")
+    try:
+        payload = json.loads(text[start : end + 1])
+    except json.JSONDecodeError as exc:
+        raise HookError(f"Failed to parse JSON object from model output: {exc}") from exc
+    if not isinstance(payload, dict):
+        raise HookError("Model output JSON is not an object")
+    return payload
+
+
+def validate_schema(schema: str | None, payload: Any) -> Any:
+    if schema is None:
+        return payload
+    if schema == "string_array":
+        if not isinstance(payload, list) or not all(isinstance(item, str) for item in payload):
+            raise HookError("Expected schema string_array")
+        return payload
+    if schema == "docs_issue_array":
+        if not isinstance(payload, list):
+            raise HookError("Expected schema docs_issue_array")
+        for item in payload:
+            if not isinstance(item, dict):
+                raise HookError("docs_issue_array items must be objects")
+            if not str(item.get("file", "")).strip() or not str(item.get("description", "")).strip():
+                raise HookError("docs_issue_array items require file and description")
+        return payload
+    if schema == "beads_alignment_result":
+        if not isinstance(payload, dict):
+            raise HookError("Expected schema beads_alignment_result")
+        commands = payload.get("commands", [])
+        if commands is not None and (
+            not isinstance(commands, list) or not all(isinstance(item, str) for item in commands)
+        ):
+            raise HookError("beads_alignment_result.commands must be an array of strings")
+        return payload
+    if schema == "pr_create_payload":
+        if not isinstance(payload, dict):
+            raise HookError("Expected schema pr_create_payload")
+        return payload
+    raise HookError(f"Unsupported schema: {schema}")
+
+
+def _transcript_dir(context: RuntimeContext) -> pathlib.Path | None:
+    if not context.config.logging.capture_llm_transcript:
+        return None
+    return ensure_dir(
+        resolve_storage_path(
+            context.repo_root,
+            context.git_dir,
+            context.config.logging.transcript_dir,
+        )
+    )
+
+
+def export_opencode_session_json(
+    context: RuntimeContext,
+    session_id: str,
+    export_path: pathlib.Path,
+) -> bool:
+    completed = run_command(
+        [context.opencode_executable or resolve_opencode_executable(), "export", session_id],
+        cwd=context.repo_root,
+        timeout=context.config.llm.timeout_seconds,
+        check=False,
+    )
+    if completed.returncode != 0:
+        return False
+    payload = (completed.stdout or "").strip()
+    if not payload:
+        return False
+    export_path.write_text(payload + "\n", encoding="utf-8")
+    return True
+
+
+def delete_opencode_session(context: RuntimeContext, session_id: str) -> None:
+    run_command(
+        [context.opencode_executable or resolve_opencode_executable(), "session", "delete", session_id],
+        cwd=context.repo_root,
+        timeout=context.config.llm.timeout_seconds,
+        check=False,
+    )
+
+
+def finalize_opencode_session(context: RuntimeContext, stage_name: str, session_id: str | None) -> None:
+    if not session_id:
+        return
+    transcript_dir = _transcript_dir(context)
+    if transcript_dir is not None:
+        export_name = (
+            f"{sanitize_filename_component(context.run_id)}-"
+            f"{sanitize_filename_component(stage_name)}-"
+            f"{sanitize_filename_component(session_id)}.json"
+        )
+        export_opencode_session_json(context, session_id, transcript_dir / export_name)
+    if context.config.llm.delete_session_after_run:
+        delete_opencode_session(context, session_id)
+
+
+def call_opencode(
+    context: RuntimeContext,
+    stage_name: str,
+    purpose: str,
+    prompt: str,
+    files: list[pathlib.Path],
+    attempt: int | None = None,
+    total_attempts: int | None = None,
+    existing_session_id: str | None = None,
+) -> OpenCodeRunResult:
+    executable = context.opencode_executable or resolve_opencode_executable()
+    context.logger.llm_call(stage_name, purpose, context.config.llm.model, attempt, total_attempts)
+    cmd = [
+        executable,
+        "run",
+        "--format",
+        "json",
+        "--model",
+        context.config.llm.model,
+    ]
+    if context.config.llm.variant:
+        cmd.extend(["--variant", context.config.llm.variant])
+    if existing_session_id:
+        cmd.extend(["--session", existing_session_id])
+    else:
+        cmd.extend(["--title", f"{context.config.llm.session_title_prefix} {context.run_id} {stage_name}"])
+    for file_path in files:
+        cmd.extend(["--file", str(file_path)])
+    cmd.extend(["--", prompt])
+
+    completed = run_command(
+        cmd,
+        cwd=context.repo_root,
+        timeout=context.config.llm.timeout_seconds,
+        check=False,
+        env={"OPENCODE_SERVER_PASSWORD": None},
+    )
+    session_id, text_output = parse_opencode_json_run_output(completed.stdout or "")
+    stdout = completed.stdout or ""
+    stderr = completed.stderr or ""
+    if context.config.logging.print_llm_output and stdout.strip():
+        print(stdout)
+    return OpenCodeRunResult(
+        output_text=text_output if text_output else stdout.strip(),
+        session_id=session_id or existing_session_id,
+        stdout=stdout,
+        stderr=stderr,
+        return_code=completed.returncode,
+    )
+
+
+def run_llm_step(
+    context: RuntimeContext,
+    step: StepConfig,
+    prompt: str,
+    input_paths: list[pathlib.Path],
+    stage_name: str,
+) -> Any:
+    total_attempts = context.config.llm.json_max_retries + 1
+    session_id: str | None = None
+    prompt_text = prompt
+    last_error = ""
+    last_output = ""
+    wants_json = bool(step.schema)
+    for attempt in range(1, total_attempts + 1):
+        result = call_opencode(
+            context,
+            stage_name=stage_name,
+            purpose=f"{step.type}:{step.id}",
+            prompt=prompt_text,
+            files=input_paths,
+            attempt=attempt,
+            total_attempts=total_attempts,
+            existing_session_id=session_id,
+        )
+        session_id = result.session_id
+        if result.return_code != 0:
+            finalize_opencode_session(context, stage_name, session_id)
+            details = result.stderr.strip() or result.stdout.strip() or f"exit code {result.return_code}"
+            raise HookError(f"OpenCode command failed: {details}")
+        try:
+            if not wants_json:
+                finalize_opencode_session(context, stage_name, session_id)
+                return result.output_text
+            # Array schemas (string_array, docs_issue_array) need array extraction;
+            # the remaining schemas expect a JSON object.
+            if step.schema in {"string_array", "docs_issue_array"}:
+                payload = extract_json_array(result.output_text)
+            else:
+                payload = extract_json_object(result.output_text)
+            finalize_opencode_session(context, stage_name, session_id)
+            return validate_schema(step.schema, payload)
+        except HookError as exc:
+            last_error = str(exc)
+            last_output = result.output_text
+            if attempt >= total_attempts:
+                break
+            snippet = last_output[: context.config.llm.invalid_json_feedback_max_chars]
+            if step.schema in {"string_array", "docs_issue_array"}:
+                suffix = "Return ONLY valid JSON array."
+            else:
+                suffix = "Return ONLY valid JSON object."
+            prompt_text = (
+                prompt
+                + "\n\nIMPORTANT: Your previous response was invalid JSON and could not be parsed.\n"
+                + f"Parse error: {last_error}\n"
+                + suffix
+                + "\nPrevious invalid output:\n```text\n"
+                + snippet
+                + "\n```"
+            )
+            if context.config.llm.json_retry_new_session:
+                session_id = None
+    pr_url = extract_pr_url(last_output)
+    if pr_url:
+        context.logger.info(
+            "llm.invalid_json_pr_url_hint",
+            "Detected PR URL in invalid JSON output",
+            stage_name=stage_name,
+            url=pr_url,
+        )
+    finalize_opencode_session(context, stage_name, session_id)
+    raise HookError(f"Model failed to return valid JSON for {stage_name}: {last_error}. {last_output[:400]}")
@@ -0,0 +1,130 @@
+from __future__ import annotations
+
+import json
+import pathlib
+import sys
+from typing import Sequence
+
+from .artifacts import ArtifactStore, generate_run_id
+from .config import load_config
+from .engine import WorkflowEngine
+from .executors.exec import (
+    collect_changed_files,
+    collect_diff,
+    collect_ranges_from_stdin,
+    current_branch,
+    ensure_dir,
+    git,
+    resolve_git_dir,
+    resolve_repo_root,
+    resolve_storage_path,
+    should_skip_for_sync_branch,
+)
+from .executors.llm import resolve_opencode_executable
+from .types import HookConfig, HookError, HookLogger, RuntimeContext
+
+
+def _build_logger(repo_root: pathlib.Path, git_dir: pathlib.Path, config: HookConfig) -> HookLogger:
+    jsonl_path = None
+    if config.logging.jsonl:
+        log_dir = ensure_dir(resolve_storage_path(repo_root, git_dir, config.logging.dir))
+        if log_dir is not None:
+            jsonl_path = log_dir / "hook.jsonl"
+    return HookLogger(jsonl_path=jsonl_path, console_level=config.logging.level)
+
+
+def _write_summary(context: RuntimeContext, result: dict[str, object]) -> None:
+    summary_dir = ensure_dir(
+        resolve_storage_path(context.repo_root, context.git_dir, context.config.logging.summary_dir)
+    )
+    if summary_dir is None:
+        return
+    summary_path = summary_dir / f"{context.run_id}.json"
+    summary_path.write_text(json.dumps(result, ensure_ascii=True, indent=2) + "\n", encoding="utf-8")
+
+
+def _assert_clean_worktree(repo_root: pathlib.Path) -> None:
+    status = git(repo_root, ["status", "--short"], check=False).strip()
+    if status:
+        raise HookError("Hook requires a clean worktree but local changes are present")
+
+
+def run_hook(
+    remote_name: str = "",
+    remote_url: str = "",
+    stdin_lines: Sequence[str] | None = None,
+    cwd: pathlib.Path | None = None,
+) -> int:
+    current_dir = cwd or pathlib.Path.cwd()
+    repo_root = resolve_repo_root(current_dir)
+    git_dir = resolve_git_dir(repo_root)
+    config, _config_path = load_config(repo_root)
+    logger = _build_logger(repo_root, git_dir, config)
+
+    if not config.general.enabled:
+        logger.status("hook.disabled", "AI push hooks disabled")
+        return 0
+    if config.general.require_clean_worktree:
+        _assert_clean_worktree(repo_root)
+    if config.general.skip_on_sync_branch:
+        skip_sync, reason = should_skip_for_sync_branch(repo_root)
+        if skip_sync:
+            logger.status("hook.skip_sync_branch", f"Skipping AI push hooks: {reason}")
+            return 0
+
+    actual_stdin = list(stdin_lines) if stdin_lines is not None else [line.rstrip("\n") for line in sys.stdin]
+    ranges = collect_ranges_from_stdin(repo_root, remote_name or "origin", actual_stdin)
+    changed_files = collect_changed_files(repo_root, ranges) if ranges else []
+    diff_text = collect_diff(repo_root, ranges, config.llm.max_diff_bytes) if ranges else ""
+    run_id = generate_run_id()
+    run_dir = resolve_storage_path(repo_root, git_dir, f".git/ai-push-hooks/runs/{run_id}")
+
+    opencode_executable = None
+    if any(
+        step.type in {"llm", "apply"}
+        for module in config.modules.values()
+        for step in module.steps
+        if module.enabled
+    ):
+        opencode_executable = resolve_opencode_executable()
+
+    context = RuntimeContext(
+        repo_root=repo_root,
+        git_dir=git_dir,
+        config=config,
+        logger=logger,
+        remote_name=remote_name or "origin",
+        remote_url=remote_url,
+        stdin_lines=actual_stdin,
+        run_id=run_id,
+        run_dir=run_dir,
+        opencode_executable=opencode_executable,
+        cache={
+            "ranges": ranges,
+            "changed_files": changed_files,
+            "diff_text": diff_text,
+            "branch_name": current_branch(repo_root),
+            "sync_branch": "beads-sync",
+        },
+    )
+    logger.status(
+        "hook.start",
+        "Starting AI push hooks workflow",
+        branch=context.cache["branch_name"],
+        changed_files=len(changed_files),
+        ranges=ranges,
+    )
+    engine = WorkflowEngine(context=context, artifacts=ArtifactStore(run_dir))
+    try:
+        workflow_result = engine.run()
+        logger.llm_summary()
+        _write_summary(context, {"run_dir": str(workflow_result.run_dir), "modules": workflow_result.modules})
+        logger.status("hook.complete", "AI push hooks workflow completed", run_dir=str(workflow_result.run_dir))
+        return 0
+    except Exception as exc:  # noqa: BLE001
+        message = str(exc).strip() or exc.__class__.__name__
+        logger.error("hook.failed", "AI push hooks workflow failed", error=message)
+        if config.general.allow_push_on_error:
+            logger.warn("hook.fail_open", "Allowing push because allow_push_on_error=true", error=message)
+            return 0
+        raise
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .beads import collect_beads_status_context
+from .docs import collect_docs_context
+from .pr import collect_pr_context
+
+COLLECTORS = {
+    "docs_context": collect_docs_context,
+    "beads_status_context": collect_beads_status_context,
+    "pr_context": collect_pr_context,
+}
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from typing import Any
+
+from ..executors.exec import collect_commit_messages_for_ranges, current_branch, is_feature_branch
+from ..types import CollectorResult, RuntimeContext
+
+
+def collect_beads_status_context(context: RuntimeContext, state: Any) -> CollectorResult:
+    branch_name = current_branch(context.repo_root)
+    sync_branch = context.cache.get("sync_branch", "beads-sync")
+    if not branch_name or branch_name in {"HEAD", "main", sync_branch} or not is_feature_branch(branch_name):
+        return CollectorResult(
+            artifacts={"branch-context.txt": f"branch={branch_name}\n"},
+            skip_module=True,
+            skip_reason="branch does not require beads alignment",
+        )
+
+    ranges = context.cache.get("ranges", [])
+    changed_files = context.cache.get("changed_files", [])
+    diff_text = context.cache.get("diff_text", "")
+    commits = collect_commit_messages_for_ranges(context.repo_root, ranges) if ranges else []
+    report_file = "BEADS_STATUS_ACTION_REQUIRED.md"
+    commit_lines = []
+    for commit in commits:
+        commit_lines.append(f"--- {commit['hash']}")
+        commit_lines.append(f"subject: {commit['subject']}")
+        if commit["body"]:
+            commit_lines.append("body:")
+            commit_lines.append(commit["body"])
+        commit_lines.append("")
+    return CollectorResult(
+        artifacts={
+            "branch-context.txt": "\n".join(
+                [
+                    f"branch={branch_name}",
+                    f"ranges={','.join(ranges)}",
+                    f"report_file={report_file}",
+                ]
+            )
+            + "\n",
+            "changed-files.txt": "\n".join(changed_files) + ("\n" if changed_files else ""),
+            "push.diff": diff_text + ("\n" if diff_text and not diff_text.endswith("\n") else ""),
+            "commits.txt": "\n".join(commit_lines).strip() + ("\n" if commit_lines else ""),
+        }
+    )
@@ -0,0 +1,159 @@
+from __future__ import annotations
+
+import pathlib
+import re
+import shutil
+from pathlib import PurePosixPath
+from typing import Any
+
+from ..executors.exec import collect_commit_messages_for_ranges, git, run_command
+from ..types import CollectorResult, RuntimeContext
+
+DOC_INCLUDE_PATTERNS = ("README.md", "docs/**/*.md")
+DOC_IGNORE_PATTERNS = ("docs/archive/**",)
+
+
+def _path_matches(path: str, patterns: tuple[str, ...]) -> bool:
+    pure = PurePosixPath(path)
+    return any(pure.match(pattern) for pattern in patterns)
+
+
+def _expand_doc_files(repo_root: pathlib.Path) -> list[pathlib.Path]:
+    files: list[pathlib.Path] = []
+    for candidate in repo_root.rglob("*.md"):
+        relative = candidate.relative_to(repo_root).as_posix()
+        if not _path_matches(relative, DOC_INCLUDE_PATTERNS):
+            continue
+        if _path_matches(relative, DOC_IGNORE_PATTERNS):
+            continue
+        files.append(candidate)
+    return sorted(files)
+
+
+def _deterministic_seed_queries(diff_text: str, changed_files: list[str]) -> list[str]:
+    stopwords = {
+        "const",
+        "return",
+        "value",
+        "false",
+        "true",
+        "string",
+        "number",
+        "object",
+        "class",
+        "function",
+        "public",
+        "private",
+        "static",
+        "async",
+        "await",
+        "import",
+        "export",
+        "from",
+        "default",
+        "docs",
+        "readme",
+    }
+    seeds: list[str] = []
+    for changed in changed_files:
+        pure = PurePosixPath(changed)
+        if len(pure.stem) >= 4:
+            seeds.append(pure.stem)
+        for segment in pure.parts:
+            if len(segment) >= 4 and segment not in {"docs", "src", "tests"}:
+                seeds.append(segment)
+    seeds.extend(re.findall(r"\b[A-Za-z][A-Za-z0-9_.-]{3,}\b", diff_text))
+    deduped: list[str] = []
+    seen: set[str] = set()
+    for seed in seeds:
+        clean = seed.strip()
+        if clean.lower() in stopwords or clean in seen:
+            continue
+        seen.add(clean)
+        deduped.append(clean)
+    return deduped[:20]
+
+
+def _parse_rg_line(line: str) -> tuple[str, int, str] | None:
+    match = re.match(r"^(.*?):(\d+):(.*)$", line)
+    if not match:
+        return None
+    return match.group(1), int(match.group(2)), match.group(3)
+
+
+def _search_docs_context(repo_root: pathlib.Path, doc_files: list[pathlib.Path], queries: list[str]) -> str:
+    if not doc_files:
+        return ""
+    if shutil.which("rg") is None or not queries:
+        snippets: list[str] = []
+        budget = 32000
+        for path in doc_files[:8]:
+            relative = path.relative_to(repo_root).as_posix()
+            content = path.read_text(encoding="utf-8")[:4000]
+            block = f"--- {relative} ---\n{content}"
+            if len("\n\n".join(snippets)) + len(block) > budget:
+                break
+            snippets.append(block)
+        return "\n\n".join(snippets)
+
+    files = [str(path) for path in doc_files]
+    chunks: list[str] = []
+    seen: set[tuple[str, int]] = set()
+    for query in queries:
+        completed = run_command(
+            ["rg", "--line-number", "--no-heading", "--color=never", "-C", "2", "--", query, *files],
+            cwd=repo_root,
+            check=False,
+        )
+        if completed.returncode not in {0, 1}:
+            continue
+        for line in completed.stdout.splitlines():
+            parsed = _parse_rg_line(line)
+            if not parsed:
+                continue
+            file_name, line_number, content = parsed
+            key = (file_name, line_number)
+            if key in seen:
+                continue
+            seen.add(key)
+            chunks.append(f"{file_name}:{line_number}: {content}")
+            if sum(len(chunk) for chunk in chunks) > 32000:
+                return "\n".join(chunks)
+    return "\n".join(chunks)
+
+
+def collect_docs_context(context: RuntimeContext, _state: Any) -> CollectorResult:
+    ranges = context.cache.get("ranges", [])
+    changed_files = context.cache.get("changed_files", [])
+    diff_text = context.cache.get("diff_text", "")
+    doc_files = _expand_doc_files(context.repo_root)
+    docs_context = _search_docs_context(
+        context.repo_root,
+        doc_files,
+        _deterministic_seed_queries(diff_text, changed_files),
+    )
+    recent_commits = git(
+        context.repo_root,
+        ["log", "--oneline", "-n", "20", "--", "README.md", "docs"],
+        check=False,
+    )
+    commits = collect_commit_messages_for_ranges(context.repo_root, ranges) if ranges else []
+    commit_lines = []
+    for commit in commits:
+        commit_lines.append(f"--- {commit['hash']}")
+        commit_lines.append(f"subject: {commit['subject']}")
+        if commit["body"]:
+            commit_lines.append("body:")
+            commit_lines.append(commit["body"])
+        commit_lines.append("")
+    return CollectorResult(
+        artifacts={
+            "changed-files.txt": "\n".join(changed_files) + ("\n" if changed_files else ""),
+            "push.diff": diff_text + ("\n" if diff_text and not diff_text.endswith("\n") else ""),
+            "docs-inventory.txt": "\n".join(path.relative_to(context.repo_root).as_posix() for path in doc_files)
+            + ("\n" if doc_files else ""),
+            "docs-context.txt": docs_context + ("\n" if docs_context else ""),
+            "recent-commits.txt": recent_commits + ("\n" if recent_commits and not recent_commits.endswith("\n") else ""),
+            "commits.txt": "\n".join(commit_lines).strip() + ("\n" if commit_lines else ""),
+        }
+    )