@ranger1/dx 0.1.36 → 0.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ ---
2
+ description: review (GitHub Harvest)
3
+ mode: subagent
4
+ model: openai/gpt-5.2
5
+ temperature: 0.2
6
+ tools:
7
+ write: true
8
+ edit: false
9
+ bash: true
10
+ ---
11
+
12
+ # PR Reviewer (GitHub Harvest)
13
+
14
+ Harvest all GitHub PR review feedback (humans + bots, including Copilot) and normalize it into a standard `reviewFile`.
15
+
16
+ ## 输入(prompt 必须包含)
17
+
18
+ - `PR #<number>`
19
+ - `round: <number>`
20
+ - `runId: <string>`(必须透传,禁止自行生成)
21
+ - `contextFile: <filename>`
22
+
23
+ ## Cache 约定(强制)
24
+
25
+ - 缓存目录固定为 `./.cache/`;交接一律传 `./.cache/<file>`(repo 相对路径),禁止 basename-only(如 `foo.md`)。
26
+
27
+ ## 输出(强制)
28
+
29
+ 只输出一行:
30
+
31
+ `reviewFile: ./.cache/<file>.md`
32
+
33
+ ## reviewFile 格式(强制)
34
+
35
+ ```md
36
+ # Review (GHR)
37
+
38
+ PR: <PR_NUMBER>
39
+ Round: <ROUND>
40
+
41
+ ## Summary
42
+
43
+ P0: <n>
44
+ P1: <n>
45
+ P2: <n>
46
+ P3: <n>
47
+
48
+ ## Findings
49
+
50
+ - id: GHR-RC-2752827557
51
+ priority: P1
52
+ category: quality|performance|security|architecture
53
+ file: <path>
54
+ line: <number|null>
55
+ title: <short>
56
+ description: <single-line text>
57
+ suggestion: <single-line text>
58
+ ```
59
+
60
+ ## ID 规则(强制)
61
+
62
+ - Inline 评审(discussion_r...):`GHR-RC-<databaseId>`(databaseId 可映射到 `#discussion_r<databaseId>`)
63
+ - PR Review 总评:`GHR-RV-<reviewId>`
64
+ - PR 普通评论:`GHR-IC-<issueCommentId>`
65
+
66
+ ## 执行步骤(强制)
67
+
68
+ 1) Harvest(确定性)
69
+
70
+ - 调用脚本生成 raw JSON:
71
+
72
+ ```bash
73
+ python3 ~/.opencode/agents/gh_review_harvest.py \
74
+ --pr <PR_NUMBER> \
75
+ --round <ROUND> \
76
+ --run-id <RUN_ID>
77
+ ```
78
+
79
+ - 脚本 stdout 会输出一行 JSON:`{"rawFile":"./.cache/...json"}`,从中取 `rawFile`。
80
+
81
+ 2) Normalize(LLM 分类)
82
+
83
+ - 读取 `rawFile`(JSON)后,提取“建议/问题”并生成 findings:
84
+ - 覆盖 humans + bots(不做作者白名单)。
85
+ - 去噪:丢弃任何 body 中包含 `<!-- pr-review-loop-marker` 的内容。
86
+ - 忽略纯审批/无内容:如 `LGTM`、`Looks good`、`Approved` 等。
87
+ - 默认策略:
88
+ - `isResolved=true` 或 `isOutdated=true` 的 thread 在 harvest 阶段直接丢弃(不进入 rawFile,不消耗 LLM token)。
89
+ - 分类规则(大致):
90
+ - P0: 明确安全漏洞/数据泄漏/资金损失/远程执行
91
+ - P1: 逻辑 bug/权限绕过/会导致线上错误
92
+ - P2: 潜在 bug/鲁棒性/边界条件/可维护性重大问题
93
+ - P3: 风格/命名/小优化/可选建议
94
+ - `category` 只能取:quality|performance|security|architecture
95
+
96
+ 3) 写入 reviewFile
97
+
98
+ - 文件名固定:`./.cache/review-GHR-pr<PR_NUMBER>-r<ROUND>-<RUN_ID>.md`
99
+ - 重要:`title/description/suggestion` 必须是单行;原文有换行时用 `\n` 转义。
100
+
101
+ ## 禁止事项(强制)
102
+
103
+ - ⛔ 不发布 GitHub 评论(不调用 `gh pr comment/review`)
104
+ - ⛔ 不修改代码(只输出 reviewFile)
105
+ - ⛔ 不生成/伪造 runId
@@ -0,0 +1,269 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Deterministic GitHub PR review harvester.
4
+ #
5
+ # - Fetches inline review threads via GraphQL (reviewThreads) with pagination.
6
+ # - Fetches PR reviews and PR issue comments via REST (gh api) with pagination.
7
+ # - Writes a raw JSON file into project cache: ./.cache/
8
+ # - Prints exactly one JSON object to stdout: {"rawFile":"./.cache/...json"}
9
+
10
+ import argparse
11
+ import json
12
+ import os
13
+ import subprocess
14
+ import sys
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+
19
+ def _repo_root():
20
+ try:
21
+ p = subprocess.run(
22
+ ["git", "rev-parse", "--show-toplevel"],
23
+ stdout=subprocess.PIPE,
24
+ stderr=subprocess.DEVNULL,
25
+ text=True,
26
+ )
27
+ out = (p.stdout or "").strip()
28
+ if p.returncode == 0 and out:
29
+ return Path(out)
30
+ except Exception:
31
+ pass
32
+ return Path.cwd()
33
+
34
+
35
+ def _cache_dir(repo_root):
36
+ return (repo_root / ".cache").resolve()
37
+
38
+
39
+ def _repo_relpath(repo_root, p):
40
+ try:
41
+ rel = p.resolve().relative_to(repo_root.resolve())
42
+ return "./" + rel.as_posix()
43
+ except Exception:
44
+ return os.path.basename(str(p))
45
+
46
+
47
+ REPO_ROOT = _repo_root()
48
+ CACHE_DIR = _cache_dir(REPO_ROOT)
49
+
50
+
51
+ def _json_out(obj):
52
+ sys.stdout.write(json.dumps(obj, ensure_ascii=True))
53
+ sys.stdout.write("\n")
54
+
55
+
56
+ def _run_capture(cmd):
57
+ try:
58
+ p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
59
+ return p.returncode, p.stdout, p.stderr
60
+ except FileNotFoundError as e:
61
+ return 127, "", str(e)
62
+
63
+
64
+ def _require_gh_auth():
65
+ rc, out, err = _run_capture(["gh", "auth", "status"])
66
+ if rc == 127:
67
+ return False, "GH_CLI_NOT_FOUND", "gh not found in PATH"
68
+ if rc != 0:
69
+ detail = (err or out or "").strip()
70
+ if len(detail) > 4000:
71
+ detail = detail[-4000:]
72
+ return False, "GH_NOT_AUTHENTICATED", detail
73
+ return True, None, None
74
+
75
+
76
+ def _resolve_owner_repo(explicit_repo):
77
+ if explicit_repo:
78
+ s = str(explicit_repo).strip()
79
+ if s and "/" in s:
80
+ return s
81
+ rc, out, _ = _run_capture(["gh", "repo", "view", "--json", "nameWithOwner", "--jq", ".nameWithOwner"])
82
+ owner_repo = out.strip() if rc == 0 else ""
83
+ return owner_repo or None
84
+
85
+
86
+ def _gh_api_json(args):
87
+ rc, out, err = _run_capture(["gh", "api"] + args)
88
+ if rc != 0:
89
+ raise RuntimeError(f"GH_API_FAILED: {(err or out or '').strip()}")
90
+ try:
91
+ return json.loads(out or "null")
92
+ except Exception:
93
+ raise RuntimeError("GH_API_JSON_PARSE_FAILED")
94
+
95
+
96
+ def _gh_api_graphql(query, variables):
97
+ cmd = ["gh", "api", "graphql", "-f", f"query={query}"]
98
+ for k, v in (variables or {}).items():
99
+ if isinstance(v, int):
100
+ cmd.extend(["-F", f"{k}={v}"])
101
+ elif v is None:
102
+ cmd.extend(["-f", f"{k}="])
103
+ else:
104
+ cmd.extend(["-f", f"{k}={v}"])
105
+
106
+ rc, out, err = _run_capture(cmd)
107
+ if rc != 0:
108
+ raise RuntimeError(f"GH_GRAPHQL_FAILED: {(err or out or '').strip()}")
109
+ try:
110
+ return json.loads(out or "null")
111
+ except Exception:
112
+ raise RuntimeError("GH_GRAPHQL_JSON_PARSE_FAILED")
113
+
114
+
115
+ def _flatten_threads(gql_data):
116
+ threads = []
117
+ pr = (((gql_data or {}).get("data") or {}).get("repository") or {}).get("pullRequest") or {}
118
+ conn = pr.get("reviewThreads") or {}
119
+ nodes = conn.get("nodes") or []
120
+ for t in nodes:
121
+ is_resolved = bool((t or {}).get("isResolved"))
122
+ is_outdated = bool((t or {}).get("isOutdated"))
123
+ if is_resolved or is_outdated:
124
+ continue
125
+ comments_conn = (t or {}).get("comments") or {}
126
+ comments_nodes = comments_conn.get("nodes") or []
127
+ comments = []
128
+ for c in comments_nodes:
129
+ author = (c or {}).get("author") or {}
130
+ comments.append(
131
+ {
132
+ "id": (c or {}).get("id"),
133
+ "databaseId": (c or {}).get("databaseId"),
134
+ "url": (c or {}).get("url"),
135
+ "author": {
136
+ "login": author.get("login"),
137
+ "type": author.get("__typename"),
138
+ },
139
+ "body": (c or {}).get("body") or "",
140
+ "bodyText": (c or {}).get("bodyText") or "",
141
+ "createdAt": (c or {}).get("createdAt"),
142
+ "updatedAt": (c or {}).get("updatedAt"),
143
+ }
144
+ )
145
+ threads.append(
146
+ {
147
+ "id": (t or {}).get("id"),
148
+ "isResolved": False,
149
+ "isOutdated": False,
150
+ "path": (t or {}).get("path"),
151
+ "line": (t or {}).get("line"),
152
+ "originalLine": (t or {}).get("originalLine"),
153
+ "startLine": (t or {}).get("startLine"),
154
+ "originalStartLine": (t or {}).get("originalStartLine"),
155
+ "comments": comments,
156
+ }
157
+ )
158
+
159
+ page_info = conn.get("pageInfo") or {}
160
+ return threads, {
161
+ "hasNextPage": bool(page_info.get("hasNextPage")),
162
+ "endCursor": page_info.get("endCursor"),
163
+ }
164
+
165
+
166
+ def _fetch_all_review_threads(owner, repo, pr_number):
167
+ query = (
168
+ "query($owner:String!,$repo:String!,$prNumber:Int!,$after:String){"
169
+ "repository(owner:$owner,name:$repo){"
170
+ "pullRequest(number:$prNumber){"
171
+ "reviewThreads(first:100,after:$after){"
172
+ "pageInfo{hasNextPage endCursor}"
173
+ "nodes{"
174
+ "id isResolved isOutdated path line originalLine startLine originalStartLine "
175
+ "comments(first:100){nodes{"
176
+ "id databaseId url body bodyText createdAt updatedAt author{login __typename}"
177
+ "}}"
178
+ "}"
179
+ "}"
180
+ "}"
181
+ "}"
182
+ "}"
183
+ )
184
+
185
+ after = None
186
+ all_threads = []
187
+ while True:
188
+ data = _gh_api_graphql(query, {"owner": owner, "repo": repo, "prNumber": pr_number, "after": after})
189
+ threads, page = _flatten_threads(data)
190
+ all_threads.extend(threads)
191
+ if not page.get("hasNextPage"):
192
+ break
193
+ after = page.get("endCursor")
194
+ if not after:
195
+ break
196
+ return all_threads
197
+
198
+
199
+ def main(argv):
200
+ class _ArgParser(argparse.ArgumentParser):
201
+ def error(self, message):
202
+ raise ValueError(message)
203
+
204
+ parser = _ArgParser(add_help=False)
205
+ parser.add_argument("--pr", type=int, required=True)
206
+ parser.add_argument("--round", type=int, default=1)
207
+ parser.add_argument("--run-id", required=True)
208
+ parser.add_argument("--repo")
209
+
210
+ try:
211
+ args = parser.parse_args(argv)
212
+ except ValueError:
213
+ _json_out({"error": "INVALID_ARGS"})
214
+ return 2
215
+
216
+ ok, code, detail = _require_gh_auth()
217
+ if not ok:
218
+ _json_out({"error": code, "detail": detail})
219
+ return 1
220
+
221
+ owner_repo = _resolve_owner_repo(args.repo)
222
+ if not owner_repo:
223
+ _json_out({"error": "REPO_NOT_FOUND"})
224
+ return 1
225
+ if "/" not in owner_repo:
226
+ _json_out({"error": "INVALID_REPO"})
227
+ return 1
228
+
229
+ owner, repo = owner_repo.split("/", 1)
230
+ pr_number = int(args.pr)
231
+ round_num = int(args.round)
232
+ run_id = str(args.run_id).strip()
233
+ if not run_id:
234
+ _json_out({"error": "MISSING_RUN_ID"})
235
+ return 1
236
+
237
+ CACHE_DIR.mkdir(parents=True, exist_ok=True)
238
+ raw_basename = f"gh-review-raw-pr{pr_number}-r{round_num}-{run_id}.json"
239
+ raw_path = CACHE_DIR / raw_basename
240
+
241
+ try:
242
+ threads = _fetch_all_review_threads(owner, repo, pr_number)
243
+
244
+ reviews = _gh_api_json([f"repos/{owner_repo}/pulls/{pr_number}/reviews", "--paginate"])
245
+ issue_comments = _gh_api_json([f"repos/{owner_repo}/issues/{pr_number}/comments", "--paginate"])
246
+
247
+ now = datetime.now(timezone.utc).isoformat()
248
+ payload = {
249
+ "repo": owner_repo,
250
+ "pr": pr_number,
251
+ "round": round_num,
252
+ "runId": run_id,
253
+ "generatedAt": now,
254
+ "reviewThreads": threads,
255
+ "reviews": reviews if isinstance(reviews, list) else [],
256
+ "issueComments": issue_comments if isinstance(issue_comments, list) else [],
257
+ }
258
+
259
+ raw_path.write_text(json.dumps(payload, ensure_ascii=True), encoding="utf-8", newline="\n")
260
+ except Exception as e:
261
+ _json_out({"error": "HARVEST_FAILED", "detail": str(e)[:800]})
262
+ return 1
263
+
264
+ _json_out({"rawFile": _repo_relpath(REPO_ROOT, raw_path)})
265
+ return 0
266
+
267
+
268
+ if __name__ == "__main__":
269
+ raise SystemExit(main(sys.argv[1:]))
@@ -21,7 +21,7 @@ tools:
21
21
  - `round: <number>`
22
22
  - `runId: <string>`
23
23
  - `contextFile: <path>`(例如:`./.cache/pr-context-...md`)
24
- - `reviewFile: <path>`(三行,分别对应 CDX/CLD/GMN,例如:`./.cache/review-...md`)
24
+ - `reviewFile: <path>`(多行,1+ 条;例如:`./.cache/review-...md`)
25
25
 
26
26
  ### 模式 B:发布修复评论(基于 fixReportFile)
27
27
 
@@ -53,7 +53,7 @@ runId: abcdef123456
53
53
 
54
54
  ## 重复分组(仅作为脚本入参)
55
55
 
56
- 你需要基于 3 份 `reviewFile` 内容判断重复 finding 分组,生成**一行 JSON**(不要代码块、不要解释文字、不要换行)。
56
+ 你需要基于所有 `reviewFile` 内容判断重复 finding 分组,生成**一行 JSON**(不要代码块、不要解释文字、不要换行)。
57
57
 
58
58
  注意:这行 JSON **不是你的最终输出**,它只用于生成 `--duplicate-groups-b64` 传给脚本。
59
59
 
@@ -22,6 +22,7 @@ agent: sisyphus
22
22
  - `codex-reviewer`
23
23
  - `claude-reviewer`
24
24
  - `gemini-reviewer`
25
+ - `gh-thread-reviewer`
25
26
  - `pr-review-aggregate`
26
27
  - `pr-fix`
27
28
 
@@ -57,9 +58,9 @@ agent: sisyphus
57
58
  - 取出:`contextFile`、`runId`、`headOid`(如有)
58
59
  - **CRITICAL**: 必须等待此 Task 成功完成并获取到 `contextFile` 后,才能进入 Step 2
59
60
 
60
- 2. Task(并行): `codex-reviewer` + `claude-reviewer` + `gemini-reviewer` **(依赖 Step 1 的 contextFile)**
61
+ 2. Task(并行): `codex-reviewer` + `claude-reviewer` + `gemini-reviewer` + `gh-thread-reviewer` **(依赖 Step 1 的 contextFile)**
61
62
 
62
- - **DEPENDENCY**: 这三个 reviewers 依赖 Step 1 返回的 `contextFile`,因此**必须等 Step 1 完成后才能并行启动**
63
+ - **DEPENDENCY**: 这些 reviewers 依赖 Step 1 返回的 `contextFile`,因此**必须等 Step 1 完成后才能并行启动**
63
64
  - 每个 reviewer prompt 必须包含:
64
65
  - `PR #{{PR_NUMBER}}`
65
66
  - `round: <ROUND>`
@@ -74,7 +75,7 @@ agent: sisyphus
74
75
 
75
76
  3. Task: `pr-review-aggregate`
76
77
 
77
- - prompt 必须包含:`PR #{{PR_NUMBER}}`、`round: <ROUND>`、`runId: <RUN_ID>`、`contextFile: ./.cache/<file>.md`、三条 `reviewFile: ./.cache/<file>.md`
78
+ - prompt 必须包含:`PR #{{PR_NUMBER}}`、`round: <ROUND>`、`runId: <RUN_ID>`、`contextFile: ./.cache/<file>.md`、以及 1+ 条 `reviewFile: ./.cache/<file>.md`
78
79
  - 输出:`{"stop":true}` 或 `{"stop":false,"fixFile":"..."}`
79
80
  - 若 `stop=true`:本轮结束并退出循环
80
81
  - **唯一性约束**: 每轮只能发布一次 Review Summary;脚本内置幂等检查,重复调用不会重复发布
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ranger1/dx",
3
- "version": "0.1.36",
3
+ "version": "0.1.37",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "repository": {