@ps-neko/nekowork 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/AGENTS.md +112 -0
  2. package/CLAUDE.md +81 -0
  3. package/LICENSE +21 -0
  4. package/README.md +283 -0
  5. package/REVIEW.md +96 -0
  6. package/RULES.md +51 -0
  7. package/SOUL.md +21 -0
  8. package/WORKING-CONTEXT.md +52 -0
  9. package/agent.yaml +219 -0
  10. package/agents/architect.md +57 -0
  11. package/agents/code-reviewer.md +60 -0
  12. package/agents/codex-challenger.md +53 -0
  13. package/agents/codex-reviewer.md +56 -0
  14. package/agents/debugger.md +33 -0
  15. package/agents/doc-writer.md +51 -0
  16. package/agents/executor.md +41 -0
  17. package/agents/planner.md +49 -0
  18. package/agents/research.md +50 -0
  19. package/agents/security-reviewer.md +47 -0
  20. package/agents/test-engineer.md +41 -0
  21. package/bridge/mcp-server.js +301 -0
  22. package/commands/claude-led-codex-review.md +29 -0
  23. package/docs/ADVANCED.md +321 -0
  24. package/docs/AI-DEVELOPMENT-LIFECYCLE.md +105 -0
  25. package/docs/ARCHITECTURE.md +205 -0
  26. package/docs/AUDIT.md +114 -0
  27. package/docs/AUTH-MIGRATION.md +282 -0
  28. package/docs/CHANGELOG.md +97 -0
  29. package/docs/CLI-STAGES.md +89 -0
  30. package/docs/CODEMAPS/README.md +15 -0
  31. package/docs/CODEMAPS/agents.md +22 -0
  32. package/docs/CODEMAPS/bridge.md +18 -0
  33. package/docs/CODEMAPS/hooks.md +28 -0
  34. package/docs/CODEMAPS/manifests.md +14 -0
  35. package/docs/CODEMAPS/rules.md +22 -0
  36. package/docs/CODEMAPS/schemas.md +21 -0
  37. package/docs/CODEMAPS/scripts.md +158 -0
  38. package/docs/CODEMAPS/skills.md +29 -0
  39. package/docs/CODEMAPS/tests.md +98 -0
  40. package/docs/CORE-INVARIANTS.md +38 -0
  41. package/docs/DEMO.md +110 -0
  42. package/docs/EXAMPLE-PROJECT.md +92 -0
  43. package/docs/PORTING.md +154 -0
  44. package/docs/PRODUCT-PRINCIPLES.md +303 -0
  45. package/docs/PUBLISH-ALPHA.md +106 -0
  46. package/docs/QUICKSTART.md +344 -0
  47. package/docs/RELEASE-READINESS.md +140 -0
  48. package/docs/RISK-CLASSIFIER.md +50 -0
  49. package/docs/RUNBOOK.md +146 -0
  50. package/docs/SECURITY.md +79 -0
  51. package/docs/SETUP.md +142 -0
  52. package/docs/WHY-NEKOWORK.md +64 -0
  53. package/docs/case-studies/README.md +16 -0
  54. package/docs/case-studies/SINDRESORHUS-IS-PLAIN-OBJ.md +141 -0
  55. package/docs/dev-log/2026-04-29-p1-recovery.md +142 -0
  56. package/docs/dev-log/2026-04-29-week1-4.md +81 -0
  57. package/docs/examples/GITHUB-ACTIONS-HARDENING.md +86 -0
  58. package/docs/examples/QUALITY-LIFECYCLE-SMOKE.md +32 -0
  59. package/docs/examples/TRADING-DASHBOARD-MOCK.md +65 -0
  60. package/docs/workflows-stash/README.md +32 -0
  61. package/docs/workflows-stash/harness-review.yml +166 -0
  62. package/docs/workflows-stash/harness-validate.yml +48 -0
  63. package/examples/github-actions-hardening/.github/workflows/hardened-validate.yml +38 -0
  64. package/examples/github-actions-hardening/README.md +31 -0
  65. package/examples/github-actions-hardening/case-study/ASK.md +26 -0
  66. package/examples/github-actions-hardening/case-study/GATE_STATUS.md +28 -0
  67. package/examples/github-actions-hardening/case-study/PLAN.md +25 -0
  68. package/examples/github-actions-hardening/case-study/SHIP_READY.md +21 -0
  69. package/examples/github-actions-hardening/case-study/TASK.md +30 -0
  70. package/examples/github-actions-hardening/case-study/TEAM_HANDOFFS.md +37 -0
  71. package/examples/github-actions-hardening/case-study/VERIFY_SUMMARY.md +35 -0
  72. package/examples/github-actions-hardening/case-study/WORK_SUMMARY.md +24 -0
  73. package/examples/github-actions-hardening/package.json +12 -0
  74. package/examples/github-actions-hardening/scripts/check.mjs +43 -0
  75. package/examples/quality-lifecycle-smoke/README.md +30 -0
  76. package/examples/quality-lifecycle-smoke/case-study/ASK.md +24 -0
  77. package/examples/quality-lifecycle-smoke/case-study/GATE_STATUS.md +10 -0
  78. package/examples/quality-lifecycle-smoke/case-study/PLAN.md +19 -0
  79. package/examples/quality-lifecycle-smoke/case-study/SHIP_READY.md +11 -0
  80. package/examples/quality-lifecycle-smoke/case-study/TASK.md +19 -0
  81. package/examples/quality-lifecycle-smoke/case-study/TEAM_HANDOFFS.md +21 -0
  82. package/examples/quality-lifecycle-smoke/case-study/VERIFY_SUMMARY.md +44 -0
  83. package/examples/quality-lifecycle-smoke/case-study/WORK_SUMMARY.md +19 -0
  84. package/examples/quality-lifecycle-smoke/package.json +8 -0
  85. package/examples/quality-lifecycle-smoke/scripts/check.mjs +44 -0
  86. package/examples/trading-dashboard-mock/README.md +33 -0
  87. package/examples/trading-dashboard-mock/case-study/ASK.md +24 -0
  88. package/examples/trading-dashboard-mock/case-study/GATE_STATUS.md +28 -0
  89. package/examples/trading-dashboard-mock/case-study/PLAN.md +23 -0
  90. package/examples/trading-dashboard-mock/case-study/SHIP_READY.md +21 -0
  91. package/examples/trading-dashboard-mock/case-study/TASK.md +29 -0
  92. package/examples/trading-dashboard-mock/case-study/TEAM_HANDOFFS.md +49 -0
  93. package/examples/trading-dashboard-mock/case-study/VERIFY_SUMMARY.md +35 -0
  94. package/examples/trading-dashboard-mock/case-study/WORK_SUMMARY.md +27 -0
  95. package/examples/trading-dashboard-mock/fixtures/market.json +9 -0
  96. package/examples/trading-dashboard-mock/index.html +76 -0
  97. package/examples/trading-dashboard-mock/package.json +9 -0
  98. package/examples/trading-dashboard-mock/scripts/check.mjs +54 -0
  99. package/examples/trading-dashboard-mock/src/app.js +83 -0
  100. package/examples/trading-dashboard-mock/src/styles.css +227 -0
  101. package/hooks/hooks.json +44 -0
  102. package/hooks/scripts/config-protection.js +34 -0
  103. package/hooks/scripts/gateguard-fact-force.js +146 -0
  104. package/hooks/scripts/persistent-mode.mjs +27 -0
  105. package/hooks/scripts/pre-bash-dispatcher.js +63 -0
  106. package/hooks/scripts/quality-gate.js +106 -0
  107. package/manifests/install-components.json +195 -0
  108. package/manifests/install-modules.json +101 -0
  109. package/manifests/install-profiles.json +134 -0
  110. package/package.json +96 -0
  111. package/rules/common/coding-style.md +71 -0
  112. package/rules/common/security.md +69 -0
  113. package/rules/common/testing.md +58 -0
  114. package/rules/python/coding-style.md +80 -0
  115. package/rules/python/testing.md +86 -0
  116. package/rules/typescript/coding-style.md +97 -0
  117. package/rules/typescript/security.md +67 -0
  118. package/rules/typescript/testing.md +78 -0
  119. package/schemas/agent-yaml.schema.json +168 -0
  120. package/schemas/agent.schema.json +32 -0
  121. package/schemas/handoff.schema.json +105 -0
  122. package/schemas/hooks.schema.json +35 -0
  123. package/schemas/install-components.schema.json +46 -0
  124. package/schemas/install-modules.schema.json +39 -0
  125. package/schemas/install-profiles.schema.json +32 -0
  126. package/schemas/install-state.schema.json +42 -0
  127. package/schemas/routing.schema.json +42 -0
  128. package/schemas/skill.schema.json +19 -0
  129. package/scripts/agents/dispatch.js +144 -0
  130. package/scripts/agents/runners/claude.js +214 -0
  131. package/scripts/agents/runners/codex.js +233 -0
  132. package/scripts/agents/runners/gemini.js +92 -0
  133. package/scripts/agents/runners/mock.js +107 -0
  134. package/scripts/auth/github-import-gh.js +52 -0
  135. package/scripts/auth/github-login.js +79 -0
  136. package/scripts/auth/github-logout.js +21 -0
  137. package/scripts/auth/github-status.js +46 -0
  138. package/scripts/build-claude.js +101 -0
  139. package/scripts/build-codemaps.js +286 -0
  140. package/scripts/build-codex.js +93 -0
  141. package/scripts/build-cursor.js +132 -0
  142. package/scripts/build-gemini.js +117 -0
  143. package/scripts/build-opencode.js +117 -0
  144. package/scripts/ci/catalog.js +120 -0
  145. package/scripts/ci/check-markers.js +48 -0
  146. package/scripts/ci/security-hardening.js +270 -0
  147. package/scripts/ci/validate-agents.js +88 -0
  148. package/scripts/ci/validate-hooks.js +99 -0
  149. package/scripts/ci/validate-manifests.js +128 -0
  150. package/scripts/ci/validate-skills.js +93 -0
  151. package/scripts/cli.js +1134 -0
  152. package/scripts/core/auth-guard.js +22 -0
  153. package/scripts/core/build-roots.js +11 -0
  154. package/scripts/core/cli-resolver.js +64 -0
  155. package/scripts/core/execution-workspace.js +84 -0
  156. package/scripts/core/git-mutation-guard.js +79 -0
  157. package/scripts/core/install-state.js +125 -0
  158. package/scripts/core/json-extractor.js +32 -0
  159. package/scripts/core/subprocess.js +74 -0
  160. package/scripts/daemon/wait.js +278 -0
  161. package/scripts/demo-external-project.js +222 -0
  162. package/scripts/demo-quick-run.js +193 -0
  163. package/scripts/demo-review.js +204 -0
  164. package/scripts/doctor.js +296 -0
  165. package/scripts/install-apply.js +185 -0
  166. package/scripts/install-plan.js +411 -0
  167. package/scripts/lib/acceptance-criteria.js +105 -0
  168. package/scripts/lib/costs.js +82 -0
  169. package/scripts/lib/instincts.js +194 -0
  170. package/scripts/lib/keychain.js +85 -0
  171. package/scripts/lib/profile-policy.js +134 -0
  172. package/scripts/lib/profile-safety.js +81 -0
  173. package/scripts/lib/risk-classifier.js +145 -0
  174. package/scripts/lib/router.js +138 -0
  175. package/scripts/lib/severity.js +99 -0
  176. package/scripts/lib/token-vault.js +136 -0
  177. package/scripts/orchestrators/apply.js +225 -0
  178. package/scripts/orchestrators/ask.js +143 -0
  179. package/scripts/orchestrators/gate.js +179 -0
  180. package/scripts/orchestrators/ralph.js +179 -0
  181. package/scripts/orchestrators/review.js +452 -0
  182. package/scripts/orchestrators/run.js +151 -0
  183. package/scripts/orchestrators/ship.js +339 -0
  184. package/scripts/orchestrators/team-lite.js +270 -0
  185. package/scripts/orchestrators/team.js +244 -0
  186. package/scripts/orchestrators/verify.js +306 -0
  187. package/scripts/orchestrators/work.js +207 -0
  188. package/scripts/portability/simulate-port.js +220 -0
  189. package/scripts/repair.js +184 -0
  190. package/scripts/sync-claude-md.js +220 -0
  191. package/scripts/verify/claude-live.js +30 -0
  192. package/scripts/verify/codex-live.js +60 -0
  193. package/scripts/verify/gemini-live.js +48 -0
  194. package/scripts/verify/runtime.js +105 -0
  195. package/skills/claude-led-codex-review/SKILL.md +133 -0
  196. package/skills/plan-eng-review/SKILL.md +51 -0
  197. package/skills/porting/SKILL.md +69 -0
  198. package/skills/ralph/SKILL.md +48 -0
  199. package/skills/release-readiness/SKILL.md +62 -0
  200. package/skills/review/SKILL.md +42 -0
  201. package/skills/security-hardening/SKILL.md +59 -0
  202. package/skills/ship/SKILL.md +44 -0
  203. package/skills/tdd-workflow/SKILL.md +42 -0
@@ -0,0 +1,144 @@
1
+ // 에이전트 dispatch. agent.md frontmatter 읽고 provider runner 로 위임.
2
+ // 입력 / 출력은 표준화된 JSON 스키마. 단계 간 컨텍스트는 핸드오프 파일로만.
3
+
4
+ import fs from 'node:fs';
5
+ import path from 'node:path';
6
+ import YAML from 'yaml';
7
+
8
+ import { runMock } from './runners/mock.js';
9
+ import { runClaude } from './runners/claude.js';
10
+ import { runCodex } from './runners/codex.js';
11
+ import { runGemini } from './runners/gemini.js';
12
+ import { decide as routeDecide, trace as routeTrace } from '../lib/router.js';
13
+ import { record as costRecord } from '../lib/costs.js';
14
+ import { classifyRisk } from '../lib/risk-classifier.js';
15
+
16
// Provider name -> runner function.
// The table has a null prototype so that a lookup with an arbitrary string
// (e.g. 'constructor', 'toString') yields undefined instead of an inherited
// Object.prototype member, and it is frozen because nothing should register
// runners at runtime.
const RUNNERS = Object.freeze(Object.assign(Object.create(null), {
  mock: runMock,
  claude: runClaude,
  codex: runCodex,
  gemini: runGemini,
}));
22
+
23
/**
 * Invoke one agent once and return a normalized handoff object.
 *
 * Reads `agents/<agent>.md` under the harness root, parses its YAML
 * frontmatter, selects a provider runner, runs it, records cost, and wraps
 * the runner result in the standard handoff shape.
 *
 * @param {object} opts
 * @param {string} opts.agent - agent name (e.g. 'planner')
 * @param {string} opts.stage - stage name (e.g. 'plan')
 * @param {string} opts.task - one-line user task
 * @param {object} opts.context - on-disk handoffs, PRD and other material
 * @param {boolean} [opts.live=false] - use the provider from the frontmatter instead of 'mock'
 * @param {string} [opts.providerOverride] - force a specific provider
 * @param {string} [opts.sandboxOverride] - overrides the frontmatter `sandbox` value
 * @param {string} [opts.executionMode] - forwarded to the runner unchanged
 * @param {string} [opts.sessionDir] - when set, a routing trace is written for this call
 * @param {string} [opts.sessionId] - recorded in the cost entry and the handoff
 * @param {string} [opts.harnessRoot] - defaults to process.cwd()
 * @param {string} [opts.projectRoot] - defaults to harnessRoot
 * @returns {Promise<object>} handoff object (intended to follow handoff.schema.json)
 * @throws {Error} when the agent file or its frontmatter is missing, or the provider is unknown
 */
export async function dispatch(opts) {
  const harnessRoot = opts.harnessRoot || process.cwd();
  const projectRoot = opts.projectRoot || harnessRoot;
  const agentFile = path.join(harnessRoot, 'agents', `${opts.agent}.md`);
  if (!fs.existsSync(agentFile)) throw new Error(`agent file not found: ${opts.agent}`);

  const raw = fs.readFileSync(agentFile, 'utf8');
  const fmMatch = raw.match(/^---\s*\n([\s\S]*?)\n---/);
  if (!fmMatch) throw new Error(`agent ${opts.agent} 의 frontmatter 없음`);
  const fm = YAML.parse(fmMatch[1]);
  // A frontmatter block that parses to null or a scalar is treated like a
  // missing one; otherwise the property reads below fail with a TypeError.
  if (!fm || typeof fm !== 'object') throw new Error(`agent ${opts.agent} 의 frontmatter 없음`);
  const body = raw.slice(fmMatch[0].length).trim();

  const provider = opts.providerOverride || (opts.live ? fm.provider : 'mock');
  // Own-property check: an override such as 'constructor' must not resolve to
  // an inherited member and then be called as a runner.
  const runner = Object.prototype.hasOwnProperty.call(RUNNERS, provider)
    ? RUNNERS[provider]
    : undefined;
  if (!runner) throw new Error(`알 수 없는 provider: ${provider}`);

  // Routing trace (only when a session directory is given).
  if (opts.sessionDir) {
    try {
      const decision = routeDecide({
        stage: opts.stage,
        task: opts.task,
        files: opts.context?.files || [],
        ecoMode: !!process.env.HARNESS_ECO,
        riskLevel: classifyRisk({ task: opts.task || '', files: opts.context?.files || [] }).risk,
        harnessRoot,
      });
      decision.provider = provider;
      decision.model = fm.model;
      routeTrace(opts.sessionDir, decision, { stage: opts.stage, task: opts.task });
    } catch { /* a failed trace must not block the dispatch itself */ }
  }

  const startTs = Date.now();
  // Treat a null/undefined runner result as an empty result so the handoff is
  // still built with defaults instead of throwing on the first field access.
  const result = (await runner({
    agent: fm.name,
    stage: opts.stage,
    task: opts.task,
    model: fm.model,
    sandbox: opts.sandboxOverride || fm.sandbox,
    networkAccess: fm.network_access,
    disallowedTools: fm.disallowedTools || [],
    promptBody: body,
    context: opts.context || {},
    harnessRoot,
    projectRoot,
    executionMode: opts.executionMode,
  })) ?? {};
  const durMs = Date.now() - startTs;

  // Cost record (mock calls are recorded with 0 tokens so call counts stay visible).
  try {
    costRecord({
      session: opts.sessionId || 'default',
      stage: opts.stage,
      agent: fm.name,
      provider,
      model: fm.model,
      input_tokens: result.usage?.input_tokens || 0,
      output_tokens: result.usage?.output_tokens || 0,
      duration_ms: durMs,
    });
  } catch { /* cost recording is best-effort */ }

  // Standardize and attach metadata. Arbitrary runner fields are dropped; only
  // the metadata keys below are carried over in addition to the standard
  // handoff fields, which are copied explicitly.
  const passthroughKeys = new Set(['prdSeed', 'diffPath', 'executionWorkspace']);
  const passthrough = {};
  for (const [k, v] of Object.entries(result)) {
    if (passthroughKeys.has(k)) passthrough[k] = v;
  }

  const handoff = {
    stage: opts.stage,
    agent: fm.name,
    round: opts.context?.round || 1,
    session_id: opts.sessionId || undefined,
    timestamp: new Date().toISOString(),
    duration_ms: durMs,
    provider,
    model: fm.model,
    decided: result.decided ?? '',
    rejected: result.rejected ?? '',
    risks: result.risks ?? '',
    files: result.files ?? [],
    remaining: result.remaining ?? '',
    issues: result.issues ?? [],
    verdict: result.verdict,
    ...passthrough,
  };
  if (result.confidence != null) handoff.confidence = result.confidence;
  // Drop undefined fields so they do not appear as explicit keys.
  for (const [k, v] of Object.entries(handoff)) {
    if (v === undefined) delete handoff[k];
  }
  return handoff;
}
135
+
136
/**
 * Parse the YAML frontmatter of `agents/<agentName>.md` under `root`.
 *
 * @param {string} agentName - agent file name without the `.md` extension
 * @param {string} [root=process.cwd()] - directory that contains `agents/`
 * @returns {*} the parsed frontmatter, or null when the file does not exist
 *   or does not start with a `---` frontmatter block
 */
export function loadAgentFrontmatter(agentName, root = process.cwd()) {
  const agentPath = path.join(root, 'agents', `${agentName}.md`);
  if (!fs.existsSync(agentPath)) {
    return null;
  }

  const contents = fs.readFileSync(agentPath, 'utf8');
  const frontmatter = /^---\s*\n([\s\S]*?)\n---/.exec(contents);
  return frontmatter ? YAML.parse(frontmatter[1]) : null;
}
@@ -0,0 +1,214 @@
1
+ // Claude runner.
2
+ // Default live mode uses the local Claude Code CLI subscription/OAuth session.
3
+ // Set HARNESS_CLAUDE_RUNNER=sdk to opt into Anthropic SDK/API-key mode.
4
+
5
+ import { assertDelegatedCliAuth } from '../../core/auth-guard.js';
6
+ import { resolveProviderCli } from '../../core/cli-resolver.js';
7
+ import { withGitMutationGuard } from '../../core/git-mutation-guard.js';
8
+ import { extractJson } from '../../core/json-extractor.js';
9
+ import { spawnAndCollect } from '../../core/subprocess.js';
10
+
11
// Short model alias -> full model id, used by the SDK runner.
// Null-prototype and frozen: a lookup with an arbitrary model string
// (e.g. 'constructor') must miss and fall through to the caller's default
// rather than return an inherited Object.prototype member.
const MODEL_MAP = Object.freeze(Object.assign(Object.create(null), {
  opus: 'claude-opus-4-7',
  sonnet: 'claude-sonnet-4-6',
  haiku: 'claude-haiku-4-5-20251001',
}));
16
+
17
/**
 * Claude runner entry point.
 *
 * Chooses the implementation from HARNESS_CLAUDE_RUNNER (case-insensitive):
 * 'sdk' selects the SDK runner, anything else (including unset) selects the
 * CLI runner.
 *
 * @param {object} args - runner arguments, forwarded unchanged
 * @returns {Promise<object>} the parsed JSON object produced by the selected runner
 */
export async function runClaude(args) {
  const mode = (process.env.HARNESS_CLAUDE_RUNNER || 'cli').toLowerCase();
  return mode === 'sdk' ? runClaudeSdk(args) : runClaudeCli(args);
}
22
+
23
/**
 * Run the agent through the Anthropic SDK (opt-in, API-key mode).
 *
 * @param {object} args - runner arguments; `args.model` may be a short alias
 *   from MODEL_MAP or a full model id
 * @returns {Promise<object>} the JSON object parsed from the model response,
 *   with `usage` token counts attached when the response reports them
 * @throws {Error} when ANTHROPIC_API_KEY is unset, the SDK cannot be imported,
 *   or the response contains no parseable JSON
 */
async function runClaudeSdk(args) {
  if (!process.env.ANTHROPIC_API_KEY) {
    throw new Error('ANTHROPIC_API_KEY is required when HARNESS_CLAUDE_RUNNER=sdk. Use Claude Code CLI login for the default runner.');
  }

  let Anthropic;
  try {
    ({ default: Anthropic } = await import('@anthropic-ai/sdk'));
  } catch (err) {
    // Keep the underlying import failure reachable for debugging.
    throw new Error('@anthropic-ai/sdk is not installed. Install it or use the default Claude Code CLI runner.', { cause: err });
  }

  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
  const modelId = MODEL_MAP[args.model] || args.model;
  const systemPrompt = buildSystem(args);
  const userPrompt = buildUserMessage(args);

  const resp = await client.messages.create({
    model: modelId,
    max_tokens: 4096,
    system: systemPrompt,
    messages: [{ role: 'user', content: userPrompt }],
  });

  // Only text blocks contribute to the reply; other block types are ignored.
  const text = resp.content.map((b) => (b.type === 'text' ? b.text : '')).join('').trim();
  const jsonText = extractJson(text);
  if (!jsonText) {
    throw new Error('Claude SDK response did not contain JSON. raw:\n' + text.slice(0, 500));
  }

  let parsed;
  try {
    parsed = JSON.parse(jsonText);
  } catch (e) {
    throw new Error('Claude SDK response JSON parse failed: ' + e.message, { cause: e });
  }

  // Attach token usage the same way the CLI runner does, so that callers
  // reading `result.usage` do not see zero tokens for SDK calls.
  if (resp.usage && parsed && typeof parsed === 'object') {
    parsed.usage = {
      input_tokens: Number(resp.usage.input_tokens ?? 0),
      output_tokens: Number(resp.usage.output_tokens ?? 0),
      cache_creation_input_tokens: Number(resp.usage.cache_creation_input_tokens ?? 0),
      cache_read_input_tokens: Number(resp.usage.cache_read_input_tokens ?? 0),
    };
  }

  return parsed;
}
59
+
60
/**
 * Run the agent through the local `claude` CLI (default runner).
 *
 * The prompt is passed as the third argument of spawnAndCollect and the CLI
 * is asked for JSON output. Unless the execution mode is 'workspace-write',
 * the call is wrapped in withGitMutationGuard.
 *
 * @param {object} args - runner arguments
 * @returns {Promise<object>} the JSON object parsed from the CLI reply, with
 *   normalized `usage` attached when the CLI wrapper reports it
 * @throws {Error} when the CLI cannot be resolved or the reply has no parseable JSON
 */
async function runClaudeCli(args) {
  assertDelegatedCliAuth('claude');

  const cwd = args.projectRoot || args.harnessRoot || process.cwd();
  const trustRoots = [cwd, args.harnessRoot].filter(Boolean);
  const claudeBin = resolveProviderCli('claude', { root: cwd, roots: trustRoots });
  if (!claudeBin) {
    throw new Error('claude CLI is not installed. Install/login to Claude Code, or explicitly use HARNESS_CLAUDE_RUNNER=sdk with ANTHROPIC_API_KEY.');
  }

  const systemPrompt = buildSystem(args);
  const userPrompt = buildUserMessage(args);
  const modelId = process.env.HARNESS_CLAUDE_MODEL || args.model || 'sonnet';
  const cliArgs = buildCliArgs(args, modelId, systemPrompt);

  // The full-cycle implement stage replies far more slowly than the verify
  // smoke run (~25s), and a 180s default timed out often, so the default is
  // 600s (10 minutes). HARNESS_CLAUDE_TIMEOUT_S adjusts it; a non-numeric
  // value falls back to the default instead of producing a NaN timeout.
  const timeoutS = Number(process.env.HARNESS_CLAUDE_TIMEOUT_S || 600);
  const timeoutMs = (Number.isFinite(timeoutS) ? timeoutS : 600) * 1000;

  const run = () => spawnAndCollect(claudeBin, cliArgs, userPrompt, {
    label: 'claude',
    timeoutMs,
    cwd,
  });
  const stdout = args.executionMode === 'workspace-write'
    ? await run()
    : await withGitMutationGuard(
      cwd,
      run,
      { label: 'claude', allowEnvKey: 'HARNESS_CLAUDE_ALLOW_WORKSPACE_MUTATION' },
    );

  // With --output-format json the CLI wraps the reply; fall back to raw
  // stdout when the wrapper is absent or has no string `result`.
  const wrapper = parseCliJson(stdout);
  const text = typeof wrapper?.result === 'string' ? wrapper.result : stdout;
  const jsonText = extractJson(text);
  if (!jsonText) {
    throw new Error('Claude CLI response did not contain JSON. raw:\n' + text.slice(0, 500));
  }

  let parsed;
  try {
    parsed = JSON.parse(jsonText);
  } catch (e) {
    throw new Error('Claude CLI response JSON parse failed: ' + e.message, { cause: e });
  }

  if (wrapper?.usage && parsed && typeof parsed === 'object') {
    // NOTE(review): the CLI result object appears to report `total_cost_usd`
    // at the top level rather than inside `usage` — confirm against the CLI
    // reference. The top-level value is used only as a fallback; a value
    // inside `usage` still wins.
    parsed.usage = normalizeCliUsage({
      total_cost_usd: wrapper.total_cost_usd,
      ...wrapper.usage,
    });
  }
  return parsed;
}
103
+
104
/**
 * Build the argv for the `claude` CLI.
 *
 * @param {object} a - runner arguments; only `a.executionMode` is read
 * @param {string} modelId - value for `--model`
 * @param {string} systemPrompt - value for `--system-prompt`
 * @returns {string[]} argument list; in 'workspace-write' mode it carries the
 *   permission mode and allowed tools (both overridable through environment
 *   variables), otherwise an empty tool list with permission mode 'plan'
 */
function buildCliArgs(a, modelId, systemPrompt) {
  const common = [
    '-p',
    '--output-format', 'json',
    '--no-session-persistence',
    '--model', modelId,
    '--system-prompt', systemPrompt,
  ];

  if (a.executionMode !== 'workspace-write') {
    return [...common, '--tools', '', '--permission-mode', 'plan'];
  }

  const permissionMode = process.env.HARNESS_CLAUDE_EXEC_PERMISSION_MODE || 'acceptEdits';
  const allowedTools = process.env.HARNESS_CLAUDE_EXEC_TOOLS || 'Edit Write MultiEdit';
  return [...common, '--permission-mode', permissionMode, '--allowedTools', allowedTools];
}
124
+
125
/**
 * Build the system prompt for a Claude run.
 *
 * The prompt names the agent and stage, lists disallowed tools when any are
 * given, states the sandbox, fixes the JSON-only output rules, adds one
 * paragraph that depends on the execution mode, and ends with the agent body.
 *
 * @param {object} a - runner arguments (agent, stage, disallowedTools,
 *   sandbox, executionMode, promptBody)
 * @returns {string} the system prompt
 */
function buildSystem(a) {
  const disallowed = a.disallowedTools?.length
    ? `\nDisallowed tools: ${a.disallowedTools.join(', ')}`
    : '';

  let modeRule;
  if (a.executionMode === 'workspace-write') {
    modeRule = 'Workspace-write execution mode: edit files in this isolated git worktree if needed, but do not commit or push. Finish by returning the JSON handoff with changed files and evidence.';
  } else {
    modeRule = 'Non-interactive handoff mode: do not call tools, edit files, run shell commands, wait for approvals, or make commits. If the agent body asks you to implement, test, or commit, summarize the intended change and evidence in JSON only.';
  }

  const sections = [
    `You are the HARNESS agent "${a.agent}" running stage "${a.stage}".${disallowed}`,
    `Sandbox: ${a.sandbox || 'workspace-write'}.`,
    'Output rules: respond with ONE JSON object conforming to schemas/handoff.schema.json.',
    'No prose outside JSON. Korean for natural-language fields.',
    modeRule,
    'Keep the JSON concise so the CLI can finish promptly.',
    '',
    'Agent body:',
    `${a.promptBody}`,
  ];
  return sections.join('\n');
}
141
+
142
/**
 * Build the user message for a Claude run.
 *
 * Sections are emitted in a fixed order and only when the corresponding
 * context field is present: task, PRD, acceptance criteria, profile quality
 * checklist, git diff (first 20000 characters), prior handoffs, and a round
 * note for rounds after the first.
 *
 * @param {object} a - runner arguments (`task` and optional `context`)
 * @returns {string} newline-joined message
 */
function buildUserMessage(a) {
  const ctx = a.context;
  const out = ['## Task', a.task || '(none)', ''];

  if (ctx?.prd) {
    out.push('## PRD', '```json', JSON.stringify(ctx.prd, null, 2), '```');
  }

  if (ctx?.acceptanceCriteria?.length) {
    out.push('## Acceptance Criteria');
    for (const criterion of ctx.acceptanceCriteria) {
      out.push(`- ${criterion.id}: ${criterion.desc}`);
    }
    out.push('');
  }

  if (ctx?.qualityChecklist?.length) {
    const profileSuffix = ctx.profile ? ` (${ctx.profile})` : '';
    out.push(`## Profile Quality Checklist${profileSuffix}`);
    for (const entry of ctx.qualityChecklist) {
      out.push(`- ${entry}`);
    }
    out.push('');
  }

  if (ctx?.diff) {
    out.push('## Git Diff', '```diff', String(ctx.diff).slice(0, 20000), '```');
  }

  if (ctx?.priorHandoffs?.length) {
    out.push('## Prior handoffs');
    for (const prior of ctx.priorHandoffs) {
      out.push(`### ${prior.stage}`, `Decided: ${prior.decided}`, `Files: ${(prior.files || []).join(', ')}`);
      if (prior.verdict) {
        out.push(`Verdict: ${prior.verdict}`);
      }
      out.push('');
    }
  }

  if (ctx?.round && ctx.round > 1) {
    out.push(`## Round ${ctx.round}: consider unresolved issues from earlier rounds.`);
  }

  return out.join('\n');
}
188
+
189
/**
 * Parse CLI stdout as a single JSON document.
 *
 * @param {*} stdout - raw stdout; falsy values are treated as empty
 * @returns {*} the parsed value, or null when the trimmed text is empty or
 *   is not valid JSON
 */
function parseCliJson(stdout) {
  const trimmed = String(stdout || '').trim();
  if (trimmed.length === 0) {
    return null;
  }

  try {
    return JSON.parse(trimmed);
  } catch {
    return null;
  }
}
195
+
196
/**
 * Normalize the `usage` object reported by the CLI into plain numbers.
 *
 * Token counts come from the last entry of `usage.iterations` when that
 * array is present, otherwise from the top-level fields. Missing or
 * non-numeric values become 0 so that NaN never reaches the usage record.
 *
 * @param {object|null|undefined} usage - raw usage object from the CLI wrapper
 * @returns {{input_tokens: number, output_tokens: number,
 *   cache_creation_input_tokens: number, cache_read_input_tokens: number,
 *   total_cost_usd: number}}
 */
function normalizeCliUsage(usage) {
  const totals = usage && typeof usage === 'object' ? usage : {};
  const last = Array.isArray(totals.iterations) ? totals.iterations.at(-1) : null;

  // Coerce to a finite number; anything else (NaN, Infinity) counts as 0.
  const toNumber = (value) => {
    const n = Number(value ?? 0);
    return Number.isFinite(n) ? n : 0;
  };

  return {
    input_tokens: toNumber(last?.input_tokens ?? totals.input_tokens),
    output_tokens: toNumber(last?.output_tokens ?? totals.output_tokens),
    cache_creation_input_tokens: toNumber(last?.cache_creation_input_tokens ?? totals.cache_creation_input_tokens),
    cache_read_input_tokens: toNumber(last?.cache_read_input_tokens ?? totals.cache_read_input_tokens),
    total_cost_usd: toNumber(totals.total_cost_usd),
  };
}
206
+
207
+ export {
208
+ buildSystem as _buildSystem,
209
+ buildCliArgs as _buildCliArgs,
210
+ buildUserMessage as _buildUserMessage,
211
+ extractJson,
212
+ parseCliJson as _parseCliJson,
213
+ normalizeCliUsage as _normalizeCliUsage,
214
+ };
@@ -0,0 +1,233 @@
1
+ // Codex runner: OpenAI Codex CLI 를 subprocess 로 호출.
2
+ // 환경: codex 바이너리 필요. 없으면 throw.
3
+ //
4
+ // 호출 패턴 (codex 0.124.0+ 비대화형 검증):
5
+ // codex exec --sandbox read-only [--profile <name>] < prompt
6
+ // stdin 으로 prompt 전달, stdout 의 `codex` 라벨 다음 JSON 객체를 응답으로 사용.
7
+ //
8
+ // Codex 는 Claude 컨텍스트를 받지 않는다. 입력은:
9
+ // - system prompt (codex-reviewer 페르소나)
10
+ // - git diff
11
+ // - handoffs/04-self-review.md (Claude self-review 5필드 요약)
12
+ // - prd-<id>.md
13
+ //
14
+ // 출력: stdout 의 JSON. 5필드 + issues + verdict.
15
+
16
+ import { assertDelegatedCliAuth } from '../../core/auth-guard.js';
17
+ import { resolveProviderCli } from '../../core/cli-resolver.js';
18
+ import { withGitMutationGuard } from '../../core/git-mutation-guard.js';
19
+ import { extractJson } from '../../core/json-extractor.js';
20
+ import { spawnAndCollect } from '../../core/subprocess.js';
21
+ import { classifyCategory, classifySeverity, deriveVerdict, severityCounts } from '../../lib/severity.js';
22
+
23
/**
 * Run a Codex stage through the local `codex` CLI and return a normalized handoff.
 *
 * The prompt is passed as the third argument of spawnAndCollect, the CLI is
 * invoked as `codex exec --sandbox read-only`, and the call is wrapped in
 * withGitMutationGuard.
 *
 * @param {object} args - runner arguments (stage, context, projectRoot, harnessRoot, ...)
 * @returns {Promise<object>} result of normalizeHandoff applied to the JSON reply
 * @throws {Error} when the CLI cannot be resolved or the reply has no parseable JSON
 */
export async function runCodex(args) {
  assertDelegatedCliAuth('codex');

  const cwd = args.projectRoot || args.harnessRoot || process.cwd();
  const trustRoots = [cwd, args.harnessRoot].filter(Boolean);
  const codexBin = resolveProviderCli('codex', { root: cwd, roots: trustRoots });
  if (!codexBin) {
    throw new Error('codex CLI 미설치. https://github.com/openai/codex 또는 --provider=mock 사용.');
  }

  const stage = args.stage === 'codex-challenge' ? 'challenge' : 'review';
  const promptText = buildPrompt(args);

  // Non-interactive mode (codex 0.124.0+): `codex exec` with an explicit sandbox.
  const cliArgs = ['exec', '--sandbox', 'read-only'];

  // A profile depends on the user's `~/.codex/config.toml`, so it is optional
  // and comes from the environment: per stage via
  // HARNESS_CODEX_PROFILE_REVIEW / HARNESS_CODEX_PROFILE_CHALLENGE, or shared
  // via HARNESS_CODEX_PROFILE.
  const profile = process.env[`HARNESS_CODEX_PROFILE_${stage.toUpperCase()}`]
    || process.env.HARNESS_CODEX_PROFILE;
  if (profile) {
    cliArgs.push('--profile', profile);
  }

  // Split on any run of whitespace and drop empties, so repeated, leading or
  // trailing spaces never turn into empty-string argv entries.
  const extraArgs = (process.env.HARNESS_CODEX_EXTRA_ARGS || '').split(/\s+/).filter(Boolean);
  cliArgs.push(...extraArgs);

  // A non-numeric HARNESS_CODEX_TIMEOUT_S falls back to the 180s default
  // instead of producing a NaN timeout.
  const timeoutS = Number(process.env.HARNESS_CODEX_TIMEOUT_S || 180);
  const timeoutMs = (Number.isFinite(timeoutS) ? timeoutS : 180) * 1000;

  const stdout = await withGitMutationGuard(
    cwd,
    () => spawnAndCollect(codexBin, cliArgs, promptText, {
      label: 'codex',
      timeoutMs,
      cwd,
    }),
    { label: 'codex', allowEnvKey: 'HARNESS_CODEX_ALLOW_WORKSPACE_MUTATION' },
  );

  // codex CLI 0.125+ prints "user\n<prompt echo>\n\ncodex\n<reply>". The echoed
  // prompt can contain ```json fences that extractJson would match by mistake,
  // so only the text after a line consisting of "codex" is parsed.
  const labelMatch = stdout.match(/(^|\n)codex\r?\n/);
  const cleaned = labelMatch
    ? stdout.slice(labelMatch.index + labelMatch[0].length)
    : stdout;
  const json = extractJson(cleaned);
  if (!json) {
    throw new Error('Codex 응답에서 JSON 을 찾지 못함. raw:\n' + stdout.slice(0, 500));
  }

  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch (e) {
    // Give the failure the same context the other runners provide.
    throw new Error('Codex 응답 JSON 파싱 실패: ' + e.message, { cause: e });
  }
  return normalizeHandoff(parsed);
}
74
+
75
/**
 * Build the prompt text sent to the Codex CLI.
 *
 * Starts with a stage-specific persona ('codex-review' or 'codex-challenge';
 * none for other stages), then the output rule, then an input section made of
 * whichever context fields are present: git diff (first 30000 characters),
 * acceptance criteria, profile quality checklist, evidence requirements,
 * prior handoffs, and PRD.
 *
 * @param {object} a - runner arguments (`stage` and optional `context`)
 * @returns {string} newline-joined prompt
 */
function buildPrompt(a) {
  const ctx = a.context;
  const out = [];

  switch (a.stage) {
    case 'codex-review':
      out.push(
        '# 시스템 프롬프트',
        '당신은 이 변경을 처음 보는 시니어 리뷰어다. Claude self-review 가 놓쳤을 critical / high 만 보고하라.',
        '',
      );
      break;
    case 'codex-challenge':
      out.push(
        '# 시스템 프롬프트',
        '당신은 적대적 보안 리서처다. 이 코드를 부수려 들어라. 구체적 공격 시나리오를 issue.why 에 기술하라.',
        '',
      );
      break;
    default:
      break;
  }

  out.push('출력은 schemas/handoff.schema.json 에 부합하는 JSON 객체 하나.', '', '# 입력');

  if (ctx?.diff) {
    out.push('## Git Diff', '```diff', String(ctx.diff).slice(0, 30000), '```');
  }

  if (ctx?.acceptanceCriteria?.length) {
    out.push('## Acceptance Criteria');
    for (const criterion of ctx.acceptanceCriteria) {
      out.push(`- ${criterion.id}: ${criterion.desc}`);
    }
  }

  if (ctx?.qualityChecklist?.length) {
    const profileSuffix = ctx.profile ? ` (${ctx.profile})` : '';
    out.push(`## Profile Quality Checklist${profileSuffix}`);
    for (const entry of ctx.qualityChecklist) {
      out.push(`- ${entry}`);
    }
  }

  if (ctx?.evidencePolicy?.evidenceWarningRequired) {
    out.push(
      '## Evidence Requirements',
      'For critical, high, or gate-required findings, include claim, evidence, required_fix, confidence, and gate_required.',
    );
    if (ctx.evidencePolicy.strictQuality) {
      out.push('Strict quality mode is active: missing evidence or acceptance coverage can block ship readiness.');
    }
  }

  if (ctx?.priorHandoffs?.length) {
    out.push('## 이전 단계 핸드오프 (5필드만)');
    for (const prior of ctx.priorHandoffs) {
      out.push(`### ${prior.stage}`, `- Decided: ${prior.decided}`, `- Files: ${(prior.files || []).join(', ')}`);
      if (prior.verdict) {
        out.push(`- Verdict: ${prior.verdict}`);
      }
    }
  }

  if (ctx?.prd) {
    out.push('## PRD', '```json', JSON.stringify(ctx.prd, null, 2), '```');
  }

  out.push(
    '',
    'Review issue fields should be evidence-based when possible: claim, evidence, required_fix, confidence, gate_required.',
  );
  return out.join('\n');
}
133
+
134
/**
 * Normalize a raw Codex JSON reply into the lower-case handoff fields.
 *
 * Accepts capitalized key variants (e.g. `Issues`, `Verdict`) and `decision`
 * as an alias of `decided`. When `issues` is not an array but `risks` is, the
 * risks are used as the issue list.
 *
 * @param {*} raw - parsed JSON reply; non-objects are returned unchanged
 * @returns {*} `{ decided, rejected, risks, files, remaining, issues, verdict }`
 *   plus `confidence` when the reply carries a finite one
 */
function normalizeHandoff(raw) {
  if (!raw || typeof raw !== 'object') return raw;

  // First defined value among the given key spellings.
  const pick = (...keys) => {
    for (const key of keys) {
      if (raw[key] !== undefined) return raw[key];
    }
    return undefined;
  };

  // One risk entry -> display text. Strings are used verbatim (not
  // JSON-quoted) and objects fall back through their common text fields.
  const describeRisk = (risk) => {
    if (typeof risk === 'string') return risk;
    if (typeof risk === 'object') {
      return risk.issue || risk.summary || risk.message || JSON.stringify(risk);
    }
    return JSON.stringify(risk);
  };

  const rawIssues = pick('issues', 'Issues');
  const rawRisks = pick('risks', 'Risks');
  const issueSource = Array.isArray(rawIssues) ? rawIssues : (Array.isArray(rawRisks) ? rawRisks : []);
  const issues = issueSource.map(normalizeIssue);

  // Extract confidence and blast radius before deciding the verdict; both are
  // passed on as verdict options.
  const confRaw = pick('confidence', 'Confidence');
  const confNum = confRaw != null ? Number(confRaw) : NaN;
  const confidence = Number.isFinite(confNum) ? confNum : undefined;
  const filesArr = normalizeFiles(pick('files', 'Files'));
  const blastRadius = filesArr.length;

  // Resolve `decided` once so the verdict summary sees the same value,
  // including the `decision` alias, as the `decided` field.
  const decidedRaw = pick('decided', 'Decided', 'decision', 'Decision');

  // Null/undefined risk entries are skipped rather than dereferenced.
  const risksText = Array.isArray(rawRisks)
    ? rawRisks.filter((risk) => risk != null).map(describeRisk).join('; ')
    : rawRisks;

  const lower = {
    decided: stringifyField(decidedRaw),
    rejected: stringifyField(pick('rejected', 'Rejected')),
    risks: stringifyField(risksText),
    files: filesArr,
    remaining: stringifyField(pick('remaining', 'Remaining')),
    issues,
    verdict: normalizeVerdict(pick('verdict', 'Verdict'), issues, decidedRaw, { confidence, blastRadius }),
  };

  if (confidence !== undefined) lower.confidence = confidence;

  return lower;
}
170
+
171
/**
 * Normalize one reported issue into the handoff issue shape.
 *
 * Non-object input is wrapped as `{ summary }`. The summary is capped at 200
 * characters and defaults to a generic text. Category and severity are then
 * (re)derived by classifyCategory / classifySeverity, and undefined fields
 * are removed.
 *
 * @param {*} issue - raw issue entry from the reply
 * @returns {object} normalized issue
 */
function normalizeIssue(issue) {
  const i = issue && typeof issue === 'object' ? issue : { summary: String(issue || '') };
  const summary = String(i.summary || i.claim || i.issue || i.message || i.title || '').slice(0, 200) || 'Codex reported an issue';

  // Number(null) and Number('') are both 0, which would turn "no confidence
  // given" into "zero confidence"; treat those as absent instead.
  const confidence = i.confidence != null && i.confidence !== '' ? Number(i.confidence) : NaN;

  const normalized = {
    severity: i.severity,
    category: i.category,
    file: i.file || i.path,
    line: Number.isInteger(i.line) ? i.line : undefined,
    claim: i.claim,
    evidence: i.evidence,
    summary,
    why: i.why || i.issue || i.message,
    required_fix: i.required_fix ?? undefined,
    suggested_fix: i.suggested_fix ?? i.fix ?? null,
    confidence: Number.isFinite(confidence) ? confidence : undefined,
    gate_required: typeof i.gate_required === 'boolean' ? i.gate_required : undefined,
  };
  // Category is classified first because the severity classifier receives the
  // object with the category already set.
  normalized.category = classifyCategory(normalized);
  normalized.severity = classifySeverity(normalized);
  for (const key of Object.keys(normalized)) {
    if (normalized[key] === undefined) delete normalized[key];
  }
  return normalized;
}
196
+
197
/**
 * Coerce a `files` value into an array of strings.
 *
 * @param {*} files - array, single value, or falsy
 * @returns {string[]} stringified entries; a single truthy value becomes a
 *   one-element array and a falsy value becomes an empty array
 */
function normalizeFiles(files) {
  if (Array.isArray(files)) {
    return files.map((entry) => String(entry));
  }
  return files ? [String(files)] : [];
}
202
+
203
/**
 * Render a handoff field as text.
 *
 * @param {*} value - field value from the reply
 * @returns {string} the value itself when it is a string, '' for
 *   null/undefined, otherwise its JSON serialization
 */
function stringifyField(value) {
  if (typeof value === 'string') {
    return value;
  }
  return value == null ? '' : JSON.stringify(value);
}
208
+
209
/**
 * Resolve the final verdict for a Codex reply.
 *
 * An explicit 'block' / 'approve_with_fixes' / 'approve' (case-insensitive)
 * is taken as is. A change-request style verdict with no issues is derived
 * from one synthetic high-severity issue; everything else is derived from the
 * reported issues. Any non-block result is then downgraded to 'block' when
 * there are more than five high issues or the confidence is a number below 0.6.
 *
 * @param {*} verdict - raw verdict from the reply
 * @param {object[]} issues - normalized issues
 * @param {*} decided - raw `decided` value, used as the synthetic issue summary
 * @param {object} [opts] - options forwarded to deriveVerdict; `opts.confidence` is also read here
 * @returns {string} final verdict
 */
function normalizeVerdict(verdict, issues, decided, opts = {}) {
  const explicitVerdicts = ['block', 'approve_with_fixes', 'approve'];
  const changeRequestVerdicts = ['request_changes', 'changes_requested', 'fix', 'gate'];
  const requested = String(verdict || '').toLowerCase();

  const resolveBase = () => {
    if (explicitVerdicts.includes(requested)) {
      return requested;
    }
    if (changeRequestVerdicts.includes(requested) && !issues.length) {
      const synthetic = { severity: 'high', category: 'correctness', summary: String(decided || 'changes requested') };
      return deriveVerdict([synthetic], opts);
    }
    return deriveVerdict(issues, opts);
  };

  const base = resolveBase();
  if (base === 'block') {
    return base;
  }

  // Conservative downgrade: a safety net for replies that approve with many
  // high-severity issues or with low stated confidence.
  if (severityCounts(issues).high > 5) {
    return 'block';
  }
  const lowConfidence = typeof opts.confidence === 'number' && opts.confidence < 0.6;
  return lowConfidence ? 'block' : base;
}
232
+
233
+ export { buildPrompt as _buildPrompt, extractJson, normalizeHandoff as _normalizeHandoff };
@@ -0,0 +1,92 @@
1
+ // Gemini runner: calls the local Gemini CLI subprocess.
2
+ // Default auth is delegated to the user's local gemini/gcloud session.
3
+
4
+ import { assertDelegatedCliAuth } from '../../core/auth-guard.js';
5
+ import { resolveProviderCli } from '../../core/cli-resolver.js';
6
+ import { withGitMutationGuard } from '../../core/git-mutation-guard.js';
7
+ import { extractJson } from '../../core/json-extractor.js';
8
+ import { spawnAndCollect } from '../../core/subprocess.js';
9
+
10
/**
 * Run the local Gemini CLI for one handoff and return the parsed result.
 *
 * Steps: assert delegated CLI auth, resolve the `gemini` binary, build the
 * prompt and CLI arguments, spawn the CLI inside the git mutation guard, then
 * parse its stdout.
 *
 * Environment:
 * - `HARNESS_GEMINI_TIMEOUT_S`: timeout in seconds (default 120). A value that
 *   does not parse as a number falls back to the default instead of producing
 *   a `NaN` timeout.
 * - `HARNESS_GEMINI_ALLOW_WORKSPACE_MUTATION`: passed to the guard as `allowEnvKey`.
 *
 * @param {object} args - Run arguments; `projectRoot` and `harnessRoot` are read
 *   here, the rest is consumed by `buildPrompt` / `buildCliArgs`.
 * @returns {Promise<*>} The value produced by `parseGeminiOutput`.
 * @throws {Error} When the gemini CLI cannot be resolved.
 */
export async function runGemini(args) {
  assertDelegatedCliAuth('gemini');

  const cwd = args.projectRoot || args.harnessRoot || process.cwd();
  const trustRoots = [cwd, args.harnessRoot].filter(Boolean);
  const bin = resolveProviderCli('gemini', { root: cwd, roots: trustRoots });
  if (!bin) {
    throw new Error('gemini CLI is not installed. Install/login to Gemini CLI, or use --provider=mock.');
  }

  // Guard against a non-numeric env value: Number('abc') is NaN, and NaN * 1000
  // would hand the subprocess helper a meaningless timeout.
  const defaultTimeoutS = 120;
  const requestedTimeoutS = Number(process.env.HARNESS_GEMINI_TIMEOUT_S || defaultTimeoutS);
  const timeoutS = Number.isNaN(requestedTimeoutS) ? defaultTimeoutS : requestedTimeoutS;

  const prompt = buildPrompt(args);
  const cliArgs = buildCliArgs(args);
  const stdout = await withGitMutationGuard(
    cwd,
    () => spawnAndCollect(bin, cliArgs, prompt, {
      label: 'gemini',
      timeoutMs: timeoutS * 1000,
      cwd,
    }),
    { label: 'gemini', allowEnvKey: 'HARNESS_GEMINI_ALLOW_WORKSPACE_MUTATION' },
  );

  return parseGeminiOutput(stdout);
}
34
+
35
/**
 * Build the argument vector passed to the Gemini CLI.
 *
 * The fixed flags are always present. `--model <name>` is appended when a model
 * is configured; `HARNESS_GEMINI_MODEL` takes precedence over `a.model`.
 *
 * @param {object} a - Run arguments; only `model` is read.
 * @returns {string[]} CLI arguments.
 */
function buildCliArgs(a) {
  const model = process.env.HARNESS_GEMINI_MODEL || a.model;
  const modelFlag = model ? ['--model', model] : [];

  return [
    '--prompt',
    'Use the instructions provided on stdin. Return only the requested JSON.',
    '--output-format',
    'json',
    '--approval-mode',
    'plan',
    '--skip-trust',
    ...modelFlag,
  ];
}
50
+
51
/**
 * Parse the Gemini CLI stdout into the handoff object.
 *
 * The stdout is first parsed with `parseOuterJson`. When the result has a
 * string `response` property, the handoff JSON is extracted from that string
 * and parsed; otherwise the outer value is returned unchanged.
 *
 * @param {*} stdout - Raw stdout collected from the CLI.
 * @returns {*} The parsed handoff value.
 * @throws {Error} When a string `response` contains no extractable JSON.
 */
function parseGeminiOutput(stdout) {
  const outer = parseOuterJson(stdout);
  const hasWrappedResponse = Boolean(outer) && typeof outer.response === 'string';
  if (!hasWrappedResponse) {
    return outer;
  }

  const inner = extractJson(outer.response);
  if (inner) {
    return JSON.parse(inner);
  }
  throw new Error('Gemini JSON wrapper did not contain handoff JSON in response. raw:\n' + outer.response.slice(0, 500));
}
63
+
64
/**
 * Parse the CLI stdout as JSON.
 *
 * The trimmed text is parsed directly first. If that fails, `extractJson` is
 * asked for a JSON fragment inside the text and that fragment is parsed.
 *
 * @param {*} stdout - Raw stdout; falsy values are treated as empty text.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the text is empty or no JSON can be extracted from it.
 */
function parseOuterJson(stdout) {
  const raw = String(stdout || '').trim();
  if (raw === '') {
    throw new Error('Gemini response did not contain JSON. raw:\n');
  }

  // Fast path: the whole output is a single JSON document.
  let direct;
  let parsedDirectly = true;
  try {
    direct = JSON.parse(raw);
  } catch {
    // Not pure JSON; fall through to extraction below.
    parsedDirectly = false;
  }
  if (parsedDirectly) {
    return direct;
  }

  const candidate = extractJson(raw);
  if (candidate) {
    return JSON.parse(candidate);
  }
  throw new Error('Gemini response did not contain JSON. raw:\n' + raw.slice(0, 500));
}
76
+
77
/**
 * Assemble the prompt text sent to the Gemini CLI.
 *
 * The prompt is a newline-joined list of fixed instruction lines, the task
 * line, and up to three optional sections (agent body, git diff, PRD). Absent
 * sections are omitted, and no blank separator line is emitted between parts.
 * The diff is cut to its first 30000 characters.
 *
 * @param {object} a - Run arguments. Reads `agent`, `stage`, `sandbox`, `task`,
 *   `promptBody`, `context.diff` and `context.prd`.
 * @returns {string} The prompt.
 */
function buildPrompt(a) {
  const sections = [
    `# System: HARNESS agent "${a.agent}" stage "${a.stage}".`,
    'Output exactly one JSON object shaped like schemas/handoff.schema.json.',
    `Sandbox: ${a.sandbox || 'read-only'}.`,
    'Non-interactive handoff mode: do not call tools, edit files, run shell commands, wait for approvals, or make commits.',
    'No prose outside JSON. Korean for natural-language fields unless the task asks otherwise.',
    `# Task: ${a.task || '(none)'}`,
  ];

  if (a.promptBody) {
    sections.push('## Agent Body\n' + a.promptBody);
  }

  const diff = a.context?.diff;
  if (diff) {
    sections.push('## Git Diff\n```diff\n' + String(diff).slice(0, 30000) + '\n```');
  }

  const prd = a.context?.prd;
  if (prd) {
    sections.push('## PRD\n```json\n' + JSON.stringify(prd, null, 2) + '\n```');
  }

  return sections.join('\n');
}
91
+
92
+ export { buildPrompt as _buildPrompt, buildCliArgs as _buildCliArgs, parseGeminiOutput as _parseGeminiOutput };