@maintainabilityai/research-runner 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +82 -0
- package/bin/research-runner.js +2 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +209 -0
- package/dist/llm/anthropic-client.d.ts +39 -0
- package/dist/llm/anthropic-client.js +74 -0
- package/dist/llm/github-models-client.d.ts +46 -0
- package/dist/llm/github-models-client.js +78 -0
- package/dist/llm/llm-router.d.ts +46 -0
- package/dist/llm/llm-router.js +60 -0
- package/dist/mesh/get-mesh-sha.d.ts +1 -0
- package/dist/mesh/get-mesh-sha.js +27 -0
- package/dist/mesh/mesh-reader.d.ts +14 -0
- package/dist/mesh/mesh-reader.js +392 -0
- package/dist/mesh/prompt-loader.d.ts +22 -0
- package/dist/mesh/prompt-loader.js +119 -0
- package/dist/mesh/threat-model-reader.d.ts +33 -0
- package/dist/mesh/threat-model-reader.js +123 -0
- package/dist/runner/archeologist.d.ts +39 -0
- package/dist/runner/archeologist.js +620 -0
- package/dist/runner/audit-emitter.d.ts +62 -0
- package/dist/runner/audit-emitter.js +210 -0
- package/dist/runner/hatters-tag-builder.d.ts +52 -0
- package/dist/runner/hatters-tag-builder.js +40 -0
- package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
- package/dist/runner/nodes/analyze-architecture.js +447 -0
- package/dist/runner/nodes/arxiv-search.d.ts +12 -0
- package/dist/runner/nodes/arxiv-search.js +52 -0
- package/dist/runner/nodes/clone-and-index.d.ts +32 -0
- package/dist/runner/nodes/clone-and-index.js +158 -0
- package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
- package/dist/runner/nodes/dedupe-and-rank.js +98 -0
- package/dist/runner/nodes/deterministic-review.d.ts +55 -0
- package/dist/runner/nodes/deterministic-review.js +206 -0
- package/dist/runner/nodes/expert-review.d.ts +68 -0
- package/dist/runner/nodes/expert-review.js +197 -0
- package/dist/runner/nodes/gap-analysis.d.ts +48 -0
- package/dist/runner/nodes/gap-analysis.js +153 -0
- package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
- package/dist/runner/nodes/generate-prd-manifest.js +209 -0
- package/dist/runner/nodes/hackernews-search.d.ts +12 -0
- package/dist/runner/nodes/hackernews-search.js +63 -0
- package/dist/runner/nodes/identify-gaps.d.ts +33 -0
- package/dist/runner/nodes/identify-gaps.js +185 -0
- package/dist/runner/nodes/plan-queries.d.ts +28 -0
- package/dist/runner/nodes/plan-queries.js +120 -0
- package/dist/runner/nodes/prd-validator.d.ts +51 -0
- package/dist/runner/nodes/prd-validator.js +203 -0
- package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
- package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
- package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
- package/dist/runner/nodes/synthesis-validator.js +185 -0
- package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
- package/dist/runner/nodes/synthesize-prd.js +202 -0
- package/dist/runner/nodes/synthesize-report.d.ts +53 -0
- package/dist/runner/nodes/synthesize-report.js +188 -0
- package/dist/runner/nodes/tavily-search.d.ts +21 -0
- package/dist/runner/nodes/tavily-search.js +57 -0
- package/dist/runner/nodes/uspto-search.d.ts +13 -0
- package/dist/runner/nodes/uspto-search.js +62 -0
- package/dist/runner/nodes/verify-grounding.d.ts +54 -0
- package/dist/runner/nodes/verify-grounding.js +134 -0
- package/dist/runner/prd.d.ts +28 -0
- package/dist/runner/prd.js +494 -0
- package/dist/schemas/audit-event.d.ts +1151 -0
- package/dist/schemas/audit-event.js +141 -0
- package/dist/schemas/index.d.ts +17 -0
- package/dist/schemas/index.js +33 -0
- package/dist/schemas/mesh-context.d.ts +415 -0
- package/dist/schemas/mesh-context.js +95 -0
- package/dist/schemas/observed-architecture.d.ts +262 -0
- package/dist/schemas/observed-architecture.js +90 -0
- package/dist/schemas/prd-brief.d.ts +111 -0
- package/dist/schemas/prd-brief.js +37 -0
- package/dist/schemas/prd-doc.d.ts +249 -0
- package/dist/schemas/prd-doc.js +42 -0
- package/dist/schemas/prd-manifest.d.ts +171 -0
- package/dist/schemas/prd-manifest.js +73 -0
- package/dist/schemas/primitives.d.ts +47 -0
- package/dist/schemas/primitives.js +41 -0
- package/dist/schemas/query-plan.d.ts +33 -0
- package/dist/schemas/query-plan.js +25 -0
- package/dist/schemas/ranked-source.d.ts +82 -0
- package/dist/schemas/ranked-source.js +29 -0
- package/dist/schemas/research-brief.d.ts +114 -0
- package/dist/schemas/research-brief.js +49 -0
- package/dist/schemas/research-doc.d.ts +104 -0
- package/dist/schemas/research-doc.js +37 -0
- package/dist/search/arxiv-client.d.ts +41 -0
- package/dist/search/arxiv-client.js +88 -0
- package/dist/search/hackernews-client.d.ts +33 -0
- package/dist/search/hackernews-client.js +44 -0
- package/dist/search/provider-result.d.ts +25 -0
- package/dist/search/provider-result.js +2 -0
- package/dist/search/tavily-client.d.ts +38 -0
- package/dist/search/tavily-client.js +53 -0
- package/dist/search/uspto-client.d.ts +50 -0
- package/dist/search/uspto-client.js +112 -0
- package/dist/utils/run-id.d.ts +2 -0
- package/dist/utils/run-id.js +22 -0
- package/package.json +53 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 MaintainabilityAI Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# @maintainabilityai/research-runner
|
|
2
|
+
|
|
3
|
+
CLI that orchestrates the **Archeologist** (research) and **PRD** agent pipelines
|
|
4
|
+
for the MaintainabilityAI governance mesh. Invoked by the Looking Glass-scaffolded
|
|
5
|
+
GitHub Actions workflows in the mesh repo (`archeologist.yml`, `prd.yml`).
|
|
6
|
+
|
|
7
|
+
See [docs/design/research-and-prd-agents.md](../../docs/design/research-and-prd-agents.md)
|
|
8
|
+
for the full pipeline design.
|
|
9
|
+
|
|
10
|
+
## Status
|
|
11
|
+
|
|
12
|
+
**v0.1**: package scaffold, Zod schemas, audit emitter (hash-chained
JSONL), Hatter's Tag builder, pipeline orchestrators, LLM clients
(Anthropic, GitHub Models) and search clients (arXiv, Hacker News, Tavily,
USPTO). Further providers (OpenAI, Azure OpenAI) land in subsequent phases.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install -g @maintainabilityai/research-runner
|
|
20
|
+
# or run via npx in CI:
|
|
21
|
+
npx research-runner archeologist --brief "..." --scope-level bar --scope-id APP-IMDB-002
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## CLI
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
research-runner archeologist [options]
|
|
28
|
+
--brief <text> Plain-English research brief (required)
|
|
29
|
+
--scope-level platform|bar (required; the portfolio scope was removed)
|
|
30
|
+
--scope-id <id> BAR id or platform slug (required)
|
|
31
|
+
--path research|archaeology (default: research)
|
|
32
|
+
--target-repo owner/repo (archaeology path only)
|
|
33
|
+
--guardrails strict|default|lenient (default: default)
|
|
34
|
+
--output <dir> Where to write artifacts (default: ./research)
|
|
35
|
+
--audit <dir> Where to write JSONL audit log (default: ./.research-audit)
|
|
36
|
+
--emit-pr-body <path> Write the PR body markdown to this path
|
|
37
|
+
--mesh <dir> Mesh repo root (default: cwd)
|
|
38
|
+
|
|
39
|
+
research-runner prd [options]
|
|
40
|
+
--research-pr <url|path> Merged research PR url or research doc path
|
|
41
|
+
--scope-level platform|bar (required; the portfolio scope was removed)
|
|
42
|
+
--scope-id <id>
|
|
43
|
+
--mode shallow|deep (default: deep)
|
|
44
|
+
--grounding strict|default|lenient (default: default)
|
|
45
|
+
--max-iterations <n> (default: 3)
|
|
46
|
+
--output <dir> Where to write artifacts (default: ./prds)
|
|
47
|
+
--audit <dir>
|
|
48
|
+
--emit-pr-body <path>
|
|
49
|
+
--mesh <dir>
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Architecture
|
|
53
|
+
|
|
54
|
+
- `src/schemas/` Zod schemas for every input/output shape
|
|
55
|
+
- `src/runner/` Pipeline orchestrators + cross-cutting utilities (audit, Hatter's Tag)
|
|
56
|
+
- `src/utils/` Stateless helpers (run-id, time, hashing)
|
|
57
|
+
- `bin/` CLI entry stub that requires `dist/cli.js`
|
|
58
|
+
|
|
59
|
+
The runner is deliberately split into **pure nodes** (validation, search APIs,
|
|
60
|
+
dedupe, publish) and **LLM nodes** (query planning, synthesis, expert reviews).
|
|
61
|
+
Pure nodes are reproducible; LLM nodes produce non-deterministic content but
|
|
62
|
+
deterministic *shape* (Zod-validated). See the design doc for the full
|
|
63
|
+
determinism contract.
|
|
64
|
+
|
|
65
|
+
## Audit log
|
|
66
|
+
|
|
67
|
+
Every node emits a hash-chained JSONL event to
|
|
68
|
+
`<audit-dir>/<run_id>.jsonl`. Each event carries `prev_event_hash` and
|
|
69
|
+
`event_hash` (sha256), forming a tamper-evident Merkle-like chain. The final
|
|
70
|
+
`run_complete` event summarizes the run and pins the chain root hash.
|
|
71
|
+
|
|
72
|
+
## Hatter's Tag
|
|
73
|
+
|
|
74
|
+
Every published artifact (research doc, PRD) ends with a `## Hatter's Tag`
|
|
75
|
+
YAML block that pins the run to a specific mesh sha, prompt-library version,
|
|
76
|
+
LLM provider/model, token count, cost, grounding score, and audit chain hash.
|
|
77
|
+
Auditors verify the artifact by re-running the chain against the recorded
|
|
78
|
+
mesh sha.
|
|
79
|
+
|
|
80
|
+
## License
|
|
81
|
+
|
|
82
|
+
MIT
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
/**
|
|
37
|
+
* @maintainabilityai/research-runner CLI entry.
|
|
38
|
+
*
|
|
39
|
+
* Two subcommands: `archeologist` (research pipeline) and `prd` (PRD pipeline).
|
|
40
|
+
* Parses argv into a typed brief, runs the orchestrator, prints structured
|
|
41
|
+
* outputs that the calling GitHub Action consumes via `core.setOutput`.
|
|
42
|
+
*
|
|
43
|
+
* Zero dep on a CLI framework — argv parsing is hand-rolled to keep the
|
|
44
|
+
* package small. The flag surface is fixed by the design doc and shouldn't
|
|
45
|
+
* grow without intent.
|
|
46
|
+
*/
|
|
47
|
+
const fs = __importStar(require("node:fs"));
|
|
48
|
+
const path = __importStar(require("node:path"));
|
|
49
|
+
const archeologist_1 = require("./runner/archeologist");
|
|
50
|
+
const prd_1 = require("./runner/prd");
|
|
51
|
+
const PKG = JSON.parse(fs.readFileSync(path.resolve(__dirname, '..', 'package.json'), 'utf8'));
|
|
52
|
+
/**
 * Parse `--flag value` and `--flag=value` pairs out of an argv slice.
 *
 * Dashes inside a flag name become underscores (`--scope-level` is stored as
 * `scope_level`). Tokens that do not start with `--` are ignored unless they
 * are consumed as the value of the preceding flag. A flag with no usable
 * value (end of argv, empty string, or followed by another `--flag`) is
 * skipped — boolean flags are not part of the CLI surface.
 *
 * @param argv Arguments after the subcommand.
 * @returns Map of normalized flag name to its raw string value.
 */
function parseFlags(argv) {
    const flags = {};
    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];
        if (!arg.startsWith('--')) {
            continue;
        }
        // Inline form: split on the FIRST `=` so values such as URLs with a
        // query string keep any later `=` characters.
        const eq = arg.indexOf('=');
        if (eq !== -1) {
            const inlineKey = arg.slice(2, eq).replace(/-/g, '_');
            const inlineValue = arg.slice(eq + 1);
            if (inlineKey && inlineValue) {
                flags[inlineKey] = inlineValue;
            }
            continue;
        }
        const key = arg.slice(2).replace(/-/g, '_');
        const value = argv[i + 1];
        if (!value || value.startsWith('--')) {
            // No value supplied; leave the next token for the next iteration.
            continue;
        }
        flags[key] = value;
        i++; // the value token has been consumed
    }
    return flags;
}
|
|
70
|
+
/**
 * Append step outputs to the file named by the `GITHUB_OUTPUT` environment
 * variable so that `steps.<id>.outputs.*` works in the calling workflow.
 * Does nothing when the variable is unset (e.g. a local run).
 *
 * Single-line values are written as `name=value`. Values containing a line
 * break are written in the heredoc form (`name<<DELIM`, value, `DELIM`), so a
 * multi-line value (for example a user-supplied topic) cannot truncate the
 * output or smuggle in additional `name=value` lines.
 *
 * @param outputs Map of output name to value; values are stringified.
 */
function emitGithubOutput(outputs) {
    const githubOutput = process.env.GITHUB_OUTPUT;
    if (!githubOutput) {
        return;
    }
    const lines = Object.entries(outputs).map(([name, raw]) => {
        const value = String(raw);
        if (!/[\r\n]/.test(value)) {
            return `${name}=${value}`;
        }
        // Pick the delimiter AFTER seeing the value, so the value can never
        // contain it and terminate the heredoc early.
        let attempt = 0;
        let delimiter = `ghadelimiter_${attempt}`;
        while (value.includes(delimiter)) {
            attempt += 1;
            delimiter = `ghadelimiter_${attempt}`;
        }
        return `${name}<<${delimiter}\n${value}\n${delimiter}`;
    });
    fs.appendFileSync(githubOutput, lines.join('\n') + '\n', 'utf8');
}
|
|
79
|
+
/**
 * Report a fatal CLI error on stderr (prefixed with the tool name) and
 * terminate the process.
 *
 * @param msg  Human-readable reason.
 * @param code Process exit code; 1 unless overridden.
 */
function abort(msg, code = 1) {
    const prefix = 'research-runner: ';
    process.stderr.write(`${prefix}${msg}\n`);
    process.exit(code);
}
|
|
83
|
+
/**
 * `archeologist` subcommand: turn argv into a research brief, run the
 * archeologist pipeline, print the result as JSON on stdout and publish the
 * key fields as GitHub step outputs.
 *
 * @param argv Arguments following the subcommand name.
 */
async function archeologistCmd(argv) {
    const flags = parseFlags(argv);
    // Required flags, checked in this order; abort() ends the process.
    const required = [
        ['brief', '--brief is required'],
        ['scope_level', '--scope-level is required'],
        ['scope_id', '--scope-id is required (portfolio scope was removed; pass platform slug or BAR id)'],
    ];
    for (const [flagKey, complaint] of required) {
        if (!flags[flagKey]) {
            abort(complaint);
        }
    }
    const insideActions = process.env.GITHUB_ACTIONS === 'true';
    const briefInput = {
        topic: flags.brief,
        scope: { level: flags.scope_level, id: flags.scope_id },
        path: flags.path || 'research',
        target_repo: flags.target_repo || undefined,
        guardrails: flags.guardrails || 'default',
        llm_provider: flags.llm_provider || undefined,
        trigger: {
            kind: insideActions ? 'workflow_dispatch' : 'local_dev',
            actor: process.env.GITHUB_ACTOR,
        },
    };
    const meshDir = flags.mesh ? path.resolve(flags.mesh) : process.cwd();
    const { runArcheologist } = archeologist_1;
    const result = await runArcheologist({
        brief: briefInput,
        meshDir,
        outputDir: flags.output || 'research',
        auditDir: flags.audit || '.research-audit',
        emitPrBodyPath: flags.emit_pr_body,
        agentVersion: PKG.version,
    });
    // Full result for humans and log scrapers.
    process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
    // Selected fields for downstream workflow steps.
    const { run_id, topic, artifact_path, chain_root_hash, pr_body_path } = result;
    emitGithubOutput({
        run_id,
        topic,
        artifact_path,
        chain_root_hash,
        pr_body_path: pr_body_path || '',
    });
}
|
|
126
|
+
/**
 * `prd` subcommand: turn argv into a PRD brief, run the PRD pipeline, print
 * the result as JSON on stdout and publish the key fields as GitHub step
 * outputs.
 *
 * `--research-pr` is treated as a PR reference when it starts with
 * `http://` or `https://`, otherwise as a relative path to a research doc.
 * `--max-iterations` must be a plain integer; its allowed range is left to
 * the pipeline's own validation.
 *
 * @param argv Arguments following the subcommand name.
 */
async function prdCmd(argv) {
    const flags = parseFlags(argv);
    if (!flags.research_pr) {
        abort('--research-pr is required');
    }
    if (!flags.scope_level) {
        abort('--scope-level is required');
    }
    if (!flags.scope_id) {
        abort('--scope-id is required (portfolio scope was removed; pass platform slug or BAR id)');
    }
    // parseInt alone would accept "3abc" as 3 and turn "abc" into NaN, so
    // check the syntax first and fail with a message naming the flag.
    let maxIterations = 3;
    if (flags.max_iterations) {
        if (!/^[+-]?\d+$/.test(flags.max_iterations.trim())) {
            abort(`--max-iterations must be an integer (got "${flags.max_iterations}")`);
        }
        maxIterations = parseInt(flags.max_iterations, 10);
    }
    const isUrl = /^https?:\/\//.test(flags.research_pr);
    const briefInput = {
        research_source: isUrl
            ? { kind: 'pr', url: flags.research_pr }
            : { kind: 'path', relative_path: flags.research_pr },
        scope: {
            level: flags.scope_level,
            id: flags.scope_id,
        },
        mode: flags.mode || 'deep',
        grounding: flags.grounding || 'default',
        max_iterations: maxIterations,
        llm_provider: flags.llm_provider || undefined,
        trigger: {
            kind: process.env.GITHUB_ACTIONS === 'true' ? 'workflow_dispatch' : 'local_dev',
            actor: process.env.GITHUB_ACTOR,
        },
    };
    const result = await (0, prd_1.runPrd)({
        brief: briefInput,
        meshDir: flags.mesh ? path.resolve(flags.mesh) : process.cwd(),
        outputDir: flags.output || 'prds',
        auditDir: flags.audit || '.research-audit',
        emitPrBodyPath: flags.emit_pr_body,
        agentVersion: PKG.version,
    });
    // Full result for humans and log scrapers.
    process.stdout.write(JSON.stringify(result, null, 2) + '\n');
    // Selected fields for downstream workflow steps.
    emitGithubOutput({
        run_id: result.run_id,
        topic: result.topic,
        artifact_path: result.artifact_path,
        chain_root_hash: result.chain_root_hash,
        pr_body_path: result.pr_body_path || '',
        final_score: result.final_score,
        iterations: result.iterations,
    });
}
|
|
174
|
+
/**
 * Print the version banner and a short usage summary to stdout.
 */
function help() {
    const usage = `research-runner v${PKG.version}

Usage:
research-runner archeologist --brief "<topic>" --scope-level <platform|bar> --scope-id ID [--path research|archaeology] [...]
research-runner prd --research-pr <url|path> --scope-level <platform|bar> --scope-id ID [...]

See README.md for the full flag surface.
`;
    process.stdout.write(usage);
}
|
|
184
|
+
/**
 * CLI dispatcher: read the subcommand from `process.argv[2]` and hand the
 * remaining arguments to the matching handler. With no subcommand (or a help
 * alias) the usage text is printed; an unrecognized subcommand aborts.
 */
async function main() {
    const subcommand = process.argv[2];
    const rest = process.argv.slice(3);
    if (subcommand === 'archeologist') {
        await archeologistCmd(rest);
        return;
    }
    if (subcommand === 'prd') {
        await prdCmd(rest);
        return;
    }
    if (subcommand === undefined || subcommand === 'help' || subcommand === '--help' || subcommand === '-h') {
        help();
        return;
    }
    if (subcommand === '--version' || subcommand === '-v') {
        process.stdout.write(`${PKG.version}\n`);
        return;
    }
    abort(`unknown subcommand: ${subcommand}`);
}
|
|
206
|
+
// Entry point: run the dispatcher; any rejection is reported on stderr and
// turned into exit code 1.
main().catch((failure) => {
    const reason = failure instanceof Error ? failure.message : String(failure);
    process.stderr.write(`research-runner: ${reason}\n`);
    process.exit(1);
});
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* anthropic-client — minimal `fetch`-based wrapper around Anthropic's
|
|
3
|
+
* `/v1/messages` endpoint. Returns the LLM telemetry shape the audit
|
|
4
|
+
* emitter wants.
|
|
5
|
+
*
|
|
6
|
+
* Why not the official SDK? Three reasons:
|
|
7
|
+
* 1. Smaller install (~300kb saved)
|
|
8
|
+
* 2. The runner needs to support multiple providers (anthropic, openai,
|
|
9
|
+
* azure-openai) and a uniform `fetch`-based abstraction keeps the
|
|
10
|
+
* switch deterministic and testable.
|
|
11
|
+
* 3. Dependency-injecting `fetch` makes mocking trivial in node:test.
|
|
12
|
+
*/
|
|
13
|
+
/** Anthropic model ids accepted by `callAnthropic` (see `CallAnthropicOpts.model`). */
export type AnthropicModel =
    | 'claude-haiku-4-5'
    | 'claude-sonnet-4-6'
    | 'claude-opus-4-7';
|
|
14
|
+
/** Options for a single `callAnthropic` request. */
export interface CallAnthropicOpts {
    /** Anthropic API key; an empty value makes the call reject before any request is sent. */
    apiKey: string;
    /** Model id placed in the request body. */
    model: AnthropicModel;
    /** Optional system prompt (instructions kept outside the user message). */
    system?: string;
    /** Body of the single user message — typically the filled prompt pack content. */
    prompt: string;
    /** Hard upper bound on response tokens. */
    maxTokens: number;
    /** Sampling temperature (0–1). Defaults to 0 for a deterministic output shape. */
    temperature?: number;
    /** Injection point for tests; `globalThis.fetch` is used when omitted. */
    fetchImpl?: typeof fetch;
    /** Abort timeout in milliseconds. Defaults to 60 000. */
    timeoutMs?: number;
}
|
|
30
|
+
/** Telemetry-friendly result of a successful `callAnthropic` request. */
export interface CallAnthropicResult {
    /** Response text (the text content blocks joined together). */
    text: string;
    /** Input tokens reported by the API. */
    inputTokens: number;
    /** Output tokens reported by the API. */
    outputTokens: number;
    /**
     * USD cost estimate computed from the client's built-in per-model price
     * table (USD per million input / output tokens). An estimate only — it is
     * not read back from the API.
     */
    costUsd: number;
    /** Anthropic HTTP status (200 on success). */
    httpStatus: number;
}
|
|
39
|
+
/**
 * Send one user message (plus an optional system prompt) to Anthropic's
 * `/v1/messages` endpoint and resolve with the response text and usage
 * telemetry.
 *
 * Rejects when `opts.apiKey` is empty or when the HTTP response is not OK.
 */
export declare function callAnthropic(opts: CallAnthropicOpts): Promise<CallAnthropicResult>;
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* anthropic-client — minimal `fetch`-based wrapper around Anthropic's
|
|
4
|
+
* `/v1/messages` endpoint. Returns the LLM telemetry shape the audit
|
|
5
|
+
* emitter wants.
|
|
6
|
+
*
|
|
7
|
+
* Why not the official SDK? Three reasons:
|
|
8
|
+
* 1. Smaller install (~300kb saved)
|
|
9
|
+
* 2. The runner needs to support multiple providers (anthropic, openai,
|
|
10
|
+
* azure-openai) and a uniform `fetch`-based abstraction keeps the
|
|
11
|
+
* switch deterministic and testable.
|
|
12
|
+
* 3. Dependency-injecting `fetch` makes mocking trivial in node:test.
|
|
13
|
+
*/
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.callAnthropic = callAnthropic;
|
|
16
|
+
/**
 * Price table used for the `costUsd` estimate, in USD per million tokens.
 *
 * NOTE(review): the sonnet and opus rows are carried over unchanged — confirm
 * all three rows against the current Anthropic price list before relying on
 * the estimate for budgeting.
 */
const PRICING = {
    'claude-haiku-4-5': { inputPerMtok: 1.00, outputPerMtok: 5.00 },
    'claude-sonnet-4-6': { inputPerMtok: 3.00, outputPerMtok: 15.00 },
    'claude-opus-4-7': { inputPerMtok: 15.00, outputPerMtok: 75.00 },
};
/**
 * POST a single user message to Anthropic's `/v1/messages` endpoint.
 *
 * The abort timer stays armed until the response body has been fully read,
 * so a server that sends headers and then stalls cannot hang the run.
 *
 * @param opts See `CallAnthropicOpts`.
 * @returns Response text (all `text` content blocks joined), token usage,
 *          an estimated USD cost and the HTTP status.
 * @throws Error when `opts.apiKey` is empty, when the response is not OK
 *         (message includes the status and up to 400 chars of the body), or
 *         when the request is aborted by the timeout.
 */
async function callAnthropic(opts) {
    if (!opts.apiKey) {
        throw new Error('ANTHROPIC_API_KEY missing — set the env var or pass apiKey directly');
    }
    const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), opts.timeoutMs ?? 60_000);
    try {
        const response = await fetchImpl('https://api.anthropic.com/v1/messages', {
            method: 'POST',
            headers: {
                'x-api-key': opts.apiKey,
                'anthropic-version': '2023-06-01',
                'content-type': 'application/json',
            },
            body: JSON.stringify({
                model: opts.model,
                max_tokens: opts.maxTokens,
                temperature: opts.temperature ?? 0,
                // Only send `system` when one was provided.
                ...(opts.system ? { system: opts.system } : {}),
                messages: [{ role: 'user', content: opts.prompt }],
            }),
            signal: controller.signal,
        });
        const httpStatus = response.status;
        if (!response.ok) {
            const body = await safeText(response);
            throw new Error(`Anthropic returned ${httpStatus}: ${body.slice(0, 400)}`);
        }
        const data = await response.json();
        // Non-text blocks are skipped; text blocks are concatenated.
        const text = (data.content ?? [])
            .filter(b => b.type === 'text')
            .map(b => b.text ?? '')
            .join('');
        const inputTokens = data.usage?.input_tokens ?? 0;
        const outputTokens = data.usage?.output_tokens ?? 0;
        // A model id missing from the table must not discard a response that
        // has already been paid for; report an unknown cost as 0 instead.
        const pricing = PRICING[opts.model];
        const costUsd = pricing
            ? (inputTokens / 1_000_000) * pricing.inputPerMtok +
                (outputTokens / 1_000_000) * pricing.outputPerMtok
            : 0;
        return { text, inputTokens, outputTokens, costUsd, httpStatus };
    }
    finally {
        // Runs after the body has been consumed (or on any failure).
        clearTimeout(timer);
    }
}
|
|
67
|
+
/**
 * Best-effort read of a response body as text, used only to enrich error
 * messages. Any failure while reading yields an empty string.
 *
 * @param r Response-like object exposing `text()`.
 * @returns The body text, or `''` when it could not be read.
 */
async function safeText(r) {
    let bodyText = '';
    try {
        bodyText = await r.text();
    }
    catch {
        // Deliberately ignored: the caller still reports the HTTP status.
    }
    return bodyText;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* github-models-client — fetch-based wrapper around the GitHub Models
|
|
3
|
+
* inference endpoint (`https://models.github.ai/inference/chat/completions`).
|
|
4
|
+
*
|
|
5
|
+
* The endpoint is OpenAI-compatible: standard `messages` array, `model`,
|
|
6
|
+
* `max_tokens`, `temperature`. Authentication is the workflow's GITHUB_TOKEN
|
|
7
|
+
* with `permissions: models: read` — no separate API key as a repo secret.
|
|
8
|
+
*
|
|
9
|
+
* Returns the same telemetry shape as callAnthropic so plan_queries /
|
|
10
|
+
* synthesize_report can route through either client without branching on
|
|
11
|
+
* their result types.
|
|
12
|
+
*
|
|
13
|
+
* Model names use GitHub Models namespacing — e.g. `openai/gpt-4o`,
|
|
14
|
+
* `openai/gpt-4o-mini`, `anthropic/claude-3-5-sonnet`. The router (in
|
|
15
|
+
* llm-router.ts) maps internal logical model tiers (`plan` / `synth`) to
|
|
16
|
+
* the concrete provider-specific id.
|
|
17
|
+
*/
|
|
18
|
+
/**
 * The GitHub Models model ids this package uses (a subset of the catalogue).
 * Extend the union as new tiers land.
 */
export type GitHubModelsModel =
    | 'openai/gpt-4o'
    | 'openai/gpt-4o-mini'
    | 'anthropic/claude-3-5-sonnet'
    | 'anthropic/claude-3-5-haiku';
|
|
20
|
+
/** Options for a single `callGitHubModels` request. */
export interface CallGitHubModelsOpts {
    /** Workflow GITHUB_TOKEN; the model server checks the `models:read` permission scope. */
    token: string;
    /** Namespaced model id placed in the request body. */
    model: GitHubModelsModel;
    /** Optional system prompt, sent as a leading `system` message. */
    system?: string;
    /** Content of the `user` message. */
    prompt: string;
    /** Upper bound on response tokens. */
    maxTokens: number;
    /** Sampling temperature; 0 is sent when omitted. */
    temperature?: number;
    /** Injection point for tests; `globalThis.fetch` is used when omitted. */
    fetchImpl?: typeof fetch;
    /** Abort timeout in milliseconds; 60 000 when omitted. */
    timeoutMs?: number;
    /** Endpoint override (for Azure-routed Models or test environments). */
    endpoint?: string;
}
|
|
33
|
+
/** Result of a successful `callGitHubModels` request; same shape as `CallAnthropicResult`. */
export interface CallGitHubModelsResult {
    /** Content of the first choice's message, or `''` when absent. */
    text: string;
    /** Prompt tokens reported by the endpoint (0 when absent). */
    inputTokens: number;
    /** Completion tokens reported by the endpoint (0 when absent). */
    outputTokens: number;
    /**
     * Always 0. GitHub Models billing is org/quota driven rather than
     * published per token, so no estimate is attempted; the field exists to
     * keep the audit envelope well-typed. Reviewers who see
     * `provider: github-models` should check the GitHub billing surface for
     * the actual spend.
     */
    costUsd: number;
    /** HTTP status of the response. */
    httpStatus: number;
}
|
|
46
|
+
/**
 * Send one chat-completion request (optional system message plus one user
 * message) to the GitHub Models inference endpoint and resolve with the
 * first choice's text and usage telemetry.
 *
 * Rejects when `opts.token` is empty or when the HTTP response is not OK.
 */
export declare function callGitHubModels(opts: CallGitHubModelsOpts): Promise<CallGitHubModelsResult>;
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* github-models-client — fetch-based wrapper around the GitHub Models
|
|
4
|
+
* inference endpoint (`https://models.github.ai/inference/chat/completions`).
|
|
5
|
+
*
|
|
6
|
+
* The endpoint is OpenAI-compatible: standard `messages` array, `model`,
|
|
7
|
+
* `max_tokens`, `temperature`. Authentication is the workflow's GITHUB_TOKEN
|
|
8
|
+
* with `permissions: models: read` — no separate API key as a repo secret.
|
|
9
|
+
*
|
|
10
|
+
* Returns the same telemetry shape as callAnthropic so plan_queries /
|
|
11
|
+
* synthesize_report can route through either client without branching on
|
|
12
|
+
* their result types.
|
|
13
|
+
*
|
|
14
|
+
* Model names use GitHub Models namespacing — e.g. `openai/gpt-4o`,
|
|
15
|
+
* `openai/gpt-4o-mini`, `anthropic/claude-3-5-sonnet`. The router (in
|
|
16
|
+
* llm-router.ts) maps internal logical model tiers (`plan` / `synth`) to
|
|
17
|
+
* the concrete provider-specific id.
|
|
18
|
+
*/
|
|
19
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
+
exports.callGitHubModels = callGitHubModels;
|
|
21
|
+
/** Endpoint used when `opts.endpoint` is not supplied. */
const DEFAULT_ENDPOINT = 'https://models.github.ai/inference/chat/completions';
/**
 * POST an OpenAI-compatible chat-completion request to GitHub Models.
 *
 * The abort timer stays armed until the response body has been fully read,
 * so a server that sends headers and then stalls cannot hang the run.
 *
 * @param opts See `CallGitHubModelsOpts`.
 * @returns Text of the first choice, token usage, `costUsd: 0` (billing is
 *          not visible to the caller) and the HTTP status.
 * @throws Error when `opts.token` is empty, when the response is not OK
 *         (message includes the status and up to 400 chars of the body), or
 *         when the request is aborted by the timeout.
 */
async function callGitHubModels(opts) {
    if (!opts.token) {
        throw new Error('GITHUB_TOKEN missing — `permissions: models: read` is required on the workflow');
    }
    const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
    const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;
    // The system prompt, when present, goes first as its own message.
    const messages = [];
    if (opts.system) {
        messages.push({ role: 'system', content: opts.system });
    }
    messages.push({ role: 'user', content: opts.prompt });
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), opts.timeoutMs ?? 60_000);
    try {
        const response = await fetchImpl(endpoint, {
            method: 'POST',
            headers: {
                'authorization': `Bearer ${opts.token}`,
                'content-type': 'application/json',
                'accept': 'application/json',
            },
            body: JSON.stringify({
                model: opts.model,
                messages,
                max_tokens: opts.maxTokens,
                temperature: opts.temperature ?? 0,
            }),
            signal: controller.signal,
        });
        const httpStatus = response.status;
        if (!response.ok) {
            const body = await safeText(response);
            throw new Error(`GitHub Models returned ${httpStatus}: ${body.slice(0, 400)}`);
        }
        const data = await response.json();
        const text = data.choices?.[0]?.message?.content ?? '';
        return {
            text,
            inputTokens: data.usage?.prompt_tokens ?? 0,
            outputTokens: data.usage?.completion_tokens ?? 0,
            costUsd: 0,
            httpStatus,
        };
    }
    finally {
        // Runs after the body has been consumed (or on any failure).
        clearTimeout(timer);
    }
}
|
|
71
|
+
/**
 * Best-effort read of a response body as text, used only to enrich error
 * messages. Any failure while reading yields an empty string.
 *
 * @param r Response-like object exposing `text()`.
 * @returns The body text, or `''` when it could not be read.
 */
async function safeText(r) {
    let bodyText = '';
    try {
        bodyText = await r.text();
    }
    catch {
        // Deliberately ignored: the caller still reports the HTTP status.
    }
    return bodyText;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* llm-router — single entry point for every LLM hop in the runner.
|
|
3
|
+
*
|
|
4
|
+
* Picks the right provider client based on the brief's `llm_provider` and
|
|
5
|
+
* the requested logical tier (`plan` for cheap structured-JSON work,
|
|
6
|
+
* `synth` for higher-quality long-form synthesis). Translates the tier
|
|
7
|
+
* into a provider-specific model name so node code never hard-codes a
|
|
8
|
+
* model id.
|
|
9
|
+
*
|
|
10
|
+
* Returns a uniform result shape so plan_queries / synthesize_report can
|
|
11
|
+
* use it without branching on provider in their own bodies.
|
|
12
|
+
*/
|
|
13
|
+
import type { LlmProvider } from '../schemas';
|
|
14
|
+
/**
 * Logical model tiers. The router maps each tier to a concrete,
 * provider-specific model id, so node code never carries model strings of
 * its own.
 */
export type LlmTier =
    | 'plan'
    | 'synth';
|
|
20
|
+
/** Options for a provider-agnostic `callLlm` request. */
export interface CallLlmOpts {
    /** Which provider client handles the request. */
    provider: LlmProvider;
    /** Logical tier; the router turns it into the provider's model id. */
    tier: LlmTier;
    /**
     * Anthropic API key — required when `provider === 'anthropic'`. For
     * `'github-models'`, leave it undefined and supply `githubToken`.
     */
    anthropicApiKey?: string;
    /** GITHUB_TOKEN — required when `provider === 'github-models'`. */
    githubToken?: string;
    /** Optional system prompt, forwarded to the provider client. */
    system?: string;
    /** User prompt, forwarded to the provider client. */
    prompt: string;
    /** Upper bound on response tokens, forwarded to the provider client. */
    maxTokens: number;
    /** Sampling temperature, forwarded to the provider client. */
    temperature?: number;
    /** Fetch implementation, forwarded to the provider client (for tests). */
    fetchImpl?: typeof fetch;
}
|
|
36
|
+
/** Uniform result returned by `callLlm`, whichever provider served the request. */
export interface CallLlmResult {
    /** Provider that served the request. */
    provider: LlmProvider;
    /** The actual model id sent to the provider (e.g. `claude-haiku-4-5` or `openai/gpt-4o-mini`). */
    model: string;
    /** Response text as returned by the provider client. */
    text: string;
    /** Input token count as returned by the provider client. */
    inputTokens: number;
    /** Output token count as returned by the provider client. */
    outputTokens: number;
    /** Cost figure as returned by the provider client. */
    costUsd: number;
    /** HTTP status as returned by the provider client. */
    httpStatus: number;
}
|
|
46
|
+
/**
 * Route one LLM request to the client for `opts.provider`, using the model
 * id mapped from `opts.tier`, and resolve with a provider-independent result.
 *
 * Rejects when the credential for the chosen provider is missing, or when
 * the provider is neither `anthropic` nor `github-models`.
 */
export declare function callLlm(opts: CallLlmOpts): Promise<CallLlmResult>;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.callLlm = callLlm;
|
|
4
|
+
const anthropic_client_1 = require("./anthropic-client");
|
|
5
|
+
const github_models_client_1 = require("./github-models-client");
|
|
6
|
+
/** Model id for each logical tier, keyed by provider. */
const MODEL_BY_TIER = {
    plan: { anthropic: 'claude-haiku-4-5', githubModels: 'openai/gpt-4o-mini' },
    synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'anthropic/claude-3-5-sonnet' },
};
/**
 * Dispatch one LLM request to the client matching `opts.provider`.
 *
 * @param opts See `CallLlmOpts`.
 * @returns The client's telemetry plus the provider name and the concrete
 *          model id that was used.
 * @throws Error when the provider's credential is missing or the provider
 *         has no client yet.
 */
async function callLlm(opts) {
    const { provider, system, prompt, maxTokens, temperature, fetchImpl } = opts;
    const models = MODEL_BY_TIER[opts.tier];
    // Request fields shared by every provider client.
    const shared = { system, prompt, maxTokens, temperature, fetchImpl };
    // Wrap a client result in the uniform envelope.
    const envelope = (model, r) => ({
        provider,
        model,
        text: r.text,
        inputTokens: r.inputTokens,
        outputTokens: r.outputTokens,
        costUsd: r.costUsd,
        httpStatus: r.httpStatus,
    });
    switch (provider) {
        case 'anthropic': {
            if (!opts.anthropicApiKey) {
                throw new Error(`callLlm: provider=anthropic requires anthropicApiKey (set ANTHROPIC_API_KEY).`);
            }
            const model = models.anthropic;
            const reply = await (0, anthropic_client_1.callAnthropic)({ apiKey: opts.anthropicApiKey, model, ...shared });
            return envelope(model, reply);
        }
        case 'github-models': {
            if (!opts.githubToken) {
                throw new Error(`callLlm: provider=github-models requires githubToken (set GITHUB_TOKEN; workflow needs \`permissions: models: read\`).`);
            }
            const model = models.githubModels;
            const reply = await (0, github_models_client_1.callGitHubModels)({ token: opts.githubToken, model, ...shared });
            return envelope(model, reply);
        }
        default:
            throw new Error(`callLlm: provider "${provider}" not yet implemented (phase 2c.1 ships anthropic + github-models; openai + azure-openai land later).`);
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Takes the mesh path and returns a sha string, or `null`.
 *
 * NOTE(review): presumably the git commit sha of the mesh checkout at
 * `meshPath`, with `null` meaning it could not be determined — the
 * implementation (get-mesh-sha.js) is not visible here; confirm.
 */
export declare function getMeshSha(meshPath: string): string | null;
|