@maintainabilityai/research-runner 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,16 +11,26 @@
11
11
  * their result types.
12
12
  *
13
13
  * Model names use GitHub Models namespacing — e.g. `openai/gpt-4o`,
14
- * `openai/gpt-4o-mini`, `openai/gpt-4.1`. The router (in
14
+ * `openai/gpt-4o-mini`, `openai/gpt-5-mini`. The router (in
15
15
  * llm-router.ts) maps internal logical model tiers (`plan` / `synth`) to
16
16
  * the concrete provider-specific id.
17
17
  */
18
18
  /**
19
19
  * Subset of GitHub Models model ids we use. Extend as new tiers land.
20
- * GitHub Models does not currently host Anthropic Claude — synth tier
21
- * uses `openai/gpt-4.1` (the "outperforms gpt-4o across the board" tier).
20
+ *
21
+ * GitHub Models has two relevant rate-limit tiers:
22
+ * - "high" — gpt-4o, gpt-4o-mini, gpt-4.1 etc. Per-request input is
23
+ * capped at ~8K tokens regardless of subscription. Fine for our
24
+ * plan-tier (small structured-JSON prompt).
25
+ * - "custom" — gpt-5 family, o-series. Per-request input scales to
26
+ * the model's advertised limit (200K for gpt-5-mini). Routed through
27
+ * Copilot-billed access, so the token owner needs a Copilot subscription.
28
+ *
29
+ * Synth tier uses gpt-5-mini for the larger context window. Anthropic
30
+ * remains the preferred synth target when an Anthropic key is set (see
31
+ * llm-router.ts hybrid routing).
22
32
  */
23
- export type GitHubModelsModel = 'openai/gpt-4o' | 'openai/gpt-4o-mini' | 'openai/gpt-4.1' | 'openai/gpt-4.1-mini';
33
+ export type GitHubModelsModel = 'openai/gpt-4o' | 'openai/gpt-4o-mini' | 'openai/gpt-4.1' | 'openai/gpt-4.1-mini' | 'openai/gpt-5' | 'openai/gpt-5-mini';
24
34
  export interface CallGitHubModelsOpts {
25
35
  /** Workflow GITHUB_TOKEN. The model server checks the `models:read` permission scope. */
26
36
  token: string;
@@ -12,7 +12,7 @@
12
12
  * their result types.
13
13
  *
14
14
  * Model names use GitHub Models namespacing — e.g. `openai/gpt-4o`,
15
- * `openai/gpt-4o-mini`, `openai/gpt-4.1`. The router (in
15
+ * `openai/gpt-4o-mini`, `openai/gpt-5-mini`. The router (in
16
16
  * llm-router.ts) maps internal logical model tiers (`plan` / `synth`) to
17
17
  * the concrete provider-specific id.
18
18
  */
@@ -25,8 +25,9 @@ async function callGitHubModels(opts) {
25
25
  }
26
26
  const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
27
27
  const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;
28
- // Synthesis prompts can produce 8K-token responses on gpt-4.1, which
29
- // routinely take 60–90s. Default to 120s so we don't abort mid-stream.
28
+ // Synthesis prompts can produce 8K-token responses (and "custom"-tier
29
+ // models like gpt-5-mini can return much more), which routinely take
30
+ // 60–120s. Default to 120s (the top of that range) so typical calls don't abort mid-stream; pass `timeoutMs` to allow longer ones.
30
31
  const timeoutMs = opts.timeoutMs ?? 120_000;
31
32
  const controller = new AbortController();
32
33
  const timer = setTimeout(() => controller.abort(), timeoutMs);
@@ -6,7 +6,11 @@ const github_models_client_1 = require("./github-models-client");
6
6
  /** Per-tier per-provider model id lookup. */
7
7
  const MODEL_BY_TIER = {
8
8
  plan: { anthropic: 'claude-haiku-4-5', githubModels: 'openai/gpt-4o-mini' },
9
- synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-4.1' },
9
+ // gpt-5-mini is in the "custom" GH-Models tier — 200K input context,
10
+ // 100K output, reasoning + tool-calling. Bypasses the 8K cap that
11
+ // hits "high"-tier models like gpt-4.1. Requires the caller's token
12
+ // to have Models access through a Copilot subscription (GMT path).
13
+ synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-5-mini' },
10
14
  };
11
15
  async function callLlm(opts) {
12
16
  const tierModels = MODEL_BY_TIER[opts.tier];
@@ -76,6 +76,18 @@ const synthesize_report_1 = require("./nodes/synthesize-report");
76
76
  const clone_and_index_1 = require("./nodes/clone-and-index");
77
77
  const analyze_architecture_1 = require("./nodes/analyze-architecture");
78
78
  const identify_gaps_1 = require("./nodes/identify-gaps");
79
+ /**
80
+ * Progress log → stderr. Goes to GitHub Actions job output without
81
+ * polluting stdout (which carries the JSON result the workflow parses).
82
+ * Disabled when RESEARCH_RUNNER_QUIET=1 so unit tests stay clean.
83
+ */
84
+ function progress(msg) {
85
+ if (process.env.RESEARCH_RUNNER_QUIET === '1') {
86
+ return;
87
+ }
88
+ const ts = new Date().toISOString().slice(11, 19); // HH:MM:SS in UTC (toISOString never uses local time)
89
+ process.stderr.write(`[research-runner ${ts}] ${msg}\n`);
90
+ }
79
91
  async function runArcheologist(opts) {
80
92
  // ----- validate_brief (pure) -----
81
93
  const briefParsed = schemas_1.ResearchBrief.safeParse(opts.brief);
@@ -89,6 +101,7 @@ async function runArcheologist(opts) {
89
101
  const githubToken = opts.githubToken ?? process.env.GITHUB_TOKEN ?? '';
90
102
  const tavilyApiKey = opts.tavilyApiKey ?? process.env.TAVILY_API_KEY ?? '';
91
103
  const usptoApiKey = opts.usptoApiKey ?? process.env.USPTO_API_KEY ?? '';
104
+ progress(`▶ run ${runId} | scope=${brief.scope.level}(${brief.scope.id}) | path=${brief.path} | llm_provider=${brief.llm_provider ?? 'anthropic'} | keys: anthropic=${!!anthropicApiKey} github=${!!githubToken} tavily=${!!tavilyApiKey} uspto=${!!usptoApiKey}`);
92
105
  const absoluteAuditDir = path.resolve(opts.meshDir, opts.auditDir);
93
106
  const absoluteOutputDir = path.resolve(opts.meshDir, opts.outputDir);
94
107
  fs.mkdirSync(absoluteOutputDir, { recursive: true });
@@ -240,6 +253,7 @@ async function runArcheologist(opts) {
240
253
  // ============================================================================
241
254
  // RESEARCH PATH (existing): plan_queries → 4 providers → dedupe → gap-analysis
242
255
  // ============================================================================
256
+ progress(`◐ plan_queries — calling LLM to generate query plan…`);
243
257
  const planStart = Date.now();
244
258
  const plan = await (0, plan_queries_1.planQueries)({
245
259
  meshDir: opts.meshDir,
@@ -251,6 +265,7 @@ async function runArcheologist(opts) {
251
265
  fetchImpl: opts.fetchImpl,
252
266
  });
253
267
  researchQueryPlan = plan.queryPlan;
268
+ progress(`✓ plan_queries (${plan.llm.provider} ${plan.llm.model}) in ${Date.now() - planStart}ms — ${plan.llm.inputTokens} in / ${plan.llm.outputTokens} out tokens, ${plan.llm.attempts} attempt${plan.llm.attempts !== 1 ? 's' : ''} → web=${plan.queryPlan.web.length} arxiv=${plan.queryPlan.arxiv.length} patent=${plan.queryPlan.patent.length} community=${plan.queryPlan.community.length}`);
254
269
  totalInputTokens += plan.llm.inputTokens;
255
270
  totalOutputTokens += plan.llm.outputTokens;
256
271
  totalCostUsd += plan.llm.costUsd;
@@ -271,6 +286,7 @@ async function runArcheologist(opts) {
271
286
  // ----- four-provider search (pure_api each, parallel across providers) -----
272
287
  // We run all four providers concurrently with Promise.allSettled so a
273
288
  // provider-level failure (e.g. PatentsView outage) doesn't block the rest.
289
+ progress(`◐ search — tavily(${plan.queryPlan.web.length}) + arxiv(${plan.queryPlan.arxiv.length}) + hackernews(${plan.queryPlan.community.length}) + uspto(${usptoApiKey ? plan.queryPlan.patent.length : 'skipped'}) in parallel…`);
274
290
  const searchStart = Date.now();
275
291
  const [tavily, arxiv, hn, uspto] = await Promise.allSettled([
276
292
  (0, tavily_search_1.runTavilySearch)({ apiKey: tavilyApiKey, queries: plan.queryPlan.web, fetchImpl: opts.fetchImpl }),
@@ -330,9 +346,12 @@ async function runArcheologist(opts) {
330
346
  handleProvider(arxiv, 'arxiv_search', 'arxiv', 'GET /api/query');
331
347
  handleProvider(hn, 'hackernews_search', 'hackernews', 'GET /api/v1/search');
332
348
  handleProvider(uspto, 'uspto_search', 'uspto', 'POST /api/v1/patent/');
349
+ const fmtSettled = (s) => s.status === 'fulfilled' ? 'OK' : `FAIL(${s.reason instanceof Error ? s.reason.message.slice(0, 60) : String(s.reason).slice(0, 60)})`;
350
+ progress(`✓ search done in ${searchDuration}ms — tavily=${providerResultCounts.tavily}/${fmtSettled(tavily)} arxiv=${providerResultCounts.arxiv}/${fmtSettled(arxiv)} hn=${providerResultCounts.hackernews}/${fmtSettled(hn)} uspto=${providerResultCounts.uspto}/${fmtSettled(uspto)} (raw=${allProviderResults.length})`);
333
351
  // ----- dedupe_and_rank (pure) — first pass -----
334
352
  let dedupeStart = Date.now();
335
353
  rankedSources = (0, dedupe_and_rank_1.dedupeAndRank)({ results: allProviderResults, topN: 20 });
354
+ progress(`✓ dedupe_and_rank — ${rankedSources.length} ranked sources (top score=${rankedSources[0]?.salience_score?.toFixed(2) ?? 'n/a'})`);
336
355
  emitter.emit({
337
356
  node_kind: 'pure',
338
357
  node_name: 'dedupe_and_rank',
@@ -354,6 +373,7 @@ async function runArcheologist(opts) {
354
373
  outputs_summary: `signals=${gapSignals.map(s => s.kind).join(',')}`,
355
374
  },
356
375
  });
376
+ progress(`◐ gap_analysis — ${gapSignals.length} signal(s): ${gapSignals.map(s => s.kind).join(',')}`);
357
377
  const gapStart = Date.now();
358
378
  const gap = await (0, gap_analysis_1.runGapAnalysis)({
359
379
  meshDir: opts.meshDir,
@@ -365,6 +385,7 @@ async function runArcheologist(opts) {
365
385
  githubToken,
366
386
  fetchImpl: opts.fetchImpl,
367
387
  });
388
+ progress(`✓ gap_analysis (${gap.llm.provider} ${gap.llm.model}) in ${Date.now() - gapStart}ms — ${gap.llm.inputTokens} in / ${gap.llm.outputTokens} out tokens → ${gap.followUpQueries.length} follow-up queries`);
368
389
  totalInputTokens += gap.llm.inputTokens;
369
390
  totalOutputTokens += gap.llm.outputTokens;
370
391
  totalCostUsd += gap.llm.costUsd;
@@ -436,6 +457,7 @@ async function runArcheologist(opts) {
436
457
  }
437
458
  } // end research-path else branch
438
459
  // ----- synthesize_report (LLM) -----
460
+ progress(`◐ synthesize_report — calling LLM (provider hint=${brief.llm_provider ?? 'anthropic'}, sources=${rankedSources.length}); hybrid routing will pick anthropic for synth if anthropic key is set…`);
439
461
  const synthStart = Date.now();
440
462
  const synthesis = await (0, synthesize_report_1.synthesizeReport)({
441
463
  meshDir: opts.meshDir,
@@ -454,6 +476,7 @@ async function runArcheologist(opts) {
454
476
  totalInputTokens += synthesis.llm.inputTokens;
455
477
  totalOutputTokens += synthesis.llm.outputTokens;
456
478
  totalCostUsd += synthesis.llm.costUsd;
479
+ progress(`✓ synthesize_report (${synthesis.llm.provider} ${synthesis.llm.model}) in ${Date.now() - synthStart}ms — ${synthesis.llm.inputTokens} in / ${synthesis.llm.outputTokens} out tokens, ${synthesis.llm.attempts} attempt${synthesis.llm.attempts !== 1 ? 's' : ''}`);
457
480
  emitter.emit({
458
481
  node_kind: 'llm',
459
482
  node_name: 'synthesize_report',
@@ -554,6 +577,8 @@ async function runArcheologist(opts) {
554
577
  }
555
578
  catch { /* leave on disk — non-fatal, just a tmpdir entry */ }
556
579
  }
580
+ const totalDurationMs = Date.now() - startedAt.getTime();
581
+ progress(`◆ done ${runId} in ${(totalDurationMs / 1000).toFixed(1)}s — ${totalInputTokens} in / ${totalOutputTokens} out tokens, $${roundUsd(totalCostUsd)} | sources=${rankedSources.length} conclusions=${synthesis.citation_stats.conclusion_count} recs=${synthesis.citation_stats.recommendation_count} | artifact=${artifactPath}`);
557
582
  return {
558
583
  run_id: runId,
559
584
  topic: brief.topic,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",