npm - @maintainabilityai/research-runner - Versions diffs - 0.1.4 → 0.1.6 - Mend

@maintainabilityai/research-runner 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/llm/github-models-client.d.ts +14 -4
package/dist/llm/github-models-client.js +4 -3
package/dist/llm/llm-router.js +5 -1
package/dist/runner/archeologist.js +25 -0
package/package.json +1 -1

package/dist/llm/github-models-client.d.ts CHANGED Viewed

@@ -11,16 +11,26 @@
  * their result types.
  *
  * Model names use GitHub Models namespacing — e.g. `openai/gpt-4o`,
- * `openai/gpt-4o-mini`, `openai/gpt-4.1`. The router (in
+ * `openai/gpt-4o-mini`, `openai/gpt-5-mini`. The router (in
  * llm-router.ts) maps internal logical model tiers (`plan` / `synth`) to
  * the concrete provider-specific id.
  */
 /**
  * Subset of GitHub Models model ids we use. Extend as new tiers land.
- * GitHub Models does not currently host Anthropic Claude — synth tier
- * uses `openai/gpt-4.1` (the "outperforms gpt-4o across the board" tier).
+ *
+ * GitHub Models has two relevant rate-limit tiers:
+ *   - "high" — gpt-4o, gpt-4o-mini, gpt-4.1 etc. Per-request input is
+ *     capped at ~8K tokens regardless of subscription. Fine for our
+ *     plan-tier (small structured-JSON prompt).
+ *   - "custom" — gpt-5 family, o-series. Per-request input scales to
+ *     the model's advertised limit (200K for gpt-5-mini). Routed through
+ *     Copilot-billed access, so the token-owner needs Copilot.
+ *
+ * Synth tier uses gpt-5-mini for the larger context window. Anthropic
+ * remains the preferred synth target when an Anthropic key is set (see
+ * llm-router.ts hybrid routing).
  */
-export type GitHubModelsModel = 'openai/gpt-4o' | 'openai/gpt-4o-mini' | 'openai/gpt-4.1' | 'openai/gpt-4.1-mini';
+export type GitHubModelsModel = 'openai/gpt-4o' | 'openai/gpt-4o-mini' | 'openai/gpt-4.1' | 'openai/gpt-4.1-mini' | 'openai/gpt-5' | 'openai/gpt-5-mini';
 export interface CallGitHubModelsOpts {
     /** Workflow GITHUB_TOKEN. The model server checks the `models:read` permission scope. */
     token: string;

package/dist/llm/github-models-client.js CHANGED Viewed

@@ -12,7 +12,7 @@
  * their result types.
  *
  * Model names use GitHub Models namespacing — e.g. `openai/gpt-4o`,
- * `openai/gpt-4o-mini`, `openai/gpt-4.1`. The router (in
+ * `openai/gpt-4o-mini`, `openai/gpt-5-mini`. The router (in
  * llm-router.ts) maps internal logical model tiers (`plan` / `synth`) to
  * the concrete provider-specific id.
  */
@@ -25,8 +25,9 @@ async function callGitHubModels(opts) {
     }
     const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
     const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;
-    // Synthesis prompts can produce 8K-token responses on gpt-4.1, which
-    // routinely take 60–90s. Default to 120s so we don't abort mid-stream.
+    // Synthesis prompts can produce 8K-token responses (and the "custom"
+    // tier models like gpt-5-mini can return much more), which routinely
+    // take 60–120s. Default to 120s so we don't abort mid-stream.
     const timeoutMs = opts.timeoutMs ?? 120_000;
     const controller = new AbortController();
     const timer = setTimeout(() => controller.abort(), timeoutMs);

package/dist/llm/llm-router.js CHANGED Viewed

@@ -6,7 +6,11 @@ const github_models_client_1 = require("./github-models-client");
 /** Per-tier per-provider model id lookup. */
 const MODEL_BY_TIER = {
     plan: { anthropic: 'claude-haiku-4-5', githubModels: 'openai/gpt-4o-mini' },
-    synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-4.1' },
+    // gpt-5-mini is in the "custom" GH-Models tier — 200K input context,
+    // 100K output, reasoning + tool-calling. Bypasses the 8K cap that
+    // hits "high"-tier models like gpt-4.1. Requires the caller's token
+    // to have Models access through a Copilot subscription (GMT path).
+    synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-5-mini' },
 };
 async function callLlm(opts) {
     const tierModels = MODEL_BY_TIER[opts.tier];

package/dist/runner/archeologist.js CHANGED Viewed

@@ -76,6 +76,18 @@ const synthesize_report_1 = require("./nodes/synthesize-report");
 const clone_and_index_1 = require("./nodes/clone-and-index");
 const analyze_architecture_1 = require("./nodes/analyze-architecture");
 const identify_gaps_1 = require("./nodes/identify-gaps");
+/**
+ * Progress log → stderr as `[research-runner HH:MM:SS] msg`. Reaches GitHub Actions
+ * job output without polluting stdout (which carries the JSON result the workflow
+ * parses). No-op when RESEARCH_RUNNER_QUIET is exactly '1' so unit tests stay clean.
+ */
+function progress(msg) {
+    if (process.env.RESEARCH_RUNNER_QUIET === '1') {
+        return;
+    }
+    const ts = new Date().toISOString().slice(11, 19); // HH:MM:SS in UTC (toISOString is always UTC)
+    process.stderr.write(`[research-runner ${ts}] ${msg}\n`);
+}
 async function runArcheologist(opts) {
     // ----- validate_brief (pure) -----
     const briefParsed = schemas_1.ResearchBrief.safeParse(opts.brief);
@@ -89,6 +101,7 @@ async function runArcheologist(opts) {
     const githubToken = opts.githubToken ?? process.env.GITHUB_TOKEN ?? '';
     const tavilyApiKey = opts.tavilyApiKey ?? process.env.TAVILY_API_KEY ?? '';
     const usptoApiKey = opts.usptoApiKey ?? process.env.USPTO_API_KEY ?? '';
+    progress(`▶ run ${runId} | scope=${brief.scope.level}(${brief.scope.id}) | path=${brief.path} | llm_provider=${brief.llm_provider ?? 'anthropic'} | keys: anthropic=${!!anthropicApiKey} github=${!!githubToken} tavily=${!!tavilyApiKey} uspto=${!!usptoApiKey}`);
     const absoluteAuditDir = path.resolve(opts.meshDir, opts.auditDir);
     const absoluteOutputDir = path.resolve(opts.meshDir, opts.outputDir);
     fs.mkdirSync(absoluteOutputDir, { recursive: true });
@@ -240,6 +253,7 @@ async function runArcheologist(opts) {
         // ============================================================================
         // RESEARCH PATH (existing): plan_queries → 4 providers → dedupe → gap-analysis
         // ============================================================================
+        progress(`◐ plan_queries — calling LLM to generate query plan…`);
         const planStart = Date.now();
         const plan = await (0, plan_queries_1.planQueries)({
             meshDir: opts.meshDir,
@@ -251,6 +265,7 @@ async function runArcheologist(opts) {
             fetchImpl: opts.fetchImpl,
         });
         researchQueryPlan = plan.queryPlan;
+        progress(`✓ plan_queries (${plan.llm.provider} ${plan.llm.model}) in ${Date.now() - planStart}ms — ${plan.llm.inputTokens} in / ${plan.llm.outputTokens} out tokens, ${plan.llm.attempts} attempt${plan.llm.attempts !== 1 ? 's' : ''} → web=${plan.queryPlan.web.length} arxiv=${plan.queryPlan.arxiv.length} patent=${plan.queryPlan.patent.length} community=${plan.queryPlan.community.length}`);
         totalInputTokens += plan.llm.inputTokens;
         totalOutputTokens += plan.llm.outputTokens;
         totalCostUsd += plan.llm.costUsd;
@@ -271,6 +286,7 @@ async function runArcheologist(opts) {
         // ----- four-provider search (pure_api each, parallel across providers) -----
         // We run all four providers concurrently with Promise.allSettled so a
         // provider-level failure (e.g. PatentsView outage) doesn't block the rest.
+        progress(`◐ search — tavily(${plan.queryPlan.web.length}) + arxiv(${plan.queryPlan.arxiv.length}) + hackernews(${plan.queryPlan.community.length}) + uspto(${usptoApiKey ? plan.queryPlan.patent.length : 'skipped'}) in parallel…`);
         const searchStart = Date.now();
         const [tavily, arxiv, hn, uspto] = await Promise.allSettled([
             (0, tavily_search_1.runTavilySearch)({ apiKey: tavilyApiKey, queries: plan.queryPlan.web, fetchImpl: opts.fetchImpl }),
@@ -330,9 +346,12 @@ async function runArcheologist(opts) {
         handleProvider(arxiv, 'arxiv_search', 'arxiv', 'GET /api/query');
         handleProvider(hn, 'hackernews_search', 'hackernews', 'GET /api/v1/search');
         handleProvider(uspto, 'uspto_search', 'uspto', 'POST /api/v1/patent/');
+        const fmtSettled = (s) => s.status === 'fulfilled' ? 'OK' : `FAIL(${s.reason instanceof Error ? s.reason.message.slice(0, 60) : String(s.reason).slice(0, 60)})`;
+        progress(`✓ search done in ${searchDuration}ms — tavily=${providerResultCounts.tavily}/${fmtSettled(tavily)} arxiv=${providerResultCounts.arxiv}/${fmtSettled(arxiv)} hn=${providerResultCounts.hackernews}/${fmtSettled(hn)} uspto=${providerResultCounts.uspto}/${fmtSettled(uspto)} (raw=${allProviderResults.length})`);
         // ----- dedupe_and_rank (pure) — first pass -----
         let dedupeStart = Date.now();
         rankedSources = (0, dedupe_and_rank_1.dedupeAndRank)({ results: allProviderResults, topN: 20 });
+        progress(`✓ dedupe_and_rank — ${rankedSources.length} ranked sources (top score=${rankedSources[0]?.salience_score?.toFixed(2) ?? 'n/a'})`);
         emitter.emit({
             node_kind: 'pure',
             node_name: 'dedupe_and_rank',
@@ -354,6 +373,7 @@ async function runArcheologist(opts) {
                     outputs_summary: `signals=${gapSignals.map(s => s.kind).join(',')}`,
                 },
             });
+            progress(`◐ gap_analysis — ${gapSignals.length} signal(s): ${gapSignals.map(s => s.kind).join(',')}`);
             const gapStart = Date.now();
             const gap = await (0, gap_analysis_1.runGapAnalysis)({
                 meshDir: opts.meshDir,
@@ -365,6 +385,7 @@ async function runArcheologist(opts) {
                 githubToken,
                 fetchImpl: opts.fetchImpl,
             });
+            progress(`✓ gap_analysis (${gap.llm.provider} ${gap.llm.model}) in ${Date.now() - gapStart}ms — ${gap.llm.inputTokens} in / ${gap.llm.outputTokens} out tokens → ${gap.followUpQueries.length} follow-up queries`);
             totalInputTokens += gap.llm.inputTokens;
             totalOutputTokens += gap.llm.outputTokens;
             totalCostUsd += gap.llm.costUsd;
@@ -436,6 +457,7 @@ async function runArcheologist(opts) {
         }
     } // end research-path else branch
     // ----- synthesize_report (LLM) -----
+    progress(`◐ synthesize_report — calling LLM (provider hint=${brief.llm_provider ?? 'anthropic'}, sources=${rankedSources.length}); hybrid routing will pick anthropic for synth if anthropic key is set…`);
     const synthStart = Date.now();
     const synthesis = await (0, synthesize_report_1.synthesizeReport)({
         meshDir: opts.meshDir,
@@ -454,6 +476,7 @@ async function runArcheologist(opts) {
     totalInputTokens += synthesis.llm.inputTokens;
     totalOutputTokens += synthesis.llm.outputTokens;
     totalCostUsd += synthesis.llm.costUsd;
+    progress(`✓ synthesize_report (${synthesis.llm.provider} ${synthesis.llm.model}) in ${Date.now() - synthStart}ms — ${synthesis.llm.inputTokens} in / ${synthesis.llm.outputTokens} out tokens, ${synthesis.llm.attempts} attempt${synthesis.llm.attempts !== 1 ? 's' : ''}`);
     emitter.emit({
         node_kind: 'llm',
         node_name: 'synthesize_report',
@@ -554,6 +577,8 @@ async function runArcheologist(opts) {
         }
         catch { /* leave on disk — non-fatal, just a tmpdir entry */ }
     }
+    const totalDurationMs = Date.now() - startedAt.getTime();
+    progress(`◆ done ${runId} in ${(totalDurationMs / 1000).toFixed(1)}s — ${totalInputTokens} in / ${totalOutputTokens} out tokens, $${roundUsd(totalCostUsd)} | sources=${rankedSources.length} conclusions=${synthesis.citation_stats.conclusion_count} recs=${synthesis.citation_stats.recommendation_count} | artifact=${artifactPath}`);
     return {
         run_id: runId,
         topic: brief.topic,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@maintainabilityai/research-runner",
-  "version": "0.1.4",
+  "version": "0.1.6",
   "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
   "license": "MIT",
   "author": "MaintainabilityAI",