@maintainabilityai/research-runner 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,17 +3,32 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.callLlm = callLlm;
4
4
  const anthropic_client_1 = require("./anthropic-client");
5
5
  const github_models_client_1 = require("./github-models-client");
6
- /** Per-tier per-provider model id lookup. */
6
+ /**
7
+ * Per-tier per-provider model id lookup. `githubModelsFallback` (when
8
+ * present) is tried automatically on 401/403/404 from the primary —
9
+ * how we let users with a GMT (Copilot-Pro user PAT) reach the
10
+ * "custom"-tier `gpt-5-chat` while users on a workflow bot token
11
+ * still work via the "low"-tier fallback.
12
+ */
7
13
  const MODEL_BY_TIER = {
8
- plan: { anthropic: 'claude-haiku-4-5', githubModels: 'openai/gpt-4o-mini' },
9
- // gpt-5-chat is in the "custom" GH-Models tier (200K input / 100K
10
- // output) and is NON-reasoning — verified end-to-end with a live API
11
- // call (reasoning_tokens=0, finish_reason=stop). Picked over gpt-5
12
- // and gpt-5-mini because those are reasoning models that consume the
14
+ // Plan tier — small structured-JSON output, one shot, fits any cap.
15
+ // Try gpt-5-chat first (custom tier, only reachable with a Copilot-
16
+ // enrolled PAT like GMT). Fall back to gpt-4.1-mini (low tier, works
17
+ // on every token including the Actions bot). Empirically equivalent
18
+ // output quality on the V3-anchor prompt — the upgrade is "if free,
19
+ // why not"; the fallback keeps everyone working.
20
+ plan: {
21
+ anthropic: 'claude-haiku-4-5',
22
+ githubModels: 'openai/gpt-5-chat',
23
+ githubModelsFallback: 'openai/gpt-4.1-mini',
24
+ },
25
+ // Synth tier — 200K context, non-reasoning (verified live with
26
+ // reasoning_tokens=0, finish_reason=stop). Picked over gpt-5 and
27
+ // gpt-5-mini because those are reasoning models that consume the
13
28
  // completion budget on hidden chain-of-thought before producing any
14
- // visible markdown — bad for the synthesis step where we need
15
- // predictable structured output. Requires the caller's token to have
16
- // Models access through a Copilot subscription (GMT path).
29
+ // visible markdown. Synth needs predictable structured output. No
30
+ // fallback model here — synth runs on the agent side now (Copilot
31
+ // Coding Agent / @claude), so the runner doesn't fire synth itself.
17
32
  synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-5-chat' },
18
33
  };
19
34
  /**
@@ -51,28 +66,54 @@ async function callAnthropicTier(opts, tierModels) {
51
66
  httpStatus: r.httpStatus,
52
67
  };
53
68
  }
69
+ /**
70
+ * GitHub Models returns 401 / 403 / 404 when the token can't reach the
71
+ * requested model (typically the workflow bot token hitting a "custom"-
72
+ * tier model like gpt-5-chat). These are recoverable via fallback;
73
+ * everything else (timeouts, 5xx, 413 cap, parse errors) should
74
+ * propagate.
75
+ */
76
+ function isModelAccessError(err) {
77
+ if (!(err instanceof Error)) {
78
+ return false;
79
+ }
80
+ return /GitHub Models returned 40[134]:/.test(err.message);
81
+ }
54
82
  async function callGitHubModelsTier(opts, tierModels) {
55
83
  if (!opts.githubToken) {
56
84
  throw new Error(`callLlm: provider=github-models requires githubToken (set GITHUB_TOKEN; workflow needs \`permissions: models: read\`).`);
57
85
  }
58
- const r = await (0, github_models_client_1.callGitHubModels)({
59
- token: opts.githubToken,
60
- model: tierModels.githubModels,
61
- system: opts.system,
62
- prompt: opts.prompt,
63
- maxTokens: opts.maxTokens,
64
- temperature: opts.temperature,
65
- fetchImpl: opts.fetchImpl,
66
- });
67
- return {
68
- provider: 'github-models',
69
- model: tierModels.githubModels,
70
- text: r.text,
71
- inputTokens: r.inputTokens,
72
- outputTokens: r.outputTokens,
73
- costUsd: r.costUsd,
74
- httpStatus: r.httpStatus,
86
+ const callOne = async (model) => {
87
+ const r = await (0, github_models_client_1.callGitHubModels)({
88
+ token: opts.githubToken,
89
+ model,
90
+ system: opts.system,
91
+ prompt: opts.prompt,
92
+ maxTokens: opts.maxTokens,
93
+ temperature: opts.temperature,
94
+ fetchImpl: opts.fetchImpl,
95
+ });
96
+ return {
97
+ provider: 'github-models',
98
+ model,
99
+ text: r.text,
100
+ inputTokens: r.inputTokens,
101
+ outputTokens: r.outputTokens,
102
+ costUsd: r.costUsd,
103
+ httpStatus: r.httpStatus,
104
+ };
75
105
  };
106
+ try {
107
+ return await callOne(tierModels.githubModels);
108
+ }
109
+ catch (err) {
110
+ if (tierModels.githubModelsFallback && isModelAccessError(err)) {
111
+ const cause = err instanceof Error ? err.message : String(err);
112
+ process.stderr.write(`[research-runner] ⚠ github-models ${tierModels.githubModels} access-denied, falling back to ${tierModels.githubModelsFallback}. Cause: ${cause.slice(0, 200)}\n`);
113
+ return await callOne(tierModels.githubModelsFallback);
114
+ }
115
+ throw err;
116
+ }
76
117
  }
77
118
  async function callLlm(opts) {
78
119
  const tierModels = MODEL_BY_TIER[opts.tier];
@@ -515,7 +515,9 @@ async function runArcheologist(opts) {
515
515
  provider: brief.llm_provider,
516
516
  // plan_queries is the only LLM hop we run now (synth handed off
517
517
  // to the assigned agent). Surface that model in the Hatter's Tag.
518
- model: 'openai/gpt-4o-mini',
518
+ // Plan-tier primary (router falls back to gpt-4.1-mini on access denial).
519
+ // Synth runs on the agent side, so this is the only LLM hop the runner makes.
520
+ model: 'openai/gpt-5-chat',
519
521
  input_tokens: totalInputTokens,
520
522
  output_tokens: totalOutputTokens,
521
523
  cost_usd: roundUsd(totalCostUsd),
@@ -35,7 +35,7 @@ async function runArxivSearch(opts) {
35
35
  fromQuery: query,
36
36
  title: r.title,
37
37
  url: r.abstractUrl,
38
- content: r.summary.slice(0, 500),
38
+ content: r.summary.slice(0, 2000),
39
39
  // Position-derived score: arXiv returns by relevance, decay 0.9 → 0.5.
40
40
  score: Math.max(0.5, 0.9 - j * 0.1),
41
41
  publishedDate: r.published || undefined,
@@ -61,7 +61,7 @@ function dedupeAndRank(opts) {
61
61
  if (r.score > existing.scoreSum / existing.occurrences) {
62
62
  existing.title = r.title || existing.title;
63
63
  if (r.content) {
64
- existing.excerpt = r.content.slice(0, 500);
64
+ existing.excerpt = r.content.slice(0, 2000);
65
65
  }
66
66
  }
67
67
  if (!existing.publishedAt && r.publishedDate) {
@@ -76,7 +76,7 @@ function dedupeAndRank(opts) {
76
76
  canonicalUrl: canonical,
77
77
  provider: r.provider,
78
78
  title: r.title || canonical,
79
- excerpt: (r.content || '').slice(0, 500),
79
+ excerpt: (r.content || '').slice(0, 2000),
80
80
  publishedAt: r.publishedDate,
81
81
  authors: r.authors,
82
82
  scoreSum: r.score,
@@ -134,7 +134,7 @@ function dedupeAndRank(opts) {
134
134
  url: entry.aggregated.canonicalUrl,
135
135
  retrieved_at: retrievedAt,
136
136
  salience_score: roundTo(entry.composite, 4),
137
- excerpt: entry.aggregated.excerpt.slice(0, 500),
137
+ excerpt: entry.aggregated.excerpt.slice(0, 2000),
138
138
  ...(entry.aggregated.publishedAt ? { published_at: entry.aggregated.publishedAt } : {}),
139
139
  ...(entry.aggregated.authors && entry.aggregated.authors.length > 0 ? { authors: entry.aggregated.authors } : {}),
140
140
  }));
@@ -2,12 +2,14 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.formatForHuman = formatForHuman;
4
4
  /**
5
- * Truncates an excerpt for display in the issue comment. Sources can
6
- * carry up to 500 chars; we shorten to ~280 so the issue stays readable.
5
+ * Normalise the excerpt for blockquote display — collapse whitespace
6
+ * runs so newlines in arXiv abstracts don't break the markdown quote.
7
+ * No length cap here: the schema's 2000-char limit is what the agent
8
+ * downstream sees, and a downstream synthesis or PRD agent needs the
9
+ * full excerpt to write faithful citations.
7
10
  */
8
- function shortExcerpt(s, n = 280) {
9
- const cleaned = s.replace(/\s+/g, ' ').trim();
10
- return cleaned.length <= n ? cleaned : cleaned.slice(0, n - 1) + '…';
11
+ function normaliseExcerpt(s) {
12
+ return s.replace(/\s+/g, ' ').trim();
11
13
  }
12
14
  function meshSummary(meshContext) {
13
15
  if (meshContext.bar) {
@@ -51,7 +53,7 @@ function providerSection(label, emoji, provider, sources, totalCount) {
51
53
  // The earlier form **[`S1`] [Title](url)** broke GitHub's markdown
52
54
  // parser (it tried to interpret the brackets as a reference link).
53
55
  lines.push(`- \`${s.id}\` **[${s.title}](${s.url})** — score ${s.salience_score.toFixed(2)}${date}${authors}`);
54
- lines.push(` > ${shortExcerpt(s.excerpt)}`);
56
+ lines.push(` > ${normaliseExcerpt(s.excerpt)}`);
55
57
  }
56
58
  lines.push('');
57
59
  return lines;
@@ -46,7 +46,7 @@ async function runUsptoSearch(opts) {
46
46
  fromQuery: query,
47
47
  title: r.title,
48
48
  url: r.url,
49
- content: r.abstract.slice(0, 500),
49
+ content: r.abstract.slice(0, 2000),
50
50
  score: Math.max(0.4, 0.85 - j * 0.1),
51
51
  publishedDate: r.grantedAt || undefined,
52
52
  authors: r.inventors,
@@ -15,7 +15,10 @@ export declare const RankedSource: z.ZodObject<{
15
15
  retrieved_at: z.ZodEffects<z.ZodString, string, string>;
16
16
  /** 0.0 - 1.0, higher = more relevant. Computed by dedupe_and_rank. */
17
17
  salience_score: z.ZodNumber;
18
- /** ≤500-char excerpt the synthesis node may quote directly. */
18
+ /** ≤2000-char excerpt the synthesis agent quotes from. Sized for the
19
+ * current flow where the agent (Copilot Coding Agent / @claude) reads
20
+ * the issue comment — they have plenty of context budget, and 500
21
+ * routinely truncated arXiv abstracts and patent summaries mid-thought. */
19
22
  excerpt: z.ZodString;
20
23
  /** Optional: pub date if the source has one (papers, news, patents). */
21
24
  published_at: z.ZodOptional<z.ZodEffects<z.ZodString, string, string>>;
@@ -52,7 +55,10 @@ export declare const RankedSourceList: z.ZodArray<z.ZodObject<{
52
55
  retrieved_at: z.ZodEffects<z.ZodString, string, string>;
53
56
  /** 0.0 - 1.0, higher = more relevant. Computed by dedupe_and_rank. */
54
57
  salience_score: z.ZodNumber;
55
- /** ≤500-char excerpt the synthesis node may quote directly. */
58
+ /** ≤2000-char excerpt the synthesis agent quotes from. Sized for the
59
+ * current flow where the agent (Copilot Coding Agent / @claude) reads
60
+ * the issue comment — they have plenty of context budget, and 500
61
+ * routinely truncated arXiv abstracts and patent summaries mid-thought. */
56
62
  excerpt: z.ZodString;
57
63
  /** Optional: pub date if the source has one (papers, news, patents). */
58
64
  published_at: z.ZodOptional<z.ZodEffects<z.ZodString, string, string>>;
@@ -19,8 +19,11 @@ exports.RankedSource = zod_1.z.object({
19
19
  retrieved_at: primitives_1.IsoTimestamp,
20
20
  /** 0.0 - 1.0, higher = more relevant. Computed by dedupe_and_rank. */
21
21
  salience_score: zod_1.z.number().min(0).max(1),
22
- /** ≤500-char excerpt the synthesis node may quote directly. */
23
- excerpt: zod_1.z.string().max(500),
22
+ /** ≤2000-char excerpt the synthesis agent quotes from. Sized for the
23
+ * current flow where the agent (Copilot Coding Agent / @claude) reads
24
+ * the issue comment — they have plenty of context budget, and 500
25
+ * routinely truncated arXiv abstracts and patent summaries mid-thought. */
26
+ excerpt: zod_1.z.string().max(2000),
24
27
  /** Optional: pub date if the source has one (papers, news, patents). */
25
28
  published_at: primitives_1.IsoTimestamp.optional(),
26
29
  /** Optional: authors (arxiv / news). */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.12",
3
+ "version": "0.1.14",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",