@maintainabilityai/research-runner 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,8 +23,12 @@ async function callAnthropic(opts) {
23
23
  throw new Error('ANTHROPIC_API_KEY missing — set the env var or pass apiKey directly');
24
24
  }
25
25
  const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
26
- // Match github-models default — 8K-token synth responses can run 60–90s.
27
- const timeoutMs = opts.timeoutMs ?? 120_000;
26
+ // Sonnet at ~50–80 tok/s × 8K output = 100–160s, plus prompt-processing.
27
+ // 120s aborted real synth runs mid-stream; 240s gives headroom for
28
+ // slow days. (Real fix is streaming so the connection stays alive,
29
+ // but the cost/benefit isn't worth it yet — single-shot is fine
30
+ // until we hit 240s legitimately.)
31
+ const timeoutMs = opts.timeoutMs ?? 240_000;
28
32
  const controller = new AbortController();
29
33
  const timer = setTimeout(() => controller.abort(), timeoutMs);
30
34
  let response;
@@ -30,7 +30,7 @@
30
30
  * remains the preferred synth target when an Anthropic key is set (see
31
31
  * llm-router.ts hybrid routing).
32
32
  */
33
- export type GitHubModelsModel = 'openai/gpt-4o' | 'openai/gpt-4o-mini' | 'openai/gpt-4.1' | 'openai/gpt-4.1-mini' | 'openai/gpt-5' | 'openai/gpt-5-mini';
33
+ export type GitHubModelsModel = 'openai/gpt-4o' | 'openai/gpt-4o-mini' | 'openai/gpt-4.1' | 'openai/gpt-4.1-mini' | 'openai/gpt-5' | 'openai/gpt-5-mini' | 'openai/gpt-5-chat';
34
34
  export interface CallGitHubModelsOpts {
35
35
  /** Workflow GITHUB_TOKEN. The model server checks the `models:read` permission scope. */
36
36
  token: string;
@@ -25,10 +25,13 @@ async function callGitHubModels(opts) {
25
25
  }
26
26
  const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
27
27
  const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;
28
- // Synthesis prompts can produce 8K-token responses (and the "custom"
29
- // tier models like gpt-5-mini can return much more), which routinely
30
- // take 60–120s. Default to 120s so we don't abort mid-stream.
31
- const timeoutMs = opts.timeoutMs ?? 120_000;
28
+ // Synthesis prompts can produce 8K+ output tokens (and "custom"-tier
29
+ // models like gpt-5-chat can return much more), which routinely take
30
+ // 100–180s. Match the Anthropic client at 240s for headroom on slow
31
+ // days. (Real fix is streaming so the connection stays alive, but
32
+ // the cost/benefit isn't worth it yet — single-shot is fine until
33
+ // we hit 240s legitimately.)
34
+ const timeoutMs = opts.timeoutMs ?? 240_000;
32
35
  const controller = new AbortController();
33
36
  const timer = setTimeout(() => controller.abort(), timeoutMs);
34
37
  const messages = [];
@@ -48,7 +51,12 @@ async function callGitHubModels(opts) {
48
51
  body: JSON.stringify({
49
52
  model: opts.model,
50
53
  messages,
51
- max_tokens: opts.maxTokens,
54
+ // `max_completion_tokens` is the current Chat Completions param;
55
+ // `max_tokens` is rejected outright by gpt-5 family models with
56
+ // HTTP 400. Verified gpt-4o-mini accepts the new name too, so we
57
+ // use one code path. Temperature stays optional (gpt-5 reasoning
58
+ // models may ignore it but accept it).
59
+ max_completion_tokens: opts.maxTokens,
52
60
  temperature: opts.temperature ?? 0,
53
61
  }),
54
62
  signal: controller.signal,
@@ -6,11 +6,15 @@ const github_models_client_1 = require("./github-models-client");
6
6
  /** Per-tier per-provider model id lookup. */
7
7
  const MODEL_BY_TIER = {
8
8
  plan: { anthropic: 'claude-haiku-4-5', githubModels: 'openai/gpt-4o-mini' },
9
- // gpt-5-mini is in the "custom" GH-Models tier — 200K input context,
10
- // 100K output, reasoning + tool-calling. Bypasses the 8K cap that
11
- // hits "high"-tier models like gpt-4.1. Requires the caller's token
12
- // to have Models access through a Copilot subscription (GMT path).
13
- synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-5-mini' },
9
+ // gpt-5-chat is in the "custom" GH-Models tier (200K input / 100K
10
+ // output) and is NON-reasoning — verified end-to-end with a live API
11
+ // call (reasoning_tokens=0, finish_reason=stop). Picked over gpt-5
12
+ // and gpt-5-mini because those are reasoning models that consume the
13
+ // completion budget on hidden chain-of-thought before producing any
14
+ // visible markdown — bad for the synthesis step where we need
15
+ // predictable structured output. Requires the caller's token to have
16
+ // Models access through a Copilot subscription (GMT path).
17
+ synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-5-chat' },
14
18
  };
15
19
  async function callLlm(opts) {
16
20
  const tierModels = MODEL_BY_TIER[opts.tier];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.6",
3
+ "version": "0.1.8",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",