@maintainabilityai/research-runner 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,16 +11,26 @@
11
11
  * their result types.
12
12
  *
13
13
  * Model names use GitHub Models namespacing — e.g. `openai/gpt-4o`,
14
- * `openai/gpt-4o-mini`, `openai/gpt-4.1`. The router (in
14
+ * `openai/gpt-4o-mini`, `openai/gpt-5-mini`. The router (in
15
15
  * llm-router.ts) maps internal logical model tiers (`plan` / `synth`) to
16
16
  * the concrete provider-specific id.
17
17
  */
18
18
  /**
19
19
  * Subset of GitHub Models model ids we use. Extend as new tiers land.
20
- * GitHub Models does not currently host Anthropic Claude — synth tier
21
- * uses `openai/gpt-4.1` (the "outperforms gpt-4o across the board" tier).
20
+ *
21
+ * GitHub Models has two relevant rate-limit tiers:
22
+ * - "high" — gpt-4o, gpt-4o-mini, gpt-4.1 etc. Per-request input is
23
+ * capped at ~8K tokens regardless of subscription. Fine for our
24
+ * plan-tier (small structured-JSON prompt).
25
+ * - "custom" — gpt-5 family, o-series. Per-request input scales to
26
+ * the model's advertised limit (200K for gpt-5-mini). Routed through
27
+ * Copilot-billed access, so the token-owner needs Copilot.
28
+ *
29
+ * Synth tier uses gpt-5-chat for the larger context window. Anthropic
30
+ * remains the preferred synth target when an Anthropic key is set (see
31
+ * llm-router.ts hybrid routing).
22
32
  */
23
- export type GitHubModelsModel = 'openai/gpt-4o' | 'openai/gpt-4o-mini' | 'openai/gpt-4.1' | 'openai/gpt-4.1-mini';
33
+ export type GitHubModelsModel = 'openai/gpt-4o' | 'openai/gpt-4o-mini' | 'openai/gpt-4.1' | 'openai/gpt-4.1-mini' | 'openai/gpt-5' | 'openai/gpt-5-mini' | 'openai/gpt-5-chat';
24
34
  export interface CallGitHubModelsOpts {
25
35
  /** Workflow GITHUB_TOKEN. The model server checks the `models:read` permission scope. */
26
36
  token: string;
@@ -12,7 +12,7 @@
12
12
  * their result types.
13
13
  *
14
14
  * Model names use GitHub Models namespacing — e.g. `openai/gpt-4o`,
15
- * `openai/gpt-4o-mini`, `openai/gpt-4.1`. The router (in
15
+ * `openai/gpt-4o-mini`, `openai/gpt-5-mini`. The router (in
16
16
  * llm-router.ts) maps internal logical model tiers (`plan` / `synth`) to
17
17
  * the concrete provider-specific id.
18
18
  */
@@ -25,8 +25,9 @@ async function callGitHubModels(opts) {
25
25
  }
26
26
  const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
27
27
  const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;
28
- // Synthesis prompts can produce 8K-token responses on gpt-4.1, which
29
- // routinely take 60–90s. Default to 120s so we don't abort mid-stream.
28
+ // Synthesis prompts can produce 8K-token responses (and the "custom"
29
+ // tier models like gpt-5-mini can return much more), which routinely
30
+ // take 60–120s. Default to 120s so we don't abort mid-stream.
30
31
  const timeoutMs = opts.timeoutMs ?? 120_000;
31
32
  const controller = new AbortController();
32
33
  const timer = setTimeout(() => controller.abort(), timeoutMs);
@@ -47,7 +48,12 @@ async function callGitHubModels(opts) {
47
48
  body: JSON.stringify({
48
49
  model: opts.model,
49
50
  messages,
50
- max_tokens: opts.maxTokens,
51
+ // `max_completion_tokens` is the current Chat Completions param;
52
+ // `max_tokens` is rejected outright by gpt-5 family models with
53
+ // HTTP 400. Verified gpt-4o-mini accepts the new name too, so we
54
+ // use one code path. Temperature is always sent, defaulting to 0 when
55
+ // the caller omits it (gpt-5 reasoning models may ignore it but accept it).
56
+ max_completion_tokens: opts.maxTokens,
51
57
  temperature: opts.temperature ?? 0,
52
58
  }),
53
59
  signal: controller.signal,
@@ -6,7 +6,15 @@ const github_models_client_1 = require("./github-models-client");
6
6
  /** Per-tier per-provider model id lookup. */
7
7
  const MODEL_BY_TIER = {
8
8
  plan: { anthropic: 'claude-haiku-4-5', githubModels: 'openai/gpt-4o-mini' },
9
- synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-4.1' },
9
+ // gpt-5-chat is in the "custom" GH-Models tier (200K input / 100K
10
+ // output) and is NON-reasoning — verified end-to-end with a live API
11
+ // call (reasoning_tokens=0, finish_reason=stop). Picked over gpt-5
12
+ // and gpt-5-mini because those are reasoning models that consume the
13
+ // completion budget on hidden chain-of-thought before producing any
14
+ // visible markdown — bad for the synthesis step where we need
15
+ // predictable structured output. Requires the caller's token to have
16
+ // Models access through a Copilot subscription (GMT path).
17
+ synth: { anthropic: 'claude-sonnet-4-6', githubModels: 'openai/gpt-5-chat' },
10
18
  };
11
19
  async function callLlm(opts) {
12
20
  const tierModels = MODEL_BY_TIER[opts.tier];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.5",
3
+ "version": "0.1.7",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",