@oh-my-pi/pi-coding-agent 16.1.1 → 16.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/CHANGELOG.md +50 -0
  2. package/dist/cli.js +3090 -3115
  3. package/dist/types/cli/bench-cli.d.ts +2 -1
  4. package/dist/types/config/model-resolver.d.ts +3 -3
  5. package/dist/types/config/settings-schema.d.ts +1 -1
  6. package/dist/types/main.d.ts +2 -0
  7. package/dist/types/mnemopi/embed-client.d.ts +70 -0
  8. package/dist/types/mnemopi/embed-protocol.d.ts +52 -0
  9. package/dist/types/mnemopi/embed-worker.d.ts +12 -0
  10. package/dist/types/mnemopi/state.d.ts +9 -1
  11. package/dist/types/modes/components/assistant-message.d.ts +12 -0
  12. package/dist/types/modes/components/welcome.d.ts +1 -1
  13. package/dist/types/sdk.d.ts +19 -2
  14. package/dist/types/session/agent-storage.d.ts +2 -0
  15. package/dist/types/session/auth-broker-config.d.ts +34 -6
  16. package/dist/types/session/history-storage.d.ts +1 -1
  17. package/dist/types/system-prompt.d.ts +5 -1
  18. package/dist/types/task/executor.d.ts +10 -0
  19. package/dist/types/tools/find.d.ts +0 -2
  20. package/dist/types/tools/image-gen.d.ts +2 -2
  21. package/dist/types/tools/search.d.ts +3 -3
  22. package/dist/types/utils/image-loading.d.ts +1 -1
  23. package/dist/types/utils/ipc.d.ts +22 -0
  24. package/dist/types/web/search/providers/perplexity-auth.d.ts +37 -0
  25. package/package.json +12 -12
  26. package/scripts/measure-prompt-tokens.ts +63 -0
  27. package/src/cli/bench-cli.ts +64 -3
  28. package/src/cli/startup-cwd.ts +3 -13
  29. package/src/cli.ts +8 -0
  30. package/src/commands/token.ts +52 -33
  31. package/src/config/append-only-context-mode.ts +45 -0
  32. package/src/config/model-discovery.ts +3 -0
  33. package/src/config/model-registry.ts +21 -3
  34. package/src/config/model-resolver.ts +31 -8
  35. package/src/config/settings-schema.ts +1 -1
  36. package/src/cursor.ts +1 -1
  37. package/src/debug/raw-sse-buffer.ts +31 -10
  38. package/src/discovery/builtin-rules/ts-no-return-type.md +0 -1
  39. package/src/eval/py/prelude.py +1 -1
  40. package/src/export/html/tool-views.generated.js +1 -1
  41. package/src/extensibility/extensions/runner.ts +8 -2
  42. package/src/internal-urls/docs-index.generated.txt +1 -1
  43. package/src/lsp/client.ts +24 -0
  44. package/src/main.ts +29 -9
  45. package/src/mnemopi/backend.ts +49 -3
  46. package/src/mnemopi/embed-client.ts +401 -0
  47. package/src/mnemopi/embed-protocol.ts +35 -0
  48. package/src/mnemopi/embed-worker.ts +113 -0
  49. package/src/mnemopi/state.ts +29 -1
  50. package/src/modes/components/assistant-message.ts +86 -0
  51. package/src/modes/components/custom-editor.ts +1 -1
  52. package/src/modes/components/model-selector.ts +2 -2
  53. package/src/modes/components/tips.txt +2 -1
  54. package/src/modes/components/welcome.ts +87 -9
  55. package/src/modes/controllers/event-controller.ts +9 -1
  56. package/src/modes/controllers/selector-controller.ts +2 -2
  57. package/src/modes/theme/theme.ts +69 -0
  58. package/src/prompts/system/personalities/default.md +8 -16
  59. package/src/prompts/system/system-prompt.md +101 -115
  60. package/src/prompts/tools/ast-edit.md +10 -12
  61. package/src/prompts/tools/ast-grep.md +14 -18
  62. package/src/prompts/tools/bash.md +19 -21
  63. package/src/prompts/tools/browser.md +24 -24
  64. package/src/prompts/tools/checkpoint.md +0 -1
  65. package/src/prompts/tools/debug.md +11 -15
  66. package/src/prompts/tools/eval.md +27 -27
  67. package/src/prompts/tools/find.md +6 -10
  68. package/src/prompts/tools/github.md +11 -15
  69. package/src/prompts/tools/goal.md +0 -7
  70. package/src/prompts/tools/inspect-image.md +0 -1
  71. package/src/prompts/tools/irc.md +15 -24
  72. package/src/prompts/tools/job.md +5 -8
  73. package/src/prompts/tools/learn.md +2 -2
  74. package/src/prompts/tools/lsp.md +27 -30
  75. package/src/prompts/tools/manage-skill.md +4 -4
  76. package/src/prompts/tools/read.md +21 -23
  77. package/src/prompts/tools/replace.md +0 -1
  78. package/src/prompts/tools/resolve.md +4 -9
  79. package/src/prompts/tools/rewind.md +1 -1
  80. package/src/prompts/tools/search.md +8 -10
  81. package/src/prompts/tools/task.md +33 -38
  82. package/src/prompts/tools/todo.md +14 -18
  83. package/src/prompts/tools/web-search.md +0 -4
  84. package/src/prompts/tools/write.md +1 -1
  85. package/src/sdk.ts +53 -102
  86. package/src/session/agent-session.ts +25 -2
  87. package/src/session/agent-storage.ts +14 -0
  88. package/src/session/auth-broker-config.ts +37 -76
  89. package/src/session/history-storage.ts +13 -1
  90. package/src/session/session-history-format.ts +1 -1
  91. package/src/session/session-manager.ts +33 -6
  92. package/src/stt/asr-client.ts +2 -7
  93. package/src/system-prompt.ts +28 -8
  94. package/src/task/executor.ts +57 -0
  95. package/src/task/index.ts +15 -1
  96. package/src/tiny/title-client.ts +2 -7
  97. package/src/tools/browser.ts +1 -1
  98. package/src/tools/eval.ts +1 -1
  99. package/src/tools/find.ts +4 -17
  100. package/src/tools/image-gen.ts +4 -8
  101. package/src/tools/memory-edit.ts +1 -1
  102. package/src/tools/render-utils.ts +4 -1
  103. package/src/tools/search.ts +5 -5
  104. package/src/tts/tts-client.ts +2 -7
  105. package/src/utils/image-loading.ts +12 -2
  106. package/src/utils/ipc.ts +38 -0
  107. package/src/web/search/providers/perplexity-auth.ts +133 -0
  108. package/src/web/search/providers/perplexity.ts +2 -125
@@ -11,7 +11,7 @@ import type {
11
11
  SimpleStreamOptions,
12
12
  } from "@oh-my-pi/pi-ai";
13
13
  import { streamSimple } from "@oh-my-pi/pi-ai";
14
- import type { CanonicalModelVariant } from "@oh-my-pi/pi-catalog/identity";
14
+ import { buildModelProviderPriorityRank, type CanonicalModelVariant } from "@oh-my-pi/pi-catalog/identity";
15
15
  import { replaceTabs, truncateToWidth } from "@oh-my-pi/pi-tui";
16
16
  import { formatDuration, getProjectDir } from "@oh-my-pi/pi-utils";
17
17
  import chalk from "chalk";
@@ -50,6 +50,7 @@ export interface BenchModelRegistry {
50
50
  resolveCanonicalModel?(canonicalId: string, options?: CanonicalModelQueryOptions): Model<Api> | undefined;
51
51
  getCanonicalVariants?(canonicalId: string, options?: CanonicalModelQueryOptions): CanonicalModelVariant[];
52
52
  getCanonicalId?(model: Model<Api>): string | undefined;
53
+ hasConfiguredAuth?(model: Model<Api>): boolean;
53
54
  }
54
55
 
55
56
  export interface BenchRuntime {
@@ -346,6 +347,56 @@ interface BenchTarget {
346
347
  thinking: ResolvedThinkingLevel | undefined;
347
348
  }
348
349
 
350
+ /** Highest-priority provider variant: native/OAuth transports outrank mirrors. */
351
+ function pickHighestPriorityProvider(models: Model<Api>[], providerOrder?: readonly string[]): Model<Api> | undefined {
352
+ if (models.length <= 1) return models[0];
353
+ const priority = buildModelProviderPriorityRank(providerOrder);
354
+ return [...models].sort((a, b) => {
355
+ const aRank = priority.get(a.provider.toLowerCase()) ?? Number.POSITIVE_INFINITY;
356
+ const bRank = priority.get(b.provider.toLowerCase()) ?? Number.POSITIVE_INFINITY;
357
+ return aRank - bRank;
358
+ })[0];
359
+ }
360
+
361
+ /**
362
+ * Bench resolves selectors against the entire catalog (credentials are ignored),
363
+ * so an ambiguous id shared by several providers can land on one the user never
364
+ * authenticated. For non-pinned selectors, redirect to an equivalent model under
365
+ * a provider with configured auth. An explicit `provider/id` selector is honored
366
+ * verbatim — even unauthenticated — so forced benchmarking keeps working.
367
+ */
368
+ function resolveAuthenticatedAlternative(
369
+ selector: string,
370
+ model: Model<Api>,
371
+ modelRegistry: BenchModelRegistry,
372
+ providerOrder?: readonly string[],
373
+ ): Model<Api> | undefined {
374
+ if (!modelRegistry.hasConfiguredAuth) return undefined;
375
+ // A pinned `provider/...` selector is authoritative; never redirect off it.
376
+ if (selector.trim().toLowerCase().startsWith(`${model.provider.toLowerCase()}/`)) return undefined;
377
+ if (modelRegistry.hasConfiguredAuth(model)) return undefined;
378
+
379
+ const seen = new Set<string>();
380
+ const authenticated: Model<Api>[] = [];
381
+ const consider = (candidate: Model<Api>): void => {
382
+ const key = `${candidate.provider}/${candidate.id}`;
383
+ if (seen.has(key)) return;
384
+ seen.add(key);
385
+ if (modelRegistry.hasConfiguredAuth?.(candidate)) authenticated.push(candidate);
386
+ };
387
+ // Canonical variants link the same logical model across providers even when
388
+ // ids differ (e.g. fireworks `gpt-oss-20b` <-> openrouter `openai/gpt-oss-20b`).
389
+ const canonicalId = modelRegistry.getCanonicalId?.(model);
390
+ if (canonicalId) {
391
+ for (const variant of modelRegistry.getCanonicalVariants?.(canonicalId) ?? []) consider(variant.model);
392
+ }
393
+ // Same-id fallback for entries outside the canonical index.
394
+ for (const candidate of modelRegistry.getAll()) {
395
+ if (candidate.id === model.id) consider(candidate);
396
+ }
397
+ return pickHighestPriorityProvider(authenticated, providerOrder);
398
+ }
399
+
349
400
  function resolveBenchModels(
350
401
  selectors: string[],
351
402
  modelRegistry: BenchModelRegistry,
@@ -366,10 +417,20 @@ function resolveBenchModels(
366
417
  continue;
367
418
  }
368
419
  if (result.warning) writeStderr(`${chalk.yellow(`Warning: ${result.warning}`)}\n`);
420
+ let model = result.model;
421
+ const authenticated = resolveAuthenticatedAlternative(selector, model, modelRegistry, preferences.providerOrder);
422
+ if (authenticated) {
423
+ writeStderr(
424
+ `${chalk.yellow(
425
+ `Warning: no credentials for "${model.provider}"; benchmarking ${formatModelString(authenticated)} instead. Pin "${formatModelString(model)}" to force it.`,
426
+ )}\n`,
427
+ );
428
+ model = authenticated;
429
+ }
369
430
  resolved.push({
370
431
  selector,
371
- model: result.model,
372
- thinking: resolveThinkingLevelForModel(result.model, result.thinkingLevel),
432
+ model,
433
+ thinking: resolveThinkingLevelForModel(model, result.thinkingLevel),
373
434
  });
374
435
  }
375
436
  if (errors.length > 0) {
@@ -1,7 +1,6 @@
1
- import * as fs from "node:fs/promises";
2
1
  import * as os from "node:os";
3
2
  import * as path from "node:path";
4
- import { getProjectDir, normalizePathForComparison, setProjectDir } from "@oh-my-pi/pi-utils";
3
+ import { directoryExists, getProjectDir, normalizePathForComparison, setProjectDir } from "@oh-my-pi/pi-utils";
5
4
  import type { Args } from "./args";
6
5
 
7
6
  async function maybeAutoChdir(parsed: Args): Promise<void> {
@@ -22,19 +21,10 @@ async function maybeAutoChdir(parsed: Args): Promise<void> {
22
21
  return;
23
22
  }
24
23
 
25
- const isDirectory = async (p: string) => {
26
- try {
27
- const s = await fs.stat(p);
28
- return s.isDirectory();
29
- } catch {
30
- return false;
31
- }
32
- };
33
-
34
24
  const candidates = [path.join(home, "tmp"), "/tmp", "/var/tmp"];
35
25
  for (const candidate of candidates) {
36
26
  try {
37
- if (!(await isDirectory(candidate))) {
27
+ if (!(await directoryExists(candidate))) {
38
28
  continue;
39
29
  }
40
30
  setProjectDir(candidate);
@@ -46,7 +36,7 @@ async function maybeAutoChdir(parsed: Args): Promise<void> {
46
36
 
47
37
  try {
48
38
  const fallback = os.tmpdir();
49
- if (fallback && normalizePath(fallback) !== cwd && (await isDirectory(fallback))) {
39
+ if (fallback && normalizePath(fallback) !== cwd && (await directoryExists(fallback))) {
50
40
  setProjectDir(fallback);
51
41
  }
52
42
  } catch {
package/src/cli.ts CHANGED
@@ -68,6 +68,7 @@ async function runSmokeTest(): Promise<void> {
68
68
  const { smokeTestTinyTitleWorker } = await import("./tiny/title-client");
69
69
  const { smokeTestSttWorker } = await import("./stt/asr-client");
70
70
  const { smokeTestTtsWorker } = await import("./tts/tts-client");
71
+ const { smokeTestMnemopiEmbedWorker } = await import("./mnemopi/embed-client");
71
72
  const { smokeTestJsEvalWorker } = await import("./eval/js/context-manager");
72
73
  await smokeTestSyncWorker();
73
74
 
@@ -87,6 +88,7 @@ async function runSmokeTest(): Promise<void> {
87
88
  await smokeTestSttWorker();
88
89
  await smokeTestJsEvalWorker();
89
90
  await smokeTestTtsWorker();
91
+ await smokeTestMnemopiEmbedWorker();
90
92
  process.stdout.write("smoke-test: ok\n");
91
93
  }
92
94
 
@@ -96,6 +98,7 @@ const TAB_WORKER_ARG = "__omp_worker_tab";
96
98
  const JS_EVAL_WORKER_ARG = "__omp_worker_js_eval";
97
99
  const STT_WORKER_ARG = "__omp_worker_stt";
98
100
  const TTS_WORKER_ARG = "__omp_worker_tts";
101
+ const MNEMOPI_EMBED_WORKER_ARG = "__omp_worker_mnemopi_embed";
99
102
 
100
103
  async function runWorkerEntrypoint(arg: string | undefined): Promise<boolean> {
101
104
  if (arg === TINY_WORKER_ARG) {
@@ -151,6 +154,11 @@ async function runWorkerEntrypoint(arg: string | undefined): Promise<boolean> {
151
154
  await runIpcSubprocessWorker(startTtsWorker);
152
155
  return true;
153
156
  }
157
+ if (arg === MNEMOPI_EMBED_WORKER_ARG) {
158
+ const { startMnemopiEmbedWorker } = await import("./mnemopi/embed-worker");
159
+ await runIpcSubprocessWorker(startMnemopiEmbedWorker);
160
+ return true;
161
+ }
154
162
  return false;
155
163
  }
156
164
 
@@ -7,6 +7,7 @@ import { Args, Command, Flags } from "@oh-my-pi/pi-utils/cli";
7
7
  import chalk from "chalk";
8
8
  import { isAuthenticated, ModelRegistry } from "../config/model-registry";
9
9
  import { discoverAuthStorage } from "../sdk";
10
+ import { getAvailableAuthMethods } from "../web/search/providers/perplexity-auth";
10
11
 
11
12
  export default class Token extends Command {
12
13
  static description = "Get the API key or OAuth token for a provider";
@@ -41,49 +42,67 @@ export default class Token extends Command {
41
42
  const provider = providerName.toLowerCase();
42
43
 
43
44
  const authStorage = await discoverAuthStorage();
44
- const modelRegistry = new ModelRegistry(authStorage);
45
+ try {
46
+ const modelRegistry = new ModelRegistry(authStorage);
45
47
 
46
- // Resolve the API key / token
47
- const apiKey = await modelRegistry.getApiKeyForProvider(provider, undefined, {
48
- forceRefresh: flags["force-refresh"],
49
- });
48
+ // Resolve the API key / token
49
+ let apiKey: string | undefined;
50
50
 
51
- if (!isAuthenticated(apiKey)) {
52
- // Find all active/configured providers
53
- const activeProviders = new Set<string>();
54
- for (const p of PROVIDER_REGISTRY) {
55
- if (authStorage.hasAuth(p.id)) {
56
- activeProviders.add(p.id);
51
+ if (provider === "perplexity") {
52
+ const methods = await getAvailableAuthMethods(authStorage, undefined, {
53
+ forceRefresh: flags["force-refresh"],
54
+ });
55
+ const printable = methods.find(m => m.type === "oauth" || m.type === "api_key");
56
+ if (printable) {
57
+ apiKey = printable.type === "oauth" ? printable.access.accessToken : printable.apiKey;
57
58
  }
58
59
  }
59
- const all = authStorage.getAll();
60
- for (const p in all) {
61
- if (authStorage.hasAuth(p)) {
62
- activeProviders.add(p);
63
- }
60
+
61
+ if (!apiKey) {
62
+ apiKey = await modelRegistry.getApiKeyForProvider(provider, undefined, {
63
+ forceRefresh: flags["force-refresh"],
64
+ });
64
65
  }
65
66
 
66
- const msg = `No active credential found for provider "${providerName}".`;
67
- process.stderr.write(`${chalk.red(msg)}\n`);
68
- if (activeProviders.size > 0) {
69
- process.stderr.write(`Configured providers: ${Array.from(activeProviders).sort().join(", ")}\n`);
67
+ if (!isAuthenticated(apiKey)) {
68
+ // Find all active/configured providers
69
+ const activeProviders = new Set<string>();
70
+ for (const p of PROVIDER_REGISTRY) {
71
+ if (authStorage.hasAuth(p.id)) {
72
+ activeProviders.add(p.id);
73
+ }
74
+ }
75
+ const all = authStorage.getAll();
76
+ for (const p in all) {
77
+ if (authStorage.hasAuth(p)) {
78
+ activeProviders.add(p);
79
+ }
80
+ }
81
+
82
+ const msg = `No active credential found for provider "${providerName}".`;
83
+ process.stderr.write(`${chalk.red(msg)}\n`);
84
+ if (activeProviders.size > 0) {
85
+ process.stderr.write(`Configured providers: ${Array.from(activeProviders).sort().join(", ")}\n`);
86
+ }
87
+ process.exitCode = 1;
88
+ return;
70
89
  }
71
- process.exitCode = 1;
72
- return;
73
- }
74
90
 
75
- if (!flags.raw) {
76
- try {
77
- const parsed = JSON.parse(apiKey);
78
- if (parsed && typeof parsed === "object" && typeof parsed.token === "string") {
79
- process.stdout.write(`${parsed.token}\n`);
80
- return;
91
+ if (!flags.raw) {
92
+ try {
93
+ const parsed = JSON.parse(apiKey);
94
+ if (parsed && typeof parsed === "object" && typeof parsed.token === "string") {
95
+ process.stdout.write(`${parsed.token}\n`);
96
+ return;
97
+ }
98
+ } catch {
99
+ // Not a JSON string, print as-is
81
100
  }
82
- } catch {
83
- // Not a JSON string, print as-is
84
101
  }
85
- }
86
102
 
87
- process.stdout.write(`${apiKey}\n`);
103
+ process.stdout.write(`${apiKey}\n`);
104
+ } finally {
105
+ authStorage.close();
106
+ }
88
107
  }
89
108
  }
@@ -8,10 +8,55 @@ export interface AppendOnlyContextModel {
8
8
  compatConfig?: object;
9
9
  }
10
10
 
11
+ /**
12
+ * Local model servers (Ollama, LM Studio, llama.cpp, vLLM, sglang, …) all
13
+ * rely on llama.cpp-style prefix KV-cache reuse: identical leading tokens
14
+ * skip re-prefill on the next request. Append-only mode is the only way to
15
+ * guarantee byte-stable bytes across turns, since the live system prompt,
16
+ * tool catalogue, and message log all flow through fresh allocations every
17
+ * step (see `agent-loop.ts` `streamAssistantResponse` fallback path).
18
+ */
19
+ const LOCAL_INFERENCE_PROVIDERS = new Set(["ollama", "ollama-cloud", "lm-studio", "llama.cpp"]);
20
+
21
+ /** True when `baseUrl` resolves to a loopback or RFC1918 host — covers
22
+ * llama.cpp/vLLM/sglang servers registered under a user-defined provider id
23
+ * via `models.yaml`. Built-in local provider ids (`ollama`, `lm-studio`,
24
+ * `llama.cpp`) are already handled by `LOCAL_INFERENCE_PROVIDERS`.
25
+ * Substring match on the parsed hostname only; ports, paths, and unparseable
26
+ * URLs return false.
27
+ */
28
+ function hasLocalLoopbackBaseUrl(baseUrl: string | undefined): boolean {
29
+ if (!baseUrl) return false;
30
+ let hostname: string;
31
+ try {
32
+ hostname = new URL(baseUrl).hostname.toLowerCase();
33
+ } catch {
34
+ return false;
35
+ }
36
+ if (
37
+ hostname === "localhost" ||
38
+ hostname === "127.0.0.1" ||
39
+ hostname === "0.0.0.0" ||
40
+ hostname === "::1" ||
41
+ hostname === "[::1]"
42
+ ) {
43
+ return true;
44
+ }
45
+ // RFC1918 private IPv4 ranges.
46
+ if (/^10\./.test(hostname)) return true;
47
+ if (/^192\.168\./.test(hostname)) return true;
48
+ if (/^172\.(1[6-9]|2[0-9]|3[01])\./.test(hostname)) return true;
49
+ // Common ".local" mDNS hostnames used for home-LAN llama.cpp boxes.
50
+ if (hostname.endsWith(".local")) return true;
51
+ return false;
52
+ }
53
+
11
54
  function shouldAutoEnableAppendOnlyContext(model: AppendOnlyContextModel | null | undefined): boolean {
12
55
  if (!model) return false;
13
56
  if (model.provider === "deepseek") return true;
57
+ if (LOCAL_INFERENCE_PROVIDERS.has(model.provider)) return true;
14
58
  if (hostMatchesUrl(model.baseUrl, "xiaomi")) return true;
59
+ if (hasLocalLoopbackBaseUrl(model.baseUrl)) return true;
15
60
  return !!model.compatConfig && "supportsStore" in model.compatConfig && model.compatConfig.supportsStore === true;
16
61
  }
17
62
 
@@ -275,6 +275,7 @@ export async function discoverOllamaModels(
275
275
  baseUrl: `${endpoint}/v1`,
276
276
  reasoning: metadata?.reasoning ?? false,
277
277
  input: metadata?.input ?? ["text"],
278
+ imageInputDecoder: "stb",
278
279
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
279
280
  contextWindow: metadata?.contextWindow ?? 128000,
280
281
  maxTokens: Math.min(metadata?.contextWindow ?? Number.POSITIVE_INFINITY, DISCOVERY_DEFAULT_MAX_TOKENS),
@@ -352,6 +353,7 @@ export async function discoverLlamaCppModels(
352
353
  baseUrl,
353
354
  reasoning: false,
354
355
  input: serverMetadata?.input ?? ["text"],
356
+ imageInputDecoder: "stb",
355
357
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
356
358
  contextWindow: serverMetadata?.contextWindow ?? 128000,
357
359
  maxTokens: Math.min(
@@ -424,6 +426,7 @@ export async function discoverOpenAIModelsList(
424
426
  baseUrl,
425
427
  reasoning: false,
426
428
  input: nativeMetadataForModel?.input ?? ["text"],
429
+ ...(providerConfig.discovery.type === "lm-studio" ? { imageInputDecoder: "stb" as const } : {}),
427
430
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
428
431
  contextWindow,
429
432
  maxTokens: Math.min(contextWindow, discoveryDefaultMaxTokens(providerConfig.api)),
@@ -900,6 +900,7 @@ export class ModelRegistry {
900
900
  ...replacementModel,
901
901
  contextWindow: replacementModel.contextWindow ?? existing.contextWindow,
902
902
  maxTokens: replacementModel.maxTokens ?? existing.maxTokens,
903
+ omitMaxOutputTokens: replacementModel.omitMaxOutputTokens ?? existing.omitMaxOutputTokens,
903
904
  ...(supportsTools !== undefined ? { supportsTools } : {}),
904
905
  };
905
906
  });
@@ -1023,12 +1024,21 @@ export class ModelRegistry {
1023
1024
  }
1024
1025
 
1025
1026
  #normalizeDiscoverableModels(providerConfig: DiscoveryProviderConfig, models: Model<Api>[]): Model<Api>[] {
1027
+ const withDecoderMetadata =
1028
+ providerConfig.discovery.type === "ollama" ||
1029
+ providerConfig.discovery.type === "llama.cpp" ||
1030
+ providerConfig.discovery.type === "lm-studio"
1031
+ ? models.map(model =>
1032
+ buildModel({ ...model, imageInputDecoder: "stb", compat: model.compatConfig } as ModelSpec<Api>),
1033
+ )
1034
+ : models;
1035
+
1026
1036
  if (providerConfig.provider !== "ollama" || providerConfig.api !== "openai-responses") {
1027
- return models;
1037
+ return withDecoderMetadata;
1028
1038
  }
1029
1039
 
1030
1040
  const contextLengthOverride = getOllamaContextLengthOverride();
1031
- return models.map(model => {
1041
+ return withDecoderMetadata.map(model => {
1032
1042
  const normalized =
1033
1043
  model.api === "openai-completions"
1034
1044
  ? buildModel({
@@ -1269,7 +1279,12 @@ export class ModelRegistry {
1269
1279
  models: cached?.models.map(model => model.id) ?? [],
1270
1280
  });
1271
1281
  this.#lastDiscoveryWarnings.delete(providerConfig.provider);
1272
- return cached ? cached.models.map(model => buildModel(model)) : [];
1282
+ return cached
1283
+ ? this.#normalizeDiscoverableModels(
1284
+ providerConfig,
1285
+ cached.models.map(model => buildModel(model)),
1286
+ )
1287
+ : [];
1273
1288
  }
1274
1289
  }
1275
1290
 
@@ -1569,6 +1584,9 @@ export class ModelRegistry {
1569
1584
  }
1570
1585
  #applyHardcodedModelPolicies(models: Model<Api>[]): Model<Api>[] {
1571
1586
  return models.map(model => {
1587
+ if (model.provider === "ollama-cloud" && model.omitMaxOutputTokens !== true) {
1588
+ model = applyModelOverride(model, { omitMaxOutputTokens: true });
1589
+ }
1572
1590
  if (model.id !== "gpt-5.4" || model.provider === "github-copilot") {
1573
1591
  return model;
1574
1592
  }
@@ -556,6 +556,27 @@ function isAlias(id: string): boolean {
556
556
  return !datePattern.test(id);
557
557
  }
558
558
 
559
+ function includeSyntheticAllowedModels(available: Model<Api>[], allowedModels: Iterable<Model<Api>>): Model<Api>[] {
560
+ const allowedByKey = new Map<string, Model<Api>>();
561
+ for (const model of allowedModels) {
562
+ const key = formatModelString(model);
563
+ if (!allowedByKey.has(key)) {
564
+ allowedByKey.set(key, model);
565
+ }
566
+ }
567
+ if (allowedByKey.size === 0) return [];
568
+
569
+ const result: Model<Api>[] = [];
570
+ for (const model of available) {
571
+ if (allowedByKey.delete(formatModelString(model))) {
572
+ result.push(model);
573
+ }
574
+ }
575
+
576
+ result.push(...allowedByKey.values());
577
+ return result;
578
+ }
579
+
559
580
  /**
560
581
  * Find an exact explicit provider/model match.
561
582
  * Bare model ids are handled separately so canonical ids can coalesce variants.
@@ -1335,9 +1356,9 @@ export async function resolveModelScope(
1335
1356
  * the result to models matching those patterns.
1336
1357
  *
1337
1358
  * Returns the unfiltered available list when `enabledModels` is empty.
1338
- * Returns an empty list when `enabledModels` is configured but no available
1339
- * model matches any pattern — callers MUST treat this as "no usable model"
1340
- * rather than falling back to the global default (see issue #1022).
1359
+ * Returns an empty list when `enabledModels` is configured but no model matches
1360
+ * any pattern — callers MUST treat this as "no usable model" rather than
1361
+ * falling back to the global default (see issue #1022).
1341
1362
  */
1342
1363
  export async function resolveAllowedModels(
1343
1364
  modelRegistry: Pick<ModelRegistry, "getAvailable" | "getCanonicalVariants">,
@@ -1353,8 +1374,10 @@ export async function resolveAllowedModels(
1353
1374
  if (scoped.length === 0) {
1354
1375
  return [];
1355
1376
  }
1356
- const allowed = new Set(scoped.map(entry => `${entry.model.provider}/${entry.model.id}`));
1357
- return available.filter(model => allowed.has(`${model.provider}/${model.id}`));
1377
+ return includeSyntheticAllowedModels(
1378
+ available,
1379
+ scoped.map(entry => entry.model),
1380
+ );
1358
1381
  }
1359
1382
 
1360
1383
  /**
@@ -1382,9 +1405,9 @@ export function filterAvailableModelsByEnabledPatterns(
1382
1405
  if (patterns.length === 0) return available;
1383
1406
 
1384
1407
  const context = buildPreferenceContext(available, undefined);
1385
- const allowed = new Set<string>();
1408
+ const allowedModels: Model<Api>[] = [];
1386
1409
  const addAllowed = (model: Model<Api>) => {
1387
- allowed.add(`${model.provider}/${model.id}`);
1410
+ allowedModels.push(model);
1388
1411
  };
1389
1412
 
1390
1413
  for (const pattern of patterns) {
@@ -1409,7 +1432,7 @@ export function filterAvailableModelsByEnabledPatterns(
1409
1432
  }
1410
1433
  }
1411
1434
 
1412
- return allowed.size === 0 ? [] : available.filter(model => allowed.has(`${model.provider}/${model.id}`));
1435
+ return includeSyntheticAllowedModels(available, allowedModels);
1413
1436
  }
1414
1437
 
1415
1438
  export interface ResolveCliModelResult {
@@ -924,7 +924,7 @@ export const SETTINGS_SCHEMA = {
924
924
 
925
925
  inlineToolDescriptors: {
926
926
  type: "boolean",
927
- default: true,
927
+ default: false,
928
928
  ui: {
929
929
  tab: "model",
930
930
  group: "Prompt",
package/src/cursor.ts CHANGED
@@ -181,7 +181,7 @@ export class CursorExecHandlers implements ICursorExecHandlers {
181
181
  const toolResultMessage = await executeTool(this.options, "search", toolCallId, {
182
182
  pattern: args.pattern,
183
183
  paths: [searchPath],
184
- i: args.caseInsensitive || undefined,
184
+ case: args.caseInsensitive === true ? false : undefined,
185
185
  });
186
186
  return toolResultMessage;
187
187
  }
@@ -119,9 +119,16 @@ export class RawSseDebugBuffer {
119
119
  #records: RawSseDebugRecord[] = [];
120
120
  // Parallel to `#records`: `#recordChars[i]` is the precomputed char count
121
121
  // for `#records[i]`. Kept in lockstep by `#append` (push both) and
122
- // `#enforceLimits` (shift both). See the comment above the class for why
123
- // this is a sidecar array instead of a per-record property.
122
+ // `#enforceLimits` (advance `#head` to evict, then `slice` both together
123
+ // when compacting). See the comment above the class for why this is a
124
+ // sidecar array instead of a per-record property.
124
125
  #recordChars: number[] = [];
126
+ // Head-index ring over `#records`/`#recordChars`: index of the oldest live
127
+ // record. Eviction advances `#head` (amortized O(1)) rather than an O(n)
128
+ // front `shift()`; the dead `[0, #head)` prefix is reclaimed lazily by
129
+ // `#enforceLimits`. Live count is `#records.length - #head`; the live
130
+ // records are `#records[#head ..]`.
131
+ #head = 0;
125
132
  #totalChars = 0;
126
133
  #droppedRecords = 0;
127
134
  #droppedChars = 0;
@@ -181,7 +188,7 @@ export class RawSseDebugBuffer {
181
188
 
182
189
  snapshot(): RawSseDebugSnapshot {
183
190
  return {
184
- records: [...this.#records],
191
+ records: this.#records.slice(this.#head),
185
192
  droppedRecords: this.#droppedRecords,
186
193
  droppedChars: this.#droppedChars,
187
194
  totalEvents: this.#totalEvents,
@@ -190,9 +197,12 @@ export class RawSseDebugBuffer {
190
197
  }
191
198
 
192
199
  toRawText(): string {
193
- // Reads the live array directly: `rawRecordText` only computes a string
194
- // from each record, so no caller-visible mutation is possible.
195
- const body = this.#records.map(rawRecordText).join("\n");
200
+ // Reads the live window directly: `rawRecordText` only computes a string
201
+ // from each record, so no caller-visible mutation is possible. With a
202
+ // non-empty dead prefix we map a slice past `#head`; `#head === 0` (the
203
+ // common case) maps `#records` in place with no extra copy.
204
+ const live = this.#head === 0 ? this.#records : this.#records.slice(this.#head);
205
+ const body = live.map(rawRecordText).join("\n");
196
206
  if (this.#droppedRecords === 0) return body;
197
207
  const dropped = `: omp-debug-dropped records=${this.#droppedRecords} chars=${this.#droppedChars}\n\n`;
198
208
  return body.length > 0 ? `${dropped}${body}` : dropped;
@@ -208,14 +218,25 @@ export class RawSseDebugBuffer {
208
218
  }
209
219
 
210
220
  #enforceLimits(): void {
211
- while (this.#records.length > MAX_RAW_SSE_EVENTS || this.#totalChars > MAX_RAW_SSE_CHARS) {
212
- if (this.#records.length === 0) return;
213
- this.#records.shift();
214
- const chars = this.#recordChars.shift() ?? 0;
221
+ while (this.#records.length - this.#head > MAX_RAW_SSE_EVENTS || this.#totalChars > MAX_RAW_SSE_CHARS) {
222
+ if (this.#records.length - this.#head === 0) break;
223
+ const chars = this.#recordChars[this.#head] ?? 0;
224
+ this.#head += 1;
215
225
  this.#totalChars = Math.max(0, this.#totalChars - chars);
216
226
  this.#droppedRecords += 1;
217
227
  this.#droppedChars += chars;
218
228
  }
229
+ // Reclaim the consumed `[0, #head)` prefix once it grows large: one O(n)
230
+ // memmove amortized over many O(1) evictions, bounding the backing arrays
231
+ // to ~2x the live window. `#head >= MAX_RAW_SSE_EVENTS` covers the
232
+ // full-record-count steady state; `#head > liveCount` covers a small live
233
+ // window held by a few large records under the char budget.
234
+ const liveCount = this.#records.length - this.#head;
235
+ if (this.#head >= MAX_RAW_SSE_EVENTS || this.#head > liveCount) {
236
+ this.#records = this.#records.slice(this.#head);
237
+ this.#recordChars = this.#recordChars.slice(this.#head);
238
+ this.#head = 0;
239
+ }
219
240
  }
220
241
 
221
242
  #emit(): void {
@@ -39,7 +39,6 @@ import type { LoadedConfig } from "./config";
39
39
 
40
40
  ## Exceptions
41
41
 
42
- - Timer handles: `ReturnType<typeof setTimeout>` / `setInterval`.
43
42
  - Generic type utilities where the function is a type parameter.
44
43
 
45
44
  Concrete function? Export a concrete type.
@@ -5,7 +5,7 @@ if "__omp_prelude_loaded__" not in globals():
5
5
  from pathlib import Path
6
6
  import os, json, math, re
7
7
  from urllib.parse import unquote
8
- INTENT_FIELD = "_i"
8
+ INTENT_FIELD = "i"
9
9
 
10
10
  # __omp_display is injected by runner.py before the prelude executes; it
11
11
  # mirrors IPython's display() semantics with the same MIME bundle output.