ultimate-pi 0.2.7 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/.agents/skills/harness-eval/SKILL.md +1 -1
  2. package/.agents/skills/harness-governor/SKILL.md +2 -2
  3. package/.agents/skills/harness-spec/SKILL.md +1 -1
  4. package/.pi/PACKAGING.md +3 -2
  5. package/.pi/extensions/custom-header.ts +0 -17
  6. package/.pi/extensions/pi-model-router-harness.ts +42 -0
  7. package/.pi/extensions/policy-gate.ts +18 -0
  8. package/.pi/extensions/provider-payload-sanitize.ts +66 -0
  9. package/.pi/extensions/sentrux-rules-sync.ts +0 -18
  10. package/.pi/harness/README.md +3 -2
  11. package/.pi/harness/docs/adrs/0004-defer-ci-agent-smoke.md +1 -1
  12. package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +1 -1
  13. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +2 -2
  14. package/.pi/harness/evals/smoke/README.md +1 -1
  15. package/.pi/harness/evolution/README.md +1 -1
  16. package/.pi/harness/evolution/chaos-drill.md +1 -1
  17. package/.pi/prompts/harness-setup.md +42 -35
  18. package/.pi/scripts/README.md +25 -9
  19. package/.pi/scripts/harness-cli-verify.sh +4 -2
  20. package/.pi/scripts/harness-seed-project-contracts.mjs +49 -0
  21. package/.pi/scripts/harness-sync-model-router.mjs +84 -0
  22. package/.pi/scripts/harness-verify.mjs +5 -3
  23. package/.pi/scripts/sentrux-rules-sync.mjs +2 -2
  24. package/.pi/scripts/vendor-sync-pi-model-router.sh +47 -0
  25. package/.pi/settings.example.json +0 -1
  26. package/.sentrux/rules.toml +1 -1
  27. package/AGENTS.md +1 -1
  28. package/CHANGELOG.md +62 -0
  29. package/README.md +1 -1
  30. package/THIRD_PARTY_NOTICES.md +8 -0
  31. package/biome.json +2 -1
  32. package/package.json +9 -10
  33. package/vendor/pi-model-router/.prettierignore +4 -0
  34. package/vendor/pi-model-router/.prettierrc +5 -0
  35. package/vendor/pi-model-router/AGENTS.md +39 -0
  36. package/vendor/pi-model-router/LICENSE +21 -0
  37. package/vendor/pi-model-router/README.md +99 -0
  38. package/vendor/pi-model-router/UPSTREAM_PIN.md +8 -0
  39. package/vendor/pi-model-router/docs/ARCHITECTURE.md +54 -0
  40. package/vendor/pi-model-router/extensions/commands.ts +720 -0
  41. package/vendor/pi-model-router/extensions/config.ts +348 -0
  42. package/vendor/pi-model-router/extensions/constants.ts +1 -0
  43. package/vendor/pi-model-router/extensions/index.ts +457 -0
  44. package/vendor/pi-model-router/extensions/provider.ts +529 -0
  45. package/vendor/pi-model-router/extensions/routing.ts +416 -0
  46. package/vendor/pi-model-router/extensions/state.ts +49 -0
  47. package/vendor/pi-model-router/extensions/types.ts +86 -0
  48. package/vendor/pi-model-router/extensions/ui.ts +130 -0
  49. package/vendor/pi-model-router/model-router.example.json +48 -0
  50. package/vendor/pi-model-router/package.json +48 -0
  51. package/vendor/pi-model-router/tsconfig.json +16 -0
  52. package/.pi/extensions/model-router-bootstrap.ts +0 -174
  53. package/.sentrux/.harness-rules-meta.json +0 -5
@@ -0,0 +1,529 @@
1
+ import {
2
+ createAssistantMessageEventStream,
3
+ streamSimple,
4
+ type Api,
5
+ type AssistantMessage,
6
+ type AssistantMessageEventStream,
7
+ type Context,
8
+ type Model,
9
+ type SimpleStreamOptions,
10
+ type Message,
11
+ } from '@mariozechner/pi-ai';
12
+ import type {
13
+ ExtensionAPI,
14
+ ExtensionContext,
15
+ } from '@mariozechner/pi-coding-agent';
16
+ import type {
17
+ RouterConfig,
18
+ RoutingDecision,
19
+ RouterTier,
20
+ RouterPinByProfile,
21
+ RouterThinkingByProfile,
22
+ } from './types.js';
23
+ import { profileNames, parseCanonicalModelRef, ROUTER_TIERS } from './config.js';
24
+ import {
25
+ phaseForTier,
26
+ buildRoutingDecision,
27
+ decideRouting,
28
+ runClassifier,
29
+ extractTextFromContent,
30
+ hasImageAttachment,
31
+ } from './routing.js';
32
+
33
+ export const createErrorMessage = (
34
+ model: Model<Api>,
35
+ message: string,
36
+ ): AssistantMessage => {
37
+ return {
38
+ role: 'assistant',
39
+ content: [],
40
+ api: model.api,
41
+ provider: model.provider,
42
+ model: model.id,
43
+ usage: {
44
+ input: 0,
45
+ output: 0,
46
+ cacheRead: 0,
47
+ cacheWrite: 0,
48
+ totalTokens: 0,
49
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
50
+ },
51
+ stopReason: 'error',
52
+ errorMessage: message,
53
+ timestamp: Date.now(),
54
+ };
55
+ };
56
+
57
+ /**
58
+ * Heuristic token estimator (conservative: 3 characters per token)
59
+ */
60
+ const estimateTokens = (text: string): number => Math.ceil(text.length / 3);
61
+
62
+ /**
63
+ * Truncate context to fit within a target token limit by removing oldest messages.
64
+ * Always preserves the first system message and the latest user message.
65
+ */
66
+ const truncateContext = (context: Context, limit: number): Context => {
67
+ const messages = [...context.messages];
68
+ if (messages.length <= 1) return context;
69
+
70
+ const getSystemTokens = () =>
71
+ context.systemPrompt ? estimateTokens(context.systemPrompt) : 0;
72
+
73
+ // Initial estimate
74
+ const totalTokens =
75
+ getSystemTokens() +
76
+ messages.reduce(
77
+ (sum, m) => sum + estimateTokens(extractTextFromContent(m.content)),
78
+ 0,
79
+ );
80
+ if (totalTokens <= limit) return context;
81
+
82
+ const latestMessage = messages.pop();
83
+ if (!latestMessage) return context;
84
+
85
+ // Remove oldest until it fits
86
+ while (messages.length > 0) {
87
+ const currentTokens =
88
+ getSystemTokens() +
89
+ estimateTokens(extractTextFromContent(latestMessage.content)) +
90
+ messages.reduce(
91
+ (sum, m) => sum + estimateTokens(extractTextFromContent(m.content)),
92
+ 0,
93
+ );
94
+
95
+ if (currentTokens <= limit) break;
96
+ messages.shift(); // Remove oldest
97
+ }
98
+
99
+ const finalMessages: Message[] = [];
100
+ finalMessages.push(...messages);
101
+ finalMessages.push(latestMessage);
102
+
103
+ return { ...context, messages: finalMessages };
104
+ };
105
+
106
+ const supportsReasoning = (
107
+ profile: RouterConfig['profiles'][string],
108
+ modelRegistry: ExtensionContext['modelRegistry'] | undefined,
109
+ ): boolean => {
110
+ if (!modelRegistry) return false;
111
+
112
+ for (const tier of ROUTER_TIERS) {
113
+ try {
114
+ const { provider, modelId } = parseCanonicalModelRef(profile[tier].model);
115
+ if (modelRegistry.find(provider, modelId)?.reasoning) {
116
+ return true;
117
+ }
118
+ } catch (_error) {
119
+ // ignore invalid model refs here; config normalization handles warnings
120
+ }
121
+ }
122
+
123
+ return false;
124
+ };
125
+
126
+ export const registerRouterProvider = (
127
+ pi: ExtensionAPI,
128
+ state: {
129
+ lastRegisteredModels: string;
130
+ readonly currentConfig: RouterConfig;
131
+ readonly currentModelRegistry:
132
+ | ExtensionContext['modelRegistry']
133
+ | undefined;
134
+ readonly lastExtensionContext: ExtensionContext | undefined;
135
+ selectedProfile: string;
136
+ routerEnabled: boolean;
137
+ lastDecision: RoutingDecision | undefined;
138
+ readonly thinkingByProfile: RouterThinkingByProfile;
139
+ readonly pinnedTierByProfile: RouterPinByProfile;
140
+ accumulatedCost: number;
141
+ },
142
+ actions: {
143
+ persistState: () => void;
144
+ recordDebugDecision: (decision: RoutingDecision) => void;
145
+ getThinkingOverride: (profileName: string, tier: RouterTier) => any;
146
+ updateStatus: (ctx: ExtensionContext) => void;
147
+ },
148
+ ) => {
149
+ const profileList = profileNames(state.currentConfig);
150
+
151
+ // Map profiles to their capacities
152
+ const modelDefinitions = profileList.map((name) => {
153
+ const profile = state.currentConfig.profiles[name];
154
+ let contextWindow = 1_000_000;
155
+ let maxTokens = 64_000;
156
+
157
+ if (state.currentModelRegistry) {
158
+ for (const tier of ROUTER_TIERS) {
159
+ try {
160
+ const { provider, modelId } = parseCanonicalModelRef(
161
+ profile[tier].model,
162
+ );
163
+ const tierModel = state.currentModelRegistry.find(provider, modelId);
164
+ if (tierModel) {
165
+ if (tier === 'high') {
166
+ contextWindow = tierModel.contextWindow ?? contextWindow;
167
+ maxTokens = tierModel.maxTokens ?? maxTokens;
168
+ }
169
+ }
170
+ } catch (_error) {
171
+ // ignore
172
+ }
173
+ }
174
+ }
175
+
176
+ return {
177
+ id: name,
178
+ name: `Router ${name}`,
179
+ reasoning: supportsReasoning(profile, state.currentModelRegistry),
180
+ input: ['text', 'image'] as ('text' | 'image')[],
181
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
182
+ contextWindow,
183
+ maxTokens,
184
+ };
185
+ });
186
+
187
+ const modelsKey = modelDefinitions
188
+ .map((m) => `${m.id}:${m.contextWindow}:${m.maxTokens}:${m.reasoning}`)
189
+ .join(',');
190
+ if (state.lastRegisteredModels === modelsKey) return;
191
+
192
+ pi.registerProvider('router', {
193
+ baseUrl: 'router://local',
194
+ apiKey: 'pi-model-router',
195
+ api: 'router-local-api',
196
+ models: modelDefinitions,
197
+ streamSimple(
198
+ model: Model<Api>,
199
+ context: Context,
200
+ options?: SimpleStreamOptions,
201
+ ): AssistantMessageEventStream {
202
+ const stream = createAssistantMessageEventStream();
203
+
204
+ (async () => {
205
+ try {
206
+ if (!state.currentModelRegistry) {
207
+ throw new Error(
208
+ 'Router provider not initialized yet. Wait for session_start and retry.',
209
+ );
210
+ }
211
+ const profile = state.currentConfig.profiles[model.id];
212
+ if (!profile) {
213
+ throw new Error(`Unknown router profile: ${model.id}`);
214
+ }
215
+
216
+ state.selectedProfile = model.id;
217
+ state.routerEnabled = true;
218
+
219
+ const pinnedTier = state.pinnedTierByProfile[model.id];
220
+ const isBudgetExceeded =
221
+ state.currentConfig.maxSessionBudget !== undefined &&
222
+ state.accumulatedCost >= state.currentConfig.maxSessionBudget;
223
+
224
+ let decision: RoutingDecision = decideRouting(
225
+ context,
226
+ model.id,
227
+ profile,
228
+ state.lastDecision,
229
+ pinnedTier,
230
+ state.thinkingByProfile[model.id],
231
+ state.currentConfig.phaseBias,
232
+ state.currentConfig.rules,
233
+ isBudgetExceeded,
234
+ );
235
+
236
+ // Optional Context Trigger Upgrade
237
+ if (
238
+ state.currentConfig.largeContextThreshold &&
239
+ decision.tier !== 'high' &&
240
+ state.lastExtensionContext
241
+ ) {
242
+ try {
243
+ const usage = await state.lastExtensionContext.getContextUsage();
244
+ if (
245
+ usage?.tokens &&
246
+ usage.tokens > state.currentConfig.largeContextThreshold
247
+ ) {
248
+ decision = buildRoutingDecision(
249
+ model.id,
250
+ profile,
251
+ 'high',
252
+ 'planning',
253
+ `Context usage (${usage.tokens}) exceeds threshold (${state.currentConfig.largeContextThreshold}). Forced high tier.`,
254
+ state.thinkingByProfile[model.id],
255
+ false,
256
+ );
257
+ decision.isContextTriggered = true;
258
+ }
259
+ } catch (e) {
260
+ // ignore
261
+ }
262
+ }
263
+
264
+ // Classifier Override
265
+ if (
266
+ state.currentConfig.classifierModel &&
267
+ !pinnedTier &&
268
+ !decision.isContextTriggered &&
269
+ !decision.isRuleMatched
270
+ ) {
271
+ const classifierResult = await runClassifier(
272
+ state.currentConfig.classifierModel,
273
+ state.currentModelRegistry,
274
+ context,
275
+ state.lastDecision?.phase,
276
+ );
277
+ if (classifierResult) {
278
+ decision = buildRoutingDecision(
279
+ model.id,
280
+ profile,
281
+ classifierResult.tier,
282
+ phaseForTier(classifierResult.tier),
283
+ `Classifier: ${classifierResult.reasoning}`,
284
+ state.thinkingByProfile[model.id],
285
+ true,
286
+ );
287
+ if (isBudgetExceeded && decision.tier === 'high') {
288
+ decision.tier = 'medium';
289
+ decision.phase = 'implementation';
290
+ decision.reasoning = `Budget exceeded. Downgraded classifier decision to medium. (Original: ${decision.reasoning})`;
291
+ decision.isBudgetForced = true;
292
+ }
293
+ }
294
+ }
295
+
296
+ const lastMessage = context.messages[context.messages.length - 1];
297
+ const previousDecision = state.lastDecision;
298
+ const isGoogleThinkingToolContinuation =
299
+ lastMessage?.role === 'toolResult' &&
300
+ previousDecision?.profile === model.id &&
301
+ previousDecision.targetProvider === 'google' &&
302
+ previousDecision.thinking !== 'off' &&
303
+ decision.targetProvider === 'google' &&
304
+ decision.thinking !== 'off' &&
305
+ previousDecision.targetLabel !== decision.targetLabel;
306
+
307
+ if (isGoogleThinkingToolContinuation) {
308
+ decision = {
309
+ ...decision,
310
+ tier: previousDecision!.tier,
311
+ phase: previousDecision!.phase,
312
+ targetProvider: previousDecision!.targetProvider,
313
+ targetModelId: previousDecision!.targetModelId,
314
+ targetLabel: previousDecision!.targetLabel,
315
+ thinking: previousDecision!.thinking,
316
+ reasoning:
317
+ `Preserved ${previousDecision!.targetLabel} for a Google tool-result continuation ` +
318
+ `to avoid thought-signature replay errors. (Original: ${decision.reasoning})`,
319
+ };
320
+ }
321
+
322
+ const imageAttached = hasImageAttachment(context);
323
+ if (imageAttached) {
324
+ const checkModelSupportsImage = (modelRef: string) => {
325
+ try {
326
+ const { provider, modelId } = parseCanonicalModelRef(modelRef);
327
+ const m = state.currentModelRegistry?.find(provider, modelId);
328
+ return m?.input?.includes('image') ?? false;
329
+ } catch {
330
+ return false;
331
+ }
332
+ };
333
+
334
+ const tierModels = [
335
+ decision.targetLabel,
336
+ ...(profile[decision.tier].fallbacks ?? []),
337
+ ];
338
+ if (!tierModels.some(checkModelSupportsImage)) {
339
+ const tiersToTry: RouterTier[] =
340
+ decision.tier === 'low'
341
+ ? ['medium', 'high']
342
+ : decision.tier === 'medium'
343
+ ? ['high']
344
+ : [];
345
+
346
+ let foundTier: RouterTier | undefined;
347
+ for (const t of tiersToTry) {
348
+ const tModels = [
349
+ profile[t].model,
350
+ ...(profile[t].fallbacks ?? []),
351
+ ];
352
+ if (tModels.some(checkModelSupportsImage)) {
353
+ foundTier = t;
354
+ break;
355
+ }
356
+ }
357
+
358
+ if (foundTier) {
359
+ decision = buildRoutingDecision(
360
+ model.id,
361
+ profile,
362
+ foundTier,
363
+ phaseForTier(foundTier),
364
+ `Forced ${foundTier} tier because the originally routed ${decision.tier} tier does not support image attachments.`,
365
+ state.thinkingByProfile[model.id],
366
+ false,
367
+ );
368
+ }
369
+ }
370
+ }
371
+
372
+ state.lastDecision = decision;
373
+ actions.recordDebugDecision(decision);
374
+
375
+ if (state.lastExtensionContext) {
376
+ actions.updateStatus(state.lastExtensionContext);
377
+ }
378
+
379
+ let modelsToTry = [
380
+ decision.targetLabel,
381
+ ...(profile[decision.tier].fallbacks ?? []),
382
+ ];
383
+ if (imageAttached) {
384
+ modelsToTry = modelsToTry.filter((modelRef) => {
385
+ try {
386
+ const { provider, modelId } = parseCanonicalModelRef(modelRef);
387
+ const m = state.currentModelRegistry?.find(provider, modelId);
388
+ return m?.input?.includes('image') ?? false;
389
+ } catch {
390
+ return false;
391
+ }
392
+ });
393
+ if (modelsToTry.length === 0) {
394
+ modelsToTry = [decision.targetLabel];
395
+ }
396
+ }
397
+ let lastError: any;
398
+ let success = false;
399
+
400
+ for (let i = 0; i < modelsToTry.length; i++) {
401
+ const modelRef = modelsToTry[i];
402
+ const { provider: targetProvider, modelId: targetModelId } =
403
+ parseCanonicalModelRef(modelRef);
404
+
405
+ if (targetProvider === 'router') continue;
406
+
407
+ const targetModel = state.currentModelRegistry.find(
408
+ targetProvider,
409
+ targetModelId,
410
+ );
411
+ if (!targetModel) {
412
+ lastError = new Error(
413
+ `Routed model not found: ${targetProvider}/${targetModelId}`,
414
+ );
415
+ continue;
416
+ }
417
+
418
+ const auth =
419
+ await state.currentModelRegistry.getApiKeyAndHeaders(targetModel);
420
+ if (!auth.ok || !auth.apiKey) {
421
+ lastError = new Error(
422
+ auth.ok
423
+ ? `No API key for routed model: ${targetProvider}/${targetModelId}`
424
+ : `Auth failed for routed model: ${targetProvider}/${targetModelId}: ${auth.error}`,
425
+ );
426
+ continue;
427
+ }
428
+ const apiKey = auth.apiKey;
429
+ const headers = auth.headers;
430
+
431
+ try {
432
+ // HONESTY CHECK & AUTO-TRUNCATION
433
+ // If the picked model has a smaller context than what we reported, truncate now.
434
+ let effectiveContext = context;
435
+ const targetLimit = targetModel.contextWindow || 128_000;
436
+ if (targetLimit < model.contextWindow!) {
437
+ effectiveContext = truncateContext(context, targetLimit);
438
+ }
439
+
440
+ const thinkingOverride = actions.getThinkingOverride(
441
+ model.id,
442
+ decision.tier,
443
+ );
444
+ const delegatedReasoning =
445
+ targetModel.reasoning &&
446
+ (thinkingOverride ?? decision.thinking) !== 'off'
447
+ ? (thinkingOverride ?? decision.thinking)
448
+ : undefined;
449
+
450
+ if (state.lastExtensionContext) {
451
+ if (delegatedReasoning) {
452
+ state.lastExtensionContext.ui.setHiddenThinkingLabel?.(
453
+ `Thinking (${targetProvider}/${targetModelId})...`,
454
+ );
455
+ } else {
456
+ state.lastExtensionContext.ui.setHiddenThinkingLabel?.();
457
+ }
458
+ }
459
+
460
+ const delegatedStream = streamSimple(
461
+ targetModel,
462
+ effectiveContext,
463
+ {
464
+ ...options,
465
+ apiKey,
466
+ headers,
467
+ ...(delegatedReasoning
468
+ ? { reasoning: delegatedReasoning }
469
+ : {}),
470
+ },
471
+ );
472
+
473
+ let contentReceived = false;
474
+ for await (const event of delegatedStream) {
475
+ if (event.type === 'done') {
476
+ const cost = event.message.usage?.cost?.total ?? 0;
477
+ state.accumulatedCost += cost;
478
+ }
479
+ if (event.type === 'error' && !contentReceived) {
480
+ throw new Error(
481
+ (event as any).error?.errorMessage ||
482
+ 'Model failed before sending content.',
483
+ );
484
+ }
485
+ const isContent =
486
+ event.type === 'text_delta' ||
487
+ event.type === 'thinking_delta' ||
488
+ event.type === 'toolcall_delta' ||
489
+ event.type === 'toolcall_end';
490
+ if (isContent) contentReceived = true;
491
+ stream.push(event);
492
+ }
493
+ success = true;
494
+ if (i > 0) decision.isFallback = true;
495
+ break;
496
+ } catch (err) {
497
+ lastError = err;
498
+ }
499
+ }
500
+
501
+ if (!success) {
502
+ throw (
503
+ lastError ||
504
+ new Error('Failed to delegate to any model in the chain.')
505
+ );
506
+ }
507
+
508
+ stream.end();
509
+ } catch (error) {
510
+ stream.push({
511
+ type: 'error',
512
+ reason: 'error',
513
+ error: createErrorMessage(
514
+ model,
515
+ error instanceof Error ? error.message : String(error),
516
+ ),
517
+ });
518
+ stream.end();
519
+ } finally {
520
+ actions.persistState();
521
+ }
522
+ })();
523
+
524
+ return stream;
525
+ },
526
+ });
527
+
528
+ state.lastRegisteredModels = modelsKey;
529
+ };