pi-lilac-provider 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -304,6 +304,8 @@ function cacheModels(models: JsonModel[]): void {
304
304
  } catch {
305
305
  // Cache write failure is non-fatal
306
306
  }
307
+ // Base model set changed; force getListModels() to rebuild list prices.
308
+ listModelsCache = null;
307
309
  }
308
310
 
309
311
  function mergeWithEmbedded(liveModels: JsonModel[], embeddedModels: JsonModel[]): JsonModel[] {
@@ -398,6 +400,41 @@ function applyDiscounts(models: JsonModel[], discounts: Map<string, JsonDiscount
398
400
  });
399
401
  }
400
402
 
/**
 * Apply the current discount to a single model's cost IN PLACE.
 *
 * applyDiscounts() returns discounted COPIES for the registry; those copies
 * cannot reach the model pi already bound for the current turn. This mutates
 * that bound object directly so the current turn's calculateCost() (in pi-ai)
 * reads the discounted price. Cost is always recomputed from list price — the
 * patch-applied, pre-discount value from buildModels() — so re-applying a
 * changed discount never compounds a factor already present on the object.
 * cacheWrite is left untouched (Lilac does not discount it, matching
 * applyDiscounts).
 *
 * Targets the object pi bound for the turn, captured before any await or
 * registerProvider() call. registerProvider() refreshes the session model for
 * SUBSEQUENT turns only (via prepareNextTurn); it cannot affect the current
 * turn's cost calc, which is why the in-place mutation is required.
 *
 * The function never throws on incomplete data: every early return below
 * leaves `model` exactly as it was.
 *
 * @param model - The bound model whose `cost` is rewritten. `undefined`, or a
 *   model without a `cost` object, is a no-op.
 * @param listModels - List-price (pre-discount) models; the entry whose `id`
 *   equals `model.id` supplies the base prices. No match is a no-op.
 * @param discounts - Per-model discounts keyed by model id, or `null`. A
 *   missing entry, or a `creditMultiplier` that is not a finite number, is
 *   treated as factor 1 (list price).
 */
function applyDiscountInPlace(
  model: { id: string; cost: { input: number; output: number; cacheRead: number; cacheWrite: number } } | undefined,
  listModels: JsonModel[],
  discounts: Map<string, JsonDiscount> | null,
): void {
  if (!model?.cost) return;
  const list = listModels.find(m => m.id === model.id);
  // A list entry without a cost table gives nothing to recompute from; bail
  // out instead of dereferencing it (this runs inside a request hook, where a
  // TypeError would surface as a failed turn).
  if (!list?.cost) return;
  // A missing discount entry means list price (factor 1) — e.g. a model whose
  // discount was removed since it was last priced.
  const rawFactor = discounts?.get(model.id)?.creditMultiplier;
  const factor = typeof rawFactor === "number" && Number.isFinite(rawFactor) ? rawFactor : 1;
  // Non-positive prices pass through unscaled; positive ones are scaled and
  // rounded to 4 decimal places.
  const applyFactor = (n: number): number => (n > 0 ? Math.round(n * factor * 10000) / 10000 : n);
  const listCost = list.cost;
  // cacheWrite is intentionally absent from this list.
  for (const key of ["input", "output", "cacheRead"] as const) {
    const listPrice = listCost[key];
    // Only overwrite from a usable list price: a missing or non-finite value
    // must not be copied onto the bound model, where it would turn the
    // current turn's cost into NaN.
    if (typeof listPrice === "number" && Number.isFinite(listPrice)) {
      model.cost[key] = applyFactor(listPrice);
    }
  }
}
437
+
401
438
  function cacheDiscounts(discounts: Map<string, JsonDiscount>): void {
402
439
  try {
403
440
  fs.mkdirSync(CACHE_DIR, { recursive: true });
@@ -467,6 +504,10 @@ let revalidateAbort: AbortController | null = null;
467
504
  let latestDiscounts: Map<string, JsonDiscount> | null = null;
468
505
  let lastDiscountFetchTime = 0;
469
506
  const STATUS_CACHE_TTL_MS = 30000;
507
+ // List-price (patch-applied, pre-discount) models, cached until the base set
508
+ // changes. Reset in cacheModels() so the next getListModels() rebuilds from the
509
+ // refreshed disk cache / embedded set.
510
+ let listModelsCache: JsonModel[] | null = null;
470
511
 
471
512
  async function resolveApiKey(modelRegistry: ModelRegistry): Promise<void> {
472
513
  cachedApiKey = await modelRegistry.getApiKeyForProvider("lilac") ?? undefined;
@@ -479,6 +520,16 @@ export default function (pi: ExtensionAPI) {
479
520
  const customModels = customModelsData as JsonModel[];
480
521
  const patches = patchData as PatchData;
481
522
 
/**
 * Return the list-price models (patches applied, no discount applied).
 *
 * The result is memoised in the module-level `listModelsCache` and built on
 * first use from loadStaleModels(embeddedModels), customModels and patches.
 * The cache is rebuilt only after it has been reset to null. Callers use these
 * prices as the base when recomputing the in-flight model's cost, so an
 * already-applied discount is never compounded.
 */
function getListModels(): JsonModel[] {
  // Fast path: a previously built list is still valid.
  if (listModelsCache) {
    return listModelsCache;
  }
  const baseModels = loadStaleModels(embeddedModels);
  listModelsCache = buildModels(baseModels, customModels, patches);
  return listModelsCache;
}
+ }
532
+
482
533
  const staleBase = loadStaleModels(embeddedModels);
483
534
  latestDiscounts = loadCachedDiscounts();
484
535
  const staleModels = applyDiscounts(buildModels(staleBase, customModels, patches), latestDiscounts);
@@ -582,10 +633,26 @@ export default function (pi: ExtensionAPI) {
582
633
  });
583
634
 
584
635
  pi.on("before_provider_request", async (_event, ctx) => {
585
- if (ctx.model?.provider !== "lilac") return;
586
-
587
- // Always show status for active lilac model
588
- ctx.ui.setStatus("lilac", dimStatus(ctx, formatDiscountStatus(ctx.model.id)));
636
+ // Capture the in-flight model BEFORE any await or registerProvider() call.
637
+ // ctx.model is a lazy getter to the live session model; at hook start (before
638
+ // any refresh) it is the same object pi's agent loop bound as the model for
639
+ // THIS turn — the exact object whose .cost calculateCost() reads when the
640
+ // response arrives. registerProvider() can only refresh the session model
641
+ // for SUBSEQUENT turns (prepareNextTurn), so to affect the current turn's
642
+ // cost we mutate this object in place. Capturing before the await also guards
643
+ // against a concurrent session_start /status sync swapping the reference
644
+ // mid-hook.
645
+ const inFlightModel = ctx.model;
646
+ if (!inFlightModel || inFlightModel.provider !== "lilac") return;
647
+
648
+ const listModels = getListModels();
649
+
650
+ // Align the in-flight model with the best-known discount right away. This
651
+ // covers turns that don't trigger a fresh fetch (within TTL, or the fetch
652
+ // returned unchanged) and recomputes from list price so it never compounds
653
+ // a previously-applied factor.
654
+ applyDiscountInPlace(inFlightModel, listModels, latestDiscounts);
655
+ ctx.ui.setStatus("lilac", dimStatus(ctx, formatDiscountStatus(inFlightModel.id)));
589
656
 
590
657
  if (!cachedApiKey) return;
591
658
 
@@ -596,24 +663,31 @@ export default function (pi: ExtensionAPI) {
596
663
 
597
664
  const discounts = await fetchStatusDiscounts(cachedApiKey);
598
665
  if (!discounts) return;
666
+
667
+ lastDiscountFetchTime = now;
668
+
599
669
  if (!discountsChanged(latestDiscounts, discounts)) {
600
- lastDiscountFetchTime = now;
601
- ctx.ui.setStatus("lilac", dimStatus(ctx, formatDiscountStatus(ctx.model.id)));
670
+ ctx.ui.setStatus("lilac", dimStatus(ctx, formatDiscountStatus(inFlightModel.id)));
602
671
  return;
603
672
  }
604
673
 
605
- lastDiscountFetchTime = now;
606
674
  cacheDiscounts(discounts);
607
675
  latestDiscounts = discounts;
608
676
 
609
- const base = loadStaleModels(embeddedModels);
677
+ // Re-read list prices in case a concurrent /models sync (session_start)
678
+ // invalidated the list-price cache during the await above. Then re-apply the
679
+ // freshly-fetched discount so THIS turn (not just later ones) is costed at
680
+ // the new price, and re-register so other lilac models pick it up on their
681
+ // next request.
682
+ const freshList = getListModels();
683
+ applyDiscountInPlace(inFlightModel, freshList, discounts);
610
684
  pi.registerProvider("lilac", {
611
685
  baseUrl: BASE_URL,
612
686
  apiKey: "$LILAC_API_KEY",
613
687
  api: "openai-completions",
614
- models: applyDiscounts(buildModels(base, customModels, patches), discounts),
688
+ models: applyDiscounts(freshList, discounts),
615
689
  });
616
- ctx.ui.setStatus("lilac", dimStatus(ctx, formatDiscountStatus(ctx.model.id)));
690
+ ctx.ui.setStatus("lilac", dimStatus(ctx, formatDiscountStatus(inFlightModel.id)));
617
691
  });
618
692
 
619
693
  pi.on("model_select", async (event, ctx) => {
@@ -668,5 +742,5 @@ export default function (pi: ExtensionAPI) {
668
742
  });
669
743
  }
670
744
 
671
- export { fetchStatusDiscounts, applyDiscounts, loadCachedDiscounts, cacheDiscounts };
745
+ export { fetchStatusDiscounts, applyDiscounts, applyDiscountInPlace, loadCachedDiscounts, cacheDiscounts };
672
746
  export type { JsonDiscount, JsonModel, PatchEntry, PatchData };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-lilac-provider",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "Lilac provider extension for pi - Access Kimi K2.6, GLM 5.1, and Gemma 4 models through Lilac's OpenAI-compatible API on idle GPUs",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -13,6 +13,10 @@
13
13
  * 7. turn_end appends discount entry to session JSONL.
14
14
  * 8. before_provider_request refreshes discounts with a 30s cache.
15
15
  * 9. formatDiscountStatus returns fallbacks when data is missing.
16
+ * 10. applyDiscountInPlace mutates the in-flight model's cost in place,
17
+ * recomputed from list price so re-applied discounts never compound.
18
+ * 11. before_provider_request mutates the bound (in-flight) model object so the
19
+ * current turn's cost calc sees the discount in real time.
16
20
  */
17
21
 
18
22
  import type { ExtensionAPI, ModelRegistry } from "@earendil-works/pi-coding-agent";
@@ -43,6 +47,7 @@ const {
43
47
  default: registerLilac,
44
48
  fetchStatusDiscounts,
45
49
  applyDiscounts,
50
+ applyDiscountInPlace,
46
51
  loadCachedDiscounts,
47
52
  cacheDiscounts,
48
53
  } = await import("../index.ts");
@@ -446,6 +451,96 @@ const kimiFinal = afterSecond.config.models.find((m: any) => m.id === "moonshota
446
451
  assert(kimiFinal.discount.discountPercent === 25, "final registration uses correct discount (not stale from first re-register)");
447
452
  assert(kimiFinal.cost.input === 0.525, "final registration uses correct cost (0.70 * 0.75)");
448
453
 
454
+ // ─── Test 12: applyDiscountInPlace mutates the in-flight model ───────────────
455
+
456
+ console.log("\n--- Test 12: applyDiscountInPlace (in-flight mutation) ---");
457
+
458
+ // applyDiscountInPlace closes the one-turn gap: registerProvider() refreshes
459
+ // the registry for LATER turns, but the model pi already bound for the current
460
+ // turn is a separate object whose .cost calculateCost() reads. We mutate THAT
461
+ // object in place, recomputed from list price so a changed discount never
462
+ // compounds on top of an already-applied factor.
463
+
464
+ const listModels = [
465
+ { id: "moonshotai/kimi-k2.6", name: "Kimi K2.6", cost: { input: 0.70, output: 3.50, cacheRead: 0.20, cacheWrite: 0 } },
466
+ ] as any[];
467
+
468
+ // A model object that is "already bound" for the turn — the same reference pi's
469
+ // agent loop holds. It currently carries an OLD discount (factor 0.75).
470
+ const inFlight = {
471
+ id: "moonshotai/kimi-k2.6",
472
+ cost: { input: 0.525, output: 2.625, cacheRead: 0.15, cacheWrite: 0 },
473
+ };
474
+ const inFlightRef = inFlight;
475
+
476
+ // New discount: factor 0.50 (changed). Must recompute from LIST price (0.70),
477
+ // NOT compound on top of the already-applied 0.75 (which would give 0.2625).
478
+ const newDiscounts = new Map([
479
+ ["moonshotai/kimi-k2.6", { supplyState: "high", discountPercent: 50, creditMultiplier: 0.50 }],
480
+ ]);
481
+ applyDiscountInPlace(inFlight, listModels, newDiscounts);
482
+ assert(inFlight === inFlightRef, "mutates the same object in place (no new object created)");
483
+ assert(inFlight.cost.input === 0.35, "recomputed from list price: 0.70 * 0.50 = 0.35 (not compounded 0.2625)");
484
+ assert(inFlight.cost.output === 1.75, "output = 3.50 * 0.50 = 1.75");
485
+ assert(inFlight.cost.cacheRead === 0.1, "cacheRead = 0.20 * 0.50 = 0.1");
486
+ assert(inFlight.cost.cacheWrite === 0, "cacheWrite left untouched (Lilac does not discount it)");
487
+
488
+ // Idempotency + no-compounding: applying the same factor again yields the same
489
+ // values (always recomputed from list price, never compounding).
490
+ applyDiscountInPlace(inFlight, listModels, newDiscounts);
491
+ assert(inFlight.cost.input === 0.35, "re-applying same factor is idempotent (no compounding)");
492
+
493
+ // Discount removed (model no longer in /status) → reverts to list price.
494
+ const emptyDiscounts = new Map();
495
+ applyDiscountInPlace(inFlight, listModels, emptyDiscounts);
496
+ assert(inFlight.cost.input === 0.70, "removed discount reverts to list price (factor 1)");
497
+ assert(inFlight.cost.output === 3.50, "removed discount: output reverts to list price");
498
+
499
+ // null discounts → list price, no throw.
500
+ applyDiscountInPlace(inFlight, listModels, null);
501
+ assert(inFlight.cost.input === 0.70, "null discounts → list price");
502
+
503
+ // Unknown model id (not in listModels) → no-op, no throw.
504
+ const unknown = { id: "some/unknown-model", cost: { input: 9.99, output: 9.99, cacheRead: 9.99, cacheWrite: 0 } };
505
+ applyDiscountInPlace(unknown, listModels, newDiscounts);
506
+ assert(unknown.cost.input === 9.99, "unknown model id → no mutation (no list-price match)");
507
+
508
+ // undefined model → no throw.
509
+ applyDiscountInPlace(undefined, listModels, newDiscounts);
510
+ assert(true, "undefined model does not throw");
511
+
512
+ // ─── Test 13: before_provider_request mutates the bound (in-flight) model ────
513
+
514
+ console.log("\n--- Test 13: before_provider_request mutates the bound model ---");
515
+
516
+ // Simulate the object pi's agent loop bound for the turn. It holds an OUTDATED
517
+ // cost (list price, as if no discount had been applied yet). This same reference
518
+ // is what calculateCost() reads for the current turn — so the handler MUST mutate
519
+ // it in place, not replace it. latestDiscounts currently holds kimi at 0.75 (from
520
+ // Test 4), and the TTL is fresh, so the handler takes the within-TTL path (which
521
+ // previously did NOT touch the in-flight model at all).
522
+ const boundModel = {
523
+ id: "moonshotai/kimi-k2.6",
524
+ provider: "lilac",
525
+ cost: { input: 0.70, output: 3.50, cacheRead: 0.20, cacheWrite: 0 }, // list price (stale)
526
+ };
527
+ const boundRef = boundModel;
528
+
529
+ for (const handler of handlers.get("before_provider_request") || []) {
530
+ await handler(
531
+ { type: "before_provider_request", payload: {} },
532
+ {
533
+ ui: mockUi,
534
+ model: boundModel,
535
+ }
536
+ );
537
+ }
538
+
539
+ assert(boundModel === boundRef, "handler did not replace the bound model object (same reference)");
540
+ assert(boundModel.cost.input === 0.525, "bound model cost mutated in place to 0.70 * 0.75 = 0.525");
541
+ assert(boundModel.cost.output === 2.625, "bound model output mutated to 3.50 * 0.75 = 2.625");
542
+ assert(boundModel.cost.cacheRead === 0.15, "bound model cacheRead mutated to 0.20 * 0.75 = 0.15");
543
+
449
544
  // ─── Cleanup ──────────────────────────────────────────────────────────────────
450
545
 
451
546
  globalThis.fetch = originalFetch;