kimiflare 0.66.0 → 0.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3150,6 +3150,15 @@ var init_system_prompt = __esm({
3150
3150
  });
3151
3151
 
3152
3152
  // src/agent/loop.ts
3153
/**
 * Look up the web-fetch history array for a session, creating and
 * registering an empty one the first time the session is seen.
 *
 * The same array instance is returned on every call for a given key, so
 * entries pushed by the caller are visible to later lookups.
 *
 * @param sessionId Session identifier; null/undefined falls back to the
 *   shared "default" bucket.
 * @returns The history array stored in `sessionWebFetchHistory` for that key.
 */
function getSessionWebFetchHistory(sessionId) {
  const bucket = sessionId ?? "default";
  const existing = sessionWebFetchHistory.get(bucket);
  if (existing) return existing;
  const created = [];
  sessionWebFetchHistory.set(bucket, created);
  return created;
}
3153
3162
  function isHighSignalMemory(memory) {
3154
3163
  return memory.topicKey === "project_dependencies" || memory.topicKey === "project_tsconfig" || memory.topicKey === "project_entry_point" || memory.category === "instruction" || memory.category === "preference" || memory.category === "event" && memory.importance >= 3;
3155
3164
  }
@@ -3304,7 +3313,8 @@ Use console.log() to return results. Only console.log output will be sent back t
3304
3313
  const recentToolCalls = [];
3305
3314
  const LOOP_WINDOW = 8;
3306
3315
  const LOOP_THRESHOLD = 2;
3307
- const webFetchHistory = [];
3316
+ const webFetchHistory = getSessionWebFetchHistory(opts2.sessionId);
3317
+ let webFetchesThisTurn = 0;
3308
3318
  const MAX_WEB_FETCH_PER_TURN = 5;
3309
3319
  const WEB_FETCH_DOMAIN_THRESHOLD = 2;
3310
3320
  let cumulativePromptTokens = 0;
@@ -3543,8 +3553,8 @@ Use console.log() to return results. Only console.log output will be sent back t
3543
3553
  try {
3544
3554
  const domain = new URL(url).hostname;
3545
3555
  const domainCount = webFetchHistory.filter((h) => h.domain === domain).length;
3546
- const totalWebFetches = webFetchHistory.length;
3547
- if (totalWebFetches >= MAX_WEB_FETCH_PER_TURN) {
3556
+ const totalSessionFetches = webFetchHistory.length;
3557
+ if (webFetchesThisTurn >= MAX_WEB_FETCH_PER_TURN) {
3548
3558
  const warning = `Research budget exceeded: you have already made ${MAX_WEB_FETCH_PER_TURN} web requests this turn. Synthesize what you have learned instead of fetching more pages.`;
3549
3559
  const budgetResult = {
3550
3560
  tool_call_id: tc.id,
@@ -3565,6 +3575,27 @@ Use console.log() to return results. Only console.log output will be sent back t
3565
3575
  blockedCount++;
3566
3576
  continue;
3567
3577
  }
3578
+ if (totalSessionFetches >= SESSION_WEB_FETCH_CAP) {
3579
+ const warning = `Session research budget exceeded: ${totalSessionFetches} web fetches across this session. Synthesize what you have learned from prior fetches instead of starting another page.`;
3580
+ const sessionCapResult = {
3581
+ tool_call_id: tc.id,
3582
+ name: "web_fetch",
3583
+ content: warning,
3584
+ ok: false
3585
+ };
3586
+ toolResults.push(sessionCapResult);
3587
+ opts2.messages.push({
3588
+ role: "tool",
3589
+ tool_call_id: tc.id,
3590
+ content: sanitizeString(warning),
3591
+ name: "web_fetch"
3592
+ });
3593
+ opts2.callbacks.onToolResult?.(sessionCapResult);
3594
+ recentToolCalls.push(loopSignature);
3595
+ if (recentToolCalls.length > LOOP_WINDOW) recentToolCalls.shift();
3596
+ blockedCount++;
3597
+ continue;
3598
+ }
3568
3599
  if (domainCount >= WEB_FETCH_DOMAIN_THRESHOLD) {
3569
3600
  const warning = `Loop detected: you have fetched from ${domain} multiple times. Consider a different approach or synthesize existing findings.`;
3570
3601
  const loopResult = {
@@ -3587,6 +3618,7 @@ Use console.log() to return results. Only console.log output will be sent back t
3587
3618
  continue;
3588
3619
  }
3589
3620
  webFetchHistory.push({ url, domain });
3621
+ webFetchesThisTurn++;
3590
3622
  } catch {
3591
3623
  }
3592
3624
  }
@@ -3622,9 +3654,16 @@ Use console.log() to return results. Only console.log output will be sent back t
3622
3654
  Output:
3623
3655
  ${sandboxResult.output}` : sandboxResult.output;
3624
3656
  if (resultContent.length > MAX_TOOL_CONTENT_CHARS) {
3657
+ const rawBytes = resultContent.length;
3625
3658
  resultContent = resultContent.slice(0, MAX_TOOL_CONTENT_CHARS) + `
3626
3659
 
3627
- [truncated: ${resultContent.length - MAX_TOOL_CONTENT_CHARS} chars omitted]`;
3660
+ [truncated: ${rawBytes - MAX_TOOL_CONTENT_CHARS} chars omitted]`;
3661
+ opts2.callbacks.onTruncation?.({
3662
+ tool: "execute_code",
3663
+ toolCallId: tc.id,
3664
+ rawBytes,
3665
+ reducedBytes: resultContent.length
3666
+ });
3628
3667
  }
3629
3668
  const result = {
3630
3669
  tool_call_id: tc.id,
@@ -3653,9 +3692,17 @@ ${sandboxResult.output}` : sandboxResult.output;
3653
3692
  );
3654
3693
  let content2 = result.content;
3655
3694
  if (content2.length > MAX_TOOL_CONTENT_CHARS) {
3695
+ const rawBytes = content2.length;
3656
3696
  content2 = content2.slice(0, MAX_TOOL_CONTENT_CHARS) + `
3657
3697
 
3658
- [truncated: ${content2.length - MAX_TOOL_CONTENT_CHARS} chars omitted]`;
3698
+ [truncated: ${rawBytes - MAX_TOOL_CONTENT_CHARS} chars omitted]`;
3699
+ opts2.callbacks.onTruncation?.({
3700
+ tool: tc.function.name,
3701
+ toolCallId: tc.id,
3702
+ rawBytes,
3703
+ reducedBytes: content2.length,
3704
+ artifactId: result.artifactId
3705
+ });
3659
3706
  }
3660
3707
  logger.debug("turn:tool_end", { sessionId: opts2.sessionId, tool: tc.function.name, toolCallId: tc.id, ok: result.ok });
3661
3708
  toolResults.push(result);
@@ -3679,6 +3726,7 @@ ${sandboxResult.output}` : sandboxResult.output;
3679
3726
  );
3680
3727
  const assistantMessage = lastAssistant?.content ?? "";
3681
3728
  const llmOpts = opts2.memoryManager.getExtractionLlmOpts();
3729
+ const turnAtMemoryCommit = turn;
3682
3730
  for (const extractor of EXTRACTORS) {
3683
3731
  if (extractor.match(tc.function.name, filePath)) {
3684
3732
  void (async () => {
@@ -3704,11 +3752,14 @@ ${sandboxResult.output}` : sandboxResult.output;
3704
3752
  );
3705
3753
  if (isHighSignalMemory(memory)) {
3706
3754
  const sid = opts2.sessionId ?? "default";
3707
- const current = (driftAccumulator.get(sid) ?? 0) + 1;
3708
- driftAccumulator.set(sid, current);
3709
- if (current >= DRIFT_THRESHOLD) {
3755
+ const events2 = driftEvents.get(sid) ?? [];
3756
+ events2.push(turnAtMemoryCommit);
3757
+ const cutoff = turnAtMemoryCommit - DRIFT_WINDOW + 1;
3758
+ const recent = events2.filter((t) => t >= cutoff);
3759
+ driftEvents.set(sid, recent);
3760
+ if (recent.length >= DRIFT_THRESHOLD) {
3710
3761
  opts2.callbacks.onKimiMdStale?.();
3711
- driftAccumulator.set(sid, 0);
3762
+ driftEvents.set(sid, []);
3712
3763
  }
3713
3764
  }
3714
3765
  }
@@ -3740,12 +3791,6 @@ ${sandboxResult.output}` : sandboxResult.output;
3740
3791
  if (blockedCount === toolCalls.length && toolCalls.length > 0) {
3741
3792
  loopExhausted = true;
3742
3793
  }
3743
- if (opts2.sessionId) {
3744
- const current = driftAccumulator.get(opts2.sessionId) ?? 0;
3745
- if (current > 0) {
3746
- driftAccumulator.set(opts2.sessionId, Math.max(0, current - 1));
3747
- }
3748
- }
3749
3794
  if (opts2.onIterationEnd) {
3750
3795
  opts2.messages = await opts2.onIterationEnd(opts2.messages, opts2.signal);
3751
3796
  if (opts2.signal.aborted) throw new DOMException("aborted", "AbortError");
@@ -3804,7 +3849,7 @@ function validateToolArguments(raw) {
3804
3849
  return "{}";
3805
3850
  }
3806
3851
  }
3807
- var BudgetExhaustedError, AgentLoopError, codeModeApiCache, driftAccumulator, DRIFT_THRESHOLD, memoryExtractionErrorCounts, MAX_PROMPT_TOKENS, MAX_TOOL_CONTENT_CHARS;
3852
+ var BudgetExhaustedError, AgentLoopError, codeModeApiCache, driftEvents, DRIFT_WINDOW, DRIFT_THRESHOLD, memoryExtractionErrorCounts, sessionWebFetchHistory, SESSION_WEB_FETCH_CAP, MAX_PROMPT_TOKENS, MAX_TOOL_CONTENT_CHARS;
3808
3853
  var init_loop = __esm({
3809
3854
  "src/agent/loop.ts"() {
3810
3855
  "use strict";
@@ -3832,9 +3877,12 @@ var init_loop = __esm({
3832
3877
  }
3833
3878
  };
3834
3879
  codeModeApiCache = /* @__PURE__ */ new Map();
3835
- driftAccumulator = /* @__PURE__ */ new Map();
3836
- DRIFT_THRESHOLD = 5;
3880
+ driftEvents = /* @__PURE__ */ new Map();
3881
+ DRIFT_WINDOW = 10;
3882
+ DRIFT_THRESHOLD = 3;
3837
3883
  memoryExtractionErrorCounts = /* @__PURE__ */ new Map();
3884
+ sessionWebFetchHistory = /* @__PURE__ */ new Map();
3885
+ SESSION_WEB_FETCH_CAP = 25;
3838
3886
  MAX_PROMPT_TOKENS = 24e4;
3839
3887
  MAX_TOOL_CONTENT_CHARS = 1e4;
3840
3888
  }
@@ -6274,6 +6322,12 @@ var init_renderer = __esm({
6274
6322
  });
6275
6323
 
6276
6324
  // src/cost-attribution/reconcile.ts
6325
// Export table for src/cost-attribution/reconcile.ts. `reconcile_exports` is the object a lazy `(init_reconcile(), reconcile_exports)` import evaluates to; each entry is a thunk returning the live binding. NOTE(review): `__export` is defined outside this view — presumably it installs the thunks as getters; confirm.
+ var reconcile_exports = {};
6326
+ __export(reconcile_exports, {
6327
+ aggregateByFeature: () => aggregateByFeature,
6328
+ fetchGatewayLogs: () => fetchGatewayLogs,
6329
+ reconcileWithCloudflare: () => reconcileWithCloudflare
6330
+ });
6277
6331
  function cacheKey(opts2) {
6278
6332
  return `${opts2.gatewayId ?? "none"}:${opts2.startDate}:${opts2.endDate}`;
6279
6333
  }
@@ -6312,6 +6366,28 @@ async function fetchGatewayLogs(accountId, apiToken, gatewayId, startDate, endDa
6312
6366
  }
6313
6367
  return out;
6314
6368
  }
6369
/**
 * Group Gateway log entries by their `metadata.feature` tag and total the
 * cost and request count of each group.
 *
 * `metadata` may be a plain object or a JSON string encoding one. Entries
 * whose feature cannot be determined (missing metadata, non-string feature,
 * unparseable JSON) are counted under "unknown".
 *
 * @param logs Iterable of log entries exposing optional `metadata` and `cost`.
 * @returns Array of `{ feature, cost, requests }` rows, highest cost first.
 */
function aggregateByFeature(logs) {
  const totals = /* @__PURE__ */ new Map();
  for (const log2 of logs) {
    let feature = "unknown";
    const meta = log2.metadata;
    if (meta && typeof meta === "object" && !Array.isArray(meta)) {
      if (typeof meta.feature === "string") feature = meta.feature;
    } else if (typeof meta === "string") {
      try {
        const parsed = JSON.parse(meta);
        // JSON.parse may yield null or a primitive; optional chaining keeps
        // those on the "unknown" path without relying on a thrown TypeError.
        if (typeof parsed?.feature === "string") feature = parsed.feature;
      } catch {
        // Metadata string is not JSON: best effort, leave as "unknown".
      }
    }
    const entry = totals.get(feature) ?? { feature, cost: 0, requests: 0 };
    // Only finite costs are added. A single NaN/Infinity would poison the
    // feature total and make the sort comparator below return NaN, which
    // leaves the row order unspecified.
    if (typeof log2.cost === "number" && Number.isFinite(log2.cost)) {
      entry.cost += log2.cost;
    }
    entry.requests += 1;
    totals.set(feature, entry);
  }
  return Array.from(totals.values()).sort((a, b) => b.cost - a.cost);
}
6315
6391
  async function reconcileWithCloudflare(opts2) {
6316
6392
  if (!opts2.accountId || !opts2.apiToken) {
6317
6393
  return {
@@ -6351,7 +6427,8 @@ async function reconcileWithCloudflare(opts2) {
6351
6427
  localCost: opts2.localCost,
6352
6428
  cloudflareCost,
6353
6429
  driftPct: Math.round(driftPct * 1e3) / 10,
6354
- message: `Reconciled ${logs.length} Gateway log entries`
6430
+ message: `Reconciled ${logs.length} Gateway log entries`,
6431
+ featureBreakdown: aggregateByFeature(logs)
6355
6432
  };
6356
6433
  cache.set(key, { result, expires: Date.now() + 60 * 60 * 1e3 });
6357
6434
  return result;
@@ -9411,7 +9488,8 @@ async function getCostReport(sessionId) {
9411
9488
  gatewayRequests: rawSession.gatewayRequests,
9412
9489
  gatewayCachedRequests: rawSession.gatewayCachedRequests,
9413
9490
  gatewayCost: rawSession.gatewayCost,
9414
- reconcilePending: hasPendingReconcile(rawSession)
9491
+ reconcilePending: hasPendingReconcile(rawSession),
9492
+ lastTurnMs: latestConfirmedDurationMs(rawSession)
9415
9493
  } : { date, promptTokens: 0, completionTokens: 0, cachedTokens: 0, cost: 0 };
9416
9494
  const todayUsage = log2.days.find((d) => d.date === date) ?? { date, promptTokens: 0, completionTokens: 0, cachedTokens: 0, cost: 0 };
9417
9495
  const monthUsage = {
@@ -9456,6 +9534,14 @@ function hasPendingReconcile(session) {
9456
9534
  (t) => t.logId && t.confirmedCost === void 0 && !t.reconcileFailed
9457
9535
  );
9458
9536
  }
9537
/**
 * Find the duration of the most recent turn that recorded one.
 *
 * Walks `session.turns` from newest to oldest and returns the first
 * `durationMs` that is a number. Missing or null turn slots are skipped.
 * NOTE(review): despite the name, no "confirmed" flag is inspected here —
 * only the presence of a numeric `durationMs`.
 *
 * @param session Session record with an optional `turns` array.
 * @returns The latest numeric `durationMs`, or undefined when there is none.
 */
function latestConfirmedDurationMs(session) {
  const turns = session.turns;
  if (!turns) return undefined;
  let idx = turns.length;
  while (--idx >= 0) {
    const duration = turns[idx]?.durationMs;
    if (typeof duration === "number") return duration;
  }
  return undefined;
}
9459
9545
  async function getSessionGatewayLogs(sessionId) {
9460
9546
  const log2 = await loadLog2();
9461
9547
  const session = log2.sessions.find((s) => s.id === sessionId);
@@ -9501,6 +9587,17 @@ function formatGatewaySection(report, accountId, gatewayId, recentLogs2 = []) {
9501
9587
  );
9502
9588
  return lines.join("\n");
9503
9589
  }
9590
/**
 * Render the per-feature cost breakdown as a text section.
 *
 * Produces a header line followed by one line per row showing the feature
 * name (padded to 20 characters), the cost to four decimals and the request
 * count. Returns an empty string when there is nothing worth showing: no
 * rows at all, or a single row whose feature is "unknown".
 *
 * @param breakdown Array of `{ feature, cost, requests }` rows, or a falsy value.
 * @returns The formatted section, or "" when it should be omitted.
 */
function formatFeatureBreakdown(breakdown) {
  if (!breakdown || breakdown.length === 0) return "";
  const onlyUnknown = breakdown.length === 1 && breakdown[0].feature === "unknown";
  if (onlyUnknown) return "";
  const header = "\u2500\u2500\u2500 By feature (Gateway-confirmed) \u2500\u2500\u2500";
  const body = [...breakdown].map(
    (row) => ` ${row.feature.padEnd(20)} $${row.cost.toFixed(4)} (${row.requests} req)`
  );
  return [header, ...body].join("\n");
}
9504
9601
  function formatCostReport(report) {
9505
9602
  const lines = [];
9506
9603
  const add = (label, u) => {
@@ -11704,6 +11801,9 @@ function buildRightParts(usage, contextLimit, sessionUsage, gatewayMeta, cloudMo
11704
11801
  } else {
11705
11802
  parts.push(`${prefix}${sessionUsage.cost.toFixed(2)}`);
11706
11803
  }
11804
+ if (typeof sessionUsage.lastTurnMs === "number") {
11805
+ parts.push(formatDuration(sessionUsage.lastTurnMs));
11806
+ }
11707
11807
  } else {
11708
11808
  const cached = usage.prompt_tokens_details?.cached_tokens ?? 0;
11709
11809
  const cost = calculateCost(usage.prompt_tokens, usage.completion_tokens, cached);
@@ -11732,6 +11832,10 @@ function formatGatewayCacheStatus(gatewayMeta) {
11732
11832
  const status = gatewayMeta?.cacheStatus?.trim();
11733
11833
  return status ? `AI Gateway \xB7 cache ${status.toLowerCase()}` : null;
11734
11834
  }
11835
/**
 * Format a millisecond duration for compact display.
 *
 * Durations that round to fewer than 1000 whole milliseconds are shown as
 * "<n>ms"; everything else is shown in seconds with one decimal ("1.5s").
 *
 * @param ms Duration in milliseconds.
 * @returns The formatted duration string.
 */
function formatDuration(ms) {
  // Compare the rounded value against the threshold: testing the raw input
  // let 999.5–999.99 through and rendered it as "1000ms" instead of "1.0s".
  const wholeMs = Math.round(ms);
  if (wholeMs < 1e3) return `${wholeMs}ms`;
  return `${(ms / 1e3).toFixed(1)}s`;
}
11735
11839
  function formatElapsed2(ms) {
11736
11840
  const total = Math.floor(ms / 1e3);
11737
11841
  const m = Math.floor(total / 60);
@@ -20031,6 +20135,22 @@ ${wcagWarnings.join("\n")}` }
20031
20135
  const logs = sid ? await getSessionGatewayLogs(sid).catch(() => []) : [];
20032
20136
  const gwSection = formatGatewaySection(report, cfg.accountId, cfg.aiGatewayId, logs);
20033
20137
  if (gwSection) lines.push("", gwSection);
20138
+ try {
20139
+ const { reconcileWithCloudflare: reconcileWithCloudflare2 } = await Promise.resolve().then(() => (init_reconcile(), reconcile_exports));
20140
+ const today4 = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
20141
+ const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1e3).toISOString().slice(0, 10);
20142
+ const recon = await reconcileWithCloudflare2({
20143
+ localCost: report.month.cost,
20144
+ accountId: cfg.accountId,
20145
+ apiToken: cfg.apiToken,
20146
+ gatewayId: cfg.aiGatewayId,
20147
+ startDate: sevenDaysAgo,
20148
+ endDate: today4
20149
+ });
20150
+ const breakdown = formatFeatureBreakdown(recon.featureBreakdown);
20151
+ if (breakdown) lines.push("", breakdown);
20152
+ } catch {
20153
+ }
20034
20154
  }
20035
20155
  if (cfg?.costAttribution) {
20036
20156
  const { getCategoryReportText: getCategoryReportText2 } = await Promise.resolve().then(() => (init_tui_report(), tui_report_exports));
@@ -20108,6 +20228,24 @@ ${wcagWarnings.join("\n")}` }
20108
20228
  lines.push(`collect-logs: ${cfg.aiGatewayCollectLogPayload ?? false}`);
20109
20229
  const meta = cfg.aiGatewayMetadata;
20110
20230
  lines.push(`metadata: ${meta && Object.keys(meta).length > 0 ? JSON.stringify(meta) : "none"}`);
20231
+ const sid = sessionIdRef.current;
20232
+ if (sid) {
20233
+ void getCostReport(sid).then((report) => {
20234
+ const req = report.session.gatewayRequests ?? 0;
20235
+ if (req === 0) return;
20236
+ const cached = report.session.gatewayCachedRequests ?? 0;
20237
+ const pct = (cached / req * 100).toFixed(1);
20238
+ setEvents((e) => [
20239
+ ...e,
20240
+ {
20241
+ kind: "info",
20242
+ key: mkKey(),
20243
+ text: `cache hits (session): ${cached}/${req} (${pct}%)`
20244
+ }
20245
+ ]);
20246
+ }).catch(() => {
20247
+ });
20248
+ }
20111
20249
  } else {
20112
20250
  lines.push("gateway: off (direct Workers AI)");
20113
20251
  }