kimiflare 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -39,6 +39,14 @@ async function loadConfig() {
39
39
  const cacheStablePrompts = envCacheStable === "0" || envCacheStable === "false" ? false : true;
40
40
  const envCompiled = process.env.KIMIFLARE_COMPILED_CONTEXT;
41
41
  const compiledContext = envCompiled === "1" || envCompiled === "true" ? true : false;
42
+ const envImageTurns = process.env.KIMIFLARE_IMAGE_HISTORY_TURNS;
43
+ const imageHistoryTurns = envImageTurns ? parseInt(envImageTurns, 10) : void 0;
44
+ const envMaxToolIterations = process.env.KIMIFLARE_MAX_TOOL_ITERATIONS;
45
+ const maxToolIterations = envMaxToolIterations ? parseInt(envMaxToolIterations, 10) : void 0;
46
+ const envMaxInputTokens = process.env.KIMIFLARE_MAX_INPUT_TOKENS;
47
+ const maxInputTokens = envMaxInputTokens ? parseInt(envMaxInputTokens, 10) : void 0;
48
+ const envMaxCompletionTokens = process.env.KIMIFLARE_MAX_COMPLETION_TOKENS;
49
+ const maxCompletionTokens = envMaxCompletionTokens ? parseInt(envMaxCompletionTokens, 10) : void 0;
42
50
  if (envAccount && envToken) {
43
51
  return {
44
52
  accountId: envAccount,
@@ -50,7 +58,11 @@ async function loadConfig() {
50
58
  coauthorName: envCoauthor?.name,
51
59
  coauthorEmail: envCoauthor?.email,
52
60
  cacheStablePrompts,
53
- compiledContext
61
+ compiledContext,
62
+ imageHistoryTurns: Number.isNaN(imageHistoryTurns) ? void 0 : imageHistoryTurns,
63
+ maxToolIterations: Number.isNaN(maxToolIterations) ? void 0 : maxToolIterations,
64
+ maxInputTokens: Number.isNaN(maxInputTokens) ? void 0 : maxInputTokens,
65
+ maxCompletionTokens: Number.isNaN(maxCompletionTokens) ? void 0 : maxCompletionTokens
54
66
  };
55
67
  }
56
68
  try {
@@ -68,7 +80,11 @@ async function loadConfig() {
68
80
  coauthorEmail: envCoauthor?.email ?? parsed.coauthorEmail,
69
81
  mcpServers: parsed.mcpServers,
70
82
  cacheStablePrompts: parsed.cacheStablePrompts ?? cacheStablePrompts,
71
- compiledContext: parsed.compiledContext ?? compiledContext
83
+ compiledContext: parsed.compiledContext ?? compiledContext,
84
+ imageHistoryTurns: Number.isNaN(imageHistoryTurns) ? parsed.imageHistoryTurns : imageHistoryTurns,
85
+ maxToolIterations: Number.isNaN(maxToolIterations) ? parsed.maxToolIterations : maxToolIterations,
86
+ maxInputTokens: Number.isNaN(maxInputTokens) ? parsed.maxInputTokens : maxInputTokens,
87
+ maxCompletionTokens: Number.isNaN(maxCompletionTokens) ? parsed.maxCompletionTokens : maxCompletionTokens
72
88
  };
73
89
  }
74
90
  } catch {
@@ -175,6 +191,30 @@ function stableStringify(value, replacer, space) {
175
191
  const sorted = sortKeys(value);
176
192
  return JSON.stringify(sorted, replacer, space);
177
193
  }
194
/**
 * Drop `image_url` parts from user messages that are older than the most
 * recent `keepLastTurns2` user turns.
 *
 * @param {Array<object>} messages - chat messages; never mutated.
 * @param {number} keepLastTurns2 - number of most recent user turns whose
 *   images are kept. Negative disables stripping, 0 strips every image.
 * @returns {Array<object>} `messages` itself when stripping is disabled,
 *   otherwise a new array in which only the affected messages are replaced.
 */
function stripOldImages(messages, keepLastTurns2) {
  if (keepLastTurns2 < 0) return messages;
  // Index of the oldest user turn that keeps its images. When the
  // conversation has fewer user turns than requested, nothing is "old", so
  // the cutoff defaults to 0 (keep everything). Only an explicit 0 strips all.
  let cutoffIndex = keepLastTurns2 === 0 ? messages.length : 0;
  if (keepLastTurns2 > 0) {
    let userCount = 0;
    for (let i = messages.length - 1; i >= 0; i--) {
      if (messages[i].role !== "user") continue;
      userCount++;
      if (userCount >= keepLastTurns2) {
        cutoffIndex = i;
        break;
      }
    }
  }
  return messages.map((m, idx) => {
    if (m.role !== "user" || idx >= cutoffIndex) return m;
    if (!Array.isArray(m.content)) return m;
    const stripped = m.content.filter((p) => p.type !== "image_url");
    if (stripped.length === m.content.length) return m;
    return {
      ...m,
      // A message that consisted only of images still needs some content.
      content: stripped.length > 0 ? stripped : "[image omitted]"
    };
  });
}
178
218
  var init_messages = __esm({
179
219
  "src/agent/messages.ts"() {
180
220
  "use strict";
@@ -199,7 +239,7 @@ async function* runKimi(opts2) {
199
239
  ...opts2.tools && opts2.tools.length ? { tools: opts2.tools, tool_choice: "auto", parallel_tool_calls: true } : {},
200
240
  stream: true,
201
241
  temperature: opts2.temperature ?? 0.2,
202
- max_completion_tokens: opts2.maxCompletionTokens ?? 16384
242
+ max_completion_tokens: opts2.maxCompletionTokens ?? 4096
203
243
  };
204
244
  if (opts2.reasoningEffort) {
205
245
  body.reasoning_effort = opts2.reasoningEffort;
@@ -389,7 +429,7 @@ var init_client = __esm({
389
429
  init_errors();
390
430
  init_messages();
391
431
  RETRYABLE_CODES = /* @__PURE__ */ new Set([3040]);
392
- MAX_ATTEMPTS = 5;
432
+ MAX_ATTEMPTS = 2;
393
433
  }
394
434
  });
395
435
 
@@ -569,6 +609,37 @@ async function logCostDebug(entry) {
569
609
  await rotateJsonl(debugPath(), RETENTION.costDebugMaxBytes, RETENTION.costDebugRotations);
570
610
  await appendFile(debugPath(), JSON.stringify(entry) + "\n", "utf8");
571
611
  }
612
/** Directory that holds the per-turn usage log: `.kimiflare` under the home directory. */
function usageDir() {
  const home = homedir2();
  return join3(home, ".kimiflare");
}
615
/** Full path of the JSONL usage log inside `usageDir()`. */
function usagePath() {
  const dir = usageDir();
  return join3(dir, "usage.jsonl");
}
618
/**
 * Append one per-turn token-metrics record to the usage log, creating the
 * directory and rotating the file first.
 *
 * The agent loop calls this fire-and-forget (`void logTurnTokenMetrics(...)`),
 * so a rejection here would surface as an unhandled promise rejection. Usage
 * logging is diagnostics only, so I/O failures are deliberately swallowed.
 *
 * @param {object} metrics - JSON-serialisable record, written as one line.
 * @returns {Promise<void>} always resolves.
 */
async function logTurnTokenMetrics(metrics) {
  try {
    const file = usagePath();
    await mkdir2(usageDir(), { recursive: true });
    await rotateJsonl(file, RETENTION.costDebugMaxBytes, RETENTION.costDebugRotations);
    await appendFile(file, JSON.stringify(metrics) + "\n", "utf8");
  } catch {
    // Best-effort telemetry: an unwritable home directory must never abort a turn.
  }
}
623
/**
 * Assemble the record written to the usage log for one agent turn.
 *
 * @param {string} sessionId
 * @param {number} turn
 * @param {object} breakdown - result of `breakdownTokens`.
 * @param {number} estimatedOutputTokenCap
 * @param {boolean} wasCompacted
 * @param {number} removedCount
 * @param {boolean} exceedsLimit
 * @returns {object} flat, JSON-serialisable metrics record.
 */
function buildTurnTokenMetrics(sessionId, turn, breakdown, estimatedOutputTokenCap, wasCompacted, removedCount, exceedsLimit) {
  const header = { v: LOG_VERSION, ts: now(), sessionId, turn };
  const {
    total,
    messageCount,
    toolOutputCount,
    fromSystem,
    fromSession,
    fromTools,
    fromHistory,
    fromUserInput
  } = breakdown;
  return {
    ...header,
    estimatedInputTokens: total,
    estimatedOutputTokenCap,
    messageCount,
    toolOutputCount,
    tokensFromSystem: fromSystem,
    tokensFromSession: fromSession,
    tokensFromTools: fromTools,
    tokensFromHistory: fromHistory,
    tokensFromUserInput: fromUserInput,
    wasCompacted,
    removedCount,
    exceedsLimit
  };
}
572
643
  function serializePrefix(messages) {
573
644
  let end = 0;
574
645
  while (end < messages.length && messages[end].role === "system") {
@@ -713,12 +784,282 @@ var init_strip_reasoning = __esm({
713
784
  }
714
785
  });
715
786
 
787
+ // src/agent/token-limits.ts
788
/**
 * Resolve the effective safety limits: each value comes from its environment
 * variable when set and parseable, otherwise from `DEFAULT_SAFETY_LIMITS`.
 *
 * @returns {object} one numeric entry per key of `DEFAULT_SAFETY_LIMITS`.
 */
function loadSafetyLimits() {
  // Limit name -> environment variable that overrides it.
  const envByLimit = [
    ["maxInputTokensPerRequest", "KIMIFLARE_MAX_INPUT_TOKENS"],
    ["warningThreshold", "KIMIFLARE_WARNING_TOKENS"],
    ["maxLlmCallsPerUserAction", "KIMIFLARE_MAX_LLM_CALLS"],
    ["maxRetriesPerLlmCall", "KIMIFLARE_MAX_RETRIES"],
    ["maxCompletionTokens", "KIMIFLARE_MAX_COMPLETION_TOKENS"],
    ["maxToolIterations", "KIMIFLARE_MAX_TOOL_ITERATIONS"],
    ["maxRecentMessages", "KIMIFLARE_MAX_RECENT_MESSAGES"],
    ["maxToolOutputChars", "KIMIFLARE_MAX_TOOL_OUTPUT_CHARS"]
  ];
  const limits = {};
  for (const [limitName, envName] of envByLimit) {
    limits[limitName] = parseIntEnv(envName, DEFAULT_SAFETY_LIMITS[limitName]);
  }
  return limits;
}
800
/**
 * Read a non-negative integer from an environment variable.
 *
 * Every caller uses the result as a count or size limit, so anything that is
 * not a plain run of decimal digits (negative numbers, `"10abc"`, `"1e4"`,
 * empty strings) is treated as unset rather than partially parsed.
 *
 * @param {string} name - environment variable name.
 * @param {number} fallback - value returned when the variable is unset or invalid.
 * @returns {number} the parsed value, or `fallback`.
 */
function parseIntEnv(name, fallback) {
  const raw = process.env[name]?.trim();
  if (!raw || !/^\d+$/.test(raw)) return fallback;
  const n = Number.parseInt(raw, 10);
  // Guards against digit strings too long to be represented exactly.
  return Number.isSafeInteger(n) ? n : fallback;
}
806
/**
 * Rough token estimate for a string: one token per four characters, rounded up.
 *
 * @param {string} text
 * @returns {number}
 */
function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
809
/**
 * Rough token estimate for one chat message.
 *
 * Counts the characters of string content (or of each text part, plus a flat
 * 1000 characters per image part), the reasoning content, and every tool
 * call's function name and argument string. The total is converted at four
 * characters per token, plus a fixed overhead of 4 tokens per message.
 *
 * Missing or malformed fields (a text part without `text`, a tool call
 * without `function.arguments`) count as zero instead of throwing, so one bad
 * message cannot break context sizing.
 *
 * @param {object} m - chat message.
 * @returns {number} estimated token count (always >= 4).
 */
function estimateMessageTokens(m) {
  let chars = 0;
  if (typeof m.content === "string") {
    chars += m.content.length;
  } else if (Array.isArray(m.content)) {
    for (const part of m.content) {
      if (part?.type === "text") chars += part.text?.length ?? 0;
      else if (part?.type === "image_url") chars += 1e3;
    }
  }
  if (m.reasoning_content) chars += m.reasoning_content.length ?? 0;
  if (Array.isArray(m.tool_calls)) {
    for (const tc of m.tool_calls) {
      chars += tc?.function?.name?.length ?? 0;
      chars += tc?.function?.arguments?.length ?? 0;
    }
  }
  return Math.ceil(chars / 4) + 4;
}
828
/**
 * Sum of `estimateMessageTokens` over a list of messages.
 *
 * @param {Array<object>} messages
 * @returns {number} 0 for an empty list.
 */
function estimateMessagesTokens(messages) {
  let total = 0;
  messages.forEach((message) => {
    total += estimateMessageTokens(message);
  });
  return total;
}
831
/**
 * Rough token estimate for the tool definitions, measured on their JSON
 * serialisation.
 *
 * @param {*} tools - tool definitions as passed to the model request.
 * @returns {number}
 */
function estimateToolDefsTokens(tools) {
  const serialized = JSON.stringify(tools);
  return estimateTokens(serialized);
}
834
/**
 * Estimate how many tokens each part of a request contributes.
 *
 * @param {Array<object>} systemMessages
 * @param {Array<object>} sessionMessages
 * @param {*} toolDefs - tool definitions, measured via their JSON form.
 * @param {Array<object>} historyMessages
 * @param {object|null} userMessage - counted separately when truthy.
 * @returns {object} per-source token estimates, their `total`, the overall
 *   `messageCount`, and `toolOutputCount` (tool-role messages in the history).
 */
function breakdownTokens(systemMessages, sessionMessages, toolDefs, historyMessages, userMessage) {
  const hasUserMessage = Boolean(userMessage);
  const fromSystem = estimateMessagesTokens(systemMessages);
  const fromSession = estimateMessagesTokens(sessionMessages);
  const fromTools = estimateToolDefsTokens(toolDefs);
  const fromHistory = estimateMessagesTokens(historyMessages);
  const fromUserInput = hasUserMessage ? estimateMessageTokens(userMessage) : 0;

  const parts = [fromSystem, fromSession, fromTools, fromHistory, fromUserInput];
  const total = parts.reduce((sum, tokens) => sum + tokens, 0);
  const listedMessages = systemMessages.length + sessionMessages.length + historyMessages.length;
  const toolOutputCount = historyMessages.reduce(
    (count, m) => m.role === "tool" ? count + 1 : count,
    0
  );

  return {
    total,
    fromSystem,
    fromSession,
    fromTools,
    fromHistory,
    fromUserInput,
    messageCount: listedMessages + (hasUserMessage ? 1 : 0),
    toolOutputCount
  };
}
851
/**
 * Shrink a message history until its estimated size fits `targetTokens`.
 *
 * Step 1 replaces every tool result longer than 200 characters with a
 * one-line stub (first 120 characters of its first line).
 * Step 2 drops the oldest assistant/tool messages (never index 0, never
 * system or user messages) until the estimate fits or at most two messages
 * remain.
 *
 * An assistant message that carries `tool_calls` is dropped together with the
 * tool results that directly follow it. Leaving those results behind would
 * produce tool messages with no preceding tool call.
 * NOTE(review): OpenAI-compatible chat APIs reject such orphaned tool
 * messages — confirm for the target endpoint.
 *
 * @param {Array<object>} messages - history; not mutated.
 * @param {number} targetTokens - token budget for the returned history.
 * @returns {{messages: Array<object>, removedCount: number}} the compacted
 *   copy and the number of messages dropped in step 2.
 */
function compactHistoryForSafety(messages, targetTokens) {
  const compacted = messages.map((m) => {
    if (m.role === "tool" && typeof m.content === "string" && m.content.length > 200) {
      const firstLine2 = m.content.split("\n")[0] ?? "";
      // Anything reaching this branch is longer than the stub, so it is
      // always labelled as truncated.
      return {
        ...m,
        content: `[${m.name ?? "tool"} result (truncated)] ${firstLine2.slice(0, 120)}`
      };
    }
    return m;
  });
  let removedCount = 0;
  // Track the estimate incrementally instead of re-measuring the whole list
  // after every drop; the estimate is a plain per-message sum.
  let remainingTokens = estimateMessagesTokens(compacted);
  while (remainingTokens > targetTokens && compacted.length > 2) {
    const dropIndex = compacted.findIndex((m, i) => i > 0 && m.role !== "system" && m.role !== "user");
    if (dropIndex === -1) break;
    let dropCount = 1;
    const first = compacted[dropIndex];
    if (first.role === "assistant" && first.tool_calls) {
      while (compacted[dropIndex + dropCount]?.role === "tool") dropCount++;
    }
    const dropped = compacted.splice(dropIndex, dropCount);
    remainingTokens -= estimateMessagesTokens(dropped);
    removedCount += dropped.length;
  }
  return { messages: compacted, removedCount };
}
873
+ var DEFAULT_SAFETY_LIMITS;
874
+ var init_token_limits = __esm({
875
+ "src/agent/token-limits.ts"() {
876
+ "use strict";
877
+ DEFAULT_SAFETY_LIMITS = {
878
+ maxInputTokensPerRequest: 3e4,
879
+ warningThreshold: 15e3,
880
+ maxLlmCallsPerUserAction: 10,
881
+ maxRetriesPerLlmCall: 2,
882
+ maxCompletionTokens: 4096,
883
+ maxToolIterations: 10,
884
+ maxRecentMessages: 4,
885
+ maxToolOutputChars: 800
886
+ };
887
+ }
888
+ });
889
+
890
+ // src/agent/tool-output-summarizer.ts
891
+ import { createHash } from "crypto";
892
/**
 * Canonicalise text before hashing: lower-case it, collapse every whitespace
 * run to a single space, trim the ends, and keep at most 5000 characters.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeForHash(text) {
  const MAX_CHARS = 5e3;
  const lowered = text.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, " ");
  const trimmed = collapsed.trim();
  return trimmed.slice(0, MAX_CHARS);
}
895
/**
 * Short content fingerprint: the first 16 hex characters of the SHA-256
 * digest of `text`.
 *
 * @param {string} text
 * @returns {string} 16 lowercase hex characters.
 */
function stableHash(text) {
  const hasher = createHash("sha256");
  hasher.update(text);
  const hex = hasher.digest("hex");
  return hex.slice(0, 16);
}
898
/**
 * Empty the module-level cache of tool outputs already seen, so later
 * outputs are no longer replaced by "same as previous" references to them.
 */
function clearOutputHashCache() {
  const cache = outputHashCache;
  cache.clear();
}
901
/**
 * Produce the (possibly shortened) form of a tool result that is sent to the model.
 *
 * In order:
 *  1. An output byte-identical to one already seen under a different tool
 *     call id is replaced by a short back-reference.
 *  2. A long output (> 200 chars) that looks like a failure is reduced to its
 *     first line.
 *  3. An output within `maxChars` is passed through unchanged.
 *  4. Anything else is cut at `maxChars`, preferably on a line boundary, with
 *     a note about how much was removed.
 *
 * The dedupe fingerprint is taken over the exact output. Hashing a
 * lower-cased, whitespace-collapsed, 5000-character prefix would make two
 * different results (different case, or different after the prefix) look
 * identical, and the model would be told to reuse the wrong content.
 *
 * @param {string} toolCallId - id of the tool call that produced the output.
 * @param {string|undefined} name - tool name, used in labels.
 * @param {string} rawContent - full tool output.
 * @param {number} [maxChars=DEFAULT_MAX_CHARS] - size above which output is cut.
 * @returns {{tool_call_id: string, name: (string|undefined), content: string, truncated: boolean}}
 */
function summarizeToolOutput(toolCallId, name, rawContent, maxChars = DEFAULT_MAX_CHARS) {
  const hash = stableHash(rawContent);
  const cached = outputHashCache.get(hash);
  if (cached && cached.firstSeenId !== toolCallId) {
    return {
      tool_call_id: toolCallId,
      name,
      content: `same as previous ${cached.name ?? "tool"} call (result_id=${hash})`,
      truncated: false
    };
  }
  if (!cached) {
    const preview = rawContent.slice(0, 120).replace(/\s+/g, " ");
    outputHashCache.set(hash, { name, firstSeenId: toolCallId, preview });
  }
  // NOTE(review): these substring checks match anywhere in the output, so a
  // long successful result that merely contains "not found" is also reduced
  // to its first line. Tightening this needs the tools' actual error format.
  const isFailure = rawContent.startsWith("Error:") || rawContent.startsWith("error:") || rawContent.includes("exit code") || rawContent.includes("not found") || rawContent.includes("No such file");
  if (isFailure && rawContent.length > 200) {
    const firstLine2 = rawContent.split("\n")[0] ?? "";
    return {
      tool_call_id: toolCallId,
      name,
      content: `[${name ?? "tool"} failed] ${firstLine2.slice(0, 160)}`,
      truncated: true
    };
  }
  if (rawContent.length <= maxChars) {
    return {
      tool_call_id: toolCallId,
      name,
      content: rawContent,
      truncated: false
    };
  }
  const truncated = rawContent.slice(0, maxChars);
  const lastNewline = truncated.lastIndexOf("\n");
  // Cut on a line boundary only when that keeps more than half of the budget.
  const clean = lastNewline > maxChars * 0.5 ? truncated.slice(0, lastNewline) : truncated;
  return {
    tool_call_id: toolCallId,
    name,
    content: `${clean}\n... (${rawContent.length - clean.length} more chars truncated)`,
    truncated: true
  };
}
948
/**
 * Return a copy of a tool message whose string content has been run through
 * `summarizeToolOutput`. Non-tool messages and non-string contents are
 * returned as-is.
 *
 * @param {object} msg - chat message.
 * @param {number} [maxChars=DEFAULT_MAX_CHARS]
 * @returns {object}
 */
function summarizeToolMessage(msg, maxChars = DEFAULT_MAX_CHARS) {
  const isSummarizable = msg.role === "tool" && typeof msg.content === "string";
  if (!isSummarizable) return msg;
  const callId = msg.tool_call_id ?? "";
  const { content } = summarizeToolOutput(callId, msg.name, msg.content, maxChars);
  return { ...msg, content };
}
958
/**
 * Apply `summarizeToolMessage` to every tool-role message in a list; all
 * other messages keep their identity.
 *
 * @param {Array<object>} messages
 * @param {number} [maxChars=DEFAULT_MAX_CHARS]
 * @returns {Array<object>} new array of the same length.
 */
function summarizeToolMessages(messages, maxChars = DEFAULT_MAX_CHARS) {
  return messages.map((message) => {
    if (message.role !== "tool") return message;
    return summarizeToolMessage(message, maxChars);
  });
}
961
+ var DEFAULT_MAX_CHARS, outputHashCache;
962
+ var init_tool_output_summarizer = __esm({
963
+ "src/agent/tool-output-summarizer.ts"() {
964
+ "use strict";
965
+ DEFAULT_MAX_CHARS = 800;
966
+ outputHashCache = /* @__PURE__ */ new Map();
967
+ }
968
+ });
969
+
970
+ // src/agent/context-builder.ts
971
/**
 * Build the message list actually sent to the model for one request, and
 * measure it.
 *
 * The history (everything after the system/session prefix) has its tool
 * outputs summarised and is trimmed to the last `limits.maxRecentMessages`
 * user turns. If the estimate still exceeds `limits.maxInputTokensPerRequest`,
 * the history is compacted with `compactHistoryForSafety`.
 *
 * Two details matter for correctness:
 *  - `currentUserMessage` is normally already part of `allMessages` (the
 *    agent loop picks it out of the same array). It is appended, and counted
 *    as history, only when it is NOT already present. Otherwise the prompt
 *    would end with a duplicate of the user message and its tokens would be
 *    counted twice.
 *  - A compaction that only shortened tool outputs (nothing dropped) is
 *    still used. Discarding it would report the context as over the limit
 *    even though the shortened version may fit.
 *
 * @param {object} opts2
 * @param {Array<object>} opts2.allMessages - prefix + full history.
 * @param {Array<object>} opts2.systemMessages
 * @param {Array<object>} opts2.sessionMessages
 * @param {*} opts2.toolDefs
 * @param {object} opts2.limits - reads `maxToolOutputChars`,
 *   `maxRecentMessages`, `maxInputTokensPerRequest`.
 * @param {object|null} [opts2.currentUserMessage]
 * @returns {{messages: Array<object>, breakdown: object, wasCompacted: boolean, removedCount: number, exceedsLimit: boolean}}
 */
function buildContext(opts2) {
  const { allMessages, systemMessages, sessionMessages, toolDefs, limits, currentUserMessage } = opts2;
  const maxTokens = limits.maxInputTokensPerRequest;
  const prefixLength = systemMessages.length + sessionMessages.length;
  const history = summarizeToolMessages(allMessages.slice(prefixLength), limits.maxToolOutputChars);
  const recentHistory = keepLastTurns(history, limits.maxRecentMessages);

  const userMessage = currentUserMessage ? currentUserMessage : null;
  // Summarising and trimming keep non-tool messages by reference, so an
  // identity check is enough to see whether the user message is already in.
  const userInHistory = userMessage !== null && recentHistory.includes(userMessage);

  // Measure a history, attributing the user message to `fromUserInput` exactly once.
  const measure = (historyPart) => breakdownTokens(
    systemMessages,
    sessionMessages,
    toolDefs,
    userInHistory ? historyPart.filter((m) => m !== userMessage) : historyPart,
    userMessage
  );
  // Assemble the final message list for a given history.
  const assemble = (historyPart) => {
    const out = [...systemMessages, ...sessionMessages, ...historyPart];
    if (userMessage !== null && !userInHistory) out.push(userMessage);
    return out;
  };
  const finish = (historyPart, breakdown, wasCompacted, removedCount) => ({
    messages: assemble(historyPart),
    breakdown,
    wasCompacted,
    removedCount,
    exceedsLimit: breakdown.total > maxTokens
  });

  const initial = measure(recentHistory);
  if (initial.total <= maxTokens) {
    return finish(recentHistory, initial, false, 0);
  }

  // Budget left for the history passed to the compactor. When the user
  // message lives inside that history its tokens are part of the budget.
  const fixedTokens = initial.fromSystem + initial.fromSession + initial.fromTools + (userInHistory ? 0 : initial.fromUserInput);
  const compacted = compactHistoryForSafety(recentHistory, maxTokens - fixedTokens);
  const changed = compacted.removedCount > 0 || compacted.messages.some((m, i) => m !== recentHistory[i]);
  if (!changed) {
    return finish(recentHistory, initial, false, 0);
  }
  return finish(compacted.messages, measure(compacted.messages), true, compacted.removedCount);
}
1032
/**
 * Keep only the last `maxTurns` turns of a history, where a turn starts at
 * each user message.
 *
 * @param {Array<object>} messages
 * @param {number} maxTurns
 * @returns {Array<object>} `[]` when `maxTurns <= 0`; `messages` itself when
 *   it has at most `maxTurns` user messages; otherwise the tail starting at
 *   the `maxTurns`-th user message from the end.
 */
function keepLastTurns(messages, maxTurns) {
  if (maxTurns <= 0) return [];
  const userIndexes = [];
  for (const [index, message] of messages.entries()) {
    if (message.role === "user") userIndexes.push(index);
  }
  const turnCount = userIndexes.length;
  if (turnCount <= maxTurns) return messages;
  const firstKept = userIndexes[turnCount - maxTurns] ?? 0;
  return messages.slice(firstKept);
}
1046
+ var init_context_builder = __esm({
1047
+ "src/agent/context-builder.ts"() {
1048
+ "use strict";
1049
+ init_token_limits();
1050
+ init_tool_output_summarizer();
1051
+ }
1052
+ });
1053
+
716
1054
  // src/agent/loop.ts
717
1055
  async function runAgentTurn(opts2) {
718
- const max = opts2.maxToolIterations ?? 50;
1056
+ const limits = loadSafetyLimits();
1057
+ const max = opts2.maxToolIterations ?? limits.maxToolIterations;
719
1058
  const toolDefs = toOpenAIToolDefs(opts2.tools);
720
1059
  let turn = 0;
721
1060
  let lastUsage = null;
1061
+ const systemMessages = opts2.systemMessages ?? extractSystemMessages(opts2.messages);
1062
+ const sessionMessages = opts2.sessionMessages ?? [];
722
1063
  for (let iter = 0; iter < max; iter++) {
723
1064
  turn++;
724
1065
  const previousMessages = opts2.messages.slice();
@@ -738,8 +1079,8 @@ async function runAgentTurn(opts2) {
738
1079
  keepLast: Number.isNaN(keepLast) ? 1 : keepLast
739
1080
  });
740
1081
  if (shadowStrip) {
741
- const originalSections = analyzePrompt(opts2.messages);
742
- const strippedSections = analyzePrompt(stripped);
1082
+ const originalSections = analyzePromptSections(opts2.messages);
1083
+ const strippedSections = analyzePromptSections(stripped);
743
1084
  const originalApproxTokens = originalSections.reduce(
744
1085
  (sum, s) => sum + s.approxTokens,
745
1086
  0
@@ -760,15 +1101,49 @@ async function runAgentTurn(opts2) {
760
1101
  apiMessages = stripped;
761
1102
  }
762
1103
  }
1104
+ if (opts2.keepLastImageTurns !== void 0) {
1105
+ apiMessages = stripOldImages(apiMessages, opts2.keepLastImageTurns);
1106
+ }
1107
+ const currentUserMessage = findCurrentUserMessage(apiMessages);
1108
+ const context = buildContext({
1109
+ allMessages: apiMessages,
1110
+ systemMessages,
1111
+ sessionMessages,
1112
+ toolDefs,
1113
+ limits,
1114
+ currentUserMessage
1115
+ });
1116
+ if (opts2.sessionId) {
1117
+ void logTurnTokenMetrics(
1118
+ buildTurnTokenMetrics(
1119
+ opts2.sessionId,
1120
+ turn,
1121
+ context.breakdown,
1122
+ opts2.maxCompletionTokens ?? limits.maxCompletionTokens,
1123
+ context.wasCompacted,
1124
+ context.removedCount,
1125
+ context.exceedsLimit
1126
+ )
1127
+ );
1128
+ }
1129
+ if (context.exceedsLimit) {
1130
+ const assistantMsg3 = {
1131
+ role: "assistant",
1132
+ content: `I cannot continue: the conversation context exceeds the safety limit of ${limits.maxInputTokensPerRequest} tokens. Try running /compact or /clear to reduce context size.`
1133
+ };
1134
+ opts2.messages.push(assistantMsg3);
1135
+ opts2.callbacks.onAssistantFinal?.(assistantMsg3);
1136
+ return;
1137
+ }
763
1138
  const events = runKimi({
764
1139
  accountId: opts2.accountId,
765
1140
  apiToken: opts2.apiToken,
766
1141
  model: opts2.model,
767
- messages: apiMessages,
1142
+ messages: context.messages,
768
1143
  tools: toolDefs,
769
1144
  signal: opts2.signal,
770
1145
  temperature: opts2.temperature,
771
- maxCompletionTokens: opts2.maxCompletionTokens,
1146
+ maxCompletionTokens: opts2.maxCompletionTokens ?? limits.maxCompletionTokens,
772
1147
  reasoningEffort: opts2.reasoningEffort,
773
1148
  sessionId: opts2.sessionId
774
1149
  });
@@ -807,7 +1182,7 @@ async function runAgentTurn(opts2) {
807
1182
  break;
808
1183
  }
809
1184
  }
810
- const assistantMsg = {
1185
+ const assistantMsg2 = {
811
1186
  role: "assistant",
812
1187
  content: content ? sanitizeString(content) : null,
813
1188
  ...reasoning ? { reasoning_content: sanitizeString(reasoning) } : {},
@@ -821,8 +1196,8 @@ async function runAgentTurn(opts2) {
821
1196
  }))
822
1197
  } : {}
823
1198
  };
824
- opts2.messages.push(assistantMsg);
825
- opts2.callbacks.onAssistantFinal?.(assistantMsg);
1199
+ opts2.messages.push(assistantMsg2);
1200
+ opts2.callbacks.onAssistantFinal?.(assistantMsg2);
826
1201
  if (toolCalls.length === 0) {
827
1202
  if (opts2.sessionId && lastUsage) {
828
1203
  void logTurnDebug({
@@ -865,7 +1240,36 @@ async function runAgentTurn(opts2) {
865
1240
  });
866
1241
  }
867
1242
  }
868
- throw new Error(`kimiflare: tool iteration limit reached (${opts2.maxToolIterations ?? 50})`);
1243
+ const remaining = toolCallsFromMessages(opts2.messages);
1244
+ const assistantMsg = {
1245
+ role: "assistant",
1246
+ content: `I reached the tool iteration limit (${max}). There ${remaining === 1 ? "is" : "are"} ${remaining} pending tool call${remaining === 1 ? "" : "s"} that could not be executed. Run /compact or /clear to reset context, or rephrase your request.`
1247
+ };
1248
+ opts2.messages.push(assistantMsg);
1249
+ opts2.callbacks.onAssistantFinal?.(assistantMsg);
1250
+ }
1251
/**
 * Return a copy of the leading run of system messages, i.e. everything
 * before the first message whose role is not "system".
 *
 * @param {Array<object>} messages
 * @returns {Array<object>} new array; empty when the first message is not a
 *   system message.
 */
function extractSystemMessages(messages) {
  let prefixLength = 0;
  while (prefixLength < messages.length && messages[prefixLength].role === "system") {
    prefixLength++;
  }
  return messages.slice(0, prefixLength);
}
1255
/**
 * Find the most recent user message after the leading system prefix.
 *
 * @param {Array<object>} messages
 * @returns {object|null} the last message with role "user", or `null` when
 *   there is none.
 */
function findCurrentUserMessage(messages) {
  const firstNonSystem = messages.findIndex((m) => m.role !== "system");
  if (firstNonSystem === -1) return null;
  const newestFirst = messages.slice(firstNonSystem).reverse();
  const latestUser = newestFirst.find((m) => m.role === "user");
  return latestUser ?? null;
}
1265
/**
 * Count tool calls that were requested by the assistant but never answered
 * by a tool message.
 *
 * The result is shown to the user as the number of "pending tool calls that
 * could not be executed", so calls that already have a result must not be
 * counted. Counting every call in the transcript would over-report.
 *
 * @param {Array<object>} messages - full conversation.
 * @returns {number} number of assistant tool calls with no tool message whose
 *   `tool_call_id` matches the call's `id`.
 */
function toolCallsFromMessages(messages) {
  const answeredIds = new Set();
  for (const m of messages) {
    if (m.role === "tool" && m.tool_call_id != null) {
      answeredIds.add(m.tool_call_id);
    }
  }
  let pending = 0;
  for (const m of messages) {
    if (m.role !== "assistant" || !Array.isArray(m.tool_calls)) continue;
    for (const tc of m.tool_calls) {
      // Assumes each tool call carries an `id` (OpenAI-style); a call without
      // one cannot be matched and is counted as pending.
      if (!answeredIds.has(tc.id)) pending++;
    }
  }
  return pending;
}
870
1274
  function validateToolArguments(raw) {
871
1275
  if (!raw || !raw.trim()) return "{}";
@@ -876,6 +1280,25 @@ function validateToolArguments(raw) {
876
1280
  return "{}";
877
1281
  }
878
1282
  }
1283
/**
 * Measure each message of a prompt.
 *
 * Counts the characters of string content (or of the text parts of array
 * content; other parts are ignored), the reasoning content, and the name and
 * argument string of every tool call.
 *
 * @param {Array<object>} messages
 * @returns {Array<{role: string, chars: number, approxTokens: number}>} one
 *   entry per message; `approxTokens` is `chars / 4` rounded up.
 */
function analyzePromptSections(messages) {
  const textLength = (content) => {
    if (typeof content === "string") return content.length;
    if (!Array.isArray(content)) return 0;
    let length = 0;
    for (const part of content) {
      if (part.type === "text") length += part.text.length;
    }
    return length;
  };
  return messages.map((message) => {
    let chars = textLength(message.content);
    if (message.reasoning_content) {
      chars += message.reasoning_content.length;
    }
    if (message.tool_calls) {
      for (const { function: fn } of message.tool_calls) {
        chars += fn.name.length + fn.arguments.length;
      }
    }
    const approxTokens = Math.ceil(chars / 4);
    return { role: message.role, chars, approxTokens };
  });
}
879
1302
  var init_loop = __esm({
880
1303
  "src/agent/loop.ts"() {
881
1304
  "use strict";
@@ -884,6 +1307,8 @@ var init_loop = __esm({
884
1307
  init_messages();
885
1308
  init_cost_debug();
886
1309
  init_strip_reasoning();
1310
+ init_token_limits();
1311
+ init_context_builder();
887
1312
  }
888
1313
  });
889
1314
 
@@ -2368,6 +2793,77 @@ var init_update_check = __esm({
2368
2793
  }
2369
2794
  });
2370
2795
 
2796
+ // src/usage-cli.ts
2797
+ var usage_cli_exports = {};
2798
+ __export(usage_cli_exports, {
2799
+ showUsageLog: () => showUsageLog
2800
+ });
2801
+ import { readFile as readFile7 } from "fs/promises";
2802
+ import { homedir as homedir6 } from "os";
2803
+ import { join as join7 } from "path";
2804
/** Path of the per-turn usage log read by the usage CLI: `~/.kimiflare/usage.jsonl`. */
function usagePath2() {
  const segments = [homedir6(), ".kimiflare", "usage.jsonl"];
  return join7(...segments);
}
2807
/**
 * Format a number for display using the runtime's locale.
 *
 * Values come from log entries parsed off disk, so a field may be missing or
 * of the wrong type. Those render as "n/a" instead of throwing.
 *
 * @param {number} n
 * @returns {string}
 */
function fmt(n) {
  return typeof n === "number" && Number.isFinite(n) ? n.toLocaleString() : "n/a";
}
2810
/**
 * Print a per-session summary of the per-turn usage log to stdout.
 *
 * Reads the JSONL file at `usagePath2()`, groups entries by session id and,
 * for each session, prints the turn count, the last turn's token estimates
 * and breakdown, the average estimated input size, and compaction/limit flags.
 *
 * Lines that are not valid JSON, or that do not look like a usage entry (not
 * an object, or no numeric `estimatedInputTokens`), are skipped. The header
 * reports the number of entries actually used, not the raw line count.
 *
 * @returns {Promise<void>}
 */
async function showUsageLog() {
  const path = usagePath2();
  let raw;
  try {
    raw = await readFile7(path, "utf8");
  } catch {
    // A missing or unreadable log is an expected state, not an error.
    console.log("No usage log found at " + path);
    return;
  }
  const lines = raw.trim().split("\n").filter(Boolean);
  if (lines.length === 0) {
    console.log("Usage log is empty.");
    return;
  }
  const entries = [];
  for (const line of lines) {
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Tolerate a torn or corrupt line (e.g. an interrupted append).
      continue;
    }
    if (parsed !== null && typeof parsed === "object" && typeof parsed.estimatedInputTokens === "number") {
      entries.push(parsed);
    }
  }
  if (entries.length === 0) {
    console.log("No valid entries in usage log.");
    return;
  }
  const bySession = /* @__PURE__ */ new Map();
  for (const e of entries) {
    // Coerce so a missing or non-string id cannot break the `.slice` below.
    const key = String(e.sessionId ?? "unknown");
    const arr = bySession.get(key) ?? [];
    arr.push(e);
    bySession.set(key, arr);
  }
  console.log(`Usage log: ${entries.length} entries, ${bySession.size} session(s)\n`);
  for (const [sessionId, sessEntries] of bySession) {
    const last = sessEntries[sessEntries.length - 1];
    const totalInput = sessEntries.reduce((s, e) => s + e.estimatedInputTokens, 0);
    const avgInput = Math.round(totalInput / sessEntries.length);
    console.log(`Session: ${sessionId.slice(0, 16)}\u2026 Turns: ${sessEntries.length}`);
    console.log(` Last turn: ${fmt(last.estimatedInputTokens)} input tokens / ${fmt(last.estimatedOutputTokenCap)} output cap`);
    console.log(` Avg input: ${fmt(avgInput)} tokens`);
    console.log(` Messages: ${last.messageCount} | Tool outputs: ${last.toolOutputCount}`);
    console.log(` Breakdown: system=${fmt(last.tokensFromSystem)} session=${fmt(last.tokensFromSession)} tools=${fmt(last.tokensFromTools)} history=${fmt(last.tokensFromHistory)} user=${fmt(last.tokensFromUserInput)}`);
    if (last.wasCompacted) {
      console.log(` \u26A0\uFE0F Compacted: removed ${last.removedCount} messages`);
    }
    if (last.exceedsLimit) {
      console.log(` \u274C EXCEEDS LIMIT`);
    }
    console.log("");
  }
}
2861
+ var init_usage_cli = __esm({
2862
+ "src/usage-cli.ts"() {
2863
+ "use strict";
2864
+ }
2865
+ });
2866
+
2371
2867
  // src/agent/compact.ts
2372
2868
  function indexOfNthUserFromEnd(messages, n) {
2373
2869
  let seen = 0;
@@ -2574,7 +3070,7 @@ var init_session_state = __esm({
2574
3070
  function approxTokens2(n) {
2575
3071
  return Math.round(n / 4);
2576
3072
  }
2577
- function estimateMessageTokens(m) {
3073
+ function estimateMessageTokens2(m) {
2578
3074
  let chars = 0;
2579
3075
  if (typeof m.content === "string") {
2580
3076
  chars = m.content.length;
@@ -2590,7 +3086,7 @@ function estimateMessageTokens(m) {
2590
3086
  return approxTokens2(chars);
2591
3087
  }
2592
3088
  function estimatePromptTokens(messages) {
2593
- return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
3089
+ return messages.reduce((sum, m) => sum + estimateMessageTokens2(m), 0);
2594
3090
  }
2595
3091
  function groupIntoTurns(messages) {
2596
3092
  const prefix = [];
@@ -2770,10 +3266,10 @@ function shouldCompact(opts2) {
2770
3266
  return tokens > tokenThreshold || turns.length > turnThreshold;
2771
3267
  }
2772
3268
  function compactMessages2(opts2) {
2773
- const keepLastTurns = opts2.keepLastTurns ?? 4;
3269
+ const keepLastTurns2 = opts2.keepLastTurns ?? 4;
2774
3270
  const { prefix, turns } = groupIntoTurns(opts2.messages);
2775
3271
  const tokensBefore = estimatePromptTokens(opts2.messages);
2776
- if (turns.length <= keepLastTurns) {
3272
+ if (turns.length <= keepLastTurns2) {
2777
3273
  return {
2778
3274
  newMessages: opts2.messages,
2779
3275
  newState: opts2.state,
@@ -2787,8 +3283,8 @@ function compactMessages2(opts2) {
2787
3283
  }
2788
3284
  };
2789
3285
  }
2790
- const toCompact = turns.slice(0, turns.length - keepLastTurns);
2791
- const toKeep = turns.slice(turns.length - keepLastTurns);
3286
+ const toCompact = turns.slice(0, turns.length - keepLastTurns2);
3287
+ const toKeep = turns.slice(turns.length - keepLastTurns2);
2792
3288
  let newState = { ...opts2.state };
2793
3289
  let archivedCount = 0;
2794
3290
  for (let i = 0; i < toCompact.length; i++) {
@@ -4819,12 +5315,12 @@ __export(sessions_exports, {
4819
5315
  pruneSessions: () => pruneSessions,
4820
5316
  saveSession: () => saveSession
4821
5317
  });
4822
- import { readFile as readFile7, writeFile as writeFile5, mkdir as mkdir5, readdir as readdir2, stat as stat3 } from "fs/promises";
4823
- import { homedir as homedir6 } from "os";
4824
- import { join as join7 } from "path";
5318
+ import { readFile as readFile8, writeFile as writeFile5, mkdir as mkdir5, readdir as readdir2, stat as stat3 } from "fs/promises";
5319
+ import { homedir as homedir7 } from "os";
5320
+ import { join as join8 } from "path";
4825
5321
  function sessionsDir() {
4826
- const xdg = process.env.XDG_DATA_HOME || join7(homedir6(), ".local", "share");
4827
- return join7(xdg, "kimiflare", "sessions");
5322
+ const xdg = process.env.XDG_DATA_HOME || join8(homedir7(), ".local", "share");
5323
+ return join8(xdg, "kimiflare", "sessions");
4828
5324
  }
4829
5325
  function sanitize(text) {
4830
5326
  return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 40);
@@ -4837,7 +5333,7 @@ function makeSessionId(firstPrompt) {
4837
5333
  async function saveSession(file) {
4838
5334
  const dir = sessionsDir();
4839
5335
  await mkdir5(dir, { recursive: true });
4840
- const path = join7(dir, `${file.id}.json`);
5336
+ const path = join8(dir, `${file.id}.json`);
4841
5337
  await writeFile5(path, JSON.stringify(file, null, 2), "utf8");
4842
5338
  return path;
4843
5339
  }
@@ -4857,9 +5353,9 @@ async function listSessions(limit = 30) {
4857
5353
  const summaries = [];
4858
5354
  for (const name of entries) {
4859
5355
  if (!name.endsWith(".json")) continue;
4860
- const path = join7(dir, name);
5356
+ const path = join8(dir, name);
4861
5357
  try {
4862
- const [s, raw] = await Promise.all([stat3(path), readFile7(path, "utf8")]);
5358
+ const [s, raw] = await Promise.all([stat3(path), readFile8(path, "utf8")]);
4863
5359
  const parsed = JSON.parse(raw);
4864
5360
  const firstUser = parsed.messages.find((m) => m.role === "user");
4865
5361
  const firstPrompt = typeof firstUser?.content === "string" ? firstUser.content : firstUser?.content ? firstUser.content.find((p) => p.type === "text")?.text ?? "(no prompt)" : "(no prompt)";
@@ -4878,7 +5374,7 @@ async function listSessions(limit = 30) {
4878
5374
  return summaries.slice(0, limit);
4879
5375
  }
4880
5376
  async function loadSession(filePath) {
4881
- const raw = await readFile7(filePath, "utf8");
5377
+ const raw = await readFile8(filePath, "utf8");
4882
5378
  return JSON.parse(raw);
4883
5379
  }
4884
5380
  var init_sessions = __esm({
@@ -4889,10 +5385,10 @@ var init_sessions = __esm({
4889
5385
  });
4890
5386
 
4891
5387
  // src/util/image.ts
4892
- import { readFile as readFile8 } from "fs/promises";
5388
+ import { readFile as readFile9 } from "fs/promises";
4893
5389
  import { basename as basename2 } from "path";
4894
5390
  async function encodeImageFile(filePath) {
4895
- const buf = await readFile8(filePath);
5391
+ const buf = await readFile9(filePath);
4896
5392
  if (buf.byteLength > MAX_IMAGE_BYTES) {
4897
5393
  throw new Error(
4898
5394
  `image too large (${(buf.byteLength / 1024 / 1024).toFixed(1)} MB); max is ${MAX_IMAGE_BYTES / 1024 / 1024} MB`
@@ -4928,15 +5424,15 @@ var init_image = __esm({
4928
5424
  });
4929
5425
 
4930
5426
  // src/usage-tracker.ts
4931
- import { readFile as readFile9, writeFile as writeFile6, mkdir as mkdir6 } from "fs/promises";
4932
- import { homedir as homedir7 } from "os";
4933
- import { join as join8 } from "path";
4934
- function usageDir() {
4935
- const xdg = process.env.XDG_DATA_HOME || join8(homedir7(), ".local", "share");
4936
- return join8(xdg, "kimiflare");
5427
+ import { readFile as readFile10, writeFile as writeFile6, mkdir as mkdir6 } from "fs/promises";
5428
+ import { homedir as homedir8 } from "os";
5429
+ import { join as join9 } from "path";
5430
+ function usageDir2() {
5431
+ const xdg = process.env.XDG_DATA_HOME || join9(homedir8(), ".local", "share");
5432
+ return join9(xdg, "kimiflare");
4937
5433
  }
4938
- function usagePath() {
4939
- return join8(usageDir(), "usage.json");
5434
+ function usagePath3() {
5435
+ return join9(usageDir2(), "usage.json");
4940
5436
  }
4941
5437
  function today() {
4942
5438
  return (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
@@ -4947,7 +5443,7 @@ function cutoffDate(daysBack) {
4947
5443
  }
4948
5444
  async function loadLog() {
4949
5445
  try {
4950
- const raw = await readFile9(usagePath(), "utf8");
5446
+ const raw = await readFile10(usagePath3(), "utf8");
4951
5447
  const parsed = JSON.parse(raw);
4952
5448
  if (parsed.version === LOG_VERSION2) return parsed;
4953
5449
  } catch {
@@ -4955,8 +5451,8 @@ async function loadLog() {
4955
5451
  return { version: LOG_VERSION2, days: [], sessions: [] };
4956
5452
  }
4957
5453
  async function saveLog(log) {
4958
- await mkdir6(usageDir(), { recursive: true });
4959
- await writeFile6(usagePath(), JSON.stringify(log, null, 2), "utf8");
5454
+ await mkdir6(usageDir2(), { recursive: true });
5455
+ await writeFile6(usagePath3(), JSON.stringify(log, null, 2), "utf8");
4960
5456
  }
4961
5457
  function getOrCreateDay(log, date) {
4962
5458
  let day = log.days.find((d) => d.date === date);
@@ -5073,7 +5569,7 @@ __export(app_exports, {
5073
5569
  import { useState as useState6, useRef as useRef3, useEffect as useEffect4, useCallback } from "react";
5074
5570
  import { Box as Box12, Text as Text13, useApp, useInput as useInput2, render } from "ink";
5075
5571
  import { existsSync } from "fs";
5076
- import { join as join9 } from "path";
5572
+ import { join as join10 } from "path";
5077
5573
  import { unlink as unlink2 } from "fs/promises";
5078
5574
  import { jsx as jsx13, jsxs as jsxs12 } from "react/jsx-runtime";
5079
5575
  function capEvents(prev) {
@@ -5139,9 +5635,10 @@ function App({ initialCfg, initialUpdateResult }) {
5139
5635
  const [hasUpdate, setHasUpdate] = useState6(initialUpdateResult?.hasUpdate ?? false);
5140
5636
  const [latestVersion, setLatestVersion] = useState6(initialUpdateResult?.latestVersion ?? null);
5141
5637
  const cacheStableRef = useRef3(initialCfg?.cacheStablePrompts !== false);
5142
- const messagesRef = useRef3(
5638
+ const systemMessagesRef = useRef3(
5143
5639
  makePrefixMessages(cacheStableRef.current, cfg?.model ?? DEFAULT_MODEL, "edit", ALL_TOOLS)
5144
5640
  );
5641
+ const messagesRef = useRef3(systemMessagesRef.current.slice());
5145
5642
  const executorRef = useRef3(new ToolExecutor(ALL_TOOLS));
5146
5643
  const activeAsstIdRef = useRef3(null);
5147
5644
  const activeControllerRef = useRef3(null);
@@ -5226,7 +5723,7 @@ function App({ initialCfg, initialUpdateResult }) {
5226
5723
  useEffect4(() => {
5227
5724
  modeRef.current = mode;
5228
5725
  if (cacheStableRef.current) {
5229
- messagesRef.current[1] = {
5726
+ const sessionMsg = {
5230
5727
  role: "system",
5231
5728
  content: buildSessionPrefix({
5232
5729
  cwd: process.cwd(),
@@ -5235,8 +5732,10 @@ function App({ initialCfg, initialUpdateResult }) {
5235
5732
  mode
5236
5733
  })
5237
5734
  };
5735
+ messagesRef.current[1] = sessionMsg;
5736
+ systemMessagesRef.current[1] = sessionMsg;
5238
5737
  } else {
5239
- messagesRef.current[0] = {
5738
+ const sysMsg = {
5240
5739
  role: "system",
5241
5740
  content: buildSystemPrompt({
5242
5741
  cwd: process.cwd(),
@@ -5245,6 +5744,8 @@ function App({ initialCfg, initialUpdateResult }) {
5245
5744
  mode
5246
5745
  })
5247
5746
  };
5747
+ messagesRef.current[0] = sysMsg;
5748
+ systemMessagesRef.current[0] = sysMsg;
5248
5749
  }
5249
5750
  if (mode === "plan") {
5250
5751
  executorRef.current.clearSessionPermissions();
@@ -5319,7 +5820,7 @@ function App({ initialCfg, initialUpdateResult }) {
5319
5820
  }
5320
5821
  if (totalTools > 0) {
5321
5822
  if (cacheStableRef.current) {
5322
- messagesRef.current[1] = {
5823
+ const sessionMsg = {
5323
5824
  role: "system",
5324
5825
  content: buildSessionPrefix({
5325
5826
  cwd: process.cwd(),
@@ -5328,8 +5829,10 @@ function App({ initialCfg, initialUpdateResult }) {
5328
5829
  mode: modeRef.current
5329
5830
  })
5330
5831
  };
5832
+ messagesRef.current[1] = sessionMsg;
5833
+ systemMessagesRef.current[1] = sessionMsg;
5331
5834
  } else {
5332
- messagesRef.current[0] = {
5835
+ const sysMsg = {
5333
5836
  role: "system",
5334
5837
  content: buildSystemPrompt({
5335
5838
  cwd: process.cwd(),
@@ -5338,6 +5841,8 @@ function App({ initialCfg, initialUpdateResult }) {
5338
5841
  mode: modeRef.current
5339
5842
  })
5340
5843
  };
5844
+ messagesRef.current[0] = sysMsg;
5845
+ systemMessagesRef.current[0] = sysMsg;
5341
5846
  }
5342
5847
  setEvents((e) => [
5343
5848
  ...e,
@@ -5514,13 +6019,13 @@ function App({ initialCfg, initialUpdateResult }) {
5514
6019
  }
5515
6020
  const cwd = process.cwd();
5516
6021
  for (const name of ["KIMI.md", "KIMIFLARE.md", "AGENT.md"]) {
5517
- if (existsSync(join9(cwd, name))) {
6022
+ if (existsSync(join10(cwd, name))) {
5518
6023
  setEvents((e) => [
5519
6024
  ...e,
5520
6025
  {
5521
6026
  kind: "info",
5522
6027
  key: mkKey(),
5523
- text: `${name} already exists at ${join9(cwd, name)} \u2014 delete it first if you want to regenerate`
6028
+ text: `${name} already exists at ${join10(cwd, name)} \u2014 delete it first if you want to regenerate`
5524
6029
  }
5525
6030
  ]);
5526
6031
  return;
@@ -5560,6 +6065,7 @@ function App({ initialCfg, initialUpdateResult }) {
5560
6065
  reasoningEffort: effortRef.current,
5561
6066
  coauthor: cfg.coauthor !== false ? { name: cfg.coauthorName || "kimiflare", email: cfg.coauthorEmail || "kimiflare@proton.me" } : void 0,
5562
6067
  sessionId: ensureSessionId(),
6068
+ systemMessages: systemMessagesRef.current,
5563
6069
  callbacks: {
5564
6070
  onAssistantStart: () => {
5565
6071
  const id = nextAssistantId++;
@@ -5637,9 +6143,9 @@ function App({ initialCfg, initialUpdateResult }) {
5637
6143
  })
5638
6144
  }
5639
6145
  });
5640
- if (existsSync(join9(cwd, "KIMI.md"))) {
6146
+ if (existsSync(join10(cwd, "KIMI.md"))) {
5641
6147
  if (cacheStableRef.current) {
5642
- messagesRef.current[1] = {
6148
+ const sessionMsg = {
5643
6149
  role: "system",
5644
6150
  content: buildSessionPrefix({
5645
6151
  cwd,
@@ -5648,8 +6154,10 @@ function App({ initialCfg, initialUpdateResult }) {
5648
6154
  mode: modeRef.current
5649
6155
  })
5650
6156
  };
6157
+ messagesRef.current[1] = sessionMsg;
6158
+ systemMessagesRef.current[1] = sessionMsg;
5651
6159
  } else {
5652
- messagesRef.current[0] = {
6160
+ const sysMsg = {
5653
6161
  role: "system",
5654
6162
  content: buildSystemPrompt({
5655
6163
  cwd,
@@ -5658,6 +6166,8 @@ function App({ initialCfg, initialUpdateResult }) {
5658
6166
  mode: modeRef.current
5659
6167
  })
5660
6168
  };
6169
+ messagesRef.current[0] = sysMsg;
6170
+ systemMessagesRef.current[0] = sysMsg;
5661
6171
  }
5662
6172
  setEvents((e) => [
5663
6173
  ...e,
@@ -5684,7 +6194,10 @@ function App({ initialCfg, initialUpdateResult }) {
5684
6194
  if (!picked) return;
5685
6195
  try {
5686
6196
  const file = await loadSession(picked.filePath);
6197
+ const prefixEnd = file.messages.findIndex((m) => m.role !== "system");
6198
+ systemMessagesRef.current = prefixEnd === -1 ? file.messages.slice() : file.messages.slice(0, prefixEnd);
5687
6199
  messagesRef.current = file.messages;
6200
+ clearOutputHashCache();
5688
6201
  sessionIdRef.current = file.id;
5689
6202
  if (file.sessionState && compiledContextRef.current) {
5690
6203
  sessionStateRef.current = file.sessionState;
@@ -5747,10 +6260,12 @@ function App({ initialCfg, initialUpdateResult }) {
5747
6260
  }
5748
6261
  if (c === "/clear") {
5749
6262
  if (cacheStableRef.current && messagesRef.current.length >= 2) {
5750
- messagesRef.current = [messagesRef.current[0], messagesRef.current[1]];
6263
+ systemMessagesRef.current = [messagesRef.current[0], messagesRef.current[1]];
5751
6264
  } else {
5752
- messagesRef.current = [messagesRef.current[0]];
6265
+ systemMessagesRef.current = [messagesRef.current[0]];
5753
6266
  }
6267
+ messagesRef.current = systemMessagesRef.current.slice();
6268
+ clearOutputHashCache();
5754
6269
  sessionIdRef.current = null;
5755
6270
  sessionStateRef.current = emptySessionState();
5756
6271
  artifactStoreRef.current = new ArtifactStore();
@@ -6071,6 +6586,8 @@ use: /thinking low | medium | high`
6071
6586
  reasoningEffort: effortRef.current,
6072
6587
  coauthor: cfg.coauthor !== false ? { name: cfg.coauthorName || "kimiflare", email: cfg.coauthorEmail || "kimiflare@proton.me" } : void 0,
6073
6588
  sessionId: ensureSessionId(),
6589
+ keepLastImageTurns: cfg.imageHistoryTurns ?? 2,
6590
+ systemMessages: systemMessagesRef.current,
6074
6591
  callbacks: {
6075
6592
  onAssistantStart: () => {
6076
6593
  const id = nextAssistantId++;
@@ -6401,6 +6918,7 @@ var init_app = __esm({
6401
6918
  init_sessions();
6402
6919
  init_image();
6403
6920
  init_usage_tracker();
6921
+ init_tool_output_summarizer();
6404
6922
  CONTEXT_LIMIT = 262e3;
6405
6923
  AUTO_COMPACT_SUGGEST_PCT = 0.8;
6406
6924
  MAX_EVENTS = 500;
@@ -6425,22 +6943,30 @@ init_update_check();
6425
6943
  import { Command } from "commander";
6426
6944
  import { readFileSync as readFileSync2 } from "fs";
6427
6945
  import { fileURLToPath as fileURLToPath2 } from "url";
6428
- import { dirname as dirname3, join as join10 } from "path";
6946
+ import { dirname as dirname3, join as join11 } from "path";
6429
6947
  function readPackageVersion() {
6430
6948
  try {
6431
6949
  const here = dirname3(fileURLToPath2(import.meta.url));
6432
- const pkg = JSON.parse(readFileSync2(join10(here, "..", "package.json"), "utf8"));
6950
+ const pkg = JSON.parse(readFileSync2(join11(here, "..", "package.json"), "utf8"));
6433
6951
  return pkg.version ?? "0.0.0";
6434
6952
  } catch {
6435
6953
  return "0.0.0";
6436
6954
  }
6437
6955
  }
6438
6956
  var program = new Command();
6439
- program.name("kimiflare").description("Terminal coding agent powered by Kimi-K2.6 on Cloudflare Workers AI.").version(readPackageVersion()).option("-p, --print <prompt>", "one-shot mode: send prompt, stream reply to stdout, exit").option("-m, --model <id>", "model id (defaults to @cf/moonshotai/kimi-k2.6)").option("--dangerously-allow-all", "auto-approve every permission prompt (print mode only)").option("--reasoning", "include reasoning in stdout (print mode only)").parse();
6957
+ program.name("kimiflare").description("Terminal coding agent powered by Kimi-K2.6 on Cloudflare Workers AI.").version(readPackageVersion()).option("-p, --print <prompt>", "one-shot mode: send prompt, stream reply to stdout, exit").option("-m, --model <id>", "model id (defaults to @cf/moonshotai/kimi-k2.6)").option("--dangerously-allow-all", "auto-approve every permission prompt (print mode only)").option("--reasoning", "include reasoning in stdout (print mode only)").command("usage").description("show per-turn token usage log from ~/.kimiflare/usage.jsonl").action(async () => {
6958
+ const { showUsageLog: showUsageLog2 } = await Promise.resolve().then(() => (init_usage_cli(), usage_cli_exports));
6959
+ await showUsageLog2();
6960
+ });
6961
+ program.parse();
6440
6962
  var opts = program.opts();
6441
6963
  async function main() {
6442
6964
  const cfg = await loadConfig();
6443
6965
  const updateResult = await checkForUpdate();
6966
+ const args = program.args;
6967
+ if (args[0] === "usage") {
6968
+ return;
6969
+ }
6444
6970
  if (opts.print !== void 0) {
6445
6971
  if (!cfg) {
6446
6972
  console.error(
@@ -6484,8 +7010,11 @@ async function runPrintMode(opts2) {
6484
7010
  }
6485
7011
  const cwd = process.cwd();
6486
7012
  const executor = new ToolExecutor(ALL_TOOLS);
7013
+ const systemMessages = [
7014
+ { role: "system", content: buildSystemPrompt({ cwd, tools: ALL_TOOLS, model: opts2.model }) }
7015
+ ];
6487
7016
  const messages = [
6488
- { role: "system", content: buildSystemPrompt({ cwd, tools: ALL_TOOLS, model: opts2.model }) },
7017
+ ...systemMessages,
6489
7018
  { role: "user", content: opts2.prompt }
6490
7019
  ];
6491
7020
  const controller = new AbortController();
@@ -6501,6 +7030,7 @@ async function runPrintMode(opts2) {
6501
7030
  executor,
6502
7031
  cwd,
6503
7032
  signal: controller.signal,
7033
+ systemMessages,
6504
7034
  coauthor: opts2.coauthor !== false ? { name: opts2.coauthorName || "kimiflare", email: opts2.coauthorEmail || "kimiflare@proton.me" } : void 0,
6505
7035
  callbacks: {
6506
7036
  onReasoningDelta: opts2.showReasoning ? (delta) => {