kimiflare 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -41,6 +41,12 @@ async function loadConfig() {
41
41
  const compiledContext = envCompiled === "1" || envCompiled === "true" ? true : false;
42
42
  const envImageTurns = process.env.KIMIFLARE_IMAGE_HISTORY_TURNS;
43
43
  const imageHistoryTurns = envImageTurns ? parseInt(envImageTurns, 10) : void 0;
44
+ const envMaxToolIterations = process.env.KIMIFLARE_MAX_TOOL_ITERATIONS;
45
+ const maxToolIterations = envMaxToolIterations ? parseInt(envMaxToolIterations, 10) : void 0;
46
+ const envMaxInputTokens = process.env.KIMIFLARE_MAX_INPUT_TOKENS;
47
+ const maxInputTokens = envMaxInputTokens ? parseInt(envMaxInputTokens, 10) : void 0;
48
+ const envMaxCompletionTokens = process.env.KIMIFLARE_MAX_COMPLETION_TOKENS;
49
+ const maxCompletionTokens = envMaxCompletionTokens ? parseInt(envMaxCompletionTokens, 10) : void 0;
44
50
  if (envAccount && envToken) {
45
51
  return {
46
52
  accountId: envAccount,
@@ -53,7 +59,10 @@ async function loadConfig() {
53
59
  coauthorEmail: envCoauthor?.email,
54
60
  cacheStablePrompts,
55
61
  compiledContext,
56
- imageHistoryTurns: Number.isNaN(imageHistoryTurns) ? void 0 : imageHistoryTurns
62
+ imageHistoryTurns: Number.isNaN(imageHistoryTurns) ? void 0 : imageHistoryTurns,
63
+ maxToolIterations: Number.isNaN(maxToolIterations) ? void 0 : maxToolIterations,
64
+ maxInputTokens: Number.isNaN(maxInputTokens) ? void 0 : maxInputTokens,
65
+ maxCompletionTokens: Number.isNaN(maxCompletionTokens) ? void 0 : maxCompletionTokens
57
66
  };
58
67
  }
59
68
  try {
@@ -72,7 +81,10 @@ async function loadConfig() {
72
81
  mcpServers: parsed.mcpServers,
73
82
  cacheStablePrompts: parsed.cacheStablePrompts ?? cacheStablePrompts,
74
83
  compiledContext: parsed.compiledContext ?? compiledContext,
75
- imageHistoryTurns: Number.isNaN(imageHistoryTurns) ? parsed.imageHistoryTurns : imageHistoryTurns
84
+ imageHistoryTurns: Number.isNaN(imageHistoryTurns) ? parsed.imageHistoryTurns : imageHistoryTurns,
85
+ maxToolIterations: Number.isNaN(maxToolIterations) ? parsed.maxToolIterations : maxToolIterations,
86
+ maxInputTokens: Number.isNaN(maxInputTokens) ? parsed.maxInputTokens : maxInputTokens,
87
+ maxCompletionTokens: Number.isNaN(maxCompletionTokens) ? parsed.maxCompletionTokens : maxCompletionTokens
76
88
  };
77
89
  }
78
90
  } catch {
@@ -179,14 +191,14 @@ function stableStringify(value, replacer, space) {
179
191
  const sorted = sortKeys(value);
180
192
  return JSON.stringify(sorted, replacer, space);
181
193
  }
182
- function stripOldImages(messages, keepLastTurns) {
183
- if (keepLastTurns < 0) return messages;
194
+ function stripOldImages(messages, keepLastTurns2) {
195
+ if (keepLastTurns2 < 0) return messages;
184
196
  let userCount = 0;
185
197
  let cutoffIndex = messages.length;
186
198
  for (let i = messages.length - 1; i >= 0; i--) {
187
199
  if (messages[i].role === "user") {
188
200
  userCount++;
189
- if (userCount === keepLastTurns) {
201
+ if (userCount === keepLastTurns2) {
190
202
  cutoffIndex = i;
191
203
  break;
192
204
  }
@@ -227,7 +239,7 @@ async function* runKimi(opts2) {
227
239
  ...opts2.tools && opts2.tools.length ? { tools: opts2.tools, tool_choice: "auto", parallel_tool_calls: true } : {},
228
240
  stream: true,
229
241
  temperature: opts2.temperature ?? 0.2,
230
- max_completion_tokens: opts2.maxCompletionTokens ?? 16384
242
+ max_completion_tokens: opts2.maxCompletionTokens ?? 4096
231
243
  };
232
244
  if (opts2.reasoningEffort) {
233
245
  body.reasoning_effort = opts2.reasoningEffort;
@@ -417,7 +429,7 @@ var init_client = __esm({
417
429
  init_errors();
418
430
  init_messages();
419
431
  RETRYABLE_CODES = /* @__PURE__ */ new Set([3040]);
420
- MAX_ATTEMPTS = 5;
432
+ MAX_ATTEMPTS = 2;
421
433
  }
422
434
  });
423
435
 
@@ -597,6 +609,37 @@ async function logCostDebug(entry) {
597
609
  await rotateJsonl(debugPath(), RETENTION.costDebugMaxBytes, RETENTION.costDebugRotations);
598
610
  await appendFile(debugPath(), JSON.stringify(entry) + "\n", "utf8");
599
611
  }
612
/**
 * Directory holding the per-turn token usage log: `<home>/.kimiflare`.
 * @returns {string} Absolute directory path.
 */
function usageDir() {
  const home = homedir2();
  return join3(home, ".kimiflare");
}
615
/**
 * Full path of the JSON-lines usage log inside {@link usageDir}.
 * @returns {string} Path ending in `usage.jsonl`.
 */
function usagePath() {
  const dir = usageDir();
  return join3(dir, "usage.jsonl");
}
618
/**
 * Append one per-turn token metrics record to the usage log (JSON lines).
 *
 * This is best-effort telemetry: the agent loop calls it fire-and-forget
 * (`void logTurnTokenMetrics(...)`), so a rejection here would surface as an
 * unhandled promise rejection and could terminate the process. Any file-system
 * failure (read-only home, disk full, rotation error) is therefore swallowed.
 *
 * @param {object} metrics - Serializable metrics record for a single turn.
 * @returns {Promise<void>} Always resolves.
 */
async function logTurnTokenMetrics(metrics) {
  try {
    await mkdir2(usageDir(), { recursive: true });
    await rotateJsonl(usagePath(), RETENTION.costDebugMaxBytes, RETENTION.costDebugRotations);
    await appendFile(usagePath(), JSON.stringify(metrics) + "\n", "utf8");
  } catch {
    // Deliberately ignored: losing a metrics line must never break a turn.
  }
}
623
/**
 * Assemble the metrics record written to the usage log for one turn.
 *
 * @param {string} sessionId - Session the turn belongs to.
 * @param {number} turn - Turn counter within the current agent run.
 * @param {object} breakdown - Token breakdown as produced by `breakdownTokens`.
 * @param {number} estimatedOutputTokenCap - Completion token cap used for the request.
 * @param {boolean} wasCompacted - Whether the context was compacted for this turn.
 * @param {number} removedCount - Number of messages removed by compaction.
 * @param {boolean} exceedsLimit - Whether the context is still over the input limit.
 * @returns {object} Flat, JSON-serializable record (key order is the log's field order).
 */
function buildTurnTokenMetrics(sessionId, turn, breakdown, estimatedOutputTokenCap, wasCompacted, removedCount, exceedsLimit) {
  const record = { v: LOG_VERSION, ts: now(), sessionId, turn };
  record.estimatedInputTokens = breakdown.total;
  record.estimatedOutputTokenCap = estimatedOutputTokenCap;
  record.messageCount = breakdown.messageCount;
  record.toolOutputCount = breakdown.toolOutputCount;
  record.tokensFromSystem = breakdown.fromSystem;
  record.tokensFromSession = breakdown.fromSession;
  record.tokensFromTools = breakdown.fromTools;
  record.tokensFromHistory = breakdown.fromHistory;
  record.tokensFromUserInput = breakdown.fromUserInput;
  record.wasCompacted = wasCompacted;
  record.removedCount = removedCount;
  record.exceedsLimit = exceedsLimit;
  return record;
}
600
643
  function serializePrefix(messages) {
601
644
  let end = 0;
602
645
  while (end < messages.length && messages[end].role === "system") {
@@ -741,12 +784,282 @@ var init_strip_reasoning = __esm({
741
784
  }
742
785
  });
743
786
 
787
+ // src/agent/token-limits.ts
788
/**
 * Resolve the effective safety limits: each limit comes from its environment
 * variable when that parses as an integer, otherwise from DEFAULT_SAFETY_LIMITS.
 *
 * @returns {object} Limits object with the same keys as DEFAULT_SAFETY_LIMITS.
 */
function loadSafetyLimits() {
  // [limit key, environment variable] — order defines the key order of the result.
  const sources = [
    ["maxInputTokensPerRequest", "KIMIFLARE_MAX_INPUT_TOKENS"],
    ["warningThreshold", "KIMIFLARE_WARNING_TOKENS"],
    ["maxLlmCallsPerUserAction", "KIMIFLARE_MAX_LLM_CALLS"],
    ["maxRetriesPerLlmCall", "KIMIFLARE_MAX_RETRIES"],
    ["maxCompletionTokens", "KIMIFLARE_MAX_COMPLETION_TOKENS"],
    ["maxToolIterations", "KIMIFLARE_MAX_TOOL_ITERATIONS"],
    ["maxRecentMessages", "KIMIFLARE_MAX_RECENT_MESSAGES"],
    ["maxToolOutputChars", "KIMIFLARE_MAX_TOOL_OUTPUT_CHARS"]
  ];
  const limits = {};
  for (const [key, envName] of sources) {
    limits[key] = parseIntEnv(envName, DEFAULT_SAFETY_LIMITS[key]);
  }
  return limits;
}
800
/**
 * Read a non-negative integer limit from an environment variable.
 *
 * Only plain decimal integers are accepted (optional leading `+`, surrounding
 * whitespace allowed). Anything else falls back: `parseInt` alone would read
 * "3e4" as 3 and "10abc" as 10, and would accept negative limits that make
 * the loops and budgets using these values degenerate.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset or invalid.
 * @returns {number} Parsed integer, or `fallback`.
 */
function parseIntEnv(name, fallback) {
  const raw = process.env[name];
  if (!raw) return fallback;
  const text = raw.trim();
  if (!/^\+?\d+$/.test(text)) return fallback;
  const n = Number.parseInt(text, 10);
  return Number.isSafeInteger(n) ? n : fallback;
}
806
/**
 * Rough token estimate for a string: one token per four characters, rounded up.
 * @param {string} text
 * @returns {number}
 */
function estimateTokens(text) {
  const charsPerToken = 4;
  const { length } = text;
  return Math.ceil(length / charsPerToken);
}
809
/**
 * Rough token estimate for one chat message.
 *
 * Counts characters of the text content (string or `text` parts), a flat
 * 1000 characters per `image_url` part, the reasoning content, and the name
 * and argument string of every tool call; then converts at four characters
 * per token and adds a fixed 4-token overhead per message.
 *
 * @param {object} m - Chat message.
 * @returns {number} Estimated token count.
 */
function estimateMessageTokens(m) {
  const { content } = m;
  let chars = 0;
  if (typeof content === "string") {
    chars = content.length;
  } else if (Array.isArray(content)) {
    chars = content.reduce((sum, part) => {
      switch (part.type) {
        case "text":
          return sum + part.text.length;
        case "image_url":
          return sum + 1e3;
        default:
          return sum;
      }
    }, 0);
  }
  if (m.reasoning_content) {
    chars += m.reasoning_content.length;
  }
  if (m.tool_calls) {
    for (const { function: fn } of m.tool_calls) {
      chars += fn.name.length + fn.arguments.length;
    }
  }
  return Math.ceil(chars / 4) + 4;
}
828
/**
 * Sum of the per-message token estimates for a list of messages.
 * @param {object[]} messages
 * @returns {number}
 */
function estimateMessagesTokens(messages) {
  let total = 0;
  messages.forEach((m) => {
    total += estimateMessageTokens(m);
  });
  return total;
}
831
/**
 * Rough token estimate for the tool definitions, based on their JSON serialization.
 * @param {unknown} tools - Tool definitions sent with the request.
 * @returns {number}
 */
function estimateToolDefsTokens(tools) {
  const serialized = JSON.stringify(tools);
  return estimateTokens(serialized);
}
834
/**
 * Estimate where the input tokens of a request come from.
 *
 * @param {object[]} systemMessages - System prefix messages.
 * @param {object[]} sessionMessages - Session-level messages.
 * @param {unknown} toolDefs - Tool definitions sent with the request.
 * @param {object[]} historyMessages - Conversation history included in the request.
 * @param {object|null} userMessage - Separately counted user message, if any.
 * @returns {object} Per-source estimates, their `total`, `messageCount`, and
 *   `toolOutputCount` (number of `tool` messages in the history).
 */
function breakdownTokens(systemMessages, sessionMessages, toolDefs, historyMessages, userMessage) {
  const fromSystem = estimateMessagesTokens(systemMessages);
  const fromSession = estimateMessagesTokens(sessionMessages);
  const fromTools = estimateToolDefsTokens(toolDefs);
  const fromHistory = estimateMessagesTokens(historyMessages);
  let fromUserInput = 0;
  let userMessageCount = 0;
  if (userMessage) {
    fromUserInput = estimateMessageTokens(userMessage);
    userMessageCount = 1;
  }
  const toolOutputCount = historyMessages.reduce((n, m) => (m.role === "tool" ? n + 1 : n), 0);
  return {
    total: fromSystem + fromSession + fromTools + fromHistory + fromUserInput,
    fromSystem,
    fromSession,
    fromTools,
    fromHistory,
    fromUserInput,
    messageCount: systemMessages.length + sessionMessages.length + historyMessages.length + userMessageCount,
    toolOutputCount
  };
}
851
/**
 * Shrink a history so its estimated size fits `targetTokens`.
 *
 * Step 1: every string tool result longer than 200 characters is replaced by
 * a one-line stub (first 120 characters of its first line).
 * Step 2: while the estimate is still over target and more than two messages
 * remain, the oldest non-system, non-user message after index 0 is dropped.
 *
 * An assistant message that carries tool calls is dropped together with the
 * tool results that directly follow it. Dropping them one at a time could stop
 * mid-way and leave tool results with no originating assistant message, which
 * chat-completion APIs commonly reject.
 * NOTE(review): index 0 is never dropped (as before); confirm that is intended
 * when the history does not start with a user message.
 *
 * @param {object[]} messages - History messages (not mutated).
 * @param {number} targetTokens - Token budget for the returned history.
 * @returns {{messages: object[], removedCount: number}} Compacted copy and the
 *   number of messages removed in step 2.
 */
function compactHistoryForSafety(messages, targetTokens) {
  const compacted = messages.map((m) => {
    if (m.role !== "tool" || typeof m.content !== "string" || m.content.length <= 200) {
      return m;
    }
    const firstLine2 = m.content.split("\n")[0] ?? "";
    return {
      ...m,
      content: `[${m.name ?? "tool"} result (truncated)] ${firstLine2.slice(0, 120)}`
    };
  });
  let removedCount = 0;
  while (compacted.length > 2 && estimateMessagesTokens(compacted) > targetTokens) {
    const dropIndex = compacted.findIndex((m, i) => i > 0 && m.role !== "system" && m.role !== "user");
    if (dropIndex === -1) break;
    let dropCount = 1;
    const victim = compacted[dropIndex];
    if (victim.role === "assistant" && victim.tool_calls?.length) {
      // Take the answering tool results along so none is left orphaned.
      while (compacted[dropIndex + dropCount]?.role === "tool") {
        dropCount++;
      }
    }
    compacted.splice(dropIndex, dropCount);
    removedCount += dropCount;
  }
  return { messages: compacted, removedCount };
}
873
// Default safety limits; assigned when the token-limits module initializer runs.
var DEFAULT_SAFETY_LIMITS;
var init_token_limits = __esm({
  "src/agent/token-limits.ts"() {
    "use strict";
    DEFAULT_SAFETY_LIMITS = {
      // Estimated input tokens allowed per request, and the warning level below it.
      maxInputTokensPerRequest: 30000,
      warningThreshold: 15000,
      maxLlmCallsPerUserAction: 10,
      maxRetriesPerLlmCall: 2,
      maxCompletionTokens: 4096,
      maxToolIterations: 10,
      // Number of most recent user turns kept in the request context.
      maxRecentMessages: 4,
      // Character cap applied to each tool output before it is sent.
      maxToolOutputChars: 800
    };
  }
});
889
+
890
+ // src/agent/tool-output-summarizer.ts
891
+ import { createHash } from "crypto";
892
/**
 * Normalize tool output before hashing it for duplicate detection:
 * lower-case, collapse whitespace runs to one space, trim.
 *
 * The whole text is kept. Cutting it to a fixed prefix made two outputs that
 * differ only past the cut hash the same, so the second one was reported as
 * "same as previous" even though its content had changed (e.g. re-reading a
 * long file after editing it near the end).
 * NOTE(review): case and whitespace folding can still equate outputs that
 * differ only in case or indentation — confirm that is acceptable.
 *
 * @param {string} text - Raw tool output.
 * @returns {string} Normalized text.
 */
function normalizeForHash(text) {
  return text.toLowerCase().replace(/\s+/g, " ").trim();
}
895
/**
 * Short content fingerprint: the first 16 hex characters of the SHA-256 digest.
 * @param {string} text
 * @returns {string} 16-character lowercase hex string.
 */
function stableHash(text) {
  const hasher = createHash("sha256");
  hasher.update(text);
  const hex = hasher.digest("hex");
  return hex.slice(0, 16);
}
898
/**
 * Forget every remembered tool-output hash, so later outputs are no longer
 * reported as duplicates of earlier ones.
 * @returns {void}
 */
function clearOutputHashCache() {
  const cache = outputHashCache;
  cache.clear();
}
901
/**
 * Produce the compact form of a tool result that is sent to the model.
 *
 * - If the same (normalized) output was first seen under a different tool
 *   call id, a short back-reference is returned instead of the content.
 * - A long (> 200 chars) failing output is reduced to its first line.
 * - Output within `maxChars` is passed through unchanged.
 * - Anything longer is cut at `maxChars`, preferably on a line boundary, with
 *   a note saying how many characters were dropped.
 *
 * Failure detection looks at an "Error:"/"error:" prefix, or at failure
 * markers in the first or last non-empty line only. Matching markers anywhere
 * in the text made any long output that merely mentions "not found" or
 * "exit code" (source files, search results) collapse to a single line.
 *
 * @param {string} toolCallId - Id of the tool call that produced the output.
 * @param {string|undefined} name - Tool name.
 * @param {string} rawContent - Raw tool output.
 * @param {number} [maxChars=DEFAULT_MAX_CHARS] - Character cap for the content.
 * @returns {{tool_call_id: string, name: (string|undefined), content: string, truncated: boolean}}
 */
function summarizeToolOutput(toolCallId, name, rawContent, maxChars = DEFAULT_MAX_CHARS) {
  const hash = stableHash(normalizeForHash(rawContent));
  const cached = outputHashCache.get(hash);
  if (cached && cached.firstSeenId !== toolCallId) {
    return {
      tool_call_id: toolCallId,
      name,
      content: `same as previous ${cached.name ?? "tool"} call (result_id=${hash})`,
      truncated: false
    };
  }
  if (!cached) {
    const preview = rawContent.slice(0, 120).replace(/\s+/g, " ");
    outputHashCache.set(hash, { name, firstSeenId: toolCallId, preview });
  }
  const lines = rawContent.split("\n");
  const firstLine2 = lines[0] ?? "";
  let lastLine = "";
  for (let i = lines.length - 1; i >= 0; i--) {
    if (lines[i].trim()) {
      lastLine = lines[i];
      break;
    }
  }
  // Only the edges of the output are searched; 200 chars each keeps a single
  // very long line from being scanned in full.
  const edges = `${firstLine2.slice(0, 200)}\n${lastLine.slice(-200)}`;
  const failureMarkers = ["exit code", "not found", "No such file"];
  const isFailure = rawContent.startsWith("Error:") || rawContent.startsWith("error:") || failureMarkers.some((marker) => edges.includes(marker));
  if (isFailure && rawContent.length > 200) {
    return {
      tool_call_id: toolCallId,
      name,
      content: `[${name ?? "tool"} failed] ${firstLine2.slice(0, 160)}`,
      truncated: true
    };
  }
  if (rawContent.length <= maxChars) {
    return {
      tool_call_id: toolCallId,
      name,
      content: rawContent,
      truncated: false
    };
  }
  const truncated = rawContent.slice(0, maxChars);
  const lastNewline = truncated.lastIndexOf("\n");
  // Prefer ending on a full line unless that would discard over half the budget.
  const clean = lastNewline > maxChars * 0.5 ? truncated.slice(0, lastNewline) : truncated;
  return {
    tool_call_id: toolCallId,
    name,
    content: `${clean}\n... (${rawContent.length - clean.length} more chars truncated)`,
    truncated: true
  };
}
948
/**
 * Return a copy of a tool message whose content is the summarized output.
 * Messages that are not `tool` messages, or whose content is not a string,
 * are returned as-is (same object).
 *
 * @param {object} msg - Chat message.
 * @param {number} [maxChars=DEFAULT_MAX_CHARS] - Character cap for the content.
 * @returns {object}
 */
function summarizeToolMessage(msg, maxChars = DEFAULT_MAX_CHARS) {
  const isTextToolResult = msg.role === "tool" && typeof msg.content === "string";
  if (!isTextToolResult) {
    return msg;
  }
  const { content } = summarizeToolOutput(msg.tool_call_id ?? "", msg.name, msg.content, maxChars);
  return { ...msg, content };
}
958
/**
 * Summarize every `tool` message in a list; other messages pass through untouched.
 * @param {object[]} messages
 * @param {number} [maxChars=DEFAULT_MAX_CHARS] - Character cap per tool output.
 * @returns {object[]} New array of the same length.
 */
function summarizeToolMessages(messages, maxChars = DEFAULT_MAX_CHARS) {
  const summarizeIfTool = (m) => {
    if (m.role !== "tool") {
      return m;
    }
    return summarizeToolMessage(m, maxChars);
  };
  return messages.map(summarizeIfTool);
}
961
// Default character cap for a summarized tool output.
var DEFAULT_MAX_CHARS;
// Output hash -> { name, firstSeenId, preview } of the first call that produced it.
var outputHashCache;
var init_tool_output_summarizer = __esm({
  "src/agent/tool-output-summarizer.ts"() {
    "use strict";
    DEFAULT_MAX_CHARS = 800;
    outputHashCache = /* @__PURE__ */ new Map();
  }
});
969
+
970
+ // src/agent/context-builder.ts
971
/**
 * Build the message list actually sent to the model for one request.
 *
 * The history (everything after the system and session prefix) has its tool
 * outputs summarized and is cut to the most recent `limits.maxRecentMessages`
 * user turns. If the estimate still exceeds `limits.maxInputTokensPerRequest`,
 * the history is compacted further.
 *
 * Two defects of the earlier version are fixed here:
 * - `currentUserMessage` is normally an element of `allMessages` and therefore
 *   already in the recent history. It used to be appended a second time (after
 *   any later assistant/tool messages) and counted twice. It is now appended
 *   only when the history does not already contain that object; either way it
 *   is counted exactly once, under `fromUserInput`.
 * - A compaction that only shortened tool outputs (nothing removed) used to be
 *   thrown away, so the request was refused although it would have fit. The
 *   compacted history is now used whenever compaction changed anything.
 *
 * @param {object} opts2
 * @param {object[]} opts2.allMessages - Full message list including the prefix.
 * @param {object[]} opts2.systemMessages - System prefix messages.
 * @param {object[]} opts2.sessionMessages - Session-level messages following the system prefix.
 * @param {unknown} opts2.toolDefs - Tool definitions sent with the request.
 * @param {object} opts2.limits - Safety limits (see `loadSafetyLimits`).
 * @param {object|null} [opts2.currentUserMessage] - The user message being answered.
 * @returns {{messages: object[], breakdown: object, wasCompacted: boolean, removedCount: number, exceedsLimit: boolean}}
 */
function buildContext(opts2) {
  const { allMessages, systemMessages, sessionMessages, toolDefs, limits, currentUserMessage } = opts2;
  const userMessage = currentUserMessage ? currentUserMessage : null;
  const maxInput = limits.maxInputTokensPerRequest;
  const prefixLength = systemMessages.length + sessionMessages.length;
  const history = summarizeToolMessages(allMessages.slice(prefixLength), limits.maxToolOutputChars);
  const recentHistory = keepLastTurns(history, limits.maxRecentMessages);

  // Assemble the request and its token breakdown for a given history.
  const assemble = (historyMessages) => {
    // Identity check: non-tool messages pass through summarizing and compaction unchanged.
    const inHistory = userMessage !== null && historyMessages.includes(userMessage);
    const messages = [...systemMessages, ...sessionMessages, ...historyMessages];
    if (userMessage !== null && !inHistory) {
      messages.push(userMessage);
    }
    const countedHistory = inHistory ? historyMessages.filter((m) => m !== userMessage) : historyMessages;
    const breakdown = breakdownTokens(systemMessages, sessionMessages, toolDefs, countedHistory, userMessage);
    return { messages, breakdown, inHistory };
  };

  let assembled = assemble(recentHistory);
  let wasCompacted = false;
  let removedCount = 0;
  if (assembled.breakdown.total > maxInput) {
    const { fromSystem, fromSession, fromTools, fromUserInput } = assembled.breakdown;
    // When the user message is part of the history handed to compaction, its
    // tokens are measured there and must not be subtracted a second time.
    const separateUserTokens = assembled.inHistory ? 0 : fromUserInput;
    const historyBudget = maxInput - fromSystem - fromSession - fromTools - separateUserTokens;
    const compacted = compactHistoryForSafety(recentHistory, historyBudget);
    const changed = compacted.removedCount > 0 || compacted.messages.some((m, i) => m !== recentHistory[i]);
    if (changed) {
      wasCompacted = true;
      removedCount = compacted.removedCount;
      assembled = assemble(compacted.messages);
    }
  }
  return {
    messages: assembled.messages,
    breakdown: assembled.breakdown,
    wasCompacted,
    removedCount,
    exceedsLimit: assembled.breakdown.total > maxInput
  };
}
1032
/**
 * Keep only the last `maxTurns` turns of a history, where a turn starts at
 * each `user` message.
 *
 * @param {object[]} messages - History messages.
 * @param {number} maxTurns - Number of most recent user turns to keep.
 * @returns {object[]} `[]` when `maxTurns <= 0`; the input array itself when it
 *   has at most `maxTurns` turns; otherwise a slice starting at the first kept
 *   user message.
 */
function keepLastTurns(messages, maxTurns) {
  if (maxTurns <= 0) return [];
  const turnStarts = messages.reduce((starts, m, i) => {
    if (m.role === "user") starts.push(i);
    return starts;
  }, []);
  const turnCount = turnStarts.length;
  if (turnCount <= maxTurns) {
    return messages;
  }
  const startIndex = turnStarts[turnCount - maxTurns] ?? 0;
  return messages.slice(startIndex);
}
1046
+ var init_context_builder = __esm({
1047
+ "src/agent/context-builder.ts"() {
1048
+ "use strict";
1049
+ init_token_limits();
1050
+ init_tool_output_summarizer();
1051
+ }
1052
+ });
1053
+
744
1054
  // src/agent/loop.ts
745
1055
  async function runAgentTurn(opts2) {
746
- const max = opts2.maxToolIterations ?? 50;
1056
+ const limits = loadSafetyLimits();
1057
+ const max = opts2.maxToolIterations ?? limits.maxToolIterations;
747
1058
  const toolDefs = toOpenAIToolDefs(opts2.tools);
748
1059
  let turn = 0;
749
1060
  let lastUsage = null;
1061
+ const systemMessages = opts2.systemMessages ?? extractSystemMessages(opts2.messages);
1062
+ const sessionMessages = opts2.sessionMessages ?? [];
750
1063
  for (let iter = 0; iter < max; iter++) {
751
1064
  turn++;
752
1065
  const previousMessages = opts2.messages.slice();
@@ -766,8 +1079,8 @@ async function runAgentTurn(opts2) {
766
1079
  keepLast: Number.isNaN(keepLast) ? 1 : keepLast
767
1080
  });
768
1081
  if (shadowStrip) {
769
- const originalSections = analyzePrompt(opts2.messages);
770
- const strippedSections = analyzePrompt(stripped);
1082
+ const originalSections = analyzePromptSections(opts2.messages);
1083
+ const strippedSections = analyzePromptSections(stripped);
771
1084
  const originalApproxTokens = originalSections.reduce(
772
1085
  (sum, s) => sum + s.approxTokens,
773
1086
  0
@@ -791,15 +1104,46 @@ async function runAgentTurn(opts2) {
791
1104
  if (opts2.keepLastImageTurns !== void 0) {
792
1105
  apiMessages = stripOldImages(apiMessages, opts2.keepLastImageTurns);
793
1106
  }
1107
+ const currentUserMessage = findCurrentUserMessage(apiMessages);
1108
+ const context = buildContext({
1109
+ allMessages: apiMessages,
1110
+ systemMessages,
1111
+ sessionMessages,
1112
+ toolDefs,
1113
+ limits,
1114
+ currentUserMessage
1115
+ });
1116
+ if (opts2.sessionId) {
1117
+ void logTurnTokenMetrics(
1118
+ buildTurnTokenMetrics(
1119
+ opts2.sessionId,
1120
+ turn,
1121
+ context.breakdown,
1122
+ opts2.maxCompletionTokens ?? limits.maxCompletionTokens,
1123
+ context.wasCompacted,
1124
+ context.removedCount,
1125
+ context.exceedsLimit
1126
+ )
1127
+ );
1128
+ }
1129
+ if (context.exceedsLimit) {
1130
+ const assistantMsg3 = {
1131
+ role: "assistant",
1132
+ content: `I cannot continue: the conversation context exceeds the safety limit of ${limits.maxInputTokensPerRequest} tokens. Try running /compact or /clear to reduce context size.`
1133
+ };
1134
+ opts2.messages.push(assistantMsg3);
1135
+ opts2.callbacks.onAssistantFinal?.(assistantMsg3);
1136
+ return;
1137
+ }
794
1138
  const events = runKimi({
795
1139
  accountId: opts2.accountId,
796
1140
  apiToken: opts2.apiToken,
797
1141
  model: opts2.model,
798
- messages: apiMessages,
1142
+ messages: context.messages,
799
1143
  tools: toolDefs,
800
1144
  signal: opts2.signal,
801
1145
  temperature: opts2.temperature,
802
- maxCompletionTokens: opts2.maxCompletionTokens,
1146
+ maxCompletionTokens: opts2.maxCompletionTokens ?? limits.maxCompletionTokens,
803
1147
  reasoningEffort: opts2.reasoningEffort,
804
1148
  sessionId: opts2.sessionId
805
1149
  });
@@ -838,7 +1182,7 @@ async function runAgentTurn(opts2) {
838
1182
  break;
839
1183
  }
840
1184
  }
841
- const assistantMsg = {
1185
+ const assistantMsg2 = {
842
1186
  role: "assistant",
843
1187
  content: content ? sanitizeString(content) : null,
844
1188
  ...reasoning ? { reasoning_content: sanitizeString(reasoning) } : {},
@@ -852,8 +1196,8 @@ async function runAgentTurn(opts2) {
852
1196
  }))
853
1197
  } : {}
854
1198
  };
855
- opts2.messages.push(assistantMsg);
856
- opts2.callbacks.onAssistantFinal?.(assistantMsg);
1199
+ opts2.messages.push(assistantMsg2);
1200
+ opts2.callbacks.onAssistantFinal?.(assistantMsg2);
857
1201
  if (toolCalls.length === 0) {
858
1202
  if (opts2.sessionId && lastUsage) {
859
1203
  void logTurnDebug({
@@ -896,7 +1240,36 @@ async function runAgentTurn(opts2) {
896
1240
  });
897
1241
  }
898
1242
  }
899
- throw new Error(`kimiflare: tool iteration limit reached (${opts2.maxToolIterations ?? 50})`);
1243
+ const remaining = toolCallsFromMessages(opts2.messages);
1244
+ const assistantMsg = {
1245
+ role: "assistant",
1246
+ content: `I reached the tool iteration limit (${max}). There ${remaining === 1 ? "is" : "are"} ${remaining} pending tool call${remaining === 1 ? "" : "s"} that could not be executed. Run /compact or /clear to reset context, or rephrase your request.`
1247
+ };
1248
+ opts2.messages.push(assistantMsg);
1249
+ opts2.callbacks.onAssistantFinal?.(assistantMsg);
1250
+ }
1251
/**
 * Return a copy of the leading run of `system` messages.
 * @param {object[]} messages
 * @returns {object[]} New array; empty when the first message is not a system message.
 */
function extractSystemMessages(messages) {
  const leading = [];
  for (const m of messages) {
    if (m.role !== "system") break;
    leading.push(m);
  }
  return leading;
}
1255
/**
 * Find the most recent `user` message.
 * @param {object[]} messages
 * @returns {object|null} The last message whose role is "user" (same object
 *   as in the input), or null when there is none.
 */
function findCurrentUserMessage(messages) {
  const roles = messages.map((m) => m.role);
  const lastUserIndex = roles.lastIndexOf("user");
  return lastUserIndex === -1 ? null : messages[lastUserIndex];
}
1265
/**
 * Count the tool calls that are still pending, i.e. requested by an assistant
 * message but not answered by any `tool` message.
 *
 * The caller reports this number to the user as tool calls "that could not be
 * executed". Counting every tool call in the history, as before, inflated it
 * with calls that had already run.
 * Presumably each tool call carries an `id` that tool messages echo as
 * `tool_call_id` — verify against the message builder. A call without an id
 * cannot be matched and is counted as pending.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {number} Number of unanswered tool calls.
 */
function toolCallsFromMessages(messages) {
  const answeredIds = new Set();
  for (const m of messages) {
    if (m.role === "tool" && m.tool_call_id != null) {
      answeredIds.add(m.tool_call_id);
    }
  }
  let count = 0;
  for (const m of messages) {
    if (m.role === "assistant" && m.tool_calls) {
      for (const tc of m.tool_calls) {
        if (!answeredIds.has(tc.id)) count++;
      }
    }
  }
  return count;
}
901
1274
  function validateToolArguments(raw) {
902
1275
  if (!raw || !raw.trim()) return "{}";
@@ -907,6 +1280,25 @@ function validateToolArguments(raw) {
907
1280
  return "{}";
908
1281
  }
909
1282
  }
1283
/**
 * Per-message size report for a prompt.
 *
 * For every message, counts the characters of its text content (string or
 * `text` parts), its reasoning content, and the name and argument string of
 * each tool call. Non-text content parts contribute nothing.
 *
 * @param {object[]} messages
 * @returns {{role: string, chars: number, approxTokens: number}[]} One entry
 *   per message; `approxTokens` is `chars / 4` rounded up.
 */
function analyzePromptSections(messages) {
  const sizeOf = (m) => {
    const { content } = m;
    let chars = 0;
    if (typeof content === "string") {
      chars = content.length;
    } else if (Array.isArray(content)) {
      chars = content.reduce((sum, p) => (p.type === "text" ? sum + p.text.length : sum), 0);
    }
    if (m.reasoning_content) {
      chars += m.reasoning_content.length;
    }
    if (m.tool_calls) {
      for (const { function: fn } of m.tool_calls) {
        chars += fn.name.length + fn.arguments.length;
      }
    }
    return chars;
  };
  return messages.map((m) => {
    const chars = sizeOf(m);
    return { role: m.role, chars, approxTokens: Math.ceil(chars / 4) };
  });
}
910
1302
  var init_loop = __esm({
911
1303
  "src/agent/loop.ts"() {
912
1304
  "use strict";
@@ -915,6 +1307,8 @@ var init_loop = __esm({
915
1307
  init_messages();
916
1308
  init_cost_debug();
917
1309
  init_strip_reasoning();
1310
+ init_token_limits();
1311
+ init_context_builder();
918
1312
  }
919
1313
  });
920
1314
 
@@ -2399,6 +2793,77 @@ var init_update_check = __esm({
2399
2793
  }
2400
2794
  });
2401
2795
 
2796
+ // src/usage-cli.ts
2797
+ var usage_cli_exports = {};
2798
+ __export(usage_cli_exports, {
2799
+ showUsageLog: () => showUsageLog
2800
+ });
2801
+ import { readFile as readFile7 } from "fs/promises";
2802
+ import { homedir as homedir6 } from "os";
2803
+ import { join as join7 } from "path";
2804
/**
 * Location of the per-turn usage log read by the usage CLI:
 * `<home>/.kimiflare/usage.jsonl`.
 * @returns {string}
 */
function usagePath2() {
  const home = homedir6();
  return join7(home, ".kimiflare", "usage.jsonl");
}
2807
/**
 * Format a number for display using the runtime's default locale.
 * @param {number} n
 * @returns {string}
 */
function fmt(n) {
  const display = n.toLocaleString();
  return display;
}
2810
/**
 * Print a per-session summary of the token usage log to stdout.
 *
 * Reads the JSON-lines log, skips lines that are not valid JSON objects,
 * groups the remaining entries by session id, and prints for each session the
 * figures of its last turn plus the average estimated input size.
 *
 * Compared with the earlier version:
 * - the header reports the number of parsed entries, not raw lines;
 * - JSON values that are not objects (e.g. `null`) are skipped instead of
 *   crashing the grouping step;
 * - entries without a string `sessionId` are grouped under "(unknown)";
 * - missing numeric fields are shown as 0 instead of throwing.
 *
 * @returns {Promise<void>}
 */
async function showUsageLog() {
  const path = usagePath2();
  let raw;
  try {
    raw = await readFile7(path, "utf8");
  } catch {
    console.log("No usage log found at " + path);
    return;
  }
  const lines = raw.trim().split("\n").filter(Boolean);
  if (lines.length === 0) {
    console.log("Usage log is empty.");
    return;
  }
  const entries = [];
  for (const line of lines) {
    try {
      const parsed = JSON.parse(line);
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        entries.push(parsed);
      }
    } catch {
      // A corrupt or partially written line is skipped on purpose.
    }
  }
  if (entries.length === 0) {
    console.log("No valid entries in usage log.");
    return;
  }
  const bySession = /* @__PURE__ */ new Map();
  for (const e of entries) {
    const key = typeof e.sessionId === "string" && e.sessionId ? e.sessionId : "(unknown)";
    const arr = bySession.get(key) ?? [];
    arr.push(e);
    bySession.set(key, arr);
  }
  console.log(`Usage log: ${entries.length} entries, ${bySession.size} session(s)\n`);
  const count = (value) => (Number.isFinite(value) ? value : 0);
  const num = (value) => fmt(count(value));
  for (const [sessionId, sessEntries] of bySession) {
    const last = sessEntries[sessEntries.length - 1];
    const totalInput = sessEntries.reduce((s, e) => s + count(e.estimatedInputTokens), 0);
    const avgInput = Math.round(totalInput / sessEntries.length);
    console.log(`Session: ${sessionId.slice(0, 16)}\u2026 Turns: ${sessEntries.length}`);
    console.log(` Last turn: ${num(last.estimatedInputTokens)} input tokens / ${num(last.estimatedOutputTokenCap)} output cap`);
    console.log(` Avg input: ${fmt(avgInput)} tokens`);
    console.log(` Messages: ${count(last.messageCount)} | Tool outputs: ${count(last.toolOutputCount)}`);
    console.log(` Breakdown: system=${num(last.tokensFromSystem)} session=${num(last.tokensFromSession)} tools=${num(last.tokensFromTools)} history=${num(last.tokensFromHistory)} user=${num(last.tokensFromUserInput)}`);
    if (last.wasCompacted) {
      console.log(` \u26A0\uFE0F Compacted: removed ${count(last.removedCount)} messages`);
    }
    if (last.exceedsLimit) {
      console.log(` \u274C EXCEEDS LIMIT`);
    }
    console.log("");
  }
}
2861
+ var init_usage_cli = __esm({
2862
+ "src/usage-cli.ts"() {
2863
+ "use strict";
2864
+ }
2865
+ });
2866
+
2402
2867
  // src/agent/compact.ts
2403
2868
  function indexOfNthUserFromEnd(messages, n) {
2404
2869
  let seen = 0;
@@ -2605,7 +3070,7 @@ var init_session_state = __esm({
2605
3070
  function approxTokens2(n) {
2606
3071
  return Math.round(n / 4);
2607
3072
  }
2608
- function estimateMessageTokens(m) {
3073
+ function estimateMessageTokens2(m) {
2609
3074
  let chars = 0;
2610
3075
  if (typeof m.content === "string") {
2611
3076
  chars = m.content.length;
@@ -2621,7 +3086,7 @@ function estimateMessageTokens(m) {
2621
3086
  return approxTokens2(chars);
2622
3087
  }
2623
3088
  function estimatePromptTokens(messages) {
2624
- return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
3089
+ return messages.reduce((sum, m) => sum + estimateMessageTokens2(m), 0);
2625
3090
  }
2626
3091
  function groupIntoTurns(messages) {
2627
3092
  const prefix = [];
@@ -2801,10 +3266,10 @@ function shouldCompact(opts2) {
2801
3266
  return tokens > tokenThreshold || turns.length > turnThreshold;
2802
3267
  }
2803
3268
  function compactMessages2(opts2) {
2804
- const keepLastTurns = opts2.keepLastTurns ?? 4;
3269
+ const keepLastTurns2 = opts2.keepLastTurns ?? 4;
2805
3270
  const { prefix, turns } = groupIntoTurns(opts2.messages);
2806
3271
  const tokensBefore = estimatePromptTokens(opts2.messages);
2807
- if (turns.length <= keepLastTurns) {
3272
+ if (turns.length <= keepLastTurns2) {
2808
3273
  return {
2809
3274
  newMessages: opts2.messages,
2810
3275
  newState: opts2.state,
@@ -2818,8 +3283,8 @@ function compactMessages2(opts2) {
2818
3283
  }
2819
3284
  };
2820
3285
  }
2821
- const toCompact = turns.slice(0, turns.length - keepLastTurns);
2822
- const toKeep = turns.slice(turns.length - keepLastTurns);
3286
+ const toCompact = turns.slice(0, turns.length - keepLastTurns2);
3287
+ const toKeep = turns.slice(turns.length - keepLastTurns2);
2823
3288
  let newState = { ...opts2.state };
2824
3289
  let archivedCount = 0;
2825
3290
  for (let i = 0; i < toCompact.length; i++) {
@@ -4850,12 +5315,12 @@ __export(sessions_exports, {
4850
5315
  pruneSessions: () => pruneSessions,
4851
5316
  saveSession: () => saveSession
4852
5317
  });
4853
- import { readFile as readFile7, writeFile as writeFile5, mkdir as mkdir5, readdir as readdir2, stat as stat3 } from "fs/promises";
4854
- import { homedir as homedir6 } from "os";
4855
- import { join as join7 } from "path";
5318
+ import { readFile as readFile8, writeFile as writeFile5, mkdir as mkdir5, readdir as readdir2, stat as stat3 } from "fs/promises";
5319
+ import { homedir as homedir7 } from "os";
5320
+ import { join as join8 } from "path";
4856
5321
  function sessionsDir() {
4857
- const xdg = process.env.XDG_DATA_HOME || join7(homedir6(), ".local", "share");
4858
- return join7(xdg, "kimiflare", "sessions");
5322
+ const xdg = process.env.XDG_DATA_HOME || join8(homedir7(), ".local", "share");
5323
+ return join8(xdg, "kimiflare", "sessions");
4859
5324
  }
4860
5325
  function sanitize(text) {
4861
5326
  return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 40);
@@ -4868,7 +5333,7 @@ function makeSessionId(firstPrompt) {
4868
5333
  async function saveSession(file) {
4869
5334
  const dir = sessionsDir();
4870
5335
  await mkdir5(dir, { recursive: true });
4871
- const path = join7(dir, `${file.id}.json`);
5336
+ const path = join8(dir, `${file.id}.json`);
4872
5337
  await writeFile5(path, JSON.stringify(file, null, 2), "utf8");
4873
5338
  return path;
4874
5339
  }
@@ -4888,9 +5353,9 @@ async function listSessions(limit = 30) {
4888
5353
  const summaries = [];
4889
5354
  for (const name of entries) {
4890
5355
  if (!name.endsWith(".json")) continue;
4891
- const path = join7(dir, name);
5356
+ const path = join8(dir, name);
4892
5357
  try {
4893
- const [s, raw] = await Promise.all([stat3(path), readFile7(path, "utf8")]);
5358
+ const [s, raw] = await Promise.all([stat3(path), readFile8(path, "utf8")]);
4894
5359
  const parsed = JSON.parse(raw);
4895
5360
  const firstUser = parsed.messages.find((m) => m.role === "user");
4896
5361
  const firstPrompt = typeof firstUser?.content === "string" ? firstUser.content : firstUser?.content ? firstUser.content.find((p) => p.type === "text")?.text ?? "(no prompt)" : "(no prompt)";
@@ -4909,7 +5374,7 @@ async function listSessions(limit = 30) {
4909
5374
  return summaries.slice(0, limit);
4910
5375
  }
4911
5376
  async function loadSession(filePath) {
4912
- const raw = await readFile7(filePath, "utf8");
5377
+ const raw = await readFile8(filePath, "utf8");
4913
5378
  return JSON.parse(raw);
4914
5379
  }
4915
5380
  var init_sessions = __esm({
@@ -4920,10 +5385,10 @@ var init_sessions = __esm({
4920
5385
  });
4921
5386
 
4922
5387
  // src/util/image.ts
4923
- import { readFile as readFile8 } from "fs/promises";
5388
+ import { readFile as readFile9 } from "fs/promises";
4924
5389
  import { basename as basename2 } from "path";
4925
5390
  async function encodeImageFile(filePath) {
4926
- const buf = await readFile8(filePath);
5391
+ const buf = await readFile9(filePath);
4927
5392
  if (buf.byteLength > MAX_IMAGE_BYTES) {
4928
5393
  throw new Error(
4929
5394
  `image too large (${(buf.byteLength / 1024 / 1024).toFixed(1)} MB); max is ${MAX_IMAGE_BYTES / 1024 / 1024} MB`
@@ -4959,15 +5424,15 @@ var init_image = __esm({
4959
5424
  });
4960
5425
 
4961
5426
  // src/usage-tracker.ts
4962
- import { readFile as readFile9, writeFile as writeFile6, mkdir as mkdir6 } from "fs/promises";
4963
- import { homedir as homedir7 } from "os";
4964
- import { join as join8 } from "path";
4965
- function usageDir() {
4966
- const xdg = process.env.XDG_DATA_HOME || join8(homedir7(), ".local", "share");
4967
- return join8(xdg, "kimiflare");
5427
+ import { readFile as readFile10, writeFile as writeFile6, mkdir as mkdir6 } from "fs/promises";
5428
+ import { homedir as homedir8 } from "os";
5429
+ import { join as join9 } from "path";
5430
/**
 * Data directory of the usage tracker: `$XDG_DATA_HOME/kimiflare`, with
 * `~/.local/share` standing in when XDG_DATA_HOME is unset or empty.
 * @returns {string}
 */
function usageDir2() {
  let dataHome = process.env.XDG_DATA_HOME;
  if (!dataHome) {
    dataHome = join9(homedir8(), ".local", "share");
  }
  return join9(dataHome, "kimiflare");
}
4969
- function usagePath() {
4970
- return join8(usageDir(), "usage.json");
5434
/**
 * Path of the usage tracker's aggregate file, `usage.json`, inside its data directory.
 * @returns {string}
 */
function usagePath3() {
  const dir = usageDir2();
  return join9(dir, "usage.json");
}
4972
5437
  function today() {
4973
5438
  return (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
@@ -4978,7 +5443,7 @@ function cutoffDate(daysBack) {
4978
5443
  }
4979
5444
  async function loadLog() {
4980
5445
  try {
4981
- const raw = await readFile9(usagePath(), "utf8");
5446
+ const raw = await readFile10(usagePath3(), "utf8");
4982
5447
  const parsed = JSON.parse(raw);
4983
5448
  if (parsed.version === LOG_VERSION2) return parsed;
4984
5449
  } catch {
@@ -4986,8 +5451,8 @@ async function loadLog() {
4986
5451
  return { version: LOG_VERSION2, days: [], sessions: [] };
4987
5452
  }
4988
5453
  async function saveLog(log) {
4989
- await mkdir6(usageDir(), { recursive: true });
4990
- await writeFile6(usagePath(), JSON.stringify(log, null, 2), "utf8");
5454
+ await mkdir6(usageDir2(), { recursive: true });
5455
+ await writeFile6(usagePath3(), JSON.stringify(log, null, 2), "utf8");
4991
5456
  }
4992
5457
  function getOrCreateDay(log, date) {
4993
5458
  let day = log.days.find((d) => d.date === date);
@@ -5104,7 +5569,7 @@ __export(app_exports, {
5104
5569
  import { useState as useState6, useRef as useRef3, useEffect as useEffect4, useCallback } from "react";
5105
5570
  import { Box as Box12, Text as Text13, useApp, useInput as useInput2, render } from "ink";
5106
5571
  import { existsSync } from "fs";
5107
- import { join as join9 } from "path";
5572
+ import { join as join10 } from "path";
5108
5573
  import { unlink as unlink2 } from "fs/promises";
5109
5574
  import { jsx as jsx13, jsxs as jsxs12 } from "react/jsx-runtime";
5110
5575
  function capEvents(prev) {
@@ -5170,9 +5635,10 @@ function App({ initialCfg, initialUpdateResult }) {
5170
5635
  const [hasUpdate, setHasUpdate] = useState6(initialUpdateResult?.hasUpdate ?? false);
5171
5636
  const [latestVersion, setLatestVersion] = useState6(initialUpdateResult?.latestVersion ?? null);
5172
5637
  const cacheStableRef = useRef3(initialCfg?.cacheStablePrompts !== false);
5173
- const messagesRef = useRef3(
5638
+ const systemMessagesRef = useRef3(
5174
5639
  makePrefixMessages(cacheStableRef.current, cfg?.model ?? DEFAULT_MODEL, "edit", ALL_TOOLS)
5175
5640
  );
5641
+ const messagesRef = useRef3(systemMessagesRef.current.slice());
5176
5642
  const executorRef = useRef3(new ToolExecutor(ALL_TOOLS));
5177
5643
  const activeAsstIdRef = useRef3(null);
5178
5644
  const activeControllerRef = useRef3(null);
@@ -5257,7 +5723,7 @@ function App({ initialCfg, initialUpdateResult }) {
5257
5723
  useEffect4(() => {
5258
5724
  modeRef.current = mode;
5259
5725
  if (cacheStableRef.current) {
5260
- messagesRef.current[1] = {
5726
+ const sessionMsg = {
5261
5727
  role: "system",
5262
5728
  content: buildSessionPrefix({
5263
5729
  cwd: process.cwd(),
@@ -5266,8 +5732,10 @@ function App({ initialCfg, initialUpdateResult }) {
5266
5732
  mode
5267
5733
  })
5268
5734
  };
5735
+ messagesRef.current[1] = sessionMsg;
5736
+ systemMessagesRef.current[1] = sessionMsg;
5269
5737
  } else {
5270
- messagesRef.current[0] = {
5738
+ const sysMsg = {
5271
5739
  role: "system",
5272
5740
  content: buildSystemPrompt({
5273
5741
  cwd: process.cwd(),
@@ -5276,6 +5744,8 @@ function App({ initialCfg, initialUpdateResult }) {
5276
5744
  mode
5277
5745
  })
5278
5746
  };
5747
+ messagesRef.current[0] = sysMsg;
5748
+ systemMessagesRef.current[0] = sysMsg;
5279
5749
  }
5280
5750
  if (mode === "plan") {
5281
5751
  executorRef.current.clearSessionPermissions();
@@ -5350,7 +5820,7 @@ function App({ initialCfg, initialUpdateResult }) {
5350
5820
  }
5351
5821
  if (totalTools > 0) {
5352
5822
  if (cacheStableRef.current) {
5353
- messagesRef.current[1] = {
5823
+ const sessionMsg = {
5354
5824
  role: "system",
5355
5825
  content: buildSessionPrefix({
5356
5826
  cwd: process.cwd(),
@@ -5359,8 +5829,10 @@ function App({ initialCfg, initialUpdateResult }) {
5359
5829
  mode: modeRef.current
5360
5830
  })
5361
5831
  };
5832
+ messagesRef.current[1] = sessionMsg;
5833
+ systemMessagesRef.current[1] = sessionMsg;
5362
5834
  } else {
5363
- messagesRef.current[0] = {
5835
+ const sysMsg = {
5364
5836
  role: "system",
5365
5837
  content: buildSystemPrompt({
5366
5838
  cwd: process.cwd(),
@@ -5369,6 +5841,8 @@ function App({ initialCfg, initialUpdateResult }) {
5369
5841
  mode: modeRef.current
5370
5842
  })
5371
5843
  };
5844
+ messagesRef.current[0] = sysMsg;
5845
+ systemMessagesRef.current[0] = sysMsg;
5372
5846
  }
5373
5847
  setEvents((e) => [
5374
5848
  ...e,
@@ -5545,13 +6019,13 @@ function App({ initialCfg, initialUpdateResult }) {
5545
6019
  }
5546
6020
  const cwd = process.cwd();
5547
6021
  for (const name of ["KIMI.md", "KIMIFLARE.md", "AGENT.md"]) {
5548
- if (existsSync(join9(cwd, name))) {
6022
+ if (existsSync(join10(cwd, name))) {
5549
6023
  setEvents((e) => [
5550
6024
  ...e,
5551
6025
  {
5552
6026
  kind: "info",
5553
6027
  key: mkKey(),
5554
- text: `${name} already exists at ${join9(cwd, name)} \u2014 delete it first if you want to regenerate`
6028
+ text: `${name} already exists at ${join10(cwd, name)} \u2014 delete it first if you want to regenerate`
5555
6029
  }
5556
6030
  ]);
5557
6031
  return;
@@ -5591,6 +6065,7 @@ function App({ initialCfg, initialUpdateResult }) {
5591
6065
  reasoningEffort: effortRef.current,
5592
6066
  coauthor: cfg.coauthor !== false ? { name: cfg.coauthorName || "kimiflare", email: cfg.coauthorEmail || "kimiflare@proton.me" } : void 0,
5593
6067
  sessionId: ensureSessionId(),
6068
+ systemMessages: systemMessagesRef.current,
5594
6069
  callbacks: {
5595
6070
  onAssistantStart: () => {
5596
6071
  const id = nextAssistantId++;
@@ -5668,9 +6143,9 @@ function App({ initialCfg, initialUpdateResult }) {
5668
6143
  })
5669
6144
  }
5670
6145
  });
5671
- if (existsSync(join9(cwd, "KIMI.md"))) {
6146
+ if (existsSync(join10(cwd, "KIMI.md"))) {
5672
6147
  if (cacheStableRef.current) {
5673
- messagesRef.current[1] = {
6148
+ const sessionMsg = {
5674
6149
  role: "system",
5675
6150
  content: buildSessionPrefix({
5676
6151
  cwd,
@@ -5679,8 +6154,10 @@ function App({ initialCfg, initialUpdateResult }) {
5679
6154
  mode: modeRef.current
5680
6155
  })
5681
6156
  };
6157
+ messagesRef.current[1] = sessionMsg;
6158
+ systemMessagesRef.current[1] = sessionMsg;
5682
6159
  } else {
5683
- messagesRef.current[0] = {
6160
+ const sysMsg = {
5684
6161
  role: "system",
5685
6162
  content: buildSystemPrompt({
5686
6163
  cwd,
@@ -5689,6 +6166,8 @@ function App({ initialCfg, initialUpdateResult }) {
5689
6166
  mode: modeRef.current
5690
6167
  })
5691
6168
  };
6169
+ messagesRef.current[0] = sysMsg;
6170
+ systemMessagesRef.current[0] = sysMsg;
5692
6171
  }
5693
6172
  setEvents((e) => [
5694
6173
  ...e,
@@ -5715,7 +6194,10 @@ function App({ initialCfg, initialUpdateResult }) {
5715
6194
  if (!picked) return;
5716
6195
  try {
5717
6196
  const file = await loadSession(picked.filePath);
6197
+ const prefixEnd = file.messages.findIndex((m) => m.role !== "system");
6198
+ systemMessagesRef.current = prefixEnd === -1 ? file.messages.slice() : file.messages.slice(0, prefixEnd);
5718
6199
  messagesRef.current = file.messages;
6200
+ clearOutputHashCache();
5719
6201
  sessionIdRef.current = file.id;
5720
6202
  if (file.sessionState && compiledContextRef.current) {
5721
6203
  sessionStateRef.current = file.sessionState;
@@ -5778,10 +6260,12 @@ function App({ initialCfg, initialUpdateResult }) {
5778
6260
  }
5779
6261
  if (c === "/clear") {
5780
6262
  if (cacheStableRef.current && messagesRef.current.length >= 2) {
5781
- messagesRef.current = [messagesRef.current[0], messagesRef.current[1]];
6263
+ systemMessagesRef.current = [messagesRef.current[0], messagesRef.current[1]];
5782
6264
  } else {
5783
- messagesRef.current = [messagesRef.current[0]];
6265
+ systemMessagesRef.current = [messagesRef.current[0]];
5784
6266
  }
6267
+ messagesRef.current = systemMessagesRef.current.slice();
6268
+ clearOutputHashCache();
5785
6269
  sessionIdRef.current = null;
5786
6270
  sessionStateRef.current = emptySessionState();
5787
6271
  artifactStoreRef.current = new ArtifactStore();
@@ -6103,6 +6587,7 @@ use: /thinking low | medium | high`
6103
6587
  coauthor: cfg.coauthor !== false ? { name: cfg.coauthorName || "kimiflare", email: cfg.coauthorEmail || "kimiflare@proton.me" } : void 0,
6104
6588
  sessionId: ensureSessionId(),
6105
6589
  keepLastImageTurns: cfg.imageHistoryTurns ?? 2,
6590
+ systemMessages: systemMessagesRef.current,
6106
6591
  callbacks: {
6107
6592
  onAssistantStart: () => {
6108
6593
  const id = nextAssistantId++;
@@ -6433,6 +6918,7 @@ var init_app = __esm({
6433
6918
  init_sessions();
6434
6919
  init_image();
6435
6920
  init_usage_tracker();
6921
+ init_tool_output_summarizer();
6436
6922
  CONTEXT_LIMIT = 262e3;
6437
6923
  AUTO_COMPACT_SUGGEST_PCT = 0.8;
6438
6924
  MAX_EVENTS = 500;
@@ -6457,22 +6943,30 @@ init_update_check();
6457
6943
  import { Command } from "commander";
6458
6944
  import { readFileSync as readFileSync2 } from "fs";
6459
6945
  import { fileURLToPath as fileURLToPath2 } from "url";
6460
- import { dirname as dirname3, join as join10 } from "path";
6946
+ import { dirname as dirname3, join as join11 } from "path";
6461
6947
  function readPackageVersion() {
6462
6948
  try {
6463
6949
  const here = dirname3(fileURLToPath2(import.meta.url));
6464
- const pkg = JSON.parse(readFileSync2(join10(here, "..", "package.json"), "utf8"));
6950
+ const pkg = JSON.parse(readFileSync2(join11(here, "..", "package.json"), "utf8"));
6465
6951
  return pkg.version ?? "0.0.0";
6466
6952
  } catch {
6467
6953
  return "0.0.0";
6468
6954
  }
6469
6955
  }
6470
6956
  var program = new Command();
6471
- program.name("kimiflare").description("Terminal coding agent powered by Kimi-K2.6 on Cloudflare Workers AI.").version(readPackageVersion()).option("-p, --print <prompt>", "one-shot mode: send prompt, stream reply to stdout, exit").option("-m, --model <id>", "model id (defaults to @cf/moonshotai/kimi-k2.6)").option("--dangerously-allow-all", "auto-approve every permission prompt (print mode only)").option("--reasoning", "include reasoning in stdout (print mode only)").parse();
6957
+ program.name("kimiflare").description("Terminal coding agent powered by Kimi-K2.6 on Cloudflare Workers AI.").version(readPackageVersion()).option("-p, --print <prompt>", "one-shot mode: send prompt, stream reply to stdout, exit").option("-m, --model <id>", "model id (defaults to @cf/moonshotai/kimi-k2.6)").option("--dangerously-allow-all", "auto-approve every permission prompt (print mode only)").option("--reasoning", "include reasoning in stdout (print mode only)").command("usage").description("show per-turn token usage log from ~/.kimiflare/usage.jsonl").action(async () => {
6958
+ const { showUsageLog: showUsageLog2 } = await Promise.resolve().then(() => (init_usage_cli(), usage_cli_exports));
6959
+ await showUsageLog2();
6960
+ });
6961
+ program.parse();
6472
6962
  var opts = program.opts();
6473
6963
  async function main() {
6474
6964
  const cfg = await loadConfig();
6475
6965
  const updateResult = await checkForUpdate();
6966
+ const args = program.args;
6967
+ if (args[0] === "usage") {
6968
+ return;
6969
+ }
6476
6970
  if (opts.print !== void 0) {
6477
6971
  if (!cfg) {
6478
6972
  console.error(
@@ -6516,8 +7010,11 @@ async function runPrintMode(opts2) {
6516
7010
  }
6517
7011
  const cwd = process.cwd();
6518
7012
  const executor = new ToolExecutor(ALL_TOOLS);
7013
+ const systemMessages = [
7014
+ { role: "system", content: buildSystemPrompt({ cwd, tools: ALL_TOOLS, model: opts2.model }) }
7015
+ ];
6519
7016
  const messages = [
6520
- { role: "system", content: buildSystemPrompt({ cwd, tools: ALL_TOOLS, model: opts2.model }) },
7017
+ ...systemMessages,
6521
7018
  { role: "user", content: opts2.prompt }
6522
7019
  ];
6523
7020
  const controller = new AbortController();
@@ -6533,6 +7030,7 @@ async function runPrintMode(opts2) {
6533
7030
  executor,
6534
7031
  cwd,
6535
7032
  signal: controller.signal,
7033
+ systemMessages,
6536
7034
  coauthor: opts2.coauthor !== false ? { name: opts2.coauthorName || "kimiflare", email: opts2.coauthorEmail || "kimiflare@proton.me" } : void 0,
6537
7035
  callbacks: {
6538
7036
  onReasoningDelta: opts2.showReasoning ? (delta) => {