kimiflare 0.34.0 → 0.34.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -83,7 +83,7 @@ async function loadConfig() {
83
83
  const envCacheStable = process.env.KIMIFLARE_CACHE_STABLE_PROMPTS;
84
84
  const cacheStablePrompts = envCacheStable === "0" || envCacheStable === "false" ? false : true;
85
85
  const envCompiled = process.env.KIMIFLARE_COMPILED_CONTEXT;
86
- const compiledContext = envCompiled === "1" || envCompiled === "true" ? true : false;
86
+ const compiledContext = envCompiled === "0" || envCompiled === "false" ? false : true;
87
87
  const envImageTurns = process.env.KIMIFLARE_IMAGE_HISTORY_TURNS;
88
88
  const imageHistoryTurns = envImageTurns ? parseInt(envImageTurns, 10) : void 0;
89
89
  const envMemoryEnabled = readBooleanEnv("KIMIFLARE_MEMORY_ENABLED");
@@ -92,6 +92,7 @@ async function loadConfig() {
92
92
  const envMemoryMaxEntries = readNumberEnv("KIMIFLARE_MEMORY_MAX_ENTRIES");
93
93
  const envMemoryEmbeddingModel = process.env.KIMIFLARE_MEMORY_EMBEDDING_MODEL;
94
94
  const envPlumbingModel = process.env.KIMIFLARE_PLUMBING_MODEL;
95
+ const envMemoryExtractionModel = process.env.KIMIFLARE_MEMORY_EXTRACTION_MODEL;
95
96
  const envCodeMode = readBooleanEnv("KIMIFLARE_CODE_MODE");
96
97
  const envCostAttribution = readBooleanEnv("KIMI_COST_ATTRIBUTION");
97
98
  const envFilePicker = readBooleanEnv("KIMIFLARE_FILE_PICKER");
@@ -112,14 +113,15 @@ async function loadConfig() {
112
113
  cacheStablePrompts,
113
114
  compiledContext,
114
115
  imageHistoryTurns: Number.isNaN(imageHistoryTurns) ? void 0 : imageHistoryTurns,
115
- memoryEnabled: envMemoryEnabled,
116
+ memoryEnabled: envMemoryEnabled ?? true,
116
117
  memoryDbPath: envMemoryDbPath,
117
118
  memoryMaxAgeDays: envMemoryMaxAgeDays,
118
119
  memoryMaxEntries: envMemoryMaxEntries,
119
120
  memoryEmbeddingModel: envMemoryEmbeddingModel,
120
121
  plumbingModel: envPlumbingModel,
121
- codeMode: envCodeMode,
122
- costAttribution: envCostAttribution ?? false,
122
+ memoryExtractionModel: envMemoryExtractionModel,
123
+ codeMode: envCodeMode ?? true,
124
+ costAttribution: envCostAttribution ?? true,
123
125
  filePicker: envFilePicker ?? true
124
126
  };
125
127
  }
@@ -144,14 +146,15 @@ async function loadConfig() {
144
146
  cacheStablePrompts: parsed.cacheStablePrompts ?? cacheStablePrompts,
145
147
  compiledContext: parsed.compiledContext ?? compiledContext,
146
148
  imageHistoryTurns: Number.isNaN(imageHistoryTurns) ? parsed.imageHistoryTurns : imageHistoryTurns,
147
- memoryEnabled: envMemoryEnabled ?? parsed.memoryEnabled,
149
+ memoryEnabled: envMemoryEnabled ?? parsed.memoryEnabled ?? true,
148
150
  memoryDbPath: envMemoryDbPath ?? parsed.memoryDbPath,
149
151
  memoryMaxAgeDays: envMemoryMaxAgeDays ?? parsed.memoryMaxAgeDays,
150
152
  memoryMaxEntries: envMemoryMaxEntries ?? parsed.memoryMaxEntries,
151
153
  memoryEmbeddingModel: envMemoryEmbeddingModel ?? parsed.memoryEmbeddingModel,
152
154
  plumbingModel: envPlumbingModel ?? parsed.plumbingModel,
153
- codeMode: envCodeMode ?? parsed.codeMode,
154
- costAttribution: envCostAttribution ?? parsed.costAttribution ?? false,
155
+ memoryExtractionModel: envMemoryExtractionModel ?? parsed.memoryExtractionModel,
156
+ codeMode: envCodeMode ?? parsed.codeMode ?? true,
157
+ costAttribution: envCostAttribution ?? parsed.costAttribution ?? true,
155
158
  filePicker: envFilePicker ?? parsed.filePicker ?? true
156
159
  };
157
160
  }
@@ -553,44 +556,51 @@ async function* parseStream(body, signal) {
553
556
  lastUsage = chunk.usage;
554
557
  yield { type: "usage", usage: chunk.usage };
555
558
  }
559
+ if (typeof chunk.response === "string") {
560
+ const resp = chunk.response;
561
+ if (resp.length) {
562
+ yield { type: "text", delta: resp };
563
+ }
564
+ }
556
565
  const choice = chunk.choices?.[0];
557
- if (!choice) continue;
558
- const d = choice.delta;
559
- if (d) {
560
- if (typeof d.reasoning_content === "string" && d.reasoning_content.length) {
561
- yield { type: "reasoning", delta: d.reasoning_content };
562
- }
563
- if (typeof d.content === "string" && d.content.length) {
564
- yield { type: "text", delta: d.content };
565
- }
566
- if (Array.isArray(d.tool_calls)) {
567
- for (const tc of d.tool_calls) {
568
- const idx = typeof tc.index === "number" ? tc.index : 0;
569
- let buf = toolCalls.get(idx);
570
- const incomingName = tc.function?.name ?? null;
571
- const incomingId = tc.id ?? null;
572
- if (!buf) {
573
- buf = { id: incomingId ?? `tc_${idx}`, name: incomingName ?? "", args: "" };
574
- toolCalls.set(idx, buf);
575
- if (buf.name) {
576
- yield { type: "tool_call_start", index: idx, id: buf.id, name: buf.name };
566
+ if (choice) {
567
+ const d = choice.delta;
568
+ if (d) {
569
+ if (typeof d.reasoning_content === "string" && d.reasoning_content.length) {
570
+ yield { type: "reasoning", delta: d.reasoning_content };
571
+ }
572
+ if (typeof d.content === "string" && d.content.length) {
573
+ yield { type: "text", delta: d.content };
574
+ }
575
+ if (Array.isArray(d.tool_calls)) {
576
+ for (const tc of d.tool_calls) {
577
+ const idx = typeof tc.index === "number" ? tc.index : 0;
578
+ let buf = toolCalls.get(idx);
579
+ const incomingName = tc.function?.name ?? null;
580
+ const incomingId = tc.id ?? null;
581
+ if (!buf) {
582
+ buf = { id: incomingId ?? `tc_${idx}`, name: incomingName ?? "", args: "" };
583
+ toolCalls.set(idx, buf);
584
+ if (buf.name) {
585
+ yield { type: "tool_call_start", index: idx, id: buf.id, name: buf.name };
586
+ }
587
+ } else {
588
+ if (!buf.name && incomingName) {
589
+ buf.name = incomingName;
590
+ yield { type: "tool_call_start", index: idx, id: buf.id, name: buf.name };
591
+ }
592
+ if (buf.id.startsWith("tc_") && incomingId) buf.id = incomingId;
577
593
  }
578
- } else {
579
- if (!buf.name && incomingName) {
580
- buf.name = incomingName;
581
- yield { type: "tool_call_start", index: idx, id: buf.id, name: buf.name };
594
+ const argDelta = tc.function?.arguments;
595
+ if (typeof argDelta === "string" && argDelta.length) {
596
+ buf.args += argDelta;
597
+ yield { type: "tool_call_args", index: idx, argsDelta: argDelta };
582
598
  }
583
- if (buf.id.startsWith("tc_") && incomingId) buf.id = incomingId;
584
- }
585
- const argDelta = tc.function?.arguments;
586
- if (typeof argDelta === "string" && argDelta.length) {
587
- buf.args += argDelta;
588
- yield { type: "tool_call_args", index: idx, argsDelta: argDelta };
589
599
  }
590
600
  }
591
601
  }
602
+ if (choice.finish_reason) finishReason = choice.finish_reason;
592
603
  }
593
- if (choice.finish_reason) finishReason = choice.finish_reason;
594
604
  }
595
605
  for (const [idx, buf] of [...toolCalls.entries()].sort((a, b) => a[0] - b[0])) {
596
606
  if (!buf.name) continue;
@@ -965,10 +975,129 @@ function safeJsonParse(text) {
965
975
  return null;
966
976
  }
967
977
  }
968
- var EXTRACTORS;
978
/**
 * Shorten `str` to at most `max` UTF-16 code units, appending an ellipsis
 * ("\u2026") when anything was cut off.
 *
 * The cut never lands between the two halves of a surrogate pair, so the
 * result is always well-formed text even when `str` contains emoji or other
 * astral-plane characters.
 *
 * @param {string} str - Text to shorten.
 * @param {number} max - Maximum number of code units kept before the ellipsis.
 * @returns {string} `str` unchanged when it already fits; otherwise the kept
 *   prefix followed by "\u2026".
 */
function truncate(str, max) {
  if (str.length <= max) return str;
  // A negative limit would make slice() count from the end of the string.
  let end = Math.max(0, max);
  if (end > 0) {
    const lastKept = str.charCodeAt(end - 1);
    const firstDropped = str.charCodeAt(end);
    const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
    const dropsLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    // Back off by one unit rather than keep an orphaned high surrogate.
    if (endsOnHighSurrogate && dropsLowSurrogate) end -= 1;
  }
  return str.slice(0, end) + "\u2026";
}
982
/**
 * Run one low-temperature completion through `runKimi` and return its text
 * in normalized form.
 *
 * Only events whose `type` is "text" contribute to the result. The collected
 * text is trimmed, stripped of one leading and one trailing quote character
 * (single or double), has whitespace runs collapsed to a single space, and is
 * lower-cased.
 *
 * @param {Array<object>} messages - Chat messages forwarded to `runKimi`.
 * @param {object} [llmOpts] - Supplies accountId, apiToken, model, gateway and
 *   signal; when falsy no request is made.
 * @param {number} [maxTokens=64] - Forwarded as `maxCompletionTokens`.
 * @returns {Promise<string>} The normalized completion, or "" when `llmOpts`
 *   is falsy.
 */
async function callLlm(messages, llmOpts, maxTokens = 64) {
  if (!llmOpts) {
    return "";
  }
  const { accountId, apiToken, model, gateway, signal } = llmOpts;
  const stream = runKimi({
    accountId,
    apiToken,
    model,
    messages,
    temperature: 0.1,
    maxCompletionTokens: maxTokens,
    gateway,
    signal
  });
  let raw = "";
  for await (const event of stream) {
    if (event.type !== "text") continue;
    raw += event.delta;
  }
  const unquoted = raw.trim().replace(/^["']|["']$/g, "");
  const singleSpaced = unquoted.replace(/\s+/g, " ");
  return singleSpaced.toLowerCase();
}
1000
/**
 * Ask the LLM for one summary of the edit and have a second call verify it.
 *
 * @param {string} systemPrompt - System prompt for the summarizing call.
 * @param {string} changeContext - Rendered File/Tool/Before/After description.
 * @param {object} llmOpts - Options forwarded to `callLlm`.
 * @returns {Promise<string|null>} The summary when it is 10-200 characters
 *   long and the verifier's reply starts with "yes"; otherwise null.
 */
async function attemptVerifiedEditSummary(systemPrompt, changeContext, llmOpts) {
  const summary = await callLlm(
    [
      { role: "system", content: systemPrompt },
      { role: "user", content: `${changeContext}\n\nSummary:` }
    ],
    llmOpts
  );
  // Out-of-range summaries are discarded without spending a verifier call.
  if (summary.length < 10 || summary.length > 200) return null;
  const verdict = await callLlm(
    [
      { role: "system", content: VERIFIER_SYSTEM },
      {
        role: "user",
        content: `${changeContext}\n\nProposed summary: "${summary}"\n\nIs this accurate?`
      }
    ],
    llmOpts,
    8
  );
  return verdict.startsWith("yes") ? summary : null;
}

/**
 * Produce a one-sentence, LLM-verified description of a single edit/write
 * tool call.
 *
 * A first attempt uses EDIT_SYNTHESIS_SYSTEM as-is. If its summary is out of
 * range or the verifier does not answer "yes", one retry is made with a
 * stricter system prompt. Both attempts share the same rendered context.
 *
 * @param {string} file - Path of the file that was changed.
 * @param {string} toolName - Tool that made the change; "write" is rendered
 *   with "(new file)" as the Before section.
 * @param {object} toolArgs - Tool arguments; `old_string`, `new_string` and
 *   `content` are read when they are strings.
 * @param {string} assistantMessage - Assistant text accompanying the tool
 *   call; its last 300 characters are included as context when non-empty.
 * @param {object} [llmOpts] - Options forwarded to `callLlm`; when falsy no
 *   LLM call is made.
 * @returns {Promise<string|null>} The verified summary, or null when
 *   `llmOpts` is falsy or both attempts fail.
 */
async function synthesizeEditEvent(file, toolName, toolArgs, assistantMessage, llmOpts) {
  if (!llmOpts) return null;
  const oldString = typeof toolArgs.old_string === "string" ? toolArgs.old_string : "";
  const newString = typeof toolArgs.new_string === "string" ? toolArgs.new_string : "";
  const fullContent = typeof toolArgs.content === "string" ? toolArgs.content : "";
  const isWrite = toolName === "write";
  const before = isWrite ? "(new file)" : truncate(oldString, 600);
  const after = isWrite ? truncate(fullContent, 600) : truncate(newString, 600);
  const intent = assistantMessage ? assistantMessage.slice(-300).trim() : "";

  const contextLines = [`File: ${file}`, `Tool: ${toolName}`, "", "Before:", before, "", "After:", after];
  if (intent) contextLines.push("", `Context: ${intent}`);
  const changeContext = contextLines.join("\n");

  const retrySystem = `${EDIT_SYNTHESIS_SYSTEM}\n\nCRITICAL: The previous summary was rejected. Be specific. Include concrete details from the After section.`;
  // Attempts run strictly in sequence; the retry only happens after the first one fails.
  for (const systemPrompt of [EDIT_SYNTHESIS_SYSTEM, retrySystem]) {
    const summary = await attemptVerifiedEditSummary(systemPrompt, changeContext, llmOpts);
    if (summary !== null) return summary;
  }
  return null;
}
1078
+ var EDIT_SYNTHESIS_SYSTEM, VERIFIER_SYSTEM, EXTRACTORS;
969
1079
  var init_extractors = __esm({
970
1080
  "src/memory/extractors.ts"() {
971
1081
  "use strict";
1082
+ init_client();
1083
+ EDIT_SYNTHESIS_SYSTEM = `You summarize a SINGLE code edit. Write ONE concise sentence (max 20 words) describing exactly what changed.
1084
+
1085
+ Rules:
1086
+ - Use ONLY the Before/After diff below.
1087
+ - For new files: describe the file's content or purpose. Never say just "added a new file".
1088
+ - For edits: describe the specific code change.
1089
+
1090
+ Examples:
1091
+ - Created test-memory.md containing the text "Memory test".
1092
+ - Fixed race condition in loop.ts by adding AbortSignal guard.
1093
+ - Added vitest dependency and removed jest from package.json.
1094
+
1095
+ Respond with only the summary sentence. No quotes, no preamble.`;
1096
+ VERIFIER_SYSTEM = `You verify whether a summary accurately describes a code edit.
1097
+ Answer exactly "yes" if the summary correctly captures what changed in the file.
1098
+ Answer exactly "no" if the summary is vague, wrong, or misses the actual change.
1099
+
1100
+ Respond with only "yes" or "no".`;
972
1101
  EXTRACTORS = [
973
1102
  {
974
1103
  id: "package_json",
@@ -1022,9 +1151,27 @@ var init_extractors = __esm({
1022
1151
  {
1023
1152
  id: "edit_event",
1024
1153
  match: (tool, file) => (tool === "edit" || tool === "write") && !!file,
1025
- extract: (_content, file) => {
1154
+ extract: async (_content, file, ctx) => {
1026
1155
  if (!file) return null;
1027
1156
  const safeKey = file.replace(/[^a-zA-Z0-9]/g, "_");
1157
+ if (ctx?.llmOpts && (ctx.toolArgs || ctx.assistantMessage)) {
1158
+ const summary = await synthesizeEditEvent(
1159
+ file,
1160
+ ctx.toolArgs?._toolName || "edit",
1161
+ ctx.toolArgs || {},
1162
+ ctx.assistantMessage,
1163
+ ctx.llmOpts
1164
+ );
1165
+ if (summary) {
1166
+ return {
1167
+ content: summary,
1168
+ category: "event",
1169
+ importance: 3,
1170
+ topicKey: `event_edit_${safeKey}`,
1171
+ relatedFiles: [file]
1172
+ };
1173
+ }
1174
+ }
1028
1175
  return {
1029
1176
  content: `File modified: ${file}.`,
1030
1177
  category: "event",
@@ -1715,25 +1862,44 @@ ${sandboxResult.output}` : sandboxResult.output;
1715
1862
  opts2.callbacks.onToolResult?.(result);
1716
1863
  if (opts2.memoryManager) {
1717
1864
  let filePath;
1865
+ let toolArgs = {};
1718
1866
  try {
1719
- const args = JSON.parse(tc.function.arguments || "{}");
1720
- filePath = args.path;
1867
+ toolArgs = JSON.parse(tc.function.arguments || "{}");
1868
+ filePath = toolArgs.path;
1721
1869
  } catch {
1722
1870
  }
1871
+ const lastAssistant = [...opts2.messages].reverse().find(
1872
+ (m) => m.role === "assistant" && m.tool_calls && m.tool_calls.length > 0
1873
+ );
1874
+ const assistantMessage = lastAssistant?.content ?? "";
1875
+ const llmOpts = opts2.memoryManager.getExtractionLlmOpts();
1723
1876
  for (const extractor of EXTRACTORS) {
1724
1877
  if (extractor.match(tc.function.name, filePath)) {
1725
- const memory = extractor.extract(result.content, filePath);
1726
- if (memory) {
1727
- void opts2.memoryManager.remember(
1728
- memory.content,
1729
- memory.category,
1730
- memory.importance,
1731
- opts2.cwd,
1732
- opts2.sessionId ?? "unknown",
1733
- opts2.signal
1734
- ).catch(() => {
1735
- });
1736
- }
1878
+ void (async () => {
1879
+ try {
1880
+ const memory = await extractor.extract(result.content, filePath, {
1881
+ toolArgs: { ...toolArgs, _toolName: tc.function.name },
1882
+ assistantMessage: typeof assistantMessage === "string" ? assistantMessage : "",
1883
+ llmOpts: {
1884
+ ...llmOpts,
1885
+ signal: opts2.signal
1886
+ }
1887
+ });
1888
+ if (memory) {
1889
+ await opts2.memoryManager.remember(
1890
+ memory.content,
1891
+ memory.category,
1892
+ memory.importance,
1893
+ opts2.cwd,
1894
+ opts2.sessionId ?? "unknown",
1895
+ opts2.signal,
1896
+ void 0,
1897
+ memory.topicKey
1898
+ );
1899
+ }
1900
+ } catch {
1901
+ }
1902
+ })();
1737
1903
  }
1738
1904
  }
1739
1905
  }
@@ -9392,6 +9558,18 @@ Return a JSON array of strings. Example:
9392
9558
  gateway: this.opts.gateway
9393
9559
  };
9394
9560
  }
9561
+ get extractionLlmOpts() {
9562
+ return {
9563
+ accountId: this.opts.accountId,
9564
+ apiToken: this.opts.apiToken,
9565
+ model: this.opts.extractionModel ?? "@cf/meta/llama-3.2-3b-instruct",
9566
+ gateway: this.opts.gateway
9567
+ };
9568
+ }
9569
+ /** Expose extraction LLM opts so the agent loop can pass them to extractors. */
9570
+ getExtractionLlmOpts() {
9571
+ return this.extractionLlmOpts;
9572
+ }
9395
9573
  shouldRedact() {
9396
9574
  return this.opts.redactSecrets !== false;
9397
9575
  }
@@ -9399,7 +9577,7 @@ Return a JSON array of strings. Example:
9399
9577
  * Store a memory with verification, topic-key normalization, hypothetical queries,
9400
9578
  * secret redaction, and supersession.
9401
9579
  */
9402
- async remember(content, category, importance, repoPath, sessionId, signal, agentRole) {
9580
+ async remember(content, category, importance, repoPath, sessionId, signal, agentRole, topicKey) {
9403
9581
  if (!this.db) throw new Error("Memory DB not open");
9404
9582
  let safeContent = this.shouldRedact() ? redactSecrets(content) : content;
9405
9583
  if (!safeContent.trim()) {
@@ -9412,10 +9590,10 @@ Return a JSON array of strings. Example:
9412
9590
  if (verified.corrected_content) {
9413
9591
  safeContent = verified.corrected_content;
9414
9592
  }
9415
- const topicKey = this.normalizeTopicKey(safeContent, repoPath);
9593
+ const resolvedTopicKey = topicKey?.trim() || this.normalizeTopicKey(safeContent, repoPath);
9416
9594
  const supersededIds = [];
9417
- if (topicKey) {
9418
- const existing = findMemoriesByTopicKey(this.db, repoPath, topicKey);
9595
+ if (resolvedTopicKey) {
9596
+ const existing = findMemoriesByTopicKey(this.db, repoPath, resolvedTopicKey);
9419
9597
  for (const old of existing) {
9420
9598
  supersedeMemory(this.db, old.id, "pending");
9421
9599
  supersededIds.push(old.id);
@@ -9436,7 +9614,7 @@ Return a JSON array of strings. Example:
9436
9614
  sourceSessionId: sessionId,
9437
9615
  repoPath,
9438
9616
  importance: Math.max(1, Math.min(5, importance)),
9439
- topicKey: topicKey ?? void 0,
9617
+ topicKey: resolvedTopicKey ?? void 0,
9440
9618
  agentRole
9441
9619
  };
9442
9620
  const memory = insertMemory(this.db, input, embeddings[0]);
@@ -12115,6 +12293,7 @@ function App({
12115
12293
  apiToken: cfg.apiToken,
12116
12294
  model: cfg.model,
12117
12295
  plumbingModel: cfg.plumbingModel,
12296
+ extractionModel: cfg.memoryExtractionModel,
12118
12297
  embeddingModel: cfg.memoryEmbeddingModel,
12119
12298
  gateway: gatewayFromConfig(cfg),
12120
12299
  maxAgeDays: cfg.memoryMaxAgeDays ?? RETENTION.memoryMaxAgeDays,
@@ -12807,8 +12986,6 @@ function App({
12807
12986
  sessionId: ensureSessionId(),
12808
12987
  memoryManager: memoryManagerRef.current,
12809
12988
  codeMode: effectiveCodeMode,
12810
- maxInputTokens: effectiveCodeMode ? 2e5 : void 0,
12811
- continueOnLimit: effectiveCodeMode ? true : void 0,
12812
12989
  onIterationEnd,
12813
12990
  onFileChange: (path, content) => {
12814
12991
  if (content) {
@@ -14008,8 +14185,6 @@ ${lines.join("\n")}` }]);
14008
14185
  memoryManager: memoryManagerRef.current,
14009
14186
  keepLastImageTurns: cfg.imageHistoryTurns ?? 2,
14010
14187
  codeMode: effectiveCodeMode,
14011
- maxInputTokens: effectiveCodeMode ? 2e5 : void 0,
14012
- continueOnLimit: effectiveCodeMode ? true : void 0,
14013
14188
  onIterationEnd,
14014
14189
  intentClassification: classification,
14015
14190
  onFileChange: (path, content2) => {