kimiflare 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,6 +22,7 @@
22
22
  ## Why kimiflare
23
23
 
24
24
  - **262k context window** — Read entire modules, large configs, and full stack traces without the model losing track.
25
+ - **Image understanding** — Drop image paths into your prompt (PNG, JPG, WebP, GIF, BMP). The model sees them inline — great for UI reviews, diagrams, screenshots, and mockups.
25
26
  - **Direct to Cloudflare** — No AI Gateway, no proxy, no OpenAI SDK. Your traffic goes straight to Workers AI from your account.
26
27
  - **Plan mode** — Ask the agent to research and produce a plan without touching your filesystem. Review it, then exit plan mode to execute.
27
28
 
@@ -53,6 +54,7 @@ Requires Node.js ≥ 20.
53
54
  | **Type-ahead queue** | Type your next prompt while the model is still working. Queued prompts show as `⏳ …` and fire in order. `Ctrl-C` aborts current + clears queue. |
54
55
  | **Auto-compaction** | At ~80% context usage, kimiflare nudges you to run `/compact`. It summarizes older turns into a dense summary, keeping the last 4 turns intact. |
55
56
  | **Streaming reasoning** | Toggle the model's chain-of-thought with `/reasoning` or `Ctrl-R`. See how it thinks in real time. |
57
+ | **Image understanding** | Drop image paths (PNG, JPG, WebP, GIF, BMP up to 5 MB) into any prompt. The model sees them inline — perfect for UI reviews, diagrams, and screenshots. |
56
58
  | **Live cost tracking** | Status bar shows real-time cost based on Cloudflare pricing: `$0.95/M input`, `$0.16/M cached`, `$4.00/M output`. |
57
59
  | **Session persistence** | Every turn is auto-saved. `/resume` lists past sessions (with message counts) in a paginated picker. |
58
60
  | **Smart permissions** | Bash session-allow is keyed by the first token (e.g., allow all `git` commands). Write/edit show a unified diff before you approve. |
@@ -105,6 +107,19 @@ kimiflare -p "..." --dangerously-allow-all # auto-approve mutating tool
105
107
  kimiflare -p "..." --reasoning # include chain-of-thought in stderr
106
108
  ```
107
109
 
110
+ ### Image understanding
111
+
112
+ Reference image files directly in your prompt — the model sees them inline:
113
+
114
+ ```sh
115
+ kimiflare
116
+ › fix the layout bug in this screenshot docs/bug.png
117
+ › convert this mockup design.png to Tailwind HTML
118
+ › explain this architecture diagram.png
119
+ ```
120
+
121
+ Supported formats: PNG, JPG, JPEG, WebP, GIF, BMP (up to 5 MB each, 10 per message).
122
+
108
123
  ### CLI flags
109
124
 
110
125
  | Flag | Short | Description |
package/dist/index.js CHANGED
@@ -296,10 +296,19 @@ async function* parseStream(body, signal) {
296
296
  }
297
297
  function sanitizeMessagesForApi(messages) {
298
298
  return messages.map((m) => {
299
- if (!m.tool_calls || m.tool_calls.length === 0) return m;
299
+ let next = m;
300
+ if (Array.isArray(m.content)) {
301
+ next = {
302
+ ...m,
303
+ content: m.content.map(
304
+ (part) => part.type === "text" ? { ...part, text: sanitizeString(part.text) } : part
305
+ )
306
+ };
307
+ }
308
+ if (!next.tool_calls || next.tool_calls.length === 0) return next;
300
309
  return {
301
- ...m,
302
- tool_calls: m.tool_calls.map((tc) => ({
310
+ ...next,
311
+ tool_calls: next.tool_calls.map((tc) => ({
303
312
  ...tc,
304
313
  function: {
305
314
  name: tc.function.name,
@@ -1533,15 +1542,16 @@ async function compactMessages(opts2) {
1533
1542
  return { summary: "", newMessages: messages, replacedCount: 0 };
1534
1543
  }
1535
1544
  const transcript = toSummarize.map((m) => {
1545
+ const contentStr = typeof m.content === "string" ? m.content : m.content?.map((p) => p.type === "text" ? p.text : "[image]").join(" ") ?? "";
1536
1546
  if (m.role === "tool") {
1537
- const snippet = (m.content ?? "").slice(0, 500);
1547
+ const snippet = contentStr.slice(0, 500);
1538
1548
  return `[tool ${m.name ?? ""}] ${snippet}`;
1539
1549
  }
1540
1550
  if (m.role === "assistant") {
1541
1551
  const calls = m.tool_calls ? ` (tool_calls: ${m.tool_calls.map((c) => c.function.name).join(", ")})` : "";
1542
- return `[assistant]${calls} ${m.content ?? ""}`;
1552
+ return `[assistant]${calls} ${contentStr}`;
1543
1553
  }
1544
- return `[${m.role}] ${m.content ?? ""}`;
1554
+ return `[${m.role}] ${contentStr}`;
1545
1555
  }).join("\n");
1546
1556
  let summary = "";
1547
1557
  const events = runKimi({
@@ -1867,12 +1877,18 @@ function EventView({
1867
1877
  verbose
1868
1878
  }) {
1869
1879
  if (evt.kind === "user") {
1870
- return /* @__PURE__ */ jsxs4(Box4, { children: [
1871
- /* @__PURE__ */ jsxs4(Text4, { bold: true, color: theme.user, children: [
1872
- "\u203A",
1873
- " "
1880
+ return /* @__PURE__ */ jsxs4(Box4, { flexDirection: "column", children: [
1881
+ /* @__PURE__ */ jsxs4(Box4, { children: [
1882
+ /* @__PURE__ */ jsxs4(Text4, { bold: true, color: theme.user, children: [
1883
+ "\u203A",
1884
+ " "
1885
+ ] }),
1886
+ /* @__PURE__ */ jsx4(Text4, { bold: true, children: evt.text })
1874
1887
  ] }),
1875
- /* @__PURE__ */ jsx4(Text4, { bold: true, children: evt.text })
1888
+ evt.images && evt.images.length > 0 && /* @__PURE__ */ jsx4(Box4, { paddingLeft: 2, children: /* @__PURE__ */ jsxs4(Text4, { color: theme.info.color, dimColor: theme.info.dim, children: [
1889
+ "\u{1F5BC}\uFE0F ",
1890
+ evt.images.join(", ")
1891
+ ] }) })
1876
1892
  ] });
1877
1893
  }
1878
1894
  if (evt.kind === "assistant") {
@@ -3470,7 +3486,7 @@ async function listSessions(limit = 30) {
3470
3486
  const [s, raw] = await Promise.all([stat2(path), readFile7(path, "utf8")]);
3471
3487
  const parsed = JSON.parse(raw);
3472
3488
  const firstUser = parsed.messages.find((m) => m.role === "user");
3473
- const firstPrompt = typeof firstUser?.content === "string" ? firstUser.content : "(no prompt)";
3489
+ const firstPrompt = typeof firstUser?.content === "string" ? firstUser.content : firstUser?.content ? firstUser.content.find((p) => p.type === "text")?.text ?? "(no prompt)" : "(no prompt)";
3474
3490
  summaries.push({
3475
3491
  id: parsed.id,
3476
3492
  filePath: path,
@@ -3495,6 +3511,45 @@ var init_sessions = __esm({
3495
3511
  }
3496
3512
  });
3497
3513
 
3514
+ // src/util/image.ts
3515
import { readFile as readFile8, stat as statImageFile } from "fs/promises";
import { basename as basename2 } from "path";
3517
/**
 * Read an image from disk and package it as a base64 `data:` URL.
 *
 * The size on disk is checked before the file is read, so an oversized file
 * is rejected without first being loaded into memory.
 *
 * @param {string} filePath - Path of the image file to encode.
 * @returns {Promise<{filename: string, mime: string, dataUrl: string}>}
 *   The file's base name, its MIME type (looked up in EXT_TO_MIME by the
 *   lowercased extension, falling back to "image/jpeg"), and the data URL.
 * @throws {Error} If the file is larger than MAX_IMAGE_BYTES, or if the
 *   underlying stat/read call fails.
 */
async function encodeImageFile(filePath) {
  const assertWithinLimit = (byteLength) => {
    if (byteLength > MAX_IMAGE_BYTES) {
      throw new Error(
        `image too large (${(byteLength / 1024 / 1024).toFixed(1)} MB); max is ${MAX_IMAGE_BYTES / 1024 / 1024} MB`
      );
    }
  };

  // Cheap pre-check: avoids buffering a huge file just to reject it.
  const { size } = await statImageFile(filePath);
  assertWithinLimit(size);

  const buf = await readFile8(filePath);
  // Re-check the bytes actually read; the file may have grown since the stat.
  assertWithinLimit(buf.byteLength);

  const ext = filePath.slice(filePath.lastIndexOf(".")).toLowerCase();
  const mime = EXT_TO_MIME[ext] ?? "image/jpeg";
  return {
    filename: basename2(filePath),
    mime,
    dataUrl: `data:${mime};base64,${buf.toString("base64")}`
  };
}
3533
/**
 * Report whether a path ends in one of the extensions listed in EXT_TO_MIME.
 * The comparison is case-insensitive.
 *
 * @param {string} path - File path (or bare file name) to inspect.
 * @returns {boolean} True when the text from the last "." onward is a key of
 *   EXT_TO_MIME; false otherwise, including when the path contains no ".".
 */
function isImagePath(path) {
  const dotIndex = path.lastIndexOf(".");
  // Without this guard, slice(-1) would treat the last character as the extension.
  if (dotIndex === -1) return false;
  const ext = path.slice(dotIndex).toLowerCase();
  // Own-property check: inherited Object.prototype keys must never count as extensions.
  return Object.hasOwn(EXT_TO_MIME, ext);
}
3537
// Module-level bindings for src/util/image.ts. They stay undefined until
// init_image() has run (presumably __esm is the bundler's lazy-init wrapper —
// it is defined outside this view).
var MAX_IMAGE_BYTES;
var EXT_TO_MIME;
var init_image = __esm({
  "src/util/image.ts"() {
    "use strict";
    // Upper bound on the size of a single image file, in bytes (5 * 1024 * 1024).
    MAX_IMAGE_BYTES = 5 * 1024 * 1024;
    // Lowercased file extension (including the dot) -> MIME type.
    EXT_TO_MIME = {
      ".png": "image/png",
      ".jpg": "image/jpeg",
      ".jpeg": "image/jpeg",
      ".gif": "image/gif",
      ".webp": "image/webp",
      ".bmp": "image/bmp"
    };
  }
});
3552
+
3498
3553
  // src/app.tsx
3499
3554
  var app_exports = {};
3500
3555
  __export(app_exports, {
@@ -3510,6 +3565,16 @@ function capEvents(prev) {
3510
3565
  if (prev.length <= MAX_EVENTS) return prev;
3511
3566
  return prev.slice(prev.length - MAX_EVENTS);
3512
3567
  }
3568
/**
 * Extract the image file paths mentioned in a prompt.
 *
 * The text is split on whitespace. For each token two cleaned-up candidates
 * are tried in order, and the first one that has an image extension and
 * exists on disk is kept:
 *   1. the token with at most one wrapping quote / trailing punctuation mark
 *      removed from each end (the original behaviour, tried first so files
 *      whose real names begin with e.g. "(" still match);
 *   2. the token with every leading quote/bracket and every trailing
 *      quote/bracket/punctuation character removed, so `"shot.png",`,
 *      `(shot.png).` and backtick-wrapped paths are recognised too.
 *
 * @param {string} text - Raw prompt text.
 * @returns {string[]} Unique matching paths, in order of first appearance.
 */
function findImagePaths(text) {
  const found = new Set();
  for (const token of text.split(/\s+/)) {
    const lightlyCleaned = token.replace(/^["']|["',;:!?]$/g, "").replace(/[.,;:!?]$/, "");
    const fullyStripped = token.replace(/^["'`(\[{<]+/, "").replace(/["'`)\]}>.,;:!?]+$/, "");
    // Skip the second probe when both clean-ups agree, to avoid a duplicate disk check.
    const candidates = lightlyCleaned === fullyStripped ? [lightlyCleaned] : [lightlyCleaned, fullyStripped];
    const match = candidates.find((candidate) => isImagePath(candidate) && existsSync(candidate));
    if (match !== undefined) {
      found.add(match);
    }
  }
  return [...found];
}
3513
3578
  function App({ initialCfg, initialUpdateResult }) {
3514
3579
  const { exit } = useApp();
3515
3580
  const [cfg, setCfg] = useState6(initialCfg);
@@ -3561,6 +3626,7 @@ function App({ initialCfg, initialUpdateResult }) {
3561
3626
  const executorRef = useRef3(new ToolExecutor(ALL_TOOLS));
3562
3627
  const activeAsstIdRef = useRef3(null);
3563
3628
  const activeControllerRef = useRef3(null);
3629
+ const permResolveRef = useRef3(null);
3564
3630
  const sessionIdRef = useRef3(null);
3565
3631
  const modeRef = useRef3(mode);
3566
3632
  const effortRef = useRef3(effort);
@@ -3679,7 +3745,13 @@ function App({ initialCfg, initialUpdateResult }) {
3679
3745
  if (!cfg) return;
3680
3746
  if (!sessionIdRef.current) {
3681
3747
  const firstUser = messagesRef.current.find((m) => m.role === "user");
3682
- const firstText = typeof firstUser?.content === "string" ? firstUser.content : "session";
3748
+ let firstText = "session";
3749
+ if (typeof firstUser?.content === "string") {
3750
+ firstText = firstUser.content;
3751
+ } else if (Array.isArray(firstUser?.content)) {
3752
+ const textPart = firstUser.content.find((p) => p.type === "text");
3753
+ if (textPart?.text) firstText = textPart.text;
3754
+ }
3683
3755
  sessionIdRef.current = makeSessionId(firstText);
3684
3756
  }
3685
3757
  try {
@@ -3696,8 +3768,11 @@ function App({ initialCfg, initialUpdateResult }) {
3696
3768
  }, [cfg]);
3697
3769
  useInput2((inputChar, key) => {
3698
3770
  if (key.ctrl && inputChar === "c") {
3699
- if (busy && activeControllerRef.current) {
3700
- activeControllerRef.current.abort();
3771
+ if (busy) {
3772
+ activeControllerRef.current?.abort();
3773
+ permResolveRef.current?.("deny");
3774
+ permResolveRef.current = null;
3775
+ setPerm(null);
3701
3776
  setQueue([]);
3702
3777
  setEvents((e) => [...e, { kind: "info", key: mkKey(), text: "(interrupted)" }]);
3703
3778
  } else {
@@ -3919,7 +3994,11 @@ function App({ initialCfg, initialUpdateResult }) {
3919
3994
  resolve2("deny");
3920
3995
  return;
3921
3996
  }
3922
- setPerm({ tool: req.tool, args: req.args, resolve: resolve2 });
3997
+ permResolveRef.current = resolve2;
3998
+ setPerm({ tool: req.tool, args: req.args, resolve: (d) => {
3999
+ permResolveRef.current = null;
4000
+ resolve2(d);
4001
+ } });
3923
4002
  })
3924
4003
  }
3925
4004
  });
@@ -3939,17 +4018,25 @@ function App({ initialCfg, initialUpdateResult }) {
3939
4018
  ]);
3940
4019
  }
3941
4020
  } catch (e) {
3942
- if (e.name !== "AbortError") {
4021
+ if (e.name === "AbortError") {
4022
+ setEvents((es) => [...es, { kind: "info", key: mkKey(), text: "(interrupted)" }]);
4023
+ setEvents(
4024
+ (evts) => evts.map((e2) => e2.kind === "tool" && e2.status === "running" ? { ...e2, status: "error", result: "(interrupted)" } : e2)
4025
+ );
4026
+ } else {
3943
4027
  setEvents((es) => [
3944
4028
  ...es,
3945
4029
  { kind: "error", key: mkKey(), text: `init failed: ${e.message}` }
3946
4030
  ]);
3947
4031
  }
3948
4032
  } finally {
4033
+ const asstId = activeAsstIdRef.current;
4034
+ if (asstId !== null) updateAssistant(asstId, () => ({ streaming: false }));
3949
4035
  setBusy(false);
3950
4036
  setTurnStartedAt(null);
3951
4037
  activeAsstIdRef.current = null;
3952
4038
  activeControllerRef.current = null;
4039
+ permResolveRef.current = null;
3953
4040
  }
3954
4041
  }, [cfg, busy, updateAssistant, updateTool]);
3955
4042
  const handleResumePick = useCallback(
@@ -3967,7 +4054,12 @@ function App({ initialCfg, initialUpdateResult }) {
3967
4054
  text: `resumed session ${picked.id} (${picked.messageCount} msgs)`
3968
4055
  }
3969
4056
  ]);
3970
- const userMsgs = file.messages.filter((m) => m.role === "user" && typeof m.content === "string").map((m) => m.content);
4057
+ const userMsgs = file.messages.filter((m) => m.role === "user" && m.content).map((m) => {
4058
+ if (!m.content) return "";
4059
+ if (typeof m.content === "string") return m.content;
4060
+ const textPart = m.content.find((p) => p.type === "text");
4061
+ return textPart?.text ?? "";
4062
+ }).filter((text) => text.length > 0);
3971
4063
  if (userMsgs.length > 0) setHistory(userMsgs);
3972
4064
  setUsage(null);
3973
4065
  } catch (e) {
@@ -4223,8 +4315,36 @@ use: /thinking low | medium | high`
4223
4315
  if (!trimmed) return;
4224
4316
  if (trimmed.startsWith("/") && handleSlash(trimmed)) return;
4225
4317
  const display = displayText?.trim() || trimmed;
4226
- setEvents((e) => [...e, { kind: "user", key: mkKey(), text: display }]);
4227
- messagesRef.current.push({ role: "user", content: sanitizeString(trimmed) });
4318
+ const imagePaths = findImagePaths(trimmed).slice(0, MAX_IMAGES_PER_MESSAGE);
4319
+ let images = [];
4320
+ let content = sanitizeString(trimmed);
4321
+ if (imagePaths.length > 0) {
4322
+ const encoded = await Promise.all(
4323
+ imagePaths.map(async (path) => {
4324
+ try {
4325
+ const img = await encodeImageFile(path);
4326
+ return { path, img };
4327
+ } catch (e) {
4328
+ setEvents((es) => [
4329
+ ...es,
4330
+ { kind: "error", key: mkKey(), text: `failed to encode image ${path}: ${e.message}` }
4331
+ ]);
4332
+ return null;
4333
+ }
4334
+ })
4335
+ );
4336
+ const valid = encoded.filter((x) => x !== null);
4337
+ if (valid.length > 0) {
4338
+ images = valid.map((v) => v.img.filename);
4339
+ const parts = [
4340
+ { type: "text", text: sanitizeString(trimmed) },
4341
+ ...valid.map((v) => ({ type: "image_url", image_url: { url: v.img.dataUrl } }))
4342
+ ];
4343
+ content = parts;
4344
+ }
4345
+ }
4346
+ setEvents((e) => [...e, { kind: "user", key: mkKey(), text: display, images: images.length > 0 ? images : void 0 }]);
4347
+ messagesRef.current.push({ role: "user", content });
4228
4348
  setBusy(true);
4229
4349
  setTurnStartedAt(Date.now());
4230
4350
  const controller = new AbortController();
@@ -4328,14 +4448,21 @@ use: /thinking low | medium | high`
4328
4448
  resolve2("deny");
4329
4449
  return;
4330
4450
  }
4331
- setPerm({ tool: req.tool, args: req.args, resolve: resolve2 });
4451
+ permResolveRef.current = resolve2;
4452
+ setPerm({ tool: req.tool, args: req.args, resolve: (d) => {
4453
+ permResolveRef.current = null;
4454
+ resolve2(d);
4455
+ } });
4332
4456
  })
4333
4457
  }
4334
4458
  });
4335
4459
  await saveSessionSafe();
4336
4460
  } catch (e) {
4337
4461
  if (e.name === "AbortError") {
4338
- setEvents((es) => [...es, { kind: "info", key: mkKey(), text: "(aborted)" }]);
4462
+ setEvents((es) => [...es, { kind: "info", key: mkKey(), text: "(interrupted)" }]);
4463
+ setEvents(
4464
+ (evts) => evts.map((e2) => e2.kind === "tool" && e2.status === "running" ? { ...e2, status: "error", result: "(interrupted)" } : e2)
4465
+ );
4339
4466
  } else {
4340
4467
  const isInvalidJson400 = e instanceof KimiApiError && e.httpStatus === 400 && e.message.includes("invalid escaped character");
4341
4468
  if (isInvalidJson400) {
@@ -4356,10 +4483,13 @@ use: /thinking low | medium | high`
4356
4483
  }
4357
4484
  }
4358
4485
  } finally {
4486
+ const asstId = activeAsstIdRef.current;
4487
+ if (asstId !== null) updateAssistant(asstId, () => ({ streaming: false }));
4359
4488
  setBusy(false);
4360
4489
  setTurnStartedAt(null);
4361
4490
  activeAsstIdRef.current = null;
4362
4491
  activeControllerRef.current = null;
4492
+ permResolveRef.current = null;
4363
4493
  }
4364
4494
  },
4365
4495
  [cfg, handleSlash, updateAssistant, updateTool, saveSessionSafe]
@@ -4522,7 +4652,7 @@ async function renderApp(cfg, updateResult) {
4522
4652
  const instance = render(/* @__PURE__ */ jsx13(App, { initialCfg: cfg, initialUpdateResult: updateResult }));
4523
4653
  await instance.waitUntilExit();
4524
4654
  }
4525
- var CONTEXT_LIMIT, AUTO_COMPACT_SUGGEST_PCT, MAX_EVENTS, nextAssistantId, nextKey, mkKey, EFFORT_DESCRIPTIONS;
4655
+ var CONTEXT_LIMIT, AUTO_COMPACT_SUGGEST_PCT, MAX_EVENTS, nextAssistantId, nextKey, mkKey, MAX_IMAGES_PER_MESSAGE, EFFORT_DESCRIPTIONS;
4526
4656
  var init_app = __esm({
4527
4657
  "src/app.tsx"() {
4528
4658
  "use strict";
@@ -4546,12 +4676,14 @@ var init_app = __esm({
4546
4676
  init_theme();
4547
4677
  init_mode();
4548
4678
  init_sessions();
4679
+ init_image();
4549
4680
  CONTEXT_LIMIT = 262e3;
4550
4681
  AUTO_COMPACT_SUGGEST_PCT = 0.8;
4551
4682
  MAX_EVENTS = 500;
4552
4683
  nextAssistantId = 1;
4553
4684
  nextKey = 1;
4554
4685
  mkKey = () => `evt_${nextKey++}`;
4686
+ MAX_IMAGES_PER_MESSAGE = 10;
4555
4687
  EFFORT_DESCRIPTIONS = {
4556
4688
  low: "low \u2014 fastest; lightest reasoning. Best for simple Q&A, small edits, quick coordination.",
4557
4689
  medium: "medium \u2014 balanced (default). Solid quality on most edits, fast on trivial prompts.",