kimiflare 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,6 +22,7 @@
22
22
  ## Why kimiflare
23
23
 
24
24
  - **262k context window** — Read entire modules, large configs, and full stack traces without the model losing track.
25
+ - **Image understanding** — Drop image paths into your prompt (PNG, JPG, WebP, GIF, BMP). The model sees them inline — great for UI reviews, diagrams, screenshots, and mockups.
25
26
  - **Direct to Cloudflare** — No AI Gateway, no proxy, no OpenAI SDK. Your traffic goes straight to Workers AI from your account.
26
27
  - **Plan mode** — Ask the agent to research and produce a plan without touching your filesystem. Review it, then exit plan mode to execute.
27
28
 
@@ -53,6 +54,7 @@ Requires Node.js ≥ 20.
53
54
  | **Type-ahead queue** | Type your next prompt while the model is still working. Queued prompts show as `⏳ …` and fire in order. `Ctrl-C` aborts current + clears queue. |
54
55
  | **Auto-compaction** | At ~80% context usage, kimiflare nudges you to run `/compact`. It summarizes older turns into a dense summary, keeping the last 4 turns intact. |
55
56
  | **Streaming reasoning** | Toggle the model's chain-of-thought with `/reasoning` or `Ctrl-R`. See how it thinks in real time. |
57
+ | **Image understanding** | Drop image paths (PNG, JPG, WebP, GIF, BMP up to 5 MB) into any prompt. The model sees them inline — perfect for UI reviews, diagrams, and screenshots. |
56
58
  | **Live cost tracking** | Status bar shows real-time cost based on Cloudflare pricing: `$0.95/M input`, `$0.16/M cached`, `$4.00/M output`. |
57
59
  | **Session persistence** | Every turn is auto-saved. `/resume` lists past sessions (with message counts) in a paginated picker. |
58
60
  | **Smart permissions** | Bash session-allow is keyed by the first token (e.g., allow all `git` commands). Write/edit show a unified diff before you approve. |
@@ -105,6 +107,19 @@ kimiflare -p "..." --dangerously-allow-all # auto-approve mutating tool
105
107
  kimiflare -p "..." --reasoning # include chain-of-thought in stderr
106
108
  ```
107
109
 
110
+ ### Image understanding
111
+
112
+ Reference image files directly in your prompt — the model sees them inline:
113
+
114
+ ```sh
115
+ kimiflare
116
+ › fix the layout bug in this screenshot docs/bug.png
117
+ › convert this mockup design.png to Tailwind HTML
118
+ › explain this architecture diagram.png
119
+ ```
120
+
121
+ Supported formats: PNG, JPG, JPEG, WebP, GIF, BMP (up to 5 MB each, 10 per message).
122
+
108
123
  ### CLI flags
109
124
 
110
125
  | Flag | Short | Description |
package/dist/index.js CHANGED
@@ -296,10 +296,19 @@ async function* parseStream(body, signal) {
296
296
  }
297
297
  function sanitizeMessagesForApi(messages) {
298
298
  return messages.map((m) => {
299
- if (!m.tool_calls || m.tool_calls.length === 0) return m;
299
+ let next = m;
300
+ if (Array.isArray(m.content)) {
301
+ next = {
302
+ ...m,
303
+ content: m.content.map(
304
+ (part) => part.type === "text" ? { ...part, text: sanitizeString(part.text) } : part
305
+ )
306
+ };
307
+ }
308
+ if (!next.tool_calls || next.tool_calls.length === 0) return next;
300
309
  return {
301
- ...m,
302
- tool_calls: m.tool_calls.map((tc) => ({
310
+ ...next,
311
+ tool_calls: next.tool_calls.map((tc) => ({
303
312
  ...tc,
304
313
  function: {
305
314
  name: tc.function.name,
@@ -1533,15 +1542,16 @@ async function compactMessages(opts2) {
1533
1542
  return { summary: "", newMessages: messages, replacedCount: 0 };
1534
1543
  }
1535
1544
  const transcript = toSummarize.map((m) => {
1545
+ const contentStr = typeof m.content === "string" ? m.content : m.content?.map((p) => p.type === "text" ? p.text : "[image]").join(" ") ?? "";
1536
1546
  if (m.role === "tool") {
1537
- const snippet = (m.content ?? "").slice(0, 500);
1547
+ const snippet = contentStr.slice(0, 500);
1538
1548
  return `[tool ${m.name ?? ""}] ${snippet}`;
1539
1549
  }
1540
1550
  if (m.role === "assistant") {
1541
1551
  const calls = m.tool_calls ? ` (tool_calls: ${m.tool_calls.map((c) => c.function.name).join(", ")})` : "";
1542
- return `[assistant]${calls} ${m.content ?? ""}`;
1552
+ return `[assistant]${calls} ${contentStr}`;
1543
1553
  }
1544
- return `[${m.role}] ${m.content ?? ""}`;
1554
+ return `[${m.role}] ${contentStr}`;
1545
1555
  }).join("\n");
1546
1556
  let summary = "";
1547
1557
  const events = runKimi({
@@ -1867,12 +1877,18 @@ function EventView({
1867
1877
  verbose
1868
1878
  }) {
1869
1879
  if (evt.kind === "user") {
1870
- return /* @__PURE__ */ jsxs4(Box4, { children: [
1871
- /* @__PURE__ */ jsxs4(Text4, { bold: true, color: theme.user, children: [
1872
- "\u203A",
1873
- " "
1880
+ return /* @__PURE__ */ jsxs4(Box4, { flexDirection: "column", children: [
1881
+ /* @__PURE__ */ jsxs4(Box4, { children: [
1882
+ /* @__PURE__ */ jsxs4(Text4, { bold: true, color: theme.user, children: [
1883
+ "\u203A",
1884
+ " "
1885
+ ] }),
1886
+ /* @__PURE__ */ jsx4(Text4, { bold: true, children: evt.text })
1874
1887
  ] }),
1875
- /* @__PURE__ */ jsx4(Text4, { bold: true, children: evt.text })
1888
+ evt.images && evt.images.length > 0 && /* @__PURE__ */ jsx4(Box4, { paddingLeft: 2, children: /* @__PURE__ */ jsxs4(Text4, { color: theme.info.color, dimColor: theme.info.dim, children: [
1889
+ "\u{1F5BC}\uFE0F ",
1890
+ evt.images.join(", ")
1891
+ ] }) })
1876
1892
  ] });
1877
1893
  }
1878
1894
  if (evt.kind === "assistant") {
@@ -3470,7 +3486,7 @@ async function listSessions(limit = 30) {
3470
3486
  const [s, raw] = await Promise.all([stat2(path), readFile7(path, "utf8")]);
3471
3487
  const parsed = JSON.parse(raw);
3472
3488
  const firstUser = parsed.messages.find((m) => m.role === "user");
3473
- const firstPrompt = typeof firstUser?.content === "string" ? firstUser.content : "(no prompt)";
3489
+ const firstPrompt = typeof firstUser?.content === "string" ? firstUser.content : firstUser?.content ? firstUser.content.find((p) => p.type === "text")?.text ?? "(no prompt)" : "(no prompt)";
3474
3490
  summaries.push({
3475
3491
  id: parsed.id,
3476
3492
  filePath: path,
@@ -3495,6 +3511,45 @@ var init_sessions = __esm({
3495
3511
  }
3496
3512
  });
3497
3513
 
3514
+ // src/util/image.ts
3515
+ import { readFile as readFile8 } from "fs/promises";
3516
+ import { basename as basename2 } from "path";
3517
/**
 * Read an image file and package it as a base64 `data:` URL.
 *
 * The MIME type is looked up from the file extension (case-insensitive) in
 * EXT_TO_MIME; an extension that is not in the table falls back to
 * "image/jpeg".
 *
 * @param {string} filePath - Path of the image file to read.
 * @returns {Promise<{filename: string, mime: string, dataUrl: string}>}
 *   The file's base name, the chosen MIME type, and the encoded data URL.
 * @throws {Error} When the file is empty or larger than MAX_IMAGE_BYTES.
 *   Errors raised by the underlying file read propagate unchanged.
 */
async function encodeImageFile(filePath) {
  const buf = await readFile8(filePath);
  const size = buf.byteLength;

  // A zero-byte file would yield "data:<mime>;base64," with no payload, which
  // is not a usable image; fail here so the caller can report it per file.
  if (size === 0) {
    throw new Error("image file is empty");
  }

  if (size > MAX_IMAGE_BYTES) {
    const toMb = (bytes) => bytes / 1024 / 1024;
    throw new Error(
      `image too large (${toMb(size).toFixed(1)} MB); max is ${toMb(MAX_IMAGE_BYTES)} MB`
    );
  }

  // Everything from the last "." onward, lower-cased, e.g. ".png".
  const ext = filePath.slice(filePath.lastIndexOf(".")).toLowerCase();
  const mime = EXT_TO_MIME[ext] ?? "image/jpeg";

  return {
    filename: basename2(filePath),
    mime,
    dataUrl: `data:${mime};base64,${buf.toString("base64")}`
  };
}
3533
/**
 * Report whether a path ends in one of the extensions listed in EXT_TO_MIME.
 *
 * The extension is everything from the last "." to the end of the string,
 * compared case-insensitively. A path with no "." is never an image path.
 *
 * @param {string} path - Candidate file path.
 * @returns {boolean} True when the extension is an own key of EXT_TO_MIME.
 */
function isImagePath(path) {
  const dot = path.lastIndexOf(".");
  // Without this guard, slice(-1) would treat the last character as the extension.
  if (dot === -1) return false;
  const ext = path.slice(dot).toLowerCase();
  // Own-property check: `in` would also accept keys inherited via the prototype chain.
  return Object.hasOwn(EXT_TO_MIME, ext);
}
3537
// Module-level bindings for src/util/image.ts. They are assigned inside the
// callback handed to __esm below.
// NOTE(review): __esm is defined elsewhere in the bundle; presumably it defers
// the callback until init_image() is first called — confirm.
var MAX_IMAGE_BYTES;
var EXT_TO_MIME;
var init_image = __esm({
  "src/util/image.ts"() {
    "use strict";
    // Size cap for a single image: 5 MiB, expressed in bytes.
    const BYTES_PER_MIB = 1024 * 1024;
    MAX_IMAGE_BYTES = 5 * BYTES_PER_MIB;
    // Lower-case file extension (leading dot included) -> MIME type.
    EXT_TO_MIME = Object.fromEntries([
      [".png", "image/png"],
      [".jpg", "image/jpeg"],
      [".jpeg", "image/jpeg"],
      [".gif", "image/gif"],
      [".webp", "image/webp"],
      [".bmp", "image/bmp"]
    ]);
  }
});
3552
+
3498
3553
  // src/app.tsx
3499
3554
  var app_exports = {};
3500
3555
  __export(app_exports, {
@@ -3510,6 +3565,16 @@ function capEvents(prev) {
3510
3565
  if (prev.length <= MAX_EVENTS) return prev;
3511
3566
  return prev.slice(prev.length - MAX_EVENTS);
3512
3567
  }
3568
/**
 * Extract the image file paths mentioned in a prompt.
 *
 * The text is split on whitespace. For each token two cleaned candidates are
 * tried in order, and the first one that passes isImagePath() and existsSync()
 * is kept:
 *   1. the token with wrapping quotes/backticks and trailing punctuation removed;
 *   2. the same, with wrapping brackets removed as well.
 * Trying the quote-only form first keeps file names that really contain
 * brackets (e.g. "(1).png") resolvable.
 *
 * @param {string} text - Raw prompt text.
 * @returns {string[]} Unique matching paths, in order of first appearance.
 */
function findImagePaths(text) {
  const found = new Set();
  for (const token of text.split(/\s+/)) {
    // Strip whole runs, so that `"docs/bug.png",` is reduced to `docs/bug.png`.
    const quoteTrimmed = token
      .replace(/^["'`]+/, "")
      .replace(/["'`.,;:!?]+$/, "");
    const bracketTrimmed = token
      .replace(/^["'`(\[{<]+/, "")
      .replace(/["'`)\]}>.,;:!?]+$/, "");
    const match = [quoteTrimmed, bracketTrimmed].find(
      (candidate) => isImagePath(candidate) && existsSync(candidate)
    );
    if (match !== undefined) {
      found.add(match);
    }
  }
  return [...found];
}
3513
3578
  function App({ initialCfg, initialUpdateResult }) {
3514
3579
  const { exit } = useApp();
3515
3580
  const [cfg, setCfg] = useState6(initialCfg);
@@ -3679,7 +3744,13 @@ function App({ initialCfg, initialUpdateResult }) {
3679
3744
  if (!cfg) return;
3680
3745
  if (!sessionIdRef.current) {
3681
3746
  const firstUser = messagesRef.current.find((m) => m.role === "user");
3682
- const firstText = typeof firstUser?.content === "string" ? firstUser.content : "session";
3747
+ let firstText = "session";
3748
+ if (typeof firstUser?.content === "string") {
3749
+ firstText = firstUser.content;
3750
+ } else if (Array.isArray(firstUser?.content)) {
3751
+ const textPart = firstUser.content.find((p) => p.type === "text");
3752
+ if (textPart?.text) firstText = textPart.text;
3753
+ }
3683
3754
  sessionIdRef.current = makeSessionId(firstText);
3684
3755
  }
3685
3756
  try {
@@ -3967,7 +4038,12 @@ function App({ initialCfg, initialUpdateResult }) {
3967
4038
  text: `resumed session ${picked.id} (${picked.messageCount} msgs)`
3968
4039
  }
3969
4040
  ]);
3970
- const userMsgs = file.messages.filter((m) => m.role === "user" && typeof m.content === "string").map((m) => m.content);
4041
+ const userMsgs = file.messages.filter((m) => m.role === "user" && m.content).map((m) => {
4042
+ if (!m.content) return "";
4043
+ if (typeof m.content === "string") return m.content;
4044
+ const textPart = m.content.find((p) => p.type === "text");
4045
+ return textPart?.text ?? "";
4046
+ }).filter((text) => text.length > 0);
3971
4047
  if (userMsgs.length > 0) setHistory(userMsgs);
3972
4048
  setUsage(null);
3973
4049
  } catch (e) {
@@ -4223,8 +4299,36 @@ use: /thinking low | medium | high`
4223
4299
  if (!trimmed) return;
4224
4300
  if (trimmed.startsWith("/") && handleSlash(trimmed)) return;
4225
4301
  const display = displayText?.trim() || trimmed;
4226
- setEvents((e) => [...e, { kind: "user", key: mkKey(), text: display }]);
4227
- messagesRef.current.push({ role: "user", content: sanitizeString(trimmed) });
4302
+ const imagePaths = findImagePaths(trimmed).slice(0, MAX_IMAGES_PER_MESSAGE);
4303
+ let images = [];
4304
+ let content = sanitizeString(trimmed);
4305
+ if (imagePaths.length > 0) {
4306
+ const encoded = await Promise.all(
4307
+ imagePaths.map(async (path) => {
4308
+ try {
4309
+ const img = await encodeImageFile(path);
4310
+ return { path, img };
4311
+ } catch (e) {
4312
+ setEvents((es) => [
4313
+ ...es,
4314
+ { kind: "error", key: mkKey(), text: `failed to encode image ${path}: ${e.message}` }
4315
+ ]);
4316
+ return null;
4317
+ }
4318
+ })
4319
+ );
4320
+ const valid = encoded.filter((x) => x !== null);
4321
+ if (valid.length > 0) {
4322
+ images = valid.map((v) => v.img.filename);
4323
+ const parts = [
4324
+ { type: "text", text: sanitizeString(trimmed) },
4325
+ ...valid.map((v) => ({ type: "image_url", image_url: { url: v.img.dataUrl } }))
4326
+ ];
4327
+ content = parts;
4328
+ }
4329
+ }
4330
+ setEvents((e) => [...e, { kind: "user", key: mkKey(), text: display, images: images.length > 0 ? images : void 0 }]);
4331
+ messagesRef.current.push({ role: "user", content });
4228
4332
  setBusy(true);
4229
4333
  setTurnStartedAt(Date.now());
4230
4334
  const controller = new AbortController();
@@ -4522,7 +4626,7 @@ async function renderApp(cfg, updateResult) {
4522
4626
  const instance = render(/* @__PURE__ */ jsx13(App, { initialCfg: cfg, initialUpdateResult: updateResult }));
4523
4627
  await instance.waitUntilExit();
4524
4628
  }
4525
- var CONTEXT_LIMIT, AUTO_COMPACT_SUGGEST_PCT, MAX_EVENTS, nextAssistantId, nextKey, mkKey, EFFORT_DESCRIPTIONS;
4629
+ var CONTEXT_LIMIT, AUTO_COMPACT_SUGGEST_PCT, MAX_EVENTS, nextAssistantId, nextKey, mkKey, MAX_IMAGES_PER_MESSAGE, EFFORT_DESCRIPTIONS;
4526
4630
  var init_app = __esm({
4527
4631
  "src/app.tsx"() {
4528
4632
  "use strict";
@@ -4546,12 +4650,14 @@ var init_app = __esm({
4546
4650
  init_theme();
4547
4651
  init_mode();
4548
4652
  init_sessions();
4653
+ init_image();
4549
4654
  CONTEXT_LIMIT = 262e3;
4550
4655
  AUTO_COMPACT_SUGGEST_PCT = 0.8;
4551
4656
  MAX_EVENTS = 500;
4552
4657
  nextAssistantId = 1;
4553
4658
  nextKey = 1;
4554
4659
  mkKey = () => `evt_${nextKey++}`;
4660
+ MAX_IMAGES_PER_MESSAGE = 10;
4555
4661
  EFFORT_DESCRIPTIONS = {
4556
4662
  low: "low \u2014 fastest; lightest reasoning. Best for simple Q&A, small edits, quick coordination.",
4557
4663
  medium: "medium \u2014 balanced (default). Solid quality on most edits, fast on trivial prompts.",