chattercatcher 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -3,12 +3,12 @@
3
3
  // src/cli.ts
4
4
  import { input, password, select, confirm, number } from "@inquirer/prompts";
5
5
  import { Command } from "commander";
6
- import fs13 from "fs/promises";
6
+ import fs14 from "fs/promises";
7
7
 
8
8
  // package.json
9
9
  var package_default = {
10
10
  name: "chattercatcher",
11
- version: "0.1.17",
11
+ version: "0.1.18",
12
12
  description: "\u672C\u5730\u4F18\u5148\u7684\u98DE\u4E66/Lark \u5BB6\u5EAD\u7FA4\u77E5\u8BC6\u5E93\u673A\u5668\u4EBA",
13
13
  type: "module",
14
14
  main: "dist/index.js",
@@ -102,6 +102,13 @@ var appConfigSchema = z.object({
102
102
  model: z.string().default(""),
103
103
  dimension: z.number().int().positive().nullable().default(null)
104
104
  }),
105
+ multimodal: z.preprocess(
106
+ (value) => value ?? {},
107
+ z.object({
108
+ baseUrl: z.string().url().or(z.literal("")).default(""),
109
+ model: z.string().default("")
110
+ })
111
+ ),
105
112
  storage: z.object({
106
113
  dataDir: z.string().default(defaultDataDir)
107
114
  }),
@@ -126,13 +133,20 @@ var appSecretsSchema = z.object({
126
133
  }),
127
134
  embedding: z.object({
128
135
  apiKey: z.string().default("")
129
- })
136
+ }),
137
+ multimodal: z.preprocess(
138
+ (value) => value ?? {},
139
+ z.object({
140
+ apiKey: z.string().default("")
141
+ })
142
+ )
130
143
  });
131
144
  function createDefaultConfig() {
132
145
  return appConfigSchema.parse({
133
146
  feishu: {},
134
147
  llm: {},
135
148
  embedding: {},
149
+ multimodal: {},
136
150
  storage: {},
137
151
  web: {},
138
152
  schedules: {},
@@ -143,7 +157,8 @@ function createDefaultSecrets() {
143
157
  return appSecretsSchema.parse({
144
158
  feishu: {},
145
159
  llm: {},
146
- embedding: {}
160
+ embedding: {},
161
+ multimodal: {}
147
162
  });
148
163
  }
149
164
 
@@ -478,6 +493,24 @@ function migrateDatabase(database) {
478
493
  created_at TEXT NOT NULL,
479
494
  updated_at TEXT NOT NULL
480
495
  );
496
+
497
+ CREATE TABLE IF NOT EXISTS image_multimodal_tasks (
498
+ id TEXT PRIMARY KEY,
499
+ source_message_id TEXT NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
500
+ platform_message_id TEXT NOT NULL,
501
+ image_key TEXT NOT NULL,
502
+ stored_path TEXT NOT NULL,
503
+ mime_type TEXT NOT NULL,
504
+ status TEXT NOT NULL CHECK(status IN ('pending','running','succeeded','skipped','failed')),
505
+ attempts INTEGER NOT NULL DEFAULT 0,
506
+ last_error TEXT,
507
+ derived_message_id TEXT REFERENCES messages(id) ON DELETE SET NULL,
508
+ created_at TEXT NOT NULL,
509
+ updated_at TEXT NOT NULL,
510
+ UNIQUE(source_message_id, image_key)
511
+ );
512
+
513
+ CREATE INDEX IF NOT EXISTS image_multimodal_tasks_status_idx ON image_multimodal_tasks(status, updated_at);
481
514
  `);
482
515
  }
483
516
 
@@ -1144,6 +1177,7 @@ var MessageRepository = class {
1144
1177
  )
1145
1178
  ON CONFLICT(platform, platform_message_id)
1146
1179
  DO UPDATE SET
1180
+ message_type = excluded.message_type,
1147
1181
  text = excluded.text,
1148
1182
  raw_payload_json = excluded.raw_payload_json,
1149
1183
  received_at = excluded.received_at
@@ -1188,6 +1222,48 @@ var MessageRepository = class {
1188
1222
  transaction();
1189
1223
  return messageId;
1190
1224
  }
1225
+ createImageSummaryMessage(input2) {
1226
+ const source = this.database.prepare(
1227
+ `
1228
+ SELECT
1229
+ m.platform AS platform,
1230
+ m.platform_message_id AS platformMessageId,
1231
+ m.chat_id AS chatId,
1232
+ m.sender_id AS senderId,
1233
+ m.sender_name AS senderName,
1234
+ m.sent_at AS sentAt,
1235
+ c.platform_chat_id AS platformChatId,
1236
+ c.name AS chatName
1237
+ FROM messages m
1238
+ JOIN chats c ON c.id = m.chat_id
1239
+ WHERE m.id = ?
1240
+ `
1241
+ ).get(input2.sourceMessageId);
1242
+ if (!source) {
1243
+ throw new Error("\u539F\u59CB\u56FE\u7247\u6D88\u606F\u4E0D\u5B58\u5728\u3002");
1244
+ }
1245
+ const derivedPlatformMessageId = `${source.platformMessageId}:image-summary:${input2.imageKey}`;
1246
+ return this.ingest({
1247
+ platform: source.platform,
1248
+ platformChatId: source.platformChatId,
1249
+ chatName: source.chatName,
1250
+ platformMessageId: derivedPlatformMessageId,
1251
+ senderId: source.senderId,
1252
+ senderName: source.senderName,
1253
+ messageType: "image_summary",
1254
+ text: `[\u56FE\u7247\u8F6C\u8FF0] ${input2.summary.trim()}`,
1255
+ sentAt: source.sentAt,
1256
+ rawPayload: {
1257
+ derivedFromMessageId: input2.sourceMessageId,
1258
+ sourceAttachmentKind: "image",
1259
+ sourceResourceKey: input2.imageKey,
1260
+ multimodalModel: input2.multimodalModel,
1261
+ isMeaningful: true,
1262
+ ...input2.reason?.trim() ? { reason: input2.reason.trim() } : {},
1263
+ generatedAt: input2.generatedAt
1264
+ }
1265
+ });
1266
+ }
1191
1267
  listRecentMessages(limit = 20) {
1192
1268
  return this.database.prepare(
1193
1269
  `
@@ -1507,6 +1583,69 @@ var EpisodeRepository = class {
1507
1583
  messageIds: window.messages.map((message) => message.id)
1508
1584
  };
1509
1585
  }
1586
+ async refreshWindowForMessage(input2) {
1587
+ const target = this.database.prepare(
1588
+ `
1589
+ SELECT chat_id AS chatId, sent_at AS sentAt
1590
+ FROM messages
1591
+ WHERE id = ?
1592
+ `
1593
+ ).get(input2.messageId);
1594
+ if (!target) {
1595
+ return void 0;
1596
+ }
1597
+ const existingWindow = this.database.prepare(
1598
+ `
1599
+ SELECT e.started_at AS startedAt, e.ended_at AS endedAt
1600
+ FROM messages target
1601
+ JOIN messages source
1602
+ ON source.id = json_extract(target.raw_payload_json, '$.derivedFromMessageId')
1603
+ JOIN memory_episode_messages mem ON mem.message_id = source.id
1604
+ JOIN memory_episodes e ON e.id = mem.episode_id
1605
+ WHERE target.id = ?
1606
+ LIMIT 1
1607
+ `
1608
+ ).get(input2.messageId);
1609
+ if (!existingWindow) {
1610
+ return void 0;
1611
+ }
1612
+ const messageTime = toMillis(target.sentAt);
1613
+ const windowStart = toMillis(existingWindow.startedAt);
1614
+ const windowEnd = Math.max(toMillis(existingWindow.endedAt), messageTime);
1615
+ const rows = this.database.prepare(
1616
+ `
1617
+ SELECT
1618
+ m.id,
1619
+ m.chat_id AS chatId,
1620
+ c.name AS chatName,
1621
+ m.sender_name AS senderName,
1622
+ m.text,
1623
+ m.sent_at AS sentAt
1624
+ FROM messages m
1625
+ JOIN chats c ON c.id = m.chat_id
1626
+ WHERE m.chat_id = ?
1627
+ ORDER BY m.sent_at ASC
1628
+ `
1629
+ ).all(target.chatId);
1630
+ const windowMessages = rows.filter((message) => {
1631
+ const time = toMillis(message.sentAt);
1632
+ return time >= windowStart && time <= windowEnd;
1633
+ });
1634
+ const first = windowMessages[0];
1635
+ const last = windowMessages.at(-1);
1636
+ if (!first || !last) {
1637
+ return void 0;
1638
+ }
1639
+ const window = {
1640
+ chatId: first.chatId,
1641
+ chatName: first.chatName,
1642
+ startedAt: first.sentAt,
1643
+ endedAt: last.sentAt,
1644
+ messages: windowMessages
1645
+ };
1646
+ const summary = await input2.summarize(window);
1647
+ return this.insertEpisode(window, summary);
1648
+ }
1510
1649
  getEpisodeCount() {
1511
1650
  const row = this.database.prepare("SELECT count(*) AS count FROM memory_episodes").get();
1512
1651
  return row.count;
@@ -2478,6 +2617,268 @@ async function ensureFeishuBotOpenId(config, secrets, options = {}) {
2478
2617
  // src/feishu/gateway.ts
2479
2618
  import * as lark2 from "@larksuiteoapi/node-sdk";
2480
2619
 
2620
+ // src/multimodal/tasks.ts
2621
+ import crypto4 from "crypto";
2622
/**
 * Current wall-clock time as an ISO-8601 string (the `Date#toISOString` form).
 * @returns {string}
 */
function nowIso4() {
  const current = new Date();
  return current.toISOString();
}
2625
/**
 * Deterministic task id for a (source message, image key) pair: the first
 * 32 hex characters of the SHA-256 of the two values concatenated directly.
 *
 * NOTE(review): the inputs are joined without a separator. Keep it that way
 * unless existing rows are migrated, because ids are derived from this value.
 *
 * @param {string} sourceMessageId
 * @param {string} imageKey
 * @returns {string} 32-character lowercase hex string.
 */
function stableId3(sourceMessageId, imageKey) {
  const hasher = crypto4.createHash("sha256");
  hasher.update(`${sourceMessageId}${imageKey}`);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 32);
}
2628
/**
 * Convert a snake_case `image_multimodal_tasks` row into the camelCase task
 * record used by the rest of the code.
 *
 * `lastError` and `derivedMessageId` are only present on the result when the
 * corresponding column holds a truthy value.
 *
 * @param {object|undefined|null} row - Raw database row.
 * @returns {object|undefined} The mapped record, or `undefined` for a falsy row.
 */
function mapRow(row) {
  if (!row) {
    return undefined;
  }
  const optionalFields = {};
  if (row.last_error) {
    optionalFields.lastError = row.last_error;
  }
  if (row.derived_message_id) {
    optionalFields.derivedMessageId = row.derived_message_id;
  }
  // Optional fields are spread between attempts and createdAt to keep key order.
  return {
    id: row.id,
    sourceMessageId: row.source_message_id,
    platformMessageId: row.platform_message_id,
    imageKey: row.image_key,
    storedPath: row.stored_path,
    mimeType: row.mime_type,
    status: row.status,
    attempts: row.attempts,
    ...optionalFields,
    createdAt: row.created_at,
    updatedAt: row.updated_at
  };
}
2647
/**
 * Persistence helper for the `image_multimodal_tasks` table.
 *
 * Each row tracks one image waiting for (or finished with) multimodal
 * description. Status values written here are 'pending', 'running',
 * 'succeeded', 'skipped' and 'failed'.
 *
 * The `database` handle must expose `prepare(sql)` returning a statement with
 * `run`, `get` and `all` (presumably a better-sqlite3 style handle — confirm).
 */
var ImageMultimodalTaskRepository = class {
  constructor(database) {
    this.database = database;
  }
  database;

  /**
   * Insert a pending task, or reset the existing task for the same
   * (source message, image key) pair back to a fresh pending state.
   * @param {object} input2 - sourceMessageId, platformMessageId, imageKey, storedPath, mimeType.
   * @returns {object} The stored task record.
   * @throws {Error} If the row cannot be read back after the write.
   */
  enqueue(input2) {
    const { sourceMessageId, platformMessageId, imageKey, storedPath, mimeType } = input2;
    const taskId = stableId3(sourceMessageId, imageKey);
    const writtenAt = nowIso4();
    const upsert = this.database.prepare(
      `
        INSERT INTO image_multimodal_tasks (
          id,
          source_message_id,
          platform_message_id,
          image_key,
          stored_path,
          mime_type,
          status,
          attempts,
          created_at,
          updated_at
        )
        VALUES (
          @id,
          @sourceMessageId,
          @platformMessageId,
          @imageKey,
          @storedPath,
          @mimeType,
          'pending',
          0,
          @createdAt,
          @updatedAt
        )
        ON CONFLICT(source_message_id, image_key)
        DO UPDATE SET
          platform_message_id = excluded.platform_message_id,
          stored_path = excluded.stored_path,
          mime_type = excluded.mime_type,
          status = 'pending',
          attempts = 0,
          last_error = NULL,
          derived_message_id = NULL,
          updated_at = excluded.updated_at
      `
    );
    upsert.run({
      id: taskId,
      sourceMessageId,
      platformMessageId,
      imageKey,
      storedPath,
      mimeType,
      createdAt: writtenAt,
      updatedAt: writtenAt
    });
    const saved = this.getById(taskId);
    if (!saved) {
      throw new Error(`\u56FE\u7247\u591A\u6A21\u6001\u4EFB\u52A1\u5199\u5165\u5931\u8D25\uFF1A${taskId}`);
    }
    return saved;
  }

  /**
   * List pending tasks, least recently updated first.
   * @param {number} [limit=10] - Maximum number of rows.
   * @returns {object[]} Mapped task records.
   */
  listPending(limit = 10) {
    const pendingRows = this.database.prepare(
      `
        SELECT
          id,
          source_message_id,
          platform_message_id,
          image_key,
          stored_path,
          mime_type,
          status,
          attempts,
          last_error,
          derived_message_id,
          created_at,
          updated_at
        FROM image_multimodal_tasks
        WHERE status = 'pending'
        ORDER BY updated_at ASC
        LIMIT ?
      `
    ).all(limit);
    const tasks = [];
    for (const row of pendingRows) {
      const task = mapRow(row);
      if (task) {
        tasks.push(task);
      }
    }
    return tasks;
  }

  /**
   * Claim a pending task: set it to 'running', increment `attempts`, clear `last_error`.
   * @param {string} id
   * @returns {object} The updated record.
   * @throws {Error} If no row with this id is currently 'pending'.
   */
  markRunning(id) {
    const claim = this.database.prepare(
      `
        UPDATE image_multimodal_tasks
        SET status = 'running',
            attempts = attempts + 1,
            last_error = NULL,
            updated_at = @updatedAt
        WHERE id = @id AND status = 'pending'
      `
    );
    const outcome = claim.run({ id, updatedAt: nowIso4() });
    // Zero changed rows means the task is missing or not pending any more.
    if (outcome.changes === 0) {
      throw new Error(`\u56FE\u7247\u591A\u6A21\u6001\u4EFB\u52A1\u72B6\u6001\u65E0\u6CD5\u66F4\u65B0\uFF1A${id}`);
    }
    return this.requireById(id);
  }

  /**
   * Record success and link the derived summary message.
   * @param {string} id
   * @param {string} derivedMessageId
   * @returns {object} The updated record.
   */
  markSucceeded(id, derivedMessageId) {
    const update = this.database.prepare(
      `
        UPDATE image_multimodal_tasks
        SET status = 'succeeded',
            last_error = NULL,
            derived_message_id = @derivedMessageId,
            updated_at = @updatedAt
        WHERE id = @id
      `
    );
    update.run({ id, derivedMessageId, updatedAt: nowIso4() });
    return this.requireById(id);
  }

  /**
   * Record that the image was deliberately skipped; the reason is kept in `last_error`.
   * @param {string} id
   * @param {string} reason
   * @returns {object} The updated record.
   */
  markSkipped(id, reason) {
    const update = this.database.prepare(
      `
        UPDATE image_multimodal_tasks
        SET status = 'skipped',
            last_error = @reason,
            derived_message_id = NULL,
            updated_at = @updatedAt
        WHERE id = @id
      `
    );
    update.run({ id, reason, updatedAt: nowIso4() });
    return this.requireById(id);
  }

  /**
   * Record a failed attempt.
   * @param {string} id
   * @param {string} error - Message stored in `last_error`.
   * @param {boolean} finalFailure - Truthy stores 'failed'; otherwise the task returns to 'pending'.
   * @returns {object} The updated record.
   */
  markFailed(id, error, finalFailure) {
    const nextStatus = finalFailure ? "failed" : "pending";
    const update = this.database.prepare(
      `
        UPDATE image_multimodal_tasks
        SET status = @status,
            last_error = @error,
            derived_message_id = NULL,
            updated_at = @updatedAt
        WHERE id = @id
      `
    );
    update.run({ id, status: nextStatus, error, updatedAt: nowIso4() });
    return this.requireById(id);
  }

  /**
   * Look up one task by id.
   * @param {string} id
   * @returns {object|undefined} The record, or `undefined` when no row matches.
   */
  getById(id) {
    const lookup = this.database.prepare(
      `
        SELECT
          id,
          source_message_id,
          platform_message_id,
          image_key,
          stored_path,
          mime_type,
          status,
          attempts,
          last_error,
          derived_message_id,
          created_at,
          updated_at
        FROM image_multimodal_tasks
        WHERE id = ?
      `
    );
    return mapRow(lookup.get(id));
  }

  /**
   * Like `getById`, but a missing task is an error.
   * @param {string} id
   * @returns {object}
   * @throws {Error} If no row matches.
   */
  requireById(id) {
    const found = this.getById(id);
    if (found) {
      return found;
    }
    throw new Error(`\u56FE\u7247\u591A\u6A21\u6001\u4EFB\u52A1\u4E0D\u5B58\u5728\uFF1A${id}`);
  }
};
2817
+
2818
+ // src/multimodal/worker.ts
2819
/**
 * Drains pending image tasks: asks the multimodal model to describe each
 * image, stores meaningful descriptions as derived messages, and records the
 * outcome on the task.
 *
 * Options read by this class:
 * - `tasks`: task store with `listPending`, `markRunning`, `markSucceeded`,
 *   `markSkipped`, `markFailed`.
 * - `model`: object with async `describeImage({ imagePath, mimeType })`.
 * - `messages`: object with `createImageSummaryMessage(...)`.
 * - `multimodalModelName`: model name recorded on derived messages.
 * - `vectorIndexMessage` (optional): async hook called with the derived message id.
 * - `episodes` + `summarizeEpisode` (optional, both required to take effect):
 *   used to refresh the episode window; `config.episodes.windowMinutes` is
 *   read only in that case.
 * - `maxAttempts` (optional, default 3): number of attempts after which a
 *   failure is reported to `markFailed` as final.
 */
var ImageMultimodalWorker = class {
  constructor(options) {
    this.options = options;
  }
  options;

  /**
   * Process up to `limit` pending tasks one after another.
   * @param {number} [limit=10]
   * @returns {Promise<{processed: number, succeeded: number, skipped: number, failed: number}>}
   *   `processed` counts every task taken from the pending list, including
   *   ones that could not be claimed.
   */
  async processPending(limit = 10) {
    const result = { processed: 0, succeeded: 0, skipped: 0, failed: 0 };
    const pending = this.options.tasks.listPending(limit);
    for (const task of pending) {
      result.processed += 1;
      await this.processTask(task, result);
    }
    return result;
  }

  /**
   * Claim and process one task, updating the counters in `result`.
   *
   * Failures during description, storage or indexing are recorded on the task
   * via `markFailed` and counted; they are not rethrown.
   *
   * @param {{id: string}} task
   * @param {{succeeded: number, skipped: number, failed: number}} result
   * @returns {Promise<void>}
   * @throws Any `markRunning` error other than the "cannot update status" one.
   */
  async processTask(task, result) {
    let running;
    try {
      running = this.options.tasks.markRunning(task.id);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // This message is what markRunning raises when the task is no longer
      // pending; treat it as "nothing to do" rather than as a failure.
      if (message.startsWith("\u56FE\u7247\u591A\u6A21\u6001\u4EFB\u52A1\u72B6\u6001\u65E0\u6CD5\u66F4\u65B0\uFF1A")) {
        return;
      }
      throw error;
    }
    try {
      const described = await this.options.model.describeImage({
        imagePath: running.storedPath,
        mimeType: running.mimeType
      });
      if (!described.isMeaningful) {
        this.options.tasks.markSkipped(running.id, described.reason || "\u591A\u6A21\u6001\u6A21\u578B\u5224\u5B9A\u56FE\u7247\u65E0\u610F\u4E49\u3002");
        result.skipped += 1;
        return;
      }
      const derivedMessageId = this.options.messages.createImageSummaryMessage({
        sourceMessageId: running.sourceMessageId,
        imageKey: running.imageKey,
        summary: described.summary,
        reason: described.reason,
        multimodalModel: this.options.multimodalModelName,
        generatedAt: (/* @__PURE__ */ new Date()).toISOString()
      });
      if (this.options.vectorIndexMessage) {
        await this.options.vectorIndexMessage(derivedMessageId);
      }
      if (this.options.episodes && this.options.summarizeEpisode) {
        await this.options.episodes.refreshWindowForMessage({
          messageId: derivedMessageId,
          windowMs: this.options.config.episodes.windowMinutes * 60 * 1e3,
          summarize: this.options.summarizeEpisode
        });
      }
      this.options.tasks.markSucceeded(running.id, derivedMessageId);
      result.succeeded += 1;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // `running.attempts` already includes the attempt that just failed.
      const isFinal = running.attempts >= this.resolveMaxAttempts();
      this.options.tasks.markFailed(running.id, message, isFinal);
      result.failed += 1;
    }
  }

  /**
   * Effective retry limit: `options.maxAttempts` when it is a positive
   * integer, otherwise the historical default of 3.
   * @returns {number}
   */
  resolveMaxAttempts() {
    const configured = this.options.maxAttempts;
    return Number.isInteger(configured) && configured > 0 ? configured : 3;
  }
};
2881
+
2481
2882
  // src/rag/citations.ts
2482
2883
  function isOpaqueId(value) {
2483
2884
  return Boolean(value && /^(ou|oc|om|cli|on|un|uid)_?[a-z0-9]+/i.test(value));
@@ -2955,6 +3356,7 @@ function createFeishuEventDispatcher(options) {
2955
3356
  payload,
2956
3357
  downloader: options.resourceDownloader,
2957
3358
  config: options.config,
3359
+ secrets: options.secrets,
2958
3360
  vectorIndexMessage: options.attachmentVectorIndexer
2959
3361
  }) : options.ingestor.ingestFeishuEvent(payload);
2960
3362
  if (!result.accepted) {
@@ -2980,6 +3382,23 @@ function createFeishuEventDispatcher(options) {
2980
3382
  }
2981
3383
  if (result.attachment?.downloaded) {
2982
3384
  console.log(`\u98DE\u4E66\u9644\u4EF6\u5DF2\u4E0B\u8F7D\uFF1A${result.attachment.downloaded.storedPath}`);
3385
+ if (options.imageMultimodalProcessor && result.attachment.imageTask) {
3386
+ void new ImageMultimodalWorker({
3387
+ config: options.config,
3388
+ messages: new MessageRepository(options.imageMultimodalProcessor.database),
3389
+ tasks: new ImageMultimodalTaskRepository(options.imageMultimodalProcessor.database),
3390
+ model: options.imageMultimodalProcessor.model,
3391
+ multimodalModelName: options.config.multimodal.model,
3392
+ vectorIndexMessage: options.attachmentVectorIndexer
3393
+ }).processPending().then((imageResult) => {
3394
+ console.log(
3395
+ `\u98DE\u4E66\u56FE\u7247\u591A\u6A21\u6001\u5904\u7406\u5B8C\u6210\uFF1Aprocessed=${imageResult.processed}, succeeded=${imageResult.succeeded}, skipped=${imageResult.skipped}, failed=${imageResult.failed}`
3396
+ );
3397
+ }).catch((error) => {
3398
+ const message = error instanceof Error ? error.message : String(error);
3399
+ console.error(`\u98DE\u4E66\u56FE\u7247\u591A\u6A21\u6001\u5904\u7406\u5931\u8D25\uFF1A${message}`);
3400
+ });
3401
+ }
2983
3402
  if (result.attachment.indexedMessageId) {
2984
3403
  console.log(`\u98DE\u4E66\u9644\u4EF6\u5DF2\u8FDB\u5165 RAG\uFF1A${result.attachment.indexedMessageId}`);
2985
3404
  if (result.attachment.vectorIndexed) {
@@ -3030,7 +3449,8 @@ function createFeishuGateway(options) {
3030
3449
  questionHandler: options.questionHandler,
3031
3450
  resourceDownloader: options.resourceDownloader,
3032
3451
  attachmentVectorIndexer: options.attachmentVectorIndexer,
3033
- episodeProcessor: options.episodeProcessor
3452
+ episodeProcessor: options.episodeProcessor,
3453
+ imageMultimodalProcessor: options.imageMultimodalProcessor
3034
3454
  });
3035
3455
  return {
3036
3456
  async start() {
@@ -3112,7 +3532,7 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
3112
3532
  };
3113
3533
 
3114
3534
  // src/files/ingest.ts
3115
- import crypto4 from "crypto";
3535
+ import crypto5 from "crypto";
3116
3536
  import fs11 from "fs/promises";
3117
3537
  import path13 from "path";
3118
3538
 
@@ -3176,7 +3596,7 @@ function ensureSupportedTextFile(filePath) {
3176
3596
  }
3177
3597
  }
3178
3598
  function stableStoredName(sourcePath, fileName) {
3179
- const digest = crypto4.createHash("sha256").update(sourcePath).digest("hex").slice(0, 16);
3599
+ const digest = crypto5.createHash("sha256").update(sourcePath).digest("hex").slice(0, 16);
3180
3600
  return `${digest}-${fileName}`;
3181
3601
  }
3182
3602
  async function ingestLocalFile(input2) {
@@ -3414,12 +3834,17 @@ function extractAttachment(message) {
3414
3834
  }
3415
3835
  return candidate;
3416
3836
  }
3837
/**
 * Report whether multimodal image processing is fully configured: a base
 * URL, a model name and an API key must all be non-empty.
 *
 * A missing `config`, `secrets`, or `multimodal` section counts as "not
 * configured" instead of raising a TypeError, so callers that do not supply
 * secrets simply get `false`.
 *
 * @param {{multimodal?: {baseUrl?: string, model?: string}}} [config]
 * @param {{multimodal?: {apiKey?: string}}} [secrets]
 * @returns {boolean}
 */
function isMultimodalReady(config, secrets) {
  const baseUrl = config?.multimodal?.baseUrl;
  const model = config?.multimodal?.model;
  const apiKey = secrets?.multimodal?.apiKey;
  return Boolean(baseUrl && model && apiKey);
}
3417
3840
  var GatewayIngestor = class {
3418
3841
  messages;
3419
3842
  jobs;
3843
+ imageTasks;
3420
3844
  constructor(database) {
3421
3845
  this.messages = new MessageRepository(database);
3422
3846
  this.jobs = new FileJobRepository(database);
3847
+ this.imageTasks = new ImageMultimodalTaskRepository(database);
3423
3848
  }
3424
3849
  ingestFeishuEvent(payload) {
3425
3850
  const normalized = normalizeFeishuReceiveMessageEvent(payload);
@@ -3451,6 +3876,23 @@ var GatewayIngestor = class {
3451
3876
  messageId: result.message.platformMessageId,
3452
3877
  attachment
3453
3878
  });
3879
+ if (attachment.kind === "image") {
3880
+ const imageTask = isMultimodalReady(input2.config, input2.secrets) ? this.imageTasks.enqueue({
3881
+ sourceMessageId: result.messageId,
3882
+ platformMessageId: result.message.platformMessageId,
3883
+ imageKey: attachment.fileKey,
3884
+ storedPath: downloaded.storedPath,
3885
+ mimeType: attachment.mimeType || "image/jpeg"
3886
+ }) : void 0;
3887
+ return {
3888
+ ...result,
3889
+ attachment: {
3890
+ downloaded,
3891
+ ...imageTask ? { imageTask } : {},
3892
+ skippedReason: imageTask ? "\u56FE\u7247\u5DF2\u4E0B\u8F7D\uFF0C\u7B49\u5F85\u591A\u6A21\u6001\u540E\u53F0\u5904\u7406\u3002" : "\u56FE\u7247\u5DF2\u4E0B\u8F7D\uFF0C\u4F46\u591A\u6A21\u6001\u672A\u914D\u7F6E\u3002"
3893
+ }
3894
+ };
3895
+ }
3454
3896
  if (!isSupportedTextFile(downloaded.storedPath)) {
3455
3897
  return {
3456
3898
  ...result,
@@ -3586,6 +4028,96 @@ async function startDetachedGateway(input2) {
3586
4028
  }
3587
4029
  }
3588
4030
 
4031
+ // src/multimodal/openai-compatible.ts
4032
+ import fs13 from "fs/promises";
4033
/**
 * Strip every trailing "/" from a base URL so a path can be appended with a
 * single slash.
 * @param {string} baseUrl
 * @returns {string}
 */
function normalizeBaseUrl2(baseUrl) {
  let end = baseUrl.length;
  while (end > 0 && baseUrl[end - 1] === "/") {
    end -= 1;
  }
  return baseUrl.slice(0, end);
}
4036
/**
 * Assemble the instruction text sent with an image: three fixed Chinese
 * instruction lines (judge whether the image is meaningful, answer with a
 * JSON object, keep the summary concise) plus an optional context line.
 *
 * @param {string} [context] - Extra context; ignored when missing or blank.
 * @returns {string} Lines joined with "\n".
 */
function buildPrompt(context) {
  const lines = [
    "\u8BF7\u7406\u89E3\u8FD9\u5F20\u56FE\u7247\uFF0C\u5224\u65AD\u5B83\u662F\u5426\u5305\u542B\u503C\u5F97\u8FDB\u5165\u77E5\u8BC6\u5E93\u548C\u4F1A\u8BDD\u8BB0\u5FC6\u7684\u6709\u610F\u4E49\u4FE1\u606F\u3002",
    '\u8BF7\u53EA\u8F93\u51FA JSON\uFF0C\u683C\u5F0F\u4E3A {"summary": string, "isMeaningful": boolean, "reason": string}\u3002',
    "summary \u4F7F\u7528\u7B80\u6D01\u4E2D\u6587\u8F6C\u8FF0\u56FE\u7247\u4E2D\u7684\u5173\u952E\u4FE1\u606F\uFF1B\u65E0\u610F\u4E49\u56FE\u7247\u4E5F\u8981\u7ED9\u51FA\u7B80\u77ED summary\u3002"
  ];
  const trimmedContext = context?.trim();
  if (trimmedContext) {
    lines.push(`\u4E0A\u4E0B\u6587\uFF1A${trimmedContext}`);
  }
  return lines.join("\n");
}
4045
/**
 * Parse and validate the model's answer for an image description.
 *
 * The answer must be a JSON object with a non-blank string `summary` and a
 * boolean `isMeaningful`; `reason` is optional and kept only when it is a
 * non-blank string. A surrounding Markdown code fence (```json ... ``` or
 * ``` ... ```) is removed before parsing, because some models add one even
 * when asked for bare JSON.
 *
 * @param {string} content - Raw text returned by the model.
 * @returns {{summary: string, isMeaningful: boolean, reason?: string}}
 * @throws {Error} If the text is not valid JSON, is not a JSON object, or a
 *   required field is missing or of the wrong type.
 */
function parseDescribeImageResult(content) {
  let jsonText = content;
  if (typeof content === "string") {
    const trimmed = content.trim();
    const fence = "```";
    const firstLineEnd = trimmed.indexOf("\n");
    const isFenced = trimmed.length >= fence.length * 2 && trimmed.startsWith(fence) && trimmed.endsWith(fence) && firstLineEnd !== -1;
    // Drop the opening fence line (with any language tag) and the closing fence.
    jsonText = isFenced ? trimmed.slice(firstLineEnd + 1, -fence.length).trim() : trimmed;
  }
  let data2;
  try {
    data2 = JSON.parse(jsonText);
  } catch {
    throw new Error("\u591A\u6A21\u6001\u6A21\u578B\u8FD4\u56DE\u7684 JSON \u65E0\u6CD5\u89E3\u6790\u3002");
  }
  // Arrays are typeof "object" too; reject them here with the format error
  // instead of letting them fail later with a misleading "summary" message.
  if (!data2 || typeof data2 !== "object" || Array.isArray(data2)) {
    throw new Error("\u591A\u6A21\u6001\u6A21\u578B\u8FD4\u56DE\u683C\u5F0F\u4E0D\u6B63\u786E\u3002");
  }
  const result = data2;
  const summary = typeof result.summary === "string" ? result.summary.trim() : "";
  if (!summary) {
    throw new Error("\u591A\u6A21\u6001\u6A21\u578B\u8FD4\u56DE\u7684 summary \u4E3A\u7A7A\u3002");
  }
  if (typeof result.isMeaningful !== "boolean") {
    throw new Error("\u591A\u6A21\u6001\u6A21\u578B\u8FD4\u56DE\u7684 isMeaningful \u4E0D\u662F\u5E03\u5C14\u503C\u3002");
  }
  const reason = typeof result.reason === "string" ? result.reason.trim() : "";
  return {
    summary,
    isMeaningful: result.isMeaningful,
    ...(reason ? { reason } : {})
  };
}
4070
/**
 * Image-description client for an OpenAI-compatible `/chat/completions`
 * endpoint.
 *
 * Options: `baseUrl`, `apiKey`, `model` (all required at call time) and an
 * optional `temperature` (0.2 when not set).
 */
var OpenAICompatibleMultimodalModel = class {
  constructor(options) {
    this.options = options;
  }
  options;

  /**
   * Send one image to the model and return its validated description.
   *
   * The image file is read from disk and embedded in the request as a base64
   * data URL using the given MIME type.
   *
   * @param {object} input2
   * @param {string} input2.imagePath - Path of the image file to read.
   * @param {string} input2.mimeType - MIME type used in the data URL.
   * @param {string} [input2.context] - Optional extra context for the prompt.
   * @returns {Promise<object>} Whatever `parseDescribeImageResult` returns for the reply.
   * @throws {Error} When configuration is incomplete, the HTTP status is not
   *   OK, or the reply has no content. File-read, network and parse errors
   *   propagate unchanged.
   */
  async describeImage(input2) {
    const { baseUrl, apiKey, model } = this.options;
    const isConfigured = Boolean(baseUrl) && Boolean(apiKey) && Boolean(model);
    if (!isConfigured) {
      throw new Error("\u591A\u6A21\u6001\u914D\u7F6E\u4E0D\u5B8C\u6574\u3002\u8BF7\u8FD0\u884C chattercatcher setup \u6216 chattercatcher settings\u3002");
    }

    const imageBytes = await fs13.readFile(input2.imagePath);
    const endpoint = `${normalizeBaseUrl2(this.options.baseUrl)}/chat/completions`;
    const userMessage = {
      role: "user",
      content: [
        { type: "text", text: buildPrompt(input2.context) },
        { type: "image_url", image_url: { url: `data:${input2.mimeType};base64,${imageBytes.toString("base64")}` } }
      ]
    };
    const payload = {
      model: this.options.model,
      messages: [userMessage],
      response_format: { type: "json_object" },
      temperature: this.options.temperature ?? 0.2
    };

    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        authorization: `Bearer ${this.options.apiKey}`,
        "content-type": "application/json"
      },
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      // Include the response body so the provider's error detail is visible.
      const body = await response.text();
      throw new Error(`\u591A\u6A21\u6001\u8BF7\u6C42\u5931\u8D25\uFF1A${response.status} ${body}`);
    }

    const data2 = await response.json();
    const content = data2.choices?.[0]?.message?.content?.trim();
    if (!content) {
      throw new Error("\u591A\u6A21\u6001\u6A21\u578B\u8FD4\u56DE\u4E3A\u7A7A\u3002");
    }
    return parseDescribeImageResult(content);
  }
};
4113
/**
 * Build the multimodal client from application settings: base URL and model
 * name come from `config.multimodal`, the API key from `secrets.multimodal`.
 *
 * @param {{multimodal: {baseUrl: string, model: string}}} config
 * @param {{multimodal: {apiKey: string}}} secrets
 * @returns {OpenAICompatibleMultimodalModel}
 */
function createMultimodalModel(config, secrets) {
  const { baseUrl, model } = config.multimodal;
  const { apiKey } = secrets.multimodal;
  return new OpenAICompatibleMultimodalModel({ baseUrl, apiKey, model });
}
4120
+
3589
4121
  // src/rag/indexer.ts
3590
4122
  async function indexMessageChunks(input2) {
3591
4123
  const chunks = input2.messageIds ? input2.messages.listMessageChunksByMessageIds(input2.messageIds, input2.limit ?? 1e4) : input2.messages.listAllMessageChunks(input2.limit ?? 1e4);
@@ -4312,12 +4844,31 @@ async function promptForConfiguration(config, secrets) {
4312
4844
  llmApiKey: secrets.llm.apiKey
4313
4845
  });
4314
4846
  config.embedding.model = await input({ message: "Embedding Model", default: config.embedding.model });
4847
+ const multimodalBaseUrl = await input({
4848
+ message: "Multimodal Base URL\uFF08OpenAI-compatible\uFF0C\u53EF\u7559\u7A7A\uFF09",
4849
+ default: config.multimodal.baseUrl
4850
+ });
4851
+ const multimodalApiKey = await password({
4852
+ message: "Multimodal API Key\uFF08\u53EF\u7559\u7A7A\uFF09",
4853
+ mask: "*"
4854
+ });
4855
+ const multimodalModel = await input({
4856
+ message: "Multimodal Model\uFF08\u53EF\u7559\u7A7A\uFF09",
4857
+ default: config.multimodal.model
4858
+ });
4315
4859
  const dimension = await number({
4316
4860
  message: "Embedding \u7EF4\u5EA6\uFF08\u4E0D\u77E5\u9053\u53EF\u5148\u7559\u7A7A\uFF09",
4317
4861
  default: config.embedding.dimension ?? void 0,
4318
4862
  required: false
4319
4863
  });
4320
4864
  config.embedding.dimension = dimension ?? null;
4865
+ config.multimodal = {
4866
+ baseUrl: multimodalBaseUrl,
4867
+ model: multimodalModel
4868
+ };
4869
+ secrets.multimodal = {
4870
+ apiKey: multimodalApiKey || secrets.multimodal.apiKey
4871
+ };
4321
4872
  config.web.port = await number({ message: "Web UI \u7AEF\u53E3", default: config.web.port, required: true }) ?? config.web.port;
4322
4873
  config.feishu.requireMention = await confirm({
4323
4874
  message: "\u7FA4\u804A\u56DE\u7B54\u662F\u5426\u8981\u6C42 @ \u673A\u5668\u4EBA\uFF1F",
@@ -4345,7 +4896,8 @@ function printSettings(config, secrets) {
4345
4896
  secrets: {
4346
4897
  feishu: { appSecret: maskSecret(secrets.feishu.appSecret) },
4347
4898
  llm: { apiKey: maskSecret(secrets.llm.apiKey) },
4348
- embedding: { apiKey: maskSecret(secrets.embedding.apiKey) }
4899
+ embedding: { apiKey: maskSecret(secrets.embedding.apiKey) },
4900
+ multimodal: { apiKey: maskSecret(secrets.multimodal.apiKey) }
4349
4901
  }
4350
4902
  },
4351
4903
  null,
@@ -4457,6 +5009,10 @@ async function startGatewayForegroundCommand() {
4457
5009
  database,
4458
5010
  model: createChatModel(config, secrets)
4459
5011
  },
5012
+ imageMultimodalProcessor: config.multimodal.baseUrl && config.multimodal.model && secrets.multimodal.apiKey ? {
5013
+ database,
5014
+ model: createMultimodalModel(config, secrets)
5015
+ } : void 0,
4460
5016
  questionHandler: new FeishuQuestionHandler({
4461
5017
  config,
4462
5018
  secrets,
@@ -4824,7 +5380,7 @@ dev.command("ingest-feishu-event").description("\u4ECE JSON \u6587\u4EF6\u6A21\u
4824
5380
  const config = await loadConfig();
4825
5381
  const database = openDatabase(config);
4826
5382
  try {
4827
- const raw = await fs13.readFile(options.file, "utf8");
5383
+ const raw = await fs14.readFile(options.file, "utf8");
4828
5384
  const payload = JSON.parse(raw);
4829
5385
  const result = new GatewayIngestor(database).ingestFeishuEvent(payload);
4830
5386
  if (!result.accepted) {