@firstlovecenter/ai-chat 0.7.0 → 0.8.1

@@ -55,6 +55,14 @@ type NormalizedToolResult = {
 type NormalizedMessage = {
   role: 'user';
   text: string;
+  /**
+   * Cache hint: when true, the producing route is asking the provider
+   * to mark this message's content with a cache breakpoint so the
+   * full prefix becomes cacheable on a subsequent request. Anthropic
+   * applies `cache_control: ephemeral`; Vertex Gemini ignores the
+   * flag (its prefix cache works automatically).
+   */
+  cached?: boolean;
 } | {
   role: 'assistant';
   /** Free text the model emitted (zero-or-more text blocks joined as-is). */
@@ -69,6 +77,8 @@ type NormalizedMessage = {
    * thought_signature`. Other adapters can ignore.
    */
   providerData?: unknown;
+  /** See `user.cached`. */
+  cached?: boolean;
 } | {
   role: 'tool';
   results: NormalizedToolResult[];
@@ -155,6 +165,16 @@ type AgentInput<S = unknown> = {
   systemBlocks: SystemBlock[];
   /** Constructed ToolProvider — caller resolves the right one via toolProviders[id].createProvider({...}). */
   provider: ToolProvider;
+  /**
+   * Conversation history to seed the prompt with, in chronological order.
+   * Hosts pass this to give the model memory across turns in a chat session
+   * (so a follow-up like "summarize that" resolves the antecedent). The
+   * route handler is responsible for fetching prior `chat_messages` and
+   * normalising them; see `historyToNormalizedMessages` in `./history`.
+   * Tool-call provenance is intentionally not replayed — assistant turns
+   * here should be plain text only.
+   */
+  priorMessages?: NormalizedMessage[];
   /** Optional caps. Default both. */
   maxToolTurns?: number;
   maxOutputTokens?: number;
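
Note: taken together, the new `priorMessages` and `cached` fields let a host
replay chat history and opt the tail of that history into provider-side
caching. A minimal call-site sketch, assuming the usual ctx/tools/systemBlocks/
provider wiring is unchanged (sample texts are illustrative):

    const priorMessages: NormalizedMessage[] = [
      { role: 'user', text: 'How many members joined in March?' },
      // Assistant turns are replayed as plain text; tool calls are not included.
      { role: 'assistant', text: '42 members joined in March.', toolCalls: [], cached: true }
    ];

    const result = await runAgent({
      question: 'And in April?',
      priorMessages,
      ctx, tools, systemBlocks, provider, // as before
    });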
@@ -23,7 +23,10 @@ async function runAgent(input) {
   const maxOutputTokens = input.maxOutputTokens ?? DEFAULT_MAX_OUTPUT_TOKENS;
   const transcript = [];
   transcript.push({ kind: "user", text: input.question });
-  const messages = [{ role: "user", text: input.question }];
+  const messages = [
+    ...input.priorMessages ?? [],
+    { role: "user", text: input.question }
+  ];
   const system = input.systemBlocks;
   const toolSchemas = Object.values(input.tools).map((t) => t.schema);
   let toolCallCount = 0;
@@ -216,11 +219,28 @@ function toAnthropicMessages(messages) {
   const out = [];
   for (const msg of messages) {
     if (msg.role === "user") {
-      out.push({ role: "user", content: msg.text });
+      if (msg.cached) {
+        out.push({
+          role: "user",
+          content: [
+            {
+              type: "text",
+              text: msg.text,
+              cache_control: { type: "ephemeral" }
+            }
+          ]
+        });
+      } else {
+        out.push({ role: "user", content: msg.text });
+      }
     } else if (msg.role === "assistant") {
       const blocks = [];
       if (msg.text) {
-        blocks.push({ type: "text", text: msg.text });
+        const textBlock = { type: "text", text: msg.text };
+        if (msg.cached && msg.toolCalls.length === 0) {
+          textBlock.cache_control = { type: "ephemeral" };
+        }
+        blocks.push(textBlock);
       }
       for (const tc of msg.toolCalls) {
         blocks.push({
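
Note: `cache_control: { type: "ephemeral" }` is Anthropic's prompt-caching
breakpoint; everything up to and including the marked block becomes a reusable
prefix on the next request. For a cached user turn the adapter now emits the
block form instead of the string shorthand, roughly:

    // toAnthropicMessages([{ role: "user", text: "And in April?", cached: true }])
    {
      role: "user",
      content: [
        { type: "text", text: "And in April?", cache_control: { type: "ephemeral" } }
      ]
    }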
@@ -572,6 +592,87 @@ var toolProviders = [
 function getToolProvider(id) {
   return toolProviders.find((p) => p.id === id);
 }
+
+// src/server/history.ts
+var DEFAULT_MAX_HISTORY_PAIRS = 20;
+var DEFAULT_MAX_TEXT_CHARS = 4e3;
+function historyToNormalizedMessages(rows, opts = {}) {
+  const maxPairs = opts.maxPairs ?? DEFAULT_MAX_HISTORY_PAIRS;
+  const maxTextChars = opts.maxTextChars ?? DEFAULT_MAX_TEXT_CHARS;
+  const pairs = [];
+  let i = 0;
+  while (i < rows.length) {
+    const row = rows[i];
+    if (row.role !== "user" || !row.question) {
+      i += 1;
+      continue;
+    }
+    const next = rows[i + 1];
+    if (next?.role !== "assistant") {
+      i += 1;
+      continue;
+    }
+    const assistantText = truncate(
+      assistantMessageToText(next),
+      maxTextChars
+    );
+    if (assistantText) {
+      pairs.push([
+        { role: "user", text: truncate(row.question, maxTextChars) },
+        { role: "assistant", text: assistantText, toolCalls: [] }
+      ]);
+    }
+    i += 2;
+  }
+  const kept = maxPairs > 0 ? pairs.slice(-maxPairs) : pairs;
+  return kept.flat();
+}
+function truncate(text, max) {
+  if (max <= 0 || text.length <= max) return text;
+  return text.slice(0, max);
+}
+function assistantMessageToText(row) {
+  if (row.errorJson) return "";
+  const proseText = proseToText(row.prose);
+  if (proseText) return proseText;
+  const blockText = blocksToText(row.blocks);
+  return blockText;
+}
+function proseToText(prose) {
+  if (!prose || typeof prose !== "object") return "";
+  const entries = Object.entries(prose).map(([k, v]) => [Number(k), typeof v === "string" ? v : ""]).filter(([k, v]) => Number.isFinite(k) && v.length > 0).sort(([a], [b]) => a - b);
+  return entries.map(([, v]) => v).join("\n\n").trim();
+}
+function blocksToText(blocks) {
+  if (!Array.isArray(blocks)) return "";
+  const parts = [];
+  for (const raw of blocks) {
+    if (!raw || typeof raw !== "object") continue;
+    const b = raw;
+    switch (b.kind) {
+      case "paragraph_brief": {
+        const facts = (b.key_facts ?? []).filter((f) => f && f.trim());
+        if (b.topic) parts.push(b.topic);
+        if (facts.length) parts.push(facts.join("\n"));
+        break;
+      }
+      case "list": {
+        const items = (b.items ?? []).filter((s) => s && s.trim());
+        if (b.title) parts.push(b.title);
+        if (items.length) parts.push(items.map((s) => `- ${s}`).join("\n"));
+        break;
+      }
+      case "chart":
+      case "table":
+        if (b.title) parts.push(`[${b.title}]`);
+        break;
+      case "callout":
+        if (b.text) parts.push(b.text);
+        break;
+    }
+  }
+  return parts.join("\n\n").trim();
+}
 function vertexHost2(location) {
   return location === "global" ? "aiplatform.googleapis.com" : `${location}-aiplatform.googleapis.com`;
 }
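
Note: a usage sketch for the new helper. The row shape mirrors what the route
handlers read back from `chat_messages`; field values here are illustrative:

    const rows = [
      { role: "user", question: "How many members joined in March?" },
      { role: "assistant", prose: { 0: "42 members joined in March." }, blocks: null, errorJson: null },
      { role: "user", question: "And how does that compare to February?" },
      { role: "assistant", prose: null, blocks: null, errorJson: { code: "TIMEOUT" } }
    ];

    const history = historyToNormalizedMessages(rows, { maxPairs: 20, maxTextChars: 4000 });
    // => one user/assistant pair: the errored assistant row yields no text, so
    //    its pair is dropped, and only the last maxPairs surviving pairs are kept.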
@@ -914,12 +1015,23 @@ function createAgentCustomRoutes(ctx) {
   const aiSettings = await persistence.getAiSettings();
   const effectiveProjectId = aiSettings.gcpProjectId ?? vertex.projectId;
   let chatSessionId;
+  let priorMessages = [];
   if (incomingChatSessionId !== null) {
     const owned = await persistence.getSession(incomingChatSessionId, userId);
     if (!owned) {
       return jsonError(404, "NOT_FOUND", "Chat session not found.");
     }
     chatSessionId = owned.id;
+    const stored = await persistence.listMessagesForSession(chatSessionId, userId);
+    priorMessages = historyToNormalizedMessages(stored);
+    if (priorMessages.length > 0) {
+      const last = priorMessages[priorMessages.length - 1];
+      if (last.role === "assistant" && last.toolCalls.length === 0) {
+        priorMessages[priorMessages.length - 1] = { ...last, cached: true };
+      } else if (last.role === "user") {
+        priorMessages[priorMessages.length - 1] = { ...last, cached: true };
+      }
+    }
   } else {
     const created = await persistence.createSession({
       userId,
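
Note: the cache breakpoint is placed on the last replayed message (the
assistant turn when it carries no tool calls, otherwise a trailing user turn),
so the entire history prefix up to that point is what gets marked cacheable;
only the new question falls outside the cached span. Illustrative prompt
order, oldest first:

    // [ user, assistant, user, assistant (cached: true), user (new question) ]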
@@ -1007,6 +1119,7 @@ data: ${JSON.stringify(data)}
   send("meta", { chatSessionId, scopeLabel });
   const agentResult = await runAgent({
     question,
+    priorMessages,
     ctx: toolContext,
     tools: tools.tools,
     systemBlocks,
@@ -1225,7 +1338,17 @@ function createAgentVercelRoutes(ctx) {
     if (short) return short;
   }
   const body = await req.json().catch(() => null);
-  const question = typeof body?.question === "string" ? body.question.trim() : "";
+  let question = typeof body?.question === "string" ? body.question.trim() : "";
+  if (!question && Array.isArray(body?.messages)) {
+    const msgs = body.messages;
+    for (let i = msgs.length - 1; i >= 0; i -= 1) {
+      const m = msgs[i];
+      if (m && m.role === "user" && typeof m.content === "string") {
+        question = m.content.trim();
+        break;
+      }
+    }
+  }
   if (!question) {
     return jsonError2(
       400,
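
Note: this fallback accepts the `{ messages: [...] }` body shape that chat
clients such as the Vercel AI SDK's useChat hook post, scanning backwards for
the most recent plain-string user message:

    // { "messages": [
    //     { "role": "user", "content": "How many members joined in March?" },
    //     { "role": "assistant", "content": "42." },
    //     { "role": "user", "content": "And in April?" }
    // ] }
    // => question === "And in April?"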
@@ -1240,6 +1363,7 @@ function createAgentVercelRoutes(ctx) {
   const aiSettings = await persistence.getAiSettings();
   const effectiveProjectId = aiSettings.gcpProjectId ?? vertex.projectId;
   let chatSessionId;
+  let priorMessages = [];
   if (incomingChatSessionId !== null) {
     const owned = await persistence.getSession(
       incomingChatSessionId,
@@ -1249,6 +1373,11 @@ function createAgentVercelRoutes(ctx) {
       return jsonError2(404, "NOT_FOUND", "Chat session not found.");
     }
     chatSessionId = owned.id;
+    const stored = await persistence.listMessagesForSession(
+      chatSessionId,
+      userId
+    );
+    priorMessages = historyToNormalizedMessages(stored);
   } else {
     const created = await persistence.createSession({
       userId,
@@ -1318,10 +1447,16 @@ function createAgentVercelRoutes(ctx) {
     location: aiSettings.gcpLocation,
     googleAuthOptions: {}
   })(vertex.modelIds.gemini);
+  const priorCoreMessages = priorMessages.filter(
+    (m) => m.role === "user" || m.role === "assistant"
+  ).map((m) => ({ role: m.role, content: m.text }));
   const result = streamText({
     model,
     system,
-    messages: [{ role: "user", content: question }],
+    messages: [
+      ...priorCoreMessages,
+      { role: "user", content: question }
+    ],
     tools: vercelTools,
     maxSteps: 12,
     maxTokens: aiSettings.maxOutputTokens,
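
Note: the Vercel route seeds streamText with plain text messages only: `tool`
turns are filtered out and assistant turns are flattened to their text, which
is safe because history from historyToNormalizedMessages never carries tool
calls anyway. Illustrative mapping:

    // { role: "assistant", text: "42.", toolCalls: [] } -> { role: "assistant", content: "42." }
    // { role: "tool", results: [...] }                  -> dropped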