@ekairos/thread 1.21.88-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +363 -0
  2. package/dist/codex.d.ts +95 -0
  3. package/dist/codex.js +91 -0
  4. package/dist/env.d.ts +12 -0
  5. package/dist/env.js +62 -0
  6. package/dist/events.d.ts +35 -0
  7. package/dist/events.js +102 -0
  8. package/dist/index.d.ts +9 -0
  9. package/dist/index.js +9 -0
  10. package/dist/mcp.d.ts +1 -0
  11. package/dist/mcp.js +1 -0
  12. package/dist/mirror.d.ts +41 -0
  13. package/dist/mirror.js +1 -0
  14. package/dist/oidc.d.ts +7 -0
  15. package/dist/oidc.js +25 -0
  16. package/dist/polyfills/dom-events.d.ts +1 -0
  17. package/dist/polyfills/dom-events.js +89 -0
  18. package/dist/react.d.ts +62 -0
  19. package/dist/react.js +101 -0
  20. package/dist/runtime.d.ts +17 -0
  21. package/dist/runtime.js +23 -0
  22. package/dist/runtime.step.d.ts +9 -0
  23. package/dist/runtime.step.js +7 -0
  24. package/dist/schema.d.ts +2 -0
  25. package/dist/schema.js +200 -0
  26. package/dist/steps/do-story-stream-step.d.ts +29 -0
  27. package/dist/steps/do-story-stream-step.js +89 -0
  28. package/dist/steps/do-thread-stream-step.d.ts +29 -0
  29. package/dist/steps/do-thread-stream-step.js +90 -0
  30. package/dist/steps/mirror.steps.d.ts +6 -0
  31. package/dist/steps/mirror.steps.js +48 -0
  32. package/dist/steps/reaction.steps.d.ts +43 -0
  33. package/dist/steps/reaction.steps.js +354 -0
  34. package/dist/steps/store.steps.d.ts +98 -0
  35. package/dist/steps/store.steps.js +512 -0
  36. package/dist/steps/stream.steps.d.ts +41 -0
  37. package/dist/steps/stream.steps.js +99 -0
  38. package/dist/steps/trace.steps.d.ts +37 -0
  39. package/dist/steps/trace.steps.js +265 -0
  40. package/dist/stores/instant.document-parser.d.ts +6 -0
  41. package/dist/stores/instant.document-parser.js +210 -0
  42. package/dist/stores/instant.documents.d.ts +16 -0
  43. package/dist/stores/instant.documents.js +152 -0
  44. package/dist/stores/instant.store.d.ts +78 -0
  45. package/dist/stores/instant.store.js +530 -0
  46. package/dist/story.actions.d.ts +60 -0
  47. package/dist/story.actions.js +120 -0
  48. package/dist/story.builder.d.ts +115 -0
  49. package/dist/story.builder.js +130 -0
  50. package/dist/story.config.d.ts +54 -0
  51. package/dist/story.config.js +125 -0
  52. package/dist/story.d.ts +2 -0
  53. package/dist/story.engine.d.ts +224 -0
  54. package/dist/story.engine.js +464 -0
  55. package/dist/story.hooks.d.ts +21 -0
  56. package/dist/story.hooks.js +31 -0
  57. package/dist/story.js +6 -0
  58. package/dist/story.registry.d.ts +21 -0
  59. package/dist/story.registry.js +30 -0
  60. package/dist/story.store.d.ts +107 -0
  61. package/dist/story.store.js +1 -0
  62. package/dist/story.toolcalls.d.ts +60 -0
  63. package/dist/story.toolcalls.js +73 -0
  64. package/dist/thread.builder.d.ts +118 -0
  65. package/dist/thread.builder.js +134 -0
  66. package/dist/thread.config.d.ts +15 -0
  67. package/dist/thread.config.js +30 -0
  68. package/dist/thread.d.ts +3 -0
  69. package/dist/thread.engine.d.ts +229 -0
  70. package/dist/thread.engine.js +471 -0
  71. package/dist/thread.events.d.ts +35 -0
  72. package/dist/thread.events.js +105 -0
  73. package/dist/thread.hooks.d.ts +21 -0
  74. package/dist/thread.hooks.js +31 -0
  75. package/dist/thread.js +7 -0
  76. package/dist/thread.reactor.d.ts +82 -0
  77. package/dist/thread.reactor.js +65 -0
  78. package/dist/thread.registry.d.ts +21 -0
  79. package/dist/thread.registry.js +30 -0
  80. package/dist/thread.store.d.ts +121 -0
  81. package/dist/thread.store.js +1 -0
  82. package/dist/thread.toolcalls.d.ts +60 -0
  83. package/dist/thread.toolcalls.js +73 -0
  84. package/dist/tools-to-model-tools.d.ts +19 -0
  85. package/dist/tools-to-model-tools.js +21 -0
  86. package/package.json +133 -0
@@ -0,0 +1,265 @@
1
+ import "../polyfills/dom-events.js";
2
+ import { lookup } from "@instantdb/admin";
3
/**
 * Resolves the ekairos-core base URL from the environment.
 * Checks EKAIROS_CORE_BASE_URL, then EKAIROS_TRACES_BASE_URL, then EKAIROS_BASE_URL.
 * @returns {string} The base URL with any trailing slash removed.
 * @throws {Error} When none of the candidate environment variables are set.
 */
function requireBaseUrl() {
    const candidates = [
        process.env.EKAIROS_CORE_BASE_URL,
        process.env.EKAIROS_TRACES_BASE_URL,
        process.env.EKAIROS_BASE_URL,
    ];
    const resolved = candidates.find((value) => Boolean(value));
    if (!resolved) {
        throw new Error("[thread/trace] Missing EKAIROS_CORE_BASE_URL (or EKAIROS_TRACES_BASE_URL)");
    }
    return resolved.replace(/\/$/, "");
}
12
/**
 * Reads the Clerk org API key (opaque token) used to authenticate against ekairos-core.
 * @returns {string} The value of EKAIROS_CLERK_API_KEY.
 * @throws {Error} When EKAIROS_CLERK_API_KEY is unset or empty.
 */
function requireToken() {
    // Preferred: Clerk org API key (opaque token) for ekairos-core.
    const { EKAIROS_CLERK_API_KEY: apiKey } = process.env;
    if (!apiKey) {
        throw new Error("[thread/trace] Missing EKAIROS_CLERK_API_KEY");
    }
    return apiKey;
}
19
// Cached short-lived JWT for trace auth: { token, expMs } or null when empty.
let jwtCache = null;
/**
 * Extracts the `exp` claim from a JWT and converts it to epoch milliseconds.
 * Decodes the middle (payload) segment as base64url; never throws.
 * @param {string} token - Candidate JWT ("header.payload.signature").
 * @returns {number|null} Expiry in ms since epoch, or null when absent/unparsable.
 */
function parseJwtExpMs(token) {
    const segments = token.split(".");
    if (segments.length !== 3) {
        return null;
    }
    try {
        // base64url -> base64, then decode the JSON claims.
        const base64 = segments[1].replace(/-/g, "+").replace(/_/g, "/");
        const decoded = Buffer.from(base64, "base64").toString("utf-8");
        const claims = JSON.parse(decoded);
        if (typeof claims?.exp !== "number" || !claims.exp) {
            return null;
        }
        return claims.exp * 1000;
    }
    catch {
        return null;
    }
}
33
/**
 * Builds the Authorization header for trace ingestion.
 * Prefers a cached short-lived JWT (minted via the /auth endpoint); on any
 * failure it falls back to the raw API key.
 * NOTE(review): the JWT cache is a single module-global not keyed by
 * baseUrl/projectId — confirm tokens are interchangeable across projects.
 * @param {string} baseUrl - ekairos-core base URL (no trailing slash).
 * @param {string} projectId - Project to mint the JWT for.
 * @returns {Promise<string>} A `Bearer …` header value.
 */
async function getTraceAuthHeader(baseUrl, projectId) {
    const apiKey = requireToken();
    const nowMs = Date.now();
    // Reuse the cached JWT while it still has more than a minute of life left.
    const cacheIsFresh = Boolean(jwtCache) && jwtCache.expMs - 60000 > nowMs;
    if (cacheIsFresh) {
        return `Bearer ${jwtCache.token}`;
    }
    try {
        const response = await fetch(`${baseUrl}/api/thread/traces/auth`, {
            method: "POST",
            headers: {
                "content-type": "application/json",
                authorization: `Bearer ${apiKey}`,
            },
            body: JSON.stringify({ projectId }),
        });
        if (response.ok) {
            const body = (await response.json());
            const token = typeof body?.token === "string" ? body.token : "";
            if (token) {
                // Default to a one-hour lifetime when the JWT carries no exp claim.
                const expMs = parseJwtExpMs(token) ?? nowMs + 60 * 60 * 1000;
                jwtCache = { token, expMs };
                return `Bearer ${token}`;
            }
        }
    }
    catch {
        // fall back to API key below
    }
    return `Bearer ${apiKey}`;
}
63
/**
 * Resolves the active project id: runtime config first, then the
 * EKAIROS_PROJECT_ID environment variable, else "".
 * @returns {Promise<string>} Trimmed project id, possibly empty.
 */
async function readProjectId() {
    const { getRuntimeProjectId } = await import("@ekairos/domain/runtime");
    const configured = String(getRuntimeProjectId() || "").trim();
    if (configured) {
        return configured;
    }
    // Guard for environments without a Node-style `process`.
    const hasEnv = typeof process !== "undefined" && Boolean(process.env);
    if (!hasEnv) {
        return "";
    }
    return String(process.env.EKAIROS_PROJECT_ID || "").trim();
}
73
/**
 * Persists a batch of trace events locally, then forwards them to ekairos-core.
 *
 * Two phases:
 *   1) Local persistence into InstantDB (thread_trace_events, thread_trace_spans,
 *      thread_trace_runs) when a thread runtime db is available.
 *   2) HTTP ingest to `<baseUrl>/api/thread/traces/ingest`.
 *
 * Tracing is best-effort: failures are swallowed unless strict mode is enabled
 * via `params.env.traces.strict === true` or EKAIROS_TRACES_STRICT=1.
 *
 * @param {{ events?: any[], env?: any }} params - Events plus optional overrides
 *   under `env.traces` (baseUrl, projectId, apiKey, strict) and `env.orgId`.
 * @returns {Promise<void>}
 */
export async function writeThreadTraceEvents(params) {
    if (!params.events?.length)
        return;
    const envTrace = params.env?.traces;
    // Tracing must NEVER break workflows by default.
    // Use EKAIROS_TRACES_STRICT=1 if you want to fail hard.
    const strict = envTrace?.strict === true || process.env.EKAIROS_TRACES_STRICT === "1";
    // 1) Local trace persistence (InstantDB source of truth).
    try {
        const { getThreadRuntime } = await import("@ekairos/thread/runtime");
        const runtime = await getThreadRuntime(params.env);
        const db = runtime?.db;
        if (db) {
            const now = new Date();
            const orgId = typeof params.env?.orgId === "string"
                ? String(params.env.orgId)
                : "";
            const projectId = await readProjectId();
            // Group events by workflow run; events without a run id are skipped.
            const byRun = new Map();
            for (const ev of params.events) {
                const runId = String(ev.workflowRunId || "");
                if (!runId)
                    continue;
                if (!byRun.has(runId))
                    byRun.set(runId, []);
                byRun.get(runId).push(ev);
            }
            // Seed per-run sequence counters from the run's stored eventsCount,
            // so newly assigned seq values continue after previous ingests.
            const seqByRun = new Map();
            const existingCountByRun = new Map();
            for (const [runId] of byRun) {
                let existingCount = 0;
                try {
                    const q = await db.query({
                        thread_trace_runs: {
                            $: { where: { workflowRunId: runId }, limit: 1 },
                        },
                    });
                    const row = q?.thread_trace_runs?.[0];
                    existingCount = Number(row?.eventsCount ?? 0) || 0;
                }
                catch {
                    // ignore
                }
                existingCountByRun.set(runId, existingCount);
                seqByRun.set(runId, existingCount);
            }
            const txs = [];
            const spanTxs = [];
            for (const ev of params.events) {
                const runId = String(ev.workflowRunId || "");
                if (!runId)
                    continue;
                // Upsert key: "<runId>:<eventId>" (eventId may be empty).
                const key = `${runId}:${String(ev.eventId || "")}`;
                if (!key.includes(":"))
                    continue;
                const eventAt = typeof ev.eventAt === "string" && ev.eventAt
                    ? new Date(ev.eventAt)
                    : undefined;
                // Honor an explicit numeric seq; otherwise assign the next
                // per-run sequence number.
                let seq = Number.isFinite(Number(ev.seq)) ? Number(ev.seq) : undefined;
                if (typeof seq !== "number") {
                    const current = seqByRun.get(runId) ?? 0;
                    const next = current + 1;
                    seqByRun.set(runId, next);
                    seq = next;
                }
                // Mutates the caller's event so the HTTP ingest payload below
                // carries the assigned seq as well.
                ev.seq = seq;
                txs.push(db.tx.thread_trace_events[lookup("key", key)].update({
                    key,
                    workflowRunId: runId,
                    seq,
                    eventId: String(ev.eventId || ""),
                    eventKind: String(ev.eventKind || ""),
                    eventAt: eventAt ?? undefined,
                    ingestedAt: now,
                    orgId: orgId || undefined,
                    projectId: projectId || undefined,
                    contextKey: ev.contextKey,
                    spanId: ev.spanId,
                    parentSpanId: ev.parentSpanId,
                    contextId: ev.contextId,
                    executionId: ev.executionId,
                    stepId: ev.stepId,
                    contextEventId: ev.contextEventId,
                    toolCallId: ev.toolCallId,
                    partKey: ev.partKey,
                    partIdx: ev.partIdx,
                    isDeleted: ev.isDeleted === true,
                    aiProvider: ev.aiProvider,
                    aiModel: ev.aiModel,
                    promptTokens: ev.promptTokens,
                    promptTokensCached: ev.promptTokensCached,
                    promptTokensUncached: ev.promptTokensUncached,
                    completionTokens: ev.completionTokens,
                    totalTokens: ev.totalTokens,
                    latencyMs: ev.latencyMs,
                    cacheCostUsd: ev.cacheCostUsd,
                    computeCostUsd: ev.computeCostUsd,
                    costUsd: ev.costUsd,
                    payload: ev.payload,
                }));
                // Step events additionally produce a zero-duration span row
                // (startedAt == endedAt, status "completed").
                if (ev.eventKind === "thread.step" || ev.eventKind === "workflow.step") {
                    const spanId = String(ev.stepId || ev.eventId || key);
                    spanTxs.push(db.tx.thread_trace_spans[lookup("spanId", spanId)].update({
                        spanId,
                        parentSpanId: ev.parentSpanId,
                        workflowRunId: runId,
                        executionId: ev.executionId,
                        stepId: ev.stepId,
                        kind: ev.eventKind,
                        name: ev.eventKind,
                        status: "completed",
                        startedAt: eventAt ?? now,
                        endedAt: eventAt ?? now,
                        durationMs: 0,
                        payload: ev.payload,
                    }));
                }
            }
            if (txs.length) {
                await db.transact(txs);
            }
            if (spanTxs.length) {
                await db.transact(spanTxs);
            }
            // Roll up per-run aggregates (first/last event time, total count).
            for (const [runId, events] of byRun) {
                const eventDates = events
                    .map((e) => typeof e.eventAt === "string" && e.eventAt
                    ? new Date(e.eventAt)
                    : now)
                    .filter((d) => !Number.isNaN(d.getTime()));
                const firstEventAt = eventDates.length
                    ? new Date(Math.min(...eventDates.map((d) => d.getTime())))
                    : now;
                const lastEventAt = eventDates.length
                    ? new Date(Math.max(...eventDates.map((d) => d.getTime())))
                    : now;
                const existingCount = existingCountByRun.get(runId) ?? 0;
                await db.transact([
                    db.tx.thread_trace_runs[lookup("workflowRunId", runId)].update({
                        workflowRunId: runId,
                        orgId: orgId || undefined,
                        projectId: projectId || undefined,
                        firstEventAt,
                        lastEventAt,
                        lastIngestedAt: now,
                        eventsCount: existingCount + events.length,
                    }),
                ]);
            }
        }
    }
    catch (e) {
        if (strict)
            throw e;
    }
    // 2) Remote ingest to ekairos-core. Env overrides win over process env.
    let baseUrl = "";
    try {
        baseUrl = envTrace?.baseUrl ? String(envTrace.baseUrl).replace(/\/$/, "") : requireBaseUrl();
    }
    catch (e) {
        if (strict)
            throw e;
        return;
    }
    const projectId = envTrace?.projectId ? String(envTrace.projectId).trim() : await readProjectId();
    if (!projectId) {
        if (strict)
            throw new Error("[thread/trace] Missing projectId (ekairosConfig or EKAIROS_PROJECT_ID)");
        return;
    }
    // An explicit apiKey override bypasses the JWT exchange entirely.
    const authHeader = envTrace?.apiKey
        ? `Bearer ${String(envTrace.apiKey).trim()}`
        : await getTraceAuthHeader(baseUrl, projectId);
    const res = await fetch(`${baseUrl}/api/thread/traces/ingest`, {
        method: "POST",
        headers: {
            "content-type": "application/json",
            authorization: authHeader,
        },
        body: JSON.stringify({ projectId, events: params.events }),
    });
    if (!res.ok) {
        const text = await res.text().catch(() => "");
        if (strict) {
            throw new Error(`[thread/trace] ekairos-core ingest failed (${res.status}): ${text}`);
        }
        if (process.env.PLAYWRIGHT_TEST === "1") {
            // eslint-disable-next-line no-console
            console.warn(`[thread/trace] ingest failed (${res.status}): ${text}`);
        }
        return;
    }
}
@@ -0,0 +1,6 @@
1
+ import "../polyfills/dom-events.js";
2
+ /**
3
+ * Parses a document with LlamaParse and stores it in InstantDB (document_documents + link to file).
4
+ * Returns the created documentId.
5
+ */
6
+ export declare function parseAndStoreDocument(db: any, buffer: Buffer, fileName: string, fileId: string): Promise<string>;
@@ -0,0 +1,210 @@
1
+ import "../polyfills/dom-events.js";
2
+ import { id } from "@instantdb/admin";
3
+ const LLAMA_CLOUD_BASE_URL = "https://api.cloud.llamaindex.ai/api/v1";
4
/**
 * Serializes an error-like value to JSON for logging without leaking secrets.
 * Redacts sensitive keys, truncates very long strings, and marks repeated
 * object references as "[circular]"; never throws.
 * @param {unknown} error - Any thrown value.
 * @returns {string} A JSON string safe to log.
 */
function safeErrorJson(error) {
    const visited = new WeakSet();
    const SENSITIVE_KEY = /token|authorization|cookie|secret|api[_-]?key|password/i;
    const err = error;
    const snapshot = {
        name: err?.name,
        message: err?.message,
        status: err?.status,
        body: err?.body,
        data: err?.data,
        stack: err?.stack,
    };
    const replacer = (key, value) => {
        if (SENSITIVE_KEY.test(key)) {
            return "[redacted]";
        }
        if (typeof value === "string" && value.length > 5000) {
            return "[truncated-string]";
        }
        if (typeof value === "object" && value !== null) {
            if (visited.has(value)) {
                return "[circular]";
            }
            visited.add(value);
        }
        return value;
    };
    try {
        return JSON.stringify(snapshot, replacer);
    }
    catch {
        // Last resort: the message alone, coerced to a string.
        return JSON.stringify({ message: String(err?.message ?? "error") });
    }
}
34
/**
 * Best-effort probes to pinpoint missing schema pieces WITHOUT logging dynamic payloads.
 * Each failure logs a static string only, so no user data can leak into logs.
 * @param {any} db - InstantDB admin client.
 * @returns {Promise<void>}
 */
async function probeInstantDocumentSchema(db) {
    const probes = [
        {
            label: "Instant schema probe: document_documents entity query failed",
            query: { document_documents: { $: { limit: 1 } } },
        },
        {
            label: "Instant schema probe: document_documents.file link query failed",
            query: { document_documents: { $: { limit: 1 }, file: {} } },
        },
        {
            label: "Instant schema probe: $files entity query failed",
            query: { $files: { $: { limit: 1 } } },
        },
        {
            label: "Instant schema probe: $files.document link query failed",
            query: { $files: { $: { limit: 1 }, document: {} } },
        },
    ];
    for (const probe of probes) {
        try {
            await db.query(probe.query);
        }
        catch {
            console.error(probe.label);
        }
    }
}
62
/**
 * Uploads a PDF buffer to LlamaCloud for parsing.
 * Sends the file plus parsing options as multipart form data.
 * @param {Buffer} buffer - Raw PDF bytes.
 * @param {string} fileName - File name forwarded to LlamaCloud.
 * @returns {Promise<string>} The parsing job id.
 * @throws {Error} When the upload request throws or returns a non-2xx status.
 */
async function uploadToLlamaCloud(buffer, fileName) {
    const pdfBlob = new Blob([new Uint8Array(buffer)], { type: "application/pdf" });
    const form = new FormData();
    form.append("file", pdfBlob, fileName);
    const parseOptions = {
        parse_mode: "parse_page_with_llm",
        high_res_ocr: "true",
        adaptive_long_table: "true",
        outlined_table_extraction: "true",
        output_tables_as_HTML: "true",
    };
    for (const [option, value] of Object.entries(parseOptions)) {
        form.append(option, value);
    }
    let response;
    try {
        response = await fetch(`${LLAMA_CLOUD_BASE_URL}/parsing/upload`, {
            method: "POST",
            headers: {
                Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
            },
            body: form,
        });
    }
    catch (error) {
        console.error("LlamaCloud: upload fetch threw", safeErrorJson(error));
        throw error;
    }
    if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`LlamaCloud upload failed: ${response.status} ${errorText}`);
    }
    const result = (await response.json());
    return result.id;
}
93
/**
 * Fetches the current status of a LlamaCloud parsing job.
 * @param {string} jobId - Parsing job id returned by the upload call.
 * @returns {Promise<any>} The raw status payload.
 * @throws {Error} When the request throws or returns a non-2xx status.
 */
async function getJobStatus(jobId) {
    const statusUrl = `${LLAMA_CLOUD_BASE_URL}/parsing/job/${jobId}`;
    let response;
    try {
        response = await fetch(statusUrl, {
            method: "GET",
            headers: {
                Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
            },
        });
    }
    catch (error) {
        console.error("LlamaCloud: status fetch threw", safeErrorJson(error));
        throw error;
    }
    if (response.ok) {
        return (await response.json());
    }
    const errorText = await response.text();
    throw new Error(`LlamaCloud status fetch failed: ${response.status} ${errorText}`);
}
113
/**
 * Downloads the markdown result of a completed LlamaCloud parsing job.
 * @param {string} jobId - Parsing job id.
 * @returns {Promise<any>} The markdown result payload.
 * @throws {Error} When the request throws or returns a non-2xx status.
 */
async function getParseResult(jobId) {
    const resultUrl = `${LLAMA_CLOUD_BASE_URL}/parsing/job/${jobId}/result/markdown`;
    let response;
    try {
        response = await fetch(resultUrl, {
            method: "GET",
            headers: {
                Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
            },
        });
    }
    catch (error) {
        console.error("LlamaCloud: result fetch threw", safeErrorJson(error));
        throw error;
    }
    if (response.ok) {
        return (await response.json());
    }
    const errorText = await response.text();
    throw new Error(`LlamaCloud result fetch failed: ${response.status} ${errorText}`);
}
133
/**
 * Polls a LlamaCloud job until it completes, then returns the parse result.
 * Generalized: the poll interval (previously hard-coded to 2000 ms) is now a
 * backward-compatible parameter.
 * @param {string} jobId - Parsing job id.
 * @param {number} [maxAttempts=60] - Maximum number of status polls.
 * @param {number} [pollIntervalMs=2000] - Delay between polls in milliseconds.
 * @returns {Promise<any>} The markdown result payload.
 * @throws {Error} When the job reports ERROR/FAILED, or polling times out.
 */
async function waitForProcessing(jobId, maxAttempts = 60, pollIntervalMs = 2000) {
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        const statusResponse = await getJobStatus(jobId);
        if (statusResponse.status === "SUCCESS" || statusResponse.status === "COMPLETED") {
            return await getParseResult(jobId);
        }
        if (statusResponse.status === "ERROR" || statusResponse.status === "FAILED") {
            throw new Error(`LlamaCloud processing failed with status: ${statusResponse.status}`);
        }
        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    }
    throw new Error("LlamaCloud processing timeout");
}
146
/**
 * Parses a document with LlamaParse and stores it in InstantDB (document_documents + link to file).
 * Returns the created documentId.
 *
 * Idempotent per file: if a document linked to `fileId` already exists, its id
 * is returned without re-parsing.
 *
 * @param {any} db - InstantDB admin client.
 * @param {Buffer} buffer - Raw file bytes (treated as PDF).
 * @param {string} fileName - Name stored on the document record.
 * @param {string} fileId - $files record id the document is linked to.
 * @returns {Promise<string>} The document_documents record id.
 * @throws {Error} On query/transact failures, LlamaCloud errors, or when no
 *   content could be extracted.
 */
export async function parseAndStoreDocument(db, buffer, fileName, fileId) {
    // Short-circuit: reuse a document already linked to this file.
    let existingDocument;
    try {
        existingDocument = await db.query({
            document_documents: {
                $: {
                    where: { "file.id": fileId },
                },
                file: {},
            },
        });
    }
    catch (error) {
        console.error("parseAndStoreDocument: query existing failed", safeErrorJson(error));
        throw error;
    }
    if (existingDocument.document_documents && existingDocument.document_documents.length > 0) {
        return existingDocument.document_documents[0].id;
    }
    // Parse via LlamaCloud: upload, poll until done, fetch markdown result.
    const jobId = await uploadToLlamaCloud(buffer, fileName);
    const result = await waitForProcessing(jobId);
    // Collect pages: whole-document markdown first (if present), then per-page text.
    const pages = [];
    if (result.markdown) {
        pages.push({
            id: id(),
            text: result.markdown,
        });
    }
    if (result.pages && result.pages.length > 0) {
        for (const page of result.pages) {
            pages.push({
                id: id(),
                text: page.text,
            });
        }
    }
    if (pages.length === 0) {
        throw new Error("No content extracted from document");
    }
    const documentId = id();
    try {
        await db.transact([
            db.tx.document_documents[documentId].update({
                content: { pages },
                name: fileName,
                mimeType: "application/pdf",
                createdAt: new Date(),
            }),
            db.tx.document_documents[documentId].link({
                file: fileId,
            }),
        ]);
    }
    catch (error) {
        console.error("parseAndStoreDocument: transact failed", safeErrorJson(error));
        // Diagnose missing schema attributes/links (static logs only).
        await probeInstantDocumentSchema(db);
        throw error;
    }
    return documentId;
}
@@ -0,0 +1,16 @@
1
+ import type { ThreadItem } from "../thread.store.js";
2
+ export declare function coerceDocumentTextPages(documentRecord: any, opts?: {
3
+ pageLabelPrefix?: string;
4
+ }): string;
5
+ export declare function expandEventsWithInstantDocuments(params: {
6
+ db: any;
7
+ events: ThreadItem[];
8
+ /**
9
+ * Hard limit to avoid huge model inputs. Defaults to 120k chars of extracted text.
10
+ */
11
+ maxChars?: number;
12
+ /**
13
+ * Event type used for derived document text. Defaults to "document.parsed".
14
+ */
15
+ derivedEventType?: string;
16
+ }): Promise<ThreadItem[]>;
@@ -0,0 +1,152 @@
1
+ import { parseAndStoreDocument } from "./instant.document-parser.js";
2
/**
 * True when a message part represents a file attachment: either an explicit
 * `type: "file"` part or one carrying Instant provider metadata.
 * @param {unknown} part - Candidate message part.
 * @returns {boolean}
 */
function isFilePart(part) {
    if (!part || typeof part !== "object") {
        return false;
    }
    const isExplicitFile = part.type === "file";
    const hasInstantMeta = Boolean(part?.providerMetadata?.instant);
    return isExplicitFile || hasInstantMeta;
}
7
/**
 * Builds a compact one-line description of a file attachment part.
 * Only stable identifiers are included — no URLs (they can be signed/sensitive).
 * @param {any} part - Message part with optional providerMetadata.instant.
 * @returns {string} e.g. `fileId="abc" filename="a.pdf" mediaType="application/pdf"`.
 */
function formatAttachmentSummary(part) {
    const instant = part?.providerMetadata?.instant ?? {};
    const fileId = typeof instant?.fileId === "string" ? instant.fileId : "";
    const filename = typeof part?.filename === "string" ? part.filename : "";
    const mediaType = typeof part?.mediaType === "string" ? part.mediaType : "";
    // Keep it compact; no URLs (can be signed/sensitive).
    // BUG FIX: previously emitted the literal `$(unknown)` instead of
    // interpolating the computed `filename`, which was left unused.
    return `fileId="${fileId}" filename="${filename}" mediaType="${mediaType}"`;
}
15
/**
 * Flattens a document record's parsed pages into one labeled text blob.
 * @param {any} documentRecord - Record with `content.pages: [{ text }]`.
 * @param {{ pageLabelPrefix?: string }} [opts] - Page label prefix, defaults to "Page".
 * @returns {string} Concatenated page texts, each preceded by a
 *   "--- <prefix> N ---" separator; "" when there are no pages.
 */
export function coerceDocumentTextPages(documentRecord, opts) {
    const pages = documentRecord?.content?.pages;
    if (!Array.isArray(pages) || pages.length === 0) {
        return "";
    }
    const label = opts?.pageLabelPrefix ?? "Page";
    const chunks = [];
    for (const [index, page] of pages.entries()) {
        const text = typeof page?.text === "string" ? page.text : "";
        chunks.push(`\n\n--- ${label} ${index + 1} ---\n\n${text}`);
    }
    return chunks.join("");
}
27
/**
 * Looks up a $files record (with its linked document) by id or, failing that, by path.
 * @param {any} db - InstantDB admin client.
 * @param {{ fileId?: string, path?: string }} params - At least one identifier.
 * @returns {Promise<any|null>} The first matching file record, or null.
 */
async function resolveInstantFileRecord(db, params) {
    const fileId = params.fileId ? String(params.fileId) : null;
    const filePath = params.path ? String(params.path) : null;
    if (!fileId && !filePath) {
        return null;
    }
    // Prefer the stable file id; fall back to a path lookup.
    const where = fileId ? { id: fileId } : { path: filePath };
    const result = await db.query({
        $files: { $: { where, limit: 1 }, document: {} },
    });
    return result?.$files?.[0] ?? null;
}
43
/**
 * Ensures a parsed document exists for an Instant file record.
 * Returns the already-linked document when present; otherwise downloads the
 * file by URL, parses/stores it via parseAndStoreDocument, and re-queries the
 * fresh document record. Returns null when the file has no fetchable URL.
 * @param {any} db - InstantDB admin client.
 * @param {{ fileRecord: any, part: any }} params - File record plus the
 *   originating message part (used only for a nicer file name).
 * @returns {Promise<any|null>} The document record, or null.
 * @throws {Error} When fetching the file URL returns a non-2xx status.
 */
async function ensureDocumentParsedForFile(db, params) {
    const fileRecord = params.fileRecord;
    const part = params.part;
    // The `document` link may be materialized as an array or a single object.
    let documentRecord = Array.isArray(fileRecord?.document)
        ? fileRecord.document?.[0]
        : fileRecord.document;
    if (documentRecord?.id)
        return documentRecord;
    const fileUrl = typeof fileRecord?.url === "string" ? fileRecord.url : "";
    if (!fileUrl.startsWith("http://") && !fileUrl.startsWith("https://")) {
        return null;
    }
    const resp = await fetch(fileUrl);
    if (!resp.ok)
        throw new Error(`Failed to fetch file for parsing: HTTP ${resp.status}`);
    const buffer = Buffer.from(await resp.arrayBuffer());
    // Best display name: part filename, then the file's stored path, then "file".
    const name = (typeof part?.filename === "string" && part.filename) ||
        (typeof fileRecord?.path === "string" && fileRecord.path) ||
        "file";
    // NOTE: Do not invent fallback paths. If the file doesn't have a stable `path`,
    // we don't fabricate one.
    // NOTE(review): `path` is computed but currently unused below — confirm
    // intent before removing.
    const path = typeof fileRecord?.path === "string" ? fileRecord.path : undefined;
    const documentId = await parseAndStoreDocument(db, buffer, name, String(fileRecord.id));
    // Re-query so the returned record includes the file link.
    const dq = await db.query({
        document_documents: { $: { where: { id: documentId }, limit: 1 }, file: {} },
    });
    documentRecord = dq?.document_documents?.[0] ?? null;
    return documentRecord;
}
72
/**
 * Expands thread events for model input.
 *
 * For each event containing file attachment parts:
 *   - Replaces the file parts with a short text notice + attachment summary
 *     (file parts are never forwarded to the model).
 *   - Appends a derived event (default type "document.parsed") per resolvable
 *     attachment, carrying the extracted document text (clipped to maxChars).
 * Events without file parts pass through unchanged.
 *
 * @param {{ db: any, events: any[], maxChars?: number, derivedEventType?: string }} params
 *   - db: InstantDB admin client; maxChars defaults to 120000.
 * @returns {Promise<any[]>} The expanded event list.
 */
export async function expandEventsWithInstantDocuments(params) {
    const db = params.db;
    const maxChars = typeof params.maxChars === "number" ? params.maxChars : 120000;
    const derivedEventType = params.derivedEventType ?? "document.parsed";
    const out = [];
    for (const event of params.events) {
        const parts = event?.content?.parts;
        if (!Array.isArray(parts) || parts.length === 0) {
            out.push(event);
            continue;
        }
        const hadFileParts = parts.some((p) => isFilePart(p));
        if (hadFileParts) {
            // Do not forward file parts to the model (gateways may not support some media types).
            // The derived `document.parsed` event contains the extracted text.
            const filtered = parts.filter((p) => !isFilePart(p));
            const attachmentSummaries = parts
                .filter((p) => isFilePart(p))
                .map((p) => formatAttachmentSummary(p))
                .join("\n");
            const attachmentInfoText = attachmentSummaries
                ? `Attachment info:\n${attachmentSummaries}`
                : "Attachment info: (unavailable)";
            // Shallow-copy the event, swapping file parts for a text placeholder.
            const sanitized = {
                ...event,
                content: {
                    ...event?.content,
                    parts: [
                        ...filtered,
                        {
                            type: "text",
                            text: "[Attachment omitted from model input. Parsed content will follow in a document.parsed event.]\n" +
                                attachmentInfoText,
                        },
                    ],
                },
            };
            out.push(sanitized);
        }
        else {
            out.push(event);
        }
        // Emit one derived event per resolvable file part with parsed text.
        for (const part of parts) {
            if (!isFilePart(part))
                continue;
            const instantMeta = part?.providerMetadata?.instant ?? {};
            const fileId = instantMeta?.fileId ? String(instantMeta.fileId) : undefined;
            const filePath = instantMeta?.path ? String(instantMeta.path) : undefined;
            const fileRecord = await resolveInstantFileRecord(db, { fileId, path: filePath });
            if (!fileRecord?.id)
                continue;
            const documentRecord = await ensureDocumentParsedForFile(db, { fileRecord, part });
            const pageText = coerceDocumentTextPages(documentRecord);
            if (!pageText)
                continue;
            // Hard cap to avoid blowing up the model input.
            const clipped = pageText.length > maxChars
                ? `${pageText.slice(0, maxChars)}\n\n[truncated: maxChars=${maxChars}]`
                : pageText;
            const derivedAttachmentInfo = `Attachment info:\n${formatAttachmentSummary(part)}`;
            const derived = {
                id: `derived:${event.id}:${String(fileRecord.id)}`,
                type: derivedEventType,
                channel: event.channel ?? "web",
                createdAt: new Date().toISOString(),
                content: {
                    parts: [
                        {
                            type: "text",
                            text: "Parsed document available.\n" +
                                derivedAttachmentInfo +
                                "\nProvider: llamacloud",
                        },
                        { type: "text", text: `Document transcription:${clipped}` },
                    ],
                },
            };
            out.push(derived);
        }
    }
    return out;
}