@exulu/backend 1.53.1 → 1.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,121 @@
1
+ import { generateText, stepCountIs } from "ai";
2
+ import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
3
+ import { withRetry } from "@SRC/utils/with-retry";
4
+ import { harvestChunks } from "./tools";
5
+ import type { AgenticRetrievalOutput, ChunkResult } from "./types";
6
+
7
+ const MAX_STEPS = 10;
8
+
9
+ /**
10
+ * Observe → Infer → Act loop for V4 agentic retrieval.
11
+ *
12
+ * Unlike V3 (which pre-classifies, routes to strategies, and forces tool calls),
13
+ * this loop simply:
14
+ * 1. Calls the model with toolChoice "auto"
15
+ * 2. Executes whatever tools the model picks
16
+ * 3. Harvests any chunk-shaped rows from query results
17
+ * 4. Repeats until the model produces a text response (no tool calls) or
18
+ * the MAX_STEPS budget is exhausted
19
+ *
20
+ * The model decides when it has enough information — no finish_retrieval tool needed.
21
+ */
22
+ export async function* runAgentLoop(params: {
23
+ query: string;
24
+ systemPrompt: string;
25
+ tools: Record<string, AITool>;
26
+ model: LanguageModel;
27
+ onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
28
+ }): AsyncGenerator<AgenticRetrievalOutput> {
29
+ const { query, systemPrompt, tools, model, onStepComplete } = params;
30
+
31
+ const output: AgenticRetrievalOutput = {
32
+ steps: [],
33
+ reasoning: [],
34
+ chunks: [],
35
+ usage: [],
36
+ totalTokens: 0,
37
+ };
38
+
39
+ // Deduplicate chunks by chunk_id across all steps
40
+ const seenChunkIds = new Set<string>();
41
+
42
+ const messages: ModelMessage[] = [{ role: "user", content: query }];
43
+
44
+ for (let step = 0; step < MAX_STEPS; step++) {
45
+ console.log(`[EXULU] v4 agent loop — step ${step + 1}/${MAX_STEPS}`);
46
+
47
+ let result: Awaited<ReturnType<typeof generateText>>;
48
+ try {
49
+ result = await withRetry(() =>
50
+ generateText({
51
+ model,
52
+ temperature: 0,
53
+ system: systemPrompt,
54
+ messages,
55
+ tools,
56
+ toolChoice: "auto",
57
+ stopWhen: stepCountIs(1),
58
+ }),
59
+ );
60
+ } catch (err) {
61
+ console.error("[EXULU] v4 generateText failed:", err);
62
+ throw err;
63
+ }
64
+
65
+ // Append assistant turn + tool results to conversation history
66
+ messages.push(...(result.response.messages as ModelMessage[]));
67
+
68
+ // Harvest chunks from any execute_query tool results
69
+ const rawToolResults = (result.toolResults as any[]) ?? [];
70
+ const stepChunks: ChunkResult[] = [];
71
+ for (const chunk of harvestChunks(rawToolResults)) {
72
+ if (!chunk.chunk_id || !seenChunkIds.has(chunk.chunk_id)) {
73
+ if (chunk.chunk_id) seenChunkIds.add(chunk.chunk_id);
74
+ stepChunks.push(chunk);
75
+ }
76
+ }
77
+
78
+ // Record step
79
+ const stepRecord: AgenticRetrievalOutput["steps"][0] = {
80
+ stepNumber: step + 1,
81
+ text: result.text ?? "",
82
+ toolCalls:
83
+ (result.toolCalls as any[])?.map((tc) => ({
84
+ name: tc.toolName,
85
+ id: tc.toolCallId,
86
+ input: tc.input,
87
+ })) ?? [],
88
+ chunks: stepChunks,
89
+ tokens: result.usage?.totalTokens ?? 0,
90
+ };
91
+
92
+ output.steps.push(stepRecord);
93
+ output.reasoning.push({
94
+ text: result.text ?? "",
95
+ tools:
96
+ (result.toolCalls as any[])?.map((tc) => ({
97
+ name: tc.toolName,
98
+ id: tc.toolCallId,
99
+ input: tc.input,
100
+ output: rawToolResults.find(
101
+ (r: any) => (r.toolCallId ?? r.id) === tc.toolCallId,
102
+ )?.output,
103
+ })) ?? [],
104
+ });
105
+ output.chunks.push(...stepChunks);
106
+ output.usage.push(result.usage);
107
+
108
+ onStepComplete?.(stepRecord);
109
+
110
+ yield { ...output };
111
+
112
+ // Stop when the model wrote a text response without calling any tools
113
+ const calledTools = (result.toolCalls as any[])?.length > 0;
114
+ if (!calledTools) {
115
+ console.log(`[EXULU] v4 — model finished after step ${step + 1} (no tool calls)`);
116
+ break;
117
+ }
118
+ }
119
+
120
+ output.totalTokens = output.usage.reduce((sum, u) => sum + (u?.totalTokens ?? 0), 0);
121
+ }
@@ -0,0 +1,76 @@
1
+ import type { ExuluContext } from "@SRC/exulu/context";
2
+ import type { User } from "@EXULU_TYPES/models/user";
3
+
4
+ /**
5
+ * Finds embed('text') or embed('text', 'contextId') calls in a SQL string,
6
+ * generates the embedding vectors using the appropriate context's embedder,
7
+ * and substitutes them with ARRAY[...]::vector literals so db.raw() can execute it.
8
+ *
9
+ * Examples:
10
+ * embed('machine learning') → uses first context that has an embedder
11
+ * embed('machine learning', 'ctx1') → uses the embedder from context 'ctx1'
12
+ */
13
+ export async function preprocessEmbedCalls(
14
+ sql: string,
15
+ contexts: ExuluContext[],
16
+ user?: User,
17
+ role?: string,
18
+ ): Promise<string> {
19
+ // Match embed('...') or embed('...', 'contextId')
20
+ // We use a global regex but process matches manually so we can await async calls
21
+ const EMBED_RE = /embed\('((?:[^'\\]|\\.)*)'\s*(?:,\s*'((?:[^'\\]|\\.)*)')?\)/gi;
22
+
23
+ const matches: { fullMatch: string; text: string; contextId?: string; index: number }[] = [];
24
+
25
+ let m: RegExpExecArray | null;
26
+ while ((m = EMBED_RE.exec(sql)) !== null) {
27
+ matches.push({
28
+ fullMatch: m[0],
29
+ text: m[1],
30
+ contextId: m[2] || undefined,
31
+ index: m.index,
32
+ });
33
+ }
34
+
35
+ if (matches.length === 0) return sql;
36
+
37
+ // Generate all embeddings in parallel
38
+ const substitutions = await Promise.all(
39
+ matches.map(async ({ text, contextId }) => {
40
+ const context = contextId
41
+ ? contexts.find((c) => c.id === contextId)
42
+ : contexts.find((c) => c.embedder != null);
43
+
44
+ if (!context?.embedder) {
45
+ throw new Error(
46
+ `No embedder available${contextId ? ` for context "${contextId}"` : ""}. ` +
47
+ `Available contexts with embedders: [${contexts.filter((c) => c.embedder).map((c) => c.id).join(", ")}]`,
48
+ );
49
+ }
50
+
51
+ const result = await context.embedder.generateFromQuery(
52
+ context.id,
53
+ text,
54
+ undefined,
55
+ (user as any)?.id,
56
+ role,
57
+ );
58
+
59
+ const vector = result?.chunks?.[0]?.vector;
60
+ if (!vector?.length) {
61
+ throw new Error(`Embedder returned no vector for text: "${text}"`);
62
+ }
63
+
64
+ return `ARRAY[${vector.join(",")}]::vector`;
65
+ }),
66
+ );
67
+
68
+ // Replace in reverse order so indices stay valid
69
+ let result = sql;
70
+ for (let i = matches.length - 1; i >= 0; i--) {
71
+ const { fullMatch, index } = matches[i];
72
+ result = result.slice(0, index) + substitutions[i] + result.slice(index + fullMatch.length);
73
+ }
74
+
75
+ return result;
76
+ }
@@ -0,0 +1,181 @@
1
+ import * as os from "os";
2
+ import * as path from "path";
3
+ import * as fs from "fs/promises";
4
+ import { z } from "zod";
5
+ import { randomUUID } from "crypto";
6
+ import type { LanguageModel } from "ai";
7
+ import type { ExuluContext } from "@SRC/exulu/context";
8
+ import type { ExuluReranker } from "@SRC/exulu/reranker";
9
+ import { ExuluTool } from "@SRC/exulu/tool";
10
+ import type { User } from "@EXULU_TYPES/models/user";
11
+ import { checkLicense } from "@EE/entitlements";
12
+ import { createTools } from "./tools";
13
+ import { buildSystemPrompt } from "./system-prompt";
14
+ import { runAgentLoop } from "./agent-loop";
15
+ import type { AgenticRetrievalOutput } from "./types";
16
+
17
+ async function* executeV4({
18
+ query,
19
+ contexts,
20
+ model,
21
+ user,
22
+ role,
23
+ customInstructions,
24
+ }: {
25
+ query: string;
26
+ contexts: ExuluContext[];
27
+ model: LanguageModel;
28
+ user?: User;
29
+ role?: string;
30
+ customInstructions?: string;
31
+ }): AsyncGenerator<AgenticRetrievalOutput> {
32
+ // Per-call temp directory — cleaned up after the loop finishes
33
+ const sessionId = randomUUID();
34
+ const sessionDir = path.join(os.tmpdir(), `exulu-v4-${sessionId}`);
35
+
36
+ console.log("[EXULU] v4 — starting observe-infer-act retrieval");
37
+
38
+ const tools = createTools({ contexts, user, role, sessionDir });
39
+ const systemPrompt = buildSystemPrompt(contexts, customInstructions);
40
+
41
+ let finalOutput: AgenticRetrievalOutput | undefined;
42
+
43
+ try {
44
+ for await (const output of runAgentLoop({
45
+ query,
46
+ systemPrompt,
47
+ tools,
48
+ model,
49
+ })) {
50
+ finalOutput = output;
51
+ yield output;
52
+ }
53
+ } finally {
54
+ // Best-effort cleanup of temp files
55
+ fs.rm(sessionDir, { recursive: true, force: true }).catch(() => {});
56
+ }
57
+
58
+ if (finalOutput) {
59
+ console.log(
60
+ `[EXULU] v4 — done. steps=${finalOutput.steps.length} chunks=${finalOutput.chunks.length} tokens=${finalOutput.totalTokens}`,
61
+ );
62
+ }
63
+ }
64
+
65
+ /**
66
+ * Creates the V4 ExuluTool for agentic context retrieval.
67
+ *
68
+ * V4 uses an observe-infer-act loop with two primitive tools:
69
+ * - execute_query: raw PostgreSQL SELECT via db.raw (with embed() helper for semantic search)
70
+ * - grep: iterative search on large result files
71
+ *
72
+ * Unlike V3, there is no upfront query classification or strategy routing.
73
+ * The agent writes its own SQL and decides when it has found enough information.
74
+ */
75
+ export function createAgenticRetrievalToolV4({
76
+ contexts,
77
+ instructions: adminInstructions,
78
+ rerankers,
79
+ user,
80
+ role,
81
+ model,
82
+ }: {
83
+ contexts: ExuluContext[];
84
+ rerankers: ExuluReranker[];
85
+ user?: User;
86
+ role?: string;
87
+ model?: LanguageModel;
88
+ instructions?: string;
89
+ }): ExuluTool | undefined {
90
+ const license = checkLicense();
91
+ if (!license["agentic-retrieval"]) {
92
+ console.warn("[EXULU] Not licensed for agentic retrieval");
93
+ return undefined;
94
+ }
95
+
96
+ const contextNames = contexts.map((c) => c.id).join(", ");
97
+
98
+ return new ExuluTool({
99
+ id: "agentic_context_search_v4",
100
+ name: "Agentic Context Search (V4)",
101
+ description: `Observe-infer-act retrieval using raw SQL. Searches: ${contextNames}`,
102
+ category: "contexts",
103
+ needsApproval: false,
104
+ type: "context",
105
+ config: [
106
+ {
107
+ name: "instructions",
108
+ description: "Custom instructions for the retrieval agent",
109
+ type: "string",
110
+ default: "",
111
+ },
112
+ {
113
+ name: "reasoning_model",
114
+ description:
115
+ "Override the model used by the retrieval agent (default: inherits from calling agent)",
116
+ type: "string",
117
+ default: "",
118
+ },
119
+ ...contexts.map((ctx) => ({
120
+ name: ctx.id,
121
+ description: `Enable search in "${ctx.name}". ${ctx.description}`,
122
+ type: "boolean" as const,
123
+ default: true,
124
+ })),
125
+ ],
126
+ inputSchema: z.object({
127
+ query: z.string().describe("The question or query to answer"),
128
+ userInstructions: z
129
+ .string()
130
+ .optional()
131
+ .describe("Additional instructions from the user to guide retrieval"),
132
+ }),
133
+ execute: async function* ({
134
+ query,
135
+ userInstructions,
136
+ toolVariablesConfig,
137
+ }: {
138
+ query: string;
139
+ userInstructions?: string;
140
+ toolVariablesConfig?: Record<string, any>;
141
+ }) {
142
+ if (!model) {
143
+ throw new Error("Model is required for executing the agentic retrieval tool");
144
+ }
145
+
146
+ let activeContexts = contexts;
147
+ let configInstructions = "";
148
+
149
+ if (toolVariablesConfig) {
150
+ configInstructions = toolVariablesConfig["instructions"] ?? "";
151
+
152
+ activeContexts = contexts.filter(
153
+ (ctx) =>
154
+ toolVariablesConfig[ctx.id] === true ||
155
+ toolVariablesConfig[ctx.id] === "true" ||
156
+ toolVariablesConfig[ctx.id] === 1,
157
+ );
158
+ if (activeContexts.length === 0) activeContexts = contexts;
159
+ }
160
+
161
+ const combinedInstructions = [
162
+ configInstructions ? `Configuration instructions: ${configInstructions}` : "",
163
+ adminInstructions ? `Admin instructions: ${adminInstructions}` : "",
164
+ userInstructions ? `User instructions: ${userInstructions}` : "",
165
+ ]
166
+ .filter(Boolean)
167
+ .join("\n");
168
+
169
+ for await (const output of executeV4({
170
+ query,
171
+ contexts: activeContexts,
172
+ model,
173
+ user,
174
+ role,
175
+ customInstructions: combinedInstructions || undefined,
176
+ })) {
177
+ yield { result: JSON.stringify(output) };
178
+ }
179
+ },
180
+ });
181
+ }
@@ -0,0 +1,248 @@
1
+ import { getTableName, getChunksTableName, type ExuluContext } from "@SRC/exulu/context";
2
+
3
/**
 * Builds the system prompt for the V4 observe-infer-act retrieval agent.
 *
 * The prompt includes:
 * 1. The observe-infer-act loop philosophy
 * 2. The full database schema for every available context
 * 3. Common SQL query patterns (keyword, semantic, hybrid, aggregation)
 * 4. Instructions on when/how to use grep for large result sets
 * 5. The standard column alias convention the agent should follow
 *
 * @param contexts           Contexts whose schemas are embedded in the prompt.
 * @param customInstructions Optional extra instructions inserted near the end
 *                           of the prompt (before the closing directive).
 * @returns The complete system prompt string.
 */
export function buildSystemPrompt(
  contexts: ExuluContext[],
  customInstructions?: string,
): string {
  // Per-context markdown schema sections (see buildSchemaBlock below).
  const schemaBlock = buildSchemaBlock(contexts);
  // Semantic + hybrid SQL examples are only included when at least one
  // context can actually produce embeddings; otherwise the prompt says so.
  const hasEmbedder = contexts.some((c) => c.embedder != null);

  // NOTE: template content is left-aligned on purpose — every line below
  // becomes part of the prompt verbatim (`\` suppresses the leading newline).
  return `\
You are a knowledge base retrieval agent. Your job is to find all information relevant to the user's query.

## Approach: Observe → Infer → Act

Work iteratively:
1. **Observe** — examine what data you have and what the query asks for
2. **Infer** — decide what SQL query will best surface relevant information
3. **Act** — execute the query and study the results
4. Repeat until you have found sufficient information, then write your final answer.

Do NOT guess or hallucinate. If results are empty, try alternative queries (different keywords,
broader filters, semantic search). Exhaust the available search strategies before concluding
that no relevant data exists.

---

## Database Schema

${schemaBlock}

---

## Query Patterns

### Keyword / Full-Text Search
\`\`\`sql
SELECT
  c.id AS chunk_id,
  c.chunk_index,
  c.content AS chunk_content,
  c.metadata,
  c.source AS item_id,
  i.name AS item_name,
  '<context_id>' AS context
FROM <context_id>_chunks c
JOIN <context_id>_items i ON c.source = i.id
WHERE c.fts @@ plainto_tsquery('english', 'your search terms')
  AND (i.archived IS FALSE OR i.archived IS NULL)
ORDER BY ts_rank(c.fts, plainto_tsquery('english', 'your search terms')) DESC
LIMIT 20;
\`\`\`

For German text use \`'german'\` instead of \`'english'\`.
For multi-language, use \`websearch_to_tsquery\` or UNION both languages.
${
  hasEmbedder
    ? `
### Semantic Search (use embed() helper)
\`\`\`sql
SELECT
  c.id AS chunk_id,
  c.chunk_index,
  c.content AS chunk_content,
  c.metadata,
  c.source AS item_id,
  i.name AS item_name,
  '<context_id>' AS context,
  c.embedding <=> embed('your concept here') AS distance
FROM <context_id>_chunks c
JOIN <context_id>_items i ON c.source = i.id
WHERE (i.archived IS FALSE OR i.archived IS NULL)
ORDER BY distance ASC
LIMIT 20;
\`\`\`

### Hybrid Search (keyword + semantic combined via RRF)
\`\`\`sql
WITH fts AS (
  SELECT id, ROW_NUMBER() OVER (ORDER BY ts_rank(fts, q) DESC) AS rank
  FROM <context_id>_chunks, plainto_tsquery('english', 'your query') q
  WHERE fts @@ q
  LIMIT 500
),
sem AS (
  SELECT id, ROW_NUMBER() OVER (ORDER BY embedding <=> embed('your query') ASC) AS rank
  FROM <context_id>_chunks
  LIMIT 500
),
rrf AS (
  SELECT
    COALESCE(fts.id, sem.id) AS id,
    (COALESCE(1.0 / (50 + fts.rank), 0) * 2 + COALESCE(1.0 / (50 + sem.rank), 0)) AS score
  FROM fts FULL OUTER JOIN sem ON fts.id = sem.id
)
SELECT
  c.id AS chunk_id,
  c.chunk_index,
  c.content AS chunk_content,
  c.metadata,
  c.source AS item_id,
  i.name AS item_name,
  '<context_id>' AS context,
  rrf.score
FROM rrf
JOIN <context_id>_chunks c ON c.id = rrf.id
JOIN <context_id>_items i ON c.source = i.id
WHERE (i.archived IS FALSE OR i.archived IS NULL)
ORDER BY rrf.score DESC
LIMIT 20;
\`\`\`
`
    : `
Note: No embedder is configured for these contexts. Use keyword/full-text search only.
`
}
### Browse all chunks of a specific document (in order)
\`\`\`sql
SELECT
  c.id AS chunk_id,
  c.chunk_index,
  c.content AS chunk_content,
  c.metadata,
  c.source AS item_id,
  i.name AS item_name,
  '<context_id>' AS context
FROM <context_id>_chunks c
JOIN <context_id>_items i ON c.source = i.id
WHERE c.source = '<item_id>'
ORDER BY c.chunk_index;
\`\`\`

### Count / aggregate
\`\`\`sql
SELECT COUNT(*) FROM <context_id>_items WHERE archived IS FALSE;
SELECT COUNT(*) FROM <context_id>_chunks;
\`\`\`

### Explore item names (when query is about a specific document)
\`\`\`sql
SELECT id, name, external_id, "createdAt"
FROM <context_id>_items
WHERE (archived IS FALSE OR archived IS NULL)
  AND LOWER(name) LIKE '%keyword%'
LIMIT 50;
\`\`\`

### Filter by custom metadata on chunks
\`\`\`sql
SELECT chunk_id, chunk_content, item_name, context
FROM ...
WHERE c.metadata->>'page' = '5'
   OR c.metadata @> '{"category": "finance"}'
\`\`\`

---

## Column Alias Convention

**Always use these aliases** in queries that return chunks so results are collected correctly:

| Alias | Source column |
|----------------|-------------------------|
| \`chunk_id\` | \`c.id\` |
| \`chunk_index\` | \`c.chunk_index\` |
| \`chunk_content\`| \`c.content\` |
| \`item_id\` | \`c.source\` |
| \`item_name\` | \`i.name\` |
| \`context\` | literal context id string |
| \`metadata\` | \`c.metadata\` |

---

## Handling Large Results

When execute_query returns a file path (results > 20k chars):
1. Use \`grep\` with a specific pattern to find relevant sections
2. Multiple grep calls are fine — narrow down iteratively
3. Once you know specific \`item_id\` or \`chunk_id\` values, run a targeted SELECT to get full content

---

## Search Strategy

- **Start broad**: use keyword or hybrid search with your main terms, LIMIT 30–50
- **Go deeper**: if results are sparse, try alternative phrasings, synonyms, or semantic search
- **Drill into documents**: once you find a relevant item, fetch its chunks in order to get full context
- **Cross-context**: search multiple contexts when the query could span knowledge bases
- **Aggregate last**: use COUNT queries only for "how many" questions

---
${customInstructions ? `## Additional Instructions\n\n${customInstructions}\n\n---\n` : ""}
When you have gathered sufficient information, write a clear answer. Do not call any more tools once you have what you need.`;
}
204
+
205
/**
 * Renders one markdown schema section per context: the items table, the
 * chunks table, any custom fields, and a note on whether an embedder (and
 * thus the embed() SQL helper) is available for that context.
 *
 * Consumed verbatim by buildSystemPrompt's "Database Schema" section.
 *
 * @param contexts Contexts to describe.
 * @returns All per-context sections joined by blank lines.
 */
function buildSchemaBlock(contexts: ExuluContext[]): string {
  return contexts
    .map((ctx) => {
      // Physical table names are derived from the context id.
      const itemsTable = getTableName(ctx.id);
      const chunksTable = getChunksTableName(ctx.id);

      // Two-space indent matches the fixed column list in the template below.
      const customFields =
        ctx.fields.length > 0
          ? ctx.fields.map((f) => `  ${f.name} (${f.type})`).join("\n")
          : "  (no custom fields)";

      const embedderNote = ctx.embedder
        ? `Embedder: ${ctx.embedder.name} — semantic search and embed() are available`
        : "No embedder — use keyword search only";

      return `### Context: "${ctx.name}" (id: \`${ctx.id}\`)
${ctx.description || ""}
${embedderNote}

**${itemsTable}** — documents / items
  id (uuid, primary key)
  name (text)
  external_id (text, nullable)
  archived (boolean, nullable)
  created_by (integer, nullable)
  rights_mode (text, nullable)
  "createdAt" (timestamp)
  "updatedAt" (timestamp)
  -- Custom fields:
${customFields}

**${chunksTable}** — text chunks (source FK → ${itemsTable}.id)
  id (uuid, primary key)
  source (uuid, FK → ${itemsTable}.id)
  content (text)
  chunk_index (integer)
  fts (tsvector — full-text search index)
  embedding (vector — pgvector, nullable)
  metadata (jsonb, nullable)
  "createdAt" (timestamp)
  "updatedAt" (timestamp)`;
    })
    .join("\n\n");
}