graphile-llm 0.8.0 → 0.10.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/esm/metering.js CHANGED
@@ -9,9 +9,9 @@
9
9
  * When the quota check fails, the wrapper returns null (graceful degradation)
10
10
  * instead of throwing, so the search pipeline can fall back to text-only.
11
11
  *
12
- * Token counts are estimated from text length (~4 chars per token). No
13
- * tokenizer needed — the billing system uses tokens as abstract units
14
- * and the credit_cost on each model's meter normalizes the relative expense.
12
+ * Token counts:
13
+ * - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
14
+ * - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
15
15
  *
16
16
  * The billing functions live in the tenant database and are called via the
17
17
  * Graphile `withPgClient` callback. Function locations (schema, names) are
@@ -88,7 +88,7 @@ export async function logInferenceUsage(ctx, entry) {
88
88
  entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
89
89
  entry.embeddingModel, entry.embeddingLatencyMs,
90
90
  entry.status, entry.errorType,
91
- entry.rawUsage ? JSON.stringify(entry.rawUsage) : null,
91
+ entry.rawUsage ? JSON.stringify(entry.rawUsage) : null
92
92
  ]);
93
93
  });
94
94
  }
@@ -108,31 +108,31 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
108
108
  const startTime = Date.now();
109
109
  // No billing context → just embed without metering
110
110
  if (!ctx) {
111
- const result = await embedder(text);
111
+ const { embedding } = await embedder(text);
112
112
  return {
113
- result,
113
+ result: embedding,
114
114
  metered: false,
115
115
  quotaExceeded: false,
116
- latencyMs: Date.now() - startTime,
116
+ latencyMs: Date.now() - startTime
117
117
  };
118
118
  }
119
119
  const meterSlug = options.embeddingMeterSlug;
120
120
  if (!meterSlug) {
121
- const result = await embedder(text);
121
+ const { embedding } = await embedder(text);
122
122
  return {
123
- result,
123
+ result: embedding,
124
124
  metered: false,
125
125
  quotaExceeded: false,
126
- latencyMs: Date.now() - startTime,
126
+ latencyMs: Date.now() - startTime
127
127
  };
128
128
  }
129
129
  if (options.skipMetering) {
130
- const result = await embedder(text);
130
+ const { embedding } = await embedder(text);
131
131
  return {
132
- result,
132
+ result: embedding,
133
133
  metered: false,
134
134
  quotaExceeded: false,
135
- latencyMs: Date.now() - startTime,
135
+ latencyMs: Date.now() - startTime
136
136
  };
137
137
  }
138
138
  // Pre-check: can this entity afford this call?
@@ -146,8 +146,6 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
146
146
  allowed = true;
147
147
  }
148
148
  if (!allowed) {
149
- // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
150
- const placeholderAmountTokens = Math.ceil(text.length / 4);
151
149
  logInferenceUsage(ctx, {
152
150
  databaseId: ctx.databaseId,
153
151
  entityId: ctx.entityId,
@@ -156,9 +154,9 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
156
154
  provider: options.provider ?? null,
157
155
  service: 'embedding',
158
156
  operation: 'create',
159
- inputTokens: placeholderAmountTokens,
157
+ inputTokens: 0,
160
158
  outputTokens: 0,
161
- totalTokens: placeholderAmountTokens,
159
+ totalTokens: 0,
162
160
  cacheReadTokens: null,
163
161
  cacheWriteTokens: null,
164
162
  latencyMs: Date.now() - startTime,
@@ -168,26 +166,25 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
168
166
  embeddingLatencyMs: null,
169
167
  status: 'quota_exceeded',
170
168
  errorType: null,
171
- rawUsage: null,
169
+ rawUsage: null
172
170
  }).catch(() => { });
173
171
  return {
174
172
  result: null,
175
173
  metered: true,
176
174
  quotaExceeded: true,
177
- latencyMs: Date.now() - startTime,
175
+ latencyMs: Date.now() - startTime
178
176
  };
179
177
  }
180
- // Execute embedding
181
- const result = await embedder(text);
178
+ // Execute embedding — real token count from provider via EmbeddingResult
179
+ const { embedding, promptTokens } = await embedder(text);
182
180
  const latencyMs = Date.now() - startTime;
183
- // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
184
- const placeholderAmountTokens = Math.ceil(text.length / 4);
185
181
  ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
186
- await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, text.length, {
182
+ await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, promptTokens, {
187
183
  request_id: ctx.requestId,
188
184
  input_chars: text.length,
189
- dims: result.length,
190
- latency_ms: latencyMs,
185
+ prompt_tokens: promptTokens,
186
+ dims: embedding.length,
187
+ latency_ms: latencyMs
191
188
  });
192
189
  }).catch(() => { });
193
190
  // Log to inference usage table
@@ -199,9 +196,9 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
199
196
  provider: options.provider ?? null,
200
197
  service: 'embedding',
201
198
  operation: 'create',
202
- inputTokens: placeholderAmountTokens,
199
+ inputTokens: promptTokens,
203
200
  outputTokens: 0,
204
- totalTokens: placeholderAmountTokens,
201
+ totalTokens: promptTokens,
205
202
  cacheReadTokens: null,
206
203
  cacheWriteTokens: null,
207
204
  latencyMs,
@@ -211,13 +208,13 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
211
208
  embeddingLatencyMs: latencyMs,
212
209
  status: 'success',
213
210
  errorType: null,
214
- rawUsage: null,
211
+ rawUsage: { prompt_tokens: promptTokens }
215
212
  }).catch(() => { });
216
213
  return {
217
- result,
214
+ result: embedding,
218
215
  metered: true,
219
216
  quotaExceeded: false,
220
- latencyMs,
217
+ latencyMs
221
218
  };
222
219
  }
223
220
  // ─── Metered Chat ───────────────────────────────────────────────────────────
@@ -227,31 +224,31 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
227
224
  export async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {}) {
228
225
  const startTime = Date.now();
229
226
  if (!ctx) {
230
- const result = await chat(messages, chatOptions);
227
+ const chatResult = await chat(messages, chatOptions);
231
228
  return {
232
- result,
229
+ result: chatResult.content,
233
230
  metered: false,
234
231
  quotaExceeded: false,
235
- latencyMs: Date.now() - startTime,
232
+ latencyMs: Date.now() - startTime
236
233
  };
237
234
  }
238
235
  const meterSlug = meteringOptions.chatMeterSlug;
239
236
  if (!meterSlug) {
240
- const result = await chat(messages, chatOptions);
237
+ const chatResult = await chat(messages, chatOptions);
241
238
  return {
242
- result,
239
+ result: chatResult.content,
243
240
  metered: false,
244
241
  quotaExceeded: false,
245
- latencyMs: Date.now() - startTime,
242
+ latencyMs: Date.now() - startTime
246
243
  };
247
244
  }
248
245
  if (meteringOptions.skipMetering) {
249
- const result = await chat(messages, chatOptions);
246
+ const chatResult = await chat(messages, chatOptions);
250
247
  return {
251
- result,
248
+ result: chatResult.content,
252
249
  metered: false,
253
250
  quotaExceeded: false,
254
- latencyMs: Date.now() - startTime,
251
+ latencyMs: Date.now() - startTime
255
252
  };
256
253
  }
257
254
  // Pre-check: can this entity afford this call?
@@ -265,8 +262,7 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
265
262
  allowed = true;
266
263
  }
267
264
  if (!allowed) {
268
- // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
269
- const placeholderInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
265
+ const estimatedInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
270
266
  logInferenceUsage(ctx, {
271
267
  databaseId: ctx.databaseId,
272
268
  entityId: ctx.entityId,
@@ -275,9 +271,9 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
275
271
  provider: meteringOptions.provider ?? null,
276
272
  service: 'llm',
277
273
  operation: 'chat',
278
- inputTokens: placeholderInputTokens,
274
+ inputTokens: estimatedInputTokens,
279
275
  outputTokens: 0,
280
- totalTokens: placeholderInputTokens,
276
+ totalTokens: estimatedInputTokens,
281
277
  cacheReadTokens: null,
282
278
  cacheWriteTokens: null,
283
279
  latencyMs: Date.now() - startTime,
@@ -287,33 +283,31 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
287
283
  embeddingLatencyMs: null,
288
284
  status: 'quota_exceeded',
289
285
  errorType: null,
290
- rawUsage: null,
286
+ rawUsage: null
291
287
  }).catch(() => { });
292
288
  return {
293
289
  result: null,
294
290
  metered: true,
295
291
  quotaExceeded: true,
296
- latencyMs: Date.now() - startTime,
292
+ latencyMs: Date.now() - startTime
297
293
  };
298
294
  }
299
- // Execute chat completion
300
- const result = await chat(messages, chatOptions);
295
+ // Execute chat completion — returns real token usage from provider
296
+ const chatResult = await chat(messages, chatOptions);
301
297
  const latencyMs = Date.now() - startTime;
302
- // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
303
- const inputChars = messages.reduce((sum, m) => sum + m.content.length, 0);
304
- const placeholderInputTokens = Math.ceil(inputChars / 4);
305
- const placeholderOutputTokens = Math.ceil(result.length / 4);
306
- const placeholderTotalTokens = placeholderInputTokens + placeholderOutputTokens;
298
+ const usage = chatResult.usage;
307
299
  ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
308
- await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, inputChars + result.length, {
300
+ await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, usage.totalTokens, {
309
301
  request_id: ctx.requestId,
310
- input_chars: inputChars,
311
- output_chars: result.length,
302
+ input_tokens: usage.input,
303
+ output_tokens: usage.output,
304
+ cache_read_tokens: usage.cacheRead,
305
+ cache_write_tokens: usage.cacheWrite,
312
306
  messages_count: messages.length,
313
- latency_ms: latencyMs,
307
+ latency_ms: latencyMs
314
308
  });
315
309
  }).catch(() => { });
316
- // Log to inference usage table
310
+ // Log to inference usage table with real provider token counts
317
311
  logInferenceUsage(ctx, {
318
312
  databaseId: ctx.databaseId,
319
313
  entityId: ctx.entityId,
@@ -322,11 +316,11 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
322
316
  provider: meteringOptions.provider ?? null,
323
317
  service: 'llm',
324
318
  operation: 'chat',
325
- inputTokens: placeholderInputTokens,
326
- outputTokens: placeholderOutputTokens,
327
- totalTokens: placeholderTotalTokens,
328
- cacheReadTokens: null,
329
- cacheWriteTokens: null,
319
+ inputTokens: usage.input,
320
+ outputTokens: usage.output,
321
+ totalTokens: usage.totalTokens,
322
+ cacheReadTokens: usage.cacheRead || null,
323
+ cacheWriteTokens: usage.cacheWrite || null,
330
324
  latencyMs,
331
325
  ragEnabled: false,
332
326
  chunksRetrieved: null,
@@ -334,13 +328,13 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
334
328
  embeddingLatencyMs: null,
335
329
  status: 'success',
336
330
  errorType: null,
337
- rawUsage: null,
331
+ rawUsage: { reasoning: usage.reasoning }
338
332
  }).catch(() => { });
339
333
  return {
340
- result,
334
+ result: chatResult.content,
341
335
  metered: true,
342
336
  quotaExceeded: false,
343
- latencyMs,
337
+ latencyMs
344
338
  };
345
339
  }
346
340
  // ─── Error Types ────────────────────────────────────────────────────────────
@@ -12,7 +12,7 @@ import { ModuleConfigCache } from 'graphile-cache';
12
12
  // ─── Cache ──────────────────────────────────────────────────────────────────
13
13
  const agentDiscoveryCache = new ModuleConfigCache({
14
14
  name: 'agent-discovery',
15
- ttlMs: 60_000,
15
+ ttlMs: 60_000
16
16
  });
17
17
  /** Clear all cached discovery results (for testing) */
18
18
  export function clearAgentDiscoveryCache() {
@@ -53,7 +53,7 @@ export async function getAgentDiscovery(pool, dbname) {
53
53
  : null,
54
54
  task: row.task_table_name
55
55
  ? { schemaName, tableName: row.task_table_name }
56
- : null,
56
+ : null
57
57
  };
58
58
  }
59
59
  }
@@ -21,7 +21,7 @@
21
21
  * if loaded (it runs after this plugin and before the consumer plugins).
22
22
  */
23
23
  import type { GraphileConfig } from 'graphile-config';
24
- import type { EmbedderFunction, ChatFunction, GraphileLlmOptions } from '../types';
24
+ import type { ChatFunction, EmbedderFunction, GraphileLlmOptions } from '../types';
25
25
  declare global {
26
26
  namespace GraphileBuild {
27
27
  interface Build {
@@ -20,8 +20,8 @@
20
20
  * The optional LlmMeteringPlugin wraps the embedder with billing integration
21
21
  * if loaded (it runs after this plugin and before the consumer plugins).
22
22
  */
23
- import { buildEmbedder, buildEmbedderFromEnv } from '../embedder';
24
23
  import { buildChatCompleter, buildChatCompleterFromEnv } from '../chat';
24
+ import { buildEmbedder, buildEmbedderFromEnv } from '../embedder';
25
25
  import { getLlmEnvOptions } from '../env';
26
26
  /**
27
27
  * Creates the LlmModulePlugin with the given options.
@@ -81,10 +81,10 @@ export function createLlmModulePlugin(options = {}) {
81
81
  llmEmbedder: embedder,
82
82
  llmChatCompleter: chat,
83
83
  llmEmbeddingModel: defaultEmbedder?.model ?? getLlmEnvOptions().embedding.model,
84
- llmChatModel: defaultChatCompleter?.model ?? getLlmEnvOptions().chat.model,
84
+ llmChatModel: defaultChatCompleter?.model ?? getLlmEnvOptions().chat.model
85
85
  }, 'LlmModulePlugin adding llmEmbedder, llmChatCompleter, and model names to build');
86
- },
87
- },
88
- },
86
+ }
87
+ }
88
+ }
89
89
  };
90
90
  }
@@ -31,8 +31,8 @@
31
31
  * - quota exceeded → embedder returns null
32
32
  */
33
33
  import { AsyncLocalStorage } from 'node:async_hooks';
34
- import { meteredEmbed } from '../metering';
35
34
  import { getLlmBillingConfig } from '../config-cache';
35
+ import { meteredEmbed } from '../metering';
36
36
  // ─── Request-scoped context via AsyncLocalStorage ───────────────────────────
37
37
  const meteringStore = new AsyncLocalStorage();
38
38
  // ─── Helpers ────────────────────────────────────────────────────────────────
@@ -72,7 +72,7 @@ async function buildMeteringContext(graphqlContext, resolveEntityId) {
72
72
  requestId,
73
73
  databaseId,
74
74
  actorId,
75
- inferenceLog: inferenceLogConfig,
75
+ inferenceLog: inferenceLogConfig
76
76
  };
77
77
  }
78
78
  /**
@@ -89,10 +89,10 @@ function wrapEmbedderWithMetering(embedder, meteringOptions) {
89
89
  if (!ctx) {
90
90
  // No metering context in scope — call original embedder directly
91
91
  const startTime = Date.now();
92
- const result = await embedder(text);
92
+ const { embedding } = await embedder(text);
93
93
  const latencyMs = Date.now() - startTime;
94
- console.log(`[graphile-llm] Embed (unmetered): dims=${result?.length ?? 0}, latency=${latencyMs}ms`);
95
- return result;
94
+ console.log(`[graphile-llm] Embed (unmetered): dims=${embedding?.length ?? 0}, latency=${latencyMs}ms`);
95
+ return embedding;
96
96
  }
97
97
  const result = await meteredEmbed(embedder, text, ctx, meteringOptions);
98
98
  if (result.quotaExceeded) {
@@ -103,7 +103,7 @@ function wrapEmbedderWithMetering(embedder, meteringOptions) {
103
103
  }
104
104
  // ─── Plugin ─────────────────────────────────────────────────────────────────
105
105
  export function createLlmMeteringPlugin(meteringConfig = {}) {
106
- const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId, } = meteringConfig;
106
+ const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId } = meteringConfig;
107
107
  return {
108
108
  name: 'LlmMeteringPlugin',
109
109
  version: '0.2.0',
@@ -134,13 +134,13 @@ export function createLlmMeteringPlugin(meteringConfig = {}) {
134
134
  chatMeterSlug: chatSlug,
135
135
  skipMetering,
136
136
  embeddingModel: embeddingModel ?? undefined,
137
- chatModel: chatModel ?? undefined,
137
+ chatModel: chatModel ?? undefined
138
138
  };
139
139
  // Replace the embedder with a metered version.
140
140
  // Same signature except it can return null (quota exceeded).
141
141
  const meteredEmbedder = wrapEmbedderWithMetering(originalEmbedder, meteringOptions);
142
142
  return build.extend(build, {
143
- llmEmbedder: meteredEmbedder,
143
+ llmEmbedder: meteredEmbedder
144
144
  }, 'LlmMeteringPlugin replacing llmEmbedder with metered version');
145
145
  },
146
146
  /**
@@ -148,7 +148,7 @@ export function createLlmMeteringPlugin(meteringConfig = {}) {
148
148
  * request-scoped metering context via AsyncLocalStorage.
149
149
  */
150
150
  GraphQLObjectType_fields_field(field, build, context) {
151
- const { scope: { isRootQuery, isRootMutation }, } = context;
151
+ const { scope: { isRootQuery, isRootMutation } } = context;
152
152
  if (!isRootQuery && !isRootMutation)
153
153
  return field;
154
154
  // Only wrap if we actually replaced the embedder
@@ -166,10 +166,10 @@ export function createLlmMeteringPlugin(meteringConfig = {}) {
166
166
  return meteringStore.run(ctx, () => {
167
167
  return oldResolve(source, args, graphqlContext, info);
168
168
  });
169
- },
169
+ }
170
170
  };
171
- },
172
- },
173
- },
171
+ }
172
+ }
173
+ }
174
174
  };
175
175
  }
@@ -62,7 +62,7 @@ function parseHasChunksTag(raw, codec) {
62
62
  parentFkField: parsed.parentFk || 'parent_id',
63
63
  parentPkField: parsed.parentPk || 'id',
64
64
  embeddingField: parsed.embeddingField || 'embedding',
65
- contentField: parsed.contentField || 'content',
65
+ contentField: parsed.contentField || 'content'
66
66
  };
67
67
  }
68
68
  /**
@@ -222,10 +222,10 @@ export function createLlmRagPlugin(ragDefaults = {}) {
222
222
  minSimilarity: $minSimilarity,
223
223
  systemPrompt: $systemPrompt,
224
224
  withPgClient: $withPgClient,
225
- pgSettings: $pgSettings,
225
+ pgSettings: $pgSettings
226
226
  });
227
227
  return lambda($combined, async (input) => {
228
- const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings, } = input;
228
+ const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings } = input;
229
229
  if (!prompt || typeof prompt !== 'string') {
230
230
  throw new Error('RAG_INVALID_PROMPT: prompt is required');
231
231
  }
@@ -244,7 +244,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
244
244
  const systemPromptTemplate = querySystemPrompt ?? ragDefaults.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
245
245
  // Step 1: Embed the prompt
246
246
  const startEmbed = Date.now();
247
- const vector = await embedder(prompt);
247
+ const { embedding: vector } = await embedder(prompt);
248
248
  const embedLatency = Date.now() - startEmbed;
249
249
  const vectorString = `[${vector.join(',')}]`;
250
250
  console.log(`[graphile-llm] RAG embed: dims=${vector.length}, latency=${embedLatency}ms`);
@@ -260,7 +260,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
260
260
  content: row.content,
261
261
  parent_id: row.parent_id,
262
262
  distance: parseFloat(row.distance),
263
- table_name: table.parentCodecName,
263
+ table_name: table.parentCodecName
264
264
  });
265
265
  }
266
266
  }
@@ -274,31 +274,31 @@ export function createLlmRagPlugin(ragDefaults = {}) {
274
274
  answer: 'No relevant context found for your query. ' +
275
275
  'Try broadening your search or lowering the minimum similarity threshold.',
276
276
  sources: [],
277
- tokensUsed: null,
277
+ tokensUsed: null
278
278
  };
279
279
  }
280
280
  // Step 3: Assemble context
281
281
  const contextText = assembleContext(topChunks);
282
282
  // Step 4: Call chat completion
283
283
  const startChat = Date.now();
284
- const answer = await chatCompleter([
284
+ const chatResult = await chatCompleter([
285
285
  { role: 'system', content: systemPromptTemplate + contextText },
286
- { role: 'user', content: prompt },
286
+ { role: 'user', content: prompt }
287
287
  ], {
288
- maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS,
288
+ maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
289
289
  });
290
290
  const chatLatency = Date.now() - startChat;
291
- console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, latency=${chatLatency}ms`);
291
+ console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, tokens=${chatResult.usage.totalTokens}, latency=${chatLatency}ms`);
292
292
  // Step 5: Return response
293
293
  return {
294
- answer,
294
+ answer: chatResult.content,
295
295
  sources: topChunks.map((chunk) => ({
296
296
  content: chunk.content,
297
297
  similarity: 1 - chunk.distance,
298
298
  tableName: chunk.table_name,
299
- parentId: chunk.parent_id,
299
+ parentId: chunk.parent_id
300
300
  })),
301
- tokensUsed: null, // Deferred to metering system
301
+ tokensUsed: chatResult.usage.totalTokens
302
302
  };
303
303
  });
304
304
  },
@@ -313,17 +313,17 @@ export function createLlmRagPlugin(ragDefaults = {}) {
313
313
  'to use embedText. Set defaultEmbedder in GraphileLlmPreset options.');
314
314
  }
315
315
  const startTime = Date.now();
316
- const vector = await embedder(text);
316
+ const { embedding: vector } = await embedder(text);
317
317
  const latencyMs = Date.now() - startTime;
318
318
  console.log(`[graphile-llm] embedText: dims=${vector.length}, latency=${latencyMs}ms`);
319
319
  return {
320
320
  vector,
321
- dimensions: vector.length,
321
+ dimensions: vector.length
322
322
  };
323
323
  });
324
- },
325
- },
326
- },
324
+ }
325
+ }
326
+ }
327
327
  };
328
328
  });
329
329
  return {
@@ -335,7 +335,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
335
335
  after: [
336
336
  'LlmModulePlugin',
337
337
  'UnifiedSearchPlugin',
338
- 'VectorCodecPlugin',
339
- ],
338
+ 'VectorCodecPlugin'
339
+ ]
340
340
  };
341
341
  }
@@ -45,7 +45,7 @@ function getTextToVectorMapping(pgCodec, build) {
45
45
  if (isVectorCodec(attribute.codec)) {
46
46
  const fieldName = build.inflection.attribute({
47
47
  codec: pgCodec,
48
- attributeName,
48
+ attributeName
49
49
  });
50
50
  mapping[`${fieldName}Text`] = fieldName;
51
51
  }
@@ -73,7 +73,7 @@ export function createLlmTextMutationPlugin() {
73
73
  'PgAttributesPlugin',
74
74
  'PgMutationCreatePlugin',
75
75
  'PgMutationUpdateDeletePlugin',
76
- 'VectorCodecPlugin',
76
+ 'VectorCodecPlugin'
77
77
  ],
78
78
  schema: {
79
79
  hooks: {
@@ -82,12 +82,12 @@ export function createLlmTextMutationPlugin() {
82
82
  * for tables that have vector columns.
83
83
  */
84
84
  GraphQLInputObjectType_fields(fields, build, context) {
85
- const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec, }, } = context;
85
+ const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec } } = context;
86
86
  // Only intercept create/update input types for table rows
87
87
  if (!pgCodec?.attributes || (!isPgPatch && !isPgBaseInput && !isMutationInput)) {
88
88
  return fields;
89
89
  }
90
- const { graphql: { GraphQLString }, } = build;
90
+ const { graphql: { GraphQLString } } = build;
91
91
  // Find vector columns on this table
92
92
  const vectorColumns = [];
93
93
  for (const [attributeName, attribute] of Object.entries(pgCodec.attributes)) {
@@ -103,7 +103,7 @@ export function createLlmTextMutationPlugin() {
103
103
  // Convert snake_case column name to camelCase field name
104
104
  const fieldName = build.inflection.attribute({
105
105
  codec: pgCodec,
106
- attributeName: columnName,
106
+ attributeName: columnName
107
107
  });
108
108
  const textFieldName = `${fieldName}Text`;
109
109
  newFields = build.extend(newFields, {
@@ -111,8 +111,8 @@ export function createLlmTextMutationPlugin() {
111
111
  type: GraphQLString,
112
112
  description: `Natural language text to embed server-side into the \`${fieldName}\` vector column. ` +
113
113
  `Mutually exclusive with \`${fieldName}\` — provide one or the other. ` +
114
- 'Requires the LLM plugin to be configured with an embedding provider.',
115
- },
114
+ 'Requires the LLM plugin to be configured with an embedding provider.'
115
+ }
116
116
  }, `LlmTextMutationPlugin adding ${textFieldName} companion field for vector column '${columnName}'`);
117
117
  }
118
118
  return newFields;
@@ -129,7 +129,7 @@ export function createLlmTextMutationPlugin() {
129
129
  * If the embedder returns null (e.g. quota exceeded), throws an error.
130
130
  */
131
131
  GraphQLObjectType_fields_field(field, build, context) {
132
- const { scope: { isRootMutation, fieldName, pgCodec }, } = context;
132
+ const { scope: { isRootMutation, fieldName, pgCodec } } = context;
133
133
  // Only wrap root mutation fields on tables with attributes
134
134
  if (!isRootMutation || !pgCodec || !pgCodec.attributes) {
135
135
  return field;
@@ -192,10 +192,10 @@ export function createLlmTextMutationPlugin() {
192
192
  }
193
193
  await embedTextFields(args);
194
194
  return oldResolve(source, args, graphqlContext, info);
195
- },
195
+ }
196
196
  };
197
- },
198
- },
199
- },
197
+ }
198
+ }
199
+ }
200
200
  };
201
201
  }