graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/__tests__/graphile-llm.test.js +87 -71
  2. package/chat.d.ts +5 -5
  3. package/chat.js +45 -43
  4. package/config-cache.d.ts +77 -0
  5. package/config-cache.js +148 -0
  6. package/embedder.d.ts +5 -5
  7. package/embedder.js +11 -17
  8. package/env.d.ts +31 -0
  9. package/env.js +52 -0
  10. package/esm/__tests__/graphile-llm.test.js +87 -71
  11. package/esm/chat.d.ts +5 -5
  12. package/esm/chat.js +45 -40
  13. package/esm/config-cache.d.ts +77 -0
  14. package/esm/config-cache.js +143 -0
  15. package/esm/embedder.d.ts +5 -5
  16. package/esm/embedder.js +11 -17
  17. package/esm/env.d.ts +31 -0
  18. package/esm/env.js +49 -0
  19. package/esm/index.d.ts +14 -5
  20. package/esm/index.js +11 -5
  21. package/esm/metering.d.ts +114 -0
  22. package/esm/metering.js +352 -0
  23. package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
  24. package/esm/plugins/agent-discovery-plugin.js +65 -0
  25. package/esm/plugins/llm-module-plugin.d.ts +11 -2
  26. package/esm/plugins/llm-module-plugin.js +15 -7
  27. package/esm/plugins/metering-plugin.d.ts +42 -0
  28. package/esm/plugins/metering-plugin.js +175 -0
  29. package/esm/plugins/rag-plugin.js +20 -20
  30. package/esm/plugins/text-mutation-plugin.d.ts +4 -0
  31. package/esm/plugins/text-mutation-plugin.js +23 -13
  32. package/esm/plugins/text-search-plugin.d.ts +4 -0
  33. package/esm/plugins/text-search-plugin.js +23 -11
  34. package/esm/preset.d.ts +21 -1
  35. package/esm/preset.js +33 -6
  36. package/esm/types.d.ts +86 -10
  37. package/index.d.ts +14 -5
  38. package/index.js +25 -8
  39. package/metering.d.ts +114 -0
  40. package/metering.js +359 -0
  41. package/package.json +15 -15
  42. package/plugins/agent-discovery-plugin.d.ts +29 -0
  43. package/plugins/agent-discovery-plugin.js +69 -0
  44. package/plugins/llm-module-plugin.d.ts +11 -2
  45. package/plugins/llm-module-plugin.js +15 -7
  46. package/plugins/metering-plugin.d.ts +42 -0
  47. package/plugins/metering-plugin.js +178 -0
  48. package/plugins/rag-plugin.js +20 -20
  49. package/plugins/text-mutation-plugin.d.ts +4 -0
  50. package/plugins/text-mutation-plugin.js +23 -13
  51. package/plugins/text-search-plugin.d.ts +4 -0
  52. package/plugins/text-search-plugin.js +23 -11
  53. package/preset.d.ts +21 -1
  54. package/preset.js +33 -6
  55. package/types.d.ts +86 -10
@@ -0,0 +1,175 @@
1
+ /**
2
+ * LlmMeteringPlugin
3
+ *
4
+ * Opt-in billing integration for graphile-llm. Completely separate from the
5
+ * pure LLM plugins (text-search, text-mutation, rag).
6
+ *
7
+ * **How it works:**
8
+ * 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
9
+ * with a `(text: string) => Promise<number[] | null>` signature (`null` when quota is exceeded)
10
+ * 2. At request time, wraps every root query/mutation resolver to set up a
11
+ * request-scoped MeteringContext via AsyncLocalStorage
12
+ * 3. When the embedder is called (by any plugin), the wrapper checks
13
+ * AsyncLocalStorage for a metering context and if found, calls
14
+ * check_billing_quota before and record_usage after
15
+ * 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
16
+ * null and handles it (search falls back to text-only, mutations throw)
17
+ *
18
+ * The pure plugins never import metering, config-cache, or billing types.
19
+ * They call the embedder and handle null results — that's it.
20
+ *
21
+ * **Entity ID resolution:**
22
+ * The billing `entity_id` is resolved via a configurable callback.
23
+ * Default: reads `jwt.claims.user_id` from pgSettings. Override via
24
+ * `metering.resolveEntityId` in GraphileLlmPreset options.
25
+ *
26
+ * **Graceful behavior:**
27
+ * - billing_module not provisioned → embedder passes through unmetered
28
+ * - entity_id not available → embedder passes through unmetered
29
+ * - check_billing_quota throws → call is allowed (billing is opt-in)
30
+ * - record_usage throws → call succeeds, recording silently skipped
31
+ * - quota exceeded → embedder returns null
32
+ */
33
+ import { AsyncLocalStorage } from 'node:async_hooks';
34
+ import { getLlmBillingConfig } from '../config-cache';
35
+ import { meteredEmbed } from '../metering';
36
+ // ─── Request-scoped context via AsyncLocalStorage ───────────────────────────
37
+ const meteringStore = new AsyncLocalStorage(); // populated via meteringStore.run() in the root resolver wrapper; read via getStore() in the metered embedder
38
+ // ─── Helpers ────────────────────────────────────────────────────────────────
/**
 * Default billing-entity resolver: uses the `jwt.claims.user_id` setting.
 *
 * @param {Record<string, unknown> | null | undefined} pgSettings - Settings
 *   map taken from the GraphQL context (`graphqlContext.pgSettings`).
 * @returns {unknown} The `jwt.claims.user_id` entry, or `null` when the map
 *   itself is missing or the entry is null/undefined.
 */
function defaultResolveEntityId(pgSettings) {
    // Optional chaining keeps a missing settings map from throwing a
    // TypeError; a null result is the "no entity available" signal.
    return pgSettings?.['jwt.claims.user_id'] ?? null;
}
/**
 * Assemble the request-scoped metering context from a GraphQL context.
 *
 * Resolves to `null` (the request is not metered) when any of these holds:
 *  - the resolved entity id or `jwt.claims.database_id` is missing/falsy,
 *  - the GraphQL context has no `withPgClient`,
 *  - loading the billing config throws or rejects,
 *  - the loaded entry has no `billing` config.
 *
 * @param {object | null | undefined} graphqlContext - GraphQL context; only
 *   its `pgSettings` and `withPgClient` properties are read.
 * @param {(pgSettings: object) => unknown} resolveEntityId - Maps the
 *   settings map to the billing entity id. May return the id directly or a
 *   promise for it.
 * @returns {Promise<object | null>} `{ withPgClient, pgSettings, billing,
 *   entityId, requestId, databaseId, actorId, inferenceLog }` or `null`.
 */
async function buildMeteringContext(graphqlContext, resolveEntityId) {
    const pgSettings = graphqlContext?.pgSettings ?? {};
    // Await the resolver: without it an async resolver would hand back a
    // pending promise, which is always truthy and would be stored as the
    // entity id. Awaiting a plain value is a no-op.
    const entityId = await resolveEntityId(pgSettings);
    const databaseId = pgSettings['jwt.claims.database_id'] ?? null;
    const requestId = pgSettings['request.id'] ?? null;
    const actorId = pgSettings['jwt.claims.user_id'] ?? null;
    if (!entityId || !databaseId) {
        return null;
    }
    const withPgClient = graphqlContext?.withPgClient;
    if (!withPgClient) {
        return null;
    }
    let billingConfig = null;
    let inferenceLogConfig = null;
    try {
        // Results are captured inside the callback rather than taken from
        // withPgClient's return value.
        await withPgClient(pgSettings, async (pgClient) => {
            const entry = await getLlmBillingConfig(pgClient, databaseId);
            billingConfig = entry.billing;
            inferenceLogConfig = entry.inferenceLog;
        });
    }
    catch (err) {
        // Billing is opt-in, so a failed lookup must not fail the request;
        // still surface the reason instead of dropping it silently.
        console.warn(`[graphile-llm] Billing config lookup failed — request will not be metered: ${err?.message ?? err}`);
        return null;
    }
    if (!billingConfig) {
        return null;
    }
    return {
        withPgClient,
        pgSettings,
        billing: billingConfig,
        entityId,
        requestId,
        databaseId,
        actorId,
        inferenceLog: inferenceLogConfig
    };
}
/**
 * Wrap an embedder so its calls are metered whenever a metering context is
 * active in `meteringStore`.
 *
 * Behavior of the returned function:
 *  - no context in scope: calls `embedder(text)` directly, logs dimensions
 *    and latency, and resolves to the response's `embedding`;
 *  - context in scope: delegates to `meteredEmbed(embedder, text, ctx,
 *    meteringOptions)` and resolves to its `result`, or to `null` when it
 *    reports `quotaExceeded`.
 *
 * A missing vector is always reported as `null`, never `undefined`.
 *
 * @param {(text: string) => Promise<{ embedding?: unknown } | null | undefined>} embedder
 *   The underlying embedder.
 * @param {object} meteringOptions - Passed through unchanged to `meteredEmbed`.
 * @returns {(text: string) => Promise<unknown | null>} The wrapped embedder.
 */
function wrapEmbedderWithMetering(embedder, meteringOptions) {
    return async (text) => {
        const ctx = meteringStore.getStore();
        if (!ctx) {
            // No metering context in scope — call the original embedder directly.
            const startTime = Date.now();
            const response = await embedder(text);
            const latencyMs = Date.now() - startTime;
            // Tolerate a null/undefined response and normalize "no vector"
            // to null so callers that test `=== null` see it.
            const embedding = response?.embedding ?? null;
            console.log(`[graphile-llm] Embed (unmetered): dims=${embedding?.length ?? 0}, latency=${latencyMs}ms`);
            return embedding;
        }
        const outcome = await meteredEmbed(embedder, text, ctx, meteringOptions);
        if (outcome.quotaExceeded) {
            return null;
        }
        return outcome.result ?? null;
    };
}
104
+ // ─── Plugin ─────────────────────────────────────────────────────────────────
/**
 * Create the opt-in LlmMeteringPlugin.
 *
 * The returned plugin registers two schema hooks:
 *  - `build`: when `build.llmEmbedder` is set, replaces it (through
 *    `build.extend`) with the wrapper from `wrapEmbedderWithMetering`;
 *    otherwise logs a notice and returns `build` untouched.
 *  - `GraphQLObjectType_fields_field`: for root query/mutation fields,
 *    wraps the resolver so it runs inside `meteringStore.run(ctx, …)` with
 *    the context produced by `buildMeteringContext`.
 *
 * @param {object | null} [meteringConfig] - Optional settings:
 *   `embeddingMeterSlug`, `chatMeterSlug`, `skipMetering` and
 *   `resolveEntityId` (defaults to `defaultResolveEntityId`). `null` and
 *   `undefined` are treated as an empty object.
 * @returns {object} Plugin definition with `name`, `version`,
 *   `description`, `after`, `before` and `schema.hooks`.
 */
export function createLlmMeteringPlugin(meteringConfig = {}) {
    // A default parameter only covers `undefined`; `?? {}` also covers null.
    const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId } = meteringConfig ?? {};
    return {
        name: 'LlmMeteringPlugin',
        version: '0.2.0',
        description: 'Wraps LLM embedder/chat with billing quota checks and usage recording',
        after: ['LlmModulePlugin'],
        before: ['LlmTextSearchPlugin', 'LlmTextMutationPlugin', 'LlmRagPlugin'],
        schema: {
            hooks: {
                build(build) {
                    const originalEmbedder = build.llmEmbedder;
                    if (!originalEmbedder) {
                        console.log('[graphile-llm] Metering plugin loaded but no embedder configured — skipping');
                        return build;
                    }
                    // Meter slug falls back to the model name when not configured explicitly.
                    const embeddingModel = build.llmEmbeddingModel;
                    const chatModel = build.llmChatModel;
                    const embeddingSlug = configEmbeddingSlug ?? embeddingModel ?? undefined;
                    const chatSlug = configChatSlug ?? chatModel ?? undefined;
                    if (embeddingSlug) {
                        console.log(`[graphile-llm] Metering enabled — embedding meter: ${embeddingSlug}`);
                    }
                    else {
                        console.log('[graphile-llm] Metering enabled but no embedding model name — usage will not be metered');
                    }
                    const meteringOptions = {
                        embeddingMeterSlug: embeddingSlug,
                        chatMeterSlug: chatSlug,
                        skipMetering,
                        embeddingModel: embeddingModel ?? undefined,
                        chatModel: chatModel ?? undefined
                    };
                    // Replace the embedder with the metered version, which
                    // may additionally resolve to null.
                    const meteredEmbedder = wrapEmbedderWithMetering(originalEmbedder, meteringOptions);
                    return build.extend(build, {
                        llmEmbedder: meteredEmbedder
                    }, 'LlmMeteringPlugin replacing llmEmbedder with metered version');
                },
                /**
                 * Wrap every root query/mutation resolver to establish the
                 * request-scoped metering context via AsyncLocalStorage.
                 */
                GraphQLObjectType_fields_field(field, build, context) {
                    const { scope: { isRootQuery, isRootMutation } } = context;
                    if (!isRootQuery && !isRootMutation) {
                        return field;
                    }
                    // Nothing to meter when no embedder is present on the build.
                    if (!build.llmEmbedder) {
                        return field;
                    }
                    // Fallback for fields without their own resolver. The
                    // root source may be null/undefined, so guard the
                    // property read instead of throwing a TypeError.
                    const defaultResolver = (obj) => obj?.[context.scope.fieldName];
                    const { resolve: oldResolve = defaultResolver, ...rest } = field;
                    return {
                        ...rest,
                        async resolve(source, args, graphqlContext, info) {
                            // Build the metering context for this resolver call.
                            const ctx = await buildMeteringContext(graphqlContext, resolveEntityId);
                            // Run the original resolver inside the store scope so
                            // embedder calls made further down can read ctx.
                            return meteringStore.run(ctx, () => oldResolve(source, args, graphqlContext, info));
                        }
                    };
                }
            }
        }
    };
}
@@ -62,7 +62,7 @@ function parseHasChunksTag(raw, codec) {
62
62
  parentFkField: parsed.parentFk || 'parent_id',
63
63
  parentPkField: parsed.parentPk || 'id',
64
64
  embeddingField: parsed.embeddingField || 'embedding',
65
- contentField: parsed.contentField || 'content',
65
+ contentField: parsed.contentField || 'content'
66
66
  };
67
67
  }
68
68
  /**
@@ -222,10 +222,10 @@ export function createLlmRagPlugin(ragDefaults = {}) {
222
222
  minSimilarity: $minSimilarity,
223
223
  systemPrompt: $systemPrompt,
224
224
  withPgClient: $withPgClient,
225
- pgSettings: $pgSettings,
225
+ pgSettings: $pgSettings
226
226
  });
227
227
  return lambda($combined, async (input) => {
228
- const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings, } = input;
228
+ const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings } = input;
229
229
  if (!prompt || typeof prompt !== 'string') {
230
230
  throw new Error('RAG_INVALID_PROMPT: prompt is required');
231
231
  }
@@ -244,7 +244,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
244
244
  const systemPromptTemplate = querySystemPrompt ?? ragDefaults.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
245
245
  // Step 1: Embed the prompt
246
246
  const startEmbed = Date.now();
247
- const vector = await embedder(prompt);
247
+ const { embedding: vector } = await embedder(prompt);
248
248
  const embedLatency = Date.now() - startEmbed;
249
249
  const vectorString = `[${vector.join(',')}]`;
250
250
  console.log(`[graphile-llm] RAG embed: dims=${vector.length}, latency=${embedLatency}ms`);
@@ -260,7 +260,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
260
260
  content: row.content,
261
261
  parent_id: row.parent_id,
262
262
  distance: parseFloat(row.distance),
263
- table_name: table.parentCodecName,
263
+ table_name: table.parentCodecName
264
264
  });
265
265
  }
266
266
  }
@@ -274,31 +274,31 @@ export function createLlmRagPlugin(ragDefaults = {}) {
274
274
  answer: 'No relevant context found for your query. ' +
275
275
  'Try broadening your search or lowering the minimum similarity threshold.',
276
276
  sources: [],
277
- tokensUsed: null,
277
+ tokensUsed: null
278
278
  };
279
279
  }
280
280
  // Step 3: Assemble context
281
281
  const contextText = assembleContext(topChunks);
282
282
  // Step 4: Call chat completion
283
283
  const startChat = Date.now();
284
- const answer = await chatCompleter([
284
+ const chatResult = await chatCompleter([
285
285
  { role: 'system', content: systemPromptTemplate + contextText },
286
- { role: 'user', content: prompt },
286
+ { role: 'user', content: prompt }
287
287
  ], {
288
- maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS,
288
+ maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
289
289
  });
290
290
  const chatLatency = Date.now() - startChat;
291
- console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, latency=${chatLatency}ms`);
291
+ console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, tokens=${chatResult.usage.totalTokens}, latency=${chatLatency}ms`);
292
292
  // Step 5: Return response
293
293
  return {
294
- answer,
294
+ answer: chatResult.content,
295
295
  sources: topChunks.map((chunk) => ({
296
296
  content: chunk.content,
297
297
  similarity: 1 - chunk.distance,
298
298
  tableName: chunk.table_name,
299
- parentId: chunk.parent_id,
299
+ parentId: chunk.parent_id
300
300
  })),
301
- tokensUsed: null, // Deferred to metering system
301
+ tokensUsed: chatResult.usage.totalTokens
302
302
  };
303
303
  });
304
304
  },
@@ -313,17 +313,17 @@ export function createLlmRagPlugin(ragDefaults = {}) {
313
313
  'to use embedText. Set defaultEmbedder in GraphileLlmPreset options.');
314
314
  }
315
315
  const startTime = Date.now();
316
- const vector = await embedder(text);
316
+ const { embedding: vector } = await embedder(text);
317
317
  const latencyMs = Date.now() - startTime;
318
318
  console.log(`[graphile-llm] embedText: dims=${vector.length}, latency=${latencyMs}ms`);
319
319
  return {
320
320
  vector,
321
- dimensions: vector.length,
321
+ dimensions: vector.length
322
322
  };
323
323
  });
324
- },
325
- },
326
- },
324
+ }
325
+ }
326
+ }
327
327
  };
328
328
  });
329
329
  return {
@@ -335,7 +335,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
335
335
  after: [
336
336
  'LlmModulePlugin',
337
337
  'UnifiedSearchPlugin',
338
- 'VectorCodecPlugin',
339
- ],
338
+ 'VectorCodecPlugin'
339
+ ]
340
340
  };
341
341
  }
@@ -9,6 +9,10 @@
9
9
  * Example:
10
10
  * mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
11
11
  *
12
+ * If the embedder returns null (e.g. quota exceeded when the metering plugin
13
+ * is loaded), the mutation throws an error — unlike search, mutations cannot
14
+ * silently skip writing a vector the user asked for.
15
+ *
12
16
  * This is the mutation counterpart to LlmTextSearchPlugin (which handles
13
17
  * filter/query-side text-to-vector). Together they let clients work entirely
14
18
  * with text/prompts instead of raw float vectors.
@@ -9,6 +9,10 @@
9
9
  * Example:
10
10
  * mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
11
11
  *
12
+ * If the embedder returns null (e.g. quota exceeded when the metering plugin
13
+ * is loaded), the mutation throws an error — unlike search, mutations cannot
14
+ * silently skip writing a vector the user asked for.
15
+ *
12
16
  * This is the mutation counterpart to LlmTextSearchPlugin (which handles
13
17
  * filter/query-side text-to-vector). Together they let clients work entirely
14
18
  * with text/prompts instead of raw float vectors.
@@ -41,7 +45,7 @@ function getTextToVectorMapping(pgCodec, build) {
41
45
  if (isVectorCodec(attribute.codec)) {
42
46
  const fieldName = build.inflection.attribute({
43
47
  codec: pgCodec,
44
- attributeName,
48
+ attributeName
45
49
  });
46
50
  mapping[`${fieldName}Text`] = fieldName;
47
51
  }
@@ -61,7 +65,7 @@ function getTextToVectorMapping(pgCodec, build) {
61
65
  export function createLlmTextMutationPlugin() {
62
66
  return {
63
67
  name: 'LlmTextMutationPlugin',
64
- version: '0.1.0',
68
+ version: '0.2.0',
65
69
  description: 'Adds text companion fields on mutation inputs for vector columns — ' +
66
70
  'text is embedded server-side before storing',
67
71
  after: [
@@ -69,7 +73,7 @@ export function createLlmTextMutationPlugin() {
69
73
  'PgAttributesPlugin',
70
74
  'PgMutationCreatePlugin',
71
75
  'PgMutationUpdateDeletePlugin',
72
- 'VectorCodecPlugin',
76
+ 'VectorCodecPlugin'
73
77
  ],
74
78
  schema: {
75
79
  hooks: {
@@ -78,12 +82,12 @@ export function createLlmTextMutationPlugin() {
78
82
  * for tables that have vector columns.
79
83
  */
80
84
  GraphQLInputObjectType_fields(fields, build, context) {
81
- const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec, }, } = context;
85
+ const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec } } = context;
82
86
  // Only intercept create/update input types for table rows
83
87
  if (!pgCodec?.attributes || (!isPgPatch && !isPgBaseInput && !isMutationInput)) {
84
88
  return fields;
85
89
  }
86
- const { graphql: { GraphQLString }, } = build;
90
+ const { graphql: { GraphQLString } } = build;
87
91
  // Find vector columns on this table
88
92
  const vectorColumns = [];
89
93
  for (const [attributeName, attribute] of Object.entries(pgCodec.attributes)) {
@@ -99,7 +103,7 @@ export function createLlmTextMutationPlugin() {
99
103
  // Convert snake_case column name to camelCase field name
100
104
  const fieldName = build.inflection.attribute({
101
105
  codec: pgCodec,
102
- attributeName: columnName,
106
+ attributeName: columnName
103
107
  });
104
108
  const textFieldName = `${fieldName}Text`;
105
109
  newFields = build.extend(newFields, {
@@ -107,8 +111,8 @@ export function createLlmTextMutationPlugin() {
107
111
  type: GraphQLString,
108
112
  description: `Natural language text to embed server-side into the \`${fieldName}\` vector column. ` +
109
113
  `Mutually exclusive with \`${fieldName}\` — provide one or the other. ` +
110
- 'Requires the LLM plugin to be configured with an embedding provider.',
111
- },
114
+ 'Requires the LLM plugin to be configured with an embedding provider.'
115
+ }
112
116
  }, `LlmTextMutationPlugin adding ${textFieldName} companion field for vector column '${columnName}'`);
113
117
  }
114
118
  return newFields;
@@ -121,9 +125,11 @@ export function createLlmTextMutationPlugin() {
121
125
  * Uses the same v4-style resolver wrapping pattern as graphile-upload-plugin
122
126
  * and graphile-bucket-provisioner-plugin. grafserv v5 supports this through
123
127
  * its backwards-compatibility layer.
128
+ *
129
+ * If the embedder returns null (e.g. quota exceeded), throws an error.
124
130
  */
125
131
  GraphQLObjectType_fields_field(field, build, context) {
126
- const { scope: { isRootMutation, fieldName, pgCodec }, } = context;
132
+ const { scope: { isRootMutation, fieldName, pgCodec } } = context;
127
133
  // Only wrap root mutation fields on tables with attributes
128
134
  if (!isRootMutation || !pgCodec || !pgCodec.attributes) {
129
135
  return field;
@@ -163,6 +169,10 @@ export function createLlmTextMutationPlugin() {
163
169
  const startTime = Date.now();
164
170
  const vector = await embedder(value);
165
171
  const latencyMs = Date.now() - startTime;
172
+ if (vector === null) {
173
+ throw new Error(`EMBED_QUOTA_EXCEEDED: Cannot embed ${key} — embedding quota exceeded. ` +
174
+ 'Upgrade your plan or wait for the next billing period.');
175
+ }
166
176
  console.log(`[graphile-llm] Mutation embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
167
177
  // Inject the vector into the corresponding field
168
178
  obj[vectorFieldName] = vector;
@@ -182,10 +192,10 @@ export function createLlmTextMutationPlugin() {
182
192
  }
183
193
  await embedTextFields(args);
184
194
  return oldResolve(source, args, graphqlContext, info);
185
- },
195
+ }
186
196
  };
187
- },
188
- },
189
- },
197
+ }
198
+ }
199
+ }
190
200
  };
191
201
  }
@@ -22,6 +22,10 @@
22
22
  *
23
23
  * If the embedder is not configured, the `text` field is still registered
24
24
  * (so the schema is stable) but will return a clear error at execution time.
25
+ *
26
+ * If the embedder returns null (e.g. quota exceeded when the metering
27
+ * plugin is loaded), the text field is silently removed — the query
28
+ * continues with text-only search as a graceful fallback.
25
29
  */
26
30
  import type { GraphileConfig } from 'graphile-config';
27
31
  declare global {
@@ -22,6 +22,10 @@
22
22
  *
23
23
  * If the embedder is not configured, the `text` field is still registered
24
24
  * (so the schema is stable) but will return a clear error at execution time.
25
+ *
26
+ * If the embedder returns null (e.g. quota exceeded when the metering
27
+ * plugin is loaded), the text field is silently removed — the query
28
+ * continues with text-only search as a graceful fallback.
25
29
  */
26
30
  /**
27
31
  * Check if a codec has any pgvector `vector` columns.
@@ -38,6 +42,9 @@ function hasVectorColumns(pgCodec) {
38
42
  /**
39
43
  * Recursively walk a `where` argument object and embed any VectorNearbyInput
40
44
  * values that have `text` instead of `vector`.
45
+ *
46
+ * If the embedder returns null (e.g. quota exceeded), the text field is
47
+ * removed so the pgvector filter is skipped — graceful text-only fallback.
41
48
  */
42
49
  async function embedTextInWhere(obj, embedder) {
43
50
  if (!obj || typeof obj !== 'object')
@@ -53,6 +60,11 @@ async function embedTextInWhere(obj, embedder) {
53
60
  const startTime = Date.now();
54
61
  const vector = await embedder(value.text);
55
62
  const latencyMs = Date.now() - startTime;
63
+ if (vector === null) {
64
+ // Embedder returned null (e.g. quota exceeded) — skip vector search
65
+ delete value.text;
66
+ return;
67
+ }
56
68
  console.log(`[graphile-llm] Search embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
57
69
  // Replace text with vector
58
70
  value.vector = vector;
@@ -85,12 +97,12 @@ async function embedTextInWhere(obj, embedder) {
85
97
  export function createLlmTextSearchPlugin() {
86
98
  return {
87
99
  name: 'LlmTextSearchPlugin',
88
- version: '0.1.0',
100
+ version: '0.2.0',
89
101
  description: 'Adds text-to-vector embedding support on VectorNearbyInput filter fields',
90
102
  after: [
91
103
  'LlmModulePlugin',
92
104
  'UnifiedSearchPlugin',
93
- 'VectorCodecPlugin',
105
+ 'VectorCodecPlugin'
94
106
  ],
95
107
  schema: {
96
108
  hooks: {
@@ -101,18 +113,18 @@ export function createLlmTextSearchPlugin() {
101
113
  * The field is optional — clients provide either `text` or `vector`.
102
114
  */
103
115
  GraphQLInputObjectType_fields(fields, build, context) {
104
- const { scope: { inputObjectTypeName }, } = context;
116
+ const { scope: { inputObjectTypeName } } = context;
105
117
  if (inputObjectTypeName !== 'VectorNearbyInput') {
106
118
  return fields;
107
119
  }
108
- const { graphql: { GraphQLString }, } = build;
120
+ const { graphql: { GraphQLString } } = build;
109
121
  return build.extend(fields, {
110
122
  text: {
111
123
  type: GraphQLString,
112
124
  description: 'Natural language text to embed server-side for similarity search. ' +
113
125
  'Mutually exclusive with `vector` — provide one or the other. ' +
114
- 'Requires the LLM plugin to be configured with an embedding provider.',
115
- },
126
+ 'Requires the LLM plugin to be configured with an embedding provider.'
127
+ }
116
128
  }, 'LlmTextSearchPlugin adding text field to VectorNearbyInput');
117
129
  },
118
130
  /**
@@ -124,7 +136,7 @@ export function createLlmTextSearchPlugin() {
124
136
  * and graphile-bucket-provisioner-plugin.
125
137
  */
126
138
  GraphQLObjectType_fields_field(field, build, context) {
127
- const { scope: { isRootQuery, pgCodec }, } = context;
139
+ const { scope: { isRootQuery, pgCodec } } = context;
128
140
  // Only wrap root query fields on tables with vector columns
129
141
  if (!isRootQuery || !pgCodec || !hasVectorColumns(pgCodec)) {
130
142
  return field;
@@ -146,7 +158,7 @@ export function createLlmTextSearchPlugin() {
146
158
  await embedTextInWhere(args.filter, embedder);
147
159
  }
148
160
  return oldResolve(source, args, graphqlContext, info);
149
- },
161
+ }
150
162
  };
151
163
  },
152
164
  finalize(schema, build) {
@@ -156,8 +168,8 @@ export function createLlmTextSearchPlugin() {
156
168
  'will return errors if used. Configure an embedding provider to enable.');
157
169
  }
158
170
  return schema;
159
- },
160
- },
161
- },
171
+ }
172
+ }
173
+ }
162
174
  };
163
175
  }
package/esm/preset.d.ts CHANGED
@@ -8,7 +8,7 @@
8
8
  * - Resolves an embedder from configuration (llm_module, env vars, or preset options)
9
9
  * - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
10
10
  * - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
11
- * - Logs token usage to console (metering integration deferred to billing system)
11
+ * - Optionally enables billing/metering via the LlmMeteringPlugin
12
12
  *
13
13
  * This preset is standalone — it is NOT included in ConstructivePreset by default.
14
14
  * Projects that want LLM features opt in by adding it to their preset.
@@ -42,6 +42,26 @@
42
42
  * ],
43
43
  * };
44
44
  * ```
45
+ *
46
+ * @example With billing metering (opt-in, meter slug = model name by default):
47
+ * ```typescript
48
+ * GraphileLlmPreset({
49
+ * defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
50
+ * metering: true,
51
+ * // → embedding calls metered under 'text-embedding-3-small' meter slug
52
+ * // → three-level waterfall: text-embedding-3-small → inference pool → universal
53
+ * })
54
+ * ```
55
+ *
56
+ * @example With custom entity_id resolution (bill per-database):
57
+ * ```typescript
58
+ * GraphileLlmPreset({
59
+ * defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
60
+ * metering: {
61
+ * resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
62
+ * },
63
+ * })
64
+ * ```
45
65
  */
46
66
  import type { GraphileConfig } from 'graphile-config';
47
67
  import type { GraphileLlmOptions } from './types';
package/esm/preset.js CHANGED
@@ -8,7 +8,7 @@
8
8
  * - Resolves an embedder from configuration (llm_module, env vars, or preset options)
9
9
  * - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
10
10
  * - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
11
- * - Logs token usage to console (metering integration deferred to billing system)
11
+ * - Optionally enables billing/metering via the LlmMeteringPlugin
12
12
  *
13
13
  * This preset is standalone — it is NOT included in ConstructivePreset by default.
14
14
  * Projects that want LLM features opt in by adding it to their preset.
@@ -42,11 +42,32 @@
42
42
  * ],
43
43
  * };
44
44
  * ```
45
+ *
46
+ * @example With billing metering (opt-in, meter slug = model name by default):
47
+ * ```typescript
48
+ * GraphileLlmPreset({
49
+ * defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
50
+ * metering: true,
51
+ * // → embedding calls metered under 'text-embedding-3-small' meter slug
52
+ * // → three-level waterfall: text-embedding-3-small → inference pool → universal
53
+ * })
54
+ * ```
55
+ *
56
+ * @example With custom entity_id resolution (bill per-database):
57
+ * ```typescript
58
+ * GraphileLlmPreset({
59
+ * defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
60
+ * metering: {
61
+ * resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
62
+ * },
63
+ * })
64
+ * ```
45
65
  */
46
66
  import { createLlmModulePlugin } from './plugins/llm-module-plugin';
47
- import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
48
- import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
67
+ import { createLlmMeteringPlugin } from './plugins/metering-plugin';
49
68
  import { createLlmRagPlugin } from './plugins/rag-plugin';
69
+ import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
70
+ import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
50
71
  /**
51
72
  * Creates a preset that includes all LLM plugins.
52
73
  *
@@ -54,10 +75,16 @@ import { createLlmRagPlugin } from './plugins/rag-plugin';
54
75
  * @returns A GraphileConfig.Preset to add to your extends array
55
76
  */
56
77
  export function GraphileLlmPreset(options = {}) {
57
- const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, } = options;
78
+ const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering } = options;
58
79
  const plugins = [
59
- createLlmModulePlugin(options),
80
+ createLlmModulePlugin(options)
60
81
  ];
82
+ // Metering is opt-in: only loaded when metering is truthy
83
+ // (true, or a MeteringConfig object)
84
+ if (metering) {
85
+ const meteringConfig = metering === true ? {} : metering;
86
+ plugins.push(createLlmMeteringPlugin(meteringConfig));
87
+ }
61
88
  if (enableTextSearch) {
62
89
  plugins.push(createLlmTextSearchPlugin());
63
90
  }
@@ -68,7 +95,7 @@ export function GraphileLlmPreset(options = {}) {
68
95
  plugins.push(createLlmRagPlugin(ragDefaults));
69
96
  }
70
97
  return {
71
- plugins,
98
+ plugins
72
99
  };
73
100
  }
74
101
  export default GraphileLlmPreset;