graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/__tests__/graphile-llm.test.js +87 -71
  2. package/chat.d.ts +5 -5
  3. package/chat.js +45 -43
  4. package/config-cache.d.ts +77 -0
  5. package/config-cache.js +148 -0
  6. package/embedder.d.ts +5 -5
  7. package/embedder.js +11 -17
  8. package/env.d.ts +31 -0
  9. package/env.js +52 -0
  10. package/esm/__tests__/graphile-llm.test.js +87 -71
  11. package/esm/chat.d.ts +5 -5
  12. package/esm/chat.js +45 -40
  13. package/esm/config-cache.d.ts +77 -0
  14. package/esm/config-cache.js +143 -0
  15. package/esm/embedder.d.ts +5 -5
  16. package/esm/embedder.js +11 -17
  17. package/esm/env.d.ts +31 -0
  18. package/esm/env.js +49 -0
  19. package/esm/index.d.ts +14 -5
  20. package/esm/index.js +11 -5
  21. package/esm/metering.d.ts +114 -0
  22. package/esm/metering.js +352 -0
  23. package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
  24. package/esm/plugins/agent-discovery-plugin.js +65 -0
  25. package/esm/plugins/llm-module-plugin.d.ts +11 -2
  26. package/esm/plugins/llm-module-plugin.js +15 -7
  27. package/esm/plugins/metering-plugin.d.ts +42 -0
  28. package/esm/plugins/metering-plugin.js +175 -0
  29. package/esm/plugins/rag-plugin.js +20 -20
  30. package/esm/plugins/text-mutation-plugin.d.ts +4 -0
  31. package/esm/plugins/text-mutation-plugin.js +23 -13
  32. package/esm/plugins/text-search-plugin.d.ts +4 -0
  33. package/esm/plugins/text-search-plugin.js +23 -11
  34. package/esm/preset.d.ts +21 -1
  35. package/esm/preset.js +33 -6
  36. package/esm/types.d.ts +86 -10
  37. package/index.d.ts +14 -5
  38. package/index.js +25 -8
  39. package/metering.d.ts +114 -0
  40. package/metering.js +359 -0
  41. package/package.json +15 -15
  42. package/plugins/agent-discovery-plugin.d.ts +29 -0
  43. package/plugins/agent-discovery-plugin.js +69 -0
  44. package/plugins/llm-module-plugin.d.ts +11 -2
  45. package/plugins/llm-module-plugin.js +15 -7
  46. package/plugins/metering-plugin.d.ts +42 -0
  47. package/plugins/metering-plugin.js +178 -0
  48. package/plugins/rag-plugin.js +20 -20
  49. package/plugins/text-mutation-plugin.d.ts +4 -0
  50. package/plugins/text-mutation-plugin.js +23 -13
  51. package/plugins/text-search-plugin.d.ts +4 -0
  52. package/plugins/text-search-plugin.js +23 -11
  53. package/preset.d.ts +21 -1
  54. package/preset.js +33 -6
  55. package/types.d.ts +86 -10
@@ -0,0 +1,178 @@
1
+ "use strict";
2
+ /**
3
+ * LlmMeteringPlugin
4
+ *
5
+ * Opt-in billing integration for graphile-llm. Completely separate from the
6
+ * pure LLM plugins (text-search, text-mutation, rag).
7
+ *
8
+ * **How it works:**
9
+ * 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
10
+ * typed `(text: string) => Promise<number[] | null>` — it resolves the bare vector taken from the original embedder's `{ embedding }` result
11
+ * 2. At request time, wraps every root query/mutation resolver to set up a
12
+ * request-scoped MeteringContext via AsyncLocalStorage
13
+ * 3. When the embedder is called (by any plugin), the wrapper checks
14
+ * AsyncLocalStorage for a metering context and if found, calls
15
+ * check_billing_quota before and record_usage after
16
+ * 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
17
+ * null and handles it (search falls back to text-only, mutations throw)
18
+ *
19
+ * The pure plugins never import metering, config-cache, or billing types.
20
+ * They call the embedder and handle null results — that's it.
21
+ *
22
+ * **Entity ID resolution:**
23
+ * The billing `entity_id` is resolved via a configurable callback.
24
+ * Default: reads `jwt.claims.user_id` from pgSettings. Override via
25
+ * `metering.resolveEntityId` in GraphileLlmPreset options.
26
+ *
27
+ * **Graceful behavior:**
28
+ * - billing_module not provisioned → embedder passes through unmetered
29
+ * - entity_id not available → embedder passes through unmetered
30
+ * - check_billing_quota throws → call is allowed (billing is opt-in)
31
+ * - record_usage throws → call succeeds, recording silently skipped
32
+ * - quota exceeded → embedder returns null
33
+ */
34
+ Object.defineProperty(exports, "__esModule", { value: true });
35
+ exports.createLlmMeteringPlugin = createLlmMeteringPlugin;
36
+ const node_async_hooks_1 = require("node:async_hooks");
37
+ const config_cache_1 = require("../config-cache");
38
+ const metering_1 = require("../metering");
39
+ // ─── Request-scoped context via AsyncLocalStorage ───────────────────────────
40
+ const meteringStore = new node_async_hooks_1.AsyncLocalStorage();
41
+ // ─── Helpers ────────────────────────────────────────────────────────────────
42
/**
 * Default billing-entity resolver: bills the authenticated user.
 *
 * @param {object} [pgSettings] - Request pgSettings map (may be absent).
 * @returns {*} The `jwt.claims.user_id` setting, or `null` when the claim
 *   (or the settings object itself) is missing.
 */
function defaultResolveEntityId(pgSettings) {
    // Optional chaining keeps the resolver total: a missing settings object
    // means "no entity" rather than a TypeError.
    return pgSettings?.['jwt.claims.user_id'] ?? null;
}
45
/**
 * Assemble the request-scoped metering context from a GraphQL context.
 *
 * Resolves to `null` (meaning: run unmetered) whenever something needed for
 * billing is missing or fails:
 *   - the entity id cannot be resolved (falsy result, or the resolver throws)
 *   - `jwt.claims.database_id` is not present in pgSettings
 *   - the GraphQL context exposes no `withPgClient`
 *   - the billing config lookup throws, or yields no `billing` entry
 *
 * @param {object} [graphqlContext] - GraphQL context; `pgSettings` and
 *   `withPgClient` are read from it.
 * @param {Function} resolveEntityId - `(pgSettings) => id`. May be sync or
 *   async; the result is awaited either way.
 * @returns {Promise<object|null>} `{ withPgClient, pgSettings, billing,
 *   entityId, requestId, databaseId, actorId, inferenceLog }` or `null`.
 */
async function buildMeteringContext(graphqlContext, resolveEntityId) {
    const pgSettings = graphqlContext?.pgSettings ?? {};
    let entityId;
    try {
        // Await so an async resolver yields its value; un-awaited, the pending
        // Promise is truthy and would be stored as the entity id.
        entityId = await resolveEntityId(pgSettings);
    }
    catch (err) {
        // A failing resolver must not take down every root query/mutation.
        // Fall back to the unmetered path, but say so.
        console.warn(`[graphile-llm] Metering entity resolution failed — request will run unmetered: ${err?.message ?? err}`);
        return null;
    }
    const databaseId = pgSettings['jwt.claims.database_id'] ?? null;
    if (!entityId || !databaseId)
        return null;
    const withPgClient = graphqlContext?.withPgClient;
    if (!withPgClient)
        return null;
    let billingConfig = null;
    let inferenceLogConfig = null;
    try {
        // Results are captured through the closure rather than the return
        // value of withPgClient, so nothing is assumed about what it returns.
        await withPgClient(pgSettings, async (pgClient) => {
            const entry = await (0, config_cache_1.getLlmBillingConfig)(pgClient, databaseId);
            billingConfig = entry.billing;
            inferenceLogConfig = entry.inferenceLog;
        });
    }
    catch {
        // Deliberate best-effort: a failed lookup disables metering for this
        // request instead of failing it.
        return null;
    }
    if (!billingConfig)
        return null;
    return {
        withPgClient,
        pgSettings,
        billing: billingConfig,
        entityId,
        requestId: pgSettings['request.id'] ?? null,
        databaseId,
        actorId: pgSettings['jwt.claims.user_id'] ?? null,
        inferenceLog: inferenceLogConfig
    };
}
81
/**
 * Wrap an embedder so that calls made inside a metering scope are billed.
 *
 * The metering context is read from the module-level AsyncLocalStorage on
 * every call, so downstream plugins stay unaware of billing.
 *
 * Note the result shape: the wrapped `embedder` resolves an object carrying an
 * `embedding` property, whereas the function returned here resolves the bare
 * vector, or `null` when no vector is available.
 *
 * - No metering context in scope: the original embedder is called directly and
 *   the call is logged as unmetered.
 * - Metering context in scope: the call is delegated to `meteredEmbed`; when it
 *   reports `quotaExceeded` the wrapper resolves `null`.
 *
 * @param {Function} embedder - `(text) => Promise<{ embedding }>`.
 * @param {object} meteringOptions - Passed through untouched to `meteredEmbed`.
 * @returns {Function} `(text) => Promise<vector | null>`.
 */
function wrapEmbedderWithMetering(embedder, meteringOptions) {
    return async (text) => {
        const ctx = meteringStore.getStore();
        if (ctx) {
            const outcome = await (0, metering_1.meteredEmbed)(embedder, text, ctx, meteringOptions);
            if (outcome.quotaExceeded) {
                return null;
            }
            // NOTE(review): `outcome.result` is presumably the vector —
            // meteredEmbed is defined outside this block; confirm there.
            return outcome.result ?? null;
        }
        // No metering context in scope — call original embedder directly
        const startTime = Date.now();
        const response = await embedder(text);
        const latencyMs = Date.now() - startTime;
        // Callers compare against `null` strictly, so a missing result or a
        // missing `embedding` must surface as null, never undefined.
        const embedding = response?.embedding ?? null;
        console.log(`[graphile-llm] Embed (unmetered): dims=${embedding?.length ?? 0}, latency=${latencyMs}ms`);
        return embedding;
    };
}
107
+ // ─── Plugin ─────────────────────────────────────────────────────────────────
108
/**
 * Build the opt-in LlmMeteringPlugin.
 *
 * The plugin registers two schema hooks:
 *   - `build`: when `build.llmEmbedder` is set, replaces it with the metered
 *     wrapper produced by `wrapEmbedderWithMetering`.
 *   - `GraphQLObjectType_fields_field`: wraps each root query/mutation field's
 *     resolver so it runs inside an AsyncLocalStorage scope holding the
 *     request's metering context.
 *
 * @param {object} [meteringConfig]
 * @param {string} [meteringConfig.embeddingMeterSlug] - Overrides the embedding
 *   meter slug; defaults to `build.llmEmbeddingModel`.
 * @param {string} [meteringConfig.chatMeterSlug] - Overrides the chat meter
 *   slug; defaults to `build.llmChatModel`.
 * @param {*} [meteringConfig.skipMetering] - Forwarded unchanged in the
 *   metering options.
 * @param {Function} [meteringConfig.resolveEntityId] - `(pgSettings) => id`;
 *   defaults to `defaultResolveEntityId`.
 * @returns {object} Plugin object (`name`, `version`, `description`, `after`,
 *   `before`, `schema.hooks`).
 */
function createLlmMeteringPlugin(meteringConfig = {}) {
    const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId } = meteringConfig;
    return {
        name: 'LlmMeteringPlugin',
        version: '0.2.0',
        description: 'Wraps LLM embedder/chat with billing quota checks and usage recording',
        after: ['LlmModulePlugin'],
        before: ['LlmTextSearchPlugin', 'LlmTextMutationPlugin', 'LlmRagPlugin'],
        schema: {
            hooks: {
                build(build) {
                    const originalEmbedder = build.llmEmbedder;
                    if (!originalEmbedder) {
                        console.log('[graphile-llm] Metering plugin loaded but no embedder configured — skipping');
                        return build;
                    }
                    // Meter slug = model name by default (three-level waterfall: model → inference → universal)
                    const embeddingModel = build.llmEmbeddingModel;
                    const chatModel = build.llmChatModel;
                    const embeddingSlug = configEmbeddingSlug ?? embeddingModel ?? undefined;
                    const chatSlug = configChatSlug ?? chatModel ?? undefined;
                    if (embeddingSlug) {
                        console.log(`[graphile-llm] Metering enabled — embedding meter: ${embeddingSlug}`);
                    }
                    else {
                        console.log('[graphile-llm] Metering enabled but no embedding model name — usage will not be metered');
                    }
                    const meteringOptions = {
                        embeddingMeterSlug: embeddingSlug,
                        chatMeterSlug: chatSlug,
                        skipMetering,
                        embeddingModel: embeddingModel ?? undefined,
                        chatModel: chatModel ?? undefined
                    };
                    // Replace the embedder with a metered version; unlike the
                    // original it can resolve null (quota exceeded).
                    const meteredEmbedder = wrapEmbedderWithMetering(originalEmbedder, meteringOptions);
                    return build.extend(build, {
                        llmEmbedder: meteredEmbedder
                    }, 'LlmMeteringPlugin replacing llmEmbedder with metered version');
                },
                /**
                 * Wrap every root query/mutation resolver to establish the
                 * request-scoped metering context via AsyncLocalStorage.
                 */
                GraphQLObjectType_fields_field(field, build, context) {
                    const { scope: { isRootQuery, isRootMutation, fieldName } } = context;
                    if (!isRootQuery && !isRootMutation)
                        return field;
                    // Only wrap if an embedder exists (nothing to meter otherwise)
                    if (!build.llmEmbedder)
                        return field;
                    // Root resolvers receive the operation's root value, which
                    // is undefined unless the server supplies one — the
                    // fallback must not dereference it blindly.
                    const defaultResolver = (obj) => obj?.[fieldName];
                    const { resolve: oldResolve = defaultResolver, ...rest } = field;
                    return {
                        ...rest,
                        async resolve(source, args, graphqlContext, info) {
                            // Build the metering context for this request
                            const ctx = await buildMeteringContext(graphqlContext, resolveEntityId);
                            // Run the original resolver within the AsyncLocalStorage scope
                            // so any embedder calls made by downstream plugins pick up the ctx
                            return meteringStore.run(ctx, () => oldResolve(source, args, graphqlContext, info));
                        }
                    };
                }
            }
        }
    };
}
@@ -65,7 +65,7 @@ function parseHasChunksTag(raw, codec) {
65
65
  parentFkField: parsed.parentFk || 'parent_id',
66
66
  parentPkField: parsed.parentPk || 'id',
67
67
  embeddingField: parsed.embeddingField || 'embedding',
68
- contentField: parsed.contentField || 'content',
68
+ contentField: parsed.contentField || 'content'
69
69
  };
70
70
  }
71
71
  /**
@@ -225,10 +225,10 @@ function createLlmRagPlugin(ragDefaults = {}) {
225
225
  minSimilarity: $minSimilarity,
226
226
  systemPrompt: $systemPrompt,
227
227
  withPgClient: $withPgClient,
228
- pgSettings: $pgSettings,
228
+ pgSettings: $pgSettings
229
229
  });
230
230
  return (0, grafast_1.lambda)($combined, async (input) => {
231
- const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings, } = input;
231
+ const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings } = input;
232
232
  if (!prompt || typeof prompt !== 'string') {
233
233
  throw new Error('RAG_INVALID_PROMPT: prompt is required');
234
234
  }
@@ -247,7 +247,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
247
247
  const systemPromptTemplate = querySystemPrompt ?? ragDefaults.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
248
248
  // Step 1: Embed the prompt
249
249
  const startEmbed = Date.now();
250
- const vector = await embedder(prompt);
250
+ const { embedding: vector } = await embedder(prompt);
251
251
  const embedLatency = Date.now() - startEmbed;
252
252
  const vectorString = `[${vector.join(',')}]`;
253
253
  console.log(`[graphile-llm] RAG embed: dims=${vector.length}, latency=${embedLatency}ms`);
@@ -263,7 +263,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
263
263
  content: row.content,
264
264
  parent_id: row.parent_id,
265
265
  distance: parseFloat(row.distance),
266
- table_name: table.parentCodecName,
266
+ table_name: table.parentCodecName
267
267
  });
268
268
  }
269
269
  }
@@ -277,31 +277,31 @@ function createLlmRagPlugin(ragDefaults = {}) {
277
277
  answer: 'No relevant context found for your query. ' +
278
278
  'Try broadening your search or lowering the minimum similarity threshold.',
279
279
  sources: [],
280
- tokensUsed: null,
280
+ tokensUsed: null
281
281
  };
282
282
  }
283
283
  // Step 3: Assemble context
284
284
  const contextText = assembleContext(topChunks);
285
285
  // Step 4: Call chat completion
286
286
  const startChat = Date.now();
287
- const answer = await chatCompleter([
287
+ const chatResult = await chatCompleter([
288
288
  { role: 'system', content: systemPromptTemplate + contextText },
289
- { role: 'user', content: prompt },
289
+ { role: 'user', content: prompt }
290
290
  ], {
291
- maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS,
291
+ maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
292
292
  });
293
293
  const chatLatency = Date.now() - startChat;
294
- console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, latency=${chatLatency}ms`);
294
+ console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, tokens=${chatResult.usage.totalTokens}, latency=${chatLatency}ms`);
295
295
  // Step 5: Return response
296
296
  return {
297
- answer,
297
+ answer: chatResult.content,
298
298
  sources: topChunks.map((chunk) => ({
299
299
  content: chunk.content,
300
300
  similarity: 1 - chunk.distance,
301
301
  tableName: chunk.table_name,
302
- parentId: chunk.parent_id,
302
+ parentId: chunk.parent_id
303
303
  })),
304
- tokensUsed: null, // Deferred to metering system
304
+ tokensUsed: chatResult.usage.totalTokens
305
305
  };
306
306
  });
307
307
  },
@@ -316,17 +316,17 @@ function createLlmRagPlugin(ragDefaults = {}) {
316
316
  'to use embedText. Set defaultEmbedder in GraphileLlmPreset options.');
317
317
  }
318
318
  const startTime = Date.now();
319
- const vector = await embedder(text);
319
+ const { embedding: vector } = await embedder(text);
320
320
  const latencyMs = Date.now() - startTime;
321
321
  console.log(`[graphile-llm] embedText: dims=${vector.length}, latency=${latencyMs}ms`);
322
322
  return {
323
323
  vector,
324
- dimensions: vector.length,
324
+ dimensions: vector.length
325
325
  };
326
326
  });
327
- },
328
- },
329
- },
327
+ }
328
+ }
329
+ }
330
330
  };
331
331
  });
332
332
  return {
@@ -338,7 +338,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
338
338
  after: [
339
339
  'LlmModulePlugin',
340
340
  'UnifiedSearchPlugin',
341
- 'VectorCodecPlugin',
342
- ],
341
+ 'VectorCodecPlugin'
342
+ ]
343
343
  };
344
344
  }
@@ -9,6 +9,10 @@
9
9
  * Example:
10
10
  * mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
11
11
  *
12
+ * If the embedder returns null (e.g. quota exceeded when the metering plugin
13
+ * is loaded), the mutation throws an error — unlike search, mutations cannot
14
+ * silently skip writing a vector the user asked for.
15
+ *
12
16
  * This is the mutation counterpart to LlmTextSearchPlugin (which handles
13
17
  * filter/query-side text-to-vector). Together they let clients work entirely
14
18
  * with text/prompts instead of raw float vectors.
@@ -10,6 +10,10 @@
10
10
  * Example:
11
11
  * mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
12
12
  *
13
+ * If the embedder returns null (e.g. quota exceeded when the metering plugin
14
+ * is loaded), the mutation throws an error — unlike search, mutations cannot
15
+ * silently skip writing a vector the user asked for.
16
+ *
13
17
  * This is the mutation counterpart to LlmTextSearchPlugin (which handles
14
18
  * filter/query-side text-to-vector). Together they let clients work entirely
15
19
  * with text/prompts instead of raw float vectors.
@@ -44,7 +48,7 @@ function getTextToVectorMapping(pgCodec, build) {
44
48
  if (isVectorCodec(attribute.codec)) {
45
49
  const fieldName = build.inflection.attribute({
46
50
  codec: pgCodec,
47
- attributeName,
51
+ attributeName
48
52
  });
49
53
  mapping[`${fieldName}Text`] = fieldName;
50
54
  }
@@ -64,7 +68,7 @@ function getTextToVectorMapping(pgCodec, build) {
64
68
  function createLlmTextMutationPlugin() {
65
69
  return {
66
70
  name: 'LlmTextMutationPlugin',
67
- version: '0.1.0',
71
+ version: '0.2.0',
68
72
  description: 'Adds text companion fields on mutation inputs for vector columns — ' +
69
73
  'text is embedded server-side before storing',
70
74
  after: [
@@ -72,7 +76,7 @@ function createLlmTextMutationPlugin() {
72
76
  'PgAttributesPlugin',
73
77
  'PgMutationCreatePlugin',
74
78
  'PgMutationUpdateDeletePlugin',
75
- 'VectorCodecPlugin',
79
+ 'VectorCodecPlugin'
76
80
  ],
77
81
  schema: {
78
82
  hooks: {
@@ -81,12 +85,12 @@ function createLlmTextMutationPlugin() {
81
85
  * for tables that have vector columns.
82
86
  */
83
87
  GraphQLInputObjectType_fields(fields, build, context) {
84
- const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec, }, } = context;
88
+ const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec } } = context;
85
89
  // Only intercept create/update input types for table rows
86
90
  if (!pgCodec?.attributes || (!isPgPatch && !isPgBaseInput && !isMutationInput)) {
87
91
  return fields;
88
92
  }
89
- const { graphql: { GraphQLString }, } = build;
93
+ const { graphql: { GraphQLString } } = build;
90
94
  // Find vector columns on this table
91
95
  const vectorColumns = [];
92
96
  for (const [attributeName, attribute] of Object.entries(pgCodec.attributes)) {
@@ -102,7 +106,7 @@ function createLlmTextMutationPlugin() {
102
106
  // Convert snake_case column name to camelCase field name
103
107
  const fieldName = build.inflection.attribute({
104
108
  codec: pgCodec,
105
- attributeName: columnName,
109
+ attributeName: columnName
106
110
  });
107
111
  const textFieldName = `${fieldName}Text`;
108
112
  newFields = build.extend(newFields, {
@@ -110,8 +114,8 @@ function createLlmTextMutationPlugin() {
110
114
  type: GraphQLString,
111
115
  description: `Natural language text to embed server-side into the \`${fieldName}\` vector column. ` +
112
116
  `Mutually exclusive with \`${fieldName}\` — provide one or the other. ` +
113
- 'Requires the LLM plugin to be configured with an embedding provider.',
114
- },
117
+ 'Requires the LLM plugin to be configured with an embedding provider.'
118
+ }
115
119
  }, `LlmTextMutationPlugin adding ${textFieldName} companion field for vector column '${columnName}'`);
116
120
  }
117
121
  return newFields;
@@ -124,9 +128,11 @@ function createLlmTextMutationPlugin() {
124
128
  * Uses the same v4-style resolver wrapping pattern as graphile-upload-plugin
125
129
  * and graphile-bucket-provisioner-plugin. grafserv v5 supports this through
126
130
  * its backwards-compatibility layer.
131
+ *
132
+ * If the embedder returns null (e.g. quota exceeded), throws an error.
127
133
  */
128
134
  GraphQLObjectType_fields_field(field, build, context) {
129
- const { scope: { isRootMutation, fieldName, pgCodec }, } = context;
135
+ const { scope: { isRootMutation, fieldName, pgCodec } } = context;
130
136
  // Only wrap root mutation fields on tables with attributes
131
137
  if (!isRootMutation || !pgCodec || !pgCodec.attributes) {
132
138
  return field;
@@ -166,6 +172,10 @@ function createLlmTextMutationPlugin() {
166
172
  const startTime = Date.now();
167
173
  const vector = await embedder(value);
168
174
  const latencyMs = Date.now() - startTime;
175
+ if (vector === null) {
176
+ throw new Error(`EMBED_QUOTA_EXCEEDED: Cannot embed ${key} — embedding quota exceeded. ` +
177
+ 'Upgrade your plan or wait for the next billing period.');
178
+ }
169
179
  console.log(`[graphile-llm] Mutation embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
170
180
  // Inject the vector into the corresponding field
171
181
  obj[vectorFieldName] = vector;
@@ -185,10 +195,10 @@ function createLlmTextMutationPlugin() {
185
195
  }
186
196
  await embedTextFields(args);
187
197
  return oldResolve(source, args, graphqlContext, info);
188
- },
198
+ }
189
199
  };
190
- },
191
- },
192
- },
200
+ }
201
+ }
202
+ }
193
203
  };
194
204
  }
@@ -22,6 +22,10 @@
22
22
  *
23
23
  * If the embedder is not configured, the `text` field is still registered
24
24
  * (so the schema is stable) but will return a clear error at execution time.
25
+ *
26
+ * If the embedder returns null (e.g. quota exceeded when the metering
27
+ * plugin is loaded), the text field is silently removed — the query
28
+ * continues with text-only search as a graceful fallback.
25
29
  */
26
30
  import type { GraphileConfig } from 'graphile-config';
27
31
  declare global {
@@ -23,6 +23,10 @@
23
23
  *
24
24
  * If the embedder is not configured, the `text` field is still registered
25
25
  * (so the schema is stable) but will return a clear error at execution time.
26
+ *
27
+ * If the embedder returns null (e.g. quota exceeded when the metering
28
+ * plugin is loaded), the text field is silently removed — the query
29
+ * continues with text-only search as a graceful fallback.
26
30
  */
27
31
  Object.defineProperty(exports, "__esModule", { value: true });
28
32
  exports.createLlmTextSearchPlugin = createLlmTextSearchPlugin;
@@ -41,6 +45,9 @@ function hasVectorColumns(pgCodec) {
41
45
  /**
42
46
  * Recursively walk a `where` argument object and embed any VectorNearbyInput
43
47
  * values that have `text` instead of `vector`.
48
+ *
49
+ * If the embedder returns null (e.g. quota exceeded), the text field is
50
+ * removed so the pgvector filter is skipped — graceful text-only fallback.
44
51
  */
45
52
  async function embedTextInWhere(obj, embedder) {
46
53
  if (!obj || typeof obj !== 'object')
@@ -56,6 +63,11 @@ async function embedTextInWhere(obj, embedder) {
56
63
  const startTime = Date.now();
57
64
  const vector = await embedder(value.text);
58
65
  const latencyMs = Date.now() - startTime;
66
+ if (vector === null) {
67
+ // Embedder returned null (e.g. quota exceeded) — skip vector search
68
+ delete value.text;
69
+ return;
70
+ }
59
71
  console.log(`[graphile-llm] Search embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
60
72
  // Replace text with vector
61
73
  value.vector = vector;
@@ -88,12 +100,12 @@ async function embedTextInWhere(obj, embedder) {
88
100
  function createLlmTextSearchPlugin() {
89
101
  return {
90
102
  name: 'LlmTextSearchPlugin',
91
- version: '0.1.0',
103
+ version: '0.2.0',
92
104
  description: 'Adds text-to-vector embedding support on VectorNearbyInput filter fields',
93
105
  after: [
94
106
  'LlmModulePlugin',
95
107
  'UnifiedSearchPlugin',
96
- 'VectorCodecPlugin',
108
+ 'VectorCodecPlugin'
97
109
  ],
98
110
  schema: {
99
111
  hooks: {
@@ -104,18 +116,18 @@ function createLlmTextSearchPlugin() {
104
116
  * The field is optional — clients provide either `text` or `vector`.
105
117
  */
106
118
  GraphQLInputObjectType_fields(fields, build, context) {
107
- const { scope: { inputObjectTypeName }, } = context;
119
+ const { scope: { inputObjectTypeName } } = context;
108
120
  if (inputObjectTypeName !== 'VectorNearbyInput') {
109
121
  return fields;
110
122
  }
111
- const { graphql: { GraphQLString }, } = build;
123
+ const { graphql: { GraphQLString } } = build;
112
124
  return build.extend(fields, {
113
125
  text: {
114
126
  type: GraphQLString,
115
127
  description: 'Natural language text to embed server-side for similarity search. ' +
116
128
  'Mutually exclusive with `vector` — provide one or the other. ' +
117
- 'Requires the LLM plugin to be configured with an embedding provider.',
118
- },
129
+ 'Requires the LLM plugin to be configured with an embedding provider.'
130
+ }
119
131
  }, 'LlmTextSearchPlugin adding text field to VectorNearbyInput');
120
132
  },
121
133
  /**
@@ -127,7 +139,7 @@ function createLlmTextSearchPlugin() {
127
139
  * and graphile-bucket-provisioner-plugin.
128
140
  */
129
141
  GraphQLObjectType_fields_field(field, build, context) {
130
- const { scope: { isRootQuery, pgCodec }, } = context;
142
+ const { scope: { isRootQuery, pgCodec } } = context;
131
143
  // Only wrap root query fields on tables with vector columns
132
144
  if (!isRootQuery || !pgCodec || !hasVectorColumns(pgCodec)) {
133
145
  return field;
@@ -149,7 +161,7 @@ function createLlmTextSearchPlugin() {
149
161
  await embedTextInWhere(args.filter, embedder);
150
162
  }
151
163
  return oldResolve(source, args, graphqlContext, info);
152
- },
164
+ }
153
165
  };
154
166
  },
155
167
  finalize(schema, build) {
@@ -159,8 +171,8 @@ function createLlmTextSearchPlugin() {
159
171
  'will return errors if used. Configure an embedding provider to enable.');
160
172
  }
161
173
  return schema;
162
- },
163
- },
164
- },
174
+ }
175
+ }
176
+ }
165
177
  };
166
178
  }
package/preset.d.ts CHANGED
@@ -8,7 +8,7 @@
8
8
  * - Resolves an embedder from configuration (llm_module, env vars, or preset options)
9
9
  * - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
10
10
  * - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
11
- * - Logs token usage to console (metering integration deferred to billing system)
11
+ * - Optionally enables billing/metering via the LlmMeteringPlugin
12
12
  *
13
13
  * This preset is standalone — it is NOT included in ConstructivePreset by default.
14
14
  * Projects that want LLM features opt in by adding it to their preset.
@@ -42,6 +42,26 @@
42
42
  * ],
43
43
  * };
44
44
  * ```
45
+ *
46
+ * @example With billing metering (opt-in, meter slug = model name by default):
47
+ * ```typescript
48
+ * GraphileLlmPreset({
49
+ * defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
50
+ * metering: true,
51
+ * // → embedding calls metered under 'text-embedding-3-small' meter slug
52
+ * // → three-level waterfall: text-embedding-3-small → inference pool → universal
53
+ * })
54
+ * ```
55
+ *
56
+ * @example With custom entity_id resolution (bill per-database):
57
+ * ```typescript
58
+ * GraphileLlmPreset({
59
+ * defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
60
+ * metering: {
61
+ * resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
62
+ * },
63
+ * })
64
+ * ```
45
65
  */
46
66
  import type { GraphileConfig } from 'graphile-config';
47
67
  import type { GraphileLlmOptions } from './types';
package/preset.js CHANGED
@@ -9,7 +9,7 @@
9
9
  * - Resolves an embedder from configuration (llm_module, env vars, or preset options)
10
10
  * - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
11
11
  * - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
12
- * - Logs token usage to console (metering integration deferred to billing system)
12
+ * - Optionally enables billing/metering via the LlmMeteringPlugin
13
13
  *
14
14
  * This preset is standalone — it is NOT included in ConstructivePreset by default.
15
15
  * Projects that want LLM features opt in by adding it to their preset.
@@ -43,13 +43,34 @@
43
43
  * ],
44
44
  * };
45
45
  * ```
46
+ *
47
+ * @example With billing metering (opt-in, meter slug = model name by default):
48
+ * ```typescript
49
+ * GraphileLlmPreset({
50
+ * defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
51
+ * metering: true,
52
+ * // → embedding calls metered under 'text-embedding-3-small' meter slug
53
+ * // → three-level waterfall: text-embedding-3-small → inference pool → universal
54
+ * })
55
+ * ```
56
+ *
57
+ * @example With custom entity_id resolution (bill per-database):
58
+ * ```typescript
59
+ * GraphileLlmPreset({
60
+ * defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
61
+ * metering: {
62
+ * resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
63
+ * },
64
+ * })
65
+ * ```
46
66
  */
47
67
  Object.defineProperty(exports, "__esModule", { value: true });
48
68
  exports.GraphileLlmPreset = GraphileLlmPreset;
49
69
  const llm_module_plugin_1 = require("./plugins/llm-module-plugin");
50
- const text_search_plugin_1 = require("./plugins/text-search-plugin");
51
- const text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
70
+ const metering_plugin_1 = require("./plugins/metering-plugin");
52
71
  const rag_plugin_1 = require("./plugins/rag-plugin");
72
+ const text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
73
+ const text_search_plugin_1 = require("./plugins/text-search-plugin");
53
74
  /**
54
75
  * Creates a preset that includes all LLM plugins.
55
76
  *
@@ -57,10 +78,16 @@ const rag_plugin_1 = require("./plugins/rag-plugin");
57
78
  * @returns A GraphileConfig.Preset to add to your extends array
58
79
  */
59
80
  function GraphileLlmPreset(options = {}) {
60
- const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, } = options;
81
+ const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering } = options;
61
82
  const plugins = [
62
- (0, llm_module_plugin_1.createLlmModulePlugin)(options),
83
+ (0, llm_module_plugin_1.createLlmModulePlugin)(options)
63
84
  ];
85
+ // Metering is opt-in: only loaded when metering is truthy
86
+ // (true, or a MeteringConfig object)
87
+ if (metering) {
88
+ const meteringConfig = metering === true ? {} : metering;
89
+ plugins.push((0, metering_plugin_1.createLlmMeteringPlugin)(meteringConfig));
90
+ }
64
91
  if (enableTextSearch) {
65
92
  plugins.push((0, text_search_plugin_1.createLlmTextSearchPlugin)());
66
93
  }
@@ -71,7 +98,7 @@ function GraphileLlmPreset(options = {}) {
71
98
  plugins.push((0, rag_plugin_1.createLlmRagPlugin)(ragDefaults));
72
99
  }
73
100
  return {
74
- plugins,
101
+ plugins
75
102
  };
76
103
  }
77
104
  exports.default = GraphileLlmPreset;