npm - graphile-llm - Versions diffs - 0.7.3 → 0.9.0 - Mend

graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/__tests__/graphile-llm.test.js +87 -71
package/chat.d.ts +5 -5
package/chat.js +45 -43
package/config-cache.d.ts +77 -0
package/config-cache.js +148 -0
package/embedder.d.ts +5 -5
package/embedder.js +11 -17
package/env.d.ts +31 -0
package/env.js +52 -0
package/esm/__tests__/graphile-llm.test.js +87 -71
package/esm/chat.d.ts +5 -5
package/esm/chat.js +45 -40
package/esm/config-cache.d.ts +77 -0
package/esm/config-cache.js +143 -0
package/esm/embedder.d.ts +5 -5
package/esm/embedder.js +11 -17
package/esm/env.d.ts +31 -0
package/esm/env.js +49 -0
package/esm/index.d.ts +14 -5
package/esm/index.js +11 -5
package/esm/metering.d.ts +114 -0
package/esm/metering.js +352 -0
package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
package/esm/plugins/agent-discovery-plugin.js +65 -0
package/esm/plugins/llm-module-plugin.d.ts +11 -2
package/esm/plugins/llm-module-plugin.js +15 -7
package/esm/plugins/metering-plugin.d.ts +42 -0
package/esm/plugins/metering-plugin.js +175 -0
package/esm/plugins/rag-plugin.js +20 -20
package/esm/plugins/text-mutation-plugin.d.ts +4 -0
package/esm/plugins/text-mutation-plugin.js +23 -13
package/esm/plugins/text-search-plugin.d.ts +4 -0
package/esm/plugins/text-search-plugin.js +23 -11
package/esm/preset.d.ts +21 -1
package/esm/preset.js +33 -6
package/esm/types.d.ts +86 -10
package/index.d.ts +14 -5
package/index.js +25 -8
package/metering.d.ts +114 -0
package/metering.js +359 -0
package/package.json +15 -15
package/plugins/agent-discovery-plugin.d.ts +29 -0
package/plugins/agent-discovery-plugin.js +69 -0
package/plugins/llm-module-plugin.d.ts +11 -2
package/plugins/llm-module-plugin.js +15 -7
package/plugins/metering-plugin.d.ts +42 -0
package/plugins/metering-plugin.js +178 -0
package/plugins/rag-plugin.js +20 -20
package/plugins/text-mutation-plugin.d.ts +4 -0
package/plugins/text-mutation-plugin.js +23 -13
package/plugins/text-search-plugin.d.ts +4 -0
package/plugins/text-search-plugin.js +23 -11
package/preset.d.ts +21 -1
package/preset.js +33 -6
package/types.d.ts +86 -10

package/esm/plugins/metering-plugin.js ADDED Viewed

@@ -0,0 +1,175 @@
+/**
+ * LlmMeteringPlugin
+ *
+ * Opt-in billing integration for graphile-llm. Completely separate from the
+ * pure LLM plugins (text-search, text-mutation, rag).
+ *
+ * **How it works:**
+ * 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
+ *    `(text: string) => Promise<number[] | null>` that resolves to the embedding
+ *    vector, or to null when the quota is exceeded
+ * 2. At request time, wraps every root query/mutation resolver to set up a
+ *    request-scoped MeteringContext via AsyncLocalStorage
+ * 3. When the embedder is called (by any plugin), the wrapper checks
+ *    AsyncLocalStorage for a metering context and if found, calls
+ *    check_billing_quota before and record_usage after
+ * 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
+ *    null and handles it (search falls back to text-only, mutations throw)
+ *
+ * The pure plugins never import metering, config-cache, or billing types.
+ * They call the embedder and handle null results — that's it.
+ *
+ * **Entity ID resolution:**
+ * The billing `entity_id` is resolved via a configurable callback.
+ * Default: reads `jwt.claims.user_id` from pgSettings. Override via
+ * `metering.resolveEntityId` in GraphileLlmPreset options.
+ *
+ * **Graceful behavior:**
+ * - billing_module not provisioned → embedder passes through unmetered
+ * - entity_id not available → embedder passes through unmetered
+ * - check_billing_quota throws → call is allowed (billing is opt-in)
+ * - record_usage throws → call succeeds, recording silently skipped
+ * - quota exceeded → embedder returns null
+ */
+import { AsyncLocalStorage } from 'node:async_hooks';
+import { getLlmBillingConfig } from '../config-cache';
+import { meteredEmbed } from '../metering';
+// ─── Request-scoped context via AsyncLocalStorage ───────────────────────────
+const meteringStore = new AsyncLocalStorage();
+// ─── Helpers ────────────────────────────────────────────────────────────────
+function defaultResolveEntityId(pgSettings) { // Default billing-entity resolver: reads the `jwt.claims.user_id` entry of pgSettings.
+    return pgSettings['jwt.claims.user_id'] ?? null; // a null/undefined claim is normalized to null
+}
+async function buildMeteringContext(graphqlContext, resolveEntityId) { // Builds the metering context for one resolver call; resolves to null whenever metering cannot be set up.
+    const pgSettings = graphqlContext?.pgSettings ?? {}; // tolerate a missing GraphQL context or missing pgSettings
+    const entityId = resolveEntityId(pgSettings); // the supplied resolver decides which entity is billed
+    const databaseId = pgSettings['jwt.claims.database_id'] ?? null;
+    const requestId = pgSettings['request.id'] ?? null; // optional: not part of the null checks below
+    const actorId = pgSettings['jwt.claims.user_id'] ?? null; // always the user_id claim, independent of resolveEntityId
+    if (!entityId || !databaseId) // both ids are required; otherwise no context is built
+        return null;
+    const withPgClient = graphqlContext?.withPgClient;
+    if (!withPgClient) // the config lookup below needs a database client
+        return null;
+    let billingConfig = null;
+    let inferenceLogConfig = null;
+    try {
+        await withPgClient(pgSettings, async (pgClient) => { // results are captured through the two outer `let` bindings
+            const entry = await getLlmBillingConfig(pgClient, databaseId);
+            billingConfig = entry.billing;
+            inferenceLogConfig = entry.inferenceLog;
+        });
+    }
+    catch { // any failure while loading the config yields null; the error itself is discarded
+        return null;
+    }
+    if (!billingConfig) // lookup succeeded but returned no billing config
+        return null;
+    return { // object stored in AsyncLocalStorage and handed to meteredEmbed by the embedder wrapper
+        withPgClient,
+        pgSettings,
+        billing: billingConfig,
+        entityId,
+        requestId,
+        databaseId,
+        actorId,
+        inferenceLog: inferenceLogConfig
+    };
+}
+/**
+ * Wrap an embedder with metering that reads its context from AsyncLocalStorage
+ * (`meteringStore`), so downstream plugins are unaware of billing.
+ *
+ * - No metering context in scope: the original embedder is called directly, the
+ *   `embedding` property of its result is returned, and the call is logged as
+ *   unmetered.
+ * - Metering context in scope: the call is delegated to `meteredEmbed`; the
+ *   wrapper resolves to null when that reports `quotaExceeded`, otherwise to
+ *   the `result` field of what it returned.
+ *
+ * @param embedder - Underlying embedder; the unmetered path expects it to
+ *   resolve to an object with an `embedding` property.
+ * @param meteringOptions - Forwarded unchanged to `meteredEmbed`.
+ * @returns An async function taking the text to embed and resolving as
+ *   described above.
+ */
+function wrapEmbedderWithMetering(embedder, meteringOptions) {
+    return async (text) => {
+        const ctx = meteringStore.getStore(); // whatever value the enclosing meteringStore.run() was given, if any
+        if (!ctx) {
+            // No metering context in scope — call original embedder directly
+            const startTime = Date.now();
+            const { embedding } = await embedder(text); // unwrap the vector from the embedder's result object
+            const latencyMs = Date.now() - startTime;
+            console.log(`[graphile-llm] Embed (unmetered): dims=${embedding?.length ?? 0}, latency=${latencyMs}ms`);
+            return embedding;
+        }
+        const result = await meteredEmbed(embedder, text, ctx, meteringOptions);
+        if (result.quotaExceeded) {
+            return null; // callers must treat null as "no vector available"
+        }
+        return result.result;
+    };
+}
+// ─── Plugin ─────────────────────────────────────────────────────────────────
+export function createLlmMeteringPlugin(meteringConfig = {}) { // Factory for the opt-in LlmMeteringPlugin; `meteringConfig` may be omitted entirely.
+    const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId } = meteringConfig; // resolveEntityId falls back to the JWT user_id resolver
+    return {
+        name: 'LlmMeteringPlugin',
+        version: '0.2.0',
+        description: 'Wraps LLM embedder/chat with billing quota checks and usage recording',
+        after: ['LlmModulePlugin'], // this plugin reads build.llmEmbedder in its build hook, so it is ordered after LlmModulePlugin
+        before: ['LlmTextSearchPlugin', 'LlmTextMutationPlugin', 'LlmRagPlugin'], // ordered ahead of the plugins that consume the embedder
+        schema: {
+            hooks: {
+                build(build) { // swaps build.llmEmbedder for the metered wrapper when an embedder exists
+                    const originalEmbedder = build.llmEmbedder;
+                    if (!originalEmbedder) {
+                        console.log('[graphile-llm] Metering plugin loaded but no embedder configured — skipping');
+                        return build; // build object is returned untouched
+                    }
+                    // Meter slug = model name by default (three-level waterfall: model → inference → universal)
+                    const embeddingModel = build.llmEmbeddingModel;
+                    const chatModel = build.llmChatModel;
+                    const embeddingSlug = configEmbeddingSlug ?? embeddingModel ?? undefined; // explicit config wins over the model name
+                    const chatSlug = configChatSlug ?? chatModel ?? undefined;
+                    if (embeddingSlug) {
+                        console.log(`[graphile-llm] Metering enabled — embedding meter: ${embeddingSlug}`);
+                    }
+                    else {
+                        console.log('[graphile-llm] Metering enabled but no embedding model name — usage will not be metered');
+                    }
+                    const meteringOptions = { // passed to wrapEmbedderWithMetering, which forwards it to meteredEmbed
+                        embeddingMeterSlug: embeddingSlug,
+                        chatMeterSlug: chatSlug,
+                        skipMetering,
+                        embeddingModel: embeddingModel ?? undefined,
+                        chatModel: chatModel ?? undefined
+                    };
+                    // Replace the embedder with a metered version.
+                    // Same signature except it can return null (quota exceeded).
+                    const meteredEmbedder = wrapEmbedderWithMetering(originalEmbedder, meteringOptions);
+                    return build.extend(build, {
+                        llmEmbedder: meteredEmbedder
+                    }, 'LlmMeteringPlugin replacing llmEmbedder with metered version');
+                },
+                /**
+                 * Wrap every root query/mutation resolver to establish the
+                 * request-scoped metering context via AsyncLocalStorage.
+                 *
+                 * Fields that are not root query/mutation fields, and all fields
+                 * when `build.llmEmbedder` is unset, are returned unchanged.
+                 * A field without its own `resolve` gets a fallback that reads
+                 * `obj[fieldName]`.
+                 * NOTE(review): that fallback dereferences `obj` without a null
+                 * check — confirm the root source object is always defined.
+                 */
+                GraphQLObjectType_fields_field(field, build, context) {
+                    const { scope: { isRootQuery, isRootMutation } } = context;
+                    if (!isRootQuery && !isRootMutation)
+                        return field;
+                    // Only wrap if we actually replaced the embedder
+                    if (!build.llmEmbedder)
+                        return field;
+                    const defaultResolver = (obj) => obj[context.scope.fieldName];
+                    const { resolve: oldResolve = defaultResolver, ...rest } = field; // every other field property is carried over as-is
+                    return {
+                        ...rest,
+                        async resolve(source, args, graphqlContext, info) {
+                            // Build the metering context for this request
+                            const ctx = await buildMeteringContext(graphqlContext, resolveEntityId); // may be null; the embedder wrapper then runs unmetered
+                            // Run the original resolver within the AsyncLocalStorage scope
+                            // so any embedder calls made by downstream plugins pick up the ctx
+                            return meteringStore.run(ctx, () => {
+                                return oldResolve(source, args, graphqlContext, info);
+                            });
+                        }
+                    };
+                }
+            }
+        }
+    };
+}

package/esm/plugins/rag-plugin.js CHANGED Viewed

@@ -62,7 +62,7 @@ function parseHasChunksTag(raw, codec) {
         parentFkField: parsed.parentFk || 'parent_id',
         parentPkField: parsed.parentPk || 'id',
         embeddingField: parsed.embeddingField || 'embedding',
-        contentField: parsed.contentField || 'content',
+        contentField: parsed.contentField || 'content'
     };
 }
 /**
@@ -222,10 +222,10 @@ export function createLlmRagPlugin(ragDefaults = {}) {
                             minSimilarity: $minSimilarity,
                             systemPrompt: $systemPrompt,
                             withPgClient: $withPgClient,
-                            pgSettings: $pgSettings,
+                            pgSettings: $pgSettings
                         });
                         return lambda($combined, async (input) => {
-                            const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings, } = input;
+                            const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings } = input;
                             if (!prompt || typeof prompt !== 'string') {
                                 throw new Error('RAG_INVALID_PROMPT: prompt is required');
                             }
@@ -244,7 +244,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
                             const systemPromptTemplate = querySystemPrompt ?? ragDefaults.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
                             // Step 1: Embed the prompt
                             const startEmbed = Date.now();
-                            const vector = await embedder(prompt);
+                            const { embedding: vector } = await embedder(prompt);
                             const embedLatency = Date.now() - startEmbed;
                             const vectorString = `[${vector.join(',')}]`;
                             console.log(`[graphile-llm] RAG embed: dims=${vector.length}, latency=${embedLatency}ms`);
@@ -260,7 +260,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
                                                 content: row.content,
                                                 parent_id: row.parent_id,
                                                 distance: parseFloat(row.distance),
-                                                table_name: table.parentCodecName,
+                                                table_name: table.parentCodecName
                                             });
                                         }
                                     }
@@ -274,31 +274,31 @@ export function createLlmRagPlugin(ragDefaults = {}) {
                                     answer: 'No relevant context found for your query. ' +
                                         'Try broadening your search or lowering the minimum similarity threshold.',
                                     sources: [],
-                                    tokensUsed: null,
+                                    tokensUsed: null
                                 };
                             }
                             // Step 3: Assemble context
                             const contextText = assembleContext(topChunks);
                             // Step 4: Call chat completion
                             const startChat = Date.now();
-                            const answer = await chatCompleter([
+                            const chatResult = await chatCompleter([
                                 { role: 'system', content: systemPromptTemplate + contextText },
-                                { role: 'user', content: prompt },
+                                { role: 'user', content: prompt }
                             ], {
-                                maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS,
+                                maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
                             });
                             const chatLatency = Date.now() - startChat;
-                            console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, latency=${chatLatency}ms`);
+                            console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, tokens=${chatResult.usage.totalTokens}, latency=${chatLatency}ms`);
                             // Step 5: Return response
                             return {
-                                answer,
+                                answer: chatResult.content,
                                 sources: topChunks.map((chunk) => ({
                                     content: chunk.content,
                                     similarity: 1 - chunk.distance,
                                     tableName: chunk.table_name,
-                                    parentId: chunk.parent_id,
+                                    parentId: chunk.parent_id
                                 })),
-                                tokensUsed: null, // Deferred to metering system
+                                tokensUsed: chatResult.usage.totalTokens
                             };
                         });
                     },
@@ -313,17 +313,17 @@ export function createLlmRagPlugin(ragDefaults = {}) {
                                     'to use embedText. Set defaultEmbedder in GraphileLlmPreset options.');
                             }
                             const startTime = Date.now();
-                            const vector = await embedder(text);
+                            const { embedding: vector } = await embedder(text);
                             const latencyMs = Date.now() - startTime;
                             console.log(`[graphile-llm] embedText: dims=${vector.length}, latency=${latencyMs}ms`);
                             return {
                                 vector,
-                                dimensions: vector.length,
+                                dimensions: vector.length
                             };
                         });
-                    },
-                },
-            },
+                    }
+                }
+            }
         };
     });
     return {
@@ -335,7 +335,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
         after: [
             'LlmModulePlugin',
             'UnifiedSearchPlugin',
-            'VectorCodecPlugin',
-        ],
+            'VectorCodecPlugin'
+        ]
     };
 }

package/esm/plugins/text-mutation-plugin.d.ts CHANGED Viewed

@@ -9,6 +9,10 @@
  * Example:
  *   mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
  *
+ * If the embedder returns null (e.g. quota exceeded when the metering plugin
+ * is loaded), the mutation throws an error — unlike search, mutations cannot
+ * silently skip writing a vector the user asked for.
+ *
  * This is the mutation counterpart to LlmTextSearchPlugin (which handles
  * filter/query-side text-to-vector). Together they let clients work entirely
  * with text/prompts instead of raw float vectors.

package/esm/plugins/text-mutation-plugin.js CHANGED Viewed

@@ -9,6 +9,10 @@
  * Example:
  *   mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
  *
+ * If the embedder returns null (e.g. quota exceeded when the metering plugin
+ * is loaded), the mutation throws an error — unlike search, mutations cannot
+ * silently skip writing a vector the user asked for.
+ *
  * This is the mutation counterpart to LlmTextSearchPlugin (which handles
  * filter/query-side text-to-vector). Together they let clients work entirely
  * with text/prompts instead of raw float vectors.
@@ -41,7 +45,7 @@ function getTextToVectorMapping(pgCodec, build) {
         if (isVectorCodec(attribute.codec)) {
             const fieldName = build.inflection.attribute({
                 codec: pgCodec,
-                attributeName,
+                attributeName
             });
             mapping[`${fieldName}Text`] = fieldName;
         }
@@ -61,7 +65,7 @@ function getTextToVectorMapping(pgCodec, build) {
 export function createLlmTextMutationPlugin() {
     return {
         name: 'LlmTextMutationPlugin',
-        version: '0.1.0',
+        version: '0.2.0',
         description: 'Adds text companion fields on mutation inputs for vector columns — ' +
             'text is embedded server-side before storing',
         after: [
@@ -69,7 +73,7 @@ export function createLlmTextMutationPlugin() {
             'PgAttributesPlugin',
             'PgMutationCreatePlugin',
             'PgMutationUpdateDeletePlugin',
-            'VectorCodecPlugin',
+            'VectorCodecPlugin'
         ],
         schema: {
             hooks: {
@@ -78,12 +82,12 @@ export function createLlmTextMutationPlugin() {
                  * for tables that have vector columns.
                  */
                 GraphQLInputObjectType_fields(fields, build, context) {
-                    const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec, }, } = context;
+                    const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec } } = context;
                     // Only intercept create/update input types for table rows
                     if (!pgCodec?.attributes || (!isPgPatch && !isPgBaseInput && !isMutationInput)) {
                         return fields;
                     }
-                    const { graphql: { GraphQLString }, } = build;
+                    const { graphql: { GraphQLString } } = build;
                     // Find vector columns on this table
                     const vectorColumns = [];
                     for (const [attributeName, attribute] of Object.entries(pgCodec.attributes)) {
@@ -99,7 +103,7 @@ export function createLlmTextMutationPlugin() {
                         // Convert snake_case column name to camelCase field name
                         const fieldName = build.inflection.attribute({
                             codec: pgCodec,
-                            attributeName: columnName,
+                            attributeName: columnName
                         });
                         const textFieldName = `${fieldName}Text`;
                         newFields = build.extend(newFields, {
@@ -107,8 +111,8 @@ export function createLlmTextMutationPlugin() {
                                 type: GraphQLString,
                                 description: `Natural language text to embed server-side into the \`${fieldName}\` vector column. ` +
                                     `Mutually exclusive with \`${fieldName}\` — provide one or the other. ` +
-                                    'Requires the LLM plugin to be configured with an embedding provider.',
-                            },
+                                    'Requires the LLM plugin to be configured with an embedding provider.'
+                            }
                         }, `LlmTextMutationPlugin adding ${textFieldName} companion field for vector column '${columnName}'`);
                     }
                     return newFields;
@@ -121,9 +125,11 @@ export function createLlmTextMutationPlugin() {
                  * Uses the same v4-style resolver wrapping pattern as graphile-upload-plugin
                  * and graphile-bucket-provisioner-plugin. grafserv v5 supports this through
                  * its backwards-compatibility layer.
+                 *
+                 * If the embedder returns null (e.g. quota exceeded), throws an error.
                  */
                 GraphQLObjectType_fields_field(field, build, context) {
-                    const { scope: { isRootMutation, fieldName, pgCodec }, } = context;
+                    const { scope: { isRootMutation, fieldName, pgCodec } } = context;
                     // Only wrap root mutation fields on tables with attributes
                     if (!isRootMutation || !pgCodec || !pgCodec.attributes) {
                         return field;
@@ -163,6 +169,10 @@ export function createLlmTextMutationPlugin() {
                                             const startTime = Date.now();
                                             const vector = await embedder(value);
                                             const latencyMs = Date.now() - startTime;
+                                            if (vector === null) {
+                                                throw new Error(`EMBED_QUOTA_EXCEEDED: Cannot embed ${key} — embedding quota exceeded. ` +
+                                                    'Upgrade your plan or wait for the next billing period.');
+                                            }
                                             console.log(`[graphile-llm] Mutation embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
                                             // Inject the vector into the corresponding field
                                             obj[vectorFieldName] = vector;
@@ -182,10 +192,10 @@ export function createLlmTextMutationPlugin() {
                             }
                             await embedTextFields(args);
                             return oldResolve(source, args, graphqlContext, info);
-                        },
+                        }
                     };
-                },
-            },
-        },
+                }
+            }
+        }
     };
 }

package/esm/plugins/text-search-plugin.d.ts CHANGED Viewed

@@ -22,6 +22,10 @@
  *
  * If the embedder is not configured, the `text` field is still registered
  * (so the schema is stable) but will return a clear error at execution time.
+ *
+ * If the embedder returns null (e.g. quota exceeded when the metering
+ * plugin is loaded), the text field is silently removed — the query
+ * continues with text-only search as a graceful fallback.
  */
 import type { GraphileConfig } from 'graphile-config';
 declare global {

package/esm/plugins/text-search-plugin.js CHANGED Viewed

@@ -22,6 +22,10 @@
  *
  * If the embedder is not configured, the `text` field is still registered
  * (so the schema is stable) but will return a clear error at execution time.
+ *
+ * If the embedder returns null (e.g. quota exceeded when the metering
+ * plugin is loaded), the text field is silently removed — the query
+ * continues with text-only search as a graceful fallback.
  */
 /**
  * Check if a codec has any pgvector `vector` columns.
@@ -38,6 +42,9 @@ function hasVectorColumns(pgCodec) {
 /**
  * Recursively walk a `where` argument object and embed any VectorNearbyInput
  * values that have `text` instead of `vector`.
+ *
+ * If the embedder returns null (e.g. quota exceeded), the text field is
+ * removed so the pgvector filter is skipped — graceful text-only fallback.
  */
 async function embedTextInWhere(obj, embedder) {
     if (!obj || typeof obj !== 'object')
@@ -53,6 +60,11 @@ async function embedTextInWhere(obj, embedder) {
                 const startTime = Date.now();
                 const vector = await embedder(value.text);
                 const latencyMs = Date.now() - startTime;
+                if (vector === null) {
+                    // Embedder returned null (e.g. quota exceeded) — skip vector search
+                    delete value.text;
+                    return;
+                }
                 console.log(`[graphile-llm] Search embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
                 // Replace text with vector
                 value.vector = vector;
@@ -85,12 +97,12 @@ async function embedTextInWhere(obj, embedder) {
 export function createLlmTextSearchPlugin() {
     return {
         name: 'LlmTextSearchPlugin',
-        version: '0.1.0',
+        version: '0.2.0',
         description: 'Adds text-to-vector embedding support on VectorNearbyInput filter fields',
         after: [
             'LlmModulePlugin',
             'UnifiedSearchPlugin',
-            'VectorCodecPlugin',
+            'VectorCodecPlugin'
         ],
         schema: {
             hooks: {
@@ -101,18 +113,18 @@ export function createLlmTextSearchPlugin() {
                  * The field is optional — clients provide either `text` or `vector`.
                  */
                 GraphQLInputObjectType_fields(fields, build, context) {
-                    const { scope: { inputObjectTypeName }, } = context;
+                    const { scope: { inputObjectTypeName } } = context;
                     if (inputObjectTypeName !== 'VectorNearbyInput') {
                         return fields;
                     }
-                    const { graphql: { GraphQLString }, } = build;
+                    const { graphql: { GraphQLString } } = build;
                     return build.extend(fields, {
                         text: {
                             type: GraphQLString,
                             description: 'Natural language text to embed server-side for similarity search. ' +
                                 'Mutually exclusive with `vector` — provide one or the other. ' +
-                                'Requires the LLM plugin to be configured with an embedding provider.',
-                        },
+                                'Requires the LLM plugin to be configured with an embedding provider.'
+                        }
                     }, 'LlmTextSearchPlugin adding text field to VectorNearbyInput');
                 },
                 /**
@@ -124,7 +136,7 @@ export function createLlmTextSearchPlugin() {
                  * and graphile-bucket-provisioner-plugin.
                  */
                 GraphQLObjectType_fields_field(field, build, context) {
-                    const { scope: { isRootQuery, pgCodec }, } = context;
+                    const { scope: { isRootQuery, pgCodec } } = context;
                     // Only wrap root query fields on tables with vector columns
                     if (!isRootQuery || !pgCodec || !hasVectorColumns(pgCodec)) {
                         return field;
@@ -146,7 +158,7 @@ export function createLlmTextSearchPlugin() {
                                 await embedTextInWhere(args.filter, embedder);
                             }
                             return oldResolve(source, args, graphqlContext, info);
-                        },
+                        }
                     };
                 },
                 finalize(schema, build) {
@@ -156,8 +168,8 @@ export function createLlmTextSearchPlugin() {
                             'will return errors if used. Configure an embedding provider to enable.');
                     }
                     return schema;
-                },
-            },
-        },
+                }
+            }
+        }
     };
 }

package/esm/preset.d.ts CHANGED Viewed

@@ -8,7 +8,7 @@
  * - Resolves an embedder from configuration (llm_module, env vars, or preset options)
  * - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
  * - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
- * - Logs token usage to console (metering integration deferred to billing system)
+ * - Optionally enables billing/metering via the LlmMeteringPlugin
  *
  * This preset is standalone — it is NOT included in ConstructivePreset by default.
  * Projects that want LLM features opt in by adding it to their preset.
@@ -42,6 +42,26 @@
  *   ],
  * };
  * ```
+ *
+ * @example With billing metering (opt-in, meter slug = model name by default):
+ * ```typescript
+ * GraphileLlmPreset({
+ *   defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
+ *   metering: true,
+ *   // → embedding calls metered under 'text-embedding-3-small' meter slug
+ *   // → three-level waterfall: text-embedding-3-small → inference pool → universal
+ * })
+ * ```
+ *
+ * @example With custom entity_id resolution (bill per-database):
+ * ```typescript
+ * GraphileLlmPreset({
+ *   defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
+ *   metering: {
+ *     resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
+ *   },
+ * })
+ * ```
  */
 import type { GraphileConfig } from 'graphile-config';
 import type { GraphileLlmOptions } from './types';

package/esm/preset.js CHANGED Viewed

@@ -8,7 +8,7 @@
  * - Resolves an embedder from configuration (llm_module, env vars, or preset options)
  * - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
  * - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
- * - Logs token usage to console (metering integration deferred to billing system)
+ * - Optionally enables billing/metering via the LlmMeteringPlugin
  *
  * This preset is standalone — it is NOT included in ConstructivePreset by default.
  * Projects that want LLM features opt in by adding it to their preset.
@@ -42,11 +42,32 @@
  *   ],
  * };
  * ```
+ *
+ * @example With billing metering (opt-in, meter slug = model name by default):
+ * ```typescript
+ * GraphileLlmPreset({
+ *   defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
+ *   metering: true,
+ *   // → embedding calls metered under 'text-embedding-3-small' meter slug
+ *   // → three-level waterfall: text-embedding-3-small → inference pool → universal
+ * })
+ * ```
+ *
+ * @example With custom entity_id resolution (bill per-database):
+ * ```typescript
+ * GraphileLlmPreset({
+ *   defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
+ *   metering: {
+ *     resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
+ *   },
+ * })
+ * ```
  */
 import { createLlmModulePlugin } from './plugins/llm-module-plugin';
-import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
-import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
+import { createLlmMeteringPlugin } from './plugins/metering-plugin';
 import { createLlmRagPlugin } from './plugins/rag-plugin';
+import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
+import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
 /**
  * Creates a preset that includes all LLM plugins.
  *
@@ -54,10 +75,16 @@ import { createLlmRagPlugin } from './plugins/rag-plugin';
  * @returns A GraphileConfig.Preset to add to your extends array
  */
 export function GraphileLlmPreset(options = {}) {
-    const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, } = options;
+    const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering } = options;
     const plugins = [
-        createLlmModulePlugin(options),
+        createLlmModulePlugin(options)
     ];
+    // Metering is opt-in: only loaded when metering is truthy
+    // (true, or a MeteringConfig object)
+    if (metering) {
+        const meteringConfig = metering === true ? {} : metering;
+        plugins.push(createLlmMeteringPlugin(meteringConfig));
+    }
     if (enableTextSearch) {
         plugins.push(createLlmTextSearchPlugin());
     }
@@ -68,7 +95,7 @@ export function GraphileLlmPreset(options = {}) {
         plugins.push(createLlmRagPlugin(ragDefaults));
     }
     return {
-        plugins,
+        plugins
     };
 }
 export default GraphileLlmPreset;