graphile-llm 0.7.3 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/graphile-llm.test.js +87 -71
- package/chat.d.ts +5 -5
- package/chat.js +45 -43
- package/config-cache.d.ts +77 -0
- package/config-cache.js +148 -0
- package/embedder.d.ts +5 -5
- package/embedder.js +11 -17
- package/env.d.ts +31 -0
- package/env.js +52 -0
- package/esm/__tests__/graphile-llm.test.js +87 -71
- package/esm/chat.d.ts +5 -5
- package/esm/chat.js +45 -40
- package/esm/config-cache.d.ts +77 -0
- package/esm/config-cache.js +143 -0
- package/esm/embedder.d.ts +5 -5
- package/esm/embedder.js +11 -17
- package/esm/env.d.ts +31 -0
- package/esm/env.js +49 -0
- package/esm/index.d.ts +14 -5
- package/esm/index.js +11 -5
- package/esm/metering.d.ts +114 -0
- package/esm/metering.js +352 -0
- package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
- package/esm/plugins/agent-discovery-plugin.js +65 -0
- package/esm/plugins/llm-module-plugin.d.ts +11 -2
- package/esm/plugins/llm-module-plugin.js +15 -7
- package/esm/plugins/metering-plugin.d.ts +42 -0
- package/esm/plugins/metering-plugin.js +175 -0
- package/esm/plugins/rag-plugin.js +20 -20
- package/esm/plugins/text-mutation-plugin.d.ts +4 -0
- package/esm/plugins/text-mutation-plugin.js +23 -13
- package/esm/plugins/text-search-plugin.d.ts +4 -0
- package/esm/plugins/text-search-plugin.js +23 -11
- package/esm/preset.d.ts +21 -1
- package/esm/preset.js +33 -6
- package/esm/types.d.ts +86 -10
- package/index.d.ts +14 -5
- package/index.js +25 -8
- package/metering.d.ts +114 -0
- package/metering.js +359 -0
- package/package.json +15 -15
- package/plugins/agent-discovery-plugin.d.ts +29 -0
- package/plugins/agent-discovery-plugin.js +69 -0
- package/plugins/llm-module-plugin.d.ts +11 -2
- package/plugins/llm-module-plugin.js +15 -7
- package/plugins/metering-plugin.d.ts +42 -0
- package/plugins/metering-plugin.js +178 -0
- package/plugins/rag-plugin.js +20 -20
- package/plugins/text-mutation-plugin.d.ts +4 -0
- package/plugins/text-mutation-plugin.js +23 -13
- package/plugins/text-search-plugin.d.ts +4 -0
- package/plugins/text-search-plugin.js +23 -11
- package/preset.d.ts +21 -1
- package/preset.js +33 -6
- package/types.d.ts +86 -10
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LlmMeteringPlugin
|
|
4
|
+
*
|
|
5
|
+
* Opt-in billing integration for graphile-llm. Completely separate from the
|
|
6
|
+
* pure LLM plugins (text-search, text-mutation, rag).
|
|
7
|
+
*
|
|
8
|
+
* **How it works:**
|
|
9
|
+
* 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
|
|
10
|
+
* that has the same `(text: string) => Promise<number[]>` signature
|
|
11
|
+
* 2. At request time, wraps every root query/mutation resolver to set up a
|
|
12
|
+
* request-scoped MeteringContext via AsyncLocalStorage
|
|
13
|
+
* 3. When the embedder is called (by any plugin), the wrapper checks
|
|
14
|
+
* AsyncLocalStorage for a metering context and if found, calls
|
|
15
|
+
* check_billing_quota before and record_usage after
|
|
16
|
+
* 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
|
|
17
|
+
* null and handles it (search falls back to text-only, mutations throw)
|
|
18
|
+
*
|
|
19
|
+
* The pure plugins never import metering, config-cache, or billing types.
|
|
20
|
+
* They call the embedder and handle null results — that's it.
|
|
21
|
+
*
|
|
22
|
+
* **Entity ID resolution:**
|
|
23
|
+
* The billing `entity_id` is resolved via a configurable callback.
|
|
24
|
+
* Default: reads `jwt.claims.user_id` from pgSettings. Override via
|
|
25
|
+
* `metering.resolveEntityId` in GraphileLlmPreset options.
|
|
26
|
+
*
|
|
27
|
+
* **Graceful behavior:**
|
|
28
|
+
* - billing_module not provisioned → embedder passes through unmetered
|
|
29
|
+
* - entity_id not available → embedder passes through unmetered
|
|
30
|
+
* - check_billing_quota throws → call is allowed (billing is opt-in)
|
|
31
|
+
* - record_usage throws → call succeeds, recording silently skipped
|
|
32
|
+
* - quota exceeded → embedder returns null
|
|
33
|
+
*/
|
|
34
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
35
|
+
exports.createLlmMeteringPlugin = createLlmMeteringPlugin;
|
|
36
|
+
const node_async_hooks_1 = require("node:async_hooks");
|
|
37
|
+
const config_cache_1 = require("../config-cache");
|
|
38
|
+
const metering_1 = require("../metering");
|
|
39
|
+
// ─── Request-scoped context via AsyncLocalStorage ───────────────────────────
|
|
40
|
+
const meteringStore = new node_async_hooks_1.AsyncLocalStorage();
|
|
41
|
+
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
42
|
+
/**
 * Default billing-entity resolver used when `metering.resolveEntityId` is not
 * supplied: bills whatever is stored under `jwt.claims.user_id`.
 *
 * @param {Record<string, unknown> | null | undefined} pgSettings - Per-request settings map.
 * @returns {unknown} The value of `jwt.claims.user_id`, or `null` when that key
 *   (or the settings object itself) is absent.
 */
function defaultResolveEntityId(pgSettings) {
    // Optional chaining keeps a missing settings object on the "no entity" path
    // instead of raising a TypeError.
    return pgSettings?.['jwt.claims.user_id'] ?? null;
}
|
|
45
|
+
/**
 * Build the request-scoped metering context from a GraphQL context object.
 *
 * Resolves to `null` (meaning "do not meter this request") when any of the
 * following holds:
 *   - the resolver yields no entity id, or `jwt.claims.database_id` is missing
 *   - the GraphQL context exposes no `withPgClient`
 *   - loading the billing config throws
 *   - the loaded config entry has no `billing` value
 *
 * @param {object | null | undefined} graphqlContext - GraphQL context; `pgSettings`
 *   and `withPgClient` are read from it.
 * @param {(pgSettings: object) => unknown} resolveEntityId - Returns the billing
 *   entity id for the request. May be synchronous or return a promise.
 * @returns {Promise<object | null>} The metering context, or `null`.
 */
async function buildMeteringContext(graphqlContext, resolveEntityId) {
    const pgSettings = graphqlContext?.pgSettings ?? {};
    // Awaiting accepts both sync and async resolvers. Without it an async
    // resolver would hand back a (truthy) Promise that slips past the guard
    // below and ends up stored as the entity id.
    const entityId = await resolveEntityId(pgSettings);
    const databaseId = pgSettings['jwt.claims.database_id'] ?? null;
    const requestId = pgSettings['request.id'] ?? null;
    const actorId = pgSettings['jwt.claims.user_id'] ?? null;
    if (!entityId || !databaseId) {
        return null;
    }
    const withPgClient = graphqlContext?.withPgClient;
    if (!withPgClient) {
        return null;
    }
    let billingConfig = null;
    let inferenceLogConfig = null;
    try {
        await withPgClient(pgSettings, async (pgClient) => {
            const entry = await (0, config_cache_1.getLlmBillingConfig)(pgClient, databaseId);
            billingConfig = entry.billing;
            inferenceLogConfig = entry.inferenceLog;
        });
    }
    catch {
        // Deliberate best-effort: a failed config lookup disables metering for
        // this request rather than failing the request itself.
        return null;
    }
    if (!billingConfig) {
        return null;
    }
    return {
        withPgClient,
        pgSettings,
        billing: billingConfig,
        entityId,
        requestId,
        databaseId,
        actorId,
        inferenceLog: inferenceLogConfig
    };
}
|
|
81
|
+
/**
 * Wrap an embedder with metering that reads its context from AsyncLocalStorage,
 * so downstream plugins never have to deal with billing themselves.
 *
 * The wrapped `embedder` is expected to resolve to an object carrying an
 * `embedding` property. The returned function takes the same `text` argument
 * but resolves to a bare value instead:
 *   - no metering context in scope: the embedder is called directly, a latency
 *     line is logged, and its `embedding` is returned
 *   - metering context in scope: the call is delegated to `meteredEmbed`; the
 *     wrapper resolves to `null` when that reports `quotaExceeded`, otherwise
 *     to its `result`
 *
 * @param {(text: string) => Promise<{ embedding: unknown }>} embedder - Original embedder.
 * @param {object} meteringOptions - Passed through unchanged to `meteredEmbed`.
 * @returns {(text: string) => Promise<unknown>} The metered embedder.
 */
function wrapEmbedderWithMetering(embedder, meteringOptions) {
    return async (text) => {
        const ctx = meteringStore.getStore();
        if (!ctx) {
            // No metering context in scope — call original embedder directly
            const startTime = Date.now();
            const { embedding } = await embedder(text);
            const latencyMs = Date.now() - startTime;
            console.log(`[graphile-llm] Embed (unmetered): dims=${embedding?.length ?? 0}, latency=${latencyMs}ms`);
            return embedding;
        }
        const result = await (0, metering_1.meteredEmbed)(embedder, text, ctx, meteringOptions);
        if (result.quotaExceeded) {
            // Callers treat null as "no vector available" and decide for
            // themselves whether to fall back or fail.
            return null;
        }
        return result.result;
    };
}
|
|
107
|
+
// ─── Plugin ─────────────────────────────────────────────────────────────────
|
|
108
|
+
/**
 * Create the opt-in LLM metering plugin.
 *
 * The plugin is ordered after `LlmModulePlugin` and before the text-search,
 * text-mutation and RAG plugins, and contributes two schema hooks:
 *   - `build`: swaps `build.llmEmbedder` for a metered wrapper (no-op when no
 *     embedder is configured)
 *   - `GraphQLObjectType_fields_field`: wraps every root query/mutation
 *     resolver so it runs inside a request-scoped AsyncLocalStorage context
 *
 * @param {object} [meteringConfig]
 * @param {string} [meteringConfig.embeddingMeterSlug] - Overrides the embedding
 *   meter slug; defaults to `build.llmEmbeddingModel`.
 * @param {string} [meteringConfig.chatMeterSlug] - Overrides the chat meter
 *   slug; defaults to `build.llmChatModel`.
 * @param {*} [meteringConfig.skipMetering] - Forwarded untouched in the metering options.
 * @param {(pgSettings: object) => unknown} [meteringConfig.resolveEntityId] -
 *   Billing entity resolver; defaults to `defaultResolveEntityId`.
 * @returns {object} The plugin definition.
 */
function createLlmMeteringPlugin(meteringConfig = {}) {
    const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId } = meteringConfig;
    return {
        name: 'LlmMeteringPlugin',
        version: '0.2.0',
        description: 'Wraps LLM embedder/chat with billing quota checks and usage recording',
        after: ['LlmModulePlugin'],
        before: ['LlmTextSearchPlugin', 'LlmTextMutationPlugin', 'LlmRagPlugin'],
        schema: {
            hooks: {
                build(build) {
                    const originalEmbedder = build.llmEmbedder;
                    if (!originalEmbedder) {
                        console.log('[graphile-llm] Metering plugin loaded but no embedder configured — skipping');
                        return build;
                    }
                    // Meter slug = model name by default (three-level waterfall: model → inference → universal)
                    const embeddingModel = build.llmEmbeddingModel;
                    const chatModel = build.llmChatModel;
                    const embeddingSlug = configEmbeddingSlug ?? embeddingModel ?? undefined;
                    const chatSlug = configChatSlug ?? chatModel ?? undefined;
                    if (embeddingSlug) {
                        console.log(`[graphile-llm] Metering enabled — embedding meter: ${embeddingSlug}`);
                    }
                    else {
                        console.log('[graphile-llm] Metering enabled but no embedding model name — usage will not be metered');
                    }
                    const meteringOptions = {
                        embeddingMeterSlug: embeddingSlug,
                        chatMeterSlug: chatSlug,
                        skipMetering,
                        embeddingModel: embeddingModel ?? undefined,
                        chatModel: chatModel ?? undefined
                    };
                    // Replace the embedder with a metered version. Assign directly
                    // rather than through build.extend: extend is meant for adding
                    // new keys and rejects overwriting an existing key with a
                    // different value, which is exactly what this replacement does.
                    build.llmEmbedder = wrapEmbedderWithMetering(originalEmbedder, meteringOptions);
                    return build;
                },
                /**
                 * Wrap every root query/mutation resolver to establish the
                 * request-scoped metering context via AsyncLocalStorage.
                 */
                GraphQLObjectType_fields_field(field, build, context) {
                    const { scope: { isRootQuery, isRootMutation } } = context;
                    if (!isRootQuery && !isRootMutation) {
                        return field;
                    }
                    // Without an embedder there is nothing to meter, so leave the field alone.
                    if (!build.llmEmbedder) {
                        return field;
                    }
                    // Root fields are commonly resolved with an undefined source;
                    // optional chaining keeps the fallback from throwing there.
                    const defaultResolver = (obj) => obj?.[context.scope.fieldName];
                    const { resolve: oldResolve = defaultResolver, ...rest } = field;
                    return {
                        ...rest,
                        async resolve(source, args, graphqlContext, info) {
                            // Build the metering context for this request (null = unmetered)
                            const ctx = await buildMeteringContext(graphqlContext, resolveEntityId);
                            // Run the original resolver within the AsyncLocalStorage scope
                            // so any embedder calls made by downstream plugins pick up the ctx
                            return meteringStore.run(ctx, () => oldResolve(source, args, graphqlContext, info));
                        }
                    };
                }
            }
        }
    };
}
|
package/plugins/rag-plugin.js
CHANGED
|
@@ -65,7 +65,7 @@ function parseHasChunksTag(raw, codec) {
|
|
|
65
65
|
parentFkField: parsed.parentFk || 'parent_id',
|
|
66
66
|
parentPkField: parsed.parentPk || 'id',
|
|
67
67
|
embeddingField: parsed.embeddingField || 'embedding',
|
|
68
|
-
contentField: parsed.contentField || 'content'
|
|
68
|
+
contentField: parsed.contentField || 'content'
|
|
69
69
|
};
|
|
70
70
|
}
|
|
71
71
|
/**
|
|
@@ -225,10 +225,10 @@ function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
225
225
|
minSimilarity: $minSimilarity,
|
|
226
226
|
systemPrompt: $systemPrompt,
|
|
227
227
|
withPgClient: $withPgClient,
|
|
228
|
-
pgSettings: $pgSettings
|
|
228
|
+
pgSettings: $pgSettings
|
|
229
229
|
});
|
|
230
230
|
return (0, grafast_1.lambda)($combined, async (input) => {
|
|
231
|
-
const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings
|
|
231
|
+
const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings } = input;
|
|
232
232
|
if (!prompt || typeof prompt !== 'string') {
|
|
233
233
|
throw new Error('RAG_INVALID_PROMPT: prompt is required');
|
|
234
234
|
}
|
|
@@ -247,7 +247,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
247
247
|
const systemPromptTemplate = querySystemPrompt ?? ragDefaults.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
|
|
248
248
|
// Step 1: Embed the prompt
|
|
249
249
|
const startEmbed = Date.now();
|
|
250
|
-
const vector = await embedder(prompt);
|
|
250
|
+
const { embedding: vector } = await embedder(prompt);
|
|
251
251
|
const embedLatency = Date.now() - startEmbed;
|
|
252
252
|
const vectorString = `[${vector.join(',')}]`;
|
|
253
253
|
console.log(`[graphile-llm] RAG embed: dims=${vector.length}, latency=${embedLatency}ms`);
|
|
@@ -263,7 +263,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
263
263
|
content: row.content,
|
|
264
264
|
parent_id: row.parent_id,
|
|
265
265
|
distance: parseFloat(row.distance),
|
|
266
|
-
table_name: table.parentCodecName
|
|
266
|
+
table_name: table.parentCodecName
|
|
267
267
|
});
|
|
268
268
|
}
|
|
269
269
|
}
|
|
@@ -277,31 +277,31 @@ function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
277
277
|
answer: 'No relevant context found for your query. ' +
|
|
278
278
|
'Try broadening your search or lowering the minimum similarity threshold.',
|
|
279
279
|
sources: [],
|
|
280
|
-
tokensUsed: null
|
|
280
|
+
tokensUsed: null
|
|
281
281
|
};
|
|
282
282
|
}
|
|
283
283
|
// Step 3: Assemble context
|
|
284
284
|
const contextText = assembleContext(topChunks);
|
|
285
285
|
// Step 4: Call chat completion
|
|
286
286
|
const startChat = Date.now();
|
|
287
|
-
const
|
|
287
|
+
const chatResult = await chatCompleter([
|
|
288
288
|
{ role: 'system', content: systemPromptTemplate + contextText },
|
|
289
|
-
{ role: 'user', content: prompt }
|
|
289
|
+
{ role: 'user', content: prompt }
|
|
290
290
|
], {
|
|
291
|
-
maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
|
|
291
|
+
maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
|
|
292
292
|
});
|
|
293
293
|
const chatLatency = Date.now() - startChat;
|
|
294
|
-
console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, latency=${chatLatency}ms`);
|
|
294
|
+
console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, tokens=${chatResult.usage.totalTokens}, latency=${chatLatency}ms`);
|
|
295
295
|
// Step 5: Return response
|
|
296
296
|
return {
|
|
297
|
-
answer,
|
|
297
|
+
answer: chatResult.content,
|
|
298
298
|
sources: topChunks.map((chunk) => ({
|
|
299
299
|
content: chunk.content,
|
|
300
300
|
similarity: 1 - chunk.distance,
|
|
301
301
|
tableName: chunk.table_name,
|
|
302
|
-
parentId: chunk.parent_id
|
|
302
|
+
parentId: chunk.parent_id
|
|
303
303
|
})),
|
|
304
|
-
tokensUsed:
|
|
304
|
+
tokensUsed: chatResult.usage.totalTokens
|
|
305
305
|
};
|
|
306
306
|
});
|
|
307
307
|
},
|
|
@@ -316,17 +316,17 @@ function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
316
316
|
'to use embedText. Set defaultEmbedder in GraphileLlmPreset options.');
|
|
317
317
|
}
|
|
318
318
|
const startTime = Date.now();
|
|
319
|
-
const vector = await embedder(text);
|
|
319
|
+
const { embedding: vector } = await embedder(text);
|
|
320
320
|
const latencyMs = Date.now() - startTime;
|
|
321
321
|
console.log(`[graphile-llm] embedText: dims=${vector.length}, latency=${latencyMs}ms`);
|
|
322
322
|
return {
|
|
323
323
|
vector,
|
|
324
|
-
dimensions: vector.length
|
|
324
|
+
dimensions: vector.length
|
|
325
325
|
};
|
|
326
326
|
});
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
}
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
330
|
};
|
|
331
331
|
});
|
|
332
332
|
return {
|
|
@@ -338,7 +338,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
338
338
|
after: [
|
|
339
339
|
'LlmModulePlugin',
|
|
340
340
|
'UnifiedSearchPlugin',
|
|
341
|
-
'VectorCodecPlugin'
|
|
342
|
-
]
|
|
341
|
+
'VectorCodecPlugin'
|
|
342
|
+
]
|
|
343
343
|
};
|
|
344
344
|
}
|
|
@@ -9,6 +9,10 @@
|
|
|
9
9
|
* Example:
|
|
10
10
|
* mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
|
|
11
11
|
*
|
|
12
|
+
* If the embedder returns null (e.g. quota exceeded when the metering plugin
|
|
13
|
+
* is loaded), the mutation throws an error — unlike search, mutations cannot
|
|
14
|
+
* silently skip writing a vector the user asked for.
|
|
15
|
+
*
|
|
12
16
|
* This is the mutation counterpart to LlmTextSearchPlugin (which handles
|
|
13
17
|
* filter/query-side text-to-vector). Together they let clients work entirely
|
|
14
18
|
* with text/prompts instead of raw float vectors.
|
|
@@ -10,6 +10,10 @@
|
|
|
10
10
|
* Example:
|
|
11
11
|
* mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
|
|
12
12
|
*
|
|
13
|
+
* If the embedder returns null (e.g. quota exceeded when the metering plugin
|
|
14
|
+
* is loaded), the mutation throws an error — unlike search, mutations cannot
|
|
15
|
+
* silently skip writing a vector the user asked for.
|
|
16
|
+
*
|
|
13
17
|
* This is the mutation counterpart to LlmTextSearchPlugin (which handles
|
|
14
18
|
* filter/query-side text-to-vector). Together they let clients work entirely
|
|
15
19
|
* with text/prompts instead of raw float vectors.
|
|
@@ -44,7 +48,7 @@ function getTextToVectorMapping(pgCodec, build) {
|
|
|
44
48
|
if (isVectorCodec(attribute.codec)) {
|
|
45
49
|
const fieldName = build.inflection.attribute({
|
|
46
50
|
codec: pgCodec,
|
|
47
|
-
attributeName
|
|
51
|
+
attributeName
|
|
48
52
|
});
|
|
49
53
|
mapping[`${fieldName}Text`] = fieldName;
|
|
50
54
|
}
|
|
@@ -64,7 +68,7 @@ function getTextToVectorMapping(pgCodec, build) {
|
|
|
64
68
|
function createLlmTextMutationPlugin() {
|
|
65
69
|
return {
|
|
66
70
|
name: 'LlmTextMutationPlugin',
|
|
67
|
-
version: '0.
|
|
71
|
+
version: '0.2.0',
|
|
68
72
|
description: 'Adds text companion fields on mutation inputs for vector columns — ' +
|
|
69
73
|
'text is embedded server-side before storing',
|
|
70
74
|
after: [
|
|
@@ -72,7 +76,7 @@ function createLlmTextMutationPlugin() {
|
|
|
72
76
|
'PgAttributesPlugin',
|
|
73
77
|
'PgMutationCreatePlugin',
|
|
74
78
|
'PgMutationUpdateDeletePlugin',
|
|
75
|
-
'VectorCodecPlugin'
|
|
79
|
+
'VectorCodecPlugin'
|
|
76
80
|
],
|
|
77
81
|
schema: {
|
|
78
82
|
hooks: {
|
|
@@ -81,12 +85,12 @@ function createLlmTextMutationPlugin() {
|
|
|
81
85
|
* for tables that have vector columns.
|
|
82
86
|
*/
|
|
83
87
|
GraphQLInputObjectType_fields(fields, build, context) {
|
|
84
|
-
const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec
|
|
88
|
+
const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec } } = context;
|
|
85
89
|
// Only intercept create/update input types for table rows
|
|
86
90
|
if (!pgCodec?.attributes || (!isPgPatch && !isPgBaseInput && !isMutationInput)) {
|
|
87
91
|
return fields;
|
|
88
92
|
}
|
|
89
|
-
const { graphql: { GraphQLString }
|
|
93
|
+
const { graphql: { GraphQLString } } = build;
|
|
90
94
|
// Find vector columns on this table
|
|
91
95
|
const vectorColumns = [];
|
|
92
96
|
for (const [attributeName, attribute] of Object.entries(pgCodec.attributes)) {
|
|
@@ -102,7 +106,7 @@ function createLlmTextMutationPlugin() {
|
|
|
102
106
|
// Convert snake_case column name to camelCase field name
|
|
103
107
|
const fieldName = build.inflection.attribute({
|
|
104
108
|
codec: pgCodec,
|
|
105
|
-
attributeName: columnName
|
|
109
|
+
attributeName: columnName
|
|
106
110
|
});
|
|
107
111
|
const textFieldName = `${fieldName}Text`;
|
|
108
112
|
newFields = build.extend(newFields, {
|
|
@@ -110,8 +114,8 @@ function createLlmTextMutationPlugin() {
|
|
|
110
114
|
type: GraphQLString,
|
|
111
115
|
description: `Natural language text to embed server-side into the \`${fieldName}\` vector column. ` +
|
|
112
116
|
`Mutually exclusive with \`${fieldName}\` — provide one or the other. ` +
|
|
113
|
-
'Requires the LLM plugin to be configured with an embedding provider.'
|
|
114
|
-
}
|
|
117
|
+
'Requires the LLM plugin to be configured with an embedding provider.'
|
|
118
|
+
}
|
|
115
119
|
}, `LlmTextMutationPlugin adding ${textFieldName} companion field for vector column '${columnName}'`);
|
|
116
120
|
}
|
|
117
121
|
return newFields;
|
|
@@ -124,9 +128,11 @@ function createLlmTextMutationPlugin() {
|
|
|
124
128
|
* Uses the same v4-style resolver wrapping pattern as graphile-upload-plugin
|
|
125
129
|
* and graphile-bucket-provisioner-plugin. grafserv v5 supports this through
|
|
126
130
|
* its backwards-compatibility layer.
|
|
131
|
+
*
|
|
132
|
+
* If the embedder returns null (e.g. quota exceeded), throws an error.
|
|
127
133
|
*/
|
|
128
134
|
GraphQLObjectType_fields_field(field, build, context) {
|
|
129
|
-
const { scope: { isRootMutation, fieldName, pgCodec }
|
|
135
|
+
const { scope: { isRootMutation, fieldName, pgCodec } } = context;
|
|
130
136
|
// Only wrap root mutation fields on tables with attributes
|
|
131
137
|
if (!isRootMutation || !pgCodec || !pgCodec.attributes) {
|
|
132
138
|
return field;
|
|
@@ -166,6 +172,10 @@ function createLlmTextMutationPlugin() {
|
|
|
166
172
|
const startTime = Date.now();
|
|
167
173
|
const vector = await embedder(value);
|
|
168
174
|
const latencyMs = Date.now() - startTime;
|
|
175
|
+
if (vector === null) {
|
|
176
|
+
throw new Error(`EMBED_QUOTA_EXCEEDED: Cannot embed ${key} — embedding quota exceeded. ` +
|
|
177
|
+
'Upgrade your plan or wait for the next billing period.');
|
|
178
|
+
}
|
|
169
179
|
console.log(`[graphile-llm] Mutation embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
|
|
170
180
|
// Inject the vector into the corresponding field
|
|
171
181
|
obj[vectorFieldName] = vector;
|
|
@@ -185,10 +195,10 @@ function createLlmTextMutationPlugin() {
|
|
|
185
195
|
}
|
|
186
196
|
await embedTextFields(args);
|
|
187
197
|
return oldResolve(source, args, graphqlContext, info);
|
|
188
|
-
}
|
|
198
|
+
}
|
|
189
199
|
};
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
}
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
193
203
|
};
|
|
194
204
|
}
|
|
@@ -22,6 +22,10 @@
|
|
|
22
22
|
*
|
|
23
23
|
* If the embedder is not configured, the `text` field is still registered
|
|
24
24
|
* (so the schema is stable) but will return a clear error at execution time.
|
|
25
|
+
*
|
|
26
|
+
* If the embedder returns null (e.g. quota exceeded when the metering
|
|
27
|
+
* plugin is loaded), the text field is silently removed — the query
|
|
28
|
+
* continues with text-only search as a graceful fallback.
|
|
25
29
|
*/
|
|
26
30
|
import type { GraphileConfig } from 'graphile-config';
|
|
27
31
|
declare global {
|
|
@@ -23,6 +23,10 @@
|
|
|
23
23
|
*
|
|
24
24
|
* If the embedder is not configured, the `text` field is still registered
|
|
25
25
|
* (so the schema is stable) but will return a clear error at execution time.
|
|
26
|
+
*
|
|
27
|
+
* If the embedder returns null (e.g. quota exceeded when the metering
|
|
28
|
+
* plugin is loaded), the text field is silently removed — the query
|
|
29
|
+
* continues with text-only search as a graceful fallback.
|
|
26
30
|
*/
|
|
27
31
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
28
32
|
exports.createLlmTextSearchPlugin = createLlmTextSearchPlugin;
|
|
@@ -41,6 +45,9 @@ function hasVectorColumns(pgCodec) {
|
|
|
41
45
|
/**
|
|
42
46
|
* Recursively walk a `where` argument object and embed any VectorNearbyInput
|
|
43
47
|
* values that have `text` instead of `vector`.
|
|
48
|
+
*
|
|
49
|
+
* If the embedder returns null (e.g. quota exceeded), the text field is
|
|
50
|
+
* removed so the pgvector filter is skipped — graceful text-only fallback.
|
|
44
51
|
*/
|
|
45
52
|
async function embedTextInWhere(obj, embedder) {
|
|
46
53
|
if (!obj || typeof obj !== 'object')
|
|
@@ -56,6 +63,11 @@ async function embedTextInWhere(obj, embedder) {
|
|
|
56
63
|
const startTime = Date.now();
|
|
57
64
|
const vector = await embedder(value.text);
|
|
58
65
|
const latencyMs = Date.now() - startTime;
|
|
66
|
+
if (vector === null) {
|
|
67
|
+
// Embedder returned null (e.g. quota exceeded) — skip vector search
|
|
68
|
+
delete value.text;
|
|
69
|
+
return;
|
|
70
|
+
}
|
|
59
71
|
console.log(`[graphile-llm] Search embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
|
|
60
72
|
// Replace text with vector
|
|
61
73
|
value.vector = vector;
|
|
@@ -88,12 +100,12 @@ async function embedTextInWhere(obj, embedder) {
|
|
|
88
100
|
function createLlmTextSearchPlugin() {
|
|
89
101
|
return {
|
|
90
102
|
name: 'LlmTextSearchPlugin',
|
|
91
|
-
version: '0.
|
|
103
|
+
version: '0.2.0',
|
|
92
104
|
description: 'Adds text-to-vector embedding support on VectorNearbyInput filter fields',
|
|
93
105
|
after: [
|
|
94
106
|
'LlmModulePlugin',
|
|
95
107
|
'UnifiedSearchPlugin',
|
|
96
|
-
'VectorCodecPlugin'
|
|
108
|
+
'VectorCodecPlugin'
|
|
97
109
|
],
|
|
98
110
|
schema: {
|
|
99
111
|
hooks: {
|
|
@@ -104,18 +116,18 @@ function createLlmTextSearchPlugin() {
|
|
|
104
116
|
* The field is optional — clients provide either `text` or `vector`.
|
|
105
117
|
*/
|
|
106
118
|
GraphQLInputObjectType_fields(fields, build, context) {
|
|
107
|
-
const { scope: { inputObjectTypeName }
|
|
119
|
+
const { scope: { inputObjectTypeName } } = context;
|
|
108
120
|
if (inputObjectTypeName !== 'VectorNearbyInput') {
|
|
109
121
|
return fields;
|
|
110
122
|
}
|
|
111
|
-
const { graphql: { GraphQLString }
|
|
123
|
+
const { graphql: { GraphQLString } } = build;
|
|
112
124
|
return build.extend(fields, {
|
|
113
125
|
text: {
|
|
114
126
|
type: GraphQLString,
|
|
115
127
|
description: 'Natural language text to embed server-side for similarity search. ' +
|
|
116
128
|
'Mutually exclusive with `vector` — provide one or the other. ' +
|
|
117
|
-
'Requires the LLM plugin to be configured with an embedding provider.'
|
|
118
|
-
}
|
|
129
|
+
'Requires the LLM plugin to be configured with an embedding provider.'
|
|
130
|
+
}
|
|
119
131
|
}, 'LlmTextSearchPlugin adding text field to VectorNearbyInput');
|
|
120
132
|
},
|
|
121
133
|
/**
|
|
@@ -127,7 +139,7 @@ function createLlmTextSearchPlugin() {
|
|
|
127
139
|
* and graphile-bucket-provisioner-plugin.
|
|
128
140
|
*/
|
|
129
141
|
GraphQLObjectType_fields_field(field, build, context) {
|
|
130
|
-
const { scope: { isRootQuery, pgCodec }
|
|
142
|
+
const { scope: { isRootQuery, pgCodec } } = context;
|
|
131
143
|
// Only wrap root query fields on tables with vector columns
|
|
132
144
|
if (!isRootQuery || !pgCodec || !hasVectorColumns(pgCodec)) {
|
|
133
145
|
return field;
|
|
@@ -149,7 +161,7 @@ function createLlmTextSearchPlugin() {
|
|
|
149
161
|
await embedTextInWhere(args.filter, embedder);
|
|
150
162
|
}
|
|
151
163
|
return oldResolve(source, args, graphqlContext, info);
|
|
152
|
-
}
|
|
164
|
+
}
|
|
153
165
|
};
|
|
154
166
|
},
|
|
155
167
|
finalize(schema, build) {
|
|
@@ -159,8 +171,8 @@ function createLlmTextSearchPlugin() {
|
|
|
159
171
|
'will return errors if used. Configure an embedding provider to enable.');
|
|
160
172
|
}
|
|
161
173
|
return schema;
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
165
177
|
};
|
|
166
178
|
}
|
package/preset.d.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* - Resolves an embedder from configuration (llm_module, env vars, or preset options)
|
|
9
9
|
* - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
|
|
10
10
|
* - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
|
|
11
|
-
* -
|
|
11
|
+
* - Optionally enables billing/metering via the LlmMeteringPlugin
|
|
12
12
|
*
|
|
13
13
|
* This preset is standalone — it is NOT included in ConstructivePreset by default.
|
|
14
14
|
* Projects that want LLM features opt in by adding it to their preset.
|
|
@@ -42,6 +42,26 @@
|
|
|
42
42
|
* ],
|
|
43
43
|
* };
|
|
44
44
|
* ```
|
|
45
|
+
*
|
|
46
|
+
* @example With billing metering (opt-in, meter slug = model name by default):
|
|
47
|
+
* ```typescript
|
|
48
|
+
* GraphileLlmPreset({
|
|
49
|
+
* defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
|
|
50
|
+
* metering: true,
|
|
51
|
+
* // → embedding calls metered under 'text-embedding-3-small' meter slug
|
|
52
|
+
* // → three-level waterfall: text-embedding-3-small → inference pool → universal
|
|
53
|
+
* })
|
|
54
|
+
* ```
|
|
55
|
+
*
|
|
56
|
+
* @example With custom entity_id resolution (bill per-database):
|
|
57
|
+
* ```typescript
|
|
58
|
+
* GraphileLlmPreset({
|
|
59
|
+
* defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
|
|
60
|
+
* metering: {
|
|
61
|
+
* resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
|
|
62
|
+
* },
|
|
63
|
+
* })
|
|
64
|
+
* ```
|
|
45
65
|
*/
|
|
46
66
|
import type { GraphileConfig } from 'graphile-config';
|
|
47
67
|
import type { GraphileLlmOptions } from './types';
|
package/preset.js
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* - Resolves an embedder from configuration (llm_module, env vars, or preset options)
|
|
10
10
|
* - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
|
|
11
11
|
* - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
|
|
12
|
-
* -
|
|
12
|
+
* - Optionally enables billing/metering via the LlmMeteringPlugin
|
|
13
13
|
*
|
|
14
14
|
* This preset is standalone — it is NOT included in ConstructivePreset by default.
|
|
15
15
|
* Projects that want LLM features opt in by adding it to their preset.
|
|
@@ -43,13 +43,34 @@
|
|
|
43
43
|
* ],
|
|
44
44
|
* };
|
|
45
45
|
* ```
|
|
46
|
+
*
|
|
47
|
+
* @example With billing metering (opt-in, meter slug = model name by default):
|
|
48
|
+
* ```typescript
|
|
49
|
+
* GraphileLlmPreset({
|
|
50
|
+
* defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
|
|
51
|
+
* metering: true,
|
|
52
|
+
* // → embedding calls metered under 'text-embedding-3-small' meter slug
|
|
53
|
+
* // → three-level waterfall: text-embedding-3-small → inference pool → universal
|
|
54
|
+
* })
|
|
55
|
+
* ```
|
|
56
|
+
*
|
|
57
|
+
* @example With custom entity_id resolution (bill per-database):
|
|
58
|
+
* ```typescript
|
|
59
|
+
* GraphileLlmPreset({
|
|
60
|
+
* defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
|
|
61
|
+
* metering: {
|
|
62
|
+
* resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
|
|
63
|
+
* },
|
|
64
|
+
* })
|
|
65
|
+
* ```
|
|
46
66
|
*/
|
|
47
67
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
48
68
|
exports.GraphileLlmPreset = GraphileLlmPreset;
|
|
49
69
|
const llm_module_plugin_1 = require("./plugins/llm-module-plugin");
|
|
50
|
-
const
|
|
51
|
-
const text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
|
|
70
|
+
const metering_plugin_1 = require("./plugins/metering-plugin");
|
|
52
71
|
const rag_plugin_1 = require("./plugins/rag-plugin");
|
|
72
|
+
const text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
|
|
73
|
+
const text_search_plugin_1 = require("./plugins/text-search-plugin");
|
|
53
74
|
/**
|
|
54
75
|
* Creates a preset that includes all LLM plugins.
|
|
55
76
|
*
|
|
@@ -57,10 +78,16 @@ const rag_plugin_1 = require("./plugins/rag-plugin");
|
|
|
57
78
|
* @returns A GraphileConfig.Preset to add to your extends array
|
|
58
79
|
*/
|
|
59
80
|
function GraphileLlmPreset(options = {}) {
|
|
60
|
-
const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, } = options;
|
|
81
|
+
const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering } = options;
|
|
61
82
|
const plugins = [
|
|
62
|
-
(0, llm_module_plugin_1.createLlmModulePlugin)(options)
|
|
83
|
+
(0, llm_module_plugin_1.createLlmModulePlugin)(options)
|
|
63
84
|
];
|
|
85
|
+
// Metering is opt-in: only loaded when metering is truthy
|
|
86
|
+
// (true, or a MeteringConfig object)
|
|
87
|
+
if (metering) {
|
|
88
|
+
const meteringConfig = metering === true ? {} : metering;
|
|
89
|
+
plugins.push((0, metering_plugin_1.createLlmMeteringPlugin)(meteringConfig));
|
|
90
|
+
}
|
|
64
91
|
if (enableTextSearch) {
|
|
65
92
|
plugins.push((0, text_search_plugin_1.createLlmTextSearchPlugin)());
|
|
66
93
|
}
|
|
@@ -71,7 +98,7 @@ function GraphileLlmPreset(options = {}) {
|
|
|
71
98
|
plugins.push((0, rag_plugin_1.createLlmRagPlugin)(ragDefaults));
|
|
72
99
|
}
|
|
73
100
|
return {
|
|
74
|
-
plugins
|
|
101
|
+
plugins
|
|
75
102
|
};
|
|
76
103
|
}
|
|
77
104
|
exports.default = GraphileLlmPreset;
|