graphile-llm 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/graphile-llm.test.js +81 -67
- package/chat.js +37 -27
- package/config-cache.js +4 -4
- package/embedder.js +3 -1
- package/env.js +6 -6
- package/esm/__tests__/graphile-llm.test.js +81 -67
- package/esm/chat.js +37 -24
- package/esm/config-cache.js +4 -4
- package/esm/embedder.js +3 -1
- package/esm/env.js +6 -6
- package/esm/index.d.ts +12 -12
- package/esm/index.js +7 -11
- package/esm/metering.d.ts +5 -5
- package/esm/metering.js +60 -66
- package/esm/plugins/agent-discovery-plugin.js +2 -2
- package/esm/plugins/llm-module-plugin.d.ts +1 -1
- package/esm/plugins/llm-module-plugin.js +5 -5
- package/esm/plugins/metering-plugin.js +13 -13
- package/esm/plugins/rag-plugin.js +20 -20
- package/esm/plugins/text-mutation-plugin.js +12 -12
- package/esm/plugins/text-search-plugin.js +10 -10
- package/esm/preset.js +6 -6
- package/esm/types.d.ts +39 -4
- package/index.d.ts +12 -12
- package/index.js +11 -15
- package/metering.d.ts +5 -5
- package/metering.js +60 -66
- package/package.json +3 -3
- package/plugins/agent-discovery-plugin.js +2 -2
- package/plugins/llm-module-plugin.d.ts +1 -1
- package/plugins/llm-module-plugin.js +5 -5
- package/plugins/metering-plugin.js +13 -13
- package/plugins/rag-plugin.js +20 -20
- package/plugins/text-mutation-plugin.js +12 -12
- package/plugins/text-search-plugin.js +10 -10
- package/preset.js +6 -6
- package/types.d.ts +39 -4
package/esm/metering.js
CHANGED
|
@@ -9,9 +9,9 @@
|
|
|
9
9
|
* When the quota check fails, the wrapper returns null (graceful degradation)
|
|
10
10
|
* instead of throwing, so the search pipeline can fall back to text-only.
|
|
11
11
|
*
|
|
12
|
-
* Token counts
|
|
13
|
-
*
|
|
14
|
-
*
|
|
12
|
+
* Token counts:
|
|
13
|
+
* - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
|
|
14
|
+
* - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
|
|
15
15
|
*
|
|
16
16
|
* The billing functions live in the tenant database and are called via the
|
|
17
17
|
* Graphile `withPgClient` callback. Function locations (schema, names) are
|
|
@@ -88,7 +88,7 @@ export async function logInferenceUsage(ctx, entry) {
|
|
|
88
88
|
entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
|
|
89
89
|
entry.embeddingModel, entry.embeddingLatencyMs,
|
|
90
90
|
entry.status, entry.errorType,
|
|
91
|
-
entry.rawUsage ? JSON.stringify(entry.rawUsage) : null
|
|
91
|
+
entry.rawUsage ? JSON.stringify(entry.rawUsage) : null
|
|
92
92
|
]);
|
|
93
93
|
});
|
|
94
94
|
}
|
|
@@ -108,31 +108,31 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
108
108
|
const startTime = Date.now();
|
|
109
109
|
// No billing context → just embed without metering
|
|
110
110
|
if (!ctx) {
|
|
111
|
-
const
|
|
111
|
+
const { embedding } = await embedder(text);
|
|
112
112
|
return {
|
|
113
|
-
result,
|
|
113
|
+
result: embedding,
|
|
114
114
|
metered: false,
|
|
115
115
|
quotaExceeded: false,
|
|
116
|
-
latencyMs: Date.now() - startTime
|
|
116
|
+
latencyMs: Date.now() - startTime
|
|
117
117
|
};
|
|
118
118
|
}
|
|
119
119
|
const meterSlug = options.embeddingMeterSlug;
|
|
120
120
|
if (!meterSlug) {
|
|
121
|
-
const
|
|
121
|
+
const { embedding } = await embedder(text);
|
|
122
122
|
return {
|
|
123
|
-
result,
|
|
123
|
+
result: embedding,
|
|
124
124
|
metered: false,
|
|
125
125
|
quotaExceeded: false,
|
|
126
|
-
latencyMs: Date.now() - startTime
|
|
126
|
+
latencyMs: Date.now() - startTime
|
|
127
127
|
};
|
|
128
128
|
}
|
|
129
129
|
if (options.skipMetering) {
|
|
130
|
-
const
|
|
130
|
+
const { embedding } = await embedder(text);
|
|
131
131
|
return {
|
|
132
|
-
result,
|
|
132
|
+
result: embedding,
|
|
133
133
|
metered: false,
|
|
134
134
|
quotaExceeded: false,
|
|
135
|
-
latencyMs: Date.now() - startTime
|
|
135
|
+
latencyMs: Date.now() - startTime
|
|
136
136
|
};
|
|
137
137
|
}
|
|
138
138
|
// Pre-check: can this entity afford this call?
|
|
@@ -146,8 +146,6 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
146
146
|
allowed = true;
|
|
147
147
|
}
|
|
148
148
|
if (!allowed) {
|
|
149
|
-
// Placeholder: replace with actual provider token counts once generateWithUsage() is approved
|
|
150
|
-
const placeholderAmountTokens = Math.ceil(text.length / 4);
|
|
151
149
|
logInferenceUsage(ctx, {
|
|
152
150
|
databaseId: ctx.databaseId,
|
|
153
151
|
entityId: ctx.entityId,
|
|
@@ -156,9 +154,9 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
156
154
|
provider: options.provider ?? null,
|
|
157
155
|
service: 'embedding',
|
|
158
156
|
operation: 'create',
|
|
159
|
-
inputTokens:
|
|
157
|
+
inputTokens: 0,
|
|
160
158
|
outputTokens: 0,
|
|
161
|
-
totalTokens:
|
|
159
|
+
totalTokens: 0,
|
|
162
160
|
cacheReadTokens: null,
|
|
163
161
|
cacheWriteTokens: null,
|
|
164
162
|
latencyMs: Date.now() - startTime,
|
|
@@ -168,26 +166,25 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
168
166
|
embeddingLatencyMs: null,
|
|
169
167
|
status: 'quota_exceeded',
|
|
170
168
|
errorType: null,
|
|
171
|
-
rawUsage: null
|
|
169
|
+
rawUsage: null
|
|
172
170
|
}).catch(() => { });
|
|
173
171
|
return {
|
|
174
172
|
result: null,
|
|
175
173
|
metered: true,
|
|
176
174
|
quotaExceeded: true,
|
|
177
|
-
latencyMs: Date.now() - startTime
|
|
175
|
+
latencyMs: Date.now() - startTime
|
|
178
176
|
};
|
|
179
177
|
}
|
|
180
|
-
// Execute embedding
|
|
181
|
-
const
|
|
178
|
+
// Execute embedding — real token count from provider via EmbeddingResult
|
|
179
|
+
const { embedding, promptTokens } = await embedder(text);
|
|
182
180
|
const latencyMs = Date.now() - startTime;
|
|
183
|
-
// Placeholder: replace with actual provider token counts once generateWithUsage() is approved
|
|
184
|
-
const placeholderAmountTokens = Math.ceil(text.length / 4);
|
|
185
181
|
ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
|
|
186
|
-
await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug,
|
|
182
|
+
await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, promptTokens, {
|
|
187
183
|
request_id: ctx.requestId,
|
|
188
184
|
input_chars: text.length,
|
|
189
|
-
|
|
190
|
-
|
|
185
|
+
prompt_tokens: promptTokens,
|
|
186
|
+
dims: embedding.length,
|
|
187
|
+
latency_ms: latencyMs
|
|
191
188
|
});
|
|
192
189
|
}).catch(() => { });
|
|
193
190
|
// Log to inference usage table
|
|
@@ -199,9 +196,9 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
199
196
|
provider: options.provider ?? null,
|
|
200
197
|
service: 'embedding',
|
|
201
198
|
operation: 'create',
|
|
202
|
-
inputTokens:
|
|
199
|
+
inputTokens: promptTokens,
|
|
203
200
|
outputTokens: 0,
|
|
204
|
-
totalTokens:
|
|
201
|
+
totalTokens: promptTokens,
|
|
205
202
|
cacheReadTokens: null,
|
|
206
203
|
cacheWriteTokens: null,
|
|
207
204
|
latencyMs,
|
|
@@ -211,13 +208,13 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
211
208
|
embeddingLatencyMs: latencyMs,
|
|
212
209
|
status: 'success',
|
|
213
210
|
errorType: null,
|
|
214
|
-
rawUsage:
|
|
211
|
+
rawUsage: { prompt_tokens: promptTokens }
|
|
215
212
|
}).catch(() => { });
|
|
216
213
|
return {
|
|
217
|
-
result,
|
|
214
|
+
result: embedding,
|
|
218
215
|
metered: true,
|
|
219
216
|
quotaExceeded: false,
|
|
220
|
-
latencyMs
|
|
217
|
+
latencyMs
|
|
221
218
|
};
|
|
222
219
|
}
|
|
223
220
|
// ─── Metered Chat ───────────────────────────────────────────────────────────
|
|
@@ -227,31 +224,31 @@ export async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
227
224
|
export async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {}) {
|
|
228
225
|
const startTime = Date.now();
|
|
229
226
|
if (!ctx) {
|
|
230
|
-
const
|
|
227
|
+
const chatResult = await chat(messages, chatOptions);
|
|
231
228
|
return {
|
|
232
|
-
result,
|
|
229
|
+
result: chatResult.content,
|
|
233
230
|
metered: false,
|
|
234
231
|
quotaExceeded: false,
|
|
235
|
-
latencyMs: Date.now() - startTime
|
|
232
|
+
latencyMs: Date.now() - startTime
|
|
236
233
|
};
|
|
237
234
|
}
|
|
238
235
|
const meterSlug = meteringOptions.chatMeterSlug;
|
|
239
236
|
if (!meterSlug) {
|
|
240
|
-
const
|
|
237
|
+
const chatResult = await chat(messages, chatOptions);
|
|
241
238
|
return {
|
|
242
|
-
result,
|
|
239
|
+
result: chatResult.content,
|
|
243
240
|
metered: false,
|
|
244
241
|
quotaExceeded: false,
|
|
245
|
-
latencyMs: Date.now() - startTime
|
|
242
|
+
latencyMs: Date.now() - startTime
|
|
246
243
|
};
|
|
247
244
|
}
|
|
248
245
|
if (meteringOptions.skipMetering) {
|
|
249
|
-
const
|
|
246
|
+
const chatResult = await chat(messages, chatOptions);
|
|
250
247
|
return {
|
|
251
|
-
result,
|
|
248
|
+
result: chatResult.content,
|
|
252
249
|
metered: false,
|
|
253
250
|
quotaExceeded: false,
|
|
254
|
-
latencyMs: Date.now() - startTime
|
|
251
|
+
latencyMs: Date.now() - startTime
|
|
255
252
|
};
|
|
256
253
|
}
|
|
257
254
|
// Pre-check: can this entity afford this call?
|
|
@@ -265,8 +262,7 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
|
|
|
265
262
|
allowed = true;
|
|
266
263
|
}
|
|
267
264
|
if (!allowed) {
|
|
268
|
-
|
|
269
|
-
const placeholderInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
|
|
265
|
+
const estimatedInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
|
|
270
266
|
logInferenceUsage(ctx, {
|
|
271
267
|
databaseId: ctx.databaseId,
|
|
272
268
|
entityId: ctx.entityId,
|
|
@@ -275,9 +271,9 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
|
|
|
275
271
|
provider: meteringOptions.provider ?? null,
|
|
276
272
|
service: 'llm',
|
|
277
273
|
operation: 'chat',
|
|
278
|
-
inputTokens:
|
|
274
|
+
inputTokens: estimatedInputTokens,
|
|
279
275
|
outputTokens: 0,
|
|
280
|
-
totalTokens:
|
|
276
|
+
totalTokens: estimatedInputTokens,
|
|
281
277
|
cacheReadTokens: null,
|
|
282
278
|
cacheWriteTokens: null,
|
|
283
279
|
latencyMs: Date.now() - startTime,
|
|
@@ -287,33 +283,31 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
|
|
|
287
283
|
embeddingLatencyMs: null,
|
|
288
284
|
status: 'quota_exceeded',
|
|
289
285
|
errorType: null,
|
|
290
|
-
rawUsage: null
|
|
286
|
+
rawUsage: null
|
|
291
287
|
}).catch(() => { });
|
|
292
288
|
return {
|
|
293
289
|
result: null,
|
|
294
290
|
metered: true,
|
|
295
291
|
quotaExceeded: true,
|
|
296
|
-
latencyMs: Date.now() - startTime
|
|
292
|
+
latencyMs: Date.now() - startTime
|
|
297
293
|
};
|
|
298
294
|
}
|
|
299
|
-
// Execute chat completion
|
|
300
|
-
const
|
|
295
|
+
// Execute chat completion — returns real token usage from provider
|
|
296
|
+
const chatResult = await chat(messages, chatOptions);
|
|
301
297
|
const latencyMs = Date.now() - startTime;
|
|
302
|
-
|
|
303
|
-
const inputChars = messages.reduce((sum, m) => sum + m.content.length, 0);
|
|
304
|
-
const placeholderInputTokens = Math.ceil(inputChars / 4);
|
|
305
|
-
const placeholderOutputTokens = Math.ceil(result.length / 4);
|
|
306
|
-
const placeholderTotalTokens = placeholderInputTokens + placeholderOutputTokens;
|
|
298
|
+
const usage = chatResult.usage;
|
|
307
299
|
ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
|
|
308
|
-
await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug,
|
|
300
|
+
await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, usage.totalTokens, {
|
|
309
301
|
request_id: ctx.requestId,
|
|
310
|
-
|
|
311
|
-
|
|
302
|
+
input_tokens: usage.input,
|
|
303
|
+
output_tokens: usage.output,
|
|
304
|
+
cache_read_tokens: usage.cacheRead,
|
|
305
|
+
cache_write_tokens: usage.cacheWrite,
|
|
312
306
|
messages_count: messages.length,
|
|
313
|
-
latency_ms: latencyMs
|
|
307
|
+
latency_ms: latencyMs
|
|
314
308
|
});
|
|
315
309
|
}).catch(() => { });
|
|
316
|
-
// Log to inference usage table
|
|
310
|
+
// Log to inference usage table with real provider token counts
|
|
317
311
|
logInferenceUsage(ctx, {
|
|
318
312
|
databaseId: ctx.databaseId,
|
|
319
313
|
entityId: ctx.entityId,
|
|
@@ -322,11 +316,11 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
|
|
|
322
316
|
provider: meteringOptions.provider ?? null,
|
|
323
317
|
service: 'llm',
|
|
324
318
|
operation: 'chat',
|
|
325
|
-
inputTokens:
|
|
326
|
-
outputTokens:
|
|
327
|
-
totalTokens:
|
|
328
|
-
cacheReadTokens: null,
|
|
329
|
-
cacheWriteTokens: null,
|
|
319
|
+
inputTokens: usage.input,
|
|
320
|
+
outputTokens: usage.output,
|
|
321
|
+
totalTokens: usage.totalTokens,
|
|
322
|
+
cacheReadTokens: usage.cacheRead || null,
|
|
323
|
+
cacheWriteTokens: usage.cacheWrite || null,
|
|
330
324
|
latencyMs,
|
|
331
325
|
ragEnabled: false,
|
|
332
326
|
chunksRetrieved: null,
|
|
@@ -334,13 +328,13 @@ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOpti
|
|
|
334
328
|
embeddingLatencyMs: null,
|
|
335
329
|
status: 'success',
|
|
336
330
|
errorType: null,
|
|
337
|
-
rawUsage:
|
|
331
|
+
rawUsage: { reasoning: usage.reasoning }
|
|
338
332
|
}).catch(() => { });
|
|
339
333
|
return {
|
|
340
|
-
result,
|
|
334
|
+
result: chatResult.content,
|
|
341
335
|
metered: true,
|
|
342
336
|
quotaExceeded: false,
|
|
343
|
-
latencyMs
|
|
337
|
+
latencyMs
|
|
344
338
|
};
|
|
345
339
|
}
|
|
346
340
|
// ─── Error Types ────────────────────────────────────────────────────────────
|
|
@@ -12,7 +12,7 @@ import { ModuleConfigCache } from 'graphile-cache';
|
|
|
12
12
|
// ─── Cache ──────────────────────────────────────────────────────────────────
|
|
13
13
|
const agentDiscoveryCache = new ModuleConfigCache({
|
|
14
14
|
name: 'agent-discovery',
|
|
15
|
-
ttlMs: 60_000
|
|
15
|
+
ttlMs: 60_000
|
|
16
16
|
});
|
|
17
17
|
/** Clear all cached discovery results (for testing) */
|
|
18
18
|
export function clearAgentDiscoveryCache() {
|
|
@@ -53,7 +53,7 @@ export async function getAgentDiscovery(pool, dbname) {
|
|
|
53
53
|
: null,
|
|
54
54
|
task: row.task_table_name
|
|
55
55
|
? { schemaName, tableName: row.task_table_name }
|
|
56
|
-
: null
|
|
56
|
+
: null
|
|
57
57
|
};
|
|
58
58
|
}
|
|
59
59
|
}
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
* if loaded (it runs after this plugin and before the consumer plugins).
|
|
22
22
|
*/
|
|
23
23
|
import type { GraphileConfig } from 'graphile-config';
|
|
24
|
-
import type {
|
|
24
|
+
import type { ChatFunction, EmbedderFunction, GraphileLlmOptions } from '../types';
|
|
25
25
|
declare global {
|
|
26
26
|
namespace GraphileBuild {
|
|
27
27
|
interface Build {
|
|
@@ -20,8 +20,8 @@
|
|
|
20
20
|
* The optional LlmMeteringPlugin wraps the embedder with billing integration
|
|
21
21
|
* if loaded (it runs after this plugin and before the consumer plugins).
|
|
22
22
|
*/
|
|
23
|
-
import { buildEmbedder, buildEmbedderFromEnv } from '../embedder';
|
|
24
23
|
import { buildChatCompleter, buildChatCompleterFromEnv } from '../chat';
|
|
24
|
+
import { buildEmbedder, buildEmbedderFromEnv } from '../embedder';
|
|
25
25
|
import { getLlmEnvOptions } from '../env';
|
|
26
26
|
/**
|
|
27
27
|
* Creates the LlmModulePlugin with the given options.
|
|
@@ -81,10 +81,10 @@ export function createLlmModulePlugin(options = {}) {
|
|
|
81
81
|
llmEmbedder: embedder,
|
|
82
82
|
llmChatCompleter: chat,
|
|
83
83
|
llmEmbeddingModel: defaultEmbedder?.model ?? getLlmEnvOptions().embedding.model,
|
|
84
|
-
llmChatModel: defaultChatCompleter?.model ?? getLlmEnvOptions().chat.model
|
|
84
|
+
llmChatModel: defaultChatCompleter?.model ?? getLlmEnvOptions().chat.model
|
|
85
85
|
}, 'LlmModulePlugin adding llmEmbedder, llmChatCompleter, and model names to build');
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
89
|
};
|
|
90
90
|
}
|
|
@@ -31,8 +31,8 @@
|
|
|
31
31
|
* - quota exceeded → embedder returns null
|
|
32
32
|
*/
|
|
33
33
|
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
34
|
-
import { meteredEmbed } from '../metering';
|
|
35
34
|
import { getLlmBillingConfig } from '../config-cache';
|
|
35
|
+
import { meteredEmbed } from '../metering';
|
|
36
36
|
// ─── Request-scoped context via AsyncLocalStorage ───────────────────────────
|
|
37
37
|
const meteringStore = new AsyncLocalStorage();
|
|
38
38
|
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
@@ -72,7 +72,7 @@ async function buildMeteringContext(graphqlContext, resolveEntityId) {
|
|
|
72
72
|
requestId,
|
|
73
73
|
databaseId,
|
|
74
74
|
actorId,
|
|
75
|
-
inferenceLog: inferenceLogConfig
|
|
75
|
+
inferenceLog: inferenceLogConfig
|
|
76
76
|
};
|
|
77
77
|
}
|
|
78
78
|
/**
|
|
@@ -89,10 +89,10 @@ function wrapEmbedderWithMetering(embedder, meteringOptions) {
|
|
|
89
89
|
if (!ctx) {
|
|
90
90
|
// No metering context in scope — call original embedder directly
|
|
91
91
|
const startTime = Date.now();
|
|
92
|
-
const
|
|
92
|
+
const { embedding } = await embedder(text);
|
|
93
93
|
const latencyMs = Date.now() - startTime;
|
|
94
|
-
console.log(`[graphile-llm] Embed (unmetered): dims=${
|
|
95
|
-
return
|
|
94
|
+
console.log(`[graphile-llm] Embed (unmetered): dims=${embedding?.length ?? 0}, latency=${latencyMs}ms`);
|
|
95
|
+
return embedding;
|
|
96
96
|
}
|
|
97
97
|
const result = await meteredEmbed(embedder, text, ctx, meteringOptions);
|
|
98
98
|
if (result.quotaExceeded) {
|
|
@@ -103,7 +103,7 @@ function wrapEmbedderWithMetering(embedder, meteringOptions) {
|
|
|
103
103
|
}
|
|
104
104
|
// ─── Plugin ─────────────────────────────────────────────────────────────────
|
|
105
105
|
export function createLlmMeteringPlugin(meteringConfig = {}) {
|
|
106
|
-
const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId
|
|
106
|
+
const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId } = meteringConfig;
|
|
107
107
|
return {
|
|
108
108
|
name: 'LlmMeteringPlugin',
|
|
109
109
|
version: '0.2.0',
|
|
@@ -134,13 +134,13 @@ export function createLlmMeteringPlugin(meteringConfig = {}) {
|
|
|
134
134
|
chatMeterSlug: chatSlug,
|
|
135
135
|
skipMetering,
|
|
136
136
|
embeddingModel: embeddingModel ?? undefined,
|
|
137
|
-
chatModel: chatModel ?? undefined
|
|
137
|
+
chatModel: chatModel ?? undefined
|
|
138
138
|
};
|
|
139
139
|
// Replace the embedder with a metered version.
|
|
140
140
|
// Same signature except it can return null (quota exceeded).
|
|
141
141
|
const meteredEmbedder = wrapEmbedderWithMetering(originalEmbedder, meteringOptions);
|
|
142
142
|
return build.extend(build, {
|
|
143
|
-
llmEmbedder: meteredEmbedder
|
|
143
|
+
llmEmbedder: meteredEmbedder
|
|
144
144
|
}, 'LlmMeteringPlugin replacing llmEmbedder with metered version');
|
|
145
145
|
},
|
|
146
146
|
/**
|
|
@@ -148,7 +148,7 @@ export function createLlmMeteringPlugin(meteringConfig = {}) {
|
|
|
148
148
|
* request-scoped metering context via AsyncLocalStorage.
|
|
149
149
|
*/
|
|
150
150
|
GraphQLObjectType_fields_field(field, build, context) {
|
|
151
|
-
const { scope: { isRootQuery, isRootMutation }
|
|
151
|
+
const { scope: { isRootQuery, isRootMutation } } = context;
|
|
152
152
|
if (!isRootQuery && !isRootMutation)
|
|
153
153
|
return field;
|
|
154
154
|
// Only wrap if we actually replaced the embedder
|
|
@@ -166,10 +166,10 @@ export function createLlmMeteringPlugin(meteringConfig = {}) {
|
|
|
166
166
|
return meteringStore.run(ctx, () => {
|
|
167
167
|
return oldResolve(source, args, graphqlContext, info);
|
|
168
168
|
});
|
|
169
|
-
}
|
|
169
|
+
}
|
|
170
170
|
};
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
174
|
};
|
|
175
175
|
}
|
|
@@ -62,7 +62,7 @@ function parseHasChunksTag(raw, codec) {
|
|
|
62
62
|
parentFkField: parsed.parentFk || 'parent_id',
|
|
63
63
|
parentPkField: parsed.parentPk || 'id',
|
|
64
64
|
embeddingField: parsed.embeddingField || 'embedding',
|
|
65
|
-
contentField: parsed.contentField || 'content'
|
|
65
|
+
contentField: parsed.contentField || 'content'
|
|
66
66
|
};
|
|
67
67
|
}
|
|
68
68
|
/**
|
|
@@ -222,10 +222,10 @@ export function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
222
222
|
minSimilarity: $minSimilarity,
|
|
223
223
|
systemPrompt: $systemPrompt,
|
|
224
224
|
withPgClient: $withPgClient,
|
|
225
|
-
pgSettings: $pgSettings
|
|
225
|
+
pgSettings: $pgSettings
|
|
226
226
|
});
|
|
227
227
|
return lambda($combined, async (input) => {
|
|
228
|
-
const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings
|
|
228
|
+
const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings } = input;
|
|
229
229
|
if (!prompt || typeof prompt !== 'string') {
|
|
230
230
|
throw new Error('RAG_INVALID_PROMPT: prompt is required');
|
|
231
231
|
}
|
|
@@ -244,7 +244,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
244
244
|
const systemPromptTemplate = querySystemPrompt ?? ragDefaults.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
|
|
245
245
|
// Step 1: Embed the prompt
|
|
246
246
|
const startEmbed = Date.now();
|
|
247
|
-
const vector = await embedder(prompt);
|
|
247
|
+
const { embedding: vector } = await embedder(prompt);
|
|
248
248
|
const embedLatency = Date.now() - startEmbed;
|
|
249
249
|
const vectorString = `[${vector.join(',')}]`;
|
|
250
250
|
console.log(`[graphile-llm] RAG embed: dims=${vector.length}, latency=${embedLatency}ms`);
|
|
@@ -260,7 +260,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
260
260
|
content: row.content,
|
|
261
261
|
parent_id: row.parent_id,
|
|
262
262
|
distance: parseFloat(row.distance),
|
|
263
|
-
table_name: table.parentCodecName
|
|
263
|
+
table_name: table.parentCodecName
|
|
264
264
|
});
|
|
265
265
|
}
|
|
266
266
|
}
|
|
@@ -274,31 +274,31 @@ export function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
274
274
|
answer: 'No relevant context found for your query. ' +
|
|
275
275
|
'Try broadening your search or lowering the minimum similarity threshold.',
|
|
276
276
|
sources: [],
|
|
277
|
-
tokensUsed: null
|
|
277
|
+
tokensUsed: null
|
|
278
278
|
};
|
|
279
279
|
}
|
|
280
280
|
// Step 3: Assemble context
|
|
281
281
|
const contextText = assembleContext(topChunks);
|
|
282
282
|
// Step 4: Call chat completion
|
|
283
283
|
const startChat = Date.now();
|
|
284
|
-
const
|
|
284
|
+
const chatResult = await chatCompleter([
|
|
285
285
|
{ role: 'system', content: systemPromptTemplate + contextText },
|
|
286
|
-
{ role: 'user', content: prompt }
|
|
286
|
+
{ role: 'user', content: prompt }
|
|
287
287
|
], {
|
|
288
|
-
maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
|
|
288
|
+
maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
|
|
289
289
|
});
|
|
290
290
|
const chatLatency = Date.now() - startChat;
|
|
291
|
-
console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, latency=${chatLatency}ms`);
|
|
291
|
+
console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, tokens=${chatResult.usage.totalTokens}, latency=${chatLatency}ms`);
|
|
292
292
|
// Step 5: Return response
|
|
293
293
|
return {
|
|
294
|
-
answer,
|
|
294
|
+
answer: chatResult.content,
|
|
295
295
|
sources: topChunks.map((chunk) => ({
|
|
296
296
|
content: chunk.content,
|
|
297
297
|
similarity: 1 - chunk.distance,
|
|
298
298
|
tableName: chunk.table_name,
|
|
299
|
-
parentId: chunk.parent_id
|
|
299
|
+
parentId: chunk.parent_id
|
|
300
300
|
})),
|
|
301
|
-
tokensUsed:
|
|
301
|
+
tokensUsed: chatResult.usage.totalTokens
|
|
302
302
|
};
|
|
303
303
|
});
|
|
304
304
|
},
|
|
@@ -313,17 +313,17 @@ export function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
313
313
|
'to use embedText. Set defaultEmbedder in GraphileLlmPreset options.');
|
|
314
314
|
}
|
|
315
315
|
const startTime = Date.now();
|
|
316
|
-
const vector = await embedder(text);
|
|
316
|
+
const { embedding: vector } = await embedder(text);
|
|
317
317
|
const latencyMs = Date.now() - startTime;
|
|
318
318
|
console.log(`[graphile-llm] embedText: dims=${vector.length}, latency=${latencyMs}ms`);
|
|
319
319
|
return {
|
|
320
320
|
vector,
|
|
321
|
-
dimensions: vector.length
|
|
321
|
+
dimensions: vector.length
|
|
322
322
|
};
|
|
323
323
|
});
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
327
|
};
|
|
328
328
|
});
|
|
329
329
|
return {
|
|
@@ -335,7 +335,7 @@ export function createLlmRagPlugin(ragDefaults = {}) {
|
|
|
335
335
|
after: [
|
|
336
336
|
'LlmModulePlugin',
|
|
337
337
|
'UnifiedSearchPlugin',
|
|
338
|
-
'VectorCodecPlugin'
|
|
339
|
-
]
|
|
338
|
+
'VectorCodecPlugin'
|
|
339
|
+
]
|
|
340
340
|
};
|
|
341
341
|
}
|
|
@@ -45,7 +45,7 @@ function getTextToVectorMapping(pgCodec, build) {
|
|
|
45
45
|
if (isVectorCodec(attribute.codec)) {
|
|
46
46
|
const fieldName = build.inflection.attribute({
|
|
47
47
|
codec: pgCodec,
|
|
48
|
-
attributeName
|
|
48
|
+
attributeName
|
|
49
49
|
});
|
|
50
50
|
mapping[`${fieldName}Text`] = fieldName;
|
|
51
51
|
}
|
|
@@ -73,7 +73,7 @@ export function createLlmTextMutationPlugin() {
|
|
|
73
73
|
'PgAttributesPlugin',
|
|
74
74
|
'PgMutationCreatePlugin',
|
|
75
75
|
'PgMutationUpdateDeletePlugin',
|
|
76
|
-
'VectorCodecPlugin'
|
|
76
|
+
'VectorCodecPlugin'
|
|
77
77
|
],
|
|
78
78
|
schema: {
|
|
79
79
|
hooks: {
|
|
@@ -82,12 +82,12 @@ export function createLlmTextMutationPlugin() {
|
|
|
82
82
|
* for tables that have vector columns.
|
|
83
83
|
*/
|
|
84
84
|
GraphQLInputObjectType_fields(fields, build, context) {
|
|
85
|
-
const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec
|
|
85
|
+
const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec } } = context;
|
|
86
86
|
// Only intercept create/update input types for table rows
|
|
87
87
|
if (!pgCodec?.attributes || (!isPgPatch && !isPgBaseInput && !isMutationInput)) {
|
|
88
88
|
return fields;
|
|
89
89
|
}
|
|
90
|
-
const { graphql: { GraphQLString }
|
|
90
|
+
const { graphql: { GraphQLString } } = build;
|
|
91
91
|
// Find vector columns on this table
|
|
92
92
|
const vectorColumns = [];
|
|
93
93
|
for (const [attributeName, attribute] of Object.entries(pgCodec.attributes)) {
|
|
@@ -103,7 +103,7 @@ export function createLlmTextMutationPlugin() {
|
|
|
103
103
|
// Convert snake_case column name to camelCase field name
|
|
104
104
|
const fieldName = build.inflection.attribute({
|
|
105
105
|
codec: pgCodec,
|
|
106
|
-
attributeName: columnName
|
|
106
|
+
attributeName: columnName
|
|
107
107
|
});
|
|
108
108
|
const textFieldName = `${fieldName}Text`;
|
|
109
109
|
newFields = build.extend(newFields, {
|
|
@@ -111,8 +111,8 @@ export function createLlmTextMutationPlugin() {
|
|
|
111
111
|
type: GraphQLString,
|
|
112
112
|
description: `Natural language text to embed server-side into the \`${fieldName}\` vector column. ` +
|
|
113
113
|
`Mutually exclusive with \`${fieldName}\` — provide one or the other. ` +
|
|
114
|
-
'Requires the LLM plugin to be configured with an embedding provider.'
|
|
115
|
-
}
|
|
114
|
+
'Requires the LLM plugin to be configured with an embedding provider.'
|
|
115
|
+
}
|
|
116
116
|
}, `LlmTextMutationPlugin adding ${textFieldName} companion field for vector column '${columnName}'`);
|
|
117
117
|
}
|
|
118
118
|
return newFields;
|
|
@@ -129,7 +129,7 @@ export function createLlmTextMutationPlugin() {
|
|
|
129
129
|
* If the embedder returns null (e.g. quota exceeded), throws an error.
|
|
130
130
|
*/
|
|
131
131
|
GraphQLObjectType_fields_field(field, build, context) {
|
|
132
|
-
const { scope: { isRootMutation, fieldName, pgCodec }
|
|
132
|
+
const { scope: { isRootMutation, fieldName, pgCodec } } = context;
|
|
133
133
|
// Only wrap root mutation fields on tables with attributes
|
|
134
134
|
if (!isRootMutation || !pgCodec || !pgCodec.attributes) {
|
|
135
135
|
return field;
|
|
@@ -192,10 +192,10 @@ export function createLlmTextMutationPlugin() {
|
|
|
192
192
|
}
|
|
193
193
|
await embedTextFields(args);
|
|
194
194
|
return oldResolve(source, args, graphqlContext, info);
|
|
195
|
-
}
|
|
195
|
+
}
|
|
196
196
|
};
|
|
197
|
-
}
|
|
198
|
-
}
|
|
199
|
-
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
200
|
};
|
|
201
201
|
}
|