graphile-llm 0.7.3 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/graphile-llm.test.js +87 -71
- package/chat.d.ts +5 -5
- package/chat.js +45 -43
- package/config-cache.d.ts +77 -0
- package/config-cache.js +148 -0
- package/embedder.d.ts +5 -5
- package/embedder.js +11 -17
- package/env.d.ts +31 -0
- package/env.js +52 -0
- package/esm/__tests__/graphile-llm.test.js +87 -71
- package/esm/chat.d.ts +5 -5
- package/esm/chat.js +45 -40
- package/esm/config-cache.d.ts +77 -0
- package/esm/config-cache.js +143 -0
- package/esm/embedder.d.ts +5 -5
- package/esm/embedder.js +11 -17
- package/esm/env.d.ts +31 -0
- package/esm/env.js +49 -0
- package/esm/index.d.ts +14 -5
- package/esm/index.js +11 -5
- package/esm/metering.d.ts +114 -0
- package/esm/metering.js +352 -0
- package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
- package/esm/plugins/agent-discovery-plugin.js +65 -0
- package/esm/plugins/llm-module-plugin.d.ts +11 -2
- package/esm/plugins/llm-module-plugin.js +15 -7
- package/esm/plugins/metering-plugin.d.ts +42 -0
- package/esm/plugins/metering-plugin.js +175 -0
- package/esm/plugins/rag-plugin.js +20 -20
- package/esm/plugins/text-mutation-plugin.d.ts +4 -0
- package/esm/plugins/text-mutation-plugin.js +23 -13
- package/esm/plugins/text-search-plugin.d.ts +4 -0
- package/esm/plugins/text-search-plugin.js +23 -11
- package/esm/preset.d.ts +21 -1
- package/esm/preset.js +33 -6
- package/esm/types.d.ts +86 -10
- package/index.d.ts +14 -5
- package/index.js +25 -8
- package/metering.d.ts +114 -0
- package/metering.js +359 -0
- package/package.json +15 -15
- package/plugins/agent-discovery-plugin.d.ts +29 -0
- package/plugins/agent-discovery-plugin.js +69 -0
- package/plugins/llm-module-plugin.d.ts +11 -2
- package/plugins/llm-module-plugin.js +15 -7
- package/plugins/metering-plugin.d.ts +42 -0
- package/plugins/metering-plugin.js +178 -0
- package/plugins/rag-plugin.js +20 -20
- package/plugins/text-mutation-plugin.d.ts +4 -0
- package/plugins/text-mutation-plugin.js +23 -13
- package/plugins/text-search-plugin.d.ts +4 -0
- package/plugins/text-search-plugin.js +23 -11
- package/preset.d.ts +21 -1
- package/preset.js +33 -6
- package/types.d.ts +86 -10
package/esm/metering.js
ADDED
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* metering — Billing-aware wrappers for embedder and chat functions
|
|
3
|
+
*
|
|
4
|
+
* Wraps EmbedderFunction and ChatFunction with:
|
|
5
|
+
* 1. Pre-check: `check_billing_quota(meter_slug, entity_id, estimated_amount)`
|
|
6
|
+
* 2. Execute the underlying function
|
|
7
|
+
* 3. Post-record: `record_usage(meter_slug, entity_id, actual_amount)`
|
|
8
|
+
*
|
|
9
|
+
* When the quota is exceeded, the wrapper resolves with `result: null` and `quotaExceeded: true` (graceful degradation)
|
|
10
|
+
* instead of throwing, so the search pipeline can fall back to text-only.
|
|
11
|
+
*
|
|
12
|
+
* Token counts:
|
|
13
|
+
* - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
|
|
14
|
+
* - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
|
|
15
|
+
*
|
|
16
|
+
* The billing functions live in the tenant database and are called via the
|
|
17
|
+
* Graphile `withPgClient` callback. Function locations (schema, names) are
|
|
18
|
+
* resolved from `billing_module` metaschema and cached by `config-cache.ts`.
|
|
19
|
+
*/
|
|
20
|
+
// ─── Billing SQL Helpers ────────────────────────────────────────────────────
|
|
21
|
+
/**
|
|
22
|
+
* Check if the entity has sufficient quota for the requested amount.
|
|
23
|
+
* Returns true if the call is allowed, false if quota is exceeded.
|
|
24
|
+
*
|
|
25
|
+
* Gracefully returns true if the billing function doesn't exist or errors —
|
|
26
|
+
* metering is opt-in, so missing infrastructure means "allow".
|
|
27
|
+
*/
|
|
28
|
+
async function checkQuota(pgClient, billing, entityId, meterSlug, amount) {
|
|
29
|
+
try {
|
|
30
|
+
const sql = `SELECT "${billing.privateSchema}"."${billing.checkBillingQuotaFunction}"($1, $2::uuid, $3) AS allowed`;
|
|
31
|
+
const result = await pgClient.query(sql, [meterSlug, entityId, amount]);
|
|
32
|
+
return result.rows[0]?.allowed !== false;
|
|
33
|
+
}
|
|
34
|
+
catch (e) {
|
|
35
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
36
|
+
console.warn(`[graphile-llm] check_billing_quota failed (allowing): ${message}`);
|
|
37
|
+
return true;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Record usage after a successful call.
|
|
42
|
+
* Gracefully skips if the billing function doesn't exist or errors.
|
|
43
|
+
*/
|
|
44
|
+
async function recordUsage(pgClient, billing, entityId, meterSlug, amount, metadata) {
|
|
45
|
+
try {
|
|
46
|
+
const sql = `SELECT "${billing.privateSchema}"."${billing.recordUsageFunction}"($1, $2::uuid, $3, $4::jsonb)`;
|
|
47
|
+
await pgClient.query(sql, [meterSlug, entityId, amount, JSON.stringify(metadata)]);
|
|
48
|
+
}
|
|
49
|
+
catch (e) {
|
|
50
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
51
|
+
console.warn(`[graphile-llm] record_usage failed (non-fatal): ${message}`);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Write a row to the usage_log_inference table.
|
|
56
|
+
* Gracefully skips if the inference_log_module is not provisioned.
|
|
57
|
+
*
|
|
58
|
+
* TODO: Also write to child (generated) database when dual-write is needed.
|
|
59
|
+
*/
|
|
60
|
+
export async function logInferenceUsage(ctx, entry) {
|
|
61
|
+
if (!ctx.inferenceLog)
|
|
62
|
+
return;
|
|
63
|
+
const { schema, tableName } = ctx.inferenceLog;
|
|
64
|
+
const sql = `INSERT INTO "${schema}"."${tableName}" (
|
|
65
|
+
database_id, entity_id, actor_id,
|
|
66
|
+
model, provider, service, operation,
|
|
67
|
+
input_tokens, output_tokens, total_tokens,
|
|
68
|
+
cache_read_tokens, cache_write_tokens,
|
|
69
|
+
latency_ms, rag_enabled, chunks_retrieved,
|
|
70
|
+
embedding_model, embedding_latency_ms,
|
|
71
|
+
status, error_type, raw_usage
|
|
72
|
+
) VALUES (
|
|
73
|
+
$1, $2, $3,
|
|
74
|
+
$4, $5, $6, $7,
|
|
75
|
+
$8, $9, $10,
|
|
76
|
+
$11, $12,
|
|
77
|
+
$13, $14, $15,
|
|
78
|
+
$16, $17,
|
|
79
|
+
$18, $19, $20
|
|
80
|
+
)`;
|
|
81
|
+
try {
|
|
82
|
+
await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
|
|
83
|
+
await pgClient.query(sql, [
|
|
84
|
+
entry.databaseId, entry.entityId, entry.actorId,
|
|
85
|
+
entry.model, entry.provider, entry.service, entry.operation,
|
|
86
|
+
entry.inputTokens, entry.outputTokens, entry.totalTokens,
|
|
87
|
+
entry.cacheReadTokens, entry.cacheWriteTokens,
|
|
88
|
+
entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
|
|
89
|
+
entry.embeddingModel, entry.embeddingLatencyMs,
|
|
90
|
+
entry.status, entry.errorType,
|
|
91
|
+
entry.rawUsage ? JSON.stringify(entry.rawUsage) : null
|
|
92
|
+
]);
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
catch (e) {
|
|
96
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
97
|
+
console.warn(`[graphile-llm] inference log INSERT failed (non-fatal): ${message}`);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
// ─── Metered Embedder ───────────────────────────────────────────────────────
|
|
101
|
+
/**
 * Run an embedder behind a billing quota pre-check and record what it used.
 *
 * Flow when metering applies:
 *   1. `checkQuota` is called with an amount of 1; any failure to run the
 *      check allows the call (fail-open).
 *   2. If the quota is exceeded, a `quota_exceeded` row is logged and the
 *      embedder is NOT called.
 *   3. Otherwise the embedder runs, then usage recording and inference
 *      logging are started without being awaited (fire-and-forget).
 *
 * Metering is skipped (plain pass-through) when `ctx` is missing, when
 * `options.embeddingMeterSlug` is not set, or when `options.skipMetering` is set.
 *
 * @param embedder - Function resolving to `{ embedding, promptTokens }` for a text.
 * @param text - Text to embed.
 * @param ctx - Metering context, or a falsy value to disable metering.
 * @param options - `embeddingMeterSlug`, `skipMetering`, `embeddingModel`, `provider`.
 * @returns `{ result, metered, quotaExceeded, latencyMs }`; `result` is the
 *   embedding, or `null` when the quota was exceeded.
 * @throws Whatever the embedder throws; billing failures never propagate.
 */
export async function meteredEmbed(embedder, text, ctx, options = {}) {
    const startTime = Date.now();
    const meterSlug = ctx ? options.embeddingMeterSlug : undefined;
    // No billing context, no meter, or metering explicitly skipped → just embed.
    if (!ctx || !meterSlug || options.skipMetering) {
        const { embedding } = await embedder(text);
        return {
            result: embedding,
            metered: false,
            quotaExceeded: false,
            latencyMs: Date.now() - startTime
        };
    }
    const describe = (e) => (e instanceof Error ? e.message : String(e));
    // Fields shared by every inference-log row written from this function.
    const logEntry = (fields) => ({
        databaseId: ctx.databaseId,
        entityId: ctx.entityId,
        actorId: ctx.actorId,
        model: options.embeddingModel ?? meterSlug,
        provider: options.provider ?? null,
        service: 'embedding',
        operation: 'create',
        outputTokens: 0,
        cacheReadTokens: null,
        cacheWriteTokens: null,
        ragEnabled: false,
        chunksRetrieved: null,
        embeddingModel: options.embeddingModel ?? null,
        errorType: null,
        ...fields
    });
    // Pre-check: can this entity afford this call?
    let allowed = true;
    try {
        await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
            allowed = await checkQuota(pgClient, ctx.billing, ctx.entityId, meterSlug, 1);
        });
    }
    catch (e) {
        // checkQuota logs its own errors; this covers withPgClient itself failing.
        console.warn(`[graphile-llm] embedding quota pre-check could not run (allowing): ${describe(e)}`);
        allowed = true;
    }
    if (!allowed) {
        // logInferenceUsage handles and logs its own failures; the catch only
        // guards against an unhandled rejection.
        logInferenceUsage(ctx, logEntry({
            inputTokens: 0,
            totalTokens: 0,
            latencyMs: Date.now() - startTime,
            embeddingLatencyMs: null,
            status: 'quota_exceeded',
            rawUsage: null
        })).catch(() => { });
        return {
            result: null,
            metered: true,
            quotaExceeded: true,
            latencyMs: Date.now() - startTime
        };
    }
    // Execute embedding — token count comes from the embedder's result.
    const { embedding, promptTokens } = await embedder(text);
    const latencyMs = Date.now() - startTime;
    // Fire-and-forget: recording must not delay or fail the caller.
    ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
        await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, promptTokens, {
            request_id: ctx.requestId,
            input_chars: text.length,
            prompt_tokens: promptTokens,
            dims: embedding.length,
            latency_ms: latencyMs
        });
    }).catch((e) => {
        console.warn(`[graphile-llm] embedding usage recording could not run (non-fatal): ${describe(e)}`);
    });
    // Log to inference usage table
    logInferenceUsage(ctx, logEntry({
        inputTokens: promptTokens,
        totalTokens: promptTokens,
        latencyMs,
        embeddingLatencyMs: latencyMs,
        status: 'success',
        rawUsage: { prompt_tokens: promptTokens }
    })).catch(() => { });
    return {
        result: embedding,
        metered: true,
        quotaExceeded: false,
        latencyMs
    };
}
|
|
220
|
+
// ─── Metered Chat ───────────────────────────────────────────────────────────
|
|
221
|
+
/**
 * Run a chat completion behind a billing quota pre-check and record what it used.
 *
 * Flow when metering applies:
 *   1. `checkQuota` is called with an amount of 1; any failure to run the
 *      check allows the call (fail-open).
 *   2. If the quota is exceeded, a `quota_exceeded` row is logged with an
 *      estimated input size (total characters / 4) and `chat` is NOT called.
 *   3. Otherwise `chat` runs, then usage recording and inference logging are
 *      started without being awaited (fire-and-forget).
 *
 * Metering is skipped (plain pass-through) when `ctx` is missing, when
 * `meteringOptions.chatMeterSlug` is not set, or when
 * `meteringOptions.skipMetering` is set.
 *
 * @param chat - Function resolving to `{ content, usage }` for the messages.
 * @param messages - Chat messages; each `content` is read for the estimate.
 * @param ctx - Metering context, or a falsy value to disable metering.
 * @param chatOptions - Passed through to `chat` unchanged.
 * @param meteringOptions - `chatMeterSlug`, `skipMetering`, `chatModel`, `provider`.
 * @returns `{ result, metered, quotaExceeded, latencyMs }`; `result` is the
 *   chat content, or `null` when the quota was exceeded.
 * @throws Whatever `chat` throws; billing failures never propagate.
 */
export async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {}) {
    const startTime = Date.now();
    const meterSlug = ctx ? meteringOptions.chatMeterSlug : undefined;
    // No billing context, no meter, or metering explicitly skipped → just chat.
    if (!ctx || !meterSlug || meteringOptions.skipMetering) {
        const chatResult = await chat(messages, chatOptions);
        return {
            result: chatResult.content,
            metered: false,
            quotaExceeded: false,
            latencyMs: Date.now() - startTime
        };
    }
    const describe = (e) => (e instanceof Error ? e.message : String(e));
    // Fields shared by every inference-log row written from this function.
    const logEntry = (fields) => ({
        databaseId: ctx.databaseId,
        entityId: ctx.entityId,
        actorId: ctx.actorId,
        model: meteringOptions.chatModel ?? meterSlug,
        provider: meteringOptions.provider ?? null,
        service: 'llm',
        operation: 'chat',
        ragEnabled: false,
        chunksRetrieved: null,
        embeddingModel: null,
        embeddingLatencyMs: null,
        errorType: null,
        ...fields
    });
    // Pre-check: can this entity afford this call?
    let allowed = true;
    try {
        await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
            allowed = await checkQuota(pgClient, ctx.billing, ctx.entityId, meterSlug, 1);
        });
    }
    catch (e) {
        // checkQuota logs its own errors; this covers withPgClient itself failing.
        console.warn(`[graphile-llm] chat quota pre-check could not run (allowing): ${describe(e)}`);
        allowed = true;
    }
    if (!allowed) {
        // Rough estimate (~4 chars per token). Non-string content counts as 0
        // so an unusual message shape cannot turn a quota refusal into a crash.
        const totalChars = messages.reduce((sum, m) => sum + (typeof m.content === 'string' ? m.content.length : 0), 0);
        const estimatedInputTokens = Math.ceil(totalChars / 4);
        // logInferenceUsage handles and logs its own failures; the catch only
        // guards against an unhandled rejection.
        logInferenceUsage(ctx, logEntry({
            inputTokens: estimatedInputTokens,
            outputTokens: 0,
            totalTokens: estimatedInputTokens,
            cacheReadTokens: null,
            cacheWriteTokens: null,
            latencyMs: Date.now() - startTime,
            status: 'quota_exceeded',
            rawUsage: null
        })).catch(() => { });
        return {
            result: null,
            metered: true,
            quotaExceeded: true,
            latencyMs: Date.now() - startTime
        };
    }
    // Execute chat completion — token usage comes from the chat result.
    const chatResult = await chat(messages, chatOptions);
    const latencyMs = Date.now() - startTime;
    // NOTE(review): assumes a provider may omit `usage` — confirm against
    // ChatResult. Without this guard a missing `usage` throws below, after the
    // completion has already succeeded, and the caller loses the answer.
    const usage = chatResult.usage ?? {};
    // Fire-and-forget: recording must not delay or fail the caller.
    ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
        await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, usage.totalTokens, {
            request_id: ctx.requestId,
            input_tokens: usage.input,
            output_tokens: usage.output,
            cache_read_tokens: usage.cacheRead,
            cache_write_tokens: usage.cacheWrite,
            messages_count: messages.length,
            latency_ms: latencyMs
        });
    }).catch((e) => {
        console.warn(`[graphile-llm] chat usage recording could not run (non-fatal): ${describe(e)}`);
    });
    // Log to inference usage table with the token counts reported in `usage`.
    logInferenceUsage(ctx, logEntry({
        inputTokens: usage.input,
        outputTokens: usage.output,
        totalTokens: usage.totalTokens,
        // Zero / missing cache counts are stored as NULL.
        cacheReadTokens: usage.cacheRead || null,
        cacheWriteTokens: usage.cacheWrite || null,
        latencyMs,
        status: 'success',
        rawUsage: { reasoning: usage.reasoning }
    })).catch(() => { });
    return {
        result: chatResult.content,
        metered: true,
        quotaExceeded: false,
        latencyMs
    };
}
|
|
340
|
+
// ─── Error Types ────────────────────────────────────────────────────────────
|
|
341
|
+
/**
 * Error describing an exhausted LLM billing quota.
 *
 * Carries a machine-readable `code` plus the meter and entity the quota
 * belongs to, so handlers can react without parsing the message.
 */
export class QuotaExceededError extends Error {
    /** Stable machine-readable error code. */
    code = 'QUOTA_EXCEEDED';
    /** Slug of the meter whose quota was exceeded. */
    meterSlug;
    /** Id of the billing entity the quota belongs to. */
    entityId;
    /**
     * @param meterSlug - Slug of the meter whose quota was exceeded.
     * @param entityId - Id of the billing entity the quota belongs to.
     */
    constructor(meterSlug, entityId) {
        super(`LLM quota exceeded for meter '${meterSlug}' on entity '${entityId}'. Upgrade your plan or wait for the next billing period.`);
        Object.assign(this, { name: 'QuotaExceededError', meterSlug, entityId });
    }
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Discovery
|
|
3
|
+
*
|
|
4
|
+
* Discovers agent tables by querying the agent_chat_module config table
|
|
5
|
+
* at runtime. The module stores schema_id, table names, and table IDs
|
|
6
|
+
* when provisioned — no smart tags needed.
|
|
7
|
+
*
|
|
8
|
+
* Results are cached per-database with a TTL so the REST middleware
|
|
9
|
+
* doesn't hit the database on every request.
|
|
10
|
+
*/
|
|
11
|
+
import { Pool } from 'pg';
|
|
12
|
+
/** Schema-qualified location of a single agent table. */
export interface AgentTableInfo {
|
|
13
|
+
/** The PostgreSQL schema name (e.g. 'agent_public') */
|
|
14
|
+
schemaName: string;
|
|
15
|
+
/** The table name (e.g. 'agent_thread') */
|
|
16
|
+
tableName: string;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Agent tables discovered for one database. Each entry is `null` when the
 * corresponding table name is not set in the module config row.
 */
export interface AgentDiscovery {
|
|
19
|
+
/** Location of the thread table, or null if not configured. */
thread: AgentTableInfo | null;
|
|
20
|
+
/** Location of the message table, or null if not configured. */
message: AgentTableInfo | null;
|
|
21
|
+
/** Location of the task table, or null if not configured. */
task: AgentTableInfo | null;
|
|
22
|
+
}
|
|
23
|
+
/** Clear all cached discovery results for every database (intended for tests). */
|
|
24
|
+
export declare function clearAgentDiscoveryCache(): void;
|
|
25
|
+
/**
 * Look up agent table info for a database by querying the module config table.
 * Results (including a `null` result) are cached per `dbname` with a 60s TTL.
 *
 * @param pool - Pool used to run the discovery query.
 * @param dbname - Database name; used only as the cache key.
 * @returns The discovered tables, or `null` when no config row exists or the
 *   query fails.
 */
|
|
29
|
+
export declare function getAgentDiscovery(pool: Pool, dbname: string): Promise<AgentDiscovery | null>;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Discovery
|
|
3
|
+
*
|
|
4
|
+
* Discovers agent tables by querying the agent_chat_module config table
|
|
5
|
+
* at runtime. The module stores schema_id, table names, and table IDs
|
|
6
|
+
* when provisioned — no smart tags needed.
|
|
7
|
+
*
|
|
8
|
+
* Results are cached per-database with a TTL so the REST middleware
|
|
9
|
+
* doesn't hit the database on every request.
|
|
10
|
+
*/
|
|
11
|
+
import { ModuleConfigCache } from 'graphile-cache';
|
|
12
|
+
// ─── Cache ──────────────────────────────────────────────────────────────────
|
|
13
|
+
/**
 * Cache of discovery results, configured with a 60-second `ttlMs`.
 * NOTE(review): expiry behaviour is implemented by ModuleConfigCache — confirm there.
 */
const agentDiscoveryCache = new ModuleConfigCache({ name: 'agent-discovery', ttlMs: 60 * 1000 });
/** Drop every cached discovery result (intended for tests). */
export function clearAgentDiscoveryCache() {
    agentDiscoveryCache.clear();
}
|
|
21
|
+
// ─── Discovery Query ────────────────────────────────────────────────────────
|
|
22
|
+
/**
 * Reads the (single) agent_chat_module config row together with the name of
 * the schema it points at.
 */
const DISCOVERY_SQL = `
  SELECT
    s.schema_name,
    acm.thread_table_name,
    acm.message_table_name,
    acm.task_table_name
  FROM metaschema_modules_public.agent_chat_module acm
  JOIN metaschema_public.schema s ON s.id = acm.schema_id
  LIMIT 1
`;
/**
 * Look up agent table info for a database by querying the module config table.
 *
 * The outcome is cached under `dbname`, including a `null` outcome. Query
 * failures are also cached as `null`, so a broken database is not re-queried
 * on every request. Errors other than "table/schema does not exist" are
 * logged so real outages are not mistaken for "not provisioned".
 *
 * @param pool - Pool used to run the discovery query.
 * @param dbname - Database name; used only as the cache key.
 * @returns `{ thread, message, task }` (each an info object or `null`), or
 *   `null` when no config row exists or the query fails.
 */
export async function getAgentDiscovery(pool, dbname) {
    const cached = agentDiscoveryCache.get(dbname);
    if (cached !== undefined) {
        return cached;
    }
    // Build one table entry, or null when the name is missing/empty.
    const toInfo = (schemaName, tableName) => (tableName ? { schemaName, tableName } : null);
    let discovery = null;
    try {
        const { rows } = await pool.query(DISCOVERY_SQL);
        if (rows.length > 0) {
            const row = rows[0];
            const schemaName = row.schema_name;
            discovery = {
                thread: toInfo(schemaName, row.thread_table_name),
                message: toInfo(schemaName, row.message_table_name),
                task: toInfo(schemaName, row.task_table_name)
            };
        }
    }
    catch (e) {
        // 42P01 (undefined_table) / 3F000 (invalid_schema_name): the module is
        // simply not provisioned in this database — expected, stay quiet.
        const code = e?.code;
        if (code !== '42P01' && code !== '3F000') {
            const message = e instanceof Error ? e.message : String(e);
            console.warn(`[graphile-llm] agent discovery query failed for '${dbname}' (treating as not provisioned): ${message}`);
        }
    }
    agentDiscoveryCache.set(dbname, discovery);
    return discovery;
}
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
* LlmModulePlugin
|
|
3
3
|
*
|
|
4
4
|
* Detects and loads the `llm_module` configuration from `services_public.api_modules`.
|
|
5
|
-
* Makes the resolved embedder available to other plugins
|
|
5
|
+
* Makes the resolved embedder and chat completer available to other plugins
|
|
6
|
+
* via the build context.
|
|
6
7
|
*
|
|
7
8
|
* This plugin is the foundation that enables per-database LLM configuration.
|
|
8
9
|
* When an API has an `llm_module` configured, the embedder is resolved and
|
|
@@ -14,9 +15,13 @@
|
|
|
14
15
|
* 2. `defaultEmbedder` from preset options (dev/testing fallback)
|
|
15
16
|
* 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
|
|
16
17
|
* 4. null — LLM features are disabled
|
|
18
|
+
*
|
|
19
|
+
* This plugin is intentionally pure — no billing or metering logic.
|
|
20
|
+
* The optional LlmMeteringPlugin wraps the embedder with billing integration
|
|
21
|
+
* if loaded (it runs after this plugin and before the consumer plugins).
|
|
17
22
|
*/
|
|
18
23
|
import type { GraphileConfig } from 'graphile-config';
|
|
19
|
-
import type {
|
|
24
|
+
import type { ChatFunction, EmbedderFunction, GraphileLlmOptions } from '../types';
|
|
20
25
|
declare global {
|
|
21
26
|
namespace GraphileBuild {
|
|
22
27
|
interface Build {
|
|
@@ -24,6 +29,10 @@ declare global {
|
|
|
24
29
|
llmEmbedder: EmbedderFunction | null;
|
|
25
30
|
/** The resolved chat completion function, or null if not configured */
|
|
26
31
|
llmChatCompleter: ChatFunction | null;
|
|
32
|
+
/** The embedding model name (used as billing meter slug) */
|
|
33
|
+
llmEmbeddingModel: string | null;
|
|
34
|
+
/** The chat model name (used as billing meter slug) */
|
|
35
|
+
llmChatModel: string | null;
|
|
27
36
|
}
|
|
28
37
|
}
|
|
29
38
|
namespace GraphileConfig {
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
* LlmModulePlugin
|
|
3
3
|
*
|
|
4
4
|
* Detects and loads the `llm_module` configuration from `services_public.api_modules`.
|
|
5
|
-
* Makes the resolved embedder available to other plugins
|
|
5
|
+
* Makes the resolved embedder and chat completer available to other plugins
|
|
6
|
+
* via the build context.
|
|
6
7
|
*
|
|
7
8
|
* This plugin is the foundation that enables per-database LLM configuration.
|
|
8
9
|
* When an API has an `llm_module` configured, the embedder is resolved and
|
|
@@ -14,9 +15,14 @@
|
|
|
14
15
|
* 2. `defaultEmbedder` from preset options (dev/testing fallback)
|
|
15
16
|
* 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
|
|
16
17
|
* 4. null — LLM features are disabled
|
|
18
|
+
*
|
|
19
|
+
* This plugin is intentionally pure — no billing or metering logic.
|
|
20
|
+
* The optional LlmMeteringPlugin wraps the embedder with billing integration
|
|
21
|
+
* if loaded (it runs after this plugin and before the consumer plugins).
|
|
17
22
|
*/
|
|
18
|
-
import { buildEmbedder, buildEmbedderFromEnv } from '../embedder';
|
|
19
23
|
import { buildChatCompleter, buildChatCompleterFromEnv } from '../chat';
|
|
24
|
+
import { buildEmbedder, buildEmbedderFromEnv } from '../embedder';
|
|
25
|
+
import { getLlmEnvOptions } from '../env';
|
|
20
26
|
/**
|
|
21
27
|
* Creates the LlmModulePlugin with the given options.
|
|
22
28
|
*/
|
|
@@ -24,7 +30,7 @@ export function createLlmModulePlugin(options = {}) {
|
|
|
24
30
|
const { defaultEmbedder, defaultChatCompleter } = options;
|
|
25
31
|
return {
|
|
26
32
|
name: 'LlmModulePlugin',
|
|
27
|
-
version: '0.
|
|
33
|
+
version: '0.2.0',
|
|
28
34
|
description: 'Resolves LLM embedder and chat completer configuration and makes them available to other plugins',
|
|
29
35
|
schema: {
|
|
30
36
|
hooks: {
|
|
@@ -74,9 +80,11 @@ export function createLlmModulePlugin(options = {}) {
|
|
|
74
80
|
return build.extend(build, {
|
|
75
81
|
llmEmbedder: embedder,
|
|
76
82
|
llmChatCompleter: chat,
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
83
|
+
llmEmbeddingModel: defaultEmbedder?.model ?? getLlmEnvOptions().embedding.model,
|
|
84
|
+
llmChatModel: defaultChatCompleter?.model ?? getLlmEnvOptions().chat.model
|
|
85
|
+
}, 'LlmModulePlugin adding llmEmbedder, llmChatCompleter, and model names to build');
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
}
|
|
81
89
|
};
|
|
82
90
|
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LlmMeteringPlugin
|
|
3
|
+
*
|
|
4
|
+
* Opt-in billing integration for graphile-llm. Completely separate from the
|
|
5
|
+
* pure LLM plugins (text-search, text-mutation, rag).
|
|
6
|
+
*
|
|
7
|
+
* **How it works:**
|
|
8
|
+
* 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
|
|
9
|
+
* that has the same `EmbedderFunction` signature as the embedder it wraps
|
|
10
|
+
* 2. At request time, wraps every root query/mutation resolver to set up a
|
|
11
|
+
* request-scoped MeteringContext via AsyncLocalStorage
|
|
12
|
+
* 3. When the embedder is called (by any plugin), the wrapper checks
|
|
13
|
+
* AsyncLocalStorage for a metering context and if found, calls
|
|
14
|
+
* check_billing_quota before and record_usage after
|
|
15
|
+
* 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
|
|
16
|
+
* null and handles it (search falls back to text-only, mutations throw)
|
|
17
|
+
*
|
|
18
|
+
* The pure plugins never import metering, config-cache, or billing types.
|
|
19
|
+
* They call the embedder and handle null results — that's it.
|
|
20
|
+
*
|
|
21
|
+
* **Entity ID resolution:**
|
|
22
|
+
* The billing `entity_id` is resolved via a configurable callback.
|
|
23
|
+
* Default: reads `jwt.claims.user_id` from pgSettings. Override via
|
|
24
|
+
* `metering.resolveEntityId` in GraphileLlmPreset options.
|
|
25
|
+
*
|
|
26
|
+
* **Graceful behavior:**
|
|
27
|
+
* - billing_module not provisioned → embedder passes through unmetered
|
|
28
|
+
* - entity_id not available → embedder passes through unmetered
|
|
29
|
+
* - check_billing_quota throws → call is allowed (billing is opt-in)
|
|
30
|
+
* - record_usage throws → call succeeds, recording silently skipped
|
|
31
|
+
* - quota exceeded → embedder returns null
|
|
32
|
+
*/
|
|
33
|
+
import type { GraphileConfig } from 'graphile-config';
|
|
34
|
+
import type { MeteringConfig } from '../types';
|
|
35
|
+
/** Global augmentation: adds `LlmMeteringPlugin` to the `GraphileConfig.Plugins` interface. */
declare global {
|
|
36
|
+
namespace GraphileConfig {
|
|
37
|
+
interface Plugins {
|
|
38
|
+
LlmMeteringPlugin: true;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
/**
 * Create the opt-in LlmMeteringPlugin.
 *
 * @param meteringConfig - Optional metering configuration.
 * @returns A Graphile plugin object.
 */
export declare function createLlmMeteringPlugin(meteringConfig?: MeteringConfig): GraphileConfig.Plugin;
|