graphile-llm 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/__tests__/graphile-llm.test.js +6 -4
  2. package/chat.d.ts +5 -5
  3. package/chat.js +8 -16
  4. package/config-cache.d.ts +77 -0
  5. package/config-cache.js +148 -0
  6. package/embedder.d.ts +5 -5
  7. package/embedder.js +8 -16
  8. package/env.d.ts +31 -0
  9. package/env.js +52 -0
  10. package/esm/__tests__/graphile-llm.test.js +6 -4
  11. package/esm/chat.d.ts +5 -5
  12. package/esm/chat.js +8 -16
  13. package/esm/config-cache.d.ts +77 -0
  14. package/esm/config-cache.js +143 -0
  15. package/esm/embedder.d.ts +5 -5
  16. package/esm/embedder.js +8 -16
  17. package/esm/env.d.ts +31 -0
  18. package/esm/env.js +49 -0
  19. package/esm/index.d.ts +10 -1
  20. package/esm/index.js +11 -1
  21. package/esm/metering.d.ts +114 -0
  22. package/esm/metering.js +358 -0
  23. package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
  24. package/esm/plugins/agent-discovery-plugin.js +65 -0
  25. package/esm/plugins/llm-module-plugin.d.ts +10 -1
  26. package/esm/plugins/llm-module-plugin.js +11 -3
  27. package/esm/plugins/metering-plugin.d.ts +42 -0
  28. package/esm/plugins/metering-plugin.js +175 -0
  29. package/esm/plugins/text-mutation-plugin.d.ts +4 -0
  30. package/esm/plugins/text-mutation-plugin.js +11 -1
  31. package/esm/plugins/text-search-plugin.d.ts +4 -0
  32. package/esm/plugins/text-search-plugin.js +13 -1
  33. package/esm/preset.d.ts +21 -1
  34. package/esm/preset.js +29 -2
  35. package/esm/types.d.ts +47 -6
  36. package/index.d.ts +10 -1
  37. package/index.js +23 -2
  38. package/metering.d.ts +114 -0
  39. package/metering.js +365 -0
  40. package/package.json +15 -15
  41. package/plugins/agent-discovery-plugin.d.ts +29 -0
  42. package/plugins/agent-discovery-plugin.js +69 -0
  43. package/plugins/llm-module-plugin.d.ts +10 -1
  44. package/plugins/llm-module-plugin.js +11 -3
  45. package/plugins/metering-plugin.d.ts +42 -0
  46. package/plugins/metering-plugin.js +178 -0
  47. package/plugins/text-mutation-plugin.d.ts +4 -0
  48. package/plugins/text-mutation-plugin.js +11 -1
  49. package/plugins/text-search-plugin.d.ts +4 -0
  50. package/plugins/text-search-plugin.js +13 -1
  51. package/preset.d.ts +21 -1
  52. package/preset.js +29 -2
  53. package/types.d.ts +47 -6
package/metering.js ADDED
@@ -0,0 +1,365 @@
1
+ "use strict";
2
+ /**
3
+ * metering — Billing-aware wrappers for embedder and chat functions
4
+ *
5
+ * Wraps EmbedderFunction and ChatFunction with:
6
+ * 1. Pre-check: `check_billing_quota(meter_slug, entity_id, estimated_amount)`
7
+ * 2. Execute the underlying function
8
+ * 3. Post-record: `record_usage(meter_slug, entity_id, actual_amount)`
9
+ *
10
+ * When the quota check reports that the quota is exceeded, the wrapper returns a null result (graceful degradation)
11
+ * instead of throwing, so the search pipeline can fall back to text-only.
12
+ *
13
+ * Token counts are estimated from text length (~4 chars per token). No
14
+ * tokenizer needed — the billing system uses tokens as abstract units
15
+ * and the credit_cost on each model's meter normalizes the relative expense.
16
+ *
17
+ * The billing functions live in the tenant database and are called via the
18
+ * Graphile `withPgClient` callback. Function locations (schema, names) are
19
+ * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
20
+ */
21
+ Object.defineProperty(exports, "__esModule", { value: true });
22
+ exports.QuotaExceededError = void 0;
23
+ exports.logInferenceUsage = logInferenceUsage;
24
+ exports.meteredEmbed = meteredEmbed;
25
+ exports.meteredChat = meteredChat;
26
+ // ─── Billing SQL Helpers ────────────────────────────────────────────────────
27
+ /**
28
+ * Check if the entity has sufficient quota for the requested amount.
29
+ * Returns true if the call is allowed, false if quota is exceeded.
30
+ *
31
+ * Gracefully returns true if the billing function doesn't exist or errors —
32
+ * metering is opt-in, so missing infrastructure means "allow".
33
+ */
34
+ async function checkQuota(pgClient, billing, entityId, meterSlug, amount) {
35
+ try {
36
+ const sql = `SELECT "${billing.privateSchema}"."${billing.checkBillingQuotaFunction}"($1, $2::uuid, $3) AS allowed`;
37
+ const result = await pgClient.query(sql, [meterSlug, entityId, amount]);
38
+ return result.rows[0]?.allowed !== false;
39
+ }
40
+ catch (e) {
41
+ const message = e instanceof Error ? e.message : String(e);
42
+ console.warn(`[graphile-llm] check_billing_quota failed (allowing): ${message}`);
43
+ return true;
44
+ }
45
+ }
46
+ /**
47
+ * Record usage after a successful call.
48
+ * Gracefully skips if the billing function doesn't exist or errors.
49
+ */
50
+ async function recordUsage(pgClient, billing, entityId, meterSlug, amount, metadata) {
51
+ try {
52
+ const sql = `SELECT "${billing.privateSchema}"."${billing.recordUsageFunction}"($1, $2::uuid, $3, $4::jsonb)`;
53
+ await pgClient.query(sql, [meterSlug, entityId, amount, JSON.stringify(metadata)]);
54
+ }
55
+ catch (e) {
56
+ const message = e instanceof Error ? e.message : String(e);
57
+ console.warn(`[graphile-llm] record_usage failed (non-fatal): ${message}`);
58
+ }
59
+ }
60
+ /**
61
+ * Write a row to the usage_log_inference table.
62
+ * Gracefully skips if the inference_log_module is not provisioned.
63
+ *
64
+ * TODO: Also write to child (generated) database when dual-write is needed.
65
+ */
66
+ async function logInferenceUsage(ctx, entry) {
67
+ if (!ctx.inferenceLog)
68
+ return;
69
+ const { schema, tableName } = ctx.inferenceLog;
70
+ const sql = `INSERT INTO "${schema}"."${tableName}" (
71
+ database_id, entity_id, actor_id,
72
+ model, provider, service, operation,
73
+ input_tokens, output_tokens, total_tokens,
74
+ cache_read_tokens, cache_write_tokens,
75
+ latency_ms, rag_enabled, chunks_retrieved,
76
+ embedding_model, embedding_latency_ms,
77
+ status, error_type, raw_usage
78
+ ) VALUES (
79
+ $1, $2, $3,
80
+ $4, $5, $6, $7,
81
+ $8, $9, $10,
82
+ $11, $12,
83
+ $13, $14, $15,
84
+ $16, $17,
85
+ $18, $19, $20
86
+ )`;
87
+ try {
88
+ await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
89
+ await pgClient.query(sql, [
90
+ entry.databaseId, entry.entityId, entry.actorId,
91
+ entry.model, entry.provider, entry.service, entry.operation,
92
+ entry.inputTokens, entry.outputTokens, entry.totalTokens,
93
+ entry.cacheReadTokens, entry.cacheWriteTokens,
94
+ entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
95
+ entry.embeddingModel, entry.embeddingLatencyMs,
96
+ entry.status, entry.errorType,
97
+ entry.rawUsage ? JSON.stringify(entry.rawUsage) : null,
98
+ ]);
99
+ });
100
+ }
101
+ catch (e) {
102
+ const message = e instanceof Error ? e.message : String(e);
103
+ console.warn(`[graphile-llm] inference log INSERT failed (non-fatal): ${message}`);
104
+ }
105
+ }
106
+ // ─── Metered Embedder ───────────────────────────────────────────────────────
107
/**
 * Run `embedder(text)` under optional billing control.
 *
 * Metering is bypassed (the embedder is simply called) when `ctx` is falsy,
 * when `options.embeddingMeterSlug` is falsy, or when `options.skipMetering`
 * is truthy.
 *
 * Otherwise:
 *  1. `checkQuota` is invoked with amount `1` through `ctx.withPgClient`;
 *     if that invocation throws, the call is allowed.
 *  2. When the quota is denied, a `quota_exceeded` row is sent to the
 *     inference log, the embedder is NOT called and `result` is `null`.
 *  3. When allowed, the embedder runs, then `recordUsage` and a `success`
 *     inference-log row are dispatched without being awaited; failures of
 *     either are ignored.
 *
 * @param embedder - function called as `embedder(text)`
 * @param text - input to embed
 * @param ctx - metering context, or a falsy value to skip metering
 * @param options - `embeddingMeterSlug`, `skipMetering`, `embeddingModel`, `provider`
 * @returns `{ result, metered, quotaExceeded, latencyMs }`
 */
async function meteredEmbed(embedder, text, ctx, options = {}) {
    const startedAt = Date.now();
    const elapsed = () => Date.now() - startedAt;
    // `options` is only consulted once a billing context is present.
    const slug = ctx ? options.embeddingMeterSlug : undefined;
    if (!ctx || !slug || options.skipMetering) {
        const vector = await embedder(text);
        return {
            result: vector,
            metered: false,
            quotaExceeded: false,
            latencyMs: elapsed(),
        };
    }
    // Shared shape of the inference-log row; only the measured values vary.
    const inferenceRow = (tokens, latency, embeddingLatency, status) => ({
        databaseId: ctx.databaseId,
        entityId: ctx.entityId,
        actorId: ctx.actorId,
        model: options.embeddingModel ?? slug,
        provider: options.provider ?? null,
        service: 'embedding',
        operation: 'create',
        inputTokens: tokens,
        outputTokens: 0,
        totalTokens: tokens,
        cacheReadTokens: null,
        cacheWriteTokens: null,
        latencyMs: latency,
        ragEnabled: false,
        chunksRetrieved: null,
        embeddingModel: options.embeddingModel ?? null,
        embeddingLatencyMs: embeddingLatency,
        status,
        errorType: null,
        rawUsage: null,
    });
    // Pre-check: can this entity afford this call? Any failure here allows the call.
    let permitted = true;
    try {
        await ctx.withPgClient(ctx.pgSettings, async (client) => {
            permitted = await checkQuota(client, ctx.billing, ctx.entityId, slug, 1);
        });
    }
    catch {
        permitted = true;
    }
    if (!permitted) {
        // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
        const estimatedTokens = Math.ceil(text.length / 4);
        logInferenceUsage(ctx, inferenceRow(estimatedTokens, elapsed(), null, 'quota_exceeded')).catch(() => { });
        return {
            result: null,
            metered: true,
            quotaExceeded: true,
            latencyMs: elapsed(),
        };
    }
    // Execute embedding
    const vector = await embedder(text);
    const latencyMs = elapsed();
    // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
    const estimatedTokens = Math.ceil(text.length / 4);
    // NOTE(review): the amount recorded below is `text.length` (characters),
    // while the inference log row uses the chars/4 token estimate — confirm
    // which unit the meter is expected to receive.
    ctx.withPgClient(ctx.pgSettings, async (client) => {
        await recordUsage(client, ctx.billing, ctx.entityId, slug, text.length, {
            request_id: ctx.requestId,
            input_chars: text.length,
            dims: vector.length,
            latency_ms: latencyMs,
        });
    }).catch(() => { });
    // Log to inference usage table (fire-and-forget)
    logInferenceUsage(ctx, inferenceRow(estimatedTokens, latencyMs, latencyMs, 'success')).catch(() => { });
    return {
        result: vector,
        metered: true,
        quotaExceeded: false,
        latencyMs,
    };
}
229
+ // ─── Metered Chat ───────────────────────────────────────────────────────────
230
/**
 * Run `chat(messages, chatOptions)` under optional billing control.
 *
 * Metering is bypassed (the chat function is simply called) when `ctx` is
 * falsy, when `meteringOptions.chatMeterSlug` is falsy, or when
 * `meteringOptions.skipMetering` is truthy.
 *
 * Otherwise:
 *  1. `checkQuota` is invoked with amount `1` through `ctx.withPgClient`;
 *     if that invocation throws, the call is allowed.
 *  2. When the quota is denied, a `quota_exceeded` row is sent to the
 *     inference log, the chat function is NOT called and `result` is `null`.
 *  3. When allowed, the chat function runs, then `recordUsage` and a
 *     `success` inference-log row are dispatched without being awaited;
 *     failures of either are ignored.
 *
 * Token figures written to the inference log are estimates: character
 * count divided by 4, rounded up.
 *
 * @param chat - function called as `chat(messages, chatOptions)`
 * @param messages - array of messages; each `content.length` is summed
 * @param ctx - metering context, or a falsy value to skip metering
 * @param chatOptions - forwarded untouched to `chat`
 * @param meteringOptions - `chatMeterSlug`, `skipMetering`, `chatModel`, `provider`
 * @returns `{ result, metered, quotaExceeded, latencyMs }`
 */
async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {}) {
    const startedAt = Date.now();
    const elapsed = () => Date.now() - startedAt;
    // `meteringOptions` is only consulted once a billing context is present.
    const slug = ctx ? meteringOptions.chatMeterSlug : undefined;
    if (!ctx || !slug || meteringOptions.skipMetering) {
        const reply = await chat(messages, chatOptions);
        return {
            result: reply,
            metered: false,
            quotaExceeded: false,
            latencyMs: elapsed(),
        };
    }
    const countInputChars = () => messages.reduce((sum, m) => sum + m.content.length, 0);
    // Shared shape of the inference-log row; only the measured values vary.
    const inferenceRow = (inputTokens, outputTokens, latency, status) => ({
        databaseId: ctx.databaseId,
        entityId: ctx.entityId,
        actorId: ctx.actorId,
        model: meteringOptions.chatModel ?? slug,
        provider: meteringOptions.provider ?? null,
        service: 'llm',
        operation: 'chat',
        inputTokens,
        outputTokens,
        totalTokens: inputTokens + outputTokens,
        cacheReadTokens: null,
        cacheWriteTokens: null,
        latencyMs: latency,
        ragEnabled: false,
        chunksRetrieved: null,
        embeddingModel: null,
        embeddingLatencyMs: null,
        status,
        errorType: null,
        rawUsage: null,
    });
    // Pre-check: can this entity afford this call? Any failure here allows the call.
    let permitted = true;
    try {
        await ctx.withPgClient(ctx.pgSettings, async (client) => {
            permitted = await checkQuota(client, ctx.billing, ctx.entityId, slug, 1);
        });
    }
    catch {
        permitted = true;
    }
    if (!permitted) {
        // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
        const estimatedInputTokens = Math.ceil(countInputChars() / 4);
        logInferenceUsage(ctx, inferenceRow(estimatedInputTokens, 0, elapsed(), 'quota_exceeded')).catch(() => { });
        return {
            result: null,
            metered: true,
            quotaExceeded: true,
            latencyMs: elapsed(),
        };
    }
    // Execute chat completion
    const reply = await chat(messages, chatOptions);
    const latencyMs = elapsed();
    // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
    const inputChars = countInputChars();
    const estimatedInputTokens = Math.ceil(inputChars / 4);
    const estimatedOutputTokens = Math.ceil(reply.length / 4);
    // NOTE(review): the amount recorded below is a character count
    // (input + output), while the inference log row uses chars/4 token
    // estimates — confirm which unit the meter is expected to receive.
    ctx.withPgClient(ctx.pgSettings, async (client) => {
        await recordUsage(client, ctx.billing, ctx.entityId, slug, inputChars + reply.length, {
            request_id: ctx.requestId,
            input_chars: inputChars,
            output_chars: reply.length,
            messages_count: messages.length,
            latency_ms: latencyMs,
        });
    }).catch(() => { });
    // Log to inference usage table (fire-and-forget)
    logInferenceUsage(ctx, inferenceRow(estimatedInputTokens, estimatedOutputTokens, latencyMs, 'success')).catch(() => { });
    return {
        result: reply,
        metered: true,
        quotaExceeded: false,
        latencyMs,
    };
}
352
+ // ─── Error Types ────────────────────────────────────────────────────────────
353
/**
 * Error describing an LLM call rejected because a billing quota was exhausted.
 *
 * Carries a fixed `code` of `'QUOTA_EXCEEDED'` plus the `meterSlug` and
 * `entityId` the instance was constructed with.
 */
class QuotaExceededError extends Error {
    code = 'QUOTA_EXCEEDED';
    meterSlug;
    entityId;
    /**
     * @param meterSlug - meter the quota applies to
     * @param entityId - billing entity that ran out of quota
     */
    constructor(meterSlug, entityId) {
        super([
            `LLM quota exceeded for meter '${meterSlug}' on entity '${entityId}'.`,
            'Upgrade your plan or wait for the next billing period.',
        ].join(' '));
        Object.assign(this, { name: 'QuotaExceededError', meterSlug, entityId });
    }
}
365
+ exports.QuotaExceededError = QuotaExceededError;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "graphile-llm",
3
- "version": "0.7.2",
3
+ "version": "0.8.0",
4
4
  "description": "LLM integration plugin for PostGraphile v5 — server-side text-to-vector embedding and text companion fields for pgvector columns",
5
5
  "author": "Constructive <developers@constructive.io>",
6
6
  "homepage": "https://github.com/constructive-io/constructive",
@@ -29,20 +29,20 @@
29
29
  "url": "https://github.com/constructive-io/constructive/issues"
30
30
  },
31
31
  "dependencies": {
32
- "@agentic-kit/ollama": "^1.0.3",
33
- "@constructive-io/graphql-env": "^3.11.0"
32
+ "@agentic-kit/ollama": "^1.2.1",
33
+ "graphile-cache": "^3.11.2"
34
34
  },
35
35
  "peerDependencies": {
36
- "@dataplan/pg": "1.0.0",
37
- "grafast": "1.0.0",
38
- "graphile-build": "5.0.0",
39
- "graphile-build-pg": "5.0.0",
40
- "graphile-config": "1.0.0",
36
+ "@dataplan/pg": "1.0.3",
37
+ "grafast": "1.0.2",
38
+ "graphile-build": "5.0.2",
39
+ "graphile-build-pg": "5.0.2",
40
+ "graphile-config": "1.0.1",
41
41
  "graphile-search": "workspace:^",
42
42
  "graphile-utils": "5.0.0",
43
43
  "graphql": "16.13.0",
44
- "pg-sql2": "5.0.0",
45
- "postgraphile": "5.0.0"
44
+ "pg-sql2": "5.0.1",
45
+ "postgraphile": "5.0.3"
46
46
  },
47
47
  "peerDependenciesMeta": {
48
48
  "graphile-search": {
@@ -54,11 +54,11 @@
54
54
  },
55
55
  "devDependencies": {
56
56
  "@types/node": "^22.19.11",
57
- "graphile-connection-filter": "^1.10.1",
58
- "graphile-search": "^1.13.0",
59
- "graphile-test": "^4.15.1",
57
+ "graphile-connection-filter": "^1.10.3",
58
+ "graphile-search": "^1.13.2",
59
+ "graphile-test": "^4.15.3",
60
60
  "makage": "^0.3.0",
61
- "pgsql-test": "^4.14.1"
61
+ "pgsql-test": "^4.14.3"
62
62
  },
63
63
  "keywords": [
64
64
  "postgraphile",
@@ -73,5 +73,5 @@
73
73
  "ollama",
74
74
  "openai"
75
75
  ],
76
- "gitHead": "1aaafe14a8ba4eeeaab099f5fdc69865ce4e2a2e"
76
+ "gitHead": "030e1144acbd4e288ee74eff2ac0021ca0382ef7"
77
77
  }
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Agent Discovery
3
+ *
4
+ * Discovers agent tables by querying the agent_chat_module config table
5
+ * at runtime. The module stores schema_id, table names, and table IDs
6
+ * when provisioned — no smart tags needed.
7
+ *
8
+ * Results are cached per-database with a TTL so the REST middleware
9
+ * doesn't hit the database on every request.
10
+ */
11
+ import { Pool } from 'pg';
12
/** Location of a single agent table. */
export interface AgentTableInfo {
    /** Name of the PostgreSQL schema holding the table, e.g. `'agent_public'`. */
    schemaName: string;
    /** Name of the table itself, e.g. `'agent_thread'`. */
    tableName: string;
}
18
/**
 * Agent tables resolved for one database. Each slot is `null` when the
 * corresponding table is not available.
 */
export interface AgentDiscovery {
    /** Thread table, or `null`. */
    thread: AgentTableInfo | null;
    /** Message table, or `null`. */
    message: AgentTableInfo | null;
    /** Task table, or `null`. */
    task: AgentTableInfo | null;
}
23
+ /** Clear all cached discovery results (for testing) */
24
+ export declare function clearAgentDiscoveryCache(): void;
25
+ /**
26
+ * Look up agent table info for a database, querying the module config table.
27
+ * Results are cached per-database with a 60s TTL.
28
+ */
29
+ export declare function getAgentDiscovery(pool: Pool, dbname: string): Promise<AgentDiscovery | null>;
@@ -0,0 +1,69 @@
1
+ "use strict";
2
+ /**
3
+ * Agent Discovery
4
+ *
5
+ * Discovers agent tables by querying the agent_chat_module config table
6
+ * at runtime. The module stores schema_id, table names, and table IDs
7
+ * when provisioned — no smart tags needed.
8
+ *
9
+ * Results are cached per-database with a TTL so the REST middleware
10
+ * doesn't hit the database on every request.
11
+ */
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.clearAgentDiscoveryCache = clearAgentDiscoveryCache;
14
+ exports.getAgentDiscovery = getAgentDiscovery;
15
+ const graphile_cache_1 = require("graphile-cache");
16
+ // ─── Cache ──────────────────────────────────────────────────────────────────
17
+ const agentDiscoveryCache = new graphile_cache_1.ModuleConfigCache({
18
+ name: 'agent-discovery',
19
+ ttlMs: 60_000,
20
+ });
21
/**
 * Empty the agent-discovery cache so the next lookup queries the database
 * again. Intended for tests.
 */
function clearAgentDiscoveryCache() {
    agentDiscoveryCache.clear();
}
25
+ // ─── Discovery Query ────────────────────────────────────────────────────────
26
+ const DISCOVERY_SQL = `
27
+ SELECT
28
+ s.schema_name,
29
+ acm.thread_table_name,
30
+ acm.message_table_name,
31
+ acm.task_table_name
32
+ FROM metaschema_modules_public.agent_chat_module acm
33
+ JOIN metaschema_public.schema s ON s.id = acm.schema_id
34
+ LIMIT 1
35
+ `;
36
/**
 * Look up agent table info for a database by querying the module config table.
 *
 * A cache hit for `dbname` (any value other than `undefined`, including a
 * cached `null`) is returned without touching the database. On a miss,
 * `DISCOVERY_SQL` is run on `pool` and the first row, if any, is mapped to
 * an `AgentDiscovery`; a table slot is `null` when its name column is empty.
 *
 * Error handling:
 *  - Errors whose SQLSTATE `code` is in class 42 or 3F (e.g. undefined
 *    table, invalid schema name) are taken to mean the module is not
 *    provisioned; `null` is cached and returned.
 *  - Any other error (for example a connection failure) is logged and
 *    `null` is returned WITHOUT caching, so a transient outage is not
 *    remembered as "not provisioned" for the whole TTL.
 *
 * @param pool - object exposing `query(sql)` that resolves to `{ rows }`
 * @param dbname - cache key identifying the database
 * @returns the discovery result, or `null` when nothing could be resolved
 */
async function getAgentDiscovery(pool, dbname) {
    const cached = agentDiscoveryCache.get(dbname);
    if (cached !== undefined) {
        return cached;
    }
    const toTableInfo = (schemaName, tableName) => (tableName ? { schemaName, tableName } : null);
    let discovery = null;
    try {
        const { rows } = await pool.query(DISCOVERY_SQL);
        if (rows.length > 0) {
            const row = rows[0];
            const schemaName = row.schema_name;
            discovery = {
                thread: toTableInfo(schemaName, row.thread_table_name),
                message: toTableInfo(schemaName, row.message_table_name),
                task: toTableInfo(schemaName, row.task_table_name),
            };
        }
    }
    catch (e) {
        const code = e instanceof Error && 'code' in e ? e.code : undefined;
        // SQLSTATE class 42 / 3F: the module table or schema is absent or
        // inaccessible in this database — treat as "not provisioned".
        const notProvisioned = typeof code === 'string' && (code.startsWith('42') || code.startsWith('3F'));
        if (!notProvisioned) {
            const message = e instanceof Error ? e.message : String(e);
            console.warn(`[graphile-llm] agent discovery query failed (not cached): ${message}`);
            return null;
        }
    }
    agentDiscoveryCache.set(dbname, discovery);
    return discovery;
}
@@ -2,7 +2,8 @@
2
2
  * LlmModulePlugin
3
3
  *
4
4
  * Detects and loads the `llm_module` configuration from `services_public.api_modules`.
5
- * Makes the resolved embedder available to other plugins via the build context.
5
+ * Makes the resolved embedder and chat completer available to other plugins
6
+ * via the build context.
6
7
  *
7
8
  * This plugin is the foundation that enables per-database LLM configuration.
8
9
  * When an API has an `llm_module` configured, the embedder is resolved and
@@ -14,6 +15,10 @@
14
15
  * 2. `defaultEmbedder` from preset options (dev/testing fallback)
15
16
  * 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
16
17
  * 4. null — LLM features are disabled
18
+ *
19
+ * This plugin is intentionally pure — no billing or metering logic.
20
+ * The optional LlmMeteringPlugin wraps the embedder with billing integration
21
+ * if loaded (it runs after this plugin and before the consumer plugins).
17
22
  */
18
23
  import type { GraphileConfig } from 'graphile-config';
19
24
  import type { EmbedderFunction, ChatFunction, GraphileLlmOptions } from '../types';
@@ -24,6 +29,10 @@ declare global {
24
29
  llmEmbedder: EmbedderFunction | null;
25
30
  /** The resolved chat completion function, or null if not configured */
26
31
  llmChatCompleter: ChatFunction | null;
32
+ /** The embedding model name (used as billing meter slug) */
33
+ llmEmbeddingModel: string | null;
34
+ /** The chat model name (used as billing meter slug) */
35
+ llmChatModel: string | null;
27
36
  }
28
37
  }
29
38
  namespace GraphileConfig {
@@ -3,7 +3,8 @@
3
3
  * LlmModulePlugin
4
4
  *
5
5
  * Detects and loads the `llm_module` configuration from `services_public.api_modules`.
6
- * Makes the resolved embedder available to other plugins via the build context.
6
+ * Makes the resolved embedder and chat completer available to other plugins
7
+ * via the build context.
7
8
  *
8
9
  * This plugin is the foundation that enables per-database LLM configuration.
9
10
  * When an API has an `llm_module` configured, the embedder is resolved and
@@ -15,11 +16,16 @@
15
16
  * 2. `defaultEmbedder` from preset options (dev/testing fallback)
16
17
  * 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
17
18
  * 4. null — LLM features are disabled
19
+ *
20
+ * This plugin is intentionally pure — no billing or metering logic.
21
+ * The optional LlmMeteringPlugin wraps the embedder with billing integration
22
+ * if loaded (it runs after this plugin and before the consumer plugins).
18
23
  */
19
24
  Object.defineProperty(exports, "__esModule", { value: true });
20
25
  exports.createLlmModulePlugin = createLlmModulePlugin;
21
26
  const embedder_1 = require("../embedder");
22
27
  const chat_1 = require("../chat");
28
+ const env_1 = require("../env");
23
29
  /**
24
30
  * Creates the LlmModulePlugin with the given options.
25
31
  */
@@ -27,7 +33,7 @@ function createLlmModulePlugin(options = {}) {
27
33
  const { defaultEmbedder, defaultChatCompleter } = options;
28
34
  return {
29
35
  name: 'LlmModulePlugin',
30
- version: '0.1.0',
36
+ version: '0.2.0',
31
37
  description: 'Resolves LLM embedder and chat completer configuration and makes them available to other plugins',
32
38
  schema: {
33
39
  hooks: {
@@ -77,7 +83,9 @@ function createLlmModulePlugin(options = {}) {
77
83
  return build.extend(build, {
78
84
  llmEmbedder: embedder,
79
85
  llmChatCompleter: chat,
80
- }, 'LlmModulePlugin adding llmEmbedder and llmChatCompleter to build');
86
+ llmEmbeddingModel: defaultEmbedder?.model ?? (0, env_1.getLlmEnvOptions)().embedding.model,
87
+ llmChatModel: defaultChatCompleter?.model ?? (0, env_1.getLlmEnvOptions)().chat.model,
88
+ }, 'LlmModulePlugin adding llmEmbedder, llmChatCompleter, and model names to build');
81
89
  },
82
90
  },
83
91
  },
@@ -0,0 +1,42 @@
1
+ /**
2
+ * LlmMeteringPlugin
3
+ *
4
+ * Opt-in billing integration for graphile-llm. Completely separate from the
5
+ * pure LLM plugins (text-search, text-mutation, rag).
6
+ *
7
+ * **How it works:**
8
+ * 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
9
+ * that has the same `(text: string) => Promise<number[]>` signature
10
+ * 2. At request time, wraps every root query/mutation resolver to set up a
11
+ * request-scoped MeteringContext via AsyncLocalStorage
12
+ * 3. When the embedder is called (by any plugin), the wrapper checks
13
+ * AsyncLocalStorage for a metering context and if found, calls
14
+ * check_billing_quota before and record_usage after
15
+ * 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
16
+ * null and handles it (search falls back to text-only, mutations throw)
17
+ *
18
+ * The pure plugins never import metering, config-cache, or billing types.
19
+ * They call the embedder and handle null results — that's it.
20
+ *
21
+ * **Entity ID resolution:**
22
+ * The billing `entity_id` is resolved via a configurable callback.
23
+ * Default: reads `jwt.claims.user_id` from pgSettings. Override via
24
+ * `metering.resolveEntityId` in GraphileLlmPreset options.
25
+ *
26
+ * **Graceful behavior:**
27
+ * - billing_module not provisioned → embedder passes through unmetered
28
+ * - entity_id not available → embedder passes through unmetered
29
+ * - check_billing_quota throws → call is allowed (billing is opt-in)
30
+ * - record_usage throws → call succeeds, recording silently skipped
31
+ * - quota exceeded → embedder returns null
32
+ */
33
+ import type { GraphileConfig } from 'graphile-config';
34
+ import type { MeteringConfig } from '../types';
35
+ declare global {
36
+ namespace GraphileConfig {
37
+ interface Plugins {
38
+ LlmMeteringPlugin: true;
39
+ }
40
+ }
41
+ }
42
+ export declare function createLlmMeteringPlugin(meteringConfig?: MeteringConfig): GraphileConfig.Plugin;