graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/__tests__/graphile-llm.test.js +87 -71
  2. package/chat.d.ts +5 -5
  3. package/chat.js +45 -43
  4. package/config-cache.d.ts +77 -0
  5. package/config-cache.js +148 -0
  6. package/embedder.d.ts +5 -5
  7. package/embedder.js +11 -17
  8. package/env.d.ts +31 -0
  9. package/env.js +52 -0
  10. package/esm/__tests__/graphile-llm.test.js +87 -71
  11. package/esm/chat.d.ts +5 -5
  12. package/esm/chat.js +45 -40
  13. package/esm/config-cache.d.ts +77 -0
  14. package/esm/config-cache.js +143 -0
  15. package/esm/embedder.d.ts +5 -5
  16. package/esm/embedder.js +11 -17
  17. package/esm/env.d.ts +31 -0
  18. package/esm/env.js +49 -0
  19. package/esm/index.d.ts +14 -5
  20. package/esm/index.js +11 -5
  21. package/esm/metering.d.ts +114 -0
  22. package/esm/metering.js +352 -0
  23. package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
  24. package/esm/plugins/agent-discovery-plugin.js +65 -0
  25. package/esm/plugins/llm-module-plugin.d.ts +11 -2
  26. package/esm/plugins/llm-module-plugin.js +15 -7
  27. package/esm/plugins/metering-plugin.d.ts +42 -0
  28. package/esm/plugins/metering-plugin.js +175 -0
  29. package/esm/plugins/rag-plugin.js +20 -20
  30. package/esm/plugins/text-mutation-plugin.d.ts +4 -0
  31. package/esm/plugins/text-mutation-plugin.js +23 -13
  32. package/esm/plugins/text-search-plugin.d.ts +4 -0
  33. package/esm/plugins/text-search-plugin.js +23 -11
  34. package/esm/preset.d.ts +21 -1
  35. package/esm/preset.js +33 -6
  36. package/esm/types.d.ts +86 -10
  37. package/index.d.ts +14 -5
  38. package/index.js +25 -8
  39. package/metering.d.ts +114 -0
  40. package/metering.js +359 -0
  41. package/package.json +15 -15
  42. package/plugins/agent-discovery-plugin.d.ts +29 -0
  43. package/plugins/agent-discovery-plugin.js +69 -0
  44. package/plugins/llm-module-plugin.d.ts +11 -2
  45. package/plugins/llm-module-plugin.js +15 -7
  46. package/plugins/metering-plugin.d.ts +42 -0
  47. package/plugins/metering-plugin.js +178 -0
  48. package/plugins/rag-plugin.js +20 -20
  49. package/plugins/text-mutation-plugin.d.ts +4 -0
  50. package/plugins/text-mutation-plugin.js +23 -13
  51. package/plugins/text-search-plugin.d.ts +4 -0
  52. package/plugins/text-search-plugin.js +23 -11
  53. package/preset.d.ts +21 -1
  54. package/preset.js +33 -6
  55. package/types.d.ts +86 -10
package/metering.js ADDED
@@ -0,0 +1,359 @@
1
+ "use strict";
2
+ /**
3
+ * metering — Billing-aware wrappers for embedder and chat functions
4
+ *
5
+ * Wraps EmbedderFunction and ChatFunction with:
6
+ * 1. Pre-check: `check_billing_quota(meter_slug, entity_id, estimated_amount)`
7
+ * 2. Execute the underlying function
8
+ * 3. Post-record: `record_usage(meter_slug, entity_id, actual_amount)`
9
+ *
10
+ * When the quota is exceeded, the wrapper returns a result object whose `result` is null and
11
+ * whose `quotaExceeded` is true (graceful degradation) instead of throwing, so the search pipeline can fall back to text-only.
12
+ *
13
+ * Token counts:
14
+ * - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
15
+ * - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
16
+ *
17
+ * The billing functions live in the tenant database and are called via the
18
+ * Graphile `withPgClient` callback. Function locations (schema, names) are
19
+ * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
20
+ */
21
+ Object.defineProperty(exports, "__esModule", { value: true });
22
+ exports.QuotaExceededError = void 0;
23
+ exports.logInferenceUsage = logInferenceUsage;
24
+ exports.meteredEmbed = meteredEmbed;
25
+ exports.meteredChat = meteredChat;
26
+ // ─── Billing SQL Helpers ────────────────────────────────────────────────────
27
/**
 * Ask the billing quota function whether `entityId` may consume `amount`
 * units of the meter identified by `meterSlug`.
 *
 * Fails open: the call is reported as allowed unless the function explicitly
 * returns `allowed = false`. A missing row, a missing function, or any query
 * error is logged as a warning and treated as "allowed", because metering is
 * opt-in.
 *
 * @param pgClient  Client exposing `query(sql, params)`.
 * @param billing   Object providing `privateSchema` and `checkBillingQuotaFunction`.
 * @param entityId  Billing entity id (cast to `uuid` in SQL).
 * @param meterSlug Meter identifier passed as the first function argument.
 * @param amount    Amount to check against the quota.
 * @returns `false` only when the billing function returned `allowed = false`.
 */
async function checkQuota(pgClient, billing, entityId, meterSlug, amount) {
    // Identifiers cannot be sent as bind parameters, so they are quoted here.
    // Doubling embedded `"` keeps a configured name from terminating the
    // quoted identifier early.
    const quoteIdent = (name) => `"${String(name).replace(/"/g, '""')}"`;
    try {
        const target = `${quoteIdent(billing.privateSchema)}.${quoteIdent(billing.checkBillingQuotaFunction)}`;
        const sql = `SELECT ${target}($1, $2::uuid, $3) AS allowed`;
        const result = await pgClient.query(sql, [meterSlug, entityId, amount]);
        // Anything other than an explicit `false` (including no row) allows the call.
        return result.rows[0]?.allowed !== false;
    }
    catch (e) {
        const message = e instanceof Error ? e.message : String(e);
        console.warn(`[graphile-llm] check_billing_quota failed (allowing): ${message}`);
        return true;
    }
}
46
/**
 * Record consumed usage for an entity by calling the billing usage function.
 *
 * Best effort: any failure (missing function, query error) is logged as a
 * warning and swallowed so the surrounding LLM call is never affected.
 *
 * @param pgClient  Client exposing `query(sql, params)`.
 * @param billing   Object providing `privateSchema` and `recordUsageFunction`.
 * @param entityId  Billing entity id (cast to `uuid` in SQL).
 * @param meterSlug Meter identifier passed as the first function argument.
 * @param amount    Amount of usage to record.
 * @param metadata  Extra details, serialized with `JSON.stringify` and cast to `jsonb`.
 * @returns Resolves to `undefined` whether or not the write succeeded.
 */
async function recordUsage(pgClient, billing, entityId, meterSlug, amount, metadata) {
    // Identifiers cannot be sent as bind parameters, so they are quoted here.
    // Doubling embedded `"` keeps a configured name from terminating the
    // quoted identifier early.
    const quoteIdent = (name) => `"${String(name).replace(/"/g, '""')}"`;
    try {
        const target = `${quoteIdent(billing.privateSchema)}.${quoteIdent(billing.recordUsageFunction)}`;
        const sql = `SELECT ${target}($1, $2::uuid, $3, $4::jsonb)`;
        await pgClient.query(sql, [meterSlug, entityId, amount, JSON.stringify(metadata)]);
    }
    catch (e) {
        const message = e instanceof Error ? e.message : String(e);
        console.warn(`[graphile-llm] record_usage failed (non-fatal): ${message}`);
    }
}
60
/**
 * Insert one row describing an inference call into the usage log table named
 * by `ctx.inferenceLog` (`schema` and `tableName`).
 *
 * Does nothing when `ctx.inferenceLog` is not set. An INSERT failure is logged
 * as a warning and swallowed, so logging can never fail the caller.
 *
 * TODO: Also write to child (generated) database when dual-write is needed.
 *
 * @param ctx   Metering context; `inferenceLog`, `withPgClient` and `pgSettings` are read.
 * @param entry Field values for the row; `rawUsage` is stored as JSON text, or
 *              NULL when it is falsy.
 * @returns Resolves to `undefined` once the insert was attempted.
 */
async function logInferenceUsage(ctx, entry) {
    if (!ctx.inferenceLog)
        return;
    // Identifiers cannot be sent as bind parameters; double embedded `"` so a
    // configured name cannot terminate the quoted identifier early.
    const quoteIdent = (name) => `"${String(name).replace(/"/g, '""')}"`;
    // Single source of truth for column order: the column list, the
    // placeholders and the bound values are all derived from this list so they
    // cannot drift apart.
    const columns = [
        ['database_id', entry.databaseId],
        ['entity_id', entry.entityId],
        ['actor_id', entry.actorId],
        ['model', entry.model],
        ['provider', entry.provider],
        ['service', entry.service],
        ['operation', entry.operation],
        ['input_tokens', entry.inputTokens],
        ['output_tokens', entry.outputTokens],
        ['total_tokens', entry.totalTokens],
        ['cache_read_tokens', entry.cacheReadTokens],
        ['cache_write_tokens', entry.cacheWriteTokens],
        ['latency_ms', entry.latencyMs],
        ['rag_enabled', entry.ragEnabled],
        ['chunks_retrieved', entry.chunksRetrieved],
        ['embedding_model', entry.embeddingModel],
        ['embedding_latency_ms', entry.embeddingLatencyMs],
        ['status', entry.status],
        ['error_type', entry.errorType],
        ['raw_usage', entry.rawUsage ? JSON.stringify(entry.rawUsage) : null]
    ];
    const { schema, tableName } = ctx.inferenceLog;
    const columnList = columns.map((column) => column[0]).join(', ');
    const placeholders = columns.map((_column, index) => `$${index + 1}`).join(', ');
    const values = columns.map((column) => column[1]);
    const sql = `INSERT INTO ${quoteIdent(schema)}.${quoteIdent(tableName)} (${columnList}) VALUES (${placeholders})`;
    try {
        await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
            await pgClient.query(sql, values);
        });
    }
    catch (e) {
        const message = e instanceof Error ? e.message : String(e);
        console.warn(`[graphile-llm] inference log INSERT failed (non-fatal): ${message}`);
    }
}
106
+ // ─── Metered Embedder ───────────────────────────────────────────────────────
107
/**
 * Run an embedder with an optional billing quota pre-check and usage recording.
 *
 * Metering applies only when `ctx` is provided, `options.embeddingMeterSlug`
 * is set, and `options.skipMetering` is falsy. Otherwise the embedder is
 * called directly and the result is returned with `metered: false`.
 *
 * Metered flow:
 *  1. `checkQuota(..., 1)` runs through `ctx.withPgClient`; if that throws, the
 *     call is allowed.
 *  2. If the quota is exceeded, a `quota_exceeded` row is logged and the
 *     function returns `result: null, quotaExceeded: true` without calling the
 *     embedder, so the caller can fall back to text-only search.
 *  3. Otherwise the embedder runs; usage (`promptTokens`) is recorded and a
 *     `success` row is logged, both fire-and-forget.
 *
 * Errors thrown by the embedder itself propagate to the caller.
 *
 * @param embedder Function `(text) => Promise<{ embedding, promptTokens }>`.
 * @param text     Text to embed.
 * @param ctx      Metering context, or a falsy value to skip metering.
 * @param options  `embeddingMeterSlug`, `skipMetering`, `embeddingModel`, `provider`.
 * @returns `{ result, metered, quotaExceeded, latencyMs }`.
 */
async function meteredEmbed(embedder, text, ctx, options = {}) {
    const startTime = Date.now();
    const elapsedMs = () => Date.now() - startTime;
    // Pass-through: no billing context, no meter configured, or metering skipped.
    if (!ctx || !options.embeddingMeterSlug || options.skipMetering) {
        const { embedding } = await embedder(text);
        return {
            result: embedding,
            metered: false,
            quotaExceeded: false,
            latencyMs: elapsedMs()
        };
    }
    const meterSlug = options.embeddingMeterSlug;
    // Fields shared by every inference-log row written for this call.
    const buildLogEntry = (fields) => ({
        databaseId: ctx.databaseId,
        entityId: ctx.entityId,
        actorId: ctx.actorId,
        model: options.embeddingModel ?? meterSlug,
        provider: options.provider ?? null,
        service: 'embedding',
        operation: 'create',
        outputTokens: 0,
        cacheReadTokens: null,
        cacheWriteTokens: null,
        ragEnabled: false,
        chunksRetrieved: null,
        embeddingModel: options.embeddingModel ?? null,
        errorType: null,
        ...fields
    });
    // Pre-check: can this entity afford this call? Any failure means "allowed".
    let allowed = true;
    try {
        await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
            allowed = await checkQuota(pgClient, ctx.billing, ctx.entityId, meterSlug, 1);
        });
    }
    catch {
        allowed = true;
    }
    if (!allowed) {
        // Fire-and-forget: logging must not delay or fail the response.
        logInferenceUsage(ctx, buildLogEntry({
            inputTokens: 0,
            totalTokens: 0,
            latencyMs: elapsedMs(),
            embeddingLatencyMs: null,
            status: 'quota_exceeded',
            rawUsage: null
        })).catch(() => { });
        return {
            result: null,
            metered: true,
            quotaExceeded: true,
            latencyMs: elapsedMs()
        };
    }
    // Execute embedding — token count comes from the embedder's result.
    const { embedding, promptTokens } = await embedder(text);
    const latencyMs = elapsedMs();
    // Record usage and log without awaiting; failures are intentionally ignored.
    ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
        await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, promptTokens, {
            request_id: ctx.requestId,
            input_chars: text.length,
            prompt_tokens: promptTokens,
            dims: embedding.length,
            latency_ms: latencyMs
        });
    }).catch(() => { });
    logInferenceUsage(ctx, buildLogEntry({
        inputTokens: promptTokens,
        totalTokens: promptTokens,
        latencyMs,
        embeddingLatencyMs: latencyMs,
        status: 'success',
        rawUsage: { prompt_tokens: promptTokens }
    })).catch(() => { });
    return {
        result: embedding,
        metered: true,
        quotaExceeded: false,
        latencyMs
    };
}
226
+ // ─── Metered Chat ───────────────────────────────────────────────────────────
227
/**
 * Run a chat completion with an optional billing quota pre-check and usage
 * recording.
 *
 * Metering applies only when `ctx` is provided, `meteringOptions.chatMeterSlug`
 * is set, and `meteringOptions.skipMetering` is falsy. Otherwise the chat
 * function is called directly and its `content` is returned with
 * `metered: false`.
 *
 * Metered flow:
 *  1. `checkQuota(..., 1)` runs through `ctx.withPgClient`; if that throws, the
 *     call is allowed.
 *  2. If the quota is exceeded, a `quota_exceeded` row is logged with an
 *     estimated input token count (about 4 characters per token) and the
 *     function returns `result: null, quotaExceeded: true` without calling
 *     the chat function.
 *  3. Otherwise the chat function runs; usage (`usage.totalTokens`) is
 *     recorded and a `success` row is logged, both fire-and-forget.
 *
 * Errors thrown by the chat function itself propagate to the caller.
 *
 * @param chat            Function `(messages, chatOptions) => Promise<{ content, usage }>`.
 * @param messages        Chat messages; `content` is read for the token estimate.
 * @param ctx             Metering context, or a falsy value to skip metering.
 * @param chatOptions     Passed through to `chat` unchanged.
 * @param meteringOptions `chatMeterSlug`, `skipMetering`, `chatModel`, `provider`.
 * @returns `{ result, metered, quotaExceeded, latencyMs }`.
 */
async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {}) {
    const startTime = Date.now();
    const elapsedMs = () => Date.now() - startTime;
    // Pass-through: no billing context, no meter configured, or metering skipped.
    if (!ctx || !meteringOptions.chatMeterSlug || meteringOptions.skipMetering) {
        const chatResult = await chat(messages, chatOptions);
        return {
            result: chatResult.content,
            metered: false,
            quotaExceeded: false,
            latencyMs: elapsedMs()
        };
    }
    const meterSlug = meteringOptions.chatMeterSlug;
    // Fields shared by every inference-log row written for this call.
    const buildLogEntry = (fields) => ({
        databaseId: ctx.databaseId,
        entityId: ctx.entityId,
        actorId: ctx.actorId,
        model: meteringOptions.chatModel ?? meterSlug,
        provider: meteringOptions.provider ?? null,
        service: 'llm',
        operation: 'chat',
        ragEnabled: false,
        chunksRetrieved: null,
        embeddingModel: null,
        embeddingLatencyMs: null,
        errorType: null,
        ...fields
    });
    // Pre-check: can this entity afford this call? Any failure means "allowed".
    let allowed = true;
    try {
        await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
            allowed = await checkQuota(pgClient, ctx.billing, ctx.entityId, meterSlug, 1);
        });
    }
    catch {
        allowed = true;
    }
    if (!allowed) {
        // Rough estimate only; messages whose content is not a plain string
        // contribute 0 instead of turning the sum into NaN.
        const totalChars = messages.reduce((sum, m) => sum + (typeof m?.content === 'string' ? m.content.length : 0), 0);
        const estimatedInputTokens = Math.ceil(totalChars / 4);
        // Fire-and-forget: logging must not delay or fail the response.
        logInferenceUsage(ctx, buildLogEntry({
            inputTokens: estimatedInputTokens,
            outputTokens: 0,
            totalTokens: estimatedInputTokens,
            cacheReadTokens: null,
            cacheWriteTokens: null,
            latencyMs: elapsedMs(),
            status: 'quota_exceeded',
            rawUsage: null
        })).catch(() => { });
        return {
            result: null,
            metered: true,
            quotaExceeded: true,
            latencyMs: elapsedMs()
        };
    }
    // Execute chat completion — token usage is taken from the chat result.
    const chatResult = await chat(messages, chatOptions);
    const latencyMs = elapsedMs();
    // NOTE(review): `usage` is expected to always be present; the zeroed
    // fallback only keeps a completed response from being lost to a TypeError
    // in the bookkeeping below if a provider omits it — confirm against ChatResult.
    const usage = chatResult.usage ?? { input: 0, output: 0, totalTokens: 0, cacheRead: 0, cacheWrite: 0 };
    // Record usage and log without awaiting; failures are intentionally ignored.
    ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
        await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, usage.totalTokens, {
            request_id: ctx.requestId,
            input_tokens: usage.input,
            output_tokens: usage.output,
            cache_read_tokens: usage.cacheRead,
            cache_write_tokens: usage.cacheWrite,
            messages_count: messages.length,
            latency_ms: latencyMs
        });
    }).catch(() => { });
    logInferenceUsage(ctx, buildLogEntry({
        inputTokens: usage.input,
        outputTokens: usage.output,
        totalTokens: usage.totalTokens,
        // Zero/absent cache counters are stored as NULL (unchanged behavior).
        cacheReadTokens: usage.cacheRead || null,
        cacheWriteTokens: usage.cacheWrite || null,
        latencyMs,
        status: 'success',
        rawUsage: { reasoning: usage.reasoning }
    })).catch(() => { });
    return {
        result: chatResult.content,
        metered: true,
        quotaExceeded: false,
        latencyMs
    };
}
346
+ // ─── Error Types ────────────────────────────────────────────────────────────
347
/**
 * Error describing an exhausted LLM quota for one meter and one entity.
 *
 * Carries a fixed `code` of `'QUOTA_EXCEEDED'` plus the `meterSlug` and
 * `entityId` the quota applied to, so callers can branch without parsing the
 * message.
 */
class QuotaExceededError extends Error {
    code = 'QUOTA_EXCEEDED';
    meterSlug;
    entityId;
    constructor(meterSlug, entityId) {
        super([
            `LLM quota exceeded for meter '${meterSlug}' on entity '${entityId}'. `,
            'Upgrade your plan or wait for the next billing period.'
        ].join(''));
        Object.assign(this, { name: 'QuotaExceededError', meterSlug, entityId });
    }
}
359
+ exports.QuotaExceededError = QuotaExceededError;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "graphile-llm",
3
- "version": "0.7.3",
3
+ "version": "0.9.0",
4
4
  "description": "LLM integration plugin for PostGraphile v5 — server-side text-to-vector embedding and text companion fields for pgvector columns",
5
5
  "author": "Constructive <developers@constructive.io>",
6
6
  "homepage": "https://github.com/constructive-io/constructive",
@@ -29,20 +29,20 @@
29
29
  "url": "https://github.com/constructive-io/constructive/issues"
30
30
  },
31
31
  "dependencies": {
32
- "@agentic-kit/ollama": "^1.0.3",
33
- "@constructive-io/graphql-env": "^3.11.0"
32
+ "@agentic-kit/ollama": "^2.0.0",
33
+ "graphile-cache": "^3.11.2"
34
34
  },
35
35
  "peerDependencies": {
36
- "@dataplan/pg": "1.0.0",
37
- "grafast": "1.0.0",
38
- "graphile-build": "5.0.0",
39
- "graphile-build-pg": "5.0.0",
40
- "graphile-config": "1.0.0",
36
+ "@dataplan/pg": "1.0.3",
37
+ "grafast": "1.0.2",
38
+ "graphile-build": "5.0.2",
39
+ "graphile-build-pg": "5.0.2",
40
+ "graphile-config": "1.0.1",
41
41
  "graphile-search": "workspace:^",
42
42
  "graphile-utils": "5.0.0",
43
43
  "graphql": "16.13.0",
44
- "pg-sql2": "5.0.0",
45
- "postgraphile": "5.0.0"
44
+ "pg-sql2": "5.0.1",
45
+ "postgraphile": "5.0.3"
46
46
  },
47
47
  "peerDependenciesMeta": {
48
48
  "graphile-search": {
@@ -54,11 +54,11 @@
54
54
  },
55
55
  "devDependencies": {
56
56
  "@types/node": "^22.19.11",
57
- "graphile-connection-filter": "^1.10.2",
58
- "graphile-search": "^1.13.1",
59
- "graphile-test": "^4.15.2",
57
+ "graphile-connection-filter": "^1.10.3",
58
+ "graphile-search": "^1.13.2",
59
+ "graphile-test": "^4.15.3",
60
60
  "makage": "^0.3.0",
61
- "pgsql-test": "^4.14.2"
61
+ "pgsql-test": "^4.14.3"
62
62
  },
63
63
  "keywords": [
64
64
  "postgraphile",
@@ -73,5 +73,5 @@
73
73
  "ollama",
74
74
  "openai"
75
75
  ],
76
- "gitHead": "7b7c25cb21fd0e97c2ab3c94b994b0ca89bd3247"
76
+ "gitHead": "f3ea414974306e3c0d1d68edc93b4cdd8fa6e806"
77
77
  }
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Agent Discovery
3
+ *
4
+ * Discovers agent tables by querying the agent_chat_module config table
5
+ * at runtime. The module stores schema_id, table names, and table IDs
6
+ * when provisioned — no smart tags needed.
7
+ *
8
+ * Results are cached per-database with a TTL so the REST middleware
9
+ * doesn't hit the database on every request.
10
+ */
11
+ import { Pool } from 'pg';
12
+ export interface AgentTableInfo {
13
+ /** The PostgreSQL schema name (e.g. 'agent_public') */
14
+ schemaName: string;
15
+ /** The table name (e.g. 'agent_thread') */
16
+ tableName: string;
17
+ }
18
+ export interface AgentDiscovery {
19
+ thread: AgentTableInfo | null;
20
+ message: AgentTableInfo | null;
21
+ task: AgentTableInfo | null;
22
+ }
23
+ /** Clear all cached discovery results (for testing) */
24
+ export declare function clearAgentDiscoveryCache(): void;
25
+ /**
26
+ * Look up agent table info for a database, querying the module config table.
27
+ * Results are cached per-database with a 60s TTL.
28
+ */
29
+ export declare function getAgentDiscovery(pool: Pool, dbname: string): Promise<AgentDiscovery | null>;
@@ -0,0 +1,69 @@
1
+ "use strict";
2
+ /**
3
+ * Agent Discovery
4
+ *
5
+ * Discovers agent tables by querying the agent_chat_module config table
6
+ * at runtime. The module stores schema_id, table names, and table IDs
7
+ * when provisioned — no smart tags needed.
8
+ *
9
+ * Results are cached per-database with a TTL so the REST middleware
10
+ * doesn't hit the database on every request.
11
+ */
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.clearAgentDiscoveryCache = clearAgentDiscoveryCache;
14
+ exports.getAgentDiscovery = getAgentDiscovery;
15
+ const graphile_cache_1 = require("graphile-cache");
16
+ // ─── Cache ──────────────────────────────────────────────────────────────────
17
+ const agentDiscoveryCache = new graphile_cache_1.ModuleConfigCache({
18
+ name: 'agent-discovery',
19
+ ttlMs: 60_000
20
+ });
21
/**
 * Drop every cached agent-discovery result so the next lookup queries the
 * database again. Intended for tests.
 */
function clearAgentDiscoveryCache() {
    agentDiscoveryCache.clear();
}
25
+ // ─── Discovery Query ────────────────────────────────────────────────────────
26
+ const DISCOVERY_SQL = `
27
+ SELECT
28
+ s.schema_name,
29
+ acm.thread_table_name,
30
+ acm.message_table_name,
31
+ acm.task_table_name
32
+ FROM metaschema_modules_public.agent_chat_module acm
33
+ JOIN metaschema_public.schema s ON s.id = acm.schema_id
34
+ LIMIT 1
35
+ `;
36
/**
 * Look up the agent thread/message/task tables for a database by querying the
 * module config table, caching the outcome under `dbname`.
 *
 * A cached value (including a cached `null`) is returned without querying.
 * When the query returns no row, or fails, `null` is cached and returned.
 * A failure whose SQLSTATE indicates a missing table or schema is the normal
 * "module not provisioned" case and stays silent; any other failure is logged
 * so that connection or permission problems are not mistaken for it.
 *
 * @param pool   Object exposing `query(sql)` that resolves to `{ rows }`.
 * @param dbname Cache key identifying the database.
 * @returns `{ thread, message, task }`, each `{ schemaName, tableName }` or
 *          `null` when that table name is not set; or `null` when no module
 *          row was found.
 */
async function getAgentDiscovery(pool, dbname) {
    const cached = agentDiscoveryCache.get(dbname);
    if (cached !== undefined) {
        return cached;
    }
    // A table entry exists only when the module row names that table.
    const toTable = (schemaName, tableName) => (tableName ? { schemaName, tableName } : null);
    let discovery = null;
    try {
        const { rows } = await pool.query(DISCOVERY_SQL);
        const row = rows[0];
        if (row) {
            const schemaName = row.schema_name;
            discovery = {
                thread: toTable(schemaName, row.thread_table_name),
                message: toTable(schemaName, row.message_table_name),
                task: toTable(schemaName, row.task_table_name)
            };
        }
    }
    catch (e) {
        // 42P01 = undefined_table, 3F000 = invalid_schema_name: the module is
        // simply not provisioned in this database.
        const code = e && typeof e === 'object' ? e.code : undefined;
        if (code !== '42P01' && code !== '3F000') {
            const message = e instanceof Error ? e.message : String(e);
            console.warn(`[graphile-llm] agent discovery query failed for '${dbname}' (treating as not provisioned): ${message}`);
        }
    }
    agentDiscoveryCache.set(dbname, discovery);
    return discovery;
}
@@ -2,7 +2,8 @@
2
2
  * LlmModulePlugin
3
3
  *
4
4
  * Detects and loads the `llm_module` configuration from `services_public.api_modules`.
5
- * Makes the resolved embedder available to other plugins via the build context.
5
+ * Makes the resolved embedder and chat completer available to other plugins
6
+ * via the build context.
6
7
  *
7
8
  * This plugin is the foundation that enables per-database LLM configuration.
8
9
  * When an API has an `llm_module` configured, the embedder is resolved and
@@ -14,9 +15,13 @@
14
15
  * 2. `defaultEmbedder` from preset options (dev/testing fallback)
15
16
  * 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
16
17
  * 4. null — LLM features are disabled
18
+ *
19
+ * This plugin is intentionally pure — no billing or metering logic.
20
+ * The optional LlmMeteringPlugin wraps the embedder with billing integration
21
+ * if loaded (it runs after this plugin and before the consumer plugins).
17
22
  */
18
23
  import type { GraphileConfig } from 'graphile-config';
19
- import type { EmbedderFunction, ChatFunction, GraphileLlmOptions } from '../types';
24
+ import type { ChatFunction, EmbedderFunction, GraphileLlmOptions } from '../types';
20
25
  declare global {
21
26
  namespace GraphileBuild {
22
27
  interface Build {
@@ -24,6 +29,10 @@ declare global {
24
29
  llmEmbedder: EmbedderFunction | null;
25
30
  /** The resolved chat completion function, or null if not configured */
26
31
  llmChatCompleter: ChatFunction | null;
32
+ /** The embedding model name (used as billing meter slug) */
33
+ llmEmbeddingModel: string | null;
34
+ /** The chat model name (used as billing meter slug) */
35
+ llmChatModel: string | null;
27
36
  }
28
37
  }
29
38
  namespace GraphileConfig {
@@ -3,7 +3,8 @@
3
3
  * LlmModulePlugin
4
4
  *
5
5
  * Detects and loads the `llm_module` configuration from `services_public.api_modules`.
6
- * Makes the resolved embedder available to other plugins via the build context.
6
+ * Makes the resolved embedder and chat completer available to other plugins
7
+ * via the build context.
7
8
  *
8
9
  * This plugin is the foundation that enables per-database LLM configuration.
9
10
  * When an API has an `llm_module` configured, the embedder is resolved and
@@ -15,11 +16,16 @@
15
16
  * 2. `defaultEmbedder` from preset options (dev/testing fallback)
16
17
  * 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
17
18
  * 4. null — LLM features are disabled
19
+ *
20
+ * This plugin is intentionally pure — no billing or metering logic.
21
+ * The optional LlmMeteringPlugin wraps the embedder with billing integration
22
+ * if loaded (it runs after this plugin and before the consumer plugins).
18
23
  */
19
24
  Object.defineProperty(exports, "__esModule", { value: true });
20
25
  exports.createLlmModulePlugin = createLlmModulePlugin;
21
- const embedder_1 = require("../embedder");
22
26
  const chat_1 = require("../chat");
27
+ const embedder_1 = require("../embedder");
28
+ const env_1 = require("../env");
23
29
  /**
24
30
  * Creates the LlmModulePlugin with the given options.
25
31
  */
@@ -27,7 +33,7 @@ function createLlmModulePlugin(options = {}) {
27
33
  const { defaultEmbedder, defaultChatCompleter } = options;
28
34
  return {
29
35
  name: 'LlmModulePlugin',
30
- version: '0.1.0',
36
+ version: '0.2.0',
31
37
  description: 'Resolves LLM embedder and chat completer configuration and makes them available to other plugins',
32
38
  schema: {
33
39
  hooks: {
@@ -77,9 +83,11 @@ function createLlmModulePlugin(options = {}) {
77
83
  return build.extend(build, {
78
84
  llmEmbedder: embedder,
79
85
  llmChatCompleter: chat,
80
- }, 'LlmModulePlugin adding llmEmbedder and llmChatCompleter to build');
81
- },
82
- },
83
- },
86
+ llmEmbeddingModel: defaultEmbedder?.model ?? (0, env_1.getLlmEnvOptions)().embedding.model,
87
+ llmChatModel: defaultChatCompleter?.model ?? (0, env_1.getLlmEnvOptions)().chat.model
88
+ }, 'LlmModulePlugin adding llmEmbedder, llmChatCompleter, and model names to build');
89
+ }
90
+ }
91
+ }
84
92
  };
85
93
  }
@@ -0,0 +1,42 @@
1
+ /**
2
+ * LlmMeteringPlugin
3
+ *
4
+ * Opt-in billing integration for graphile-llm. Completely separate from the
5
+ * pure LLM plugins (text-search, text-mutation, rag).
6
+ *
7
+ * **How it works:**
8
+ * 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
9
+ * that has the same `(text: string) => Promise<number[]>` signature
10
+ * 2. At request time, wraps every root query/mutation resolver to set up a
11
+ * request-scoped MeteringContext via AsyncLocalStorage
12
+ * 3. When the embedder is called (by any plugin), the wrapper checks
13
+ * AsyncLocalStorage for a metering context and if found, calls
14
+ * check_billing_quota before and record_usage after
15
+ * 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
16
+ * null and handles it (search falls back to text-only, mutations throw)
17
+ *
18
+ * The pure plugins never import metering, config-cache, or billing types.
19
+ * They call the embedder and handle null results — that's it.
20
+ *
21
+ * **Entity ID resolution:**
22
+ * The billing `entity_id` is resolved via a configurable callback.
23
+ * Default: reads `jwt.claims.user_id` from pgSettings. Override via
24
+ * `metering.resolveEntityId` in GraphileLlmPreset options.
25
+ *
26
+ * **Graceful behavior:**
27
+ * - billing_module not provisioned → embedder passes through unmetered
28
+ * - entity_id not available → embedder passes through unmetered
29
+ * - check_billing_quota throws → call is allowed (billing is opt-in)
30
+ * - record_usage throws → call succeeds, recording silently skipped
31
+ * - quota exceeded → embedder returns null
32
+ */
33
+ import type { GraphileConfig } from 'graphile-config';
34
+ import type { MeteringConfig } from '../types';
35
+ declare global {
36
+ namespace GraphileConfig {
37
+ interface Plugins {
38
+ LlmMeteringPlugin: true;
39
+ }
40
+ }
41
+ }
42
+ export declare function createLlmMeteringPlugin(meteringConfig?: MeteringConfig): GraphileConfig.Plugin;