graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/__tests__/graphile-llm.test.js +87 -71
  2. package/chat.d.ts +5 -5
  3. package/chat.js +45 -43
  4. package/config-cache.d.ts +77 -0
  5. package/config-cache.js +148 -0
  6. package/embedder.d.ts +5 -5
  7. package/embedder.js +11 -17
  8. package/env.d.ts +31 -0
  9. package/env.js +52 -0
  10. package/esm/__tests__/graphile-llm.test.js +87 -71
  11. package/esm/chat.d.ts +5 -5
  12. package/esm/chat.js +45 -40
  13. package/esm/config-cache.d.ts +77 -0
  14. package/esm/config-cache.js +143 -0
  15. package/esm/embedder.d.ts +5 -5
  16. package/esm/embedder.js +11 -17
  17. package/esm/env.d.ts +31 -0
  18. package/esm/env.js +49 -0
  19. package/esm/index.d.ts +14 -5
  20. package/esm/index.js +11 -5
  21. package/esm/metering.d.ts +114 -0
  22. package/esm/metering.js +352 -0
  23. package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
  24. package/esm/plugins/agent-discovery-plugin.js +65 -0
  25. package/esm/plugins/llm-module-plugin.d.ts +11 -2
  26. package/esm/plugins/llm-module-plugin.js +15 -7
  27. package/esm/plugins/metering-plugin.d.ts +42 -0
  28. package/esm/plugins/metering-plugin.js +175 -0
  29. package/esm/plugins/rag-plugin.js +20 -20
  30. package/esm/plugins/text-mutation-plugin.d.ts +4 -0
  31. package/esm/plugins/text-mutation-plugin.js +23 -13
  32. package/esm/plugins/text-search-plugin.d.ts +4 -0
  33. package/esm/plugins/text-search-plugin.js +23 -11
  34. package/esm/preset.d.ts +21 -1
  35. package/esm/preset.js +33 -6
  36. package/esm/types.d.ts +86 -10
  37. package/index.d.ts +14 -5
  38. package/index.js +25 -8
  39. package/metering.d.ts +114 -0
  40. package/metering.js +359 -0
  41. package/package.json +15 -15
  42. package/plugins/agent-discovery-plugin.d.ts +29 -0
  43. package/plugins/agent-discovery-plugin.js +69 -0
  44. package/plugins/llm-module-plugin.d.ts +11 -2
  45. package/plugins/llm-module-plugin.js +15 -7
  46. package/plugins/metering-plugin.d.ts +42 -0
  47. package/plugins/metering-plugin.js +178 -0
  48. package/plugins/rag-plugin.js +20 -20
  49. package/plugins/text-mutation-plugin.d.ts +4 -0
  50. package/plugins/text-mutation-plugin.js +23 -13
  51. package/plugins/text-search-plugin.d.ts +4 -0
  52. package/plugins/text-search-plugin.js +23 -11
  53. package/preset.d.ts +21 -1
  54. package/preset.js +33 -6
  55. package/types.d.ts +86 -10
@@ -0,0 +1,352 @@
1
+ /**
2
+ * metering — Billing-aware wrappers for embedder and chat functions
3
+ *
4
+ * Wraps EmbedderFunction and ChatFunction with:
5
+ * 1. Pre-check: `check_billing_quota(meter_slug, entity_id, estimated_amount)`
6
+ * 2. Execute the underlying function
7
+ * 3. Post-record: `record_usage(meter_slug, entity_id, actual_amount)`
8
+ *
9
+ * When the quota check fails, the wrapper returns null (graceful degradation)
10
+ * instead of throwing, so the search pipeline can fall back to text-only.
11
+ *
12
+ * Token counts:
13
+ * - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
14
+ * - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
15
+ *
16
+ * The billing functions live in the tenant database and are called via the
17
+ * Graphile `withPgClient` callback. Function locations (schema, names) are
18
+ * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
19
+ */
20
+ // ─── Billing SQL Helpers ────────────────────────────────────────────────────
21
+ /**
22
+ * Check if the entity has sufficient quota for the requested amount.
23
+ * Returns true if the call is allowed, false if quota is exceeded.
24
+ *
25
+ * Gracefully returns true if the billing function doesn't exist or errors —
26
+ * metering is opt-in, so missing infrastructure means "allow".
27
+ */
28
+ async function checkQuota(pgClient, billing, entityId, meterSlug, amount) {
29
+ try {
30
+ const sql = `SELECT "${billing.privateSchema}"."${billing.checkBillingQuotaFunction}"($1, $2::uuid, $3) AS allowed`;
31
+ const result = await pgClient.query(sql, [meterSlug, entityId, amount]);
32
+ return result.rows[0]?.allowed !== false;
33
+ }
34
+ catch (e) {
35
+ const message = e instanceof Error ? e.message : String(e);
36
+ console.warn(`[graphile-llm] check_billing_quota failed (allowing): ${message}`);
37
+ return true;
38
+ }
39
+ }
40
+ /**
41
+ * Record usage after a successful call.
42
+ * Gracefully skips if the billing function doesn't exist or errors.
43
+ */
44
+ async function recordUsage(pgClient, billing, entityId, meterSlug, amount, metadata) {
45
+ try {
46
+ const sql = `SELECT "${billing.privateSchema}"."${billing.recordUsageFunction}"($1, $2::uuid, $3, $4::jsonb)`;
47
+ await pgClient.query(sql, [meterSlug, entityId, amount, JSON.stringify(metadata)]);
48
+ }
49
+ catch (e) {
50
+ const message = e instanceof Error ? e.message : String(e);
51
+ console.warn(`[graphile-llm] record_usage failed (non-fatal): ${message}`);
52
+ }
53
+ }
54
+ /**
55
+ * Write a row to the usage_log_inference table.
56
+ * Gracefully skips if the inference_log_module is not provisioned.
57
+ *
58
+ * TODO: Also write to child (generated) database when dual-write is needed.
59
+ */
60
+ export async function logInferenceUsage(ctx, entry) {
61
+ if (!ctx.inferenceLog)
62
+ return;
63
+ const { schema, tableName } = ctx.inferenceLog;
64
+ const sql = `INSERT INTO "${schema}"."${tableName}" (
65
+ database_id, entity_id, actor_id,
66
+ model, provider, service, operation,
67
+ input_tokens, output_tokens, total_tokens,
68
+ cache_read_tokens, cache_write_tokens,
69
+ latency_ms, rag_enabled, chunks_retrieved,
70
+ embedding_model, embedding_latency_ms,
71
+ status, error_type, raw_usage
72
+ ) VALUES (
73
+ $1, $2, $3,
74
+ $4, $5, $6, $7,
75
+ $8, $9, $10,
76
+ $11, $12,
77
+ $13, $14, $15,
78
+ $16, $17,
79
+ $18, $19, $20
80
+ )`;
81
+ try {
82
+ await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
83
+ await pgClient.query(sql, [
84
+ entry.databaseId, entry.entityId, entry.actorId,
85
+ entry.model, entry.provider, entry.service, entry.operation,
86
+ entry.inputTokens, entry.outputTokens, entry.totalTokens,
87
+ entry.cacheReadTokens, entry.cacheWriteTokens,
88
+ entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
89
+ entry.embeddingModel, entry.embeddingLatencyMs,
90
+ entry.status, entry.errorType,
91
+ entry.rawUsage ? JSON.stringify(entry.rawUsage) : null
92
+ ]);
93
+ });
94
+ }
95
+ catch (e) {
96
+ const message = e instanceof Error ? e.message : String(e);
97
+ console.warn(`[graphile-llm] inference log INSERT failed (non-fatal): ${message}`);
98
+ }
99
+ }
100
+ // ─── Metered Embedder ───────────────────────────────────────────────────────
101
+ /**
102
+ * Wrap an embedder with billing quota check + usage recording.
103
+ *
104
+ * The returned MeterResult contains `quotaExceeded: true` when the pre-check
105
+ * fails, enabling the caller to fall back to text-only search.
106
+ */
107
+ export async function meteredEmbed(embedder, text, ctx, options = {}) {
108
+ const startTime = Date.now();
109
+ // No billing context → just embed without metering
110
+ if (!ctx) {
111
+ const { embedding } = await embedder(text);
112
+ return {
113
+ result: embedding,
114
+ metered: false,
115
+ quotaExceeded: false,
116
+ latencyMs: Date.now() - startTime
117
+ };
118
+ }
119
+ const meterSlug = options.embeddingMeterSlug;
120
+ if (!meterSlug) {
121
+ const { embedding } = await embedder(text);
122
+ return {
123
+ result: embedding,
124
+ metered: false,
125
+ quotaExceeded: false,
126
+ latencyMs: Date.now() - startTime
127
+ };
128
+ }
129
+ if (options.skipMetering) {
130
+ const { embedding } = await embedder(text);
131
+ return {
132
+ result: embedding,
133
+ metered: false,
134
+ quotaExceeded: false,
135
+ latencyMs: Date.now() - startTime
136
+ };
137
+ }
138
+ // Pre-check: can this entity afford this call?
139
+ let allowed = true;
140
+ try {
141
+ await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
142
+ allowed = await checkQuota(pgClient, ctx.billing, ctx.entityId, meterSlug, 1);
143
+ });
144
+ }
145
+ catch {
146
+ allowed = true;
147
+ }
148
+ if (!allowed) {
149
+ logInferenceUsage(ctx, {
150
+ databaseId: ctx.databaseId,
151
+ entityId: ctx.entityId,
152
+ actorId: ctx.actorId,
153
+ model: options.embeddingModel ?? meterSlug,
154
+ provider: options.provider ?? null,
155
+ service: 'embedding',
156
+ operation: 'create',
157
+ inputTokens: 0,
158
+ outputTokens: 0,
159
+ totalTokens: 0,
160
+ cacheReadTokens: null,
161
+ cacheWriteTokens: null,
162
+ latencyMs: Date.now() - startTime,
163
+ ragEnabled: false,
164
+ chunksRetrieved: null,
165
+ embeddingModel: options.embeddingModel ?? null,
166
+ embeddingLatencyMs: null,
167
+ status: 'quota_exceeded',
168
+ errorType: null,
169
+ rawUsage: null
170
+ }).catch(() => { });
171
+ return {
172
+ result: null,
173
+ metered: true,
174
+ quotaExceeded: true,
175
+ latencyMs: Date.now() - startTime
176
+ };
177
+ }
178
+ // Execute embedding — real token count from provider via EmbeddingResult
179
+ const { embedding, promptTokens } = await embedder(text);
180
+ const latencyMs = Date.now() - startTime;
181
+ ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
182
+ await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, promptTokens, {
183
+ request_id: ctx.requestId,
184
+ input_chars: text.length,
185
+ prompt_tokens: promptTokens,
186
+ dims: embedding.length,
187
+ latency_ms: latencyMs
188
+ });
189
+ }).catch(() => { });
190
+ // Log to inference usage table
191
+ logInferenceUsage(ctx, {
192
+ databaseId: ctx.databaseId,
193
+ entityId: ctx.entityId,
194
+ actorId: ctx.actorId,
195
+ model: options.embeddingModel ?? meterSlug,
196
+ provider: options.provider ?? null,
197
+ service: 'embedding',
198
+ operation: 'create',
199
+ inputTokens: promptTokens,
200
+ outputTokens: 0,
201
+ totalTokens: promptTokens,
202
+ cacheReadTokens: null,
203
+ cacheWriteTokens: null,
204
+ latencyMs,
205
+ ragEnabled: false,
206
+ chunksRetrieved: null,
207
+ embeddingModel: options.embeddingModel ?? null,
208
+ embeddingLatencyMs: latencyMs,
209
+ status: 'success',
210
+ errorType: null,
211
+ rawUsage: { prompt_tokens: promptTokens }
212
+ }).catch(() => { });
213
+ return {
214
+ result: embedding,
215
+ metered: true,
216
+ quotaExceeded: false,
217
+ latencyMs
218
+ };
219
+ }
220
+ // ─── Metered Chat ───────────────────────────────────────────────────────────
221
+ /**
222
+ * Wrap a chat completion call with billing quota check + usage recording.
223
+ */
224
+ export async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {}) {
225
+ const startTime = Date.now();
226
+ if (!ctx) {
227
+ const chatResult = await chat(messages, chatOptions);
228
+ return {
229
+ result: chatResult.content,
230
+ metered: false,
231
+ quotaExceeded: false,
232
+ latencyMs: Date.now() - startTime
233
+ };
234
+ }
235
+ const meterSlug = meteringOptions.chatMeterSlug;
236
+ if (!meterSlug) {
237
+ const chatResult = await chat(messages, chatOptions);
238
+ return {
239
+ result: chatResult.content,
240
+ metered: false,
241
+ quotaExceeded: false,
242
+ latencyMs: Date.now() - startTime
243
+ };
244
+ }
245
+ if (meteringOptions.skipMetering) {
246
+ const chatResult = await chat(messages, chatOptions);
247
+ return {
248
+ result: chatResult.content,
249
+ metered: false,
250
+ quotaExceeded: false,
251
+ latencyMs: Date.now() - startTime
252
+ };
253
+ }
254
+ // Pre-check: can this entity afford this call?
255
+ let allowed = true;
256
+ try {
257
+ await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
258
+ allowed = await checkQuota(pgClient, ctx.billing, ctx.entityId, meterSlug, 1);
259
+ });
260
+ }
261
+ catch {
262
+ allowed = true;
263
+ }
264
+ if (!allowed) {
265
+ const estimatedInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
266
+ logInferenceUsage(ctx, {
267
+ databaseId: ctx.databaseId,
268
+ entityId: ctx.entityId,
269
+ actorId: ctx.actorId,
270
+ model: meteringOptions.chatModel ?? meterSlug,
271
+ provider: meteringOptions.provider ?? null,
272
+ service: 'llm',
273
+ operation: 'chat',
274
+ inputTokens: estimatedInputTokens,
275
+ outputTokens: 0,
276
+ totalTokens: estimatedInputTokens,
277
+ cacheReadTokens: null,
278
+ cacheWriteTokens: null,
279
+ latencyMs: Date.now() - startTime,
280
+ ragEnabled: false,
281
+ chunksRetrieved: null,
282
+ embeddingModel: null,
283
+ embeddingLatencyMs: null,
284
+ status: 'quota_exceeded',
285
+ errorType: null,
286
+ rawUsage: null
287
+ }).catch(() => { });
288
+ return {
289
+ result: null,
290
+ metered: true,
291
+ quotaExceeded: true,
292
+ latencyMs: Date.now() - startTime
293
+ };
294
+ }
295
+ // Execute chat completion — returns real token usage from provider
296
+ const chatResult = await chat(messages, chatOptions);
297
+ const latencyMs = Date.now() - startTime;
298
+ const usage = chatResult.usage;
299
+ ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
300
+ await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, usage.totalTokens, {
301
+ request_id: ctx.requestId,
302
+ input_tokens: usage.input,
303
+ output_tokens: usage.output,
304
+ cache_read_tokens: usage.cacheRead,
305
+ cache_write_tokens: usage.cacheWrite,
306
+ messages_count: messages.length,
307
+ latency_ms: latencyMs
308
+ });
309
+ }).catch(() => { });
310
+ // Log to inference usage table with real provider token counts
311
+ logInferenceUsage(ctx, {
312
+ databaseId: ctx.databaseId,
313
+ entityId: ctx.entityId,
314
+ actorId: ctx.actorId,
315
+ model: meteringOptions.chatModel ?? meterSlug,
316
+ provider: meteringOptions.provider ?? null,
317
+ service: 'llm',
318
+ operation: 'chat',
319
+ inputTokens: usage.input,
320
+ outputTokens: usage.output,
321
+ totalTokens: usage.totalTokens,
322
+ cacheReadTokens: usage.cacheRead || null,
323
+ cacheWriteTokens: usage.cacheWrite || null,
324
+ latencyMs,
325
+ ragEnabled: false,
326
+ chunksRetrieved: null,
327
+ embeddingModel: null,
328
+ embeddingLatencyMs: null,
329
+ status: 'success',
330
+ errorType: null,
331
+ rawUsage: { reasoning: usage.reasoning }
332
+ }).catch(() => { });
333
+ return {
334
+ result: chatResult.content,
335
+ metered: true,
336
+ quotaExceeded: false,
337
+ latencyMs
338
+ };
339
+ }
340
+ // ─── Error Types ────────────────────────────────────────────────────────────
341
+ export class QuotaExceededError extends Error {
342
+ code = 'QUOTA_EXCEEDED';
343
+ meterSlug;
344
+ entityId;
345
+ constructor(meterSlug, entityId) {
346
+ super(`LLM quota exceeded for meter '${meterSlug}' on entity '${entityId}'. ` +
347
+ 'Upgrade your plan or wait for the next billing period.');
348
+ this.name = 'QuotaExceededError';
349
+ this.meterSlug = meterSlug;
350
+ this.entityId = entityId;
351
+ }
352
+ }
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Agent Discovery
3
+ *
4
+ * Discovers agent tables by querying the agent_chat_module config table
5
+ * at runtime. The module stores schema_id, table names, and table IDs
6
+ * when provisioned — no smart tags needed.
7
+ *
8
+ * Results are cached per-database with a TTL so the REST middleware
9
+ * doesn't hit the database on every request.
10
+ */
11
+ import { Pool } from 'pg';
12
+ export interface AgentTableInfo { // Schema name plus table name locating one agent table
13
+ /** The PostgreSQL schema name (e.g. 'agent_public') */
14
+ schemaName: string;
15
+ /** The table name (e.g. 'agent_thread') */
16
+ tableName: string;
17
+ }
18
+ export interface AgentDiscovery { // Agent tables discovered for one database; each entry may be absent
19
+ thread: AgentTableInfo | null; // thread table, or null when the module row has no thread table name
20
+ message: AgentTableInfo | null; // message table, or null when the module row has no message table name
21
+ task: AgentTableInfo | null; // task table, or null when the module row has no task table name
22
+ }
23
+ /** Clear all cached discovery results (for testing) */
24
+ export declare function clearAgentDiscoveryCache(): void;
25
+ /**
26
+ * Look up agent table info for a database, querying the module config table.
27
+ * Results are cached per-database with a 60s TTL.
28
+ */
29
+ export declare function getAgentDiscovery(pool: Pool, dbname: string): Promise<AgentDiscovery | null>;
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Agent Discovery
3
+ *
4
+ * Discovers agent tables by querying the agent_chat_module config table
5
+ * at runtime. The module stores schema_id, table names, and table IDs
6
+ * when provisioned — no smart tags needed.
7
+ *
8
+ * Results are cached per-database with a TTL so the REST middleware
9
+ * doesn't hit the database on every request.
10
+ */
11
+ import { ModuleConfigCache } from 'graphile-cache';
12
+ // ─── Cache ──────────────────────────────────────────────────────────────────
13
+ const agentDiscoveryCache = new ModuleConfigCache({ // module-level cache, keyed by database name in getAgentDiscovery
14
+ name: 'agent-discovery',
15
+ ttlMs: 60_000 // 60 seconds
16
+ });
17
+ /** Clear all cached discovery results (for testing) */
18
+ export function clearAgentDiscoveryCache() { // takes no arguments and returns nothing
19
+ agentDiscoveryCache.clear(); // delegates to the module-level cache's own clear()
20
+ }
21
+ // ─── Discovery Query ────────────────────────────────────────────────────────
22
+ const DISCOVERY_SQL = `
23
+ SELECT
24
+ s.schema_name,
25
+ acm.thread_table_name,
26
+ acm.message_table_name,
27
+ acm.task_table_name
28
+ FROM metaschema_modules_public.agent_chat_module acm
29
+ JOIN metaschema_public.schema s ON s.id = acm.schema_id
30
+ LIMIT 1
31
+ `; // NOTE(review): LIMIT 1 has no ORDER BY, so which row is returned is unspecified if several agent_chat_module rows exist — confirm at most one row is expected
32
+ /**
33
+ * Look up agent table info for a database, querying the module config table.
34
+ * Results are cached per-database with a 60s TTL.
35
+ */
36
+ export async function getAgentDiscovery(pool, dbname) {
37
+ const cached = agentDiscoveryCache.get(dbname);
38
+ if (cached !== undefined) {
39
+ return cached;
40
+ }
41
+ let discovery = null;
42
+ try {
43
+ const { rows } = await pool.query(DISCOVERY_SQL);
44
+ if (rows.length > 0) {
45
+ const row = rows[0];
46
+ const schemaName = row.schema_name;
47
+ discovery = {
48
+ thread: row.thread_table_name
49
+ ? { schemaName, tableName: row.thread_table_name }
50
+ : null,
51
+ message: row.message_table_name
52
+ ? { schemaName, tableName: row.message_table_name }
53
+ : null,
54
+ task: row.task_table_name
55
+ ? { schemaName, tableName: row.task_table_name }
56
+ : null
57
+ };
58
+ }
59
+ }
60
+ catch {
61
+ // Module table doesn't exist in this database — not provisioned
62
+ }
63
+ agentDiscoveryCache.set(dbname, discovery);
64
+ return discovery;
65
+ }
@@ -2,7 +2,8 @@
2
2
  * LlmModulePlugin
3
3
  *
4
4
  * Detects and loads the `llm_module` configuration from `services_public.api_modules`.
5
- * Makes the resolved embedder available to other plugins via the build context.
5
+ * Makes the resolved embedder and chat completer available to other plugins
6
+ * via the build context.
6
7
  *
7
8
  * This plugin is the foundation that enables per-database LLM configuration.
8
9
  * When an API has an `llm_module` configured, the embedder is resolved and
@@ -14,9 +15,13 @@
14
15
  * 2. `defaultEmbedder` from preset options (dev/testing fallback)
15
16
  * 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
16
17
  * 4. null — LLM features are disabled
18
+ *
19
+ * This plugin is intentionally pure — no billing or metering logic.
20
+ * The optional LlmMeteringPlugin wraps the embedder with billing integration
21
+ * if loaded (it runs after this plugin and before the consumer plugins).
17
22
  */
18
23
  import type { GraphileConfig } from 'graphile-config';
19
- import type { EmbedderFunction, ChatFunction, GraphileLlmOptions } from '../types';
24
+ import type { ChatFunction, EmbedderFunction, GraphileLlmOptions } from '../types';
20
25
  declare global {
21
26
  namespace GraphileBuild {
22
27
  interface Build {
@@ -24,6 +29,10 @@ declare global {
24
29
  llmEmbedder: EmbedderFunction | null;
25
30
  /** The resolved chat completion function, or null if not configured */
26
31
  llmChatCompleter: ChatFunction | null;
32
+ /** The embedding model name (used as billing meter slug) */
33
+ llmEmbeddingModel: string | null;
34
+ /** The chat model name (used as billing meter slug) */
35
+ llmChatModel: string | null;
27
36
  }
28
37
  }
29
38
  namespace GraphileConfig {
@@ -2,7 +2,8 @@
2
2
  * LlmModulePlugin
3
3
  *
4
4
  * Detects and loads the `llm_module` configuration from `services_public.api_modules`.
5
- * Makes the resolved embedder available to other plugins via the build context.
5
+ * Makes the resolved embedder and chat completer available to other plugins
6
+ * via the build context.
6
7
  *
7
8
  * This plugin is the foundation that enables per-database LLM configuration.
8
9
  * When an API has an `llm_module` configured, the embedder is resolved and
@@ -14,9 +15,14 @@
14
15
  * 2. `defaultEmbedder` from preset options (dev/testing fallback)
15
16
  * 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
16
17
  * 4. null — LLM features are disabled
18
+ *
19
+ * This plugin is intentionally pure — no billing or metering logic.
20
+ * The optional LlmMeteringPlugin wraps the embedder with billing integration
21
+ * if loaded (it runs after this plugin and before the consumer plugins).
17
22
  */
18
- import { buildEmbedder, buildEmbedderFromEnv } from '../embedder';
19
23
  import { buildChatCompleter, buildChatCompleterFromEnv } from '../chat';
24
+ import { buildEmbedder, buildEmbedderFromEnv } from '../embedder';
25
+ import { getLlmEnvOptions } from '../env';
20
26
  /**
21
27
  * Creates the LlmModulePlugin with the given options.
22
28
  */
@@ -24,7 +30,7 @@ export function createLlmModulePlugin(options = {}) {
24
30
  const { defaultEmbedder, defaultChatCompleter } = options;
25
31
  return {
26
32
  name: 'LlmModulePlugin',
27
- version: '0.1.0',
33
+ version: '0.2.0',
28
34
  description: 'Resolves LLM embedder and chat completer configuration and makes them available to other plugins',
29
35
  schema: {
30
36
  hooks: {
@@ -74,9 +80,11 @@ export function createLlmModulePlugin(options = {}) {
74
80
  return build.extend(build, {
75
81
  llmEmbedder: embedder,
76
82
  llmChatCompleter: chat,
77
- }, 'LlmModulePlugin adding llmEmbedder and llmChatCompleter to build');
78
- },
79
- },
80
- },
83
+ llmEmbeddingModel: defaultEmbedder?.model ?? getLlmEnvOptions().embedding.model,
84
+ llmChatModel: defaultChatCompleter?.model ?? getLlmEnvOptions().chat.model
85
+ }, 'LlmModulePlugin adding llmEmbedder, llmChatCompleter, and model names to build');
86
+ }
87
+ }
88
+ }
81
89
  };
82
90
  }
@@ -0,0 +1,42 @@
1
+ /**
2
+ * LlmMeteringPlugin
3
+ *
4
+ * Opt-in billing integration for graphile-llm. Completely separate from the
5
+ * pure LLM plugins (text-search, text-mutation, rag).
6
+ *
7
+ * **How it works:**
8
+ * 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
9
+ * that has the same `(text: string) => Promise<number[]>` signature
10
+ * 2. At request time, wraps every root query/mutation resolver to set up a
11
+ * request-scoped MeteringContext via AsyncLocalStorage
12
+ * 3. When the embedder is called (by any plugin), the wrapper checks
13
+ * AsyncLocalStorage for a metering context and if found, calls
14
+ * check_billing_quota before and record_usage after
15
+ * 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
16
+ * null and handles it (search falls back to text-only, mutations throw)
17
+ *
18
+ * The pure plugins never import metering, config-cache, or billing types.
19
+ * They call the embedder and handle null results — that's it.
20
+ *
21
+ * **Entity ID resolution:**
22
+ * The billing `entity_id` is resolved via a configurable callback.
23
+ * Default: reads `jwt.claims.user_id` from pgSettings. Override via
24
+ * `metering.resolveEntityId` in GraphileLlmPreset options.
25
+ *
26
+ * **Graceful behavior:**
27
+ * - billing_module not provisioned → embedder passes through unmetered
28
+ * - entity_id not available → embedder passes through unmetered
29
+ * - check_billing_quota throws → call is allowed (billing is opt-in)
30
+ * - record_usage throws → call succeeds, recording silently skipped
31
+ * - quota exceeded → embedder returns null
32
+ */
33
+ import type { GraphileConfig } from 'graphile-config';
34
+ import type { MeteringConfig } from '../types';
35
+ declare global { // global augmentation emitted alongside this module's export
36
+ namespace GraphileConfig {
37
+ interface Plugins {
38
+ LlmMeteringPlugin: true; // adds the LlmMeteringPlugin key to the global GraphileConfig.Plugins interface
39
+ }
40
+ }
41
+ }
42
+ export declare function createLlmMeteringPlugin(meteringConfig?: MeteringConfig): GraphileConfig.Plugin; // factory for the LlmMeteringPlugin described in the file header; meteringConfig may be omitted