graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/__tests__/graphile-llm.test.js +87 -71
  2. package/chat.d.ts +5 -5
  3. package/chat.js +45 -43
  4. package/config-cache.d.ts +77 -0
  5. package/config-cache.js +148 -0
  6. package/embedder.d.ts +5 -5
  7. package/embedder.js +11 -17
  8. package/env.d.ts +31 -0
  9. package/env.js +52 -0
  10. package/esm/__tests__/graphile-llm.test.js +87 -71
  11. package/esm/chat.d.ts +5 -5
  12. package/esm/chat.js +45 -40
  13. package/esm/config-cache.d.ts +77 -0
  14. package/esm/config-cache.js +143 -0
  15. package/esm/embedder.d.ts +5 -5
  16. package/esm/embedder.js +11 -17
  17. package/esm/env.d.ts +31 -0
  18. package/esm/env.js +49 -0
  19. package/esm/index.d.ts +14 -5
  20. package/esm/index.js +11 -5
  21. package/esm/metering.d.ts +114 -0
  22. package/esm/metering.js +352 -0
  23. package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
  24. package/esm/plugins/agent-discovery-plugin.js +65 -0
  25. package/esm/plugins/llm-module-plugin.d.ts +11 -2
  26. package/esm/plugins/llm-module-plugin.js +15 -7
  27. package/esm/plugins/metering-plugin.d.ts +42 -0
  28. package/esm/plugins/metering-plugin.js +175 -0
  29. package/esm/plugins/rag-plugin.js +20 -20
  30. package/esm/plugins/text-mutation-plugin.d.ts +4 -0
  31. package/esm/plugins/text-mutation-plugin.js +23 -13
  32. package/esm/plugins/text-search-plugin.d.ts +4 -0
  33. package/esm/plugins/text-search-plugin.js +23 -11
  34. package/esm/preset.d.ts +21 -1
  35. package/esm/preset.js +33 -6
  36. package/esm/types.d.ts +86 -10
  37. package/index.d.ts +14 -5
  38. package/index.js +25 -8
  39. package/metering.d.ts +114 -0
  40. package/metering.js +359 -0
  41. package/package.json +15 -15
  42. package/plugins/agent-discovery-plugin.d.ts +29 -0
  43. package/plugins/agent-discovery-plugin.js +69 -0
  44. package/plugins/llm-module-plugin.d.ts +11 -2
  45. package/plugins/llm-module-plugin.js +15 -7
  46. package/plugins/metering-plugin.d.ts +42 -0
  47. package/plugins/metering-plugin.js +178 -0
  48. package/plugins/rag-plugin.js +20 -20
  49. package/plugins/text-mutation-plugin.d.ts +4 -0
  50. package/plugins/text-mutation-plugin.js +23 -13
  51. package/plugins/text-search-plugin.d.ts +4 -0
  52. package/plugins/text-search-plugin.js +23 -11
  53. package/preset.d.ts +21 -1
  54. package/preset.js +33 -6
  55. package/types.d.ts +86 -10
@@ -0,0 +1,143 @@
1
+ /**
2
+ * config-cache — Per-database LLM billing configuration cache
3
+ *
4
+ * Caches resolved billing function names per database_id.
5
+ * Uses an LRU cache with TTL so config changes propagate within a bounded window
6
+ * without requiring a server restart.
7
+ *
8
+ * Resolution flow:
9
+ * Billing config from `metaschema_modules_public.billing_module`
10
+ * (schema names + the record_usage function name; the check_billing_quota name is fixed in code)
11
+ *
12
+ * All queries run through the Graphile `withPgClient` callback, which gives us
13
+ * a client connected to the tenant database with proper role settings.
14
+ *
15
+ * The LLM module config (provider, model, etc.) is already resolved by the
16
+ * LlmModulePlugin at schema-build time. This cache handles the runtime-only
17
+ * billing piece.
18
+ */
19
+ import { ModuleConfigCache } from 'graphile-cache';
20
+ // ─── SQL Queries ────────────────────────────────────────────────────────────
21
+ /**
22
+ * Look up the billing module row for a database: public/private schema
23
+ * names and the record_usage function name.
24
+ * This query does not itself check that the table exists. To avoid hard
25
+ * errors on databases without the billing module provisioned, callers first
26
+ * run SCHEMA_EXISTS_SQL and treat any query failure as "billing not available".
+ */
27
+ const BILLING_MODULE_SQL = `
28
+ SELECT
29
+ s.schema_name AS public_schema,
30
+ ps.schema_name AS private_schema,
31
+ bm.record_usage_function
32
+ FROM metaschema_modules_public.billing_module bm
33
+ JOIN metaschema_public.schema s ON bm.schema_id = s.id
34
+ JOIN metaschema_public.schema ps ON bm.private_schema_id = ps.id
35
+ WHERE bm.database_id = $1
36
+ LIMIT 1
37
+ `;
38
+ /**
39
+ * Resolve the inference log module's schema and table name.
40
+ */
41
+ const INFERENCE_LOG_MODULE_SQL = `
42
+ SELECT
43
+ s.schema_name AS schema,
44
+ ilm.inference_log_table_name AS table_name
45
+ FROM metaschema_modules_public.inference_log_module ilm
46
+ JOIN metaschema_public.schema s ON ilm.schema_id = s.id
47
+ WHERE ilm.database_id = $1
48
+ LIMIT 1
49
+ `;
50
+ // ─── Cache ──────────────────────────────────────────────────────────────────
51
+ const billingCache = new ModuleConfigCache({
52
+ name: 'billing-config',
53
+ ttlMs: 5 * 60 * 1000, // 5 minutes
54
+ max: 50
55
+ });
56
+ // ─── Resolution Functions ───────────────────────────────────────────────────
57
+ /**
58
+ * SQL to check if a schema exists. Used as a guard before querying
59
+ * metaschema tables that may not be provisioned.
60
+ */
61
+ const SCHEMA_EXISTS_SQL = `
62
+ SELECT 1 FROM information_schema.schemata WHERE schema_name = $1 LIMIT 1
63
+ `;
64
+ async function resolveInferenceLogConfig(pgClient, databaseId) {
65
+ try {
66
+ const schemaCheck = await pgClient.query(SCHEMA_EXISTS_SQL, ['metaschema_modules_public']);
67
+ if (schemaCheck.rows.length === 0)
68
+ return null;
69
+ const result = await pgClient.query(INFERENCE_LOG_MODULE_SQL, [databaseId]);
70
+ const row = result.rows[0];
71
+ if (!row?.schema || !row?.table_name)
72
+ return null;
73
+ return {
74
+ schema: row.schema,
75
+ tableName: row.table_name
76
+ };
77
+ }
78
+ catch {
79
+ return null;
80
+ }
81
+ }
82
+ async function resolveBillingConfig(pgClient, databaseId) {
83
+ try {
84
+ // Guard: check if the metaschema_modules_public schema exists.
85
+ // If the database doesn't have the billing module provisioned,
86
+ // this schema (or the billing_module table) won't exist.
87
+ const schemaCheck = await pgClient.query(SCHEMA_EXISTS_SQL, ['metaschema_modules_public']);
88
+ if (schemaCheck.rows.length === 0)
89
+ return null;
90
+ const result = await pgClient.query(BILLING_MODULE_SQL, [databaseId]);
91
+ const row = result.rows[0];
92
+ if (!row?.record_usage_function)
93
+ return null;
94
+ return {
95
+ publicSchema: row.public_schema,
96
+ privateSchema: row.private_schema,
97
+ recordUsageFunction: row.record_usage_function,
98
+ // The check_billing_quota function name follows the inflection pattern
99
+ checkBillingQuotaFunction: 'check_billing_quota'
100
+ };
101
+ }
102
+ catch {
103
+ // Schema/table doesn't exist or query failed — billing not available
104
+ return null;
105
+ }
106
+ }
107
+ // ─── Public API ─────────────────────────────────────────────────────────────
108
+ /**
109
+ * Resolve billing config for a database.
110
+ * Results (including null "not provisioned" lookups) are cached per database_id with a 5-minute TTL.
111
+ *
112
+ * @param pgClient - A client connected to the tenant database (from withPgClient)
113
+ * @param databaseId - The database UUID
114
+ */
115
+ export async function getLlmBillingConfig(pgClient, databaseId) {
116
+ const cached = billingCache.get(databaseId);
117
+ if (cached)
118
+ return cached;
119
+ const [billing, inferenceLog] = await Promise.all([
120
+ resolveBillingConfig(pgClient, databaseId),
121
+ resolveInferenceLogConfig(pgClient, databaseId)
122
+ ]);
123
+ const entry = { billing, inferenceLog };
124
+ billingCache.set(databaseId, entry);
125
+ return entry;
126
+ }
127
+ /**
128
+ * Invalidate the cached config for a specific database (or all).
129
+ */
130
+ export function invalidateLlmBillingConfig(databaseId) {
131
+ if (databaseId) {
132
+ billingCache.delete(databaseId);
133
+ }
134
+ else {
135
+ billingCache.clear();
136
+ }
137
+ }
138
+ /**
139
+ * Get cache stats for diagnostics.
140
+ */
141
+ export function getLlmBillingCacheStats() {
142
+ return { size: billingCache.size, max: 50 };
143
+ }
package/esm/embedder.d.ts CHANGED
@@ -24,12 +24,12 @@ export declare function buildEmbedder(config: EmbedderConfig): EmbedderFunction
24
24
  */
25
25
  export declare function buildEmbedderFromModule(data: LlmModuleData): EmbedderFunction | null;
26
26
  /**
27
- * Resolve an embedder from environment variables via getEnvOptions().
27
+ * Resolve an embedder from environment variables.
28
28
  * This is a fallback for development when no llm_module or defaultEmbedder is configured.
29
29
  *
30
- * Environment variables (parsed by @constructive-io/graphql-env):
31
- * EMBEDDER_PROVIDER - Provider name ('ollama')
32
- * EMBEDDER_MODEL - Model identifier
33
- * EMBEDDER_BASE_URL - Provider base URL
30
+ * Environment variables (with defaults from env.ts):
31
+ * EMBEDDER_PROVIDER - Provider name (default: 'ollama')
32
+ * EMBEDDER_MODEL - Model identifier (default: 'nomic-embed-text')
33
+ * EMBEDDER_BASE_URL - Provider base URL (default: 'http://localhost:11434')
34
34
  */
35
35
  export declare function buildEmbedderFromEnv(): EmbedderFunction | null;
package/esm/embedder.js CHANGED
@@ -10,10 +10,12 @@
10
10
  * 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
11
11
  */
12
12
  import OllamaClient from '@agentic-kit/ollama';
13
- import { getEnvOptions } from '@constructive-io/graphql-env';
13
+ import { getLlmEnvOptions } from './env';
14
14
  // ─── Built-in Providers ─────────────────────────────────────────────────────
15
15
  /**
16
16
  * Create an Ollama-based embedder function.
17
+ *
18
+ * Uses the /api/embed endpoint which returns prompt_eval_count (real token count).
17
19
  */
18
20
  function createOllamaEmbedder(baseUrl = 'http://localhost:11434', model = 'nomic-embed-text') {
19
21
  const client = new OllamaClient(baseUrl);
@@ -47,27 +49,19 @@ export function buildEmbedderFromModule(data) {
47
49
  return buildEmbedder({
48
50
  provider: data.embedding_provider,
49
51
  model: data.embedding_model,
50
- baseUrl: data.embedding_base_url,
51
- apiKey: data.api_key_ref,
52
+ baseUrl: data.embedding_base_url
52
53
  });
53
54
  }
54
55
  /**
55
- * Resolve an embedder from environment variables via getEnvOptions().
56
+ * Resolve an embedder from environment variables.
56
57
  * This is a fallback for development when no llm_module or defaultEmbedder is configured.
57
58
  *
58
- * Environment variables (parsed by @constructive-io/graphql-env):
59
- * EMBEDDER_PROVIDER - Provider name ('ollama')
60
- * EMBEDDER_MODEL - Model identifier
61
- * EMBEDDER_BASE_URL - Provider base URL
59
+ * Environment variables (with defaults from env.ts):
60
+ * EMBEDDER_PROVIDER - Provider name (default: 'ollama')
61
+ * EMBEDDER_MODEL - Model identifier (default: 'nomic-embed-text')
62
+ * EMBEDDER_BASE_URL - Provider base URL (default: 'http://localhost:11434')
62
63
  */
63
64
  export function buildEmbedderFromEnv() {
64
- const { llm } = getEnvOptions();
65
- const provider = llm?.embedder?.provider;
66
- if (!provider)
67
- return null;
68
- return buildEmbedder({
69
- provider,
70
- model: llm?.embedder?.model,
71
- baseUrl: llm?.embedder?.baseUrl,
72
- });
65
+ const { embedding } = getLlmEnvOptions();
66
+ return buildEmbedder(embedding);
73
67
  }
package/esm/env.d.ts ADDED
@@ -0,0 +1,31 @@
1
+ /**
2
+ * LLM Environment Configuration
3
+ *
4
+ * Single source of truth for all LLM-related environment variables and defaults.
5
+ * Every other module in graphile-llm imports from here — no direct process.env
6
+ * reads or scattered null coalescing elsewhere.
7
+ *
8
+ * Environment variables:
9
+ * EMBEDDER_PROVIDER - Embedding provider name (default: 'ollama')
10
+ * EMBEDDER_MODEL - Embedding model (default: 'nomic-embed-text')
11
+ * EMBEDDER_BASE_URL - Embedding provider URL (default: 'http://localhost:11434')
12
+ * CHAT_PROVIDER - Chat provider name (default: 'ollama')
13
+ * CHAT_MODEL - Chat model (default: 'llama3')
14
+ * CHAT_BASE_URL - Chat provider URL (default: 'http://localhost:11434')
15
+ */
16
+ export interface LlmProviderConfig {
17
+ provider: string;
18
+ model: string;
19
+ baseUrl: string;
20
+ }
21
+ export interface LlmEnvOptions {
22
+ embedding: LlmProviderConfig;
23
+ chat: LlmProviderConfig;
24
+ }
25
+ /**
26
+ * Resolve LLM configuration from environment variables with sensible defaults.
27
+ *
28
+ * Call this once and pass the result around — never read process.env directly
29
+ * in plugin code.
30
+ */
31
+ export declare function getLlmEnvOptions(): LlmEnvOptions;
package/esm/env.js ADDED
@@ -0,0 +1,49 @@
1
+ /**
2
+ * LLM Environment Configuration
3
+ *
4
+ * Single source of truth for all LLM-related environment variables and defaults.
5
+ * Every other module in graphile-llm imports from here — no direct process.env
6
+ * reads or scattered null coalescing elsewhere.
7
+ *
8
+ * Environment variables:
9
+ * EMBEDDER_PROVIDER - Embedding provider name (default: 'ollama')
10
+ * EMBEDDER_MODEL - Embedding model (default: 'nomic-embed-text')
11
+ * EMBEDDER_BASE_URL - Embedding provider URL (default: 'http://localhost:11434')
12
+ * CHAT_PROVIDER - Chat provider name (default: 'ollama')
13
+ * CHAT_MODEL - Chat model (default: 'llama3')
14
+ * CHAT_BASE_URL - Chat provider URL (default: 'http://localhost:11434')
15
+ */
16
+ // ─── Defaults ───────────────────────────────────────────────────────────────
17
+ const LLM_DEFAULTS = {
18
+ embedding: {
19
+ provider: 'ollama',
20
+ model: 'nomic-embed-text',
21
+ baseUrl: 'http://localhost:11434'
22
+ },
23
+ chat: {
24
+ provider: 'ollama',
25
+ model: 'llama3',
26
+ baseUrl: 'http://localhost:11434'
27
+ }
28
+ };
29
+ // ─── Resolution ─────────────────────────────────────────────────────────────
30
+ /**
31
+ * Resolve LLM configuration from environment variables with sensible defaults.
32
+ *
33
+ * Call this once and pass the result around — never read process.env directly
34
+ * in plugin code.
35
+ */
36
+ export function getLlmEnvOptions() {
37
+ return {
38
+ embedding: {
39
+ provider: process.env.EMBEDDER_PROVIDER ?? LLM_DEFAULTS.embedding.provider,
40
+ model: process.env.EMBEDDER_MODEL ?? LLM_DEFAULTS.embedding.model,
41
+ baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl
42
+ },
43
+ chat: {
44
+ provider: process.env.CHAT_PROVIDER ?? LLM_DEFAULTS.chat.provider,
45
+ model: process.env.CHAT_MODEL ?? LLM_DEFAULTS.chat.model,
46
+ baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl
47
+ }
48
+ };
49
+ }
package/esm/index.d.ts CHANGED
@@ -29,11 +29,20 @@
29
29
  * };
30
30
  * ```
31
31
  */
32
+ export type { LlmEnvOptions, LlmProviderConfig } from './env';
33
+ export { getLlmEnvOptions } from './env';
32
34
  export { GraphileLlmPreset } from './preset';
33
35
  export { createLlmModulePlugin } from './plugins/llm-module-plugin';
34
- export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
35
- export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
36
36
  export { createLlmRagPlugin } from './plugins/rag-plugin';
37
- export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
38
- export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
39
- export type { EmbedderFunction, EmbedderConfig, ChatFunction, ChatConfig, ChatMessage, ChatOptions, LlmModuleData, GraphileLlmOptions, RagDefaults, ChunkTableInfo, } from './types';
37
+ export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
38
+ export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
39
+ export { createLlmMeteringPlugin } from './plugins/metering-plugin';
40
+ export type { AgentDiscovery, AgentTableInfo } from './plugins/agent-discovery-plugin';
41
+ export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
42
+ export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
43
+ export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
44
+ export type { InferenceLogEntry, MeteringContext, MeteringOptions, MeterResult, WithPgClient } from './metering';
45
+ export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
46
+ export type { BillingConfig, InferenceLogConfig, LlmBillingCacheEntry, PgClient } from './config-cache';
47
+ export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
48
+ export type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, ChatResult, ChunkTableInfo, EmbedderConfig, EmbedderFunction, EmbeddingResult, GraphileLlmOptions, LlmModuleData, LlmUsage, MeteringConfig, RagDefaults } from './types';
package/esm/index.js CHANGED
@@ -29,14 +29,20 @@
29
29
  * };
30
30
  * ```
31
31
  */
32
+ export { getLlmEnvOptions } from './env';
32
33
  // Preset (recommended entry point)
33
34
  export { GraphileLlmPreset } from './preset';
34
- // Individual plugins
35
+ // Individual plugins (pure — no billing dependency)
35
36
  export { createLlmModulePlugin } from './plugins/llm-module-plugin';
36
- export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
37
- export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
38
37
  export { createLlmRagPlugin } from './plugins/rag-plugin';
38
+ export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
39
+ export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
40
+ // Metering plugin (opt-in billing integration)
41
+ export { createLlmMeteringPlugin } from './plugins/metering-plugin';
42
+ export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
39
43
  // Embedder utilities
40
- export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
44
+ export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
41
45
  // Chat completion utilities
42
- export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
46
+ export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
47
+ export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
48
+ export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
@@ -0,0 +1,114 @@
1
+ /**
2
+ * metering — Billing-aware wrappers for embedder and chat functions
3
+ *
4
+ * Wraps EmbedderFunction and ChatFunction with:
5
+ * 1. Pre-check: `check_billing_quota(meter_slug, entity_id, estimated_amount)`
6
+ * 2. Execute the underlying function
7
+ * 3. Post-record: `record_usage(meter_slug, entity_id, actual_amount)`
8
+ *
9
+ * When the quota check fails, the wrapper resolves to a MeterResult with `result: null`
10
+ * and `quotaExceeded: true` (graceful degradation) instead of throwing, so the search pipeline can fall back to text-only.
11
+ *
12
+ * Token counts:
13
+ * - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
14
+ * - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
15
+ *
16
+ * The billing functions live in the tenant database and are called via the
17
+ * Graphile `withPgClient` callback. Function locations (schema, names) are
18
+ * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
19
+ */
20
+ import type { BillingConfig, InferenceLogConfig, PgClient } from './config-cache';
21
+ import type { ChatFunction, ChatMessage, ChatOptions, EmbedderFunction } from './types';
22
+ /**
23
+ * Callback matching Graphile's withPgClient signature.
24
+ * Acquires a pg client, calls the callback, then releases the client.
25
+ */
26
+ export type WithPgClient = (pgSettings: Record<string, string>, callback: (pgClient: PgClient) => Promise<void>) => Promise<void>;
27
+ export interface MeteringContext {
28
+ /** Callback to acquire a tenant database client */
29
+ withPgClient: WithPgClient;
30
+ /** pgSettings from the GraphQL context (for role/claims) */
31
+ pgSettings: Record<string, string>;
32
+ /** Billing function references from the billing_module */
33
+ billing: BillingConfig;
34
+ /** Entity ID to meter against (from JWT claims) */
35
+ entityId: string;
36
+ /** Per-request correlation ID (from request.id pgSetting) */
37
+ requestId: string | null;
38
+ /** Database UUID from JWT claims */
39
+ databaseId: string;
40
+ /** Actor (user) ID from JWT claims */
41
+ actorId: string | null;
42
+ /** Inference log table config (null if inference_log_module not provisioned) */
43
+ inferenceLog: InferenceLogConfig | null;
44
+ }
45
+ export interface MeteringOptions {
46
+ /** Meter slug for embedding operations (default: model name from build config) */
47
+ embeddingMeterSlug?: string;
48
+ /** Meter slug for chat completion operations (default: model name from build config) */
49
+ chatMeterSlug?: string;
50
+ /** Whether to skip metering entirely (e.g. for local dev). Default: false */
51
+ skipMetering?: boolean;
52
+ /** Embedding model name (for inference log) */
53
+ embeddingModel?: string;
54
+ /** Chat model name (for inference log) */
55
+ chatModel?: string;
56
+ /** Provider name (for inference log) */
57
+ provider?: string;
58
+ }
59
+ export interface MeterResult<T> {
60
+ /** The result from the underlying function, or null if quota exceeded */
61
+ result: T | null;
62
+ /** Whether the call was metered */
63
+ metered: boolean;
64
+ /** Whether the call was skipped due to quota limits */
65
+ quotaExceeded: boolean;
66
+ /** Latency of the underlying function call in ms */
67
+ latencyMs: number;
68
+ }
69
+ export interface InferenceLogEntry {
70
+ databaseId: string;
71
+ entityId: string;
72
+ actorId: string | null;
73
+ model: string;
74
+ provider: string | null;
75
+ service: 'llm' | 'embedding' | 'tts' | 'stt' | 'ocr' | 'image_gen' | 'search' | 'compute';
76
+ operation: string;
77
+ inputTokens: number;
78
+ outputTokens: number;
79
+ totalTokens: number;
80
+ cacheReadTokens: number | null;
81
+ cacheWriteTokens: number | null;
82
+ latencyMs: number;
83
+ ragEnabled: boolean;
84
+ chunksRetrieved: number | null;
85
+ embeddingModel: string | null;
86
+ embeddingLatencyMs: number | null;
87
+ status: 'success' | 'quota_exceeded' | 'provider_error' | 'timeout';
88
+ errorType: string | null;
89
+ rawUsage: Record<string, unknown> | null;
90
+ }
91
+ /**
92
+ * Write a row to the usage_log_inference table.
93
+ * Gracefully skips if the inference_log_module is not provisioned.
94
+ *
95
+ * TODO: Also write to child (generated) database when dual-write is needed.
96
+ */
97
+ export declare function logInferenceUsage(ctx: MeteringContext, entry: InferenceLogEntry): Promise<void>;
98
+ /**
99
+ * Wrap an embedder with billing quota check + usage recording.
100
+ *
101
+ * The returned MeterResult contains `quotaExceeded: true` when the pre-check
102
+ * fails, enabling the caller to fall back to text-only search.
103
+ */
104
+ export declare function meteredEmbed(embedder: EmbedderFunction, text: string, ctx: MeteringContext | null, options?: MeteringOptions): Promise<MeterResult<number[]>>;
105
+ /**
106
+ * Wrap a chat completion call with billing quota check + usage recording.
107
+ */
108
+ export declare function meteredChat(chat: ChatFunction, messages: ChatMessage[], ctx: MeteringContext | null, chatOptions?: ChatOptions, meteringOptions?: MeteringOptions): Promise<MeterResult<string>>;
109
+ export declare class QuotaExceededError extends Error {
110
+ readonly code = "QUOTA_EXCEEDED";
111
+ readonly meterSlug: string;
112
+ readonly entityId: string;
113
+ constructor(meterSlug: string, entityId: string);
114
+ }