graphile-llm 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/graphile-llm.test.js +6 -4
- package/chat.d.ts +5 -5
- package/chat.js +8 -16
- package/config-cache.d.ts +77 -0
- package/config-cache.js +148 -0
- package/embedder.d.ts +5 -5
- package/embedder.js +8 -16
- package/env.d.ts +31 -0
- package/env.js +52 -0
- package/esm/__tests__/graphile-llm.test.js +6 -4
- package/esm/chat.d.ts +5 -5
- package/esm/chat.js +8 -16
- package/esm/config-cache.d.ts +77 -0
- package/esm/config-cache.js +143 -0
- package/esm/embedder.d.ts +5 -5
- package/esm/embedder.js +8 -16
- package/esm/env.d.ts +31 -0
- package/esm/env.js +49 -0
- package/esm/index.d.ts +10 -1
- package/esm/index.js +11 -1
- package/esm/metering.d.ts +114 -0
- package/esm/metering.js +358 -0
- package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
- package/esm/plugins/agent-discovery-plugin.js +65 -0
- package/esm/plugins/llm-module-plugin.d.ts +10 -1
- package/esm/plugins/llm-module-plugin.js +11 -3
- package/esm/plugins/metering-plugin.d.ts +42 -0
- package/esm/plugins/metering-plugin.js +175 -0
- package/esm/plugins/text-mutation-plugin.d.ts +4 -0
- package/esm/plugins/text-mutation-plugin.js +11 -1
- package/esm/plugins/text-search-plugin.d.ts +4 -0
- package/esm/plugins/text-search-plugin.js +13 -1
- package/esm/preset.d.ts +21 -1
- package/esm/preset.js +29 -2
- package/esm/types.d.ts +47 -6
- package/index.d.ts +10 -1
- package/index.js +23 -2
- package/metering.d.ts +114 -0
- package/metering.js +365 -0
- package/package.json +15 -15
- package/plugins/agent-discovery-plugin.d.ts +29 -0
- package/plugins/agent-discovery-plugin.js +69 -0
- package/plugins/llm-module-plugin.d.ts +10 -1
- package/plugins/llm-module-plugin.js +11 -3
- package/plugins/metering-plugin.d.ts +42 -0
- package/plugins/metering-plugin.js +178 -0
- package/plugins/text-mutation-plugin.d.ts +4 -0
- package/plugins/text-mutation-plugin.js +11 -1
- package/plugins/text-search-plugin.d.ts +4 -0
- package/plugins/text-search-plugin.js +13 -1
- package/preset.d.ts +21 -1
- package/preset.js +29 -2
- package/types.d.ts +47 -6
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LlmMeteringPlugin
|
|
3
|
+
*
|
|
4
|
+
* Opt-in billing integration for graphile-llm. Completely separate from the
|
|
5
|
+
* pure LLM plugins (text-search, text-mutation, rag).
|
|
6
|
+
*
|
|
7
|
+
* **How it works:**
|
|
8
|
+
* 1. At schema build time, replaces `build.llmEmbedder` with a metered wrapper
|
|
9
|
+
* that keeps the `(text: string) => Promise<number[]>` call shape, except that it may resolve to null when quota is exceeded
|
|
10
|
+
* 2. At request time, wraps every root query/mutation resolver to set up a
|
|
11
|
+
* request-scoped MeteringContext via AsyncLocalStorage
|
|
12
|
+
* 3. When the embedder is called (by any plugin), the wrapper checks
|
|
13
|
+
* AsyncLocalStorage for a metering context and if found, calls
|
|
14
|
+
* check_billing_quota before and record_usage after
|
|
15
|
+
* 4. If quota is exceeded, the wrapper returns null — the calling plugin sees
|
|
16
|
+
* null and handles it (search falls back to text-only, mutations throw)
|
|
17
|
+
*
|
|
18
|
+
* The pure plugins never import metering, config-cache, or billing types.
|
|
19
|
+
* They call the embedder and handle null results — that's it.
|
|
20
|
+
*
|
|
21
|
+
* **Entity ID resolution:**
|
|
22
|
+
* The billing `entity_id` is resolved via a configurable callback.
|
|
23
|
+
* Default: reads `jwt.claims.user_id` from pgSettings. Override via
|
|
24
|
+
* `metering.resolveEntityId` in GraphileLlmPreset options.
|
|
25
|
+
*
|
|
26
|
+
* **Graceful behavior:**
|
|
27
|
+
* - billing_module not provisioned → embedder passes through unmetered
|
|
28
|
+
* - entity_id or database_id not available → embedder passes through unmetered
|
|
29
|
+
* - check_billing_quota throws → call is allowed (billing is opt-in)
|
|
30
|
+
* - record_usage throws → call succeeds, recording silently skipped
|
|
31
|
+
* - quota exceeded → embedder returns null
|
|
32
|
+
*/
|
|
33
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
34
|
+
import { meteredEmbed } from '../metering';
|
|
35
|
+
import { getLlmBillingConfig } from '../config-cache';
|
|
36
|
+
// ─── Request-scoped context via AsyncLocalStorage ───────────────────────────
|
|
37
|
+
const meteringStore = new AsyncLocalStorage();
|
|
38
|
+
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
39
|
+
/**
 * Default billing-entity resolver: bills the authenticated user.
 *
 * @param pgSettings Per-request settings map; may be null/undefined.
 * @returns The `jwt.claims.user_id` value, or null when it is absent.
 */
function defaultResolveEntityId(pgSettings) {
    // Optional chaining so a missing settings object means "no entity"
    // instead of a TypeError.
    return pgSettings?.['jwt.claims.user_id'] ?? null;
}
|
|
42
|
+
/**
 * Build the request-scoped metering context from a GraphQL context.
 *
 * Returns null (meaning "run unmetered") whenever metering cannot be set up:
 *  - the entity id cannot be resolved (missing, or the resolver throws/rejects)
 *  - `jwt.claims.database_id` is missing from pgSettings
 *  - the GraphQL context has no `withPgClient`
 *  - the billing config lookup fails or yields no billing config
 *
 * @param graphqlContext GraphQL context; `pgSettings` and `withPgClient` are read from it.
 * @param resolveEntityId Callback mapping pgSettings to the billing entity id.
 *   May return the id directly or a Promise of it.
 * @returns The metering context object, or null when the request should not be metered.
 */
async function buildMeteringContext(graphqlContext, resolveEntityId) {
    const pgSettings = graphqlContext?.pgSettings ?? {};
    let entityId;
    try {
        // The resolver may be user-supplied. A throw here must not fail the
        // whole GraphQL field, so it is treated like "entity id unavailable".
        // Awaiting also keeps a Promise from being mistaken for a truthy id.
        entityId = await resolveEntityId(pgSettings);
    }
    catch {
        return null;
    }
    const databaseId = pgSettings['jwt.claims.database_id'] ?? null;
    if (!entityId || !databaseId)
        return null;
    const withPgClient = graphqlContext?.withPgClient;
    if (!withPgClient)
        return null;
    let billingConfig = null;
    let inferenceLogConfig = null;
    try {
        await withPgClient(pgSettings, async (pgClient) => {
            const entry = await getLlmBillingConfig(pgClient, databaseId);
            billingConfig = entry.billing;
            inferenceLogConfig = entry.inferenceLog;
        });
    }
    catch {
        // Deliberate best-effort: a failed config lookup means the request
        // runs unmetered rather than failing.
        return null;
    }
    if (!billingConfig)
        return null;
    return {
        withPgClient,
        pgSettings,
        billing: billingConfig,
        entityId,
        requestId: pgSettings['request.id'] ?? null,
        databaseId,
        // The actor is always the JWT user, even when a custom resolver
        // bills a different entity.
        actorId: pgSettings['jwt.claims.user_id'] ?? null,
        inferenceLog: inferenceLogConfig,
    };
}
|
|
78
|
+
/**
 * Produce a drop-in replacement for an embedder that applies metering
 * whenever a metering context is active in AsyncLocalStorage.
 *
 * - No active context: the original embedder runs directly and the call is
 *   logged as unmetered.
 * - Active context: the call is delegated to `meteredEmbed`; a
 *   quota-exceeded outcome resolves to null instead of a vector.
 *
 * @param embedder The original embedder function, called with the text.
 * @param meteringOptions Options forwarded unchanged to `meteredEmbed`.
 * @returns An async function taking the text to embed.
 */
function wrapEmbedderWithMetering(embedder, meteringOptions) {
    // Direct path used when no request-scoped context exists.
    const embedUnmetered = async (text) => {
        const startedAt = Date.now();
        const vector = await embedder(text);
        const elapsedMs = Date.now() - startedAt;
        console.log(`[graphile-llm] Embed (unmetered): dims=${vector?.length ?? 0}, latency=${elapsedMs}ms`);
        return vector;
    };
    return async (text) => {
        const activeContext = meteringStore.getStore();
        if (!activeContext) {
            return embedUnmetered(text);
        }
        const outcome = await meteredEmbed(embedder, text, activeContext, meteringOptions);
        return outcome.quotaExceeded ? null : outcome.result;
    };
}
|
|
104
|
+
// ─── Plugin ─────────────────────────────────────────────────────────────────
|
|
105
|
+
/**
 * Create the opt-in LlmMeteringPlugin.
 *
 * The plugin does two things:
 *  1. `build` hook: replaces `build.llmEmbedder` with a metered wrapper, if
 *     an embedder is configured.
 *  2. `GraphQLObjectType_fields_field` hook: wraps every root query/mutation
 *     resolver so it runs inside an AsyncLocalStorage scope carrying the
 *     request's metering context (or null when metering cannot be set up).
 *
 * @param meteringConfig Optional settings: `embeddingMeterSlug`,
 *   `chatMeterSlug`, `skipMetering`, and `resolveEntityId` (defaults to
 *   reading `jwt.claims.user_id`).
 * @returns The plugin object.
 */
export function createLlmMeteringPlugin(meteringConfig = {}) {
    const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId, } = meteringConfig;
    return {
        name: 'LlmMeteringPlugin',
        version: '0.2.0',
        description: 'Wraps LLM embedder/chat with billing quota checks and usage recording',
        after: ['LlmModulePlugin'],
        before: ['LlmTextSearchPlugin', 'LlmTextMutationPlugin', 'LlmRagPlugin'],
        schema: {
            hooks: {
                build(build) {
                    const originalEmbedder = build.llmEmbedder;
                    if (!originalEmbedder) {
                        console.log('[graphile-llm] Metering plugin loaded but no embedder configured — skipping');
                        return build;
                    }
                    // Meter slug = model name by default (three-level waterfall: model → inference → universal)
                    const embeddingModel = build.llmEmbeddingModel;
                    const chatModel = build.llmChatModel;
                    const embeddingSlug = configEmbeddingSlug ?? embeddingModel ?? undefined;
                    const chatSlug = configChatSlug ?? chatModel ?? undefined;
                    if (embeddingSlug) {
                        console.log(`[graphile-llm] Metering enabled — embedding meter: ${embeddingSlug}`);
                    }
                    else {
                        console.log('[graphile-llm] Metering enabled but no embedding model name — usage will not be metered');
                    }
                    const meteringOptions = {
                        embeddingMeterSlug: embeddingSlug,
                        chatMeterSlug: chatSlug,
                        skipMetering,
                        embeddingModel: embeddingModel ?? undefined,
                        chatModel: chatModel ?? undefined,
                    };
                    // Replace the embedder with a metered version.
                    // Same signature except it can return null (quota exceeded).
                    const meteredEmbedder = wrapEmbedderWithMetering(originalEmbedder, meteringOptions);
                    return build.extend(build, {
                        llmEmbedder: meteredEmbedder,
                    }, 'LlmMeteringPlugin replacing llmEmbedder with metered version');
                },
                /**
                 * Wrap every root query/mutation resolver to establish the
                 * request-scoped metering context via AsyncLocalStorage.
                 */
                GraphQLObjectType_fields_field(field, build, context) {
                    const { scope: { isRootQuery, isRootMutation, fieldName }, } = context;
                    if (!isRootQuery && !isRootMutation)
                        return field;
                    // Only wrap if an embedder is present on the build object
                    if (!build.llmEmbedder)
                        return field;
                    // Fallback for fields without an explicit resolver. The root
                    // source is commonly undefined, so the lookup must be
                    // null-safe; otherwise wrapping would turn a resolver-less
                    // root field into a TypeError.
                    const defaultResolver = (obj) => obj?.[fieldName];
                    const { resolve: oldResolve = defaultResolver, ...rest } = field;
                    return {
                        ...rest,
                        async resolve(source, args, graphqlContext, info) {
                            // Build the metering context for this request
                            const ctx = await buildMeteringContext(graphqlContext, resolveEntityId);
                            // Run the original resolver within the AsyncLocalStorage scope
                            // so any embedder calls made by downstream plugins pick up the ctx
                            return meteringStore.run(ctx, () => {
                                return oldResolve(source, args, graphqlContext, info);
                            });
                        },
                    };
                },
            },
        },
    };
}
|
|
@@ -9,6 +9,10 @@
|
|
|
9
9
|
* Example:
|
|
10
10
|
* mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
|
|
11
11
|
*
|
|
12
|
+
* If the embedder returns null (e.g. quota exceeded when the metering plugin
|
|
13
|
+
* is loaded), the mutation throws an error — unlike search, mutations cannot
|
|
14
|
+
* silently skip writing a vector the user asked for.
|
|
15
|
+
*
|
|
12
16
|
* This is the mutation counterpart to LlmTextSearchPlugin (which handles
|
|
13
17
|
* filter/query-side text-to-vector). Together they let clients work entirely
|
|
14
18
|
* with text/prompts instead of raw float vectors.
|
|
@@ -9,6 +9,10 @@
|
|
|
9
9
|
* Example:
|
|
10
10
|
* mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
|
|
11
11
|
*
|
|
12
|
+
* If the embedder returns null (e.g. quota exceeded when the metering plugin
|
|
13
|
+
* is loaded), the mutation throws an error — unlike search, mutations cannot
|
|
14
|
+
* silently skip writing a vector the user asked for.
|
|
15
|
+
*
|
|
12
16
|
* This is the mutation counterpart to LlmTextSearchPlugin (which handles
|
|
13
17
|
* filter/query-side text-to-vector). Together they let clients work entirely
|
|
14
18
|
* with text/prompts instead of raw float vectors.
|
|
@@ -61,7 +65,7 @@ function getTextToVectorMapping(pgCodec, build) {
|
|
|
61
65
|
export function createLlmTextMutationPlugin() {
|
|
62
66
|
return {
|
|
63
67
|
name: 'LlmTextMutationPlugin',
|
|
64
|
-
version: '0.
|
|
68
|
+
version: '0.2.0',
|
|
65
69
|
description: 'Adds text companion fields on mutation inputs for vector columns — ' +
|
|
66
70
|
'text is embedded server-side before storing',
|
|
67
71
|
after: [
|
|
@@ -121,6 +125,8 @@ export function createLlmTextMutationPlugin() {
|
|
|
121
125
|
* Uses the same v4-style resolver wrapping pattern as graphile-upload-plugin
|
|
122
126
|
* and graphile-bucket-provisioner-plugin. grafserv v5 supports this through
|
|
123
127
|
* its backwards-compatibility layer.
|
|
128
|
+
*
|
|
129
|
+
* If the embedder returns null (e.g. quota exceeded), throws an error.
|
|
124
130
|
*/
|
|
125
131
|
GraphQLObjectType_fields_field(field, build, context) {
|
|
126
132
|
const { scope: { isRootMutation, fieldName, pgCodec }, } = context;
|
|
@@ -163,6 +169,10 @@ export function createLlmTextMutationPlugin() {
|
|
|
163
169
|
const startTime = Date.now();
|
|
164
170
|
const vector = await embedder(value);
|
|
165
171
|
const latencyMs = Date.now() - startTime;
|
|
172
|
+
if (vector === null) {
|
|
173
|
+
throw new Error(`EMBED_QUOTA_EXCEEDED: Cannot embed ${key} — embedding quota exceeded. ` +
|
|
174
|
+
'Upgrade your plan or wait for the next billing period.');
|
|
175
|
+
}
|
|
166
176
|
console.log(`[graphile-llm] Mutation embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
|
|
167
177
|
// Inject the vector into the corresponding field
|
|
168
178
|
obj[vectorFieldName] = vector;
|
|
@@ -22,6 +22,10 @@
|
|
|
22
22
|
*
|
|
23
23
|
* If the embedder is not configured, the `text` field is still registered
|
|
24
24
|
* (so the schema is stable) but will return a clear error at execution time.
|
|
25
|
+
*
|
|
26
|
+
* If the embedder returns null (e.g. quota exceeded when the metering
|
|
27
|
+
* plugin is loaded), the text field is silently removed — the query
|
|
28
|
+
* continues with text-only search as a graceful fallback.
|
|
25
29
|
*/
|
|
26
30
|
import type { GraphileConfig } from 'graphile-config';
|
|
27
31
|
declare global {
|
|
@@ -22,6 +22,10 @@
|
|
|
22
22
|
*
|
|
23
23
|
* If the embedder is not configured, the `text` field is still registered
|
|
24
24
|
* (so the schema is stable) but will return a clear error at execution time.
|
|
25
|
+
*
|
|
26
|
+
* If the embedder returns null (e.g. quota exceeded when the metering
|
|
27
|
+
* plugin is loaded), the text field is silently removed — the query
|
|
28
|
+
* continues with text-only search as a graceful fallback.
|
|
25
29
|
*/
|
|
26
30
|
/**
|
|
27
31
|
* Check if a codec has any pgvector `vector` columns.
|
|
@@ -38,6 +42,9 @@ function hasVectorColumns(pgCodec) {
|
|
|
38
42
|
/**
|
|
39
43
|
* Recursively walk a `where` argument object and embed any VectorNearbyInput
|
|
40
44
|
* values that have `text` instead of `vector`.
|
|
45
|
+
*
|
|
46
|
+
* If the embedder returns null (e.g. quota exceeded), the text field is
|
|
47
|
+
* removed so the pgvector filter is skipped — graceful text-only fallback.
|
|
41
48
|
*/
|
|
42
49
|
async function embedTextInWhere(obj, embedder) {
|
|
43
50
|
if (!obj || typeof obj !== 'object')
|
|
@@ -53,6 +60,11 @@ async function embedTextInWhere(obj, embedder) {
|
|
|
53
60
|
const startTime = Date.now();
|
|
54
61
|
const vector = await embedder(value.text);
|
|
55
62
|
const latencyMs = Date.now() - startTime;
|
|
63
|
+
if (vector === null) {
|
|
64
|
+
// Embedder returned null (e.g. quota exceeded) — skip vector search
|
|
65
|
+
delete value.text;
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
56
68
|
console.log(`[graphile-llm] Search embed: field=${key}, dims=${vector.length}, latency=${latencyMs}ms`);
|
|
57
69
|
// Replace text with vector
|
|
58
70
|
value.vector = vector;
|
|
@@ -85,7 +97,7 @@ async function embedTextInWhere(obj, embedder) {
|
|
|
85
97
|
export function createLlmTextSearchPlugin() {
|
|
86
98
|
return {
|
|
87
99
|
name: 'LlmTextSearchPlugin',
|
|
88
|
-
version: '0.
|
|
100
|
+
version: '0.2.0',
|
|
89
101
|
description: 'Adds text-to-vector embedding support on VectorNearbyInput filter fields',
|
|
90
102
|
after: [
|
|
91
103
|
'LlmModulePlugin',
|
package/esm/preset.d.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* - Resolves an embedder from configuration (llm_module, env vars, or preset options)
|
|
9
9
|
* - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
|
|
10
10
|
* - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
|
|
11
|
-
* -
|
|
11
|
+
* - Optionally enables billing/metering via the LlmMeteringPlugin
|
|
12
12
|
*
|
|
13
13
|
* This preset is standalone — it is NOT included in ConstructivePreset by default.
|
|
14
14
|
* Projects that want LLM features opt in by adding it to their preset.
|
|
@@ -42,6 +42,26 @@
|
|
|
42
42
|
* ],
|
|
43
43
|
* };
|
|
44
44
|
* ```
|
|
45
|
+
*
|
|
46
|
+
* @example With billing metering (opt-in, meter slug = model name by default):
|
|
47
|
+
* ```typescript
|
|
48
|
+
* GraphileLlmPreset({
|
|
49
|
+
* defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
|
|
50
|
+
* metering: true,
|
|
51
|
+
* // → embedding calls metered under 'text-embedding-3-small' meter slug
|
|
52
|
+
* // → three-level waterfall: text-embedding-3-small → inference pool → universal
|
|
53
|
+
* })
|
|
54
|
+
* ```
|
|
55
|
+
*
|
|
56
|
+
* @example With custom entity_id resolution (bill per-database):
|
|
57
|
+
* ```typescript
|
|
58
|
+
* GraphileLlmPreset({
|
|
59
|
+
* defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
|
|
60
|
+
* metering: {
|
|
61
|
+
* resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
|
|
62
|
+
* },
|
|
63
|
+
* })
|
|
64
|
+
* ```
|
|
45
65
|
*/
|
|
46
66
|
import type { GraphileConfig } from 'graphile-config';
|
|
47
67
|
import type { GraphileLlmOptions } from './types';
|
package/esm/preset.js
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* - Resolves an embedder from configuration (llm_module, env vars, or preset options)
|
|
9
9
|
* - Adds a `text: String` field to `VectorNearbyInput` for text-based vector search
|
|
10
10
|
* - Adds `{column}Text: String` companion fields on mutation inputs for vector columns
|
|
11
|
-
* -
|
|
11
|
+
* - Optionally enables billing/metering via the LlmMeteringPlugin
|
|
12
12
|
*
|
|
13
13
|
* This preset is standalone — it is NOT included in ConstructivePreset by default.
|
|
14
14
|
* Projects that want LLM features opt in by adding it to their preset.
|
|
@@ -42,11 +42,32 @@
|
|
|
42
42
|
* ],
|
|
43
43
|
* };
|
|
44
44
|
* ```
|
|
45
|
+
*
|
|
46
|
+
* @example With billing metering (opt-in, meter slug = model name by default):
|
|
47
|
+
* ```typescript
|
|
48
|
+
* GraphileLlmPreset({
|
|
49
|
+
* defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
|
|
50
|
+
* metering: true,
|
|
51
|
+
* // → embedding calls metered under 'text-embedding-3-small' meter slug
|
|
52
|
+
* // → three-level waterfall: text-embedding-3-small → inference pool → universal
|
|
53
|
+
* })
|
|
54
|
+
* ```
|
|
55
|
+
*
|
|
56
|
+
* @example With custom entity_id resolution (bill per-database):
|
|
57
|
+
* ```typescript
|
|
58
|
+
* GraphileLlmPreset({
|
|
59
|
+
* defaultEmbedder: { provider: 'openai', model: 'text-embedding-3-small' },
|
|
60
|
+
* metering: {
|
|
61
|
+
* resolveEntityId: (pgSettings) => pgSettings['jwt.claims.database_id'],
|
|
62
|
+
* },
|
|
63
|
+
* })
|
|
64
|
+
* ```
|
|
45
65
|
*/
|
|
46
66
|
import { createLlmModulePlugin } from './plugins/llm-module-plugin';
|
|
47
67
|
import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
48
68
|
import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
49
69
|
import { createLlmRagPlugin } from './plugins/rag-plugin';
|
|
70
|
+
import { createLlmMeteringPlugin } from './plugins/metering-plugin';
|
|
50
71
|
/**
|
|
51
72
|
* Creates a preset that includes all LLM plugins.
|
|
52
73
|
*
|
|
@@ -54,10 +75,16 @@ import { createLlmRagPlugin } from './plugins/rag-plugin';
|
|
|
54
75
|
* @returns A GraphileConfig.Preset to add to your extends array
|
|
55
76
|
*/
|
|
56
77
|
export function GraphileLlmPreset(options = {}) {
|
|
57
|
-
const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, } = options;
|
|
78
|
+
const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering, } = options;
|
|
58
79
|
const plugins = [
|
|
59
80
|
createLlmModulePlugin(options),
|
|
60
81
|
];
|
|
82
|
+
// Metering is opt-in: only loaded when metering is truthy
|
|
83
|
+
// (true, or a MeteringConfig object)
|
|
84
|
+
if (metering) {
|
|
85
|
+
const meteringConfig = metering === true ? {} : metering;
|
|
86
|
+
plugins.push(createLlmMeteringPlugin(meteringConfig));
|
|
87
|
+
}
|
|
61
88
|
if (enableTextSearch) {
|
|
62
89
|
plugins.push(createLlmTextSearchPlugin());
|
|
63
90
|
}
|
package/esm/types.d.ts
CHANGED
|
@@ -17,8 +17,6 @@ export interface EmbedderConfig {
|
|
|
17
17
|
model?: string;
|
|
18
18
|
/** Base URL for the provider (e.g. 'http://localhost:11434' for Ollama) */
|
|
19
19
|
baseUrl?: string;
|
|
20
|
-
/** API key for providers that require authentication (e.g. OpenAI) */
|
|
21
|
-
apiKey?: string;
|
|
22
20
|
}
|
|
23
21
|
/**
|
|
24
22
|
* A single message in a chat conversation.
|
|
@@ -50,8 +48,6 @@ export interface ChatConfig {
|
|
|
50
48
|
model?: string;
|
|
51
49
|
/** Base URL for the provider */
|
|
52
50
|
baseUrl?: string;
|
|
53
|
-
/** API key for providers that require authentication */
|
|
54
|
-
apiKey?: string;
|
|
55
51
|
}
|
|
56
52
|
/**
|
|
57
53
|
* The shape of the `llm_module` data stored in `services_public.api_modules`.
|
|
@@ -74,8 +70,6 @@ export interface LlmModuleData {
|
|
|
74
70
|
chat_model?: string;
|
|
75
71
|
/** Base URL for the chat provider */
|
|
76
72
|
chat_base_url?: string;
|
|
77
|
-
/** API key reference (e.g. 'vault://openai-key' or env var name) */
|
|
78
|
-
api_key_ref?: string;
|
|
79
73
|
/** Rate limit: requests per minute */
|
|
80
74
|
rate_limit_rpm?: number;
|
|
81
75
|
/** Maximum tokens per request */
|
|
@@ -131,6 +125,41 @@ export interface ChunkTableInfo {
|
|
|
131
125
|
/** Text content column on chunks table (the actual chunk text) */
|
|
132
126
|
contentField: string;
|
|
133
127
|
}
|
|
128
|
+
/**
 * Configuration for billing/metering integration.
 * When provided, embedding and chat calls are wrapped with quota checks
 * and usage recording via the billing_module functions.
 */
export interface MeteringConfig {
    /**
     * Meter slug for embedding operations.
     * Must match a slug in the billing_module meters table.
     *
     * @default the embedding model name (e.g. 'text-embedding-3-small')
     *   — meter slug = model name, so each model has its own meter
     *   in the three-level waterfall (per-model → inference pool → universal).
     */
    embeddingMeterSlug?: string;
    /**
     * Meter slug for chat completion operations.
     *
     * @default the chat model name (e.g. 'gpt-4o-mini')
     */
    chatMeterSlug?: string;
    /**
     * Disable metering entirely (e.g. for local dev).
     * When true, billing functions are never called.
     * @default false
     */
    skipMetering?: boolean;
    /**
     * Resolve the billing entity_id from pgSettings.
     * The entity_id identifies who gets billed (user, org, etc.).
     *
     * Returning null or undefined (e.g. from a plain `pgSettings[...]` lookup
     * on a missing key) means "no billable entity".
     *
     * @default reads jwt.claims.user_id
     */
    resolveEntityId?: (pgSettings: Record<string, string>) => string | null | undefined;
}
|
|
134
163
|
/**
|
|
135
164
|
* Options for the GraphileLlmPreset.
|
|
136
165
|
*/
|
|
@@ -170,4 +199,16 @@ export interface GraphileLlmOptions {
|
|
|
170
199
|
* Individual queries can override these values.
|
|
171
200
|
*/
|
|
172
201
|
ragDefaults?: RagDefaults;
|
|
202
|
+
/**
|
|
203
|
+
* Billing/metering configuration (opt-in).
|
|
204
|
+
* When truthy, loads the LlmMeteringPlugin which wraps the embedder
|
|
205
|
+
* with billing quota checks + usage recording.
|
|
206
|
+
*
|
|
207
|
+
* Set to `true` to enable metering with defaults (entity_id from jwt.claims.user_id).
|
|
208
|
+
* Provide a MeteringConfig object for fine-grained control (custom entity_id, meter slugs).
|
|
209
|
+
* Set to `false` or omit to disable metering entirely.
|
|
210
|
+
*
|
|
211
|
+
* @default undefined (metering disabled)
|
|
212
|
+
*/
|
|
213
|
+
metering?: boolean | MeteringConfig;
|
|
173
214
|
}
|
package/index.d.ts
CHANGED
|
@@ -29,11 +29,20 @@
|
|
|
29
29
|
* };
|
|
30
30
|
* ```
|
|
31
31
|
*/
|
|
32
|
+
export { getLlmEnvOptions } from './env';
|
|
33
|
+
export type { LlmEnvOptions, LlmProviderConfig } from './env';
|
|
32
34
|
export { GraphileLlmPreset } from './preset';
|
|
33
35
|
export { createLlmModulePlugin } from './plugins/llm-module-plugin';
|
|
34
36
|
export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
35
37
|
export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
36
38
|
export { createLlmRagPlugin } from './plugins/rag-plugin';
|
|
39
|
+
export { createLlmMeteringPlugin } from './plugins/metering-plugin';
|
|
40
|
+
export { getAgentDiscovery, clearAgentDiscoveryCache } from './plugins/agent-discovery-plugin';
|
|
41
|
+
export type { AgentTableInfo, AgentDiscovery } from './plugins/agent-discovery-plugin';
|
|
37
42
|
export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
|
|
38
43
|
export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
|
|
39
|
-
export
|
|
44
|
+
export { meteredEmbed, meteredChat, logInferenceUsage, QuotaExceededError } from './metering';
|
|
45
|
+
export type { MeteringContext, MeteringOptions, MeterResult, WithPgClient, InferenceLogEntry } from './metering';
|
|
46
|
+
export { getLlmBillingConfig, invalidateLlmBillingConfig, getLlmBillingCacheStats, } from './config-cache';
|
|
47
|
+
export type { BillingConfig, LlmBillingCacheEntry, InferenceLogConfig, PgClient } from './config-cache';
|
|
48
|
+
export type { EmbedderFunction, EmbedderConfig, ChatFunction, ChatConfig, ChatMessage, ChatOptions, LlmModuleData, GraphileLlmOptions, MeteringConfig, RagDefaults, ChunkTableInfo, } from './types';
|
package/index.js
CHANGED
|
@@ -31,11 +31,14 @@
|
|
|
31
31
|
* ```
|
|
32
32
|
*/
|
|
33
33
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
34
|
-
exports.buildChatCompleterFromEnv = exports.buildChatCompleterFromModule = exports.buildChatCompleter = exports.buildEmbedderFromEnv = exports.buildEmbedderFromModule = exports.buildEmbedder = exports.createLlmRagPlugin = exports.createLlmTextMutationPlugin = exports.createLlmTextSearchPlugin = exports.createLlmModulePlugin = exports.GraphileLlmPreset = void 0;
|
|
34
|
+
exports.getLlmBillingCacheStats = exports.invalidateLlmBillingConfig = exports.getLlmBillingConfig = exports.QuotaExceededError = exports.logInferenceUsage = exports.meteredChat = exports.meteredEmbed = exports.buildChatCompleterFromEnv = exports.buildChatCompleterFromModule = exports.buildChatCompleter = exports.buildEmbedderFromEnv = exports.buildEmbedderFromModule = exports.buildEmbedder = exports.clearAgentDiscoveryCache = exports.getAgentDiscovery = exports.createLlmMeteringPlugin = exports.createLlmRagPlugin = exports.createLlmTextMutationPlugin = exports.createLlmTextSearchPlugin = exports.createLlmModulePlugin = exports.GraphileLlmPreset = exports.getLlmEnvOptions = void 0;
|
|
35
|
+
// Environment configuration (single source of truth for LLM defaults)
|
|
36
|
+
var env_1 = require("./env");
|
|
37
|
+
Object.defineProperty(exports, "getLlmEnvOptions", { enumerable: true, get: function () { return env_1.getLlmEnvOptions; } });
|
|
35
38
|
// Preset (recommended entry point)
|
|
36
39
|
var preset_1 = require("./preset");
|
|
37
40
|
Object.defineProperty(exports, "GraphileLlmPreset", { enumerable: true, get: function () { return preset_1.GraphileLlmPreset; } });
|
|
38
|
-
// Individual plugins
|
|
41
|
+
// Individual plugins (pure — no billing dependency)
|
|
39
42
|
var llm_module_plugin_1 = require("./plugins/llm-module-plugin");
|
|
40
43
|
Object.defineProperty(exports, "createLlmModulePlugin", { enumerable: true, get: function () { return llm_module_plugin_1.createLlmModulePlugin; } });
|
|
41
44
|
var text_search_plugin_1 = require("./plugins/text-search-plugin");
|
|
@@ -44,6 +47,13 @@ var text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
|
|
|
44
47
|
Object.defineProperty(exports, "createLlmTextMutationPlugin", { enumerable: true, get: function () { return text_mutation_plugin_1.createLlmTextMutationPlugin; } });
|
|
45
48
|
var rag_plugin_1 = require("./plugins/rag-plugin");
|
|
46
49
|
Object.defineProperty(exports, "createLlmRagPlugin", { enumerable: true, get: function () { return rag_plugin_1.createLlmRagPlugin; } });
|
|
50
|
+
// Metering plugin (opt-in billing integration)
|
|
51
|
+
var metering_plugin_1 = require("./plugins/metering-plugin");
|
|
52
|
+
Object.defineProperty(exports, "createLlmMeteringPlugin", { enumerable: true, get: function () { return metering_plugin_1.createLlmMeteringPlugin; } });
|
|
53
|
+
// Agent discovery (queries agent_chat_module config table at runtime)
|
|
54
|
+
var agent_discovery_plugin_1 = require("./plugins/agent-discovery-plugin");
|
|
55
|
+
Object.defineProperty(exports, "getAgentDiscovery", { enumerable: true, get: function () { return agent_discovery_plugin_1.getAgentDiscovery; } });
|
|
56
|
+
Object.defineProperty(exports, "clearAgentDiscoveryCache", { enumerable: true, get: function () { return agent_discovery_plugin_1.clearAgentDiscoveryCache; } });
|
|
47
57
|
// Embedder utilities
|
|
48
58
|
var embedder_1 = require("./embedder");
|
|
49
59
|
Object.defineProperty(exports, "buildEmbedder", { enumerable: true, get: function () { return embedder_1.buildEmbedder; } });
|
|
@@ -54,3 +64,14 @@ var chat_1 = require("./chat");
|
|
|
54
64
|
Object.defineProperty(exports, "buildChatCompleter", { enumerable: true, get: function () { return chat_1.buildChatCompleter; } });
|
|
55
65
|
Object.defineProperty(exports, "buildChatCompleterFromModule", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromModule; } });
|
|
56
66
|
Object.defineProperty(exports, "buildChatCompleterFromEnv", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromEnv; } });
|
|
67
|
+
// Metering utilities (for custom integration)
|
|
68
|
+
var metering_1 = require("./metering");
|
|
69
|
+
Object.defineProperty(exports, "meteredEmbed", { enumerable: true, get: function () { return metering_1.meteredEmbed; } });
|
|
70
|
+
Object.defineProperty(exports, "meteredChat", { enumerable: true, get: function () { return metering_1.meteredChat; } });
|
|
71
|
+
Object.defineProperty(exports, "logInferenceUsage", { enumerable: true, get: function () { return metering_1.logInferenceUsage; } });
|
|
72
|
+
Object.defineProperty(exports, "QuotaExceededError", { enumerable: true, get: function () { return metering_1.QuotaExceededError; } });
|
|
73
|
+
// Config cache (for custom integration)
|
|
74
|
+
var config_cache_1 = require("./config-cache");
|
|
75
|
+
Object.defineProperty(exports, "getLlmBillingConfig", { enumerable: true, get: function () { return config_cache_1.getLlmBillingConfig; } });
|
|
76
|
+
Object.defineProperty(exports, "invalidateLlmBillingConfig", { enumerable: true, get: function () { return config_cache_1.invalidateLlmBillingConfig; } });
|
|
77
|
+
Object.defineProperty(exports, "getLlmBillingCacheStats", { enumerable: true, get: function () { return config_cache_1.getLlmBillingCacheStats; } });
|
package/metering.d.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* metering — Billing-aware wrappers for embedder and chat functions
|
|
3
|
+
*
|
|
4
|
+
* Wraps EmbedderFunction and ChatFunction with:
|
|
5
|
+
* 1. Pre-check: `check_billing_quota(meter_slug, entity_id, estimated_amount)`
|
|
6
|
+
* 2. Execute the underlying function
|
|
7
|
+
* 3. Post-record: `record_usage(meter_slug, entity_id, actual_amount)`
|
|
8
|
+
*
|
|
9
|
+
* When the quota check fails, the wrapper resolves to a MeterResult with `result: null` and `quotaExceeded: true` (graceful degradation)
|
|
10
|
+
* instead of throwing, so the search pipeline can fall back to text-only.
|
|
11
|
+
*
|
|
12
|
+
* Token counts are estimated from text length (~4 chars per token). No
|
|
13
|
+
* tokenizer needed — the billing system uses tokens as abstract units
|
|
14
|
+
* and the credit_cost on each model's meter normalizes the relative expense.
|
|
15
|
+
*
|
|
16
|
+
* The billing functions live in the tenant database and are called via the
|
|
17
|
+
* Graphile `withPgClient` callback. Function locations (schema, names) are
|
|
18
|
+
* resolved from `billing_module` metaschema and cached by `config-cache.ts`.
|
|
19
|
+
*/
|
|
20
|
+
import type { PgClient, BillingConfig, InferenceLogConfig } from './config-cache';
|
|
21
|
+
import type { EmbedderFunction, ChatFunction, ChatMessage, ChatOptions } from './types';
|
|
22
|
+
/**
|
|
23
|
+
* Callback matching Graphile's withPgClient signature.
|
|
24
|
+
* Acquires a pg client, calls the callback, then releases the client.
|
|
25
|
+
*/
|
|
26
|
+
export type WithPgClient = (pgSettings: Record<string, string>, callback: (pgClient: PgClient) => Promise<void>) => Promise<void>;
|
|
27
|
+
export interface MeteringContext {
|
|
28
|
+
/** Callback to acquire a tenant database client */
|
|
29
|
+
withPgClient: WithPgClient;
|
|
30
|
+
/** pgSettings from the GraphQL context (for role/claims) */
|
|
31
|
+
pgSettings: Record<string, string>;
|
|
32
|
+
/** Billing function references from the billing_module */
|
|
33
|
+
billing: BillingConfig;
|
|
34
|
+
/** Entity ID to meter against (from JWT claims) */
|
|
35
|
+
entityId: string;
|
|
36
|
+
/** Per-request correlation ID (from request.id pgSetting) */
|
|
37
|
+
requestId: string | null;
|
|
38
|
+
/** Database UUID from JWT claims */
|
|
39
|
+
databaseId: string;
|
|
40
|
+
/** Actor (user) ID from JWT claims */
|
|
41
|
+
actorId: string | null;
|
|
42
|
+
/** Inference log table config (null if inference_log_module not provisioned) */
|
|
43
|
+
inferenceLog: InferenceLogConfig | null;
|
|
44
|
+
}
|
|
45
|
+
export interface MeteringOptions {
|
|
46
|
+
/** Meter slug for embedding operations (default: model name from build config) */
|
|
47
|
+
embeddingMeterSlug?: string;
|
|
48
|
+
/** Meter slug for chat completion operations (default: model name from build config) */
|
|
49
|
+
chatMeterSlug?: string;
|
|
50
|
+
/** Whether to skip metering entirely (e.g. for local dev). Default: false */
|
|
51
|
+
skipMetering?: boolean;
|
|
52
|
+
/** Embedding model name (for inference log) */
|
|
53
|
+
embeddingModel?: string;
|
|
54
|
+
/** Chat model name (for inference log) */
|
|
55
|
+
chatModel?: string;
|
|
56
|
+
/** Provider name (for inference log) */
|
|
57
|
+
provider?: string;
|
|
58
|
+
}
|
|
59
|
+
export interface MeterResult<T> {
|
|
60
|
+
/** The result from the underlying function, or null if quota exceeded */
|
|
61
|
+
result: T | null;
|
|
62
|
+
/** Whether the call was metered */
|
|
63
|
+
metered: boolean;
|
|
64
|
+
/** Whether the call was skipped due to quota limits */
|
|
65
|
+
quotaExceeded: boolean;
|
|
66
|
+
/** Latency of the underlying function call in ms */
|
|
67
|
+
latencyMs: number;
|
|
68
|
+
}
|
|
69
|
+
export interface InferenceLogEntry {
|
|
70
|
+
databaseId: string;
|
|
71
|
+
entityId: string;
|
|
72
|
+
actorId: string | null;
|
|
73
|
+
model: string;
|
|
74
|
+
provider: string | null;
|
|
75
|
+
service: 'llm' | 'embedding' | 'tts' | 'stt' | 'ocr' | 'image_gen' | 'search' | 'compute';
|
|
76
|
+
operation: string;
|
|
77
|
+
inputTokens: number;
|
|
78
|
+
outputTokens: number;
|
|
79
|
+
totalTokens: number;
|
|
80
|
+
cacheReadTokens: number | null;
|
|
81
|
+
cacheWriteTokens: number | null;
|
|
82
|
+
latencyMs: number;
|
|
83
|
+
ragEnabled: boolean;
|
|
84
|
+
chunksRetrieved: number | null;
|
|
85
|
+
embeddingModel: string | null;
|
|
86
|
+
embeddingLatencyMs: number | null;
|
|
87
|
+
status: 'success' | 'quota_exceeded' | 'provider_error' | 'timeout';
|
|
88
|
+
errorType: string | null;
|
|
89
|
+
rawUsage: Record<string, unknown> | null;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Write a row to the usage_log_inference table.
|
|
93
|
+
* Gracefully skips if the inference_log_module is not provisioned.
|
|
94
|
+
*
|
|
95
|
+
* TODO: Also write to child (generated) database when dual-write is needed.
|
|
96
|
+
*/
|
|
97
|
+
export declare function logInferenceUsage(ctx: MeteringContext, entry: InferenceLogEntry): Promise<void>;
|
|
98
|
+
/**
|
|
99
|
+
* Wrap an embedder with billing quota check + usage recording.
|
|
100
|
+
*
|
|
101
|
+
* The returned MeterResult contains `quotaExceeded: true` when the pre-check
|
|
102
|
+
* fails, enabling the caller to fall back to text-only search.
|
|
103
|
+
*/
|
|
104
|
+
export declare function meteredEmbed(embedder: EmbedderFunction, text: string, ctx: MeteringContext | null, options?: MeteringOptions): Promise<MeterResult<number[]>>;
|
|
105
|
+
/**
|
|
106
|
+
* Wrap a chat completion call with billing quota check + usage recording.
|
|
107
|
+
*/
|
|
108
|
+
export declare function meteredChat(chat: ChatFunction, messages: ChatMessage[], ctx: MeteringContext | null, chatOptions?: ChatOptions, meteringOptions?: MeteringOptions): Promise<MeterResult<string>>;
|
|
109
|
+
export declare class QuotaExceededError extends Error {
|
|
110
|
+
readonly code = "QUOTA_EXCEEDED";
|
|
111
|
+
readonly meterSlug: string;
|
|
112
|
+
readonly entityId: string;
|
|
113
|
+
constructor(meterSlug: string, entityId: string);
|
|
114
|
+
}
|