graphile-llm 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,8 +34,8 @@
34
34
  Object.defineProperty(exports, "__esModule", { value: true });
35
35
  exports.createLlmMeteringPlugin = createLlmMeteringPlugin;
36
36
  const node_async_hooks_1 = require("node:async_hooks");
37
- const metering_1 = require("../metering");
38
37
  const config_cache_1 = require("../config-cache");
38
+ const metering_1 = require("../metering");
39
39
  // ─── Request-scoped context via AsyncLocalStorage ───────────────────────────
40
40
  const meteringStore = new node_async_hooks_1.AsyncLocalStorage();
41
41
  // ─── Helpers ────────────────────────────────────────────────────────────────
@@ -75,7 +75,7 @@ async function buildMeteringContext(graphqlContext, resolveEntityId) {
75
75
  requestId,
76
76
  databaseId,
77
77
  actorId,
78
- inferenceLog: inferenceLogConfig,
78
+ inferenceLog: inferenceLogConfig
79
79
  };
80
80
  }
81
81
  /**
@@ -92,10 +92,10 @@ function wrapEmbedderWithMetering(embedder, meteringOptions) {
92
92
  if (!ctx) {
93
93
  // No metering context in scope — call original embedder directly
94
94
  const startTime = Date.now();
95
- const result = await embedder(text);
95
+ const { embedding } = await embedder(text);
96
96
  const latencyMs = Date.now() - startTime;
97
- console.log(`[graphile-llm] Embed (unmetered): dims=${result?.length ?? 0}, latency=${latencyMs}ms`);
98
- return result;
97
+ console.log(`[graphile-llm] Embed (unmetered): dims=${embedding?.length ?? 0}, latency=${latencyMs}ms`);
98
+ return embedding;
99
99
  }
100
100
  const result = await (0, metering_1.meteredEmbed)(embedder, text, ctx, meteringOptions);
101
101
  if (result.quotaExceeded) {
@@ -106,7 +106,7 @@ function wrapEmbedderWithMetering(embedder, meteringOptions) {
106
106
  }
107
107
  // ─── Plugin ─────────────────────────────────────────────────────────────────
108
108
  function createLlmMeteringPlugin(meteringConfig = {}) {
109
- const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId, } = meteringConfig;
109
+ const { embeddingMeterSlug: configEmbeddingSlug, chatMeterSlug: configChatSlug, skipMetering, resolveEntityId = defaultResolveEntityId } = meteringConfig;
110
110
  return {
111
111
  name: 'LlmMeteringPlugin',
112
112
  version: '0.2.0',
@@ -137,13 +137,13 @@ function createLlmMeteringPlugin(meteringConfig = {}) {
137
137
  chatMeterSlug: chatSlug,
138
138
  skipMetering,
139
139
  embeddingModel: embeddingModel ?? undefined,
140
- chatModel: chatModel ?? undefined,
140
+ chatModel: chatModel ?? undefined
141
141
  };
142
142
  // Replace the embedder with a metered version.
143
143
  // Same signature except it can return null (quota exceeded).
144
144
  const meteredEmbedder = wrapEmbedderWithMetering(originalEmbedder, meteringOptions);
145
145
  return build.extend(build, {
146
- llmEmbedder: meteredEmbedder,
146
+ llmEmbedder: meteredEmbedder
147
147
  }, 'LlmMeteringPlugin replacing llmEmbedder with metered version');
148
148
  },
149
149
  /**
@@ -151,7 +151,7 @@ function createLlmMeteringPlugin(meteringConfig = {}) {
151
151
  * request-scoped metering context via AsyncLocalStorage.
152
152
  */
153
153
  GraphQLObjectType_fields_field(field, build, context) {
154
- const { scope: { isRootQuery, isRootMutation }, } = context;
154
+ const { scope: { isRootQuery, isRootMutation } } = context;
155
155
  if (!isRootQuery && !isRootMutation)
156
156
  return field;
157
157
  // Only wrap if we actually replaced the embedder
@@ -169,10 +169,10 @@ function createLlmMeteringPlugin(meteringConfig = {}) {
169
169
  return meteringStore.run(ctx, () => {
170
170
  return oldResolve(source, args, graphqlContext, info);
171
171
  });
172
- },
172
+ }
173
173
  };
174
- },
175
- },
176
- },
174
+ }
175
+ }
176
+ }
177
177
  };
178
178
  }
@@ -65,7 +65,7 @@ function parseHasChunksTag(raw, codec) {
65
65
  parentFkField: parsed.parentFk || 'parent_id',
66
66
  parentPkField: parsed.parentPk || 'id',
67
67
  embeddingField: parsed.embeddingField || 'embedding',
68
- contentField: parsed.contentField || 'content',
68
+ contentField: parsed.contentField || 'content'
69
69
  };
70
70
  }
71
71
  /**
@@ -225,10 +225,10 @@ function createLlmRagPlugin(ragDefaults = {}) {
225
225
  minSimilarity: $minSimilarity,
226
226
  systemPrompt: $systemPrompt,
227
227
  withPgClient: $withPgClient,
228
- pgSettings: $pgSettings,
228
+ pgSettings: $pgSettings
229
229
  });
230
230
  return (0, grafast_1.lambda)($combined, async (input) => {
231
- const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings, } = input;
231
+ const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings } = input;
232
232
  if (!prompt || typeof prompt !== 'string') {
233
233
  throw new Error('RAG_INVALID_PROMPT: prompt is required');
234
234
  }
@@ -247,7 +247,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
247
247
  const systemPromptTemplate = querySystemPrompt ?? ragDefaults.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
248
248
  // Step 1: Embed the prompt
249
249
  const startEmbed = Date.now();
250
- const vector = await embedder(prompt);
250
+ const { embedding: vector } = await embedder(prompt);
251
251
  const embedLatency = Date.now() - startEmbed;
252
252
  const vectorString = `[${vector.join(',')}]`;
253
253
  console.log(`[graphile-llm] RAG embed: dims=${vector.length}, latency=${embedLatency}ms`);
@@ -263,7 +263,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
263
263
  content: row.content,
264
264
  parent_id: row.parent_id,
265
265
  distance: parseFloat(row.distance),
266
- table_name: table.parentCodecName,
266
+ table_name: table.parentCodecName
267
267
  });
268
268
  }
269
269
  }
@@ -277,31 +277,31 @@ function createLlmRagPlugin(ragDefaults = {}) {
277
277
  answer: 'No relevant context found for your query. ' +
278
278
  'Try broadening your search or lowering the minimum similarity threshold.',
279
279
  sources: [],
280
- tokensUsed: null,
280
+ tokensUsed: null
281
281
  };
282
282
  }
283
283
  // Step 3: Assemble context
284
284
  const contextText = assembleContext(topChunks);
285
285
  // Step 4: Call chat completion
286
286
  const startChat = Date.now();
287
- const answer = await chatCompleter([
287
+ const chatResult = await chatCompleter([
288
288
  { role: 'system', content: systemPromptTemplate + contextText },
289
- { role: 'user', content: prompt },
289
+ { role: 'user', content: prompt }
290
290
  ], {
291
- maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS,
291
+ maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS
292
292
  });
293
293
  const chatLatency = Date.now() - startChat;
294
- console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, latency=${chatLatency}ms`);
294
+ console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, tokens=${chatResult.usage.totalTokens}, latency=${chatLatency}ms`);
295
295
  // Step 5: Return response
296
296
  return {
297
- answer,
297
+ answer: chatResult.content,
298
298
  sources: topChunks.map((chunk) => ({
299
299
  content: chunk.content,
300
300
  similarity: 1 - chunk.distance,
301
301
  tableName: chunk.table_name,
302
- parentId: chunk.parent_id,
302
+ parentId: chunk.parent_id
303
303
  })),
304
- tokensUsed: null, // Deferred to metering system
304
+ tokensUsed: chatResult.usage.totalTokens
305
305
  };
306
306
  });
307
307
  },
@@ -316,17 +316,17 @@ function createLlmRagPlugin(ragDefaults = {}) {
316
316
  'to use embedText. Set defaultEmbedder in GraphileLlmPreset options.');
317
317
  }
318
318
  const startTime = Date.now();
319
- const vector = await embedder(text);
319
+ const { embedding: vector } = await embedder(text);
320
320
  const latencyMs = Date.now() - startTime;
321
321
  console.log(`[graphile-llm] embedText: dims=${vector.length}, latency=${latencyMs}ms`);
322
322
  return {
323
323
  vector,
324
- dimensions: vector.length,
324
+ dimensions: vector.length
325
325
  };
326
326
  });
327
- },
328
- },
329
- },
327
+ }
328
+ }
329
+ }
330
330
  };
331
331
  });
332
332
  return {
@@ -338,7 +338,7 @@ function createLlmRagPlugin(ragDefaults = {}) {
338
338
  after: [
339
339
  'LlmModulePlugin',
340
340
  'UnifiedSearchPlugin',
341
- 'VectorCodecPlugin',
342
- ],
341
+ 'VectorCodecPlugin'
342
+ ]
343
343
  };
344
344
  }
@@ -48,7 +48,7 @@ function getTextToVectorMapping(pgCodec, build) {
48
48
  if (isVectorCodec(attribute.codec)) {
49
49
  const fieldName = build.inflection.attribute({
50
50
  codec: pgCodec,
51
- attributeName,
51
+ attributeName
52
52
  });
53
53
  mapping[`${fieldName}Text`] = fieldName;
54
54
  }
@@ -76,7 +76,7 @@ function createLlmTextMutationPlugin() {
76
76
  'PgAttributesPlugin',
77
77
  'PgMutationCreatePlugin',
78
78
  'PgMutationUpdateDeletePlugin',
79
- 'VectorCodecPlugin',
79
+ 'VectorCodecPlugin'
80
80
  ],
81
81
  schema: {
82
82
  hooks: {
@@ -85,12 +85,12 @@ function createLlmTextMutationPlugin() {
85
85
  * for tables that have vector columns.
86
86
  */
87
87
  GraphQLInputObjectType_fields(fields, build, context) {
88
- const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec, }, } = context;
88
+ const { scope: { isPgPatch, isPgBaseInput, isMutationInput, pgCodec } } = context;
89
89
  // Only intercept create/update input types for table rows
90
90
  if (!pgCodec?.attributes || (!isPgPatch && !isPgBaseInput && !isMutationInput)) {
91
91
  return fields;
92
92
  }
93
- const { graphql: { GraphQLString }, } = build;
93
+ const { graphql: { GraphQLString } } = build;
94
94
  // Find vector columns on this table
95
95
  const vectorColumns = [];
96
96
  for (const [attributeName, attribute] of Object.entries(pgCodec.attributes)) {
@@ -106,7 +106,7 @@ function createLlmTextMutationPlugin() {
106
106
  // Convert snake_case column name to camelCase field name
107
107
  const fieldName = build.inflection.attribute({
108
108
  codec: pgCodec,
109
- attributeName: columnName,
109
+ attributeName: columnName
110
110
  });
111
111
  const textFieldName = `${fieldName}Text`;
112
112
  newFields = build.extend(newFields, {
@@ -114,8 +114,8 @@ function createLlmTextMutationPlugin() {
114
114
  type: GraphQLString,
115
115
  description: `Natural language text to embed server-side into the \`${fieldName}\` vector column. ` +
116
116
  `Mutually exclusive with \`${fieldName}\` — provide one or the other. ` +
117
- 'Requires the LLM plugin to be configured with an embedding provider.',
118
- },
117
+ 'Requires the LLM plugin to be configured with an embedding provider.'
118
+ }
119
119
  }, `LlmTextMutationPlugin adding ${textFieldName} companion field for vector column '${columnName}'`);
120
120
  }
121
121
  return newFields;
@@ -132,7 +132,7 @@ function createLlmTextMutationPlugin() {
132
132
  * If the embedder returns null (e.g. quota exceeded), throws an error.
133
133
  */
134
134
  GraphQLObjectType_fields_field(field, build, context) {
135
- const { scope: { isRootMutation, fieldName, pgCodec }, } = context;
135
+ const { scope: { isRootMutation, fieldName, pgCodec } } = context;
136
136
  // Only wrap root mutation fields on tables with attributes
137
137
  if (!isRootMutation || !pgCodec || !pgCodec.attributes) {
138
138
  return field;
@@ -195,10 +195,10 @@ function createLlmTextMutationPlugin() {
195
195
  }
196
196
  await embedTextFields(args);
197
197
  return oldResolve(source, args, graphqlContext, info);
198
- },
198
+ }
199
199
  };
200
- },
201
- },
202
- },
200
+ }
201
+ }
202
+ }
203
203
  };
204
204
  }
@@ -105,7 +105,7 @@ function createLlmTextSearchPlugin() {
105
105
  after: [
106
106
  'LlmModulePlugin',
107
107
  'UnifiedSearchPlugin',
108
- 'VectorCodecPlugin',
108
+ 'VectorCodecPlugin'
109
109
  ],
110
110
  schema: {
111
111
  hooks: {
@@ -116,18 +116,18 @@ function createLlmTextSearchPlugin() {
116
116
  * The field is optional — clients provide either `text` or `vector`.
117
117
  */
118
118
  GraphQLInputObjectType_fields(fields, build, context) {
119
- const { scope: { inputObjectTypeName }, } = context;
119
+ const { scope: { inputObjectTypeName } } = context;
120
120
  if (inputObjectTypeName !== 'VectorNearbyInput') {
121
121
  return fields;
122
122
  }
123
- const { graphql: { GraphQLString }, } = build;
123
+ const { graphql: { GraphQLString } } = build;
124
124
  return build.extend(fields, {
125
125
  text: {
126
126
  type: GraphQLString,
127
127
  description: 'Natural language text to embed server-side for similarity search. ' +
128
128
  'Mutually exclusive with `vector` — provide one or the other. ' +
129
- 'Requires the LLM plugin to be configured with an embedding provider.',
130
- },
129
+ 'Requires the LLM plugin to be configured with an embedding provider.'
130
+ }
131
131
  }, 'LlmTextSearchPlugin adding text field to VectorNearbyInput');
132
132
  },
133
133
  /**
@@ -139,7 +139,7 @@ function createLlmTextSearchPlugin() {
139
139
  * and graphile-bucket-provisioner-plugin.
140
140
  */
141
141
  GraphQLObjectType_fields_field(field, build, context) {
142
- const { scope: { isRootQuery, pgCodec }, } = context;
142
+ const { scope: { isRootQuery, pgCodec } } = context;
143
143
  // Only wrap root query fields on tables with vector columns
144
144
  if (!isRootQuery || !pgCodec || !hasVectorColumns(pgCodec)) {
145
145
  return field;
@@ -161,7 +161,7 @@ function createLlmTextSearchPlugin() {
161
161
  await embedTextInWhere(args.filter, embedder);
162
162
  }
163
163
  return oldResolve(source, args, graphqlContext, info);
164
- },
164
+ }
165
165
  };
166
166
  },
167
167
  finalize(schema, build) {
@@ -171,8 +171,8 @@ function createLlmTextSearchPlugin() {
171
171
  'will return errors if used. Configure an embedding provider to enable.');
172
172
  }
173
173
  return schema;
174
- },
175
- },
176
- },
174
+ }
175
+ }
176
+ }
177
177
  };
178
178
  }
package/preset.js CHANGED
@@ -67,10 +67,10 @@
67
67
  Object.defineProperty(exports, "__esModule", { value: true });
68
68
  exports.GraphileLlmPreset = GraphileLlmPreset;
69
69
  const llm_module_plugin_1 = require("./plugins/llm-module-plugin");
70
- const text_search_plugin_1 = require("./plugins/text-search-plugin");
71
- const text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
72
- const rag_plugin_1 = require("./plugins/rag-plugin");
73
70
  const metering_plugin_1 = require("./plugins/metering-plugin");
71
+ const rag_plugin_1 = require("./plugins/rag-plugin");
72
+ const text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
73
+ const text_search_plugin_1 = require("./plugins/text-search-plugin");
74
74
  /**
75
75
  * Creates a preset that includes all LLM plugins.
76
76
  *
@@ -78,9 +78,9 @@ const metering_plugin_1 = require("./plugins/metering-plugin");
78
78
  * @returns A GraphileConfig.Preset to add to your extends array
79
79
  */
80
80
  function GraphileLlmPreset(options = {}) {
81
- const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering, } = options;
81
+ const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering } = options;
82
82
  const plugins = [
83
- (0, llm_module_plugin_1.createLlmModulePlugin)(options),
83
+ (0, llm_module_plugin_1.createLlmModulePlugin)(options)
84
84
  ];
85
85
  // Metering is opt-in: only loaded when metering is truthy
86
86
  // (true, or a MeteringConfig object)
@@ -98,7 +98,7 @@ function GraphileLlmPreset(options = {}) {
98
98
  plugins.push((0, rag_plugin_1.createLlmRagPlugin)(ragDefaults));
99
99
  }
100
100
  return {
101
- plugins,
101
+ plugins
102
102
  };
103
103
  }
104
104
  exports.default = GraphileLlmPreset;
package/types.d.ts CHANGED
@@ -4,9 +4,18 @@
4
4
  * Shared type definitions for the LLM plugin.
5
5
  */
6
6
  /**
7
- * A function that converts text into a vector embedding.
7
+ * Result from an embedding call, including real token usage from the provider.
8
8
  */
9
- export type EmbedderFunction = (text: string) => Promise<number[]>;
9
+ export interface EmbeddingResult {
10
+ /** The vector embedding */
11
+ embedding: number[];
12
+ /** Number of prompt tokens consumed (from provider; 0 if unavailable) */
13
+ promptTokens: number;
14
+ }
15
+ /**
16
+ * A function that converts text into a vector embedding with token usage.
17
+ */
18
+ export type EmbedderFunction = (text: string) => Promise<EmbeddingResult>;
10
19
  /**
11
20
  * Configuration for an embedding provider.
12
21
  */
@@ -18,6 +27,24 @@ export interface EmbedderConfig {
18
27
  /** Base URL for the provider (e.g. 'http://localhost:11434' for Ollama) */
19
28
  baseUrl?: string;
20
29
  }
30
+ /**
31
+ * Token usage metadata returned by LLM providers.
32
+ * Maps to the billing schema's inference_log columns.
33
+ */
34
+ export interface LlmUsage {
35
+ /** Prompt / input tokens consumed */
36
+ input: number;
37
+ /** Completion / output tokens generated (includes reasoning for providers that count it) */
38
+ output: number;
39
+ /** Reasoning tokens (subset of output — not additive) */
40
+ reasoning: number;
41
+ /** Tokens served from prompt cache (zero cost) */
42
+ cacheRead: number;
43
+ /** Tokens written to prompt cache */
44
+ cacheWrite: number;
45
+ /** input + output + cacheRead + cacheWrite */
46
+ totalTokens: number;
47
+ }
21
48
  /**
22
49
  * A single message in a chat conversation.
23
50
  */
@@ -35,9 +62,17 @@ export interface ChatOptions {
35
62
  temperature?: number;
36
63
  }
37
64
  /**
38
- * A function that sends messages to a chat completion provider and returns the response.
65
+ * Result from a chat completion call, including real token usage.
66
+ */
67
+ export interface ChatResult {
68
+ content: string;
69
+ usage: LlmUsage;
70
+ }
71
+ /**
72
+ * A function that sends messages to a chat completion provider
73
+ * and returns the response with token usage metadata.
39
74
  */
40
- export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<string>;
75
+ export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<ChatResult>;
41
76
  /**
42
77
  * Configuration for a chat completion provider.
43
78
  */