graphile-llm 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -102,7 +102,7 @@ export function createLlmTextSearchPlugin() {
102
102
  after: [
103
103
  'LlmModulePlugin',
104
104
  'UnifiedSearchPlugin',
105
- 'VectorCodecPlugin',
105
+ 'VectorCodecPlugin'
106
106
  ],
107
107
  schema: {
108
108
  hooks: {
@@ -113,18 +113,18 @@ export function createLlmTextSearchPlugin() {
113
113
  * The field is optional — clients provide either `text` or `vector`.
114
114
  */
115
115
  GraphQLInputObjectType_fields(fields, build, context) {
116
- const { scope: { inputObjectTypeName }, } = context;
116
+ const { scope: { inputObjectTypeName } } = context;
117
117
  if (inputObjectTypeName !== 'VectorNearbyInput') {
118
118
  return fields;
119
119
  }
120
- const { graphql: { GraphQLString }, } = build;
120
+ const { graphql: { GraphQLString } } = build;
121
121
  return build.extend(fields, {
122
122
  text: {
123
123
  type: GraphQLString,
124
124
  description: 'Natural language text to embed server-side for similarity search. ' +
125
125
  'Mutually exclusive with `vector` — provide one or the other. ' +
126
- 'Requires the LLM plugin to be configured with an embedding provider.',
127
- },
126
+ 'Requires the LLM plugin to be configured with an embedding provider.'
127
+ }
128
128
  }, 'LlmTextSearchPlugin adding text field to VectorNearbyInput');
129
129
  },
130
130
  /**
@@ -136,7 +136,7 @@ export function createLlmTextSearchPlugin() {
136
136
  * and graphile-bucket-provisioner-plugin.
137
137
  */
138
138
  GraphQLObjectType_fields_field(field, build, context) {
139
- const { scope: { isRootQuery, pgCodec }, } = context;
139
+ const { scope: { isRootQuery, pgCodec } } = context;
140
140
  // Only wrap root query fields on tables with vector columns
141
141
  if (!isRootQuery || !pgCodec || !hasVectorColumns(pgCodec)) {
142
142
  return field;
@@ -158,7 +158,7 @@ export function createLlmTextSearchPlugin() {
158
158
  await embedTextInWhere(args.filter, embedder);
159
159
  }
160
160
  return oldResolve(source, args, graphqlContext, info);
161
- },
161
+ }
162
162
  };
163
163
  },
164
164
  finalize(schema, build) {
@@ -168,8 +168,8 @@ export function createLlmTextSearchPlugin() {
168
168
  'will return errors if used. Configure an embedding provider to enable.');
169
169
  }
170
170
  return schema;
171
- },
172
- },
173
- },
171
+ }
172
+ }
173
+ }
174
174
  };
175
175
  }
package/esm/preset.js CHANGED
@@ -64,10 +64,10 @@
64
64
  * ```
65
65
  */
66
66
  import { createLlmModulePlugin } from './plugins/llm-module-plugin';
67
- import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
68
- import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
69
- import { createLlmRagPlugin } from './plugins/rag-plugin';
70
67
  import { createLlmMeteringPlugin } from './plugins/metering-plugin';
68
+ import { createLlmRagPlugin } from './plugins/rag-plugin';
69
+ import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
70
+ import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
71
71
  /**
72
72
  * Creates a preset that includes all LLM plugins.
73
73
  *
@@ -75,9 +75,9 @@ import { createLlmMeteringPlugin } from './plugins/metering-plugin';
75
75
  * @returns A GraphileConfig.Preset to add to your extends array
76
76
  */
77
77
  export function GraphileLlmPreset(options = {}) {
78
- const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering, } = options;
78
+ const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering } = options;
79
79
  const plugins = [
80
- createLlmModulePlugin(options),
80
+ createLlmModulePlugin(options)
81
81
  ];
82
82
  // Metering is opt-in: only loaded when metering is truthy
83
83
  // (true, or a MeteringConfig object)
@@ -95,7 +95,7 @@ export function GraphileLlmPreset(options = {}) {
95
95
  plugins.push(createLlmRagPlugin(ragDefaults));
96
96
  }
97
97
  return {
98
- plugins,
98
+ plugins
99
99
  };
100
100
  }
101
101
  export default GraphileLlmPreset;
package/esm/types.d.ts CHANGED
@@ -4,9 +4,18 @@
4
4
  * Shared type definitions for the LLM plugin.
5
5
  */
6
6
  /**
7
- * A function that converts text into a vector embedding.
7
+ * Result from an embedding call, including real token usage from the provider.
8
8
  */
9
- export type EmbedderFunction = (text: string) => Promise<number[]>;
9
+ export interface EmbeddingResult {
10
+ /** The vector embedding */
11
+ embedding: number[];
12
+ /** Number of prompt tokens consumed (from provider; 0 if unavailable) */
13
+ promptTokens: number;
14
+ }
15
+ /**
16
+ * A function that converts text into a vector embedding with token usage.
17
+ */
18
+ export type EmbedderFunction = (text: string) => Promise<EmbeddingResult>;
10
19
  /**
11
20
  * Configuration for an embedding provider.
12
21
  */
@@ -18,6 +27,24 @@ export interface EmbedderConfig {
18
27
  /** Base URL for the provider (e.g. 'http://localhost:11434' for Ollama) */
19
28
  baseUrl?: string;
20
29
  }
30
+ /**
31
+ * Token usage metadata returned by LLM providers.
32
+ * Maps to the billing schema's inference_log columns.
33
+ */
34
+ export interface LlmUsage {
35
+ /** Prompt / input tokens consumed */
36
+ input: number;
37
+ /** Completion / output tokens generated (includes reasoning for providers that count it) */
38
+ output: number;
39
+ /** Reasoning tokens (subset of output — not additive) */
40
+ reasoning: number;
41
+ /** Tokens served from prompt cache (zero cost) */
42
+ cacheRead: number;
43
+ /** Tokens written to prompt cache */
44
+ cacheWrite: number;
45
+ /** input + output + cacheRead + cacheWrite */
46
+ totalTokens: number;
47
+ }
21
48
  /**
22
49
  * A single message in a chat conversation.
23
50
  */
@@ -35,9 +62,17 @@ export interface ChatOptions {
35
62
  temperature?: number;
36
63
  }
37
64
  /**
38
- * A function that sends messages to a chat completion provider and returns the response.
65
+ * Result from a chat completion call, including real token usage.
66
+ */
67
+ export interface ChatResult {
68
+ content: string;
69
+ usage: LlmUsage;
70
+ }
71
+ /**
72
+ * A function that sends messages to a chat completion provider
73
+ * and returns the response with token usage metadata.
39
74
  */
40
- export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<string>;
75
+ export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<ChatResult>;
41
76
  /**
42
77
  * Configuration for a chat completion provider.
43
78
  */
package/index.d.ts CHANGED
@@ -29,20 +29,20 @@
29
29
  * };
30
30
  * ```
31
31
  */
32
- export { getLlmEnvOptions } from './env';
33
32
  export type { LlmEnvOptions, LlmProviderConfig } from './env';
33
+ export { getLlmEnvOptions } from './env';
34
34
  export { GraphileLlmPreset } from './preset';
35
35
  export { createLlmModulePlugin } from './plugins/llm-module-plugin';
36
- export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
37
- export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
38
36
  export { createLlmRagPlugin } from './plugins/rag-plugin';
37
+ export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
38
+ export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
39
39
  export { createLlmMeteringPlugin } from './plugins/metering-plugin';
40
- export { getAgentDiscovery, clearAgentDiscoveryCache } from './plugins/agent-discovery-plugin';
41
- export type { AgentTableInfo, AgentDiscovery } from './plugins/agent-discovery-plugin';
42
- export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
43
- export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
44
- export { meteredEmbed, meteredChat, logInferenceUsage, QuotaExceededError } from './metering';
45
- export type { MeteringContext, MeteringOptions, MeterResult, WithPgClient, InferenceLogEntry } from './metering';
46
- export { getLlmBillingConfig, invalidateLlmBillingConfig, getLlmBillingCacheStats, } from './config-cache';
47
- export type { BillingConfig, LlmBillingCacheEntry, InferenceLogConfig, PgClient } from './config-cache';
48
- export type { EmbedderFunction, EmbedderConfig, ChatFunction, ChatConfig, ChatMessage, ChatOptions, LlmModuleData, GraphileLlmOptions, MeteringConfig, RagDefaults, ChunkTableInfo, } from './types';
40
+ export type { AgentDiscovery, AgentTableInfo } from './plugins/agent-discovery-plugin';
41
+ export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
42
+ export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
43
+ export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
44
+ export type { InferenceLogEntry, MeteringContext, MeteringOptions, MeterResult, WithPgClient } from './metering';
45
+ export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
46
+ export type { BillingConfig, InferenceLogConfig, LlmBillingCacheEntry, PgClient } from './config-cache';
47
+ export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
48
+ export type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, ChatResult, ChunkTableInfo, EmbedderConfig, EmbedderFunction, EmbeddingResult, GraphileLlmOptions, LlmModuleData, LlmUsage, MeteringConfig, RagDefaults } from './types';
package/index.js CHANGED
@@ -31,8 +31,7 @@
31
31
  * ```
32
32
  */
33
33
  Object.defineProperty(exports, "__esModule", { value: true });
34
- exports.getLlmBillingCacheStats = exports.invalidateLlmBillingConfig = exports.getLlmBillingConfig = exports.QuotaExceededError = exports.logInferenceUsage = exports.meteredChat = exports.meteredEmbed = exports.buildChatCompleterFromEnv = exports.buildChatCompleterFromModule = exports.buildChatCompleter = exports.buildEmbedderFromEnv = exports.buildEmbedderFromModule = exports.buildEmbedder = exports.clearAgentDiscoveryCache = exports.getAgentDiscovery = exports.createLlmMeteringPlugin = exports.createLlmRagPlugin = exports.createLlmTextMutationPlugin = exports.createLlmTextSearchPlugin = exports.createLlmModulePlugin = exports.GraphileLlmPreset = exports.getLlmEnvOptions = void 0;
35
- // Environment configuration (single source of truth for LLM defaults)
34
+ exports.invalidateLlmBillingConfig = exports.getLlmBillingConfig = exports.getLlmBillingCacheStats = exports.QuotaExceededError = exports.meteredEmbed = exports.meteredChat = exports.logInferenceUsage = exports.buildChatCompleterFromModule = exports.buildChatCompleterFromEnv = exports.buildChatCompleter = exports.buildEmbedderFromModule = exports.buildEmbedderFromEnv = exports.buildEmbedder = exports.getAgentDiscovery = exports.clearAgentDiscoveryCache = exports.createLlmMeteringPlugin = exports.createLlmTextSearchPlugin = exports.createLlmTextMutationPlugin = exports.createLlmRagPlugin = exports.createLlmModulePlugin = exports.GraphileLlmPreset = exports.getLlmEnvOptions = void 0;
36
35
  var env_1 = require("./env");
37
36
  Object.defineProperty(exports, "getLlmEnvOptions", { enumerable: true, get: function () { return env_1.getLlmEnvOptions; } });
38
37
  // Preset (recommended entry point)
@@ -41,37 +40,34 @@ Object.defineProperty(exports, "GraphileLlmPreset", { enumerable: true, get: fun
41
40
  // Individual plugins (pure — no billing dependency)
42
41
  var llm_module_plugin_1 = require("./plugins/llm-module-plugin");
43
42
  Object.defineProperty(exports, "createLlmModulePlugin", { enumerable: true, get: function () { return llm_module_plugin_1.createLlmModulePlugin; } });
44
- var text_search_plugin_1 = require("./plugins/text-search-plugin");
45
- Object.defineProperty(exports, "createLlmTextSearchPlugin", { enumerable: true, get: function () { return text_search_plugin_1.createLlmTextSearchPlugin; } });
46
- var text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
47
- Object.defineProperty(exports, "createLlmTextMutationPlugin", { enumerable: true, get: function () { return text_mutation_plugin_1.createLlmTextMutationPlugin; } });
48
43
  var rag_plugin_1 = require("./plugins/rag-plugin");
49
44
  Object.defineProperty(exports, "createLlmRagPlugin", { enumerable: true, get: function () { return rag_plugin_1.createLlmRagPlugin; } });
45
+ var text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
46
+ Object.defineProperty(exports, "createLlmTextMutationPlugin", { enumerable: true, get: function () { return text_mutation_plugin_1.createLlmTextMutationPlugin; } });
47
+ var text_search_plugin_1 = require("./plugins/text-search-plugin");
48
+ Object.defineProperty(exports, "createLlmTextSearchPlugin", { enumerable: true, get: function () { return text_search_plugin_1.createLlmTextSearchPlugin; } });
50
49
  // Metering plugin (opt-in billing integration)
51
50
  var metering_plugin_1 = require("./plugins/metering-plugin");
52
51
  Object.defineProperty(exports, "createLlmMeteringPlugin", { enumerable: true, get: function () { return metering_plugin_1.createLlmMeteringPlugin; } });
53
- // Agent discovery (queries agent_chat_module config table at runtime)
54
52
  var agent_discovery_plugin_1 = require("./plugins/agent-discovery-plugin");
55
- Object.defineProperty(exports, "getAgentDiscovery", { enumerable: true, get: function () { return agent_discovery_plugin_1.getAgentDiscovery; } });
56
53
  Object.defineProperty(exports, "clearAgentDiscoveryCache", { enumerable: true, get: function () { return agent_discovery_plugin_1.clearAgentDiscoveryCache; } });
54
+ Object.defineProperty(exports, "getAgentDiscovery", { enumerable: true, get: function () { return agent_discovery_plugin_1.getAgentDiscovery; } });
57
55
  // Embedder utilities
58
56
  var embedder_1 = require("./embedder");
59
57
  Object.defineProperty(exports, "buildEmbedder", { enumerable: true, get: function () { return embedder_1.buildEmbedder; } });
60
- Object.defineProperty(exports, "buildEmbedderFromModule", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromModule; } });
61
58
  Object.defineProperty(exports, "buildEmbedderFromEnv", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromEnv; } });
59
+ Object.defineProperty(exports, "buildEmbedderFromModule", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromModule; } });
62
60
  // Chat completion utilities
63
61
  var chat_1 = require("./chat");
64
62
  Object.defineProperty(exports, "buildChatCompleter", { enumerable: true, get: function () { return chat_1.buildChatCompleter; } });
65
- Object.defineProperty(exports, "buildChatCompleterFromModule", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromModule; } });
66
63
  Object.defineProperty(exports, "buildChatCompleterFromEnv", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromEnv; } });
67
- // Metering utilities (for custom integration)
64
+ Object.defineProperty(exports, "buildChatCompleterFromModule", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromModule; } });
68
65
  var metering_1 = require("./metering");
69
- Object.defineProperty(exports, "meteredEmbed", { enumerable: true, get: function () { return metering_1.meteredEmbed; } });
70
- Object.defineProperty(exports, "meteredChat", { enumerable: true, get: function () { return metering_1.meteredChat; } });
71
66
  Object.defineProperty(exports, "logInferenceUsage", { enumerable: true, get: function () { return metering_1.logInferenceUsage; } });
67
+ Object.defineProperty(exports, "meteredChat", { enumerable: true, get: function () { return metering_1.meteredChat; } });
68
+ Object.defineProperty(exports, "meteredEmbed", { enumerable: true, get: function () { return metering_1.meteredEmbed; } });
72
69
  Object.defineProperty(exports, "QuotaExceededError", { enumerable: true, get: function () { return metering_1.QuotaExceededError; } });
73
- // Config cache (for custom integration)
74
70
  var config_cache_1 = require("./config-cache");
71
+ Object.defineProperty(exports, "getLlmBillingCacheStats", { enumerable: true, get: function () { return config_cache_1.getLlmBillingCacheStats; } });
75
72
  Object.defineProperty(exports, "getLlmBillingConfig", { enumerable: true, get: function () { return config_cache_1.getLlmBillingConfig; } });
76
73
  Object.defineProperty(exports, "invalidateLlmBillingConfig", { enumerable: true, get: function () { return config_cache_1.invalidateLlmBillingConfig; } });
77
- Object.defineProperty(exports, "getLlmBillingCacheStats", { enumerable: true, get: function () { return config_cache_1.getLlmBillingCacheStats; } });
package/metering.d.ts CHANGED
@@ -9,16 +9,16 @@
9
9
  * When the quota check fails, the wrapper returns null (graceful degradation)
10
10
  * instead of throwing, so the search pipeline can fall back to text-only.
11
11
  *
12
- * Token counts are estimated from text length (~4 chars per token). No
13
- * tokenizer needed the billing system uses tokens as abstract units
14
- * and the credit_cost on each model's meter normalizes the relative expense.
12
+ * Token counts:
13
+ * - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
14
+ * - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
15
15
  *
16
16
  * The billing functions live in the tenant database and are called via the
17
17
  * Graphile `withPgClient` callback. Function locations (schema, names) are
18
18
  * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
19
19
  */
20
- import type { PgClient, BillingConfig, InferenceLogConfig } from './config-cache';
21
- import type { EmbedderFunction, ChatFunction, ChatMessage, ChatOptions } from './types';
20
+ import type { BillingConfig, InferenceLogConfig, PgClient } from './config-cache';
21
+ import type { ChatFunction, ChatMessage, ChatOptions, EmbedderFunction } from './types';
22
22
  /**
23
23
  * Callback matching Graphile's withPgClient signature.
24
24
  * Acquires a pg client, calls the callback, then releases the client.
package/metering.js CHANGED
@@ -10,9 +10,9 @@
10
10
  * When the quota check fails, the wrapper returns null (graceful degradation)
11
11
  * instead of throwing, so the search pipeline can fall back to text-only.
12
12
  *
13
- * Token counts are estimated from text length (~4 chars per token). No
14
- * tokenizer needed the billing system uses tokens as abstract units
15
- * and the credit_cost on each model's meter normalizes the relative expense.
13
+ * Token counts:
14
+ * - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
15
+ * - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
16
16
  *
17
17
  * The billing functions live in the tenant database and are called via the
18
18
  * Graphile `withPgClient` callback. Function locations (schema, names) are
@@ -94,7 +94,7 @@ async function logInferenceUsage(ctx, entry) {
94
94
  entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
95
95
  entry.embeddingModel, entry.embeddingLatencyMs,
96
96
  entry.status, entry.errorType,
97
- entry.rawUsage ? JSON.stringify(entry.rawUsage) : null,
97
+ entry.rawUsage ? JSON.stringify(entry.rawUsage) : null
98
98
  ]);
99
99
  });
100
100
  }
@@ -114,31 +114,31 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
114
114
  const startTime = Date.now();
115
115
  // No billing context → just embed without metering
116
116
  if (!ctx) {
117
- const result = await embedder(text);
117
+ const { embedding } = await embedder(text);
118
118
  return {
119
- result,
119
+ result: embedding,
120
120
  metered: false,
121
121
  quotaExceeded: false,
122
- latencyMs: Date.now() - startTime,
122
+ latencyMs: Date.now() - startTime
123
123
  };
124
124
  }
125
125
  const meterSlug = options.embeddingMeterSlug;
126
126
  if (!meterSlug) {
127
- const result = await embedder(text);
127
+ const { embedding } = await embedder(text);
128
128
  return {
129
- result,
129
+ result: embedding,
130
130
  metered: false,
131
131
  quotaExceeded: false,
132
- latencyMs: Date.now() - startTime,
132
+ latencyMs: Date.now() - startTime
133
133
  };
134
134
  }
135
135
  if (options.skipMetering) {
136
- const result = await embedder(text);
136
+ const { embedding } = await embedder(text);
137
137
  return {
138
- result,
138
+ result: embedding,
139
139
  metered: false,
140
140
  quotaExceeded: false,
141
- latencyMs: Date.now() - startTime,
141
+ latencyMs: Date.now() - startTime
142
142
  };
143
143
  }
144
144
  // Pre-check: can this entity afford this call?
@@ -152,8 +152,6 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
152
152
  allowed = true;
153
153
  }
154
154
  if (!allowed) {
155
- // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
156
- const placeholderAmountTokens = Math.ceil(text.length / 4);
157
155
  logInferenceUsage(ctx, {
158
156
  databaseId: ctx.databaseId,
159
157
  entityId: ctx.entityId,
@@ -162,9 +160,9 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
162
160
  provider: options.provider ?? null,
163
161
  service: 'embedding',
164
162
  operation: 'create',
165
- inputTokens: placeholderAmountTokens,
163
+ inputTokens: 0,
166
164
  outputTokens: 0,
167
- totalTokens: placeholderAmountTokens,
165
+ totalTokens: 0,
168
166
  cacheReadTokens: null,
169
167
  cacheWriteTokens: null,
170
168
  latencyMs: Date.now() - startTime,
@@ -174,26 +172,25 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
174
172
  embeddingLatencyMs: null,
175
173
  status: 'quota_exceeded',
176
174
  errorType: null,
177
- rawUsage: null,
175
+ rawUsage: null
178
176
  }).catch(() => { });
179
177
  return {
180
178
  result: null,
181
179
  metered: true,
182
180
  quotaExceeded: true,
183
- latencyMs: Date.now() - startTime,
181
+ latencyMs: Date.now() - startTime
184
182
  };
185
183
  }
186
- // Execute embedding
187
- const result = await embedder(text);
184
+ // Execute embedding — real token count from provider via EmbeddingResult
185
+ const { embedding, promptTokens } = await embedder(text);
188
186
  const latencyMs = Date.now() - startTime;
189
- // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
190
- const placeholderAmountTokens = Math.ceil(text.length / 4);
191
187
  ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
192
- await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, text.length, {
188
+ await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, promptTokens, {
193
189
  request_id: ctx.requestId,
194
190
  input_chars: text.length,
195
- dims: result.length,
196
- latency_ms: latencyMs,
191
+ prompt_tokens: promptTokens,
192
+ dims: embedding.length,
193
+ latency_ms: latencyMs
197
194
  });
198
195
  }).catch(() => { });
199
196
  // Log to inference usage table
@@ -205,9 +202,9 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
205
202
  provider: options.provider ?? null,
206
203
  service: 'embedding',
207
204
  operation: 'create',
208
- inputTokens: placeholderAmountTokens,
205
+ inputTokens: promptTokens,
209
206
  outputTokens: 0,
210
- totalTokens: placeholderAmountTokens,
207
+ totalTokens: promptTokens,
211
208
  cacheReadTokens: null,
212
209
  cacheWriteTokens: null,
213
210
  latencyMs,
@@ -217,13 +214,13 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
217
214
  embeddingLatencyMs: latencyMs,
218
215
  status: 'success',
219
216
  errorType: null,
220
- rawUsage: null,
217
+ rawUsage: { prompt_tokens: promptTokens }
221
218
  }).catch(() => { });
222
219
  return {
223
- result,
220
+ result: embedding,
224
221
  metered: true,
225
222
  quotaExceeded: false,
226
- latencyMs,
223
+ latencyMs
227
224
  };
228
225
  }
229
226
  // ─── Metered Chat ───────────────────────────────────────────────────────────
@@ -233,31 +230,31 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
233
230
  async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {}) {
234
231
  const startTime = Date.now();
235
232
  if (!ctx) {
236
- const result = await chat(messages, chatOptions);
233
+ const chatResult = await chat(messages, chatOptions);
237
234
  return {
238
- result,
235
+ result: chatResult.content,
239
236
  metered: false,
240
237
  quotaExceeded: false,
241
- latencyMs: Date.now() - startTime,
238
+ latencyMs: Date.now() - startTime
242
239
  };
243
240
  }
244
241
  const meterSlug = meteringOptions.chatMeterSlug;
245
242
  if (!meterSlug) {
246
- const result = await chat(messages, chatOptions);
243
+ const chatResult = await chat(messages, chatOptions);
247
244
  return {
248
- result,
245
+ result: chatResult.content,
249
246
  metered: false,
250
247
  quotaExceeded: false,
251
- latencyMs: Date.now() - startTime,
248
+ latencyMs: Date.now() - startTime
252
249
  };
253
250
  }
254
251
  if (meteringOptions.skipMetering) {
255
- const result = await chat(messages, chatOptions);
252
+ const chatResult = await chat(messages, chatOptions);
256
253
  return {
257
- result,
254
+ result: chatResult.content,
258
255
  metered: false,
259
256
  quotaExceeded: false,
260
- latencyMs: Date.now() - startTime,
257
+ latencyMs: Date.now() - startTime
261
258
  };
262
259
  }
263
260
  // Pre-check: can this entity afford this call?
@@ -271,8 +268,7 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
271
268
  allowed = true;
272
269
  }
273
270
  if (!allowed) {
274
- // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
275
- const placeholderInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
271
+ const estimatedInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
276
272
  logInferenceUsage(ctx, {
277
273
  databaseId: ctx.databaseId,
278
274
  entityId: ctx.entityId,
@@ -281,9 +277,9 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
281
277
  provider: meteringOptions.provider ?? null,
282
278
  service: 'llm',
283
279
  operation: 'chat',
284
- inputTokens: placeholderInputTokens,
280
+ inputTokens: estimatedInputTokens,
285
281
  outputTokens: 0,
286
- totalTokens: placeholderInputTokens,
282
+ totalTokens: estimatedInputTokens,
287
283
  cacheReadTokens: null,
288
284
  cacheWriteTokens: null,
289
285
  latencyMs: Date.now() - startTime,
@@ -293,33 +289,31 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
293
289
  embeddingLatencyMs: null,
294
290
  status: 'quota_exceeded',
295
291
  errorType: null,
296
- rawUsage: null,
292
+ rawUsage: null
297
293
  }).catch(() => { });
298
294
  return {
299
295
  result: null,
300
296
  metered: true,
301
297
  quotaExceeded: true,
302
- latencyMs: Date.now() - startTime,
298
+ latencyMs: Date.now() - startTime
303
299
  };
304
300
  }
305
- // Execute chat completion
306
- const result = await chat(messages, chatOptions);
301
+ // Execute chat completion — returns real token usage from provider
302
+ const chatResult = await chat(messages, chatOptions);
307
303
  const latencyMs = Date.now() - startTime;
308
- // Placeholder: replace with actual provider token counts once generateWithUsage() is approved
309
- const inputChars = messages.reduce((sum, m) => sum + m.content.length, 0);
310
- const placeholderInputTokens = Math.ceil(inputChars / 4);
311
- const placeholderOutputTokens = Math.ceil(result.length / 4);
312
- const placeholderTotalTokens = placeholderInputTokens + placeholderOutputTokens;
304
+ const usage = chatResult.usage;
313
305
  ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
314
- await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, inputChars + result.length, {
306
+ await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, usage.totalTokens, {
315
307
  request_id: ctx.requestId,
316
- input_chars: inputChars,
317
- output_chars: result.length,
308
+ input_tokens: usage.input,
309
+ output_tokens: usage.output,
310
+ cache_read_tokens: usage.cacheRead,
311
+ cache_write_tokens: usage.cacheWrite,
318
312
  messages_count: messages.length,
319
- latency_ms: latencyMs,
313
+ latency_ms: latencyMs
320
314
  });
321
315
  }).catch(() => { });
322
- // Log to inference usage table
316
+ // Log to inference usage table with real provider token counts
323
317
  logInferenceUsage(ctx, {
324
318
  databaseId: ctx.databaseId,
325
319
  entityId: ctx.entityId,
@@ -328,11 +322,11 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
328
322
  provider: meteringOptions.provider ?? null,
329
323
  service: 'llm',
330
324
  operation: 'chat',
331
- inputTokens: placeholderInputTokens,
332
- outputTokens: placeholderOutputTokens,
333
- totalTokens: placeholderTotalTokens,
334
- cacheReadTokens: null,
335
- cacheWriteTokens: null,
325
+ inputTokens: usage.input,
326
+ outputTokens: usage.output,
327
+ totalTokens: usage.totalTokens,
328
+ cacheReadTokens: usage.cacheRead || null,
329
+ cacheWriteTokens: usage.cacheWrite || null,
336
330
  latencyMs,
337
331
  ragEnabled: false,
338
332
  chunksRetrieved: null,
@@ -340,13 +334,13 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
340
334
  embeddingLatencyMs: null,
341
335
  status: 'success',
342
336
  errorType: null,
343
- rawUsage: null,
337
+ rawUsage: { reasoning: usage.reasoning }
344
338
  }).catch(() => { });
345
339
  return {
346
- result,
340
+ result: chatResult.content,
347
341
  metered: true,
348
342
  quotaExceeded: false,
349
- latencyMs,
343
+ latencyMs
350
344
  };
351
345
  }
352
346
  // ─── Error Types ────────────────────────────────────────────────────────────
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "graphile-llm",
3
- "version": "0.8.0",
3
+ "version": "0.9.0",
4
4
  "description": "LLM integration plugin for PostGraphile v5 — server-side text-to-vector embedding and text companion fields for pgvector columns",
5
5
  "author": "Constructive <developers@constructive.io>",
6
6
  "homepage": "https://github.com/constructive-io/constructive",
@@ -29,7 +29,7 @@
29
29
  "url": "https://github.com/constructive-io/constructive/issues"
30
30
  },
31
31
  "dependencies": {
32
- "@agentic-kit/ollama": "^1.2.1",
32
+ "@agentic-kit/ollama": "^2.0.0",
33
33
  "graphile-cache": "^3.11.2"
34
34
  },
35
35
  "peerDependencies": {
@@ -73,5 +73,5 @@
73
73
  "ollama",
74
74
  "openai"
75
75
  ],
76
- "gitHead": "030e1144acbd4e288ee74eff2ac0021ca0382ef7"
76
+ "gitHead": "f3ea414974306e3c0d1d68edc93b4cdd8fa6e806"
77
77
  }
@@ -16,7 +16,7 @@ const graphile_cache_1 = require("graphile-cache");
16
16
  // ─── Cache ──────────────────────────────────────────────────────────────────
17
17
  const agentDiscoveryCache = new graphile_cache_1.ModuleConfigCache({
18
18
  name: 'agent-discovery',
19
- ttlMs: 60_000,
19
+ ttlMs: 60_000
20
20
  });
21
21
  /** Clear all cached discovery results (for testing) */
22
22
  function clearAgentDiscoveryCache() {
@@ -57,7 +57,7 @@ async function getAgentDiscovery(pool, dbname) {
57
57
  : null,
58
58
  task: row.task_table_name
59
59
  ? { schemaName, tableName: row.task_table_name }
60
- : null,
60
+ : null
61
61
  };
62
62
  }
63
63
  }
@@ -21,7 +21,7 @@
21
21
  * if loaded (it runs after this plugin and before the consumer plugins).
22
22
  */
23
23
  import type { GraphileConfig } from 'graphile-config';
24
- import type { EmbedderFunction, ChatFunction, GraphileLlmOptions } from '../types';
24
+ import type { ChatFunction, EmbedderFunction, GraphileLlmOptions } from '../types';
25
25
  declare global {
26
26
  namespace GraphileBuild {
27
27
  interface Build {
@@ -23,8 +23,8 @@
23
23
  */
24
24
  Object.defineProperty(exports, "__esModule", { value: true });
25
25
  exports.createLlmModulePlugin = createLlmModulePlugin;
26
- const embedder_1 = require("../embedder");
27
26
  const chat_1 = require("../chat");
27
+ const embedder_1 = require("../embedder");
28
28
  const env_1 = require("../env");
29
29
  /**
30
30
  * Creates the LlmModulePlugin with the given options.
@@ -84,10 +84,10 @@ function createLlmModulePlugin(options = {}) {
84
84
  llmEmbedder: embedder,
85
85
  llmChatCompleter: chat,
86
86
  llmEmbeddingModel: defaultEmbedder?.model ?? (0, env_1.getLlmEnvOptions)().embedding.model,
87
- llmChatModel: defaultChatCompleter?.model ?? (0, env_1.getLlmEnvOptions)().chat.model,
87
+ llmChatModel: defaultChatCompleter?.model ?? (0, env_1.getLlmEnvOptions)().chat.model
88
88
  }, 'LlmModulePlugin adding llmEmbedder, llmChatCompleter, and model names to build');
89
- },
90
- },
91
- },
89
+ }
90
+ }
91
+ }
92
92
  };
93
93
  }