graphile-llm 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/graphile-llm.test.js +81 -67
- package/chat.js +37 -27
- package/config-cache.js +4 -4
- package/embedder.js +3 -1
- package/env.js +6 -6
- package/esm/__tests__/graphile-llm.test.js +81 -67
- package/esm/chat.js +37 -24
- package/esm/config-cache.js +4 -4
- package/esm/embedder.js +3 -1
- package/esm/env.js +6 -6
- package/esm/index.d.ts +12 -12
- package/esm/index.js +7 -11
- package/esm/metering.d.ts +5 -5
- package/esm/metering.js +60 -66
- package/esm/plugins/agent-discovery-plugin.js +2 -2
- package/esm/plugins/llm-module-plugin.d.ts +1 -1
- package/esm/plugins/llm-module-plugin.js +5 -5
- package/esm/plugins/metering-plugin.js +13 -13
- package/esm/plugins/rag-plugin.js +20 -20
- package/esm/plugins/text-mutation-plugin.js +12 -12
- package/esm/plugins/text-search-plugin.js +10 -10
- package/esm/preset.js +6 -6
- package/esm/types.d.ts +39 -4
- package/index.d.ts +12 -12
- package/index.js +11 -15
- package/metering.d.ts +5 -5
- package/metering.js +60 -66
- package/package.json +8 -8
- package/plugins/agent-discovery-plugin.js +2 -2
- package/plugins/llm-module-plugin.d.ts +1 -1
- package/plugins/llm-module-plugin.js +5 -5
- package/plugins/metering-plugin.js +13 -13
- package/plugins/rag-plugin.js +20 -20
- package/plugins/text-mutation-plugin.js +12 -12
- package/plugins/text-search-plugin.js +10 -10
- package/preset.js +6 -6
- package/types.d.ts +39 -4
|
@@ -102,7 +102,7 @@ export function createLlmTextSearchPlugin() {
|
|
|
102
102
|
after: [
|
|
103
103
|
'LlmModulePlugin',
|
|
104
104
|
'UnifiedSearchPlugin',
|
|
105
|
-
'VectorCodecPlugin'
|
|
105
|
+
'VectorCodecPlugin'
|
|
106
106
|
],
|
|
107
107
|
schema: {
|
|
108
108
|
hooks: {
|
|
@@ -113,18 +113,18 @@ export function createLlmTextSearchPlugin() {
|
|
|
113
113
|
* The field is optional — clients provide either `text` or `vector`.
|
|
114
114
|
*/
|
|
115
115
|
GraphQLInputObjectType_fields(fields, build, context) {
|
|
116
|
-
const { scope: { inputObjectTypeName }
|
|
116
|
+
const { scope: { inputObjectTypeName } } = context;
|
|
117
117
|
if (inputObjectTypeName !== 'VectorNearbyInput') {
|
|
118
118
|
return fields;
|
|
119
119
|
}
|
|
120
|
-
const { graphql: { GraphQLString }
|
|
120
|
+
const { graphql: { GraphQLString } } = build;
|
|
121
121
|
return build.extend(fields, {
|
|
122
122
|
text: {
|
|
123
123
|
type: GraphQLString,
|
|
124
124
|
description: 'Natural language text to embed server-side for similarity search. ' +
|
|
125
125
|
'Mutually exclusive with `vector` — provide one or the other. ' +
|
|
126
|
-
'Requires the LLM plugin to be configured with an embedding provider.'
|
|
127
|
-
}
|
|
126
|
+
'Requires the LLM plugin to be configured with an embedding provider.'
|
|
127
|
+
}
|
|
128
128
|
}, 'LlmTextSearchPlugin adding text field to VectorNearbyInput');
|
|
129
129
|
},
|
|
130
130
|
/**
|
|
@@ -136,7 +136,7 @@ export function createLlmTextSearchPlugin() {
|
|
|
136
136
|
* and graphile-bucket-provisioner-plugin.
|
|
137
137
|
*/
|
|
138
138
|
GraphQLObjectType_fields_field(field, build, context) {
|
|
139
|
-
const { scope: { isRootQuery, pgCodec }
|
|
139
|
+
const { scope: { isRootQuery, pgCodec } } = context;
|
|
140
140
|
// Only wrap root query fields on tables with vector columns
|
|
141
141
|
if (!isRootQuery || !pgCodec || !hasVectorColumns(pgCodec)) {
|
|
142
142
|
return field;
|
|
@@ -158,7 +158,7 @@ export function createLlmTextSearchPlugin() {
|
|
|
158
158
|
await embedTextInWhere(args.filter, embedder);
|
|
159
159
|
}
|
|
160
160
|
return oldResolve(source, args, graphqlContext, info);
|
|
161
|
-
}
|
|
161
|
+
}
|
|
162
162
|
};
|
|
163
163
|
},
|
|
164
164
|
finalize(schema, build) {
|
|
@@ -168,8 +168,8 @@ export function createLlmTextSearchPlugin() {
|
|
|
168
168
|
'will return errors if used. Configure an embedding provider to enable.');
|
|
169
169
|
}
|
|
170
170
|
return schema;
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
174
|
};
|
|
175
175
|
}
|
package/esm/preset.js
CHANGED
|
@@ -64,10 +64,10 @@
|
|
|
64
64
|
* ```
|
|
65
65
|
*/
|
|
66
66
|
import { createLlmModulePlugin } from './plugins/llm-module-plugin';
|
|
67
|
-
import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
68
|
-
import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
69
|
-
import { createLlmRagPlugin } from './plugins/rag-plugin';
|
|
70
67
|
import { createLlmMeteringPlugin } from './plugins/metering-plugin';
|
|
68
|
+
import { createLlmRagPlugin } from './plugins/rag-plugin';
|
|
69
|
+
import { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
70
|
+
import { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
71
71
|
/**
|
|
72
72
|
* Creates a preset that includes all LLM plugins.
|
|
73
73
|
*
|
|
@@ -75,9 +75,9 @@ import { createLlmMeteringPlugin } from './plugins/metering-plugin';
|
|
|
75
75
|
* @returns A GraphileConfig.Preset to add to your extends array
|
|
76
76
|
*/
|
|
77
77
|
export function GraphileLlmPreset(options = {}) {
|
|
78
|
-
const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering
|
|
78
|
+
const { enableTextSearch = true, enableTextMutations = true, enableRag = false, ragDefaults, metering } = options;
|
|
79
79
|
const plugins = [
|
|
80
|
-
createLlmModulePlugin(options)
|
|
80
|
+
createLlmModulePlugin(options)
|
|
81
81
|
];
|
|
82
82
|
// Metering is opt-in: only loaded when metering is truthy
|
|
83
83
|
// (true, or a MeteringConfig object)
|
|
@@ -95,7 +95,7 @@ export function GraphileLlmPreset(options = {}) {
|
|
|
95
95
|
plugins.push(createLlmRagPlugin(ragDefaults));
|
|
96
96
|
}
|
|
97
97
|
return {
|
|
98
|
-
plugins
|
|
98
|
+
plugins
|
|
99
99
|
};
|
|
100
100
|
}
|
|
101
101
|
export default GraphileLlmPreset;
|
package/esm/types.d.ts
CHANGED
|
@@ -4,9 +4,18 @@
|
|
|
4
4
|
* Shared type definitions for the LLM plugin.
|
|
5
5
|
*/
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
7
|
+
* Result from an embedding call, including real token usage from the provider.
|
|
8
8
|
*/
|
|
9
|
-
export
|
|
9
|
+
export interface EmbeddingResult {
|
|
10
|
+
/** The vector embedding */
|
|
11
|
+
embedding: number[];
|
|
12
|
+
/** Number of prompt tokens consumed (from provider; 0 if unavailable) */
|
|
13
|
+
promptTokens: number;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* A function that converts text into a vector embedding with token usage.
|
|
17
|
+
*/
|
|
18
|
+
export type EmbedderFunction = (text: string) => Promise<EmbeddingResult>;
|
|
10
19
|
/**
|
|
11
20
|
* Configuration for an embedding provider.
|
|
12
21
|
*/
|
|
@@ -18,6 +27,24 @@ export interface EmbedderConfig {
|
|
|
18
27
|
/** Base URL for the provider (e.g. 'http://localhost:11434' for Ollama) */
|
|
19
28
|
baseUrl?: string;
|
|
20
29
|
}
|
|
30
|
+
/**
|
|
31
|
+
* Token usage metadata returned by LLM providers.
|
|
32
|
+
* Maps to the billing schema's inference_log columns.
|
|
33
|
+
*/
|
|
34
|
+
export interface LlmUsage {
|
|
35
|
+
/** Prompt / input tokens consumed */
|
|
36
|
+
input: number;
|
|
37
|
+
/** Completion / output tokens generated (includes reasoning for providers that count it) */
|
|
38
|
+
output: number;
|
|
39
|
+
/** Reasoning tokens (subset of output — not additive) */
|
|
40
|
+
reasoning: number;
|
|
41
|
+
/** Tokens served from prompt cache (zero cost) */
|
|
42
|
+
cacheRead: number;
|
|
43
|
+
/** Tokens written to prompt cache */
|
|
44
|
+
cacheWrite: number;
|
|
45
|
+
/** input + output + cacheRead + cacheWrite */
|
|
46
|
+
totalTokens: number;
|
|
47
|
+
}
|
|
21
48
|
/**
|
|
22
49
|
* A single message in a chat conversation.
|
|
23
50
|
*/
|
|
@@ -35,9 +62,17 @@ export interface ChatOptions {
|
|
|
35
62
|
temperature?: number;
|
|
36
63
|
}
|
|
37
64
|
/**
|
|
38
|
-
*
|
|
65
|
+
* Result from a chat completion call, including real token usage.
|
|
66
|
+
*/
|
|
67
|
+
export interface ChatResult {
|
|
68
|
+
content: string;
|
|
69
|
+
usage: LlmUsage;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* A function that sends messages to a chat completion provider
|
|
73
|
+
* and returns the response with token usage metadata.
|
|
39
74
|
*/
|
|
40
|
-
export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<
|
|
75
|
+
export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<ChatResult>;
|
|
41
76
|
/**
|
|
42
77
|
* Configuration for a chat completion provider.
|
|
43
78
|
*/
|
package/index.d.ts
CHANGED
|
@@ -29,20 +29,20 @@
|
|
|
29
29
|
* };
|
|
30
30
|
* ```
|
|
31
31
|
*/
|
|
32
|
-
export { getLlmEnvOptions } from './env';
|
|
33
32
|
export type { LlmEnvOptions, LlmProviderConfig } from './env';
|
|
33
|
+
export { getLlmEnvOptions } from './env';
|
|
34
34
|
export { GraphileLlmPreset } from './preset';
|
|
35
35
|
export { createLlmModulePlugin } from './plugins/llm-module-plugin';
|
|
36
|
-
export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
37
|
-
export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
38
36
|
export { createLlmRagPlugin } from './plugins/rag-plugin';
|
|
37
|
+
export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
38
|
+
export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
39
39
|
export { createLlmMeteringPlugin } from './plugins/metering-plugin';
|
|
40
|
-
export {
|
|
41
|
-
export
|
|
42
|
-
export { buildEmbedder,
|
|
43
|
-
export { buildChatCompleter,
|
|
44
|
-
export {
|
|
45
|
-
export
|
|
46
|
-
export {
|
|
47
|
-
export
|
|
48
|
-
export type {
|
|
40
|
+
export type { AgentDiscovery, AgentTableInfo } from './plugins/agent-discovery-plugin';
|
|
41
|
+
export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
|
|
42
|
+
export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
|
|
43
|
+
export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
|
|
44
|
+
export type { InferenceLogEntry, MeteringContext, MeteringOptions, MeterResult, WithPgClient } from './metering';
|
|
45
|
+
export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
|
|
46
|
+
export type { BillingConfig, InferenceLogConfig, LlmBillingCacheEntry, PgClient } from './config-cache';
|
|
47
|
+
export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
|
|
48
|
+
export type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, ChatResult, ChunkTableInfo, EmbedderConfig, EmbedderFunction, EmbeddingResult, GraphileLlmOptions, LlmModuleData, LlmUsage, MeteringConfig, RagDefaults } from './types';
|
package/index.js
CHANGED
|
@@ -31,8 +31,7 @@
|
|
|
31
31
|
* ```
|
|
32
32
|
*/
|
|
33
33
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
34
|
-
exports.
|
|
35
|
-
// Environment configuration (single source of truth for LLM defaults)
|
|
34
|
+
exports.invalidateLlmBillingConfig = exports.getLlmBillingConfig = exports.getLlmBillingCacheStats = exports.QuotaExceededError = exports.meteredEmbed = exports.meteredChat = exports.logInferenceUsage = exports.buildChatCompleterFromModule = exports.buildChatCompleterFromEnv = exports.buildChatCompleter = exports.buildEmbedderFromModule = exports.buildEmbedderFromEnv = exports.buildEmbedder = exports.getAgentDiscovery = exports.clearAgentDiscoveryCache = exports.createLlmMeteringPlugin = exports.createLlmTextSearchPlugin = exports.createLlmTextMutationPlugin = exports.createLlmRagPlugin = exports.createLlmModulePlugin = exports.GraphileLlmPreset = exports.getLlmEnvOptions = void 0;
|
|
36
35
|
var env_1 = require("./env");
|
|
37
36
|
Object.defineProperty(exports, "getLlmEnvOptions", { enumerable: true, get: function () { return env_1.getLlmEnvOptions; } });
|
|
38
37
|
// Preset (recommended entry point)
|
|
@@ -41,37 +40,34 @@ Object.defineProperty(exports, "GraphileLlmPreset", { enumerable: true, get: fun
|
|
|
41
40
|
// Individual plugins (pure — no billing dependency)
|
|
42
41
|
var llm_module_plugin_1 = require("./plugins/llm-module-plugin");
|
|
43
42
|
Object.defineProperty(exports, "createLlmModulePlugin", { enumerable: true, get: function () { return llm_module_plugin_1.createLlmModulePlugin; } });
|
|
44
|
-
var text_search_plugin_1 = require("./plugins/text-search-plugin");
|
|
45
|
-
Object.defineProperty(exports, "createLlmTextSearchPlugin", { enumerable: true, get: function () { return text_search_plugin_1.createLlmTextSearchPlugin; } });
|
|
46
|
-
var text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
|
|
47
|
-
Object.defineProperty(exports, "createLlmTextMutationPlugin", { enumerable: true, get: function () { return text_mutation_plugin_1.createLlmTextMutationPlugin; } });
|
|
48
43
|
var rag_plugin_1 = require("./plugins/rag-plugin");
|
|
49
44
|
Object.defineProperty(exports, "createLlmRagPlugin", { enumerable: true, get: function () { return rag_plugin_1.createLlmRagPlugin; } });
|
|
45
|
+
var text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
|
|
46
|
+
Object.defineProperty(exports, "createLlmTextMutationPlugin", { enumerable: true, get: function () { return text_mutation_plugin_1.createLlmTextMutationPlugin; } });
|
|
47
|
+
var text_search_plugin_1 = require("./plugins/text-search-plugin");
|
|
48
|
+
Object.defineProperty(exports, "createLlmTextSearchPlugin", { enumerable: true, get: function () { return text_search_plugin_1.createLlmTextSearchPlugin; } });
|
|
50
49
|
// Metering plugin (opt-in billing integration)
|
|
51
50
|
var metering_plugin_1 = require("./plugins/metering-plugin");
|
|
52
51
|
Object.defineProperty(exports, "createLlmMeteringPlugin", { enumerable: true, get: function () { return metering_plugin_1.createLlmMeteringPlugin; } });
|
|
53
|
-
// Agent discovery (queries agent_chat_module config table at runtime)
|
|
54
52
|
var agent_discovery_plugin_1 = require("./plugins/agent-discovery-plugin");
|
|
55
|
-
Object.defineProperty(exports, "getAgentDiscovery", { enumerable: true, get: function () { return agent_discovery_plugin_1.getAgentDiscovery; } });
|
|
56
53
|
Object.defineProperty(exports, "clearAgentDiscoveryCache", { enumerable: true, get: function () { return agent_discovery_plugin_1.clearAgentDiscoveryCache; } });
|
|
54
|
+
Object.defineProperty(exports, "getAgentDiscovery", { enumerable: true, get: function () { return agent_discovery_plugin_1.getAgentDiscovery; } });
|
|
57
55
|
// Embedder utilities
|
|
58
56
|
var embedder_1 = require("./embedder");
|
|
59
57
|
Object.defineProperty(exports, "buildEmbedder", { enumerable: true, get: function () { return embedder_1.buildEmbedder; } });
|
|
60
|
-
Object.defineProperty(exports, "buildEmbedderFromModule", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromModule; } });
|
|
61
58
|
Object.defineProperty(exports, "buildEmbedderFromEnv", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromEnv; } });
|
|
59
|
+
Object.defineProperty(exports, "buildEmbedderFromModule", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromModule; } });
|
|
62
60
|
// Chat completion utilities
|
|
63
61
|
var chat_1 = require("./chat");
|
|
64
62
|
Object.defineProperty(exports, "buildChatCompleter", { enumerable: true, get: function () { return chat_1.buildChatCompleter; } });
|
|
65
|
-
Object.defineProperty(exports, "buildChatCompleterFromModule", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromModule; } });
|
|
66
63
|
Object.defineProperty(exports, "buildChatCompleterFromEnv", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromEnv; } });
|
|
67
|
-
|
|
64
|
+
Object.defineProperty(exports, "buildChatCompleterFromModule", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromModule; } });
|
|
68
65
|
var metering_1 = require("./metering");
|
|
69
|
-
Object.defineProperty(exports, "meteredEmbed", { enumerable: true, get: function () { return metering_1.meteredEmbed; } });
|
|
70
|
-
Object.defineProperty(exports, "meteredChat", { enumerable: true, get: function () { return metering_1.meteredChat; } });
|
|
71
66
|
Object.defineProperty(exports, "logInferenceUsage", { enumerable: true, get: function () { return metering_1.logInferenceUsage; } });
|
|
67
|
+
Object.defineProperty(exports, "meteredChat", { enumerable: true, get: function () { return metering_1.meteredChat; } });
|
|
68
|
+
Object.defineProperty(exports, "meteredEmbed", { enumerable: true, get: function () { return metering_1.meteredEmbed; } });
|
|
72
69
|
Object.defineProperty(exports, "QuotaExceededError", { enumerable: true, get: function () { return metering_1.QuotaExceededError; } });
|
|
73
|
-
// Config cache (for custom integration)
|
|
74
70
|
var config_cache_1 = require("./config-cache");
|
|
71
|
+
Object.defineProperty(exports, "getLlmBillingCacheStats", { enumerable: true, get: function () { return config_cache_1.getLlmBillingCacheStats; } });
|
|
75
72
|
Object.defineProperty(exports, "getLlmBillingConfig", { enumerable: true, get: function () { return config_cache_1.getLlmBillingConfig; } });
|
|
76
73
|
Object.defineProperty(exports, "invalidateLlmBillingConfig", { enumerable: true, get: function () { return config_cache_1.invalidateLlmBillingConfig; } });
|
|
77
|
-
Object.defineProperty(exports, "getLlmBillingCacheStats", { enumerable: true, get: function () { return config_cache_1.getLlmBillingCacheStats; } });
|
package/metering.d.ts
CHANGED
|
@@ -9,16 +9,16 @@
|
|
|
9
9
|
* When the quota check fails, the wrapper returns null (graceful degradation)
|
|
10
10
|
* instead of throwing, so the search pipeline can fall back to text-only.
|
|
11
11
|
*
|
|
12
|
-
* Token counts
|
|
13
|
-
*
|
|
14
|
-
*
|
|
12
|
+
* Token counts:
|
|
13
|
+
* - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
|
|
14
|
+
* - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
|
|
15
15
|
*
|
|
16
16
|
* The billing functions live in the tenant database and are called via the
|
|
17
17
|
* Graphile `withPgClient` callback. Function locations (schema, names) are
|
|
18
18
|
* resolved from `billing_module` metaschema and cached by `config-cache.ts`.
|
|
19
19
|
*/
|
|
20
|
-
import type {
|
|
21
|
-
import type {
|
|
20
|
+
import type { BillingConfig, InferenceLogConfig, PgClient } from './config-cache';
|
|
21
|
+
import type { ChatFunction, ChatMessage, ChatOptions, EmbedderFunction } from './types';
|
|
22
22
|
/**
|
|
23
23
|
* Callback matching Graphile's withPgClient signature.
|
|
24
24
|
* Acquires a pg client, calls the callback, then releases the client.
|
package/metering.js
CHANGED
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
* When the quota check fails, the wrapper returns null (graceful degradation)
|
|
11
11
|
* instead of throwing, so the search pipeline can fall back to text-only.
|
|
12
12
|
*
|
|
13
|
-
* Token counts
|
|
14
|
-
*
|
|
15
|
-
*
|
|
13
|
+
* Token counts:
|
|
14
|
+
* - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
|
|
15
|
+
* - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
|
|
16
16
|
*
|
|
17
17
|
* The billing functions live in the tenant database and are called via the
|
|
18
18
|
* Graphile `withPgClient` callback. Function locations (schema, names) are
|
|
@@ -94,7 +94,7 @@ async function logInferenceUsage(ctx, entry) {
|
|
|
94
94
|
entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
|
|
95
95
|
entry.embeddingModel, entry.embeddingLatencyMs,
|
|
96
96
|
entry.status, entry.errorType,
|
|
97
|
-
entry.rawUsage ? JSON.stringify(entry.rawUsage) : null
|
|
97
|
+
entry.rawUsage ? JSON.stringify(entry.rawUsage) : null
|
|
98
98
|
]);
|
|
99
99
|
});
|
|
100
100
|
}
|
|
@@ -114,31 +114,31 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
114
114
|
const startTime = Date.now();
|
|
115
115
|
// No billing context → just embed without metering
|
|
116
116
|
if (!ctx) {
|
|
117
|
-
const
|
|
117
|
+
const { embedding } = await embedder(text);
|
|
118
118
|
return {
|
|
119
|
-
result,
|
|
119
|
+
result: embedding,
|
|
120
120
|
metered: false,
|
|
121
121
|
quotaExceeded: false,
|
|
122
|
-
latencyMs: Date.now() - startTime
|
|
122
|
+
latencyMs: Date.now() - startTime
|
|
123
123
|
};
|
|
124
124
|
}
|
|
125
125
|
const meterSlug = options.embeddingMeterSlug;
|
|
126
126
|
if (!meterSlug) {
|
|
127
|
-
const
|
|
127
|
+
const { embedding } = await embedder(text);
|
|
128
128
|
return {
|
|
129
|
-
result,
|
|
129
|
+
result: embedding,
|
|
130
130
|
metered: false,
|
|
131
131
|
quotaExceeded: false,
|
|
132
|
-
latencyMs: Date.now() - startTime
|
|
132
|
+
latencyMs: Date.now() - startTime
|
|
133
133
|
};
|
|
134
134
|
}
|
|
135
135
|
if (options.skipMetering) {
|
|
136
|
-
const
|
|
136
|
+
const { embedding } = await embedder(text);
|
|
137
137
|
return {
|
|
138
|
-
result,
|
|
138
|
+
result: embedding,
|
|
139
139
|
metered: false,
|
|
140
140
|
quotaExceeded: false,
|
|
141
|
-
latencyMs: Date.now() - startTime
|
|
141
|
+
latencyMs: Date.now() - startTime
|
|
142
142
|
};
|
|
143
143
|
}
|
|
144
144
|
// Pre-check: can this entity afford this call?
|
|
@@ -152,8 +152,6 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
152
152
|
allowed = true;
|
|
153
153
|
}
|
|
154
154
|
if (!allowed) {
|
|
155
|
-
// Placeholder: replace with actual provider token counts once generateWithUsage() is approved
|
|
156
|
-
const placeholderAmountTokens = Math.ceil(text.length / 4);
|
|
157
155
|
logInferenceUsage(ctx, {
|
|
158
156
|
databaseId: ctx.databaseId,
|
|
159
157
|
entityId: ctx.entityId,
|
|
@@ -162,9 +160,9 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
162
160
|
provider: options.provider ?? null,
|
|
163
161
|
service: 'embedding',
|
|
164
162
|
operation: 'create',
|
|
165
|
-
inputTokens:
|
|
163
|
+
inputTokens: 0,
|
|
166
164
|
outputTokens: 0,
|
|
167
|
-
totalTokens:
|
|
165
|
+
totalTokens: 0,
|
|
168
166
|
cacheReadTokens: null,
|
|
169
167
|
cacheWriteTokens: null,
|
|
170
168
|
latencyMs: Date.now() - startTime,
|
|
@@ -174,26 +172,25 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
174
172
|
embeddingLatencyMs: null,
|
|
175
173
|
status: 'quota_exceeded',
|
|
176
174
|
errorType: null,
|
|
177
|
-
rawUsage: null
|
|
175
|
+
rawUsage: null
|
|
178
176
|
}).catch(() => { });
|
|
179
177
|
return {
|
|
180
178
|
result: null,
|
|
181
179
|
metered: true,
|
|
182
180
|
quotaExceeded: true,
|
|
183
|
-
latencyMs: Date.now() - startTime
|
|
181
|
+
latencyMs: Date.now() - startTime
|
|
184
182
|
};
|
|
185
183
|
}
|
|
186
|
-
// Execute embedding
|
|
187
|
-
const
|
|
184
|
+
// Execute embedding — real token count from provider via EmbeddingResult
|
|
185
|
+
const { embedding, promptTokens } = await embedder(text);
|
|
188
186
|
const latencyMs = Date.now() - startTime;
|
|
189
|
-
// Placeholder: replace with actual provider token counts once generateWithUsage() is approved
|
|
190
|
-
const placeholderAmountTokens = Math.ceil(text.length / 4);
|
|
191
187
|
ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
|
|
192
|
-
await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug,
|
|
188
|
+
await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, promptTokens, {
|
|
193
189
|
request_id: ctx.requestId,
|
|
194
190
|
input_chars: text.length,
|
|
195
|
-
|
|
196
|
-
|
|
191
|
+
prompt_tokens: promptTokens,
|
|
192
|
+
dims: embedding.length,
|
|
193
|
+
latency_ms: latencyMs
|
|
197
194
|
});
|
|
198
195
|
}).catch(() => { });
|
|
199
196
|
// Log to inference usage table
|
|
@@ -205,9 +202,9 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
205
202
|
provider: options.provider ?? null,
|
|
206
203
|
service: 'embedding',
|
|
207
204
|
operation: 'create',
|
|
208
|
-
inputTokens:
|
|
205
|
+
inputTokens: promptTokens,
|
|
209
206
|
outputTokens: 0,
|
|
210
|
-
totalTokens:
|
|
207
|
+
totalTokens: promptTokens,
|
|
211
208
|
cacheReadTokens: null,
|
|
212
209
|
cacheWriteTokens: null,
|
|
213
210
|
latencyMs,
|
|
@@ -217,13 +214,13 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
217
214
|
embeddingLatencyMs: latencyMs,
|
|
218
215
|
status: 'success',
|
|
219
216
|
errorType: null,
|
|
220
|
-
rawUsage:
|
|
217
|
+
rawUsage: { prompt_tokens: promptTokens }
|
|
221
218
|
}).catch(() => { });
|
|
222
219
|
return {
|
|
223
|
-
result,
|
|
220
|
+
result: embedding,
|
|
224
221
|
metered: true,
|
|
225
222
|
quotaExceeded: false,
|
|
226
|
-
latencyMs
|
|
223
|
+
latencyMs
|
|
227
224
|
};
|
|
228
225
|
}
|
|
229
226
|
// ─── Metered Chat ───────────────────────────────────────────────────────────
|
|
@@ -233,31 +230,31 @@ async function meteredEmbed(embedder, text, ctx, options = {}) {
|
|
|
233
230
|
async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {}) {
|
|
234
231
|
const startTime = Date.now();
|
|
235
232
|
if (!ctx) {
|
|
236
|
-
const
|
|
233
|
+
const chatResult = await chat(messages, chatOptions);
|
|
237
234
|
return {
|
|
238
|
-
result,
|
|
235
|
+
result: chatResult.content,
|
|
239
236
|
metered: false,
|
|
240
237
|
quotaExceeded: false,
|
|
241
|
-
latencyMs: Date.now() - startTime
|
|
238
|
+
latencyMs: Date.now() - startTime
|
|
242
239
|
};
|
|
243
240
|
}
|
|
244
241
|
const meterSlug = meteringOptions.chatMeterSlug;
|
|
245
242
|
if (!meterSlug) {
|
|
246
|
-
const
|
|
243
|
+
const chatResult = await chat(messages, chatOptions);
|
|
247
244
|
return {
|
|
248
|
-
result,
|
|
245
|
+
result: chatResult.content,
|
|
249
246
|
metered: false,
|
|
250
247
|
quotaExceeded: false,
|
|
251
|
-
latencyMs: Date.now() - startTime
|
|
248
|
+
latencyMs: Date.now() - startTime
|
|
252
249
|
};
|
|
253
250
|
}
|
|
254
251
|
if (meteringOptions.skipMetering) {
|
|
255
|
-
const
|
|
252
|
+
const chatResult = await chat(messages, chatOptions);
|
|
256
253
|
return {
|
|
257
|
-
result,
|
|
254
|
+
result: chatResult.content,
|
|
258
255
|
metered: false,
|
|
259
256
|
quotaExceeded: false,
|
|
260
|
-
latencyMs: Date.now() - startTime
|
|
257
|
+
latencyMs: Date.now() - startTime
|
|
261
258
|
};
|
|
262
259
|
}
|
|
263
260
|
// Pre-check: can this entity afford this call?
|
|
@@ -271,8 +268,7 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
|
|
|
271
268
|
allowed = true;
|
|
272
269
|
}
|
|
273
270
|
if (!allowed) {
|
|
274
|
-
|
|
275
|
-
const placeholderInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
|
|
271
|
+
const estimatedInputTokens = Math.ceil(messages.reduce((sum, m) => sum + m.content.length, 0) / 4);
|
|
276
272
|
logInferenceUsage(ctx, {
|
|
277
273
|
databaseId: ctx.databaseId,
|
|
278
274
|
entityId: ctx.entityId,
|
|
@@ -281,9 +277,9 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
|
|
|
281
277
|
provider: meteringOptions.provider ?? null,
|
|
282
278
|
service: 'llm',
|
|
283
279
|
operation: 'chat',
|
|
284
|
-
inputTokens:
|
|
280
|
+
inputTokens: estimatedInputTokens,
|
|
285
281
|
outputTokens: 0,
|
|
286
|
-
totalTokens:
|
|
282
|
+
totalTokens: estimatedInputTokens,
|
|
287
283
|
cacheReadTokens: null,
|
|
288
284
|
cacheWriteTokens: null,
|
|
289
285
|
latencyMs: Date.now() - startTime,
|
|
@@ -293,33 +289,31 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
|
|
|
293
289
|
embeddingLatencyMs: null,
|
|
294
290
|
status: 'quota_exceeded',
|
|
295
291
|
errorType: null,
|
|
296
|
-
rawUsage: null
|
|
292
|
+
rawUsage: null
|
|
297
293
|
}).catch(() => { });
|
|
298
294
|
return {
|
|
299
295
|
result: null,
|
|
300
296
|
metered: true,
|
|
301
297
|
quotaExceeded: true,
|
|
302
|
-
latencyMs: Date.now() - startTime
|
|
298
|
+
latencyMs: Date.now() - startTime
|
|
303
299
|
};
|
|
304
300
|
}
|
|
305
|
-
// Execute chat completion
|
|
306
|
-
const
|
|
301
|
+
// Execute chat completion — returns real token usage from provider
|
|
302
|
+
const chatResult = await chat(messages, chatOptions);
|
|
307
303
|
const latencyMs = Date.now() - startTime;
|
|
308
|
-
|
|
309
|
-
const inputChars = messages.reduce((sum, m) => sum + m.content.length, 0);
|
|
310
|
-
const placeholderInputTokens = Math.ceil(inputChars / 4);
|
|
311
|
-
const placeholderOutputTokens = Math.ceil(result.length / 4);
|
|
312
|
-
const placeholderTotalTokens = placeholderInputTokens + placeholderOutputTokens;
|
|
304
|
+
const usage = chatResult.usage;
|
|
313
305
|
ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
|
|
314
|
-
await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug,
|
|
306
|
+
await recordUsage(pgClient, ctx.billing, ctx.entityId, meterSlug, usage.totalTokens, {
|
|
315
307
|
request_id: ctx.requestId,
|
|
316
|
-
|
|
317
|
-
|
|
308
|
+
input_tokens: usage.input,
|
|
309
|
+
output_tokens: usage.output,
|
|
310
|
+
cache_read_tokens: usage.cacheRead,
|
|
311
|
+
cache_write_tokens: usage.cacheWrite,
|
|
318
312
|
messages_count: messages.length,
|
|
319
|
-
latency_ms: latencyMs
|
|
313
|
+
latency_ms: latencyMs
|
|
320
314
|
});
|
|
321
315
|
}).catch(() => { });
|
|
322
|
-
// Log to inference usage table
|
|
316
|
+
// Log to inference usage table with real provider token counts
|
|
323
317
|
logInferenceUsage(ctx, {
|
|
324
318
|
databaseId: ctx.databaseId,
|
|
325
319
|
entityId: ctx.entityId,
|
|
@@ -328,11 +322,11 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
|
|
|
328
322
|
provider: meteringOptions.provider ?? null,
|
|
329
323
|
service: 'llm',
|
|
330
324
|
operation: 'chat',
|
|
331
|
-
inputTokens:
|
|
332
|
-
outputTokens:
|
|
333
|
-
totalTokens:
|
|
334
|
-
cacheReadTokens: null,
|
|
335
|
-
cacheWriteTokens: null,
|
|
325
|
+
inputTokens: usage.input,
|
|
326
|
+
outputTokens: usage.output,
|
|
327
|
+
totalTokens: usage.totalTokens,
|
|
328
|
+
cacheReadTokens: usage.cacheRead || null,
|
|
329
|
+
cacheWriteTokens: usage.cacheWrite || null,
|
|
336
330
|
latencyMs,
|
|
337
331
|
ragEnabled: false,
|
|
338
332
|
chunksRetrieved: null,
|
|
@@ -340,13 +334,13 @@ async function meteredChat(chat, messages, ctx, chatOptions, meteringOptions = {
|
|
|
340
334
|
embeddingLatencyMs: null,
|
|
341
335
|
status: 'success',
|
|
342
336
|
errorType: null,
|
|
343
|
-
rawUsage:
|
|
337
|
+
rawUsage: { reasoning: usage.reasoning }
|
|
344
338
|
}).catch(() => { });
|
|
345
339
|
return {
|
|
346
|
-
result,
|
|
340
|
+
result: chatResult.content,
|
|
347
341
|
metered: true,
|
|
348
342
|
quotaExceeded: false,
|
|
349
|
-
latencyMs
|
|
343
|
+
latencyMs
|
|
350
344
|
};
|
|
351
345
|
}
|
|
352
346
|
// ─── Error Types ────────────────────────────────────────────────────────────
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "graphile-llm",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.10.0",
|
|
4
4
|
"description": "LLM integration plugin for PostGraphile v5 — server-side text-to-vector embedding and text companion fields for pgvector columns",
|
|
5
5
|
"author": "Constructive <developers@constructive.io>",
|
|
6
6
|
"homepage": "https://github.com/constructive-io/constructive",
|
|
@@ -29,8 +29,8 @@
|
|
|
29
29
|
"url": "https://github.com/constructive-io/constructive/issues"
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
|
-
"@agentic-kit/ollama": "^
|
|
33
|
-
"graphile-cache": "^3.
|
|
32
|
+
"@agentic-kit/ollama": "^2.0.0",
|
|
33
|
+
"graphile-cache": "^3.12.0"
|
|
34
34
|
},
|
|
35
35
|
"peerDependencies": {
|
|
36
36
|
"@dataplan/pg": "1.0.3",
|
|
@@ -54,11 +54,11 @@
|
|
|
54
54
|
},
|
|
55
55
|
"devDependencies": {
|
|
56
56
|
"@types/node": "^22.19.11",
|
|
57
|
-
"graphile-connection-filter": "^1.
|
|
58
|
-
"graphile-search": "^1.
|
|
59
|
-
"graphile-test": "^4.
|
|
57
|
+
"graphile-connection-filter": "^1.11.0",
|
|
58
|
+
"graphile-search": "^1.14.0",
|
|
59
|
+
"graphile-test": "^4.16.0",
|
|
60
60
|
"makage": "^0.3.0",
|
|
61
|
-
"pgsql-test": "^4.
|
|
61
|
+
"pgsql-test": "^4.15.0"
|
|
62
62
|
},
|
|
63
63
|
"keywords": [
|
|
64
64
|
"postgraphile",
|
|
@@ -73,5 +73,5 @@
|
|
|
73
73
|
"ollama",
|
|
74
74
|
"openai"
|
|
75
75
|
],
|
|
76
|
-
"gitHead": "
|
|
76
|
+
"gitHead": "c0d04574f7719d92e67becb58d60791ae978c5f5"
|
|
77
77
|
}
|
|
@@ -16,7 +16,7 @@ const graphile_cache_1 = require("graphile-cache");
|
|
|
16
16
|
// ─── Cache ──────────────────────────────────────────────────────────────────
|
|
17
17
|
const agentDiscoveryCache = new graphile_cache_1.ModuleConfigCache({
|
|
18
18
|
name: 'agent-discovery',
|
|
19
|
-
ttlMs: 60_000
|
|
19
|
+
ttlMs: 60_000
|
|
20
20
|
});
|
|
21
21
|
/** Clear all cached discovery results (for testing) */
|
|
22
22
|
function clearAgentDiscoveryCache() {
|
|
@@ -57,7 +57,7 @@ async function getAgentDiscovery(pool, dbname) {
|
|
|
57
57
|
: null,
|
|
58
58
|
task: row.task_table_name
|
|
59
59
|
? { schemaName, tableName: row.task_table_name }
|
|
60
|
-
: null
|
|
60
|
+
: null
|
|
61
61
|
};
|
|
62
62
|
}
|
|
63
63
|
}
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
* if loaded (it runs after this plugin and before the consumer plugins).
|
|
22
22
|
*/
|
|
23
23
|
import type { GraphileConfig } from 'graphile-config';
|
|
24
|
-
import type {
|
|
24
|
+
import type { ChatFunction, EmbedderFunction, GraphileLlmOptions } from '../types';
|
|
25
25
|
declare global {
|
|
26
26
|
namespace GraphileBuild {
|
|
27
27
|
interface Build {
|