npm - graphile-llm - Versions diffs - 0.7.3 → 0.9.0 - Mend

graphile-llm 0.7.3 → 0.9.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (55)

package/__tests__/graphile-llm.test.js +87 -71
package/chat.d.ts +5 -5
package/chat.js +45 -43
package/config-cache.d.ts +77 -0
package/config-cache.js +148 -0
package/embedder.d.ts +5 -5
package/embedder.js +11 -17
package/env.d.ts +31 -0
package/env.js +52 -0
package/esm/__tests__/graphile-llm.test.js +87 -71
package/esm/chat.d.ts +5 -5
package/esm/chat.js +45 -40
package/esm/config-cache.d.ts +77 -0
package/esm/config-cache.js +143 -0
package/esm/embedder.d.ts +5 -5
package/esm/embedder.js +11 -17
package/esm/env.d.ts +31 -0
package/esm/env.js +49 -0
package/esm/index.d.ts +14 -5
package/esm/index.js +11 -5
package/esm/metering.d.ts +114 -0
package/esm/metering.js +352 -0
package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
package/esm/plugins/agent-discovery-plugin.js +65 -0
package/esm/plugins/llm-module-plugin.d.ts +11 -2
package/esm/plugins/llm-module-plugin.js +15 -7
package/esm/plugins/metering-plugin.d.ts +42 -0
package/esm/plugins/metering-plugin.js +175 -0
package/esm/plugins/rag-plugin.js +20 -20
package/esm/plugins/text-mutation-plugin.d.ts +4 -0
package/esm/plugins/text-mutation-plugin.js +23 -13
package/esm/plugins/text-search-plugin.d.ts +4 -0
package/esm/plugins/text-search-plugin.js +23 -11
package/esm/preset.d.ts +21 -1
package/esm/preset.js +33 -6
package/esm/types.d.ts +86 -10
package/index.d.ts +14 -5
package/index.js +25 -8
package/metering.d.ts +114 -0
package/metering.js +359 -0
package/package.json +15 -15
package/plugins/agent-discovery-plugin.d.ts +29 -0
package/plugins/agent-discovery-plugin.js +69 -0
package/plugins/llm-module-plugin.d.ts +11 -2
package/plugins/llm-module-plugin.js +15 -7
package/plugins/metering-plugin.d.ts +42 -0
package/plugins/metering-plugin.js +178 -0
package/plugins/rag-plugin.js +20 -20
package/plugins/text-mutation-plugin.d.ts +4 -0
package/plugins/text-mutation-plugin.js +23 -13
package/plugins/text-search-plugin.d.ts +4 -0
package/plugins/text-search-plugin.js +23 -11
package/preset.d.ts +21 -1
package/preset.js +33 -6
package/types.d.ts +86 -10

package/__tests__/graphile-llm.test.js CHANGED Viewed

@@ -36,19 +36,19 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-const path_1 = require("path");
 const ollama_1 = __importDefault(require("@agentic-kit/ollama"));
-const graphile_test_1 = require("graphile-test");
 const graphile_connection_filter_1 = require("graphile-connection-filter");
+const pgvector_1 = require("graphile-search/adapters/pgvector");
 const vector_codec_1 = require("graphile-search/codecs/vector-codec");
 const plugin_1 = require("graphile-search/plugin");
-const pgvector_1 = require("graphile-search/adapters/pgvector");
+const graphile_test_1 = require("graphile-test");
+const path_1 = require("path");
+const chat_1 = require("../../src/chat");
+const embedder_1 = require("../../src/embedder");
 const llm_module_plugin_1 = require("../../src/plugins/llm-module-plugin");
-const text_search_plugin_1 = require("../../src/plugins/text-search-plugin");
-const text_mutation_plugin_1 = require("../../src/plugins/text-mutation-plugin");
 const rag_plugin_1 = require("../../src/plugins/rag-plugin");
-const embedder_1 = require("../../src/embedder");
-const chat_1 = require("../../src/chat");
+const text_mutation_plugin_1 = require("../../src/plugins/text-mutation-plugin");
+const text_search_plugin_1 = require("../../src/plugins/text-search-plugin");
 // ─── @agentic-kit/ollama client ─────────────────────────────────────────────
 const ollamaClient = new ollama_1.default('http://localhost:11434');
 async function ensureNomicModel() {
@@ -68,14 +68,14 @@ describe('Embedder abstraction', () => {
             const embedder = (0, embedder_1.buildEmbedder)({
                 provider: 'ollama',
                 model: 'nomic-embed-text',
-                baseUrl: 'http://localhost:11434',
+                baseUrl: 'http://localhost:11434'
             });
             expect(embedder).not.toBeNull();
             expect(typeof embedder).toBe('function');
         });
         it('returns null for unknown provider', () => {
             const embedder = (0, embedder_1.buildEmbedder)({
-                provider: 'unknown-provider',
+                provider: 'unknown-provider'
             });
             expect(embedder).toBeNull();
         });
@@ -90,7 +90,7 @@ describe('Embedder abstraction', () => {
             const moduleData = {
                 embedding_provider: 'ollama',
                 embedding_model: 'nomic-embed-text',
-                embedding_base_url: 'http://localhost:11434',
+                embedding_base_url: 'http://localhost:11434'
             };
             const embedder = (0, embedder_1.buildEmbedderFromModule)(moduleData);
             expect(embedder).not.toBeNull();
@@ -98,7 +98,7 @@ describe('Embedder abstraction', () => {
         });
         it('returns null for unsupported provider in module data', () => {
             const moduleData = {
-                embedding_provider: 'unsupported',
+                embedding_provider: 'unsupported'
             };
             const embedder = (0, embedder_1.buildEmbedderFromModule)(moduleData);
             expect(embedder).toBeNull();
@@ -109,18 +109,19 @@ describe('Embedder abstraction', () => {
         afterEach(() => {
             process.env = originalEnv;
         });
-        it('returns null when EMBEDDER_PROVIDER is not set', () => {
+        it('returns default ollama embedder when EMBEDDER_PROVIDER is not set', () => {
             process.env = { ...originalEnv };
             delete process.env.EMBEDDER_PROVIDER;
             const embedder = (0, embedder_1.buildEmbedderFromEnv)();
-            expect(embedder).toBeNull();
+            expect(embedder).not.toBeNull();
+            expect(typeof embedder).toBe('function');
         });
         it('builds embedder from environment variables', () => {
             process.env = {
                 ...originalEnv,
                 EMBEDDER_PROVIDER: 'ollama',
                 EMBEDDER_MODEL: 'nomic-embed-text',
-                EMBEDDER_BASE_URL: 'http://localhost:11434',
+                EMBEDDER_BASE_URL: 'http://localhost:11434'
             };
             const embedder = (0, embedder_1.buildEmbedderFromEnv)();
             expect(embedder).not.toBeNull();
@@ -138,7 +139,7 @@ describe('graphile-llm schema enrichment', () => {
     let query;
     beforeAll(async () => {
         const unifiedPlugin = (0, plugin_1.createUnifiedSearchPlugin)({
-            adapters: [(0, pgvector_1.createPgvectorAdapter)()],
+            adapters: [(0, pgvector_1.createPgvectorAdapter)()]
         });
         const testPreset = {
             extends: [(0, graphile_connection_filter_1.ConnectionFilterPreset)()],
@@ -151,18 +152,18 @@ describe('graphile-llm schema enrichment', () => {
                     defaultEmbedder: {
                         provider: 'ollama',
                         model: 'nomic-embed-text',
-                        baseUrl: 'http://localhost:11434',
-                    },
+                        baseUrl: 'http://localhost:11434'
+                    }
                 }),
                 (0, text_search_plugin_1.createLlmTextSearchPlugin)(),
-                (0, text_mutation_plugin_1.createLlmTextMutationPlugin)(),
-            ],
+                (0, text_mutation_plugin_1.createLlmTextMutationPlugin)()
+            ]
         };
         const connections = await (0, graphile_test_1.getConnections)({
             schemas: ['llm_test'],
             preset: testPreset,
             useRoot: true,
-            authRole: 'postgres',
+            authRole: 'postgres'
         }, [graphile_test_1.seed.sqlfile([(0, path_1.join)(__dirname, './setup.sql')])]);
         db = connections.db;
         teardown = connections.teardown;
@@ -291,33 +292,36 @@ describe('graphile-llm with real Ollama embedding', () => {
         const embedder = (0, embedder_1.buildEmbedder)({
             provider: 'ollama',
             model: 'nomic-embed-text',
-            baseUrl: 'http://localhost:11434',
+            baseUrl: 'http://localhost:11434'
         });
         expect(embedder).not.toBeNull();
-        const vector = await embedder('Machine learning is transforming AI');
+        const result = await embedder('Machine learning is transforming AI');
         // nomic-embed-text produces 768-dimensional vectors
-        expect(Array.isArray(vector)).toBe(true);
-        expect(vector.length).toBe(768);
+        expect(Array.isArray(result.embedding)).toBe(true);
+        expect(result.embedding.length).toBe(768);
+        expect(result.promptTokens).toBeGreaterThan(0);
         // All elements should be numbers
-        for (const v of vector) {
+        for (const v of result.embedding) {
             expect(typeof v).toBe('number');
             expect(Number.isFinite(v)).toBe(true);
         }
         // Vector should not be all zeros
-        const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
+        const magnitude = Math.sqrt(result.embedding.reduce((sum, v) => sum + v * v, 0));
         expect(magnitude).toBeGreaterThan(0);
     });
     it('should produce different vectors for semantically different text', async () => {
         const embedder = (0, embedder_1.buildEmbedder)({
             provider: 'ollama',
             model: 'nomic-embed-text',
-            baseUrl: 'http://localhost:11434',
+            baseUrl: 'http://localhost:11434'
         });
         expect(embedder).not.toBeNull();
-        const [vecA, vecB] = await Promise.all([
+        const [resultA, resultB] = await Promise.all([
             embedder('Artificial intelligence and machine learning'),
-            embedder('Cooking recipes for Italian pasta dishes'),
+            embedder('Cooking recipes for Italian pasta dishes')
         ]);
+        const vecA = resultA.embedding;
+        const vecB = resultB.embedding;
         expect(vecA.length).toBe(768);
         expect(vecB.length).toBe(768);
         // Compute cosine similarity
@@ -337,13 +341,15 @@ describe('graphile-llm with real Ollama embedding', () => {
         const embedder = (0, embedder_1.buildEmbedder)({
             provider: 'ollama',
             model: 'nomic-embed-text',
-            baseUrl: 'http://localhost:11434',
+            baseUrl: 'http://localhost:11434'
         });
         expect(embedder).not.toBeNull();
-        const [vecA, vecB] = await Promise.all([
+        const [resultA, resultB] = await Promise.all([
             embedder('Machine learning and artificial intelligence'),
-            embedder('AI and ML are subfields of computer science'),
+            embedder('AI and ML are subfields of computer science')
         ]);
+        const vecA = resultA.embedding;
+        const vecB = resultB.embedding;
         expect(vecA.length).toBe(768);
         expect(vecB.length).toBe(768);
         // Compute cosine similarity
@@ -359,11 +365,14 @@ describe('graphile-llm with real Ollama embedding', () => {
         // Semantically similar texts should have high similarity
         expect(cosineSimilarity).toBeGreaterThan(0.5);
     });
-    it('should produce embeddings via @agentic-kit/ollama OllamaClient directly', async () => {
-        const vector = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
-        expect(Array.isArray(vector)).toBe(true);
-        expect(vector.length).toBe(768);
-        for (const v of vector) {
+    it('should produce embeddings with token count via @agentic-kit/ollama OllamaClient directly', async () => {
+        const result = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
+        expect(result).toHaveProperty('embedding');
+        expect(result).toHaveProperty('promptTokens');
+        expect(Array.isArray(result.embedding)).toBe(true);
+        expect(result.embedding.length).toBe(768);
+        expect(result.promptTokens).toBeGreaterThan(0);
+        for (const v of result.embedding) {
             expect(typeof v).toBe('number');
             expect(Number.isFinite(v)).toBe(true);
         }
@@ -378,14 +387,14 @@ describe('Chat completion abstraction', () => {
             const chat = (0, chat_1.buildChatCompleter)({
                 provider: 'ollama',
                 model: 'llama3',
-                baseUrl: 'http://localhost:11434',
+                baseUrl: 'http://localhost:11434'
             });
             expect(chat).not.toBeNull();
             expect(typeof chat).toBe('function');
         });
         it('returns null for unknown provider', () => {
             const chat = (0, chat_1.buildChatCompleter)({
-                provider: 'unknown-provider',
+                provider: 'unknown-provider'
             });
             expect(chat).toBeNull();
         });
@@ -401,7 +410,7 @@ describe('Chat completion abstraction', () => {
                 embedding_provider: 'ollama',
                 chat_provider: 'ollama',
                 chat_model: 'llama3',
-                chat_base_url: 'http://localhost:11434',
+                chat_base_url: 'http://localhost:11434'
             };
             const chat = (0, chat_1.buildChatCompleterFromModule)(moduleData);
             expect(chat).not.toBeNull();
@@ -409,7 +418,7 @@ describe('Chat completion abstraction', () => {
         });
         it('returns null when chat_provider is not set', () => {
             const moduleData = {
-                embedding_provider: 'ollama',
+                embedding_provider: 'ollama'
             };
             const chat = (0, chat_1.buildChatCompleterFromModule)(moduleData);
             expect(chat).toBeNull();
@@ -420,18 +429,19 @@ describe('Chat completion abstraction', () => {
         afterEach(() => {
             process.env = originalEnv;
         });
-        it('returns null when CHAT_PROVIDER is not set', () => {
+        it('returns default ollama chat completer when CHAT_PROVIDER is not set', () => {
             process.env = { ...originalEnv };
             delete process.env.CHAT_PROVIDER;
             const chat = (0, chat_1.buildChatCompleterFromEnv)();
-            expect(chat).toBeNull();
+            expect(chat).not.toBeNull();
+            expect(typeof chat).toBe('function');
         });
         it('builds chat completer from environment variables', () => {
             process.env = {
                 ...originalEnv,
                 CHAT_PROVIDER: 'ollama',
                 CHAT_MODEL: 'llama3',
-                CHAT_BASE_URL: 'http://localhost:11434',
+                CHAT_BASE_URL: 'http://localhost:11434'
             };
             const chat = (0, chat_1.buildChatCompleterFromEnv)();
             expect(chat).not.toBeNull();
@@ -470,10 +480,10 @@ function makeTestSmartTagsPlugin(tagsByTable) {
                             Object.assign(c.extensions.tags, tags);
                         }
                         return _;
-                    },
-                },
-            },
-        },
+                    }
+                }
+            }
+        }
     };
 }
 describe('RAG plugin schema enrichment', () => {
@@ -482,7 +492,7 @@ describe('RAG plugin schema enrichment', () => {
     let query;
     beforeAll(async () => {
         const unifiedPlugin = (0, plugin_1.createUnifiedSearchPlugin)({
-            adapters: [(0, pgvector_1.createPgvectorAdapter)()],
+            adapters: [(0, pgvector_1.createPgvectorAdapter)()]
         });
         const smartTagsPlugin = makeTestSmartTagsPlugin({
             articles: {
@@ -491,16 +501,22 @@ describe('RAG plugin schema enrichment', () => {
                     parentFk: 'parent_id',
                     parentPk: 'id',
                     embeddingField: 'embedding',
-                    contentField: 'content',
-                },
-            },
+                    contentField: 'content'
+                }
+            }
         });
-        // Mock embedder that returns a fixed 3-dim vector
-        const mockEmbedder = async (_text) => [1, 0, 0];
-        // Mock chat completer that returns a canned response
+        // Mock embedder that returns a fixed 3-dim vector with token count
+        const mockEmbedder = async (_text) => ({
+            embedding: [1, 0, 0],
+            promptTokens: 5
+        });
+        // Mock chat completer that returns a canned response with usage
         const mockChatCompleter = async (messages) => {
             const userMessage = messages.find((m) => m.role === 'user');
-            return `Mock answer for: ${userMessage?.content || 'unknown'}`;
+            return {
+                content: `Mock answer for: ${userMessage?.content || 'unknown'}`,
+                usage: { input: 10, output: 15, reasoning: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 25 }
+            };
         };
         const testPreset = {
             extends: [(0, graphile_connection_filter_1.ConnectionFilterPreset)()],
@@ -512,13 +528,13 @@ describe('RAG plugin schema enrichment', () => {
                     defaultEmbedder: {
                         provider: 'ollama',
                         model: 'nomic-embed-text',
-                        baseUrl: 'http://localhost:11434',
-                    },
+                        baseUrl: 'http://localhost:11434'
+                    }
                 }),
                 (0, text_search_plugin_1.createLlmTextSearchPlugin)(),
                 (0, text_mutation_plugin_1.createLlmTextMutationPlugin)(),
-                (0, rag_plugin_1.createLlmRagPlugin)(),
-            ],
+                (0, rag_plugin_1.createLlmRagPlugin)()
+            ]
         };
         // Override the embedder and chat completer on the build context
         // by wrapping the LlmModulePlugin's build hook
@@ -531,20 +547,20 @@ describe('RAG plugin schema enrichment', () => {
                     build(build) {
                         return build.extend(build, {
                             llmEmbedder: mockEmbedder,
-                            llmChatCompleter: mockChatCompleter,
+                            llmChatCompleter: mockChatCompleter
                         }, 'TestOverridePlugin overriding embedder and chat completer');
-                    },
-                },
-            },
+                    }
+                }
+            }
         };
         const connections = await (0, graphile_test_1.getConnections)({
             schemas: ['llm_test'],
             preset: {
                 ...testPreset,
-                plugins: [...testPreset.plugins, overridePlugin],
+                plugins: [...testPreset.plugins, overridePlugin]
             },
             useRoot: true,
-            authRole: 'postgres',
+            authRole: 'postgres'
         }, [graphile_test_1.seed.sqlfile([(0, path_1.join)(__dirname, './setup.sql')])]);
         db = connections.db;
         teardown = connections.teardown;
@@ -668,7 +684,7 @@ describe('GraphileLlmPreset toggles', () => {
     it('enableRag=false excludes RAG plugin (no ragQuery field)', async () => {
         const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
         const preset = GraphileLlmPreset({
-            enableRag: false,
+            enableRag: false
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).not.toContain('LlmRagPlugin');
@@ -676,7 +692,7 @@ describe('GraphileLlmPreset toggles', () => {
     it('enableRag=true includes RAG plugin', async () => {
         const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
         const preset = GraphileLlmPreset({
-            enableRag: true,
+            enableRag: true
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).toContain('LlmRagPlugin');
@@ -684,7 +700,7 @@ describe('GraphileLlmPreset toggles', () => {
     it('enableTextSearch=false excludes text search plugin', async () => {
         const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
         const preset = GraphileLlmPreset({
-            enableTextSearch: false,
+            enableTextSearch: false
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).not.toContain('LlmTextSearchPlugin');
@@ -694,7 +710,7 @@ describe('GraphileLlmPreset toggles', () => {
     it('enableTextMutations=false excludes text mutation plugin', async () => {
         const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
         const preset = GraphileLlmPreset({
-            enableTextMutations: false,
+            enableTextMutations: false
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).not.toContain('LlmTextMutationPlugin');
@@ -704,7 +720,7 @@ describe('GraphileLlmPreset toggles', () => {
         const preset = GraphileLlmPreset({
             enableTextSearch: false,
             enableTextMutations: false,
-            enableRag: false,
+            enableRag: false
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).toEqual(['LlmModulePlugin']);

package/chat.d.ts CHANGED Viewed

@@ -26,12 +26,12 @@ export declare function buildChatCompleter(config: ChatConfig): ChatFunction | n
  */
 export declare function buildChatCompleterFromModule(data: LlmModuleData): ChatFunction | null;
 /**
- * Resolve a chat completer from environment variables via getEnvOptions().
+ * Resolve a chat completer from environment variables.
  * This is a fallback for development when no llm_module or defaultChatCompleter is configured.
  *
- * Environment variables (parsed by @constructive-io/graphql-env):
- *   CHAT_PROVIDER - Provider name ('ollama')
- *   CHAT_MODEL    - Model identifier (e.g. 'llama3')
- *   CHAT_BASE_URL - Provider base URL
+ * Environment variables (with defaults from env.ts):
+ *   CHAT_PROVIDER  - Provider name (default: 'ollama')
+ *   CHAT_MODEL     - Model identifier (default: 'llama3')
+ *   CHAT_BASE_URL  - Provider base URL (default: 'http://localhost:11434')
  */
 export declare function buildChatCompleterFromEnv(): ChatFunction | null;

package/chat.js CHANGED Viewed

@@ -12,45 +12,55 @@
  *   2. The preset's `defaultChatCompleter` option (fallback for dev/testing)
  *   3. Environment variables (CHAT_PROVIDER, CHAT_MODEL, CHAT_BASE_URL)
  */
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.buildChatCompleter = buildChatCompleter;
 exports.buildChatCompleterFromModule = buildChatCompleterFromModule;
 exports.buildChatCompleterFromEnv = buildChatCompleterFromEnv;
-const ollama_1 = __importDefault(require("@agentic-kit/ollama"));
-const graphql_env_1 = require("@constructive-io/graphql-env");
+const ollama_1 = require("@agentic-kit/ollama");
+const env_1 = require("./env");
 // ─── Built-in Providers ─────────────────────────────────────────────────────
 /**
  * Create an Ollama-based chat completion function.
  *
- * Uses OllamaClient.generate() with a messages array, which internally
- * routes to the /api/chat endpoint.
+ * Uses OllamaAdapter.stream() to get both response content and real token
+ * usage counts from the provider (prompt_eval_count, eval_count).
  */
 function createOllamaChatCompleter(baseUrl = 'http://localhost:11434', model = 'llama3') {
-    const client = new ollama_1.default(baseUrl);
+    const adapter = new ollama_1.OllamaAdapter(baseUrl);
     return async (messages, options) => {
-        // Build the input for OllamaClient.generate() in chat mode
-        const input = {
-            model,
-            messages: messages.filter((m) => m.role !== 'system'),
-        };
-        // Extract system message if present
         const systemMsg = messages.find((m) => m.role === 'system');
-        if (systemMsg) {
-            input.system = systemMsg.content;
-        }
-        if (options?.temperature !== undefined) {
-            input.temperature = options.temperature;
-        }
-        const startTime = Date.now();
-        const response = await client.generate(input);
-        const latencyMs = Date.now() - startTime;
-        // Token count logging (metering deferred to billing system)
-        console.log(`[graphile-llm] Chat completion: model=${model}, latency=${latencyMs}ms, ` +
-            `messages=${messages.length}`);
-        return response;
+        const nonSystem = messages.filter((m) => m.role !== 'system');
+        const modelDesc = adapter.createModel(model, {
+            maxOutputTokens: options?.maxTokens
+        });
+        const context = {
+            systemPrompt: systemMsg?.content,
+            messages: nonSystem.map((m) => ({
+                role: m.role,
+                content: m.content,
+                timestamp: Date.now()
+            }))
+        };
+        const stream = adapter.stream(modelDesc, context, {
+            temperature: options?.temperature,
+            maxTokens: options?.maxTokens
+        });
+        const result = await stream.result();
+        const content = result.content
+            .filter((block) => block.type === 'text')
+            .map((block) => block.text)
+            .join('');
+        return {
+            content,
+            usage: {
+                input: result.usage.input,
+                output: result.usage.output,
+                reasoning: result.usage.reasoning,
+                cacheRead: result.usage.cacheRead,
+                cacheWrite: result.usage.cacheWrite,
+                totalTokens: result.usage.totalTokens
+            }
+        };
     };
 }
 // ─── Chat Completer Construction ────────────────────────────────────────────
@@ -81,27 +91,19 @@ function buildChatCompleterFromModule(data) {
     return buildChatCompleter({
         provider: data.chat_provider,
         model: data.chat_model,
-        baseUrl: data.chat_base_url,
-        apiKey: data.api_key_ref,
+        baseUrl: data.chat_base_url
     });
 }
 /**
- * Resolve a chat completer from environment variables via getEnvOptions().
+ * Resolve a chat completer from environment variables.
  * This is a fallback for development when no llm_module or defaultChatCompleter is configured.
  *
- * Environment variables (parsed by @constructive-io/graphql-env):
- *   CHAT_PROVIDER - Provider name ('ollama')
- *   CHAT_MODEL    - Model identifier (e.g. 'llama3')
- *   CHAT_BASE_URL - Provider base URL
+ * Environment variables (with defaults from env.ts):
+ *   CHAT_PROVIDER  - Provider name (default: 'ollama')
+ *   CHAT_MODEL     - Model identifier (default: 'llama3')
+ *   CHAT_BASE_URL  - Provider base URL (default: 'http://localhost:11434')
  */
 function buildChatCompleterFromEnv() {
-    const { llm } = (0, graphql_env_1.getEnvOptions)();
-    const provider = llm?.chat?.provider;
-    if (!provider)
-        return null;
-    return buildChatCompleter({
-        provider,
-        model: llm?.chat?.model,
-        baseUrl: llm?.chat?.baseUrl,
-    });
+    const { chat } = (0, env_1.getLlmEnvOptions)();
+    return buildChatCompleter(chat);
 }

package/config-cache.d.ts ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * config-cache — Per-database LLM billing configuration cache
+ *
+ * Caches resolved billing function names per database_id.
+ * Uses an LRU cache with TTL so config changes propagate within a bounded window
+ * without requiring a server restart.
+ *
+ * Resolution flow:
+ *   Billing config from `metaschema_modules_public.billing_module`
+ *   (schema name + function names for record_usage, check_billing_quota)
+ *
+ * All queries run through the Graphile `withPgClient` callback, which gives us
+ * a client connected to the tenant database with proper role settings.
+ *
+ * The LLM module config (provider, model, etc.) is already resolved by the
+ * LlmModulePlugin at schema-build time. This cache handles the runtime-only
+ * billing piece.
+ */
+/**
+ * Generic pg client interface matching what Graphile's withPgClient provides.
+ * Avoids a hard dependency on the `pg` package.
+ */
+export interface PgClient {
+    query(sql: string, values?: unknown[]): Promise<{
+        rows: Record<string, unknown>[];
+    }>;
+}
+/**
+ * Billing function metadata resolved from the billing_module metaschema table.
+ */
+export interface BillingConfig {
+    /** Private schema containing the billing functions */
+    privateSchema: string;
+    /** Name of the record_usage function */
+    recordUsageFunction: string;
+    /** Name of the check_billing_quota function */
+    checkBillingQuotaFunction: string;
+    /** Public schema containing meters table */
+    publicSchema: string;
+}
+/**
+ * Inference log table metadata resolved from the inference_log_module.
+ */
+export interface InferenceLogConfig {
+    /** Schema containing the usage_log_inference table */
+    schema: string;
+    /** Name of the inference log table */
+    tableName: string;
+}
+/**
+ * Per-database cached configuration for the LLM billing integration.
+ */
+export interface LlmBillingCacheEntry {
+    /** Billing function references (null if billing_module not provisioned) */
+    billing: BillingConfig | null;
+    /** Inference log table references (null if inference_log_module not provisioned) */
+    inferenceLog: InferenceLogConfig | null;
+}
+/**
+ * Resolve billing config for a database.
+ * Results are cached per database_id with a 5-minute TTL.
+ *
+ * @param pgClient - A client connected to the tenant database (from withPgClient)
+ * @param databaseId - The database UUID
+ */
+export declare function getLlmBillingConfig(pgClient: PgClient, databaseId: string): Promise<LlmBillingCacheEntry>;
+/**
+ * Invalidate the cached config for a specific database (or all).
+ */
+export declare function invalidateLlmBillingConfig(databaseId?: string): void;
+/**
+ * Get cache stats for diagnostics.
+ */
+export declare function getLlmBillingCacheStats(): {
+    size: number;
+    max: number;
+};