npm - graphile-llm - Versions diffs - 0.8.0 → 0.10.0 - Mend

graphile-llm 0.8.0 → 0.10.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (37)

package/__tests__/graphile-llm.test.js +81 -67
package/chat.js +37 -27
package/config-cache.js +4 -4
package/embedder.js +3 -1
package/env.js +6 -6
package/esm/__tests__/graphile-llm.test.js +81 -67
package/esm/chat.js +37 -24
package/esm/config-cache.js +4 -4
package/esm/embedder.js +3 -1
package/esm/env.js +6 -6
package/esm/index.d.ts +12 -12
package/esm/index.js +7 -11
package/esm/metering.d.ts +5 -5
package/esm/metering.js +60 -66
package/esm/plugins/agent-discovery-plugin.js +2 -2
package/esm/plugins/llm-module-plugin.d.ts +1 -1
package/esm/plugins/llm-module-plugin.js +5 -5
package/esm/plugins/metering-plugin.js +13 -13
package/esm/plugins/rag-plugin.js +20 -20
package/esm/plugins/text-mutation-plugin.js +12 -12
package/esm/plugins/text-search-plugin.js +10 -10
package/esm/preset.js +6 -6
package/esm/types.d.ts +39 -4
package/index.d.ts +12 -12
package/index.js +11 -15
package/metering.d.ts +5 -5
package/metering.js +60 -66
package/package.json +8 -8
package/plugins/agent-discovery-plugin.js +2 -2
package/plugins/llm-module-plugin.d.ts +1 -1
package/plugins/llm-module-plugin.js +5 -5
package/plugins/metering-plugin.js +13 -13
package/plugins/rag-plugin.js +20 -20
package/plugins/text-mutation-plugin.js +12 -12
package/plugins/text-search-plugin.js +10 -10
package/preset.js +6 -6
package/types.d.ts +39 -4

package/esm/__tests__/graphile-llm.test.js CHANGED Viewed

@@ -1,16 +1,16 @@
-import { join } from 'path';
 import OllamaClient from '@agentic-kit/ollama';
-import { getConnections, seed } from 'graphile-test';
 import { ConnectionFilterPreset } from 'graphile-connection-filter';
+import { createPgvectorAdapter } from 'graphile-search/adapters/pgvector';
 import { VectorCodecPlugin } from 'graphile-search/codecs/vector-codec';
 import { createUnifiedSearchPlugin } from 'graphile-search/plugin';
-import { createPgvectorAdapter } from 'graphile-search/adapters/pgvector';
+import { getConnections, seed } from 'graphile-test';
+import { join } from 'path';
+import { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from '../../src/chat';
+import { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from '../../src/embedder';
 import { createLlmModulePlugin } from '../../src/plugins/llm-module-plugin';
-import { createLlmTextSearchPlugin } from '../../src/plugins/text-search-plugin';
-import { createLlmTextMutationPlugin } from '../../src/plugins/text-mutation-plugin';
 import { createLlmRagPlugin } from '../../src/plugins/rag-plugin';
-import { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from '../../src/embedder';
-import { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from '../../src/chat';
+import { createLlmTextMutationPlugin } from '../../src/plugins/text-mutation-plugin';
+import { createLlmTextSearchPlugin } from '../../src/plugins/text-search-plugin';
 // ─── @agentic-kit/ollama client ─────────────────────────────────────────────
 const ollamaClient = new OllamaClient('http://localhost:11434');
 async function ensureNomicModel() {
@@ -30,14 +30,14 @@ describe('Embedder abstraction', () => {
             const embedder = buildEmbedder({
                 provider: 'ollama',
                 model: 'nomic-embed-text',
-                baseUrl: 'http://localhost:11434',
+                baseUrl: 'http://localhost:11434'
             });
             expect(embedder).not.toBeNull();
             expect(typeof embedder).toBe('function');
         });
         it('returns null for unknown provider', () => {
             const embedder = buildEmbedder({
-                provider: 'unknown-provider',
+                provider: 'unknown-provider'
             });
             expect(embedder).toBeNull();
         });
@@ -52,7 +52,7 @@ describe('Embedder abstraction', () => {
             const moduleData = {
                 embedding_provider: 'ollama',
                 embedding_model: 'nomic-embed-text',
-                embedding_base_url: 'http://localhost:11434',
+                embedding_base_url: 'http://localhost:11434'
             };
             const embedder = buildEmbedderFromModule(moduleData);
             expect(embedder).not.toBeNull();
@@ -60,7 +60,7 @@ describe('Embedder abstraction', () => {
         });
         it('returns null for unsupported provider in module data', () => {
             const moduleData = {
-                embedding_provider: 'unsupported',
+                embedding_provider: 'unsupported'
             };
             const embedder = buildEmbedderFromModule(moduleData);
             expect(embedder).toBeNull();
@@ -83,7 +83,7 @@ describe('Embedder abstraction', () => {
                 ...originalEnv,
                 EMBEDDER_PROVIDER: 'ollama',
                 EMBEDDER_MODEL: 'nomic-embed-text',
-                EMBEDDER_BASE_URL: 'http://localhost:11434',
+                EMBEDDER_BASE_URL: 'http://localhost:11434'
             };
             const embedder = buildEmbedderFromEnv();
             expect(embedder).not.toBeNull();
@@ -101,7 +101,7 @@ describe('graphile-llm schema enrichment', () => {
     let query;
     beforeAll(async () => {
         const unifiedPlugin = createUnifiedSearchPlugin({
-            adapters: [createPgvectorAdapter()],
+            adapters: [createPgvectorAdapter()]
         });
         const testPreset = {
             extends: [ConnectionFilterPreset()],
@@ -114,18 +114,18 @@ describe('graphile-llm schema enrichment', () => {
                     defaultEmbedder: {
                         provider: 'ollama',
                         model: 'nomic-embed-text',
-                        baseUrl: 'http://localhost:11434',
-                    },
+                        baseUrl: 'http://localhost:11434'
+                    }
                 }),
                 createLlmTextSearchPlugin(),
-                createLlmTextMutationPlugin(),
-            ],
+                createLlmTextMutationPlugin()
+            ]
         };
         const connections = await getConnections({
             schemas: ['llm_test'],
             preset: testPreset,
             useRoot: true,
-            authRole: 'postgres',
+            authRole: 'postgres'
         }, [seed.sqlfile([join(__dirname, './setup.sql')])]);
         db = connections.db;
         teardown = connections.teardown;
@@ -254,33 +254,36 @@ describe('graphile-llm with real Ollama embedding', () => {
         const embedder = buildEmbedder({
             provider: 'ollama',
             model: 'nomic-embed-text',
-            baseUrl: 'http://localhost:11434',
+            baseUrl: 'http://localhost:11434'
         });
         expect(embedder).not.toBeNull();
-        const vector = await embedder('Machine learning is transforming AI');
+        const result = await embedder('Machine learning is transforming AI');
         // nomic-embed-text produces 768-dimensional vectors
-        expect(Array.isArray(vector)).toBe(true);
-        expect(vector.length).toBe(768);
+        expect(Array.isArray(result.embedding)).toBe(true);
+        expect(result.embedding.length).toBe(768);
+        expect(result.promptTokens).toBeGreaterThan(0);
         // All elements should be numbers
-        for (const v of vector) {
+        for (const v of result.embedding) {
             expect(typeof v).toBe('number');
             expect(Number.isFinite(v)).toBe(true);
         }
         // Vector should not be all zeros
-        const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
+        const magnitude = Math.sqrt(result.embedding.reduce((sum, v) => sum + v * v, 0));
         expect(magnitude).toBeGreaterThan(0);
     });
     it('should produce different vectors for semantically different text', async () => {
         const embedder = buildEmbedder({
             provider: 'ollama',
             model: 'nomic-embed-text',
-            baseUrl: 'http://localhost:11434',
+            baseUrl: 'http://localhost:11434'
         });
         expect(embedder).not.toBeNull();
-        const [vecA, vecB] = await Promise.all([
+        const [resultA, resultB] = await Promise.all([
             embedder('Artificial intelligence and machine learning'),
-            embedder('Cooking recipes for Italian pasta dishes'),
+            embedder('Cooking recipes for Italian pasta dishes')
         ]);
+        const vecA = resultA.embedding;
+        const vecB = resultB.embedding;
         expect(vecA.length).toBe(768);
         expect(vecB.length).toBe(768);
         // Compute cosine similarity
@@ -300,13 +303,15 @@ describe('graphile-llm with real Ollama embedding', () => {
         const embedder = buildEmbedder({
             provider: 'ollama',
             model: 'nomic-embed-text',
-            baseUrl: 'http://localhost:11434',
+            baseUrl: 'http://localhost:11434'
         });
         expect(embedder).not.toBeNull();
-        const [vecA, vecB] = await Promise.all([
+        const [resultA, resultB] = await Promise.all([
             embedder('Machine learning and artificial intelligence'),
-            embedder('AI and ML are subfields of computer science'),
+            embedder('AI and ML are subfields of computer science')
         ]);
+        const vecA = resultA.embedding;
+        const vecB = resultB.embedding;
         expect(vecA.length).toBe(768);
         expect(vecB.length).toBe(768);
         // Compute cosine similarity
@@ -322,11 +327,14 @@ describe('graphile-llm with real Ollama embedding', () => {
         // Semantically similar texts should have high similarity
         expect(cosineSimilarity).toBeGreaterThan(0.5);
     });
-    it('should produce embeddings via @agentic-kit/ollama OllamaClient directly', async () => {
-        const vector = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
-        expect(Array.isArray(vector)).toBe(true);
-        expect(vector.length).toBe(768);
-        for (const v of vector) {
+    it('should produce embeddings with token count via @agentic-kit/ollama OllamaClient directly', async () => {
+        const result = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
+        expect(result).toHaveProperty('embedding');
+        expect(result).toHaveProperty('promptTokens');
+        expect(Array.isArray(result.embedding)).toBe(true);
+        expect(result.embedding.length).toBe(768);
+        expect(result.promptTokens).toBeGreaterThan(0);
+        for (const v of result.embedding) {
             expect(typeof v).toBe('number');
             expect(Number.isFinite(v)).toBe(true);
         }
@@ -341,14 +349,14 @@ describe('Chat completion abstraction', () => {
             const chat = buildChatCompleter({
                 provider: 'ollama',
                 model: 'llama3',
-                baseUrl: 'http://localhost:11434',
+                baseUrl: 'http://localhost:11434'
             });
             expect(chat).not.toBeNull();
             expect(typeof chat).toBe('function');
         });
         it('returns null for unknown provider', () => {
             const chat = buildChatCompleter({
-                provider: 'unknown-provider',
+                provider: 'unknown-provider'
             });
             expect(chat).toBeNull();
         });
@@ -364,7 +372,7 @@ describe('Chat completion abstraction', () => {
                 embedding_provider: 'ollama',
                 chat_provider: 'ollama',
                 chat_model: 'llama3',
-                chat_base_url: 'http://localhost:11434',
+                chat_base_url: 'http://localhost:11434'
             };
             const chat = buildChatCompleterFromModule(moduleData);
             expect(chat).not.toBeNull();
@@ -372,7 +380,7 @@ describe('Chat completion abstraction', () => {
         });
         it('returns null when chat_provider is not set', () => {
             const moduleData = {
-                embedding_provider: 'ollama',
+                embedding_provider: 'ollama'
             };
             const chat = buildChatCompleterFromModule(moduleData);
             expect(chat).toBeNull();
@@ -395,7 +403,7 @@ describe('Chat completion abstraction', () => {
                 ...originalEnv,
                 CHAT_PROVIDER: 'ollama',
                 CHAT_MODEL: 'llama3',
-                CHAT_BASE_URL: 'http://localhost:11434',
+                CHAT_BASE_URL: 'http://localhost:11434'
             };
             const chat = buildChatCompleterFromEnv();
             expect(chat).not.toBeNull();
@@ -434,10 +442,10 @@ function makeTestSmartTagsPlugin(tagsByTable) {
                             Object.assign(c.extensions.tags, tags);
                         }
                         return _;
-                    },
-                },
-            },
-        },
+                    }
+                }
+            }
+        }
     };
 }
 describe('RAG plugin schema enrichment', () => {
@@ -446,7 +454,7 @@ describe('RAG plugin schema enrichment', () => {
     let query;
     beforeAll(async () => {
         const unifiedPlugin = createUnifiedSearchPlugin({
-            adapters: [createPgvectorAdapter()],
+            adapters: [createPgvectorAdapter()]
         });
         const smartTagsPlugin = makeTestSmartTagsPlugin({
             articles: {
@@ -455,16 +463,22 @@ describe('RAG plugin schema enrichment', () => {
                     parentFk: 'parent_id',
                     parentPk: 'id',
                     embeddingField: 'embedding',
-                    contentField: 'content',
-                },
-            },
+                    contentField: 'content'
+                }
+            }
+        });
+        // Mock embedder that returns a fixed 3-dim vector with token count
+        const mockEmbedder = async (_text) => ({
+            embedding: [1, 0, 0],
+            promptTokens: 5
         });
-        // Mock embedder that returns a fixed 3-dim vector
-        const mockEmbedder = async (_text) => [1, 0, 0];
-        // Mock chat completer that returns a canned response
+        // Mock chat completer that returns a canned response with usage
         const mockChatCompleter = async (messages) => {
             const userMessage = messages.find((m) => m.role === 'user');
-            return `Mock answer for: ${userMessage?.content || 'unknown'}`;
+            return {
+                content: `Mock answer for: ${userMessage?.content || 'unknown'}`,
+                usage: { input: 10, output: 15, reasoning: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 25 }
+            };
         };
         const testPreset = {
             extends: [ConnectionFilterPreset()],
@@ -476,13 +490,13 @@ describe('RAG plugin schema enrichment', () => {
                     defaultEmbedder: {
                         provider: 'ollama',
                         model: 'nomic-embed-text',
-                        baseUrl: 'http://localhost:11434',
-                    },
+                        baseUrl: 'http://localhost:11434'
+                    }
                 }),
                 createLlmTextSearchPlugin(),
                 createLlmTextMutationPlugin(),
-                createLlmRagPlugin(),
-            ],
+                createLlmRagPlugin()
+            ]
         };
         // Override the embedder and chat completer on the build context
         // by wrapping the LlmModulePlugin's build hook
@@ -495,20 +509,20 @@ describe('RAG plugin schema enrichment', () => {
                     build(build) {
                         return build.extend(build, {
                             llmEmbedder: mockEmbedder,
-                            llmChatCompleter: mockChatCompleter,
+                            llmChatCompleter: mockChatCompleter
                         }, 'TestOverridePlugin overriding embedder and chat completer');
-                    },
-                },
-            },
+                    }
+                }
+            }
         };
         const connections = await getConnections({
             schemas: ['llm_test'],
             preset: {
                 ...testPreset,
-                plugins: [...testPreset.plugins, overridePlugin],
+                plugins: [...testPreset.plugins, overridePlugin]
             },
             useRoot: true,
-            authRole: 'postgres',
+            authRole: 'postgres'
         }, [seed.sqlfile([join(__dirname, './setup.sql')])]);
         db = connections.db;
         teardown = connections.teardown;
@@ -632,7 +646,7 @@ describe('GraphileLlmPreset toggles', () => {
     it('enableRag=false excludes RAG plugin (no ragQuery field)', async () => {
         const { GraphileLlmPreset } = await import('../../src/preset');
         const preset = GraphileLlmPreset({
-            enableRag: false,
+            enableRag: false
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).not.toContain('LlmRagPlugin');
@@ -640,7 +654,7 @@ describe('GraphileLlmPreset toggles', () => {
     it('enableRag=true includes RAG plugin', async () => {
         const { GraphileLlmPreset } = await import('../../src/preset');
         const preset = GraphileLlmPreset({
-            enableRag: true,
+            enableRag: true
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).toContain('LlmRagPlugin');
@@ -648,7 +662,7 @@ describe('GraphileLlmPreset toggles', () => {
     it('enableTextSearch=false excludes text search plugin', async () => {
         const { GraphileLlmPreset } = await import('../../src/preset');
         const preset = GraphileLlmPreset({
-            enableTextSearch: false,
+            enableTextSearch: false
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).not.toContain('LlmTextSearchPlugin');
@@ -658,7 +672,7 @@ describe('GraphileLlmPreset toggles', () => {
     it('enableTextMutations=false excludes text mutation plugin', async () => {
         const { GraphileLlmPreset } = await import('../../src/preset');
         const preset = GraphileLlmPreset({
-            enableTextMutations: false,
+            enableTextMutations: false
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).not.toContain('LlmTextMutationPlugin');
@@ -668,7 +682,7 @@ describe('GraphileLlmPreset toggles', () => {
         const preset = GraphileLlmPreset({
             enableTextSearch: false,
             enableTextMutations: false,
-            enableRag: false,
+            enableRag: false
         });
         const pluginNames = preset.plugins.map((p) => p.name);
         expect(pluginNames).toEqual(['LlmModulePlugin']);

package/esm/chat.js CHANGED Viewed

@@ -11,38 +11,51 @@
  *   2. The preset's `defaultChatCompleter` option (fallback for dev/testing)
  *   3. Environment variables (CHAT_PROVIDER, CHAT_MODEL, CHAT_BASE_URL)
  */
-import OllamaClient from '@agentic-kit/ollama';
+import { OllamaAdapter } from '@agentic-kit/ollama';
 import { getLlmEnvOptions } from './env';
 // ─── Built-in Providers ─────────────────────────────────────────────────────
 /**
  * Create an Ollama-based chat completion function.
  *
- * Uses OllamaClient.generate() with a messages array, which internally
- * routes to the /api/chat endpoint.
+ * Uses OllamaAdapter.stream() to get both response content and real token
+ * usage counts from the provider (prompt_eval_count, eval_count).
  */
 function createOllamaChatCompleter(baseUrl = 'http://localhost:11434', model = 'llama3') {
-    const client = new OllamaClient(baseUrl);
+    const adapter = new OllamaAdapter(baseUrl);
     return async (messages, options) => {
-        // Build the input for OllamaClient.generate() in chat mode
-        const input = {
-            model,
-            messages: messages.filter((m) => m.role !== 'system'),
-        };
-        // Extract system message if present
         const systemMsg = messages.find((m) => m.role === 'system');
-        if (systemMsg) {
-            input.system = systemMsg.content;
-        }
-        if (options?.temperature !== undefined) {
-            input.temperature = options.temperature;
-        }
-        const startTime = Date.now();
-        const response = await client.generate(input);
-        const latencyMs = Date.now() - startTime;
-        // Token count logging (metering deferred to billing system)
-        console.log(`[graphile-llm] Chat completion: model=${model}, latency=${latencyMs}ms, ` +
-            `messages=${messages.length}`);
-        return response;
+        const nonSystem = messages.filter((m) => m.role !== 'system');
+        const modelDesc = adapter.createModel(model, {
+            maxOutputTokens: options?.maxTokens
+        });
+        const context = {
+            systemPrompt: systemMsg?.content,
+            messages: nonSystem.map((m) => ({
+                role: m.role,
+                content: m.content,
+                timestamp: Date.now()
+            }))
+        };
+        const stream = adapter.stream(modelDesc, context, {
+            temperature: options?.temperature,
+            maxTokens: options?.maxTokens
+        });
+        const result = await stream.result();
+        const content = result.content
+            .filter((block) => block.type === 'text')
+            .map((block) => block.text)
+            .join('');
+        return {
+            content,
+            usage: {
+                input: result.usage.input,
+                output: result.usage.output,
+                reasoning: result.usage.reasoning,
+                cacheRead: result.usage.cacheRead,
+                cacheWrite: result.usage.cacheWrite,
+                totalTokens: result.usage.totalTokens
+            }
+        };
     };
 }
 // ─── Chat Completer Construction ────────────────────────────────────────────
@@ -73,7 +86,7 @@ export function buildChatCompleterFromModule(data) {
     return buildChatCompleter({
         provider: data.chat_provider,
         model: data.chat_model,
-        baseUrl: data.chat_base_url,
+        baseUrl: data.chat_base_url
     });
 }
 /**

package/esm/config-cache.js CHANGED Viewed

@@ -51,7 +51,7 @@ const INFERENCE_LOG_MODULE_SQL = `
 const billingCache = new ModuleConfigCache({
     name: 'billing-config',
     ttlMs: 5 * 60 * 1000, // 5 minutes
-    max: 50,
+    max: 50
 });
 // ─── Resolution Functions ───────────────────────────────────────────────────
 /**
@@ -72,7 +72,7 @@ async function resolveInferenceLogConfig(pgClient, databaseId) {
             return null;
         return {
             schema: row.schema,
-            tableName: row.table_name,
+            tableName: row.table_name
         };
     }
     catch {
@@ -96,7 +96,7 @@ async function resolveBillingConfig(pgClient, databaseId) {
             privateSchema: row.private_schema,
             recordUsageFunction: row.record_usage_function,
             // The check_billing_quota function name follows the inflection pattern
-            checkBillingQuotaFunction: 'check_billing_quota',
+            checkBillingQuotaFunction: 'check_billing_quota'
         };
     }
     catch {
@@ -118,7 +118,7 @@ export async function getLlmBillingConfig(pgClient, databaseId) {
         return cached;
     const [billing, inferenceLog] = await Promise.all([
         resolveBillingConfig(pgClient, databaseId),
-        resolveInferenceLogConfig(pgClient, databaseId),
+        resolveInferenceLogConfig(pgClient, databaseId)
     ]);
     const entry = { billing, inferenceLog };
     billingCache.set(databaseId, entry);

package/esm/embedder.js CHANGED Viewed

@@ -14,6 +14,8 @@ import { getLlmEnvOptions } from './env';
 // ─── Built-in Providers ─────────────────────────────────────────────────────
 /**
  * Create an Ollama-based embedder function.
+ *
+ * Uses the /api/embed endpoint which returns prompt_eval_count (real token count).
  */
 function createOllamaEmbedder(baseUrl = 'http://localhost:11434', model = 'nomic-embed-text') {
     const client = new OllamaClient(baseUrl);
@@ -47,7 +49,7 @@ export function buildEmbedderFromModule(data) {
     return buildEmbedder({
         provider: data.embedding_provider,
         model: data.embedding_model,
-        baseUrl: data.embedding_base_url,
+        baseUrl: data.embedding_base_url
     });
 }
 /**

package/esm/env.js CHANGED Viewed

@@ -18,13 +18,13 @@ const LLM_DEFAULTS = {
     embedding: {
         provider: 'ollama',
         model: 'nomic-embed-text',
-        baseUrl: 'http://localhost:11434',
+        baseUrl: 'http://localhost:11434'
     },
     chat: {
         provider: 'ollama',
         model: 'llama3',
-        baseUrl: 'http://localhost:11434',
-    },
+        baseUrl: 'http://localhost:11434'
+    }
 };
 // ─── Resolution ─────────────────────────────────────────────────────────────
 /**
@@ -38,12 +38,12 @@ export function getLlmEnvOptions() {
         embedding: {
             provider: process.env.EMBEDDER_PROVIDER ?? LLM_DEFAULTS.embedding.provider,
             model: process.env.EMBEDDER_MODEL ?? LLM_DEFAULTS.embedding.model,
-            baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl,
+            baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl
         },
         chat: {
             provider: process.env.CHAT_PROVIDER ?? LLM_DEFAULTS.chat.provider,
             model: process.env.CHAT_MODEL ?? LLM_DEFAULTS.chat.model,
-            baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl,
-        },
+            baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl
+        }
     };
 }

package/esm/index.d.ts CHANGED Viewed

@@ -29,20 +29,20 @@
  * };
  * ```
  */
-export { getLlmEnvOptions } from './env';
 export type { LlmEnvOptions, LlmProviderConfig } from './env';
+export { getLlmEnvOptions } from './env';
 export { GraphileLlmPreset } from './preset';
 export { createLlmModulePlugin } from './plugins/llm-module-plugin';
-export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
-export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
 export { createLlmRagPlugin } from './plugins/rag-plugin';
+export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
+export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
 export { createLlmMeteringPlugin } from './plugins/metering-plugin';
-export { getAgentDiscovery, clearAgentDiscoveryCache } from './plugins/agent-discovery-plugin';
-export type { AgentTableInfo, AgentDiscovery } from './plugins/agent-discovery-plugin';
-export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
-export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
-export { meteredEmbed, meteredChat, logInferenceUsage, QuotaExceededError } from './metering';
-export type { MeteringContext, MeteringOptions, MeterResult, WithPgClient, InferenceLogEntry } from './metering';
-export { getLlmBillingConfig, invalidateLlmBillingConfig, getLlmBillingCacheStats, } from './config-cache';
-export type { BillingConfig, LlmBillingCacheEntry, InferenceLogConfig, PgClient } from './config-cache';
-export type { EmbedderFunction, EmbedderConfig, ChatFunction, ChatConfig, ChatMessage, ChatOptions, LlmModuleData, GraphileLlmOptions, MeteringConfig, RagDefaults, ChunkTableInfo, } from './types';
+export type { AgentDiscovery, AgentTableInfo } from './plugins/agent-discovery-plugin';
+export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
+export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
+export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
+export type { InferenceLogEntry, MeteringContext, MeteringOptions, MeterResult, WithPgClient } from './metering';
+export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
+export type { BillingConfig, InferenceLogConfig, LlmBillingCacheEntry, PgClient } from './config-cache';
+export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
+export type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, ChatResult, ChunkTableInfo, EmbedderConfig, EmbedderFunction, EmbeddingResult, GraphileLlmOptions, LlmModuleData, LlmUsage, MeteringConfig, RagDefaults } from './types';

package/esm/index.js CHANGED Viewed

@@ -29,24 +29,20 @@
  * };
  * ```
  */
-// Environment configuration (single source of truth for LLM defaults)
 export { getLlmEnvOptions } from './env';
 // Preset (recommended entry point)
 export { GraphileLlmPreset } from './preset';
 // Individual plugins (pure — no billing dependency)
 export { createLlmModulePlugin } from './plugins/llm-module-plugin';
-export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
-export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
 export { createLlmRagPlugin } from './plugins/rag-plugin';
+export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
+export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
 // Metering plugin (opt-in billing integration)
 export { createLlmMeteringPlugin } from './plugins/metering-plugin';
-// Agent discovery (queries agent_chat_module config table at runtime)
-export { getAgentDiscovery, clearAgentDiscoveryCache } from './plugins/agent-discovery-plugin';
+export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
 // Embedder utilities
-export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
+export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
 // Chat completion utilities
-export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
-// Metering utilities (for custom integration)
-export { meteredEmbed, meteredChat, logInferenceUsage, QuotaExceededError } from './metering';
-// Config cache (for custom integration)
-export { getLlmBillingConfig, invalidateLlmBillingConfig, getLlmBillingCacheStats, } from './config-cache';
+export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
+export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
+export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';

package/esm/metering.d.ts CHANGED Viewed

@@ -9,16 +9,16 @@
  * When the quota check fails, the wrapper returns null (graceful degradation)
  * instead of throwing, so the search pipeline can fall back to text-only.
  *
- * Token counts are estimated from text length (~4 chars per token). No
- * tokenizer needed — the billing system uses tokens as abstract units
- * and the credit_cost on each model's meter normalizes the relative expense.
+ * Token counts:
+ *   - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
+ *   - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
  *
  * The billing functions live in the tenant database and are called via the
  * Graphile `withPgClient` callback. Function locations (schema, names) are
  * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
  */
-import type { PgClient, BillingConfig, InferenceLogConfig } from './config-cache';
-import type { EmbedderFunction, ChatFunction, ChatMessage, ChatOptions } from './types';
+import type { BillingConfig, InferenceLogConfig, PgClient } from './config-cache';
+import type { ChatFunction, ChatMessage, ChatOptions, EmbedderFunction } from './types';
 /**
  * Callback matching Graphile's withPgClient signature.
  * Acquires a pg client, calls the callback, then releases the client.