graphile-llm 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,19 +36,19 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
36
36
  return (mod && mod.__esModule) ? mod : { "default": mod };
37
37
  };
38
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
- const path_1 = require("path");
40
39
  const ollama_1 = __importDefault(require("@agentic-kit/ollama"));
41
- const graphile_test_1 = require("graphile-test");
42
40
  const graphile_connection_filter_1 = require("graphile-connection-filter");
41
+ const pgvector_1 = require("graphile-search/adapters/pgvector");
43
42
  const vector_codec_1 = require("graphile-search/codecs/vector-codec");
44
43
  const plugin_1 = require("graphile-search/plugin");
45
- const pgvector_1 = require("graphile-search/adapters/pgvector");
44
+ const graphile_test_1 = require("graphile-test");
45
+ const path_1 = require("path");
46
+ const chat_1 = require("../../src/chat");
47
+ const embedder_1 = require("../../src/embedder");
46
48
  const llm_module_plugin_1 = require("../../src/plugins/llm-module-plugin");
47
- const text_search_plugin_1 = require("../../src/plugins/text-search-plugin");
48
- const text_mutation_plugin_1 = require("../../src/plugins/text-mutation-plugin");
49
49
  const rag_plugin_1 = require("../../src/plugins/rag-plugin");
50
- const embedder_1 = require("../../src/embedder");
51
- const chat_1 = require("../../src/chat");
50
+ const text_mutation_plugin_1 = require("../../src/plugins/text-mutation-plugin");
51
+ const text_search_plugin_1 = require("../../src/plugins/text-search-plugin");
52
52
  // ─── @agentic-kit/ollama client ─────────────────────────────────────────────
53
53
  const ollamaClient = new ollama_1.default('http://localhost:11434');
54
54
  async function ensureNomicModel() {
@@ -68,14 +68,14 @@ describe('Embedder abstraction', () => {
68
68
  const embedder = (0, embedder_1.buildEmbedder)({
69
69
  provider: 'ollama',
70
70
  model: 'nomic-embed-text',
71
- baseUrl: 'http://localhost:11434',
71
+ baseUrl: 'http://localhost:11434'
72
72
  });
73
73
  expect(embedder).not.toBeNull();
74
74
  expect(typeof embedder).toBe('function');
75
75
  });
76
76
  it('returns null for unknown provider', () => {
77
77
  const embedder = (0, embedder_1.buildEmbedder)({
78
- provider: 'unknown-provider',
78
+ provider: 'unknown-provider'
79
79
  });
80
80
  expect(embedder).toBeNull();
81
81
  });
@@ -90,7 +90,7 @@ describe('Embedder abstraction', () => {
90
90
  const moduleData = {
91
91
  embedding_provider: 'ollama',
92
92
  embedding_model: 'nomic-embed-text',
93
- embedding_base_url: 'http://localhost:11434',
93
+ embedding_base_url: 'http://localhost:11434'
94
94
  };
95
95
  const embedder = (0, embedder_1.buildEmbedderFromModule)(moduleData);
96
96
  expect(embedder).not.toBeNull();
@@ -98,7 +98,7 @@ describe('Embedder abstraction', () => {
98
98
  });
99
99
  it('returns null for unsupported provider in module data', () => {
100
100
  const moduleData = {
101
- embedding_provider: 'unsupported',
101
+ embedding_provider: 'unsupported'
102
102
  };
103
103
  const embedder = (0, embedder_1.buildEmbedderFromModule)(moduleData);
104
104
  expect(embedder).toBeNull();
@@ -121,7 +121,7 @@ describe('Embedder abstraction', () => {
121
121
  ...originalEnv,
122
122
  EMBEDDER_PROVIDER: 'ollama',
123
123
  EMBEDDER_MODEL: 'nomic-embed-text',
124
- EMBEDDER_BASE_URL: 'http://localhost:11434',
124
+ EMBEDDER_BASE_URL: 'http://localhost:11434'
125
125
  };
126
126
  const embedder = (0, embedder_1.buildEmbedderFromEnv)();
127
127
  expect(embedder).not.toBeNull();
@@ -139,7 +139,7 @@ describe('graphile-llm schema enrichment', () => {
139
139
  let query;
140
140
  beforeAll(async () => {
141
141
  const unifiedPlugin = (0, plugin_1.createUnifiedSearchPlugin)({
142
- adapters: [(0, pgvector_1.createPgvectorAdapter)()],
142
+ adapters: [(0, pgvector_1.createPgvectorAdapter)()]
143
143
  });
144
144
  const testPreset = {
145
145
  extends: [(0, graphile_connection_filter_1.ConnectionFilterPreset)()],
@@ -152,18 +152,18 @@ describe('graphile-llm schema enrichment', () => {
152
152
  defaultEmbedder: {
153
153
  provider: 'ollama',
154
154
  model: 'nomic-embed-text',
155
- baseUrl: 'http://localhost:11434',
156
- },
155
+ baseUrl: 'http://localhost:11434'
156
+ }
157
157
  }),
158
158
  (0, text_search_plugin_1.createLlmTextSearchPlugin)(),
159
- (0, text_mutation_plugin_1.createLlmTextMutationPlugin)(),
160
- ],
159
+ (0, text_mutation_plugin_1.createLlmTextMutationPlugin)()
160
+ ]
161
161
  };
162
162
  const connections = await (0, graphile_test_1.getConnections)({
163
163
  schemas: ['llm_test'],
164
164
  preset: testPreset,
165
165
  useRoot: true,
166
- authRole: 'postgres',
166
+ authRole: 'postgres'
167
167
  }, [graphile_test_1.seed.sqlfile([(0, path_1.join)(__dirname, './setup.sql')])]);
168
168
  db = connections.db;
169
169
  teardown = connections.teardown;
@@ -292,33 +292,36 @@ describe('graphile-llm with real Ollama embedding', () => {
292
292
  const embedder = (0, embedder_1.buildEmbedder)({
293
293
  provider: 'ollama',
294
294
  model: 'nomic-embed-text',
295
- baseUrl: 'http://localhost:11434',
295
+ baseUrl: 'http://localhost:11434'
296
296
  });
297
297
  expect(embedder).not.toBeNull();
298
- const vector = await embedder('Machine learning is transforming AI');
298
+ const result = await embedder('Machine learning is transforming AI');
299
299
  // nomic-embed-text produces 768-dimensional vectors
300
- expect(Array.isArray(vector)).toBe(true);
301
- expect(vector.length).toBe(768);
300
+ expect(Array.isArray(result.embedding)).toBe(true);
301
+ expect(result.embedding.length).toBe(768);
302
+ expect(result.promptTokens).toBeGreaterThan(0);
302
303
  // All elements should be numbers
303
- for (const v of vector) {
304
+ for (const v of result.embedding) {
304
305
  expect(typeof v).toBe('number');
305
306
  expect(Number.isFinite(v)).toBe(true);
306
307
  }
307
308
  // Vector should not be all zeros
308
- const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
309
+ const magnitude = Math.sqrt(result.embedding.reduce((sum, v) => sum + v * v, 0));
309
310
  expect(magnitude).toBeGreaterThan(0);
310
311
  });
311
312
  it('should produce different vectors for semantically different text', async () => {
312
313
  const embedder = (0, embedder_1.buildEmbedder)({
313
314
  provider: 'ollama',
314
315
  model: 'nomic-embed-text',
315
- baseUrl: 'http://localhost:11434',
316
+ baseUrl: 'http://localhost:11434'
316
317
  });
317
318
  expect(embedder).not.toBeNull();
318
- const [vecA, vecB] = await Promise.all([
319
+ const [resultA, resultB] = await Promise.all([
319
320
  embedder('Artificial intelligence and machine learning'),
320
- embedder('Cooking recipes for Italian pasta dishes'),
321
+ embedder('Cooking recipes for Italian pasta dishes')
321
322
  ]);
323
+ const vecA = resultA.embedding;
324
+ const vecB = resultB.embedding;
322
325
  expect(vecA.length).toBe(768);
323
326
  expect(vecB.length).toBe(768);
324
327
  // Compute cosine similarity
@@ -338,13 +341,15 @@ describe('graphile-llm with real Ollama embedding', () => {
338
341
  const embedder = (0, embedder_1.buildEmbedder)({
339
342
  provider: 'ollama',
340
343
  model: 'nomic-embed-text',
341
- baseUrl: 'http://localhost:11434',
344
+ baseUrl: 'http://localhost:11434'
342
345
  });
343
346
  expect(embedder).not.toBeNull();
344
- const [vecA, vecB] = await Promise.all([
347
+ const [resultA, resultB] = await Promise.all([
345
348
  embedder('Machine learning and artificial intelligence'),
346
- embedder('AI and ML are subfields of computer science'),
349
+ embedder('AI and ML are subfields of computer science')
347
350
  ]);
351
+ const vecA = resultA.embedding;
352
+ const vecB = resultB.embedding;
348
353
  expect(vecA.length).toBe(768);
349
354
  expect(vecB.length).toBe(768);
350
355
  // Compute cosine similarity
@@ -360,11 +365,14 @@ describe('graphile-llm with real Ollama embedding', () => {
360
365
  // Semantically similar texts should have high similarity
361
366
  expect(cosineSimilarity).toBeGreaterThan(0.5);
362
367
  });
363
- it('should produce embeddings via @agentic-kit/ollama OllamaClient directly', async () => {
364
- const vector = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
365
- expect(Array.isArray(vector)).toBe(true);
366
- expect(vector.length).toBe(768);
367
- for (const v of vector) {
368
+ it('should produce embeddings with token count via @agentic-kit/ollama OllamaClient directly', async () => {
369
+ const result = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
370
+ expect(result).toHaveProperty('embedding');
371
+ expect(result).toHaveProperty('promptTokens');
372
+ expect(Array.isArray(result.embedding)).toBe(true);
373
+ expect(result.embedding.length).toBe(768);
374
+ expect(result.promptTokens).toBeGreaterThan(0);
375
+ for (const v of result.embedding) {
368
376
  expect(typeof v).toBe('number');
369
377
  expect(Number.isFinite(v)).toBe(true);
370
378
  }
@@ -379,14 +387,14 @@ describe('Chat completion abstraction', () => {
379
387
  const chat = (0, chat_1.buildChatCompleter)({
380
388
  provider: 'ollama',
381
389
  model: 'llama3',
382
- baseUrl: 'http://localhost:11434',
390
+ baseUrl: 'http://localhost:11434'
383
391
  });
384
392
  expect(chat).not.toBeNull();
385
393
  expect(typeof chat).toBe('function');
386
394
  });
387
395
  it('returns null for unknown provider', () => {
388
396
  const chat = (0, chat_1.buildChatCompleter)({
389
- provider: 'unknown-provider',
397
+ provider: 'unknown-provider'
390
398
  });
391
399
  expect(chat).toBeNull();
392
400
  });
@@ -402,7 +410,7 @@ describe('Chat completion abstraction', () => {
402
410
  embedding_provider: 'ollama',
403
411
  chat_provider: 'ollama',
404
412
  chat_model: 'llama3',
405
- chat_base_url: 'http://localhost:11434',
413
+ chat_base_url: 'http://localhost:11434'
406
414
  };
407
415
  const chat = (0, chat_1.buildChatCompleterFromModule)(moduleData);
408
416
  expect(chat).not.toBeNull();
@@ -410,7 +418,7 @@ describe('Chat completion abstraction', () => {
410
418
  });
411
419
  it('returns null when chat_provider is not set', () => {
412
420
  const moduleData = {
413
- embedding_provider: 'ollama',
421
+ embedding_provider: 'ollama'
414
422
  };
415
423
  const chat = (0, chat_1.buildChatCompleterFromModule)(moduleData);
416
424
  expect(chat).toBeNull();
@@ -433,7 +441,7 @@ describe('Chat completion abstraction', () => {
433
441
  ...originalEnv,
434
442
  CHAT_PROVIDER: 'ollama',
435
443
  CHAT_MODEL: 'llama3',
436
- CHAT_BASE_URL: 'http://localhost:11434',
444
+ CHAT_BASE_URL: 'http://localhost:11434'
437
445
  };
438
446
  const chat = (0, chat_1.buildChatCompleterFromEnv)();
439
447
  expect(chat).not.toBeNull();
@@ -472,10 +480,10 @@ function makeTestSmartTagsPlugin(tagsByTable) {
472
480
  Object.assign(c.extensions.tags, tags);
473
481
  }
474
482
  return _;
475
- },
476
- },
477
- },
478
- },
483
+ }
484
+ }
485
+ }
486
+ }
479
487
  };
480
488
  }
481
489
  describe('RAG plugin schema enrichment', () => {
@@ -484,7 +492,7 @@ describe('RAG plugin schema enrichment', () => {
484
492
  let query;
485
493
  beforeAll(async () => {
486
494
  const unifiedPlugin = (0, plugin_1.createUnifiedSearchPlugin)({
487
- adapters: [(0, pgvector_1.createPgvectorAdapter)()],
495
+ adapters: [(0, pgvector_1.createPgvectorAdapter)()]
488
496
  });
489
497
  const smartTagsPlugin = makeTestSmartTagsPlugin({
490
498
  articles: {
@@ -493,16 +501,22 @@ describe('RAG plugin schema enrichment', () => {
493
501
  parentFk: 'parent_id',
494
502
  parentPk: 'id',
495
503
  embeddingField: 'embedding',
496
- contentField: 'content',
497
- },
498
- },
504
+ contentField: 'content'
505
+ }
506
+ }
499
507
  });
500
- // Mock embedder that returns a fixed 3-dim vector
501
- const mockEmbedder = async (_text) => [1, 0, 0];
502
- // Mock chat completer that returns a canned response
508
+ // Mock embedder that returns a fixed 3-dim vector with token count
509
+ const mockEmbedder = async (_text) => ({
510
+ embedding: [1, 0, 0],
511
+ promptTokens: 5
512
+ });
513
+ // Mock chat completer that returns a canned response with usage
503
514
  const mockChatCompleter = async (messages) => {
504
515
  const userMessage = messages.find((m) => m.role === 'user');
505
- return `Mock answer for: ${userMessage?.content || 'unknown'}`;
516
+ return {
517
+ content: `Mock answer for: ${userMessage?.content || 'unknown'}`,
518
+ usage: { input: 10, output: 15, reasoning: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 25 }
519
+ };
506
520
  };
507
521
  const testPreset = {
508
522
  extends: [(0, graphile_connection_filter_1.ConnectionFilterPreset)()],
@@ -514,13 +528,13 @@ describe('RAG plugin schema enrichment', () => {
514
528
  defaultEmbedder: {
515
529
  provider: 'ollama',
516
530
  model: 'nomic-embed-text',
517
- baseUrl: 'http://localhost:11434',
518
- },
531
+ baseUrl: 'http://localhost:11434'
532
+ }
519
533
  }),
520
534
  (0, text_search_plugin_1.createLlmTextSearchPlugin)(),
521
535
  (0, text_mutation_plugin_1.createLlmTextMutationPlugin)(),
522
- (0, rag_plugin_1.createLlmRagPlugin)(),
523
- ],
536
+ (0, rag_plugin_1.createLlmRagPlugin)()
537
+ ]
524
538
  };
525
539
  // Override the embedder and chat completer on the build context
526
540
  // by wrapping the LlmModulePlugin's build hook
@@ -533,20 +547,20 @@ describe('RAG plugin schema enrichment', () => {
533
547
  build(build) {
534
548
  return build.extend(build, {
535
549
  llmEmbedder: mockEmbedder,
536
- llmChatCompleter: mockChatCompleter,
550
+ llmChatCompleter: mockChatCompleter
537
551
  }, 'TestOverridePlugin overriding embedder and chat completer');
538
- },
539
- },
540
- },
552
+ }
553
+ }
554
+ }
541
555
  };
542
556
  const connections = await (0, graphile_test_1.getConnections)({
543
557
  schemas: ['llm_test'],
544
558
  preset: {
545
559
  ...testPreset,
546
- plugins: [...testPreset.plugins, overridePlugin],
560
+ plugins: [...testPreset.plugins, overridePlugin]
547
561
  },
548
562
  useRoot: true,
549
- authRole: 'postgres',
563
+ authRole: 'postgres'
550
564
  }, [graphile_test_1.seed.sqlfile([(0, path_1.join)(__dirname, './setup.sql')])]);
551
565
  db = connections.db;
552
566
  teardown = connections.teardown;
@@ -670,7 +684,7 @@ describe('GraphileLlmPreset toggles', () => {
670
684
  it('enableRag=false excludes RAG plugin (no ragQuery field)', async () => {
671
685
  const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
672
686
  const preset = GraphileLlmPreset({
673
- enableRag: false,
687
+ enableRag: false
674
688
  });
675
689
  const pluginNames = preset.plugins.map((p) => p.name);
676
690
  expect(pluginNames).not.toContain('LlmRagPlugin');
@@ -678,7 +692,7 @@ describe('GraphileLlmPreset toggles', () => {
678
692
  it('enableRag=true includes RAG plugin', async () => {
679
693
  const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
680
694
  const preset = GraphileLlmPreset({
681
- enableRag: true,
695
+ enableRag: true
682
696
  });
683
697
  const pluginNames = preset.plugins.map((p) => p.name);
684
698
  expect(pluginNames).toContain('LlmRagPlugin');
@@ -686,7 +700,7 @@ describe('GraphileLlmPreset toggles', () => {
686
700
  it('enableTextSearch=false excludes text search plugin', async () => {
687
701
  const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
688
702
  const preset = GraphileLlmPreset({
689
- enableTextSearch: false,
703
+ enableTextSearch: false
690
704
  });
691
705
  const pluginNames = preset.plugins.map((p) => p.name);
692
706
  expect(pluginNames).not.toContain('LlmTextSearchPlugin');
@@ -696,7 +710,7 @@ describe('GraphileLlmPreset toggles', () => {
696
710
  it('enableTextMutations=false excludes text mutation plugin', async () => {
697
711
  const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
698
712
  const preset = GraphileLlmPreset({
699
- enableTextMutations: false,
713
+ enableTextMutations: false
700
714
  });
701
715
  const pluginNames = preset.plugins.map((p) => p.name);
702
716
  expect(pluginNames).not.toContain('LlmTextMutationPlugin');
@@ -706,7 +720,7 @@ describe('GraphileLlmPreset toggles', () => {
706
720
  const preset = GraphileLlmPreset({
707
721
  enableTextSearch: false,
708
722
  enableTextMutations: false,
709
- enableRag: false,
723
+ enableRag: false
710
724
  });
711
725
  const pluginNames = preset.plugins.map((p) => p.name);
712
726
  expect(pluginNames).toEqual(['LlmModulePlugin']);
package/chat.js CHANGED
@@ -12,45 +12,55 @@
12
12
  * 2. The preset's `defaultChatCompleter` option (fallback for dev/testing)
13
13
  * 3. Environment variables (CHAT_PROVIDER, CHAT_MODEL, CHAT_BASE_URL)
14
14
  */
15
- var __importDefault = (this && this.__importDefault) || function (mod) {
16
- return (mod && mod.__esModule) ? mod : { "default": mod };
17
- };
18
15
  Object.defineProperty(exports, "__esModule", { value: true });
19
16
  exports.buildChatCompleter = buildChatCompleter;
20
17
  exports.buildChatCompleterFromModule = buildChatCompleterFromModule;
21
18
  exports.buildChatCompleterFromEnv = buildChatCompleterFromEnv;
22
- const ollama_1 = __importDefault(require("@agentic-kit/ollama"));
19
+ const ollama_1 = require("@agentic-kit/ollama");
23
20
  const env_1 = require("./env");
24
21
  // ─── Built-in Providers ─────────────────────────────────────────────────────
25
22
  /**
26
23
  * Create an Ollama-based chat completion function.
27
24
  *
28
- * Uses OllamaClient.generate() with a messages array, which internally
29
- * routes to the /api/chat endpoint.
25
+ * Uses OllamaAdapter.stream() to get both response content and real token
26
+ * usage counts from the provider (prompt_eval_count, eval_count).
30
27
  */
31
28
  function createOllamaChatCompleter(baseUrl = 'http://localhost:11434', model = 'llama3') {
32
- const client = new ollama_1.default(baseUrl);
29
+ const adapter = new ollama_1.OllamaAdapter(baseUrl);
33
30
  return async (messages, options) => {
34
- // Build the input for OllamaClient.generate() in chat mode
35
- const input = {
36
- model,
37
- messages: messages.filter((m) => m.role !== 'system'),
38
- };
39
- // Extract system message if present
40
31
  const systemMsg = messages.find((m) => m.role === 'system');
41
- if (systemMsg) {
42
- input.system = systemMsg.content;
43
- }
44
- if (options?.temperature !== undefined) {
45
- input.temperature = options.temperature;
46
- }
47
- const startTime = Date.now();
48
- const response = await client.generate(input);
49
- const latencyMs = Date.now() - startTime;
50
- // Token count logging (metering deferred to billing system)
51
- console.log(`[graphile-llm] Chat completion: model=${model}, latency=${latencyMs}ms, ` +
52
- `messages=${messages.length}`);
53
- return response;
32
+ const nonSystem = messages.filter((m) => m.role !== 'system');
33
+ const modelDesc = adapter.createModel(model, {
34
+ maxOutputTokens: options?.maxTokens
35
+ });
36
+ const context = {
37
+ systemPrompt: systemMsg?.content,
38
+ messages: nonSystem.map((m) => ({
39
+ role: m.role,
40
+ content: m.content,
41
+ timestamp: Date.now()
42
+ }))
43
+ };
44
+ const stream = adapter.stream(modelDesc, context, {
45
+ temperature: options?.temperature,
46
+ maxTokens: options?.maxTokens
47
+ });
48
+ const result = await stream.result();
49
+ const content = result.content
50
+ .filter((block) => block.type === 'text')
51
+ .map((block) => block.text)
52
+ .join('');
53
+ return {
54
+ content,
55
+ usage: {
56
+ input: result.usage.input,
57
+ output: result.usage.output,
58
+ reasoning: result.usage.reasoning,
59
+ cacheRead: result.usage.cacheRead,
60
+ cacheWrite: result.usage.cacheWrite,
61
+ totalTokens: result.usage.totalTokens
62
+ }
63
+ };
54
64
  };
55
65
  }
56
66
  // ─── Chat Completer Construction ────────────────────────────────────────────
@@ -81,7 +91,7 @@ function buildChatCompleterFromModule(data) {
81
91
  return buildChatCompleter({
82
92
  provider: data.chat_provider,
83
93
  model: data.chat_model,
84
- baseUrl: data.chat_base_url,
94
+ baseUrl: data.chat_base_url
85
95
  });
86
96
  }
87
97
  /**
package/config-cache.js CHANGED
@@ -56,7 +56,7 @@ const INFERENCE_LOG_MODULE_SQL = `
56
56
  const billingCache = new graphile_cache_1.ModuleConfigCache({
57
57
  name: 'billing-config',
58
58
  ttlMs: 5 * 60 * 1000, // 5 minutes
59
- max: 50,
59
+ max: 50
60
60
  });
61
61
  // ─── Resolution Functions ───────────────────────────────────────────────────
62
62
  /**
@@ -77,7 +77,7 @@ async function resolveInferenceLogConfig(pgClient, databaseId) {
77
77
  return null;
78
78
  return {
79
79
  schema: row.schema,
80
- tableName: row.table_name,
80
+ tableName: row.table_name
81
81
  };
82
82
  }
83
83
  catch {
@@ -101,7 +101,7 @@ async function resolveBillingConfig(pgClient, databaseId) {
101
101
  privateSchema: row.private_schema,
102
102
  recordUsageFunction: row.record_usage_function,
103
103
  // The check_billing_quota function name follows the inflection pattern
104
- checkBillingQuotaFunction: 'check_billing_quota',
104
+ checkBillingQuotaFunction: 'check_billing_quota'
105
105
  };
106
106
  }
107
107
  catch {
@@ -123,7 +123,7 @@ async function getLlmBillingConfig(pgClient, databaseId) {
123
123
  return cached;
124
124
  const [billing, inferenceLog] = await Promise.all([
125
125
  resolveBillingConfig(pgClient, databaseId),
126
- resolveInferenceLogConfig(pgClient, databaseId),
126
+ resolveInferenceLogConfig(pgClient, databaseId)
127
127
  ]);
128
128
  const entry = { billing, inferenceLog };
129
129
  billingCache.set(databaseId, entry);
package/embedder.js CHANGED
@@ -22,6 +22,8 @@ const env_1 = require("./env");
22
22
  // ─── Built-in Providers ─────────────────────────────────────────────────────
23
23
  /**
24
24
  * Create an Ollama-based embedder function.
25
+ *
26
+ * Uses the /api/embed endpoint which returns prompt_eval_count (real token count).
25
27
  */
26
28
  function createOllamaEmbedder(baseUrl = 'http://localhost:11434', model = 'nomic-embed-text') {
27
29
  const client = new ollama_1.default(baseUrl);
@@ -55,7 +57,7 @@ function buildEmbedderFromModule(data) {
55
57
  return buildEmbedder({
56
58
  provider: data.embedding_provider,
57
59
  model: data.embedding_model,
58
- baseUrl: data.embedding_base_url,
60
+ baseUrl: data.embedding_base_url
59
61
  });
60
62
  }
61
63
  /**
package/env.js CHANGED
@@ -21,13 +21,13 @@ const LLM_DEFAULTS = {
21
21
  embedding: {
22
22
  provider: 'ollama',
23
23
  model: 'nomic-embed-text',
24
- baseUrl: 'http://localhost:11434',
24
+ baseUrl: 'http://localhost:11434'
25
25
  },
26
26
  chat: {
27
27
  provider: 'ollama',
28
28
  model: 'llama3',
29
- baseUrl: 'http://localhost:11434',
30
- },
29
+ baseUrl: 'http://localhost:11434'
30
+ }
31
31
  };
32
32
  // ─── Resolution ─────────────────────────────────────────────────────────────
33
33
  /**
@@ -41,12 +41,12 @@ function getLlmEnvOptions() {
41
41
  embedding: {
42
42
  provider: process.env.EMBEDDER_PROVIDER ?? LLM_DEFAULTS.embedding.provider,
43
43
  model: process.env.EMBEDDER_MODEL ?? LLM_DEFAULTS.embedding.model,
44
- baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl,
44
+ baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl
45
45
  },
46
46
  chat: {
47
47
  provider: process.env.CHAT_PROVIDER ?? LLM_DEFAULTS.chat.provider,
48
48
  model: process.env.CHAT_MODEL ?? LLM_DEFAULTS.chat.model,
49
- baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl,
50
- },
49
+ baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl
50
+ }
51
51
  };
52
52
  }