graphile-llm 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,16 @@
1
- import { join } from 'path';
2
1
  import OllamaClient from '@agentic-kit/ollama';
3
- import { getConnections, seed } from 'graphile-test';
4
2
  import { ConnectionFilterPreset } from 'graphile-connection-filter';
3
+ import { createPgvectorAdapter } from 'graphile-search/adapters/pgvector';
5
4
  import { VectorCodecPlugin } from 'graphile-search/codecs/vector-codec';
6
5
  import { createUnifiedSearchPlugin } from 'graphile-search/plugin';
7
- import { createPgvectorAdapter } from 'graphile-search/adapters/pgvector';
6
+ import { getConnections, seed } from 'graphile-test';
7
+ import { join } from 'path';
8
+ import { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from '../../src/chat';
9
+ import { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from '../../src/embedder';
8
10
  import { createLlmModulePlugin } from '../../src/plugins/llm-module-plugin';
9
- import { createLlmTextSearchPlugin } from '../../src/plugins/text-search-plugin';
10
- import { createLlmTextMutationPlugin } from '../../src/plugins/text-mutation-plugin';
11
11
  import { createLlmRagPlugin } from '../../src/plugins/rag-plugin';
12
- import { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from '../../src/embedder';
13
- import { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from '../../src/chat';
12
+ import { createLlmTextMutationPlugin } from '../../src/plugins/text-mutation-plugin';
13
+ import { createLlmTextSearchPlugin } from '../../src/plugins/text-search-plugin';
14
14
  // ─── @agentic-kit/ollama client ─────────────────────────────────────────────
15
15
  const ollamaClient = new OllamaClient('http://localhost:11434');
16
16
  async function ensureNomicModel() {
@@ -30,14 +30,14 @@ describe('Embedder abstraction', () => {
30
30
  const embedder = buildEmbedder({
31
31
  provider: 'ollama',
32
32
  model: 'nomic-embed-text',
33
- baseUrl: 'http://localhost:11434',
33
+ baseUrl: 'http://localhost:11434'
34
34
  });
35
35
  expect(embedder).not.toBeNull();
36
36
  expect(typeof embedder).toBe('function');
37
37
  });
38
38
  it('returns null for unknown provider', () => {
39
39
  const embedder = buildEmbedder({
40
- provider: 'unknown-provider',
40
+ provider: 'unknown-provider'
41
41
  });
42
42
  expect(embedder).toBeNull();
43
43
  });
@@ -52,7 +52,7 @@ describe('Embedder abstraction', () => {
52
52
  const moduleData = {
53
53
  embedding_provider: 'ollama',
54
54
  embedding_model: 'nomic-embed-text',
55
- embedding_base_url: 'http://localhost:11434',
55
+ embedding_base_url: 'http://localhost:11434'
56
56
  };
57
57
  const embedder = buildEmbedderFromModule(moduleData);
58
58
  expect(embedder).not.toBeNull();
@@ -60,7 +60,7 @@ describe('Embedder abstraction', () => {
60
60
  });
61
61
  it('returns null for unsupported provider in module data', () => {
62
62
  const moduleData = {
63
- embedding_provider: 'unsupported',
63
+ embedding_provider: 'unsupported'
64
64
  };
65
65
  const embedder = buildEmbedderFromModule(moduleData);
66
66
  expect(embedder).toBeNull();
@@ -83,7 +83,7 @@ describe('Embedder abstraction', () => {
83
83
  ...originalEnv,
84
84
  EMBEDDER_PROVIDER: 'ollama',
85
85
  EMBEDDER_MODEL: 'nomic-embed-text',
86
- EMBEDDER_BASE_URL: 'http://localhost:11434',
86
+ EMBEDDER_BASE_URL: 'http://localhost:11434'
87
87
  };
88
88
  const embedder = buildEmbedderFromEnv();
89
89
  expect(embedder).not.toBeNull();
@@ -101,7 +101,7 @@ describe('graphile-llm schema enrichment', () => {
101
101
  let query;
102
102
  beforeAll(async () => {
103
103
  const unifiedPlugin = createUnifiedSearchPlugin({
104
- adapters: [createPgvectorAdapter()],
104
+ adapters: [createPgvectorAdapter()]
105
105
  });
106
106
  const testPreset = {
107
107
  extends: [ConnectionFilterPreset()],
@@ -114,18 +114,18 @@ describe('graphile-llm schema enrichment', () => {
114
114
  defaultEmbedder: {
115
115
  provider: 'ollama',
116
116
  model: 'nomic-embed-text',
117
- baseUrl: 'http://localhost:11434',
118
- },
117
+ baseUrl: 'http://localhost:11434'
118
+ }
119
119
  }),
120
120
  createLlmTextSearchPlugin(),
121
- createLlmTextMutationPlugin(),
122
- ],
121
+ createLlmTextMutationPlugin()
122
+ ]
123
123
  };
124
124
  const connections = await getConnections({
125
125
  schemas: ['llm_test'],
126
126
  preset: testPreset,
127
127
  useRoot: true,
128
- authRole: 'postgres',
128
+ authRole: 'postgres'
129
129
  }, [seed.sqlfile([join(__dirname, './setup.sql')])]);
130
130
  db = connections.db;
131
131
  teardown = connections.teardown;
@@ -254,33 +254,36 @@ describe('graphile-llm with real Ollama embedding', () => {
254
254
  const embedder = buildEmbedder({
255
255
  provider: 'ollama',
256
256
  model: 'nomic-embed-text',
257
- baseUrl: 'http://localhost:11434',
257
+ baseUrl: 'http://localhost:11434'
258
258
  });
259
259
  expect(embedder).not.toBeNull();
260
- const vector = await embedder('Machine learning is transforming AI');
260
+ const result = await embedder('Machine learning is transforming AI');
261
261
  // nomic-embed-text produces 768-dimensional vectors
262
- expect(Array.isArray(vector)).toBe(true);
263
- expect(vector.length).toBe(768);
262
+ expect(Array.isArray(result.embedding)).toBe(true);
263
+ expect(result.embedding.length).toBe(768);
264
+ expect(result.promptTokens).toBeGreaterThan(0);
264
265
  // All elements should be numbers
265
- for (const v of vector) {
266
+ for (const v of result.embedding) {
266
267
  expect(typeof v).toBe('number');
267
268
  expect(Number.isFinite(v)).toBe(true);
268
269
  }
269
270
  // Vector should not be all zeros
270
- const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
271
+ const magnitude = Math.sqrt(result.embedding.reduce((sum, v) => sum + v * v, 0));
271
272
  expect(magnitude).toBeGreaterThan(0);
272
273
  });
273
274
  it('should produce different vectors for semantically different text', async () => {
274
275
  const embedder = buildEmbedder({
275
276
  provider: 'ollama',
276
277
  model: 'nomic-embed-text',
277
- baseUrl: 'http://localhost:11434',
278
+ baseUrl: 'http://localhost:11434'
278
279
  });
279
280
  expect(embedder).not.toBeNull();
280
- const [vecA, vecB] = await Promise.all([
281
+ const [resultA, resultB] = await Promise.all([
281
282
  embedder('Artificial intelligence and machine learning'),
282
- embedder('Cooking recipes for Italian pasta dishes'),
283
+ embedder('Cooking recipes for Italian pasta dishes')
283
284
  ]);
285
+ const vecA = resultA.embedding;
286
+ const vecB = resultB.embedding;
284
287
  expect(vecA.length).toBe(768);
285
288
  expect(vecB.length).toBe(768);
286
289
  // Compute cosine similarity
@@ -300,13 +303,15 @@ describe('graphile-llm with real Ollama embedding', () => {
300
303
  const embedder = buildEmbedder({
301
304
  provider: 'ollama',
302
305
  model: 'nomic-embed-text',
303
- baseUrl: 'http://localhost:11434',
306
+ baseUrl: 'http://localhost:11434'
304
307
  });
305
308
  expect(embedder).not.toBeNull();
306
- const [vecA, vecB] = await Promise.all([
309
+ const [resultA, resultB] = await Promise.all([
307
310
  embedder('Machine learning and artificial intelligence'),
308
- embedder('AI and ML are subfields of computer science'),
311
+ embedder('AI and ML are subfields of computer science')
309
312
  ]);
313
+ const vecA = resultA.embedding;
314
+ const vecB = resultB.embedding;
310
315
  expect(vecA.length).toBe(768);
311
316
  expect(vecB.length).toBe(768);
312
317
  // Compute cosine similarity
@@ -322,11 +327,14 @@ describe('graphile-llm with real Ollama embedding', () => {
322
327
  // Semantically similar texts should have high similarity
323
328
  expect(cosineSimilarity).toBeGreaterThan(0.5);
324
329
  });
325
- it('should produce embeddings via @agentic-kit/ollama OllamaClient directly', async () => {
326
- const vector = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
327
- expect(Array.isArray(vector)).toBe(true);
328
- expect(vector.length).toBe(768);
329
- for (const v of vector) {
330
+ it('should produce embeddings with token count via @agentic-kit/ollama OllamaClient directly', async () => {
331
+ const result = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
332
+ expect(result).toHaveProperty('embedding');
333
+ expect(result).toHaveProperty('promptTokens');
334
+ expect(Array.isArray(result.embedding)).toBe(true);
335
+ expect(result.embedding.length).toBe(768);
336
+ expect(result.promptTokens).toBeGreaterThan(0);
337
+ for (const v of result.embedding) {
330
338
  expect(typeof v).toBe('number');
331
339
  expect(Number.isFinite(v)).toBe(true);
332
340
  }
@@ -341,14 +349,14 @@ describe('Chat completion abstraction', () => {
341
349
  const chat = buildChatCompleter({
342
350
  provider: 'ollama',
343
351
  model: 'llama3',
344
- baseUrl: 'http://localhost:11434',
352
+ baseUrl: 'http://localhost:11434'
345
353
  });
346
354
  expect(chat).not.toBeNull();
347
355
  expect(typeof chat).toBe('function');
348
356
  });
349
357
  it('returns null for unknown provider', () => {
350
358
  const chat = buildChatCompleter({
351
- provider: 'unknown-provider',
359
+ provider: 'unknown-provider'
352
360
  });
353
361
  expect(chat).toBeNull();
354
362
  });
@@ -364,7 +372,7 @@ describe('Chat completion abstraction', () => {
364
372
  embedding_provider: 'ollama',
365
373
  chat_provider: 'ollama',
366
374
  chat_model: 'llama3',
367
- chat_base_url: 'http://localhost:11434',
375
+ chat_base_url: 'http://localhost:11434'
368
376
  };
369
377
  const chat = buildChatCompleterFromModule(moduleData);
370
378
  expect(chat).not.toBeNull();
@@ -372,7 +380,7 @@ describe('Chat completion abstraction', () => {
372
380
  });
373
381
  it('returns null when chat_provider is not set', () => {
374
382
  const moduleData = {
375
- embedding_provider: 'ollama',
383
+ embedding_provider: 'ollama'
376
384
  };
377
385
  const chat = buildChatCompleterFromModule(moduleData);
378
386
  expect(chat).toBeNull();
@@ -395,7 +403,7 @@ describe('Chat completion abstraction', () => {
395
403
  ...originalEnv,
396
404
  CHAT_PROVIDER: 'ollama',
397
405
  CHAT_MODEL: 'llama3',
398
- CHAT_BASE_URL: 'http://localhost:11434',
406
+ CHAT_BASE_URL: 'http://localhost:11434'
399
407
  };
400
408
  const chat = buildChatCompleterFromEnv();
401
409
  expect(chat).not.toBeNull();
@@ -434,10 +442,10 @@ function makeTestSmartTagsPlugin(tagsByTable) {
434
442
  Object.assign(c.extensions.tags, tags);
435
443
  }
436
444
  return _;
437
- },
438
- },
439
- },
440
- },
445
+ }
446
+ }
447
+ }
448
+ }
441
449
  };
442
450
  }
443
451
  describe('RAG plugin schema enrichment', () => {
@@ -446,7 +454,7 @@ describe('RAG plugin schema enrichment', () => {
446
454
  let query;
447
455
  beforeAll(async () => {
448
456
  const unifiedPlugin = createUnifiedSearchPlugin({
449
- adapters: [createPgvectorAdapter()],
457
+ adapters: [createPgvectorAdapter()]
450
458
  });
451
459
  const smartTagsPlugin = makeTestSmartTagsPlugin({
452
460
  articles: {
@@ -455,16 +463,22 @@ describe('RAG plugin schema enrichment', () => {
455
463
  parentFk: 'parent_id',
456
464
  parentPk: 'id',
457
465
  embeddingField: 'embedding',
458
- contentField: 'content',
459
- },
460
- },
466
+ contentField: 'content'
467
+ }
468
+ }
469
+ });
470
+ // Mock embedder that returns a fixed 3-dim vector with token count
471
+ const mockEmbedder = async (_text) => ({
472
+ embedding: [1, 0, 0],
473
+ promptTokens: 5
461
474
  });
462
- // Mock embedder that returns a fixed 3-dim vector
463
- const mockEmbedder = async (_text) => [1, 0, 0];
464
- // Mock chat completer that returns a canned response
475
+ // Mock chat completer that returns a canned response with usage
465
476
  const mockChatCompleter = async (messages) => {
466
477
  const userMessage = messages.find((m) => m.role === 'user');
467
- return `Mock answer for: ${userMessage?.content || 'unknown'}`;
478
+ return {
479
+ content: `Mock answer for: ${userMessage?.content || 'unknown'}`,
480
+ usage: { input: 10, output: 15, reasoning: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 25 }
481
+ };
468
482
  };
469
483
  const testPreset = {
470
484
  extends: [ConnectionFilterPreset()],
@@ -476,13 +490,13 @@ describe('RAG plugin schema enrichment', () => {
476
490
  defaultEmbedder: {
477
491
  provider: 'ollama',
478
492
  model: 'nomic-embed-text',
479
- baseUrl: 'http://localhost:11434',
480
- },
493
+ baseUrl: 'http://localhost:11434'
494
+ }
481
495
  }),
482
496
  createLlmTextSearchPlugin(),
483
497
  createLlmTextMutationPlugin(),
484
- createLlmRagPlugin(),
485
- ],
498
+ createLlmRagPlugin()
499
+ ]
486
500
  };
487
501
  // Override the embedder and chat completer on the build context
488
502
  // by wrapping the LlmModulePlugin's build hook
@@ -495,20 +509,20 @@ describe('RAG plugin schema enrichment', () => {
495
509
  build(build) {
496
510
  return build.extend(build, {
497
511
  llmEmbedder: mockEmbedder,
498
- llmChatCompleter: mockChatCompleter,
512
+ llmChatCompleter: mockChatCompleter
499
513
  }, 'TestOverridePlugin overriding embedder and chat completer');
500
- },
501
- },
502
- },
514
+ }
515
+ }
516
+ }
503
517
  };
504
518
  const connections = await getConnections({
505
519
  schemas: ['llm_test'],
506
520
  preset: {
507
521
  ...testPreset,
508
- plugins: [...testPreset.plugins, overridePlugin],
522
+ plugins: [...testPreset.plugins, overridePlugin]
509
523
  },
510
524
  useRoot: true,
511
- authRole: 'postgres',
525
+ authRole: 'postgres'
512
526
  }, [seed.sqlfile([join(__dirname, './setup.sql')])]);
513
527
  db = connections.db;
514
528
  teardown = connections.teardown;
@@ -632,7 +646,7 @@ describe('GraphileLlmPreset toggles', () => {
632
646
  it('enableRag=false excludes RAG plugin (no ragQuery field)', async () => {
633
647
  const { GraphileLlmPreset } = await import('../../src/preset');
634
648
  const preset = GraphileLlmPreset({
635
- enableRag: false,
649
+ enableRag: false
636
650
  });
637
651
  const pluginNames = preset.plugins.map((p) => p.name);
638
652
  expect(pluginNames).not.toContain('LlmRagPlugin');
@@ -640,7 +654,7 @@ describe('GraphileLlmPreset toggles', () => {
640
654
  it('enableRag=true includes RAG plugin', async () => {
641
655
  const { GraphileLlmPreset } = await import('../../src/preset');
642
656
  const preset = GraphileLlmPreset({
643
- enableRag: true,
657
+ enableRag: true
644
658
  });
645
659
  const pluginNames = preset.plugins.map((p) => p.name);
646
660
  expect(pluginNames).toContain('LlmRagPlugin');
@@ -648,7 +662,7 @@ describe('GraphileLlmPreset toggles', () => {
648
662
  it('enableTextSearch=false excludes text search plugin', async () => {
649
663
  const { GraphileLlmPreset } = await import('../../src/preset');
650
664
  const preset = GraphileLlmPreset({
651
- enableTextSearch: false,
665
+ enableTextSearch: false
652
666
  });
653
667
  const pluginNames = preset.plugins.map((p) => p.name);
654
668
  expect(pluginNames).not.toContain('LlmTextSearchPlugin');
@@ -658,7 +672,7 @@ describe('GraphileLlmPreset toggles', () => {
658
672
  it('enableTextMutations=false excludes text mutation plugin', async () => {
659
673
  const { GraphileLlmPreset } = await import('../../src/preset');
660
674
  const preset = GraphileLlmPreset({
661
- enableTextMutations: false,
675
+ enableTextMutations: false
662
676
  });
663
677
  const pluginNames = preset.plugins.map((p) => p.name);
664
678
  expect(pluginNames).not.toContain('LlmTextMutationPlugin');
@@ -668,7 +682,7 @@ describe('GraphileLlmPreset toggles', () => {
668
682
  const preset = GraphileLlmPreset({
669
683
  enableTextSearch: false,
670
684
  enableTextMutations: false,
671
- enableRag: false,
685
+ enableRag: false
672
686
  });
673
687
  const pluginNames = preset.plugins.map((p) => p.name);
674
688
  expect(pluginNames).toEqual(['LlmModulePlugin']);
package/esm/chat.js CHANGED
@@ -11,38 +11,51 @@
11
11
  * 2. The preset's `defaultChatCompleter` option (fallback for dev/testing)
12
12
  * 3. Environment variables (CHAT_PROVIDER, CHAT_MODEL, CHAT_BASE_URL)
13
13
  */
14
- import OllamaClient from '@agentic-kit/ollama';
14
+ import { OllamaAdapter } from '@agentic-kit/ollama';
15
15
  import { getLlmEnvOptions } from './env';
16
16
  // ─── Built-in Providers ─────────────────────────────────────────────────────
17
17
  /**
18
18
  * Create an Ollama-based chat completion function.
19
19
  *
20
- * Uses OllamaClient.generate() with a messages array, which internally
21
- * routes to the /api/chat endpoint.
20
+ * Uses OllamaAdapter.stream() to get both response content and real token
21
+ * usage counts from the provider (prompt_eval_count, eval_count).
22
22
  */
23
23
  function createOllamaChatCompleter(baseUrl = 'http://localhost:11434', model = 'llama3') {
24
- const client = new OllamaClient(baseUrl);
24
+ const adapter = new OllamaAdapter(baseUrl);
25
25
  return async (messages, options) => {
26
- // Build the input for OllamaClient.generate() in chat mode
27
- const input = {
28
- model,
29
- messages: messages.filter((m) => m.role !== 'system'),
30
- };
31
- // Extract system message if present
32
26
  const systemMsg = messages.find((m) => m.role === 'system');
33
- if (systemMsg) {
34
- input.system = systemMsg.content;
35
- }
36
- if (options?.temperature !== undefined) {
37
- input.temperature = options.temperature;
38
- }
39
- const startTime = Date.now();
40
- const response = await client.generate(input);
41
- const latencyMs = Date.now() - startTime;
42
- // Token count logging (metering deferred to billing system)
43
- console.log(`[graphile-llm] Chat completion: model=${model}, latency=${latencyMs}ms, ` +
44
- `messages=${messages.length}`);
45
- return response;
27
+ const nonSystem = messages.filter((m) => m.role !== 'system');
28
+ const modelDesc = adapter.createModel(model, {
29
+ maxOutputTokens: options?.maxTokens
30
+ });
31
+ const context = {
32
+ systemPrompt: systemMsg?.content,
33
+ messages: nonSystem.map((m) => ({
34
+ role: m.role,
35
+ content: m.content,
36
+ timestamp: Date.now()
37
+ }))
38
+ };
39
+ const stream = adapter.stream(modelDesc, context, {
40
+ temperature: options?.temperature,
41
+ maxTokens: options?.maxTokens
42
+ });
43
+ const result = await stream.result();
44
+ const content = result.content
45
+ .filter((block) => block.type === 'text')
46
+ .map((block) => block.text)
47
+ .join('');
48
+ return {
49
+ content,
50
+ usage: {
51
+ input: result.usage.input,
52
+ output: result.usage.output,
53
+ reasoning: result.usage.reasoning,
54
+ cacheRead: result.usage.cacheRead,
55
+ cacheWrite: result.usage.cacheWrite,
56
+ totalTokens: result.usage.totalTokens
57
+ }
58
+ };
46
59
  };
47
60
  }
48
61
  // ─── Chat Completer Construction ────────────────────────────────────────────
@@ -73,7 +86,7 @@ export function buildChatCompleterFromModule(data) {
73
86
  return buildChatCompleter({
74
87
  provider: data.chat_provider,
75
88
  model: data.chat_model,
76
- baseUrl: data.chat_base_url,
89
+ baseUrl: data.chat_base_url
77
90
  });
78
91
  }
79
92
  /**
@@ -51,7 +51,7 @@ const INFERENCE_LOG_MODULE_SQL = `
51
51
  const billingCache = new ModuleConfigCache({
52
52
  name: 'billing-config',
53
53
  ttlMs: 5 * 60 * 1000, // 5 minutes
54
- max: 50,
54
+ max: 50
55
55
  });
56
56
  // ─── Resolution Functions ───────────────────────────────────────────────────
57
57
  /**
@@ -72,7 +72,7 @@ async function resolveInferenceLogConfig(pgClient, databaseId) {
72
72
  return null;
73
73
  return {
74
74
  schema: row.schema,
75
- tableName: row.table_name,
75
+ tableName: row.table_name
76
76
  };
77
77
  }
78
78
  catch {
@@ -96,7 +96,7 @@ async function resolveBillingConfig(pgClient, databaseId) {
96
96
  privateSchema: row.private_schema,
97
97
  recordUsageFunction: row.record_usage_function,
98
98
  // The check_billing_quota function name follows the inflection pattern
99
- checkBillingQuotaFunction: 'check_billing_quota',
99
+ checkBillingQuotaFunction: 'check_billing_quota'
100
100
  };
101
101
  }
102
102
  catch {
@@ -118,7 +118,7 @@ export async function getLlmBillingConfig(pgClient, databaseId) {
118
118
  return cached;
119
119
  const [billing, inferenceLog] = await Promise.all([
120
120
  resolveBillingConfig(pgClient, databaseId),
121
- resolveInferenceLogConfig(pgClient, databaseId),
121
+ resolveInferenceLogConfig(pgClient, databaseId)
122
122
  ]);
123
123
  const entry = { billing, inferenceLog };
124
124
  billingCache.set(databaseId, entry);
package/esm/embedder.js CHANGED
@@ -14,6 +14,8 @@ import { getLlmEnvOptions } from './env';
14
14
  // ─── Built-in Providers ─────────────────────────────────────────────────────
15
15
  /**
16
16
  * Create an Ollama-based embedder function.
17
+ *
18
+ * Uses the /api/embed endpoint which returns prompt_eval_count (real token count).
17
19
  */
18
20
  function createOllamaEmbedder(baseUrl = 'http://localhost:11434', model = 'nomic-embed-text') {
19
21
  const client = new OllamaClient(baseUrl);
@@ -47,7 +49,7 @@ export function buildEmbedderFromModule(data) {
47
49
  return buildEmbedder({
48
50
  provider: data.embedding_provider,
49
51
  model: data.embedding_model,
50
- baseUrl: data.embedding_base_url,
52
+ baseUrl: data.embedding_base_url
51
53
  });
52
54
  }
53
55
  /**
package/esm/env.js CHANGED
@@ -18,13 +18,13 @@ const LLM_DEFAULTS = {
18
18
  embedding: {
19
19
  provider: 'ollama',
20
20
  model: 'nomic-embed-text',
21
- baseUrl: 'http://localhost:11434',
21
+ baseUrl: 'http://localhost:11434'
22
22
  },
23
23
  chat: {
24
24
  provider: 'ollama',
25
25
  model: 'llama3',
26
- baseUrl: 'http://localhost:11434',
27
- },
26
+ baseUrl: 'http://localhost:11434'
27
+ }
28
28
  };
29
29
  // ─── Resolution ─────────────────────────────────────────────────────────────
30
30
  /**
@@ -38,12 +38,12 @@ export function getLlmEnvOptions() {
38
38
  embedding: {
39
39
  provider: process.env.EMBEDDER_PROVIDER ?? LLM_DEFAULTS.embedding.provider,
40
40
  model: process.env.EMBEDDER_MODEL ?? LLM_DEFAULTS.embedding.model,
41
- baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl,
41
+ baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl
42
42
  },
43
43
  chat: {
44
44
  provider: process.env.CHAT_PROVIDER ?? LLM_DEFAULTS.chat.provider,
45
45
  model: process.env.CHAT_MODEL ?? LLM_DEFAULTS.chat.model,
46
- baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl,
47
- },
46
+ baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl
47
+ }
48
48
  };
49
49
  }
package/esm/index.d.ts CHANGED
@@ -29,20 +29,20 @@
29
29
  * };
30
30
  * ```
31
31
  */
32
- export { getLlmEnvOptions } from './env';
33
32
  export type { LlmEnvOptions, LlmProviderConfig } from './env';
33
+ export { getLlmEnvOptions } from './env';
34
34
  export { GraphileLlmPreset } from './preset';
35
35
  export { createLlmModulePlugin } from './plugins/llm-module-plugin';
36
- export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
37
- export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
38
36
  export { createLlmRagPlugin } from './plugins/rag-plugin';
37
+ export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
38
+ export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
39
39
  export { createLlmMeteringPlugin } from './plugins/metering-plugin';
40
- export { getAgentDiscovery, clearAgentDiscoveryCache } from './plugins/agent-discovery-plugin';
41
- export type { AgentTableInfo, AgentDiscovery } from './plugins/agent-discovery-plugin';
42
- export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
43
- export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
44
- export { meteredEmbed, meteredChat, logInferenceUsage, QuotaExceededError } from './metering';
45
- export type { MeteringContext, MeteringOptions, MeterResult, WithPgClient, InferenceLogEntry } from './metering';
46
- export { getLlmBillingConfig, invalidateLlmBillingConfig, getLlmBillingCacheStats, } from './config-cache';
47
- export type { BillingConfig, LlmBillingCacheEntry, InferenceLogConfig, PgClient } from './config-cache';
48
- export type { EmbedderFunction, EmbedderConfig, ChatFunction, ChatConfig, ChatMessage, ChatOptions, LlmModuleData, GraphileLlmOptions, MeteringConfig, RagDefaults, ChunkTableInfo, } from './types';
40
+ export type { AgentDiscovery, AgentTableInfo } from './plugins/agent-discovery-plugin';
41
+ export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
42
+ export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
43
+ export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
44
+ export type { InferenceLogEntry, MeteringContext, MeteringOptions, MeterResult, WithPgClient } from './metering';
45
+ export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
46
+ export type { BillingConfig, InferenceLogConfig, LlmBillingCacheEntry, PgClient } from './config-cache';
47
+ export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
48
+ export type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, ChatResult, ChunkTableInfo, EmbedderConfig, EmbedderFunction, EmbeddingResult, GraphileLlmOptions, LlmModuleData, LlmUsage, MeteringConfig, RagDefaults } from './types';
package/esm/index.js CHANGED
@@ -29,24 +29,20 @@
29
29
  * };
30
30
  * ```
31
31
  */
32
- // Environment configuration (single source of truth for LLM defaults)
33
32
  export { getLlmEnvOptions } from './env';
34
33
  // Preset (recommended entry point)
35
34
  export { GraphileLlmPreset } from './preset';
36
35
  // Individual plugins (pure — no billing dependency)
37
36
  export { createLlmModulePlugin } from './plugins/llm-module-plugin';
38
- export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
39
- export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
40
37
  export { createLlmRagPlugin } from './plugins/rag-plugin';
38
+ export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
39
+ export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
41
40
  // Metering plugin (opt-in billing integration)
42
41
  export { createLlmMeteringPlugin } from './plugins/metering-plugin';
43
- // Agent discovery (queries agent_chat_module config table at runtime)
44
- export { getAgentDiscovery, clearAgentDiscoveryCache } from './plugins/agent-discovery-plugin';
42
+ export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
45
43
  // Embedder utilities
46
- export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
44
+ export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
47
45
  // Chat completion utilities
48
- export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
49
- // Metering utilities (for custom integration)
50
- export { meteredEmbed, meteredChat, logInferenceUsage, QuotaExceededError } from './metering';
51
- // Config cache (for custom integration)
52
- export { getLlmBillingConfig, invalidateLlmBillingConfig, getLlmBillingCacheStats, } from './config-cache';
46
+ export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
47
+ export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
48
+ export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
package/esm/metering.d.ts CHANGED
@@ -9,16 +9,16 @@
9
9
  * When the quota check fails, the wrapper returns null (graceful degradation)
10
10
  * instead of throwing, so the search pipeline can fall back to text-only.
11
11
  *
12
- * Token counts are estimated from text length (~4 chars per token). No
13
- * tokenizer needed — the billing system uses tokens as abstract units
14
- * and the credit_cost on each model's meter normalizes the relative expense.
12
+ * Token counts:
13
+ * - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
14
+ * - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
15
15
  *
16
16
  * The billing functions live in the tenant database and are called via the
17
17
  * Graphile `withPgClient` callback. Function locations (schema, names) are
18
18
  * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
19
19
  */
20
- import type { PgClient, BillingConfig, InferenceLogConfig } from './config-cache';
21
- import type { EmbedderFunction, ChatFunction, ChatMessage, ChatOptions } from './types';
20
+ import type { BillingConfig, InferenceLogConfig, PgClient } from './config-cache';
21
+ import type { ChatFunction, ChatMessage, ChatOptions, EmbedderFunction } from './types';
22
22
  /**
23
23
  * Callback matching Graphile's withPgClient signature.
24
24
  * Acquires a pg client, calls the callback, then releases the client.