graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/__tests__/graphile-llm.test.js +87 -71
  2. package/chat.d.ts +5 -5
  3. package/chat.js +45 -43
  4. package/config-cache.d.ts +77 -0
  5. package/config-cache.js +148 -0
  6. package/embedder.d.ts +5 -5
  7. package/embedder.js +11 -17
  8. package/env.d.ts +31 -0
  9. package/env.js +52 -0
  10. package/esm/__tests__/graphile-llm.test.js +87 -71
  11. package/esm/chat.d.ts +5 -5
  12. package/esm/chat.js +45 -40
  13. package/esm/config-cache.d.ts +77 -0
  14. package/esm/config-cache.js +143 -0
  15. package/esm/embedder.d.ts +5 -5
  16. package/esm/embedder.js +11 -17
  17. package/esm/env.d.ts +31 -0
  18. package/esm/env.js +49 -0
  19. package/esm/index.d.ts +14 -5
  20. package/esm/index.js +11 -5
  21. package/esm/metering.d.ts +114 -0
  22. package/esm/metering.js +352 -0
  23. package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
  24. package/esm/plugins/agent-discovery-plugin.js +65 -0
  25. package/esm/plugins/llm-module-plugin.d.ts +11 -2
  26. package/esm/plugins/llm-module-plugin.js +15 -7
  27. package/esm/plugins/metering-plugin.d.ts +42 -0
  28. package/esm/plugins/metering-plugin.js +175 -0
  29. package/esm/plugins/rag-plugin.js +20 -20
  30. package/esm/plugins/text-mutation-plugin.d.ts +4 -0
  31. package/esm/plugins/text-mutation-plugin.js +23 -13
  32. package/esm/plugins/text-search-plugin.d.ts +4 -0
  33. package/esm/plugins/text-search-plugin.js +23 -11
  34. package/esm/preset.d.ts +21 -1
  35. package/esm/preset.js +33 -6
  36. package/esm/types.d.ts +86 -10
  37. package/index.d.ts +14 -5
  38. package/index.js +25 -8
  39. package/metering.d.ts +114 -0
  40. package/metering.js +359 -0
  41. package/package.json +15 -15
  42. package/plugins/agent-discovery-plugin.d.ts +29 -0
  43. package/plugins/agent-discovery-plugin.js +69 -0
  44. package/plugins/llm-module-plugin.d.ts +11 -2
  45. package/plugins/llm-module-plugin.js +15 -7
  46. package/plugins/metering-plugin.d.ts +42 -0
  47. package/plugins/metering-plugin.js +178 -0
  48. package/plugins/rag-plugin.js +20 -20
  49. package/plugins/text-mutation-plugin.d.ts +4 -0
  50. package/plugins/text-mutation-plugin.js +23 -13
  51. package/plugins/text-search-plugin.d.ts +4 -0
  52. package/plugins/text-search-plugin.js +23 -11
  53. package/preset.d.ts +21 -1
  54. package/preset.js +33 -6
  55. package/types.d.ts +86 -10
@@ -1,16 +1,16 @@
1
- import { join } from 'path';
2
1
  import OllamaClient from '@agentic-kit/ollama';
3
- import { getConnections, seed } from 'graphile-test';
4
2
  import { ConnectionFilterPreset } from 'graphile-connection-filter';
3
+ import { createPgvectorAdapter } from 'graphile-search/adapters/pgvector';
5
4
  import { VectorCodecPlugin } from 'graphile-search/codecs/vector-codec';
6
5
  import { createUnifiedSearchPlugin } from 'graphile-search/plugin';
7
- import { createPgvectorAdapter } from 'graphile-search/adapters/pgvector';
6
+ import { getConnections, seed } from 'graphile-test';
7
+ import { join } from 'path';
8
+ import { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from '../../src/chat';
9
+ import { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from '../../src/embedder';
8
10
  import { createLlmModulePlugin } from '../../src/plugins/llm-module-plugin';
9
- import { createLlmTextSearchPlugin } from '../../src/plugins/text-search-plugin';
10
- import { createLlmTextMutationPlugin } from '../../src/plugins/text-mutation-plugin';
11
11
  import { createLlmRagPlugin } from '../../src/plugins/rag-plugin';
12
- import { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from '../../src/embedder';
13
- import { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from '../../src/chat';
12
+ import { createLlmTextMutationPlugin } from '../../src/plugins/text-mutation-plugin';
13
+ import { createLlmTextSearchPlugin } from '../../src/plugins/text-search-plugin';
14
14
  // ─── @agentic-kit/ollama client ─────────────────────────────────────────────
15
15
  const ollamaClient = new OllamaClient('http://localhost:11434');
16
16
  async function ensureNomicModel() {
@@ -30,14 +30,14 @@ describe('Embedder abstraction', () => {
30
30
  const embedder = buildEmbedder({
31
31
  provider: 'ollama',
32
32
  model: 'nomic-embed-text',
33
- baseUrl: 'http://localhost:11434',
33
+ baseUrl: 'http://localhost:11434'
34
34
  });
35
35
  expect(embedder).not.toBeNull();
36
36
  expect(typeof embedder).toBe('function');
37
37
  });
38
38
  it('returns null for unknown provider', () => {
39
39
  const embedder = buildEmbedder({
40
- provider: 'unknown-provider',
40
+ provider: 'unknown-provider'
41
41
  });
42
42
  expect(embedder).toBeNull();
43
43
  });
@@ -52,7 +52,7 @@ describe('Embedder abstraction', () => {
52
52
  const moduleData = {
53
53
  embedding_provider: 'ollama',
54
54
  embedding_model: 'nomic-embed-text',
55
- embedding_base_url: 'http://localhost:11434',
55
+ embedding_base_url: 'http://localhost:11434'
56
56
  };
57
57
  const embedder = buildEmbedderFromModule(moduleData);
58
58
  expect(embedder).not.toBeNull();
@@ -60,7 +60,7 @@ describe('Embedder abstraction', () => {
60
60
  });
61
61
  it('returns null for unsupported provider in module data', () => {
62
62
  const moduleData = {
63
- embedding_provider: 'unsupported',
63
+ embedding_provider: 'unsupported'
64
64
  };
65
65
  const embedder = buildEmbedderFromModule(moduleData);
66
66
  expect(embedder).toBeNull();
@@ -71,18 +71,19 @@ describe('Embedder abstraction', () => {
71
71
  afterEach(() => {
72
72
  process.env = originalEnv;
73
73
  });
74
- it('returns null when EMBEDDER_PROVIDER is not set', () => {
74
+ it('returns default ollama embedder when EMBEDDER_PROVIDER is not set', () => {
75
75
  process.env = { ...originalEnv };
76
76
  delete process.env.EMBEDDER_PROVIDER;
77
77
  const embedder = buildEmbedderFromEnv();
78
- expect(embedder).toBeNull();
78
+ expect(embedder).not.toBeNull();
79
+ expect(typeof embedder).toBe('function');
79
80
  });
80
81
  it('builds embedder from environment variables', () => {
81
82
  process.env = {
82
83
  ...originalEnv,
83
84
  EMBEDDER_PROVIDER: 'ollama',
84
85
  EMBEDDER_MODEL: 'nomic-embed-text',
85
- EMBEDDER_BASE_URL: 'http://localhost:11434',
86
+ EMBEDDER_BASE_URL: 'http://localhost:11434'
86
87
  };
87
88
  const embedder = buildEmbedderFromEnv();
88
89
  expect(embedder).not.toBeNull();
@@ -100,7 +101,7 @@ describe('graphile-llm schema enrichment', () => {
100
101
  let query;
101
102
  beforeAll(async () => {
102
103
  const unifiedPlugin = createUnifiedSearchPlugin({
103
- adapters: [createPgvectorAdapter()],
104
+ adapters: [createPgvectorAdapter()]
104
105
  });
105
106
  const testPreset = {
106
107
  extends: [ConnectionFilterPreset()],
@@ -113,18 +114,18 @@ describe('graphile-llm schema enrichment', () => {
113
114
  defaultEmbedder: {
114
115
  provider: 'ollama',
115
116
  model: 'nomic-embed-text',
116
- baseUrl: 'http://localhost:11434',
117
- },
117
+ baseUrl: 'http://localhost:11434'
118
+ }
118
119
  }),
119
120
  createLlmTextSearchPlugin(),
120
- createLlmTextMutationPlugin(),
121
- ],
121
+ createLlmTextMutationPlugin()
122
+ ]
122
123
  };
123
124
  const connections = await getConnections({
124
125
  schemas: ['llm_test'],
125
126
  preset: testPreset,
126
127
  useRoot: true,
127
- authRole: 'postgres',
128
+ authRole: 'postgres'
128
129
  }, [seed.sqlfile([join(__dirname, './setup.sql')])]);
129
130
  db = connections.db;
130
131
  teardown = connections.teardown;
@@ -253,33 +254,36 @@ describe('graphile-llm with real Ollama embedding', () => {
253
254
  const embedder = buildEmbedder({
254
255
  provider: 'ollama',
255
256
  model: 'nomic-embed-text',
256
- baseUrl: 'http://localhost:11434',
257
+ baseUrl: 'http://localhost:11434'
257
258
  });
258
259
  expect(embedder).not.toBeNull();
259
- const vector = await embedder('Machine learning is transforming AI');
260
+ const result = await embedder('Machine learning is transforming AI');
260
261
  // nomic-embed-text produces 768-dimensional vectors
261
- expect(Array.isArray(vector)).toBe(true);
262
- expect(vector.length).toBe(768);
262
+ expect(Array.isArray(result.embedding)).toBe(true);
263
+ expect(result.embedding.length).toBe(768);
264
+ expect(result.promptTokens).toBeGreaterThan(0);
263
265
  // All elements should be numbers
264
- for (const v of vector) {
266
+ for (const v of result.embedding) {
265
267
  expect(typeof v).toBe('number');
266
268
  expect(Number.isFinite(v)).toBe(true);
267
269
  }
268
270
  // Vector should not be all zeros
269
- const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
271
+ const magnitude = Math.sqrt(result.embedding.reduce((sum, v) => sum + v * v, 0));
270
272
  expect(magnitude).toBeGreaterThan(0);
271
273
  });
272
274
  it('should produce different vectors for semantically different text', async () => {
273
275
  const embedder = buildEmbedder({
274
276
  provider: 'ollama',
275
277
  model: 'nomic-embed-text',
276
- baseUrl: 'http://localhost:11434',
278
+ baseUrl: 'http://localhost:11434'
277
279
  });
278
280
  expect(embedder).not.toBeNull();
279
- const [vecA, vecB] = await Promise.all([
281
+ const [resultA, resultB] = await Promise.all([
280
282
  embedder('Artificial intelligence and machine learning'),
281
- embedder('Cooking recipes for Italian pasta dishes'),
283
+ embedder('Cooking recipes for Italian pasta dishes')
282
284
  ]);
285
+ const vecA = resultA.embedding;
286
+ const vecB = resultB.embedding;
283
287
  expect(vecA.length).toBe(768);
284
288
  expect(vecB.length).toBe(768);
285
289
  // Compute cosine similarity
@@ -299,13 +303,15 @@ describe('graphile-llm with real Ollama embedding', () => {
299
303
  const embedder = buildEmbedder({
300
304
  provider: 'ollama',
301
305
  model: 'nomic-embed-text',
302
- baseUrl: 'http://localhost:11434',
306
+ baseUrl: 'http://localhost:11434'
303
307
  });
304
308
  expect(embedder).not.toBeNull();
305
- const [vecA, vecB] = await Promise.all([
309
+ const [resultA, resultB] = await Promise.all([
306
310
  embedder('Machine learning and artificial intelligence'),
307
- embedder('AI and ML are subfields of computer science'),
311
+ embedder('AI and ML are subfields of computer science')
308
312
  ]);
313
+ const vecA = resultA.embedding;
314
+ const vecB = resultB.embedding;
309
315
  expect(vecA.length).toBe(768);
310
316
  expect(vecB.length).toBe(768);
311
317
  // Compute cosine similarity
@@ -321,11 +327,14 @@ describe('graphile-llm with real Ollama embedding', () => {
321
327
  // Semantically similar texts should have high similarity
322
328
  expect(cosineSimilarity).toBeGreaterThan(0.5);
323
329
  });
324
- it('should produce embeddings via @agentic-kit/ollama OllamaClient directly', async () => {
325
- const vector = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
326
- expect(Array.isArray(vector)).toBe(true);
327
- expect(vector.length).toBe(768);
328
- for (const v of vector) {
330
+ it('should produce embeddings with token count via @agentic-kit/ollama OllamaClient directly', async () => {
331
+ const result = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
332
+ expect(result).toHaveProperty('embedding');
333
+ expect(result).toHaveProperty('promptTokens');
334
+ expect(Array.isArray(result.embedding)).toBe(true);
335
+ expect(result.embedding.length).toBe(768);
336
+ expect(result.promptTokens).toBeGreaterThan(0);
337
+ for (const v of result.embedding) {
329
338
  expect(typeof v).toBe('number');
330
339
  expect(Number.isFinite(v)).toBe(true);
331
340
  }
@@ -340,14 +349,14 @@ describe('Chat completion abstraction', () => {
340
349
  const chat = buildChatCompleter({
341
350
  provider: 'ollama',
342
351
  model: 'llama3',
343
- baseUrl: 'http://localhost:11434',
352
+ baseUrl: 'http://localhost:11434'
344
353
  });
345
354
  expect(chat).not.toBeNull();
346
355
  expect(typeof chat).toBe('function');
347
356
  });
348
357
  it('returns null for unknown provider', () => {
349
358
  const chat = buildChatCompleter({
350
- provider: 'unknown-provider',
359
+ provider: 'unknown-provider'
351
360
  });
352
361
  expect(chat).toBeNull();
353
362
  });
@@ -363,7 +372,7 @@ describe('Chat completion abstraction', () => {
363
372
  embedding_provider: 'ollama',
364
373
  chat_provider: 'ollama',
365
374
  chat_model: 'llama3',
366
- chat_base_url: 'http://localhost:11434',
375
+ chat_base_url: 'http://localhost:11434'
367
376
  };
368
377
  const chat = buildChatCompleterFromModule(moduleData);
369
378
  expect(chat).not.toBeNull();
@@ -371,7 +380,7 @@ describe('Chat completion abstraction', () => {
371
380
  });
372
381
  it('returns null when chat_provider is not set', () => {
373
382
  const moduleData = {
374
- embedding_provider: 'ollama',
383
+ embedding_provider: 'ollama'
375
384
  };
376
385
  const chat = buildChatCompleterFromModule(moduleData);
377
386
  expect(chat).toBeNull();
@@ -382,18 +391,19 @@ describe('Chat completion abstraction', () => {
382
391
  afterEach(() => {
383
392
  process.env = originalEnv;
384
393
  });
385
- it('returns null when CHAT_PROVIDER is not set', () => {
394
+ it('returns default ollama chat completer when CHAT_PROVIDER is not set', () => {
386
395
  process.env = { ...originalEnv };
387
396
  delete process.env.CHAT_PROVIDER;
388
397
  const chat = buildChatCompleterFromEnv();
389
- expect(chat).toBeNull();
398
+ expect(chat).not.toBeNull();
399
+ expect(typeof chat).toBe('function');
390
400
  });
391
401
  it('builds chat completer from environment variables', () => {
392
402
  process.env = {
393
403
  ...originalEnv,
394
404
  CHAT_PROVIDER: 'ollama',
395
405
  CHAT_MODEL: 'llama3',
396
- CHAT_BASE_URL: 'http://localhost:11434',
406
+ CHAT_BASE_URL: 'http://localhost:11434'
397
407
  };
398
408
  const chat = buildChatCompleterFromEnv();
399
409
  expect(chat).not.toBeNull();
@@ -432,10 +442,10 @@ function makeTestSmartTagsPlugin(tagsByTable) {
432
442
  Object.assign(c.extensions.tags, tags);
433
443
  }
434
444
  return _;
435
- },
436
- },
437
- },
438
- },
445
+ }
446
+ }
447
+ }
448
+ }
439
449
  };
440
450
  }
441
451
  describe('RAG plugin schema enrichment', () => {
@@ -444,7 +454,7 @@ describe('RAG plugin schema enrichment', () => {
444
454
  let query;
445
455
  beforeAll(async () => {
446
456
  const unifiedPlugin = createUnifiedSearchPlugin({
447
- adapters: [createPgvectorAdapter()],
457
+ adapters: [createPgvectorAdapter()]
448
458
  });
449
459
  const smartTagsPlugin = makeTestSmartTagsPlugin({
450
460
  articles: {
@@ -453,16 +463,22 @@ describe('RAG plugin schema enrichment', () => {
453
463
  parentFk: 'parent_id',
454
464
  parentPk: 'id',
455
465
  embeddingField: 'embedding',
456
- contentField: 'content',
457
- },
458
- },
466
+ contentField: 'content'
467
+ }
468
+ }
469
+ });
470
+ // Mock embedder that returns a fixed 3-dim vector with token count
471
+ const mockEmbedder = async (_text) => ({
472
+ embedding: [1, 0, 0],
473
+ promptTokens: 5
459
474
  });
460
- // Mock embedder that returns a fixed 3-dim vector
461
- const mockEmbedder = async (_text) => [1, 0, 0];
462
- // Mock chat completer that returns a canned response
475
+ // Mock chat completer that returns a canned response with usage
463
476
  const mockChatCompleter = async (messages) => {
464
477
  const userMessage = messages.find((m) => m.role === 'user');
465
- return `Mock answer for: ${userMessage?.content || 'unknown'}`;
478
+ return {
479
+ content: `Mock answer for: ${userMessage?.content || 'unknown'}`,
480
+ usage: { input: 10, output: 15, reasoning: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 25 }
481
+ };
466
482
  };
467
483
  const testPreset = {
468
484
  extends: [ConnectionFilterPreset()],
@@ -474,13 +490,13 @@ describe('RAG plugin schema enrichment', () => {
474
490
  defaultEmbedder: {
475
491
  provider: 'ollama',
476
492
  model: 'nomic-embed-text',
477
- baseUrl: 'http://localhost:11434',
478
- },
493
+ baseUrl: 'http://localhost:11434'
494
+ }
479
495
  }),
480
496
  createLlmTextSearchPlugin(),
481
497
  createLlmTextMutationPlugin(),
482
- createLlmRagPlugin(),
483
- ],
498
+ createLlmRagPlugin()
499
+ ]
484
500
  };
485
501
  // Override the embedder and chat completer on the build context
486
502
  // by wrapping the LlmModulePlugin's build hook
@@ -493,20 +509,20 @@ describe('RAG plugin schema enrichment', () => {
493
509
  build(build) {
494
510
  return build.extend(build, {
495
511
  llmEmbedder: mockEmbedder,
496
- llmChatCompleter: mockChatCompleter,
512
+ llmChatCompleter: mockChatCompleter
497
513
  }, 'TestOverridePlugin overriding embedder and chat completer');
498
- },
499
- },
500
- },
514
+ }
515
+ }
516
+ }
501
517
  };
502
518
  const connections = await getConnections({
503
519
  schemas: ['llm_test'],
504
520
  preset: {
505
521
  ...testPreset,
506
- plugins: [...testPreset.plugins, overridePlugin],
522
+ plugins: [...testPreset.plugins, overridePlugin]
507
523
  },
508
524
  useRoot: true,
509
- authRole: 'postgres',
525
+ authRole: 'postgres'
510
526
  }, [seed.sqlfile([join(__dirname, './setup.sql')])]);
511
527
  db = connections.db;
512
528
  teardown = connections.teardown;
@@ -630,7 +646,7 @@ describe('GraphileLlmPreset toggles', () => {
630
646
  it('enableRag=false excludes RAG plugin (no ragQuery field)', async () => {
631
647
  const { GraphileLlmPreset } = await import('../../src/preset');
632
648
  const preset = GraphileLlmPreset({
633
- enableRag: false,
649
+ enableRag: false
634
650
  });
635
651
  const pluginNames = preset.plugins.map((p) => p.name);
636
652
  expect(pluginNames).not.toContain('LlmRagPlugin');
@@ -638,7 +654,7 @@ describe('GraphileLlmPreset toggles', () => {
638
654
  it('enableRag=true includes RAG plugin', async () => {
639
655
  const { GraphileLlmPreset } = await import('../../src/preset');
640
656
  const preset = GraphileLlmPreset({
641
- enableRag: true,
657
+ enableRag: true
642
658
  });
643
659
  const pluginNames = preset.plugins.map((p) => p.name);
644
660
  expect(pluginNames).toContain('LlmRagPlugin');
@@ -646,7 +662,7 @@ describe('GraphileLlmPreset toggles', () => {
646
662
  it('enableTextSearch=false excludes text search plugin', async () => {
647
663
  const { GraphileLlmPreset } = await import('../../src/preset');
648
664
  const preset = GraphileLlmPreset({
649
- enableTextSearch: false,
665
+ enableTextSearch: false
650
666
  });
651
667
  const pluginNames = preset.plugins.map((p) => p.name);
652
668
  expect(pluginNames).not.toContain('LlmTextSearchPlugin');
@@ -656,7 +672,7 @@ describe('GraphileLlmPreset toggles', () => {
656
672
  it('enableTextMutations=false excludes text mutation plugin', async () => {
657
673
  const { GraphileLlmPreset } = await import('../../src/preset');
658
674
  const preset = GraphileLlmPreset({
659
- enableTextMutations: false,
675
+ enableTextMutations: false
660
676
  });
661
677
  const pluginNames = preset.plugins.map((p) => p.name);
662
678
  expect(pluginNames).not.toContain('LlmTextMutationPlugin');
@@ -666,7 +682,7 @@ describe('GraphileLlmPreset toggles', () => {
666
682
  const preset = GraphileLlmPreset({
667
683
  enableTextSearch: false,
668
684
  enableTextMutations: false,
669
- enableRag: false,
685
+ enableRag: false
670
686
  });
671
687
  const pluginNames = preset.plugins.map((p) => p.name);
672
688
  expect(pluginNames).toEqual(['LlmModulePlugin']);
package/esm/chat.d.ts CHANGED
@@ -26,12 +26,12 @@ export declare function buildChatCompleter(config: ChatConfig): ChatFunction | n
26
26
  */
27
27
  export declare function buildChatCompleterFromModule(data: LlmModuleData): ChatFunction | null;
28
28
  /**
29
- * Resolve a chat completer from environment variables via getEnvOptions().
29
+ * Resolve a chat completer from environment variables.
30
30
  * This is a fallback for development when no llm_module or defaultChatCompleter is configured.
31
31
  *
32
- * Environment variables (parsed by @constructive-io/graphql-env):
33
- * CHAT_PROVIDER - Provider name ('ollama')
34
- * CHAT_MODEL - Model identifier (e.g. 'llama3')
35
- * CHAT_BASE_URL - Provider base URL
32
+ * Environment variables (with defaults from env.ts):
33
+ * CHAT_PROVIDER - Provider name (default: 'ollama')
34
+ * CHAT_MODEL - Model identifier (default: 'llama3')
35
+ * CHAT_BASE_URL - Provider base URL (default: 'http://localhost:11434')
36
36
  */
37
37
  export declare function buildChatCompleterFromEnv(): ChatFunction | null;
package/esm/chat.js CHANGED
@@ -11,38 +11,51 @@
11
11
  * 2. The preset's `defaultChatCompleter` option (fallback for dev/testing)
12
12
  * 3. Environment variables (CHAT_PROVIDER, CHAT_MODEL, CHAT_BASE_URL)
13
13
  */
14
- import OllamaClient from '@agentic-kit/ollama';
15
- import { getEnvOptions } from '@constructive-io/graphql-env';
14
+ import { OllamaAdapter } from '@agentic-kit/ollama';
15
+ import { getLlmEnvOptions } from './env';
16
16
  // ─── Built-in Providers ─────────────────────────────────────────────────────
17
17
  /**
18
18
  * Create an Ollama-based chat completion function.
19
19
  *
20
- * Uses OllamaClient.generate() with a messages array, which internally
21
- * routes to the /api/chat endpoint.
20
+ * Uses OllamaAdapter.stream() to get both response content and real token
21
+ * usage counts from the provider (prompt_eval_count, eval_count).
22
22
  */
23
23
  function createOllamaChatCompleter(baseUrl = 'http://localhost:11434', model = 'llama3') {
24
- const client = new OllamaClient(baseUrl);
24
+ const adapter = new OllamaAdapter(baseUrl);
25
25
  return async (messages, options) => {
26
- // Build the input for OllamaClient.generate() in chat mode
27
- const input = {
28
- model,
29
- messages: messages.filter((m) => m.role !== 'system'),
30
- };
31
- // Extract system message if present
32
26
  const systemMsg = messages.find((m) => m.role === 'system');
33
- if (systemMsg) {
34
- input.system = systemMsg.content;
35
- }
36
- if (options?.temperature !== undefined) {
37
- input.temperature = options.temperature;
38
- }
39
- const startTime = Date.now();
40
- const response = await client.generate(input);
41
- const latencyMs = Date.now() - startTime;
42
- // Token count logging (metering deferred to billing system)
43
- console.log(`[graphile-llm] Chat completion: model=${model}, latency=${latencyMs}ms, ` +
44
- `messages=${messages.length}`);
45
- return response;
27
+ const nonSystem = messages.filter((m) => m.role !== 'system');
28
+ const modelDesc = adapter.createModel(model, {
29
+ maxOutputTokens: options?.maxTokens
30
+ });
31
+ const context = {
32
+ systemPrompt: systemMsg?.content,
33
+ messages: nonSystem.map((m) => ({
34
+ role: m.role,
35
+ content: m.content,
36
+ timestamp: Date.now()
37
+ }))
38
+ };
39
+ const stream = adapter.stream(modelDesc, context, {
40
+ temperature: options?.temperature,
41
+ maxTokens: options?.maxTokens
42
+ });
43
+ const result = await stream.result();
44
+ const content = result.content
45
+ .filter((block) => block.type === 'text')
46
+ .map((block) => block.text)
47
+ .join('');
48
+ return {
49
+ content,
50
+ usage: {
51
+ input: result.usage.input,
52
+ output: result.usage.output,
53
+ reasoning: result.usage.reasoning,
54
+ cacheRead: result.usage.cacheRead,
55
+ cacheWrite: result.usage.cacheWrite,
56
+ totalTokens: result.usage.totalTokens
57
+ }
58
+ };
46
59
  };
47
60
  }
48
61
  // ─── Chat Completer Construction ────────────────────────────────────────────
@@ -73,27 +86,19 @@ export function buildChatCompleterFromModule(data) {
73
86
  return buildChatCompleter({
74
87
  provider: data.chat_provider,
75
88
  model: data.chat_model,
76
- baseUrl: data.chat_base_url,
77
- apiKey: data.api_key_ref,
89
+ baseUrl: data.chat_base_url
78
90
  });
79
91
  }
80
92
  /**
81
- * Resolve a chat completer from environment variables via getEnvOptions().
93
+ * Resolve a chat completer from environment variables.
82
94
  * This is a fallback for development when no llm_module or defaultChatCompleter is configured.
83
95
  *
84
- * Environment variables (parsed by @constructive-io/graphql-env):
85
- * CHAT_PROVIDER - Provider name ('ollama')
86
- * CHAT_MODEL - Model identifier (e.g. 'llama3')
87
- * CHAT_BASE_URL - Provider base URL
96
+ * Environment variables (with defaults from env.ts):
97
+ * CHAT_PROVIDER - Provider name (default: 'ollama')
98
+ * CHAT_MODEL - Model identifier (default: 'llama3')
99
+ * CHAT_BASE_URL - Provider base URL (default: 'http://localhost:11434')
88
100
  */
89
101
  export function buildChatCompleterFromEnv() {
90
- const { llm } = getEnvOptions();
91
- const provider = llm?.chat?.provider;
92
- if (!provider)
93
- return null;
94
- return buildChatCompleter({
95
- provider,
96
- model: llm?.chat?.model,
97
- baseUrl: llm?.chat?.baseUrl,
98
- });
102
+ const { chat } = getLlmEnvOptions();
103
+ return buildChatCompleter(chat);
99
104
  }
@@ -0,0 +1,77 @@
1
+ /**
2
+ * config-cache — Per-database LLM billing configuration cache
3
+ *
4
+ * Caches resolved billing function names per database_id.
5
+ * Uses an LRU cache with TTL so config changes propagate within a bounded window
6
+ * without requiring a server restart.
7
+ *
8
+ * Resolution flow:
9
+ * Billing config from `metaschema_modules_public.billing_module`
10
+ * (schema name + function names for record_usage, check_billing_quota)
11
+ *
12
+ * All queries run through the Graphile `withPgClient` callback, which gives us
13
+ * a client connected to the tenant database with proper role settings.
14
+ *
15
+ * The LLM module config (provider, model, etc.) is already resolved by the
16
+ * LlmModulePlugin at schema-build time. This cache handles the runtime-only
17
+ * billing piece.
18
+ */
19
+ /**
20
+ * Generic pg client interface matching what Graphile's withPgClient provides.
21
+ * Avoids a hard dependency on the `pg` package.
22
+ */
23
+ export interface PgClient {
24
+ query(sql: string, values?: unknown[]): Promise<{
25
+ rows: Record<string, unknown>[];
26
+ }>;
27
+ }
28
+ /**
29
+ * Billing function metadata resolved from the billing_module metaschema table.
30
+ */
31
+ export interface BillingConfig {
32
+ /** Private schema containing the billing functions */
33
+ privateSchema: string;
34
+ /** Name of the record_usage function */
35
+ recordUsageFunction: string;
36
+ /** Name of the check_billing_quota function */
37
+ checkBillingQuotaFunction: string;
38
+ /** Public schema containing meters table */
39
+ publicSchema: string;
40
+ }
41
+ /**
42
+ * Inference log table metadata resolved from the inference_log_module.
43
+ */
44
+ export interface InferenceLogConfig {
45
+ /** Schema containing the usage_log_inference table */
46
+ schema: string;
47
+ /** Name of the inference log table */
48
+ tableName: string;
49
+ }
50
+ /**
51
+ * Per-database cached configuration for the LLM billing integration.
52
+ */
53
+ export interface LlmBillingCacheEntry {
54
+ /** Billing function references (null if billing_module not provisioned) */
55
+ billing: BillingConfig | null;
56
+ /** Inference log table references (null if inference_log_module not provisioned) */
57
+ inferenceLog: InferenceLogConfig | null;
58
+ }
59
+ /**
60
+ * Resolve billing config for a database.
61
+ * Results are cached per database_id with a 5-minute TTL.
62
+ *
63
+ * @param pgClient - A client connected to the tenant database (from withPgClient)
64
+ * @param databaseId - The database UUID
65
+ */
66
+ export declare function getLlmBillingConfig(pgClient: PgClient, databaseId: string): Promise<LlmBillingCacheEntry>;
67
+ /**
68
+ * Invalidate the cached config for a specific database (or all).
69
+ */
70
+ export declare function invalidateLlmBillingConfig(databaseId?: string): void;
71
+ /**
72
+ * Get cache stats for diagnostics.
73
+ */
74
+ export declare function getLlmBillingCacheStats(): {
75
+ size: number;
76
+ max: number;
77
+ };