graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/__tests__/graphile-llm.test.js +87 -71
  2. package/chat.d.ts +5 -5
  3. package/chat.js +45 -43
  4. package/config-cache.d.ts +77 -0
  5. package/config-cache.js +148 -0
  6. package/embedder.d.ts +5 -5
  7. package/embedder.js +11 -17
  8. package/env.d.ts +31 -0
  9. package/env.js +52 -0
  10. package/esm/__tests__/graphile-llm.test.js +87 -71
  11. package/esm/chat.d.ts +5 -5
  12. package/esm/chat.js +45 -40
  13. package/esm/config-cache.d.ts +77 -0
  14. package/esm/config-cache.js +143 -0
  15. package/esm/embedder.d.ts +5 -5
  16. package/esm/embedder.js +11 -17
  17. package/esm/env.d.ts +31 -0
  18. package/esm/env.js +49 -0
  19. package/esm/index.d.ts +14 -5
  20. package/esm/index.js +11 -5
  21. package/esm/metering.d.ts +114 -0
  22. package/esm/metering.js +352 -0
  23. package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
  24. package/esm/plugins/agent-discovery-plugin.js +65 -0
  25. package/esm/plugins/llm-module-plugin.d.ts +11 -2
  26. package/esm/plugins/llm-module-plugin.js +15 -7
  27. package/esm/plugins/metering-plugin.d.ts +42 -0
  28. package/esm/plugins/metering-plugin.js +175 -0
  29. package/esm/plugins/rag-plugin.js +20 -20
  30. package/esm/plugins/text-mutation-plugin.d.ts +4 -0
  31. package/esm/plugins/text-mutation-plugin.js +23 -13
  32. package/esm/plugins/text-search-plugin.d.ts +4 -0
  33. package/esm/plugins/text-search-plugin.js +23 -11
  34. package/esm/preset.d.ts +21 -1
  35. package/esm/preset.js +33 -6
  36. package/esm/types.d.ts +86 -10
  37. package/index.d.ts +14 -5
  38. package/index.js +25 -8
  39. package/metering.d.ts +114 -0
  40. package/metering.js +359 -0
  41. package/package.json +15 -15
  42. package/plugins/agent-discovery-plugin.d.ts +29 -0
  43. package/plugins/agent-discovery-plugin.js +69 -0
  44. package/plugins/llm-module-plugin.d.ts +11 -2
  45. package/plugins/llm-module-plugin.js +15 -7
  46. package/plugins/metering-plugin.d.ts +42 -0
  47. package/plugins/metering-plugin.js +178 -0
  48. package/plugins/rag-plugin.js +20 -20
  49. package/plugins/text-mutation-plugin.d.ts +4 -0
  50. package/plugins/text-mutation-plugin.js +23 -13
  51. package/plugins/text-search-plugin.d.ts +4 -0
  52. package/plugins/text-search-plugin.js +23 -11
  53. package/preset.d.ts +21 -1
  54. package/preset.js +33 -6
  55. package/types.d.ts +86 -10
@@ -36,19 +36,19 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
36
36
  return (mod && mod.__esModule) ? mod : { "default": mod };
37
37
  };
38
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
- const path_1 = require("path");
40
39
  const ollama_1 = __importDefault(require("@agentic-kit/ollama"));
41
- const graphile_test_1 = require("graphile-test");
42
40
  const graphile_connection_filter_1 = require("graphile-connection-filter");
41
+ const pgvector_1 = require("graphile-search/adapters/pgvector");
43
42
  const vector_codec_1 = require("graphile-search/codecs/vector-codec");
44
43
  const plugin_1 = require("graphile-search/plugin");
45
- const pgvector_1 = require("graphile-search/adapters/pgvector");
44
+ const graphile_test_1 = require("graphile-test");
45
+ const path_1 = require("path");
46
+ const chat_1 = require("../../src/chat");
47
+ const embedder_1 = require("../../src/embedder");
46
48
  const llm_module_plugin_1 = require("../../src/plugins/llm-module-plugin");
47
- const text_search_plugin_1 = require("../../src/plugins/text-search-plugin");
48
- const text_mutation_plugin_1 = require("../../src/plugins/text-mutation-plugin");
49
49
  const rag_plugin_1 = require("../../src/plugins/rag-plugin");
50
- const embedder_1 = require("../../src/embedder");
51
- const chat_1 = require("../../src/chat");
50
+ const text_mutation_plugin_1 = require("../../src/plugins/text-mutation-plugin");
51
+ const text_search_plugin_1 = require("../../src/plugins/text-search-plugin");
52
52
  // ─── @agentic-kit/ollama client ─────────────────────────────────────────────
53
53
  const ollamaClient = new ollama_1.default('http://localhost:11434');
54
54
  async function ensureNomicModel() {
@@ -68,14 +68,14 @@ describe('Embedder abstraction', () => {
68
68
  const embedder = (0, embedder_1.buildEmbedder)({
69
69
  provider: 'ollama',
70
70
  model: 'nomic-embed-text',
71
- baseUrl: 'http://localhost:11434',
71
+ baseUrl: 'http://localhost:11434'
72
72
  });
73
73
  expect(embedder).not.toBeNull();
74
74
  expect(typeof embedder).toBe('function');
75
75
  });
76
76
  it('returns null for unknown provider', () => {
77
77
  const embedder = (0, embedder_1.buildEmbedder)({
78
- provider: 'unknown-provider',
78
+ provider: 'unknown-provider'
79
79
  });
80
80
  expect(embedder).toBeNull();
81
81
  });
@@ -90,7 +90,7 @@ describe('Embedder abstraction', () => {
90
90
  const moduleData = {
91
91
  embedding_provider: 'ollama',
92
92
  embedding_model: 'nomic-embed-text',
93
- embedding_base_url: 'http://localhost:11434',
93
+ embedding_base_url: 'http://localhost:11434'
94
94
  };
95
95
  const embedder = (0, embedder_1.buildEmbedderFromModule)(moduleData);
96
96
  expect(embedder).not.toBeNull();
@@ -98,7 +98,7 @@ describe('Embedder abstraction', () => {
98
98
  });
99
99
  it('returns null for unsupported provider in module data', () => {
100
100
  const moduleData = {
101
- embedding_provider: 'unsupported',
101
+ embedding_provider: 'unsupported'
102
102
  };
103
103
  const embedder = (0, embedder_1.buildEmbedderFromModule)(moduleData);
104
104
  expect(embedder).toBeNull();
@@ -109,18 +109,19 @@ describe('Embedder abstraction', () => {
109
109
  afterEach(() => {
110
110
  process.env = originalEnv;
111
111
  });
112
- it('returns null when EMBEDDER_PROVIDER is not set', () => {
112
+ it('returns default ollama embedder when EMBEDDER_PROVIDER is not set', () => {
113
113
  process.env = { ...originalEnv };
114
114
  delete process.env.EMBEDDER_PROVIDER;
115
115
  const embedder = (0, embedder_1.buildEmbedderFromEnv)();
116
- expect(embedder).toBeNull();
116
+ expect(embedder).not.toBeNull();
117
+ expect(typeof embedder).toBe('function');
117
118
  });
118
119
  it('builds embedder from environment variables', () => {
119
120
  process.env = {
120
121
  ...originalEnv,
121
122
  EMBEDDER_PROVIDER: 'ollama',
122
123
  EMBEDDER_MODEL: 'nomic-embed-text',
123
- EMBEDDER_BASE_URL: 'http://localhost:11434',
124
+ EMBEDDER_BASE_URL: 'http://localhost:11434'
124
125
  };
125
126
  const embedder = (0, embedder_1.buildEmbedderFromEnv)();
126
127
  expect(embedder).not.toBeNull();
@@ -138,7 +139,7 @@ describe('graphile-llm schema enrichment', () => {
138
139
  let query;
139
140
  beforeAll(async () => {
140
141
  const unifiedPlugin = (0, plugin_1.createUnifiedSearchPlugin)({
141
- adapters: [(0, pgvector_1.createPgvectorAdapter)()],
142
+ adapters: [(0, pgvector_1.createPgvectorAdapter)()]
142
143
  });
143
144
  const testPreset = {
144
145
  extends: [(0, graphile_connection_filter_1.ConnectionFilterPreset)()],
@@ -151,18 +152,18 @@ describe('graphile-llm schema enrichment', () => {
151
152
  defaultEmbedder: {
152
153
  provider: 'ollama',
153
154
  model: 'nomic-embed-text',
154
- baseUrl: 'http://localhost:11434',
155
- },
155
+ baseUrl: 'http://localhost:11434'
156
+ }
156
157
  }),
157
158
  (0, text_search_plugin_1.createLlmTextSearchPlugin)(),
158
- (0, text_mutation_plugin_1.createLlmTextMutationPlugin)(),
159
- ],
159
+ (0, text_mutation_plugin_1.createLlmTextMutationPlugin)()
160
+ ]
160
161
  };
161
162
  const connections = await (0, graphile_test_1.getConnections)({
162
163
  schemas: ['llm_test'],
163
164
  preset: testPreset,
164
165
  useRoot: true,
165
- authRole: 'postgres',
166
+ authRole: 'postgres'
166
167
  }, [graphile_test_1.seed.sqlfile([(0, path_1.join)(__dirname, './setup.sql')])]);
167
168
  db = connections.db;
168
169
  teardown = connections.teardown;
@@ -291,33 +292,36 @@ describe('graphile-llm with real Ollama embedding', () => {
291
292
  const embedder = (0, embedder_1.buildEmbedder)({
292
293
  provider: 'ollama',
293
294
  model: 'nomic-embed-text',
294
- baseUrl: 'http://localhost:11434',
295
+ baseUrl: 'http://localhost:11434'
295
296
  });
296
297
  expect(embedder).not.toBeNull();
297
- const vector = await embedder('Machine learning is transforming AI');
298
+ const result = await embedder('Machine learning is transforming AI');
298
299
  // nomic-embed-text produces 768-dimensional vectors
299
- expect(Array.isArray(vector)).toBe(true);
300
- expect(vector.length).toBe(768);
300
+ expect(Array.isArray(result.embedding)).toBe(true);
301
+ expect(result.embedding.length).toBe(768);
302
+ expect(result.promptTokens).toBeGreaterThan(0);
301
303
  // All elements should be numbers
302
- for (const v of vector) {
304
+ for (const v of result.embedding) {
303
305
  expect(typeof v).toBe('number');
304
306
  expect(Number.isFinite(v)).toBe(true);
305
307
  }
306
308
  // Vector should not be all zeros
307
- const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
309
+ const magnitude = Math.sqrt(result.embedding.reduce((sum, v) => sum + v * v, 0));
308
310
  expect(magnitude).toBeGreaterThan(0);
309
311
  });
310
312
  it('should produce different vectors for semantically different text', async () => {
311
313
  const embedder = (0, embedder_1.buildEmbedder)({
312
314
  provider: 'ollama',
313
315
  model: 'nomic-embed-text',
314
- baseUrl: 'http://localhost:11434',
316
+ baseUrl: 'http://localhost:11434'
315
317
  });
316
318
  expect(embedder).not.toBeNull();
317
- const [vecA, vecB] = await Promise.all([
319
+ const [resultA, resultB] = await Promise.all([
318
320
  embedder('Artificial intelligence and machine learning'),
319
- embedder('Cooking recipes for Italian pasta dishes'),
321
+ embedder('Cooking recipes for Italian pasta dishes')
320
322
  ]);
323
+ const vecA = resultA.embedding;
324
+ const vecB = resultB.embedding;
321
325
  expect(vecA.length).toBe(768);
322
326
  expect(vecB.length).toBe(768);
323
327
  // Compute cosine similarity
@@ -337,13 +341,15 @@ describe('graphile-llm with real Ollama embedding', () => {
337
341
  const embedder = (0, embedder_1.buildEmbedder)({
338
342
  provider: 'ollama',
339
343
  model: 'nomic-embed-text',
340
- baseUrl: 'http://localhost:11434',
344
+ baseUrl: 'http://localhost:11434'
341
345
  });
342
346
  expect(embedder).not.toBeNull();
343
- const [vecA, vecB] = await Promise.all([
347
+ const [resultA, resultB] = await Promise.all([
344
348
  embedder('Machine learning and artificial intelligence'),
345
- embedder('AI and ML are subfields of computer science'),
349
+ embedder('AI and ML are subfields of computer science')
346
350
  ]);
351
+ const vecA = resultA.embedding;
352
+ const vecB = resultB.embedding;
347
353
  expect(vecA.length).toBe(768);
348
354
  expect(vecB.length).toBe(768);
349
355
  // Compute cosine similarity
@@ -359,11 +365,14 @@ describe('graphile-llm with real Ollama embedding', () => {
359
365
  // Semantically similar texts should have high similarity
360
366
  expect(cosineSimilarity).toBeGreaterThan(0.5);
361
367
  });
362
- it('should produce embeddings via @agentic-kit/ollama OllamaClient directly', async () => {
363
- const vector = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
364
- expect(Array.isArray(vector)).toBe(true);
365
- expect(vector.length).toBe(768);
366
- for (const v of vector) {
368
+ it('should produce embeddings with token count via @agentic-kit/ollama OllamaClient directly', async () => {
369
+ const result = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
370
+ expect(result).toHaveProperty('embedding');
371
+ expect(result).toHaveProperty('promptTokens');
372
+ expect(Array.isArray(result.embedding)).toBe(true);
373
+ expect(result.embedding.length).toBe(768);
374
+ expect(result.promptTokens).toBeGreaterThan(0);
375
+ for (const v of result.embedding) {
367
376
  expect(typeof v).toBe('number');
368
377
  expect(Number.isFinite(v)).toBe(true);
369
378
  }
@@ -378,14 +387,14 @@ describe('Chat completion abstraction', () => {
378
387
  const chat = (0, chat_1.buildChatCompleter)({
379
388
  provider: 'ollama',
380
389
  model: 'llama3',
381
- baseUrl: 'http://localhost:11434',
390
+ baseUrl: 'http://localhost:11434'
382
391
  });
383
392
  expect(chat).not.toBeNull();
384
393
  expect(typeof chat).toBe('function');
385
394
  });
386
395
  it('returns null for unknown provider', () => {
387
396
  const chat = (0, chat_1.buildChatCompleter)({
388
- provider: 'unknown-provider',
397
+ provider: 'unknown-provider'
389
398
  });
390
399
  expect(chat).toBeNull();
391
400
  });
@@ -401,7 +410,7 @@ describe('Chat completion abstraction', () => {
401
410
  embedding_provider: 'ollama',
402
411
  chat_provider: 'ollama',
403
412
  chat_model: 'llama3',
404
- chat_base_url: 'http://localhost:11434',
413
+ chat_base_url: 'http://localhost:11434'
405
414
  };
406
415
  const chat = (0, chat_1.buildChatCompleterFromModule)(moduleData);
407
416
  expect(chat).not.toBeNull();
@@ -409,7 +418,7 @@ describe('Chat completion abstraction', () => {
409
418
  });
410
419
  it('returns null when chat_provider is not set', () => {
411
420
  const moduleData = {
412
- embedding_provider: 'ollama',
421
+ embedding_provider: 'ollama'
413
422
  };
414
423
  const chat = (0, chat_1.buildChatCompleterFromModule)(moduleData);
415
424
  expect(chat).toBeNull();
@@ -420,18 +429,19 @@ describe('Chat completion abstraction', () => {
420
429
  afterEach(() => {
421
430
  process.env = originalEnv;
422
431
  });
423
- it('returns null when CHAT_PROVIDER is not set', () => {
432
+ it('returns default ollama chat completer when CHAT_PROVIDER is not set', () => {
424
433
  process.env = { ...originalEnv };
425
434
  delete process.env.CHAT_PROVIDER;
426
435
  const chat = (0, chat_1.buildChatCompleterFromEnv)();
427
- expect(chat).toBeNull();
436
+ expect(chat).not.toBeNull();
437
+ expect(typeof chat).toBe('function');
428
438
  });
429
439
  it('builds chat completer from environment variables', () => {
430
440
  process.env = {
431
441
  ...originalEnv,
432
442
  CHAT_PROVIDER: 'ollama',
433
443
  CHAT_MODEL: 'llama3',
434
- CHAT_BASE_URL: 'http://localhost:11434',
444
+ CHAT_BASE_URL: 'http://localhost:11434'
435
445
  };
436
446
  const chat = (0, chat_1.buildChatCompleterFromEnv)();
437
447
  expect(chat).not.toBeNull();
@@ -470,10 +480,10 @@ function makeTestSmartTagsPlugin(tagsByTable) {
470
480
  Object.assign(c.extensions.tags, tags);
471
481
  }
472
482
  return _;
473
- },
474
- },
475
- },
476
- },
483
+ }
484
+ }
485
+ }
486
+ }
477
487
  };
478
488
  }
479
489
  describe('RAG plugin schema enrichment', () => {
@@ -482,7 +492,7 @@ describe('RAG plugin schema enrichment', () => {
482
492
  let query;
483
493
  beforeAll(async () => {
484
494
  const unifiedPlugin = (0, plugin_1.createUnifiedSearchPlugin)({
485
- adapters: [(0, pgvector_1.createPgvectorAdapter)()],
495
+ adapters: [(0, pgvector_1.createPgvectorAdapter)()]
486
496
  });
487
497
  const smartTagsPlugin = makeTestSmartTagsPlugin({
488
498
  articles: {
@@ -491,16 +501,22 @@ describe('RAG plugin schema enrichment', () => {
491
501
  parentFk: 'parent_id',
492
502
  parentPk: 'id',
493
503
  embeddingField: 'embedding',
494
- contentField: 'content',
495
- },
496
- },
504
+ contentField: 'content'
505
+ }
506
+ }
497
507
  });
498
- // Mock embedder that returns a fixed 3-dim vector
499
- const mockEmbedder = async (_text) => [1, 0, 0];
500
- // Mock chat completer that returns a canned response
508
+ // Mock embedder that returns a fixed 3-dim vector with token count
509
+ const mockEmbedder = async (_text) => ({
510
+ embedding: [1, 0, 0],
511
+ promptTokens: 5
512
+ });
513
+ // Mock chat completer that returns a canned response with usage
501
514
  const mockChatCompleter = async (messages) => {
502
515
  const userMessage = messages.find((m) => m.role === 'user');
503
- return `Mock answer for: ${userMessage?.content || 'unknown'}`;
516
+ return {
517
+ content: `Mock answer for: ${userMessage?.content || 'unknown'}`,
518
+ usage: { input: 10, output: 15, reasoning: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 25 }
519
+ };
504
520
  };
505
521
  const testPreset = {
506
522
  extends: [(0, graphile_connection_filter_1.ConnectionFilterPreset)()],
@@ -512,13 +528,13 @@ describe('RAG plugin schema enrichment', () => {
512
528
  defaultEmbedder: {
513
529
  provider: 'ollama',
514
530
  model: 'nomic-embed-text',
515
- baseUrl: 'http://localhost:11434',
516
- },
531
+ baseUrl: 'http://localhost:11434'
532
+ }
517
533
  }),
518
534
  (0, text_search_plugin_1.createLlmTextSearchPlugin)(),
519
535
  (0, text_mutation_plugin_1.createLlmTextMutationPlugin)(),
520
- (0, rag_plugin_1.createLlmRagPlugin)(),
521
- ],
536
+ (0, rag_plugin_1.createLlmRagPlugin)()
537
+ ]
522
538
  };
523
539
  // Override the embedder and chat completer on the build context
524
540
  // by wrapping the LlmModulePlugin's build hook
@@ -531,20 +547,20 @@ describe('RAG plugin schema enrichment', () => {
531
547
  build(build) {
532
548
  return build.extend(build, {
533
549
  llmEmbedder: mockEmbedder,
534
- llmChatCompleter: mockChatCompleter,
550
+ llmChatCompleter: mockChatCompleter
535
551
  }, 'TestOverridePlugin overriding embedder and chat completer');
536
- },
537
- },
538
- },
552
+ }
553
+ }
554
+ }
539
555
  };
540
556
  const connections = await (0, graphile_test_1.getConnections)({
541
557
  schemas: ['llm_test'],
542
558
  preset: {
543
559
  ...testPreset,
544
- plugins: [...testPreset.plugins, overridePlugin],
560
+ plugins: [...testPreset.plugins, overridePlugin]
545
561
  },
546
562
  useRoot: true,
547
- authRole: 'postgres',
563
+ authRole: 'postgres'
548
564
  }, [graphile_test_1.seed.sqlfile([(0, path_1.join)(__dirname, './setup.sql')])]);
549
565
  db = connections.db;
550
566
  teardown = connections.teardown;
@@ -668,7 +684,7 @@ describe('GraphileLlmPreset toggles', () => {
668
684
  it('enableRag=false excludes RAG plugin (no ragQuery field)', async () => {
669
685
  const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
670
686
  const preset = GraphileLlmPreset({
671
- enableRag: false,
687
+ enableRag: false
672
688
  });
673
689
  const pluginNames = preset.plugins.map((p) => p.name);
674
690
  expect(pluginNames).not.toContain('LlmRagPlugin');
@@ -676,7 +692,7 @@ describe('GraphileLlmPreset toggles', () => {
676
692
  it('enableRag=true includes RAG plugin', async () => {
677
693
  const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
678
694
  const preset = GraphileLlmPreset({
679
- enableRag: true,
695
+ enableRag: true
680
696
  });
681
697
  const pluginNames = preset.plugins.map((p) => p.name);
682
698
  expect(pluginNames).toContain('LlmRagPlugin');
@@ -684,7 +700,7 @@ describe('GraphileLlmPreset toggles', () => {
684
700
  it('enableTextSearch=false excludes text search plugin', async () => {
685
701
  const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
686
702
  const preset = GraphileLlmPreset({
687
- enableTextSearch: false,
703
+ enableTextSearch: false
688
704
  });
689
705
  const pluginNames = preset.plugins.map((p) => p.name);
690
706
  expect(pluginNames).not.toContain('LlmTextSearchPlugin');
@@ -694,7 +710,7 @@ describe('GraphileLlmPreset toggles', () => {
694
710
  it('enableTextMutations=false excludes text mutation plugin', async () => {
695
711
  const { GraphileLlmPreset } = await Promise.resolve().then(() => __importStar(require('../../src/preset')));
696
712
  const preset = GraphileLlmPreset({
697
- enableTextMutations: false,
713
+ enableTextMutations: false
698
714
  });
699
715
  const pluginNames = preset.plugins.map((p) => p.name);
700
716
  expect(pluginNames).not.toContain('LlmTextMutationPlugin');
@@ -704,7 +720,7 @@ describe('GraphileLlmPreset toggles', () => {
704
720
  const preset = GraphileLlmPreset({
705
721
  enableTextSearch: false,
706
722
  enableTextMutations: false,
707
- enableRag: false,
723
+ enableRag: false
708
724
  });
709
725
  const pluginNames = preset.plugins.map((p) => p.name);
710
726
  expect(pluginNames).toEqual(['LlmModulePlugin']);
package/chat.d.ts CHANGED
@@ -26,12 +26,12 @@ export declare function buildChatCompleter(config: ChatConfig): ChatFunction | n
26
26
  */
27
27
  export declare function buildChatCompleterFromModule(data: LlmModuleData): ChatFunction | null;
28
28
  /**
29
- * Resolve a chat completer from environment variables via getEnvOptions().
29
+ * Resolve a chat completer from environment variables.
30
30
  * This is a fallback for development when no llm_module or defaultChatCompleter is configured.
31
31
  *
32
- * Environment variables (parsed by @constructive-io/graphql-env):
33
- * CHAT_PROVIDER - Provider name ('ollama')
34
- * CHAT_MODEL - Model identifier (e.g. 'llama3')
35
- * CHAT_BASE_URL - Provider base URL
32
+ * Environment variables (with defaults from env.ts):
33
+ * CHAT_PROVIDER - Provider name (default: 'ollama')
34
+ * CHAT_MODEL - Model identifier (default: 'llama3')
35
+ * CHAT_BASE_URL - Provider base URL (default: 'http://localhost:11434')
36
36
  */
37
37
  export declare function buildChatCompleterFromEnv(): ChatFunction | null;
package/chat.js CHANGED
@@ -12,45 +12,55 @@
12
12
  * 2. The preset's `defaultChatCompleter` option (fallback for dev/testing)
13
13
  * 3. Environment variables (CHAT_PROVIDER, CHAT_MODEL, CHAT_BASE_URL)
14
14
  */
15
- var __importDefault = (this && this.__importDefault) || function (mod) {
16
- return (mod && mod.__esModule) ? mod : { "default": mod };
17
- };
18
15
  Object.defineProperty(exports, "__esModule", { value: true });
19
16
  exports.buildChatCompleter = buildChatCompleter;
20
17
  exports.buildChatCompleterFromModule = buildChatCompleterFromModule;
21
18
  exports.buildChatCompleterFromEnv = buildChatCompleterFromEnv;
22
- const ollama_1 = __importDefault(require("@agentic-kit/ollama"));
23
- const graphql_env_1 = require("@constructive-io/graphql-env");
19
+ const ollama_1 = require("@agentic-kit/ollama");
20
+ const env_1 = require("./env");
24
21
  // ─── Built-in Providers ─────────────────────────────────────────────────────
25
22
  /**
26
23
  * Create an Ollama-based chat completion function.
27
24
  *
28
- * Uses OllamaClient.generate() with a messages array, which internally
29
- * routes to the /api/chat endpoint.
25
+ * Uses OllamaAdapter.stream() to get both response content and real token
26
+ * usage counts from the provider (prompt_eval_count, eval_count).
30
27
  */
31
28
  function createOllamaChatCompleter(baseUrl = 'http://localhost:11434', model = 'llama3') {
32
- const client = new ollama_1.default(baseUrl);
29
+ const adapter = new ollama_1.OllamaAdapter(baseUrl);
33
30
  return async (messages, options) => {
34
- // Build the input for OllamaClient.generate() in chat mode
35
- const input = {
36
- model,
37
- messages: messages.filter((m) => m.role !== 'system'),
38
- };
39
- // Extract system message if present
40
31
  const systemMsg = messages.find((m) => m.role === 'system');
41
- if (systemMsg) {
42
- input.system = systemMsg.content;
43
- }
44
- if (options?.temperature !== undefined) {
45
- input.temperature = options.temperature;
46
- }
47
- const startTime = Date.now();
48
- const response = await client.generate(input);
49
- const latencyMs = Date.now() - startTime;
50
- // Token count logging (metering deferred to billing system)
51
- console.log(`[graphile-llm] Chat completion: model=${model}, latency=${latencyMs}ms, ` +
52
- `messages=${messages.length}`);
53
- return response;
32
+ const nonSystem = messages.filter((m) => m.role !== 'system');
33
+ const modelDesc = adapter.createModel(model, {
34
+ maxOutputTokens: options?.maxTokens
35
+ });
36
+ const context = {
37
+ systemPrompt: systemMsg?.content,
38
+ messages: nonSystem.map((m) => ({
39
+ role: m.role,
40
+ content: m.content,
41
+ timestamp: Date.now()
42
+ }))
43
+ };
44
+ const stream = adapter.stream(modelDesc, context, {
45
+ temperature: options?.temperature,
46
+ maxTokens: options?.maxTokens
47
+ });
48
+ const result = await stream.result();
49
+ const content = result.content
50
+ .filter((block) => block.type === 'text')
51
+ .map((block) => block.text)
52
+ .join('');
53
+ return {
54
+ content,
55
+ usage: {
56
+ input: result.usage.input,
57
+ output: result.usage.output,
58
+ reasoning: result.usage.reasoning,
59
+ cacheRead: result.usage.cacheRead,
60
+ cacheWrite: result.usage.cacheWrite,
61
+ totalTokens: result.usage.totalTokens
62
+ }
63
+ };
54
64
  };
55
65
  }
56
66
  // ─── Chat Completer Construction ────────────────────────────────────────────
@@ -81,27 +91,19 @@ function buildChatCompleterFromModule(data) {
81
91
  return buildChatCompleter({
82
92
  provider: data.chat_provider,
83
93
  model: data.chat_model,
84
- baseUrl: data.chat_base_url,
85
- apiKey: data.api_key_ref,
94
+ baseUrl: data.chat_base_url
86
95
  });
87
96
  }
88
97
  /**
89
- * Resolve a chat completer from environment variables via getEnvOptions().
98
+ * Resolve a chat completer from environment variables.
90
99
  * This is a fallback for development when no llm_module or defaultChatCompleter is configured.
91
100
  *
92
- * Environment variables (parsed by @constructive-io/graphql-env):
93
- * CHAT_PROVIDER - Provider name ('ollama')
94
- * CHAT_MODEL - Model identifier (e.g. 'llama3')
95
- * CHAT_BASE_URL - Provider base URL
101
+ * Environment variables (with defaults from env.ts):
102
+ * CHAT_PROVIDER - Provider name (default: 'ollama')
103
+ * CHAT_MODEL - Model identifier (default: 'llama3')
104
+ * CHAT_BASE_URL - Provider base URL (default: 'http://localhost:11434')
96
105
  */
97
106
  function buildChatCompleterFromEnv() {
98
- const { llm } = (0, graphql_env_1.getEnvOptions)();
99
- const provider = llm?.chat?.provider;
100
- if (!provider)
101
- return null;
102
- return buildChatCompleter({
103
- provider,
104
- model: llm?.chat?.model,
105
- baseUrl: llm?.chat?.baseUrl,
106
- });
107
+ const { chat } = (0, env_1.getLlmEnvOptions)();
108
+ return buildChatCompleter(chat);
107
109
  }
@@ -0,0 +1,77 @@
1
+ /**
2
+ * config-cache — Per-database LLM billing configuration cache
3
+ *
4
+ * Caches resolved billing function names per database_id.
5
+ * Uses an LRU cache with TTL so config changes propagate within a bounded window
6
+ * without requiring a server restart.
7
+ *
8
+ * Resolution flow:
9
+ * Billing config from `metaschema_modules_public.billing_module`
10
+ * (schema name + function names for record_usage, check_billing_quota)
11
+ *
12
+ * All queries run through the Graphile `withPgClient` callback, which gives us
13
+ * a client connected to the tenant database with proper role settings.
14
+ *
15
+ * The LLM module config (provider, model, etc.) is already resolved by the
16
+ * LlmModulePlugin at schema-build time. This cache handles the runtime-only
17
+ * billing piece.
18
+ */
19
+ /**
20
+ * Generic pg client interface matching what Graphile's withPgClient provides.
21
+ * Avoids a hard dependency on the `pg` package.
22
+ */
23
+ export interface PgClient {
24
+ query(sql: string, values?: unknown[]): Promise<{
25
+ rows: Record<string, unknown>[];
26
+ }>;
27
+ }
28
+ /**
29
+ * Billing function metadata resolved from the billing_module metaschema table.
30
+ */
31
+ export interface BillingConfig {
32
+ /** Private schema containing the billing functions */
33
+ privateSchema: string;
34
+ /** Name of the record_usage function */
35
+ recordUsageFunction: string;
36
+ /** Name of the check_billing_quota function */
37
+ checkBillingQuotaFunction: string;
38
+ /** Public schema containing meters table */
39
+ publicSchema: string;
40
+ }
41
+ /**
42
+ * Inference log table metadata resolved from the inference_log_module.
43
+ */
44
+ export interface InferenceLogConfig {
45
+ /** Schema containing the usage_log_inference table */
46
+ schema: string;
47
+ /** Name of the inference log table */
48
+ tableName: string;
49
+ }
50
+ /**
51
+ * Per-database cached configuration for the LLM billing integration.
52
+ */
53
+ export interface LlmBillingCacheEntry {
54
+ /** Billing function references (null if billing_module not provisioned) */
55
+ billing: BillingConfig | null;
56
+ /** Inference log table references (null if inference_log_module not provisioned) */
57
+ inferenceLog: InferenceLogConfig | null;
58
+ }
59
+ /**
60
+ * Resolve billing config for a database.
61
+ * Results are cached per database_id with a 5-minute TTL.
62
+ *
63
+ * @param pgClient - A client connected to the tenant database (from withPgClient)
64
+ * @param databaseId - The database UUID
65
+ */
66
+ export declare function getLlmBillingConfig(pgClient: PgClient, databaseId: string): Promise<LlmBillingCacheEntry>;
67
+ /**
68
+ * Invalidate the cached config for a specific database (or all).
69
+ */
70
+ export declare function invalidateLlmBillingConfig(databaseId?: string): void;
71
+ /**
72
+ * Get cache stats for diagnostics.
73
+ */
74
+ export declare function getLlmBillingCacheStats(): {
75
+ size: number;
76
+ max: number;
77
+ };