graphile-llm 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/graphile-llm.test.js +81 -67
- package/chat.js +37 -27
- package/config-cache.js +4 -4
- package/embedder.js +3 -1
- package/env.js +6 -6
- package/esm/__tests__/graphile-llm.test.js +81 -67
- package/esm/chat.js +37 -24
- package/esm/config-cache.js +4 -4
- package/esm/embedder.js +3 -1
- package/esm/env.js +6 -6
- package/esm/index.d.ts +12 -12
- package/esm/index.js +7 -11
- package/esm/metering.d.ts +5 -5
- package/esm/metering.js +60 -66
- package/esm/plugins/agent-discovery-plugin.js +2 -2
- package/esm/plugins/llm-module-plugin.d.ts +1 -1
- package/esm/plugins/llm-module-plugin.js +5 -5
- package/esm/plugins/metering-plugin.js +13 -13
- package/esm/plugins/rag-plugin.js +20 -20
- package/esm/plugins/text-mutation-plugin.js +12 -12
- package/esm/plugins/text-search-plugin.js +10 -10
- package/esm/preset.js +6 -6
- package/esm/types.d.ts +39 -4
- package/index.d.ts +12 -12
- package/index.js +11 -15
- package/metering.d.ts +5 -5
- package/metering.js +60 -66
- package/package.json +8 -8
- package/plugins/agent-discovery-plugin.js +2 -2
- package/plugins/llm-module-plugin.d.ts +1 -1
- package/plugins/llm-module-plugin.js +5 -5
- package/plugins/metering-plugin.js +13 -13
- package/plugins/rag-plugin.js +20 -20
- package/plugins/text-mutation-plugin.js +12 -12
- package/plugins/text-search-plugin.js +10 -10
- package/preset.js +6 -6
- package/types.d.ts +39 -4
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
import { join } from 'path';
|
|
2
1
|
import OllamaClient from '@agentic-kit/ollama';
|
|
3
|
-
import { getConnections, seed } from 'graphile-test';
|
|
4
2
|
import { ConnectionFilterPreset } from 'graphile-connection-filter';
|
|
3
|
+
import { createPgvectorAdapter } from 'graphile-search/adapters/pgvector';
|
|
5
4
|
import { VectorCodecPlugin } from 'graphile-search/codecs/vector-codec';
|
|
6
5
|
import { createUnifiedSearchPlugin } from 'graphile-search/plugin';
|
|
7
|
-
import {
|
|
6
|
+
import { getConnections, seed } from 'graphile-test';
|
|
7
|
+
import { join } from 'path';
|
|
8
|
+
import { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from '../../src/chat';
|
|
9
|
+
import { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from '../../src/embedder';
|
|
8
10
|
import { createLlmModulePlugin } from '../../src/plugins/llm-module-plugin';
|
|
9
|
-
import { createLlmTextSearchPlugin } from '../../src/plugins/text-search-plugin';
|
|
10
|
-
import { createLlmTextMutationPlugin } from '../../src/plugins/text-mutation-plugin';
|
|
11
11
|
import { createLlmRagPlugin } from '../../src/plugins/rag-plugin';
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
12
|
+
import { createLlmTextMutationPlugin } from '../../src/plugins/text-mutation-plugin';
|
|
13
|
+
import { createLlmTextSearchPlugin } from '../../src/plugins/text-search-plugin';
|
|
14
14
|
// ─── @agentic-kit/ollama client ─────────────────────────────────────────────
|
|
15
15
|
const ollamaClient = new OllamaClient('http://localhost:11434');
|
|
16
16
|
async function ensureNomicModel() {
|
|
@@ -30,14 +30,14 @@ describe('Embedder abstraction', () => {
|
|
|
30
30
|
const embedder = buildEmbedder({
|
|
31
31
|
provider: 'ollama',
|
|
32
32
|
model: 'nomic-embed-text',
|
|
33
|
-
baseUrl: 'http://localhost:11434'
|
|
33
|
+
baseUrl: 'http://localhost:11434'
|
|
34
34
|
});
|
|
35
35
|
expect(embedder).not.toBeNull();
|
|
36
36
|
expect(typeof embedder).toBe('function');
|
|
37
37
|
});
|
|
38
38
|
it('returns null for unknown provider', () => {
|
|
39
39
|
const embedder = buildEmbedder({
|
|
40
|
-
provider: 'unknown-provider'
|
|
40
|
+
provider: 'unknown-provider'
|
|
41
41
|
});
|
|
42
42
|
expect(embedder).toBeNull();
|
|
43
43
|
});
|
|
@@ -52,7 +52,7 @@ describe('Embedder abstraction', () => {
|
|
|
52
52
|
const moduleData = {
|
|
53
53
|
embedding_provider: 'ollama',
|
|
54
54
|
embedding_model: 'nomic-embed-text',
|
|
55
|
-
embedding_base_url: 'http://localhost:11434'
|
|
55
|
+
embedding_base_url: 'http://localhost:11434'
|
|
56
56
|
};
|
|
57
57
|
const embedder = buildEmbedderFromModule(moduleData);
|
|
58
58
|
expect(embedder).not.toBeNull();
|
|
@@ -60,7 +60,7 @@ describe('Embedder abstraction', () => {
|
|
|
60
60
|
});
|
|
61
61
|
it('returns null for unsupported provider in module data', () => {
|
|
62
62
|
const moduleData = {
|
|
63
|
-
embedding_provider: 'unsupported'
|
|
63
|
+
embedding_provider: 'unsupported'
|
|
64
64
|
};
|
|
65
65
|
const embedder = buildEmbedderFromModule(moduleData);
|
|
66
66
|
expect(embedder).toBeNull();
|
|
@@ -83,7 +83,7 @@ describe('Embedder abstraction', () => {
|
|
|
83
83
|
...originalEnv,
|
|
84
84
|
EMBEDDER_PROVIDER: 'ollama',
|
|
85
85
|
EMBEDDER_MODEL: 'nomic-embed-text',
|
|
86
|
-
EMBEDDER_BASE_URL: 'http://localhost:11434'
|
|
86
|
+
EMBEDDER_BASE_URL: 'http://localhost:11434'
|
|
87
87
|
};
|
|
88
88
|
const embedder = buildEmbedderFromEnv();
|
|
89
89
|
expect(embedder).not.toBeNull();
|
|
@@ -101,7 +101,7 @@ describe('graphile-llm schema enrichment', () => {
|
|
|
101
101
|
let query;
|
|
102
102
|
beforeAll(async () => {
|
|
103
103
|
const unifiedPlugin = createUnifiedSearchPlugin({
|
|
104
|
-
adapters: [createPgvectorAdapter()]
|
|
104
|
+
adapters: [createPgvectorAdapter()]
|
|
105
105
|
});
|
|
106
106
|
const testPreset = {
|
|
107
107
|
extends: [ConnectionFilterPreset()],
|
|
@@ -114,18 +114,18 @@ describe('graphile-llm schema enrichment', () => {
|
|
|
114
114
|
defaultEmbedder: {
|
|
115
115
|
provider: 'ollama',
|
|
116
116
|
model: 'nomic-embed-text',
|
|
117
|
-
baseUrl: 'http://localhost:11434'
|
|
118
|
-
}
|
|
117
|
+
baseUrl: 'http://localhost:11434'
|
|
118
|
+
}
|
|
119
119
|
}),
|
|
120
120
|
createLlmTextSearchPlugin(),
|
|
121
|
-
createLlmTextMutationPlugin()
|
|
122
|
-
]
|
|
121
|
+
createLlmTextMutationPlugin()
|
|
122
|
+
]
|
|
123
123
|
};
|
|
124
124
|
const connections = await getConnections({
|
|
125
125
|
schemas: ['llm_test'],
|
|
126
126
|
preset: testPreset,
|
|
127
127
|
useRoot: true,
|
|
128
|
-
authRole: 'postgres'
|
|
128
|
+
authRole: 'postgres'
|
|
129
129
|
}, [seed.sqlfile([join(__dirname, './setup.sql')])]);
|
|
130
130
|
db = connections.db;
|
|
131
131
|
teardown = connections.teardown;
|
|
@@ -254,33 +254,36 @@ describe('graphile-llm with real Ollama embedding', () => {
|
|
|
254
254
|
const embedder = buildEmbedder({
|
|
255
255
|
provider: 'ollama',
|
|
256
256
|
model: 'nomic-embed-text',
|
|
257
|
-
baseUrl: 'http://localhost:11434'
|
|
257
|
+
baseUrl: 'http://localhost:11434'
|
|
258
258
|
});
|
|
259
259
|
expect(embedder).not.toBeNull();
|
|
260
|
-
const
|
|
260
|
+
const result = await embedder('Machine learning is transforming AI');
|
|
261
261
|
// nomic-embed-text produces 768-dimensional vectors
|
|
262
|
-
expect(Array.isArray(
|
|
263
|
-
expect(
|
|
262
|
+
expect(Array.isArray(result.embedding)).toBe(true);
|
|
263
|
+
expect(result.embedding.length).toBe(768);
|
|
264
|
+
expect(result.promptTokens).toBeGreaterThan(0);
|
|
264
265
|
// All elements should be numbers
|
|
265
|
-
for (const v of
|
|
266
|
+
for (const v of result.embedding) {
|
|
266
267
|
expect(typeof v).toBe('number');
|
|
267
268
|
expect(Number.isFinite(v)).toBe(true);
|
|
268
269
|
}
|
|
269
270
|
// Vector should not be all zeros
|
|
270
|
-
const magnitude = Math.sqrt(
|
|
271
|
+
const magnitude = Math.sqrt(result.embedding.reduce((sum, v) => sum + v * v, 0));
|
|
271
272
|
expect(magnitude).toBeGreaterThan(0);
|
|
272
273
|
});
|
|
273
274
|
it('should produce different vectors for semantically different text', async () => {
|
|
274
275
|
const embedder = buildEmbedder({
|
|
275
276
|
provider: 'ollama',
|
|
276
277
|
model: 'nomic-embed-text',
|
|
277
|
-
baseUrl: 'http://localhost:11434'
|
|
278
|
+
baseUrl: 'http://localhost:11434'
|
|
278
279
|
});
|
|
279
280
|
expect(embedder).not.toBeNull();
|
|
280
|
-
const [
|
|
281
|
+
const [resultA, resultB] = await Promise.all([
|
|
281
282
|
embedder('Artificial intelligence and machine learning'),
|
|
282
|
-
embedder('Cooking recipes for Italian pasta dishes')
|
|
283
|
+
embedder('Cooking recipes for Italian pasta dishes')
|
|
283
284
|
]);
|
|
285
|
+
const vecA = resultA.embedding;
|
|
286
|
+
const vecB = resultB.embedding;
|
|
284
287
|
expect(vecA.length).toBe(768);
|
|
285
288
|
expect(vecB.length).toBe(768);
|
|
286
289
|
// Compute cosine similarity
|
|
@@ -300,13 +303,15 @@ describe('graphile-llm with real Ollama embedding', () => {
|
|
|
300
303
|
const embedder = buildEmbedder({
|
|
301
304
|
provider: 'ollama',
|
|
302
305
|
model: 'nomic-embed-text',
|
|
303
|
-
baseUrl: 'http://localhost:11434'
|
|
306
|
+
baseUrl: 'http://localhost:11434'
|
|
304
307
|
});
|
|
305
308
|
expect(embedder).not.toBeNull();
|
|
306
|
-
const [
|
|
309
|
+
const [resultA, resultB] = await Promise.all([
|
|
307
310
|
embedder('Machine learning and artificial intelligence'),
|
|
308
|
-
embedder('AI and ML are subfields of computer science')
|
|
311
|
+
embedder('AI and ML are subfields of computer science')
|
|
309
312
|
]);
|
|
313
|
+
const vecA = resultA.embedding;
|
|
314
|
+
const vecB = resultB.embedding;
|
|
310
315
|
expect(vecA.length).toBe(768);
|
|
311
316
|
expect(vecB.length).toBe(768);
|
|
312
317
|
// Compute cosine similarity
|
|
@@ -322,11 +327,14 @@ describe('graphile-llm with real Ollama embedding', () => {
|
|
|
322
327
|
// Semantically similar texts should have high similarity
|
|
323
328
|
expect(cosineSimilarity).toBeGreaterThan(0.5);
|
|
324
329
|
});
|
|
325
|
-
it('should produce embeddings via @agentic-kit/ollama OllamaClient directly', async () => {
|
|
326
|
-
const
|
|
327
|
-
expect(
|
|
328
|
-
expect(
|
|
329
|
-
|
|
330
|
+
it('should produce embeddings with token count via @agentic-kit/ollama OllamaClient directly', async () => {
|
|
331
|
+
const result = await ollamaClient.generateEmbedding('Testing the agentic-kit Ollama client directly', 'nomic-embed-text');
|
|
332
|
+
expect(result).toHaveProperty('embedding');
|
|
333
|
+
expect(result).toHaveProperty('promptTokens');
|
|
334
|
+
expect(Array.isArray(result.embedding)).toBe(true);
|
|
335
|
+
expect(result.embedding.length).toBe(768);
|
|
336
|
+
expect(result.promptTokens).toBeGreaterThan(0);
|
|
337
|
+
for (const v of result.embedding) {
|
|
330
338
|
expect(typeof v).toBe('number');
|
|
331
339
|
expect(Number.isFinite(v)).toBe(true);
|
|
332
340
|
}
|
|
@@ -341,14 +349,14 @@ describe('Chat completion abstraction', () => {
|
|
|
341
349
|
const chat = buildChatCompleter({
|
|
342
350
|
provider: 'ollama',
|
|
343
351
|
model: 'llama3',
|
|
344
|
-
baseUrl: 'http://localhost:11434'
|
|
352
|
+
baseUrl: 'http://localhost:11434'
|
|
345
353
|
});
|
|
346
354
|
expect(chat).not.toBeNull();
|
|
347
355
|
expect(typeof chat).toBe('function');
|
|
348
356
|
});
|
|
349
357
|
it('returns null for unknown provider', () => {
|
|
350
358
|
const chat = buildChatCompleter({
|
|
351
|
-
provider: 'unknown-provider'
|
|
359
|
+
provider: 'unknown-provider'
|
|
352
360
|
});
|
|
353
361
|
expect(chat).toBeNull();
|
|
354
362
|
});
|
|
@@ -364,7 +372,7 @@ describe('Chat completion abstraction', () => {
|
|
|
364
372
|
embedding_provider: 'ollama',
|
|
365
373
|
chat_provider: 'ollama',
|
|
366
374
|
chat_model: 'llama3',
|
|
367
|
-
chat_base_url: 'http://localhost:11434'
|
|
375
|
+
chat_base_url: 'http://localhost:11434'
|
|
368
376
|
};
|
|
369
377
|
const chat = buildChatCompleterFromModule(moduleData);
|
|
370
378
|
expect(chat).not.toBeNull();
|
|
@@ -372,7 +380,7 @@ describe('Chat completion abstraction', () => {
|
|
|
372
380
|
});
|
|
373
381
|
it('returns null when chat_provider is not set', () => {
|
|
374
382
|
const moduleData = {
|
|
375
|
-
embedding_provider: 'ollama'
|
|
383
|
+
embedding_provider: 'ollama'
|
|
376
384
|
};
|
|
377
385
|
const chat = buildChatCompleterFromModule(moduleData);
|
|
378
386
|
expect(chat).toBeNull();
|
|
@@ -395,7 +403,7 @@ describe('Chat completion abstraction', () => {
|
|
|
395
403
|
...originalEnv,
|
|
396
404
|
CHAT_PROVIDER: 'ollama',
|
|
397
405
|
CHAT_MODEL: 'llama3',
|
|
398
|
-
CHAT_BASE_URL: 'http://localhost:11434'
|
|
406
|
+
CHAT_BASE_URL: 'http://localhost:11434'
|
|
399
407
|
};
|
|
400
408
|
const chat = buildChatCompleterFromEnv();
|
|
401
409
|
expect(chat).not.toBeNull();
|
|
@@ -434,10 +442,10 @@ function makeTestSmartTagsPlugin(tagsByTable) {
|
|
|
434
442
|
Object.assign(c.extensions.tags, tags);
|
|
435
443
|
}
|
|
436
444
|
return _;
|
|
437
|
-
}
|
|
438
|
-
}
|
|
439
|
-
}
|
|
440
|
-
}
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
}
|
|
441
449
|
};
|
|
442
450
|
}
|
|
443
451
|
describe('RAG plugin schema enrichment', () => {
|
|
@@ -446,7 +454,7 @@ describe('RAG plugin schema enrichment', () => {
|
|
|
446
454
|
let query;
|
|
447
455
|
beforeAll(async () => {
|
|
448
456
|
const unifiedPlugin = createUnifiedSearchPlugin({
|
|
449
|
-
adapters: [createPgvectorAdapter()]
|
|
457
|
+
adapters: [createPgvectorAdapter()]
|
|
450
458
|
});
|
|
451
459
|
const smartTagsPlugin = makeTestSmartTagsPlugin({
|
|
452
460
|
articles: {
|
|
@@ -455,16 +463,22 @@ describe('RAG plugin schema enrichment', () => {
|
|
|
455
463
|
parentFk: 'parent_id',
|
|
456
464
|
parentPk: 'id',
|
|
457
465
|
embeddingField: 'embedding',
|
|
458
|
-
contentField: 'content'
|
|
459
|
-
}
|
|
460
|
-
}
|
|
466
|
+
contentField: 'content'
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
});
|
|
470
|
+
// Mock embedder that returns a fixed 3-dim vector with token count
|
|
471
|
+
const mockEmbedder = async (_text) => ({
|
|
472
|
+
embedding: [1, 0, 0],
|
|
473
|
+
promptTokens: 5
|
|
461
474
|
});
|
|
462
|
-
// Mock
|
|
463
|
-
const mockEmbedder = async (_text) => [1, 0, 0];
|
|
464
|
-
// Mock chat completer that returns a canned response
|
|
475
|
+
// Mock chat completer that returns a canned response with usage
|
|
465
476
|
const mockChatCompleter = async (messages) => {
|
|
466
477
|
const userMessage = messages.find((m) => m.role === 'user');
|
|
467
|
-
return
|
|
478
|
+
return {
|
|
479
|
+
content: `Mock answer for: ${userMessage?.content || 'unknown'}`,
|
|
480
|
+
usage: { input: 10, output: 15, reasoning: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 25 }
|
|
481
|
+
};
|
|
468
482
|
};
|
|
469
483
|
const testPreset = {
|
|
470
484
|
extends: [ConnectionFilterPreset()],
|
|
@@ -476,13 +490,13 @@ describe('RAG plugin schema enrichment', () => {
|
|
|
476
490
|
defaultEmbedder: {
|
|
477
491
|
provider: 'ollama',
|
|
478
492
|
model: 'nomic-embed-text',
|
|
479
|
-
baseUrl: 'http://localhost:11434'
|
|
480
|
-
}
|
|
493
|
+
baseUrl: 'http://localhost:11434'
|
|
494
|
+
}
|
|
481
495
|
}),
|
|
482
496
|
createLlmTextSearchPlugin(),
|
|
483
497
|
createLlmTextMutationPlugin(),
|
|
484
|
-
createLlmRagPlugin()
|
|
485
|
-
]
|
|
498
|
+
createLlmRagPlugin()
|
|
499
|
+
]
|
|
486
500
|
};
|
|
487
501
|
// Override the embedder and chat completer on the build context
|
|
488
502
|
// by wrapping the LlmModulePlugin's build hook
|
|
@@ -495,20 +509,20 @@ describe('RAG plugin schema enrichment', () => {
|
|
|
495
509
|
build(build) {
|
|
496
510
|
return build.extend(build, {
|
|
497
511
|
llmEmbedder: mockEmbedder,
|
|
498
|
-
llmChatCompleter: mockChatCompleter
|
|
512
|
+
llmChatCompleter: mockChatCompleter
|
|
499
513
|
}, 'TestOverridePlugin overriding embedder and chat completer');
|
|
500
|
-
}
|
|
501
|
-
}
|
|
502
|
-
}
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
}
|
|
503
517
|
};
|
|
504
518
|
const connections = await getConnections({
|
|
505
519
|
schemas: ['llm_test'],
|
|
506
520
|
preset: {
|
|
507
521
|
...testPreset,
|
|
508
|
-
plugins: [...testPreset.plugins, overridePlugin]
|
|
522
|
+
plugins: [...testPreset.plugins, overridePlugin]
|
|
509
523
|
},
|
|
510
524
|
useRoot: true,
|
|
511
|
-
authRole: 'postgres'
|
|
525
|
+
authRole: 'postgres'
|
|
512
526
|
}, [seed.sqlfile([join(__dirname, './setup.sql')])]);
|
|
513
527
|
db = connections.db;
|
|
514
528
|
teardown = connections.teardown;
|
|
@@ -632,7 +646,7 @@ describe('GraphileLlmPreset toggles', () => {
|
|
|
632
646
|
it('enableRag=false excludes RAG plugin (no ragQuery field)', async () => {
|
|
633
647
|
const { GraphileLlmPreset } = await import('../../src/preset');
|
|
634
648
|
const preset = GraphileLlmPreset({
|
|
635
|
-
enableRag: false
|
|
649
|
+
enableRag: false
|
|
636
650
|
});
|
|
637
651
|
const pluginNames = preset.plugins.map((p) => p.name);
|
|
638
652
|
expect(pluginNames).not.toContain('LlmRagPlugin');
|
|
@@ -640,7 +654,7 @@ describe('GraphileLlmPreset toggles', () => {
|
|
|
640
654
|
it('enableRag=true includes RAG plugin', async () => {
|
|
641
655
|
const { GraphileLlmPreset } = await import('../../src/preset');
|
|
642
656
|
const preset = GraphileLlmPreset({
|
|
643
|
-
enableRag: true
|
|
657
|
+
enableRag: true
|
|
644
658
|
});
|
|
645
659
|
const pluginNames = preset.plugins.map((p) => p.name);
|
|
646
660
|
expect(pluginNames).toContain('LlmRagPlugin');
|
|
@@ -648,7 +662,7 @@ describe('GraphileLlmPreset toggles', () => {
|
|
|
648
662
|
it('enableTextSearch=false excludes text search plugin', async () => {
|
|
649
663
|
const { GraphileLlmPreset } = await import('../../src/preset');
|
|
650
664
|
const preset = GraphileLlmPreset({
|
|
651
|
-
enableTextSearch: false
|
|
665
|
+
enableTextSearch: false
|
|
652
666
|
});
|
|
653
667
|
const pluginNames = preset.plugins.map((p) => p.name);
|
|
654
668
|
expect(pluginNames).not.toContain('LlmTextSearchPlugin');
|
|
@@ -658,7 +672,7 @@ describe('GraphileLlmPreset toggles', () => {
|
|
|
658
672
|
it('enableTextMutations=false excludes text mutation plugin', async () => {
|
|
659
673
|
const { GraphileLlmPreset } = await import('../../src/preset');
|
|
660
674
|
const preset = GraphileLlmPreset({
|
|
661
|
-
enableTextMutations: false
|
|
675
|
+
enableTextMutations: false
|
|
662
676
|
});
|
|
663
677
|
const pluginNames = preset.plugins.map((p) => p.name);
|
|
664
678
|
expect(pluginNames).not.toContain('LlmTextMutationPlugin');
|
|
@@ -668,7 +682,7 @@ describe('GraphileLlmPreset toggles', () => {
|
|
|
668
682
|
const preset = GraphileLlmPreset({
|
|
669
683
|
enableTextSearch: false,
|
|
670
684
|
enableTextMutations: false,
|
|
671
|
-
enableRag: false
|
|
685
|
+
enableRag: false
|
|
672
686
|
});
|
|
673
687
|
const pluginNames = preset.plugins.map((p) => p.name);
|
|
674
688
|
expect(pluginNames).toEqual(['LlmModulePlugin']);
|
package/esm/chat.js
CHANGED
|
@@ -11,38 +11,51 @@
|
|
|
11
11
|
* 2. The preset's `defaultChatCompleter` option (fallback for dev/testing)
|
|
12
12
|
* 3. Environment variables (CHAT_PROVIDER, CHAT_MODEL, CHAT_BASE_URL)
|
|
13
13
|
*/
|
|
14
|
-
import
|
|
14
|
+
import { OllamaAdapter } from '@agentic-kit/ollama';
|
|
15
15
|
import { getLlmEnvOptions } from './env';
|
|
16
16
|
// ─── Built-in Providers ─────────────────────────────────────────────────────
|
|
17
17
|
/**
|
|
18
18
|
* Create an Ollama-based chat completion function.
|
|
19
19
|
*
|
|
20
|
-
* Uses
|
|
21
|
-
*
|
|
20
|
+
* Uses OllamaAdapter.stream() to get both response content and real token
|
|
21
|
+
* usage counts from the provider (prompt_eval_count, eval_count).
|
|
22
22
|
*/
|
|
23
23
|
function createOllamaChatCompleter(baseUrl = 'http://localhost:11434', model = 'llama3') {
|
|
24
|
-
const
|
|
24
|
+
const adapter = new OllamaAdapter(baseUrl);
|
|
25
25
|
return async (messages, options) => {
|
|
26
|
-
// Build the input for OllamaClient.generate() in chat mode
|
|
27
|
-
const input = {
|
|
28
|
-
model,
|
|
29
|
-
messages: messages.filter((m) => m.role !== 'system'),
|
|
30
|
-
};
|
|
31
|
-
// Extract system message if present
|
|
32
26
|
const systemMsg = messages.find((m) => m.role === 'system');
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
27
|
+
const nonSystem = messages.filter((m) => m.role !== 'system');
|
|
28
|
+
const modelDesc = adapter.createModel(model, {
|
|
29
|
+
maxOutputTokens: options?.maxTokens
|
|
30
|
+
});
|
|
31
|
+
const context = {
|
|
32
|
+
systemPrompt: systemMsg?.content,
|
|
33
|
+
messages: nonSystem.map((m) => ({
|
|
34
|
+
role: m.role,
|
|
35
|
+
content: m.content,
|
|
36
|
+
timestamp: Date.now()
|
|
37
|
+
}))
|
|
38
|
+
};
|
|
39
|
+
const stream = adapter.stream(modelDesc, context, {
|
|
40
|
+
temperature: options?.temperature,
|
|
41
|
+
maxTokens: options?.maxTokens
|
|
42
|
+
});
|
|
43
|
+
const result = await stream.result();
|
|
44
|
+
const content = result.content
|
|
45
|
+
.filter((block) => block.type === 'text')
|
|
46
|
+
.map((block) => block.text)
|
|
47
|
+
.join('');
|
|
48
|
+
return {
|
|
49
|
+
content,
|
|
50
|
+
usage: {
|
|
51
|
+
input: result.usage.input,
|
|
52
|
+
output: result.usage.output,
|
|
53
|
+
reasoning: result.usage.reasoning,
|
|
54
|
+
cacheRead: result.usage.cacheRead,
|
|
55
|
+
cacheWrite: result.usage.cacheWrite,
|
|
56
|
+
totalTokens: result.usage.totalTokens
|
|
57
|
+
}
|
|
58
|
+
};
|
|
46
59
|
};
|
|
47
60
|
}
|
|
48
61
|
// ─── Chat Completer Construction ────────────────────────────────────────────
|
|
@@ -73,7 +86,7 @@ export function buildChatCompleterFromModule(data) {
|
|
|
73
86
|
return buildChatCompleter({
|
|
74
87
|
provider: data.chat_provider,
|
|
75
88
|
model: data.chat_model,
|
|
76
|
-
baseUrl: data.chat_base_url
|
|
89
|
+
baseUrl: data.chat_base_url
|
|
77
90
|
});
|
|
78
91
|
}
|
|
79
92
|
/**
|
package/esm/config-cache.js
CHANGED
|
@@ -51,7 +51,7 @@ const INFERENCE_LOG_MODULE_SQL = `
|
|
|
51
51
|
const billingCache = new ModuleConfigCache({
|
|
52
52
|
name: 'billing-config',
|
|
53
53
|
ttlMs: 5 * 60 * 1000, // 5 minutes
|
|
54
|
-
max: 50
|
|
54
|
+
max: 50
|
|
55
55
|
});
|
|
56
56
|
// ─── Resolution Functions ───────────────────────────────────────────────────
|
|
57
57
|
/**
|
|
@@ -72,7 +72,7 @@ async function resolveInferenceLogConfig(pgClient, databaseId) {
|
|
|
72
72
|
return null;
|
|
73
73
|
return {
|
|
74
74
|
schema: row.schema,
|
|
75
|
-
tableName: row.table_name
|
|
75
|
+
tableName: row.table_name
|
|
76
76
|
};
|
|
77
77
|
}
|
|
78
78
|
catch {
|
|
@@ -96,7 +96,7 @@ async function resolveBillingConfig(pgClient, databaseId) {
|
|
|
96
96
|
privateSchema: row.private_schema,
|
|
97
97
|
recordUsageFunction: row.record_usage_function,
|
|
98
98
|
// The check_billing_quota function name follows the inflection pattern
|
|
99
|
-
checkBillingQuotaFunction: 'check_billing_quota'
|
|
99
|
+
checkBillingQuotaFunction: 'check_billing_quota'
|
|
100
100
|
};
|
|
101
101
|
}
|
|
102
102
|
catch {
|
|
@@ -118,7 +118,7 @@ export async function getLlmBillingConfig(pgClient, databaseId) {
|
|
|
118
118
|
return cached;
|
|
119
119
|
const [billing, inferenceLog] = await Promise.all([
|
|
120
120
|
resolveBillingConfig(pgClient, databaseId),
|
|
121
|
-
resolveInferenceLogConfig(pgClient, databaseId)
|
|
121
|
+
resolveInferenceLogConfig(pgClient, databaseId)
|
|
122
122
|
]);
|
|
123
123
|
const entry = { billing, inferenceLog };
|
|
124
124
|
billingCache.set(databaseId, entry);
|
package/esm/embedder.js
CHANGED
|
@@ -14,6 +14,8 @@ import { getLlmEnvOptions } from './env';
|
|
|
14
14
|
// ─── Built-in Providers ─────────────────────────────────────────────────────
|
|
15
15
|
/**
|
|
16
16
|
* Create an Ollama-based embedder function.
|
|
17
|
+
*
|
|
18
|
+
* Uses the /api/embed endpoint which returns prompt_eval_count (real token count).
|
|
17
19
|
*/
|
|
18
20
|
function createOllamaEmbedder(baseUrl = 'http://localhost:11434', model = 'nomic-embed-text') {
|
|
19
21
|
const client = new OllamaClient(baseUrl);
|
|
@@ -47,7 +49,7 @@ export function buildEmbedderFromModule(data) {
|
|
|
47
49
|
return buildEmbedder({
|
|
48
50
|
provider: data.embedding_provider,
|
|
49
51
|
model: data.embedding_model,
|
|
50
|
-
baseUrl: data.embedding_base_url
|
|
52
|
+
baseUrl: data.embedding_base_url
|
|
51
53
|
});
|
|
52
54
|
}
|
|
53
55
|
/**
|
package/esm/env.js
CHANGED
|
@@ -18,13 +18,13 @@ const LLM_DEFAULTS = {
|
|
|
18
18
|
embedding: {
|
|
19
19
|
provider: 'ollama',
|
|
20
20
|
model: 'nomic-embed-text',
|
|
21
|
-
baseUrl: 'http://localhost:11434'
|
|
21
|
+
baseUrl: 'http://localhost:11434'
|
|
22
22
|
},
|
|
23
23
|
chat: {
|
|
24
24
|
provider: 'ollama',
|
|
25
25
|
model: 'llama3',
|
|
26
|
-
baseUrl: 'http://localhost:11434'
|
|
27
|
-
}
|
|
26
|
+
baseUrl: 'http://localhost:11434'
|
|
27
|
+
}
|
|
28
28
|
};
|
|
29
29
|
// ─── Resolution ─────────────────────────────────────────────────────────────
|
|
30
30
|
/**
|
|
@@ -38,12 +38,12 @@ export function getLlmEnvOptions() {
|
|
|
38
38
|
embedding: {
|
|
39
39
|
provider: process.env.EMBEDDER_PROVIDER ?? LLM_DEFAULTS.embedding.provider,
|
|
40
40
|
model: process.env.EMBEDDER_MODEL ?? LLM_DEFAULTS.embedding.model,
|
|
41
|
-
baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl
|
|
41
|
+
baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl
|
|
42
42
|
},
|
|
43
43
|
chat: {
|
|
44
44
|
provider: process.env.CHAT_PROVIDER ?? LLM_DEFAULTS.chat.provider,
|
|
45
45
|
model: process.env.CHAT_MODEL ?? LLM_DEFAULTS.chat.model,
|
|
46
|
-
baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl
|
|
47
|
-
}
|
|
46
|
+
baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl
|
|
47
|
+
}
|
|
48
48
|
};
|
|
49
49
|
}
|
package/esm/index.d.ts
CHANGED
|
@@ -29,20 +29,20 @@
|
|
|
29
29
|
* };
|
|
30
30
|
* ```
|
|
31
31
|
*/
|
|
32
|
-
export { getLlmEnvOptions } from './env';
|
|
33
32
|
export type { LlmEnvOptions, LlmProviderConfig } from './env';
|
|
33
|
+
export { getLlmEnvOptions } from './env';
|
|
34
34
|
export { GraphileLlmPreset } from './preset';
|
|
35
35
|
export { createLlmModulePlugin } from './plugins/llm-module-plugin';
|
|
36
|
-
export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
37
|
-
export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
38
36
|
export { createLlmRagPlugin } from './plugins/rag-plugin';
|
|
37
|
+
export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
38
|
+
export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
39
39
|
export { createLlmMeteringPlugin } from './plugins/metering-plugin';
|
|
40
|
-
export {
|
|
41
|
-
export
|
|
42
|
-
export { buildEmbedder,
|
|
43
|
-
export { buildChatCompleter,
|
|
44
|
-
export {
|
|
45
|
-
export
|
|
46
|
-
export {
|
|
47
|
-
export
|
|
48
|
-
export type {
|
|
40
|
+
export type { AgentDiscovery, AgentTableInfo } from './plugins/agent-discovery-plugin';
|
|
41
|
+
export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
|
|
42
|
+
export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
|
|
43
|
+
export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
|
|
44
|
+
export type { InferenceLogEntry, MeteringContext, MeteringOptions, MeterResult, WithPgClient } from './metering';
|
|
45
|
+
export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
|
|
46
|
+
export type { BillingConfig, InferenceLogConfig, LlmBillingCacheEntry, PgClient } from './config-cache';
|
|
47
|
+
export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
|
|
48
|
+
export type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, ChatResult, ChunkTableInfo, EmbedderConfig, EmbedderFunction, EmbeddingResult, GraphileLlmOptions, LlmModuleData, LlmUsage, MeteringConfig, RagDefaults } from './types';
|
package/esm/index.js
CHANGED
|
@@ -29,24 +29,20 @@
|
|
|
29
29
|
* };
|
|
30
30
|
* ```
|
|
31
31
|
*/
|
|
32
|
-
// Environment configuration (single source of truth for LLM defaults)
|
|
33
32
|
export { getLlmEnvOptions } from './env';
|
|
34
33
|
// Preset (recommended entry point)
|
|
35
34
|
export { GraphileLlmPreset } from './preset';
|
|
36
35
|
// Individual plugins (pure — no billing dependency)
|
|
37
36
|
export { createLlmModulePlugin } from './plugins/llm-module-plugin';
|
|
38
|
-
export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
39
|
-
export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
40
37
|
export { createLlmRagPlugin } from './plugins/rag-plugin';
|
|
38
|
+
export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
|
|
39
|
+
export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
|
|
41
40
|
// Metering plugin (opt-in billing integration)
|
|
42
41
|
export { createLlmMeteringPlugin } from './plugins/metering-plugin';
|
|
43
|
-
|
|
44
|
-
export { getAgentDiscovery, clearAgentDiscoveryCache } from './plugins/agent-discovery-plugin';
|
|
42
|
+
export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
|
|
45
43
|
// Embedder utilities
|
|
46
|
-
export { buildEmbedder,
|
|
44
|
+
export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
|
|
47
45
|
// Chat completion utilities
|
|
48
|
-
export { buildChatCompleter,
|
|
49
|
-
|
|
50
|
-
export {
|
|
51
|
-
// Config cache (for custom integration)
|
|
52
|
-
export { getLlmBillingConfig, invalidateLlmBillingConfig, getLlmBillingCacheStats, } from './config-cache';
|
|
46
|
+
export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
|
|
47
|
+
export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
|
|
48
|
+
export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
|
package/esm/metering.d.ts
CHANGED
|
@@ -9,16 +9,16 @@
|
|
|
9
9
|
* When the quota check fails, the wrapper returns null (graceful degradation)
|
|
10
10
|
* instead of throwing, so the search pipeline can fall back to text-only.
|
|
11
11
|
*
|
|
12
|
-
* Token counts
|
|
13
|
-
*
|
|
14
|
-
*
|
|
12
|
+
* Token counts:
|
|
13
|
+
* - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
|
|
14
|
+
* - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
|
|
15
15
|
*
|
|
16
16
|
* The billing functions live in the tenant database and are called via the
|
|
17
17
|
* Graphile `withPgClient` callback. Function locations (schema, names) are
|
|
18
18
|
* resolved from `billing_module` metaschema and cached by `config-cache.ts`.
|
|
19
19
|
*/
|
|
20
|
-
import type {
|
|
21
|
-
import type {
|
|
20
|
+
import type { BillingConfig, InferenceLogConfig, PgClient } from './config-cache';
|
|
21
|
+
import type { ChatFunction, ChatMessage, ChatOptions, EmbedderFunction } from './types';
|
|
22
22
|
/**
|
|
23
23
|
* Callback matching Graphile's withPgClient signature.
|
|
24
24
|
* Acquires a pg client, calls the callback, then releases the client.
|