@revealui/ai 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/dist/ingestion/pipeline.d.ts +2 -1
- package/dist/ingestion/pipeline.d.ts.map +1 -1
- package/dist/ingestion/pipeline.js +10 -5
- package/dist/llm/client.d.ts +12 -2
- package/dist/llm/client.d.ts.map +1 -1
- package/dist/llm/client.js +46 -6
- package/dist/llm/providers/anthropic.d.ts.map +1 -1
- package/dist/llm/providers/anthropic.js +17 -3
- package/dist/llm/providers/base.d.ts +28 -1
- package/dist/llm/providers/base.d.ts.map +1 -1
- package/dist/llm/providers/bitnet.d.ts +28 -0
- package/dist/llm/providers/bitnet.d.ts.map +1 -0
- package/dist/llm/providers/bitnet.js +36 -0
- package/dist/llm/providers/inference-snaps.d.ts +44 -0
- package/dist/llm/providers/inference-snaps.d.ts.map +1 -0
- package/dist/llm/providers/inference-snaps.js +65 -0
- package/dist/llm/providers/openai.d.ts.map +1 -1
- package/dist/llm/providers/openai.js +2 -0
- package/dist/llm/semantic-cache.d.ts.map +1 -1
- package/dist/llm/semantic-cache.js +13 -5
- package/dist/llm/server.d.ts +2 -0
- package/dist/llm/server.d.ts.map +1 -1
- package/dist/llm/server.js +2 -0
- package/dist/memory/persistence/crdt-persistence.d.ts.map +1 -1
- package/dist/memory/persistence/crdt-persistence.js +2 -1
- package/dist/memory/preferences/user-preferences-manager.d.ts.map +1 -1
- package/dist/memory/preferences/user-preferences-manager.js +10 -9
- package/dist/memory/stores/episodic-memory.d.ts.map +1 -1
- package/dist/memory/stores/episodic-memory.js +2 -1
- package/dist/memory/utils/index.d.ts +0 -1
- package/dist/memory/utils/index.d.ts.map +1 -1
- package/dist/memory/utils/index.js +0 -1
- package/dist/memory/vector/vector-memory-service.d.ts +3 -0
- package/dist/memory/vector/vector-memory-service.d.ts.map +1 -1
- package/dist/memory/vector/vector-memory-service.js +18 -5
- package/dist/skills/loader/github-loader.d.ts.map +1 -1
- package/dist/skills/loader/github-loader.js +27 -16
- package/dist/skills/loader/vercel-loader.d.ts.map +1 -1
- package/dist/skills/loader/vercel-loader.js +26 -16
- package/package.json +16 -9
- package/dist/memory/utils/logger.d.ts +0 -21
- package/dist/memory/utils/logger.d.ts.map +0 -1
- package/dist/memory/utils/logger.js +0 -62
package/README.md
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# @revealui/ai
|
|
2
2
|
|
|
3
|
+
> **Commercial package** — requires a [RevealUI Pro license](https://revealui.com/pro). Free to install and evaluate; a license key is required for production use.
|
|
4
|
+
|
|
5
|
+
|
|
3
6
|
AI system for RevealUI - memory, LLM, orchestration, and tools.
|
|
4
7
|
|
|
5
8
|
## Features
|
|
@@ -24,9 +24,10 @@ export interface IngestResult {
|
|
|
24
24
|
}
|
|
25
25
|
export declare class IngestionPipeline {
|
|
26
26
|
private db;
|
|
27
|
+
private restDb;
|
|
27
28
|
private embeddingFn;
|
|
28
29
|
private splitter;
|
|
29
|
-
constructor(db: Database, embeddingFn: (text: string) => Promise<number[]>);
|
|
30
|
+
constructor(db: Database, restDb: Database, embeddingFn: (text: string) => Promise<number[]>);
|
|
30
31
|
ingest(req: IngestRequest): Promise<IngestResult>;
|
|
31
32
|
ingestBatch(docs: IngestRequest[]): Promise<IngestResult[]>;
|
|
32
33
|
deleteDocument(documentId: string): Promise<void>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAA;
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAA;AAOnD,MAAM,WAAW,aAAa;IAC5B,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,gBAAgB,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAAA;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,MAAM,EAAE,SAAS,GAAG,QAAQ,CAAA;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAcD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,EAAE,CAAU;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,WAAW,CAAqC;IACxD,OAAO,CAAC,QAAQ,CAA4B;gBAEhC,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC;IAOtF,MAAM,CAAC,GAAG,EAAE,aAAa,GAAG,OAAO,CAAC,YAAY,CAAC;IA6EjD,WAAW,CAAC,IAAI,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAa3D,cAAc,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKjD,cAAc,CAClB,WAAW,EAAE,MAAM,EACnB,gBAAgB,EAAE,MAAM,EACxB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,IAAI,CAAC;CAiBjB"}
|
|
@@ -4,12 +4,13 @@
|
|
|
4
4
|
* Orchestrates: parse → split → embed → store.
|
|
5
5
|
* One document produces N chunks, each with a 768-dim embedding.
|
|
6
6
|
*/
|
|
7
|
+
import { safeVectorInsert } from '@revealui/db/validation';
|
|
7
8
|
import { ragChunks, ragDocuments } from '@revealui/db/schema/rag';
|
|
8
9
|
import { and, eq } from 'drizzle-orm';
|
|
9
10
|
import { createParser } from './file-parsers.js';
|
|
10
11
|
import { RecursiveCharacterSplitter } from './text-splitter.js';
|
|
11
12
|
function generateId(prefix) {
|
|
12
|
-
return `${prefix}-${
|
|
13
|
+
return `${prefix}-${crypto.randomUUID()}`;
|
|
13
14
|
}
|
|
14
15
|
function estimateWordCount(text) {
|
|
15
16
|
return text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
@@ -19,18 +20,22 @@ function estimateTokens(text) {
|
|
|
19
20
|
}
|
|
20
21
|
export class IngestionPipeline {
|
|
21
22
|
db;
|
|
23
|
+
restDb;
|
|
22
24
|
embeddingFn;
|
|
23
25
|
splitter;
|
|
24
|
-
constructor(db, embeddingFn) {
|
|
26
|
+
constructor(db, restDb, embeddingFn) {
|
|
25
27
|
this.db = db;
|
|
28
|
+
this.restDb = restDb;
|
|
26
29
|
this.embeddingFn = embeddingFn;
|
|
27
30
|
this.splitter = new RecursiveCharacterSplitter();
|
|
28
31
|
}
|
|
29
32
|
async ingest(req) {
|
|
30
33
|
const docId = generateId('rdoc');
|
|
31
34
|
const now = new Date();
|
|
32
|
-
// 1. Insert document row with status='processing'
|
|
33
|
-
|
|
35
|
+
// 1. Insert document row with status='processing', guarded by cross-DB ref check.
|
|
36
|
+
// safeVectorInsert validates that workspaceId (= site ID) exists in NeonDB before
|
|
37
|
+
// writing to the Supabase vector store, preventing orphaned RAG documents.
|
|
38
|
+
await safeVectorInsert(this.restDb, async () => this.db.insert(ragDocuments).values({
|
|
34
39
|
id: docId,
|
|
35
40
|
workspaceId: req.workspaceId,
|
|
36
41
|
sourceType: req.sourceType,
|
|
@@ -44,7 +49,7 @@ export class IngestionPipeline {
|
|
|
44
49
|
status: 'processing',
|
|
45
50
|
createdAt: now,
|
|
46
51
|
updatedAt: now,
|
|
47
|
-
});
|
|
52
|
+
}), { siteId: req.workspaceId });
|
|
48
53
|
try {
|
|
49
54
|
// 2. Parse
|
|
50
55
|
const parser = createParser(req.mimeType ?? 'text/plain');
|
package/dist/llm/client.d.ts
CHANGED
|
@@ -12,10 +12,10 @@ export declare function redactSensitiveFields(obj: Record<string, unknown>): Rec
|
|
|
12
12
|
import type { Database } from '@revealui/db/client';
|
|
13
13
|
import type { AuditStore } from '../audit/store.js';
|
|
14
14
|
import type { ProviderHealthMonitor } from './provider-health.js';
|
|
15
|
-
import type { Embedding, LLMChatOptions, LLMChunk, LLMEmbedOptions, LLMResponse, LLMStreamOptions, Message } from './providers/base.js';
|
|
15
|
+
import type { Embedding, LLMChatOptions, LLMChunk, LLMEmbedOptions, LLMProvider, LLMResponse, LLMStreamOptions, Message } from './providers/base.js';
|
|
16
16
|
import { type CacheStats, type ResponseCacheOptions } from './response-cache.js';
|
|
17
17
|
import { type SemanticCacheOptions, type SemanticCacheStats } from './semantic-cache.js';
|
|
18
|
-
export type LLMProviderType = 'openai' | 'anthropic' | 'vultr' | 'groq' | 'ollama' | 'huggingface';
|
|
18
|
+
export type LLMProviderType = 'openai' | 'anthropic' | 'vultr' | 'groq' | 'ollama' | 'bitnet' | 'huggingface' | 'inference-snaps';
|
|
19
19
|
export interface LLMClientConfig {
|
|
20
20
|
provider: LLMProviderType;
|
|
21
21
|
apiKey: string;
|
|
@@ -27,6 +27,15 @@ export interface LLMClientConfig {
|
|
|
27
27
|
apiKeyFn?: () => Promise<string>;
|
|
28
28
|
baseURL?: string;
|
|
29
29
|
model?: string;
|
|
30
|
+
/**
|
|
31
|
+
* Dedicated embedding provider. When set, all embed() calls are routed here
|
|
32
|
+
* instead of the primary provider. Required when the primary provider does not
|
|
33
|
+
* support embeddings (e.g. BitNet).
|
|
34
|
+
*
|
|
35
|
+
* Auto-wired by createLLMClientFromEnv() when BITNET_BASE_URL + OLLAMA_BASE_URL
|
|
36
|
+
* are both set.
|
|
37
|
+
*/
|
|
38
|
+
embedProvider?: LLMProvider;
|
|
30
39
|
temperature?: number;
|
|
31
40
|
maxTokens?: number;
|
|
32
41
|
fallbackProvider?: LLMProviderType;
|
|
@@ -50,6 +59,7 @@ export interface LLMClientConfig {
|
|
|
50
59
|
export declare class LLMClient {
|
|
51
60
|
private provider;
|
|
52
61
|
private fallbackProvider?;
|
|
62
|
+
private embedProviderOverride?;
|
|
53
63
|
private config;
|
|
54
64
|
private rateLimitState;
|
|
55
65
|
private responseCache?;
|
package/dist/llm/client.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/llm/client.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAoBH;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAY3F;AAID,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAA;AAInD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AACnD,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAA;
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/llm/client.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAoBH;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAY3F;AAID,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAA;AAInD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AACnD,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAA;AAGjE,OAAO,KAAK,EACV,SAAS,EACT,cAAc,EACd,QAAQ,EACR,eAAe,EACf,WAAW,EACX,WAAW,EACX,gBAAgB,EAChB,OAAO,EACR,MAAM,qBAAqB,CAAA;AAS5B,OAAO,EAAE,KAAK,UAAU,EAAiB,KAAK,oBAAoB,EAAE,MAAM,qBAAqB,CAAA;AAC/F,OAAO,EAEL,KAAK,oBAAoB,EACzB,KAAK,kBAAkB,EACxB,MAAM,qBAAqB,CAAA;AAG5B,MAAM,MAAM,eAAe,GACvB,QAAQ,GACR,WAAW,GACX,OAAO,GACP,MAAM,GACN,QAAQ,GACR,QAAQ,GACR,aAAa,GACb,iBAAiB,CAAA;AAErB,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,eAAe,CAAA;IACzB,MAAM,EAAE,MAAM,CAAA;IACd;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,OAAO,CAAC,MAAM,CAAC,CAAA;IAChC,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd;;;;;;;OAOG;IACH,aAAa,CAAC,EAAE,WAAW,CAAA;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,gBAAgB,CAAC,EAAE,eAAe,CAAA;IAClC,SAAS,CAAC,EAAE;QACV,iBAAiB,CAAC,EAAE,MAAM,CAAA;QAC1B,cAAc,CAAC,EAAE,MAAM,CAAA;KACxB,CAAA;IACD,oFAAoF;IACpF,oBAAoB,CAAC,EAAE,OAAO,CAAA;IAC9B,wEAAwE;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAA;IAC7B,6BAA6B;IAC7B,oBAAoB,CAAC,EAAE,oBAAoB,CAAA;IAC3C,iEAAiE;IACjE,mBAAmB,CAAC,EAAE,OAAO,CAAA;IAC7B,6BAA6B;IAC7B,oBAAoB,CAAC,EAAE,oBAAoB,CAAA;IAC3C,0EAA0E;IAC1E,aAAa,CAAC,EAAE,qBAAqB,CAAA;CACtC;AAQD,qBAAa,SAAS;IACpB,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,gBAAgB,CAAC,CAAa;IACtC,OAAO,CAAC,qBAAqB,CAAC,CAAa;IAC3C,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,cAAc,CAAgB;IACtC,OAAO,CAAC,aAAa,CAAC,CAAe;IACrC,OAAO,CAAC,aAAa,CAAC,CAAe;IACrC,OAAO,CAAC,aAAa,CAAC,CAAuB;IAC7C,wFAAwF;IACxF,OAAO,CAAC,aAAa,CAAQ;gBAEjB,MAAM,EAAE,eAAe;IA8CnC,OAAO,CAAC,cAAc;IAkCtB;;;OAGG;YACW,uBAAuB;IAmBrC,OAAO,CAAC,cAAc;IAoCtB,OAAO,CAAC,aAAa;IAMf,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IAmHzE,KAAK,CACT,IAAI,EAAE,MAAM,GA
AG,MAAM,EAAE,EACvB,OAAO,CAAC,EAAE,eAAe,GACxB,OAAO,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC;IA2B5B,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC;IA0BvF;;;OAGG;IACH,eAAe,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,gBAAgB,EAAE,MAAM,CAAA;KAAE;IAIlF;;OAEG;IACH,gBAAgB,IAAI,qBAAqB,GAAG,SAAS;IAIrD;;;;OAIG;IACH,qBAAqB,IAAI,UAAU,GAAG,SAAS;IAI/C;;OAEG;IACH,kBAAkB,IAAI,IAAI;IAI1B;;;;OAIG;IACH,qBAAqB,IAAI,kBAAkB,GAAG,SAAS;IAIvD;;OAEG;IACH,kBAAkB,IAAI,IAAI;CAG3B;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,sBAAsB,IAAI,SAAS,CAgGlD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,MAAM,EACd,EAAE,EAAE,QAAQ,EACZ,UAAU,CAAC,EAAE,UAAU,GACtB,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAiD3B"}
|
package/dist/llm/client.js
CHANGED
|
@@ -43,7 +43,9 @@ import { decryptApiKey } from '@revealui/db/crypto';
|
|
|
43
43
|
import { tenantProviderConfigs, userApiKeys } from '@revealui/db/schema';
|
|
44
44
|
import { and, eq } from 'drizzle-orm';
|
|
45
45
|
import { AnthropicProvider } from './providers/anthropic.js';
|
|
46
|
+
import { BitnetProvider } from './providers/bitnet.js';
|
|
46
47
|
import { GroqProvider } from './providers/groq.js';
|
|
48
|
+
import { InferenceSnapsProvider, } from './providers/inference-snaps.js';
|
|
47
49
|
import { OllamaProvider } from './providers/ollama.js';
|
|
48
50
|
import { OpenAIProvider } from './providers/openai.js';
|
|
49
51
|
import { VultrProvider } from './providers/vultr.js';
|
|
@@ -53,6 +55,7 @@ import { estimateRequest as _estimateRequestTokens } from './token-counter.js';
|
|
|
53
55
|
export class LLMClient {
|
|
54
56
|
provider;
|
|
55
57
|
fallbackProvider;
|
|
58
|
+
embedProviderOverride;
|
|
56
59
|
config;
|
|
57
60
|
rateLimitState;
|
|
58
61
|
responseCache;
|
|
@@ -78,6 +81,8 @@ export class LLMClient {
|
|
|
78
81
|
}
|
|
79
82
|
// Wire health monitor if provided
|
|
80
83
|
this.healthMonitor = config.healthMonitor;
|
|
84
|
+
// Wire dedicated embed provider if supplied (e.g. Ollama when BitNet is primary)
|
|
85
|
+
this.embedProviderOverride = config.embedProvider;
|
|
81
86
|
// Create primary provider
|
|
82
87
|
this.provider = this.createProvider(config.provider, {
|
|
83
88
|
apiKey: config.apiKey,
|
|
@@ -112,6 +117,10 @@ export class LLMClient {
|
|
|
112
117
|
return new GroqProvider(config);
|
|
113
118
|
case 'ollama':
|
|
114
119
|
return new OllamaProvider(config);
|
|
120
|
+
case 'bitnet':
|
|
121
|
+
return new BitnetProvider(config);
|
|
122
|
+
case 'inference-snaps':
|
|
123
|
+
return new InferenceSnapsProvider(config);
|
|
115
124
|
default:
|
|
116
125
|
throw new Error(`Unknown provider type: ${String(type)}`);
|
|
117
126
|
}
|
|
@@ -273,13 +282,15 @@ export class LLMClient {
|
|
|
273
282
|
if (!this.checkRateLimit()) {
|
|
274
283
|
throw new Error('Rate limit exceeded');
|
|
275
284
|
}
|
|
285
|
+
// Use dedicated embed provider if one was configured (e.g. Ollama when BitNet is primary)
|
|
286
|
+
const embedProvider = this.embedProviderOverride ?? this.provider;
|
|
276
287
|
try {
|
|
277
288
|
this.recordRequest();
|
|
278
|
-
return await
|
|
289
|
+
return await embedProvider.embed(text, options);
|
|
279
290
|
}
|
|
280
291
|
catch (error) {
|
|
281
|
-
// Try fallback if available
|
|
282
|
-
if (this.fallbackProvider) {
|
|
292
|
+
// Try fallback if available (only when using the primary provider path)
|
|
293
|
+
if (!this.embedProviderOverride && this.fallbackProvider) {
|
|
283
294
|
try {
|
|
284
295
|
return await this.fallbackProvider.embed(text, options);
|
|
285
296
|
}
|
|
@@ -377,6 +388,12 @@ export function createLLMClientFromEnv() {
|
|
|
377
388
|
if (process.env.LLM_PROVIDER) {
|
|
378
389
|
provider = process.env.LLM_PROVIDER;
|
|
379
390
|
}
|
|
391
|
+
else if (process.env.INFERENCE_SNAPS_BASE_URL) {
|
|
392
|
+
provider = 'inference-snaps';
|
|
393
|
+
}
|
|
394
|
+
else if (process.env.BITNET_BASE_URL) {
|
|
395
|
+
provider = 'bitnet';
|
|
396
|
+
}
|
|
380
397
|
else if (process.env.GROQ_API_KEY) {
|
|
381
398
|
provider = 'groq';
|
|
382
399
|
}
|
|
@@ -389,8 +406,9 @@ export function createLLMClientFromEnv() {
|
|
|
389
406
|
else {
|
|
390
407
|
// No provider configured — throw a clear error. OpenAI is intentionally excluded from
|
|
391
408
|
// auto-detection (no revenue yet). Set LLM_PROVIDER=openai explicitly if needed.
|
|
392
|
-
throw new Error('No LLM provider configured. Set one of:
|
|
393
|
-
'
|
|
409
|
+
throw new Error('No LLM provider configured. Set one of: BITNET_BASE_URL (local BitNet), ' +
|
|
410
|
+
'INFERENCE_SNAPS_BASE_URL (local snap), GROQ_API_KEY (recommended cloud), ' +
|
|
411
|
+
'OLLAMA_BASE_URL (local Ollama), or ANTHROPIC_API_KEY. ' +
|
|
394
412
|
'Alternatively, set LLM_PROVIDER explicitly.');
|
|
395
413
|
}
|
|
396
414
|
let apiKey;
|
|
@@ -422,9 +440,30 @@ export function createLLMClientFromEnv() {
|
|
|
422
440
|
baseURL = process.env.OLLAMA_BASE_URL;
|
|
423
441
|
defaultModel = 'llama3.2:3b';
|
|
424
442
|
}
|
|
443
|
+
else if (provider === 'bitnet') {
|
|
444
|
+
apiKey = 'bitnet'; // llama-server ignores the API key
|
|
445
|
+
baseURL = process.env.BITNET_BASE_URL;
|
|
446
|
+
defaultModel = 'bitnet-b1.58-2B-4T';
|
|
447
|
+
}
|
|
448
|
+
else if (provider === 'inference-snaps') {
|
|
449
|
+
apiKey = 'inference-snaps'; // inference-snaps ignores the API key
|
|
450
|
+
baseURL = process.env.INFERENCE_SNAPS_BASE_URL;
|
|
451
|
+
defaultModel = 'gemma3';
|
|
452
|
+
}
|
|
425
453
|
if (!apiKey) {
|
|
426
454
|
throw new Error(`API key not found for provider "${provider}". Set the corresponding env var ` +
|
|
427
|
-
`(GROQ_API_KEY, OLLAMA_BASE_URL, ANTHROPIC_API_KEY, or OPENAI_API_KEY).`);
|
|
455
|
+
`(INFERENCE_SNAPS_BASE_URL, GROQ_API_KEY, OLLAMA_BASE_URL, ANTHROPIC_API_KEY, or OPENAI_API_KEY).`);
|
|
456
|
+
}
|
|
457
|
+
// When BitNet is the chat provider, auto-wire Ollama as the embed backend.
|
|
458
|
+
// BitNet does not support /v1/embeddings; Ollama (nomic-embed-text) fills that role.
|
|
459
|
+
// If OLLAMA_BASE_URL is not set, embed() will throw with a helpful message.
|
|
460
|
+
let embedProvider;
|
|
461
|
+
if (provider === 'bitnet' && process.env.OLLAMA_BASE_URL) {
|
|
462
|
+
embedProvider = new OllamaProvider({
|
|
463
|
+
apiKey: 'ollama',
|
|
464
|
+
baseURL: process.env.OLLAMA_BASE_URL,
|
|
465
|
+
embedModel: process.env.OLLAMA_EMBED_MODEL ?? 'nomic-embed-text',
|
|
466
|
+
});
|
|
428
467
|
}
|
|
429
468
|
return new LLMClient({
|
|
430
469
|
provider,
|
|
@@ -438,6 +477,7 @@ export function createLLMClientFromEnv() {
|
|
|
438
477
|
process.env.RESPONSE_CACHE_ENABLED === 'true',
|
|
439
478
|
enableSemanticCache: process.env.LLM_ENABLE_SEMANTIC_CACHE === 'true' ||
|
|
440
479
|
process.env.SEMANTIC_CACHE_ENABLED === 'true',
|
|
480
|
+
embedProvider,
|
|
441
481
|
});
|
|
442
482
|
}
|
|
443
483
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../../src/llm/providers/anthropic.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../../src/llm/providers/anthropic.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAEV,SAAS,EACT,cAAc,EACd,QAAQ,EACR,eAAe,EACf,WAAW,EACX,iBAAiB,EACjB,WAAW,EACX,gBAAgB,EAChB,OAAO,EAER,MAAM,WAAW,CAAA;AAelB,MAAM,WAAW,uBAAwB,SAAQ,iBAAiB;IAChE,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,oFAAoF;IACpF,oBAAoB,CAAC,EAAE,OAAO,CAAA;CAC/B;AAqDD,qBAAa,iBAAkB,YAAW,WAAW;IACnD,OAAO,CAAC,MAAM,CAAyB;IACvC,OAAO,CAAC,OAAO,CAAQ;gBAEX,MAAM,EAAE,uBAAuB;IAKrC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IA6F/E,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC;IAUpF,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC;IAoGvF;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IA0B5B;;;OAGG;IACH,OAAO,CAAC,WAAW;IAmBnB,OAAO,CAAC,cAAc;CAkBvB"}
|
|
@@ -3,6 +3,19 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Implementation of LLMProvider for Anthropic Claude API
|
|
5
5
|
*/
|
|
6
|
+
/**
|
|
7
|
+
* Extract plain text from a message content value.
|
|
8
|
+
* Anthropic has its own image format — for now, image parts are silently skipped
|
|
9
|
+
* and only text parts are forwarded. Vision via Anthropic is out of scope.
|
|
10
|
+
*/
|
|
11
|
+
function toTextContent(content) {
|
|
12
|
+
if (typeof content === 'string')
|
|
13
|
+
return content;
|
|
14
|
+
return content
|
|
15
|
+
.filter((p) => p.type === 'text')
|
|
16
|
+
.map((p) => p.text)
|
|
17
|
+
.join('\n');
|
|
18
|
+
}
|
|
6
19
|
const isRecord = (value) => typeof value === 'object' && value !== null && !Array.isArray(value);
|
|
7
20
|
const isTextBlock = (block) => block.type === 'text' && typeof block.text === 'string';
|
|
8
21
|
const isToolUseBlock = (block) => block.type === 'tool_use';
|
|
@@ -198,12 +211,12 @@ export class AnthropicProvider {
|
|
|
198
211
|
}
|
|
199
212
|
// If caching disabled, use simple string format
|
|
200
213
|
if (!enableCache) {
|
|
201
|
-
return systemMessages.map((m) => m.content).join('\n');
|
|
214
|
+
return systemMessages.map((m) => toTextContent(m.content)).join('\n');
|
|
202
215
|
}
|
|
203
216
|
// With caching, use structured format and cache the last block
|
|
204
217
|
return systemMessages.map((msg, index) => ({
|
|
205
218
|
type: 'text',
|
|
206
|
-
text: msg.content,
|
|
219
|
+
text: toTextContent(msg.content),
|
|
207
220
|
// Cache the last system message (most likely to be reused)
|
|
208
221
|
...(index === systemMessages.length - 1 && msg.cacheControl
|
|
209
222
|
? { cache_control: msg.cacheControl }
|
|
@@ -239,7 +252,8 @@ export class AnthropicProvider {
|
|
|
239
252
|
}
|
|
240
253
|
const formatted = {
|
|
241
254
|
role: msg.role === 'assistant' ? 'assistant' : 'user',
|
|
242
|
-
|
|
255
|
+
// Anthropic uses a different image format; extract text only for now.
|
|
256
|
+
content: toTextContent(msg.content),
|
|
243
257
|
};
|
|
244
258
|
return formatted;
|
|
245
259
|
})
|
|
@@ -3,9 +3,36 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Abstract interface for all LLM providers (OpenAI, Anthropic, etc.)
|
|
5
5
|
*/
|
|
6
|
+
/**
|
|
7
|
+
* A plain text content part — used in multipart messages.
|
|
8
|
+
*/
|
|
9
|
+
export interface TextPart {
|
|
10
|
+
type: 'text';
|
|
11
|
+
text: string;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* An image content part — base64 data URL or HTTPS URL.
|
|
15
|
+
* Supported by OpenAI-compatible providers (inference-snaps, Ollama vision, GPT-4o).
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,...' } }
|
|
19
|
+
*/
|
|
20
|
+
export interface ImagePart {
|
|
21
|
+
type: 'image_url';
|
|
22
|
+
image_url: {
|
|
23
|
+
/** Base64 data URL (data:image/jpeg;base64,...) or HTTPS image URL */
|
|
24
|
+
url: string;
|
|
25
|
+
/** Resolution hint for the model. Defaults to 'auto'. */
|
|
26
|
+
detail?: 'low' | 'high' | 'auto';
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
/** Union of all content part types for multipart messages. */
|
|
30
|
+
export type ContentPart = TextPart | ImagePart;
|
|
6
31
|
export interface Message {
|
|
7
32
|
role: 'system' | 'user' | 'assistant' | 'tool';
|
|
8
|
-
content
|
|
33
|
+
/** Plain text or multipart content (text + images). Arrays are passed through
|
|
34
|
+
* to OpenAI-compatible providers as-is; other providers receive text parts only. */
|
|
35
|
+
content: string | ContentPart[];
|
|
9
36
|
name?: string;
|
|
10
37
|
toolCalls?: ToolCall[];
|
|
11
38
|
toolCallId?: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../../src/llm/providers/base.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,MAAM,CAAA;IAC9C,OAAO,EAAE,MAAM,CAAA;
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../../src/llm/providers/base.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,EAAE,MAAM,CAAA;CACb;AAED;;;;;;GAMG;AACH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,WAAW,CAAA;IACjB,SAAS,EAAE;QACT,sEAAsE;QACtE,GAAG,EAAE,MAAM,CAAA;QACX,yDAAyD;QACzD,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,CAAA;KACjC,CAAA;CACF;AAED,8DAA8D;AAC9D,MAAM,MAAM,WAAW,GAAG,QAAQ,GAAG,SAAS,CAAA;AAE9C,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,MAAM,CAAA;IAC9C;yFACqF;IACrF,OAAO,EAAE,MAAM,GAAG,WAAW,EAAE,CAAA;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAA;IACtB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,0FAA0F;IAC1F,YAAY,CAAC,EAAE;QAAE,IAAI,EAAE,WAAW,CAAA;KAAE,CAAA;CACrC;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,UAAU,CAAA;IAChB,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAA;QACZ,SAAS,EAAE,MAAM,CAAA;KAClB,CAAA;CACF;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,WAAW,CAAA;IACjB,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAA;IACtB,YAAY,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,YAAY,GAAG,gBAAgB,CAAA;IAClE,KAAK,CAAC,EAAE;QACN,YAAY,EAAE,MAAM,CAAA;QACpB,gBAAgB,EAAE,MAAM,CAAA;QACxB,WAAW,EAAE,MAAM,CAAA;QACnB,4BAA4B;QAC5B,mBAAmB,CAAC,EAAE,MAAM,CAAA;QAC5B,eAAe,CAAC,EAAE,MAAM,CAAA;KACzB,CAAA;CACF;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,EAAE,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,OAAO,CAAA;IACb,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAA;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAEzE;;OAEG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC,CAAA;IAE3F;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,O
AAO,CAAC,EAAE,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAA;CACjF;AAED,MAAM,WAAW,cAAc;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,KAAK,CAAC,EAAE,cAAc,EAAE,CAAA;IACxB,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,CAAA;KAAE,CAAA;IAC/E,+EAA+E;IAC/E,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,gBAAgB;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,KAAK,CAAC,EAAE,cAAc,EAAE,CAAA;IACxB,+EAA+E;IAC/E,WAAW,CAAC,EAAE,OAAO,CAAA;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,UAAU,CAAA;IAChB,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAA;QACZ,WAAW,EAAE,MAAM,CAAA;QACnB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;KACpC,CAAA;CACF;AAED,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,QAAQ,GAAG,YAAY,GAAG,gBAAgB,CAAA"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BitNet Provider
|
|
3
|
+
*
|
|
4
|
+
* Local inference via BitNet's OpenAI-compatible llama-server (http://localhost:8080/v1).
|
|
5
|
+
* No API key required. Runs entirely on CPU (AVX2). Zero cost, fully offline.
|
|
6
|
+
*
|
|
7
|
+
* Setup: pnpm bitnet:install (clone + compile + download model)
|
|
8
|
+
* Start: pnpm bitnet:serve (start inference server on :8080)
|
|
9
|
+
*
|
|
10
|
+
* Note: BitNet is a generative model only. It does not expose /v1/embeddings.
|
|
11
|
+
* For vector search, use Ollama (nomic-embed-text) or @xenova/transformers.
|
|
12
|
+
*/
|
|
13
|
+
import type { Embedding, LLMChatOptions, LLMChunk, LLMEmbedOptions, LLMProvider, LLMProviderConfig, LLMResponse, LLMStreamOptions, Message } from './base.js';
|
|
14
|
+
export interface BitnetProviderConfig extends Omit<LLMProviderConfig, 'apiKey'> {
|
|
15
|
+
apiKey?: string;
|
|
16
|
+
/** Defaults to http://localhost:8080/v1 */
|
|
17
|
+
baseURL?: string;
|
|
18
|
+
/** Chat model. Defaults to bitnet-b1.58-2B-4T — installed by pnpm bitnet:install */
|
|
19
|
+
model?: string;
|
|
20
|
+
}
|
|
21
|
+
export declare class BitnetProvider implements LLMProvider {
|
|
22
|
+
private inner;
|
|
23
|
+
constructor(config: BitnetProviderConfig);
|
|
24
|
+
chat(messages: Message[], options?: LLMChatOptions): Promise<LLMResponse>;
|
|
25
|
+
stream(messages: Message[], options?: LLMStreamOptions): AsyncIterable<LLMChunk>;
|
|
26
|
+
embed(_text: string | string[], _options?: LLMEmbedOptions): Promise<Embedding | Embedding[]>;
|
|
27
|
+
}
|
|
28
|
+
//# sourceMappingURL=bitnet.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bitnet.d.ts","sourceRoot":"","sources":["../../../src/llm/providers/bitnet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EACV,SAAS,EACT,cAAc,EACd,QAAQ,EACR,eAAe,EACf,WAAW,EACX,iBAAiB,EACjB,WAAW,EACX,gBAAgB,EAChB,OAAO,EACR,MAAM,WAAW,CAAA;AAGlB,MAAM,WAAW,oBAAqB,SAAQ,IAAI,CAAC,iBAAiB,EAAE,QAAQ,CAAC;IAC7E,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,2CAA2C;IAC3C,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,oFAAoF;IACpF,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,qBAAa,cAAe,YAAW,WAAW;IAChD,OAAO,CAAC,KAAK,CAAgB;gBAEjB,MAAM,EAAE,oBAAoB;IAUxC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IAIzE,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC;IAIhF,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,EAAE,QAAQ,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC;CAO9F"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BitNet Provider
|
|
3
|
+
*
|
|
4
|
+
* Local inference via BitNet's OpenAI-compatible llama-server (http://localhost:8080/v1).
|
|
5
|
+
* No API key required. Runs entirely on CPU (AVX2). Zero cost, fully offline.
|
|
6
|
+
*
|
|
7
|
+
* Setup: pnpm bitnet:install (clone + compile + download model)
|
|
8
|
+
* Start: pnpm bitnet:serve (start inference server on :8080)
|
|
9
|
+
*
|
|
10
|
+
* Note: BitNet is a generative model only. It does not expose /v1/embeddings.
|
|
11
|
+
* For vector search, use Ollama (nomic-embed-text) or @xenova/transformers.
|
|
12
|
+
*/
|
|
13
|
+
import { OpenAIProvider } from './openai.js';
|
|
14
|
+
export class BitnetProvider {
|
|
15
|
+
inner;
|
|
16
|
+
constructor(config) {
|
|
17
|
+
this.inner = new OpenAIProvider({
|
|
18
|
+
...config,
|
|
19
|
+
// llama-server ignores the API key but the OpenAI client requires a non-empty value
|
|
20
|
+
apiKey: config.apiKey ?? 'bitnet',
|
|
21
|
+
baseURL: config.baseURL ?? 'http://localhost:8080/v1',
|
|
22
|
+
model: config.model ?? 'bitnet-b1.58-2B-4T',
|
|
23
|
+
});
|
|
24
|
+
}
|
|
25
|
+
chat(messages, options) {
|
|
26
|
+
return this.inner.chat(messages, options);
|
|
27
|
+
}
|
|
28
|
+
stream(messages, options) {
|
|
29
|
+
return this.inner.stream(messages, options);
|
|
30
|
+
}
|
|
31
|
+
embed(_text, _options) {
|
|
32
|
+
throw new Error('BitNet does not support embeddings. Set OLLAMA_BASE_URL to auto-wire Ollama ' +
|
|
33
|
+
'(nomic-embed-text) as the embed backend, or use @xenova/transformers for ' +
|
|
34
|
+
'fully offline embedding generation.');
|
|
35
|
+
}
|
|
36
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical Inference Snaps Provider
|
|
3
|
+
*
|
|
4
|
+
* Local inference via Canonical's inference-snaps OpenAI-compatible API.
|
|
5
|
+
* No API key required. Zero cost, fully offline, hardware-optimized.
|
|
6
|
+
*
|
|
7
|
+
* Supported models (snaps):
|
|
8
|
+
* gemma3 — general LLM + vision (text/image in, text out)
|
|
9
|
+
* deepseek-r1 — reasoning LLM
|
|
10
|
+
* qwen-vl — vision-language model (image + text)
|
|
11
|
+
* nemotron-nano — general LLM (reasoning + non-reasoning)
|
|
12
|
+
*
|
|
13
|
+
* Install a model:
|
|
14
|
+
* sudo snap install gemma3
|
|
15
|
+
* gemma3 set http.port=9090 # optional: change port (default varies)
|
|
16
|
+
* gemma3 status # shows base URL and available models
|
|
17
|
+
*
|
|
18
|
+
* Set env vars:
|
|
19
|
+
* INFERENCE_SNAPS_BASE_URL=http://localhost:9090/v1
|
|
20
|
+
* LLM_MODEL=gemma3 # must match the snap name / model ID
|
|
21
|
+
* LLM_EMBED_MODEL=gemma3 # optional: model for embeddings
|
|
22
|
+
*
|
|
23
|
+
* Docs: https://documentation.ubuntu.com/inference-snaps
|
|
24
|
+
*/
|
|
25
|
+
import type { Embedding, LLMChatOptions, LLMChunk, LLMEmbedOptions, LLMProvider, LLMProviderConfig, LLMResponse, LLMStreamOptions, Message } from './base.js';
|
|
26
|
+
/**
 * Configuration for the Canonical inference-snaps provider.
 *
 * Extends the shared provider config but re-declares `apiKey` as optional:
 * inference-snaps ignores authentication, and the implementation substitutes
 * a placeholder value when it is omitted.
 */
export interface InferenceSnapsProviderConfig extends Omit<LLMProviderConfig, 'apiKey'> {
    /** Ignored by inference-snaps; a placeholder is used when omitted. */
    apiKey?: string;
    /** Base URL of the inference-snaps service, e.g. http://localhost:9090/v1 */
    baseURL: string;
    /** Chat/vision model name — must match the snap's model ID (e.g. 'gemma3', 'deepseek-r1') */
    model?: string;
    /** Embedding model name. Defaults to the chat model when omitted. */
    embedModel?: string;
}
|
|
35
|
+
/**
 * LLM provider backed by Canonical's inference-snaps local service.
 *
 * Chat and streaming delegate to an OpenAI-compatible client pointed at the
 * service's base URL; embeddings are requested directly from the service's
 * `/embeddings` endpoint using `embedModel`.
 */
export declare class InferenceSnapsProvider implements LLMProvider {
    private inner;
    private embedModel;
    private baseURL;
    constructor(config: InferenceSnapsProviderConfig);
    /** Run a chat completion against the local model. */
    chat(messages: Message[], options?: LLMChatOptions): Promise<LLMResponse>;
    /** Stream a chat completion as incremental chunks. */
    stream(messages: Message[], options?: LLMStreamOptions): AsyncIterable<LLMChunk>;
    /** Generate embeddings for one string (single result) or a batch (array result). */
    embed(text: string | string[], _options?: LLMEmbedOptions): Promise<Embedding | Embedding[]>;
}
|
|
44
|
+
//# sourceMappingURL=inference-snaps.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inference-snaps.d.ts","sourceRoot":"","sources":["../../../src/llm/providers/inference-snaps.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EACV,SAAS,EACT,cAAc,EACd,QAAQ,EACR,eAAe,EACf,WAAW,EACX,iBAAiB,EACjB,WAAW,EACX,gBAAgB,EAChB,OAAO,EACR,MAAM,WAAW,CAAA;AAGlB,MAAM,WAAW,4BAA6B,SAAQ,IAAI,CAAC,iBAAiB,EAAE,QAAQ,CAAC;IACrF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,6EAA6E;IAC7E,OAAO,EAAE,MAAM,CAAA;IACf,6FAA6F;IAC7F,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,qEAAqE;IACrE,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,qBAAa,sBAAuB,YAAW,WAAW;IACxD,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,UAAU,CAAQ;IAC1B,OAAO,CAAC,OAAO,CAAQ;gBAEX,MAAM,EAAE,4BAA4B;IAahD,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IAIzE,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC;IAI1E,KAAK,CACT,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,EACvB,QAAQ,CAAC,EAAE,eAAe,GACzB,OAAO,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC;CAsBpC"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical Inference Snaps Provider
|
|
3
|
+
*
|
|
4
|
+
* Local inference via Canonical's inference-snaps OpenAI-compatible API.
|
|
5
|
+
* No API key required. Zero cost, fully offline, hardware-optimized.
|
|
6
|
+
*
|
|
7
|
+
* Supported models (snaps):
|
|
8
|
+
* gemma3 — general LLM + vision (text/image in, text out)
|
|
9
|
+
* deepseek-r1 — reasoning LLM
|
|
10
|
+
* qwen-vl — vision-language model (image + text)
|
|
11
|
+
* nemotron-nano — general LLM (reasoning + non-reasoning)
|
|
12
|
+
*
|
|
13
|
+
* Install a model:
|
|
14
|
+
* sudo snap install gemma3
|
|
15
|
+
* gemma3 set http.port=9090 # optional: change port (default varies)
|
|
16
|
+
* gemma3 status # shows base URL and available models
|
|
17
|
+
*
|
|
18
|
+
* Set env vars:
|
|
19
|
+
* INFERENCE_SNAPS_BASE_URL=http://localhost:9090/v1
|
|
20
|
+
* LLM_MODEL=gemma3 # must match the snap name / model ID
|
|
21
|
+
* LLM_EMBED_MODEL=gemma3 # optional: model for embeddings
|
|
22
|
+
*
|
|
23
|
+
* Docs: https://documentation.ubuntu.com/inference-snaps
|
|
24
|
+
*/
|
|
25
|
+
import { OpenAIProvider } from './openai.js';
|
|
26
|
+
export class InferenceSnapsProvider {
|
|
27
|
+
inner;
|
|
28
|
+
embedModel;
|
|
29
|
+
baseURL;
|
|
30
|
+
constructor(config) {
|
|
31
|
+
this.baseURL = config.baseURL;
|
|
32
|
+
// Use the same model for embeddings unless explicitly overridden
|
|
33
|
+
this.embedModel = config.embedModel ?? config.model ?? 'gemma3';
|
|
34
|
+
this.inner = new OpenAIProvider({
|
|
35
|
+
...config,
|
|
36
|
+
// inference-snaps ignores the API key; OpenAI client requires a non-empty value
|
|
37
|
+
apiKey: config.apiKey ?? 'inference-snaps',
|
|
38
|
+
baseURL: config.baseURL,
|
|
39
|
+
model: config.model ?? 'gemma3',
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
chat(messages, options) {
|
|
43
|
+
return this.inner.chat(messages, options);
|
|
44
|
+
}
|
|
45
|
+
stream(messages, options) {
|
|
46
|
+
return this.inner.stream(messages, options);
|
|
47
|
+
}
|
|
48
|
+
async embed(text, _options) {
|
|
49
|
+
const texts = Array.isArray(text) ? text : [text];
|
|
50
|
+
const response = await fetch(`${this.baseURL}/embeddings`, {
|
|
51
|
+
method: 'POST',
|
|
52
|
+
headers: { 'Content-Type': 'application/json' },
|
|
53
|
+
body: JSON.stringify({ model: this.embedModel, input: texts }),
|
|
54
|
+
});
|
|
55
|
+
if (!response.ok) {
|
|
56
|
+
throw new Error(`inference-snaps embeddings error: ${response.statusText}`);
|
|
57
|
+
}
|
|
58
|
+
const data = (await response.json());
|
|
59
|
+
const embeddings = (data.data ?? []).map((item) => {
|
|
60
|
+
const vector = item.embedding ?? [];
|
|
61
|
+
return { vector, dimension: vector.length, model: this.embedModel };
|
|
62
|
+
});
|
|
63
|
+
return Array.isArray(text) ? embeddings : embeddings[0];
|
|
64
|
+
}
|
|
65
|
+
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"openai.d.ts","sourceRoot":"","sources":["../../../src/llm/providers/openai.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,SAAS,EAET,cAAc,EACd,QAAQ,EACR,eAAe,EACf,WAAW,EACX,iBAAiB,EACjB,WAAW,EACX,gBAAgB,EAChB,OAAO,EAER,MAAM,WAAW,CAAA;AAElB,MAAM,WAAW,oBAAqB,SAAQ,iBAAiB;IAC7D,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB;AAsDD,qBAAa,cAAe,YAAW,WAAW;IAChD,OAAO,CAAC,MAAM,CAAsB;IACpC,OAAO,CAAC,OAAO,CAAQ;gBAEX,MAAM,EAAE,oBAAoB;IAKlC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IAgFzE,KAAK,CACT,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,EACvB,OAAO,CAAC,EAAE,eAAe,GACxB,OAAO,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC;IA2C5B,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC;IA0FvF,OAAO,CAAC,cAAc;
|
|
1
|
+
{"version":3,"file":"openai.d.ts","sourceRoot":"","sources":["../../../src/llm/providers/openai.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,SAAS,EAET,cAAc,EACd,QAAQ,EACR,eAAe,EACf,WAAW,EACX,iBAAiB,EACjB,WAAW,EACX,gBAAgB,EAChB,OAAO,EAER,MAAM,WAAW,CAAA;AAElB,MAAM,WAAW,oBAAqB,SAAQ,iBAAiB;IAC7D,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB;AAsDD,qBAAa,cAAe,YAAW,WAAW;IAChD,OAAO,CAAC,MAAM,CAAsB;IACpC,OAAO,CAAC,OAAO,CAAQ;gBAEX,MAAM,EAAE,oBAAoB;IAKlC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IAgFzE,KAAK,CACT,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,EACvB,OAAO,CAAC,EAAE,eAAe,GACxB,OAAO,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC;IA2C5B,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC;IA0FvF,OAAO,CAAC,cAAc;CA4BvB"}
|
|
@@ -224,6 +224,8 @@ export class OpenAIProvider {
|
|
|
224
224
|
return messages.map((msg) => {
|
|
225
225
|
const formatted = {
|
|
226
226
|
role: msg.role,
|
|
227
|
+
// Pass array content through as-is — OpenAI-compatible APIs (including
|
|
228
|
+
// inference-snaps vision models) accept the same multipart format natively.
|
|
227
229
|
content: msg.content,
|
|
228
230
|
};
|
|
229
231
|
if (msg.name) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"semantic-cache.d.ts","sourceRoot":"","sources":["../../src/llm/semantic-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAKH,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAA;AAElD,MAAM,WAAW,oBAAoB;IACnC,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B,qDAAqD;IACrD,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,iDAAiD;IACjD,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,uCAAuC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,uCAAuC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,EAAE,MAAM,CAAA;IACf,aAAa,EAAE,MAAM,CAAA;IACrB,YAAY,EAAE,MAAM,CAAA;CACrB;AAED,MAAM,WAAW,sBAAsB;IACrC,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,EAAE,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,CAAC,EAAE;QACN,YAAY,EAAE,MAAM,CAAA;QACpB,gBAAgB,EAAE,MAAM,CAAA;QACxB,WAAW,EAAE,MAAM,CAAA;KACpB,CAAA;CACF;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,aAAa,CAAqB;IAC1C,OAAO,CAAC,OAAO,CAAgC;IAC/C,OAAO,CAAC,KAAK,CAIZ;IACD,OAAO,CAAC,MAAM,
|
|
1
|
+
{"version":3,"file":"semantic-cache.d.ts","sourceRoot":"","sources":["../../src/llm/semantic-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAKH,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAA;AAElD,MAAM,WAAW,oBAAoB;IACnC,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B,qDAAqD;IACrD,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,iDAAiD;IACjD,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,uCAAuC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,uCAAuC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,EAAE,MAAM,CAAA;IACf,aAAa,EAAE,MAAM,CAAA;IACrB,YAAY,EAAE,MAAM,CAAA;CACrB;AAED,MAAM,WAAW,sBAAsB;IACrC,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,EAAE,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,CAAC,EAAE;QACN,YAAY,EAAE,MAAM,CAAA;QACpB,gBAAgB,EAAE,MAAM,CAAA;QACxB,WAAW,EAAE,MAAM,CAAA;KACpB,CAAA;CACF;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,aAAa,CAAqB;IAC1C,OAAO,CAAC,OAAO,CAAgC;IAC/C,OAAO,CAAC,KAAK,CAIZ;IACD,OAAO,CAAC,MAAM,CAA+C;gBAEjD,OAAO,GAAE,oBAAyB;IAiB9C;;;;;OAKG;IACG,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,sBAAsB,GAAG,SAAS,CAAC;IAwFrE;;;;;;OAMG;IACG,GAAG,CACP,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,KAAK,CAAC,EAAE;QACN,YAAY,EAAE,MAAM,CAAA;QACpB,gBAAgB,EAAE,MAAM,CAAA;QACxB,WAAW,EAAE,MAAM,CAAA;KACpB,GACA,OAAO,CAAC,IAAI,CAAC;IAqChB;;;;OAIG;IACH,YAAY,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,MAAM;IAczC;;OAEG;IACH,QAAQ,IAAI,kBAAkB;IAmB9B;;OAEG;IACH,UAAU,IAAI,IAAI;IAQlB;;;;OAIG;IACG,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC;IAMrC;;;;;;;;;;;;OAYG;IACG,SAAS,CAAC,OAAO,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;CAKpF;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,6BAA6B,CAC3C,KAAK,EAAE,kBAAkB,EACzB,OAAO,EAAE;IACP,iBAAiB,EAAE,MAAM,CAAA;IACzB,cAAc,EAAE,MAAM,CAAA;CACvB,GACA;IACD,UAAU,EAAE,MAAM,CAAA;IAClB,cAAc,EAAE,MAAM,CAAA;IACtB,aAAa,EAAE,MAAM,CAAA;CACtB,CAUA;AAQD;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,aAAa,CAKpF;AAED;;GAEG;AACH,wBAAgB,
wBAAwB,IAAI,IAAI,CAE/C"}
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
* @see https://redis.io/blog/what-is-semantic-caching/
|
|
25
25
|
*/
|
|
26
26
|
import { generateEmbedding } from '../embeddings/index.js';
|
|
27
|
-
import { createLogger } from '
|
|
27
|
+
import { createLogger } from '@revealui/core/observability/logger';
|
|
28
28
|
import { VectorMemoryService } from '../memory/vector/vector-memory-service.js';
|
|
29
29
|
/**
|
|
30
30
|
* Semantic cache that uses vector similarity for intelligent caching
|
|
@@ -50,7 +50,7 @@ export class SemanticCache {
|
|
|
50
50
|
vectorService;
|
|
51
51
|
options;
|
|
52
52
|
stats;
|
|
53
|
-
logger = createLogger('
|
|
53
|
+
logger = createLogger({ component: 'SemanticCache' });
|
|
54
54
|
constructor(options = {}) {
|
|
55
55
|
this.vectorService = new VectorMemoryService();
|
|
56
56
|
this.options = {
|
|
@@ -134,7 +134,7 @@ export class SemanticCache {
|
|
|
134
134
|
}
|
|
135
135
|
catch (error) {
|
|
136
136
|
// Fail gracefully - return undefined on error
|
|
137
|
-
this.logger.error('Semantic cache error
|
|
137
|
+
this.logger.error('Semantic cache error', error instanceof Error ? error : new Error(String(error)));
|
|
138
138
|
if (this.options.enableStats) {
|
|
139
139
|
this.stats.misses++;
|
|
140
140
|
}
|
|
@@ -181,7 +181,7 @@ export class SemanticCache {
|
|
|
181
181
|
}
|
|
182
182
|
catch (error) {
|
|
183
183
|
// Fail gracefully - log error but don't throw
|
|
184
|
-
this.logger.error('Failed to store in semantic cache
|
|
184
|
+
this.logger.error('Failed to store in semantic cache', error instanceof Error ? error : new Error(String(error)));
|
|
185
185
|
}
|
|
186
186
|
}
|
|
187
187
|
/**
|
|
@@ -192,7 +192,15 @@ export class SemanticCache {
|
|
|
192
192
|
extractQuery(messages) {
|
|
193
193
|
return messages
|
|
194
194
|
.filter((m) => m.role === 'user')
|
|
195
|
-
.map((m) =>
|
|
195
|
+
.map((m) => {
|
|
196
|
+
if (typeof m.content === 'string')
|
|
197
|
+
return m.content;
|
|
198
|
+
// Multipart: extract text parts only for cache key generation
|
|
199
|
+
return m.content
|
|
200
|
+
.filter((p) => p.type === 'text')
|
|
201
|
+
.map((p) => p.text)
|
|
202
|
+
.join(' ');
|
|
203
|
+
})
|
|
196
204
|
.join(' ');
|
|
197
205
|
}
|
|
198
206
|
/**
|
package/dist/llm/server.d.ts
CHANGED
|
@@ -7,7 +7,9 @@
|
|
|
7
7
|
export * from './client.js';
|
|
8
8
|
export * from './providers/anthropic.js';
|
|
9
9
|
export * from './providers/base.js';
|
|
10
|
+
export * from './providers/bitnet.js';
|
|
10
11
|
export * from './providers/groq.js';
|
|
12
|
+
export * from './providers/inference-snaps.js';
|
|
11
13
|
export * from './providers/ollama.js';
|
|
12
14
|
export * from './providers/openai.js';
|
|
13
15
|
export * from './providers/vultr.js';
|