@seanhogg/builderforce-memory 2026.6.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +582 -0
- package/dist/agent/SSMAgent.d.ts +146 -0
- package/dist/agent/SSMAgent.d.ts.map +1 -0
- package/dist/agent/SSMAgent.js +231 -0
- package/dist/agent/SSMAgent.js.map +1 -0
- package/dist/agent/index.d.ts +3 -0
- package/dist/agent/index.d.ts.map +1 -0
- package/dist/agent/index.js +2 -0
- package/dist/agent/index.js.map +1 -0
- package/dist/bridges/AnthropicBridge.d.ts +47 -0
- package/dist/bridges/AnthropicBridge.d.ts.map +1 -0
- package/dist/bridges/AnthropicBridge.js +120 -0
- package/dist/bridges/AnthropicBridge.js.map +1 -0
- package/dist/bridges/CachingBridge.d.ts +44 -0
- package/dist/bridges/CachingBridge.d.ts.map +1 -0
- package/dist/bridges/CachingBridge.js +62 -0
- package/dist/bridges/CachingBridge.js.map +1 -0
- package/dist/bridges/FetchBridge.d.ts +30 -0
- package/dist/bridges/FetchBridge.d.ts.map +1 -0
- package/dist/bridges/FetchBridge.js +24 -0
- package/dist/bridges/FetchBridge.js.map +1 -0
- package/dist/bridges/OpenAIBridge.d.ts +33 -0
- package/dist/bridges/OpenAIBridge.d.ts.map +1 -0
- package/dist/bridges/OpenAIBridge.js +110 -0
- package/dist/bridges/OpenAIBridge.js.map +1 -0
- package/dist/bridges/ResponseCache.d.ts +65 -0
- package/dist/bridges/ResponseCache.d.ts.map +1 -0
- package/dist/bridges/ResponseCache.js +97 -0
- package/dist/bridges/ResponseCache.js.map +1 -0
- package/dist/bridges/SemanticCachingBridge.d.ts +31 -0
- package/dist/bridges/SemanticCachingBridge.d.ts.map +1 -0
- package/dist/bridges/SemanticCachingBridge.js +44 -0
- package/dist/bridges/SemanticCachingBridge.js.map +1 -0
- package/dist/bridges/TransformerBridge.d.ts +35 -0
- package/dist/bridges/TransformerBridge.d.ts.map +1 -0
- package/dist/bridges/TransformerBridge.js +10 -0
- package/dist/bridges/TransformerBridge.js.map +1 -0
- package/dist/bridges/index.d.ts +14 -0
- package/dist/bridges/index.d.ts.map +1 -0
- package/dist/bridges/index.js +7 -0
- package/dist/bridges/index.js.map +1 -0
- package/dist/cache/FetchSemanticCacheBackend.d.ts +40 -0
- package/dist/cache/FetchSemanticCacheBackend.d.ts.map +1 -0
- package/dist/cache/FetchSemanticCacheBackend.js +61 -0
- package/dist/cache/FetchSemanticCacheBackend.js.map +1 -0
- package/dist/cache/SemanticCache.d.ts +105 -0
- package/dist/cache/SemanticCache.d.ts.map +1 -0
- package/dist/cache/SemanticCache.js +130 -0
- package/dist/cache/SemanticCache.js.map +1 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/cache/index.js +3 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/distillation/DistillationEngine.d.ts +107 -0
- package/dist/distillation/DistillationEngine.d.ts.map +1 -0
- package/dist/distillation/DistillationEngine.js +152 -0
- package/dist/distillation/DistillationEngine.js.map +1 -0
- package/dist/distillation/index.d.ts +3 -0
- package/dist/distillation/index.d.ts.map +1 -0
- package/dist/distillation/index.js +2 -0
- package/dist/distillation/index.js.map +1 -0
- package/dist/errors/SSMError.d.ts +14 -0
- package/dist/errors/SSMError.d.ts.map +1 -0
- package/dist/errors/SSMError.js +18 -0
- package/dist/errors/SSMError.js.map +1 -0
- package/dist/errors/index.d.ts +3 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +2 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/index.d.ts +65 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/dist/memory/MemoryStore.d.ts +152 -0
- package/dist/memory/MemoryStore.d.ts.map +1 -0
- package/dist/memory/MemoryStore.js +290 -0
- package/dist/memory/MemoryStore.js.map +1 -0
- package/dist/memory/index.d.ts +3 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +2 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/router/InferenceRouter.d.ts +92 -0
- package/dist/router/InferenceRouter.d.ts.map +1 -0
- package/dist/router/InferenceRouter.js +113 -0
- package/dist/router/InferenceRouter.js.map +1 -0
- package/dist/router/index.d.ts +3 -0
- package/dist/router/index.d.ts.map +1 -0
- package/dist/router/index.js +2 -0
- package/dist/router/index.js.map +1 -0
- package/dist/runtime/SSMRuntime.d.ts +167 -0
- package/dist/runtime/SSMRuntime.d.ts.map +1 -0
- package/dist/runtime/SSMRuntime.js +199 -0
- package/dist/runtime/SSMRuntime.js.map +1 -0
- package/dist/runtime/index.d.ts +3 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +2 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/session/errors.d.ts +10 -0
- package/dist/session/errors.d.ts.map +1 -0
- package/dist/session/errors.js +14 -0
- package/dist/session/errors.js.map +1 -0
- package/dist/session/index.d.ts +11 -0
- package/dist/session/index.d.ts.map +1 -0
- package/dist/session/index.js +7 -0
- package/dist/session/index.js.map +1 -0
- package/dist/session/persistence.d.ts +14 -0
- package/dist/session/persistence.d.ts.map +1 -0
- package/dist/session/persistence.js +100 -0
- package/dist/session/persistence.js.map +1 -0
- package/dist/session/presets.d.ts +31 -0
- package/dist/session/presets.d.ts.map +1 -0
- package/dist/session/presets.js +91 -0
- package/dist/session/presets.js.map +1 -0
- package/dist/session/session.d.ts +186 -0
- package/dist/session/session.d.ts.map +1 -0
- package/dist/session/session.js +358 -0
- package/dist/session/session.js.map +1 -0
- package/dist/session/streaming.d.ts +13 -0
- package/dist/session/streaming.d.ts.map +1 -0
- package/dist/session/streaming.js +74 -0
- package/dist/session/streaming.js.map +1 -0
- package/dist/session/tokenizer.d.ts +18 -0
- package/dist/session/tokenizer.d.ts.map +1 -0
- package/dist/session/tokenizer.js +11 -0
- package/dist/session/tokenizer.js.map +1 -0
- package/dist/similarity/index.d.ts +19 -0
- package/dist/similarity/index.d.ts.map +1 -0
- package/dist/similarity/index.js +42 -0
- package/dist/similarity/index.js.map +1 -0
- package/package.json +120 -0
- package/src/agent/SSMAgent.ts +327 -0
- package/src/agent/index.ts +2 -0
- package/src/bridges/AnthropicBridge.ts +166 -0
- package/src/bridges/CachingBridge.ts +79 -0
- package/src/bridges/FetchBridge.ts +41 -0
- package/src/bridges/OpenAIBridge.ts +143 -0
- package/src/bridges/ResponseCache.ts +131 -0
- package/src/bridges/SemanticCachingBridge.ts +60 -0
- package/src/bridges/TransformerBridge.ts +38 -0
- package/src/bridges/index.ts +13 -0
- package/src/cache/FetchSemanticCacheBackend.ts +79 -0
- package/src/cache/SemanticCache.ts +196 -0
- package/src/cache/index.ts +9 -0
- package/src/distillation/DistillationEngine.ts +248 -0
- package/src/distillation/index.ts +2 -0
- package/src/errors/SSMError.ts +26 -0
- package/src/errors/index.ts +2 -0
- package/src/index.ts +128 -0
- package/src/memory/MemoryStore.ts +408 -0
- package/src/memory/index.ts +2 -0
- package/src/router/InferenceRouter.ts +201 -0
- package/src/router/index.ts +2 -0
- package/src/runtime/SSMRuntime.ts +309 -0
- package/src/runtime/index.ts +2 -0
- package/src/session/errors.ts +24 -0
- package/src/session/index.ts +25 -0
- package/src/session/persistence.ts +142 -0
- package/src/session/presets.ts +122 -0
- package/src/session/session.ts +657 -0
- package/src/session/streaming.ts +97 -0
- package/src/session/tokenizer.ts +18 -0
- package/src/similarity/index.ts +42 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSMAgent – high-level orchestration primitive.
|
|
3
|
+
*
|
|
4
|
+
* Combines SSMRuntime (inference + adaptation), MemoryStore (persistent facts),
|
|
5
|
+
* and a conversation history manager into a single agent interface.
|
|
6
|
+
*
|
|
7
|
+
* Prompt format matches MambaChatbot so the SSM model can follow the same
|
|
8
|
+
* token patterns it was trained on:
|
|
9
|
+
*
|
|
10
|
+
* System: <systemPrompt>
|
|
11
|
+
* [Fact (<key>): <content> ← injected by importance desc, filtered by tag/key]
|
|
12
|
+
* User: <message>
|
|
13
|
+
* Assistant: <message>
|
|
14
|
+
* ...
|
|
15
|
+
* User: <current input>
|
|
16
|
+
* Assistant:
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import type { AdaptOptions, AdaptResult } from '../session/index.js';
|
|
20
|
+
import type { SSMRuntime, GenerateOptions } from '../runtime/SSMRuntime.js';
|
|
21
|
+
import type { MemoryStore, MemoryEntry } from '../memory/MemoryStore.js';
|
|
22
|
+
import { SSMError } from '../errors/SSMError.js';
|
|
23
|
+
|
|
24
|
+
// ── Types ─────────────────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
/** Speaker label attached to every conversation message. */
export type MessageRole = 'user' | 'assistant' | 'system';

/** One entry of the agent's conversation history. */
export interface AgentMessage {
  /** Who produced the message. */
  role : MessageRole;
  /** The message text. */
  content: string;
}
|
|
32
|
+
|
|
33
|
+
/** Construction options for `SSMAgent`. */
export interface SSMAgentOptions {
  /** The runtime to use for inference and adaptation. */
  runtime : SSMRuntime;
  /** Optional memory store for persistent fact retrieval. */
  memory? : MemoryStore;
  /** Default system prompt. Default: 'You are a helpful assistant.' */
  systemPrompt? : string;
  /**
   * Max user+assistant turn pairs to include in the prompt context.
   * Oldest turns are dropped from the prompt first; the stored history
   * itself is not truncated.
   * Default: 20
   */
  maxHistoryTurns? : number;
  /**
   * When true, the agent serialises its conversation history to memory
   * under the `__history__` key on `destroy()`, and loads it back when
   * `init()` is called if the key is present. The constructor itself does
   * not load anything, so `init()` must be awaited to restore history.
   * Default: true
   */
  persistHistory? : boolean;
  /**
   * How facts are picked from the MemoryStore for injection each turn:
   * - 'semantic' (default): top-`maxFacts` by SSM-embedding similarity to
   *   the input (`recallSimilar`) — injects the few *relevant* facts, which
   *   keeps the prompt small and is paraphrase-robust.
   * - 'substring': legacy behaviour — only facts whose key literally appears
   *   in the input.
   * `injectAllFacts` on a turn overrides this and injects everything.
   */
  factSelection? : 'semantic' | 'substring';
  /** Max facts injected in 'semantic' mode. Default: 8. */
  maxFacts? : number;
}
|
|
66
|
+
|
|
67
|
+
/** Per-turn options for `SSMAgent.think()` and `SSMAgent.thinkStream()`. */
export interface ThinkOptions extends GenerateOptions {
  /** Override the system prompt for this single turn only. */
  systemPrompt?: string;
  /**
   * Inject every stored fact into the context for this turn.
   * Default: false — facts are chosen according to the agent's
   * `factSelection` mode ('semantic' or 'substring') instead.
   */
  injectAllFacts?: boolean;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Key used to persist conversation history in the MemoryStore.
 * The entry stored under this key is never injected into a prompt as a fact.
 */
const HISTORY_KEY = '__history__';
|
|
79
|
+
|
|
80
|
+
// ── SSMAgent ──────────────────────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
/**
 * High-level agent that combines an SSMRuntime (generation and adaptation),
 * an optional MemoryStore (persistent facts) and an in-memory conversation
 * history behind a single interface.
 *
 * Typical lifecycle: construct, `await init()` to restore persisted history,
 * call `think()` / `thinkStream()` per turn, then `await destroy()` to persist
 * the history and release the runtime.
 */
export class SSMAgent {
  /** Sampling defaults applied to every turn; per-call options override them. */
  private static readonly _DEFAULT_SAMPLING = {
    maxNewTokens: 200,
    temperature : 0.7,
    topK        : 50,
    topP        : 0.9,
  } as const;

  private readonly _runtime         : SSMRuntime;
  private readonly _memory          : MemoryStore | undefined;
  private readonly _systemPrompt    : string;
  private readonly _maxHistoryTurns : number;
  private readonly _persistHistory  : boolean;
  private readonly _factSelection   : 'semantic' | 'substring';
  private readonly _maxFacts        : number;
  private _history: AgentMessage[] = [];

  /**
   * Stores the options and applies defaults. Performs no I/O — persisted
   * history is only restored by `init()`.
   */
  constructor(opts: SSMAgentOptions) {
    this._runtime         = opts.runtime;
    this._memory          = opts.memory;
    this._systemPrompt    = opts.systemPrompt ?? 'You are a helpful assistant.';
    this._maxHistoryTurns = opts.maxHistoryTurns ?? 20;
    this._persistHistory  = opts.persistHistory ?? true;
    this._factSelection   = opts.factSelection ?? 'semantic';
    this._maxFacts        = opts.maxFacts ?? 8;
  }

  /**
   * Initialises the agent, loading persisted history from memory if available.
   * Call this after construction when `persistHistory` is enabled and a memory
   * store is present. A missing entry leaves the current history untouched;
   * an unreadable or malformed entry resets the history to empty.
   */
  async init(): Promise<void> {
    if (!this._persistHistory || !this._memory) return;

    try {
      const entry = await this._memory.recall(HISTORY_KEY);
      if (!entry) return;

      // The blob comes from storage, so validate its shape instead of
      // trusting a cast: bad entries would otherwise end up in the prompt.
      const parsed: unknown = JSON.parse(entry.content);
      this._history = SSMAgent._isMessageList(parsed) ? parsed : [];
    } catch {
      // Corrupted or missing history — start fresh
      this._history = [];
    }
  }

  // ── Inference ─────────────────────────────────────────────────────────────

  /**
   * Sends a user message and returns the full assistant response.
   * Delegates to `runtime.generate()`; how the request is routed (SSM or
   * transformer bridge) is decided by the runtime.
   * Appends both the user and the assistant turn to history.
   *
   * @param input User message for this turn.
   * @param opts  Per-turn overrides; generation options override the defaults.
   * @returns The assistant response with any extra generated turns removed.
   */
  async think(input: string, opts: ThinkOptions = {}): Promise<string> {
    const { systemPrompt, injectAllFacts, ...generateOpts } = opts;
    const { system, conversation } = await this._buildPrompt(input, systemPrompt, injectAllFacts);

    const raw = await this._runtime.generate(conversation, {
      ...SSMAgent._DEFAULT_SAMPLING,
      system,
      ...generateOpts,
    });

    const response = SSMAgent._firstTurn(raw);
    this._appendHistory(input, response);
    return response;
  }

  /**
   * Streaming variant of `think()`. Tokens are yielded exactly as produced by
   * `runtime.stream()`; `bridgeOpts` is discarded before the call.
   * History is appended only after the stream has been fully consumed — a
   * consumer that stops early leaves the history unchanged.
   */
  async *thinkStream(input: string, opts: ThinkOptions = {}): AsyncIterable<string> {
    // `bridgeOpts` is pulled out only so that it is not forwarded to stream().
    const { systemPrompt, injectAllFacts, bridgeOpts: _b, ...completeOpts } = opts;
    const { system, conversation } = await this._buildPrompt(input, systemPrompt, injectAllFacts);

    let full = '';
    for await (const token of this._runtime.stream(conversation, {
      ...SSMAgent._DEFAULT_SAMPLING,
      system,
      ...completeOpts,
    })) {
      full += token;
      yield token;
    }

    this._appendHistory(input, SSMAgent._firstTurn(full));
  }

  // ── Adaptation ────────────────────────────────────────────────────────────

  /**
   * Fine-tunes the SSM on the provided text.
   * Pass-through to runtime.adapt().
   */
  async learn(data: string, opts?: AdaptOptions): Promise<AdaptResult> {
    return this._runtime.adapt(data, opts);
  }

  // ── Memory ────────────────────────────────────────────────────────────────

  /**
   * Stores a fact in the MemoryStore.
   * Throws SSMError('MEMORY_UNAVAILABLE') if no MemoryStore was provided.
   */
  async remember(key: string, fact: string): Promise<void> {
    if (!this._memory) {
      throw new SSMError(
        'MEMORY_UNAVAILABLE',
        'SSMAgent was constructed without a MemoryStore. Pass `memory` in SSMAgentOptions.',
      );
    }
    await this._memory.remember(key, fact);
  }

  /**
   * Retrieves a fact from the MemoryStore.
   * Returns `undefined` if key not found or no MemoryStore was provided.
   */
  async recall(key: string): Promise<string | undefined> {
    if (!this._memory) return undefined;
    const entry = await this._memory.recall(key);
    return entry?.content;
  }

  // ── History ───────────────────────────────────────────────────────────────

  /** Clears all conversation history. Does not affect MemoryStore. */
  clearHistory(): void {
    this._history = [];
  }

  /** Number of complete user+assistant turn pairs. */
  get turnCount(): number {
    return Math.floor(this._history.length / 2);
  }

  /**
   * Read-only snapshot of the current conversation history.
   * A copy of the list is returned so callers cannot alter the agent's
   * internal list and later turns do not change an already-taken snapshot.
   */
  get history(): readonly AgentMessage[] {
    return this._history.slice();
  }

  // ── Lifecycle ─────────────────────────────────────────────────────────────

  /**
   * Persists conversation history to memory (if `persistHistory` is true,
   * a MemoryStore is available and the history is non-empty) and destroys
   * the underlying runtime. The runtime is destroyed even if persisting fails.
   */
  async destroy(): Promise<void> {
    if (this._persistHistory && this._memory && this._history.length > 0) {
      try {
        await this._memory.remember(HISTORY_KEY, JSON.stringify(this._history));
      } catch {
        // Persistence failure is non-fatal
      }
    }
    this._runtime.destroy();
  }

  // ── Private helpers ───────────────────────────────────────────────────────

  /** Type guard: true when `value` is an array of well-formed AgentMessage objects. */
  private static _isMessageList(value: unknown): value is AgentMessage[] {
    if (!Array.isArray(value)) return false;
    return value.every((item: unknown) => {
      if (typeof item !== 'object' || item === null) return false;
      const { role, content } = item as { role?: unknown; content?: unknown };
      const knownRole = role === 'user' || role === 'assistant' || role === 'system';
      return knownRole && typeof content === 'string';
    });
  }

  /** Cuts generated text at the first extra `User:` turn and trims whitespace. */
  private static _firstTurn(raw: string): string {
    return (raw.split('\nUser:')[0] ?? '').trim();
  }

  /**
   * Selects which stored facts to inject for this turn:
   *  - injectAllFacts → every fact (`recallAll`)
   *  - 'semantic'     → up to `maxFacts` entries from `recallSimilar`
   *  - 'substring'    → legacy match: facts whose key occurs in the input
   * Returns an empty list when the agent has no MemoryStore.
   */
  private async _selectFacts(input: string, injectAllFacts?: boolean): Promise<MemoryEntry[]> {
    const memory = this._memory;
    if (!memory) return [];

    if (injectAllFacts) return memory.recallAll();
    if (this._factSelection === 'semantic') {
      return memory.recallSimilar(input, this._maxFacts, this._runtime);
    }
    return (await memory.recallAll()).filter(f => input.includes(f.key));
  }

  /**
   * Builds the prompt as two parts:
   *  - `system`       : the System line followed by one `Fact (...)` line per
   *                     injected fact, ordered by importance descending.
   *  - `conversation` : the most recent `maxHistoryTurns` turn pairs, the
   *                     current User turn and a trailing `Assistant:` cue.
   * Both parts are newline-joined strings; the caller passes them to the
   * runtime separately.
   */
  private async _buildPrompt(
    input                : string,
    systemPromptOverride?: string,
    injectAllFacts?      : boolean,
  ): Promise<{ system: string; conversation: string }> {
    const sys = systemPromptOverride ?? this._systemPrompt;
    const systemLines: string[] = [`System: ${sys}`];

    if (this._memory) {
      const relevant = (await this._selectFacts(input, injectAllFacts))
        // Never inject the serialised conversation-history blob as a fact.
        .filter(f => f.key !== HISTORY_KEY);

      // Sort a copy by importance descending (missing importance counts as 0.5)
      const sorted = relevant.slice().sort(
        (a, b) => (b.importance ?? 0.5) - (a.importance ?? 0.5),
      );

      for (const fact of sorted) {
        systemLines.push(`Fact (${fact.key}): ${fact.content}`);
      }
    }

    // Keep only the newest maxHistoryTurns pairs (two messages per pair).
    const maxMessages = this._maxHistoryTurns * 2;
    const trimmed = this._history.length > maxMessages
      ? this._history.slice(this._history.length - maxMessages)
      : this._history;

    const convoLines: string[] = [];
    for (const msg of trimmed) {
      // Any role other than 'user' is rendered under the Assistant label.
      const speaker = msg.role === 'user' ? 'User' : 'Assistant';
      convoLines.push(`${speaker}: ${msg.content}`);
    }

    convoLines.push(`User: ${input}`);
    convoLines.push('Assistant:');

    return {
      system      : systemLines.join('\n'),
      conversation: convoLines.join('\n'),
    };
  }

  /** Records one completed turn (user input followed by assistant response). */
  private _appendHistory(input: string, response: string): void {
    this._history.push({ role: 'user', content: input });
    this._history.push({ role: 'assistant', content: response });
  }
}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AnthropicBridge – TransformerBridge implementation for the Anthropic Messages API.
|
|
3
|
+
*
|
|
4
|
+
* Uses the /v1/messages endpoint. System prompts are passed as the top-level
|
|
5
|
+
* `system` field (not a message role), per the Anthropic spec.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { SSMError } from '../errors/SSMError.js';
|
|
9
|
+
import type { TransformerBridge, BridgeGenerateOptions } from './TransformerBridge.js';
|
|
10
|
+
|
|
11
|
+
/** Construction options for `AnthropicBridge`; each default can be overridden per call where the bridge reads the matching per-call option. */
export interface AnthropicBridgeOptions {
  /** Anthropic API key. Sent as the `x-api-key` request header. */
  apiKey : string;
  /**
   * Model to use. Default: 'claude-haiku-4-5' (cheapest current model:
   * $1/1M input, $5/1M output). The previous default `claude-3-5-haiku-*`
   * was retired on 2026-02-19 and now 404s.
   */
  model? : string;
  /** Anthropic API version header (`anthropic-version`). Default: '2023-06-01'. */
  apiVersion? : string;
  /** Default system prompt. Default: none (no `system` field is sent). */
  systemPrompt? : string;
  /** Default max tokens — required by Anthropic. Default: 1024. */
  maxTokens? : number;
  /**
   * When true (default), the system prompt is sent as a cacheable content
   * block (`cache_control: {type: 'ephemeral'}`). Prompt caching bills cache
   * reads at ~10% of the input price, so a stable system prefix reused across
   * turns is up to ~90% cheaper on its input tokens. Caching only engages once
   * the cached prefix exceeds the model minimum (~4096 tokens for Haiku 4.5);
   * below that it is a silent no-op, never an error. Set false to opt out,
   * in which case the system prompt is sent as a plain string.
   */
  cacheSystem? : boolean;
}
|
|
36
|
+
|
|
37
|
+
// Anthropic Messages endpoint; every bridge request is POSTed to this URL.
const API_URL = 'https://api.anthropic.com/v1/messages';
|
|
38
|
+
|
|
39
|
+
/**
 * TransformerBridge implementation backed by the Anthropic Messages API.
 *
 * The prompt is sent as a single user message; the system prompt (if any) is
 * sent in the top-level `system` field, optionally marked for prompt caching.
 *
 * NOTE(review): only `model`, `maxTokens` and `systemPrompt` are read from the
 * per-call options here; other sampling options are not forwarded — confirm
 * this is intentional.
 */
export class AnthropicBridge implements TransformerBridge {
  readonly supportsStreaming = true as const;

  private readonly _apiKey      : string;
  private readonly _model       : string;
  private readonly _apiVersion  : string;
  private readonly _systemPrompt: string;
  private readonly _maxTokens   : number;
  private readonly _cacheSystem : boolean;

  constructor(opts: AnthropicBridgeOptions) {
    this._apiKey       = opts.apiKey;
    this._model        = opts.model ?? 'claude-haiku-4-5';
    this._apiVersion   = opts.apiVersion ?? '2023-06-01';
    this._systemPrompt = opts.systemPrompt ?? '';
    this._maxTokens    = opts.maxTokens ?? 1024;
    this._cacheSystem  = opts.cacheSystem ?? true;
  }

  /**
   * Requests a complete (non-streaming) response.
   *
   * @returns The text of every text-bearing content block, concatenated in order.
   * @throws SSMError('BRIDGE_REQUEST_FAILED') on a non-2xx status.
   * @throws SSMError('BRIDGE_RESPONSE_INVALID') when the response has no text block.
   */
  async generate(prompt: string, opts: BridgeGenerateOptions = {}): Promise<string> {
    const res = await this._request(prompt, opts, false);

    const json: unknown = await res.json();
    const content = AnthropicBridge._extractText(json);
    if (content === undefined) {
      throw new SSMError('BRIDGE_RESPONSE_INVALID', 'Unexpected Anthropic response shape.');
    }
    return content;
  }

  /**
   * Requests a streaming response and yields text deltas as they arrive.
   *
   * @throws SSMError('BRIDGE_REQUEST_FAILED') on a non-2xx status.
   * @throws SSMError('BRIDGE_RESPONSE_INVALID') when the response has no body.
   */
  async *stream(prompt: string, opts: BridgeGenerateOptions = {}): AsyncIterable<string> {
    const res = await this._request(prompt, opts, true);

    if (!res.body) {
      throw new SSMError('BRIDGE_RESPONSE_INVALID', 'Anthropic streaming response has no body.');
    }

    yield* parseAnthropicStream(res.body);
  }

  /**
   * Collects the text of a Messages API response. `content` is a list of
   * blocks, so every block carrying a string `text` is joined rather than
   * reading only the first block. Returns `undefined` when none is found.
   */
  private static _extractText(json: unknown): string | undefined {
    if (typeof json !== 'object' || json === null) return undefined;
    const blocks = (json as { content?: unknown }).content;
    if (!Array.isArray(blocks)) return undefined;

    const parts: string[] = [];
    for (const block of blocks as unknown[]) {
      if (typeof block !== 'object' || block === null) continue;
      const text = (block as { text?: unknown }).text;
      if (typeof text === 'string') parts.push(text);
    }
    return parts.length > 0 ? parts.join('') : undefined;
  }

  /** Sends the request and converts a non-2xx status into an SSMError. */
  private async _request(
    prompt: string,
    opts  : BridgeGenerateOptions,
    stream: boolean,
  ): Promise<Response> {
    const res = await this._fetch(this._buildBody(prompt, opts, stream));
    if (res.ok) return res;

    // The error body is best-effort context; failing to read it must not mask the status.
    const text  = await res.text().catch(() => '');
    const label = stream ? 'Anthropic streaming API' : 'Anthropic API';
    throw new SSMError('BRIDGE_REQUEST_FAILED', `${label} returned ${res.status}: ${text}`);
  }

  /** Serialises the JSON request body; per-call options win over constructor defaults. */
  private _buildBody(prompt: string, opts: BridgeGenerateOptions, stream: boolean): string {
    const sys = opts.systemPrompt ?? this._systemPrompt;
    const body: Record<string, unknown> = {
      model     : opts.model ?? this._model,
      max_tokens: opts.maxTokens ?? this._maxTokens,
      messages  : [{ role: 'user', content: prompt }],
    };
    if (sys) {
      // Caching is a prefix match: render the stable system prompt as a
      // single cache-marked content block so reads on subsequent turns are
      // billed at ~10% of input price. The volatile user message is sent
      // unmarked after it, so it never enters the cached prefix.
      body['system'] = this._cacheSystem
        ? [{ type: 'text', text: sys, cache_control: { type: 'ephemeral' } }]
        : sys;
    }
    if (stream) body['stream'] = true;
    return JSON.stringify(body);
  }

  /** POSTs a serialised body to the Messages endpoint with auth and version headers. */
  private _fetch(body: string): Promise<Response> {
    return fetch(API_URL, {
      method : 'POST',
      headers: {
        'Content-Type'      : 'application/json',
        'x-api-key'         : this._apiKey,
        'anthropic-version' : this._apiVersion,
      },
      body,
    });
  }
}
|
|
129
|
+
|
|
130
|
+
// ── SSE parser (Anthropic event format) ──────────────────────────────────────
|
|
131
|
+
|
|
132
|
+
/**
 * Extracts the text carried by one SSE line, if any.
 * Returns `undefined` for non-`data` lines, malformed JSON, events other
 * than `content_block_delta`, and deltas without non-empty string text.
 */
function anthropicDeltaText(line: string): string | undefined {
  const trimmed = line.trim();
  // Accept both "data: {...}" and "data:{...}"; the space is optional in SSE.
  if (!trimmed.startsWith('data:')) return undefined;

  try {
    const event: unknown = JSON.parse(trimmed.slice(5).trimStart());
    if (typeof event !== 'object' || event === null) return undefined;

    const { type, delta } = event as { type?: unknown; delta?: unknown };
    // content_block_delta events carry the streamed text
    if (type !== 'content_block_delta') return undefined;
    if (typeof delta !== 'object' || delta === null) return undefined;

    const text = (delta as { text?: unknown }).text;
    return typeof text === 'string' && text.length > 0 ? text : undefined;
  } catch {
    // Skip malformed SSE lines
    return undefined;
  }
}

/**
 * Parses an Anthropic server-sent-event body and yields each text delta.
 *
 * Lines may be split across network chunks, so incomplete trailing text is
 * buffered until its newline arrives. When the stream ends, the decoder is
 * flushed and any final line without a trailing newline is still processed.
 * If the consumer stops iterating early (or reading fails), the underlying
 * stream is cancelled before the reader lock is released, so the response
 * body is not left open.
 *
 * @param body Response body to read; it is locked for the duration of the parse.
 */
async function* parseAnthropicStream(body: ReadableStream<Uint8Array>): AsyncIterable<string> {
  const reader  = body.getReader();
  const decoder = new TextDecoder();
  let buffer   = '';
  let finished = false;

  try {
    while (!finished) {
      const { done, value } = await reader.read();
      finished = done;

      // On the final read, flush bytes the decoder is still holding.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

      const lines = buffer.split('\n');
      // Mid-stream the last piece may be an incomplete line: keep it for the
      // next chunk. At end of stream nothing more will arrive, so parse it.
      buffer = done ? '' : (lines.pop() ?? '');

      for (const line of lines) {
        const text = anthropicDeltaText(line);
        if (text !== undefined) yield text;
      }
    }
  } finally {
    if (!finished) {
      // Early exit: tell the source to stop producing. Cancellation errors
      // are irrelevant at this point and must not replace the original one.
      try {
        await reader.cancel();
      } catch {
        // ignore
      }
    }
    reader.releaseLock();
  }
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CachingBridge – a read-through caching decorator for any TransformerBridge.
|
|
3
|
+
*
|
|
4
|
+
* Wraps an inner bridge and memoises `generate()` keyed on the full request
|
|
5
|
+
* shape (model, system, prompt, sampling). Identical completions are served
|
|
6
|
+
* from memory instead of re-billing the provider — the single most effective
|
|
7
|
+
* lever for cutting LLM spend on repeated prompts (distillation passes, retries,
|
|
8
|
+
* fan-out over duplicate inputs).
|
|
9
|
+
*
|
|
10
|
+
* Composes with every bridge, so the caching policy lives in one place rather
|
|
11
|
+
* than being reimplemented per provider:
|
|
12
|
+
*
|
|
13
|
+
* const bridge = new CachingBridge(new AnthropicBridge({ apiKey }));
|
|
14
|
+
*
|
|
15
|
+
* Streaming is delegated straight through and never cached — a token stream is
|
|
16
|
+
* consumed once and caching it would defeat its purpose.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import type { TransformerBridge, BridgeGenerateOptions } from './TransformerBridge.js';
|
|
20
|
+
import { ResponseCache, buildCacheKey, type ResponseCacheOptions } from './ResponseCache.js';
|
|
21
|
+
|
|
22
|
+
/**
 * Options for `CachingBridge`. The inherited `ResponseCacheOptions` are used
 * to build a private cache when `cache` is not supplied.
 */
export interface CachingBridgeOptions extends ResponseCacheOptions {
  /**
   * Provide a shared ResponseCache instance instead of letting the bridge
   * create its own. Use this to share one cache across multiple bridges, or
   * to inspect/clear the cache from outside.
   *
   * NOTE(review): the cache key is built from the per-call options only, not
   * from the wrapped bridge's own defaults — confirm that bridges sharing a
   * cache cannot serve each other's responses when `model` is left unset.
   */
  cache? : ResponseCache;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Read-through caching decorator around another TransformerBridge.
 *
 * `generate()` results are memoised under a key derived from the prompt and
 * the per-call options (model, system prompt, max tokens, temperature, topP).
 * Identical requests issued while the first one is still running share that
 * single provider call instead of each triggering their own.
 * `stream()` is passed straight through and never cached.
 */
export class CachingBridge implements TransformerBridge {
  private readonly _inner : TransformerBridge;
  private readonly _cache : ResponseCache;
  /** Provider calls currently running, by cache key, so duplicates can join them. */
  private readonly _pending = new Map<ReturnType<typeof buildCacheKey>, Promise<string>>();

  /**
   * @param inner Bridge whose `generate()` results are cached.
   * @param opts  Either a ready `cache` instance, or options for a new private one.
   */
  constructor(inner: TransformerBridge, opts: CachingBridgeOptions = {}) {
    this._inner = inner;
    this._cache = opts.cache ?? new ResponseCache(opts);
  }

  /** Mirrors the wrapped bridge so callers can still gate on streaming support. */
  get supportsStreaming(): boolean {
    return this._inner.supportsStreaming;
  }

  /** The underlying cache — exposed for stats inspection and manual eviction. */
  get cache(): ResponseCache {
    return this._cache;
  }

  /**
   * Returns the cached completion for this request, or asks the inner bridge
   * and stores the result. A failed provider call is not cached; every caller
   * waiting on it receives the same rejection.
   */
  async generate(prompt: string, opts: BridgeGenerateOptions = {}): Promise<string> {
    const key = buildCacheKey({
      prompt,
      model        : opts.model,
      systemPrompt : opts.systemPrompt,
      maxTokens    : opts.maxTokens,
      temperature  : opts.temperature,
      topP         : opts.topP,
    });

    const cached = this._cache.get(key);
    if (cached !== undefined) return cached;

    // An identical request is already running: wait for it rather than
    // paying the provider a second time for the same completion.
    const running = this._pending.get(key);
    if (running !== undefined) return running;

    const request = this._inner.generate(prompt, opts);
    this._pending.set(key, request);
    try {
      const value = await request;
      this._cache.set(key, value, Date.now());
      return value;
    } finally {
      // Always drop the marker so a failure does not block later retries.
      this._pending.delete(key);
    }
  }

  /**
   * Streaming is delegated to the inner bridge unchanged and is never cached.
   * The method always exists on this class; it throws when the inner bridge
   * has no `stream` method, so gate calls on `supportsStreaming`.
   */
  stream(prompt: string, opts?: BridgeGenerateOptions): AsyncIterable<string> {
    if (!this._inner.stream) {
      throw new Error('Wrapped bridge does not support streaming.');
    }
    return this._inner.stream(prompt, opts);
  }
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FetchBridge – generic OpenAI-compatible bridge for local or hosted endpoints.
|
|
3
|
+
*
|
|
4
|
+
* Works with Ollama, LM Studio, vLLM, llama.cpp server, or any service that
|
|
5
|
+
* exposes a /chat/completions endpoint compatible with the OpenAI request
|
|
6
|
+
* and response schema.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { OpenAIBridge } from './OpenAIBridge.js';
|
|
10
|
+
import type { BridgeGenerateOptions } from './TransformerBridge.js';
|
|
11
|
+
|
|
12
|
+
/** Construction options for `FetchBridge`. */
export interface FetchBridgeOptions {
  /** Base URL of the OpenAI-compatible server, e.g. 'http://localhost:1234/v1'. */
  baseUrl : string;
  /** API key — many local servers require any non-empty string. Default: 'local'. */
  apiKey? : string;
  /** Model name understood by the server. Default: 'default'. */
  model? : string;
  /** Default system prompt. Passed through to OpenAIBridge unchanged. */
  systemPrompt? : string;
  /**
   * Default max tokens. Default: 512.
   * NOTE(review): FetchBridge forwards this value as-is, so the stated
   * default is presumably applied by OpenAIBridge — confirm there.
   */
  maxTokens? : number;
}
|
|
24
|
+
|
|
25
|
+
/**
 * OpenAIBridge preconfigured for a custom base URL. The class adds no
 * behaviour of its own: it fills in placeholder values for a missing API key
 * and model name, then hands everything to the OpenAIBridge constructor.
 */
export class FetchBridge extends OpenAIBridge {
  constructor(opts: FetchBridgeOptions) {
    // Placeholders for servers that ignore, but still expect, these fields.
    const apiKey = opts.apiKey ?? 'local';
    const model  = opts.model ?? 'default';

    super({
      apiKey,
      model,
      baseUrl      : opts.baseUrl,
      systemPrompt : opts.systemPrompt,
      maxTokens    : opts.maxTokens,
    });
  }
}
|
|
40
|
+
|
|
41
|
+
export type { BridgeGenerateOptions };
|