@directive-run/ai 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -73
- package/dist/anthropic.cjs +1 -1
- package/dist/anthropic.cjs.map +1 -1
- package/dist/anthropic.d.cts +5 -9
- package/dist/anthropic.d.ts +5 -9
- package/dist/anthropic.js +1 -1
- package/dist/anthropic.js.map +1 -1
- package/dist/gemini.cjs +3 -0
- package/dist/gemini.cjs.map +1 -0
- package/dist/gemini.d.cts +93 -0
- package/dist/gemini.d.ts +93 -0
- package/dist/gemini.js +3 -0
- package/dist/gemini.js.map +1 -0
- package/dist/index.cjs +117 -45
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1376 -2118
- package/dist/index.d.ts +1376 -2118
- package/dist/index.js +117 -45
- package/dist/index.js.map +1 -1
- package/dist/multi-agent-orchestrator-CxL8ycw_.d.cts +2290 -0
- package/dist/multi-agent-orchestrator-uMp8bLfV.d.ts +2290 -0
- package/dist/ollama.cjs.map +1 -1
- package/dist/ollama.d.cts +3 -2
- package/dist/ollama.d.ts +3 -2
- package/dist/ollama.js.map +1 -1
- package/dist/openai.cjs +2 -2
- package/dist/openai.cjs.map +1 -1
- package/dist/openai.d.cts +4 -8
- package/dist/openai.d.ts +4 -8
- package/dist/openai.js +2 -2
- package/dist/openai.js.map +1 -1
- package/dist/semantic-cache-F0psCRuz.d.cts +271 -0
- package/dist/semantic-cache-F0psCRuz.d.ts +271 -0
- package/dist/testing.cjs +42 -7
- package/dist/testing.cjs.map +1 -1
- package/dist/testing.d.cts +365 -5
- package/dist/testing.d.ts +365 -5
- package/dist/testing.js +42 -7
- package/dist/testing.js.map +1 -1
- package/dist/types-Co4BzMiH.d.cts +1373 -0
- package/dist/types-Co4BzMiH.d.ts +1373 -0
- package/package.json +8 -3
- package/dist/types-BKCdgKC-.d.cts +0 -300
- package/dist/types-BKCdgKC-.d.ts +0 -300
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Caching Guardrail
|
|
3
|
+
*
|
|
4
|
+
* Caches agent responses based on semantic similarity to reduce redundant LLM calls.
|
|
5
|
+
* Uses vector embeddings to find semantically similar previous queries.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { createSemanticCacheGuardrail } from '@directive-run/ai';
|
|
10
|
+
*
|
|
11
|
+
* const cache = createSemanticCache({
|
|
12
|
+
* embedder: async (text) => {
|
|
13
|
+
* // Use your embedding model (OpenAI, local model, etc.)
|
|
14
|
+
* return await getEmbedding(text);
|
|
15
|
+
* },
|
|
16
|
+
* similarityThreshold: 0.95,
|
|
17
|
+
* maxCacheSize: 1000,
|
|
18
|
+
* ttlMs: 3600000, // 1 hour
|
|
19
|
+
* });
|
|
20
|
+
* const cacheGuardrail = createSemanticCacheGuardrail({ cache });
|
|
21
|
+
* const orchestrator = createAgentOrchestrator({
|
|
22
|
+
* guardrails: {
|
|
23
|
+
* input: [cacheGuardrail],
|
|
24
|
+
* },
|
|
25
|
+
* runner: run,
|
|
26
|
+
* });
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
/** Vector embedding: a plain array of numbers (the value an `EmbedderFn` resolves to). */
|
|
30
|
+
type Embedding = number[];
|
|
31
|
+
/** Async function that takes a text string and resolves to its `Embedding`. */
|
|
32
|
+
type EmbedderFn = (text: string) => Promise<Embedding>;
|
|
33
|
+
/**
* Cached response entry.
*
* This is the record shape exchanged with `SemanticCacheStorage` and exposed as
* `CacheLookupResult.entry`. Field notes marked "presumably" are inferred from the
* names and signatures in this declaration file, not from the implementation.
*/
|
|
34
|
+
interface CacheEntry {
|
|
35
|
+
/** Entry identifier; presumably the `id` accepted by `SemanticCacheStorage.updateEntry`/`removeEntry` — TODO confirm. */
id: string;
|
|
36
|
+
/** Query text; presumably the `query` argument given to `SemanticCache.store` — TODO confirm. */
query: string;
|
|
37
|
+
/** Embedding associated with `query`; presumably produced by the configured `embedder` — TODO confirm. */
queryEmbedding: Embedding;
|
|
38
|
+
/** Cached response text (the `createSemanticCache` example returns this on a hit). */
response: string;
|
|
39
|
+
/** Arbitrary key/value data; presumably the `metadata` argument given to `SemanticCache.store` — TODO confirm. */
metadata: Record<string, unknown>;
|
|
40
|
+
/** Creation time as a number. NOTE(review): unit is not visible here; presumably epoch milliseconds — confirm. */
createdAt: number;
|
|
41
|
+
/** Last-access time as a number. NOTE(review): unit is not visible here; presumably epoch milliseconds — confirm. */
accessedAt: number;
|
|
42
|
+
/** Access counter; `SemanticCacheStorage.updateEntry` cites "access count" as an example of an updated field. */
accessCount: number;
|
|
43
|
+
/** Optional agent name; presumably set when an `agentName` is supplied to `SemanticCache.store` — TODO confirm. */
agentName?: string;
|
|
44
|
+
}
|
|
45
|
+
/** Cache lookup result, as resolved by `SemanticCache.lookup`. */
|
|
46
|
+
interface CacheLookupResult {
|
|
47
|
+
/** Whether the lookup was a cache hit; the `createSemanticCache` example reads `entry` only after checking this flag. */
hit: boolean;
|
|
48
|
+
/** The matching entry. Optional — presumably present only when `hit` is true (TODO confirm). */
entry?: CacheEntry;
|
|
49
|
+
/** Similarity score of the match. Optional — presumably present only when `hit` is true (TODO confirm). */
similarity?: number;
|
|
50
|
+
/** Lookup latency in milliseconds (per the `Ms` suffix); exactly what is timed is not visible in this declaration. */
latencyMs: number;
|
|
51
|
+
}
|
|
52
|
+
/**
* Semantic cache configuration, accepted by `createSemanticCache`.
*
* Only `embedder` is required. Default values for the optional fields are not
* visible in this declaration file.
*/
|
|
53
|
+
interface SemanticCacheConfig {
|
|
54
|
+
/** Function to generate embeddings (required). */
|
|
55
|
+
embedder: EmbedderFn;
|
|
56
|
+
/** Similarity threshold (0.0 to 1.0) for cache hits */
|
|
57
|
+
similarityThreshold?: number;
|
|
58
|
+
/** Maximum number of entries to cache */
|
|
59
|
+
maxCacheSize?: number;
|
|
60
|
+
/** Time-to-live in milliseconds for cache entries */
|
|
61
|
+
ttlMs?: number;
|
|
62
|
+
/** Cache namespace for multi-tenant scenarios; presumably the `namespace` passed to `SemanticCacheStorage` methods — TODO confirm. */
|
|
63
|
+
namespace?: string;
|
|
64
|
+
/** Custom storage backend (defaults to in-memory) */
|
|
65
|
+
storage?: SemanticCacheStorage;
|
|
66
|
+
/** Callback when a cache hit occurs; receives the matched entry and its similarity score. */
|
|
67
|
+
onHit?: (entry: CacheEntry, similarity: number) => void;
|
|
68
|
+
/** Callback when a cache miss occurs; receives the query string. */
|
|
69
|
+
onMiss?: (query: string) => void;
|
|
70
|
+
/** Callback when a cache lookup encounters an error; receives the `Error`. */
|
|
71
|
+
onError?: (error: Error) => void;
|
|
72
|
+
/** Whether to include agent name in cache key */
|
|
73
|
+
perAgent?: boolean;
|
|
74
|
+
}
|
|
75
|
+
/**
* Storage interface for cache backends.
*
* Every method is asynchronous (returns a promise) and takes the `namespace`
* string as its first argument. Supply an implementation via
* `SemanticCacheConfig.storage`.
*/
|
|
76
|
+
interface SemanticCacheStorage {
|
|
77
|
+
/** Get all entries for a namespace */
|
|
78
|
+
getEntries(namespace: string): Promise<CacheEntry[]>;
|
|
79
|
+
/** Add an entry to the cache */
|
|
80
|
+
addEntry(namespace: string, entry: CacheEntry): Promise<void>;
|
|
81
|
+
/** Update the entry identified by `id` with a partial set of `CacheEntry` fields (e.g., access count) */
|
|
82
|
+
updateEntry(namespace: string, id: string, updates: Partial<CacheEntry>): Promise<void>;
|
|
83
|
+
/** Remove the entry identified by `id` */
|
|
84
|
+
removeEntry(namespace: string, id: string): Promise<void>;
|
|
85
|
+
/** Clear all entries in a namespace */
|
|
86
|
+
clear(namespace: string): Promise<void>;
|
|
87
|
+
}
|
|
88
|
+
/** Semantic cache instance, as returned by `createSemanticCache`. */
|
|
89
|
+
interface SemanticCache {
|
|
90
|
+
/** Look up a query in the cache; `agentName` is optional. Resolves to a `CacheLookupResult`. */
|
|
91
|
+
lookup(query: string, agentName?: string): Promise<CacheLookupResult>;
|
|
92
|
+
/** Store a response in the cache for `query`; `agentName` and `metadata` are optional. */
|
|
93
|
+
store(query: string, response: string, agentName?: string, metadata?: Record<string, unknown>): Promise<void>;
|
|
94
|
+
/** Invalidate cache entries matching a predicate. Resolves to a number — presumably the count of entries invalidated (TODO confirm). */
|
|
95
|
+
invalidate(predicate: (entry: CacheEntry) => boolean): Promise<number>;
|
|
96
|
+
/** Clear all cache entries */
|
|
97
|
+
clear(): Promise<void>;
|
|
98
|
+
/** Get cache statistics. Unlike the other methods, this returns synchronously. */
|
|
99
|
+
getStats(): CacheStats;
|
|
100
|
+
/** Export cache entries (for persistence) */
|
|
101
|
+
export(): Promise<CacheEntry[]>;
|
|
102
|
+
/** Import cache entries (from persistence) */
|
|
103
|
+
import(entries: CacheEntry[]): Promise<void>;
|
|
104
|
+
}
|
|
105
|
+
/**
* Cache statistics, as returned by `SemanticCache.getStats`.
*
* Field notes marked "presumably" are inferred from names only; how each value
* is computed is not visible in this declaration file.
*/
|
|
106
|
+
interface CacheStats {
|
|
107
|
+
/** Number of entries; presumably the current entry count — TODO confirm. */
totalEntries: number;
|
|
108
|
+
/** Number of cache hits; the counting window is not visible here. */
totalHits: number;
|
|
109
|
+
/** Number of cache misses; the counting window is not visible here. */
totalMisses: number;
|
|
110
|
+
/** Hit rate; presumably hits divided by total lookups — TODO confirm formula and range. */
hitRate: number;
|
|
111
|
+
/** Average similarity across hits; the value reported when there are no hits is not visible here. */
avgSimilarityOnHit: number;
|
|
112
|
+
/** Oldest entry as a number, or `null`; presumably a creation timestamp, `null` when the cache is empty — TODO confirm. */
oldestEntry: number | null;
|
|
113
|
+
/** Newest entry as a number, or `null`; presumably a creation timestamp, `null` when the cache is empty — TODO confirm. */
newestEntry: number | null;
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Create an in-memory cache storage backend.
*
* Takes no arguments.
*
* @returns A `SemanticCacheStorage` that can be supplied as `SemanticCacheConfig.storage`.
|
|
117
|
+
*/
|
|
118
|
+
declare function createInMemoryStorage(): SemanticCacheStorage;
|
|
119
|
+
/**
|
|
120
|
+
* Create a semantic cache instance.
*
* @param config - Cache configuration; `embedder` is the only required field of `SemanticCacheConfig`.
* @returns A `SemanticCache` exposing `lookup`, `store`, `invalidate`, `clear`, `getStats`, `export` and `import`.
|
|
121
|
+
*
|
|
122
|
+
* @example
|
|
123
|
+
* ```typescript
|
|
124
|
+
* const cache = createSemanticCache({
|
|
125
|
+
* embedder: async (text) => {
|
|
126
|
+
* const response = await openai.embeddings.create({
|
|
127
|
+
* model: 'text-embedding-3-small',
|
|
128
|
+
* input: text,
|
|
129
|
+
* });
|
|
130
|
+
* return response.data[0].embedding;
|
|
131
|
+
* },
|
|
132
|
+
* similarityThreshold: 0.92,
|
|
133
|
+
* maxCacheSize: 500,
|
|
134
|
+
* ttlMs: 3600000, // 1 hour
|
|
135
|
+
* });
|
|
136
|
+
*
|
|
137
|
+
* // Check cache before calling agent
|
|
138
|
+
* const result = await cache.lookup(userQuery);
|
|
139
|
+
* if (result.hit) {
|
|
140
|
+
* return result.entry!.response;
|
|
141
|
+
* }
|
|
142
|
+
*
|
|
143
|
+
* // Call agent and cache response
|
|
144
|
+
* const response = await runAgent(userQuery);
|
|
145
|
+
* await cache.store(userQuery, response);
|
|
146
|
+
* ```
|
|
147
|
+
*/
|
|
148
|
+
declare function createSemanticCache(config: SemanticCacheConfig): SemanticCache;
|
|
149
|
+
/** Input guardrail data for semantic cache: the argument of the function returned by `createSemanticCacheGuardrail`. */
|
|
150
|
+
interface SemanticCacheGuardrailData {
|
|
151
|
+
/** Input text to check; the `createSemanticCacheGuardrail` example passes the user query here. */
input: string;
|
|
152
|
+
/** Optional agent name; presumably forwarded to the cache lookup — TODO confirm. */
agentName?: string;
|
|
153
|
+
}
|
|
154
|
+
/**
|
|
155
|
+
* Result of semantic cache guardrail (resolved by the function that `createSemanticCacheGuardrail` returns).
|
|
156
|
+
*
|
|
157
|
+
* **Important semantics:**
|
|
158
|
+
* - `passed: false` + `cacheHit: true` = Short-circuit with cached response (not an error!)
|
|
159
|
+
* - `passed: true` + `cacheHit: false` = No cache hit, proceed with agent call
|
|
160
|
+
*
|
|
161
|
+
* The `passed: false` follows guardrail convention where "not passing" stops the flow,
|
|
162
|
+
* but in this case stopping is desirable (returning cached data is good).
|
|
163
|
+
*/
|
|
164
|
+
interface SemanticCacheGuardrailResult {
|
|
165
|
+
/**
|
|
166
|
+
* Whether to proceed with the agent call.
|
|
167
|
+
* `false` means short-circuit with cached response (this is good, not an error).
|
|
168
|
+
* `true` means no cache hit, proceed with agent.
|
|
169
|
+
*/
|
|
170
|
+
passed: boolean;
|
|
171
|
+
/** Indicates whether this was a cache hit; per the interface notes, `true` pairs with `passed: false`. */
|
|
172
|
+
cacheHit: boolean;
|
|
173
|
+
/** Optional reason for the result. */
|
|
174
|
+
reason?: string;
|
|
175
|
+
/** The cached response (only present on cache hit) */
|
|
176
|
+
cachedResponse?: string;
|
|
177
|
+
/** Similarity score (0-1) of the cache hit; optional — presumably absent on a miss (TODO confirm). */
|
|
178
|
+
similarity?: number;
|
|
179
|
+
}
|
|
180
|
+
/**
|
|
181
|
+
* Create a semantic caching input guardrail.
*
* @param config - Object holding the `cache` (`SemanticCache`) that backs the guardrail.
* @returns An async function that takes `{ input, agentName? }` and resolves to a `SemanticCacheGuardrailResult`.
|
|
182
|
+
*
|
|
183
|
+
* **How it works:**
|
|
184
|
+
* - On cache HIT: Returns `{ passed: false, cacheHit: true, cachedResponse: "..." }`
|
|
185
|
+
* The orchestrator should detect `cacheHit: true` and return the cached response.
|
|
186
|
+
* - On cache MISS: Returns `{ passed: true, cacheHit: false }`
|
|
187
|
+
* Proceed with normal agent execution.
|
|
188
|
+
*
|
|
189
|
+
* **Important:** `passed: false` with `cacheHit: true` is SUCCESS, not failure.
|
|
190
|
+
* The guardrail "short-circuits" the flow to return cached data efficiently.
|
|
191
|
+
*
|
|
192
|
+
* @example
|
|
193
|
+
* ```typescript
|
|
194
|
+
* const cacheGuardrail = createSemanticCacheGuardrail({
|
|
195
|
+
* cache: mySemanticCache,
|
|
196
|
+
* });
|
|
197
|
+
*
|
|
198
|
+
* const orchestrator = createAgentOrchestrator({
|
|
199
|
+
* guardrails: {
|
|
200
|
+
* input: [
|
|
201
|
+
* {
|
|
202
|
+
* name: 'semantic-cache',
|
|
203
|
+
* fn: cacheGuardrail,
|
|
204
|
+
* },
|
|
205
|
+
* ],
|
|
206
|
+
* },
|
|
207
|
+
* runner: run,
|
|
208
|
+
* });
|
|
209
|
+
*
|
|
210
|
+
* // In your orchestrator wrapper, check for cache hits:
|
|
211
|
+
* const guardrailResult = await cacheGuardrail({ input: userQuery });
|
|
212
|
+
* if (guardrailResult.cacheHit) {
|
|
213
|
+
* return guardrailResult.cachedResponse; // Fast path!
|
|
214
|
+
* }
|
|
215
|
+
* // Otherwise proceed with agent call...
|
|
216
|
+
* ```
|
|
217
|
+
*/
|
|
218
|
+
declare function createSemanticCacheGuardrail(config: {
|
|
219
|
+
cache: SemanticCache;
|
|
220
|
+
}): (data: SemanticCacheGuardrailData) => Promise<SemanticCacheGuardrailResult>;
|
|
221
|
+
/**
|
|
222
|
+
* Create a simple hash-based "embedder" for testing.
|
|
223
|
+
* NOT suitable for production - use a real embedding model.
*
* @param dimensions - Optional; presumably the length of the produced embedding vectors (default not visible in this declaration) — TODO confirm.
* @returns An `EmbedderFn`, usable wherever `SemanticCacheConfig.embedder` is expected.
|
|
224
|
+
*/
|
|
225
|
+
declare function createTestEmbedder(dimensions?: number): EmbedderFn;
|
|
226
|
+
/** Batched embedder instance (returned by `createBatchedEmbedder`) with flush and dispose capability */
|
|
227
|
+
interface BatchedEmbedder {
|
|
228
|
+
/** Embed a single text (batched internally); has the plain `EmbedderFn` signature. */
|
|
229
|
+
embed: EmbedderFn;
|
|
230
|
+
/** Flush any pending batch immediately; returns a promise — presumably settling once the flush has completed (TODO confirm). */
|
|
231
|
+
flush(): Promise<void>;
|
|
232
|
+
/** Dispose of the embedder, clearing timers and rejecting pending requests. Returns synchronously (`void`). */
|
|
233
|
+
dispose(): void;
|
|
234
|
+
}
|
|
235
|
+
/**
|
|
236
|
+
* Create a batched embedder that groups multiple texts into single API calls.
|
|
237
|
+
*
|
|
238
|
+
* **BREAKING CHANGE:** Previously returned `EmbedderFn` directly. Now returns
|
|
239
|
+
* a `BatchedEmbedder` object with `embed`, `flush`, and `dispose` methods.
|
|
240
|
+
*
|
|
241
|
+
* To migrate: `const embed = createBatchedEmbedder(...)` becomes
|
|
242
|
+
* `const { embed } = createBatchedEmbedder(...)`.
*
* @param config - Batching options:
* - `embedBatch` (required): embeds an array of texts in one call and resolves to an array of embeddings.
* - `batchSize` (optional): presumably the maximum number of texts per `embedBatch` call — TODO confirm; default not visible here.
* - `maxWaitMs` (optional): presumably how long, in milliseconds, a partial batch waits before being sent — TODO confirm; default not visible here.
* @returns A `BatchedEmbedder` with `embed`, `flush` and `dispose`.
|
|
243
|
+
*
|
|
244
|
+
* @example
|
|
245
|
+
* ```typescript
|
|
246
|
+
* const batchedEmbedder = createBatchedEmbedder({
|
|
247
|
+
* batchSize: 20,
|
|
248
|
+
* embedBatch: async (texts) => {
|
|
249
|
+
* const response = await openai.embeddings.create({
|
|
250
|
+
* model: 'text-embedding-3-small',
|
|
251
|
+
* input: texts,
|
|
252
|
+
* });
|
|
253
|
+
* return response.data.map(d => d.embedding);
|
|
254
|
+
* },
|
|
255
|
+
* maxWaitMs: 50,
|
|
256
|
+
* });
|
|
257
|
+
*
|
|
258
|
+
* // Use the embedder
|
|
259
|
+
* const embedding = await batchedEmbedder.embed("Hello world");
|
|
260
|
+
*
|
|
261
|
+
* // Clean up when done
|
|
262
|
+
* batchedEmbedder.dispose();
|
|
263
|
+
* ```
|
|
264
|
+
*/
|
|
265
|
+
declare function createBatchedEmbedder(config: {
|
|
266
|
+
batchSize?: number;
|
|
267
|
+
embedBatch: (texts: string[]) => Promise<Embedding[]>;
|
|
268
|
+
maxWaitMs?: number;
|
|
269
|
+
}): BatchedEmbedder;
|
|
270
|
+
|
|
271
|
+
// Exports this chunk's public names under single-letter aliases (presumably generated by the
// bundler so the package entry points can re-export them under their real names — confirm).
// NOTE(review): SemanticCacheGuardrailData and SemanticCacheGuardrailResult are declared in this
// file but are not part of this export list.
export { type BatchedEmbedder as B, type CacheEntry as C, type Embedding as E, type SemanticCache as S, type EmbedderFn as a, type CacheLookupResult as b, type CacheStats as c, type SemanticCacheConfig as d, type SemanticCacheStorage as e, createBatchedEmbedder as f, createInMemoryStorage as g, createSemanticCache as h, createSemanticCacheGuardrail as i, createTestEmbedder as j };
|