@cartisien/engram 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +234 -151
  2. package/dist/benchmarks/ab-eval/harness.d.ts +31 -0
  3. package/dist/benchmarks/ab-eval/harness.d.ts.map +1 -0
  4. package/dist/benchmarks/ab-eval/harness.js +198 -0
  5. package/dist/benchmarks/ab-eval/harness.js.map +1 -0
  6. package/dist/benchmarks/ab-eval/questions.d.ts +14 -0
  7. package/dist/benchmarks/ab-eval/questions.d.ts.map +1 -0
  8. package/dist/benchmarks/ab-eval/questions.js +139 -0
  9. package/dist/benchmarks/ab-eval/questions.js.map +1 -0
  10. package/dist/benchmarks/ab-eval/report.d.ts +8 -0
  11. package/dist/benchmarks/ab-eval/report.d.ts.map +1 -0
  12. package/dist/benchmarks/ab-eval/report.js +53 -0
  13. package/dist/benchmarks/ab-eval/report.js.map +1 -0
  14. package/dist/benchmarks/ab-eval/run-smoke.d.ts +6 -0
  15. package/dist/benchmarks/ab-eval/run-smoke.d.ts.map +1 -0
  16. package/dist/benchmarks/ab-eval/run-smoke.js +21 -0
  17. package/dist/benchmarks/ab-eval/run-smoke.js.map +1 -0
  18. package/dist/compiler/extractor.d.ts +10 -0
  19. package/dist/compiler/extractor.d.ts.map +1 -0
  20. package/dist/compiler/extractor.js +110 -0
  21. package/dist/compiler/extractor.js.map +1 -0
  22. package/dist/compiler/schemas.d.ts +100 -0
  23. package/dist/compiler/schemas.d.ts.map +1 -0
  24. package/dist/compiler/schemas.js +46 -0
  25. package/dist/compiler/schemas.js.map +1 -0
  26. package/dist/compiler/validators/atomicity.d.ts +13 -0
  27. package/dist/compiler/validators/atomicity.d.ts.map +1 -0
  28. package/dist/compiler/validators/atomicity.js +33 -0
  29. package/dist/compiler/validators/atomicity.js.map +1 -0
  30. package/dist/compiler/validators/duplicate.d.ts +13 -0
  31. package/dist/compiler/validators/duplicate.d.ts.map +1 -0
  32. package/dist/compiler/validators/duplicate.js +32 -0
  33. package/dist/compiler/validators/duplicate.js.map +1 -0
  34. package/dist/compiler/validators/entity-reference.d.ts +13 -0
  35. package/dist/compiler/validators/entity-reference.d.ts.map +1 -0
  36. package/dist/compiler/validators/entity-reference.js +35 -0
  37. package/dist/compiler/validators/entity-reference.js.map +1 -0
  38. package/dist/compiler/validators/index.d.ts +20 -0
  39. package/dist/compiler/validators/index.d.ts.map +1 -0
  40. package/dist/compiler/validators/index.js +58 -0
  41. package/dist/compiler/validators/index.js.map +1 -0
  42. package/dist/compiler/validators/inference.d.ts +13 -0
  43. package/dist/compiler/validators/inference.d.ts.map +1 -0
  44. package/dist/compiler/validators/inference.js +27 -0
  45. package/dist/compiler/validators/inference.js.map +1 -0
  46. package/dist/compiler/validators/pronoun-ban.d.ts +11 -0
  47. package/dist/compiler/validators/pronoun-ban.d.ts.map +1 -0
  48. package/dist/compiler/validators/pronoun-ban.js +50 -0
  49. package/dist/compiler/validators/pronoun-ban.js.map +1 -0
  50. package/dist/compiler/validators/temporal.d.ts +13 -0
  51. package/dist/compiler/validators/temporal.d.ts.map +1 -0
  52. package/dist/compiler/validators/temporal.js +43 -0
  53. package/dist/compiler/validators/temporal.js.map +1 -0
  54. package/dist/compiler/validators/vagueness.d.ts +14 -0
  55. package/dist/compiler/validators/vagueness.d.ts.map +1 -0
  56. package/dist/compiler/validators/vagueness.js +49 -0
  57. package/dist/compiler/validators/vagueness.js.map +1 -0
  58. package/dist/index.d.ts +1 -0
  59. package/dist/index.d.ts.map +1 -1
  60. package/dist/index.js +8 -2
  61. package/dist/index.js.map +1 -1
  62. package/package.json +3 -2
package/README.md CHANGED
@@ -1,270 +1,353 @@
1
1
  # Engram
2
2
 
3
- > **Persistent semantic memory for AI agents.**
3
+ > Persistent semantic memory for AI agents — local-first, zero cloud, zero config.
4
4
 
5
- ![Engram demo](assets/demo.gif)
5
+ [![npm](https://img.shields.io/npm/v/@cartisien/engram)](https://www.npmjs.com/package/@cartisien/engram)
6
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.18988892.svg)](https://doi.org/10.5281/zenodo.18988892)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
6
8
 
7
- ```typescript
8
- import { Engram } from '@cartisien/engram';
9
+ ---
9
10
 
10
- const memory = new Engram({ dbPath: './memory.db' });
11
+ ## Quickstart — 30 seconds
11
12
 
12
- // Store
13
- await memory.remember('user_123', 'User prefers TypeScript and dark mode', 'user');
13
+ ### Claude Desktop / Cursor (MCP)
14
14
 
15
- // Recall semantically — finds the right memory even without exact keyword match
16
- const context = await memory.recall('user_123', 'what are the user\'s preferences?', 5);
17
- // [{ content: 'User prefers TypeScript and dark mode', similarity: 0.82, ... }]
15
+ ```json
16
+ {
17
+ "mcpServers": {
18
+ "engram": {
19
+ "command": "npx",
20
+ "args": ["-y", "@cartisien/engram-mcp"]
21
+ }
22
+ }
23
+ }
18
24
  ```
19
25
 
26
+ That's it. Engram gives Claude persistent memory across conversations — stored locally in a SQLite file, no API key required.
27
+
28
+ → Full MCP docs: [`@cartisien/engram-mcp`](https://github.com/Cartisien/engram-mcp)
29
+
20
30
  ---
21
31
 
22
- ## The Problem
32
+ ### TypeScript / Node.js SDK
33
+
34
+ ```bash
35
+ npm install @cartisien/engram
36
+ ```
23
37
 
24
- AI assistants are amnesiacs. Every conversation starts fresh. Context windows fill up. Important details get lost.
38
+ ```typescript
39
+ import { Engram } from '@cartisien/engram';
25
40
 
26
- Stuffing everything into the system prompt wastes tokens and still misses things. You need a retrieval layer not a dump.
41
+ const memory = new Engram(); // zero config — saves to ./engram.db
27
42
 
28
- ## The Solution
43
+ await memory.remember('user_123', 'Prefers TypeScript and dark mode');
44
+ const context = await memory.recall('user_123', 'what does this user prefer?');
45
+ // → [{ content: 'Prefers TypeScript and dark mode', similarity: 0.91 }]
46
+ ```
29
47
 
30
- Engram gives your agents **persistent, semantically searchable memory** SQLite-backed, TypeScript-first, zero config.
48
+ No Ollama? It falls back to keyword search automatically and tells you:
49
+ ```
50
+ [engram] Ollama not found — falling back to keyword search.
51
+ For semantic search: install Ollama and run: ollama pull nomic-embed-text
52
+ ```
31
53
 
32
- - **Semantic search:** Finds relevant memories by meaning, not just keywords (via local Ollama embeddings)
33
- - **Graph memory:** Extracts entity-relationship triples — recall connected context automatically
34
- - **Consolidation:** Summarizes old memories into long-term entries so context stays dense, not noisy
35
- - **Zero config:** Works out of the box, falls back to keyword search without Ollama
36
- - **Local-first:** Your data stays on your machine. No API keys, no cloud required
37
- - **MCP-native:** Drop into Claude Desktop or Cursor via [`@cartisien/engram-mcp`](https://github.com/Cartisien/engram-mcp)
38
- - **Typed:** Full TypeScript support
54
+ ---
39
55
 
40
- ## Installation
56
+ ### Python SDK
41
57
 
42
58
  ```bash
43
- npm install @cartisien/engram
59
+ pip install cartisien-engram
44
60
  ```
45
61
 
46
- ### Optional: Local Embeddings (Recommended)
62
+ ```python
63
+ from cartisien_engram import Engram
47
64
 
48
- For semantic search, install [Ollama](https://ollama.ai) and pull the embedding model:
65
+ memory = Engram() # saves to ./engram.db
49
66
 
50
- ```bash
51
- ollama pull nomic-embed-text
67
+ memory.remember("user_123", "Prefers dark mode and async Python")
68
+ context = memory.recall("user_123", "user preferences")
52
69
  ```
53
70
 
54
- Without Ollama, Engram falls back to keyword search automatically.
71
+ ---
72
+
73
+ ## Drop into your agent
55
74
 
56
- ## Quick Start
75
+ Paste this into any LLM chat handler:
57
76
 
58
77
  ```typescript
59
78
  import { Engram } from '@cartisien/engram';
60
79
 
61
- const memory = new Engram({
62
- dbPath: './bot-memory.db',
63
- embeddingUrl: 'http://localhost:11434', // Ollama default
64
- });
65
-
66
- // In your agent/chat handler
67
- async function handleMessage(sessionId: string, message: string) {
68
- // 1. Recall relevant context semantically
69
- const context = await memory.recall(sessionId, message, 5);
80
+ const memory = new Engram();
70
81
 
71
- // 2. Build prompt with memory
72
- const prompt = buildPrompt(context, message);
82
+ async function chat(sessionId: string, userMessage: string, llm: any) {
83
+ // 1. Pull relevant context before calling LLM
84
+ const context = await memory.recall(sessionId, userMessage, 5);
85
+ const contextStr = context.map(m => m.content).join('\n');
73
86
 
74
- // 3. Get AI response
75
- const response = await llm.chat(prompt);
87
+ // 2. Call your LLM with memory in the system prompt
88
+ const response = await llm.chat({
89
+ system: `Relevant context from memory:\n${contextStr}`,
90
+ user: userMessage,
91
+ });
76
92
 
77
- // 4. Store both sides
78
- await memory.remember(sessionId, message, 'user');
93
+ // 3. Store both sides
94
+ await memory.remember(sessionId, userMessage, 'user');
79
95
  await memory.remember(sessionId, response, 'assistant');
80
96
 
81
97
  return response;
82
98
  }
83
99
  ```
84
100
 
101
+ **Vercel AI SDK:**
102
+ ```typescript
103
+ import { Engram } from '@cartisien/engram';
104
+ import { streamText } from 'ai';
105
+ import { openai } from '@ai-sdk/openai';
106
+
107
+ const memory = new Engram();
108
+
109
+ export async function POST(req: Request) {
110
+ const { messages, sessionId } = await req.json();
111
+ const lastMessage = messages.at(-1)?.content ?? '';
112
+
113
+ const context = await memory.recall(sessionId, lastMessage, 5);
114
+ const contextStr = context.map(m => m.content).join('\n');
115
+
116
+ // Store user message
117
+ await memory.remember(sessionId, lastMessage, 'user');
118
+
119
+ const result = streamText({
120
+ model: openai('gpt-4o'),
121
+ system: context.length ? `Memory:\n${contextStr}` : undefined,
122
+ messages,
123
+ onFinish: async ({ text }) => {
124
+ await memory.remember(sessionId, text, 'assistant');
125
+ },
126
+ });
127
+
128
+ return result.toDataStreamResponse();
129
+ }
130
+ ```
131
+
132
+ ---
133
+
134
+ ## Why Engram
135
+
136
+ AI assistants forget everything between conversations. Most memory solutions either:
137
+ - Require cloud accounts and send your data to their servers
138
+ - Store raw chunks that go stale and contradict each other
139
+
140
+ Engram stores memories as **evolving beliefs** — locally, in SQLite, with no cloud dependency.
141
+
142
+ | | Engram | Mem0 | Zep |
143
+ |---|---|---|---|
144
+ | Local-first | ✅ | ⚠️ self-host option | ⚠️ self-host option |
145
+ | Zero API key | ✅ | ❌ | ❌ |
146
+ | Zero config | ✅ | ❌ | ❌ |
147
+ | TypeScript-first | ✅ | ❌ Python-first | ❌ Python-first |
148
+ | MCP native | ✅ | ❌ | ❌ |
149
+ | Belief revision | ✅ | ⚠️ | ❌ |
150
+ | Open source | ✅ MIT | ✅ | ✅ |
151
+
152
+ ---
153
+
154
+ ## Semantic search setup (optional)
155
+
156
+ Engram uses Ollama for local embeddings. Without it, keyword search works automatically.
157
+
158
+ ```bash
159
+ # Install Ollama: https://ollama.ai
160
+ ollama pull nomic-embed-text
161
+ ```
162
+
163
+ That's the only setup step. Engram detects it automatically.
164
+
165
+ ---
166
+
85
167
  ## API
86
168
 
87
- ### `new Engram(config?)`
169
+ ### Core
88
170
 
89
171
  ```typescript
90
- const memory = new Engram({
91
- dbPath: './memory.db', // SQLite file path (default: ':memory:')
92
- maxContextLength: 4000, // Max chars per entry (default: 4000)
93
- embeddingUrl: 'http://localhost:11434', // Ollama base URL
94
- embeddingModel: 'nomic-embed-text', // Embedding model
95
- semanticSearch: true, // Enable semantic search (default: true)
96
- });
172
+ const memory = new Engram(config?)
97
173
  ```
98
174
 
99
- ### `remember(sessionId, content, role?, metadata?)`
175
+ Config defaults (all optional):
176
+
177
+ | Option | Default | Description |
178
+ |--------|---------|-------------|
179
+ | `dbPath` | `./engram.db` | SQLite file path |
180
+ | `embeddingUrl` | `$OLLAMA_URL` or `http://localhost:11434` | Ollama base URL |
181
+ | `embeddingModel` | `nomic-embed-text` | Embedding model |
182
+ | `semanticSearch` | `true` | Enable semantic search |
183
+ | `graphMemory` | `false` | Entity relationship extraction |
184
+ | `autoConsolidate` | `false` | Auto-summarize old memories |
100
185
 
101
- Store a memory. Embedding is generated automatically.
186
+ ### `remember(sessionId, content, role?, metadata?)`
187
+ Store a memory. Embedding generated automatically.
102
188
 
103
189
  ```typescript
104
- await memory.remember('session_abc', 'User loves Thai food', 'user');
190
+ await memory.remember('session_1', 'User is vegetarian', 'user');
105
191
  ```
106
192
 
107
193
  ### `recall(sessionId, query?, limit?, options?)`
108
-
109
- Retrieve relevant memories. Uses semantic search when available, keyword fallback otherwise. Returns entries sorted by similarity score.
194
+ Retrieve relevant memories. Semantic + keyword + recency, merged via RRF.
110
195
 
111
196
  ```typescript
112
- const results = await memory.recall('session_abc', 'food preferences', 5);
113
- // [{ content: '...', similarity: 0.84, ... }]
197
+ const results = await memory.recall('session_1', 'dietary preferences', 5);
198
+ // [{ content: 'User is vegetarian', similarity: 0.91, certainty: 0.5, ... }]
114
199
  ```
115
200
 
116
201
  ### `history(sessionId, limit?)`
117
-
118
202
  Chronological conversation history.
119
203
 
120
204
  ```typescript
121
- const chat = await memory.history('session_abc', 20);
205
+ const chat = await memory.history('session_1', 20);
122
206
  ```
123
207
 
124
208
  ### `forget(sessionId, options?)`
125
-
126
209
  Delete memories.
127
210
 
128
211
  ```typescript
129
- await memory.forget('session_abc'); // all
130
- await memory.forget('session_abc', { id: 'entry_id' }); // one
131
- await memory.forget('session_abc', { before: new Date() }); // old entries
212
+ await memory.forget('session_1'); // all
213
+ await memory.forget('session_1', { id: 'entry_id' }); // one
214
+ await memory.forget('session_1', { before: new Date() }); // old
132
215
  ```
133
216
 
134
- ### `graph(sessionId, entity)`
217
+ ### `stats(sessionId)`
218
+ Memory counts by tier, role, embeddings.
135
219
 
136
- Returns a one-hop relationship map for a named entity — all connected entities and the memories that link them.
220
+ ```typescript
221
+ const s = await memory.stats('session_1');
222
+ // { total: 42, byTier: { working: 30, long_term: 12 }, withEmbeddings: 42 }
223
+ ```
137
224
 
138
- Requires `graphMemory: true` in config and a running Ollama instance with `qwen2.5:32b` (or override via `graphModel`).
225
+ ---
226
+
227
+ ## Belief revision
228
+
229
+ Every memory has a `certainty` score (0–1). Stale or contradicted memories fade naturally.
139
230
 
140
231
  ```typescript
141
- const memory = new Engram({
142
- dbPath: './memory.db',
143
- graphMemory: true,
144
- graphModel: 'qwen2.5:32b', // default
145
- });
232
+ // Confirm a memory
233
+ await memory.reinforce(entryId); // certainty += 0.15
146
234
 
147
- const graph = await memory.graph('session_abc', 'GovScout');
148
- // {
149
- // entity: 'GovScout',
150
- // edges: [
151
- // { relation: 'uses', target: 'MUI', sourceMemoryId: '...' },
152
- // { relation: 'built_by', target: 'Jeff', sourceMemoryId: '...' },
153
- // ],
154
- // memories: [ { content: '...', ... } ]
155
- // }
156
- ```
235
+ // Flag a contradiction — old memory marked contradicted, new one stored
236
+ await memory.contradict('session_1', oldId, 'User switched to dark mode');
157
237
 
158
- ### `recall()` with graph augmentation
238
+ // Remove a memory from recall
239
+ await memory.invalidate(entryId);
159
240
 
160
- ```typescript
161
- const results = await memory.recall('session_abc', 'what is GovScout?', 5, {
162
- includeGraph: true, // augment top results with graph-connected memories
163
- });
241
+ // Detect contradictions before storing
242
+ const result = await memory.detectContradictions('session_1', newContent);
243
+ if (result.detected) { /* handle */ }
164
244
  ```
165
245
 
166
- ### `consolidate(sessionId, options?)` *(v0.4)*
246
+ ---
167
247
 
168
- Summarizes old working memories into dense long-term entries via a local LLM. Originals are archived (hidden from recall but not deleted).
248
+ ## Consolidation
249
+
250
+ Summarize old working memories into dense long-term entries via local LLM.
169
251
 
170
252
  ```typescript
171
253
  const memory = new Engram({
172
- dbPath: './memory.db',
173
- autoConsolidate: true, // auto-trigger on remember() (default: false)
174
- consolidateThreshold: 100, // trigger when working memories exceed this (default: 100)
175
- consolidateKeep: 20, // keep N most recent working memories untouched (default: 20)
176
- consolidateBatch: 50, // memories to process per run (default: 50)
177
- consolidateModel: 'qwen2.5:32b', // LLM for summarization
254
+ autoConsolidate: true,
255
+ consolidateThreshold: 100, // trigger when working memories exceed this
256
+ consolidateModel: 'qwen2.5:32b',
178
257
  });
179
258
 
180
- // Manual consolidation
181
- const result = await memory.consolidate('session_abc');
259
+ // Or manually
260
+ const result = await memory.consolidate('session_1');
182
261
  // → { summarized: 50, created: 4, archived: 50 }
183
262
 
184
263
  // Preview without writing
185
- const preview = await memory.consolidate('session_abc', { dryRun: true });
186
- // → { summarized: 50, created: 0, archived: 0, previews: ['User prefers TypeScript...', ...] }
264
+ const preview = await memory.consolidate('session_1', { dryRun: true });
187
265
  ```
188
266
 
189
- **Memory tiers:**
190
- - `working` — recent, granular memories (default)
191
- - `long_term` — LLM-generated summaries of consolidated batches
192
- - `archived` — original memories after consolidation (excluded from recall)
267
+ ---
268
+
269
+ ## Graph memory
193
270
 
194
- `recall()` searches `working` and `long_term` by default. Pass `tiers` to override:
271
+ Entity-relationship extraction for connected context.
195
272
 
196
273
  ```typescript
197
- // Search all tiers including archived
198
- const results = await memory.recall('session_abc', 'preferences', 10, {
199
- tiers: ['working', 'long_term', 'archived'],
274
+ const memory = new Engram({ graphMemory: true });
275
+
276
+ // After remembering "Jeff is building GovScout with MUI and React"
277
+ const graph = await memory.graph('session_1', 'GovScout');
278
+ // {
279
+ // entity: 'govscout',
280
+ // relationships: [
281
+ // { type: 'outgoing', relation: 'uses', target: 'mui' },
282
+ // { type: 'outgoing', relation: 'built_by', target: 'jeff' },
283
+ // ]
284
+ // }
285
+
286
+ // Auto-augment recall with graph-connected memories
287
+ const results = await memory.recall('session_1', 'what is GovScout?', 5, {
288
+ includeGraph: true,
200
289
  });
201
290
  ```
202
291
 
203
- ### `stats(sessionId)`
292
+ ---
293
+
294
+ ## reflect()
295
+
296
+ Synthesize insights across memories — actual reasoning, not just retrieval.
204
297
 
205
298
  ```typescript
206
- const stats = await memory.stats('session_abc');
207
- // {
208
- // total: 42,
209
- // byRole: { user: 21, assistant: 21 },
210
- // byTier: { working: 30, long_term: 12, archived: 50 },
211
- // withEmbeddings: 42,
212
- // graphNodes: 18,
213
- // graphEdges: 31
299
+ const result = await memory.reflect('session_1', 'What does this user care most about?');
300
+ // {
301
+ // insights: [
302
+ // 'User strongly prefers TypeScript over JavaScript',
303
+ // 'Has a recurring deadline sensitivity around Fridays',
304
+ // ],
305
+ // memoriesUsed: [...],
214
306
  // }
215
307
  ```
216
308
 
217
- ## MCP Server
309
+ ---
218
310
 
219
- Use Engram directly in Claude Desktop, Cursor, or any MCP client:
311
+ ## User-scoped memory
220
312
 
221
- ```bash
222
- npx -y @cartisien/engram-mcp
223
- ```
313
+ Persist facts about a user across all sessions.
224
314
 
225
- ```json
226
- {
227
- "mcpServers": {
228
- "engram": {
229
- "command": "npx",
230
- "args": ["-y", "@cartisien/engram-mcp"]
231
- }
232
- }
233
- }
234
- ```
315
+ ```typescript
316
+ await memory.rememberUser('user_jeff', 'Prefers TypeScript');
317
+ await memory.rememberUser('user_jeff', 'Timezone: America/New_York');
235
318
 
236
- [`@cartisien/engram-mcp`](https://github.com/Cartisien/engram-mcp) on GitHub
319
+ // Blend into any session recall
320
+ const results = await memory.recall('any_session', 'preferences', 10, {
321
+ userId: 'user_jeff',
322
+ });
323
+ ```
237
324
 
238
- ## Philosophy
325
+ ---
239
326
 
240
- > *"The trace precedes presence."*
327
+ ## Remote client
241
328
 
242
- Memory isn't storage. It's the substrate of self.
329
+ Connect to a self-hosted Engram server.
243
330
 
244
- Engram doesn't just persist data — it gives your agents **continuity**. The ability to learn, reference, and grow across conversations.
331
+ ```typescript
332
+ import { EngramClient } from '@cartisien/engram';
245
333
 
246
- ## Roadmap
334
+ const memory = new EngramClient({ baseUrl: 'http://your-server:3470' });
335
+ // Same API as Engram
336
+ ```
247
337
 
248
- - **v0.1** ✅ SQLite persistence, keyword search
249
- - **v0.2** ✅ Semantic search via local Ollama embeddings
250
- - **v0.3** ✅ Graph memory — entity relationships, connected context
251
- - **v0.4** ✅ Memory consolidation, long-term summarization
338
+ ---
252
339
 
253
340
  ## The Cartisien Memory Suite
254
341
 
255
342
  | Package | Purpose |
256
343
  |---------|---------|
257
- | [`@cartisien/engram`](https://github.com/Cartisien/engram) | Persistent memory SDK — **this package** |
344
+ | [`@cartisien/engram`](https://github.com/Cartisien/engram) | Memory SDK — **this package** |
258
345
  | [`@cartisien/engram-mcp`](https://github.com/Cartisien/engram-mcp) | MCP server for Claude Desktop / Cursor |
259
- | `@cartisien/extensa` | Vector infrastructure *(coming soon)* |
260
- | `@cartisien/cogito` | Agent identity & lifecycle *(coming soon)* |
346
+ | `@cartisien/extensa` | Vector infrastructure |
347
+ | `@cartisien/cogito` | Agent identity & lifecycle |
261
348
 
262
- *Res cogitans meets res extensa.*
349
+ ---
263
350
 
264
351
  ## License
265
352
 
266
353
  MIT © [Cartisien Interactive](https://cartisien.com)
267
-
268
- ---
269
-
270
- **Built for people who think forgetting is a bug.**
@@ -0,0 +1,31 @@
1
+ /**
2
+ * A/B Evaluation Harness
3
+ *
4
+ * Compares two retrieval modes:
5
+ * Mode A: Raw chunk retrieval — embed query, cosine search against raw conversation text
6
+ * Mode B: Claim retrieval — extractClaims + runAllValidators, embed canonical_text, cosine search
7
+ *
8
+ * For each question, both modes retrieve context, then an LLM answers from that context.
9
+ * Scoring uses exact substring match + LLM-based semantic match as fallback.
10
+ */
11
+ import type { ConversationTurn } from '../../compiler/schemas.js';
12
+ import { type EvalQuestion } from './questions.js';
13
+ export interface PerQuestionResult {
14
+ questionId: number;
15
+ question: string;
16
+ expected_answer: string;
17
+ mode_a_answer: string;
18
+ mode_b_answer: string;
19
+ mode_a_correct: boolean;
20
+ mode_b_correct: boolean;
21
+ mode_a_context: string;
22
+ mode_b_context: string;
23
+ }
24
+ export interface ABTestReport {
25
+ mode_a_score: number;
26
+ mode_b_score: number;
27
+ total_questions: number;
28
+ per_question_results: PerQuestionResult[];
29
+ }
30
+ export declare function runABTest(apiKey: string, corpus: ConversationTurn[][], questions?: EvalQuestion[]): Promise<ABTestReport>;
31
+ //# sourceMappingURL=harness.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"harness.d.ts","sourceRoot":"","sources":["../../../src/benchmarks/ab-eval/harness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAA;AAKjE,OAAO,EAAkB,KAAK,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAMlE,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAA;IAClB,QAAQ,EAAE,MAAM,CAAA;IAChB,eAAe,EAAE,MAAM,CAAA;IACvB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,cAAc,EAAE,OAAO,CAAA;IACvB,cAAc,EAAE,OAAO,CAAA;IACvB,cAAc,EAAE,MAAM,CAAA;IACtB,cAAc,EAAE,MAAM,CAAA;CACvB;AAED,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,MAAM,CAAA;IACpB,YAAY,EAAE,MAAM,CAAA;IACpB,eAAe,EAAE,MAAM,CAAA;IACvB,oBAAoB,EAAE,iBAAiB,EAAE,CAAA;CAC1C;AA8LD,wBAAsB,SAAS,CAC7B,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,gBAAgB,EAAE,EAAE,EAC5B,SAAS,CAAC,EAAE,YAAY,EAAE,GACzB,OAAO,CAAC,YAAY,CAAC,CAiEvB"}