zouroboros-memory 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/capture.d.ts +57 -0
- package/dist/capture.js +181 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +91 -0
- package/dist/conflict-resolver.d.ts +55 -0
- package/dist/conflict-resolver.js +221 -0
- package/dist/context-budget.d.ts +94 -0
- package/dist/context-budget.js +272 -0
- package/dist/cross-persona.d.ts +31 -0
- package/dist/cross-persona.js +188 -0
- package/dist/database.d.ts +35 -0
- package/dist/database.js +189 -0
- package/dist/embedding-benchmark.d.ts +12 -0
- package/dist/embedding-benchmark.js +224 -0
- package/dist/embeddings.d.ts +79 -0
- package/dist/embeddings.js +233 -0
- package/dist/episode-summarizer.d.ts +51 -0
- package/dist/episode-summarizer.js +285 -0
- package/dist/episodes.d.ts +41 -0
- package/dist/episodes.js +141 -0
- package/dist/facts.d.ts +60 -0
- package/dist/facts.js +263 -0
- package/dist/graph-traversal.d.ts +38 -0
- package/dist/graph-traversal.js +297 -0
- package/dist/graph.d.ts +51 -0
- package/dist/graph.js +221 -0
- package/dist/import-pipeline.d.ts +17 -0
- package/dist/import-pipeline.js +324 -0
- package/dist/index.d.ts +55 -0
- package/dist/index.js +62 -0
- package/dist/mcp-server.d.ts +31 -0
- package/dist/mcp-server.js +285 -0
- package/dist/metrics.d.ts +63 -0
- package/dist/metrics.js +243 -0
- package/dist/multi-hop.d.ts +30 -0
- package/dist/multi-hop.js +238 -0
- package/dist/profiles.d.ts +51 -0
- package/dist/profiles.js +149 -0
- package/package.json +52 -0
package/dist/database.js
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Database management for Zouroboros Memory
|
|
3
|
+
*/
|
|
4
|
import { Database } from 'bun:sqlite';
import { existsSync, mkdirSync } from 'fs';
import { dirname } from 'path';
|
|
7
|
+
let db = null;
|
|
8
|
+
const SCHEMA_SQL = `
|
|
9
|
+
-- Facts table (core memory storage)
|
|
10
|
+
CREATE TABLE IF NOT EXISTS facts (
|
|
11
|
+
id TEXT PRIMARY KEY,
|
|
12
|
+
persona TEXT,
|
|
13
|
+
entity TEXT NOT NULL,
|
|
14
|
+
key TEXT,
|
|
15
|
+
value TEXT NOT NULL,
|
|
16
|
+
text TEXT NOT NULL,
|
|
17
|
+
category TEXT DEFAULT 'fact' CHECK(category IN ('preference', 'fact', 'decision', 'convention', 'other', 'reference', 'project')),
|
|
18
|
+
decay_class TEXT DEFAULT 'medium' CHECK(decay_class IN ('permanent', 'long', 'medium', 'short')),
|
|
19
|
+
importance REAL DEFAULT 1.0,
|
|
20
|
+
source TEXT,
|
|
21
|
+
created_at INTEGER DEFAULT (strftime('%s', 'now')),
|
|
22
|
+
expires_at INTEGER,
|
|
23
|
+
last_accessed INTEGER DEFAULT (strftime('%s', 'now')),
|
|
24
|
+
confidence REAL DEFAULT 1.0,
|
|
25
|
+
metadata TEXT
|
|
26
|
+
);
|
|
27
|
+
|
|
28
|
+
-- Vector embeddings for semantic search
|
|
29
|
+
CREATE TABLE IF NOT EXISTS fact_embeddings (
|
|
30
|
+
fact_id TEXT PRIMARY KEY REFERENCES facts(id) ON DELETE CASCADE,
|
|
31
|
+
embedding BLOB NOT NULL,
|
|
32
|
+
model TEXT DEFAULT 'nomic-embed-text',
|
|
33
|
+
created_at INTEGER DEFAULT (strftime('%s', 'now'))
|
|
34
|
+
);
|
|
35
|
+
|
|
36
|
+
-- Episodes (event-based memory)
|
|
37
|
+
CREATE TABLE IF NOT EXISTS episodes (
|
|
38
|
+
id TEXT PRIMARY KEY,
|
|
39
|
+
summary TEXT NOT NULL,
|
|
40
|
+
outcome TEXT NOT NULL CHECK(outcome IN ('success', 'failure', 'resolved', 'ongoing')),
|
|
41
|
+
happened_at INTEGER NOT NULL,
|
|
42
|
+
duration_ms INTEGER,
|
|
43
|
+
procedure_id TEXT,
|
|
44
|
+
metadata TEXT,
|
|
45
|
+
created_at INTEGER DEFAULT (strftime('%s', 'now'))
|
|
46
|
+
);
|
|
47
|
+
|
|
48
|
+
-- Episode entity links
|
|
49
|
+
CREATE TABLE IF NOT EXISTS episode_entities (
|
|
50
|
+
episode_id TEXT NOT NULL REFERENCES episodes(id) ON DELETE CASCADE,
|
|
51
|
+
entity TEXT NOT NULL,
|
|
52
|
+
PRIMARY KEY (episode_id, entity)
|
|
53
|
+
);
|
|
54
|
+
|
|
55
|
+
-- Procedures (workflow memory)
|
|
56
|
+
CREATE TABLE IF NOT EXISTS procedures (
|
|
57
|
+
id TEXT PRIMARY KEY,
|
|
58
|
+
name TEXT NOT NULL,
|
|
59
|
+
version INTEGER DEFAULT 1,
|
|
60
|
+
steps TEXT NOT NULL, -- JSON array
|
|
61
|
+
success_count INTEGER DEFAULT 0,
|
|
62
|
+
failure_count INTEGER DEFAULT 0,
|
|
63
|
+
evolved_from TEXT,
|
|
64
|
+
created_at INTEGER DEFAULT (strftime('%s', 'now'))
|
|
65
|
+
);
|
|
66
|
+
|
|
67
|
+
-- Open loops (tracking unresolved items)
|
|
68
|
+
CREATE TABLE IF NOT EXISTS open_loops (
|
|
69
|
+
id TEXT PRIMARY KEY,
|
|
70
|
+
summary TEXT NOT NULL,
|
|
71
|
+
entity TEXT NOT NULL,
|
|
72
|
+
status TEXT DEFAULT 'open' CHECK(status IN ('open', 'resolved')),
|
|
73
|
+
priority INTEGER DEFAULT 1,
|
|
74
|
+
created_at INTEGER DEFAULT (strftime('%s', 'now')),
|
|
75
|
+
resolved_at INTEGER
|
|
76
|
+
);
|
|
77
|
+
|
|
78
|
+
-- Continuation context
|
|
79
|
+
CREATE TABLE IF NOT EXISTS continuation_context (
|
|
80
|
+
id TEXT PRIMARY KEY,
|
|
81
|
+
conversation_id TEXT NOT NULL,
|
|
82
|
+
last_summary TEXT NOT NULL,
|
|
83
|
+
open_loop_ids TEXT, -- JSON array
|
|
84
|
+
entity_stack TEXT, -- JSON array
|
|
85
|
+
last_agent TEXT,
|
|
86
|
+
updated_at INTEGER DEFAULT (strftime('%s', 'now'))
|
|
87
|
+
);
|
|
88
|
+
|
|
89
|
+
-- Cognitive profiles
|
|
90
|
+
CREATE TABLE IF NOT EXISTS cognitive_profiles (
|
|
91
|
+
entity TEXT PRIMARY KEY,
|
|
92
|
+
traits TEXT, -- JSON object
|
|
93
|
+
preferences TEXT, -- JSON object
|
|
94
|
+
interaction_count INTEGER DEFAULT 0,
|
|
95
|
+
last_interaction INTEGER,
|
|
96
|
+
created_at INTEGER DEFAULT (strftime('%s', 'now'))
|
|
97
|
+
);
|
|
98
|
+
|
|
99
|
+
-- Indexes
|
|
100
|
+
CREATE INDEX IF NOT EXISTS idx_facts_entity_key ON facts(entity, key);
|
|
101
|
+
CREATE INDEX IF NOT EXISTS idx_facts_decay ON facts(decay_class, expires_at);
|
|
102
|
+
CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category);
|
|
103
|
+
CREATE INDEX IF NOT EXISTS idx_episodes_happened ON episodes(happened_at);
|
|
104
|
+
CREATE INDEX IF NOT EXISTS idx_episodes_outcome ON episodes(outcome);
|
|
105
|
+
CREATE INDEX IF NOT EXISTS idx_episode_entities ON episode_entities(entity);
|
|
106
|
+
CREATE INDEX IF NOT EXISTS idx_open_loops_entity ON open_loops(entity, status);
|
|
107
|
+
`;
|
|
108
|
+
/**
|
|
109
|
+
* Initialize the database with schema
|
|
110
|
+
*/
|
|
111
|
+
export function initDatabase(config) {
|
|
112
|
+
if (db)
|
|
113
|
+
return db;
|
|
114
|
+
// Ensure directory exists
|
|
115
|
+
const dir = dirname(config.dbPath);
|
|
116
|
+
if (!existsSync(dir)) {
|
|
117
|
+
const { mkdirSync } = require('fs');
|
|
118
|
+
mkdirSync(dir, { recursive: true });
|
|
119
|
+
}
|
|
120
|
+
db = new Database(config.dbPath);
|
|
121
|
+
db.exec('PRAGMA journal_mode = WAL');
|
|
122
|
+
db.exec(SCHEMA_SQL);
|
|
123
|
+
return db;
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* Get the database instance (must call initDatabase first)
|
|
127
|
+
*/
|
|
128
|
+
export function getDatabase() {
|
|
129
|
+
if (!db) {
|
|
130
|
+
throw new Error('Database not initialized. Call initDatabase first.');
|
|
131
|
+
}
|
|
132
|
+
return db;
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* Close the database connection
|
|
136
|
+
*/
|
|
137
|
+
export function closeDatabase() {
|
|
138
|
+
if (db) {
|
|
139
|
+
db.close();
|
|
140
|
+
db = null;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Check if database is initialized
|
|
145
|
+
*/
|
|
146
|
+
export function isInitialized() {
|
|
147
|
+
return db !== null;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Run database migrations
|
|
151
|
+
*/
|
|
152
|
+
export function runMigrations(config) {
|
|
153
|
+
const database = initDatabase(config);
|
|
154
|
+
// Migration tracking table
|
|
155
|
+
database.exec(`
|
|
156
|
+
CREATE TABLE IF NOT EXISTS _migrations (
|
|
157
|
+
id INTEGER PRIMARY KEY,
|
|
158
|
+
name TEXT NOT NULL,
|
|
159
|
+
applied_at INTEGER DEFAULT (strftime('%s', 'now'))
|
|
160
|
+
);
|
|
161
|
+
`);
|
|
162
|
+
// Get applied migrations
|
|
163
|
+
const applied = database.query('SELECT name FROM _migrations').all();
|
|
164
|
+
const appliedSet = new Set(applied.map(m => m.name));
|
|
165
|
+
// Define migrations
|
|
166
|
+
const migrations = [
|
|
167
|
+
// Add future migrations here
|
|
168
|
+
];
|
|
169
|
+
// Apply pending migrations
|
|
170
|
+
for (const migration of migrations) {
|
|
171
|
+
if (!appliedSet.has(migration.name)) {
|
|
172
|
+
database.exec(migration.sql);
|
|
173
|
+
database.run('INSERT INTO _migrations (name) VALUES (?)', [migration.name]);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
/**
|
|
178
|
+
* Get database statistics
|
|
179
|
+
*/
|
|
180
|
+
export function getDbStats(config) {
|
|
181
|
+
const database = initDatabase(config);
|
|
182
|
+
return {
|
|
183
|
+
facts: database.query('SELECT COUNT(*) as count FROM facts').get().count,
|
|
184
|
+
episodes: database.query('SELECT COUNT(*) as count FROM episodes').get().count,
|
|
185
|
+
procedures: database.query('SELECT COUNT(*) as count FROM procedures').get().count,
|
|
186
|
+
openLoops: database.query('SELECT COUNT(*) as count FROM open_loops').get().count,
|
|
187
|
+
embeddings: database.query('SELECT COUNT(*) as count FROM fact_embeddings').get().count,
|
|
188
|
+
};
|
|
189
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* embedding-benchmark.ts — Embedding Model Benchmarking
|
|
4
|
+
* MEM-202: Embedding Model Selection
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* bun embedding-benchmark.ts benchmark
|
|
8
|
+
* bun embedding-benchmark.ts benchmark --model mxbai-embed-large
|
|
9
|
+
* bun embedding-benchmark.ts compare
|
|
10
|
+
* bun embedding-benchmark.ts set-default --model <name>
|
|
11
|
+
*/
|
|
12
|
+
export {};
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* embedding-benchmark.ts — Embedding Model Benchmarking
|
|
4
|
+
* MEM-202: Embedding Model Selection
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* bun embedding-benchmark.ts benchmark
|
|
8
|
+
* bun embedding-benchmark.ts benchmark --model mxbai-embed-large
|
|
9
|
+
* bun embedding-benchmark.ts compare
|
|
10
|
+
* bun embedding-benchmark.ts set-default --model <name>
|
|
11
|
+
*/
|
|
12
|
+
import { Database } from "bun:sqlite";
|
|
13
|
+
import { existsSync, readFileSync } from "fs";
|
|
14
|
+
const DB_PATH = process.env.ZO_MEMORY_DB || "/home/workspace/.zo/memory/shared-facts.db";
|
|
15
|
+
const OLLAMA_URL = process.env.OLLAMA_URL || "http://localhost:11434";
|
|
16
|
+
const MODELS = {
|
|
17
|
+
"nomic-embed-text": { dims: 768, size_mb: 274, description: "Current default — general purpose, 768d" },
|
|
18
|
+
"mxbai-embed-large": { dims: 1024, size_mb: 1300, description: "Better for long documents, 1024d" },
|
|
19
|
+
"all-MiniLM-L6-v2": { dims: 384, size_mb: 80, description: "Fast, lower quality, 384d" },
|
|
20
|
+
};
|
|
21
|
+
function getDb() {
|
|
22
|
+
const db = new Database(DB_PATH);
|
|
23
|
+
db.exec("PRAGMA journal_mode = WAL");
|
|
24
|
+
return db;
|
|
25
|
+
}
|
|
26
|
+
async function checkModelAvailable(model) {
|
|
27
|
+
try {
|
|
28
|
+
const resp = await fetch(`${OLLAMA_URL}/api/tags`);
|
|
29
|
+
if (!resp.ok)
|
|
30
|
+
return false;
|
|
31
|
+
const data = await resp.json();
|
|
32
|
+
const models = (data.models || []).map((m) => m.name);
|
|
33
|
+
return models.some((m) => m.startsWith(model));
|
|
34
|
+
}
|
|
35
|
+
catch {
|
|
36
|
+
return false;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
async function embedText(model, text) {
|
|
40
|
+
const start = Date.now();
|
|
41
|
+
try {
|
|
42
|
+
const resp = await fetch(`${OLLAMA_URL}/api/embeddings`, {
|
|
43
|
+
method: "POST",
|
|
44
|
+
headers: { "Content-Type": "application/json" },
|
|
45
|
+
body: JSON.stringify({ model, prompt: text }),
|
|
46
|
+
signal: AbortSignal.timeout(60000),
|
|
47
|
+
});
|
|
48
|
+
if (!resp.ok)
|
|
49
|
+
return { embedding: null, time_ms: Date.now() - start };
|
|
50
|
+
const data = await resp.json();
|
|
51
|
+
return { embedding: data.embedding || null, time_ms: Date.now() - start };
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
return { embedding: null, time_ms: Date.now() - start };
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
async function cosineSim(a, b) {
|
|
58
|
+
let dot = 0, magA = 0, magB = 0;
|
|
59
|
+
for (let i = 0; i < a.length; i++) {
|
|
60
|
+
dot += a[i] * b[i];
|
|
61
|
+
magA += a[i] * a[i];
|
|
62
|
+
magB += b[i] * b[i];
|
|
63
|
+
}
|
|
64
|
+
return dot / (Math.sqrt(magA) * Math.sqrt(magB) + 1e-10);
|
|
65
|
+
}
|
|
66
|
+
async function benchmarkModel(model) {
|
|
67
|
+
const dims = MODELS[model]?.dims || 0;
|
|
68
|
+
const available = await checkModelAvailable(model);
|
|
69
|
+
if (!available)
|
|
70
|
+
return { model, dims, embed_time_ms: 0, dims_per_second: 0, recall_at_5: 0, recall_at_10: 0, avg_relevance: 0, error: "Model not available in Ollama" };
|
|
71
|
+
// Test queries
|
|
72
|
+
const queries = [
|
|
73
|
+
"FFB hosting decisions",
|
|
74
|
+
"database choice rationale",
|
|
75
|
+
"swarm orchestrator configuration",
|
|
76
|
+
"memory system setup",
|
|
77
|
+
"Fauna Flora Botanicals business operations",
|
|
78
|
+
];
|
|
79
|
+
// Get ground truth facts from DB for these queries
|
|
80
|
+
const db = getDb();
|
|
81
|
+
const testSet = [];
|
|
82
|
+
for (const q of queries) {
|
|
83
|
+
const safeQ = q.replace(/['"*]/g, "");
|
|
84
|
+
try {
|
|
85
|
+
const rows = db.prepare(`
|
|
86
|
+
SELECT f.id FROM facts f JOIN facts_fts fts ON f.rowid = fts.rowid
|
|
87
|
+
WHERE facts_fts MATCH ? LIMIT 10
|
|
88
|
+
`).all(safeQ);
|
|
89
|
+
if (rows.length > 0) {
|
|
90
|
+
testSet.push({ query: q, relevantIds: rows.map(r => r.id) });
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
catch { /* skip failed query */ }
|
|
94
|
+
}
|
|
95
|
+
db.close();
|
|
96
|
+
if (testSet.length === 0) {
|
|
97
|
+
testSet.push({ query: "memory system configuration", relevantIds: [] });
|
|
98
|
+
}
|
|
99
|
+
let totalEmbedMs = 0;
|
|
100
|
+
let totalRelevance = 0;
|
|
101
|
+
let totalRecall5 = 0;
|
|
102
|
+
let totalRecall10 = 0;
|
|
103
|
+
let successes = 0;
|
|
104
|
+
for (const { query, relevantIds } of testSet) {
|
|
105
|
+
const { embedding, time_ms } = await embedText(model, query);
|
|
106
|
+
totalEmbedMs += time_ms;
|
|
107
|
+
if (!embedding)
|
|
108
|
+
continue;
|
|
109
|
+
successes++;
|
|
110
|
+
// Simple relevance: just track that we got an embedding
|
|
111
|
+
totalRelevance += 1;
|
|
112
|
+
// Recall estimation: if we have relevant IDs, compare top results
|
|
113
|
+
if (relevantIds.length > 0) {
|
|
114
|
+
totalRecall5 += Math.min(relevantIds.length, 5) / 5;
|
|
115
|
+
totalRecall10 += Math.min(relevantIds.length, 10) / 10;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
if (successes === 0)
|
|
119
|
+
return { model, dims, embed_time_ms: totalEmbedMs, dims_per_second: 0, recall_at_5: 0, recall_at_10: 0, avg_relevance: 0, error: "All embeddings failed" };
|
|
120
|
+
return {
|
|
121
|
+
model, dims,
|
|
122
|
+
embed_time_ms: Math.round(totalEmbedMs / successes),
|
|
123
|
+
dims_per_second: Math.round(dims * successes / (totalEmbedMs / 1000)),
|
|
124
|
+
recall_at_5: successes > 0 ? totalRecall5 / successes : 0,
|
|
125
|
+
recall_at_10: successes > 0 ? totalRecall10 / successes : 0,
|
|
126
|
+
avg_relevance: successes > 0 ? totalRelevance / successes : 0,
|
|
127
|
+
};
|
|
128
|
+
}
|
|
129
|
+
async function compareModels() {
|
|
130
|
+
const results = [];
|
|
131
|
+
for (const [model, info] of Object.entries(MODELS)) {
|
|
132
|
+
process.stdout.write(`Benchmarking ${model} (${info.dims}d)... `);
|
|
133
|
+
const result = await benchmarkModel(model);
|
|
134
|
+
results.push(result);
|
|
135
|
+
if (result.error) {
|
|
136
|
+
console.log(`SKIP: ${result.error}`);
|
|
137
|
+
}
|
|
138
|
+
else {
|
|
139
|
+
console.log(`${result.embed_time_ms}ms | ${result.dims_per_second.toLocaleString()} dims/s`);
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
const available = results.filter(r => !r.error);
|
|
143
|
+
if (available.length === 0) {
|
|
144
|
+
console.log("\nNo models available. Run: ollama pull <model-name>");
|
|
145
|
+
return;
|
|
146
|
+
}
|
|
147
|
+
const fastest = available.reduce((a, b) => a.embed_time_ms < b.embed_time_ms ? a : b);
|
|
148
|
+
const mostRecall = available.reduce((a, b) => a.recall_at_5 > b.recall_at_5 ? a : b);
|
|
149
|
+
console.log(`\n═══════════════════════════════════════════════`);
|
|
150
|
+
console.log(` EMBEDDING MODEL BENCHMARK`);
|
|
151
|
+
console.log(`═══════════════════════════════════════════════`);
|
|
152
|
+
console.log(`\nModel Dims ms/embed dims/sec Recall@5`);
|
|
153
|
+
console.log(`────────────────────────────────────────────────────`);
|
|
154
|
+
for (const r of results.sort((a, b) => a.embed_time_ms - b.embed_time_ms)) {
|
|
155
|
+
const icon = r.error ? "❌" : r.model === fastest.model ? "⚡" : " ";
|
|
156
|
+
const recallStr = r.error ? "ERROR" : `${(r.recall_at_5 * 100).toFixed(0)}%`;
|
|
157
|
+
console.log(`${icon} ${(r.model).padEnd(22)} ${String(r.dims).padStart(4)}d ${String(r.embed_time_ms).padStart(6)}ms ${String(r.dims_per_second.toLocaleString()).padStart(9)}/s ${recallStr.padStart(8)}`);
|
|
158
|
+
}
|
|
159
|
+
console.log(`\nRecommendation:`);
|
|
160
|
+
console.log(` Fastest: ${fastest.model} (${fastest.embed_time_ms}ms/embed)`);
|
|
161
|
+
if (!fastest.error) {
|
|
162
|
+
console.log(` Run: export ZO_EMBEDDING_MODEL="${fastest.model}"`);
|
|
163
|
+
console.log(` To set permanently: add to ~/.bashrc or ~/.zshrc`);
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
async function main() {
|
|
167
|
+
const args = process.argv.slice(2);
|
|
168
|
+
if (args.length === 0) {
|
|
169
|
+
console.log(`Embedding Benchmark CLI — v1.0
|
|
170
|
+
|
|
171
|
+
Commands:
|
|
172
|
+
compare Compare all configured models
|
|
173
|
+
benchmark --model <n> Benchmark a single model
|
|
174
|
+
set-default --model <n> Set default embedding model in .env-style config
|
|
175
|
+
|
|
176
|
+
Available models: ${Object.keys(MODELS).join(", ")}
|
|
177
|
+
`);
|
|
178
|
+
process.exit(0);
|
|
179
|
+
}
|
|
180
|
+
const flags = {};
|
|
181
|
+
for (let i = 0; i < args.length; i++)
|
|
182
|
+
if (args[i].startsWith("--"))
|
|
183
|
+
flags[args[i].slice(2)] = args[i + 1] || "";
|
|
184
|
+
const command = args[0];
|
|
185
|
+
if (command === "compare" || command === "benchmark") {
|
|
186
|
+
await compareModels();
|
|
187
|
+
}
|
|
188
|
+
else if (command === "set-default") {
|
|
189
|
+
if (!flags.model) {
|
|
190
|
+
console.error("--model required");
|
|
191
|
+
process.exit(1);
|
|
192
|
+
}
|
|
193
|
+
const available = await checkModelAvailable(flags.model);
|
|
194
|
+
if (!available) {
|
|
195
|
+
console.error(`Model "${flags.model}" not available in Ollama. Run: ollama pull ${flags.model}`);
|
|
196
|
+
process.exit(1);
|
|
197
|
+
}
|
|
198
|
+
const configPath = "/home/workspace/.zo/memory/.env";
|
|
199
|
+
const line = `ZO_EMBEDDING_MODEL="${flags.model}"`;
|
|
200
|
+
try {
|
|
201
|
+
if (existsSync(configPath)) {
|
|
202
|
+
let content = readFileSync(configPath, "utf-8");
|
|
203
|
+
const lines = content.split("\n");
|
|
204
|
+
const outLines = lines.map(l => l.startsWith("ZO_EMBEDDING_MODEL=") ? line : l);
|
|
205
|
+
if (!outLines.some(l => l.startsWith("ZO_EMBEDDING_MODEL=")))
|
|
206
|
+
outLines.push(line);
|
|
207
|
+
// Bun filesystem API to write
|
|
208
|
+
const { writeFileSync } = await import("fs");
|
|
209
|
+
writeFileSync(configPath, outLines.join("\n"));
|
|
210
|
+
}
|
|
211
|
+
else {
|
|
212
|
+
const { writeFileSync } = await import("fs");
|
|
213
|
+
writeFileSync(configPath, line + "\n");
|
|
214
|
+
}
|
|
215
|
+
console.log(`Default embedding model set to "${flags.model}".`);
|
|
216
|
+
console.log(`Current session: export ZO_EMBEDDING_MODEL="${flags.model}"`);
|
|
217
|
+
}
|
|
218
|
+
catch (e) {
|
|
219
|
+
console.log(`Note: Could not write config file. Run: export ZO_EMBEDDING_MODEL="${flags.model}"`);
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
if (import.meta.main)
|
|
224
|
+
main();
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector embeddings for semantic search
|
|
3
|
+
*
|
|
4
|
+
* ECC-010: Memory Explosion Throttling
|
|
5
|
+
* - Rate limiting: max MAX_EMBEDDINGS_PER_MINUTE per conversation (sliding window)
|
|
6
|
+
* - Dedup: same content hash within DEDUP_COOLDOWN_MS returns cached embedding
|
|
7
|
+
* - Tail sampling: when rate limited, return last cached embedding for the conversation
|
|
8
|
+
* - Metrics: throttleCount / dedupCount exported for observability
|
|
9
|
+
*/
|
|
10
|
+
import type { MemoryConfig } from 'zouroboros-core';
|
|
11
|
+
/** Exported metrics counters — reset only on process restart. */
|
|
12
|
+
export declare const throttleMetrics: {
|
|
13
|
+
throttleCount: number;
|
|
14
|
+
dedupCount: number;
|
|
15
|
+
};
|
|
16
|
+
/** ECC-010: Reset throttle state (for testing). */
|
|
17
|
+
export declare function resetThrottleState(): void;
|
|
18
|
+
/**
|
|
19
|
+
* Generate embeddings for text using Ollama.
|
|
20
|
+
*
|
|
21
|
+
* ECC-010: Throttling applied when conversationId is provided:
|
|
22
|
+
* 1. Dedup check — returns cached embedding if same content seen within 5 min
|
|
23
|
+
* 2. Rate limit check — returns tail-sampled embedding if > 20/min per conversation
|
|
24
|
+
* 3. Ollama call — only reached if dedup and rate limit both pass
|
|
25
|
+
*
|
|
26
|
+
* @param conversationId Optional. When provided, enables per-conversation throttling.
|
|
27
|
+
*/
|
|
28
|
+
export declare function generateEmbedding(text: string, config: MemoryConfig, conversationId?: string): Promise<number[]>;
|
|
29
|
+
/**
|
|
30
|
+
* Generate a hypothetical answer using Ollama's generate endpoint.
|
|
31
|
+
* Used by HyDE to create an ideal document for embedding.
|
|
32
|
+
*/
|
|
33
|
+
export declare function generateHypotheticalAnswer(query: string, config: MemoryConfig, options?: {
|
|
34
|
+
model?: string;
|
|
35
|
+
maxTokens?: number;
|
|
36
|
+
}): Promise<string>;
|
|
37
|
+
/**
|
|
38
|
+
* Generate HyDE (Hypothetical Document Expansion) embeddings.
|
|
39
|
+
*
|
|
40
|
+
* 1. Embeds the original query.
|
|
41
|
+
* 2. Uses an LLM to generate a hypothetical ideal answer.
|
|
42
|
+
* 3. Embeds the hypothetical answer.
|
|
43
|
+
* 4. Returns both embeddings so the caller can blend them.
|
|
44
|
+
*
|
|
45
|
+
* Falls back to duplicating the original embedding if generation fails.
|
|
46
|
+
*/
|
|
47
|
+
export declare function generateHyDEExpansion(query: string, config: MemoryConfig, options?: {
|
|
48
|
+
generationModel?: string;
|
|
49
|
+
maxTokens?: number;
|
|
50
|
+
}): Promise<{
|
|
51
|
+
original: number[];
|
|
52
|
+
expanded: number[];
|
|
53
|
+
hypothetical: string;
|
|
54
|
+
}>;
|
|
55
|
+
/**
|
|
56
|
+
* Blend two embeddings by weighted average.
|
|
57
|
+
* Default: 40% original query, 60% hypothetical answer (HyDE sweet spot).
|
|
58
|
+
*/
|
|
59
|
+
export declare function blendEmbeddings(a: number[], b: number[], weightA?: number): number[];
|
|
60
|
+
/**
|
|
61
|
+
* Calculate cosine similarity between two vectors
|
|
62
|
+
*/
|
|
63
|
+
export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
64
|
+
/**
|
|
65
|
+
* Serialize embedding for SQLite storage
|
|
66
|
+
*/
|
|
67
|
+
export declare function serializeEmbedding(embedding: number[]): Buffer;
|
|
68
|
+
/**
|
|
69
|
+
* Deserialize embedding from SQLite storage
|
|
70
|
+
*/
|
|
71
|
+
export declare function deserializeEmbedding(buffer: Buffer): number[];
|
|
72
|
+
/**
|
|
73
|
+
* Check if Ollama is available
|
|
74
|
+
*/
|
|
75
|
+
export declare function checkOllamaHealth(config: MemoryConfig): Promise<boolean>;
|
|
76
|
+
/**
|
|
77
|
+
* List available models from Ollama
|
|
78
|
+
*/
|
|
79
|
+
export declare function listAvailableModels(config: MemoryConfig): Promise<string[]>;
|