@geravant/sinain 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/sinain-core/package-lock.json +963 -0
- package/sinain-core/package.json +1 -0
- package/sinain-core/src/buffers/feed-buffer.ts +32 -0
- package/sinain-core/src/embedding/service.ts +66 -0
- package/sinain-core/src/index.ts +19 -2
- package/sinain-core/src/learning/local-curation.ts +137 -7
- package/sinain-core/src/server.ts +31 -0
- package/sinain-memory/README.md +105 -0
- package/sinain-memory/__pycache__/embed_client.cpython-312.pyc +0 -0
- package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
- package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
- package/sinain-memory/embed_client.py +117 -0
- package/sinain-memory/eval/benchmarks/__pycache__/meeting_adapter.cpython-312.pyc +0 -0
- package/sinain-memory/eval/benchmarks/__pycache__/meeting_runner.cpython-312.pyc +0 -0
- package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
- package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
- package/sinain-memory/eval/benchmarks/meeting_adapter.py +81 -0
- package/sinain-memory/eval/benchmarks/meeting_runner.py +230 -0
- package/sinain-memory/eval/benchmarks/query.py +37 -16
- package/sinain-memory/eval/benchmarks/run_meeting_bench.sh +318 -0
- package/sinain-memory/eval/benchmarks/runner.py +10 -3
- package/sinain-memory/graph_query.py +257 -15
- package/sinain-memory/knowledge_integrator.py +365 -72
- package/sinain-memory/memory-config.json +1 -1
- package/sinain-memory/session_distiller.py +43 -19
- package/sinain-memory/triplestore.py +60 -0
package/sinain-core/package.json
CHANGED
|
@@ -10,11 +10,31 @@ export class FeedBuffer {
|
|
|
10
10
|
private _version = 0;
|
|
11
11
|
private maxSize: number;
|
|
12
12
|
private _hwm = 0;
|
|
13
|
+
private _onFullCb: ((items: FeedItem[]) => void) | null = null;
|
|
14
|
+
private _onFullArmed = true;
|
|
15
|
+
private _onFullVersion = 0; // version at last re-arm
|
|
13
16
|
|
|
14
17
|
constructor(maxSize = 100) {
|
|
15
18
|
this.maxSize = maxSize;
|
|
16
19
|
}
|
|
17
20
|
|
|
21
|
+
/**
 * Register the callback that receives the buffer contents once it fills up.
 *
 * `push` schedules the callback (in a microtask, with a copy of the current
 * items) only when all of the following hold: the buffer has reached
 * `maxSize`, the trigger is armed, and the version counter has advanced by at
 * least `floor(maxSize / 2)` since the last re-arm. The trigger disarms itself
 * when it fires, which prevents rapid-fire runs over the same content.
 *
 * Registering replaces any previously registered callback, arms the trigger
 * and resets the re-arm baseline to version 0.
 *
 * @param cb Invoked with a snapshot array of the buffered items.
 */
onFull(cb: (items: FeedItem[]) => void): void {
  this._onFullCb = cb;
  this._onFullArmed = true;
  this._onFullVersion = 0;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Re-arm the onFull trigger; call this after an incremental distillation
 * pass completes.
 *
 * The current version becomes the new baseline, so the callback will not fire
 * again until the version counter has advanced far enough past this point.
 */
rearmOnFull(): void {
  this._onFullVersion = this._version;
  this._onFullArmed = true;
}
|
|
37
|
+
|
|
18
38
|
/** Push a new feed item. Returns the created item. */
|
|
19
39
|
push(text: string, priority: Priority, source: FeedItem["source"], channel: FeedChannel = "stream"): FeedItem {
|
|
20
40
|
const item: FeedItem = {
|
|
@@ -27,6 +47,18 @@ export class FeedBuffer {
|
|
|
27
47
|
};
|
|
28
48
|
this.items.push(item);
|
|
29
49
|
if (this.items.length > this._hwm) this._hwm = this.items.length;
|
|
50
|
+
|
|
51
|
+
// Fire onFull when buffer is at capacity AND enough new items have arrived
|
|
52
|
+
// since the last distillation (at least half the buffer replaced)
|
|
53
|
+
const newSinceRearm = this._version - this._onFullVersion;
|
|
54
|
+
if (this.items.length >= this.maxSize
|
|
55
|
+
&& this._onFullCb && this._onFullArmed
|
|
56
|
+
&& newSinceRearm >= Math.floor(this.maxSize / 2)) {
|
|
57
|
+
this._onFullArmed = false;
|
|
58
|
+
const snapshot = [...this.items];
|
|
59
|
+
queueMicrotask(() => this._onFullCb!(snapshot));
|
|
60
|
+
}
|
|
61
|
+
|
|
30
62
|
if (this.items.length > this.maxSize) {
|
|
31
63
|
this.items.shift();
|
|
32
64
|
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EmbeddingService — in-process sentence embeddings for knowledge dedup + retrieval.
|
|
3
|
+
*
|
|
4
|
+
* Loads all-MiniLM-L6-v2 via @huggingface/transformers (ONNX runtime, no Python).
|
|
5
|
+
* Model loads async at startup (~9s), embeddings are 2-4ms per text after that.
|
|
6
|
+
*
|
|
7
|
+
* Used by:
|
|
8
|
+
* - knowledge_integrator.py (via POST /embed) for dedup before asserting facts
|
|
9
|
+
* - graph_query.py (via POST /embed) for semantic retrieval
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { log, warn } from "../log.js";
|
|
13
|
+
|
|
14
|
+
const TAG = "embedding";
|
|
15
|
+
const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
|
|
16
|
+
|
|
17
|
+
type Pipeline = (texts: string | string[], options: { pooling: string; normalize: boolean }) => Promise<{ data: Float32Array; dims: number[] }>;
|
|
18
|
+
|
|
19
|
+
/**
 * Wraps a feature-extraction pipeline that is loaded lazily in the background.
 *
 * Lifecycle: construct → `loadAsync()` → poll `ready` → `embed()`.
 * `embed()` throws until the pipeline has finished loading.
 */
export class EmbeddingService {
  private pipeline: Pipeline | null = null;
  private loading = false;
  private _ready = false;

  /** True once the model has loaded and `embed()` can be called. */
  get ready(): boolean {
    return this._ready;
  }

  /**
   * Start loading the model in the background and return immediately.
   *
   * A no-op while a load is in flight or after one has succeeded. A failed
   * load is logged and leaves the service not ready, so a later call retries.
   */
  loadAsync(): void {
    if (this.loading || this._ready) return;
    this.loading = true;

    const start = Date.now();
    log(TAG, `loading ${MODEL_ID} (background)...`);

    import("@huggingface/transformers")
      .then(async ({ pipeline }) => {
        this.pipeline = (await pipeline("feature-extraction", MODEL_ID)) as unknown as Pipeline;
        this._ready = true;
        log(TAG, `model ready in ${Date.now() - start}ms (384 dims)`);
      })
      .catch((err: unknown) => {
        // Rejections are not guaranteed to be Error instances.
        const message = err instanceof Error ? err.message : String(err);
        warn(TAG, `failed to load model: ${message.slice(0, 100)}`);
      })
      .finally(() => {
        // Clear the in-flight flag on both outcomes; `_ready` guards re-entry after success.
        this.loading = false;
      });
  }

  /**
   * Embed each text with mean pooling and normalization enabled.
   *
   * Texts are processed one at a time, in order; the result has one
   * `Float32Array` per input text.
   *
   * @throws Error if the model has not finished loading.
   */
  async embed(texts: string[]): Promise<Float32Array[]> {
    const run = this.pipeline;
    if (!run) {
      throw new Error("Embedding model not loaded yet");
    }

    const results: Float32Array[] = [];
    for (const text of texts) {
      const output = await run(text, { pooling: "mean", normalize: true });
      // Copy so the caller owns a buffer that holds exactly this vector.
      results.push(new Float32Array(output.data));
    }
    return results;
  }

  /**
   * Cosine similarity between two embeddings.
   *
   * Divides the dot product by both magnitudes, so the result is also
   * correct for vectors that are not unit-normalized. Returns 0 when either
   * vector has zero magnitude.
   *
   * @throws RangeError if the vectors have different lengths.
   */
  static cosine(a: Float32Array, b: Float32Array): number {
    if (a.length !== b.length) {
      throw new RangeError(`embedding dimension mismatch: ${a.length} vs ${b.length}`);
    }

    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dot += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }

    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    return denom === 0 ? 0 : dot / denom;
  }
}
|
package/sinain-core/src/index.ts
CHANGED
|
@@ -16,6 +16,7 @@ import { TraceStore } from "./trace/trace-store.js";
|
|
|
16
16
|
import { FeedbackStore } from "./learning/feedback-store.js";
|
|
17
17
|
import { SignalCollector } from "./learning/signal-collector.js";
|
|
18
18
|
import { LocalCurationService } from "./learning/local-curation.js";
|
|
19
|
+
import { EmbeddingService } from "./embedding/service.js";
|
|
19
20
|
import { createAppServer } from "./server.js";
|
|
20
21
|
import { Profiler } from "./profiler.js";
|
|
21
22
|
import { CostTracker } from "./cost/tracker.js";
|
|
@@ -70,7 +71,7 @@ async function queryKnowledgeFactsMulti(entities: string[], maxFacts: number): P
|
|
|
70
71
|
for (const dbPath of dbPaths) {
|
|
71
72
|
if (!existsSync(dbPath)) continue;
|
|
72
73
|
try {
|
|
73
|
-
const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts), "--format", "
|
|
74
|
+
const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts), "--format", "compact"];
|
|
74
75
|
if (entities.length > 0) args.push("--entities", JSON.stringify(entities));
|
|
75
76
|
const out = execFileSync("python3", args, { timeout: 5000, encoding: "utf-8" }).trim();
|
|
76
77
|
if (out) results.push(out);
|
|
@@ -338,11 +339,25 @@ async function main() {
|
|
|
338
339
|
? new FeedbackStore(config.learningConfig.feedbackDir, config.learningConfig.retentionDays)
|
|
339
340
|
: null;
|
|
340
341
|
|
|
342
|
+
// ── Initialize embedding service (non-blocking) ──
|
|
343
|
+
const embeddingService = new EmbeddingService();
|
|
344
|
+
embeddingService.loadAsync(); // ~9s background load, server starts immediately
|
|
345
|
+
|
|
341
346
|
// ── Initialize local knowledge pipeline ──
|
|
342
347
|
const localCuration = new LocalCurationService();
|
|
343
|
-
|
|
348
|
+
// Distill pending session in background — don't block server startup
|
|
349
|
+
setImmediate(() => {
|
|
350
|
+
localCuration.distillPendingSession();
|
|
351
|
+
});
|
|
344
352
|
localCuration.startPeriodicCuration();
|
|
345
353
|
|
|
354
|
+
// Wire incremental distillation: when feed buffer fills, distill before items are lost
|
|
355
|
+
localCuration.setSenseBuffer(senseBuffer);
|
|
356
|
+
localCuration.setRearmCallback(() => feedBuffer.rearmOnFull());
|
|
357
|
+
feedBuffer.onFull((items) => {
|
|
358
|
+
localCuration.distillIncremental(items);
|
|
359
|
+
});
|
|
360
|
+
|
|
346
361
|
// ── Initialize escalation ──
|
|
347
362
|
const escalator = new Escalator({
|
|
348
363
|
feedBuffer,
|
|
@@ -668,6 +683,8 @@ async function main() {
|
|
|
668
683
|
},
|
|
669
684
|
getSpawnPending: () => escalator.getSpawnPending(),
|
|
670
685
|
respondSpawn: (id: string, result: string) => escalator.respondSpawn(id, result),
|
|
686
|
+
embedTexts: (texts: string[]) => embeddingService.embed(texts),
|
|
687
|
+
isEmbeddingReady: () => embeddingService.ready,
|
|
671
688
|
});
|
|
672
689
|
|
|
673
690
|
// ── Wire overlay profiling ──
|
|
@@ -15,6 +15,7 @@ import { existsSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, appendF
|
|
|
15
15
|
import { resolve, dirname } from "node:path";
|
|
16
16
|
import { fileURLToPath } from "node:url";
|
|
17
17
|
import type { FeedItem } from "../types.js";
|
|
18
|
+
import type { SenseBuffer } from "../buffers/sense-buffer.js";
|
|
18
19
|
import { log, warn, error } from "../log.js";
|
|
19
20
|
|
|
20
21
|
const TAG = "local-curation";
|
|
@@ -55,6 +56,10 @@ export class LocalCurationService {
|
|
|
55
56
|
private scriptsDir: string;
|
|
56
57
|
private sessionStartTs: number;
|
|
57
58
|
private curationTimer: ReturnType<typeof setInterval> | null = null;
|
|
59
|
+
private _lastDistilledTs = 0; // timestamp of last incremental distillation
|
|
60
|
+
private _incrementalRunning = false;
|
|
61
|
+
private _rearmCb: (() => void) | null = null; // callback to re-arm feed buffer onFull
|
|
62
|
+
private _senseBuffer: SenseBuffer | null = null;
|
|
58
63
|
|
|
59
64
|
constructor() {
|
|
60
65
|
this.memoryDir = resolveMemoryDir();
|
|
@@ -90,6 +95,100 @@ export class LocalCurationService {
|
|
|
90
95
|
}
|
|
91
96
|
}
|
|
92
97
|
|
|
98
|
+
/**
 * Watermark of the last successful incremental distillation, or 0 if none
 * has completed yet. Items at or before this timestamp are treated as
 * already distilled.
 */
get lastDistilledTs(): number {
  return this._lastDistilledTs;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Set the hook that `distillIncremental` calls when it finishes, whether it
 * succeeded or failed, so the feed buffer's onFull trigger can be re-armed.
 *
 * @param cb Invoked with no arguments; replaces any previously set hook.
 */
setRearmCallback(cb: () => void): void {
  this._rearmCb = cb;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Attach the sense buffer that supplies screen context for incremental
 * distillation. Until one is attached, no screen context is included.
 *
 * @param sb Buffer queried by time when a distillation pass runs.
 */
setSenseBuffer(sb: SenseBuffer): void {
  this._senseBuffer = sb;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Convert recent sense-buffer events into transcript entries shaped like
 * feed items.
 *
 * Events are read from the last distillation watermark onward, or from the
 * last 30 minutes when no incremental pass has completed yet. Each event can
 * contribute up to two entries: its OCR text (only when longer than 20
 * characters) and its vision summary.
 *
 * @returns Entries with `source: "sense"` and `channel: "screen"`; empty when
 *          no sense buffer is attached.
 */
private getSenseContext(): Array<{ text: string; ts: number; source: string; channel: string }> {
  if (!this._senseBuffer) return [];

  const fallbackWindowMs = 30 * 60 * 1000;
  const minOcrChars = 20; // shorter OCR fragments are skipped
  const since = this._lastDistilledTs || (Date.now() - fallbackWindowMs);

  const items: Array<{ text: string; ts: number; source: string; channel: string }> = [];
  for (const evt of this._senseBuffer.queryByTime(since)) {
    // OCR text: what is visible on screen.
    if (evt.ocr && evt.ocr.length > minOcrChars) {
      const app = evt.semantic?.context?.app || "unknown";
      items.push({ text: `[screen: ${app}] ${evt.ocr}`, ts: evt.ts, source: "sense", channel: "screen" });
    }
    // Vision summary: AI description of the screen content.
    if (evt.semantic?.visible?.summary) {
      items.push({
        text: `[screen-context] ${evt.semantic.visible.summary}`,
        ts: evt.ts,
        source: "sense",
        channel: "screen",
      });
    }
  }
  return items;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Incremental distillation — called when the feed buffer reaches capacity.
 *
 * Distills a snapshot of the buffer, merged with screen context from the
 * sense buffer, before the oldest items fall off the ring buffer.
 *
 * Only one pass runs at a time; a call that arrives while another is in
 * progress is logged and skipped. Failures are logged, never thrown. When a
 * pass ends, the re-arm callback is invoked regardless of outcome.
 *
 * On success the watermark (`lastDistilledTs`) is set to the time the pass
 * STARTED. Anything timestamped while the pass was running was not part of
 * this transcript, so it must stay eligible for the next pass and for the
 * shutdown distillation.
 *
 * @param feedItems Snapshot of the feed buffer to distill.
 */
async distillIncremental(feedItems: FeedItem[]): Promise<void> {
  if (this._incrementalRunning) {
    log(TAG, "incremental distillation already running — skipping");
    return;
  }
  this._incrementalRunning = true;

  try {
    // Capture the watermark before collecting input (see doc comment).
    const startedAt = Date.now();
    const itemCount = feedItems.length;
    log(TAG, `incremental distillation: ${itemCount} items (buffer full)`);

    const sessionMeta = {
      ts: new Date(startedAt).toISOString(),
      sessionKey: "local-incremental",
      durationMs: startedAt - this.sessionStartTs,
    };

    const audioItems = feedItems.map(item => ({
      text: item.text,
      ts: item.ts,
      source: item.source || "unknown",
      channel: item.channel || "agent",
    }));

    // Merge screen context from sense buffer (OCR + vision summaries),
    // ordered chronologically with the feed items.
    const senseItems = this.getSenseContext();
    const transcript = [...audioItems, ...senseItems].sort((a, b) => a.ts - b.ts);

    if (senseItems.length > 0) {
      log(TAG, `including ${senseItems.length} screen context items in distillation`);
    }

    if (this.runDistillation(transcript, sessionMeta)) {
      this._lastDistilledTs = startedAt;
      log(TAG, `incremental distillation complete — ${itemCount} audio + ${senseItems.length} screen items processed`);
    }
  } catch (err: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    warn(TAG, `incremental distillation failed: ${message.slice(0, 100)}`);
  } finally {
    this._incrementalRunning = false;
    // Re-arm the buffer callback so next fill triggers another distillation
    this._rearmCb?.();
  }
}
|
|
191
|
+
|
|
93
192
|
/**
|
|
94
193
|
* Save feed items to disk for deferred distillation.
|
|
95
194
|
* Called during shutdown — instant (no LLM), survives tsx force-kill.
|
|
@@ -160,13 +259,20 @@ export class LocalCurationService {
|
|
|
160
259
|
* picked up on next startup via distillPendingSession().
|
|
161
260
|
*/
|
|
162
261
|
async distillSession(feedItems: FeedItem[]): Promise<void> {
|
|
163
|
-
|
|
164
|
-
|
|
262
|
+
// Filter to only items not yet covered by incremental distillation
|
|
263
|
+
const items = this._lastDistilledTs > 0
|
|
264
|
+
? feedItems.filter(i => i.ts > this._lastDistilledTs)
|
|
265
|
+
: feedItems;
|
|
266
|
+
|
|
267
|
+
if (items.length < 1) {
|
|
268
|
+
log(TAG, `skipping shutdown distillation — all ${feedItems.length} items already distilled incrementally`);
|
|
165
269
|
return;
|
|
166
270
|
}
|
|
167
271
|
|
|
272
|
+
log(TAG, `shutdown distillation: ${items.length} items (${feedItems.length - items.length} already distilled incrementally)`);
|
|
273
|
+
|
|
168
274
|
// Step 0: Save to disk FIRST — survives force-kill
|
|
169
|
-
this.savePendingSession(
|
|
275
|
+
this.savePendingSession(items);
|
|
170
276
|
|
|
171
277
|
const sessionMeta = {
|
|
172
278
|
ts: new Date().toISOString(),
|
|
@@ -174,7 +280,7 @@ export class LocalCurationService {
|
|
|
174
280
|
durationMs: Date.now() - this.sessionStartTs,
|
|
175
281
|
};
|
|
176
282
|
|
|
177
|
-
const transcript =
|
|
283
|
+
const transcript = items.map(item => ({
|
|
178
284
|
text: item.text,
|
|
179
285
|
ts: item.ts,
|
|
180
286
|
source: item.source || "unknown",
|
|
@@ -204,13 +310,37 @@ export class LocalCurationService {
|
|
|
204
310
|
log(TAG, `distilling session: ${transcript.length} items, ${Math.round(sessionMeta.durationMs / 60000)} min`);
|
|
205
311
|
|
|
206
312
|
try {
|
|
313
|
+
// Step 0.5: Retrieve existing entities for context (Mem0 retrieve-before-extract pattern)
|
|
314
|
+
let existingEntities = "";
|
|
315
|
+
const dbPath = resolve(this.memoryDir, "knowledge-graph.db");
|
|
316
|
+
if (existsSync(dbPath)) {
|
|
317
|
+
try {
|
|
318
|
+
existingEntities = execFileSync("python3", [
|
|
319
|
+
resolve(this.scriptsDir, "graph_query.py"),
|
|
320
|
+
"--db", dbPath,
|
|
321
|
+
"--top", "20",
|
|
322
|
+
"--format", "compact",
|
|
323
|
+
], {
|
|
324
|
+
timeout: 5_000,
|
|
325
|
+
encoding: "utf-8",
|
|
326
|
+
env: { ...process.env, PYTHONPATH: this.scriptsDir },
|
|
327
|
+
}).trim();
|
|
328
|
+
} catch {
|
|
329
|
+
// Non-fatal — distillation works without existing entities
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
|
|
207
333
|
// Step 1: Distill session into a SessionDigest
|
|
208
|
-
const
|
|
334
|
+
const distillerArgs = [
|
|
209
335
|
resolve(this.scriptsDir, "session_distiller.py"),
|
|
210
336
|
"--memory-dir", this.memoryDir,
|
|
211
337
|
"--transcript", JSON.stringify(transcript),
|
|
212
338
|
"--session-meta", JSON.stringify(sessionMeta),
|
|
213
|
-
]
|
|
339
|
+
];
|
|
340
|
+
if (existingEntities) {
|
|
341
|
+
distillerArgs.push("--existing-entities", existingEntities);
|
|
342
|
+
}
|
|
343
|
+
const digestJson = execFileSync("python3", distillerArgs, {
|
|
214
344
|
timeout: 30_000,
|
|
215
345
|
encoding: "utf-8",
|
|
216
346
|
env: { ...process.env, PYTHONPATH: this.scriptsDir },
|
|
@@ -236,7 +366,7 @@ export class LocalCurationService {
|
|
|
236
366
|
"--memory-dir", this.memoryDir,
|
|
237
367
|
"--digest", JSON.stringify(digest),
|
|
238
368
|
], {
|
|
239
|
-
timeout:
|
|
369
|
+
timeout: 60_000, // 60s: LLM call (~10s) + embedding dedup (~5s) + graph ops
|
|
240
370
|
encoding: "utf-8",
|
|
241
371
|
env: { ...process.env, PYTHONPATH: this.scriptsDir },
|
|
242
372
|
});
|
|
@@ -184,6 +184,8 @@ export interface ServerDeps {
|
|
|
184
184
|
onSpawnCommand?: (text: string) => void;
|
|
185
185
|
getSpawnPending?: () => { id: string; task: string; label: string; ts: number } | null;
|
|
186
186
|
respondSpawn?: (id: string, result: string) => { ok: boolean; error?: string };
|
|
187
|
+
embedTexts?: (texts: string[]) => Promise<Float32Array[]>;
|
|
188
|
+
isEmbeddingReady?: () => boolean;
|
|
187
189
|
}
|
|
188
190
|
|
|
189
191
|
function readBody(req: IncomingMessage, maxBytes: number): Promise<string> {
|
|
@@ -519,6 +521,35 @@ export function createAppServer(deps: ServerDeps) {
|
|
|
519
521
|
return;
|
|
520
522
|
}
|
|
521
523
|
|
|
524
|
+
// ── /embed ── (used by knowledge_integrator.py and graph_query.py)
|
|
525
|
+
if (req.method === "POST" && url.pathname === "/embed") {
|
|
526
|
+
if (!deps.embedTexts || !deps.isEmbeddingReady?.()) {
|
|
527
|
+
res.writeHead(503);
|
|
528
|
+
res.end(JSON.stringify({ error: "embedding model loading" }));
|
|
529
|
+
return;
|
|
530
|
+
}
|
|
531
|
+
let body = "";
|
|
532
|
+
req.on("data", (c: Buffer) => { body += c; });
|
|
533
|
+
req.on("end", async () => {
|
|
534
|
+
try {
|
|
535
|
+
const { texts } = JSON.parse(body);
|
|
536
|
+
if (!Array.isArray(texts) || texts.length === 0) {
|
|
537
|
+
res.writeHead(400);
|
|
538
|
+
res.end(JSON.stringify({ error: "texts array required" }));
|
|
539
|
+
return;
|
|
540
|
+
}
|
|
541
|
+
const embeddings = await deps.embedTexts!(texts);
|
|
542
|
+
// Return as base64-encoded float32 arrays for efficiency
|
|
543
|
+
const encoded = embeddings.map(e => Buffer.from(e.buffer).toString("base64"));
|
|
544
|
+
res.end(JSON.stringify({ embeddings: encoded, dims: 384 }));
|
|
545
|
+
} catch (err: any) {
|
|
546
|
+
res.writeHead(500);
|
|
547
|
+
res.end(JSON.stringify({ error: err.message?.slice(0, 200) }));
|
|
548
|
+
}
|
|
549
|
+
});
|
|
550
|
+
return;
|
|
551
|
+
}
|
|
552
|
+
|
|
522
553
|
// ── /health ──
|
|
523
554
|
if (req.method === "GET" && url.pathname === "/health") {
|
|
524
555
|
res.end(JSON.stringify({
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# sinain-memory
|
|
2
|
+
|
|
3
|
+
Local-first knowledge pipeline for SinainHUD. Captures what the user sees and hears, distills it into a knowledge graph, and makes it retrievable for the agent's context.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
Two-step pipeline: **LLM extraction** (what to remember) + **deterministic integration** (how to store it).
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
Audio transcripts + Screen OCR
|
|
11
|
+
|
|
|
12
|
+
session_distiller.py (LLM)
|
|
13
|
+
Extracts: facts[], entities[], decisions[]
|
|
14
|
+
|
|
|
15
|
+
knowledge_integrator.py (code — no LLM)
|
|
16
|
+
- Converts facts to graph assertions (deterministic)
|
|
17
|
+
- Creates entity:* nodes with freeform types
|
|
18
|
+
- Links facts to entities via ref edges
|
|
19
|
+
- Infers cross-entity relationships from fact content
|
|
20
|
+
- Deduplicates via embedding similarity (cosine-similarity threshold 0.78)
|
|
21
|
+
- Auto-curates playbook (tag overlap, no LLM)
|
|
22
|
+
|
|
|
23
|
+
triplestore.py (SQLite EAV)
|
|
24
|
+
- 4 covering indexes: EAVT, AEVT, VAET, AVET
|
|
25
|
+
- FTS5 full-text search on fact values
|
|
26
|
+
- Confidence decay (60-day half-life)
|
|
27
|
+
- Touched-entities tracking for cache invalidation
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Key Design Decisions
|
|
31
|
+
|
|
32
|
+
**Deterministic integration.** The integrator does NOT use an LLM. Early experiments showed that LLM-based integration produced 0-20 facts per run depending on model mood, token truncation, and format errors. The deterministic approach converts every distiller fact to a graph assertion — consistent, fast, and reliable.
|
|
33
|
+
|
|
34
|
+
**Two-layer entity model.** `fact:*` entities store individual claims (searchable via FTS5 and tags). `entity:*` nodes represent real-world entities connected by typed ref edges. The VAET index enables backref traversal: "find all facts about Citibank" is an O(log n) index lookup.
|
|
35
|
+
|
|
36
|
+
**Incremental distillation.** Long sessions (>8 min) trigger distillation when the feed buffer reaches capacity, before items are lost to the ring buffer. The `onFull` callback fires once the buffer is full and at least half of its capacity has been filled with new items since the last pass, so the same content is not distilled repeatedly.
|
|
37
|
+
|
|
38
|
+
**Embedding-based dedup.** sinain-core hosts an in-process all-MiniLM-L6-v2 model (384 dims, 2-4ms per embedding). The `/embed` endpoint is used for both write-time dedup (prevent storing semantic duplicates) and read-time re-ranking (surface most relevant facts for a query).
|
|
39
|
+
|
|
40
|
+
## Triplestore
|
|
41
|
+
|
|
42
|
+
SQLite-backed EAV store inspired by Datomic/RhizomeDB with 4 covering indexes:
|
|
43
|
+
|
|
44
|
+
| Index | Query Pattern | Example |
|
|
45
|
+
|-------|-------------|---------|
|
|
46
|
+
| **EAVT** | What does entity X look like? | `store.entity("entity:citibank")` |
|
|
47
|
+
| **AEVT** | Which entities have attribute Y? | `store.entities_with_attr("type")` |
|
|
48
|
+
| **VAET** | What references entity Z? (backrefs) | `store.backrefs("entity:citibank")` |
|
|
49
|
+
| **AVET** | Find entity by attribute+value | `store.lookup("type", "person")` |
|
|
50
|
+
|
|
51
|
+
Additional features:
|
|
52
|
+
- **FTS5** full-text search on fact values with auto-sync triggers
|
|
53
|
+
- **Confidence decay**: exponential half-life (60 days) — facts lose relevance without reinforcement
|
|
54
|
+
- **Temporal queries**: `entity_as_of(id, date)` for point-in-time knowledge
|
|
55
|
+
- **Touched-entities index**: O(1) "was entity X modified since tx Y?" for cache invalidation
|
|
56
|
+
- **Soft retraction**: facts are marked retracted, not deleted — preserves history
|
|
57
|
+
|
|
58
|
+
## Retrieval
|
|
59
|
+
|
|
60
|
+
Hybrid retrieval with Reciprocal Rank Fusion (RRF):
|
|
61
|
+
|
|
62
|
+
1. **FTS5** keyword search on fact values
|
|
63
|
+
2. **Tag-based** entity matching via AVET index
|
|
64
|
+
3. **Top-confidence** facts as baseline
|
|
65
|
+
4. **Entity graph boost**: facts linked to query-mentioned entities via backrefs get an RRF score bonus
|
|
66
|
+
5. **Embedding re-ranking** (when sinain-core is running): semantic similarity between query and facts
|
|
67
|
+
6. **Confidence decay** applied as tiebreaker
|
|
68
|
+
|
|
69
|
+
Results are grouped by entity for cross-fact reasoning.
|
|
70
|
+
|
|
71
|
+
## Distiller Output Schema
|
|
72
|
+
|
|
73
|
+
```json
|
|
74
|
+
{
|
|
75
|
+
"whatHappened": "2-3 sentence summary",
|
|
76
|
+
"facts": ["self-contained factual sentence", ...],
|
|
77
|
+
"decisions": ["who decided what, with deadline", ...],
|
|
78
|
+
"entities": [{"name": "entity-slug", "type": "freeform-type"}, ...],
|
|
79
|
+
"patterns": ["reusable technique or workflow", ...],
|
|
80
|
+
"preferences": ["user preference or habit", ...],
|
|
81
|
+
"isEmpty": false
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Facts are guided by 5 diversity dimensions: **WHO** (people, roles), **WHAT** (properties, descriptions), **HOW MUCH** (numbers, dates), **WHAT CHANGED** (decisions, agreements), **WHAT'S NEXT** (commitments, plans).
|
|
86
|
+
|
|
87
|
+
## Files
|
|
88
|
+
|
|
89
|
+
| File | Role |
|
|
90
|
+
|------|------|
|
|
91
|
+
| `session_distiller.py` | LLM extraction: transcript to structured digest |
|
|
92
|
+
| `knowledge_integrator.py` | Deterministic storage: digest to graph ops + playbook |
|
|
93
|
+
| `triplestore.py` | SQLite EAV with 4 indexes + FTS5 + temporal |
|
|
94
|
+
| `graph_query.py` | Hybrid retrieval with RRF fusion |
|
|
95
|
+
| `embed_client.py` | Python client for sinain-core `/embed` endpoint |
|
|
96
|
+
| `common.py` | Shared LLM call utilities |
|
|
97
|
+
| `memory-config.json` | Model selection, token limits, timeouts |
|
|
98
|
+
|
|
99
|
+
## Configuration
|
|
100
|
+
|
|
101
|
+
| Env Var | Default | Description |
|
|
102
|
+
|---------|---------|-------------|
|
|
103
|
+
| `SINAIN_MEMORY_DIR` | `~/.sinain/memory` | Knowledge graph directory |
|
|
104
|
+
| `LEARNING_ENABLED` | `true` | Enable/disable distillation pipeline |
|
|
105
|
+
| `AGENT_ENABLED` | `true` | Set `false` for capture-only mode |
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Embedding client — calls sinain-core's /embed endpoint for vector operations.
|
|
2
|
+
|
|
3
|
+
Provides semantic similarity for:
|
|
4
|
+
- Write path: dedup before asserting facts (knowledge_integrator.py)
|
|
5
|
+
- Read path: semantic retrieval (graph_query.py)
|
|
6
|
+
|
|
7
|
+
Falls back gracefully if sinain-core is not running or model not loaded.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import base64
|
|
11
|
+
import json
|
|
12
|
+
import struct
|
|
13
|
+
import urllib.request
|
|
14
|
+
from functools import lru_cache
|
|
15
|
+
|
|
16
|
+
SINAIN_CORE_URL = "http://localhost:9500"
|
|
17
|
+
EMBED_TIMEOUT_S = 5
|
|
18
|
+
SIMILARITY_THRESHOLD = 0.78 # calibrated: catches rephrased facts, rejects different facts
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def embed(texts: list[str]) -> list[list[float]] | None:
|
|
22
|
+
"""Embed texts via sinain-core /embed endpoint. Returns None if unavailable."""
|
|
23
|
+
try:
|
|
24
|
+
data = json.dumps({"texts": texts}).encode()
|
|
25
|
+
req = urllib.request.Request(
|
|
26
|
+
f"{SINAIN_CORE_URL}/embed",
|
|
27
|
+
data=data,
|
|
28
|
+
headers={"Content-Type": "application/json"},
|
|
29
|
+
method="POST",
|
|
30
|
+
)
|
|
31
|
+
with urllib.request.urlopen(req, timeout=EMBED_TIMEOUT_S) as resp:
|
|
32
|
+
result = json.loads(resp.read())
|
|
33
|
+
# Decode base64 float32 arrays
|
|
34
|
+
embeddings = []
|
|
35
|
+
for b64 in result["embeddings"]:
|
|
36
|
+
raw = base64.b64decode(b64)
|
|
37
|
+
floats = list(struct.unpack(f"{len(raw)//4}f", raw))
|
|
38
|
+
embeddings.append(floats)
|
|
39
|
+
return embeddings
|
|
40
|
+
except Exception:
|
|
41
|
+
return None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def cosine(a: list[float], b: list[float]) -> float:
    """Cosine similarity between two unit-length vectors.

    Computed as a plain dot product with no division by the magnitudes, so
    the result equals the cosine only when both inputs are already
    normalized. If the lengths differ, the extra components of the longer
    vector are ignored.
    """
    return sum(x * y for x, y in zip(a, b))
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def find_duplicates_batch(
    new_texts: list[str],
    existing_texts: list[str],
    threshold: float = SIMILARITY_THRESHOLD,
) -> dict[int, int]:
    """Find duplicates for multiple new texts against existing texts in one batch.

    All texts are embedded with a single embed() call, which avoids per-fact
    round trips.

    Args:
        new_texts: Candidate texts to check.
        existing_texts: Texts already stored.
        threshold: Minimum similarity (inclusive) for a match.

    Returns:
        {new_index: existing_index} for each new text whose best match has
        similarity >= threshold. On ties the earliest existing text wins.
        Empty when either list is empty or embeddings are unavailable.
    """
    if not existing_texts or not new_texts:
        return {}

    n_new = len(new_texts)
    embeddings = embed(new_texts + existing_texts)
    # Treat a response with the wrong number of vectors as unavailable rather
    # than indexing past the end of the list.
    if embeddings is None or len(embeddings) != n_new + len(existing_texts):
        return {}

    new_embs = embeddings[:n_new]
    existing_embs = embeddings[n_new:]

    result = {}
    for i, new_emb in enumerate(new_embs):
        best_idx = None
        best_sim = threshold
        for j, existing_emb in enumerate(existing_embs):
            sim = cosine(new_emb, existing_emb)
            # The threshold is inclusive; after the first hit, only a strictly
            # better score replaces the current best.
            if sim >= threshold and (best_idx is None or sim > best_sim):
                best_sim = sim
                best_idx = j
        if best_idx is not None:
            result[i] = best_idx

    return result
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def find_duplicate(
    new_text: str,
    existing_texts: list[str],
    threshold: float = SIMILARITY_THRESHOLD,
) -> int | None:
    """Find the index of the most similar existing text, or None if no match.

    Single-text convenience wrapper around find_duplicates_batch(); None is
    also returned when that function reports no match because embeddings are
    unavailable.
    """
    matches = find_duplicates_batch([new_text], existing_texts, threshold)
    return matches.get(0)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def rank_by_similarity(
|
|
96
|
+
query: str,
|
|
97
|
+
texts: list[str],
|
|
98
|
+
) -> list[tuple[int, float]] | None:
|
|
99
|
+
"""Rank texts by semantic similarity to query. Returns [(index, score), ...] descending.
|
|
100
|
+
|
|
101
|
+
Returns None if embedding service unavailable (caller should fall back to keyword).
|
|
102
|
+
"""
|
|
103
|
+
if not texts:
|
|
104
|
+
return []
|
|
105
|
+
|
|
106
|
+
all_texts = [query] + texts
|
|
107
|
+
embeddings = embed(all_texts)
|
|
108
|
+
if embeddings is None:
|
|
109
|
+
return None
|
|
110
|
+
|
|
111
|
+
query_emb = embeddings[0]
|
|
112
|
+
scored = []
|
|
113
|
+
for i, emb in enumerate(embeddings[1:]):
|
|
114
|
+
scored.append((i, cosine(query_emb, emb)))
|
|
115
|
+
|
|
116
|
+
scored.sort(key=lambda x: x[1], reverse=True)
|
|
117
|
+
return scored
|
|
Binary file
|
|
Binary file
|