amalfa 0.0.0-reserved → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.biomeignore +19 -0
- package/:memory: +0 -0
- package/:memory:-shm +0 -0
- package/:memory:-wal +0 -0
- package/LICENSE +21 -0
- package/README.md +343 -13
- package/README.old.md +112 -0
- package/agents.config.json +11 -0
- package/amalfa.config.example.ts +100 -0
- package/biome.json +49 -0
- package/bun.lock +371 -0
- package/docs/AGENT_PROTOCOLS.md +28 -0
- package/docs/ARCHITECTURAL_OVERVIEW.md +123 -0
- package/docs/BENTO_BOXING_DEPRECATION.md +281 -0
- package/docs/Bun-SQLite.html +464 -0
- package/docs/COMMIT_GUIDELINES.md +367 -0
- package/docs/DEVELOPER_ONBOARDING.md +36 -0
- package/docs/Graph and Vector Database Best Practices.md +214 -0
- package/docs/PERFORMANCE_BASELINES.md +88 -0
- package/docs/REPOSITORY_CLEANUP_SUMMARY.md +261 -0
- package/docs/edge-generation-methods.md +57 -0
- package/docs/elevator-pitch.md +118 -0
- package/docs/graph-and-vector-database-playbook.html +480 -0
- package/docs/hardened-sqlite.md +85 -0
- package/docs/headless-knowledge-management.md +79 -0
- package/docs/john-kaye-flux-prompt.md +46 -0
- package/docs/keyboard-shortcuts.md +80 -0
- package/docs/opinion-proceed-pattern.md +29 -0
- package/docs/polyvis-nodes-edges-schema.md +77 -0
- package/docs/protocols/lab-protocol.md +30 -0
- package/docs/reaction-iquest-loop-coder.md +46 -0
- package/docs/services.md +60 -0
- package/docs/sqlite-wal-readonly-trap.md +228 -0
- package/docs/strategy/css-architecture.md +40 -0
- package/docs/test-document-cycle.md +83 -0
- package/docs/test_lifecycle_E2E.md +4 -0
- package/docs/the-bicameral-graph.md +83 -0
- package/docs/user-guide.md +70 -0
- package/docs/vision-helper.md +53 -0
- package/drizzle/0000_minor_iron_fist.sql +19 -0
- package/drizzle/meta/0000_snapshot.json +139 -0
- package/drizzle/meta/_journal.json +13 -0
- package/example_usage.ts +39 -0
- package/experiment.sh +35 -0
- package/hello +2 -0
- package/index.html +52 -0
- package/knowledge/excalibur.md +12 -0
- package/package.json +60 -15
- package/plans/experience-graph-integration.md +60 -0
- package/prompts/gemini-king-mode-prompt.md +46 -0
- package/public/docs/MCP_TOOLS.md +372 -0
- package/schemas/README.md +20 -0
- package/schemas/cda.schema.json +84 -0
- package/schemas/conceptual-lexicon.schema.json +75 -0
- package/scratchpads/dummy-debrief-boxed.md +39 -0
- package/scratchpads/dummy-debrief.md +27 -0
- package/scratchpads/scratchpad-design.md +50 -0
- package/scratchpads/scratchpad-scrolling.md +20 -0
- package/scratchpads/scratchpad-toc-disappearance.md +23 -0
- package/scratchpads/scratchpad-toc.md +28 -0
- package/scratchpads/test_gardener.md +7 -0
- package/src/EnlightenedTriad.ts +146 -0
- package/src/JIT_Triad.ts +137 -0
- package/src/cli.ts +318 -0
- package/src/config/constants.ts +7 -0
- package/src/config/defaults.ts +81 -0
- package/src/core/BentoNormalizer.ts +113 -0
- package/src/core/EdgeWeaver.ts +145 -0
- package/src/core/FractureLogic.ts +22 -0
- package/src/core/Harvester.ts +73 -0
- package/src/core/LLMClient.ts +93 -0
- package/src/core/LouvainGate.ts +67 -0
- package/src/core/MarkdownMasker.ts +49 -0
- package/src/core/README.md +11 -0
- package/src/core/SemanticMatcher.ts +89 -0
- package/src/core/SemanticWeaver.ts +96 -0
- package/src/core/TagEngine.ts +56 -0
- package/src/core/TimelineWeaver.ts +61 -0
- package/src/core/VectorEngine.ts +232 -0
- package/src/daemon/index.ts +221 -0
- package/src/data/experience/test_doc_1.md +2 -0
- package/src/data/experience/test_doc_2.md +2 -0
- package/src/db/schema.ts +46 -0
- package/src/demo-triad.ts +45 -0
- package/src/gardeners/AutoTagger.ts +116 -0
- package/src/gardeners/BaseGardener.ts +55 -0
- package/src/llm/EnlightenedProvider.ts +95 -0
- package/src/mcp/README.md +6 -0
- package/src/mcp/index.ts +341 -0
- package/src/pipeline/AmalfaIngestor.ts +262 -0
- package/src/pipeline/HarvesterPipeline.ts +101 -0
- package/src/pipeline/Ingestor.ts +555 -0
- package/src/pipeline/README.md +7 -0
- package/src/pipeline/SemanticHarvester.ts +222 -0
- package/src/resonance/DatabaseFactory.ts +100 -0
- package/src/resonance/README.md +148 -0
- package/src/resonance/cli/README.md +7 -0
- package/src/resonance/cli/ingest.ts +41 -0
- package/src/resonance/cli/migrate.ts +54 -0
- package/src/resonance/config.ts +40 -0
- package/src/resonance/daemon.ts +236 -0
- package/src/resonance/db.ts +422 -0
- package/src/resonance/pipeline/README.md +7 -0
- package/src/resonance/pipeline/extract.ts +89 -0
- package/src/resonance/pipeline/transform_docs.ts +60 -0
- package/src/resonance/schema.ts +138 -0
- package/src/resonance/services/embedder.ts +131 -0
- package/src/resonance/services/simpleTokenizer.ts +119 -0
- package/src/resonance/services/stats.ts +327 -0
- package/src/resonance/services/tokenizer.ts +159 -0
- package/src/resonance/transform/cda.ts +393 -0
- package/src/resonance/types/enriched-cda.ts +112 -0
- package/src/services/README.md +56 -0
- package/src/services/llama.ts +59 -0
- package/src/services/llamauv.ts +56 -0
- package/src/services/olmo3.ts +58 -0
- package/src/services/phi.ts +52 -0
- package/src/types/artifact.ts +12 -0
- package/src/utils/EnvironmentVerifier.ts +67 -0
- package/src/utils/Logger.ts +21 -0
- package/src/utils/ServiceLifecycle.ts +207 -0
- package/src/utils/ZombieDefense.ts +244 -0
- package/src/utils/validator.ts +264 -0
- package/substack/substack-playbook-1.md +95 -0
- package/substack/substack-playbook-2.md +78 -0
- package/tasks/ui-investigation.md +26 -0
- package/test-db +0 -0
- package/test-db-shm +0 -0
- package/test-db-wal +0 -0
- package/tests/canary/verify_pinch_check.ts +44 -0
- package/tests/fixtures/ingest_test.md +12 -0
- package/tests/fixtures/ingest_test_boxed.md +13 -0
- package/tests/fixtures/safety_test.md +45 -0
- package/tests/fixtures/safety_test_boxed.md +49 -0
- package/tests/fixtures/tagged_output.md +49 -0
- package/tests/fixtures/tagged_test.md +49 -0
- package/tests/mcp-server-settings.json +8 -0
- package/tsconfig.json +46 -0
- package/verify-embedder.ts +54 -0
package/src/resonance/daemon.ts
@@ -0,0 +1,236 @@
+import { watch } from "node:fs";
+import { join } from "node:path";
+import settings from "@/polyvis.settings.json";
+import { Ingestor } from "../pipeline/Ingestor";
+import { EnvironmentVerifier } from "../utils/EnvironmentVerifier";
+import { getLogger } from "../utils/Logger";
+import { ServiceLifecycle } from "../utils/ServiceLifecycle";
+import { Embedder } from "./services/embedder";
+
+const args = process.argv.slice(2);
+const command = args[0] || "serve";
+const log = getLogger("Daemon");
+
+// --- Helper: Notifications ---
+
+async function notify(title: string, message: string) {
+  // Native macOS notifications via AppleScript
+  // Zero dependencies
+  try {
+    const script = `display notification "${message}" with title "${title}"`;
+    await Bun.spawn(["osascript", "-e", script]);
+  } catch (e) {
+    log.error({ err: e }, "Failed to send notification");
+  }
+}
+
+// --- Service Lifecycle ---
+
+const lifecycle = new ServiceLifecycle({
+  name: "Daemon",
+  pidFile: ".daemon.pid",
+  logFile: ".daemon.log",
+  entryPoint: "src/resonance/daemon.ts",
+});
+
+// --- Server Logic (The actual Daemon) ---
+
+async function main() {
+  // 0. Verify Environment
+  await EnvironmentVerifier.verifyOrExit();
+
+  // 1. Initialize Ingestion (Daemon Mode: Watch Enabled)
+  const PORT = parseInt(process.env.VECTOR_PORT || "3010", 10);
+
+  log.info({ port: PORT }, "🔌 Vector Daemon starting...");
+  log.info("Initializing Embedder...");
+
+  // 1. Initialize Embedder (Compute Node)
+  try {
+    const embedder = Embedder.getInstance();
+    await embedder.embed("warmup", true);
+    log.info("✅ Embedder Ready.");
+  } catch (e) {
+    log.fatal({ err: e }, "❌ Failed to initialize embedder");
+    process.exit(1);
+  }
+
+  // 2. Start HTTP Server
+  Bun.serve({
+    port: PORT,
+    async fetch(req) {
+      const url = new URL(req.url);
+
+      if (req.method === "GET" && url.pathname === "/health") {
+        return new Response(JSON.stringify({ status: "ok" }), {
+          headers: { "Content-Type": "application/json" },
+        });
+      }
+
+      if (req.method === "POST" && url.pathname === "/embed") {
+        try {
+          const body = (await req.json()) as { text: string };
+          if (!body.text || typeof body.text !== "string") {
+            return new Response("Bad Request: 'text' field required", {
+              status: 400,
+            });
+          }
+
+          const vector = await Embedder.getInstance().embed(body.text, true);
+
+          return new Response(JSON.stringify({ vector: Array.from(vector) }), {
+            headers: { "Content-Type": "application/json" },
+          });
+        } catch (e) {
+          log.error({ err: e }, "Embedder API Error");
+          return new Response("Internal Server Error", { status: 500 });
+        }
+      }
+
+      return new Response("Not Found", { status: 404 });
+    },
+  });
+
+  log.info(`🚀 Vector Daemon listening on http://localhost:${PORT}`);
+
+  // 3. Start The Watcher (Active Custodian)
+  startWatcher();
+
+  // Handle cleanup
+  process.on("SIGTERM", () => {
+    log.info("🛑 Received SIGTERM, shutting down...");
+    process.exit(0);
+  });
+}
+
+// --- Watcher Logic ---
+
+let debounceTimer: ReturnType<typeof setTimeout> | null = null;
+const DEBOUNCE_MS = 2000;
+const pendingFiles = new Set<string>();
+
+// Retry queue: Track failed ingestions with attempt counts
+const retryQueue = new Map<
+  string,
+  { attempts: number; lastError: string; lastAttempt: number }
+>();
+const MAX_RETRIES = 3;
+const RETRY_BACKOFF_MS = 5000; // Wait 5 seconds before retry
+
+function startWatcher() {
+  // Dynamically load watch targets from settings
+  const rawSources = settings.paths.sources.experience;
+  const dirsToWatch = rawSources.map((s) => s.path);
+
+  log.info({ triggers: dirsToWatch }, "👀 Watching directories");
+
+  dirsToWatch.forEach((dir) => {
+    const path = join(process.cwd(), dir);
+    try {
+      watch(path, { recursive: true }, (event, filename) => {
+        // Ignore dotfiles and ensure markdown
+        if (filename && !filename.startsWith(".") && filename.endsWith(".md")) {
+          log.debug(
+            { file: `${dir}/${filename}`, event },
+            "📝 Change detected",
+          );
+
+          // Add full path to pending set
+          const fullPath = join(process.cwd(), dir, filename);
+          pendingFiles.add(fullPath);
+
+          triggerIngestion();
+        }
+      });
+    } catch (e) {
+      log.warn({ dir, err: e }, "⚠️ Could not watch directory");
+    }
+  });
+}
+
+function triggerIngestion() {
+  if (debounceTimer) {
+    clearTimeout(debounceTimer);
+  }
+
+  debounceTimer = setTimeout(async () => {
+    const batchSize = pendingFiles.size;
+    if (batchSize === 0) return;
+
+    log.info({ batchSize }, "🔄 Debounce settle. Starting Batch Ingestion...");
+
+    // Drain the set
+    const batch = Array.from(pendingFiles);
+    pendingFiles.clear();
+
+    try {
+      // Re-instantiate DB/Ingestor for fresh context
+      const ingestor = new Ingestor();
+
+      // OPTIMIZATION: Pass only the changed files
+      await ingestor.run({ files: batch });
+
+      log.info("✅ Batch Ingestion Complete.");
+      // Clear retry counts for successful files
+      for (const file of batch) {
+        retryQueue.delete(file);
+      }
+      await notify("PolyVis Resonance", `Graph Updated (${batchSize} files).`);
+    } catch (e) {
+      const errorMsg = e instanceof Error ? e.message : String(e);
+      log.error({ err: e }, "❌ Ingestion Failed");
+
+      // Re-queue failed files with retry logic
+      const now = Date.now();
+      for (const file of batch) {
+        const retryInfo = retryQueue.get(file) || {
+          attempts: 0,
+          lastError: "",
+          lastAttempt: 0,
+        };
+
+        if (retryInfo.attempts < MAX_RETRIES) {
+          // Re-queue with exponential backoff
+          const nextAttempt = retryInfo.attempts + 1;
+          retryQueue.set(file, {
+            attempts: nextAttempt,
+            lastError: errorMsg,
+            lastAttempt: now,
+          });
+
+          // Re-add to pending files after backoff delay
+          setTimeout(() => {
+            pendingFiles.add(file);
+            triggerIngestion();
+          }, RETRY_BACKOFF_MS * nextAttempt);
+
+          log.warn(
+            {
+              file,
+              attempt: nextAttempt,
+              max: MAX_RETRIES,
+              delayMs: RETRY_BACKOFF_MS * nextAttempt,
+            },
+            "🔄 Scheduling Retry",
+          );
+        } else {
+          // Abandon after max retries
+          log.error(
+            { file, lastError: retryInfo.lastError },
+            "⛔ ABANDONED: File failed max retries",
+          );
+          retryQueue.delete(file); // Remove from tracking
+        }
+      }
+
+      await notify(
+        "PolyVis Resonance",
+        `Ingestion Failed (${batch.length} files will retry)`,
+      );
+    }
+  }, DEBOUNCE_MS);
+}
+
+// --- Dispatch ---
+
+await lifecycle.run(command, main);
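The daemon above exposes a two-endpoint HTTP API: `GET /health` for liveness and `POST /embed`, which accepts `{ text }` and returns `{ vector }` serialized as a plain JSON number array. A minimal client sketch follows; it is not part of the package, and it assumes only the default `VECTOR_PORT` of 3010 shown above (the `getEmbedding` helper name is illustrative):

```ts
// Hypothetical client for the Vector Daemon's HTTP API (sketch, not package code).
const BASE = `http://localhost:${process.env.VECTOR_PORT ?? "3010"}`;

async function getEmbedding(text: string): Promise<Float32Array> {
  const res = await fetch(`${BASE}/embed`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) throw new Error(`Embed failed: HTTP ${res.status}`);
  // The daemon serializes the vector via Array.from(), i.e. a plain number[]
  const { vector } = (await res.json()) as { vector: number[] };
  return new Float32Array(vector);
}

// Liveness check, then embed a query string
console.log(await (await fetch(`${BASE}/health`)).json()); // { status: "ok" }
const vec = await getEmbedding("hello world");
console.log(`received ${vec.length}-dim embedding`);
```

Because the server returns `Array.from(vector)`, clients that want typed-array math must rebuild a `Float32Array` themselves, as done here.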
package/src/resonance/db.ts
@@ -0,0 +1,422 @@
+import type { Database } from "bun:sqlite";
+import { getLogger } from "@src/utils/Logger";
+import settings from "@/polyvis.settings.json";
+import { DatabaseFactory } from "./DatabaseFactory";
+import { CURRENT_SCHEMA_VERSION, MIGRATIONS } from "./schema";
+
+const log = getLogger("ResonanceDB");
+
+// Types matching Schema
+export interface Node {
+  id: string;
+  type: string;
+  label?: string; // stored as 'title'
+  content?: string;
+  domain?: string;
+  layer?: string;
+  embedding?: Float32Array;
+  hash?: string;
+  meta?: Record<string, unknown>; // JSON object for flexible metadata
+}
+
+export class ResonanceDB {
+  private db: Database;
+  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: May be used for debugging/diagnostics
+  private dbPath: string;
+
+  /**
+   * Factory method to load the default Resonance Graph based on settings.
+   */
+  static init(): ResonanceDB {
+    return new ResonanceDB(settings.paths.database.resonance);
+  }
+
+  /**
+   * @param dbPath - Absolute path to the SQLite database file
+   *
+   * Note: ResonanceDB always opens in read-write mode regardless of any options.
+   * WAL mode requires write access to the -shm (shared memory) file even for readers.
+   */
+  constructor(dbPath: string) {
+    this.dbPath = dbPath;
+    // Use DatabaseFactory to ensure compliant configuration (WAL mode + timeouts)
+    // Always read-write: WAL mode requires all connections to have write access to -shm file
+    this.db = DatabaseFactory.connect(dbPath, { readonly: false });
+
+    // Always check migration (it's safe now with locking)
+    this.migrate();
+  }
+
+  private migrate() {
+    const row = this.db.query("PRAGMA user_version").get() as {
+      user_version: number;
+    };
+    let currentVersion = row.user_version;
+
+    // Backward Compatibility for existing unversioned DBs
+    if (currentVersion === 0) {
+      const tables = this.db
+        .query(
+          "SELECT name FROM sqlite_master WHERE type='table' AND name='nodes'",
+        )
+        .get();
+      if (tables) {
+        // DB exists but has version 0. Detect schema state.
+        const cols = this.db.query("PRAGMA table_info(nodes)").all() as {
+          name: string;
+        }[];
+        const hasHash = cols.some((c) => c.name === "hash");
+        const hasMeta = cols.some((c) => c.name === "meta");
+
+        if (hasHash && hasMeta) {
+          currentVersion = 3;
+        } else if (hasHash) {
+          currentVersion = 2;
+        } else {
+          currentVersion = 1;
+        }
+        // Update the version on the file so we don't guess next time
+        this.db.run(`PRAGMA user_version = ${currentVersion}`);
+      }
+    }
+
+    if (currentVersion >= CURRENT_SCHEMA_VERSION) return;
+
+    log.info(
+      `📦 ResonanceDB: Migrating from v${currentVersion} to v${CURRENT_SCHEMA_VERSION}...`,
+    );
+
+    for (const migration of MIGRATIONS) {
+      if (migration.version > currentVersion) {
+        // console.log(`  Running Migration v${migration.version}: ${migration.description}`);
+        if (migration.sql) {
+          this.db.run(migration.sql);
+        }
+        if (migration.up) {
+          migration.up(this.db);
+        }
+        this.db.run(`PRAGMA user_version = ${migration.version}`);
+        currentVersion = migration.version;
+      }
+    }
+  }
+
+  insertNode(node: Node) {
+    // No inline migrations here anymore!
+
+    const stmt = this.db.prepare(`
+      INSERT OR REPLACE INTO nodes (id, type, title, content, domain, layer, embedding, hash, meta)
+      VALUES ($id, $type, $title, $content, $domain, $layer, $embedding, $hash, $meta)
+    `);
+
+    try {
+      // FAFCAS Protocol: Trust pre-normalized embeddings from Embedder/VectorEngine
+      // Embeddings are already normalized at generation boundary
+      const blob = node.embedding
+        ? new Uint8Array(
+            node.embedding.buffer,
+            node.embedding.byteOffset,
+            node.embedding.byteLength,
+          )
+        : null;
+
+      stmt.run({
+        $id: String(node.id),
+        $type: String(node.type),
+        $title: node.label ? String(node.label) : null,
+        $content: node.content ? String(node.content) : null,
+        $domain: String(node.domain || "knowledge"),
+        $layer: String(node.layer || "experience"),
+        $embedding: blob,
+        $hash: node.hash ? String(node.hash) : null,
+        $meta: node.meta ? JSON.stringify(node.meta) : null,
+      });
+    } catch (err) {
+      log.error(
+        {
+          err,
+          id: node.id,
+          blobSize: node.embedding ? node.embedding.byteLength : 0,
+          blobType: node.embedding
+            ? node.embedding instanceof Float32Array
+              ? "F32"
+              : "Other"
+            : "Null",
+        },
+        "❌ Failed to insert node",
+      );
+      throw err;
+    }
+  }
+
+  getRawDb(): Database {
+    return this.db;
+  }
+
+  insertEdge(source: string, target: string, type: string = "related_to") {
+    this.db.run(
+      `
+      INSERT OR IGNORE INTO edges (source, target, type)
+      VALUES (?, ?, ?)
+    `,
+      [source, target, type],
+    );
+  }
+
+  /**
+   * Insert a semantic edge with confidence and veracity metadata.
+   * Used by the Sieve+Net harvester for extracted triples.
+   */
+  insertSemanticEdge(
+    source: string,
+    target: string,
+    type: string,
+    confidence: number = 1.0,
+    veracity: number = 1.0,
+    contextSource?: string,
+  ) {
+    this.db.run(
+      `
+      INSERT INTO edges (source, target, type, confidence, veracity, context_source)
+      VALUES (?, ?, ?, ?, ?, ?)
+      ON CONFLICT(source, target, type) DO UPDATE SET
+        confidence = excluded.confidence,
+        veracity = excluded.veracity,
+        context_source = excluded.context_source
+    `,
+      [source, target, type, confidence, veracity, contextSource ?? null],
+    );
+  }
+
+  // Typed Data Accessors
+
+  // Typed Data Accessors
+
+  /**
+   * Fetch nodes with Safe Limits.
+   * @param options.excludeContent If true, skips 'content' and 'embedding' columns (Large BLOBs).
+   */
+  getNodes(
+    options: {
+      domain?: string;
+      type?: string;
+      limit?: number;
+      offset?: number;
+      excludeContent?: boolean; // FAFCAS: Optimization for metadata scans
+    } = {},
+  ): Node[] {
+    // Safe Default Limit? User specifically asked to fix unbounded.
+    // But for backward compatibility with scripts that expect everything, we might need a high limit or explicit 'unlimited' flag.
+    // Let's default to unlimited IF not specified, but strongly encourage limits in docs.
+    // Actually, 'sloppy code' implies implicit SELECT * is bad.
+    // Let's implement options but keep default behavior 'all' to avoid breaking existing logic silently,
+    // BUT we will log a warning if count > 1000 and no limit?
+    // No, let's just implement the capabilities first. callers must opt-in to limits.
+
+    const cols = options.excludeContent
+      ? "id, type, title, domain, layer, hash, meta" // No content, No embedding
+      : "*";
+
+    let sql = `SELECT ${cols} FROM nodes WHERE 1=1`;
+    const params: (string | number)[] = [];
+
+    if (options.domain) {
+      sql += " AND domain = ?";
+      params.push(options.domain);
+    }
+    if (options.type) {
+      sql += " AND type = ?";
+      params.push(options.type);
+    }
+
+    if (options.limit) {
+      sql += " LIMIT ?";
+      params.push(options.limit);
+    }
+    if (options.offset) {
+      sql += " OFFSET ?";
+      params.push(options.offset);
+    }
+
+    const rows = this.db.query(sql).all(...params) as Record<string, unknown>[];
+    return rows.map((row) => this.mapRowToNode(row));
+  }
+
+  getLexicon(): {
+    id: string;
+    label: string;
+    aliases: string[];
+    definition: string;
+  }[] {
+    // Optimized: Exclude content/embedding since we only need ID, Title, Aliases
+    const _sql =
+      "SELECT id, title, meta, content FROM nodes WHERE domain = 'lexicon' AND type = 'concept'";
+    // Note: content is 'definition', usually small. Keep it. Embedding is big.
+    // Actually, let's select specific columns to avoid embedding blob
+
+    const rows = this.db
+      .query(
+        "SELECT id, title, meta, content FROM nodes WHERE domain = 'lexicon' AND type = 'concept'",
+      )
+      .all() as { id: string; title: string; meta: string; content: string }[];
+
+    return rows.map((row) => {
+      const meta = row.meta ? JSON.parse(row.meta) : {};
+      return {
+        id: row.id,
+        label: row.title,
+        aliases: meta.aliases || [],
+        definition: row.content,
+        ...meta,
+      };
+    });
+  }
+
+  // biome-ignore lint/suspicious/noExplicitAny: row is raw DB result
+  private mapRowToNode(row: any): Node {
+    return {
+      id: row.id,
+      type: row.type,
+      label: row.title,
+      content: row.content, // May be undefined if excluded
+      domain: row.domain,
+      layer: row.layer,
+      // Only hydrate embedding if it exists (was selected)
+      embedding: row.embedding
+        ? new Float32Array(
+            row.embedding.buffer,
+            row.embedding.byteOffset,
+            row.embedding.byteLength / 4,
+          )
+        : undefined,
+      hash: row.hash,
+      meta: row.meta ? JSON.parse(row.meta) : {},
+    };
+  }
+
+  getNodeHash(id: string): string | null {
+    const row = this.db
+      .prepare("SELECT hash FROM nodes WHERE id = ?")
+      .get(id) as { hash: string } | undefined;
+    return row ? row.hash : null;
+  }
+
+  getStats() {
+    const nodesCount = (
+      this.db.query("SELECT COUNT(*) as c FROM nodes").get() as { c: number }
+    ).c;
+    const edgesCount = (
+      this.db.query("SELECT COUNT(*) as c FROM edges").get() as { c: number }
+    ).c;
+    const vectorsCount = (
+      this.db
+        .query("SELECT COUNT(*) as c FROM nodes WHERE embedding IS NOT NULL")
+        .get() as { c: number }
+    ).c;
+    const semanticTokensCount = (
+      this.db
+        .query(
+          "SELECT COUNT(*) as c FROM nodes WHERE meta LIKE '%semantic_tokens%'",
+        )
+        .get() as { c: number }
+    ).c;
+
+    return {
+      nodes: nodesCount,
+      edges: edgesCount,
+      vectors: vectorsCount,
+      semantic_tokens: semanticTokensCount,
+      db_size_bytes:
+        (this.db.query("PRAGMA page_count").get() as { page_count: number })
+          .page_count *
+        (this.db.query("PRAGMA page_size").get() as { page_size: number })
+          .page_size,
+    };
+  }
+
+  getNodesByType(type: string): Node[] {
+    // Forward to new method
+    return this.getNodes({ type });
+  }
+
+  // searchText method removed (Hollow Node Simplification)
+
+  /**
+   * Transaction Management
+   * Wraps database operations in transactions for atomicity and performance
+   */
+  beginTransaction() {
+    this.db.run("BEGIN TRANSACTION");
+  }
+
+  commit() {
+    this.db.run("COMMIT");
+  }
+
+  rollback() {
+    this.db.run("ROLLBACK");
+  }
+
+  close() {
+    this.db.close();
+  }
+
+  checkpoint() {
+    this.db.run("PRAGMA wal_checkpoint(TRUNCATE);");
+  }
+}
+
+// Helper: Calculate magnitude (L2 norm) of a vector
+function magnitude(vec: Float32Array): number {
+  let sum = 0;
+  for (let i = 0; i < vec.length; i++) {
+    sum += (vec[i] || 0) * (vec[i] || 0);
+  }
+  return Math.sqrt(sum);
+}
+
+// FAFCAS Protocol: use Dot Product for normalized vectors
+// Source: playbooks/embeddings-and-fafcas-protocol-playbook.md
+//
+// Returns 0 for zero-magnitude vectors (failed embeddings) to prevent
+// false matches in search results.
+export function dotProduct(a: Float32Array, b: Float32Array): number {
+  // Check for zero vectors (failed embeddings)
+  const magA = magnitude(a);
+  const magB = magnitude(b);
+
+  if (magA < 1e-6 || magB < 1e-6) {
+    // log.warn("⚠️ Zero vector detected in dot product, skipping comparison"); // Too noisy for tight loops
+    return 0;
+  }
+
+  let sum = 0;
+  // Modern JS engines SIMD-optimize this loop automatically
+  for (let i = 0; i < a.length; i++) {
+    sum += (a[i] || 0) * (b[i] || 0);
+  }
+  return sum;
+}
+
+// Source: playbooks/embeddings-and-fafcas-protocol-playbook.md
+export function toFafcas(vector: Float32Array): Uint8Array {
+  // 1. Calculate Magnitude (L2 Norm)
+  let sum = 0;
+  for (let i = 0; i < vector.length; i++) {
+    const val = vector[i] || 0;
+    sum += val * val;
+  }
+  const magnitude = Math.sqrt(sum);
+
+  // 2. Normalize (Divide by Magnitude)
+  // Optimization: If magnitude is 0, return zero vector
+  if (magnitude > 1e-6) {
+    for (let i = 0; i < vector.length; i++) {
+      const val = vector[i] || 0;
+      vector[i] = val / magnitude;
+    }
+  }
+
+  // 3. Serialize to Raw Bytes (FAFCAS Blob)
+  return new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength);
+}
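`ResonanceDB` stores embeddings as raw `Float32Array` bytes, and the exported `dotProduct` doubles as cosine similarity once vectors have been normalized via `toFafcas`. A hypothetical brute-force nearest-neighbor scan over this API follows; the `topK` helper, the 384-dimension placeholder query vector, and the 10,000-row limit are illustrative assumptions, not part of the package:

```ts
// Usage sketch (not package code): rank nodes by similarity to a query vector.
import { dotProduct, type Node, ResonanceDB, toFafcas } from "./db";

// Placeholder query vector; in practice this comes from the Embedder service.
// The 384 dimension is an assumption, not specified in this diff.
const queryVec = new Float32Array(384).fill(1);
toFafcas(queryVec); // normalizes in place, so dotProduct() equals cosine similarity

function topK(db: ResonanceDB, query: Float32Array, k = 5) {
  // getNodes defaults to unbounded; opt in to a limit, as the comments above advise
  const nodes: Node[] = db.getNodes({ limit: 10_000 });
  return nodes
    .filter((n) => n.embedding) // skip nodes whose embedding is missing or excluded
    .map((n) => ({ node: n, score: dotProduct(query, n.embedding as Float32Array) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, k);
}

const db = ResonanceDB.init(); // opens the path configured in polyvis.settings.json
for (const { node, score } of topK(db, queryVec)) {
  console.log(score.toFixed(3), node.id, node.label);
}
db.close();
```

Because `dotProduct` returns 0 for zero-magnitude vectors, failed embeddings sink to the bottom of the ranking rather than producing false matches.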
package/src/resonance/pipeline/README.md
@@ -0,0 +1,7 @@
+# 🔄 Resonance Pipeline
+
+Data processing and extraction steps for the Resonance Engine.
+
+## Contents
+- **`extract.ts`**: Extracts high-value terms from the knowledge graph for frontend use.
+- **`transform_docs.ts`**: Prepares markdown documents for ingestion.