amalfa 1.0.0 → 1.0.2
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +226 -247
- package/amalfa.config.example.ts +8 -6
- package/docs/AGENT-METADATA-PATTERNS.md +1021 -0
- package/docs/CONFIG_E2E_VALIDATION.md +147 -0
- package/docs/CONFIG_UNIFICATION.md +187 -0
- package/docs/CONFIG_VALIDATION.md +103 -0
- package/docs/LEGACY_DEPRECATION.md +174 -0
- package/docs/MCP_SETUP.md +317 -0
- package/docs/QUICK_START_MCP.md +168 -0
- package/docs/SESSION-2026-01-06-METADATA-PATTERNS.md +346 -0
- package/docs/SETUP.md +464 -0
- package/docs/SETUP_COMPLETE.md +464 -0
- package/docs/VISION-AGENT-LEARNING.md +1242 -0
- package/docs/_current-config-status.md +93 -0
- package/package.json +6 -3
- package/polyvis.settings.json.bak +38 -0
- package/src/cli.ts +159 -31
- package/src/config/defaults.ts +73 -15
- package/src/core/VectorEngine.ts +18 -9
- package/src/daemon/index.ts +12 -8
- package/src/mcp/index.ts +62 -7
- package/src/pipeline/AmalfaIngestor.ts +22 -12
- package/src/pipeline/PreFlightAnalyzer.ts +434 -0
- package/src/resonance/DatabaseFactory.ts +3 -4
- package/src/resonance/db.ts +8 -6
- package/src/resonance/schema.ts +19 -1
- package/src/resonance/services/vector-daemon.ts +151 -0
- package/src/utils/DaemonManager.ts +147 -0
- package/src/utils/ZombieDefense.ts +5 -1
- package/:memory: +0 -0
- package/:memory:-shm +0 -0
- package/:memory:-wal +0 -0
- package/README.old.md +0 -112
- package/agents.config.json +0 -11
- package/drizzle/0000_minor_iron_fist.sql +0 -19
- package/drizzle/meta/0000_snapshot.json +0 -139
- package/drizzle/meta/_journal.json +0 -13
- package/example_usage.ts +0 -39
- package/experiment.sh +0 -35
- package/hello +0 -2
- package/index.html +0 -52
- package/knowledge/excalibur.md +0 -12
- package/plans/experience-graph-integration.md +0 -60
- package/prompts/gemini-king-mode-prompt.md +0 -46
- package/public/docs/MCP_TOOLS.md +0 -372
- package/schemas/README.md +0 -20
- package/schemas/cda.schema.json +0 -84
- package/schemas/conceptual-lexicon.schema.json +0 -75
- package/scratchpads/dummy-debrief-boxed.md +0 -39
- package/scratchpads/dummy-debrief.md +0 -27
- package/scratchpads/scratchpad-design.md +0 -50
- package/scratchpads/scratchpad-scrolling.md +0 -20
- package/scratchpads/scratchpad-toc-disappearance.md +0 -23
- package/scratchpads/scratchpad-toc.md +0 -28
- package/scratchpads/test_gardener.md +0 -7
- package/src/core/LLMClient.ts +0 -93
- package/src/core/TagEngine.ts +0 -56
- package/src/db/schema.ts +0 -46
- package/src/gardeners/AutoTagger.ts +0 -116
- package/src/pipeline/HarvesterPipeline.ts +0 -101
- package/src/pipeline/Ingestor.ts +0 -555
- package/src/resonance/cli/ingest.ts +0 -41
- package/src/resonance/cli/migrate.ts +0 -54
- package/src/resonance/config.ts +0 -40
- package/src/resonance/daemon.ts +0 -236
- package/src/resonance/pipeline/extract.ts +0 -89
- package/src/resonance/pipeline/transform_docs.ts +0 -60
- package/src/resonance/services/tokenizer.ts +0 -159
- package/src/resonance/transform/cda.ts +0 -393
- package/src/utils/EnvironmentVerifier.ts +0 -67
- package/substack/substack-playbook-1.md +0 -95
- package/substack/substack-playbook-2.md +0 -78
- package/tasks/ui-investigation.md +0 -26
- package/test-db +0 -0
- package/test-db-shm +0 -0
- package/test-db-wal +0 -0
- package/tests/canary/verify_pinch_check.ts +0 -44
- package/tests/fixtures/ingest_test.md +0 -12
- package/tests/fixtures/ingest_test_boxed.md +0 -13
- package/tests/fixtures/safety_test.md +0 -45
- package/tests/fixtures/safety_test_boxed.md +0 -49
- package/tests/fixtures/tagged_output.md +0 -49
- package/tests/fixtures/tagged_test.md +0 -49
- package/tests/mcp-server-settings.json +0 -8
- package/verify-embedder.ts +0 -54
package/src/pipeline/Ingestor.ts
DELETED
@@ -1,555 +0,0 @@
-import type { Database } from "bun:sqlite";
-import { join } from "node:path";
-import { EdgeWeaver } from "@src/core/EdgeWeaver";
-import { LouvainGate } from "@src/core/LouvainGate";
-import { DatabaseFactory } from "@src/resonance/DatabaseFactory";
-// Types
-import type { Node } from "@src/resonance/db";
-import { ResonanceDB } from "@src/resonance/db";
-import { Embedder } from "@src/resonance/services/embedder";
-import { TokenizerService } from "@src/resonance/services/tokenizer";
-import { getLogger } from "@src/utils/Logger";
-import { PipelineValidator } from "@src/utils/validator";
-import { Glob } from "bun";
-import settings from "@/polyvis.settings.json";
-
-export interface IngestorOptions {
-  file?: string;
-  files?: string[];
-  dir?: string;
-  dbPath?: string;
-}
-
-interface LexiconItem {
-  id: string;
-  title: string;
-  aliases: string[];
-}
-
-interface IngestionStats {
-  nodes: number;
-  edges: number;
-  vectors: number;
-  semantic_tokens: number;
-  db_size_bytes: number;
-}
-
-interface RawLexiconItem {
-  id: string;
-  label: string;
-  aliases?: string[];
-  description?: string;
-  category?: string;
-  tags?: string[];
-  title?: string; // Sometimes source JSON uses title
-}
-
-export class Ingestor {
-  private db: ResonanceDB;
-  private embedder: Embedder;
-  private tokenizer: TokenizerService;
-  private dbPath: string;
-  private log = getLogger("Ingestor");
-
-  constructor(dbPath?: string) {
-    this.dbPath = dbPath
-      ? join(process.cwd(), dbPath)
-      : join(process.cwd(), settings.paths.database.resonance);
-
-    this.db = new ResonanceDB(this.dbPath);
-    this.embedder = Embedder.getInstance();
-    this.tokenizer = TokenizerService.getInstance();
-  }
-
-  public getEmbedder(): Embedder {
-    return this.embedder;
-  }
-
-  /**
-   * UNIFIED PIPELINE RUNNER
-   * Runs both Persona and Experience pipelines.
-   */
-  async run(options: IngestorOptions = {}) {
-    const sqliteDb = await this.init(options);
-
-    // Phase 1: Persona
-    const lexiconItems = await this.runPersona();
-
-    // Phase 2: Experience
-    await this.runExperience(options, lexiconItems, sqliteDb);
-
-    this.cleanup(sqliteDb);
-    return true; // Simplified success check
-  }
-
-  /**
-   * PHASE 1: PERSONA
-   * Ingests Core Ontology (Lexicon) and Directives (CDA).
-   */
-  async runPersona(): Promise<LexiconItem[]> {
-    this.log.info("🧩 [Phase 1] Starting Persona Ingestion...");
-
-    this.db.beginTransaction();
-    try {
-      // 1. Lexicon
-      const lexicon = await this.bootstrapLexicon();
-
-      // 2. CDA
-      await this.ingestCDA();
-
-      this.db.commit();
-      this.log.info("✅ [Phase 1] Persona Ingestion Complete.");
-      return lexicon;
-    } catch (e) {
-      this.db.rollback();
-      this.log.error(
-        { err: e },
-        "❌ [Phase 1] Persona Ingestion Failed, rolled back.",
-      );
-      throw e;
-    }
-  }
-
-  /**
-   * PHASE 2: EXPERIENCE
-   * Ingests Documents, Playbooks, and Debriefs.
-   */
-  async runExperience(
-    options: IngestorOptions,
-    lexicon: LexiconItem[] = [],
-    sqliteDb: Database,
-  ) {
-    this.log.info("📚 [Phase 2] Starting Experience Ingestion...");
-
-    // REMOVED: Global Transaction (prevents 100% data loss on 99% crash)
-    try {
-      // If lexicon is empty (e.g. running independently), try loading from DB
-      if (lexicon.length === 0) {
-        lexicon = await this.loadLexiconFromDB();
-      }
-
-      const weaver = new EdgeWeaver(this.db, lexicon);
-      const filesToProcess = this.getFilesToProcess(options);
-
-      // Process
-      const startTime = performance.now();
-      let totalChars = 0;
-      let processedCount = 0;
-      const BATCH_SIZE = 50;
-
-      for (const [i, fileEntry] of filesToProcess.entries()) {
-        // Start Batch Transaction
-        if (i % BATCH_SIZE === 0) {
-          this.db.beginTransaction();
-        }
-
-        const charsProccessed = await this.processFile(
-          fileEntry,
-          this.db,
-          this.embedder,
-          weaver,
-          this.tokenizer,
-        );
-        totalChars += charsProccessed;
-        processedCount++;
-
-        // Commit Batch Transaction
-        if ((i + 1) % BATCH_SIZE === 0 || i === filesToProcess.length - 1) {
-          this.db.commit();
-        }
-      }
-
-      const endTime = performance.now();
-      const durationSec = (endTime - startTime) / 1000;
-      const charsPerSec = totalChars / durationSec;
-      const dbStats = this.db.getStats();
-
-      this.logStats(
-        processedCount,
-        totalChars,
-        durationSec,
-        charsPerSec,
-        dbStats,
-      );
-
-      // Weaving (Protected by its own transaction)
-      this.db.beginTransaction();
-      try {
-        await this.runWeavers();
-        this.db.commit();
-      } catch (e) {
-        this.db.rollback();
-        this.log.error({ err: e }, "❌ Weaving failed, partial rollback");
-        // Continue, as ingestion is primary
-      }
-
-      // Validation (On native SQLite connection for speed/independence)
-      // Run AFTER commit so validator sees committed data
-      const validator = new PipelineValidator();
-      validator.captureBaseline(sqliteDb);
-
-      // Only run validation if we processed files
-      if (processedCount > 0) {
-        const report = validator.validate(sqliteDb);
-        validator.printReport(report);
-      }
-
-      // ------------------------------------------------------------------
-      // 🛡️ IRON-CLAD PERSISTENCE PROTOCOL
-      // ------------------------------------------------------------------
-
-      // 1. Force WAL Checkpoint
-      this.log.info("💾 Persistence: Forcing WAL Checkpoint...");
-      this.db.getRawDb().run("PRAGMA wal_checkpoint(TRUNCATE);");
-
-      // 2. Pinch Check (Verification)
-      const finalSize = Bun.file(this.dbPath).size; // .size is sync on BunFile
-      if (finalSize === 0) {
-        throw new Error(
-          "CRITICAL: Pinch Check Failed. Database file is 0 bytes.",
-        );
-      }
-      this.log.info(
-        { dbSizeMB: (finalSize / 1024 / 1024).toFixed(2) },
-        "✅ Persistence Verified",
-      );
-
-      this.log.info("✅ [Phase 2] Experience Ingestion Complete.");
-    } catch (e) {
-      // No global rollback available (intentional)
-      this.log.error({ err: e }, "❌ [Phase 2] Experience Ingestion Failed.");
-      throw e;
-    }
-  }
-
-  // --- Lifecycle Helpers ---
-
-  public async init(_options: IngestorOptions) {
-    this.log.info("🌉 <THE BRIDGE> Ingestion Protocol Initiated...");
-    // Ensure Embedder Init
-    await this.embedder.embed("init");
-    // Use Factory for safe validation connection
-    // Note: Validation is read-heavy but might need to see WAL updates
-    return DatabaseFactory.connect(this.dbPath);
-  }
-
-  public cleanup(sqliteDb: Database) {
-    sqliteDb.close();
-    this.db.close();
-  }
-
-  private async loadLexiconFromDB(): Promise<LexiconItem[]> {
-    this.log.info("🧠 Loading Lexicon from Database...");
-    const rawLexicon = this.db.getLexicon();
-    const lexicon: LexiconItem[] = rawLexicon.map((item: RawLexiconItem) => ({
-      id: item.id,
-      title: item.label,
-      aliases: item.aliases || [],
-    }));
-    this.tokenizer.loadLexicon(lexicon);
-    return lexicon;
-  }
-
-  private async bootstrapLexicon(): Promise<LexiconItem[]> {
-    let lexicon: LexiconItem[] = [];
-    try {
-      const legacyPath = join(
-        process.cwd(),
-        settings.paths.sources.persona.lexicon,
-      );
-      if (legacyPath) {
-        const file = Bun.file(legacyPath);
-        if (await file.exists()) {
-          const json = await file.json();
-          if (json) {
-            const items = Array.isArray(json) ? json : json.concepts;
-            lexicon = (items as RawLexiconItem[]).map((c) => ({
-              id: c.id,
-              title: c.title || c.label,
-              aliases: c.aliases || [],
-            }));
-
-            // Bootstrap Nodes
-            for (const item of items) {
-              this.db.insertNode({
-                id: item.id,
-                type: "concept",
-                label: item.title || item.label,
-                content: item.description || item.title || item.label,
-                domain: "persona",
-                layer: "ontology",
-                meta: {
-                  category: item.category,
-                  tags: item.tags,
-                },
-              } as Node);
-            }
-            this.log.info({ count: lexicon.length }, "📚 Bootstrapped Lexicon");
-            this.tokenizer.loadLexicon(lexicon);
-          }
-        }
-      }
-    } catch (e) {
-      this.log.warn({ err: e }, "⚠️ Lexicon bootstrap failed");
-    }
-    return lexicon;
-  }
-
-  private async ingestCDA() {
-    try {
-      const enrichedCdaPath = join(
-        process.cwd(),
-        ".resonance",
-        "artifacts",
-        "cda-enriched.json",
-      );
-      const cdaFile = Bun.file(enrichedCdaPath);
-
-      if (await cdaFile.exists()) {
-        const enrichedCda = await cdaFile.json();
-        let directiveCount = 0;
-
-        for (const entry of enrichedCda.entries) {
-          this.db.insertNode({
-            id: entry.id,
-            type: "directive",
-            label: entry.title,
-            content: entry.definition,
-            domain: "persona",
-            layer: "directive",
-            meta: {
-              section: entry.section,
-              tags: entry.explicit_tags,
-            },
-          } as Node);
-          directiveCount++;
-
-          for (const rel of entry.validated_relationships) {
-            const check = LouvainGate.check(
-              this.db.getRawDb(),
-              entry.id,
-              rel.target,
-            );
-            if (check.allowed) {
-              this.db.insertEdge(entry.id, rel.target, rel.type);
-            } else {
-              // console.log(`[LouvainGate] ${check.reason}`);
-            }
-          }
-        }
-        this.log.info({ count: directiveCount }, "📋 Ingested CDA");
-      } else {
-        this.log.warn("⚠️ No enriched CDA found.");
-      }
-    } catch (e) {
-      this.log.warn({ err: e }, "⚠️ CDA ingestion failed");
-    }
-  }
-
-  private getFilesToProcess(
-    options: IngestorOptions,
-  ): { path: string; type: string }[] {
-    const files: { path: string; type: string }[] = [];
-
-    // 1. Specific single file
-    if (options.file) {
-      files.push({ path: String(options.file), type: "document" });
-    }
-    // 2. Specific list of files (Batch)
-    else if (options.files && options.files.length > 0) {
-      for (const f of options.files) {
-        files.push({ path: String(f), type: "document" });
-      }
-    }
-    // 3. Full Directory Scan
-    else {
-      const sources = options.dir
-        ? [{ path: String(options.dir), name: "Document" }]
-        : settings.paths.sources.experience;
-
-      for (const source of sources) {
-        const glob = new Glob("**/*.md");
-        for (const file of glob.scanSync(source.path)) {
-          files.push({
-            path: join(process.cwd(), source.path, file),
-            type: source.name.toLowerCase(), // Use name from settings as generic type base
-          });
-        }
-      }
-    }
-    return files;
-  }
-
-  private async runWeavers() {
-    try {
-      const { TimelineWeaver } = await import("@src/core/TimelineWeaver");
-      TimelineWeaver.weave(this.db);
-    } catch (e) {
-      this.log.warn({ err: e }, "⚠️ Timeline Weaver failed");
-    }
-
-    try {
-      const { SemanticWeaver } = await import("@src/core/SemanticWeaver");
-      SemanticWeaver.weave(this.db);
-    } catch (e) {
-      this.log.warn({ err: e }, "⚠️ Semantic Weaver failed");
-    }
-  }
-
-  private logStats(
-    count: number,
-    chars: number,
-    duration: number,
-    throughput: number,
-    stats: IngestionStats,
-  ) {
-    this.log.info(
-      {
-        processed: {
-          files: count,
-          chars: chars,
-          sizeKB: (chars / 1024).toFixed(2),
-          durationSec: duration.toFixed(2),
-          throughput: throughput.toFixed(2),
-        },
-        db: {
-          nodes: stats.nodes,
-          vectors: stats.vectors,
-          edges: stats.edges,
-          semantic_tokens: stats.semantic_tokens,
-          sizeMB: (stats.db_size_bytes / 1024 / 1024).toFixed(2),
-        },
-      },
-      "🏁 Ingestion Complete",
-    );
-  }
-
-  // --- Processing Logic ---
-
-  private async processFile(
-    fileEntry: { path: string; type: string },
-    db: ResonanceDB,
-    embedder: Embedder,
-    weaver: EdgeWeaver,
-    tokenizer: TokenizerService,
-  ): Promise<number> {
-    const filePath = fileEntry.path;
-    const type = fileEntry.type;
-    const content = await Bun.file(filePath).text();
-    const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
-    const frontmatter = fmMatch?.[1] ? this.parseFrontmatter(fmMatch[1]) : {};
-    let totalBoxChars = 0;
-
-    const boxRegex =
-      /<!-- locus:([a-zA-Z0-9-]+) -->\n([\s\S]*?)(?=<!-- locus:|$)/g;
-    let match: RegExpExecArray | null;
-    let foundBoxes = false;
-
-    while (true) {
-      match = boxRegex.exec(content);
-      if (!match) break;
-      if (!match[1] || !match[2]) continue;
-
-      foundBoxes = true;
-      const locusId = match[1];
-      const boxContent = match[2].trim();
-
-      await this.processBox(
-        locusId,
-        boxContent,
-        type,
-        frontmatter,
-        filePath,
-        db,
-        embedder,
-        weaver,
-        tokenizer,
-      );
-      totalBoxChars += boxContent.length;
-    }
-
-    if (!foundBoxes) {
-      const filename = filePath.split("/").pop() || "unknown";
-      const id = filename
-        .replace(".md", "")
-        .toLowerCase()
-        .replace(/[^a-z0-9-]/g, "-");
-
-      if (content.length > 10) {
-        await this.processBox(
-          id,
-          content,
-          type,
-          frontmatter,
-          filePath,
-          db,
-          embedder,
-          weaver,
-          tokenizer,
-        );
-        return content.length;
-      }
-      return 0;
-    }
-    return totalBoxChars;
-  }
-
-  private async processBox(
-    id: string,
-    content: string,
-    type: string,
-    meta: Record<string, unknown>,
-    sourcePath: string,
-    db: ResonanceDB,
-    embedder: Embedder,
-    weaver: EdgeWeaver,
-    tokenizer: TokenizerService,
-  ) {
-    const tokens = tokenizer.extract(content);
-    // MD5 hash for content change detection
-    const hasher = new Bun.CryptoHasher("md5");
-    hasher.update(content.trim());
-    const currentHash = hasher.digest("hex");
-    const storedHash = db.getNodeHash(id);
-
-    if (storedHash === currentHash) return;
-
-    this.log.debug(
-      { id, length: content.length },
-      "⚡️ Ingesting changed content",
-    );
-
-    // Removed hardcoded narrative allowlist.
-    // Logic: If it's mounted, we process & embed it.
-    let embedding: Float32Array | undefined;
-    if (content.length > 50) {
-      embedding = (await embedder.embed(content)) || undefined;
-    }
-
-    const node = {
-      id: id,
-      type: type,
-      label: (meta.title as string) || sourcePath.split("/").pop(),
-      content: content,
-      domain: "experience",
-      layer: "note",
-      embedding: embedding,
-      hash: currentHash,
-      meta: { ...meta, source: sourcePath, semantic_tokens: tokens },
-    };
-
-    db.insertNode(node);
-    weaver.weave(id, content);
-  }
-
-  private parseFrontmatter(text: string): Record<string, unknown> {
-    const meta: Record<string, unknown> = {};
-    text.split("\n").forEach((line) => {
-      const [key, ...vals] = line.split(":");
-      if (key && vals.length) {
-        meta[key.trim()] = vals.join(":").trim();
-      }
-    });
-    return meta;
-  }
-}

package/src/resonance/cli/ingest.ts
DELETED
@@ -1,41 +0,0 @@
-import { parseArgs } from "node:util";
-import { Ingestor } from "@src/pipeline/Ingestor";
-import { ZombieDefense } from "@src/utils/ZombieDefense";
-
-// Parse CLI
-const { values } = parseArgs({
-  args: Bun.argv,
-  options: {
-    file: {
-      type: "string",
-    },
-    dir: {
-      type: "string",
-    },
-    db: {
-      type: "string",
-    },
-  },
-  strict: true,
-  allowPositionals: true,
-});
-
-const ingestor = new Ingestor(values.db);
-
-// Zombie Defense: Ensure clean slate before heavy I/O
-await ZombieDefense.assertClean("Ingest", true);
-
-console.log("🚀 Starting Ingestion...");
-const result = await ingestor.run({
-  file: values.file,
-  dir: values.dir,
-  dbPath: values.db,
-});
-
-if (!result) {
-  console.error("❌ Ingestion Failed");
-  process.exit(1);
-}
-
-console.log("✅ Ingestion Success");
-process.exit(0);

package/src/resonance/cli/migrate.ts
DELETED
@@ -1,54 +0,0 @@
-import { join } from "node:path";
-import settings from "@/polyvis.settings.json";
-import { ResonanceDB } from "../db";
-
-// Migration / Seeding Script for Resonance DB
-const dbPath = join(process.cwd(), settings.paths.database.resonance);
-console.log(`Migrating/Seeding Database: ${dbPath}`);
-
-// 1. Initialize DB (Triggers internal Schema Migrations)
-const db = new ResonanceDB(dbPath);
-
-console.log("✅ Schema Initialization Complete (via ResonanceDB).");
-
-// 2. Data Patching (Legacy Support)
-// Moving logic that fixes old domain/layer conventions
-console.log("Patching Data Defaults...");
-try {
-  db.getRawDb().exec(
-    "UPDATE nodes SET domain = 'persona', layer = 'ontology' WHERE domain IS 'knowledge' AND (id LIKE 'term-%' OR id LIKE 'OH-%' OR id LIKE 'CIP-%' OR id LIKE 'COG-%')",
-  );
-} catch (_e) {
-  console.warn("Patching skipped (Tables might be empty).");
-}
-
-// 3. Genesis Node Injection
-console.log("Injecting Genesis Node...");
-const genesis = {
-  id: "000-GENESIS",
-  title: "PolyVis Prime",
-  type: "root",
-  content: "The singular origin point of the PolyVis context.",
-  domain: "system",
-  layer: "ontology",
-  meta: {
-    order_index: -1,
-  },
-};
-
-try {
-  // biome-ignore lint/suspicious/noExplicitAny: genesis is a partial node matching the schema
-  db.insertNode(genesis as any);
-} catch (e: unknown) {
-  const err = e as { message: string };
-  console.warn("Genesis injection failed:", err.message);
-}
-
-// 4. Connect Genesis to Heads
-const heads = ["term-001", "CIP-1", "OH-061"];
-heads.forEach((head) => {
-  db.insertEdge(genesis.id, head, "genesis");
-});
-
-console.log("✅ Seeding Complete.");
-db.close();

package/src/resonance/config.ts
DELETED
@@ -1,40 +0,0 @@
-import { existsSync, readFileSync } from "node:fs";
-import { join } from "node:path";
-import { z } from "zod";
-
-export const ResonanceConfigSchema = z.object({
-  paths: z.object({
-    database: z.object({
-      resonance: z.string().default("public/resonance.db"),
-    }),
-    sources: z.object({
-      experience: z.object({
-        directories: z
-          .array(z.string())
-          .default(["debriefs", "playbooks", "briefs"]),
-      }),
-      persona: z.object({
-        lexicon: z
-          .string()
-          .default("scripts/fixtures/conceptual-lexicon-ref-v1.79.json"),
-        cda: z.string().default("scripts/fixtures/cda-ref-v63.json"),
-      }),
-    }),
-  }),
-});
-
-export type ResonanceConfig = z.infer<typeof ResonanceConfigSchema>;
-
-export function loadConfig(): ResonanceConfig {
-  const configPath = join(process.cwd(), "polyvis.settings.json");
-  if (!existsSync(configPath)) {
-    throw new Error("polyvis.settings.json not found");
-  }
-  try {
-    const data = JSON.parse(readFileSync(configPath, "utf-8"));
-    return ResonanceConfigSchema.parse(data);
-  } catch (e) {
-    console.error("❌ Invalid configuration file:", e);
-    throw e;
-  }
-}