@ijfw/memory-server 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw +27 -0
- package/bin/ijfw-dashboard +180 -0
- package/bin/ijfw-dispatch-plan +41 -0
- package/bin/ijfw-memorize +273 -0
- package/bin/ijfw-memory +51 -0
- package/fixtures/demo-target.js +28 -0
- package/package.json +53 -0
- package/src/api-client.js +190 -0
- package/src/audit-roster.js +315 -0
- package/src/caps.js +37 -0
- package/src/cold-scan-runner.mjs +37 -0
- package/src/compute/edges.js +155 -0
- package/src/compute/extract.js +560 -0
- package/src/compute/fts5.js +420 -0
- package/src/compute/graph-auto-index.js +191 -0
- package/src/compute/graph-lock.js +114 -0
- package/src/compute/index.js +18 -0
- package/src/compute/migration-runner.js +116 -0
- package/src/compute/migrations/001-initial.js +23 -0
- package/src/compute/migrations/002-porter-stemming-source.js +139 -0
- package/src/compute/migrations/003-tier-semantic.js +69 -0
- package/src/compute/migrations/004-kg-tables.js +83 -0
- package/src/compute/migrations/005-stale-candidate.js +72 -0
- package/src/compute/python-resolver.js +106 -0
- package/src/compute/runner-vm.js +185 -0
- package/src/compute/runner.js +416 -0
- package/src/compute/sandbox-detect.js +122 -0
- package/src/compute/sandbox-linux.js +164 -0
- package/src/compute/sandbox-macos.js +167 -0
- package/src/compute/sandbox-windows.js +63 -0
- package/src/compute/schema.sql +118 -0
- package/src/compute/staleness.js +239 -0
- package/src/compute/synonyms.js +367 -0
- package/src/compute/traverse.js +180 -0
- package/src/cost/aggregator.js +229 -0
- package/src/cost/pricing.js +134 -0
- package/src/cost/readers/claude.js +179 -0
- package/src/cost/readers/codex.js +131 -0
- package/src/cost/readers/gemini.js +111 -0
- package/src/cost/savings.js +243 -0
- package/src/cross-dispatcher.js +437 -0
- package/src/cross-orchestrator-cli.js +1885 -0
- package/src/cross-orchestrator.js +598 -0
- package/src/cross-project-search.js +114 -0
- package/src/dashboard-client.html +1180 -0
- package/src/dashboard-server.js +895 -0
- package/src/design-companion.js +81 -0
- package/src/dispatch/colon-syntax.js +732 -0
- package/src/dispatch-planner.js +235 -0
- package/src/dream/cooldown.js +105 -0
- package/src/dream/runner.mjs +373 -0
- package/src/dream/staleness-wiring.js +195 -0
- package/src/feedback-detector.js +57 -0
- package/src/hero-line.js +115 -0
- package/src/importers/claude-mem.js +152 -0
- package/src/importers/cli.js +311 -0
- package/src/importers/common.js +84 -0
- package/src/importers/discover.js +235 -0
- package/src/importers/rtk.js +107 -0
- package/src/intent-router.js +221 -0
- package/src/lib/atomic-io.js +201 -0
- package/src/lib/cache.js +33 -0
- package/src/lib/npm-view.js +104 -0
- package/src/lib/status-card.js +95 -0
- package/src/lib/token.js +85 -0
- package/src/memory/fts5.js +349 -0
- package/src/memory/migration-runner.js +116 -0
- package/src/memory/migrations/001-fts5-init.js +26 -0
- package/src/memory/migrations/002-tier-semantic.js +60 -0
- package/src/memory/migrations/003-stale-candidate.js +60 -0
- package/src/memory/reader.js +300 -0
- package/src/memory/recall-counter.js +76 -0
- package/src/memory/schema.sql +79 -0
- package/src/memory/search.js +431 -0
- package/src/memory/staleness.js +237 -0
- package/src/memory/tier-promotion.js +377 -0
- package/src/memory/tokenize.js +63 -0
- package/src/project-type-detector.js +866 -0
- package/src/prompt-check.js +171 -0
- package/src/ralph-allowlist.js +88 -0
- package/src/receipts.js +129 -0
- package/src/redactor.js +107 -0
- package/src/sandbox.js +275 -0
- package/src/sanitizer.js +69 -0
- package/src/scan-resume.js +167 -0
- package/src/schema.js +82 -0
- package/src/search-bm25.js +108 -0
- package/src/server.js +1414 -0
- package/src/swarm-config.js +80 -0
- package/src/trident/dispatch.js +211 -0
- package/src/trident/lens-health.js +253 -0
- package/src/update-apply.js +79 -0
- package/src/update-check.js +136 -0
- package/src/vectors.js +178 -0
- package/templates/design/bento-grid.md +84 -0
- package/templates/design/brutalist-luxe.md +82 -0
- package/templates/design/cinematic-dark.md +82 -0
- package/templates/design/data-dense-dashboard.md +88 -0
- package/templates/design/editorial-warm.md +81 -0
- package/templates/design/glassmorphic.md +84 -0
- package/templates/design/magazine-editorial.md +84 -0
- package/templates/design/maximalist-vibrant.md +85 -0
- package/templates/design/neo-swiss-tech.md +85 -0
- package/templates/design/swiss-minimal.md +80 -0
- package/templates/design/terminal-native.md +83 -0
- package/templates/design/warm-organic.md +84 -0
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
// IJFW v1.3.0 Alpha -- C9 Compute Lever / FTS5 layer.
|
|
2
|
+
//
|
|
3
|
+
// Per-project SQLite db at <projectRoot>/.ijfw/index/compute.db with FTS5
|
|
4
|
+
// virtual tables over the `raw` and `compiled` content tables. Schema is
|
|
5
|
+
// owned by ./schema.sql (V3-B4) and applied via the migration runner.
|
|
6
|
+
//
|
|
7
|
+
// Driver: better-sqlite3 (synchronous N-API binding, prebuild-binaries
|
|
8
|
+
// fallback). The mcp-server engines.node floor is >=18 which rules out the
|
|
9
|
+
// experimental built-in node:sqlite. A thin loader below tolerates
|
|
10
|
+
// node:sqlite ONLY when better-sqlite3 is unavailable AND Node >= 22 AND
|
|
11
|
+
// the explicit IJFW_COMPUTE_DRIVER=node-sqlite env opt-in is set -- this
|
|
12
|
+
// is a development-time escape hatch for local verification, NOT a
|
|
13
|
+
// shipping path.
|
|
14
|
+
//
|
|
15
|
+
// Integrity discipline:
|
|
16
|
+
// - openDb() -- enforces schema version; refuses downgrade.
|
|
17
|
+
// - safeWrite() -- runs `redactSecrets()` over `body` and `topic` BEFORE
|
|
18
|
+
// inserting (D-PILLAR-SPEC §12 ingest scrub gate), then
|
|
19
|
+
// inserts inside a transaction and runs PRAGMA quick_check
|
|
20
|
+
// after each insert; throws IntegrityError on anything
|
|
21
|
+
// other than 'ok'. The scrub default is on; setting
|
|
22
|
+
// IJFW_INGEST_SCRUB=0 disables it for local debugging
|
|
23
|
+
// only -- never a production posture.
|
|
24
|
+
// - search() -- FTS5 MATCH; top-k rows from the content table ordered
|
|
25
|
+
// by bm25 rank.
|
|
26
|
+
// - closeDb() -- clean close; suppresses double-close errors.
|
|
27
|
+
//
|
|
28
|
+
// Security model (D-PILLAR-SPEC §12, real fix-wave C3):
|
|
29
|
+
// Secrets are scrubbed at the observation-ingest boundary. By the time a
|
|
30
|
+
// row reaches the FTS index, the entity extractor, or the kg layer, all
|
|
31
|
+
// `redactSecrets`-recognised tokens have been replaced with
|
|
32
|
+
// `[REDACTED:<kind>]` placeholders. The scrub is the security gate; the
|
|
33
|
+
// `kg_nodes.redacted=1` flag downstream is a residual-safety belt for
|
|
34
|
+
// the rare case where an entity-regex match's value happens to look
|
|
35
|
+
// secret-shaped (e.g. a function name `validateSk_live_xxx`).
|
|
36
|
+
|
|
37
|
+
import { existsSync, mkdirSync } from 'fs';
|
|
38
|
+
import { join, resolve, normalize, isAbsolute, dirname } from 'path';
|
|
39
|
+
import { runMigrations, highestKnownVersion, SchemaVersionError } from './migration-runner.js';
|
|
40
|
+
import { autoIndexGraphFromBody } from './graph-auto-index.js';
|
|
41
|
+
import { redactSecrets } from '../redactor.js';
|
|
42
|
+
|
|
43
|
+
// D-PILLAR-SPEC §12 ingest scrub gate. The gate is on by default and is
// switched off only when IJFW_INGEST_SCRUB is exactly '0' (a local
// debugging aid, not a shipping posture). The env var is consulted on
// every call, so a test harness can flip it without re-importing the
// module.
function ingestScrubEnabled() {
  const flag = process.env.IJFW_INGEST_SCRUB;
  const scrubDisabled = flag === '0';
  return !scrubDisabled;
}
|
|
51
|
+
|
|
52
|
+
export { SchemaVersionError };
|
|
53
|
+
|
|
54
|
+
// Error type thrown by safeWrite when PRAGMA quick_check reports anything
// other than 'ok' after an insert.
export class IntegrityError extends Error {
  // Own property, so the name shows up in stack traces and in `err.name`
  // checks by callers.
  name = 'IntegrityError';

  constructor(message) {
    super(message);
  }
}
|
|
60
|
+
|
|
61
|
+
// General-purpose error for the compute db layer: invalid handles, refused
// tables, bad arguments, unavailable drivers, and wrapped search failures.
export class ComputeDbError extends Error {
  // Own property, so the name shows up in stack traces and in `err.name`
  // checks by callers.
  name = 'ComputeDbError';

  constructor(message) {
    super(message);
  }
}
|
|
67
|
+
|
|
68
|
+
const DB_FILENAME = 'compute.db';
|
|
69
|
+
const INDEX_DIR_NAME = 'index';
|
|
70
|
+
const IJFW_DIR_NAME = '.ijfw';
|
|
71
|
+
|
|
72
|
+
// Tables that have a parallel <table>_fts FTS5 virtual table. search()
|
|
73
|
+
// rejects requests against any other content table.
|
|
74
|
+
const FTS_TABLES = new Set(['raw', 'compiled']);
|
|
75
|
+
|
|
76
|
+
const ALLOWED_WRITE_TABLES = new Set(['raw', 'compiled', 'trident_run', 'schema_meta']);
|
|
77
|
+
|
|
78
|
+
// Internal helper: forwards a (possibly multi-statement) SQL string to the
// handle's .exec(sql) and hands back whatever the driver returns. Call
// sites go through this wrapper so they look the same for both drivers.
function runSql(db, sql) {
  const outcome = db.exec(sql);
  return outcome;
}
|
|
84
|
+
|
|
85
|
+
// --- Driver loader -----------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
// Wraps `fn` so every call runs between BEGIN IMMEDIATE and COMMIT, with a
// best-effort ROLLBACK (its own failure is ignored) before the original
// error is rethrown. Used wherever the driver has no native IMMEDIATE
// transaction wrapper.
function makeImmediateTxn(db, fn) {
  return (...args) => {
    runSql(db, 'BEGIN IMMEDIATE');
    try {
      const result = fn(...args);
      runSql(db, 'COMMIT');
      return result;
    } catch (err) {
      try { runSql(db, 'ROLLBACK'); } catch { /* ignore */ }
      throw err;
    }
  };
}

// Resolves the SQLite driver.
//
// Returns { kind, open } where open(filename) yields a synchronous handle
// exposing .exec(sql), .prepare(sql).{run,get,all}(...), .close(), and a
// .txn(fn) helper that returns a callable wrapping fn in BEGIN IMMEDIATE /
// COMMIT / ROLLBACK semantics.
//
// Resolution order:
//   1. better-sqlite3 (the shipping path).
//   2. node:sqlite, only when better-sqlite3 failed to import AND
//      IJFW_COMPUTE_DRIVER=node-sqlite is set AND Node >= 22.
//
// Throws ComputeDbError when no driver can be loaded. If the better-sqlite3
// import failed, that error is attached as `cause` so the real reason
// (package missing vs. a load failure) is not lost.
async function loadDriver() {
  let importFailure = null;

  // Preferred: better-sqlite3.
  try {
    const mod = await import('better-sqlite3');
    const Database = mod.default || mod;
    return {
      kind: 'better-sqlite3',
      open(filename) {
        const db = new Database(filename);
        // H2: write transactions use BEGIN IMMEDIATE so the RESERVED lock
        // is taken at txn start rather than at the first INSERT (paired
        // with PRAGMA busy_timeout=5000 in openDb).
        db.txn = (fn) => {
          const wrapped = db.transaction(fn);
          // Use the driver's `.immediate` variant when the returned txn
          // function exposes one.
          if (wrapped && typeof wrapped.immediate === 'function') {
            return wrapped.immediate;
          }
          // Fallback: explicit BEGIN IMMEDIATE / COMMIT / ROLLBACK.
          return makeImmediateTxn(db, fn);
        };
        return db;
      },
    };
  } catch (err) {
    // Fall through to the node:sqlite escape hatch, remembering why.
    importFailure = err;
  }

  // Escape hatch: node:sqlite (Node >= 22). NOT a shipping path; gated by
  // env so it is never substituted for production by accident.
  if (process.env.IJFW_COMPUTE_DRIVER === 'node-sqlite') {
    const major = Number.parseInt(String(process.versions.node).split('.')[0], 10);
    if (major < 22) {
      throw new ComputeDbError(
        'IJFW_COMPUTE_DRIVER=node-sqlite requested but Node < 22 does not provide node:sqlite.'
      );
    }
    const { DatabaseSync } = await import('node:sqlite');
    return {
      kind: 'node-sqlite',
      open(filename) {
        const db = new DatabaseSync(filename);
        // Shim a better-sqlite3-style transaction helper; H2's BEGIN
        // IMMEDIATE matches the better-sqlite3 path.
        db.txn = (fn) => makeImmediateTxn(db, fn);
        return db;
      },
    };
  }

  const unavailable = new ComputeDbError(
    'Compute db driver unavailable: install better-sqlite3 (npm i better-sqlite3) ' +
    'or set IJFW_COMPUTE_DRIVER=node-sqlite on Node 22+ for local development only.'
  );
  if (importFailure) unavailable.cause = importFailure;
  throw unavailable;
}
|
|
170
|
+
|
|
171
|
+
// --- Path resolution ---------------------------------------------------------
|
|
172
|
+
|
|
173
|
+
// Picks the project root from, in order: the explicit argument, the
// IJFW_PROJECT_DIR env var, the current working directory. Returns it as a
// resolved, normalized absolute path. Throws ComputeDbError when the chosen
// value is not a non-empty string or does not end up absolute.
function resolveProjectRoot(projectRoot) {
  const candidate = projectRoot || process.env.IJFW_PROJECT_DIR || process.cwd();
  const usable = typeof candidate === 'string' && candidate.length > 0;
  if (!usable) {
    throw new ComputeDbError('Project root must be a non-empty string.');
  }
  const canonical = normalize(resolve(candidate));
  if (isAbsolute(canonical)) {
    return canonical;
  }
  throw new ComputeDbError(`Project root resolves to non-absolute path: ${candidate}`);
}
|
|
185
|
+
|
|
186
|
+
// Builds the compute db path for a project: the resolved project root
// followed by the .ijfw directory, the index directory and the db filename.
// Propagates ComputeDbError from resolveProjectRoot for an unusable root.
export function dbPathFor(projectRoot) {
  const segments = [IJFW_DIR_NAME, INDEX_DIR_NAME, DB_FILENAME];
  return join(resolveProjectRoot(projectRoot), ...segments);
}
|
|
190
|
+
|
|
191
|
+
// Makes sure the directory that will hold `filename` exists, creating any
// missing parents. Does nothing when the directory is already present.
function ensureDbDir(filename) {
  const parent = dirname(filename);
  if (!parent) return;
  if (existsSync(parent)) return;
  mkdirSync(parent, { recursive: true });
}
|
|
197
|
+
|
|
198
|
+
// --- Public API --------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
// Open or create the per-project compute db.
//
// On a fresh file, runs every migration up to the highest known version.
// On an existing file, refuses to operate if user_version is higher than
// this build supports.
//
// Args:
//   projectRoot -- optional; resolved by dbPathFor (explicit value, then
//                  IJFW_PROJECT_DIR, then the current working directory).
//
// Returns the open handle, tagged with __ijfw_driver (driver kind) and
// __ijfw_filename (absolute db path).
//
// Throws:
//   ComputeDbError     -- unusable project root or no driver available.
//   SchemaVersionError -- on-disk schema is newer than this build.
//   Anything raised while reading the version or running migrations.
//
// On every failure after the file has been opened, the handle is closed
// before the error propagates: the caller never receives the handle on a
// throw, so it could not close it itself.
export async function openDb(projectRoot) {
  const filename = dbPathFor(projectRoot);
  ensureDbDir(filename);

  const driver = await loadDriver();
  const db = driver.open(filename);
  db.__ijfw_driver = driver.kind;
  db.__ijfw_filename = filename;

  try {
    // Pragmas are individually best-effort. WAL for concurrent readers.
    // No foreign_keys pragma is issued here (the schema does not use FKs).
    try { runSql(db, 'PRAGMA journal_mode = WAL'); } catch { /* fall back to default */ }
    try { runSql(db, 'PRAGMA synchronous = NORMAL'); } catch { /* default fine */ }
    // H2: concurrent-write protection. busy_timeout makes the driver retry
    // on SQLITE_BUSY for up to 5s before throwing, so racing writers do not
    // collapse into a hard error when WAL contention spikes briefly.
    try { runSql(db, 'PRAGMA busy_timeout = 5000'); } catch { /* nothing */ }

    const target = await highestKnownVersion();
    const current = readUserVersion(db);

    if (current > target) {
      throw new SchemaVersionError(
        `Compute db schema version ${current} at ${filename} is newer than this build supports (max ${target}). ` +
        `Refusing to downgrade.`
      );
    }
    if (current < target) {
      await runMigrations(db, current, target);
    }
    return db;
  } catch (err) {
    // Release the file handle, then surface the original error unchanged.
    try { db.close(); } catch { /* already closed or driver-specific noop */ }
    throw err;
  }
}
|
|
236
|
+
|
|
237
|
+
// Reads PRAGMA user_version through .prepare(...).get() and returns it as a
// Number. Accepts the result column in lower or upper case; a missing row
// or missing column counts as version 0.
function readUserVersion(db) {
  const pragmaRow = db.prepare('PRAGMA user_version').get();
  if (pragmaRow) {
    const rawVersion = pragmaRow.user_version ?? pragmaRow.USER_VERSION ?? 0;
    return Number(rawVersion);
  }
  return 0;
}
|
|
244
|
+
|
|
245
|
+
// Insert one row into an allowed table inside a transaction, then run
// PRAGMA quick_check on the whole db. Returns { id } of the inserted row
// (id is null when the driver reports no lastInsertRowid).
//
// Allowed tables: raw, compiled, trident_run, schema_meta. The caller
// passes a row object whose keys match the table's columns; binding is
// positional via INSERT (col, ...) VALUES (?, ?, ...).
//
// Throws:
//   ComputeDbError -- invalid handle, disallowed table, non-object or
//                     empty row, or a key that is not a plain identifier.
//   IntegrityError -- quick_check returned anything other than 'ok'; the
//                     insert is rolled back with the transaction.
//   Driver errors raised by the INSERT itself.
//
// The caller's row object is never mutated; redaction works on a copy.
export function safeWrite(db, table, row) {
  if (!db || typeof db.prepare !== 'function') {
    throw new ComputeDbError('safeWrite: db handle is invalid.');
  }
  if (!ALLOWED_WRITE_TABLES.has(table)) {
    throw new ComputeDbError(`safeWrite: refusing to write to table "${table}".`);
  }
  if (!row || typeof row !== 'object' || Array.isArray(row)) {
    throw new ComputeDbError('safeWrite: row must be a plain object.');
  }
  const cols = Object.keys(row);
  if (cols.length === 0) {
    throw new ComputeDbError('safeWrite: row has no columns.');
  }
  // Column names are interpolated into the SQL text, so they must match a
  // strict identifier shape -- defence-in-depth against user-controlled keys.
  for (const c of cols) {
    if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(c)) {
      throw new ComputeDbError(`safeWrite: invalid column name "${c}".`);
    }
  }

  // D-PILLAR-SPEC §12 ingest scrub gate. Replace `body` and `topic` with
  // their redacted forms BEFORE the INSERT runs, so the FTS index, the
  // entity extractor (D2), and any downstream reader only ever see the
  // scrubbed text. The gate applies to every safeWrite regardless of table:
  // body/topic are user-supplied content surfaces wherever they appear, and
  // rows without those columns pass through untouched. Gating on table
  // name would let a body/topic value on any other allowed table skip
  // redaction.
  let record = row;
  if (ingestScrubEnabled()) {
    for (const field of ['body', 'topic']) {
      const value = record[field];
      if (typeof value === 'string' && value.length > 0) {
        record = { ...record, [field]: redactSecrets(value) };
      }
    }
  }

  const placeholders = cols.map(() => '?').join(', ');
  const sql = `INSERT INTO ${table} (${cols.join(', ')}) VALUES (${placeholders})`;
  const values = cols.map((c) => record[c]);

  // Run insert + quick_check inside a single transaction. A throw from
  // either step rolls the transaction back, so a half-written FTS index is
  // never left behind.
  let inserted;
  const tx = db.txn(() => {
    const info = db.prepare(sql).run(...values);
    inserted = { id: info && info.lastInsertRowid != null ? Number(info.lastInsertRowid) : null };
    const qc = db.prepare('PRAGMA quick_check').get();
    const status = qc && (qc.quick_check ?? qc.QUICK_CHECK);
    if (status !== 'ok') {
      throw new IntegrityError(
        `PRAGMA quick_check failed after insert into ${table}: ${status || '(no result)'}.`
      );
    }
  });
  tx();

  // GA-B3: D2 graph auto-population. Runs after the content tx has
  // committed, so a graph-extraction failure can never roll back the
  // observation write. Only the `raw` and `compiled` observation tables are
  // auto-indexed, and the indexer receives the already-scrubbed body.
  if (table === 'raw' || table === 'compiled') {
    try {
      const body = typeof record.body === 'string' ? record.body : null;
      if (body) {
        autoIndexGraphFromBody({
          db,
          body,
          sessionId: record.session_id || null,
          ts: typeof record.ts === 'number' ? record.ts : Date.now(),
        });
      }
    } catch { /* auto-index is best-effort; never fail safeWrite */ }
  }

  return inserted;
}
|
|
333
|
+
|
|
334
|
+
// FTS5 search against the table's `_fts` companion. Returns the top-k rows
// of the content table (joined on rowid) plus a `rank` column, ordered by
// bm25 rank ascending.
//
// Query syntax: standard FTS5 MATCH expressions (terms, phrases, NEAR,
// prefix*, AND/OR/NOT). The caller is responsible for sanitising
// user-supplied queries; FTS5 syntax errors are caught here and rethrown
// as ComputeDbError.
//
// Args:
//   db     -- open compute db handle.
//   table  -- 'raw' or 'compiled' (the tables with an FTS5 index).
//   query  -- MATCH expression. A non-string or blank query returns [].
//   k      -- result limit, clamped to 1..1000; anything unparsable (or 0)
//             falls back to 10.
//   opts.include_stale -- D4 retrieval guard. When not exactly true (the
//             default), rows with a non-zero stale_candidate are excluded.
//             When true, all rows are returned (debug + grader path).
//
// Tolerance: dbs created before D4 may lack the stale_candidate column;
// when it is absent the filter is dropped so older fixtures keep working.
//
// Throws ComputeDbError for an invalid handle, a table without an FTS5
// index, or a failing query. For a failing query the driver's error is
// attached as `cause` so its stack is preserved.
export function search(db, table, query, k = 10, opts = {}) {
  if (!db || typeof db.prepare !== 'function') {
    throw new ComputeDbError('search: db handle is invalid.');
  }
  if (!FTS_TABLES.has(table)) {
    throw new ComputeDbError(`search: no FTS5 index for table "${table}".`);
  }
  if (typeof query !== 'string' || query.trim().length === 0) {
    return [];
  }
  const limit = Math.min(Math.max(1, Number.parseInt(k, 10) || 10), 1000);
  const ftsTable = `${table}_fts`;
  const includeStale = Boolean(opts) && opts.include_stale === true;

  // D4: stale-candidate filter, applied only when the column exists and the
  // caller has not opted in to stale rows.
  const hasStaleColumn = tableHasColumn(db, table, 'stale_candidate');
  const staleClause = (!includeStale && hasStaleColumn)
    ? ' AND COALESCE(t.stale_candidate, 0) = 0'
    : '';

  // Join the FTS5 rowid back to the content table to recover full row data.
  // `table` and `ftsTable` are safe to interpolate: `table` passed the
  // FTS_TABLES allow-list check above.
  const sql = `
    SELECT t.*, bm25(${ftsTable}) AS rank
    FROM ${ftsTable} f
    JOIN ${table} t ON t.id = f.rowid
    WHERE ${ftsTable} MATCH ?${staleClause}
    ORDER BY rank ASC
    LIMIT ?`;
  try {
    return db.prepare(sql).all(query, limit);
  } catch (err) {
    const wrapped = new ComputeDbError(`search failed for ${table}: ${err.message}`);
    wrapped.cause = err;
    throw wrapped;
  }
}
|
|
392
|
+
|
|
393
|
+
// Memoised "does this table have this column?" lookup, so repeated
// search() calls do not re-read the schema every time. Results live in a
// WeakMap keyed on the db handle, so a new handle starts with an empty
// cache.
const __tableInfoCache = new WeakMap();
function tableHasColumn(db, table, column) {
  if (!__tableInfoCache.has(db)) {
    __tableInfoCache.set(db, new Map());
  }
  const memo = __tableInfoCache.get(db);
  const cacheKey = `${table}.${column}`;
  if (memo.has(cacheKey)) {
    return memo.get(cacheKey);
  }

  let found;
  try {
    const columns = db.prepare(`PRAGMA table_info(${table})`).all();
    found = columns.some((info) => String(info.name) === column);
  } catch {
    // Any failure while reading table_info counts as "column absent".
    found = false;
  }
  // Negative answers are cached too.
  memo.set(cacheKey, found);
  return found;
}
|
|
413
|
+
|
|
414
|
+
// Closes the handle if one was given. Errors raised by close() -- for
// example on an already-closed handle -- are deliberately ignored, so a
// double close is harmless. Always returns undefined.
export function closeDb(db) {
  if (db) {
    try {
      db.close();
    } catch {
      /* already closed or driver-specific noop */
    }
  }
}
|
|
419
|
+
|
|
420
|
+
export default { openDb, safeWrite, search, closeDb, dbPathFor, IntegrityError, SchemaVersionError, ComputeDbError };
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
// IJFW v1.3.0 -- D2 graph auto-population on ingest (GA fix-wave GA-B3).
|
|
2
|
+
//
|
|
3
|
+
// Source authority: PRD-v2 section 9 Pillar D D2 + .planning/1.3.0/D-PILLAR-SPEC.md
|
|
4
|
+
// section 3 (entity extraction ordering) + GA fix-wave finding GA-B3.
|
|
5
|
+
//
|
|
6
|
+
// PROBLEM (pre-fix-wave): graph:index colon-syntax dispatch is the ONLY
|
|
7
|
+
// caller of writeEdges in production. Nothing wires the symbol graph into
|
|
8
|
+
// the actual ingest path, so kg_nodes / kg_edges stay empty and D4's
|
|
9
|
+
// propagateStale BFS has nothing to walk.
|
|
10
|
+
//
|
|
11
|
+
// FIX: this module is invoked as a side-effect of every successful
|
|
12
|
+
// observation insert (compute safeWrite + memory indexEntry). It runs
|
|
13
|
+
// extractEntities on the just-written body, upserts kg_nodes, and writes
|
|
14
|
+
// co-occurrence edges via the existing edges.js writeEdges. Failure is
|
|
15
|
+
// always swallowed -- ingest must never fail because the graph layer is
|
|
16
|
+
// unhappy.
|
|
17
|
+
//
|
|
18
|
+
// OPT-OUT: env var IJFW_GRAPH_AUTO_INDEX. Default is on. Set to '0' or
|
|
19
|
+
// 'false' to disable (used by fixture tests that want to seed graphs
|
|
20
|
+
// manually without auto-index races).
|
|
21
|
+
//
|
|
22
|
+
// LOCK: acquireGraphWriteLock is acquired for the write window. The
|
|
23
|
+
// caller is allowed to be running inside a memory-side or compute-side
|
|
24
|
+
// txn -- the graph lock is a separate file-level CAS, not a DB lock, so
|
|
25
|
+
// there's no SQLite-level conflict.
|
|
26
|
+
|
|
27
|
+
import { extractEntities } from './extract.js';
|
|
28
|
+
import { writeEdges } from './edges.js';
|
|
29
|
+
import { acquireGraphWriteLock } from './graph-lock.js';
|
|
30
|
+
|
|
31
|
+
// Lazily imports ./fts5.js and memoises its openDb / closeDb pair. The
// dynamic import happens on the first call; later calls reuse the cached
// object.
let __computeOpenDb = null;
async function getComputeOpenDb() {
  if (!__computeOpenDb) {
    const { openDb, closeDb } = await import('./fts5.js');
    __computeOpenDb = { openDb, closeDb };
  }
  return __computeOpenDb;
}
|
|
40
|
+
|
|
41
|
+
// Reads the IJFW_GRAPH_AUTO_INDEX opt-out. Auto-index is on unless the
// variable is '0' or 'false' (any letter case); unset, empty, and every
// other value leave it enabled.
function isAutoIndexEnabled() {
  const setting = process.env.IJFW_GRAPH_AUTO_INDEX;
  const unset = setting === undefined || setting === null || setting === '';
  if (unset) return true;
  const optedOut = setting === '0' || /^false$/i.test(String(setting));
  return !optedOut;
}
|
|
47
|
+
|
|
48
|
+
/**
 * autoIndexGraphFromBody({ db, body, sessionId, ts? }) -> result|null
 *
 * Synchronous wrapper used when the caller already holds a compute db
 * handle (e.g. compute safeWrite). Extracts entities from body, acquires
 * the graph-write lock, and calls writeEdges inside a transaction when the
 * handle offers db.txn. Returns the writeEdges result on success, and null
 * on opt-out, missing inputs, missing graph tables, no extracted entities,
 * lock contention, or any swallowed failure. This function never throws
 * for those cases, including a failure while releasing the lock.
 *
 * Caller invariants:
 *   - db is a compute db handle (has kg_nodes / kg_edges tables).
 *   - projectRoot is derived from db.__ijfw_filename by stripping the
 *     trailing `.ijfw/index/<name>.db` segment, so the lock file lands
 *     next to the db.
 *
 * @param {object} args
 * @param {object} args.db - Compute db handle.
 * @param {string} args.body - Observation text to extract entities from.
 * @param {string|null} [args.sessionId] - Passed to writeEdges (null when falsy).
 * @param {number} [args.ts] - Passed through to writeEdges as `{ ts }`.
 * @returns {*} The writeEdges result, or null.
 */
export function autoIndexGraphFromBody({ db, body, sessionId, ts }) {
  if (!isAutoIndexEnabled()) return null;
  if (!db || typeof db.prepare !== 'function') return null;
  if (typeof body !== 'string' || body.length === 0) return null;

  // db.__ijfw_filename is `<projectRoot>/.ijfw/index/compute.db`; strip the
  // suffix in both POSIX and Windows separator forms.
  const filename = String(db.__ijfw_filename || '');
  const projectRoot = filename
    ? filename.replace(/\/\.ijfw\/index\/[^/]+\.db$/, '').replace(/\\\.ijfw\\index\\[^\\]+\.db$/, '')
    : null;
  if (!projectRoot) return null;

  // kg_nodes presence check (tolerates fixture dbs that opened without
  // running migrations). When the table is absent, silently skip.
  if (!hasGraphTables(db)) return null;

  let entities;
  try {
    entities = extractEntities(body, { minMentions: 1 });
  } catch {
    return null;
  }
  if (!entities || entities.length === 0) return null;

  let lock;
  try {
    lock = acquireGraphWriteLock(projectRoot, { waitMs: 1500 });
  } catch {
    // Lock contention -- skip auto-index this round; graph:index can be
    // called explicitly to backfill.
    return null;
  }

  try {
    // Wrap in a single tx so kg_nodes + kg_edges writes are atomic.
    let result = null;
    if (typeof db.txn === 'function') {
      const tx = db.txn(() => {
        result = writeEdges(db, sessionId || null, entities, { ts });
      });
      tx();
    } else {
      result = writeEdges(db, sessionId || null, entities, { ts });
    }
    return result;
  } catch {
    return null;
  } finally {
    if (lock) {
      // A throw from `finally` would replace the return value and break the
      // never-throws contract, so the release is guarded.
      // NOTE(review): the lock object's shape is not visible here; the
      // original called `released()`. Fall back to `release()` when that is
      // not a function -- confirm the real name against graph-lock.js.
      try {
        const releaseFn = typeof lock.released === 'function' ? lock.released : lock.release;
        releaseFn.call(lock);
      } catch { /* best-effort release */ }
    }
  }
}
|
|
114
|
+
|
|
115
|
+
/**
 * autoIndexGraphFromMemoryBody({ memoryDb, body, sessionId, ts? }) -> result|null
 *
 * Called from the memory ingest path. The memory db doesn't carry the
 * symbol graph, so this opens the compute db at the same projectRoot, runs
 * extract + writeEdges there, then closes the compute handle. Failures
 * are swallowed so memory ingest never breaks because of graph issues.
 *
 * Async because the compute db is opened on demand. Callers that want
 * synchronous behavior can fire-and-forget the returned promise; ingest
 * correctness does not depend on the auto-index landing.
 *
 * Cleanup order in `finally`: the graph lock is released first (guarded,
 * so a failing release cannot skip the next step), then the compute handle
 * is closed.
 *
 * @param {object} args
 * @param {object} args.memoryDb - Memory db handle; only __ijfw_filename is read.
 * @param {string} args.body - Observation text to extract entities from.
 * @param {string|null} [args.sessionId] - Passed to writeEdges (null when falsy).
 * @param {number} [args.ts] - Passed through to writeEdges as `{ ts }`.
 * @returns {Promise<*>} The writeEdges result, or null on opt-out, missing
 *   inputs, no entities, missing graph tables, or any swallowed failure.
 */
export async function autoIndexGraphFromMemoryBody({ memoryDb, body, sessionId, ts }) {
  if (!isAutoIndexEnabled()) return null;
  if (typeof body !== 'string' || body.length === 0) return null;

  // Strip a trailing `.ijfw/index/<name>.db` (POSIX or Windows separators)
  // from the memory db path to recover the project root.
  const filename = String(memoryDb && memoryDb.__ijfw_filename || '');
  const projectRoot = filename
    ? filename.replace(/\/\.ijfw\/index\/[^/]+\.db$/, '').replace(/\\\.ijfw\\index\\[^\\]+\.db$/, '')
    : null;
  if (!projectRoot) return null;

  let entities;
  try {
    entities = extractEntities(body, { minMentions: 1 });
  } catch {
    return null;
  }
  if (!entities || entities.length === 0) return null;

  const { openDb, closeDb } = await getComputeOpenDb();
  let computeDb = null;
  let lock = null;
  try {
    computeDb = await openDb(projectRoot);
    if (!hasGraphTables(computeDb)) return null;
    lock = acquireGraphWriteLock(projectRoot, { waitMs: 1500 });
    let result = null;
    if (typeof computeDb.txn === 'function') {
      const tx = computeDb.txn(() => {
        result = writeEdges(computeDb, sessionId || null, entities, { ts });
      });
      tx();
    } else {
      result = writeEdges(computeDb, sessionId || null, entities, { ts });
    }
    return result;
  } catch {
    return null;
  } finally {
    if (lock) {
      // Guarded: an exception here would reject the promise and skip the
      // closeDb below, leaking the compute handle.
      // NOTE(review): the lock object's shape is not visible here; the
      // original called `released()`. Fall back to `release()` when that is
      // not a function -- confirm the real name against graph-lock.js.
      try {
        const releaseFn = typeof lock.released === 'function' ? lock.released : lock.release;
        releaseFn.call(lock);
      } catch { /* best-effort release */ }
    }
    if (computeDb) {
      try { closeDb(computeDb); } catch { /* best-effort */ }
    }
  }
}
|
|
172
|
+
|
|
173
|
+
// --- helpers --------------------------------------------------------------
|
|
174
|
+
|
|
175
|
+
// Reports whether the handle's schema contains a `kg_nodes` table, by
// looking it up in sqlite_master. Any error during the lookup is treated
// as "no graph tables".
function hasGraphTables(db) {
  let match;
  try {
    const lookup = db.prepare(
      `SELECT name FROM sqlite_master WHERE type='table' AND name='kg_nodes'`
    );
    match = lookup.get();
  } catch {
    return false;
  }
  return Boolean(match);
}
|
|
185
|
+
|
|
186
|
+
export const __test = { isAutoIndexEnabled, hasGraphTables };
|
|
187
|
+
|
|
188
|
+
export default {
|
|
189
|
+
autoIndexGraphFromBody,
|
|
190
|
+
autoIndexGraphFromMemoryBody,
|
|
191
|
+
};
|