@ijfw/memory-server 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw +27 -0
- package/bin/ijfw-dashboard +180 -0
- package/bin/ijfw-dispatch-plan +41 -0
- package/bin/ijfw-memorize +273 -0
- package/bin/ijfw-memory +51 -0
- package/fixtures/demo-target.js +28 -0
- package/package.json +53 -0
- package/src/api-client.js +190 -0
- package/src/audit-roster.js +315 -0
- package/src/caps.js +37 -0
- package/src/cold-scan-runner.mjs +37 -0
- package/src/compute/edges.js +155 -0
- package/src/compute/extract.js +560 -0
- package/src/compute/fts5.js +420 -0
- package/src/compute/graph-auto-index.js +191 -0
- package/src/compute/graph-lock.js +114 -0
- package/src/compute/index.js +18 -0
- package/src/compute/migration-runner.js +116 -0
- package/src/compute/migrations/001-initial.js +23 -0
- package/src/compute/migrations/002-porter-stemming-source.js +139 -0
- package/src/compute/migrations/003-tier-semantic.js +69 -0
- package/src/compute/migrations/004-kg-tables.js +83 -0
- package/src/compute/migrations/005-stale-candidate.js +72 -0
- package/src/compute/python-resolver.js +106 -0
- package/src/compute/runner-vm.js +185 -0
- package/src/compute/runner.js +416 -0
- package/src/compute/sandbox-detect.js +122 -0
- package/src/compute/sandbox-linux.js +164 -0
- package/src/compute/sandbox-macos.js +167 -0
- package/src/compute/sandbox-windows.js +63 -0
- package/src/compute/schema.sql +118 -0
- package/src/compute/staleness.js +239 -0
- package/src/compute/synonyms.js +367 -0
- package/src/compute/traverse.js +180 -0
- package/src/cost/aggregator.js +229 -0
- package/src/cost/pricing.js +134 -0
- package/src/cost/readers/claude.js +179 -0
- package/src/cost/readers/codex.js +131 -0
- package/src/cost/readers/gemini.js +111 -0
- package/src/cost/savings.js +243 -0
- package/src/cross-dispatcher.js +437 -0
- package/src/cross-orchestrator-cli.js +1885 -0
- package/src/cross-orchestrator.js +598 -0
- package/src/cross-project-search.js +114 -0
- package/src/dashboard-client.html +1180 -0
- package/src/dashboard-server.js +895 -0
- package/src/design-companion.js +81 -0
- package/src/dispatch/colon-syntax.js +732 -0
- package/src/dispatch-planner.js +235 -0
- package/src/dream/cooldown.js +105 -0
- package/src/dream/runner.mjs +373 -0
- package/src/dream/staleness-wiring.js +195 -0
- package/src/feedback-detector.js +57 -0
- package/src/hero-line.js +115 -0
- package/src/importers/claude-mem.js +152 -0
- package/src/importers/cli.js +311 -0
- package/src/importers/common.js +84 -0
- package/src/importers/discover.js +235 -0
- package/src/importers/rtk.js +107 -0
- package/src/intent-router.js +221 -0
- package/src/lib/atomic-io.js +201 -0
- package/src/lib/cache.js +33 -0
- package/src/lib/npm-view.js +104 -0
- package/src/lib/status-card.js +95 -0
- package/src/lib/token.js +85 -0
- package/src/memory/fts5.js +349 -0
- package/src/memory/migration-runner.js +116 -0
- package/src/memory/migrations/001-fts5-init.js +26 -0
- package/src/memory/migrations/002-tier-semantic.js +60 -0
- package/src/memory/migrations/003-stale-candidate.js +60 -0
- package/src/memory/reader.js +300 -0
- package/src/memory/recall-counter.js +76 -0
- package/src/memory/schema.sql +79 -0
- package/src/memory/search.js +431 -0
- package/src/memory/staleness.js +237 -0
- package/src/memory/tier-promotion.js +377 -0
- package/src/memory/tokenize.js +63 -0
- package/src/project-type-detector.js +866 -0
- package/src/prompt-check.js +171 -0
- package/src/ralph-allowlist.js +88 -0
- package/src/receipts.js +129 -0
- package/src/redactor.js +107 -0
- package/src/sandbox.js +275 -0
- package/src/sanitizer.js +69 -0
- package/src/scan-resume.js +167 -0
- package/src/schema.js +82 -0
- package/src/search-bm25.js +108 -0
- package/src/server.js +1414 -0
- package/src/swarm-config.js +80 -0
- package/src/trident/dispatch.js +211 -0
- package/src/trident/lens-health.js +253 -0
- package/src/update-apply.js +79 -0
- package/src/update-check.js +136 -0
- package/src/vectors.js +178 -0
- package/templates/design/bento-grid.md +84 -0
- package/templates/design/brutalist-luxe.md +82 -0
- package/templates/design/cinematic-dark.md +82 -0
- package/templates/design/data-dense-dashboard.md +88 -0
- package/templates/design/editorial-warm.md +81 -0
- package/templates/design/glassmorphic.md +84 -0
- package/templates/design/magazine-editorial.md +84 -0
- package/templates/design/maximalist-vibrant.md +85 -0
- package/templates/design/neo-swiss-tech.md +85 -0
- package/templates/design/swiss-minimal.md +80 -0
- package/templates/design/terminal-native.md +83 -0
- package/templates/design/warm-organic.md +84 -0
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
// IJFW v1.3.0 Alpha -- C9.5 coding-domain synonym expansion.
|
|
2
|
+
//
|
|
3
|
+
// Query-time rewriter (NOT stored synonyms). Expands tokens in a user query
|
|
4
|
+
// against a hand-curated coding-domain map and joins each expansion with
|
|
5
|
+
// the original via FTS5 OR. Symmetric: matching the LHS expands to RHS,
|
|
6
|
+
// matching the RHS also expands back to LHS.
|
|
7
|
+
//
|
|
8
|
+
// One-canonical-key invariant (P5-L2 fix-wave): every short token resolves
|
|
9
|
+
// to exactly ONE canonical sense. Overloaded keys (e.g. previous `ts` had
|
|
10
|
+
// both `[ts, typescript]` AND `[ts, timestamp]`) silently merged both
|
|
11
|
+
// expansion sets at search time, broadening retrieval beyond the user's
|
|
12
|
+
// likely intent. We DROP the offender entirely rather than picking a
|
|
13
|
+
// winner: callers spell `typescript` or `timestamp` explicitly when they
|
|
14
|
+
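// mean it. Note: buildSynonymMap does not assert this -- a word listed in
// more than one group simply has its expansion lists merged -- so the
// invariant is upheld by curation of SYNONYM_GROUPS only.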
|
|
15
|
+
//
|
|
16
|
+
// Behaviour:
|
|
17
|
+
// - Default-on. Disable per-process or per-call via IJFW_SYNONYM_EXPAND=0.
|
|
18
|
+
//     The values false, no, and off (case-insensitive) disable it as well.
//     Any other value (including unset) leaves expansion active.
|
|
19
|
+
// - Tokenizer: ASCII word boundaries on the surface query. We do NOT try
|
|
20
|
+
// to parse FTS5 syntax in full; quoted phrases, prefix-stars, NEAR/AND
|
|
21
|
+
// operators, and column filters pass through untouched, while bare
|
|
22
|
+
// terms get expanded. This keeps the rewriter cheap and predictable
|
|
23
|
+
// -- callers wanting precision can quote the term or set the env to 0.
|
|
24
|
+
// - Multi-word expansions ("knowledge base") become quoted FTS5 phrases
|
|
25
|
+
// so they don't trip MATCH parser into requiring two separate columns.
|
|
26
|
+
// - Result envelope reports `synonym_matches: { token: [expansions] }`
|
|
27
|
+
// so callers can show users what fired and offer the toggle-off retry.
|
|
28
|
+
//
|
|
29
|
+
// Map curation: ~80 pairs covering common coding shorthand and domain
|
|
30
|
+
// abbreviations frequently used by AI coding agents. Keep entries lower-
|
|
31
|
+
// cased; rewriter lowercases tokens before lookup but preserves the
|
|
32
|
+
// original-cased token in the OR clause for FTS5 (porter tokenizer is
|
|
33
|
+
// case-folding so casing is irrelevant on the FTS5 side).
|
|
34
|
+
|
|
35
|
+
// Symmetric synonym groups. Each group is a set of words that should all
// match each other at search time. Listed once; the builder fans out the
// pair index in both directions.
//
// Curation rule (one-canonical-key invariant): a short token must head at
// most one group. buildSynonymMap does NOT reject a word that appears in
// several groups -- it merges their expansion lists -- so an overloaded
// short token silently ORs unrelated senses together. Long forms shared by
// two abbreviations of the SAME sense (e.g. `conf`/`config` ->
// `configuration`) are fine.
const SYNONYM_GROUPS = [
  // Database / storage
  ['db', 'database'],
  ['sql', 'rdbms'],
  ['kv', 'keyvalue', 'key-value'],
  ['cache', 'caching'],

  // Auth
  ['auth', 'authentication', 'authn'],
  ['authz', 'authorization'],
  ['oauth', 'oauth2'],
  ['jwt', 'jwt-token', 'json-web-token'],
  ['cred', 'credential', 'credentials'],

  // Performance / runtime
  ['perf', 'performance'],
  ['lat', 'latency'],
  ['mem', 'memory'],
  ['cpu', 'processor'],
  ['gc', 'garbagecollection', 'garbage-collect'],

  // Config / env / deps
  ['config', 'configuration'],
  ['conf', 'configuration'],
  ['env', 'environment'],
  ['envvar', 'environment-variable'],
  ['dep', 'dependency'],
  ['deps', 'dependencies'],
  ['pkg', 'package'],
  ['repo', 'repository'],

  // Network / requests
  ['req', 'request'],
  ['resp', 'response'],
  ['res', 'response'],
  ['http', 'https'],
  ['url', 'uri'],
  ['api', 'endpoint'],
  ['rpc', 'remote-procedure-call'],
  ['ws', 'websocket'],

  // Errors / handling
  ['err', 'error'],
  ['exc', 'exception'],
  ['warn', 'warning'],
  ['debug', 'debugging'],
  ['log', 'logging'],
  // `trace` previously headed this group AND an Observability group
  // (`[trace, tracing]`), so a search for `trace` ORed the stack-trace and
  // distributed-tracing senses together -- the same overload that got `ts`
  // removed. Same remedy: drop the overloaded short token; callers spell
  // out `stacktrace` or `tracing`.
  ['stacktrace', 'stack-trace'],

  // Init / lifecycle
  ['init', 'initialize', 'initialise'],
  ['cfg', 'configure'],
  ['boot', 'bootstrap'],
  ['shutdown', 'teardown'],

  // Code structure
  ['fn', 'function'],
  ['func', 'function'],
  ['var', 'variable'],
  ['const', 'constant'],
  ['ctx', 'context'],
  ['ctxt', 'context'],
  ['evt', 'event'],
  ['msg', 'message'],
  ['param', 'parameter'],
  ['args', 'arguments'],
  ['attr', 'attribute'],
  ['prop', 'property'],

  // Pub/Sub / messaging
  ['sub', 'subscribe', 'subscription'],
  ['pub', 'publish', 'publication'],
  ['mq', 'message-queue'],
  ['tx', 'transaction'],
  ['rx', 'receive'],

  // Sync / async
  ['sync', 'synchronous'],
  ['async', 'asynchronous'],
  ['concurrent', 'concurrency'],
  ['par', 'parallel'],

  // Languages / formats
  // P5-L2: `ts` was previously mapped to BOTH `typescript` AND `timestamp`
  // (see Process/runtime block below in earlier revisions). The two senses
  // silently merged at lookup time -- a user searching `ts` got both
  // expansions ORed together regardless of intent. Fix: drop `ts` entirely
  // from both groups. Callers spell out `typescript` or `timestamp` when
  // they mean either one. Nothing at build time rejects such overloads, so
  // the one-canonical-key invariant has to be kept by hand in this list.
  ['js', 'javascript'],
  ['py', 'python'],
  ['rb', 'ruby'],
  ['go', 'golang'],
  ['rs', 'rust'],
  ['md', 'markdown'],
  ['yml', 'yaml'],
  ['json', 'jsonl'],

  // Library / module
  ['lib', 'library'],
  ['mod', 'module'],
  ['proto', 'protocol'],
  ['regex', 'regexp'],
  ['mw', 'middleware'],
  ['plugin', 'extension'],
  ['ext', 'extension'],

  // Observability
  // (`[trace, tracing]` removed -- see the note in Errors / handling.)
  ['obs', 'observability'],
  ['metrics', 'telemetry'],
  ['span', 'spans'],
  ['kb', 'knowledge-base'],

  // Process / runtime
  // P5-L2: `ts` -> `timestamp` removed alongside the Languages/formats
  // mapping above. `ts` had two senses; we keep neither.
  ['proc', 'process'],
  ['thread', 'threading'],
  ['daemon', 'background'],
  ['svc', 'service'],

  // Testing
  ['test', 'tests'],
  ['e2e', 'end-to-end'],
  ['unit', 'unit-test'],
  ['integ', 'integration'],
  ['mock', 'mocking'],
  ['stub', 'stubbing'],
  ['fixture', 'fixtures'],

  // Security / crypto
  ['crypto', 'cryptography'],
  ['enc', 'encryption'],
  ['dec', 'decryption'],
  ['hash', 'hashing'],
  ['sig', 'signature'],

  // CI / deploy
  ['ci', 'continuous-integration'],
  ['cd', 'continuous-deployment'],
  ['deploy', 'deployment'],
  ['rel', 'release'],
  ['ver', 'version'],

  // Data
  ['arr', 'array'],
  ['obj', 'object'],
  ['str', 'string'],
  ['num', 'number'],
  ['bool', 'boolean'],
  ['int', 'integer'],
  ['float', 'floating-point'],

  // Misc common shorthand
  ['util', 'utility', 'utils'],
  ['admin', 'administrator'],
  ['user', 'users'],
  ['ui', 'user-interface'],
  ['ux', 'user-experience'],
  ['doc', 'documentation'],
  ['docs', 'documentation'],
];
|
|
202
|
+
|
|
203
|
+
// Fan a list of synonym groups out into a flat lookup table:
//   lowercased word -> array of the other (lowercased) words in its groups.
// A word never lists itself. When the same word occurs in several groups
// the expansion lists are merged (first-seen order, no duplicates).
// Multi-word / hyphenated entries are stored verbatim; quoting for FTS5 is
// the rewriter's job.
function buildSynonymMap(groups) {
  const lookup = new Map();
  for (const group of groups) {
    const members = group.map((entry) => String(entry).toLowerCase());
    for (const token of members) {
      // Reuse the list created by an earlier group, if any.
      let expansions = lookup.get(token);
      if (!expansions) {
        expansions = [];
        lookup.set(token, expansions);
      }
      for (const candidate of members) {
        if (candidate !== token && !expansions.includes(candidate)) {
          expansions.push(candidate);
        }
      }
    }
  }
  return lookup;
}
|
|
222
|
+
|
|
223
|
+
const SYNONYM_MAP = buildSynonymMap(SYNONYM_GROUPS);
|
|
224
|
+
|
|
225
|
+
// Exposed for tests and debugging: the number of distinct lookup keys held
// in SYNONYM_MAP.
export function synonymMapSize() {
  const { size } = SYNONYM_MAP;
  return size;
}
|
|
229
|
+
|
|
230
|
+
// Decide whether synonym expansion is active for this call.
// `envOverride`, when not undefined, takes precedence over the process-level
// IJFW_SYNONYM_EXPAND variable. Only '0', 'false', 'no' and 'off'
// (case-insensitive) switch expansion off; unset, null, empty or any other
// value keeps it on.
function expansionEnabled(envOverride) {
  const raw = envOverride === undefined ? process.env.IJFW_SYNONYM_EXPAND : envOverride;
  if (raw == null || raw === '') return true;
  const disablingValues = ['0', 'false', 'no', 'off'];
  return !disablingValues.includes(String(raw).toLowerCase());
}
|
|
238
|
+
|
|
239
|
+
// Token regex: match runs of [A-Za-z0-9_] -- ASCII identifier-shaped tokens.
|
|
240
|
+
// FTS5 operators (AND, OR, NOT, NEAR), column filters (col:term), prefix
|
|
241
|
+
// stars (term*), and quoted phrases ("a b") don't get matched as a single
|
|
242
|
+
// token by this regex and pass through unchanged via the position-aware
|
|
243
|
+
// segment loop in expandQuery.
|
|
244
|
+
|
|
245
|
+
// Tokens that look like identifiers but are FTS5 reserved keywords. We
|
|
246
|
+
// never expand these.
|
|
247
|
+
const FTS5_RESERVED = new Set(['and', 'or', 'not', 'near']);
|
|
248
|
+
|
|
249
|
+
// Turn one expansion string into an FTS5 term. A string made only of
// [A-Za-z0-9_] is emitted bare; anything else (spaces, hyphens, ...) is
// wrapped in double quotes so FTS5 reads it as a single phrase, with any
// embedded double quote doubled as FTS5 requires (the static map contains
// none, so that part is purely defensive).
function renderExpansion(s) {
  const isBareToken = !/[^A-Za-z0-9_]/.test(s);
  if (isBareToken) return String(s);
  const escaped = String(s).replace(/"/g, '""');
  return `"${escaped}"`;
}
|
|
260
|
+
|
|
261
|
+
/**
 * expandQuery(query, opts) -> { expanded, synonym_matches, applied }
 *
 * Query-time synonym rewriter for FTS5 MATCH expressions.
 *
 * - expanded: rewritten FTS5 query string. When no expansions fire (or env
 *   disables expansion, or `query` is empty), this is the input string
 *   returned byte-for-byte -- whitespace is NOT normalised in that case.
 *   When at least one token expands, segments are re-joined with single
 *   spaces. A non-string `query` yields ''.
 * - synonym_matches: { [token]: [expansions, ...] } recording which input
 *   tokens fired (keyed by the token as the caller typed it) and what they
 *   expanded to. Empty object when nothing fired.
 * - applied: boolean -- true iff at least one token expanded.
 *
 * opts.env (optional): override IJFW_SYNONYM_EXPAND for this call (used by
 *   the dispatcher to honour a per-call override env even when the process
 *   env says default).
 */
export function expandQuery(query, opts = {}) {
  const unchanged = {
    expanded: typeof query === 'string' ? query : '',
    synonym_matches: {},
    applied: false,
  };
  if (typeof query !== 'string' || query.length === 0) return unchanged;
  if (!expansionEnabled(opts.env)) return unchanged;

  // The query is cut into whitespace-separated segments (quoted phrases
  // stay whole). Only a segment that is purely [A-Za-z0-9_]+, is not an
  // FTS5 keyword, and has an entry in the synonym map is rewritten. Quoted
  // phrases, col:term, term*, parenthesised input and anything else we do
  // not model pass through untouched.
  const matches = {};
  const out = [];

  for (const seg of splitPreservingQuotedPhrases(query)) {
    if (seg.length === 0) continue;

    const isBareToken = /^[A-Za-z0-9_]+$/.test(seg);
    const lower = isBareToken ? seg.toLowerCase() : '';
    const expansions = isBareToken && !FTS5_RESERVED.has(lower)
      ? SYNONYM_MAP.get(lower)
      : undefined;

    if (expansions && expansions.length > 0) {
      matches[seg] = [...expansions];
      // Parenthesise so the OR group binds tightly inside the larger query:
      // `db AND user` becomes `(db OR database) AND user`, not the broken
      // `db OR database AND user`.
      out.push('(' + [seg, ...expansions.map(renderExpansion)].join(' OR ') + ')');
    } else {
      out.push(seg);
    }
  }

  const applied = Object.keys(matches).length > 0;
  // Honour the documented contract: with nothing expanded the caller gets
  // the original string back, not a trimmed / whitespace-collapsed copy.
  if (!applied) return unchanged;

  return { expanded: out.join(' '), synonym_matches: matches, applied };
}
|
|
327
|
+
|
|
328
|
+
// Cut a query into whitespace-delimited segments, keeping any double-quoted
// span ("a b c") glued to the segment it occurs in, inner whitespace
// included. An opening quote with no partner swallows the remainder of the
// string. Single quotes get no special treatment -- FTS5 only recognises
// double quotes.
function splitPreservingQuotedPhrases(query) {
  const segments = [];
  let current = '';
  const flush = () => {
    if (current.length > 0) {
      segments.push(current);
      current = '';
    }
  };

  let pos = 0;
  while (pos < query.length) {
    const ch = query[pos];
    if (ch === '"') {
      // Copy through the closing quote (inclusive), or to end of input
      // when the quote is never closed.
      const closing = query.indexOf('"', pos + 1);
      const end = closing === -1 ? query.length : closing + 1;
      current += query.slice(pos, end);
      pos = end;
    } else if (/\s/.test(ch)) {
      flush();
      pos += 1;
    } else {
      current += ch;
      pos += 1;
    }
  }
  flush();
  return segments;
}
|
|
363
|
+
|
|
364
|
+
// Test helpers -- not part of the public API contract.
|
|
365
|
+
export const __test = { splitPreservingQuotedPhrases, buildSynonymMap, SYNONYM_GROUPS };
|
|
366
|
+
|
|
367
|
+
export default { expandQuery, synonymMapSize };
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
// IJFW v1.3.0 -- D2 BFS traversal.
|
|
2
|
+
//
|
|
3
|
+
// Source authority: D-PILLAR-SPEC.md section 2 (depth cap 2, weight
|
|
4
|
+
// threshold 0.5) + section 4 (BFS query surface).
|
|
5
|
+
//
|
|
6
|
+
// Reads kg_nodes / kg_edges via the compute db. Reads do NOT acquire
|
|
7
|
+
// .graph-write.lock -- WAL mode allows concurrent reads with writers.
|
|
8
|
+
//
|
|
9
|
+
// API:
|
|
10
|
+
// bfsTraverse(db, startNodeId, depth=2, edgeKinds=['co_occurs'])
|
|
11
|
+
// -> { nodes, edges, traversal_path }
|
|
12
|
+
//
|
|
13
|
+
// resolveNode(db, kind, name)
|
|
14
|
+
// -> { id, kind, name, ... } | null
|
|
15
|
+
//
|
|
16
|
+
// bfsRelated(db, query, opts?)
|
|
17
|
+
// -> { nodes, edges, traversal_path } using entity-resolution heuristic
|
|
18
|
+
|
|
19
|
+
const DEFAULT_DEPTH = 2;
|
|
20
|
+
const DEFAULT_WEIGHT_THRESHOLD = 0.5;
|
|
21
|
+
const DEFAULT_EDGE_KINDS = ['co_occurs'];
|
|
22
|
+
|
|
23
|
+
/**
 * resolveNode(db, kind, name) -> kg_nodes row | null
 *
 * Looks up a single kg_nodes row by exact (kind, name). `kind` and `name`
 * are stringified before binding. Returns null when the db handle has no
 * `prepare` function, when either `kind` or `name` is falsy, or when no
 * row matches. Used by graph:traverse dispatch to turn a string identifier
 * into a node id.
 */
export function resolveNode(db, kind, name) {
  const hasUsableDb = Boolean(db) && typeof db.prepare === 'function';
  if (!hasUsableDb) return null;
  if (!kind || !name) return null;

  const sql =
    'SELECT id, kind, name, first_seen, last_seen, redacted ' +
    'FROM kg_nodes WHERE kind = ? AND name = ?';
  const row = db.prepare(sql).get(String(kind), String(name));
  return row || null;
}
|
|
37
|
+
|
|
38
|
+
/**
 * bfsTraverse(db, startNodeId, depth, edgeKinds, opts?) -> { nodes, edges, traversal_path }
 *
 * Breadth-first walk over kg_edges starting at `startNodeId`.
 *
 * An edge is followed when its kind is in `edgeKinds` and its weight is
 * >= opts.weightThreshold (default 0.5 per D-PILLAR-SPEC section 2). Edges
 * are matched on either endpoint, so the walk is undirected at the read
 * layer. At most `depth` hops are taken.
 *
 * Argument handling:
 *   - depth: used only when it is a non-negative integer, else the default.
 *   - edgeKinds: used only when it is a non-empty array (entries are
 *     stringified), else the default.
 *   - opts.weightThreshold: used only when it is a finite number.
 *
 * Redacted neighbours are reported in the result but are not expanded
 * further. The start node is always expanded.
 *
 * Returns:
 *   nodes          -- distinct kg_nodes rows reached (including the start node)
 *   edges          -- kg_edges rows seen, one per unordered endpoint pair + kind
 *   traversal_path -- node ids in BFS visitation order
 * All three are empty when the start node does not exist.
 *
 * Throws Error when `db` has no `prepare` function or `startNodeId` is not
 * a positive finite number.
 */
export function bfsTraverse(db, startNodeId, depth = DEFAULT_DEPTH, edgeKinds = DEFAULT_EDGE_KINDS, opts = {}) {
  if (!db || typeof db.prepare !== 'function') {
    throw new Error('bfsTraverse: db handle is invalid.');
  }
  const startId = Number(startNodeId);
  if (!(Number.isFinite(startId) && startId > 0)) {
    throw new Error(`bfsTraverse: startNodeId must be a positive number; got ${startNodeId}`);
  }

  const hopLimit = Number.isInteger(depth) && depth >= 0 ? depth : DEFAULT_DEPTH;
  const hasKinds = Array.isArray(edgeKinds) && edgeKinds.length > 0;
  const kinds = hasKinds ? edgeKinds.map(String) : DEFAULT_EDGE_KINDS;
  const minWeight = Number.isFinite(opts.weightThreshold)
    ? Number(opts.weightThreshold)
    : DEFAULT_WEIGHT_THRESHOLD;

  const nodeSql = 'SELECT id, kind, name, first_seen, last_seen, redacted FROM kg_nodes WHERE id = ?';

  // Bail out with an empty envelope when the start node is unknown.
  const origin = db.prepare(nodeSql).get(startId);
  if (!origin) {
    return { nodes: [], edges: [], traversal_path: [] };
  }

  // BFS bookkeeping; Map insertion order doubles as output order.
  const nodesById = new Map([[origin.id, origin]]);
  const edgesByKey = new Map();
  const visitOrder = [origin.id];

  // One `?` per edge kind -- the parameter count depends on `kinds`.
  const kindSlots = kinds.map(() => '?').join(', ');
  const edgeStmt = db.prepare(
    'SELECT src, dst, kind, weight, co_occurrence_count, ts FROM kg_edges ' +
    `WHERE (src = ? OR dst = ?) AND kind IN (${kindSlots}) AND weight >= ?`
  );
  const nodeStmt = db.prepare(nodeSql);

  let frontier = [origin.id];
  let hop = 0;
  while (hop < hopLimit && frontier.length > 0) {
    const discovered = [];
    for (const currentId of frontier) {
      for (const edge of edgeStmt.all(currentId, currentId, ...kinds, minWeight)) {
        const src = Number(edge.src);
        const dst = Number(edge.dst);

        // Direction-agnostic key: A->B and B->A of one kind count once.
        const edgeKey = `${Math.min(src, dst)}|${Math.max(src, dst)}|${edge.kind}`;
        if (!edgesByKey.has(edgeKey)) edgesByKey.set(edgeKey, edge);

        const neighbourId = src === currentId ? dst : src;
        if (nodesById.has(neighbourId)) continue;

        const neighbour = nodeStmt.get(neighbourId);
        if (!neighbour) continue;

        nodesById.set(neighbourId, neighbour);
        visitOrder.push(neighbourId);
        // Redacted nodes are a boundary: listed, but never expanded.
        if (!neighbour.redacted) discovered.push(neighbourId);
      }
    }
    frontier = discovered;
    hop += 1;
  }

  return {
    nodes: [...nodesById.values()],
    edges: [...edgesByKey.values()],
    traversal_path: visitOrder,
  };
}
|
|
127
|
+
|
|
128
|
+
/**
 * bfsRelated(db, query, opts?) -> { nodes, edges, traversal_path, resolved }
 *
 * Lightweight entity-resolution wrapper for `ijfw_memory_search graph:related`.
 *
 * Resolution order for the trimmed query text:
 *   1. exact match on kg_nodes.name (any kind);
 *   2. otherwise a case-insensitive substring match on name, lowest id
 *      wins (deterministic tiebreaker = oldest entity).
 * The query is treated as literal text: LIKE wildcards (`%`, `_`) and the
 * escape character itself are escaped before the substring search, so
 * `user_id` cannot match `userXid` and `%` cannot match every node.
 *
 * The resolved node is then walked with bfsTraverse using opts.depth,
 * opts.edgeKinds and opts.weightThreshold (defaults apply when absent).
 *
 * Returns the bfsTraverse result plus `resolved: { id, kind, name }`. When
 * the query is not a non-blank string, or nothing resolves, all arrays are
 * empty and `resolved` is null. Caller (server.js) merges with FTS5 hits
 * for the search response envelope.
 */
export function bfsRelated(db, query, opts = {}) {
  if (typeof query !== 'string' || !query.trim()) {
    return { nodes: [], edges: [], traversal_path: [], resolved: null };
  }
  const q = query.trim();

  // 1. Exact name match across kinds.
  let startNode = db.prepare(
    `SELECT id, kind, name FROM kg_nodes WHERE name = ? LIMIT 1`
  ).get(q) || null;

  if (!startNode) {
    // 2. Substring match. Escape LIKE metacharacters so user text is matched
    // literally; '\' is declared as the escape character in the SQL below.
    const literal = q.replace(/[\\%_]/g, '\\$&');
    startNode = db.prepare(
      `SELECT id, kind, name FROM kg_nodes ` +
      `WHERE name LIKE ? ESCAPE '\\' COLLATE NOCASE ORDER BY id ASC LIMIT 1`
    ).get(`%${literal}%`) || null;
  }

  if (!startNode) {
    return { nodes: [], edges: [], traversal_path: [], resolved: null };
  }

  const result = bfsTraverse(
    db,
    startNode.id,
    opts.depth != null ? opts.depth : DEFAULT_DEPTH,
    opts.edgeKinds || DEFAULT_EDGE_KINDS,
    { weightThreshold: opts.weightThreshold }
  );
  return { ...result, resolved: { id: startNode.id, kind: startNode.kind, name: startNode.name } };
}
|
|
173
|
+
|
|
174
|
+
export const __test = {
|
|
175
|
+
DEFAULT_DEPTH,
|
|
176
|
+
DEFAULT_WEIGHT_THRESHOLD,
|
|
177
|
+
DEFAULT_EDGE_KINDS,
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
export default { bfsTraverse, resolveNode, bfsRelated };
|