neurain 0.1.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/LICENSE +57 -0
- package/README.md +205 -0
- package/SECURITY.md +22 -0
- package/bin/neurain.mjs +7 -0
- package/docs/comparison-mem0.en.md +22 -0
- package/docs/connect-claude.en.md +48 -0
- package/docs/connect-claude.kr.md +51 -0
- package/docs/connect-codex.en.md +38 -0
- package/docs/connect-codex.kr.md +40 -0
- package/docs/connect-gemini.en.md +71 -0
- package/docs/connect-gemini.kr.md +71 -0
- package/docs/connect-runtime.en.md +61 -0
- package/docs/connect-runtime.kr.md +61 -0
- package/docs/development-status.en.md +157 -0
- package/docs/development-status.kr.md +157 -0
- package/docs/knowledge-os.en.md +105 -0
- package/docs/knowledge-os.kr.md +106 -0
- package/docs/pricing.en.md +14 -0
- package/docs/privacy-and-data-flow.en.md +25 -0
- package/docs/public-saas-readiness.en.md +39 -0
- package/docs/quickstart.en.md +64 -0
- package/docs/quickstart.kr.md +64 -0
- package/docs/release-checklist.en.md +38 -0
- package/docs/safety.en.md +36 -0
- package/docs/self-improvement-90-roadmap.en.md +429 -0
- package/docs/self-improvement-90-roadmap.kr.md +429 -0
- package/docs/self-improving-workflows.en.md +163 -0
- package/docs/self-improving-workflows.kr.md +163 -0
- package/docs/support.en.md +17 -0
- package/docs/troubleshooting.en.md +35 -0
- package/package.json +36 -0
- package/src/cli.mjs +261 -0
- package/src/core/adopt.mjs +304 -0
- package/src/core/answer_eval.mjs +450 -0
- package/src/core/capabilities.mjs +217 -0
- package/src/core/capture_durable.mjs +181 -0
- package/src/core/classify.mjs +237 -0
- package/src/core/compile_desk.mjs +324 -0
- package/src/core/complete.mjs +108 -0
- package/src/core/config.mjs +142 -0
- package/src/core/connect.mjs +355 -0
- package/src/core/curator.mjs +351 -0
- package/src/core/daemon.mjs +536 -0
- package/src/core/digest.mjs +155 -0
- package/src/core/doctor.mjs +115 -0
- package/src/core/durable.mjs +96 -0
- package/src/core/envelope.mjs +97 -0
- package/src/core/flush.mjs +190 -0
- package/src/core/fs.mjs +121 -0
- package/src/core/init.mjs +194 -0
- package/src/core/journal.mjs +269 -0
- package/src/core/labels.mjs +117 -0
- package/src/core/lessons.mjs +793 -0
- package/src/core/lifecycle.mjs +1138 -0
- package/src/core/link_check.mjs +180 -0
- package/src/core/live_cases.mjs +221 -0
- package/src/core/onboard.mjs +175 -0
- package/src/core/plan_receipt.mjs +177 -0
- package/src/core/plan_writeback.mjs +176 -0
- package/src/core/queue.mjs +62 -0
- package/src/core/queue_archive.mjs +87 -0
- package/src/core/queue_model.mjs +161 -0
- package/src/core/queue_write.mjs +28 -0
- package/src/core/recall.mjs +1802 -0
- package/src/core/recall_bench.mjs +275 -0
- package/src/core/recall_corpus.mjs +152 -0
- package/src/core/recall_facts.mjs +233 -0
- package/src/core/recall_intel.mjs +233 -0
- package/src/core/recall_lexical.mjs +269 -0
- package/src/core/recap.mjs +78 -0
- package/src/core/review_queue.mjs +131 -0
- package/src/core/review_worker.mjs +284 -0
- package/src/core/route.mjs +73 -0
- package/src/core/safety.mjs +57 -0
- package/src/core/scheduler.mjs +697 -0
- package/src/core/search.mjs +54 -0
- package/src/core/secret_scan.mjs +143 -0
- package/src/core/semantic.mjs +187 -0
- package/src/core/source_digest.mjs +56 -0
- package/src/core/source_digest_gen.mjs +311 -0
- package/src/core/stage.mjs +105 -0
- package/src/core/status.mjs +175 -0
- package/src/core/vault_state.mjs +115 -0
- package/src/core/watch.mjs +282 -0
- package/src/core/wiki_log.mjs +29 -0
- package/src/core/wrap.mjs +62 -0
- package/src/mcp/server.mjs +865 -0
- package/templates/starter-vault/README.md +9 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
// Routed search intelligence for recall (W-A). A dependency-free, pure-JS port of
|
|
2
|
+
// the vault's search-intel adapter: it reads per-area entity / domain / intent /
|
|
3
|
+
// alias indexes registered in a search index registry (default
|
|
4
|
+
// 00_system/neurain/search-index-registry.json) and exposes detection, routing,
|
|
5
|
+
// and query-expansion primitives that the lexical ranker uses for the BM25 +
|
|
6
|
+
// structural boosts. Every file is optional: a missing registry or a missing
|
|
7
|
+
// area index contributes nothing and never throws, so a generic vault with no
|
|
8
|
+
// routing data degrades to plain BM25 + structural boosts. Unlike the vault
|
|
9
|
+
// original this holds no module-global cache; the loaded intel lives on the
|
|
10
|
+
// lexical context object the caller builds, which keeps it testable.
|
|
11
|
+
import fs from 'node:fs';
|
|
12
|
+
import path from 'node:path';
|
|
13
|
+
|
|
14
|
+
/**
 * Read a file as UTF-8 and parse it as JSON.
 *
 * Any failure (missing file, unreadable file, malformed JSON) is treated as
 * "no data": the supplied fallback is returned and nothing is thrown.
 *
 * @param {string} absPath - Path of the JSON file to read.
 * @param {*} fallback - Value returned when the file cannot be read or parsed.
 * @returns {*} The parsed JSON value, or `fallback`.
 */
function readJsonSafe(absPath, fallback) {
  let parsed;
  try {
    const raw = fs.readFileSync(absPath, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    // Deliberate best-effort: callers treat every index file as optional.
    return fallback;
  }
  return parsed;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Coerce a value to a lower-cased string for case-insensitive matching.
 *
 * @param {*} value - Any value; `null` and `undefined` become the empty string.
 * @returns {string} The lower-cased string form of `value`.
 */
export function normalizeText(value) {
  const raw = value === null || value === undefined ? '' : value;
  return String(raw).toLowerCase();
}
|
|
25
|
+
|
|
26
|
+
/**
 * Normalise a relative path: backslashes become forward slashes and a single
 * leading "./" is removed.
 *
 * @param {*} value - Path-like value; `null`/`undefined` become ''.
 * @returns {string} The normalised relative path.
 */
function normalizeRel(value) {
  const raw = String(value ?? '');
  const forward = raw.split('\\').join('/');
  // Only the first "./" is stripped, so "././x" becomes "./x".
  return forward.startsWith('./') ? forward.slice(2) : forward;
}
|
|
29
|
+
|
|
30
|
+
// Short-token rule (vault parity): 1-3 char alnum tokens match only on word
|
|
31
|
+
// boundaries, so short codes (CR, IR, LP, DD) do not over-match inside longer
|
|
32
|
+
// words; 4+ char tokens match as substrings.
|
|
33
|
+
/**
 * Test whether an already-normalised text mentions a term.
 *
 * The term is lower-cased and trimmed. Terms made of 1-3 ASCII letters or
 * digits must be delimited by non-alphanumeric characters (or the text edges);
 * every other term matches as a plain substring.
 *
 * @param {string} normalizedText - Lower-cased text to search in.
 * @param {*} term - Term to look for; empty or nullish terms never match.
 * @returns {boolean} True when the term is present under the rule above.
 */
export function includesTermBoundary(normalizedText, term) {
  const needle = String(term ?? '').toLowerCase().trim();
  if (needle === '') return false;

  const isShortCode = /^[a-z0-9]{1,3}$/.test(needle);
  if (!isShortCode) return normalizedText.includes(needle);

  // A short code contains only [a-z0-9], so it is safe to embed unescaped.
  const bounded = new RegExp(`(^|[^a-z0-9])${needle}($|[^a-z0-9])`);
  return bounded.test(normalizedText);
}
|
|
42
|
+
|
|
43
|
+
// Resolve an area-cwd-relative path to a real root-relative path by applying the
|
|
44
|
+
// area's path_map and returning the first candidate that exists; falls back to
|
|
45
|
+
// area_root/value. Mirrors the vault resolveRepoPath so boost paths land on the
|
|
46
|
+
// same root-relative paths the corpus emits.
|
|
47
|
+
/**
 * Turn an area-relative path into a root-relative one.
 *
 * Candidates are the normalised path itself followed by one rewritten path per
 * matching path_map rule (prefix swapped for `replace_with`). The first
 * candidate that exists under `root` wins; when none exists the un-rewritten
 * path under the area root is returned.
 *
 * @param {string} root - Vault root directory.
 * @param {*} value - Area-relative path (backslashes and a leading "./" are normalised).
 * @param {string} areaRoot - Root-relative directory of the area.
 * @param {Array<{prefix: string, replace_with: string}>} [pathMap] - Prefix rewrite rules.
 * @returns {string} Root-relative path, or '' when `value` is empty.
 */
function resolveAreaPath(root, value, areaRoot, pathMap) {
  const relative = String(value ?? '').replace(/\\/g, '/').replace(/^\.\//, '');
  if (relative === '') return '';

  const rules = [...(pathMap || [])];
  const rewritten = rules
    .filter((rule) => rule && rule.prefix && relative.startsWith(rule.prefix))
    .map((rule) => rule.replace_with + relative.slice(rule.prefix.length));

  const existing = [relative, ...rewritten]
    .map((candidate) => `${areaRoot}/${candidate}`)
    .find((rel) => fs.existsSync(path.join(root, rel)));

  return existing ?? `${areaRoot}/${relative}`;
}
|
|
62
|
+
|
|
63
|
+
// Domain-pack merge (vault parity): kaia routes are spread across pack files, so
|
|
64
|
+
// skipping packs would silently weaken route_authority boosts.
|
|
65
|
+
/**
 * Load every domain pack listed by an area's pack registry.
 *
 * The registry lives at `<areaRoot>/<indexDir>/<packRegistryRel>` and must
 * carry a `pack_paths` array; each entry is read relative to the registry's
 * own directory. Unreadable registries or packs contribute nothing.
 *
 * @param {string} root - Vault root directory.
 * @param {string} areaRoot - Root-relative directory of the area.
 * @param {string} indexDir - Index directory, relative to the area root.
 * @param {string} [packRegistryRel] - Registry file, relative to the index directory.
 * @returns {Array<*>} Parsed pack values, in registry order.
 */
function loadDomainPacks(root, areaRoot, indexDir, packRegistryRel) {
  if (!packRegistryRel) return [];

  const registryRel = `${indexDir}/${packRegistryRel}`;
  const registry = readJsonSafe(path.join(root, `${areaRoot}/${registryRel}`), null);
  const packPaths = registry && Array.isArray(registry.pack_paths) ? registry.pack_paths : null;
  if (packPaths === null) return [];

  const packsDir = path.posix.dirname(registryRel);
  return packPaths
    .map((rel) => readJsonSafe(path.join(root, `${areaRoot}/${packsDir}/${rel}`), null))
    .filter((pack) => pack);
}
|
|
77
|
+
|
|
78
|
+
// Search-only projection of a domain pack: keep the ranking fields, drop
|
|
79
|
+
// answer/capture/flush fields (not used by search). Pack member entities are NOT
|
|
80
|
+
// route keywords (that over-routes); aliases are the intended triggers.
|
|
81
|
+
/**
 * Project a domain pack onto the route shape used for ranking.
 *
 * Starts from the existing route (`fallback`) and overlays the pack's id,
 * label, ledger, source_priority and current_files. Keywords are the fallback
 * keywords followed by the pack aliases, de-duplicated in first-seen order.
 * `entity_ids` always come from the fallback, never from the pack.
 *
 * @param {object} pack - Parsed domain pack.
 * @param {object} [fallback={}] - Existing route with the same id, if any.
 * @returns {object} Route object combining both inputs.
 */
function packToRoute(pack, fallback = {}) {
  const packAliases = Array.isArray(pack.aliases) ? pack.aliases : [];
  const mergedKeywords = new Set(fallback.keywords ?? []);
  for (const alias of packAliases) mergedKeywords.add(alias);

  const overlay = {
    id: pack.id,
    label: pack.label ?? fallback.label,
    ledger: pack.ledger || fallback.ledger || '',
    keywords: Array.from(mergedKeywords),
    entity_ids: fallback.entity_ids ?? [],
    source_priority: pack.source_priority ?? fallback.source_priority ?? [],
    current_files: pack.current_files ?? fallback.current_files ?? [],
  };
  return Object.assign({}, fallback, overlay);
}
|
|
95
|
+
|
|
96
|
+
// Load and resolve the per-area routing intelligence into one snapshot. No
|
|
97
|
+
// module cache: the caller owns the returned object.
|
|
98
|
+
/**
 * Load and resolve the per-area routing intelligence into one snapshot.
 *
 * Reads the search index registry (`recallCfg.intel.registry`, default
 * `00_system/neurain/search-index-registry.json`) and, for every area that
 * declares both `area_root` and `index_dir`, the entity, domain-routing,
 * query-intent and semantic-alias index files it names. Domain packs are merged
 * over the base routes by id. All document paths are resolved to root-relative
 * paths through the area's path_map.
 *
 * Every input is optional and tolerated when malformed: a registry that is
 * missing or not an object, an index file that is missing or does not hold an
 * array, and rows that are not objects all contribute nothing instead of
 * throwing. No module cache is kept; the caller owns the returned object.
 *
 * @param {string} root - Vault root directory.
 * @param {object} [recallCfg] - Recall configuration; only `intel.registry` is read here.
 * @returns {{entities: object[], routes: object[], intents: object[], aliases: object[], relationshipPaths: string[]}}
 */
export function loadRecallIntel(root, recallCfg) {
  const registryRel = recallCfg?.intel?.registry || '00_system/neurain/search-index-registry.json';
  const registry = readJsonSafe(path.join(root, registryRel), { areas: {} });
  const entities = [];
  const routes = [];
  const intents = [];
  const aliases = [];
  const relationshipPaths = [];

  // JSON files are user-authored, so every list is shape-checked before use.
  const asList = (value) => (Array.isArray(value) ? value : []);
  const asRecords = (value) => asList(value).filter((item) => item !== null && typeof item === 'object');

  // `registry` may legitimately parse to null (a file containing `null`).
  for (const [area, entry] of Object.entries(registry?.areas || {})) {
    if (!entry || !entry.area_root || !entry.index_dir) continue;
    const base = `${entry.area_root}/${entry.index_dir}`;
    const pathMap = asList(entry.path_map);
    const resolve = (value) => resolveAreaPath(root, value, entry.area_root, pathMap);
    // An index the registry does not name is skipped rather than probed as "<base>/undefined".
    const readIndex = (fileName) =>
      fileName ? asRecords(readJsonSafe(path.join(root, `${base}/${fileName}`), [])) : [];

    for (const e of readIndex(entry.entities)) {
      entities.push({
        area,
        id: e.id,
        type: e.type,
        canonical: e.canonical,
        aliases: asList(e.aliases),
        source_docs: asList(e.source_docs).map(resolve),
      });
    }

    const byId = new Map(readIndex(entry.domain_routing).map((r) => [r.id, { ...r }]));
    for (const pack of loadDomainPacks(root, entry.area_root, entry.index_dir, entry.domain_pack_registry)) {
      if (pack === null || typeof pack !== 'object') continue;
      // packToRoute already spreads the existing route, so its result is the merged route.
      byId.set(pack.id, packToRoute(pack, byId.get(pack.id) ?? {}));
    }
    for (const r of byId.values()) {
      const boostPaths = [
        ...asList(r.boost_paths),
        ...asList(r.source_priority),
        ...asList(r.current_files),
        ...(r.ledger ? [r.ledger] : []),
      ].map(resolve);
      const weight = Number(r.authority_weight);
      routes.push({
        area,
        id: r.id,
        label: r.label,
        keywords: asList(r.keywords),
        entity_ids: asList(r.entity_ids),
        authority_weight: Number.isFinite(weight) ? weight : 50,
        boost_paths: [...new Set(boostPaths)],
      });
    }

    for (const i of readIndex(entry.query_intents)) {
      intents.push({
        area,
        id: i.id,
        triggers: asList(i.triggers),
        expansions: asList(i.expansions),
        boost_paths: asList(i.boost_paths).map(resolve),
      });
    }
    for (const a of readIndex(entry.semantic_aliases)) {
      aliases.push({
        area,
        id: a.id,
        label: a.label,
        triggers: asList(a.triggers),
        expansions: asList(a.expansions),
        boost_paths: asList(a.boost_paths).map(resolve),
      });
    }
    if (entry.relationships) relationshipPaths.push(resolve(`${entry.index_dir}/${entry.relationships}`));
  }

  return { entities, routes, intents, aliases, relationshipPaths };
}
|
|
158
|
+
|
|
159
|
+
// Default intel snapshot for the query helpers below: the same five keys that
// loadRecallIntel returns, each an empty list, so a call without intel matches nothing.
const EMPTY_INTEL = { entities: [], routes: [], intents: [], aliases: [], relationshipPaths: [] };
|
|
160
|
+
|
|
161
|
+
/**
 * Find the entities a query mentions directly.
 *
 * An entity is mentioned when at least one of its aliases occurs in the
 * lower-cased query under the includesTermBoundary rule.
 *
 * @param {*} query - Raw query text.
 * @param {object} [intel=EMPTY_INTEL] - Intel snapshot; only `entities` is read.
 * @returns {object[]} Matching entities, in snapshot order.
 */
export function detectEntities(query, intel = EMPTY_INTEL) {
  const haystack = normalizeText(query);
  const mentioned = [];
  for (const entity of intel.entities) {
    const names = entity.aliases || [];
    if (names.some((name) => includesTermBoundary(haystack, name))) mentioned.push(entity);
  }
  return mentioned;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Find the domain routes a query triggers by keyword.
 *
 * A route is triggered when at least one of its keywords occurs in the
 * lower-cased query under the includesTermBoundary rule.
 *
 * @param {*} query - Raw query text.
 * @param {object} [intel=EMPTY_INTEL] - Intel snapshot; only `routes` is read.
 * @returns {object[]} Triggered routes, in snapshot order.
 */
export function routeDomains(query, intel = EMPTY_INTEL) {
  const haystack = normalizeText(query);
  const isTriggered = (route) => {
    const keywords = route.keywords || [];
    return keywords.some((keyword) => includesTermBoundary(haystack, keyword));
  };
  return intel.routes.filter(isTriggered);
}
|
|
170
|
+
|
|
171
|
+
// Directly-mentioned entities PLUS entities sharing a relevant domain (a domain
|
|
172
|
+
// is relevant if the query routes to it by keyword or it groups a detected
|
|
173
|
+
// entity). The per-area domain entity_ids encode those relationships, so this
|
|
174
|
+
// stays generic instead of hardcoding any one area's graph.
|
|
175
|
+
/**
 * Collect the entities relevant to a query.
 *
 * The result starts with the directly mentioned entities, then adds the
 * members of every relevant domain. A domain is relevant when the query
 * triggers it by keyword or when it lists a directly mentioned entity in its
 * `entity_ids`. Ids with no matching entity are ignored; each entity appears once.
 *
 * @param {*} query - Raw query text.
 * @param {object} [intel=EMPTY_INTEL] - Intel snapshot; `entities` and `routes` are read.
 * @returns {object[]} Direct entities first, then domain members in route order.
 */
export function relevantEntities(query, intel = EMPTY_INTEL) {
  const mentioned = detectEntities(query, intel);
  const mentionedIds = new Set(mentioned.map((entity) => entity.id));
  const entityIndex = new Map(intel.entities.map((entity) => [entity.id, entity]));

  const domainIds = new Set(routeDomains(query, intel).map((route) => route.id));
  intel.routes.forEach((route) => {
    const members = route.entity_ids || [];
    if (members.some((id) => mentionedIds.has(id))) domainIds.add(route.id);
  });

  const collected = new Map(mentioned.map((entity) => [entity.id, entity]));
  intel.routes
    .filter((route) => domainIds.has(route.id))
    .flatMap((route) => [...(route.entity_ids || [])])
    .forEach((id) => {
      const entity = entityIndex.get(id);
      if (entity && !collected.has(id)) collected.set(id, entity);
    });

  return Array.from(collected.values());
}
|
|
193
|
+
|
|
194
|
+
/**
 * Collect the query expansions contributed by triggered intents and aliases.
 *
 * A rule (query intent or semantic alias) fires when any of its triggers
 * occurs in the lower-cased query under the includesTermBoundary rule. Intent
 * expansions come first, then alias expansions; duplicates are removed in
 * first-seen order.
 *
 * A rule with no `expansions` list contributes nothing. This matches the
 * `|| []` guard that intelBoostPathsForQuery applies to `boost_paths`, so
 * hand-built intel (for example in tests) cannot make this function throw.
 *
 * @param {*} query - Raw query text.
 * @param {object} [intel=EMPTY_INTEL] - Intel snapshot; `intents` and `aliases` are read.
 * @returns {Array<*>} Unique expansions, in first-seen order.
 */
export function expandIntelQuery(query, intel = EMPTY_INTEL) {
  const q = normalizeText(query);
  const out = new Set();
  for (const rule of [...intel.intents, ...intel.aliases]) {
    const fires = (rule.triggers || []).some((t) => includesTermBoundary(q, t));
    if (!fires) continue;
    for (const expansion of rule.expansions || []) out.add(expansion);
  }
  return [...out];
}
|
|
201
|
+
|
|
202
|
+
/**
 * Collect the boost paths contributed by triggered intents and aliases.
 *
 * A rule fires when any of its triggers occurs in the lower-cased query under
 * the includesTermBoundary rule. Intent paths come first, then alias paths;
 * duplicates are removed in first-seen order.
 *
 * @param {*} query - Raw query text.
 * @param {object} [intel=EMPTY_INTEL] - Intel snapshot; `intents` and `aliases` are read.
 * @returns {Array<*>} Unique boost paths, in first-seen order.
 */
export function intelBoostPathsForQuery(query, intel = EMPTY_INTEL) {
  const needle = normalizeText(query);
  const fires = (rule) => (rule.triggers || []).some((trigger) => includesTermBoundary(needle, trigger));
  const paths = [...intel.intents, ...intel.aliases]
    .filter(fires)
    .flatMap((rule) => [...(rule.boost_paths || [])]);
  return Array.from(new Set(paths));
}
|
|
213
|
+
|
|
214
|
+
// Vault-wide alias map (default 00_system/neurain/search-aliases.json): Korean /
|
|
215
|
+
// nickname triggers that expand to English-canonical terms. Returned as a plain
|
|
216
|
+
// map; expansion is applied by the lexical ranker.
|
|
217
|
+
/**
 * Load the vault-wide alias map.
 *
 * Reads `recallCfg.intel.aliases` (default
 * `00_system/neurain/search-aliases.json`) and returns its `aliases` member.
 * A missing or malformed file, or an `aliases` member that is not a plain
 * key/value object, yields an empty map.
 *
 * `null` and arrays are rejected explicitly: `typeof` reports both as
 * 'object', so a bare typeof check would hand `null` or an index-keyed array
 * to callers that expect an alias map.
 *
 * @param {string} root - Vault root directory.
 * @param {object} [recallCfg] - Recall configuration; only `intel.aliases` is read.
 * @returns {object} Map of alias trigger to its expansions (possibly empty).
 */
export function loadAliasMap(root, recallCfg) {
  const aliasesRel = recallCfg?.intel?.aliases || '00_system/neurain/search-aliases.json';
  const parsed = readJsonSafe(path.join(root, aliasesRel), { aliases: {} });
  const aliases = parsed?.aliases;
  const isPlainMap = aliases !== null && typeof aliases === 'object' && !Array.isArray(aliases);
  return isPlainMap ? aliases : {};
}
|
|
222
|
+
|
|
223
|
+
/**
 * Expand a lower-cased query through the alias map.
 *
 * For every alias key that occurs in the query as a substring, each of its
 * expansions is lower-cased and split on whitespace, and the resulting tokens
 * are appended in order. Tokens are not de-duplicated.
 *
 * Hand-edited maps are tolerated:
 * - a blank alias key is skipped, because every string contains '' and the key
 *   would otherwise fire on every query;
 * - a single string expansion is treated as a one-element list instead of
 *   being iterated character by character;
 * - any other non-array expansion value is ignored instead of throwing.
 *
 * @param {object} aliasMap - Map of alias trigger to expansion list; nullish is treated as empty.
 * @param {string} queryLower - Query text, already lower-cased by the caller.
 * @returns {string[]} Expansion tokens, in map order.
 */
export function expandAliases(aliasMap, queryLower) {
  const haystack = String(queryLower ?? '');
  const out = [];
  for (const [alias, expansions] of Object.entries(aliasMap || {})) {
    const trigger = String(alias).toLowerCase();
    if (trigger.trim() === '' || !haystack.includes(trigger)) continue;

    let list = [];
    if (Array.isArray(expansions)) list = expansions;
    else if (typeof expansions === 'string') list = [expansions];

    for (const expansion of list) {
      out.push(...String(expansion).toLowerCase().split(/\s+/).filter(Boolean));
    }
  }
  return out;
}
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
// Routed lexical ranker for recall (W-A). A faithful, engine-idiom port of the
|
|
2
|
+
// vault's search-core scoring: substring-based BM25 over raw file text plus the
|
|
3
|
+
// additive structural boosts (exact match, slug-in-path, phrase, heading), layer
|
|
4
|
+
// boosts (wiki_core, session_handoff, area_index, queue/raw_inbox), and the
|
|
5
|
+
// routing/memory boosts (entity authority, domain route authority, entity
|
|
6
|
+
// overlap, relationship, fact-ledger fusion), then a per-layer diversity cap.
|
|
7
|
+
// The BM25 constants and every boost weight are kept IDENTICAL to the vault
|
|
8
|
+
// because the parity gate measures the formula, not the file layout; the
|
|
9
|
+
// engine-style part is structural (pure functions over an explicit context, no
|
|
10
|
+
// module globals, config-injected directory names, the shared corpus selector).
|
|
11
|
+
//
|
|
12
|
+
// Scores RAW file text, not the whitespace-normalized recall-doc bodies, because
|
|
13
|
+
// headText/phrase/length all depend on the original line structure. Terms are
|
|
14
|
+
// derived by whitespace split with punctuation retained (vault parity), NOT the
|
|
15
|
+
// semantic tokenizer.
|
|
16
|
+
import path from 'node:path';
|
|
17
|
+
import { listRecallMarkdownFiles, reEsc } from './recall_corpus.mjs';
|
|
18
|
+
import { readText } from './fs.mjs';
|
|
19
|
+
import {
|
|
20
|
+
detectEntities,
|
|
21
|
+
expandAliases,
|
|
22
|
+
expandIntelQuery,
|
|
23
|
+
includesTermBoundary,
|
|
24
|
+
intelBoostPathsForQuery,
|
|
25
|
+
loadAliasMap,
|
|
26
|
+
loadRecallIntel,
|
|
27
|
+
relevantEntities,
|
|
28
|
+
routeDomains,
|
|
29
|
+
} from './recall_intel.mjs';
|
|
30
|
+
import { factsFor, loadFactIntel } from './recall_facts.mjs';
|
|
31
|
+
|
|
32
|
+
// Matches a raw source id anywhere in a query: "raw-", eight digits, "-", then
// three digits or the literal "dryrun" (case-insensitive, word-bounded).
const sourceIdPattern = /\braw-\d{8}-(?:\d{3}|dryrun)\b/i;
|
|
33
|
+
// BM25 content weight relative to the additive structural boosts (vault parity).
|
|
34
|
+
const BM25_WEIGHT = 4;
|
|
35
|
+
// BM25 k1: scales the length-normalised part of the denominator and the (k1 + 1) numerator factor.
const BM25_K1 = 1.5;
|
|
36
|
+
// BM25 b: how strongly document length relative to the average length affects the denominator.
const BM25_B = 0.75;
|
|
37
|
+
|
|
38
|
+
/**
 * Derive the directory and file names the ranker classifies against.
 *
 * Each name comes from the recall configuration, with a built-in default when
 * the configured value is missing or empty.
 *
 * @param {object} recallCfg - Recall configuration.
 * @returns {{areasDir: string, wikiDir: string, systemDir: string, hubsDir: string, handoffDir: string, rawInbox: string, queueRel: string}}
 */
function dirsFromConfig(recallCfg) {
  const orDefault = (configured, fallback) => configured || fallback;
  const systemDir = orDefault(recallCfg.system_dir, '00_system');
  const rawDir = orDefault(recallCfg.raw_dir, 'raw');
  const queueRel = orDefault(recallCfg.intel?.queue, '00_system/neurain/writeback-queue.jsonl');

  const dirs = {};
  dirs.areasDir = orDefault(recallCfg.areas_dir, '10_areas');
  dirs.wikiDir = orDefault(recallCfg.wiki_dir, 'wiki');
  dirs.systemDir = systemDir;
  dirs.hubsDir = orDefault(recallCfg.hubs_dir, '20_hubs');
  dirs.handoffDir = `${systemDir}/sessions/handoffs`;
  dirs.rawInbox = `${rawDir}/_inbox`;
  dirs.queueRel = queueRel;
  return dirs;
}
|
|
50
|
+
|
|
51
|
+
// Precompiled layer classifier. Per-area buckets (area:<name>) so the diversity
|
|
52
|
+
// cap applies within each area, letting several areas surface instead of one
|
|
53
|
+
// crowding the rest out (vault parity).
|
|
54
|
+
/**
 * Build a function that maps a root-relative path to its layer name.
 *
 * Rules are tried in a fixed order and the first hit wins: queue file, wiki
 * core folders, session handoffs, the area registry, per-area index files,
 * raw inbox, other wiki files, other system files. A path under the areas
 * directory that matched none of these gets its own `area:<name>` bucket;
 * anything else is 'root'.
 *
 * @param {object} dirs - Directory names as produced by dirsFromConfig.
 * @returns {(rel: string) => string} Classifier for root-relative paths.
 */
function makeLayerClassifier(dirs) {
  const wikiPrefix = reEsc(dirs.wikiDir);
  const areasPrefix = reEsc(dirs.areasDir);
  const wikiCore = new RegExp(`^${wikiPrefix}/(concepts|source-summaries|entities|decisions)/`);
  const areaIndex = new RegExp(`^${areasPrefix}/[^/]+/(?:_area|index|sources_map|log)\\.md$`);
  const areaBucket = new RegExp(`^${areasPrefix}/([^/]+)/`);

  // Order matters: earlier rules shadow later, broader ones.
  const rules = [
    [(rel) => rel === dirs.queueRel, 'queue_metadata'],
    [(rel) => wikiCore.test(rel), 'wiki_core'],
    [(rel) => rel.startsWith(`${dirs.handoffDir}/`), 'session_handoff'],
    [(rel) => rel === `${dirs.systemDir}/area-registry.md`, 'area_index'],
    [(rel) => areaIndex.test(rel), 'area_index'],
    [(rel) => rel.startsWith(`${dirs.rawInbox}/`), 'raw_inbox'],
    [(rel) => rel.startsWith(`${dirs.wikiDir}/`), 'wiki_other'],
    [(rel) => rel.startsWith(`${dirs.systemDir}/`), 'system'],
  ];

  return (rel) => {
    for (const [applies, layer] of rules) {
      if (applies(rel)) return layer;
    }
    const bucket = areaBucket.exec(rel);
    return bucket ? `area:${bucket[1]}` : 'root';
  };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Apply the per-layer diversity cap to an already-ranked list.
 *
 * Items are kept in order until their layer has contributed `limit` items.
 * Exact matches and authoritative items are always kept, as are queue/raw
 * inbox items when the query looks like a source id. Kept items count toward
 * their layer's total even when they were exempt.
 *
 * @param {object[]} items - Ranked results carrying `layer`, `exact_match`, `authoritative`.
 * @param {number} limit - Maximum items per layer; a non-finite or non-positive value disables the cap.
 * @param {boolean} queryLooksLikeSourceId - Whether source-id exemptions apply.
 * @returns {object[]} The capped list (the input array itself when the cap is disabled).
 */
function capLayerResults(items, limit, queryLooksLikeSourceId) {
  const capDisabled = !Number.isFinite(limit) || limit <= 0;
  if (capDisabled) return items;

  const sourceIdLayers = ['queue_metadata', 'raw_inbox'];
  const keptPerLayer = new Map();
  return items.filter((item) => {
    const kept = keptPerLayer.get(item.layer) || 0;
    const exempt =
      item.exact_match ||
      item.authoritative ||
      (queryLooksLikeSourceId && sourceIdLayers.includes(item.layer));
    if (!exempt && kept >= limit) return false;
    keptPerLayer.set(item.layer, kept + 1);
    return true;
  });
}
|
|
87
|
+
|
|
88
|
+
/**
 * Extract the lower-cased "head" of a document: its title and first heading.
 *
 * Only the first 14 lines are inspected. Every `title:` line contributes its
 * value with quote characters removed. The first level-1 or level-2 markdown
 * heading contributes its text and ends the scan.
 *
 * @param {*} text - Raw document text; falsy values are treated as empty.
 * @returns {string} Space-joined, lower-cased title/heading text ('' when none found).
 */
function headText(text) {
  const headingMarker = /^#{1,2}\s+/;
  const titleLine = /^title:\s*(.+)$/i;
  const collected = [];
  const firstLines = String(text || '').split(/\r?\n/).slice(0, 14);

  for (let index = 0; index < firstLines.length; index += 1) {
    const current = firstLines[index];
    const title = titleLine.exec(current);
    if (title !== null) collected.push(title[1].replace(/['"]/g, ''));
    if (!headingMarker.test(current)) continue;
    collected.push(current.replace(headingMarker, ''));
    break;
  }
  return collected.join(' ').toLowerCase();
}
|
|
101
|
+
|
|
102
|
+
/**
 * Reduce a value to a path-style slug.
 *
 * Lower-cases the text, drops quote characters, collapses every run of
 * characters other than ASCII letters, digits and Hangul syllables into a
 * single '-', and trims leading/trailing dashes.
 *
 * @param {*} value - Text to slugify; falsy values are treated as empty.
 * @returns {string} The slug ('' when nothing slug-worthy remains).
 */
function slugish(value) {
  const lowered = String(value || '').toLowerCase();
  const unquoted = lowered.replace(/['"]/g, '');
  const dashed = unquoted.replace(/[^a-z0-9가-힣]+/gi, '-');
  return dashed.replace(/^-+|-+$/g, '');
}
|
|
109
|
+
|
|
110
|
+
// Build the QUERY-INDEPENDENT lexical context once (corpus walk + raw reads +
|
|
111
|
+
// intel/facts/alias snapshots + the held-aside queue doc), reused across many
|
|
112
|
+
// queries. intel/facts/aliasMap can be injected (tests); otherwise loaded from
|
|
113
|
+
// the registry, degrading to empty when files are absent.
|
|
114
|
+
/**
 * Build the query-independent lexical context once, for reuse across queries.
 *
 * Collects the recall markdown corpus as scoring documents (raw text, its
 * lower-cased form, path, layer and length), reads the writeback queue as a
 * separate held-aside document, and attaches the intel, fact and alias
 * snapshots. Snapshots passed in by the caller are used as-is; missing ones
 * are loaded from the vault.
 *
 * @param {string} root - Vault root directory.
 * @param {object} [options]
 * @param {string} [options.area=''] - Area filter forwarded to the corpus selector.
 * @param {object} options.recallCfg - Recall configuration (required).
 * @param {object} [options.intel] - Pre-built intel snapshot.
 * @param {object} [options.facts] - Pre-built fact snapshot.
 * @param {object} [options.aliasMap] - Pre-built alias map.
 * @returns {object} Context consumed by lexicalSearchWithContext.
 * @throws {Error} When `recallCfg` is not supplied.
 */
export function buildLexicalContext(root, { area = '', recallCfg, intel, facts, aliasMap } = {}) {
  if (!recallCfg) throw new Error('buildLexicalContext requires recallCfg');

  const dirs = dirsFromConfig(recallCfg);
  const classify = makeLayerClassifier(dirs);
  // Both corpus files and the queue file share one document shape.
  const toDoc = (relPath, text, layer) => ({
    text,
    lower: text.toLowerCase(),
    relPath,
    layer,
    length: text.length,
  });

  const corpus = listRecallMarkdownFiles(root, recallCfg, { area });
  const baseDocs = corpus.map(({ rel, text }) => toDoc(rel, text, classify(rel)));
  const queueDoc = toDoc(dirs.queueRel, readText(path.join(root, dirs.queueRel), ''), 'queue_metadata');

  return {
    root,
    area,
    recallCfg,
    dirs,
    baseDocs,
    queueDoc,
    intel: intel || loadRecallIntel(root, recallCfg),
    facts: facts || loadFactIntel(root, recallCfg),
    aliasMap: aliasMap || loadAliasMap(root, recallCfg),
  };
}
|
|
146
|
+
|
|
147
|
+
// Run ONE query against a prebuilt lexical context. Returns
|
|
148
|
+
// { query, top, results } where each result carries score, layer, signals,
|
|
149
|
+
// matched_entities, exact_match, authoritative. Pure over ctx.
|
|
150
|
+
/**
 * Run one query against a prebuilt lexical context.
 *
 * Scoring per document, in order:
 *  1. substring BM25 over the search terms (query terms + alias terms + intel
 *     expansion terms), weighted by BM25_WEIGHT;
 *  2. additive structural boosts: phrase (+5), heading/title term hits (+6
 *     each), slug-in-path (+25), exact match (+30);
 *  3. boosts applied only to documents that already scored: layer boosts,
 *     entity authority (+15), route authority (summed route weights), entity
 *     overlap (+10 per entity), relationship (+16, or +42 for follow-up style
 *     queries) and memory fusion (+18).
 * Paths under `output/` or `outputs/` are scaled by 0.55. Results are sorted by
 * score then path, capped per layer, and truncated to `top`.
 *
 * Behaviour worth knowing:
 *  - A query with no terms (empty or whitespace only) returns no results.
 *    Every string contains '', so without this guard a blank query would
 *    phrase-match every document.
 *  - The idf table is a Map, so a term such as `__proto__` is an ordinary key
 *    rather than a lookup on Object.prototype.
 *  - The queue document is only searched when forced, when the query looks
 *    like a source id, or when it mentions "queue"/"writeback".
 *
 * @param {object} ctx - Context from buildLexicalContext (baseDocs, queueDoc, intel, facts, aliasMap).
 * @param {*} query - Query text (coerced with String).
 * @param {object} [options]
 * @param {number} [options.top=10] - Maximum number of results.
 * @param {number} [options.maxPerLayer=3] - Per-layer diversity cap.
 * @param {boolean} [options.includeQueue=false] - Force the queue document into the search.
 * @returns {{query: string, top: number, results: object[]}} Ranked results with
 *   path, layer, score, exact_match, matched_entities, authoritative, snippet, signals.
 */
export function lexicalSearchWithContext(ctx, query, { top = 10, maxPerLayer = 3, includeQueue: forceQueue = false } = {}) {
  const queryText = String(query);
  const queryLower = queryText.toLowerCase();
  const queryLooksLikeSourceId = sourceIdPattern.test(queryText);
  const includeQueue = Boolean(forceQueue) || queryLooksLikeSourceId || /\bqueue\b|writeback/i.test(queryText);

  const terms = queryLower.split(/\s+/).filter(Boolean);
  if (terms.length === 0) return { query: queryText, top, results: [] };

  // Query-level values are computed once here rather than once per document.
  const querySlug = slugish(queryLower);
  const aliasTerms = expandAliases(ctx.aliasMap, queryLower);
  const intel = ctx.intel;
  const queryEntities = detectEntities(query, intel);
  const queryDomains = routeDomains(query, intel);
  const relEntities = relevantEntities(query, intel);
  const intelTerms = expandIntelQuery(query, intel)
    .flatMap((s) => String(s).toLowerCase().split(/\s+/))
    .filter(Boolean);
  const entityBoostPaths = new Set(relEntities.flatMap((e) => e.source_docs));
  const routeBoostWeights = new Map();
  const addRouteBoost = (p, weight = 50) => routeBoostWeights.set(p, (routeBoostWeights.get(p) || 0) + weight);
  for (const d of queryDomains) for (const p of d.boost_paths) addRouteBoost(p, d.authority_weight);
  for (const p of intelBoostPathsForQuery(query, intel)) addRouteBoost(p, 50);
  const intelBoostPaths = new Set([...entityBoostPaths, ...routeBoostWeights.keys()]);
  const intelRelationshipPaths = new Set(intel.relationshipPaths);
  const queryIsFollowup = /follow|owner|담당|누가|들고|액션|action|next|task|태스크|follow-up|후속/i.test(queryText);
  // Fact-ledger fusion only applies when the query routed to an entity or domain.
  const memBoostPaths = new Set(
    queryEntities.length || queryDomains.length
      ? factsFor(query, ctx.facts, { top: 40 }).flatMap((f) => [...(f.sources_resolved || []), f.file]).filter(Boolean)
      : []
  );
  const searchTerms = [...new Set([...terms, ...aliasTerms, ...intelTerms])];
  const matchedEntityNames = relEntities.map((e) => e.canonical);

  const docs = includeQueue ? [...ctx.baseDocs, ctx.queueDoc] : ctx.baseDocs;
  const docCount = docs.length || 1;
  const avgLength = docs.reduce((sum, doc) => sum + doc.length, 0) / docCount || 1;
  const idf = new Map();
  for (const term of searchTerms) {
    const docFrequency = docs.reduce((count, doc) => count + (doc.lower.includes(term) ? 1 : 0), 0);
    idf.set(term, Math.log(1 + (docCount - docFrequency + 0.5) / (docFrequency + 0.5)));
  }

  function scoreDoc(doc) {
    const { lower, text, relPath, layer, length } = doc;
    const lowerPath = relPath.toLowerCase();

    let bm25 = 0;
    for (const term of searchTerms) {
      // Substring term frequency: number of non-overlapping occurrences.
      const tf = lower.split(term).length - 1;
      if (tf === 0) continue;
      const denom = tf + BM25_K1 * (1 - BM25_B + (BM25_B * length) / avgLength);
      bm25 += (idf.get(term) || 0) * ((tf * (BM25_K1 + 1)) / denom);
    }
    let score = bm25 * BM25_WEIGHT;
    const signals = {};
    if (bm25 > 0) signals.bm25 = Math.round(bm25 * BM25_WEIGHT * 100) / 100;

    if (lower.includes(queryLower)) { score += 5; signals.phrase = 5; }

    const head = headText(text);
    let headingBonus = 0;
    for (const term of searchTerms) {
      if (head.includes(term)) { score += 6; headingBonus += 6; }
    }
    if (headingBonus) signals.heading = headingBonus;

    const aliasExact = aliasTerms.some(
      (a) => lowerPath === a || lowerPath.endsWith(`/${a}.md`) || lower.includes(`title: ${a}`) || lower.includes(`# ${a}`)
    );
    const exactMatch =
      aliasExact ||
      (querySlug && lowerPath === querySlug) ||
      (querySlug && lowerPath.endsWith(`/${querySlug}.md`)) ||
      lower.includes(`title: ${queryLower}`) ||
      lower.includes(`# ${queryLower}`);
    if (querySlug && lowerPath.includes(querySlug)) { score += 25; signals.slug_path = 25; }
    else if (aliasTerms.some((a) => lowerPath.includes(a))) { score += 25; signals.slug_path = 25; }
    if (exactMatch) { score += 30; signals.exact_match = 30; }

    // Everything below only amplifies documents that matched on content or structure.
    const matched = score > 0;
    if (matched && layer === 'wiki_core') { score += 18; signals.layer = 18; }
    if (matched && layer === 'session_handoff') { score += 10; signals.layer = 10; }
    if (matched && layer === 'area_index') { score += 6; signals.layer = 6; }
    if (matched && layer === 'queue_metadata') { const b = queryLooksLikeSourceId ? 30 : 1; score += b; signals.layer = b; }
    if (matched && layer === 'raw_inbox') { const b = queryLooksLikeSourceId ? 18 : 4; score += b; signals.layer = b; }
    if (matched && entityBoostPaths.has(relPath)) { score += 15; signals.entity_authority = 15; }
    if (matched && routeBoostWeights.has(relPath)) {
      const b = routeBoostWeights.get(relPath);
      score += b;
      signals.route_authority = b;
    }
    if (matched && queryEntities.length) {
      const overlap = queryEntities.reduce(
        (n, e) => n + ((e.aliases || []).some((a) => includesTermBoundary(lower, a)) ? 1 : 0),
        0
      );
      if (overlap) { score += overlap * 10; signals.entity_overlap = overlap * 10; }
    }
    if (matched && intelRelationshipPaths.has(relPath)) { const b = queryIsFollowup ? 42 : 16; score += b; signals.relationship = b; }
    if (matched && memBoostPaths.has(relPath)) { score += 18; signals.memory_fusion = 18; }

    // Unmatched documents are filtered out below, so skip their snippet scan.
    const line = matched
      ? text.split(/\r?\n/).find((candidate) => searchTerms.some((term) => candidate.toLowerCase().includes(term)))
      : undefined;
    return {
      path: relPath,
      layer,
      score: Math.round(score * 100) / 100,
      exact_match: exactMatch,
      matched_entities: matchedEntityNames,
      authoritative: intelBoostPaths.has(relPath) || memBoostPaths.has(relPath),
      snippet: line ? line.trim().slice(0, 180) : '',
      signals,
    };
  }

  const ranked = docs
    .map(scoreDoc)
    .map((item) => (/^outputs?\//.test(item.path) ? { ...item, score: Math.round(item.score * 0.55 * 100) / 100 } : item))
    .filter((item) => item.score > 0)
    .sort((a, b) => b.score - a.score || a.path.localeCompare(b.path));

  const results = capLayerResults(ranked, maxPerLayer, queryLooksLikeSourceId).slice(0, top);
  return { query: queryText, top, results };
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { absPath, relPath, safeResolve, walkFiles } from './fs.mjs';
|
|
4
|
+
import { lessonCandidates } from './lessons.mjs';
|
|
5
|
+
import { redactedPreview } from './safety.mjs';
|
|
6
|
+
|
|
7
|
+
/**
 * CLI entry point for the recap command.
 *
 * The vault root is the first positional argument, else `args.root`, else the
 * current working directory. With `args.json` the recap object is returned as
 * a JSON payload; otherwise it is rendered as a short markdown report.
 *
 * @param {object} args - Parsed CLI arguments (`_`, `root`, `area`, `json`).
 * @returns {Promise<{json: true, payload: object} | {text: string}>}
 */
export async function recapCommand(args) {
  const root = absPath(args._[0] || args.root || process.cwd());
  const recap = buildRecap(root, { area: args.area || '' });
  if (args.json) return { json: true, payload: recap };

  // Render a list as markdown bullets, with a placeholder when it is empty.
  const bullets = (items) => (items.length ? items.map((line) => `- ${line}`) : ['- none']);
  const report = [
    '# Neurain recap',
    '',
    `- Root: ${root}`,
    `- Sources checked: ${recap.sources_checked.length}`,
    `- Lesson candidates: ${recap.lesson_candidate_count}`,
    '',
    '## Recent signals',
    ...bullets(recap.recent_signals),
    '',
    '## Open work hints',
    ...bullets(recap.open_work_hints),
  ];
  return { text: report.join('\n') };
}
|
|
27
|
+
|
|
28
|
+
/**
 * Build a read-only recap of recent activity in the vault.
 *
 * For each recap source, the last 24 non-empty lines of its tail are scanned.
 * Lines mentioning completion words become "recent signals" and lines
 * mentioning pending-work words become "open work hints" (a line can be both).
 * Each list is de-duplicated and limited to its first 8 entries. Entries show
 * the redacted preview of the line, prefixed with the source path.
 *
 * @param {string} root - Vault root directory.
 * @param {object} [options]
 * @param {string} [options.area=''] - Area forwarded to source and lesson lookup.
 * @returns {object} Recap payload (`durable_write` is always false).
 */
export function buildRecap(root, { area = '' } = {}) {
  const completionWords = /(done|complete|pass|updated|added|merged|완료|통과|업데이트|병합)/i;
  const pendingWords = /(todo|pending|next|open|blocked|needs|남은|대기|필요)/i;

  const sources = recapSources(root, { area });
  const recentSignals = [];
  const openWorkHints = [];

  sources.forEach((source) => {
    const tailLines = readTail(source.abs)
      .split(/\r?\n/)
      .map((item) => item.trim())
      .filter(Boolean)
      .slice(-24);
    tailLines.forEach((line) => {
      const preview = redactedPreview(line, 180);
      // Matching runs on the raw line; only the redacted preview is reported.
      if (completionWords.test(line)) recentSignals.push(`${source.rel}: ${preview.text}`);
      if (pendingWords.test(line)) openWorkHints.push(`${source.rel}: ${preview.text}`);
    });
  });

  const candidates = lessonCandidates(root, { limit: 5, area });
  return {
    ok: true,
    command: 'recap',
    root,
    durable_write: false,
    sources_checked: sources.map((source) => source.rel),
    recent_signals: unique(recentSignals).slice(0, 8),
    open_work_hints: unique(openWorkHints).slice(0, 8),
    lesson_candidate_count: candidates.length,
    lesson_candidate_ids: candidates.map((candidate) => candidate.id),
  };
}
|
|
53
|
+
|
|
54
|
+
/**
 * List the files a recap reads.
 *
 * Always includes `log.md` and `wiki/log.md` when they exist. When an area is
 * given (leading underscores are ignored), files under `10_areas/_<area>/`
 * whose path contains a `current/` folder and ends in `brief.md` are added.
 * At most 30 sources are returned.
 *
 * @param {string} root - Vault root directory.
 * @param {object} [options]
 * @param {string} [options.area=''] - Area name, with or without leading underscores.
 * @returns {Array<{rel: string, abs: string}>} Sources in read order.
 */
function recapSources(root, { area = '' } = {}) {
  const fixed = [];
  for (const rel of ['log.md', 'wiki/log.md']) {
    const abs = safeResolve(root, rel);
    if (fs.existsSync(abs)) fixed.push({ rel, abs });
  }

  const areaName = String(area || '').replace(/^_+/, '').trim();
  if (areaName === '') return fixed;

  const areaPrefix = `10_areas/_${areaName}/`;
  const briefPattern = /(^|\/)current\/.+brief\.md$/i;
  const briefs = [];
  for (const abs of walkFiles(root, { includeRaw: false, maxFiles: 20000 })) {
    const rel = relPath(root, abs);
    if (rel.startsWith(areaPrefix) && briefPattern.test(rel)) briefs.push({ abs, rel });
  }
  return fixed.concat(briefs).slice(0, 30);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Return the last 80 lines of a UTF-8 text file, joined with '\n'.
 *
 * @param {string} file - Path of the file to read.
 * @returns {string} The file tail, or '' when the file cannot be read.
 */
function readTail(file) {
  let contents;
  try {
    contents = fs.readFileSync(file, 'utf8');
  } catch {
    // Best-effort: an unreadable source simply contributes no lines.
    return '';
  }
  const lines = contents.split(/\r?\n/);
  const start = Math.max(lines.length - 80, 0);
  return lines.slice(start).join('\n');
}
|
|
75
|
+
|
|
76
|
+
/**
 * Remove duplicate values while keeping first-seen order.
 *
 * @param {Iterable<*>} values - Values to de-duplicate.
 * @returns {Array<*>} New array holding each distinct value once.
 */
function unique(values) {
  const seen = new Set(values);
  return Array.from(seen);
}
|