neurain 0.1.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/LICENSE +57 -0
- package/README.md +205 -0
- package/SECURITY.md +22 -0
- package/bin/neurain.mjs +7 -0
- package/docs/comparison-mem0.en.md +22 -0
- package/docs/connect-claude.en.md +48 -0
- package/docs/connect-claude.kr.md +51 -0
- package/docs/connect-codex.en.md +38 -0
- package/docs/connect-codex.kr.md +40 -0
- package/docs/connect-gemini.en.md +71 -0
- package/docs/connect-gemini.kr.md +71 -0
- package/docs/connect-runtime.en.md +61 -0
- package/docs/connect-runtime.kr.md +61 -0
- package/docs/development-status.en.md +157 -0
- package/docs/development-status.kr.md +157 -0
- package/docs/knowledge-os.en.md +105 -0
- package/docs/knowledge-os.kr.md +106 -0
- package/docs/pricing.en.md +14 -0
- package/docs/privacy-and-data-flow.en.md +25 -0
- package/docs/public-saas-readiness.en.md +39 -0
- package/docs/quickstart.en.md +64 -0
- package/docs/quickstart.kr.md +64 -0
- package/docs/release-checklist.en.md +38 -0
- package/docs/safety.en.md +36 -0
- package/docs/self-improvement-90-roadmap.en.md +429 -0
- package/docs/self-improvement-90-roadmap.kr.md +429 -0
- package/docs/self-improving-workflows.en.md +163 -0
- package/docs/self-improving-workflows.kr.md +163 -0
- package/docs/support.en.md +17 -0
- package/docs/troubleshooting.en.md +35 -0
- package/package.json +36 -0
- package/src/cli.mjs +261 -0
- package/src/core/adopt.mjs +304 -0
- package/src/core/answer_eval.mjs +450 -0
- package/src/core/capabilities.mjs +217 -0
- package/src/core/capture_durable.mjs +181 -0
- package/src/core/classify.mjs +237 -0
- package/src/core/compile_desk.mjs +324 -0
- package/src/core/complete.mjs +108 -0
- package/src/core/config.mjs +142 -0
- package/src/core/connect.mjs +355 -0
- package/src/core/curator.mjs +351 -0
- package/src/core/daemon.mjs +536 -0
- package/src/core/digest.mjs +155 -0
- package/src/core/doctor.mjs +115 -0
- package/src/core/durable.mjs +96 -0
- package/src/core/envelope.mjs +97 -0
- package/src/core/flush.mjs +190 -0
- package/src/core/fs.mjs +121 -0
- package/src/core/init.mjs +194 -0
- package/src/core/journal.mjs +269 -0
- package/src/core/labels.mjs +117 -0
- package/src/core/lessons.mjs +793 -0
- package/src/core/lifecycle.mjs +1138 -0
- package/src/core/link_check.mjs +180 -0
- package/src/core/live_cases.mjs +221 -0
- package/src/core/onboard.mjs +175 -0
- package/src/core/plan_receipt.mjs +177 -0
- package/src/core/plan_writeback.mjs +176 -0
- package/src/core/queue.mjs +62 -0
- package/src/core/queue_archive.mjs +87 -0
- package/src/core/queue_model.mjs +161 -0
- package/src/core/queue_write.mjs +28 -0
- package/src/core/recall.mjs +1802 -0
- package/src/core/recall_bench.mjs +275 -0
- package/src/core/recall_corpus.mjs +152 -0
- package/src/core/recall_facts.mjs +233 -0
- package/src/core/recall_intel.mjs +233 -0
- package/src/core/recall_lexical.mjs +269 -0
- package/src/core/recap.mjs +78 -0
- package/src/core/review_queue.mjs +131 -0
- package/src/core/review_worker.mjs +284 -0
- package/src/core/route.mjs +73 -0
- package/src/core/safety.mjs +57 -0
- package/src/core/scheduler.mjs +697 -0
- package/src/core/search.mjs +54 -0
- package/src/core/secret_scan.mjs +143 -0
- package/src/core/semantic.mjs +187 -0
- package/src/core/source_digest.mjs +56 -0
- package/src/core/source_digest_gen.mjs +311 -0
- package/src/core/stage.mjs +105 -0
- package/src/core/status.mjs +175 -0
- package/src/core/vault_state.mjs +115 -0
- package/src/core/watch.mjs +282 -0
- package/src/core/wiki_log.mjs +29 -0
- package/src/core/wrap.mjs +62 -0
- package/src/mcp/server.mjs +865 -0
- package/templates/starter-vault/README.md +9 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
// `capture` command (W-B, B4, durable). Captures text into the vault as an
|
|
2
|
+
// append-only raw source: renders raw markdown (routed THROUGH the stage secret
|
|
3
|
+
// gate), writes the _inbox envelope, and appends one pending writeback-queue row
|
|
4
|
+
// — all inside a single capture critical section (one lock on the queue) so
|
|
5
|
+
// concurrent captures cannot collide on a source id or interleave the append.
|
|
6
|
+
//
|
|
7
|
+
// W-D BOUNDARY: the engine NEVER writes session-state.json. It returns a
|
|
8
|
+
// session_state_delta the future vault shuttle applies; pending_count there is
|
|
9
|
+
// ADVISORY (the shuttle must recompute at apply time). See the B4 cross-review.
|
|
10
|
+
//
|
|
11
|
+
// SCOPE (B4 increment): text input only. File capture (asset copy + text
|
|
12
|
+
// extraction), overlap-candidate detection, and numeric-conflict flagging are NOT
|
|
13
|
+
// ported yet; until they are, engine capture is for porting/shadow use and is the
|
|
14
|
+
// documented FLIP GATE for `capture` (a real capture that would overlap an
|
|
15
|
+
// existing summary or carry a file must keep using the vault tool).
|
|
16
|
+
import fs from 'node:fs';
|
|
17
|
+
import path from 'node:path';
|
|
18
|
+
import { absPath, ensureDir, timestamp } from './fs.mjs';
|
|
19
|
+
import { vaultConfig } from './config.mjs';
|
|
20
|
+
import { firstLineTitle, inferFlushLevelFromEnvelope, inferTargetLayerFromIntent } from './classify.mjs';
|
|
21
|
+
import { folderForSourceType, makeEnvelope, readArgInput, renderCaptureMarkdown, slugify } from './envelope.mjs';
|
|
22
|
+
import { withFileLock, atomicWriteJson } from './durable.mjs';
|
|
23
|
+
import { stageAndPromote } from './stage.mjs';
|
|
24
|
+
import { loadSessionState, pendingCountForSession } from './vault_state.mjs';
|
|
25
|
+
import { appendWikiLog } from './wiki_log.mjs';
|
|
26
|
+
|
|
27
|
+
/**
 * Resolve the handoff note path for a session.
 *
 * @param {object} vaultCfg - vault config; `session_handoff_dir` is read for the fallback.
 * @param {string} sessionId - session id used to build the fallback file name.
 * @param {object|null} session - session record; its `handoff_path` wins when truthy.
 * @returns {string} the session's own handoff path, else `<session_handoff_dir>/<sessionId>.md`.
 */
function handoffPathFor(vaultCfg, sessionId, session) {
  const explicit = session?.handoff_path;
  if (explicit) return explicit;
  return `${vaultCfg.session_handoff_dir}/${sessionId}.md`;
}
|
|
30
|
+
|
|
31
|
+
// max+1 over today's existing inbox envelopes (not count+1, so a deleted id is
|
|
32
|
+
// never reused while a later one is live). Called inside the capture lock.
|
|
33
|
+
/**
 * Allocate the next `raw-YYYYMMDD-NNN` source id for today.
 *
 * Scans the inbox directory (created if missing) for today's `*.json`
 * envelopes and returns the highest parsed sequence number plus one,
 * zero-padded to at least three digits. captureCommand calls this while
 * holding the queue lock.
 *
 * @param {string} root - vault root directory.
 * @param {object} vaultCfg - vault config; `raw_inbox_dir` is read.
 * @returns {string} the newly allocated source id.
 */
function nextSourceId(root, vaultCfg) {
  const day = timestamp().slice(0, 10).replace(/-/g, '');
  const prefix = `raw-${day}-`;
  const inboxAbs = path.join(root, vaultCfg.raw_inbox_dir);
  ensureDir(inboxAbs);

  // Track the maximum sequence seen; -Infinity means "no envelope today".
  let highest = -Infinity;
  for (const name of fs.readdirSync(inboxAbs)) {
    if (!name.startsWith(prefix) || !name.endsWith('.json')) continue;
    const seq = parseInt(name.slice(prefix.length), 10);
    if (Number.isFinite(seq)) highest = Math.max(highest, seq);
  }

  const base = highest === -Infinity ? 0 : highest;
  return `${prefix}${String(base + 1).padStart(3, '0')}`;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Build one pending writeback-queue row for a captured envelope.
 *
 * The base row copies routing fields from the envelope. When a session is
 * present, the session-scoped fields (id, scope, flush level, target layer and
 * path, conflict policy, handoff path) are appended after the base fields.
 *
 * @param {object} envelope - capture envelope (captured_at, sensitivity, write_intent, ...).
 * @param {object} details - per-capture values supplied by the caller.
 * @returns {object} the queue row, ready to be JSON-serialized.
 */
function buildQueueItem(envelope, { sourceId, rawPath, title, session, sessionId, flushLevel, targetLayer, targetPath, conflictPolicy, handoffPath }) {
  const row = {
    queued_at: envelope.captured_at,
    status: 'pending',
    source_id: sourceId,
    raw_path: rawPath,
    title,
    sensitivity: envelope.sensitivity,
    write_intent: envelope.write_intent,
    area_candidates: envelope.area_candidates,
    requires_user_decision: envelope.requires_user_decision,
  };
  if (!session) return row;

  // Session-scoped captures carry the extra flush/handoff routing fields.
  return Object.assign(row, {
    session_id: sessionId,
    scope: session.scope || '',
    flush_level: flushLevel,
    target_layer: targetLayer,
    target_path: targetPath,
    conflict_policy: conflictPolicy,
    handoff_path: handoffPath,
  });
}
|
|
61
|
+
|
|
62
|
+
/**
 * `capture` command: capture text into the vault as an append-only raw source.
 *
 * Flow: validate arguments and the optional session, build the envelope, then
 * (unless `--dry-run`) allocate a source id, promote the raw markdown through
 * the stage gate, write the inbox envelope and append one pending queue row,
 * all inside one lock on the writeback queue. The wiki log entry is appended
 * after the lock is released. Session state is never written here; a
 * `session_state_delta` is returned instead.
 *
 * Exit codes: every validation failure sets `process.exitCode = 1`; a capture
 * refused by the stage gate sets `process.exitCode = 2`.
 *
 * @param {object} args - parsed CLI arguments (`_`, `root`, `session-id`, `type`,
 *   `title`, `notes`, `area`, `sensitivity`, `intent`, `allow-secret`, `dry-run`,
 *   `flush-level`, `target-layer`, `target-path`, `conflict-policy`, `json`, ...).
 * @returns {Promise<object>} whatever `done` renders: `{ json, payload }` or `{ text }`.
 */
export async function captureCommand(args) {
  const root = absPath(args._[0] || args.root || process.cwd());
  const vaultCfg = vaultConfig(root);

  // All validation failures share one payload shape and a non-zero exit code,
  // so text-mode callers can detect the failure just like JSON-mode callers.
  const fail = (error) => {
    process.exitCode = 1;
    return done(args, { ok: false, command: 'capture', durable_write: false, error });
  };

  if (args['source-file'] || args.source) {
    return fail('File capture is not ported to the engine yet (flip gate: asset copy + text extraction + overlap/numeric detection). Use the vault capture tool for files.');
  }

  const sessionId = String(args['session-id'] || '');
  let session = null;
  if (sessionId) {
    let state;
    try {
      state = loadSessionState(root, vaultCfg);
    } catch (error) {
      return fail(error.message);
    }
    const sessions = state.sessions || {};
    // Own-property lookup only: an id such as "constructor" must not resolve
    // to an inherited Object.prototype member and pass as a session.
    session = Object.prototype.hasOwnProperty.call(sessions, sessionId) ? sessions[sessionId] : null;
    if (!session) return fail(`unknown session "${sessionId}"`);
  }

  const text = readArgInput(args, root);
  if (!text.trim()) return fail('No input provided. Pass --text, positional text, or --file.');

  const sourceType = String(args.type || 'memo').toLowerCase();
  const title = args.title || firstLineTitle(text, 'Untitled Capture');
  const routeText = [title, session?.scope || '', args.notes || '', text].filter(Boolean).join('\n');
  const folder = folderForSourceType(sourceType);
  const [year, month] = timestamp().slice(0, 10).split('-');
  const overrides = { area: args.area || (session && session.area), sensitivity: args.sensitivity, intent: args.intent };
  const allowSecret = Boolean(args['allow-secret']);

  // Envelope factory shared by the dry-run and the real capture path.
  const buildEnvelope = (sourceId, status) => {
    const e = makeEnvelope(root, { sourceId, sourceType, title, text: routeText, status, overrides }, { vaultCfg });
    e.raw_path = path.join(vaultCfg.raw_dir, folder, year, month, `${sourceId}-${slugify(title)}.md`).split(path.sep).join('/');
    if (session) {
      e.session_id = sessionId;
      e.session_scope = session.scope || '';
      e.handoff_path = handoffPathFor(vaultCfg, sessionId, session);
    }
    return e;
  };

  // Queue-row factory shared by both paths; explicit CLI flags override the
  // values inferred from the envelope.
  const queueItemFor = (envelope, sourceId) => buildQueueItem(envelope, {
    sourceId,
    rawPath: envelope.raw_path,
    title,
    session,
    sessionId,
    flushLevel: args['flush-level'] || inferFlushLevelFromEnvelope(envelope),
    targetLayer: args['target-layer'] || inferTargetLayerFromIntent(envelope.write_intent),
    targetPath: args['target-path'] || '',
    conflictPolicy: args['conflict-policy'] || 'queue_first',
    handoffPath: handoffPathFor(vaultCfg, sessionId, session),
  });

  if (args['dry-run']) {
    const envelope = buildEnvelope(`raw-${timestamp().slice(0, 10).replace(/-/g, '')}-DRYRUN`, 'planned');
    const queueItem = queueItemFor(envelope, envelope.source_id);
    return done(args, {
      ok: true, command: 'capture', durable_write: false, dry_run: true,
      envelope, queue_item: queueItem,
      would_write: [envelope.raw_path, `${vaultCfg.raw_inbox_dir}/${envelope.source_id}.json`, vaultCfg.writeback_queue, `${vaultCfg.wiki_dir}/log.md`],
    });
  }

  // Single capture critical section: id allocation + raw promote + envelope +
  // queue append are serialized on the queue lock so two captures cannot collide.
  const queueAbs = path.join(root, vaultCfg.writeback_queue);
  ensureDir(path.dirname(queueAbs));
  const outcome = withFileLock(queueAbs, () => {
    const sourceId = nextSourceId(root, vaultCfg);
    const envelope = buildEnvelope(sourceId, 'captured');
    const rawPath = envelope.raw_path;

    // raw markdown goes THROUGH the stage gate: a blocking secret holds it in
    // staging and writes NOTHING durable (no envelope, no queue row).
    const staged = stageAndPromote(root, rawPath, renderCaptureMarkdown(envelope, text), { allowSecret });
    if (!staged.promoted) return { refused: true, staged };

    atomicWriteJson(path.join(root, `${vaultCfg.raw_inbox_dir}/${sourceId}.json`), envelope);

    const queueItem = queueItemFor(envelope, sourceId);
    fs.appendFileSync(queueAbs, `${JSON.stringify(queueItem)}\n`); // direct: already holding the queue lock
    const pendingCount = pendingCountForSession(root, vaultCfg, sessionId); // after append
    return { refused: false, sourceId, envelope, rawPath, pendingCount };
  });

  if (outcome.refused) {
    process.exitCode = 2;
    return done(args, { ok: false, command: 'capture', durable_write: false, refused: true, reason: `secret-like content held in staging: ${outcome.staged.summary}`, staged_path: outcome.staged.staged_path, hits: outcome.staged.hits });
  }

  const { sourceId, envelope, rawPath, pendingCount } = outcome;
  appendWikiLog(root, vaultCfg, 'ingest', title, [
    `Source: ${sourceId}`, `Raw path: ${rawPath}`,
    `Areas: ${envelope.area_candidates.join(', ') || 'none'}`,
    `Sensitivity: ${envelope.sensitivity}`, 'Status: captured',
  ]);

  return done(args, {
    ok: true, command: 'capture', durable_write: true, captured: true,
    source_id: sourceId, raw_path: rawPath,
    envelope_path: `${vaultCfg.raw_inbox_dir}/${sourceId}.json`,
    queue_path: vaultCfg.writeback_queue,
    requires_user_decision: envelope.requires_user_decision,
    envelope,
    // Returned for the caller to apply; this command does not write session state.
    session_state_delta: session ? {
      applied: false,
      reason: 'reserved-for-W-D: engine never writes session-state; the vault shuttle applies this.',
      recompute_pending_at_apply: true,
      session_id: sessionId,
      patch: { pending_count: pendingCount },
      area_brief_note: null,
      handoff_note: null,
    } : null,
  });
}
|
|
166
|
+
|
|
167
|
+
/**
 * Render a capture result for the caller.
 *
 * With `args.json` the payload is passed through untouched as
 * `{ json: true, payload }`. Otherwise a markdown-ish `{ text }` is produced
 * for one of four cases: secret refusal, plain error, dry run, or success.
 *
 * @param {object} args - parsed CLI arguments; only `json` is read.
 * @param {object} payload - result payload built by captureCommand.
 * @returns {{json: true, payload: object}|{text: string}}
 */
function done(args, payload) {
  if (args.json) return { json: true, payload };

  if (!payload.ok && payload.refused) {
    // One header line followed by one line per secret-scan hit.
    const hitLines = (payload.hits || []).map((h) => ` [${h.confidence}] ${h.type}: ${h.sample}`);
    return { text: [`⛔ BLOCKED (secret): ${payload.reason}`, ...hitLines].join('\n') };
  }

  if (!payload.ok) {
    return { text: `# Capture\n\n- ${payload.error}` };
  }

  if (payload.dry_run) {
    return { text: `# Capture Dry Run\n\n- Source ID: ${payload.envelope.source_id}\n- Raw path: ${payload.envelope.raw_path}\n- Areas: ${payload.envelope.area_candidates.join(', ') || 'none'}\n- Sensitivity: ${payload.envelope.sensitivity}\n- Intent: ${payload.envelope.write_intent}` };
  }

  const report = [
    '# Captured',
    '',
    `- Source ID: ${payload.source_id}`,
    `- Raw path: ${payload.raw_path}`,
    `- Envelope: ${payload.envelope_path}`,
    `- Requires user decision: ${payload.requires_user_decision ? 'yes' : 'no'}`,
  ];
  if (payload.session_state_delta) {
    report.push(`- Session-state delta (unapplied, pending_count=${payload.session_state_delta.patch.pending_count}) returned for the vault shuttle.`);
  }
  return { text: report.join('\n') };
}
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
// Registry-driven source classification (W-B). A faithful, root-injected port of
|
|
2
|
+
// the vault's neurain-utils classifyText path: area routing by per-area keyword
|
|
3
|
+
// profiles, sensitivity/write-intent detection, and registry-derived domain and
|
|
4
|
+
// entity tagging. Pure and read-only — it reads the search-index registry and
|
|
5
|
+
// per-area _area.md / index files, never writes. Kept separate from recall_intel
|
|
6
|
+
// (the recall ranker's intel) because the vault's capture/route classifier is a
|
|
7
|
+
// distinct code path with its own keyword-profile scoring; route and
|
|
8
|
+
// plan-writeback parity tracks THIS path, so it is ported verbatim.
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import { absPath, readText } from './fs.mjs';
|
|
12
|
+
import { vaultConfig } from './config.mjs';
|
|
13
|
+
|
|
14
|
+
// Per-root cache of the derived intel + profiles. The vault caches per-process;
|
|
15
|
+
// the engine keys by resolved root so multiple vaults (tests, MCP scoping) never
|
|
16
|
+
// cross-contaminate. One-shot CLI runs read it once; long-lived MCP reuses it.
|
|
17
|
+
const _intelCache = new Map();
|
|
18
|
+
const _profileCache = new Map();
|
|
19
|
+
|
|
20
|
+
/**
 * Return the trimmed first capture group of `regex` in `text`, or '' when the
 * pattern does not match. Falsy `text` is treated as the empty string.
 */
function matchLine(text, regex) {
  const hit = String(text || '').match(regex);
  if (!hit) return '';
  return hit[1].trim();
}
|
|
24
|
+
|
|
25
|
+
// Term match with word boundaries for ASCII (so "cr" does not match inside
|
|
26
|
+
// "across"); CJK and multiword terms fall back to substring. Verbatim from vault.
|
|
27
|
+
/**
 * Test whether `needle` occurs in `haystack` as a term.
 *
 * Purely alphanumeric ASCII needles must be delimited by a non-alphanumeric
 * character (or the string edge) on both sides, case-insensitively. Any other
 * needle (spaces, punctuation, non-ASCII) is matched as a plain substring.
 *
 * @param {string} haystack - text to search.
 * @param {string} needle - term to look for; falsy needles never match.
 * @returns {boolean}
 */
export function hasTerm(haystack, needle) {
  if (!needle) return false;

  const isPlainAscii = /^[a-z0-9]+$/i.test(needle);
  if (isPlainAscii) {
    // Safe to interpolate: the needle was just verified to be [a-z0-9]+ only.
    const bounded = new RegExp(`(^|[^a-z0-9])${needle}([^a-z0-9]|$)`, 'i');
    return bounded.test(haystack);
  }
  return haystack.includes(needle);
}
|
|
32
|
+
|
|
33
|
+
// Per-area routing intelligence derived from each area's OWN registered search
|
|
34
|
+
// index (entities + domain routing) plus the registry sensitivity baseline, so a
|
|
35
|
+
// stranger's vault routes by ITS data and nothing area-specific is hardcoded.
|
|
36
|
+
/**
 * Load (and cache per resolved root) the routing intelligence derived from the
 * search-index registry: per-area entities with lowercased aliases, per-area
 * domain routing keywords, and the registry sensitivity baseline.
 *
 * Read-only and best-effort: an unreadable or unparsable registry yields empty
 * intel, an area without an entity or domain file is skipped, and a malformed
 * registry entry or index record is ignored without discarding its siblings.
 *
 * @param {string} root - vault root directory.
 * @param {object} [vaultCfg] - vault config; `search_index_registry` and `areas_dir` are read.
 * @returns {{entities: object[], domains: object[], sensitivity: object}}
 */
export function loadAreaIntel(root, vaultCfg = vaultConfig(root)) {
  const key = absPath(root);
  if (_intelCache.has(key)) return _intelCache.get(key);

  const out = { entities: [], domains: [], sensitivity: {} };

  // Lowercase every term and drop those shorter than two characters.
  const normalizeTerms = (terms) => (Array.isArray(terms) ? terms : [])
    .map((s) => String(s || '').toLowerCase())
    .filter((s) => s.length >= 2);
  const readJsonList = (file) => {
    const parsed = JSON.parse(readText(file));
    return Array.isArray(parsed) ? parsed : [];
  };

  let reg;
  try {
    reg = JSON.parse(readText(path.join(root, vaultCfg.search_index_registry)));
  } catch {
    reg = { areas: {} };
  }

  for (const [area, def] of Object.entries((reg && reg.areas) || {})) {
    // A null / non-object entry carries no usable definition; skip it rather
    // than throwing on the property reads below.
    if (!def || typeof def !== 'object') continue;

    if (def.sensitivity) out.sensitivity[area] = String(def.sensitivity);
    const areaRoot = def.area_root || `${vaultCfg.areas_dir}/${area}`;
    const idxDir = def.index_dir || 'search-index';

    try {
      const ents = readJsonList(path.join(root, areaRoot, idxDir, def.entities || 'entities.json'));
      for (const e of ents) {
        // Records without a canonical name are not routable; skip only that record.
        if (!e || !e.canonical) continue;
        const aliases = normalizeTerms([e.canonical, ...(Array.isArray(e.aliases) ? e.aliases : [])]);
        out.entities.push({ area, canonical: e.canonical, aliases });
      }
    } catch { /* area may have no entity index */ }

    try {
      const doms = readJsonList(path.join(root, areaRoot, idxDir, def.domain_routing || 'domain-routing.json'));
      for (const d of doms) {
        if (!d || typeof d !== 'object') continue;
        out.domains.push({ area, label: d.label || d.id || '', keywords: normalizeTerms(d.keywords) });
      }
    } catch { /* area may have no domain routing */ }
  }

  _intelCache.set(key, out);
  return out;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Collect the de-duplicated routing keywords for one area: the lowercased slug,
 * the lowercased display name, and every alias of entities registered to that
 * area. Empty strings are dropped.
 */
function keywordsForArea(intel, slug, name) {
  const seen = new Set();
  seen.add(String(slug).toLowerCase());
  seen.add(String(name || '').toLowerCase());

  for (const entity of intel.entities) {
    if (entity.area !== slug) continue;
    for (const alias of entity.aliases) seen.add(alias);
  }

  return Array.from(seen).filter(Boolean);
}
|
|
71
|
+
|
|
72
|
+
// Area keyword profiles: one per 10_areas/<slug> directory, keyworded by slug +
|
|
73
|
+
// _area.md Name + that area's registered entity aliases. Used for area routing.
|
|
74
|
+
/**
 * Load (and cache per resolved root) one keyword profile per directory under
 * the areas directory. Each profile carries the directory slug, the Name /
 * Status / Sensitivity lines parsed from its `_area.md` (defaulting to the
 * slug, 'active' and 'internal'), and the area's routing keywords.
 *
 * @param {string} root - vault root directory.
 * @param {object} [vaultCfg] - vault config; `areas_dir` is read.
 * @param {object} [intel] - area intel used to derive keywords.
 * @returns {object[]} profiles; empty when the areas directory does not exist.
 */
export function loadAreaProfiles(root, vaultCfg = vaultConfig(root), intel = loadAreaIntel(root, vaultCfg)) {
  const cacheKey = absPath(root);
  if (_profileCache.has(cacheKey)) return _profileCache.get(cacheKey);

  const profiles = [];
  const areasAbs = path.join(root, vaultCfg.areas_dir);
  if (fs.existsSync(areasAbs)) {
    for (const entry of fs.readdirSync(areasAbs, { withFileTypes: true })) {
      if (!entry.isDirectory()) continue;
      const slug = entry.name;
      const areaDoc = readText(path.join(areasAbs, slug, '_area.md'), '');
      // Read a `- Field: value` line, falling back when it is absent or empty.
      const field = (pattern, fallback) => matchLine(areaDoc, pattern) || fallback;
      const name = field(/- Name:\s*(.+)/i, slug);
      const status = field(/- Status:\s*(.+)/i, 'active');
      const sensitivity = field(/- Sensitivity:\s*(.+)/i, 'internal');
      profiles.push({ slug, name, status, sensitivity, keywords: keywordsForArea(intel, slug, name) });
    }
  }

  _profileCache.set(cacheKey, profiles);
  return profiles;
}
|
|
95
|
+
|
|
96
|
+
/**
 * List the area ids present in the vault: the names of all non-hidden
 * directories directly under the areas directory.
 *
 * @param {string} root - vault root directory.
 * @param {object} [vaultCfg] - vault config; `areas_dir` is read.
 * @returns {string[]} directory names, or [] when the areas directory is missing.
 */
export function knownAreaIds(root, vaultCfg = vaultConfig(root)) {
  const areasAbs = path.join(root, vaultCfg.areas_dir);
  if (!fs.existsSync(areasAbs)) return [];

  const ids = [];
  for (const entry of fs.readdirSync(areasAbs, { withFileTypes: true })) {
    const visibleDir = entry.isDirectory() && !entry.name.startsWith('.');
    if (visibleDir) ids.push(entry.name);
  }
  return ids;
}
|
|
104
|
+
|
|
105
|
+
// Map a possibly-stale bare slug ("acme") to the current area id ("_acme") when
|
|
106
|
+
// the prefixed area exists, so the underscore-prefix convention never silently
|
|
107
|
+
// breaks a route. An already-correct id is returned unchanged; an unknown id
|
|
108
|
+
// passes through for the caller to validate.
|
|
109
|
+
/**
 * Normalize an area id against the areas that exist in the vault.
 *
 * An exact match is returned as-is; otherwise, if the underscore-prefixed
 * form exists, that is returned. An empty or unknown id passes through
 * (trimmed) for the caller to validate.
 *
 * @param {string} root - vault root directory.
 * @param {string} area - candidate area id.
 * @param {object} [vaultCfg] - vault config.
 * @returns {string}
 */
export function normalizeAreaId(root, area, vaultCfg = vaultConfig(root)) {
  const value = String(area || '').trim();
  if (!value) return value;

  const ids = knownAreaIds(root, vaultCfg);
  const prefixed = `_${value}`;
  const needsPrefix = !ids.includes(value) && ids.includes(prefixed);
  return needsPrefix ? prefixed : value;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Classify text as 'private', 'internal' or 'public'.
 *
 * Precedence: private keywords in the text, then any candidate area whose
 * registry baseline mentions "private", then any area baseline mentioning
 * "internal" or internal keywords in the text; otherwise 'public'.
 *
 * @param {string} text - source text.
 * @param {string[]} [areas] - candidate area ids.
 * @param {object} [intel] - area intel; `sensitivity` maps area id to baseline.
 * @returns {'private'|'internal'|'public'}
 */
export function detectSensitivity(text, areas = [], intel = { sensitivity: {} }) {
  const lower = String(text || '').toLowerCase();

  if (/\b(private|secret|password|credential|seed phrase|mnemonic|passport|ssn|bank account|api key|token)\b/.test(lower)) return 'private';
  if (/개인|민감|비공개|주민|여권|계좌|부채|자산|등기부|가상자산|세금|연봉|급여|병원|의료|가족|주소/.test(lower)) return 'private';

  const baseline = intel.sensitivity || {};
  const anyAreaMatches = (level) => areas.some((a) => level.test(baseline[a] || ''));

  if (anyAreaMatches(/private/i)) return 'private';
  if (anyAreaMatches(/internal/i)) return 'internal';
  if (/\b(internal|foundation|strategy|governance|bd|partner|deal|roadmap)\b/.test(lower)) return 'internal';
  if (/내부|전략|파트너|재단|거버넌스|토크노믹스/.test(lower)) return 'internal';

  return 'public';
}
|
|
137
|
+
|
|
138
|
+
/**
 * Detect the write intent of a piece of text from keyword patterns.
 *
 * Rules are tried in priority order and the first match wins:
 * output_request, create_task, update_current, remember. Text matching none
 * of them is 'evidence_only'.
 *
 * @param {string} text - source text.
 * @returns {string} the detected intent.
 */
export function detectWriteIntent(text) {
  const lower = String(text || '').toLowerCase();

  const rules = [
    [/\b(deliverable|report|deck|slides|draft|export|proposal)\b|보고서|발표|초안|제안서|결과물|산출물/, 'output_request'],
    [/\b(task|todo|action item|follow up|remind|deadline)\b|할일|태스크|액션|마감|리마인드/, 'create_task'],
    [/\b(current|status|update|progress|now)\b|현재|현황|업데이트|진행상황/, 'update_current'],
    [/\b(remember|save|store|classify|summarize|capture|ingest|memo|note)\b|기억|저장|정리|분류|요약|편입|메모/, 'remember'],
  ];

  for (const [pattern, intent] of rules) {
    if (pattern.test(lower)) return intent;
  }
  return 'evidence_only';
}
|
|
146
|
+
|
|
147
|
+
/**
 * Return the labels of registry domains whose keywords occur in the text.
 *
 * A domain is considered only when it has a label and its area is in scope
 * (every area is in scope when `areas` is empty). Labels are de-duplicated in
 * first-seen order. Missing intel, a missing `domains` list, or a record
 * without `keywords` yields no match rather than an exception.
 *
 * @param {string} text - source text.
 * @param {string[]} [areas] - candidate area ids used to scope the search.
 * @param {object} [intel] - area intel; `domains` is a list of `{ area, label, keywords }`.
 * @returns {string[]} matched domain labels.
 */
export function detectDomains(text, areas = [], intel = { domains: [] }) {
  const lower = String(text || '').toLowerCase();
  const inScope = (a) => !areas.length || areas.includes(a);
  const domains = new Set();
  for (const d of intel?.domains || []) {
    if (!d || !d.label || !inScope(d.area)) continue;
    if ((d.keywords || []).some((k) => hasTerm(lower, k))) domains.add(d.label);
  }
  return [...domains];
}
|
|
156
|
+
|
|
157
|
+
/**
 * Return the canonical names of registry entities whose aliases occur in the
 * text.
 *
 * An entity is considered only when its area is in scope (every area is in
 * scope when `areas` is empty). Names are de-duplicated in first-seen order.
 * Missing intel, a missing `entities` list, or a record without `aliases`
 * yields no match rather than an exception.
 *
 * @param {string} text - source text.
 * @param {string[]} [areas] - candidate area ids used to scope the search.
 * @param {object} [intel] - area intel; `entities` is a list of `{ area, canonical, aliases }`.
 * @returns {string[]} matched canonical entity names.
 */
export function detectEntities(text, areas = [], intel = { entities: [] }) {
  const lower = String(text || '').toLowerCase();
  const inScope = (a) => !areas.length || areas.includes(a);
  const entities = new Set();
  for (const e of intel?.entities || []) {
    if (!e || !inScope(e.area)) continue;
    if ((e.aliases || []).some((a) => hasTerm(lower, a))) entities.add(e.canonical);
  }
  return [...entities];
}
|
|
166
|
+
|
|
167
|
+
// The classifier itself. Registry/profiles read from `root`; `hints` may carry an
|
|
168
|
+
// area (boosts its profile), an explicit sensitivity/intent override, and a
|
|
169
|
+
// requiresUserDecision flag. Returns the vault's route shape verbatim.
|
|
170
|
+
/**
 * Classify a piece of text into a route.
 *
 * Each area profile scores one point per keyword found in the text, plus five
 * when `hints.area` names that profile. Areas with a positive score become
 * candidates, highest score first. Sensitivity and write intent come from the
 * hints when given, otherwise from detection. A user decision is required
 * when the hint asks for it, when conflict wording is present, when private
 * content has a non-evidence intent, or when more than one area is a candidate.
 *
 * @param {string} root - vault root directory.
 * @param {string} text - source text.
 * @param {object} [hints] - optional `area`, `sensitivity`, `intent`, `requiresUserDecision`.
 * @param {object} [opts] - optional pre-loaded `vaultCfg`, `intel`, `profiles`.
 * @returns {object} route with area/domain/entity candidates, sensitivity,
 *   write_intent, requires_user_decision and route_score.
 */
export function classifyText(root, text, hints = {}, opts = {}) {
  const vaultCfg = opts.vaultCfg || vaultConfig(root);
  const intel = opts.intel || loadAreaIntel(root, vaultCfg);
  const profiles = opts.profiles || loadAreaProfiles(root, vaultCfg, intel);
  const source = String(text || '');
  const lower = source.toLowerCase();

  // Score every profile, keeping only the positive ones (in profile order).
  const scored = [];
  for (const profile of profiles) {
    let score = 0;
    for (const keyword of profile.keywords) {
      if (hasTerm(lower, keyword.toLowerCase())) score += 1;
    }
    if (hints.area === profile.slug) score += 5;
    if (score > 0) scored.push({ slug: profile.slug, score });
  }

  const routeScore = Object.fromEntries(scored.map(({ slug, score }) => [slug, score]));
  // Sort a copy so route_score keeps profile order while candidates are ranked.
  const areaCandidates = [...scored].sort((a, b) => b.score - a.score).map(({ slug }) => slug);

  const sensitivity = hints.sensitivity || detectSensitivity(source, areaCandidates, intel);
  const writeIntent = hints.intent || detectWriteIntent(source);
  const domainCandidates = detectDomains(source, areaCandidates, intel);
  const entityCandidates = detectEntities(source, areaCandidates, intel);

  const conflict = /\b(conflict|contradiction|contradict|supersede|disagree)\b|모순|충돌|상충|반박|정정/.test(lower);
  const privateWrite = sensitivity === 'private' && writeIntent !== 'evidence_only';
  const ambiguousArea = areaCandidates.length > 1;
  const requiresUserDecision = [Boolean(hints.requiresUserDecision), conflict, privateWrite, ambiguousArea].some(Boolean);

  return {
    area_candidates: areaCandidates,
    domain_candidates: domainCandidates,
    entity_candidates: entityCandidates,
    sensitivity,
    write_intent: writeIntent,
    requires_user_decision: requiresUserDecision,
    route_score: routeScore,
  };
}
|
|
208
|
+
|
|
209
|
+
/**
 * Derive a title from the first non-blank line of `text`.
 *
 * The line is trimmed and truncated to at most 90 UTF-16 code units. When the
 * cut would fall in the middle of a surrogate pair (e.g. an emoji), the
 * dangling high surrogate is dropped so the title stays well-formed text.
 *
 * @param {string} text - source text.
 * @param {string} [fallback] - returned when the text has no non-blank line.
 * @returns {string}
 */
export function firstLineTitle(text, fallback = 'Untitled Capture') {
  const first = String(text || '')
    .split(/\r?\n/)
    .map((line) => line.trim())
    .find(Boolean);
  if (!first) return fallback;

  const limit = 90;
  if (first.length <= limit) return first;

  // A high surrogate as the last kept unit means its low half was cut off.
  const lastUnit = first.charCodeAt(limit - 1);
  const splitsPair = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return first.slice(0, splitsPair ? limit - 1 : limit);
}
|
|
216
|
+
|
|
217
|
+
/**
 * Infer the flush level for an envelope.
 *
 * 'full' when a user decision is required, the content is private, or the
 * intent is update_current / create_task; 'light' for evidence_only;
 * 'standard' for everything else.
 *
 * @param {object} [envelope] - capture envelope.
 * @returns {'full'|'light'|'standard'}
 */
export function inferFlushLevelFromEnvelope(envelope = {}) {
  const { requires_user_decision: needsDecision, sensitivity, write_intent: intent } = envelope;
  if (needsDecision || sensitivity === 'private') return 'full';

  switch (intent) {
    case 'update_current':
    case 'create_task':
      return 'full';
    case 'evidence_only':
      return 'light';
    default:
      return 'standard';
  }
}
|
|
223
|
+
|
|
224
|
+
/**
 * Map a write intent to the target layer it writes to. Intents without a
 * dedicated layer fall through to 'wiki'.
 *
 * @param {string} [intent] - write intent; defaults to 'evidence_only'.
 * @returns {'current'|'task_memory'|'output'|'raw'|'wiki'}
 */
export function inferTargetLayerFromIntent(intent = 'evidence_only') {
  // A Map (not a plain object) so inherited keys can never match.
  const layerByIntent = new Map([
    ['update_current', 'current'],
    ['create_task', 'task_memory'],
    ['output_request', 'output'],
    ['evidence_only', 'raw'],
  ]);
  return layerByIntent.has(intent) ? layerByIntent.get(intent) : 'wiki';
}
|
|
231
|
+
|
|
232
|
+
// Test-only: clear the per-root caches (so a test can rewrite a vault's registry
|
|
233
|
+
// and re-classify in the same process). Never called by command paths.
|
|
234
|
+
/**
 * Empty both per-root caches (area intel and area profiles) so the next
 * classification re-reads the vault from disk.
 */
export function _resetClassifyCache() {
  for (const cache of [_intelCache, _profileCache]) cache.clear();
}
|