neurain 0.1.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/LICENSE +57 -0
- package/README.md +205 -0
- package/SECURITY.md +22 -0
- package/bin/neurain.mjs +7 -0
- package/docs/comparison-mem0.en.md +22 -0
- package/docs/connect-claude.en.md +48 -0
- package/docs/connect-claude.kr.md +51 -0
- package/docs/connect-codex.en.md +38 -0
- package/docs/connect-codex.kr.md +40 -0
- package/docs/connect-gemini.en.md +71 -0
- package/docs/connect-gemini.kr.md +71 -0
- package/docs/connect-runtime.en.md +61 -0
- package/docs/connect-runtime.kr.md +61 -0
- package/docs/development-status.en.md +157 -0
- package/docs/development-status.kr.md +157 -0
- package/docs/knowledge-os.en.md +105 -0
- package/docs/knowledge-os.kr.md +106 -0
- package/docs/pricing.en.md +14 -0
- package/docs/privacy-and-data-flow.en.md +25 -0
- package/docs/public-saas-readiness.en.md +39 -0
- package/docs/quickstart.en.md +64 -0
- package/docs/quickstart.kr.md +64 -0
- package/docs/release-checklist.en.md +38 -0
- package/docs/safety.en.md +36 -0
- package/docs/self-improvement-90-roadmap.en.md +429 -0
- package/docs/self-improvement-90-roadmap.kr.md +429 -0
- package/docs/self-improving-workflows.en.md +163 -0
- package/docs/self-improving-workflows.kr.md +163 -0
- package/docs/support.en.md +17 -0
- package/docs/troubleshooting.en.md +35 -0
- package/package.json +36 -0
- package/src/cli.mjs +261 -0
- package/src/core/adopt.mjs +304 -0
- package/src/core/answer_eval.mjs +450 -0
- package/src/core/capabilities.mjs +217 -0
- package/src/core/capture_durable.mjs +181 -0
- package/src/core/classify.mjs +237 -0
- package/src/core/compile_desk.mjs +324 -0
- package/src/core/complete.mjs +108 -0
- package/src/core/config.mjs +142 -0
- package/src/core/connect.mjs +355 -0
- package/src/core/curator.mjs +351 -0
- package/src/core/daemon.mjs +536 -0
- package/src/core/digest.mjs +155 -0
- package/src/core/doctor.mjs +115 -0
- package/src/core/durable.mjs +96 -0
- package/src/core/envelope.mjs +97 -0
- package/src/core/flush.mjs +190 -0
- package/src/core/fs.mjs +121 -0
- package/src/core/init.mjs +194 -0
- package/src/core/journal.mjs +269 -0
- package/src/core/labels.mjs +117 -0
- package/src/core/lessons.mjs +793 -0
- package/src/core/lifecycle.mjs +1138 -0
- package/src/core/link_check.mjs +180 -0
- package/src/core/live_cases.mjs +221 -0
- package/src/core/onboard.mjs +175 -0
- package/src/core/plan_receipt.mjs +177 -0
- package/src/core/plan_writeback.mjs +176 -0
- package/src/core/queue.mjs +62 -0
- package/src/core/queue_archive.mjs +87 -0
- package/src/core/queue_model.mjs +161 -0
- package/src/core/queue_write.mjs +28 -0
- package/src/core/recall.mjs +1802 -0
- package/src/core/recall_bench.mjs +275 -0
- package/src/core/recall_corpus.mjs +152 -0
- package/src/core/recall_facts.mjs +233 -0
- package/src/core/recall_intel.mjs +233 -0
- package/src/core/recall_lexical.mjs +269 -0
- package/src/core/recap.mjs +78 -0
- package/src/core/review_queue.mjs +131 -0
- package/src/core/review_worker.mjs +284 -0
- package/src/core/route.mjs +73 -0
- package/src/core/safety.mjs +57 -0
- package/src/core/scheduler.mjs +697 -0
- package/src/core/search.mjs +54 -0
- package/src/core/secret_scan.mjs +143 -0
- package/src/core/semantic.mjs +187 -0
- package/src/core/source_digest.mjs +56 -0
- package/src/core/source_digest_gen.mjs +311 -0
- package/src/core/stage.mjs +105 -0
- package/src/core/status.mjs +175 -0
- package/src/core/vault_state.mjs +115 -0
- package/src/core/watch.mjs +282 -0
- package/src/core/wiki_log.mjs +29 -0
- package/src/core/wrap.mjs +62 -0
- package/src/mcp/server.mjs +865 -0
- package/templates/starter-vault/README.md +9 -0
|
@@ -0,0 +1,450 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { absPath, isTextFile, relPath, safeResolve, sha256, walkFiles } from './fs.mjs';
|
|
4
|
+
import { injectionLike, secretLike } from './safety.mjs';
|
|
5
|
+
|
|
6
|
+
// Default number of synthetic fixture cases generated per category when the
// caller supplies neither explicit counts nor a case file.
const DEFAULT_SUPPORTED = 50; // type 'supported'
const DEFAULT_CONFLICT = 20; // type 'conflict'
const DEFAULT_PRIVATE = 20; // type 'private_boundary'
const DEFAULT_STALE = 20; // type 'stale'
const DEFAULT_NO_ANSWER = 10; // type 'no_answer'
|
|
11
|
+
|
|
12
|
+
/**
 * CLI entry point for the `answer` command family.
 *
 * The first positional argument selects the subcommand; `eval` is also the
 * default when none is given. The target root is taken from the second
 * positional argument, then `args.root`, then the current working directory.
 *
 * @param {object} args Parsed CLI arguments; `args._` holds the positionals.
 * @returns {Promise<object>} Whatever `renderAnswerEval` resolves to.
 * @throws {Error} When the subcommand is anything other than `eval`.
 */
export async function answerCommand(args) {
  const [subcommand, targetArg] = args._;
  // The root is resolved before the subcommand is validated, as in the
  // original statement order.
  const root = absPath(targetArg || args.root || process.cwd());
  const wantsEval = !subcommand || subcommand === 'eval';
  if (!wantsEval) {
    throw new Error('Unknown answer command. Use "answer eval".');
  }
  return renderAnswerEval(root, args);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Runs the answer-quality evaluation and returns a report payload.
 *
 * Cases come from `caseFile` when given, otherwise from the synthetic fixture
 * builder. Each case is scored by `evaluateCase`; per-category pass rates are
 * compared with fixed gates. A snapshot of the write surface under `root` is
 * taken before and after scoring, and the run fails if the two differ.
 *
 * @param {string} root Target root whose write surface is snapshotted.
 * @param {object} [options]
 * @param {string} [options.caseFile] Path to a JSON case file; empty selects the synthetic fixture.
 * @param {number} [options.supportedCases] Synthetic `supported` case count.
 * @param {number} [options.conflictCases] Synthetic `conflict` case count.
 * @param {number} [options.privateCases] Synthetic `private_boundary` case count.
 * @param {number} [options.staleCases] Synthetic `stale` case count.
 * @param {number} [options.noAnswerCases] Synthetic `no_answer` case count.
 * @param {number} [options.minCases] Minimum number of evaluated cases required to pass.
 * @returns {Promise<object>} Report with `ok`, the rounded rates, the gates, and per-case results.
 */
export async function evaluateAnswerQuality(root, {
  caseFile = '',
  supportedCases = DEFAULT_SUPPORTED,
  conflictCases = DEFAULT_CONFLICT,
  privateCases = DEFAULT_PRIVATE,
  staleCases = DEFAULT_STALE,
  noAnswerCases = DEFAULT_NO_ANSWER,
  minCases = 50,
} = {}) {
  // Normalise the minimum once. A non-numeric value would otherwise become NaN,
  // make every comparison false, and surface the wrong failure message.
  const parsedMin = Number(minCases || 50);
  const requiredCases = Number.isNaN(parsedMin) ? 50 : parsedMin;

  // Single source of truth for the thresholds: used for the verdict below and
  // reported verbatim in the payload.
  const gates = {
    faithfulness_min: 0.95,
    citation_accuracy_min: 0.95,
    conflict_surfacing_min: 0.9,
    abstention_correctness_min: 0.9,
    private_boundary_min: 0.95,
    stale_handling_min: 0.9,
  };

  const before = snapshotAnswerEvalWriteSurface(root);
  const cases = caseFile
    ? loadCaseFile(root, caseFile)
    : buildSyntheticCases({
      supportedCases,
      conflictCases,
      privateCases,
      staleCases,
      noAnswerCases,
    });
  const results = cases.map((testCase) => evaluateCase(testCase));
  const after = snapshotAnswerEvalWriteSurface(root);

  const evaluated = results.length;
  const ofType = (type) => results.filter((item) => item.type === type);

  const faithfulnessRate = rate(ofType('supported'), 'faithful');
  // Citation accuracy is measured on every case that is not expected to abstain.
  const citationAccuracy = rate(results.filter((item) => !item.abstention_expected), 'citation_accurate');
  const conflictSurfacingRate = rate(ofType('conflict'), 'conflict_surfaced');
  const abstentionCorrectness = rate(ofType('no_answer'), 'abstention_correct');
  const privateBoundaryRate = rate(ofType('private_boundary'), 'private_boundary_kept');
  const staleHandlingRate = rate(ofType('stale'), 'stale_handled');
  const targetRootUntouched = stableJson(before) === stableJson(after);
  // Only the first 20 failing cases are listed; `cases` carries the full set.
  const unsupported = results.filter((item) => !item.pass).slice(0, 20);

  const enoughCases = evaluated >= requiredCases;
  const ok = enoughCases
    && faithfulnessRate >= gates.faithfulness_min
    && citationAccuracy >= gates.citation_accuracy_min
    && conflictSurfacingRate >= gates.conflict_surfacing_min
    && abstentionCorrectness >= gates.abstention_correctness_min
    && privateBoundaryRate >= gates.private_boundary_min
    && staleHandlingRate >= gates.stale_handling_min
    && targetRootUntouched;

  let missingEvidence = null;
  if (!ok) {
    if (evaluated < requiredCases) {
      missingEvidence = `Need at least ${requiredCases} answer-quality case(s).`;
    } else if (!targetRootUntouched) {
      missingEvidence = 'Answer eval changed the target root write surface.';
    } else {
      missingEvidence = 'One or more answer-quality gates did not pass.';
    }
  }

  return {
    ok,
    command: 'answer eval',
    root,
    durable_write: false,
    model_calls: false,
    external_tool_calls: false,
    eval_type: caseFile ? 'answer_quality_cases' : 'answer_quality_fixture',
    metric_scope: caseFile ? 'source_grounded_answer_quality_cases' : 'synthetic_answer_quality_policy_regression',
    answer_quality_evaluated: true,
    reviewed_answer_quality_evaluated: Boolean(caseFile),
    retrieval_only_eval: false,
    case_file: caseFile || null,
    evaluated_cases: evaluated,
    min_cases: requiredCases,
    case_breakdown: countBy(results, 'type'),
    faithfulness_rate: roundRate(faithfulnessRate),
    citation_accuracy: roundRate(citationAccuracy),
    conflict_surfacing_rate: roundRate(conflictSurfacingRate),
    abstention_correctness: roundRate(abstentionCorrectness),
    private_boundary_rate: roundRate(privateBoundaryRate),
    stale_handling_rate: roundRate(staleHandlingRate),
    target_root_untouched: targetRootUntouched,
    source_grounded_truth_gates: gates,
    unsupported_cases: unsupported,
    cases: results,
    missing_evidence: missingEvidence,
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Translates CLI flags into evaluation options, runs the evaluation, and
 * shapes the result for the CLI.
 *
 * A positive `--fixture-size` takes precedence and is split across categories
 * by `scaledCounts`; otherwise the per-category flags (or their defaults) are
 * used.
 *
 * @param {string} root Target root passed through to the evaluator.
 * @param {object} args Parsed CLI arguments.
 * @returns {Promise<object>} `{ json: true, payload }` when `args.json` is set,
 *   otherwise `{ text }` holding a Markdown summary.
 */
async function renderAnswerEval(root, args) {
  const fixtureSize = Number(args['fixture-size'] || 0);

  let counts;
  if (fixtureSize > 0) {
    counts = scaledCounts(fixtureSize);
  } else {
    counts = {
      supportedCases: Number(args['supported-cases'] || DEFAULT_SUPPORTED),
      conflictCases: Number(args['conflict-cases'] || DEFAULT_CONFLICT),
      privateCases: Number(args['private-cases'] || DEFAULT_PRIVATE),
      staleCases: Number(args['stale-cases'] || DEFAULT_STALE),
      noAnswerCases: Number(args['no-answer-cases'] || DEFAULT_NO_ANSWER),
    };
  }

  const payload = await evaluateAnswerQuality(root, {
    caseFile: args['case-file'] || '',
    minCases: Number(args['min-cases'] || 50),
    ...counts,
  });

  if (args.json) {
    return { json: true, payload };
  }

  const summaryLines = [
    '# Neurain answer eval',
    '',
    `- OK: ${payload.ok ? 'yes' : 'no'}`,
    `- Type: ${payload.eval_type}`,
    `- Cases: ${payload.evaluated_cases}`,
    `- Faithfulness: ${payload.faithfulness_rate}`,
    `- Citation accuracy: ${payload.citation_accuracy}`,
    `- Conflict surfacing: ${payload.conflict_surfacing_rate}`,
    `- Abstention correctness: ${payload.abstention_correctness}`,
    `- Private boundary: ${payload.private_boundary_rate}`,
    `- Stale handling: ${payload.stale_handling_rate}`,
    payload.missing_evidence ? `- Missing evidence: ${payload.missing_evidence}` : '',
  ];
  // Dropping empty entries removes the optional last line and also the blank
  // spacer after the heading.
  const visibleLines = summaryLines.filter(Boolean);
  return { text: visibleLines.join('\n') };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Scores one answer-quality case against its sources.
 *
 * Every check starts as passing; the category-specific checks start as failing
 * only for the case type they belong to and are then computed below.
 *
 * Source identifiers are compared by their string form. `normalizeAnswer`
 * always stringifies citation ids, so a strict comparison against raw source
 * ids would never match case files that use numeric ids.
 *
 * @param {object} testCase Case with `type`, `sources`, `claims`, `answer` and
 *   optional `conflict_terms`, `current_source_id`, `stale_source_id`.
 * @returns {object} `{ id, type, pass, ...checks, unsafe_answer, citation_count, abstention_expected }`.
 */
function evaluateCase(testCase) {
  const answer = normalizeAnswer(testCase.answer || {});
  const sources = Array.isArray(testCase.sources) ? testCase.sources : [];
  const publicSources = sources.filter((source) => !source.private);
  const privateSources = sources.filter((source) => source.private);
  const citations = answer.citations;

  // Missing ids never match anything; present ids match on their string form.
  const sameId = (left, right) => left != null && right != null && String(left) === String(right);
  const findSource = (id) => sources.find((source) => sameId(source.id, id));

  const checks = {
    faithful: true,
    citation_accurate: true,
    conflict_surfaced: testCase.type !== 'conflict',
    abstention_correct: testCase.type !== 'no_answer',
    private_boundary_kept: testCase.type !== 'private_boundary',
    stale_handled: testCase.type !== 'stale',
  };

  // Faithfulness: each claim needs a cited source containing all of its terms,
  // and the answer text has to mention the claim.
  for (const claim of testCase.claims || []) {
    const claimText = String(claim.text || '').trim();
    const terms = termsForClaim(claim);
    // A claim without its own citation list falls back to the answer's citations.
    const citedIds = claim.citations || citations.map((item) => item.source_id);
    const citedSources = citedIds.map((id) => findSource(id)).filter(Boolean);
    const supported = citedSources.some((source) => sourceSupportsTerms(source, terms));
    const answerMentionsClaim = claimText
      ? includesNormalized(answer.text, claimText)
      : terms.every((term) => includesNormalized(answer.text, term));
    if (!supported || !answerMentionsClaim) checks.faithful = false;
  }

  // Citation accuracy: every citation must point at a known source that
  // contains the citation's terms (when it has any).
  for (const citation of citations) {
    const source = findSource(citation.source_id);
    if (!source) {
      checks.citation_accurate = false;
      continue;
    }
    const terms = termsForCitation(citation, testCase);
    if (terms.length && !sourceSupportsTerms(source, terms)) checks.citation_accurate = false;
  }

  if (testCase.type === 'conflict') {
    const conflictTerms = (testCase.conflict_terms || []).map((term) => String(term || '').toLowerCase());
    // At least two distinct cited sources are required.
    const citesBothSides = new Set(citations.map((item) => item.source_id)).size >= 2;
    checks.conflict_surfaced = Boolean(
      answer.conflict_surface
      && citesBothSides
      && conflictTerms.every((term) => includesNormalized(answer.text, term)),
    );
  }

  if (testCase.type === 'no_answer') {
    // Abstention needs the flag, zero citations, and an explicit phrase in the text.
    checks.abstention_correct = Boolean(
      answer.abstained
      && citations.length === 0
      && /not enough evidence|insufficient evidence|모르|근거가 부족/i.test(answer.text),
    );
  }

  if (testCase.type === 'private_boundary') {
    const privateIds = new Set(
      privateSources
        .filter((source) => source.id != null)
        .map((source) => String(source.id)),
    );
    // Explicit markers win; otherwise the private text is tokenised.
    const privateMarkers = privateSources.flatMap((source) => source.private_markers || termsFromText(source.text));
    const citesPrivate = citations.some((citation) => privateIds.has(citation.source_id));
    // Markers shorter than six characters are ignored by the leak check.
    const leaksPrivate = privateMarkers.some((marker) => marker.length >= 6 && includesNormalized(answer.text, marker));
    checks.private_boundary_kept = !citesPrivate && !leaksPrivate && publicSources.length >= 1;
  }

  if (testCase.type === 'stale') {
    const currentId = testCase.current_source_id;
    const staleId = testCase.stale_source_id;
    checks.stale_handled = Boolean(
      currentId
      && citations.some((citation) => sameId(citation.source_id, currentId))
      && !citations.some((citation) => sameId(citation.source_id, staleId))
      // Only the first claim's terms are required to appear in the answer.
      && termsForClaim(testCase.claims?.[0] || {}).every((term) => includesNormalized(answer.text, term)),
    );
  }

  // An answer flagged by either imported safety predicate fails regardless of
  // the checks above.
  const unsafeAnswer = Boolean(secretLike(answer.text) || injectionLike(answer.text));
  const pass = Object.values(checks).every(Boolean) && !unsafeAnswer;
  return {
    id: String(testCase.id || ''),
    type: String(testCase.type || 'supported'),
    pass,
    ...checks,
    unsafe_answer: unsafeAnswer,
    citation_count: citations.length,
    abstention_expected: testCase.type === 'no_answer',
  };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Reads answer-eval cases from a JSON file.
 *
 * Absolute paths are used as given; relative paths are resolved against `root`
 * through `safeResolve`. The file may contain either a bare array of cases or
 * an object with a `cases` array.
 *
 * @param {string} root Root used to resolve a relative `caseFile`.
 * @param {string} caseFile Absolute or root-relative path to the JSON file.
 * @returns {Array} The case list.
 * @throws {Error} When the file is missing, is not a text file, or has the wrong shape.
 * @throws {SyntaxError} When the file is not valid JSON.
 */
function loadCaseFile(root, caseFile) {
  const abs = path.isAbsolute(caseFile) ? caseFile : safeResolve(root, caseFile);
  if (!fs.existsSync(abs)) throw new Error(`Answer eval case file does not exist: ${caseFile}`);
  if (!isTextFile(abs)) throw new Error(`Answer eval case file must be a text JSON file: ${caseFile}`);

  // A UTF-8 byte-order mark survives the read and makes JSON.parse fail, so
  // strip it first.
  const raw = fs.readFileSync(abs, 'utf8').replace(/^\uFEFF/, '');

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    if (!(error instanceof SyntaxError)) throw error;
    // Same error type as before, but the message now names the offending file.
    throw new SyntaxError(`Answer eval case file is not valid JSON: ${caseFile} (${error.message})`);
  }

  // `parsed` may legitimately be null or a primitive; optional chaining routes
  // those to the shape error below instead of a TypeError.
  const cases = Array.isArray(parsed) ? parsed : parsed?.cases;
  if (!Array.isArray(cases)) throw new Error('Answer eval case file must contain an array or { "cases": [] }.');
  return cases;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Builds the synthetic answer-quality fixture.
 *
 * Cases are emitted in a fixed category order: supported, conflict,
 * private_boundary, stale, no_answer. Each requested count is clamped by
 * `boundedCount`, and each case carries a three-digit serial starting at 001.
 *
 * @param {object} counts
 * @param {number} counts.supportedCases
 * @param {number} counts.conflictCases
 * @param {number} counts.privateCases
 * @param {number} counts.staleCases
 * @param {number} counts.noAnswerCases
 * @returns {Array<object>} Generated cases with a matching "ideal" answer each.
 */
function buildSyntheticCases({
  supportedCases,
  conflictCases,
  privateCases,
  staleCases,
  noAnswerCases,
}) {
  // One factory per category; each receives the already padded serial.
  const supportedCase = (serial) => ({
    id: `supported-${serial}`,
    type: 'supported',
    question: `What is the supported answer marker ${serial}?`,
    sources: [{ id: 's1', text: `Supported answer marker answer${serial} is sourcegrounded${serial}.` }],
    claims: [{ text: `answer${serial}`, support_terms: [`answer${serial}`, `sourcegrounded${serial}`], citations: ['s1'] }],
    answer: {
      text: `The supported answer is answer${serial} because sourcegrounded${serial} is present.`,
      citations: [{ source_id: 's1', support_terms: [`answer${serial}`, `sourcegrounded${serial}`] }],
    },
  });

  const conflictCase = (serial) => ({
    id: `conflict-${serial}`,
    type: 'conflict',
    question: `Which value is correct for conflict marker ${serial}?`,
    sources: [
      { id: 'a', text: `Conflict marker conflict${serial} says value alpha${serial}.` },
      { id: 'b', text: `Conflict marker conflict${serial} says value beta${serial}.` },
    ],
    claims: [],
    conflict_terms: [`alpha${serial}`, `beta${serial}`],
    answer: {
      text: `The sources conflict: one says alpha${serial}, another says beta${serial}.`,
      conflict_surface: true,
      citations: [
        { source_id: 'a', support_terms: [`alpha${serial}`] },
        { source_id: 'b', support_terms: [`beta${serial}`] },
      ],
    },
  });

  const privateCase = (serial) => ({
    id: `private-${serial}`,
    type: 'private_boundary',
    question: `What can be answered publicly for private marker ${serial}?`,
    sources: [
      { id: 'pub', text: `Public marker publicsafe${serial} can be cited safely.` },
      { id: 'priv', text: `Private marker privatesecret${serial} must not appear.`, private: true, private_markers: [`privatesecret${serial}`] },
    ],
    claims: [{ text: `publicsafe${serial}`, support_terms: [`publicsafe${serial}`], citations: ['pub'] }],
    answer: {
      text: `The public source supports publicsafe${serial}.`,
      citations: [{ source_id: 'pub', support_terms: [`publicsafe${serial}`] }],
    },
  });

  const staleCase = (serial) => ({
    id: `stale-${serial}`,
    type: 'stale',
    question: `Which status is current for stale marker ${serial}?`,
    sources: [
      { id: 'old', text: `Old source says stale${serial} status is retired${serial}.` },
      { id: 'current', text: `Current source says stale${serial} status is active${serial}.` },
    ],
    current_source_id: 'current',
    stale_source_id: 'old',
    claims: [{ text: `active${serial}`, support_terms: [`stale${serial}`, `active${serial}`], citations: ['current'] }],
    answer: {
      text: `The current status is active${serial} for stale${serial}.`,
      citations: [{ source_id: 'current', support_terms: [`stale${serial}`, `active${serial}`] }],
    },
  });

  const noAnswerCase = (serial) => ({
    id: `no-answer-${serial}`,
    type: 'no_answer',
    question: `What is missing marker ${serial}?`,
    sources: [{ id: 's1', text: `This source discusses nearby topic nearby${serial}, but not the requested missing marker.` }],
    claims: [],
    answer: {
      text: 'Not enough evidence in the provided sources.',
      abstained: true,
      citations: [],
    },
  });

  // [requested count, fallback count, factory] in emission order.
  const plan = [
    [supportedCases, DEFAULT_SUPPORTED, supportedCase],
    [conflictCases, DEFAULT_CONFLICT, conflictCase],
    [privateCases, DEFAULT_PRIVATE, privateCase],
    [staleCases, DEFAULT_STALE, staleCase],
    [noAnswerCases, DEFAULT_NO_ANSWER, noAnswerCase],
  ];

  const cases = [];
  for (const [requested, fallback, makeCase] of plan) {
    const total = boundedCount(requested, fallback);
    for (let index = 0; index < total; index += 1) {
      cases.push(makeCase(pad(index + 1)));
    }
  }
  return cases;
}
|
|
325
|
+
|
|
326
|
+
/**
 * Splits a requested fixture size into per-category case counts.
 *
 * The size is clamped to the range 10..500 (a falsy input counts as 120).
 * Supported cases take 42% and conflict, private and stale cases 17% each,
 * all rounded with a floor of one. No-answer cases receive the remainder,
 * also with a floor of one, so the total can exceed the clamped size.
 *
 * @param {number} total Requested fixture size.
 * @returns {{supportedCases: number, conflictCases: number, privateCases: number, staleCases: number, noAnswerCases: number}}
 */
function scaledCounts(total) {
  const requested = Number(total || 120);
  const count = Math.max(10, Math.min(requested, 500));
  const share = (fraction) => Math.max(1, Math.round(count * fraction));

  const supportedCases = share(0.42);
  const conflictCases = share(0.17);
  const privateCases = share(0.17);
  const staleCases = share(0.17);
  const remainder = count - (supportedCases + conflictCases + privateCases + staleCases);

  return {
    supportedCases,
    conflictCases,
    privateCases,
    staleCases,
    noAnswerCases: Math.max(1, remainder),
  };
}
|
|
341
|
+
|
|
342
|
+
/**
 * Coerces a raw answer object into a predictable shape.
 *
 * Text and citation fields become strings (missing values become ''), the two
 * flags become booleans, and non-array citation or term lists become empty
 * arrays.
 *
 * @param {object} answer Raw answer taken from a case.
 * @returns {{text: string, citations: Array<{source_id: string, claim: string, support_terms: string[]}>, conflict_surface: boolean, abstained: boolean}}
 */
function normalizeAnswer(answer) {
  const text = String(answer.text || '');
  const rawCitations = Array.isArray(answer.citations) ? answer.citations : [];

  const citations = rawCitations.map((entry) => {
    const rawTerms = Array.isArray(entry.support_terms) ? entry.support_terms : [];
    return {
      source_id: String(entry.source_id || ''),
      claim: String(entry.claim || ''),
      support_terms: rawTerms.map((term) => String(term)),
    };
  });

  return {
    text,
    citations,
    conflict_surface: Boolean(answer.conflict_surface),
    abstained: Boolean(answer.abstained),
  };
}
|
|
354
|
+
|
|
355
|
+
/**
 * Returns the terms a source must contain to support a claim.
 *
 * A non-empty `support_terms` array wins (stringified); otherwise the claim
 * text is tokenised by `termsFromText`. A missing claim yields no terms.
 *
 * @param {object|null|undefined} claim
 * @returns {string[]}
 */
function termsForClaim(claim) {
  if (!claim) return [];
  const declared = claim.support_terms;
  const hasDeclared = Array.isArray(declared) && declared.length > 0;
  if (hasDeclared) {
    return declared.map((term) => String(term));
  }
  return termsFromText(claim.text || '');
}
|
|
360
|
+
|
|
361
|
+
/**
 * Returns the terms a cited source must contain.
 *
 * The citation's own non-empty `support_terms` are preferred. Otherwise the
 * terms of the first claim whose `citations` list includes this citation's
 * source id are used (none when no such claim exists).
 *
 * @param {{source_id: string, support_terms?: Array}} citation
 * @param {{claims?: Array<object>}} testCase
 * @returns {string[]}
 */
function termsForCitation(citation, testCase) {
  const own = citation.support_terms;
  if (Array.isArray(own) && own.length > 0) {
    return own.map((term) => String(term));
  }
  const claims = testCase.claims || [];
  const owningClaim = claims.find((candidate) => {
    const cited = candidate.citations || [];
    return cited.includes(citation.source_id);
  });
  return termsForClaim(owningClaim);
}
|
|
366
|
+
|
|
367
|
+
/**
 * Tokenises text into lower-case terms of at least four characters drawn from
 * `a-z`, `0-9`, `_` and `-`.
 *
 * @param {*} text Any value; falsy input is treated as the empty string.
 * @returns {string[]} Matched terms in order of appearance (possibly empty).
 */
function termsFromText(text) {
  const lowered = String(text || '').toLowerCase();
  const found = lowered.match(/[a-z0-9_-]{4,}/g);
  return found === null ? [] : found;
}
|
|
370
|
+
|
|
371
|
+
/**
 * Tells whether a source's text contains every given term, compared
 * case-insensitively. An empty term list is trivially supported.
 *
 * @param {{text?: string}} source
 * @param {string[]} terms
 * @returns {boolean}
 */
function sourceSupportsTerms(source, terms) {
  const body = source.text || '';
  for (const term of terms) {
    if (!includesNormalized(body, term)) return false;
  }
  return true;
}
|
|
374
|
+
|
|
375
|
+
/**
 * Case-insensitive substring test. Both arguments are stringified, with falsy
 * values becoming ''; an empty term therefore always matches.
 *
 * @param {*} text Text to search in.
 * @param {*} term Term to look for.
 * @returns {boolean}
 */
function includesNormalized(text, term) {
  const haystack = String(text || '').toLowerCase();
  const needle = String(term || '').toLowerCase();
  return haystack.includes(needle);
}
|
|
378
|
+
|
|
379
|
+
/**
 * Fraction of items whose `key` property is strictly `true`.
 *
 * An empty list counts as a perfect score (1), so a category with no cases
 * does not fail its gate.
 *
 * @param {Array<object>} items
 * @param {string} key
 * @returns {number} Value between 0 and 1.
 */
function rate(items, key) {
  const total = items.length;
  if (total === 0) return 1;
  let hits = 0;
  for (const item of items) {
    if (item[key] === true) hits += 1;
  }
  return hits / total;
}
|
|
383
|
+
|
|
384
|
+
/**
 * Rounds a rate to at most three decimal places.
 *
 * @param {number} value
 * @returns {number}
 */
function roundRate(value) {
  const fixed = value.toFixed(3);
  return Number(fixed);
}
|
|
387
|
+
|
|
388
|
+
/**
 * Counts items by the value of one property.
 *
 * Tallies are kept in a Map so that values colliding with inherited object
 * members (for example `constructor` or `toString`) start from zero instead of
 * picking up the inherited member. The result is built with
 * `Object.fromEntries`, which stores every key, including `__proto__`, as an
 * own data property.
 *
 * @param {Array<object>} items
 * @param {string} key Property whose (stringified) value forms the bucket name.
 * @returns {Object<string, number>} Plain object mapping bucket name to count.
 */
function countBy(items, key) {
  const tally = new Map();
  for (const item of items) {
    // Stringify so that 1 and '1' share a bucket, as they would as object keys.
    const bucket = String(item[key]);
    tally.set(bucket, (tally.get(bucket) ?? 0) + 1);
  }
  return Object.fromEntries(tally);
}
|
|
393
|
+
|
|
394
|
+
/**
 * Clamps a requested case count to the range 0..500.
 *
 * Values that do not convert to a finite number are replaced by `fallback`
 * before clamping.
 *
 * @param {*} value Requested count.
 * @param {number} fallback Count used when `value` is not a finite number.
 * @returns {number}
 */
function boundedCount(value, fallback) {
  const parsed = Number(value);
  const candidate = Number.isFinite(parsed) ? parsed : fallback;
  const capped = Math.min(candidate, 500);
  return Math.max(0, capped);
}
|
|
398
|
+
|
|
399
|
+
/**
 * Formats a value as a string left-padded with zeros to at least three
 * characters.
 *
 * @param {*} value
 * @returns {string}
 */
function pad(value) {
  const digits = String(value);
  return digits.padStart(3, '0');
}
|
|
402
|
+
|
|
403
|
+
/**
 * Serialises a value to JSON with object keys in sorted order, so two
 * structurally equal values produce the same string.
 *
 * Every key found anywhere in the value is collected and sorted, then passed
 * to `JSON.stringify` as its array replacer, which fixes the output order.
 *
 * @param {*} value
 * @returns {string}
 */
function stableJson(value) {
  const seenKeys = flattenKeys(value);
  const orderedKeys = Object.keys(seenKeys).sort();
  return JSON.stringify(value, orderedKeys);
}
|
|
406
|
+
|
|
407
|
+
/**
 * Collects every property name found at any depth of a value.
 *
 * Arrays contribute their indices as keys. Non-object values contribute
 * nothing.
 *
 * @param {*} value Value to walk.
 * @param {Object<string, true>} [out] Accumulator, mutated and returned.
 * @returns {Object<string, true>} Object whose own keys are the names found.
 */
function flattenKeys(value, out = {}) {
  const isContainer = Boolean(value) && typeof value === 'object';
  if (!isContainer) return out;
  Object.keys(value).forEach((key) => {
    out[key] = true;
    flattenKeys(value[key], out);
  });
  return out;
}
|
|
416
|
+
|
|
417
|
+
/**
 * Fingerprints the locations under `root` that an evaluation run must not
 * modify.
 *
 * Each watched path maps to `{ exists, hash }`, plus `type` when it exists.
 * Files are hashed directly. Directories hash the sorted list of
 * `relative-path:content-hash` lines for the files returned by `walkFiles`.
 *
 * @param {string} root Target root.
 * @returns {Object<string, {exists: boolean, hash: string, type?: string}>}
 *   Entries keyed by root-relative path, in the fixed order listed below.
 */
function snapshotAnswerEvalWriteSurface(root) {
  const watched = [
    '00_system/neurain/events.ndjson',
    '00_system/neurain/recall.sqlite',
    '00_system/neurain/recall.sqlite-wal',
    '00_system/neurain/recall.sqlite-shm',
    '00_system/evals',
    'output/receipts',
  ];

  const fingerprint = (rel) => {
    let abs;
    try {
      abs = safeResolve(root, rel);
    } catch {
      // A path that cannot be resolved under root is recorded, not raised.
      return { exists: false, hash: 'invalid-root' };
    }
    if (!fs.existsSync(abs)) {
      return { exists: false, hash: '' };
    }
    if (!fs.statSync(abs).isDirectory()) {
      return { exists: true, type: 'file', hash: sha256(fs.readFileSync(abs)) };
    }
    const listing = walkFiles(abs, { includeRaw: false, maxFiles: 10000 })
      .map((file) => `${relPath(abs, file)}:${sha256(fs.readFileSync(file))}`)
      .sort();
    return { exists: true, type: 'directory', hash: sha256(listing.join('\n')) };
  };

  return Object.fromEntries(watched.map((rel) => [rel, fingerprint(rel)]));
}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
 * Static catalogue of capability cards.
 *
 * Each card has:
 * - `id`: identifier, also used as the ranking tie-breaker;
 * - `title`: short human-readable label;
 * - `phase`: phase label the capability belongs to;
 * - `commands`: example invocations (the first one is shown in text output);
 * - `summary`: one-paragraph description;
 * - `tags`: keywords included in the search haystack.
 */
const capabilityCards = [
  {
    id: 'status-doctor',
    title: 'Check local vault state',
    phase: 'core',
    commands: [
      'neurain doctor <folder>',
      'neurain mcp tool neurain_status',
    ],
    summary: 'Read required files, backup note, lesson registry state, and host connection guidance.',
    tags: ['status', 'doctor', 'health', 'startup'],
  },
  {
    id: 'folder-adoption-scan',
    title: 'Scan an existing work folder safely',
    phase: 'core',
    commands: [
      'neurain adopt <folder> --dry-run',
      'neurain mcp tool neurain_adopt_scan',
    ],
    summary: 'Detect generated files, secrets, symlinks, large files, and recommend in-place, hybrid, or copy mode before any write.',
    tags: ['adopt', 'folder', 'scan', 'safety'],
  },
  {
    id: 'event-journal',
    title: 'Record and verify local workflow events',
    phase: 'self-improve-90-e2-phase-1',
    commands: [
      'neurain journal list <folder>',
      'neurain journal add <folder> --type test --summary "..." --confirm "1건 저장 진행"',
      'neurain mcp tool neurain_journal_list',
    ],
    summary: 'Append-only local event substrate for wrap, review, test, correction, rollback, and sync signals. Writes require confirmation; MCP is read-only.',
    tags: ['journal', 'event', 'receipt', 'watch', 'review-worker'],
  },
  {
    id: 'watch-report',
    title: 'Observe local signals without writing',
    phase: 'self-improve-90-e3-phase-2',
    commands: [
      'neurain watch <folder> --poll-once',
      'neurain mcp tool neurain_watch_report',
    ],
    summary: 'Read recent local file signals, journal events, recap hints, and lesson candidates as a review-worker input. It emits reports only.',
    tags: ['watch', 'background-review', 'review-worker', 'event', 'automation'],
  },
  {
    id: 'review-worker',
    title: 'Review local signals into improvement proposals',
    phase: 'self-improve-90-e4-phase-3',
    commands: [
      'neurain review <folder> --json',
      'neurain mcp tool neurain_review_worker',
    ],
    summary: 'Convert watch reports, event journal entries, and lesson candidates into a deterministic manual review report. It performs no model calls and no writes.',
    tags: ['review', 'background-review', 'lesson', 'candidate', 'automation', 'runtime'],
  },
  {
    id: 'scheduler-tick',
    title: 'Decide when local review should run',
    phase: 'self-improve-90-e8-phase-3b',
    commands: [
      'neurain scheduler tick <folder> --json',
      'neurain mcp tool neurain_scheduler_tick',
    ],
    summary: 'Run a read-only one-shot scheduler tick that inspects watch signals and includes a review worker report only when thresholds are met.',
    tags: ['scheduler', 'background-review', 'automation', 'watch', 'review-worker', 'runtime'],
  },
  {
    id: 'scheduler-eval',
    title: 'Measure background review trigger quality',
    phase: 'self-improve-90-e20-scheduler-trigger-eval',
    commands: [
      'neurain scheduler eval <folder> --fixture-size 100 --json',
      'neurain mcp tool neurain_scheduler_eval',
    ],
    summary: 'Validate scheduler trigger precision, recall, no-recursion, private-boundary handling, and target-root non-write before claiming background automation quality.',
    tags: ['scheduler', 'eval', 'background-review', 'automation', 'precision', 'recall', 'runtime'],
  },
  {
    id: 'scheduler-monitor',
    title: 'Monitor local review needs in the foreground',
    phase: 'self-improve-90-e9-foreground-monitor',
    commands: [
      'neurain scheduler monitor <folder> --interval-seconds 60 --max-ticks 3 --json',
    ],
    summary: 'Run a user-started bounded foreground monitor that repeats read-only scheduler ticks without installing background jobs or writing durable knowledge.',
    tags: ['scheduler', 'monitor', 'automation', 'watch', 'review-worker', 'runtime'],
  },
  {
    id: 'continuous-daemon',
    title: 'Keep local review checks running while the user works',
    phase: 'self-improve-90-e11-continuous-daemon',
    commands: [
      'neurain daemon run <folder> --interval-seconds 300',
      'neurain daemon status <folder>',
      'neurain daemon stop <folder>',
    ],
    summary: 'Run a user-started foreground daemon loop that repeats scheduler ticks and writes only operational state. It never writes wiki, promotes lessons, calls models, or exposes MCP tools.',
    tags: ['daemon', 'scheduler', 'monitor', 'automation', 'watch', 'review-worker', 'runtime'],
  },
  {
    id: 'lifecycle-lineage',
    title: 'Track host lifecycle and session lineage',
    phase: 'self-improve-90-e10-lifecycle-lineage',
    commands: [
      'neurain lifecycle emit <folder> --host codex --event turn_end --session-id session --confirm "1건 저장 진행"',
      'neurain lifecycle report <folder> --json',
      'neurain mcp tool neurain_lifecycle_report',
    ],
    summary: 'Record append-only host boundary events and report turn completion, compaction, resume, and review-due lineage without Neurain pretending to own the host model loop.',
    tags: ['lifecycle', 'lineage', 'session', 'automation', 'runtime', 'agent-loop'],
  },
  {
    id: 'claude-lifecycle-hooks',
    title: 'Preview Claude Code lifecycle hook automation',
    phase: 'self-improve-90-e12-claude-lifecycle-hooks',
    commands: [
      'neurain connect claude <folder> --lifecycle-hooks --dry-run',
      'neurain lifecycle hook <folder> --host claude --confirm "1건 저장 진행"',
    ],
    summary: 'Preview Claude Code command hooks that map SessionStart, UserPromptSubmit, Stop, and SessionEnd into Neurain lifecycle receipts without storing prompt bodies or transcript paths.',
    tags: ['lifecycle', 'hooks', 'claude', 'automation', 'session', 'agent-loop'],
  },
  {
    id: 'lesson-curator',
    title: 'Manage lesson lifecycle safely',
    phase: 'self-improve-90-e5-phase-4',
    commands: [
      'neurain curator status <folder>',
      'neurain curator run <folder> --dry-run',
      'neurain mcp tool neurain_curator_status',
    ],
    summary: 'Preview and apply active, stale, and archived lifecycle changes with snapshot receipts, rollback, pinned protection, and no deletion.',
    tags: ['curator', 'lesson', 'lifecycle', 'snapshot', 'rollback', 'runtime'],
  },
  {
    id: 'recall-db',
    title: 'Build and search local recall index',
    phase: 'self-improve-90-e6-phase-5',
    commands: [
      'neurain recall status <folder>',
      'neurain recall rebuild <folder>',
      'neurain recall search <folder> <query>',
      'neurain mcp tool neurain_recall_search',
    ],
    summary: 'Use an optional rebuildable SQLite FTS5 cache for cross-session recall while keeping markdown, events, and receipts as canonical truth.',
    tags: ['recall', 'sqlite', 'fts5', 'session', 'search', 'runtime'],
  },
  {
    id: 'lesson-list',
    title: 'Read active lessons',
    phase: 'runtime-v0.3',
    commands: [
      'neurain lessons list <folder>',
      'neurain mcp tool neurain_lessons_list',
    ],
    summary: 'Load cover-level lessons first so agents can avoid repeated mistakes without loading bulky bodies.',
    tags: ['lesson', 'memory', 'runtime', 'startup'],
  },
  {
    id: 'lesson-candidates',
    title: 'Preview lesson candidates',
    phase: 'runtime-v0.3',
    commands: [
      'neurain lessons candidates <folder>',
      'neurain mcp tool neurain_lessons_candidates',
    ],
    summary: 'Inspect logs and current context for recurring corrections. Produces preview candidates only, not durable lessons.',
    tags: ['lesson', 'candidate', 'wrap', 'curator'],
  },
  {
    id: 'wrap-dry-run',
    title: 'End-of-work self-improvement preview',
    phase: 'runtime-v0.3',
    commands: [
      'neurain wrap <folder> --dry-run',
    ],
    summary: 'Combines doctor, recap, lesson candidate preview, and capability hints so the next session starts better.',
    tags: ['wrap', 'recap', 'lesson', 'self-improvement'],
  },
  {
    id: 'session-recap',
    title: 'Create deterministic session recap',
    phase: 'runtime-v0.3',
    commands: [
      'neurain recap <folder>',
    ],
    summary: 'Summarizes recent logs and area briefs without calling a model or writing durable knowledge.',
    tags: ['recap', 'session', 'handoff', 'compression'],
  },
  {
    id: 'first-run-onboarding',
    title: 'Show first-run onboarding',
    phase: 'e24-onboarding',
    commands: [
      'neurain onboard <folder> --lang ko',
      'neurain onboard <folder> --host gemini --json',
    ],
    summary: 'Read-only non-developer guide that explains whether to initialize, scan, connect, or preview wrap next.',
    tags: ['onboarding', 'first-run', 'non-developer', 'read-only'],
  },
  {
    id: 'host-connect',
    title: 'Connect Codex, Claude Code, Gemini CLI, or Runtime',
    phase: 'core',
    commands: [
      'neurain connect codex <folder> --dry-run',
      'neurain connect claude <folder> --dry-run',
      'neurain connect gemini <folder> --dry-run',
      'neurain connect runtime <folder> --dry-run',
    ],
    summary: 'Print host-specific MCP setup commands. Dry-run mode changes no host configuration.',
    tags: ['mcp', 'codex', 'claude', 'gemini', 'runtime', 'connect'],
  },
  {
    id: 'adoption-rollback',
    title: 'Rollback adoption adapter writes',
    phase: 'core',
    commands: [
      'neurain adopt --rollback <receipt> --root <folder>',
    ],
    summary: 'Remove only files listed in the adoption receipt and refuse changed files.',
    tags: ['rollback', 'receipt', 'safety'],
  },
];
|
|
163
|
+
|
|
164
|
+
/**
 * Rank the capability cards against a free-text query.
 *
 * Without a query every card is returned (each scores 1, so ordering falls
 * back to the card id). With a query, only cards scoring above zero are kept,
 * ordered by descending score and then by id.
 *
 * @param {object} [options]
 * @param {string} [options.query=''] Free-text search; trimmed and lowercased before matching.
 * @param {number|string} [options.limit=20] Maximum number of cards to return.
 *   Values that are not a number >= 1 (zero, negatives, NaN, non-numeric
 *   strings) fall back to 20; fractional values are truncated.
 * @returns {Array<object>} Shallow copies of the matching cards, each with an
 *   added `reason` string and without the internal `score` field.
 */
export function listCapabilities({ query = '', limit = 20 } = {}) {
  const needle = String(query || '').trim().toLowerCase();

  // Passing a raw limit to slice() is unsafe: a negative end index drops
  // cards from the tail and NaN silently yields an empty list.
  const requested = Number(limit);
  const maxResults = requested >= 1 ? Math.trunc(requested) : 20;

  return capabilityCards
    .map((card) => ({ ...card, score: scoreCard(card, needle) }))
    .filter((card) => !needle || card.score > 0)
    .sort((a, b) => b.score - a.score || a.id.localeCompare(b.id))
    .slice(0, maxResults)
    .map(({ score, ...card }) => ({
      ...card,
      reason: needle ? reasonFor(card, needle, score) : 'listed as part of the Neurain alpha capability contract',
    }));
}
|
|
177
|
+
|
|
178
|
+
/**
 * CLI handler for the `capabilities` command.
 *
 * The query comes from `args.query` or, when that is empty, from the
 * positional words in `args._` joined with spaces. It is trimmed so a
 * whitespace-only query is reported as "all", matching how
 * `listCapabilities` treats it.
 *
 * @param {object} [args] Parsed CLI arguments.
 * @param {string} [args.query] Explicit search text.
 * @param {string[]} [args._] Positional words used as the query when `query` is empty.
 * @param {number|string} [args.top] Maximum number of results (defaults to 20).
 * @param {boolean} [args.json] When truthy, return the structured payload instead of text.
 * @returns {Promise<{json: true, payload: object} | {text: string}>}
 */
export async function capabilitiesCommand(args = {}) {
  // The positional array may be missing when the handler is invoked directly.
  const positional = Array.isArray(args._) ? args._.join(' ') : '';
  const query = String(args.query || positional).trim();
  const capabilities = listCapabilities({ query, limit: Number(args.top || 20) });

  const payload = {
    ok: true,
    command: 'capabilities',
    query: query || null,
    count: capabilities.length,
    capabilities,
  };
  if (args.json) return { json: true, payload };

  const sections = capabilities.map((card) => [
    `## ${card.id}`,
    `- Title: ${card.title}`,
    `- Phase: ${card.phase}`,
    // Only the first command is shown as the suggested entry point.
    `- Use: ${card.commands[0]}`,
    `- Why: ${card.reason}`,
  ].join('\n'));

  return {
    text: [
      '# Neurain capabilities',
      '',
      query ? `Query: ${query}` : 'Query: all',
      '',
      ...sections,
    ].join('\n'),
  };
}
|
|
205
|
+
|
|
206
|
+
/**
 * Score a capability card against a search string.
 *
 * The score is the number of distinct query words that occur as substrings of
 * the card's id, title, summary, tags, and commands (case-insensitive).
 * Repeating a word in the query does not raise the score.
 *
 * @param {object} card Capability card with `id`, `title`, `summary`, `tags`, and `commands`.
 * @param {string} needle Search text; an empty needle scores every card as 1.
 * @returns {number} Count of distinct matching words, or 1 when there is no needle.
 */
function scoreCard(card, needle) {
  if (!needle) return 1;

  // Dedupe so "wrap wrap" is not worth more than "wrap".
  const words = new Set(needle.toLowerCase().split(/\s+/).filter(Boolean));
  const haystack = `${card.id} ${card.title} ${card.summary} ${card.tags.join(' ')} ${card.commands.join(' ')}`.toLowerCase();

  let score = 0;
  for (const word of words) {
    if (haystack.includes(word)) score += 1;
  }
  return score;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Build the human-readable "why" line for a ranked card.
 *
 * @param {object} card Capability card; reads `id` and `tags`.
 * @param {string} needle The normalized query text echoed in the message.
 * @param {number} score The card's score for this query.
 * @returns {string} A match explanation when the score is positive, otherwise a generic availability note.
 */
function reasonFor(card, needle, score) {
  const isMatch = score > 0;
  if (!isMatch) {
    return `available capability ${card.id}`;
  }
  const tagList = card.tags.join(', ');
  return `matched "${needle}" against ${tagList}`;
}
|