neurain 0.1.0-alpha.7 → 0.1.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,11 @@
4
4
 
5
5
  - No unreleased changes recorded.
6
6
 
7
+ ## 0.1.0-alpha.8
8
+
9
+ - Privacy (exact recall freshness): the exact-token branch reads from the SQLite FTS index, which is a cache rebuilt only on demand. A markdown file that was public when indexed but has since been marked `sensitivity: private`, been deleted, gained a secret, or left the corpus could therefore linger in `recall search` and `hybrid-search` exact results until the next rebuild. `searchRecall` now re-gates the returned markdown paths against the CURRENT files (the same private/secret/exists gate the markdown branches already apply) and drops any that are no longer safe. Only the top-K returned paths are re-read, so the exact branch stays fast (~4ms warm); a fresh index drops nothing, so results are unchanged (golden 9/9 identical). Event and receipt rows keep their own collection-time gating and pass through unchanged. Added `test/perf_recall_equivalence.test.mjs` coverage verifying that neither a file that went from public to private nor a deleted file surfaces from a stale index.
10
+
11
+
7
12
  ## 0.1.0-alpha.7
8
13
 
9
14
  - Hardening (recall perf, from an adversarial review): lock the "byte-identical results" claim and tighten the fast-path contracts, with no change to ranking/scores (golden-identical).
package/README.md CHANGED
@@ -204,7 +204,7 @@ It exposes read/capture/scan/preview tools only. It does not silently compile, p
204
204
 
205
205
  ## Status
206
206
 
207
- This is `0.1.0-alpha.7`. It is not a public SaaS GA release. The alpha exists to prove installability, local-first onboarding, Codex, Claude, Gemini, and Runtime connectivity, plus safety receipts.
207
+ This is `0.1.0-alpha.8`. It is not a public SaaS GA release. The alpha exists to prove installability, local-first onboarding, Codex, Claude, Gemini, and Runtime connectivity, plus safety receipts.
208
208
 
209
209
  Alpha publish command:
210
210
 
@@ -2,8 +2,8 @@
2
2
 
3
3
  Version: v0.1
4
4
  Last updated: 2026-06-20 KST
5
- Package: `neurain@0.1.0-alpha.7`
6
- Latest documented commit: `18bbb9f perf(recall): lock byte-identical claim in CI + harden fast-path contracts`
5
+ Package: `neurain@0.1.0-alpha.8`
6
+ Latest documented commit: `ba1028e fix(recall): re-gate exact (FTS) results against current files to drop stale-private`
7
7
 
8
8
  This document is the canonical product development snapshot for the public package. It tracks what is shipped, what has evidence, and what must not be claimed yet.
9
9
 
@@ -2,8 +2,8 @@
2
2
 
3
3
  Version: v0.1
4
4
  Last updated: 2026-06-20 KST
5
- Package: `neurain@0.1.0-alpha.7`
6
- Latest documented commit: `18bbb9f perf(recall): lock byte-identical claim in CI + harden fast-path contracts`
5
+ Package: `neurain@0.1.0-alpha.8`
6
+ Latest documented commit: `ba1028e fix(recall): re-gate exact (FTS) results against current files to drop stale-private`
7
7
 
8
8
  이 문서는 public package 기준의 canonical 개발 상태 스냅샷입니다. 무엇이 shipped인지, 어떤 증거가 있는지, 아직 주장하면 안 되는 것이 무엇인지 함께 기록합니다.
9
9
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "neurain",
3
- "version": "0.1.0-alpha.7",
3
+ "version": "0.1.0-alpha.8",
4
4
  "description": "Local-first Neurain Knowledge OS CLI and MCP connector.",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -7,7 +7,7 @@ import { inferSensitivityFromPath } from './safety.mjs';
7
7
  import { alternativeForm, getProvider, tokenize } from './semantic.mjs';
8
8
  import { recallConfig } from './config.mjs';
9
9
  import { createSensitivityResolver } from './labels.mjs';
10
- import { kindForPath, listRecallMarkdownFiles, recallConfigErrors, resolveAreaDir, safeToIndex, scopeForArea, scopeForPath, titleForText } from './recall_corpus.mjs';
10
+ import { filterCurrentlySafePaths, kindForPath, listRecallMarkdownFiles, recallConfigErrors, resolveAreaDir, safeToIndex, scopeForArea, scopeForPath, titleForText } from './recall_corpus.mjs';
11
11
  import { buildLexicalContext, lexicalSearchWithContext } from './recall_lexical.mjs';
12
12
  import { benchRecall, scorecardRecall } from './recall_bench.mjs';
13
13
 
@@ -158,17 +158,28 @@ export async function searchRecall(root, query, { top = 10, host = '', fallback
158
158
  ORDER BY rank ASC, d.path ASC
159
159
  LIMIT ?
160
160
  `).all(ftsQuery, String(host || ''), String(host || ''), scopeFilter, scopeFilter, limit);
161
- payload.results = rows.map((row) => ({
162
- path: row.path,
163
- kind: row.kind,
164
- host: row.host,
165
- scope: row.scope,
166
- sensitivity: row.sensitivity,
167
- title: row.title,
168
- snippet: row.snippet,
169
- source_hash: row.source_hash,
170
- score: Number((-Number(row.rank || 0)).toFixed(3)),
171
- }));
161
+ // The FTS index is a cache rebuilt only on demand, so re-gate the returned MARKDOWN
162
+ // paths against the CURRENT files: drop any row whose file has since turned private,
163
+ // been deleted, gained a secret, or left the corpus. Only the top-K paths are
164
+ // re-read, so the exact branch stays fast while never surfacing stale-private or
165
+ // deleted content. A fresh index drops nothing (results unchanged). Event/receipt
166
+ // rows use synthetic paths (a '#event' suffix or a non-.md receipt path) and carry
167
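+ // their own collection-time gating, so they pass through unchanged. NOTE(review): the check below treats ANY path containing '#' as synthetic, so a real .md file with '#' in its name would skip this re-gate — confirm such paths can never be indexed.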
168
+ const isMarkdownRow = (p) => p.endsWith('.md') && !p.includes('#');
169
+ const stillSafe = filterCurrentlySafePaths(root, recallConfig(root), rows.map((row) => row.path).filter(isMarkdownRow));
170
+ payload.results = rows
171
+ .filter((row) => !isMarkdownRow(row.path) || stillSafe.has(row.path))
172
+ .map((row) => ({
173
+ path: row.path,
174
+ kind: row.kind,
175
+ host: row.host,
176
+ scope: row.scope,
177
+ sensitivity: row.sensitivity,
178
+ title: row.title,
179
+ snippet: row.snippet,
180
+ source_hash: row.source_hash,
181
+ score: Number((-Number(row.rank || 0)).toFixed(3)),
182
+ }));
172
183
  } catch (error) {
173
184
  payload.ok = false;
174
185
  payload.error = error.message;
@@ -150,3 +150,29 @@ export function listRecallMarkdownFiles(root, recallCfg, { area = '' } = {}) {
150
150
  }
151
151
  return out;
152
152
  }
153
+
154
+ // Re-verify, against the CURRENT files, that specific paths still belong in the
155
+ // recall corpus right now (in-corpus + text file + exists + not private + not secret/injection).
156
+ // The exact-token branch reads from a SQLite cache that is only rebuilt explicitly,
157
+ // so a file that was public when indexed but has since turned private or been
158
+ // deleted could otherwise linger in exact results until the next rebuild. Applying
159
+ // this same gate to the returned paths closes that staleness window. It re-reads
160
+ // only the handful of returned paths (top-K), not the whole corpus, so the exact
161
+ // branch stays fast. Returns a Set of the still-safe paths.
162
+ export function filterCurrentlySafePaths(root, recallCfg, rels) {
163
+ const resolver = createSensitivityResolver(root, recallCfg);
164
+ const matches = buildRecallPathMatcher(recallCfg);
165
+ const safe = new Set();
166
+ for (const rel of rels) {
167
+ if (safe.has(rel)) continue;
168
+ if (!matches(rel)) continue;
169
+ if (!isTextFile(rel)) continue;
170
+ const abs = path.join(root, rel);
171
+ if (!fs.existsSync(abs)) continue;
172
+ const text = readText(abs, '');
173
+ if (resolver.sensitivityFor(rel, text) === 'private') continue;
174
+ if (!safeToIndex(text)) continue;
175
+ safe.add(rel);
176
+ }
177
+ return safe;
178
+ }