@agfpd/iapeer-memory-core 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/archive.ts +88 -0
- package/src/context-render.ts +15 -11
- package/src/frontmatter-fill.ts +195 -6
- package/src/human-edit-detect.ts +27 -7
- package/src/index.ts +36 -1
- package/src/memoryd.ts +117 -7
- package/src/search.ts +46 -0
- package/src/tags-gate.ts +174 -0
- package/src/taxonomy.ts +74 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agfpd/iapeer-memory-core",
|
|
3
|
-
"version": "0.2.8",
|
|
3
|
+
"version": "0.2.9",
|
|
4
4
|
"description": "iapeer-memory core — host-neutral TypeScript memory primitive: vault schema/taxonomy config, search engine, memoryd, context renderer, role contracts. Consumed by the @agfpd/iapeer-memory facade; version kept in lockstep by its release flow (docs/10-distribution.md).",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
package/src/archive.ts
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic archiving — lean §2.2a.
|
|
3
|
+
*
|
|
4
|
+
* In lean, archiving leaves the Index overlay and becomes BASE (0 LLM): a
|
|
5
|
+
* note whose `status` is a FINAL token (`isStale` — устарело/завершён/…; «на
|
|
6
|
+
* паузе» is PENDING, not stale → a resumable note is never archived) is moved
|
|
7
|
+
* to the archive folder by memoryd. The decision is taxonomy, not judgement.
|
|
8
|
+
*
|
|
9
|
+
* Wikilinks resolve by TITLE, so the graph survives the move (edges are
|
|
10
|
+
* reindexed); the archive is NOT excluded from search (it stays findable with
|
|
11
|
+
* the stale boost). The move is flat (`07_Archive/<basename>`), collisions
|
|
12
|
+
* resolved with a numeric suffix.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { isStale, type TaxonomyPreset } from "./taxonomy.js";
|
|
17
|
+
|
|
18
|
+
/**
 * Leading segment of a vault-relative path: everything before the first `/`
 * or `\` separator, or the whole string when no separator is present.
 */
function firstSegment(relPath: string): string {
  const cut = relPath.search(/[\\/]/);
  return cut === -1 ? relPath : relPath.slice(0, cut);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Is this vault-relative path inside a folder whose notes may be archived?
 *
 * One unified rule (lean §2.2a): the path's first segment must be one of the
 * six taxonomy folders `knowledge`, `decisions`, `projects`, `ideas`, `lists`
 * or `agentMemory`. Any other top-level folder — the archive folder included —
 * is not in that set, so notes there are never picked up for archiving.
 */
export function isArchivableZone(relPath: string, taxonomy: TaxonomyPreset): boolean {
  const { knowledge, decisions, projects, ideas, lists, agentMemory } = taxonomy.folders;
  const archivable = [knowledge, decisions, projects, ideas, lists, agentMemory];
  return archivable.includes(firstSegment(relPath));
}
|
|
44
|
+
|
|
45
|
+
/**
 * Read the `status` scalar from a note's leading frontmatter block.
 *
 * @param content Full note text.
 * @returns The trimmed status value, or `null` when the note has no
 *   frontmatter, has no `status` line, or the `status` line is empty.
 */
export function statusOf(content: string): string | null {
  const fm = /^---[^\S\n]*\n([\s\S]*?)\n---/.exec(content);
  if (!fm) return null;
  // Only horizontal whitespace may surround the colon and the value: with a
  // plain `\s*` an empty `status:` line would swallow the newline and report
  // the NEXT frontmatter line (e.g. `title: x`) as the status.
  const m = /^status[^\S\n]*:[^\S\n]*(.+?)[^\S\n]*$/m.exec(fm[1]);
  return m ? m[1].trim() : null;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Decide whether a note should be moved to the archive: its path must lie in
 * an archivable zone (`isArchivableZone`) AND its frontmatter `status` must be
 * stale according to `isStale`. The status is only read when the zone check
 * passes.
 */
export function shouldArchive(
  relPath: string,
  content: string,
  taxonomy: TaxonomyPreset,
): boolean {
  return isArchivableZone(relPath, taxonomy) && isStale(taxonomy, statusOf(content));
}
|
|
66
|
+
|
|
67
|
+
/**
 * Compute the flat, vault-relative archive destination for a note.
 *
 * The first choice is `<archive>/<basename>`. If that path is taken, a
 * numeric suffix is appended to the stem — `<stem>-2`, `<stem>-3`, … — until
 * a free path is found; a trailing `.md` extension is kept after the suffix.
 *
 * @param basename File name of the note being archived.
 * @param taxonomy Supplies the archive folder name (`folders.archive`).
 * @param exists   Reports whether a vault-relative path is already taken.
 * @returns The first candidate for which `exists` returns false.
 */
export function archiveTargetRel(
  basename: string,
  taxonomy: TaxonomyPreset,
  exists: (rel: string) => boolean,
): string {
  const dir = taxonomy.folders.archive;
  const plain = `${dir}/${basename}`;
  if (!exists(plain)) return plain;

  const markdown = basename.endsWith(".md");
  const ext = markdown ? ".md" : "";
  const stem = markdown ? basename.slice(0, -3) : basename;
  for (let suffix = 2; ; suffix += 1) {
    const attempt = `${dir}/${stem}-${suffix}${ext}`;
    if (!exists(attempt)) return attempt;
  }
}
|
package/src/context-render.ts
CHANGED
|
@@ -74,16 +74,20 @@ export type FragmentEnv = {
|
|
|
74
74
|
};
|
|
75
75
|
/** Rendered author index file (capped variant), absolute path. */
|
|
76
76
|
authorIndexPath: string;
|
|
77
|
-
/**
|
|
78
|
-
|
|
77
|
+
/** Compact tags-dictionary projection, absolute path — injected to EVERY
|
|
78
|
+
* author in lean (§3), not just the Index. */
|
|
79
|
+
tagsProjectionPath?: string;
|
|
80
|
+
/** Layer title for the projection (the dictionary file name, e.g. `Теги.md`);
|
|
81
|
+
* defaults to the projection file basename. */
|
|
82
|
+
tagsTitle?: string;
|
|
79
83
|
};
|
|
80
84
|
|
|
81
85
|
/**
|
|
82
86
|
* Assemble the per-peer fragment layers in the reference build_layers
|
|
83
|
-
* order
|
|
84
|
-
*
|
|
85
|
-
* is the role doctrine; the guide arrives host-wide by layer
|
|
86
|
-
* Missing/empty sources are skipped gracefully.
|
|
87
|
+
* order: paths → tags-dictionary projection (lean §3: ALL authors now, not
|
|
88
|
+
* only the Index) → author index. The curator gets no writer guide — its
|
|
89
|
+
* contract is the role doctrine; the guide arrives host-wide by layer
|
|
90
|
+
* mechanics. Missing/empty sources are skipped gracefully.
|
|
87
91
|
*/
|
|
88
92
|
export function buildLayers(env: FragmentEnv): ContextLayer[] {
|
|
89
93
|
const layers: ContextLayer[] = [];
|
|
@@ -101,11 +105,11 @@ export function buildLayers(env: FragmentEnv): ContextLayer[] {
|
|
|
101
105
|
].join("\n");
|
|
102
106
|
layers.push(["iapeer-memory paths", pathsBlock]);
|
|
103
107
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
108
|
+
if (env.tagsProjectionPath) {
|
|
109
|
+
const tags = readFileOrEmpty(env.tagsProjectionPath);
|
|
110
|
+
if (tags.trim()) {
|
|
111
|
+
layers.push([env.tagsTitle || path.basename(env.tagsProjectionPath), tags]);
|
|
112
|
+
}
|
|
109
113
|
}
|
|
110
114
|
|
|
111
115
|
const idx = readFileOrEmpty(env.authorIndexPath);
|
package/src/frontmatter-fill.ts
CHANGED
|
@@ -42,7 +42,7 @@ import fs from "node:fs";
|
|
|
42
42
|
import path from "node:path";
|
|
43
43
|
import crypto from "node:crypto";
|
|
44
44
|
import type { TaxonomyPreset } from "./taxonomy.js";
|
|
45
|
-
import { DEFAULT_CURATOR_SET } from "./taxonomy.js";
|
|
45
|
+
import { DEFAULT_CURATOR_SET, genreForFolder, linksSectionPattern } from "./taxonomy.js";
|
|
46
46
|
import { guardedWriteFileSync, guardedUnlinkSync } from "./fs-guard.js";
|
|
47
47
|
|
|
48
48
|
const FRONTMATTER_RE = /^---[^\S\n]*\n([\s\S]*?\n)---[^\S\n]*(?:\n|$)/;
|
|
@@ -96,6 +96,70 @@ export function setIfMissing(block: string, key: string, value: string): string
|
|
|
96
96
|
return `${block}${key}: ${value}\n`;
|
|
97
97
|
}
|
|
98
98
|
|
|
99
|
+
/**
 * Read a scalar frontmatter field.
 *
 * @param block Frontmatter text (one `key: value` per line).
 * @param key   Field name; it is regex-escaped before matching.
 * @returns The trimmed value, or `null` when the key is absent or its value
 *   is empty.
 */
export function readScalar(block: string, key: string): string | null {
  // Horizontal whitespace only around the colon/value: a plain `\s*` in a
  // multiline regex crosses the newline, so an empty `key:` line would return
  // the following line's text as this key's value.
  const pattern = new RegExp(
    `^${escapeRe(key)}[^\\S\\n]*:[^\\S\\n]*(.+?)[^\\S\\n]*$`,
    "m",
  );
  const m = pattern.exec(block);
  return m ? m[1].trim() : null;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Parse a YAML list field from a frontmatter block.
 *
 * Two forms are recognised for the first line matching `key:`:
 *  - inline — `key: [a, b]` (or a bare comma-separated value): split on `,`;
 *  - block  — `key:` followed by indented `- item` lines, read until the first
 *    line that is not such an item.
 * One leading and one trailing quote character (`"` or `'`) are stripped from
 * each item; empty items are dropped.
 *
 * NOTE(review): block items must be indented by at least one whitespace
 * character; a zero-indent `- item` line is not treated as part of the list.
 *
 * @returns The items, or `[]` when the key is absent or the list is empty.
 */
export function parseListField(block: string, key: string): string[] {
  const lines = block.split("\n");
  // Loop-invariant: build the key matcher once instead of once per line.
  const keyLine = new RegExp(`^${escapeRe(key)}\\s*:\\s*(.*)$`);
  const unquote = (raw: string): string => raw.trim().replace(/^["']|["']$/g, "");

  for (let i = 0; i < lines.length; i++) {
    const m = keyLine.exec(lines[i]);
    if (!m) continue;

    const inline = m[1].trim();
    if (inline) {
      return inline
        .replace(/^\[/, "")
        .replace(/\]$/, "")
        .split(",")
        .map(unquote)
        .filter(Boolean);
    }

    const out: string[] = [];
    for (let j = i + 1; j < lines.length; j++) {
      const item = /^\s+-\s+(.*)$/.exec(lines[j]);
      if (!item) break;
      const v = unquote(item[1]);
      if (v) out.push(v);
    }
    return out;
  }
  return [];
}
|
|
131
|
+
|
|
132
|
+
/**
 * Drop a list field from a frontmatter block: every line starting with
 * `key:` is removed together with the indented `- item` lines that directly
 * follow it. All other lines are kept in their original order.
 */
function removeListField(block: string, key: string): string {
  const keyLine = new RegExp(`^${escapeRe(key)}\\s*:`);
  const source = block.split("\n");
  const kept: string[] = [];
  let pos = 0;
  while (pos < source.length) {
    if (!keyLine.test(source[pos])) {
      kept.push(source[pos]);
      pos += 1;
      continue;
    }
    // Skip the key line itself, then the run of block-list items after it.
    pos += 1;
    while (pos < source.length && /^\s+-\s/.test(source[pos])) pos += 1;
  }
  return kept.join("\n");
}
|
|
147
|
+
|
|
148
|
+
/**
 * Record `name` in the `coauthors` list (lean §3a auto-coauthor).
 *
 * When `name` is already listed the block is returned unchanged. Otherwise
 * the existing `coauthors` field is removed and re-emitted as a block list —
 * previous entries first, `name` last — at the end of the frontmatter.
 */
export function addCoauthor(block: string, name: string): string {
  const current = parseListField(block, "coauthors");
  if (current.includes(name)) return block;

  const stripped = removeListField(block, "coauthors");
  // A non-empty remainder must end with a newline before the field is appended.
  const prefix = stripped && !stripped.endsWith("\n") ? `${stripped}\n` : stripped;
  const items = [...current, name].map((who) => ` - ${who}`).join("\n");
  return `${prefix}coauthors:\n${items}\n`;
}
|
|
162
|
+
|
|
99
163
|
/** Returns [fmBlock, rest]. No frontmatter → ["", content]. */
|
|
100
164
|
export function splitFrontmatter(content: string): [string, string] {
|
|
101
165
|
const m = FRONTMATTER_RE.exec(content);
|
|
@@ -197,14 +261,64 @@ export function fillInbox(
|
|
|
197
261
|
return fmBlock;
|
|
198
262
|
}
|
|
199
263
|
|
|
200
|
-
|
|
264
|
+
/**
 * Full permanent-zone (canon) frontmatter fill — the lean §2 "guard" core,
 * SHARED between the post-write hook (`processFile`) and the human-edit
 * detector (`decideUpdate`), so an agent's write and a human's external write
 * get the same deterministic frontmatter (mandate §2: shared logic for both
 * paths).
 *
 * Everything here is derived without an LLM:
 *  - `last_edited_by` / `updated` ← `opts.agent` / `opts.nowStamp` (always
 *    upserted);
 *  - `title` ← file name without extension; `type` / `status` ← the genre of
 *    the note's top-level folder, when one is defined (§2.1);
 *  - `created` ← `opts.today`;
 *  - for a non-curator writer only: `author` ← `opts.agent` (if missing),
 *    `needs_review: true`, and the writer is added to `coauthors` when the
 *    note's `author` is someone else.
 *
 * Fields written with `setIfMissing` are expected to keep an existing value.
 *
 * @param fmBlock Current frontmatter block ("" for a bare-body note).
 * @param opts    File path, writer name, vault root, dates and fill context.
 * @returns The updated frontmatter block.
 */
export function fillPermanentFull(
  fmBlock: string,
  opts: {
    path: string;
    agent: string;
    vault: string;
    today: string;
    nowStamp: string;
    ctx: FillContext;
  },
): string {
  const { taxonomy } = opts.ctx;
  // Service stamp (always) — load-bearing for smart-hash echo-safety and the
  // unstamped detector, symmetric with fillInbox/fillMemory.
  fmBlock = upsert(fmBlock, "last_edited_by", opts.agent);
  fmBlock = upsert(fmBlock, "updated", opts.nowStamp);
  // Canon frontmatter the author no longer hand-writes (§2.1). setIfMissing —
  // an explicit author value is never clobbered; a re-edit of an existing note
  // is a stamp-only no-op on these.
  fmBlock = setIfMissing(fmBlock, "title", basenameNoExt(opts.path));
  // Genre lookup needs the note's top-level folder; without a vault root the
  // type/status defaults are skipped.
  const folder = opts.vault ? relParts(opts.path, opts.vault)?.[0] : undefined;
  const genre = folder ? genreForFolder(taxonomy, folder) : null;
  if (genre) {
    fmBlock = setIfMissing(fmBlock, "type", genre.type);
    fmBlock = setIfMissing(fmBlock, "status", genre.initialStatus);
  }
  fmBlock = setIfMissing(fmBlock, "created", opts.today);
  // AUTHOR GUARD (Artur's invariant; fork-1 decision boris 15.06, §3a): a
  // curator (Index/Scriber/DreamWeaver) edits canon STRUCTURE, never authors
  // content — it never becomes `author` (nor, L2, `coauthors`). Same guard as
  // the inbox branch. A non-curator writing canon IS the author. `needs_review`
  // is the guard's flag, lifted only by Index/human.
  if (!isCurator(opts.agent, opts.ctx)) {
    fmBlock = setIfMissing(fmBlock, "author", opts.agent);
    fmBlock = upsert(fmBlock, "needs_review", "true");
    // §3a auto-coauthor: a non-curator who edits a canon note authored by
    // SOMEONE ELSE is recorded as a coauthor — content collaboration that
    // kills duplicates. Curators are excluded (fork-1 boris 15.06: they edit
    // STRUCTURE, not content — same guard as the author guard). `author` is
    // immutable; only `coauthors` grows. On a NEW note author===agent → no-op.
    const author = readScalar(fmBlock, "author");
    if (author && author !== opts.agent) {
      fmBlock = addCoauthor(fmBlock, opts.agent);
    }
  }
  return fmBlock;
}
|
|
@@ -417,6 +531,71 @@ export function normalizeFields(
|
|
|
417
531
|
return lines.join("\n");
|
|
418
532
|
}
|
|
419
533
|
|
|
534
|
+
/**
 * YAML-safe normalisation of every scalar frontmatter line (lean §2.2).
 *
 * Each line that `NORMALIZE_LINE_RE` recognises as `key: value` is passed to
 * `normalizeScalarValue`; when that returns a non-null replacement the line is
 * rewritten as `key: <replacement>`. Lines that do not match, lines whose key
 * is listed in `excludeKeys`, and values needing no change are left as they
 * are.
 *
 * @param fmBlock     Frontmatter block text.
 * @param excludeKeys Keys to leave untouched (defaults to `EMPTY_ARRAY_KEYS`;
 *   presumably the block-list fields such as `tags` / `coauthors` — confirm
 *   against the constant's definition).
 * @returns The block with normalised scalar lines.
 */
export function normalizeAllScalars(
  fmBlock: string,
  excludeKeys: readonly string[] = EMPTY_ARRAY_KEYS,
): string {
  const rewritten = fmBlock.split("\n").map((line) => {
    const parsed = NORMALIZE_LINE_RE.exec(line);
    if (!parsed) return line;
    const [, key, rawValue] = parsed;
    if (excludeKeys.includes(key)) return line;
    const safe = normalizeScalarValue(rawValue);
    return safe === null ? line : `${key}: ${safe}`;
  });
  return rewritten.join("\n");
}
|
|
555
|
+
|
|
556
|
+
/**
 * Matches a Markdown thematic-break line: the same marker (`-`, `*` or `_`)
 * three or more times, optionally separated and surrounded by spaces/tabs —
 * e.g. `---`, `***`, `___`, `- - -`.
 */
const HR_LINE_RE = /^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$/;
|
|
558
|
+
|
|
559
|
+
/**
 * Does `line` look like the taxonomy's links-section heading?
 *
 * The match is loose on form and strict on text: one to six `#`, any amount
 * of whitespace, any letter case — but the heading text must equal the
 * configured section text exactly, so a longer heading that merely starts
 * with the same word does not match.
 */
function isFuzzyLinksHeading(line: string, taxonomy: TaxonomyPreset): boolean {
  // Heading text without its leading `#` run, escaped for literal use in a regex.
  const title = taxonomy.linksSection.replace(/^#+\s*/, "");
  const literal = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const heading = new RegExp(`^#{1,6}\\s*${literal}\\s*$`, "i");
  const candidate = line.trim();
  return heading.test(candidate);
}
|
|
569
|
+
|
|
570
|
+
/**
 * Bring a leading links-section block into its canonical form.
 *
 * If the first non-blank line of `body` is a fuzzy match of the links heading
 * it is replaced by `taxonomy.linksSection`. The lines after it are then
 * scanned across blanks and `-` / `*` list items: the first thematic break
 * found is rewritten to `---` (unless it already trims to `---`), and the
 * scan stops at that break or at the first other content line.
 *
 * Only those two lines are ever rewritten; nothing is inserted, removed or
 * moved. A body without a leading links heading is returned unchanged.
 */
export function normalizeLinksBlock(body: string, taxonomy: TaxonomyPreset): string {
  const rows = body.split("\n");
  const headingAt = rows.findIndex((row) => row.trim() !== "");
  if (headingAt === -1 || !isFuzzyLinksHeading(rows[headingAt], taxonomy)) return body;

  rows[headingAt] = taxonomy.linksSection;

  for (let k = headingAt + 1; k < rows.length; k += 1) {
    const row = rows[k];
    if (HR_LINE_RE.test(row)) {
      // First divider of the block: canonicalise it and stop.
      if (row.trim() !== "---") rows[k] = "---";
      break;
    }
    const text = row.trim();
    const blankOrListItem = text === "" || text.startsWith("-") || text.startsWith("*");
    // A content line before any divider means the block has none — leave it.
    if (!blankOrListItem) break;
  }
  return rows.join("\n");
}
|
|
598
|
+
|
|
420
599
|
/**
|
|
421
600
|
* Assemble the new file. A body not starting with a newline gets one, so the
|
|
422
601
|
* markdown parser sees the frontmatter separately from the first paragraph.
|
|
@@ -509,10 +688,19 @@ export function processFile(filePath: string, opts: ProcessOptions): boolean {
|
|
|
509
688
|
const [fmBlock, rest] = splitFrontmatter(content);
|
|
510
689
|
|
|
511
690
|
let newFm: string | null;
|
|
691
|
+
let newBody = rest;
|
|
512
692
|
if (zone === "inbox") {
|
|
513
693
|
newFm = fillInbox(fmBlock, { path: filePath, agent: opts.agent, today, nowStamp, ctx });
|
|
514
694
|
} else if (zone === "permanent") {
|
|
515
|
-
newFm =
|
|
695
|
+
newFm = fillPermanentFull(fmBlock, {
|
|
696
|
+
path: filePath,
|
|
697
|
+
agent: opts.agent,
|
|
698
|
+
vault,
|
|
699
|
+
today,
|
|
700
|
+
nowStamp,
|
|
701
|
+
ctx,
|
|
702
|
+
});
|
|
703
|
+
newBody = normalizeLinksBlock(rest, opts.taxonomy);
|
|
516
704
|
} else {
|
|
517
705
|
newFm = fillMemory(fmBlock, {
|
|
518
706
|
path: filePath,
|
|
@@ -523,11 +711,12 @@ export function processFile(filePath: string, opts: ProcessOptions): boolean {
|
|
|
523
711
|
ctx,
|
|
524
712
|
});
|
|
525
713
|
if (newFm === null) return false;
|
|
714
|
+
newBody = normalizeLinksBlock(rest, opts.taxonomy);
|
|
526
715
|
}
|
|
527
716
|
|
|
528
717
|
newFm = stripEmptyArrays(newFm);
|
|
529
|
-
newFm =
|
|
530
|
-
const newContent = assemble(newFm,
|
|
718
|
+
newFm = normalizeAllScalars(newFm);
|
|
719
|
+
const newContent = assemble(newFm, newBody);
|
|
531
720
|
if (newContent === content) return false;
|
|
532
721
|
atomicWrite(filePath, newContent);
|
|
533
722
|
return true;
|
package/src/human-edit-detect.ts
CHANGED
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
import crypto from "node:crypto";
|
|
33
33
|
import path from "node:path";
|
|
34
34
|
import type { TaxonomyPreset } from "./taxonomy.js";
|
|
35
|
+
import { fillPermanentFull } from "./frontmatter-fill.js";
|
|
35
36
|
|
|
36
37
|
export const DEFAULT_FRESH_EDIT_WINDOW_S = 90;
|
|
37
38
|
|
|
@@ -116,6 +117,11 @@ export type DecideUpdateInput = {
|
|
|
116
117
|
birthtimeMs: number;
|
|
117
118
|
mtimeMs: number;
|
|
118
119
|
basename: string;
|
|
120
|
+
/** Absolute file path — the permanent branch derives the folder's genre
|
|
121
|
+
* (type/status) from it via the shared `fillPermanentFull` (lean §2.1). */
|
|
122
|
+
path: string;
|
|
123
|
+
/** Vault root — folder resolution for the genre lookup. */
|
|
124
|
+
vault: string;
|
|
119
125
|
lastHash: string | null;
|
|
120
126
|
taxonomy: TaxonomyPreset;
|
|
121
127
|
/** Config (default DEFAULT_FRESH_EDIT_WINDOW_S). */
|
|
@@ -147,12 +153,14 @@ export function decideUpdate(input: DecideUpdateInput): DecideUpdateResult {
|
|
|
147
153
|
if (fmMatch) {
|
|
148
154
|
fmBlock = fmMatch[1];
|
|
149
155
|
body = input.content.slice(fmMatch[0].length);
|
|
150
|
-
} else
|
|
156
|
+
} else {
|
|
157
|
+
// Bare body (no frontmatter) — both zones BUILD it now. In lean the guard
|
|
158
|
+
// must complete a human's bare canon note, not skip it (§2.2: «голое тело
|
|
159
|
+
// человека страж должен ДОСТРОИТЬ»; the pre-lean «permanent-no-frontmatter
|
|
160
|
+
// → skip» is removed — canon frontmatter is the guard's job now, not the
|
|
161
|
+
// Index's on placement).
|
|
151
162
|
fmBlock = "";
|
|
152
163
|
body = input.content;
|
|
153
|
-
} else {
|
|
154
|
-
// PERMANENT without frontmatter — not our zone.
|
|
155
|
-
return { action: "skip", recordHash: null, reason: "permanent-no-frontmatter" };
|
|
156
164
|
}
|
|
157
165
|
|
|
158
166
|
const lebMatch = /^last_edited_by\s*:\s*(.+?)\s*$/m.exec(fmBlock);
|
|
@@ -192,9 +200,21 @@ export function decideUpdate(input: DecideUpdateInput): DecideUpdateResult {
|
|
|
192
200
|
newFm = setIfMissing(newFm, "author", input.human);
|
|
193
201
|
newFm = setIfMissing(newFm, "needs_review", "true");
|
|
194
202
|
} else {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
203
|
+
// PERMANENT (canon) — the SHARED guard fill (mandate §2: identical to the
|
|
204
|
+
// hook path). Existing notes: stamp-only no-op on the constants; a human's
|
|
205
|
+
// bare-body canon note: full frontmatter (title/type-from-folder/status/
|
|
206
|
+
// created/author). `created` ← birthtime (file creation, not edit time).
|
|
207
|
+
const createdSource =
|
|
208
|
+
input.birthtimeMs > 0 ? new Date(input.birthtimeMs) : new Date(input.mtimeMs);
|
|
209
|
+
const createdDate = createdSource.toISOString().slice(0, 10);
|
|
210
|
+
newFm = fillPermanentFull(newFm, {
|
|
211
|
+
path: input.path,
|
|
212
|
+
agent: input.human,
|
|
213
|
+
vault: input.vault,
|
|
214
|
+
today: createdDate,
|
|
215
|
+
nowStamp,
|
|
216
|
+
ctx: { taxonomy: input.taxonomy },
|
|
217
|
+
});
|
|
198
218
|
}
|
|
199
219
|
|
|
200
220
|
if (newFm === fmBlock) {
|
package/src/index.ts
CHANGED
|
@@ -20,17 +20,52 @@ export {
|
|
|
20
20
|
getTaxonomy,
|
|
21
21
|
isLocaleId,
|
|
22
22
|
defaultExcludeFolders,
|
|
23
|
+
genreForFolder,
|
|
24
|
+
isStale,
|
|
25
|
+
statusGroup,
|
|
23
26
|
DEFAULT_CURATOR_SET,
|
|
24
27
|
DEFAULT_RANKING,
|
|
25
28
|
type LocaleId,
|
|
26
29
|
type RankingConfig,
|
|
27
30
|
type TaxonomyPreset,
|
|
31
|
+
type TaxonomyInitialStatus,
|
|
28
32
|
} from "./taxonomy.js";
|
|
29
33
|
|
|
30
34
|
// frontmatter: post-write fill + structural fm-update (CLI contract in module header)
|
|
31
|
-
export {
|
|
35
|
+
export {
|
|
36
|
+
processFile,
|
|
37
|
+
resolveAgentName,
|
|
38
|
+
splitFrontmatter,
|
|
39
|
+
resolveZone,
|
|
40
|
+
type ProcessOptions,
|
|
41
|
+
type Zone,
|
|
42
|
+
} from "./frontmatter-fill.js";
|
|
32
43
|
export { fmUpdate, collectOps, yamlSafeScalar, type FmUpdateOptions, type Op } from "./fm-update.js";
|
|
33
44
|
|
|
45
|
+
// deterministic archiving (lean §2.2a)
|
|
46
|
+
export {
|
|
47
|
+
isArchivableZone,
|
|
48
|
+
statusOf,
|
|
49
|
+
shouldArchive,
|
|
50
|
+
archiveTargetRel,
|
|
51
|
+
} from "./archive.js";
|
|
52
|
+
export { snapshotVault } from "./permanent-detect.js";
|
|
53
|
+
|
|
54
|
+
// tag gate + injected dictionary projection (lean §3)
|
|
55
|
+
export { tagsDictionarySourceRel } from "./tags-mirror.js";
|
|
56
|
+
export {
|
|
57
|
+
parseDictionaryEntries,
|
|
58
|
+
parseDictionaryTags,
|
|
59
|
+
isTagAllowed,
|
|
60
|
+
parseNoteTags,
|
|
61
|
+
tagGateProblems,
|
|
62
|
+
renderTagsProjection,
|
|
63
|
+
DEFAULT_TAGS_BOUNDARY_MAXLEN,
|
|
64
|
+
type DictionaryEntry,
|
|
65
|
+
type TagGateOptions,
|
|
66
|
+
type ProjectionOptions,
|
|
67
|
+
} from "./tags-gate.js";
|
|
68
|
+
|
|
34
69
|
// author index rendering
|
|
35
70
|
export { regenerateVaultIndex, fullIndexPathFor, type RenderContext } from "./index-render.js";
|
|
36
71
|
|
package/src/memoryd.ts
CHANGED
|
@@ -39,8 +39,11 @@ import type { CoreConfig } from "./config.js";
|
|
|
39
39
|
import { openDatabase, type CoreDb } from "./db.js";
|
|
40
40
|
import { indexAll } from "./indexer.js";
|
|
41
41
|
import { runSearch, runGraph, runMap } from "./mcp-tools.js";
|
|
42
|
+
import { runDedup } from "./search.js";
|
|
42
43
|
import { decideUpdate, getZone, sha256 } from "./human-edit-detect.js";
|
|
43
44
|
import { decideMirror, tagsDictionarySourceRel } from "./tags-mirror.js";
|
|
45
|
+
import { renderTagsProjection, DEFAULT_TAGS_BOUNDARY_MAXLEN } from "./tags-gate.js";
|
|
46
|
+
import { isArchivableZone, shouldArchive, archiveTargetRel } from "./archive.js";
|
|
44
47
|
import {
|
|
45
48
|
snapshotVault,
|
|
46
49
|
snapshotInbox,
|
|
@@ -353,6 +356,39 @@ export async function startMcpHttp(opts: {
|
|
|
353
356
|
const httpServer = http.createServer((req, res) => {
|
|
354
357
|
void (async () => {
|
|
355
358
|
const url = (req.url ?? "").split("?")[0];
|
|
359
|
+
// Dedup hint (lean §3a) — a memoryd-INTERNAL RPC for the post-write hook,
|
|
360
|
+
// NOT an MCP tool (the MCP surface stays the three read tools). Loopback
|
|
361
|
+
// (host is 127.0.0.1) + read-only. The hook calls it fail-open with a
|
|
362
|
+
// short timeout, so a slow/down memoryd never hangs a write.
|
|
363
|
+
if (url === "/dedup" && req.method === "POST") {
|
|
364
|
+
const chunks: Buffer[] = [];
|
|
365
|
+
req.on("data", (c) => chunks.push(c as Buffer));
|
|
366
|
+
await new Promise<void>((resolve) => req.on("end", () => resolve()));
|
|
367
|
+
try {
|
|
368
|
+
const body = JSON.parse(Buffer.concat(chunks).toString("utf-8")) as {
|
|
369
|
+
content?: string;
|
|
370
|
+
threshold?: number;
|
|
371
|
+
limit?: number;
|
|
372
|
+
};
|
|
373
|
+
const content = (body.content ?? "").trim();
|
|
374
|
+
const result = content
|
|
375
|
+
? await runDedup(opts.db, opts.config, {
|
|
376
|
+
content,
|
|
377
|
+
threshold: body.threshold,
|
|
378
|
+
limit: body.limit,
|
|
379
|
+
})
|
|
380
|
+
: { enabled: Boolean(opts.config.embedding), matches: [] };
|
|
381
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
382
|
+
res.end(JSON.stringify(result));
|
|
383
|
+
} catch (err) {
|
|
384
|
+
logger.error(`dedup request failed: ${String(err)}`);
|
|
385
|
+
if (!res.headersSent) {
|
|
386
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
387
|
+
res.end(JSON.stringify({ enabled: false, matches: [], error: "internal error" }));
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
return;
|
|
391
|
+
}
|
|
356
392
|
if (url !== "/mcp") {
|
|
357
393
|
res.writeHead(404, { "Content-Type": "application/json" });
|
|
358
394
|
res.end(JSON.stringify({ error: "not found; MCP endpoint is /mcp" }));
|
|
@@ -531,6 +567,9 @@ export type MemorydOptions = {
|
|
|
531
567
|
heartbeatPath?: string;
|
|
532
568
|
/** Tags mirror target; default `<db dir>/tags-dictionary.md`. */
|
|
533
569
|
tagsMirrorPath?: string;
|
|
570
|
+
/** Compact tags-projection target (injected to all peers, lean §3);
|
|
571
|
+
* default `<db dir>/tags-projection.md`. */
|
|
572
|
+
tagsProjectionPath?: string;
|
|
534
573
|
/** Detect-hash persistence file; default `<db dir>/memoryd.hashes.json`. */
|
|
535
574
|
hashStatePath?: string;
|
|
536
575
|
/** Persisted batch baselines (inbox + permanent + human-inbox snapshots). */
|
|
@@ -597,6 +636,9 @@ export async function startMemoryd(opts: MemorydOptions): Promise<MemorydHandle>
|
|
|
597
636
|
const dbDir = path.dirname(config.index.dbPath);
|
|
598
637
|
const heartbeatPath = opts.heartbeatPath ?? path.join(dbDir, "memoryd.heartbeat");
|
|
599
638
|
const tagsMirrorPath = opts.tagsMirrorPath ?? path.join(dbDir, "tags-dictionary.md");
|
|
639
|
+
const tagsProjectionPath = opts.tagsProjectionPath ?? path.join(dbDir, "tags-projection.md");
|
|
640
|
+
const tagsBoundaryMaxLen =
|
|
641
|
+
Number(process.env.IAPEER_MEMORY_TAGS_BOUNDARY_MAXLEN) || DEFAULT_TAGS_BOUNDARY_MAXLEN;
|
|
600
642
|
const hashStatePath = opts.hashStatePath ?? path.join(dbDir, "memoryd.hashes.json");
|
|
601
643
|
const persistMs = opts.persistMs ?? 60_000;
|
|
602
644
|
const taxonomy = config.taxonomy;
|
|
@@ -745,7 +787,10 @@ export async function startMemoryd(opts: MemorydOptions): Promise<MemorydHandle>
|
|
|
745
787
|
indexAgent,
|
|
746
788
|
paths: fragments.paths,
|
|
747
789
|
authorIndexPath: outFile,
|
|
748
|
-
|
|
790
|
+
// lean §3: the compact dictionary projection is injected to EVERY
|
|
791
|
+
// author now (pre-lean: only the Index got the full mirror).
|
|
792
|
+
tagsProjectionPath,
|
|
793
|
+
tagsTitle: taxonomy.systemFiles.tagsDictionary,
|
|
749
794
|
},
|
|
750
795
|
});
|
|
751
796
|
rendered++;
|
|
@@ -785,12 +830,36 @@ export async function startMemoryd(opts: MemorydOptions): Promise<MemorydHandle>
|
|
|
785
830
|
mirrorContent = null;
|
|
786
831
|
}
|
|
787
832
|
const decision = decideMirror({ srcContent, mirrorContent });
|
|
788
|
-
if (decision.action
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
833
|
+
if (decision.action === "write") {
|
|
834
|
+
fs.mkdirSync(path.dirname(tagsMirrorPath), { recursive: true });
|
|
835
|
+
const tmp = `${tagsMirrorPath}.tmp`;
|
|
836
|
+
guardedWriteFileSync(tmp, srcContent!, "utf-8");
|
|
837
|
+
fs.renameSync(tmp, tagsMirrorPath);
|
|
838
|
+
mirrorContent = srcContent;
|
|
839
|
+
logger.info(`tags mirror updated (${decision.reason})`);
|
|
840
|
+
}
|
|
841
|
+
// Refresh the compact projection from the (up-to-date) mirror — even when
|
|
842
|
+
// the mirror was unchanged, so the projection materialises on first run
|
|
843
|
+
// after the feature ships (lean §3). Idempotent: writes only on a change.
|
|
844
|
+
syncTagsProjection(mirrorContent);
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
/**
 * Regenerate the compact tags projection file from the tags-dictionary
 * mirror content.
 *
 * Does nothing when the mirror content is missing/blank or when the rendered
 * projection is blank, so an existing projection file is left in place. The
 * file is rewritten only when the rendered text differs from what is on disk
 * (an unreadable or absent file counts as different). The write goes to a
 * `.tmp` sibling first and is then renamed over the target.
 */
function syncTagsProjection(mirrorContent: string | null): void {
if (!mirrorContent || !mirrorContent.trim()) return; // no dict → keep existing
const proj = renderTagsProjection(mirrorContent, { boundaryMaxLen: tagsBoundaryMaxLen });
if (!proj.trim()) return;
let existing: string | null = null;
try {
existing = fs.readFileSync(tagsProjectionPath, "utf-8");
} catch {
// Missing or unreadable projection — treat as "no current content".
existing = null;
}
// Already up to date: skip the write.
if (existing === proj) return;
fs.mkdirSync(path.dirname(tagsProjectionPath), { recursive: true });
// Write to a temp file, then rename it onto the target path.
const tmp = `${tagsProjectionPath}.tmp`;
guardedWriteFileSync(tmp, proj, "utf-8");
fs.renameSync(tmp, tagsProjectionPath);
logger.info("tags projection updated");
}
|
|
795
864
|
|
|
796
865
|
/** Zone map of the unstamped detector (design §3): the agent inbox +
|
|
@@ -901,6 +970,8 @@ export async function startMemoryd(opts: MemorydOptions): Promise<MemorydHandle>
|
|
|
901
970
|
birthtimeMs: stat.birthtime ? stat.birthtime.getTime() : 0,
|
|
902
971
|
mtimeMs: stat.mtime.getTime(),
|
|
903
972
|
basename: path.basename(filePath),
|
|
973
|
+
path: filePath,
|
|
974
|
+
vault: config.vaultPath,
|
|
904
975
|
lastHash: lastSeenHashes.get(filePath) ?? null,
|
|
905
976
|
taxonomy,
|
|
906
977
|
freshEditWindowS: opts.freshEditWindowS,
|
|
@@ -926,6 +997,44 @@ export async function startMemoryd(opts: MemorydOptions): Promise<MemorydHandle>
|
|
|
926
997
|
}
|
|
927
998
|
}
|
|
928
999
|
|
|
1000
|
+
/**
 * Deterministic archiving (lean §2.2a): among the changed files, move every
 * note that `shouldArchive` accepts into the archive folder.
 *
 * Intended to run AFTER `humanEditPass`, so a note that was just marked stale
 * already carries its stamp. Per the design notes, the move is invisible to
 * permanent-detect (deletions are ignored and the archive is outside
 * `monitoredFolders`), and the following `indexAll` reconciles the path
 * change.
 *
 * A file that cannot be read is skipped silently; a failed move is logged and
 * does not stop the remaining candidates.
 *
 * @param candidatesAbs Absolute paths of the files that changed.
 * @returns How many notes were moved.
 */
function archiveStaleNotes(candidatesAbs: Set<string>): number {
  // Collision probe handed to archiveTargetRel: is this vault-relative path taken?
  const targetTaken = (relPath: string): boolean =>
    fs.existsSync(path.join(config.vaultPath, relPath));

  let archivedCount = 0;
  for (const sourceAbs of candidatesAbs) {
    const sourceRel = path.relative(config.vaultPath, sourceAbs);
    if (!isArchivableZone(sourceRel, taxonomy)) continue;

    let text: string;
    try {
      text = fs.readFileSync(sourceAbs, "utf-8");
    } catch {
      continue; // deleted mid-debounce
    }
    if (!shouldArchive(sourceRel, text, taxonomy)) continue;

    const destRel = archiveTargetRel(path.basename(sourceAbs), taxonomy, targetTaken);
    const destAbs = path.join(config.vaultPath, destRel);
    try {
      fs.mkdirSync(path.dirname(destAbs), { recursive: true });
      fs.renameSync(sourceAbs, destAbs);
      // Baselines follow the move: drop the entries keyed by the old location.
      silentStamps.delete(sourceRel);
      lastSeenHashes.delete(sourceAbs);
      archivedCount += 1;
      logger.info(`archived (stale): ${sourceRel} → ${destRel}`);
    } catch (err) {
      logger.error(`archive failed for ${sourceRel}: ${String(err)}`);
    }
  }
  return archivedCount;
}
|
|
1037
|
+
|
|
929
1038
|
// ── fs.watch + debounce ──
|
|
930
1039
|
const pending = new Set<string>();
|
|
931
1040
|
let flushTimer: ReturnType<typeof setTimeout> | null = null;
|
|
@@ -981,6 +1090,7 @@ export async function startMemoryd(opts: MemorydOptions): Promise<MemorydHandle>
|
|
|
981
1090
|
}
|
|
982
1091
|
|
|
983
1092
|
humanEditPass(changed);
|
|
1093
|
+
archiveStaleNotes(changed); // lean §2.2a — stale → archive before reindex
|
|
984
1094
|
syncTagsMirror();
|
|
985
1095
|
await indexAll({ db, config, logger }); // incremental by content hash
|
|
986
1096
|
renderFleetFragments("vault-change"); // docs/05: свежесть за секунды
|
package/src/search.ts
CHANGED
|
@@ -255,6 +255,52 @@ export async function runVaultSearch(params: {
|
|
|
255
255
|
return { results, pipeline };
|
|
256
256
|
}
|
|
257
257
|
|
|
258
|
+
/** One dedup hit from `runDedup`: the note's vault path, its title (file
 * basename without `.md`) and the similarity score that passed the threshold. */
export type DedupMatch = { path: string; title: string; similarity: number };
|
|
259
|
+
|
|
260
|
+
/** Similarity cut-off `runDedup` applies when the caller passes no `threshold`. */
export const DEFAULT_DEDUP_THRESHOLD = 0.82;
|
|
261
|
+
|
|
262
|
+
/**
 * Dedup hint (lean §3a): given the content of a CANON note being written,
 * find existing CANON notes whose similarity score is ≥ `threshold`.
 *
 * SEMANTIC by design — with embeddings disabled it returns
 * `{ enabled: false }` and the caller stays silent (a noisy BM25 overlap is
 * worse than nothing; the author has memory_search). When the embedding call
 * yields no vector the result is `{ enabled: true, matches: [] }`.
 *
 * Only notes whose first path segment is a canon folder (knowledge, decisions,
 * projects, ideas, lists) or the archive are reported; operative/inbox notes
 * are never duplicates of a canon note. `title` is the file basename without
 * `.md`, so the caller can render `[[title]]`.
 *
 * Each note is reported at most once: if the vector search returns several
 * hits for the same path, only the first one is kept, so one long note cannot
 * occupy several of the `limit` slots.
 *
 * @param db Core database handle passed through to the vector search.
 * @param config Core config; `embedding` and `taxonomy.folders` are read.
 * @param params.content Text of the note being written.
 * @param params.threshold Minimum similarity; defaults to `DEFAULT_DEDUP_THRESHOLD`.
 * @param params.limit Maximum number of matches (at least 1); defaults to 5.
 * @param params.excludePath Optional vault-relative path to leave out — pass
 *   the path of the note being written so an already-indexed copy of it is not
 *   reported as its own duplicate.
 * @returns Whether semantic dedup is enabled, plus the matches found.
 */
export async function runDedup(
  db: CoreDb,
  config: CoreConfig,
  params: { content: string; threshold?: number; limit?: number; excludePath?: string },
): Promise<{ enabled: boolean; matches: DedupMatch[] }> {
  if (!config.embedding) return { enabled: false, matches: [] };
  const threshold = params.threshold ?? DEFAULT_DEDUP_THRESHOLD;
  const limit = Math.max(1, params.limit ?? 5);
  const q = await embedQuery(params.content, config.embedding);
  if (!q.vector) return { enabled: true, matches: [] }; // embed failed / circuit-open → silent
  const raw = vectorSearch(db, q.vector, limit * 3); // headroom for the canon filter
  const f = config.taxonomy.folders;
  const canonHeads = new Set([f.knowledge, f.decisions, f.projects, f.ideas, f.lists, f.archive]);
  const seenPaths = new Set<string>();
  const matches: DedupMatch[] = [];
  for (const r of raw) {
    if (r.score < threshold) continue;
    if (params.excludePath !== undefined && r.path === params.excludePath) continue;
    if (seenPaths.has(r.path)) continue; // same note already reported
    const segments = r.path.split("/");
    if (!canonHeads.has(segments[0])) continue; // canon (+archive) only
    const base = segments[segments.length - 1] ?? r.path;
    seenPaths.add(r.path);
    matches.push({ path: r.path, title: base.replace(/\.md$/, ""), similarity: r.score });
    if (matches.length >= limit) break;
  }
  return { enabled: true, matches };
}
|
|
303
|
+
|
|
258
304
|
// --- Vector search ---
|
|
259
305
|
//
|
|
260
306
|
// Hot path: `vec_chunks` virtual table from sqlite-vec, MATCH+ORDER BY runs
|
package/src/tags-gate.ts
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tag gate + injected dictionary projection — lean §3.
|
|
3
|
+
*
|
|
4
|
+
* In lean the author tags canon notes THEMSELVES from a controlled vocabulary
|
|
5
|
+
* (`99_System/Tags.md`). Two deterministic jobs (0 LLM) live here:
|
|
6
|
+
*
|
|
7
|
+
* 1. GATE (`tagGateProblems`): the guard validates a canon note's tags
|
|
8
|
+
* against the dictionary — an unknown tag is NOT accepted; the author is
|
|
9
|
+
* told to register it in the dictionary first (a deliberate step that
|
|
10
|
+
* kills drift: `security` vs `Безопасность`). ≥1 tag is required on canon;
|
|
11
|
+
* operative notes carry none. PostToolUse fires AFTER the write, so the
|
|
12
|
+
* «rejection» is: keep `needs_review` + teach the author to fix it next
|
|
13
|
+
* step (§2.3 NB).
|
|
14
|
+
*
|
|
15
|
+
* 2. PROJECTION (`renderTagsProjection`): the dictionary is now injected to
|
|
16
|
+
* EVERY author (pre-lean: only the Index). The injected form is a COMPACT,
|
|
17
|
+
* budgeted projection — names always, the boundary ONLY where the
|
|
18
|
+
* dictionary author wrote one (overlapping domains needing disambiguation;
|
|
19
|
+
* `—` marks self-evident tags → name only). Token cost is ×the whole
|
|
20
|
+
* fleet, so the full curator table stays the SOURCE and only this slice is
|
|
21
|
+
* injected (§3, §11).
|
|
22
|
+
*
|
|
23
|
+
* The dictionary is a markdown table: `| Tag | Boundary (optional) |`. Parsing
|
|
24
|
+
* is locale-independent (no header label hard-coded): a header row is the one
|
|
25
|
+
* immediately followed by the `|---|` separator.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
/** Per-tag boundary character budget `renderTagsProjection` uses when the
 * caller supplies no `boundaryMaxLen`. */
export const DEFAULT_TAGS_BOUNDARY_MAXLEN = 160;
|
|
29
|
+
|
|
30
|
+
/**
 * True when `cell` is a markdown table delimiter cell: one or more hyphens,
 * optionally wrapped in alignment colons (`---`, `:--`, `:-:`, `-`).
 *
 * GFM accepts a single hyphen here, so `|-|-|` is a valid delimiter row and
 * must be recognised — otherwise the header above it would be parsed as a tag.
 */
function isSeparatorCell(cell: string): boolean {
  return /^:?-+:?$/.test(cell.trim());
}
|
|
34
|
+
|
|
35
|
+
/**
 * Split a markdown table row into trimmed cells.
 *
 * The leading pipe and, if present, the trailing pipe are dropped. A pipe
 * preceded by a backslash (`\|`) is the GFM escape for a literal pipe inside a
 * cell: it does not start a new column and is returned as a plain `|`.
 *
 * @param line One line of the document.
 * @returns The cells, or null when the line does not start with `|`.
 */
function tableCells(line: string): string[] | null {
  const row = line.trim();
  if (!row.startsWith("|")) return null;

  const cells: string[] = [];
  let current = "";
  let endedOnPipe = false; // was the last consumed character an unescaped pipe?
  // Index 0 is the leading pipe, so scanning starts right after it.
  for (let i = 1; i < row.length; i++) {
    const ch = row.charAt(i);
    if (ch === "\\" && row.charAt(i + 1) === "|") {
      current += "|";
      endedOnPipe = false;
      i++; // consume the escaped pipe as well
      continue;
    }
    if (ch === "|") {
      cells.push(current.trim());
      current = "";
      endedOnPipe = true;
      continue;
    }
    current += ch;
    endedOnPipe = false;
  }
  // A closing pipe ends the last cell; without one the remainder is the last cell.
  if (!endedOnPipe) cells.push(current.trim());
  return cells;
}
|
|
43
|
+
|
|
44
|
+
/** One dictionary row: `name` is the first table cell; `boundary` is the second
 * cell, or "" when it is missing or written as `—`/`-`. */
export type DictionaryEntry = { name: string; boundary: string };
|
|
45
|
+
|
|
46
|
+
/**
 * Turn the dictionary's markdown table(s) into `{ name, boundary }` entries.
 *
 * Lines that are not table rows are ignored, as are delimiter rows and header
 * rows (a header is any row whose immediately following line is a delimiter
 * row). This works across any number of tables/sections. A boundary of `—`,
 * `-` or nothing at all means the tag is self-evident and yields "".
 *
 * @param dictContent Full text of the dictionary file.
 * @returns Entries in document order.
 */
export function parseDictionaryEntries(dictContent: string): DictionaryEntry[] {
  const rows = dictContent.split("\n");
  const parsed: DictionaryEntry[] = [];
  rows.forEach((row, idx) => {
    const cells = tableCells(row);
    if (cells === null || cells.length === 0) return;

    const [name, rawBoundary = ""] = cells;
    if (!name || isSeparatorCell(name)) return;

    // Header row: the line right after it is the delimiter row.
    const following = idx + 1 < rows.length ? tableCells(rows[idx + 1]) : null;
    if (following && following.length && isSeparatorCell(following[0])) return;

    const trimmed = rawBoundary.trim();
    const selfEvident = trimmed === "—" || trimmed === "-";
    parsed.push({ name, boundary: selfEvident ? "" : trimmed });
  });
  return parsed;
}
|
|
68
|
+
|
|
69
|
+
/** Names of every dictionary entry, in document order — the gate's allow-set. */
export function parseDictionaryTags(dictContent: string): string[] {
  const names: string[] = [];
  for (const entry of parseDictionaryEntries(dictContent)) {
    names.push(entry.name);
  }
  return names;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Decide whether a note tag passes the dictionary gate.
 *
 * A tag passes when it is listed verbatim, or when it is a subtag
 * (`Root/Child…`) whose root — the part before the first `/` — is listed.
 *
 * @param tag Tag as written on the note.
 * @param allow Set of dictionary tag names.
 */
export function isTagAllowed(tag: string, allow: ReadonlySet<string>): boolean {
  if (allow.has(tag)) return true;
  const slashAt = tag.indexOf("/");
  // Without a slash the root is the tag itself, which was already rejected.
  if (slashAt === -1) return false;
  return allow.has(tag.slice(0, slashAt));
}
|
|
84
|
+
|
|
85
|
+
/**
 * Extract the tags declared in a frontmatter block.
 *
 * Only the first top-level `tags:` key is read. Supported shapes:
 *  - inline array or bare scalar: `tags: [A, "B"]`, `tags: A`
 *  - block list, indented or not: `tags:` followed by `  - A` / `- A` lines
 *
 * Surrounding single/double quotes are stripped and empty items are dropped.
 * Both LF and CRLF line endings are accepted.
 *
 * @param fmBlock Frontmatter text (the YAML between the fences).
 * @returns The tags in declaration order; [] when there is no `tags:` key.
 */
export function parseNoteTags(fmBlock: string): string[] {
  const unquote = (raw: string): string => raw.trim().replace(/^["']|["']$/g, "");
  // Split on CRLF as well: `.` never matches `\r`, so a stray `\r` would make
  // every line regex below fail and the note would look untagged.
  const lines = fmBlock.split(/\r?\n/);
  for (let i = 0; i < lines.length; i++) {
    const m = /^tags\s*:\s*(.*)$/.exec(lines[i] ?? "");
    if (!m) continue;
    const inline = (m[1] ?? "").trim();
    if (inline) {
      // inline array `[A, B]` or a bare scalar.
      const arr = inline.replace(/^\[/, "").replace(/\]$/, "");
      return arr
        .split(",")
        .map((s) => unquote(s))
        .filter(Boolean);
    }
    // block-list form: following `- item` lines. YAML allows the items to sit
    // at the same indentation as the key, so leading whitespace is optional.
    const out: string[] = [];
    for (let j = i + 1; j < lines.length; j++) {
      const item = /^\s*-\s+(.*)$/.exec(lines[j] ?? "");
      if (!item) break;
      const v = unquote(item[1] ?? "");
      if (v) out.push(v);
    }
    return out;
  }
  return [];
}
|
|
112
|
+
|
|
113
|
+
/** Settings for `tagGateProblems`. */
export type TagGateOptions = {
  /** When true, a note with zero tags is reported (canon); when false it is not. */
  requireAtLeastOne: boolean;

  /** Vault-relative path of the dictionary, quoted in the author-facing messages. */
  dictionaryRel: string;
};
|
|
119
|
+
|
|
120
|
+
/**
 * Check a note's tags against the dictionary and describe what must be fixed.
 *
 * Produces one line when tags are required but none are present, followed by
 * one line per tag that `isTagAllowed` rejects (in the order the tags appear).
 * An empty array means the note is clean and the guard has nothing to say.
 *
 * @param noteTags Tags found on the note.
 * @param allow Set of dictionary tag names.
 * @param opts Gate settings (see `TagGateOptions`).
 * @returns Author-facing problem lines.
 */
export function tagGateProblems(
  noteTags: readonly string[],
  allow: ReadonlySet<string>,
  opts: TagGateOptions,
): string[] {
  const missingTags =
    opts.requireAtLeastOne && noteTags.length === 0
      ? [`canon note has no tags — add ≥1 from the dictionary (${opts.dictionaryRel}).`]
      : [];

  const unknownTags = noteTags
    .filter((tag) => !isTagAllowed(tag, allow))
    .map(
      (tag) =>
        `tag "${tag}" is not in the dictionary — register it in ${opts.dictionaryRel} first ` +
        `(reuse an existing tag if one fits, e.g. by domain), then tag the note.`,
    );

  return [...missingTags, ...unknownTags];
}
|
|
146
|
+
|
|
147
|
+
/**
 * Shorten `s` to at most `max` UTF-16 code units and append `…`.
 *
 * The cut prefers the last space inside the budget, provided that space lies
 * beyond 60% of the budget; otherwise the text is cut at the budget itself.
 * Strings that already fit are returned unchanged. The appended `…` is not
 * counted against the budget.
 *
 * @param s Text to shorten.
 * @param max Character budget; negative values are treated as 0.
 */
function clip(s: string, max: number): string {
  // A negative end index would make slice() count from the END of the string.
  const budget = Math.max(0, max);
  if (s.length <= budget) return s;

  let cut = s.slice(0, budget);
  // Never end on the first half of a surrogate pair (e.g. a split emoji):
  // a lone high surrogate is not valid text.
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut = cut.slice(0, -1);

  const lastSpace = cut.lastIndexOf(" ");
  return (lastSpace > budget * 0.6 ? cut.slice(0, lastSpace) : cut).trimEnd() + "…";
}
|
|
154
|
+
|
|
155
|
+
/** Settings for `renderTagsProjection`. */
export type ProjectionOptions = {
  /**
   * Character budget for each tag's boundary text. The projection is injected
   * to the whole fleet (§11), so every character is paid many times over.
   */
  boundaryMaxLen?: number;
};
|
|
159
|
+
|
|
160
|
+
/**
 * Build the COMPACT projection of the dictionary that gets injected (§3/§11).
 *
 * Output is plain text, one tag per line: just `Name` when the entry has no
 * boundary, `Name — boundary` (boundary clipped to the budget) when it has
 * one. There is no table markup and no frontmatter — the full curator table
 * remains the source of truth.
 *
 * @param dictContent Full text of the dictionary file.
 * @param opts Optional budget; `boundaryMaxLen` falls back to
 *   `DEFAULT_TAGS_BOUNDARY_MAXLEN`.
 * @returns The projection, or "" when no entries could be parsed.
 */
export function renderTagsProjection(dictContent: string, opts: ProjectionOptions = {}): string {
  const entries = parseDictionaryEntries(dictContent);
  if (entries.length === 0) return "";

  const budget = opts.boundaryMaxLen ?? DEFAULT_TAGS_BOUNDARY_MAXLEN;
  const rendered: string[] = [];
  for (const { name, boundary } of entries) {
    rendered.push(boundary ? `${name} — ${clip(boundary, budget)}` : name);
  }
  return rendered.join("\n");
}
|
package/src/taxonomy.ts
CHANGED
|
@@ -59,6 +59,16 @@ export type TaxonomyStatusTokens = {
|
|
|
59
59
|
current: string;
|
|
60
60
|
};
|
|
61
61
|
|
|
62
|
+
/**
 * The `status` token a freshly created note of each canonical type starts
 * with. On the permanent branch the guard fills `status` from the type implied
 * by the note's FOLDER (lean mode §2.1, "initial token of the type").
 *
 * Checked against the live RU vault: knowledge→актуально, decision→принято,
 * idea→новая, list→актуально, project→активный, agent_memory→актуально.
 * Every token is a member of `statuses.active` (parity-tested).
 */
export type TaxonomyInitialStatus = Record<keyof TaxonomyTypes, string>;
|
|
71
|
+
|
|
62
72
|
export type TaxonomyStatuses = {
|
|
63
73
|
/** Current/live lifecycle states — search boost ×activeBoost. */
|
|
64
74
|
active: string[];
|
|
@@ -122,6 +132,9 @@ export type TaxonomyPreset = {
|
|
|
122
132
|
subtypeOrder: string[];
|
|
123
133
|
statuses: TaxonomyStatuses;
|
|
124
134
|
statusTokens: TaxonomyStatusTokens;
|
|
135
|
+
/** Per-type initial `status` token — the guard fills the permanent branch
|
|
136
|
+
* from the folder's type (lean §2.1). */
|
|
137
|
+
initialStatus: TaxonomyInitialStatus;
|
|
125
138
|
/** Phase status tokens in their project-group render order:
|
|
126
139
|
* planned → active → paused → completed → cancelled. */
|
|
127
140
|
phaseStatusOrder: string[];
|
|
@@ -169,6 +182,14 @@ export const TAXONOMY_EN: TaxonomyPreset = {
|
|
|
169
182
|
stale: ["outdated", "superseded", "dropped", "completed", "cancelled"],
|
|
170
183
|
},
|
|
171
184
|
statusTokens: { draft: "draft", current: "current" },
|
|
185
|
+
initialStatus: {
|
|
186
|
+
knowledge: "current",
|
|
187
|
+
decision: "accepted",
|
|
188
|
+
idea: "new",
|
|
189
|
+
project: "active",
|
|
190
|
+
list: "current",
|
|
191
|
+
agentMemory: "current",
|
|
192
|
+
},
|
|
172
193
|
phaseStatusOrder: ["planned", "active", "paused", "completed", "cancelled"],
|
|
173
194
|
indexStrings: {
|
|
174
195
|
header: "Vault index of notes by",
|
|
@@ -246,6 +267,14 @@ export const TAXONOMY_RU: TaxonomyPreset = {
|
|
|
246
267
|
stale: ["устарело", "заменено", "отброшена", "завершён", "завершена", "отменена"],
|
|
247
268
|
},
|
|
248
269
|
statusTokens: { draft: "черновик", current: "актуально" },
|
|
270
|
+
initialStatus: {
|
|
271
|
+
knowledge: "актуально",
|
|
272
|
+
decision: "принято",
|
|
273
|
+
idea: "новая",
|
|
274
|
+
project: "активный",
|
|
275
|
+
list: "актуально",
|
|
276
|
+
agentMemory: "актуально",
|
|
277
|
+
},
|
|
249
278
|
phaseStatusOrder: ["запланирована", "активная", "на паузе", "завершена", "отменена"],
|
|
250
279
|
indexStrings: {
|
|
251
280
|
header: "Vault-индекс заметок автора",
|
|
@@ -321,6 +350,51 @@ export function statusGroup(
|
|
|
321
350
|
return null;
|
|
322
351
|
}
|
|
323
352
|
|
|
353
|
+
/**
 * Archiving predicate (lean §2.2a: `isStale → move to archive`): true when
 * `status` is a final/closed token, i.e. `statusGroup` classifies the trimmed
 * value as "stale". A missing or empty status is never stale.
 *
 * Shared by the memoryd archiver and the index renderer so both agree with
 * the search/index semantics. Paused statuses (`на паузе` / `paused`) are
 * pending rather than stale — a resumable note is never archived.
 *
 * @param taxonomy Active taxonomy preset.
 * @param status Raw `status` value from the note, if any.
 */
export function isStale(taxonomy: TaxonomyPreset, status: string | null | undefined): boolean {
  if (status === null || status === undefined || status === "") return false;
  const group = statusGroup(taxonomy, status.trim());
  return group === "stale";
}
|
|
362
|
+
|
|
363
|
+
/**
 * Folder-key → type-key alignment table (lean §2.1, the key-alignment helper).
 *
 * The folder map and the type map describe the same genres but name their
 * keys differently — folders are plural, types singular (`decisions`↔
 * `decision`, `ideas`↔`idea`, `lists`↔`list`, `projects`↔`project`).
 * `agentMemory` is paired as well, since the memory zone reuses the same
 * alignment. Inbox, archive and system folders have no canonical type and are
 * deliberately absent.
 */
const FOLDER_TYPE_PAIRS: ReadonlyArray<[keyof TaxonomyFolders, keyof TaxonomyTypes]> = [
  ["knowledge", "knowledge"],
  ["decisions", "decision"],
  ["projects", "project"],
  ["ideas", "idea"],
  ["lists", "list"],
  ["agentMemory", "agentMemory"],
];
|
|
378
|
+
|
|
379
|
+
/**
 * Resolve the genre a vault FOLDER declares: the note `type` and the `status`
 * a new note starts with (lean §2.1: "the folder is the genre declaration").
 *
 * @param taxonomy Active taxonomy preset.
 * @param folderName First path segment of the note, relative to the vault.
 * @returns The type and initial status, or null when the folder has no
 *   canonical type (both inboxes, archive, system) — the caller then fills
 *   neither field.
 */
export function genreForFolder(
  taxonomy: TaxonomyPreset,
  folderName: string,
): { type: string; initialStatus: string } | null {
  const pair = FOLDER_TYPE_PAIRS.find(
    ([folderKey]) => taxonomy.folders[folderKey] === folderName,
  );
  if (pair === undefined) return null;

  const typeKey = pair[1];
  return { type: taxonomy.types[typeKey], initialStatus: taxonomy.initialStatus[typeKey] };
}
|
|
397
|
+
|
|
324
398
|
/**
|
|
325
399
|
* Default search-index exclusions: both inboxes (raw drafts, possibly without
|
|
326
400
|
* frontmatter) and the system folder (templates/dictionary, not content).
|