@isaacriehm/cairn-core 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/attention/bulk-accept.d.ts +0 -2
- package/dist/attention/bulk-accept.js +0 -3
- package/dist/attention/bulk-accept.js.map +1 -1
- package/dist/attention/scoring.d.ts +1 -3
- package/dist/attention/scoring.js +1 -12
- package/dist/attention/scoring.js.map +1 -1
- package/dist/claude/cache.js +1 -0
- package/dist/claude/cache.js.map +1 -1
- package/dist/claude/runner.js +25 -1
- package/dist/claude/runner.js.map +1 -1
- package/dist/claude/types.d.ts +8 -0
- package/dist/hooks/runners/payload.d.ts +10 -0
- package/dist/hooks/runners/payload.js +13 -0
- package/dist/hooks/runners/payload.js.map +1 -1
- package/dist/hooks/runners/session-end.js +2 -4
- package/dist/hooks/runners/session-end.js.map +1 -1
- package/dist/hooks/runners/session-start.js +48 -4
- package/dist/hooks/runners/session-start.js.map +1 -1
- package/dist/init/brand-derive.js +6 -1
- package/dist/init/brand-derive.js.map +1 -1
- package/dist/init/brand-setup.d.ts +12 -1
- package/dist/init/brand-setup.js +36 -1
- package/dist/init/brand-setup.js.map +1 -1
- package/dist/init/curator/corpus.d.ts +92 -0
- package/dist/init/curator/corpus.js +171 -0
- package/dist/init/curator/corpus.js.map +1 -0
- package/dist/init/curator/emit.d.ts +42 -0
- package/dist/init/curator/emit.js +230 -0
- package/dist/init/curator/emit.js.map +1 -0
- package/dist/init/curator/index.d.ts +1 -0
- package/dist/init/curator/index.js +2 -0
- package/dist/init/curator/index.js.map +1 -0
- package/dist/init/curator/regex-prefilter.d.ts +54 -0
- package/dist/init/curator/regex-prefilter.js +185 -0
- package/dist/init/curator/regex-prefilter.js.map +1 -0
- package/dist/init/curator/validate.d.ts +46 -0
- package/dist/init/curator/validate.js +100 -0
- package/dist/init/curator/validate.js.map +1 -0
- package/dist/init/curator/walker.d.ts +36 -0
- package/dist/init/curator/walker.js +380 -0
- package/dist/init/curator/walker.js.map +1 -0
- package/dist/init/eta-calibration.d.ts +39 -0
- package/dist/init/eta-calibration.js +143 -0
- package/dist/init/eta-calibration.js.map +1 -0
- package/dist/init/index.d.ts +3 -2
- package/dist/init/index.js +2 -1
- package/dist/init/index.js.map +1 -1
- package/dist/init/init.js +4 -20
- package/dist/init/init.js.map +1 -1
- package/dist/init/mapper-merge.d.ts +4 -6
- package/dist/init/mapper-merge.js +11 -34
- package/dist/init/mapper-merge.js.map +1 -1
- package/dist/init/mapper-parallel.d.ts +0 -1
- package/dist/init/mapper-parallel.js +7 -6
- package/dist/init/mapper-parallel.js.map +1 -1
- package/dist/init/mapper-prompts.d.ts +1 -4
- package/dist/init/mapper-prompts.js +2 -6
- package/dist/init/mapper-prompts.js.map +1 -1
- package/dist/init/mapper.d.ts +8 -7
- package/dist/init/mapper.js +23 -15
- package/dist/init/mapper.js.map +1 -1
- package/dist/init/overlay.js +0 -1
- package/dist/init/overlay.js.map +1 -1
- package/dist/init/phases/10-rules-merge.d.ts +7 -2
- package/dist/init/phases/10-rules-merge.js +18 -45
- package/dist/init/phases/10-rules-merge.js.map +1 -1
- package/dist/init/phases/13-multidev.d.ts +5 -1
- package/dist/init/phases/13-multidev.js +23 -2
- package/dist/init/phases/13-multidev.js.map +1 -1
- package/dist/init/phases/4-seed.js +1 -2
- package/dist/init/phases/4-seed.js.map +1 -1
- package/dist/init/phases/5-preflight.d.ts +42 -0
- package/dist/init/phases/5-preflight.js +244 -0
- package/dist/init/phases/5-preflight.js.map +1 -0
- package/dist/init/phases/7-topic-index.d.ts +6 -0
- package/dist/init/phases/7-topic-index.js +13 -0
- package/dist/init/phases/7-topic-index.js.map +1 -1
- package/dist/init/phases/8-docs-ingest.d.ts +6 -5
- package/dist/init/phases/8-docs-ingest.js +17 -56
- package/dist/init/phases/8-docs-ingest.js.map +1 -1
- package/dist/init/phases/9a-walker.d.ts +15 -0
- package/dist/init/phases/9a-walker.js +63 -0
- package/dist/init/phases/9a-walker.js.map +1 -0
- package/dist/init/phases/9b-curate.d.ts +19 -0
- package/dist/init/phases/9b-curate.js +79 -0
- package/dist/init/phases/9b-curate.js.map +1 -0
- package/dist/init/phases/9c-emit.d.ts +13 -0
- package/dist/init/phases/9c-emit.js +57 -0
- package/dist/init/phases/9c-emit.js.map +1 -0
- package/dist/init/phases/index.d.ts +6 -5
- package/dist/init/phases/index.js +4 -4
- package/dist/init/phases/index.js.map +1 -1
- package/dist/init/phases/mapper-output-io.d.ts +5 -5
- package/dist/init/phases/mapper-output-io.js +5 -5
- package/dist/init/phases/orchestrator.js +1 -1
- package/dist/init/phases/state-io.js +1 -1
- package/dist/init/phases/types.d.ts +79 -13
- package/dist/init/phases/types.js +4 -2
- package/dist/init/phases/types.js.map +1 -1
- package/dist/init/source-comments/ingest.d.ts +0 -2
- package/dist/init/source-comments/ingest.js.map +1 -1
- package/dist/init/source-comments/walker.js +2 -2
- package/dist/init/topic-index/index.d.ts +8 -0
- package/dist/init/topic-index/index.js +10 -2
- package/dist/init/topic-index/index.js.map +1 -1
- package/dist/init/topic-index/judge.d.ts +15 -0
- package/dist/init/topic-index/judge.js +15 -1
- package/dist/init/topic-index/judge.js.map +1 -1
- package/dist/init/topic-index/resolve.js +41 -14
- package/dist/init/topic-index/resolve.js.map +1 -1
- package/dist/init/walker.d.ts +1 -1
- package/dist/init/walker.js +1 -1
- package/dist/init/workflow-block.d.ts +5 -6
- package/dist/init/workflow-block.js +5 -9
- package/dist/init/workflow-block.js.map +1 -1
- package/dist/mcp/tools/bulk-accept-attention.d.ts +1 -1
- package/dist/mcp/tools/bulk-accept-attention.js +4 -6
- package/dist/mcp/tools/bulk-accept-attention.js.map +1 -1
- package/dist/mcp/tools/init-phases.d.ts +7 -6
- package/dist/mcp/tools/init-phases.js +34 -59
- package/dist/mcp/tools/init-phases.js.map +1 -1
- package/package.json +2 -2
- package/templates/.cairn/config/trust-policy.yaml +0 -3
- package/templates/.cairn/config/workflow.md +0 -1
- package/templates/.cairn/ground/canonical-map/topics.yaml +0 -12
- package/dist/init/phases/5-pilot.d.ts +0 -10
- package/dist/init/phases/5-pilot.js +0 -108
- package/dist/init/phases/5-pilot.js.map +0 -1
- package/dist/init/phases/9-source-comments.d.ts +0 -6
- package/dist/init/phases/9-source-comments.js +0 -67
- package/dist/init/phases/9-source-comments.js.map +0 -1
- package/dist/init/phases/parallel-8910.d.ts +0 -27
- package/dist/init/phases/parallel-8910.js +0 -197
- package/dist/init/phases/parallel-8910.js.map +0 -1
- package/dist/init/phases/source-comments-output-io.d.ts +0 -89
- package/dist/init/phases/source-comments-output-io.js +0 -81
- package/dist/init/phases/source-comments-output-io.js.map +0 -1
- package/templates/.cairn/ground/capabilities/mcp-tools.yaml +0 -29
- package/templates/.cairn/ground/capabilities/skills.yaml +0 -25
- package/templates/.cairn/ground/capabilities/snippets.yaml +0 -29
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Curator pipeline — regex pre-filter (Phase 9a-walker).
|
|
3
|
+
*
|
|
4
|
+
* Drops corpus records that the curator subagents would only classify
|
|
5
|
+
* as noise:
|
|
6
|
+
* - test files / fixtures / snapshots
|
|
7
|
+
* - generated / build / vendor / migrations
|
|
8
|
+
* - .archive/ + .planning/archive/
|
|
9
|
+
* - JSX block comments (lots of UI annotation noise)
|
|
10
|
+
* - license / SPDX headers
|
|
11
|
+
*   - JSDoc with only scaffolding tags (@param/@returns/@see/@throws, …) and < 12 words of prose
|
|
12
|
+
* - TODO-only or banner-only comments
|
|
13
|
+
*
|
|
14
|
+
* Mapper.off_limits_globs is also applied (so the operator's own
|
|
15
|
+
* exclusion list filters the corpus too).
|
|
16
|
+
*
|
|
17
|
+
* Plus a pure-function `stripJsdocTags` that strips `@domain`,
|
|
18
|
+
* `@orgScope`, `@softDelete`, `@see`, `@param`, `@returns`, `@throws`
|
|
19
|
+
* scaffolding lines from prose so they don't leak into reducer
|
|
20
|
+
* output. Validators downstream re-check `jsdoc-tag-leak`; this is
|
|
21
|
+
* the defense-in-depth at the walker level.
|
|
22
|
+
*
|
|
23
|
+
* Drop targets (from curator plan):
|
|
24
|
+
* 60–80% of the raw corpus is expected to drop here.
|
|
25
|
+
*/
|
|
26
|
+
/**
 * Tag explaining why the pre-filter dropped a corpus record. Reported
 * on `PrefilterResult.reason` whenever `drop` is true.
 */
export type DropReason =
    | "test-file"
    | "generated-dir"
    | "archive-dir"
    | "off-limits-glob"
    | "jsx-block-comment"
    | "license-header"
    | "jsdoc-tag-only"
    | "todo-or-banner-only"
    | "below-minimum-prose";
|
|
27
|
+
/** Input describing one candidate corpus record for `applyPrefilter`. */
export interface PrefilterArgs {
    /** Path of the originating file, relative to the repository root. */
    file: string;
    /**
     * Which sub-walker produced the record. Only `comment` records are
     * subject to the JSX, license-header and JSDoc-tag-only filters;
     * `doc` and `rule` records bypass them because paragraphs and
     * sections have a different shape.
     */
    source_kind: "comment" | "doc" | "rule";
    /** Prose after the language-specific comment markers were removed. */
    prose: string;
    /** Unprocessed block text; lets the JSX detector see the `{/*` wrapper. */
    raw?: string;
    /**
     * Mapper off-limits globs to honor. Each pattern is matched against
     * the whole repo-relative path: `*` and `?` stay inside one path
     * segment, `**` may span segments.
     */
    offLimitsGlobs?: string[];
}
|
|
43
|
+
/** Verdict returned by `applyPrefilter` for a single record. */
export interface PrefilterResult {
    /** True when the record should be excluded from the corpus. */
    drop: boolean;
    /** Which filter rejected the record; absent when `drop` is false. */
    reason?: DropReason;
    /**
     * The input prose with JSDoc scaffolding tag lines removed. This is
     * the text the caller persists to corpus.jsonl.
     */
    cleanedProse: string;
}
|
|
49
|
+
/**
 * Run one candidate record through the regex pre-filter.
 *
 * @param args - The record's path, source kind, prose and optional raw text / off-limits globs.
 * @returns Whether to drop the record, the reason if so, and the tag-stripped prose.
 */
export declare function applyPrefilter(args: PrefilterArgs): PrefilterResult;
|
|
50
|
+
/**
 * Remove JSDoc-style scaffolding tag lines (`@param`, `@returns`, …)
 * from a block of prose. This is a defensive first pass so the tags
 * never reach the LLM; downstream validators repeat the check.
 *
 * @param prose - Comment text, already free of comment markers.
 * @returns The prose without tag lines.
 */
export declare function stripJsdocTags(prose: string): string;
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Curator pipeline — regex pre-filter (Phase 9a-walker).
|
|
3
|
+
*
|
|
4
|
+
* Drops corpus records that the curator subagents would only classify
|
|
5
|
+
* as noise:
|
|
6
|
+
* - test files / fixtures / snapshots
|
|
7
|
+
* - generated / build / vendor / migrations
|
|
8
|
+
* - .archive/ + .planning/archive/
|
|
9
|
+
* - JSX block comments (lots of UI annotation noise)
|
|
10
|
+
* - license / SPDX headers
|
|
11
|
+
*   - JSDoc with only scaffolding tags (@param/@returns/@see/@throws, …) and < 12 words of prose (MIN_WORDS_AFTER_TAG_STRIP)
|
|
12
|
+
* - TODO-only or banner-only comments
|
|
13
|
+
*
|
|
14
|
+
* Mapper.off_limits_globs is also applied (so the operator's own
|
|
15
|
+
* exclusion list filters the corpus too).
|
|
16
|
+
*
|
|
17
|
+
* Plus a pure-function `stripJsdocTags` that strips `@domain`,
|
|
18
|
+
* `@orgScope`, `@softDelete`, `@see`, `@param`, `@returns`, `@throws`
|
|
19
|
+
* scaffolding lines from prose so they don't leak into reducer
|
|
20
|
+
* output. Validators downstream re-check `jsdoc-tag-leak`; this is
|
|
21
|
+
* the defense-in-depth at the walker level.
|
|
22
|
+
*
|
|
23
|
+
* Drop targets (from curator plan):
|
|
24
|
+
* 60–80% of the raw corpus is expected to drop here.
|
|
25
|
+
*/
|
|
26
|
+
// Test-suite paths: `*.spec.*` / `*.test.*` files, plus anything under
// __tests__/, e2e/, fixtures/, snapshot(s)/ or __snapshots__/.
const TEST_FILE_RE = /(?:^|\/)(?:[^/]+\.(?:spec|test)\.(?:[tj]sx?|mjs|cjs)|__tests__\/|e2e\/|fixtures\/|snapshots?\/|__snapshots__\/)/;

// Any path with a generated / build / vendored directory as a segment.
const GENERATED_DIR_RE = /(?:^|\/)(?:migrations|dist|build|generated|vendor|node_modules)\//;

// `.archive/` or `.planning/archive/` at any depth.
const ARCHIVE_DIR_RE = /(?:^|\/)\.(?:archive|planning\/archive)\//;

// A line whose first word is a task marker.
const TODO_BANNER_RE = /^\s*(?:TODO|FIXME|XXX|HACK|NOTE)\b/;

// A line made only of whitespace, punctuation and underscores (e.g. `-----`).
const PURE_BANNER_RE = /^[\s\W_]+$/;

// Typical license boilerplate phrases (case-insensitive).
const SPDX_LICENSE_RE = /\b(?:SPDX-License-Identifier|All rights reserved|Licensed under)\b/i;

// The bare word "copyright" (case-insensitive).
const COPYRIGHT_RE = /\bcopyright\b/i;

// A whole line that starts with one of the known JSDoc tags. The `g`
// flag lets `replace` remove every such line, and `m` makes `^`/`$`
// line-relative. Because of `g`, `.test()`/`.exec()` on this regex are
// stateful through `lastIndex`.
const JSDOC_TAG_LINE_RE = /^\s*@(?:domain|orgScope|softDelete|see|param|returns?|throws?|example|deprecated|since|version|author|module|namespace|alias|constant|type|typedef|callback|exports|category|memberof|inheritdoc|override|fileoverview|api|public|private|protected|internal|readonly|abstract|static|access|todo|fires|listens|hideconstructor)\b.*$/gm;

// Records with fewer words than this (after tag stripping) are dropped.
const MIN_WORDS_AFTER_TAG_STRIP = 12;
|
|
35
|
+
/**
 * Decide whether a single corpus candidate should be dropped.
 *
 * Filters run in a fixed order and the first hit wins: path-based
 * filters (test file, generated dir, archive dir, off-limits glob),
 * then comment-only filters (JSX block, license header, JSDoc tags
 * only), then the content filters that apply to every source kind
 * (TODO/banner only, too few words).
 *
 * @param args - See `PrefilterArgs`: `file`, `source_kind`, `prose`,
 *   optional `raw` and `offLimitsGlobs`.
 * @returns `{ drop, reason?, cleanedProse }`. `cleanedProse` is always
 *   the tag-stripped prose, even for dropped records.
 */
export function applyPrefilter(args) {
    // Computed up front because every result, dropped or not, carries it.
    const cleanedProse = stripJsdocTags(args.prose);
    const dropFor = (reason) => ({ drop: true, reason, cleanedProse });
    const { file } = args;

    // --- path-based filters ---
    if (TEST_FILE_RE.test(file))
        return dropFor("test-file");
    if (GENERATED_DIR_RE.test(file))
        return dropFor("generated-dir");
    if (ARCHIVE_DIR_RE.test(file))
        return dropFor("archive-dir");
    if (matchesAnyGlob(file, args.offLimitsGlobs))
        return dropFor("off-limits-glob");

    // --- filters that only make sense for source comments ---
    if (args.source_kind === "comment") {
        if (isJsxBlockComment(args.raw, file))
            return dropFor("jsx-block-comment");
        // Prefer the raw block for license detection; fall back to prose.
        if (isLicenseHeader(args.raw ?? args.prose))
            return dropFor("license-header");
        if (isJsdocTagOnly(args.prose, cleanedProse))
            return dropFor("jsdoc-tag-only");
    }

    // --- content filters for every source kind ---
    if (isTodoOrBannerOnly(cleanedProse))
        return dropFor("todo-or-banner-only");
    if (countWords(cleanedProse) < MIN_WORDS_AFTER_TAG_STRIP)
        return dropFor("below-minimum-prose");

    return { drop: false, cleanedProse };
}
|
|
68
|
+
/**
 * Remove JSDoc scaffolding tag lines from prose, then tidy whitespace:
 * trailing spaces are trimmed per line, runs of three or more newlines
 * collapse to one blank line, and the result is trimmed at both ends.
 *
 * Acts as a defensive layer before the LLM sees the text; downstream
 * validators still check for leaked tags.
 *
 * @param prose - Comment text with comment markers already removed.
 * @returns The cleaned prose.
 */
export function stripJsdocTags(prose) {
    const withoutTagLines = prose.replace(JSDOC_TAG_LINE_RE, "");
    const rightTrimmed = withoutTagLines
        .split("\n")
        .map((row) => row.trimEnd())
        .join("\n");
    const blankRunsCollapsed = rightTrimmed.replace(/\n{3,}/g, "\n\n");
    return blankRunsCollapsed.trim();
}
|
|
81
|
+
/**
 * Detect a JSX block comment, i.e. one written as `{/* … *\/}`. Those
 * are UI annotations and almost never carry decision-bearing prose.
 *
 * The check is deliberately conservative: it only fires for `.tsx` /
 * `.jsx` files, and only when the raw block (ignoring leading
 * whitespace) begins with `{/*`, meaning the JSX wrapper was captured.
 * A plain block comment in a `.ts`/`.js` file is never flagged.
 *
 * @param raw - Raw block text, or `undefined` when the walker kept none.
 * @param file - Repo-relative path of the source file.
 * @returns `true` only for a captured JSX-wrapped comment in a JSX file.
 */
function isJsxBlockComment(raw, file) {
    const inJsxFile = file.endsWith(".tsx") || file.endsWith(".jsx");
    if (!inJsxFile || raw === undefined) {
        return false;
    }
    return raw.trimStart().startsWith("{/*");
}
|
|
98
|
+
/**
 * Heuristic license-header detector. Only the first 1500 characters
 * are inspected; the block counts as a license header when that window
 * contains an SPDX/license phrase or the word "copyright".
 *
 * @param raw - Raw block text (or prose when no raw text is available).
 * @returns `true` when the window looks like license boilerplate.
 */
function isLicenseHeader(raw) {
    const window = raw.slice(0, 1500);
    if (SPDX_LICENSE_RE.test(window)) {
        return true;
    }
    return COPYRIGHT_RE.test(window);
}
|
|
102
|
+
/**
 * Returns true when the block contains at least one JSDoc scaffolding
 * tag line but fewer than MIN_WORDS_AFTER_TAG_STRIP words of real
 * prose once the tags are removed. Many JSDoc blocks are pure
 * `@param`/`@returns` lists with a one-line summary; those should be
 * dropped rather than sent to a curator subagent.
 *
 * @param originalProse - Prose before tag stripping.
 * @param cleaned - The same prose after `stripJsdocTags`.
 * @returns `true` when the block is effectively tags only.
 */
function isJsdocTagOnly(originalProse, cleaned) {
    // JSDOC_TAG_LINE_RE carries the `g` flag, so `.test()` would resume
    // from (and advance) its `lastIndex`, making the answer depend on
    // earlier calls. `String.prototype.search` always scans from index 0
    // and leaves `lastIndex` untouched.
    const hadTag = originalProse.search(JSDOC_TAG_LINE_RE) !== -1;
    if (!hadTag)
        return false;
    return countWords(cleaned) < MIN_WORDS_AFTER_TAG_STRIP;
}
|
|
114
|
+
/**
 * True when the cleaned prose has no substantive line: every non-blank
 * line is either a task marker (TODO/FIXME/XXX/HACK/NOTE …) or a pure
 * punctuation banner. Empty or whitespace-only prose also counts.
 *
 * @param cleaned - Prose after `stripJsdocTags`.
 * @returns `true` when nothing but TODOs and banners remains.
 */
function isTodoOrBannerOnly(cleaned) {
    const nonBlankLines = cleaned
        .split("\n")
        .map((row) => row.trim())
        .filter((row) => row !== "");
    // `every` is vacuously true for an empty list, which covers empty input.
    return nonBlankLines.every((row) => TODO_BANNER_RE.test(row) || PURE_BANNER_RE.test(row));
}
|
|
133
|
+
/**
 * Count words in prose. A word is a maximal run of Unicode letters
 * and/or digits; every other character (punctuation, symbols,
 * underscores, whitespace) acts as a separator.
 *
 * @param prose - Text to measure.
 * @returns Number of letter/digit runs, 0 for empty or symbol-only text.
 */
function countWords(prose) {
    const words = prose.match(/[\p{L}\p{N}]+/gu);
    return words === null ? 0 : words.length;
}
|
|
138
|
+
// Lightweight glob matching for `off_limits_globs`. Supports `*`, `**`
// and `?`; it is not a full minimatch. Each pattern is compiled to a
// regex anchored at both ends, so it must describe the whole
// repo-relative path.

/**
 * @param file - Repo-relative path to test.
 * @param globs - Patterns to try; `undefined` or empty means no match.
 * @returns `true` as soon as any pattern matches `file`.
 */
function matchesAnyGlob(file, globs) {
    if (globs === undefined) {
        return false;
    }
    return globs.some((pattern) => matchesGlob(file, pattern));
}
|
|
151
|
+
/**
 * Test one path against one glob pattern.
 *
 * @param file - Repo-relative path.
 * @param glob - Pattern understood by `globToRegex`.
 * @returns Whether the compiled pattern matches the path.
 */
function matchesGlob(file, glob) {
    return globToRegex(glob).test(file);
}
|
|
155
|
+
/**
 * Compile a glob pattern into a regex anchored at both ends.
 *
 * Translation rules:
 *   - a `**` followed by `/` at the start of a path segment becomes
 *     `(?:.*\/)?`, i.e. zero or more directories, so `**` + `/x.ts`
 *     also matches a root-level `x.ts`
 *   - any other `**` becomes `.*` (may cross `/`)
 *   - `*` becomes `[^/]*` and `?` becomes `[^/]` (stay in one segment)
 *   - regex metacharacters are escaped; everything else is literal
 *
 * @param glob - Pattern using `*`, `**` and `?` wildcards.
 * @returns A RegExp that must match the whole path.
 */
function globToRegex(glob) {
    const REGEX_META = ".+^$()[]{}|\\";
    let pattern = "";
    let i = 0;
    while (i < glob.length) {
        const ch = glob[i];
        if (ch === "*") {
            if (glob[i + 1] !== "*") {
                pattern += "[^/]*";
                i += 1;
                continue;
            }
            // Globstar. When it forms a whole segment followed by `/`,
            // the directory part is optional (zero or more directories).
            const startsSegment = i === 0 || glob[i - 1] === "/";
            if (startsSegment && glob[i + 2] === "/") {
                pattern += "(?:.*/)?";
                i += 3;
            }
            else {
                pattern += ".*";
                i += 2;
            }
            continue;
        }
        if (ch === "?") {
            pattern += "[^/]";
            i += 1;
            continue;
        }
        pattern += REGEX_META.includes(ch) ? `\\${ch}` : ch;
        i += 1;
    }
    return new RegExp(`^${pattern}$`);
}
|
|
185
|
+
//# sourceMappingURL=regex-prefilter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"regex-prefilter.js","sourceRoot":"","sources":["../../../src/init/curator/regex-prefilter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,MAAM,YAAY,GAChB,iHAAiH,CAAC;AACpH,MAAM,gBAAgB,GACpB,mEAAmE,CAAC;AACtE,MAAM,cAAc,GAAG,2CAA2C,CAAC;AACnE,MAAM,cAAc,GAAG,oCAAoC,CAAC;AAC5D,MAAM,cAAc,GAAG,YAAY,CAAC;AACpC,MAAM,eAAe,GAAG,qEAAqE,CAAC;AAC9F,MAAM,YAAY,GAAG,gBAAgB,CAAC;AACtC,MAAM,iBAAiB,GACrB,yUAAyU,CAAC;AAa5U,MAAM,yBAAyB,GAAG,EAAE,CAAC;AA0BrC,MAAM,UAAU,cAAc,CAAC,IAAmB;IAChD,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE3C,IAAI,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACjC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IACpE,CAAC;IACD,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACrC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IACxE,CAAC;IACD,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACnC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,aAAa,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IACtE,CAAC;IACD,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;QACnD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,iBAAiB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IAC1E,CAAC;IAED,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;QACnC,IAAI,iBAAiB,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;QAC5E,CAAC;QACD,IAAI,eAAe,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;QACzE,CAAC;QACD,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,EAAE,CAAC;YACxC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;QACzE,CAAC;IACH,CAAC;IAED,IAAI,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,qBAAqB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IAC9E,CAAC;IACD,IAAI,UAAU,CAAC,OAAO,CAAC,GAAG,yBAAyB,EAAE,CAAC;QACpD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,qBAAqB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IAC
9E,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;AAChD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,OAAO,KAAK;SACT,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC;SAC9B,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;SAC7B,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,iBAAiB,CAAC,GAAuB,EAAE,IAAY;IAC9D,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC9C,IAAI,GAAG,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IACpC,MAAM,IAAI,GAAG,GAAG,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACzC,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAChC,OAAO,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC/D,CAAC;AAED;;;;;GAKG;AACH,SAAS,cAAc,CAAC,aAAqB,EAAE,OAAe;IAC5D,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACrD,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,OAAO,UAAU,CAAC,OAAO,CAAC,GAAG,yBAAyB,CAAC;AACzD,CAAC;AAED,SAAS,kBAAkB,CAAC,OAAe;IACzC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,KAAK,GAAG,OAAO;SAClB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC/B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACpC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QACxC,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QACxC,OAAO,IAAI,CAAC,CAAC;IACf,CAAC;IACD,OAAO,OAAO,KAAK,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC;IACzD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACjE,OAAO,MAAM,CAAC,MAAM,CAAC;AACvB,CAAC;AAED,oEAAoE;AACpE,sEAAsE;AACtE,6DAA6D;AAC7D,yBAAyB;AACzB,SAAS,cAAc,CAAC,IAAY,EAAE,KAAgB;IACpD,IAAI,KAAK
,KAAK,SAAS,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAC5D,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC;YAAE,OAAO,IAAI,CAAC;IACxC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,WAAW,CAAC,IAAY,EAAE,IAAY;IAC7C,MAAM,EAAE,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAC7B,OAAO,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,4DAA4D;IAC5D,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;gBACxB,GAAG,IAAI,IAAI,CAAC;gBACZ,CAAC,IAAI,CAAC,CAAC;YACT,CAAC;iBAAM,CAAC;gBACN,GAAG,IAAI,OAAO,CAAC;YACjB,CAAC;YACD,SAAS;QACX,CAAC;QACD,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,GAAG,IAAI,MAAM,CAAC;YACd,SAAS;QACX,CAAC;QACD,IAAI,EAAE,KAAK,SAAS;YAAE,SAAS;QAC/B,IAAI,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,IAAI,KAAK,EAAE,EAAE,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,GAAG,IAAI,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,OAAO,IAAI,MAAM,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC;AAChC,CAAC"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Curator pipeline — strict per-entry validators.
|
|
3
|
+
*
|
|
4
|
+
* Phase 9c-emit feeds every reducer-output entry through `validateEntry`
|
|
5
|
+
* before writing it to `.cairn/ground/decisions/` or
|
|
6
|
+
* `.cairn/ground/invariants/`. Operator's auto-accept directive
|
|
7
|
+
* (curator plan §"Decision log" Q2) requires a high quality bar — when
|
|
8
|
+
* an entry fails any check it is dropped silently with a counter
|
|
9
|
+
* logged, never falling back to `_inbox/`.
|
|
10
|
+
*
|
|
11
|
+
* Failure modes encoded by `rejectReason`:
|
|
12
|
+
* - `title-length` — empty or > 80 chars
|
|
13
|
+
* - `title-no-cap` — does not start with an uppercase letter
|
|
14
|
+
* - `title-trailing-punct` — ends in `,` `:` `;`
|
|
15
|
+
* - `title-truncated-or-jsx` — ends in `...` or starts with `{/*` (JSX
|
|
16
|
+
* block-comment leakage)
|
|
17
|
+
* - `body-missing-<section>` — required heading missing
|
|
18
|
+
* - `jsdoc-tag-leak` — body contains `@domain`, `@orgScope`,
|
|
19
|
+
* `@softDelete`, `@see`, `@param`,
|
|
20
|
+
* `@returns`, `@throws` (curator pasted
|
|
21
|
+
* raw scaffolding)
|
|
22
|
+
* - `title-pasted-in-body` — body contains the title verbatim
|
|
23
|
+
* (indicates unsynthesized pass-through)
|
|
24
|
+
* - `no-scope-globs` — empty `scope_globs`
|
|
25
|
+
* - `no-evidence` — empty `evidence_files`
|
|
26
|
+
* - `evidence-missing:<path>` — cited evidence file does not exist
|
|
27
|
+
*/
|
|
28
|
+
/** A reducer-output entry that is a candidate for being written to the ground store. */
export interface FinalEntry {
    /** Entry type: `DEC` for a decision, `INV` for an invariant. */
    kind: "DEC" | "INV";
    /** Short headline; validated for length, capitalization and trailing punctuation. */
    title: string;
    /** Markdown body; validated for the required `## …` section headings. */
    body: string;
    /** Globs the entry applies to; must be non-empty to validate. */
    scope_globs: string[];
    /**
     * Repo-relative files cited as evidence, optionally carrying a
     * `:42-58` or `#L42-L58` line-range suffix. Must be non-empty and
     * every file must exist.
     */
    evidence_files: string[];
    /** Topic tags attached to the entry. */
    topic_tags: string[];
}
|
|
36
|
+
/** Outcome of `validateEntry`. */
export interface ValidationResult {
    /** True when the entry passed every check. */
    valid: boolean;
    /** Code of the first failed check; absent when `valid` is true. */
    rejectReason?: string;
}
|
|
40
|
+
/**
 * Run every strict check against one entry and report the first failure.
 *
 * @param e - Entry to validate.
 * @param repoRoot - Directory that `evidence_files` paths are resolved against.
 * @returns `{ valid: true }` or `{ valid: false, rejectReason }`.
 */
export declare function validateEntry(e: FinalEntry, repoRoot: string): ValidationResult;
|
|
41
|
+
/**
 * Reduce an evidence reference to its bare file path by removing a
 * trailing line range. Two spellings are understood: the colon form
 * (`file.ts:42-58`) and the GitHub anchor form (`file.ts#L42-L58`).
 * The validator uses the result for its file-existence check.
 *
 * @param evidenceFile - Evidence reference, with or without a line range.
 * @returns The reference without its line-range suffix.
 */
export declare function stripLineRange(evidenceFile: string): string;
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Curator pipeline — strict per-entry validators.
|
|
3
|
+
*
|
|
4
|
+
* Phase 9c-emit feeds every reducer-output entry through `validateEntry`
|
|
5
|
+
* before writing it to `.cairn/ground/decisions/` or
|
|
6
|
+
* `.cairn/ground/invariants/`. Operator's auto-accept directive
|
|
7
|
+
* (curator plan §"Decision log" Q2) requires a high quality bar — when
|
|
8
|
+
* an entry fails any check it is dropped silently with a counter
|
|
9
|
+
* logged, never falling back to `_inbox/`.
|
|
10
|
+
*
|
|
11
|
+
* Failure modes encoded by `rejectReason`:
|
|
12
|
+
* - `title-length` — empty or > 80 chars
|
|
13
|
+
* - `title-no-cap` — does not start with an uppercase letter
|
|
14
|
+
* - `title-trailing-punct` — ends in `,` `:` `;`
|
|
15
|
+
* - `title-truncated-or-jsx` — ends in `...` or starts with `{/*` (JSX
|
|
16
|
+
* block-comment leakage)
|
|
17
|
+
* - `body-missing-<section>` — required heading missing
|
|
18
|
+
* - `jsdoc-tag-leak` — body contains `@domain`, `@orgScope`,
|
|
19
|
+
* `@softDelete`, `@see`, `@param`,
|
|
20
|
+
* `@returns`, `@throws` (curator pasted
|
|
21
|
+
* raw scaffolding)
|
|
22
|
+
* - `title-pasted-in-body` — body contains the title verbatim
|
|
23
|
+
* (indicates unsynthesized pass-through)
|
|
24
|
+
* - `no-scope-globs` — empty `scope_globs`
|
|
25
|
+
* - `no-evidence` — empty `evidence_files`
|
|
26
|
+
* - `evidence-missing:<path>` — cited evidence file does not exist
|
|
27
|
+
*/
|
|
28
|
+
import { existsSync } from "node:fs";
|
|
29
|
+
import { join } from "node:path";
|
|
30
|
+
/**
 * Validate one reducer-output entry; the first failing check wins.
 *
 * Check order: title length, title truncation / JSX leakage, title
 * capitalization, title trailing punctuation, required body sections,
 * JSDoc tag leak, title pasted into body, scope globs present,
 * evidence present, every evidence file exists under `repoRoot`.
 *
 * @param e - Entry with `kind`, `title`, `body`, `scope_globs`, `evidence_files`.
 * @param repoRoot - Directory evidence paths are joined onto.
 * @returns `{ valid: true }`, or `{ valid: false, rejectReason }` naming the failed check.
 */
export function validateEntry(e, repoRoot) {
    const reject = (rejectReason) => ({ valid: false, rejectReason });
    const { title } = e;

    // Title checks. The specific failure modes (truncation, JSX
    // leakage) are tested before capitalization so that a title
    // starting with `{/*` is not reported as merely `title-no-cap`.
    if (title.length === 0 || title.length > 80) {
        return reject("title-length");
    }
    if (title.endsWith("...") || title.startsWith("{/*")) {
        return reject("title-truncated-or-jsx");
    }
    if (!/^[A-Z]/.test(title)) {
        return reject("title-no-cap");
    }
    if ([",", ":", ";"].some((mark) => title.endsWith(mark))) {
        return reject("title-trailing-punct");
    }

    // Required headings; only the middle one depends on the entry kind.
    const kindHeading = e.kind === "INV" ? "## Invariant" : "## Decision";
    const missingHeading = ["## Context", kindHeading, "## Why"].find((heading) => !e.body.includes(heading));
    if (missingHeading !== undefined) {
        return reject(`body-missing-${missingHeading}`);
    }

    // Raw JSDoc scaffolding must not have been pasted into the body.
    if (/@(domain|orgScope|softDelete|see|param|returns|throws)\b/.test(e.body)) {
        return reject("jsdoc-tag-leak");
    }

    // A body that repeats the title verbatim signals unsynthesized pass-through.
    if (e.body.includes(title)) {
        return reject("title-pasted-in-body");
    }

    if (e.scope_globs.length === 0) {
        return reject("no-scope-globs");
    }
    if (e.evidence_files.length === 0) {
        return reject("no-evidence");
    }

    // Every cited file must exist once its line-range suffix is removed.
    for (const reference of e.evidence_files) {
        const barePath = stripLineRange(reference);
        if (!existsSync(join(repoRoot, barePath))) {
            return reject(`evidence-missing:${barePath}`);
        }
    }
    return { valid: true };
}
|
|
80
|
+
/**
 * Drop the line-range suffix from an evidence reference so the bare
 * path can be checked for existence. Accepts the GitHub anchor form
 * (`path/to/file.ts#L42-L58`) and the colon form
 * (`path/to/file.ts:42-58` or `path/to/file.ts:42`).
 *
 * @param evidenceFile - Evidence reference as emitted by the reducer.
 * @returns The reference without its suffix, or unchanged when none is present.
 */
export function stripLineRange(evidenceFile) {
    // Anchor form: everything from the first `#` onward is the range.
    const anchorAt = evidenceFile.indexOf("#");
    if (anchorAt >= 0) {
        return evidenceFile.substring(0, anchorAt);
    }
    // Colon form: strip only when what follows the last colon is purely
    // numeric (`42` or `42-58`) and the colon is past index 1, so a
    // Windows drive prefix such as `C:/...` is never clobbered. Such
    // absolute paths should not occur in repo-relative evidence anyway.
    const lastColon = evidenceFile.lastIndexOf(":");
    const tail = evidenceFile.substring(lastColon + 1);
    const tailIsLineRange = /^\d+(?:-\d+)?$/.test(tail);
    return lastColon >= 2 && tailIsLineRange
        ? evidenceFile.substring(0, lastColon)
        : evidenceFile;
}
|
|
100
|
+
//# sourceMappingURL=validate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../../src/init/curator/validate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAgBjC,MAAM,UAAU,aAAa,CAC3B,CAAa,EACb,QAAgB;IAEhB,+DAA+D;IAC/D,qEAAqE;IACrE,yDAAyD;IACzD,kBAAkB;IAClB,IAAI,CAAC,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QAChD,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,CAAC;IACxD,CAAC;IACD,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QACvD,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,wBAAwB,EAAE,CAAC;IAClE,CAAC;IACD,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,CAAC;IACxD,CAAC;IACD,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,sBAAsB,EAAE,CAAC;IAChE,CAAC;IAED,gBAAgB;IAChB,MAAM,gBAAgB,GAAG,CAAC,CAAC,IAAI,KAAK,KAAK;QACvC,CAAC,CAAC,CAAC,YAAY,EAAE,cAAc,EAAE,QAAQ,CAAC;QAC1C,CAAC,CAAC,CAAC,YAAY,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC;IAC5C,KAAK,MAAM,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACnC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,gBAAgB,GAAG,EAAE,EAAE,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,IAAI,0DAA0D,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5E,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC;IAC1D,CAAC;IAED,yBAAyB;IACzB,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,sBAAsB,EAAE,CAAC;IAChE,CAAC;IAED,uBAAuB;IACvB,IAAI,CAAC,CAAC,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC;IAC1D,CAAC;IAED,kCAAkC;IAClC,IAAI,CAAC,CAAC,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;IACvD,CAAC;IACD,KAAK,MAAM,EAAE,IAAI,CAAC,CAAC,cAAc,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,cAAc,CAAC,EAAE,
CAAC,CAAC;QAChC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC;YACtC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,oBAAoB,IAAI,EAAE,EAAE,CAAC;QACpE,CAAC;IACH,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;AACzB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,YAAoB;IACjD,iDAAiD;IACjD,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC1C,IAAI,OAAO,KAAK,CAAC,CAAC;QAAE,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IAC1D,kEAAkE;IAClE,gEAAgE;IAChE,iEAAiE;IACjE,kDAAkD;IAClD,MAAM,QAAQ,GAAG,YAAY,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC/C,IAAI,QAAQ,GAAG,CAAC,IAAI,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5E,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Curator pipeline — Phase 9a-walker top-level (v0.9.0).
|
|
3
|
+
*
|
|
4
|
+
* Builds the unified corpus by running three sub-walkers:
|
|
5
|
+
*
|
|
6
|
+
* - Source comments (existing `walkSourceComments`) — essay-class
|
|
7
|
+
* block comments per source file
|
|
8
|
+
* - Doc paragraphs (existing `discoverDocs` + paragraph splitter)
|
|
9
|
+
* — README + docs/**\/*.md paragraphs ≥80 chars
|
|
10
|
+
* - Rule sections (existing `discoverRuleSources` +
|
|
11
|
+
* `parseRuleSections`) — H2/H3 sections from CLAUDE.md /
|
|
12
|
+
* AGENTS.md / .claude/rules/**\/*.md
|
|
13
|
+
*
|
|
14
|
+
* Each candidate runs through the regex pre-filter (`regex-prefilter.ts`)
|
|
15
|
+
* which drops 60-80% of raw blocks (test files, JSX comments, license
|
|
16
|
+
* headers, JSDoc with only @tags, etc.). Survivors get written to
|
|
17
|
+
* `.cairn/init/curator/corpus.jsonl` and packed into shards capped
|
|
18
|
+
* at 120k input tokens (`shards.json`).
|
|
19
|
+
*/
|
|
20
|
+
/** Options for `runCuratorWalker`. */
export interface RunCuratorWalkerArgs {
    /** Root directory of the repository to walk. */
    repoRoot: string;
}
|
|
23
|
+
/** Summary returned once the corpus and its shards have been built. */
export interface RunCuratorWalkerResult {
    /** Location of the written corpus file (corpus.jsonl). */
    corpus_path: string;
    /** Location of the written shard manifest (shards.json). */
    shards_path: string;
    /** Total number of records reported for this run. */
    records_total: number;
    /** Record counts split by the sub-walker that produced them. */
    records_by_kind: {
        comment: number;
        doc: number;
        rule: number;
    };
    /** Drop counters keyed by string — presumably the pre-filter drop reason; confirm against the walker. */
    dropped: Record<string, number>;
    /** Number of shards the corpus was packed into. */
    shards: number;
    /** Estimated input-token total — presumably across all shards; confirm against the walker. */
    total_input_tokens_estimate: number;
}
|
|
36
|
+
/**
 * Phase 9a-walker entry point: build the unified curator corpus for a
 * repository and pack it into shards.
 *
 * @param args - Carries the repository root.
 * @returns Paths of the written artifacts plus record, drop and shard statistics.
 */
export declare function runCuratorWalker(args: RunCuratorWalkerArgs): Promise<RunCuratorWalkerResult>;
|