@isaacriehm/cairn-core 0.8.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/dist/.tsbuildinfo +1 -1
  2. package/dist/attention/bulk-accept.d.ts +0 -2
  3. package/dist/attention/bulk-accept.js +0 -3
  4. package/dist/attention/bulk-accept.js.map +1 -1
  5. package/dist/attention/scoring.d.ts +1 -3
  6. package/dist/attention/scoring.js +1 -12
  7. package/dist/attention/scoring.js.map +1 -1
  8. package/dist/claude/cache.js +1 -0
  9. package/dist/claude/cache.js.map +1 -1
  10. package/dist/claude/runner.js +25 -1
  11. package/dist/claude/runner.js.map +1 -1
  12. package/dist/claude/types.d.ts +8 -0
  13. package/dist/hooks/runners/payload.d.ts +10 -0
  14. package/dist/hooks/runners/payload.js +13 -0
  15. package/dist/hooks/runners/payload.js.map +1 -1
  16. package/dist/hooks/runners/session-end.js +2 -4
  17. package/dist/hooks/runners/session-end.js.map +1 -1
  18. package/dist/hooks/runners/session-start.js +48 -4
  19. package/dist/hooks/runners/session-start.js.map +1 -1
  20. package/dist/init/brand-derive.js +6 -1
  21. package/dist/init/brand-derive.js.map +1 -1
  22. package/dist/init/brand-setup.d.ts +12 -1
  23. package/dist/init/brand-setup.js +36 -1
  24. package/dist/init/brand-setup.js.map +1 -1
  25. package/dist/init/curator/corpus.d.ts +92 -0
  26. package/dist/init/curator/corpus.js +171 -0
  27. package/dist/init/curator/corpus.js.map +1 -0
  28. package/dist/init/curator/emit.d.ts +42 -0
  29. package/dist/init/curator/emit.js +230 -0
  30. package/dist/init/curator/emit.js.map +1 -0
  31. package/dist/init/curator/index.d.ts +1 -0
  32. package/dist/init/curator/index.js +2 -0
  33. package/dist/init/curator/index.js.map +1 -0
  34. package/dist/init/curator/regex-prefilter.d.ts +54 -0
  35. package/dist/init/curator/regex-prefilter.js +185 -0
  36. package/dist/init/curator/regex-prefilter.js.map +1 -0
  37. package/dist/init/curator/validate.d.ts +46 -0
  38. package/dist/init/curator/validate.js +100 -0
  39. package/dist/init/curator/validate.js.map +1 -0
  40. package/dist/init/curator/walker.d.ts +36 -0
  41. package/dist/init/curator/walker.js +380 -0
  42. package/dist/init/curator/walker.js.map +1 -0
  43. package/dist/init/eta-calibration.d.ts +39 -0
  44. package/dist/init/eta-calibration.js +143 -0
  45. package/dist/init/eta-calibration.js.map +1 -0
  46. package/dist/init/index.d.ts +3 -2
  47. package/dist/init/index.js +2 -1
  48. package/dist/init/index.js.map +1 -1
  49. package/dist/init/init.js +4 -20
  50. package/dist/init/init.js.map +1 -1
  51. package/dist/init/mapper-merge.d.ts +4 -6
  52. package/dist/init/mapper-merge.js +11 -34
  53. package/dist/init/mapper-merge.js.map +1 -1
  54. package/dist/init/mapper-parallel.d.ts +0 -1
  55. package/dist/init/mapper-parallel.js +7 -6
  56. package/dist/init/mapper-parallel.js.map +1 -1
  57. package/dist/init/mapper-prompts.d.ts +1 -4
  58. package/dist/init/mapper-prompts.js +2 -6
  59. package/dist/init/mapper-prompts.js.map +1 -1
  60. package/dist/init/mapper.d.ts +8 -7
  61. package/dist/init/mapper.js +23 -15
  62. package/dist/init/mapper.js.map +1 -1
  63. package/dist/init/overlay.js +0 -1
  64. package/dist/init/overlay.js.map +1 -1
  65. package/dist/init/phases/10-rules-merge.d.ts +7 -2
  66. package/dist/init/phases/10-rules-merge.js +18 -45
  67. package/dist/init/phases/10-rules-merge.js.map +1 -1
  68. package/dist/init/phases/13-multidev.d.ts +5 -1
  69. package/dist/init/phases/13-multidev.js +23 -2
  70. package/dist/init/phases/13-multidev.js.map +1 -1
  71. package/dist/init/phases/4-seed.js +1 -2
  72. package/dist/init/phases/4-seed.js.map +1 -1
  73. package/dist/init/phases/5-preflight.d.ts +42 -0
  74. package/dist/init/phases/5-preflight.js +244 -0
  75. package/dist/init/phases/5-preflight.js.map +1 -0
  76. package/dist/init/phases/7-topic-index.d.ts +6 -0
  77. package/dist/init/phases/7-topic-index.js +13 -0
  78. package/dist/init/phases/7-topic-index.js.map +1 -1
  79. package/dist/init/phases/8-docs-ingest.d.ts +6 -5
  80. package/dist/init/phases/8-docs-ingest.js +17 -56
  81. package/dist/init/phases/8-docs-ingest.js.map +1 -1
  82. package/dist/init/phases/9a-walker.d.ts +15 -0
  83. package/dist/init/phases/9a-walker.js +63 -0
  84. package/dist/init/phases/9a-walker.js.map +1 -0
  85. package/dist/init/phases/9b-curate.d.ts +19 -0
  86. package/dist/init/phases/9b-curate.js +79 -0
  87. package/dist/init/phases/9b-curate.js.map +1 -0
  88. package/dist/init/phases/9c-emit.d.ts +13 -0
  89. package/dist/init/phases/9c-emit.js +57 -0
  90. package/dist/init/phases/9c-emit.js.map +1 -0
  91. package/dist/init/phases/index.d.ts +6 -5
  92. package/dist/init/phases/index.js +4 -4
  93. package/dist/init/phases/index.js.map +1 -1
  94. package/dist/init/phases/mapper-output-io.d.ts +5 -5
  95. package/dist/init/phases/mapper-output-io.js +5 -5
  96. package/dist/init/phases/orchestrator.js +1 -1
  97. package/dist/init/phases/state-io.js +1 -1
  98. package/dist/init/phases/types.d.ts +79 -13
  99. package/dist/init/phases/types.js +4 -2
  100. package/dist/init/phases/types.js.map +1 -1
  101. package/dist/init/source-comments/ingest.d.ts +0 -2
  102. package/dist/init/source-comments/ingest.js.map +1 -1
  103. package/dist/init/source-comments/walker.js +2 -2
  104. package/dist/init/topic-index/index.d.ts +8 -0
  105. package/dist/init/topic-index/index.js +10 -2
  106. package/dist/init/topic-index/index.js.map +1 -1
  107. package/dist/init/topic-index/judge.d.ts +15 -0
  108. package/dist/init/topic-index/judge.js +15 -1
  109. package/dist/init/topic-index/judge.js.map +1 -1
  110. package/dist/init/topic-index/resolve.js +41 -14
  111. package/dist/init/topic-index/resolve.js.map +1 -1
  112. package/dist/init/walker.d.ts +1 -1
  113. package/dist/init/walker.js +1 -1
  114. package/dist/init/workflow-block.d.ts +5 -6
  115. package/dist/init/workflow-block.js +5 -9
  116. package/dist/init/workflow-block.js.map +1 -1
  117. package/dist/mcp/tools/bulk-accept-attention.d.ts +1 -1
  118. package/dist/mcp/tools/bulk-accept-attention.js +4 -6
  119. package/dist/mcp/tools/bulk-accept-attention.js.map +1 -1
  120. package/dist/mcp/tools/init-phases.d.ts +7 -6
  121. package/dist/mcp/tools/init-phases.js +34 -59
  122. package/dist/mcp/tools/init-phases.js.map +1 -1
  123. package/dist/session-start/build.js +47 -5
  124. package/dist/session-start/build.js.map +1 -1
  125. package/package.json +2 -2
  126. package/templates/.cairn/config/trust-policy.yaml +0 -3
  127. package/templates/.cairn/config/workflow.md +0 -1
  128. package/templates/.cairn/ground/canonical-map/topics.yaml +0 -12
  129. package/dist/init/phases/5-pilot.d.ts +0 -10
  130. package/dist/init/phases/5-pilot.js +0 -108
  131. package/dist/init/phases/5-pilot.js.map +0 -1
  132. package/dist/init/phases/9-source-comments.d.ts +0 -6
  133. package/dist/init/phases/9-source-comments.js +0 -67
  134. package/dist/init/phases/9-source-comments.js.map +0 -1
  135. package/dist/init/phases/parallel-8910.d.ts +0 -27
  136. package/dist/init/phases/parallel-8910.js +0 -197
  137. package/dist/init/phases/parallel-8910.js.map +0 -1
  138. package/dist/init/phases/source-comments-output-io.d.ts +0 -89
  139. package/dist/init/phases/source-comments-output-io.js +0 -81
  140. package/dist/init/phases/source-comments-output-io.js.map +0 -1
  141. package/templates/.cairn/ground/capabilities/mcp-tools.yaml +0 -29
  142. package/templates/.cairn/ground/capabilities/skills.yaml +0 -25
  143. package/templates/.cairn/ground/capabilities/snippets.yaml +0 -29
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Curator pipeline — regex pre-filter (Phase 9a-walker).
3
+ *
4
+ * Drops corpus records that the curator subagents would only classify
5
+ * as noise:
6
+ * - test files / fixtures / snapshots
7
+ * - generated / build / vendor / migrations
8
+ * - .archive/ + .planning/archive/
9
+ * - JSX block comments (lots of UI annotation noise)
10
+ * - license / SPDX headers
11
+ * - JSDoc with only scaffolding tags (@param/@returns/@see/@throws, …) and < 12 words of prose
12
+ * - TODO-only or banner-only comments
13
+ *
14
+ * Mapper.off_limits_globs is also applied (so the operator's own
15
+ * exclusion list filters the corpus too).
16
+ *
17
+ * Plus a pure-function `stripJsdocTags` that strips `@domain`,
18
+ * `@orgScope`, `@softDelete`, `@see`, `@param`, `@returns`, `@throws`
19
+ * scaffolding lines from prose so they don't leak into reducer
20
+ * output. Validators downstream re-check `jsdoc-tag-leak`; this is
21
+ * the defense-in-depth at the walker level.
22
+ *
23
+ * Drop targets (from curator plan):
24
+ * 60–80% of the raw corpus is expected to drop here.
25
+ */
26
/**
 * Machine-readable reason attached to a record that `applyPrefilter`
 * dropped. The first four values come from path rules, the next three
 * from checks applied only to `comment` records, and the last two from
 * the generic prose checks.
 */
export type DropReason =
    | "test-file"
    | "generated-dir"
    | "archive-dir"
    | "off-limits-glob"
    | "jsx-block-comment"
    | "license-header"
    | "jsdoc-tag-only"
    | "todo-or-banner-only"
    | "below-minimum-prose";
27
/** One candidate corpus block, plus the context the pre-filter needs to judge it. */
export interface PrefilterArgs {
    /** Repo-relative path of the file the block was extracted from. */
    file: string;
    /**
     * Which sub-walker produced the record. Only `comment` records go
     * through the JSX-block, license-header and JSDoc-tag-only checks;
     * `doc` and `rule` records skip them.
     */
    source_kind: "comment" | "doc" | "rule";
    /** Prose after the language-specific comment markers were stripped. */
    prose: string;
    /**
     * The unprocessed block. Used by the JSX detector and, when present,
     * preferred over `prose` by the license-header check.
     */
    raw?: string;
    /**
     * Mapper off-limits globs. A record whose `file` matches any of them
     * is dropped with reason `off-limits-glob`.
     */
    offLimitsGlobs?: string[];
}
43
/** Verdict returned by `applyPrefilter` for a single record. */
export interface PrefilterResult {
    /** `true` when the record should be discarded. */
    drop: boolean;
    /** Which rule fired; set on every dropped record and omitted on kept ones. */
    reason?: DropReason;
    /**
     * The input prose with JSDoc scaffolding tag lines removed. Populated
     * for kept and dropped records alike; the caller writes this to
     * corpus.jsonl.
     */
    cleanedProse: string;
}
49
/**
 * Run every pre-filter rule against one candidate record and report
 * whether it should be dropped, why, and its tag-stripped prose.
 */
export declare function applyPrefilter(
    args: PrefilterArgs,
): PrefilterResult;
50
/**
 * Remove JSDoc-style scaffolding tag lines (`@param`, `@returns`, …)
 * from a block of prose and tidy the whitespace left behind. This is a
 * defensive layer ahead of the LLM; downstream validators still check
 * for leaked tags.
 */
export declare function stripJsdocTags(
    prose: string,
): string;
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Curator pipeline — regex pre-filter (Phase 9a-walker).
3
+ *
4
+ * Drops corpus records that the curator subagents would only classify
5
+ * as noise:
6
+ * - test files / fixtures / snapshots
7
+ * - generated / build / vendor / migrations
8
+ * - .archive/ + .planning/archive/
9
+ * - JSX block comments (lots of UI annotation noise)
10
+ * - license / SPDX headers
11
+ * - JSDoc with only scaffolding tags (@param/@returns/@see/@throws, …) and < 12 words of prose
12
+ * - TODO-only or banner-only comments
13
+ *
14
+ * Mapper.off_limits_globs is also applied (so the operator's own
15
+ * exclusion list filters the corpus too).
16
+ *
17
+ * Plus a pure-function `stripJsdocTags` that strips `@domain`,
18
+ * `@orgScope`, `@softDelete`, `@see`, `@param`, `@returns`, `@throws`
19
+ * scaffolding lines from prose so they don't leak into reducer
20
+ * output. Validators downstream re-check `jsdoc-tag-leak`; this is
21
+ * the defense-in-depth at the walker level.
22
+ *
23
+ * Drop targets (from curator plan):
24
+ * 60–80% of the raw corpus is expected to drop here.
25
+ */
26
+ const TEST_FILE_RE = /(?:^|\/)(?:[^/]+\.(?:spec|test)\.(?:[tj]sx?|mjs|cjs)|__tests__\/|e2e\/|fixtures\/|snapshots?\/|__snapshots__\/)/;
27
+ const GENERATED_DIR_RE = /(?:^|\/)(?:migrations|dist|build|generated|vendor|node_modules)\//;
28
+ const ARCHIVE_DIR_RE = /(?:^|\/)\.(?:archive|planning\/archive)\//;
29
+ const TODO_BANNER_RE = /^\s*(?:TODO|FIXME|XXX|HACK|NOTE)\b/;
30
+ const PURE_BANNER_RE = /^[\s\W_]+$/;
31
+ const SPDX_LICENSE_RE = /\b(?:SPDX-License-Identifier|All rights reserved|Licensed under)\b/i;
32
+ const COPYRIGHT_RE = /\bcopyright\b/i;
33
+ const JSDOC_TAG_LINE_RE = /^\s*@(?:domain|orgScope|softDelete|see|param|returns?|throws?|example|deprecated|since|version|author|module|namespace|alias|constant|type|typedef|callback|exports|category|memberof|inheritdoc|override|fileoverview|api|public|private|protected|internal|readonly|abstract|static|access|todo|fires|listens|hideconstructor)\b.*$/gm;
34
+ const MIN_WORDS_AFTER_TAG_STRIP = 12;
35
/**
 * Decide whether one candidate corpus record is noise.
 *
 * Rules run in a fixed order and the first hit wins:
 *   1. path rules — test file, generated dir, archive dir, off-limits glob
 *   2. `comment`-only rules — JSX block comment, license header,
 *      JSDoc-tag-only block
 *   3. generic prose rules — TODO/banner-only, then minimum word count
 *
 * @param args the record and its provenance (file, source kind, prose,
 *   optional raw block, optional off-limits globs)
 * @returns `{ drop, reason?, cleanedProse }`; `cleanedProse` is the
 *   tag-stripped prose and is returned whether or not the record is
 *   dropped
 */
export function applyPrefilter(args) {
    const cleanedProse = stripJsdocTags(args.prose);
    // Every drop verdict has the same shape; only the reason differs.
    const dropBecause = (reason) => ({ drop: true, reason, cleanedProse });
    const path = args.file;

    // 1. Path-based rules.
    if (TEST_FILE_RE.test(path))
        return dropBecause("test-file");
    if (GENERATED_DIR_RE.test(path))
        return dropBecause("generated-dir");
    if (ARCHIVE_DIR_RE.test(path))
        return dropBecause("archive-dir");
    if (matchesAnyGlob(path, args.offLimitsGlobs))
        return dropBecause("off-limits-glob");

    // 2. Rules that only make sense for source comments.
    if (args.source_kind === "comment") {
        if (isJsxBlockComment(args.raw, path))
            return dropBecause("jsx-block-comment");
        // Prefer the raw block for license detection; fall back to prose.
        if (isLicenseHeader(args.raw ?? args.prose))
            return dropBecause("license-header");
        if (isJsdocTagOnly(args.prose, cleanedProse))
            return dropBecause("jsdoc-tag-only");
    }

    // 3. Generic prose-quality rules, applied to every source kind.
    if (isTodoOrBannerOnly(cleanedProse))
        return dropBecause("todo-or-banner-only");
    if (countWords(cleanedProse) < MIN_WORDS_AFTER_TAG_STRIP)
        return dropBecause("below-minimum-prose");

    return { drop: false, cleanedProse };
}
68
/**
 * Remove JSDoc scaffolding tag lines from prose, then normalise the
 * whitespace left behind: trailing whitespace is trimmed from every
 * line, runs of three or more newlines collapse to a single blank
 * line, and the result is trimmed at both ends.
 *
 * Defensive layer before the LLM ever sees the tags; validators
 * downstream still check.
 *
 * @param prose comment/doc text, possibly containing `@tag …` lines
 * @returns the cleaned prose
 */
export function stripJsdocTags(prose) {
    const withoutTagLines = prose.replace(JSDOC_TAG_LINE_RE, "");
    const rightTrimmed = [];
    for (const line of withoutTagLines.split("\n")) {
        rightTrimmed.push(line.trimEnd());
    }
    const collapsed = rightTrimmed.join("\n").replace(/\n{3,}/g, "\n\n");
    return collapsed.trim();
}
81
/**
 * Detect a JSX block comment, i.e. a comment wrapped in JSX braces.
 * These are UI annotations and almost never carry decision-bearing
 * prose.
 *
 * The check is deliberately conservative: it only fires for `.tsx` /
 * `.jsx` files, and only when the raw block, ignoring leading
 * whitespace, begins with the `{` + block-comment opener that JSX
 * uses. A missing raw block is never flagged.
 *
 * @param raw the unprocessed block, or `undefined` when not captured
 * @param file repo-relative path of the source file
 * @returns `true` when the block is a JSX block comment
 */
function isJsxBlockComment(raw, file) {
    const isJsxFile = /\.(?:[tj])sx$/.test(file);
    if (!isJsxFile || raw === undefined) {
        return false;
    }
    return raw.trimStart().startsWith("{/*");
}
98
/**
 * Heuristic license-header detector. Only the first 1500 characters
 * are inspected; the block is flagged when that window contains an
 * SPDX/license phrase or the word "copyright".
 *
 * @param raw the block text to inspect
 * @returns `true` when the block looks like a license header
 */
function isLicenseHeader(raw) {
    const window = raw.slice(0, 1500);
    if (SPDX_LICENSE_RE.test(window)) {
        return true;
    }
    return COPYRIGHT_RE.test(window);
}
102
/**
 * Returns true when the JSDoc block carries scaffolding tags but fewer
 * than MIN_WORDS_AFTER_TAG_STRIP words of real prose. Many JSDoc blocks
 * are pure `@param`/`@returns` lists with a one-line summary; the
 * curator should drop those rather than dispatch a model call.
 *
 * @param originalProse the prose before tag stripping
 * @param cleaned the same prose after `stripJsdocTags`
 * @returns `true` when the block had at least one tag line and too
 *   little prose remains
 */
function isJsdocTagOnly(originalProse, cleaned) {
    // JSDOC_TAG_LINE_RE is a global (`g`) regex, so `.test()` on it
    // resumes from `lastIndex` and can report "no tag" for a block that
    // has one, depending on what was matched before.
    // `String.prototype.search` always scans from the start and leaves
    // `lastIndex` untouched, so the answer depends only on the input.
    const hadTag = originalProse.search(JSDOC_TAG_LINE_RE) !== -1;
    if (!hadTag) {
        return false;
    }
    return countWords(cleaned) < MIN_WORDS_AFTER_TAG_STRIP;
}
114
/**
 * Returns true when the prose carries nothing but TODO-style marker
 * lines and decorative banner lines. Empty or whitespace-only prose
 * also counts.
 *
 * @param cleaned tag-stripped prose
 * @returns `true` when no line with real content remains
 */
function isTodoOrBannerOnly(cleaned) {
    // A line is "ignorable" when it is blank, opens with a TODO-style
    // marker, or is pure decoration. The prose is noise exactly when
    // every line is ignorable.
    return cleaned.split("\n").every((rawLine) => {
        const line = rawLine.trim();
        return (line.length === 0 ||
            TODO_BANNER_RE.test(line) ||
            PURE_BANNER_RE.test(line));
    });
}
133
/**
 * Count the words in a piece of prose. Every character that is not a
 * Unicode letter, a Unicode number or whitespace is treated as a
 * separator, so `don't` counts as two words and `foo_bar` as two.
 *
 * @param prose text to measure
 * @returns the number of letter/number runs
 */
function countWords(prose) {
    let total = 0;
    const pieces = prose.replace(/[^\p{L}\p{N}\s]/gu, " ").split(/\s+/);
    for (const piece of pieces) {
        if (piece.length > 0) {
            total += 1;
        }
    }
    return total;
}
138
+ // Lightweight glob match — supports `*` and `**` segment wildcards,
139
+ // good enough for `off_limits_globs` patterns like `vendor` or `dist`
140
+ // at any depth. Not a full minimatch; the curator only needs
141
+ // prefix-style matching.
142
/**
 * Check a path against a list of globs.
 *
 * @param file repo-relative path to test
 * @param globs glob patterns, or `undefined` when none are configured
 * @returns `true` as soon as one glob matches; `false` for a missing
 *   or empty list
 */
function matchesAnyGlob(file, globs) {
    if (globs === undefined) {
        return false;
    }
    // `some` on an empty list is false, which covers the "no globs" case.
    return globs.some((pattern) => matchesGlob(file, pattern));
}
151
/**
 * Check a path against a single glob by compiling the glob to a RegExp.
 *
 * @param file repo-relative path to test
 * @param glob glob pattern
 * @returns `true` when the compiled pattern matches `file`
 */
function matchesGlob(file, glob) {
    return globToRegex(glob).test(file);
}
155
/**
 * Compile an off-limits glob into a RegExp that is tested against
 * repo-relative, `/`-separated paths.
 *
 * Supported syntax (a lightweight subset, not a full minimatch):
 *   - `*`   any run of characters within one path segment
 *   - `?`   exactly one character within a path segment
 *   - `**`  any run of characters, including `/`
 *   - a `**` + `/` pair that forms a whole segment matches zero or
 *     more leading directories, so a pattern that starts with it also
 *     matches at the repo root
 *
 * Matching is prefix-style, as the exclusion list needs:
 *   - a pattern that names a directory also matches everything beneath
 *     it (`vendor` matches `vendor/lib/a.js`)
 *   - a pattern without any `/` matches at any depth (`vendor` matches
 *     `packages/x/vendor/a.js`); a pattern containing `/` is anchored
 *     at the repo root
 *   - `./x`, `/x` and `x/` are accepted as spellings of `x`
 *
 * Every path matched by a fully-anchored translation of the same glob
 * is still matched; the set only grows. An empty glob matches nothing.
 *
 * @param glob glob pattern from the off-limits list
 * @returns a RegExp to `.test()` a repo-relative path with
 */
function globToRegex(glob) {
    // Characters that must be escaped to be literal in a RegExp source.
    const REGEX_SPECIALS = ".+^$()[]{}|\\";
    const trimmed = glob.replace(/^\.\//, "").replace(/\/+$/, "");
    if (trimmed.length === 0) {
        // Nothing to match on; never drop a record because of it.
        return /(?!)/;
    }
    // A slash anywhere (including a leading one) pins the pattern to the
    // repo root; slash-free patterns float to any depth.
    const rootAnchored = trimmed.includes("/");
    const pattern = trimmed.startsWith("/") ? trimmed.slice(1) : trimmed;
    let body = "";
    let i = 0;
    while (i < pattern.length) {
        const ch = pattern.charAt(i);
        if (ch === "*") {
            if (pattern.charAt(i + 1) === "*") {
                const startsSegment = i === 0 || pattern.charAt(i - 1) === "/";
                if (startsSegment && pattern.charAt(i + 2) === "/") {
                    // Whole-segment globstar: zero or more directories.
                    body += "(?:.*/)?";
                    i += 3;
                }
                else {
                    body += ".*";
                    i += 2;
                }
            }
            else {
                body += "[^/]*";
                i += 1;
            }
            continue;
        }
        if (ch === "?") {
            body += "[^/]";
            i += 1;
            continue;
        }
        body += REGEX_SPECIALS.includes(ch) ? `\\${ch}` : ch;
        i += 1;
    }
    const prefix = rootAnchored ? "^" : "(?:^|.*/)";
    // The optional tail makes a directory pattern cover its descendants.
    return new RegExp(`${prefix}${body}(?:/.*)?$`);
}
185
+ //# sourceMappingURL=regex-prefilter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"regex-prefilter.js","sourceRoot":"","sources":["../../../src/init/curator/regex-prefilter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,MAAM,YAAY,GAChB,iHAAiH,CAAC;AACpH,MAAM,gBAAgB,GACpB,mEAAmE,CAAC;AACtE,MAAM,cAAc,GAAG,2CAA2C,CAAC;AACnE,MAAM,cAAc,GAAG,oCAAoC,CAAC;AAC5D,MAAM,cAAc,GAAG,YAAY,CAAC;AACpC,MAAM,eAAe,GAAG,qEAAqE,CAAC;AAC9F,MAAM,YAAY,GAAG,gBAAgB,CAAC;AACtC,MAAM,iBAAiB,GACrB,yUAAyU,CAAC;AAa5U,MAAM,yBAAyB,GAAG,EAAE,CAAC;AA0BrC,MAAM,UAAU,cAAc,CAAC,IAAmB;IAChD,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE3C,IAAI,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACjC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IACpE,CAAC;IACD,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACrC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IACxE,CAAC;IACD,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACnC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,aAAa,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IACtE,CAAC;IACD,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;QACnD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,iBAAiB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IAC1E,CAAC;IAED,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;QACnC,IAAI,iBAAiB,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;QAC5E,CAAC;QACD,IAAI,eAAe,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;QACzE,CAAC;QACD,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,EAAE,CAAC;YACxC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;QACzE,CAAC;IACH,CAAC;IAED,IAAI,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,qBAAqB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IAC9E,CAAC;IACD,IAAI,UAAU,CAAC,OAAO,CAAC,GAAG,yBAAyB,EAAE,CAAC;QACpD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,qBAAqB,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;I
AC9E,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;AAChD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,OAAO,KAAK;SACT,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC;SAC9B,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;SAC7B,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,iBAAiB,CAAC,GAAuB,EAAE,IAAY;IAC9D,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC9C,IAAI,GAAG,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IACpC,MAAM,IAAI,GAAG,GAAG,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACzC,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAChC,OAAO,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC/D,CAAC;AAED;;;;;GAKG;AACH,SAAS,cAAc,CAAC,aAAqB,EAAE,OAAe;IAC5D,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACrD,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,OAAO,UAAU,CAAC,OAAO,CAAC,GAAG,yBAAyB,CAAC;AACzD,CAAC;AAED,SAAS,kBAAkB,CAAC,OAAe;IACzC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,KAAK,GAAG,OAAO;SAClB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC/B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACpC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QACxC,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QACxC,OAAO,IAAI,CAAC,CAAC;IACf,CAAC;IACD,OAAO,OAAO,KAAK,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC;IACzD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACjE,OAAO,MAAM,CAAC,MAAM,CAAC;AACvB,CAAC;AAED,oEAAoE;AACpE,sEAAsE;AACtE,6DAA6D;AAC7D,yBAAyB;AACzB,SAAS,cAAc,CAAC,IAAY,EAAE,KAAgB;IACpD,IAAI,KA
AK,KAAK,SAAS,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAC5D,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC;YAAE,OAAO,IAAI,CAAC;IACxC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,WAAW,CAAC,IAAY,EAAE,IAAY;IAC7C,MAAM,EAAE,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAC7B,OAAO,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,4DAA4D;IAC5D,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;gBACxB,GAAG,IAAI,IAAI,CAAC;gBACZ,CAAC,IAAI,CAAC,CAAC;YACT,CAAC;iBAAM,CAAC;gBACN,GAAG,IAAI,OAAO,CAAC;YACjB,CAAC;YACD,SAAS;QACX,CAAC;QACD,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,GAAG,IAAI,MAAM,CAAC;YACd,SAAS;QACX,CAAC;QACD,IAAI,EAAE,KAAK,SAAS;YAAE,SAAS;QAC/B,IAAI,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,IAAI,KAAK,EAAE,EAAE,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,GAAG,IAAI,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,OAAO,IAAI,MAAM,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC;AAChC,CAAC"}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Curator pipeline — strict per-entry validators.
3
+ *
4
+ * Phase 9c-emit feeds every reducer-output entry through `validateEntry`
5
+ * before writing it to `.cairn/ground/decisions/` or
6
+ * `.cairn/ground/invariants/`. Operator's auto-accept directive
7
+ * (curator plan §"Decision log" Q2) requires a high quality bar — when
8
+ * an entry fails any check it is dropped silently with a counter
9
+ * logged, never falling back to `_inbox/`.
10
+ *
11
+ * Failure modes encoded by `rejectReason`:
12
+ * - `title-length` — empty or > 80 chars
13
+ * - `title-no-cap` — does not start with an uppercase letter
14
+ * - `title-trailing-punct` — ends in `,` `:` `;`
15
+ * - `title-truncated-or-jsx` — ends in `...` or starts with `{/*` (JSX
16
+ * block-comment leakage)
17
+ * - `body-missing-<section>` — required heading missing
18
+ * - `jsdoc-tag-leak` — body contains `@domain`, `@orgScope`,
19
+ * `@softDelete`, `@see`, `@param`,
20
+ * `@returns`, `@throws` (curator pasted
21
+ * raw scaffolding)
22
+ * - `title-pasted-in-body` — body contains the title verbatim
23
+ * (indicates unsynthesized pass-through)
24
+ * - `no-scope-globs` — empty `scope_globs`
25
+ * - `no-evidence` — empty `evidence_files`
26
+ * - `evidence-missing:<path>` — cited evidence file does not exist
27
+ */
28
/** A reducer-output entry as presented to `validateEntry`. */
export interface FinalEntry {
    /** Entry class; `INV` entries need an `## Invariant` section, `DEC` entries a `## Decision` section. */
    kind: "DEC" | "INV";
    /** Headline; validated for length, capitalisation and trailing punctuation. */
    title: string;
    /** Markdown body; must contain the required `##` headings for its kind. */
    body: string;
    /** Globs the entry applies to; must be non-empty. */
    scope_globs: string[];
    /**
     * Repo-relative files cited as evidence; must be non-empty and each
     * must exist. A `:42-58` or `#L42-L58` line-range suffix is allowed.
     */
    evidence_files: string[];
    /** Topic tags; not inspected by the validator. */
    topic_tags: string[];
}
36
/** Outcome of `validateEntry`. */
export interface ValidationResult {
    /** `true` when the entry passed every check. */
    valid: boolean;
    /**
     * Code of the first check that failed, e.g. `title-length` or
     * `evidence-missing:<path>`. Omitted when `valid` is `true`.
     */
    rejectReason?: string;
}
40
/**
 * Run the strict per-entry checks on a reducer-output entry.
 *
 * @param e the entry to validate
 * @param repoRoot directory that evidence paths are resolved against
 * @returns `{ valid: true }`, or `{ valid: false, rejectReason }`
 *   naming the first failed check
 */
export declare function validateEntry(
    e: FinalEntry,
    repoRoot: string,
): ValidationResult;
41
/**
 * Reduce an evidence reference to its bare file path by removing a
 * trailing line-range suffix. Two spellings are accepted: the
 * colon form (`path:42-58`) and the GitHub-anchor form
 * (`path#L42-L58`). The validator calls this before checking that the
 * file exists.
 */
export declare function stripLineRange(
    evidenceFile: string,
): string;
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Curator pipeline — strict per-entry validators.
3
+ *
4
+ * Phase 9c-emit feeds every reducer-output entry through `validateEntry`
5
+ * before writing it to `.cairn/ground/decisions/` or
6
+ * `.cairn/ground/invariants/`. Operator's auto-accept directive
7
+ * (curator plan §"Decision log" Q2) requires a high quality bar — when
8
+ * an entry fails any check it is dropped silently with a counter
9
+ * logged, never falling back to `_inbox/`.
10
+ *
11
+ * Failure modes encoded by `rejectReason`:
12
+ * - `title-length` — empty or > 80 chars
13
+ * - `title-no-cap` — does not start with an uppercase letter
14
+ * - `title-trailing-punct` — ends in `,` `:` `;`
15
+ * - `title-truncated-or-jsx` — ends in `...` or starts with `{/*` (JSX
16
+ * block-comment leakage)
17
+ * - `body-missing-<section>` — required heading missing
18
+ * - `jsdoc-tag-leak` — body contains `@domain`, `@orgScope`,
19
+ * `@softDelete`, `@see`, `@param`,
20
+ * `@returns`, `@throws` (curator pasted
21
+ * raw scaffolding)
22
+ * - `title-pasted-in-body` — body contains the title verbatim
23
+ * (indicates unsynthesized pass-through)
24
+ * - `no-scope-globs` — empty `scope_globs`
25
+ * - `no-evidence` — empty `evidence_files`
26
+ * - `evidence-missing:<path>` — cited evidence file does not exist
27
+ */
28
+ import { existsSync } from "node:fs";
29
+ import { join } from "node:path";
30
/**
 * Validate one reducer-output entry. Checks run in a fixed order and
 * the first failure is reported:
 *
 *   title   → `title-length`, `title-truncated-or-jsx`, `title-no-cap`,
 *             `title-trailing-punct`
 *   body    → `body-missing-<heading>`, `jsdoc-tag-leak`,
 *             `title-pasted-in-body`
 *   scope   → `no-scope-globs`
 *   evidence→ `no-evidence`, `evidence-missing:<path>`
 *
 * @param e entry to validate
 * @param repoRoot directory that evidence paths are joined onto before
 *   the existence check
 * @returns `{ valid: true }` or `{ valid: false, rejectReason }`
 */
export function validateEntry(e, repoRoot) {
    const reject = (rejectReason) => ({ valid: false, rejectReason });
    const { title, body } = e;

    // Title. The truncation / JSX-leak check runs before the
    // capitalisation check so a JSX-prefixed title is reported with the
    // more specific reason rather than as `title-no-cap`.
    const titleTooShortOrLong = title.length === 0 || title.length > 80;
    if (titleTooShortOrLong)
        return reject("title-length");
    const looksTruncated = /\.\.\.$/.test(title);
    const looksLikeJsx = /^\{\/\*/.test(title);
    if (looksTruncated || looksLikeJsx)
        return reject("title-truncated-or-jsx");
    if (!/^[A-Z]/.test(title))
        return reject("title-no-cap");
    if (/[,:;]$/.test(title))
        return reject("title-trailing-punct");

    // Body: the middle heading depends on the entry kind.
    const middleHeading = e.kind === "INV" ? "## Invariant" : "## Decision";
    const missingHeading = ["## Context", middleHeading, "## Why"].find((heading) => !body.includes(heading));
    if (missingHeading !== undefined)
        return reject(`body-missing-${missingHeading}`);
    // Raw JSDoc scaffolding must not have been pasted through.
    if (/@(domain|orgScope|softDelete|see|param|returns|throws)\b/.test(body))
        return reject("jsdoc-tag-leak");
    // A verbatim title inside the body indicates unsynthesised pass-through.
    if (body.includes(title))
        return reject("title-pasted-in-body");

    // Scope and evidence must both be present.
    if (e.scope_globs.length === 0)
        return reject("no-scope-globs");
    if (e.evidence_files.length === 0)
        return reject("no-evidence");
    // Every cited file (minus any line-range suffix) must exist on disk.
    const missingRef = e.evidence_files.find((ref) => !existsSync(join(repoRoot, stripLineRange(ref))));
    if (missingRef !== undefined)
        return reject(`evidence-missing:${stripLineRange(missingRef)}`);

    return { valid: true };
}
80
/**
 * Strip the `:42-58` or `#L42-L58` line-range suffix off an evidence
 * file reference. Both forms are accepted on input; the validator
 * resolves to the bare path before checking file existence.
 *
 * A `#` only counts as an anchor when it sits in the final path
 * segment. A `#` inside a directory name (for example a `c#/` folder)
 * is part of the path and is preserved. Known limitation: an anchor
 * that itself contains `/` is not recognised.
 *
 * @param evidenceFile repo-relative path, optionally with a line-range
 *   suffix
 * @returns the path without the suffix; the input unchanged when no
 *   suffix is present
 */
export function stripLineRange(evidenceFile) {
    // GitHub-style anchor: `path/to/file.ts#L42-L58`. Search only after
    // the last `/` so directory names containing `#` are not truncated.
    const lastSegmentStart = evidenceFile.lastIndexOf("/") + 1;
    const hashIdx = evidenceFile.indexOf("#", lastSegmentStart);
    if (hashIdx !== -1) {
        return evidenceFile.slice(0, hashIdx);
    }
    // Colon-style range: `path/to/file.ts:42-58`. Only strip when the
    // suffix is digits (optionally `-digits`). The `> 1` bound leaves a
    // Windows drive prefix such as `C:/...` alone; those are absolute
    // and should never appear in a repo-relative path, but the guard is
    // kept defensively.
    const colonIdx = evidenceFile.lastIndexOf(":");
    if (colonIdx > 1 && /^\d+(?:-\d+)?$/.test(evidenceFile.slice(colonIdx + 1))) {
        return evidenceFile.slice(0, colonIdx);
    }
    return evidenceFile;
}
100
+ //# sourceMappingURL=validate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validate.js","sourceRoot":"","sources":["../../../src/init/curator/validate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAgBjC,MAAM,UAAU,aAAa,CAC3B,CAAa,EACb,QAAgB;IAEhB,+DAA+D;IAC/D,qEAAqE;IACrE,yDAAyD;IACzD,kBAAkB;IAClB,IAAI,CAAC,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QAChD,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,CAAC;IACxD,CAAC;IACD,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QACvD,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,wBAAwB,EAAE,CAAC;IAClE,CAAC;IACD,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,CAAC;IACxD,CAAC;IACD,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,sBAAsB,EAAE,CAAC;IAChE,CAAC;IAED,gBAAgB;IAChB,MAAM,gBAAgB,GAAG,CAAC,CAAC,IAAI,KAAK,KAAK;QACvC,CAAC,CAAC,CAAC,YAAY,EAAE,cAAc,EAAE,QAAQ,CAAC;QAC1C,CAAC,CAAC,CAAC,YAAY,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC;IAC5C,KAAK,MAAM,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACnC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,gBAAgB,GAAG,EAAE,EAAE,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,IAAI,0DAA0D,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5E,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC;IAC1D,CAAC;IAED,yBAAyB;IACzB,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,sBAAsB,EAAE,CAAC;IAChE,CAAC;IAED,uBAAuB;IACvB,IAAI,CAAC,CAAC,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC;IAC1D,CAAC;IAED,kCAAkC;IAClC,IAAI,CAAC,CAAC,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;IACvD,CAAC;IACD,KAAK,MAAM,EAAE,IAAI,CAAC,CAAC,cAAc,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,cAAc,CAAC,EAA
E,CAAC,CAAC;QAChC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC;YACtC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,oBAAoB,IAAI,EAAE,EAAE,CAAC;QACpE,CAAC;IACH,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;AACzB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,YAAoB;IACjD,iDAAiD;IACjD,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC1C,IAAI,OAAO,KAAK,CAAC,CAAC;QAAE,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IAC1D,kEAAkE;IAClE,gEAAgE;IAChE,iEAAiE;IACjE,kDAAkD;IAClD,MAAM,QAAQ,GAAG,YAAY,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC/C,IAAI,QAAQ,GAAG,CAAC,IAAI,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5E,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC"}
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Curator pipeline — Phase 9a-walker top-level (v0.9.0).
3
+ *
4
+ * Builds the unified corpus by running three sub-walkers:
5
+ *
6
+ * - Source comments (existing `walkSourceComments`) — essay-class
7
+ * block comments per source file
8
+ * - Doc paragraphs (existing `discoverDocs` + paragraph splitter)
9
+ * — README + docs/**\/*.md paragraphs ≥80 chars
10
+ * - Rule sections (existing `discoverRuleSources` +
11
+ * `parseRuleSections`) — H2/H3 sections from CLAUDE.md /
12
+ * AGENTS.md / .claude/rules/**\/*.md
13
+ *
14
+ * Each candidate runs through the regex pre-filter (`regex-prefilter.ts`)
15
+ * which drops 60-80% of raw blocks (test files, JSX comments, license
16
+ * headers, JSDoc with only @tags, etc.). Survivors get written to
17
+ * `.cairn/init/curator/corpus.jsonl` and packed into shards capped
18
+ * at 120k input tokens (`shards.json`).
19
+ */
20
/** Options for `runCuratorWalker`. */
export interface RunCuratorWalkerArgs {
    /** Root of the repository to walk (presumably an absolute path; confirm against the caller). */
    repoRoot: string;
}
23
/**
 * Summary returned by `runCuratorWalker`. Field meanings below are read
 * off the names and the module header; the implementation is not
 * visible from this declaration file.
 */
export interface RunCuratorWalkerResult {
    /** Location of the written corpus (presumably `corpus.jsonl`). */
    corpus_path: string;
    /** Location of the written shard manifest (presumably `shards.json`). */
    shards_path: string;
    /** Count of records in the corpus. */
    records_total: number;
    /** Record counts split by source kind. */
    records_by_kind: Record<"comment" | "doc" | "rule", number>;
    /** Drop counters keyed by string (presumably the pre-filter drop reason). */
    dropped: Record<string, number>;
    /** Number of shards produced. */
    shards: number;
    /** Estimated input tokens across all shards. */
    total_input_tokens_estimate: number;
}
36
/**
 * Entry point of the Phase 9a walker: builds the unified corpus and
 * resolves with a summary of what was written.
 *
 * @param args walker options (the repository root)
 * @returns a promise for the run summary
 */
export declare function runCuratorWalker(
    args: RunCuratorWalkerArgs,
): Promise<RunCuratorWalkerResult>;