claudeos-core 2.3.1 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,7 @@ const { detectStack } = require("./stack-detector");
16
16
  const { scanStructure } = require("./structure-scanner");
17
17
  const { splitDomainGroups, determineActiveDomains, selectTemplates } = require("./domain-grouper");
18
18
  const { generatePrompts } = require("./prompt-generator");
19
+ const { collectSourcePaths } = require("./source-paths");
19
20
 
20
21
  const ROOT = process.env.CLAUDEOS_ROOT || path.resolve(__dirname, "../..");
21
22
  const GENERATED_DIR = path.join(ROOT, "claudeos-core/generated");
@@ -57,6 +58,23 @@ async function main() {
57
58
  }
58
59
  console.log();
59
60
 
61
+ // Phase 2.5: Allowed source paths (v2.3.x+ — path-hallucination prevention)
62
+ //
63
+ // Collect the authoritative list of source files that actually exist on
64
+ // disk. Pass 3/4 prompts use this list (via pass3a-facts.md and the
65
+ // pass3-footer.md grounding rule) to refuse citations of convention-based
66
+ // fabricated paths like `src/app/providers.tsx` when the project does
67
+ // not in fact ship that file. See plan-installer/source-paths.js for
68
+ // the full rationale.
69
+ console.log(" [Phase 2.5] Collecting source-path allowlist...");
70
+ const sourcePaths = await collectSourcePaths(ROOT);
71
+ if (sourcePaths.mode === "full") {
72
+ console.log(` ${sourcePaths.totalFiles} source file(s) enumerated (full mode)`);
73
+ } else {
74
+ console.log(` ${sourcePaths.totalFiles} source files across ${sourcePaths.paths.length} dirs (rollup mode — project exceeds enumeration budget)`);
75
+ }
76
+ console.log();
77
+
60
78
  // Phase 3: Template selection
61
79
  console.log(" [Phase 3] Selecting templates...");
62
80
  const templates = selectTemplates(stack);
@@ -111,6 +129,11 @@ async function main() {
111
129
  templates, isMultiStack, rootPackage,
112
130
  domains, backendDomains, frontendDomains, frontend,
113
131
  activeDomains: active,
132
+ // v2.3.x+: authoritative on-disk source-file list. Consumed by
133
+ // pass3-context-builder → pass3a-facts.md → Pass 3/4 prompts to
134
+ // prevent convention-based path hallucination (e.g. Next.js
135
+ // `src/app/providers.tsx` when the project does not ship it).
136
+ allowedSourcePaths: sourcePaths,
114
137
  summary: {
115
138
  totalDomains: domains.length, backendDomains: backendDomains.length,
116
139
  frontendDomains: frontendDomains.length,
@@ -192,6 +192,20 @@ function buildPass3Context(generatedDir) {
192
192
  // ─── Frontend stats (if scan-frontend ran) ──────────────────────────
193
193
  frontend: analysis.frontend || { exists: false },
194
194
 
195
+ // ─── Source path allowlist (v2.3.x+ — path-hallucination prevention) ─
196
+ // Authoritative on-disk source-file list, copied from project-analysis.
197
+ // Pass 3a includes a rendered form of this in pass3a-facts.md as
198
+ // "## Allowed Source Paths". Pass 3b/3c/3d then cite ONLY from this
199
+ // list when writing `src/...` / `packages/...` / language-specific
200
+ // paths in rule/standard files.
201
+ //
202
+ // Shape: { mode: "full" | "rollup", paths: string[], totalFiles: number,
203
+ // excludedDirs: string[] }
204
+ // See plan-installer/source-paths.js for the full contract.
205
+ allowedSourcePaths: (analysis.allowedSourcePaths && typeof analysis.allowedSourcePaths === "object")
206
+ ? analysis.allowedSourcePaths
207
+ : { mode: "full", paths: [], totalFiles: 0, excludedDirs: [] },
208
+
195
209
  // ─── pass2-merged.json descriptor (signals, not contents) ───────────
196
210
  // Pass 3 reads this to decide whether it's safe to open the full file
197
211
  // for a specific missing detail, and how aggressively to summarize first.
@@ -383,7 +383,8 @@ async function scanFrontendDomains(stack, ROOT) {
383
383
  // Emitting those as domains fragments one logical app into 5+
384
384
  // pseudo-domains, which in turn primes Pass 3 to fabricate
385
385
  // prefixed filenames (featureRoutePath.ts, admin-api.service.ts) —
386
- // the hallucination class observed in frontend-react-B dogfooding.
386
+ // a canonical hallucination class the single-SPA rule is designed
387
+ // to prevent.
387
388
  //
388
389
  // Detection of "single-SPA" mode: inspect the top-level platform
389
390
  // segment for every glob match and count distinct values. If only
@@ -0,0 +1,242 @@
1
+ /**
2
+ * ClaudeOS-Core — Source Path Collector
3
+ *
4
+ * Collects the authoritative list of source file paths that actually exist
5
+ * in the project. Used by pass3-context-builder to inject an "allowlist"
6
+ * into pass3a-facts.md, which Pass 3/4 prompts then reference as the ONLY
7
+ * set of paths they may cite in generated rule/standard files.
8
+ *
9
+ * ─── Why this exists ──────────────────────────────────────────────────
10
+ *
11
+ * Pass 3 hallucination failures in the v2.3.x series have almost always
12
+ * been "convention-based path fabrication": the LLM, recognizing a
13
+ * Next.js / Vite / Spring / Django project, would cite the framework's
14
+ * canonical paths (`src/app/providers.tsx`, `src/middleware.ts`,
15
+ * `src/__mocks__/handlers.ts`, etc.) from its training data rather than
16
+ * from `pass2-merged.json`. The strongly-worded prompt warnings in
17
+ * pass3-footer.md and pass4.md reduced but did not eliminate this class
18
+ * of failure.
19
+ *
20
+ * The root cause is asymmetry of evidence: the prompt tells the LLM what
21
+ * NOT to do in general terms ("do not invent paths"), but never tells it
22
+ * WHAT IS ALLOWED specifically. This module closes that asymmetry by
23
+ * producing a concrete list — "here are the N paths that exist; cite
24
+ * none other" — which is far easier for an LLM to comply with than a
25
+ * negative abstract constraint.
26
+ *
27
+ * ─── Design constraints ──────────────────────────────────────────────
28
+ *
29
+ * 1. Language-agnostic: must work for Java, Kotlin, TypeScript, JavaScript,
30
+ * Vue, and Python projects. Uses a fixed extension list rather than
31
+ * querying the per-language scanners (which each have their own
32
+ * incompatible output shapes).
33
+ *
34
+ * 2. Budget-bounded: even a moderately large project can have 3000+ source
35
+ * files, which would balloon pass3a-facts.md past its 10 KB target and
36
+ * re-introduce the context-overflow failure mode. We cap at MAX_PATHS
37
+ * (500) and use a directory-rollup strategy for projects above that
38
+ * threshold — preserving the source-containing directories the LLM
39
+ * actually needs for reference without enumerating every leaf file.
40
+ *
41
+ * 3. Read-only: does not touch the per-language scanners (scan-java.js,
42
+ * scan-kotlin.js, scan-node.js, scan-python.js, scan-frontend.js).
43
+ * Those scanners are covered by 16 test files with tight expectations
44
+ * on their return shapes; a separate independent glob here avoids
45
+ * destabilizing them.
46
+ */
47
+
48
+ "use strict";
49
+
50
+ const path = require("path");
51
+ const { glob } = require("glob");
52
+
53
+ // File extensions we consider "source". Intentionally narrower than
54
+ // everything — we want the paths that are likely to be cited in rule
55
+ // files, not every config/asset/lock file.
56
+ const SOURCE_EXTENSIONS = [
57
+ // Backend: JVM
58
+ "java", "kt", "kts",
59
+ // Backend/Frontend: JS/TS
60
+ "js", "jsx", "ts", "tsx", "mjs", "cjs",
61
+ // Frontend: SFC
62
+ "vue", "svelte",
63
+ // Backend: Python
64
+ "py",
65
+ ];
66
+
67
+ // Directories to exclude from the scan. Each entry is matched as a path
68
+ // segment (so "node_modules" excludes `a/node_modules/b` too). Test/mock
69
+ // directories are INCLUDED — they are legitimate citation targets for
70
+ // rules about testing conventions.
71
+ const EXCLUDED_DIRS = [
72
+ "node_modules",
73
+ "dist",
74
+ "build",
75
+ "out",
76
+ ".next",
77
+ ".nuxt",
78
+ ".output",
79
+ ".svelte-kit",
80
+ "target", // Maven/Gradle build output
81
+ ".gradle",
82
+ ".idea",
83
+ ".vscode",
84
+ ".git",
85
+ "coverage",
86
+ ".turbo",
87
+ ".cache",
88
+ ".claude", // our own generated dir
89
+ "claudeos-core", // our own generated dir
90
+ "__pycache__",
91
+ ".venv",
92
+ "venv",
93
+ ".pytest_cache",
94
+ ];
95
+
96
+ // Hard cap on enumerated paths, chosen to bound the injected section of
97
+ // pass3a-facts.md. Worst case in full mode is 500 paths × ~40 chars avg
98
+ // ≈ 20 KB of raw text, i.e. above that file's ~10 KB target; larger
99
+ // projects fall back to the directory rollup instead.
100
+ const MAX_PATHS = 500;
101
+
102
+ // Above MAX_PATHS we fall back to a directory-rollup strategy: list the
102
+ // directories holding at least MIN_FILES_PER_DIR source files (capped at
103
+ // the MAX_DIRS most populated), which lets the LLM know "this directory
104
+ // exists and has files in it" without enumerating each leaf.
106
+ const MIN_FILES_PER_DIR = 1;
107
+ const MAX_DIRS = 300;
108
+
109
+ /**
110
+ * Scan the project for source file paths.
111
+ *
112
+ * @param {string} projectRoot absolute path to project root
113
+ * @returns {Promise<{
114
+ * mode: "full" | "rollup",
115
+ * paths: string[],
116
+ * totalFiles: number,
117
+ * excludedDirs: string[],
118
+ * }>}
119
+ *
120
+ * mode === "full": `paths` is the complete enumeration of source files
121
+ * (relative to projectRoot, forward slashes)
122
+ * mode === "rollup": `paths` is a list of directory paths (each ending
123
+ * with "/") that contain source files. Used when the
124
+ * project exceeds MAX_PATHS; the Pass 3 prompt tells
125
+ * the LLM to treat each directory as a scope it may
126
+ * cite, but to verify specific filenames against
127
+ * pass2-merged.json before writing them.
128
+ */
129
+ async function collectSourcePaths(projectRoot) {
130
+ const pattern = `**/*.{${SOURCE_EXTENSIONS.join(",")}}`;
131
+ // Build ignore patterns. glob's `ignore` option accepts glob patterns
132
+ // (minimatch syntax); we wrap each excluded dir as `**/NAME/**` to match
133
+ // at any depth.
134
+ const ignore = EXCLUDED_DIRS.map((d) => `**/${d}/**`);
135
+
136
+ let files;
137
+ try {
138
+ files = await glob(pattern, {
139
+ cwd: projectRoot,
140
+ ignore,
141
+ nodir: true,
142
+ dot: false,
143
+ });
144
+ } catch (_e) {
145
+ // glob failure is not fatal — we just return an empty allowlist and
146
+ // Pass 3 falls back to the pre-allowlist behavior of relying on the
147
+ // prompt warning alone.
148
+ return { mode: "full", paths: [], totalFiles: 0, excludedDirs: EXCLUDED_DIRS.slice() };
149
+ }
150
+
151
+ // Normalize separators. glob returns platform-native separators, so
152
+ // results use backslashes on Windows; convert them to forward slashes.
153
+ const normalized = files.map((f) => f.split(path.sep).join("/")).sort();
154
+
155
+ if (normalized.length <= MAX_PATHS) {
156
+ return {
157
+ mode: "full",
158
+ paths: normalized,
159
+ totalFiles: normalized.length,
160
+ excludedDirs: EXCLUDED_DIRS.slice(),
161
+ };
162
+ }
163
+
164
+ // Rollup mode: group by parent directory.
165
+ const dirCounts = new Map();
166
+ for (const f of normalized) {
167
+ const dir = path.posix.dirname(f);
168
+ dirCounts.set(dir, (dirCounts.get(dir) || 0) + 1);
169
+ }
170
+ const dirs = [...dirCounts.entries()]
171
+ .filter(([, count]) => count >= MIN_FILES_PER_DIR)
172
+ .sort((a, b) => {
173
+ // Sort by file-count desc (most populated dirs first), then alpha.
174
+ if (b[1] !== a[1]) return b[1] - a[1];
175
+ return a[0].localeCompare(b[0]);
176
+ })
177
+ .slice(0, MAX_DIRS)
178
+ .map(([dir]) => dir + "/")
179
+ .sort();
180
+
181
+ return {
182
+ mode: "rollup",
183
+ paths: dirs,
184
+ totalFiles: normalized.length,
185
+ excludedDirs: EXCLUDED_DIRS.slice(),
186
+ };
187
+ }
188
+
189
+ /**
190
+ * Render a collected source-path list as a Markdown section body, ready
191
+ * to be embedded into pass3-context.json (which is then cited by Pass 3a
192
+ * when writing pass3a-facts.md).
193
+ *
194
+ * The render format is deliberately compact — bulleted list with path in
195
+ * backticks — because this text will be read once by the LLM for every
196
+ * Pass 3 sub-stage and we want to minimize token cost.
197
+ */
198
+ function renderAllowedPathsSection(collected) {
199
+ const { mode, paths, totalFiles } = collected;
200
+ const lines = [];
201
+
202
+ if (mode === "full") {
203
+ lines.push(
204
+ `Source files on disk (total: ${totalFiles}). ` +
205
+ `When writing a \`src/...\` / \`packages/...\` / \`apps/...\` / ` +
206
+ `language-specific path in any rule or standard file, cite ONLY ` +
207
+ `paths that appear in this list. Do not invent filenames based on ` +
208
+ `framework convention (Next.js, Vite, Spring, Django, etc.) — if ` +
209
+ `a convention-standard path is not listed below, the project does ` +
210
+ `NOT use that convention.`
211
+ );
212
+ lines.push("");
213
+ for (const p of paths) {
214
+ lines.push(`- \`${p}\``);
215
+ }
216
+ } else {
217
+ lines.push(
218
+ `Source directories on disk (total: ${totalFiles} files across ` +
219
+ `${paths.length} listed directories — individual file enumeration ` +
220
+ `was skipped because the project exceeds the ${MAX_PATHS}-file ` +
221
+ `budget for this section). When citing a specific file path in a ` +
222
+ `rule or standard file, the file's PARENT DIRECTORY must match ` +
223
+ `one of the entries below. For the exact filename, consult ` +
224
+ `\`pass2-merged.json\` ONCE and record the result in your ` +
225
+ `in-context fact table. Never infer a filename from framework ` +
226
+ `convention.`
227
+ );
228
+ lines.push("");
229
+ for (const d of paths) {
230
+ lines.push(`- \`${d}\``);
231
+ }
232
+ }
233
+
234
+ return lines.join("\n");
235
+ }
236
+
237
+ module.exports = {
238
+ collectSourcePaths,
239
+ renderAllowedPathsSection,
240
+ // Exported for test visibility.
241
+ _constants: { SOURCE_EXTENSIONS, EXCLUDED_DIRS, MAX_PATHS, MIN_FILES_PER_DIR, MAX_DIRS },
242
+ };