@bastani/atomic 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/.agents/skills/ast-grep/SKILL.md +323 -0
  2. package/.agents/skills/ast-grep/references/rule_reference.md +297 -0
  3. package/.agents/skills/ripgrep/SKILL.md +382 -0
  4. package/.mcp.json +5 -6
  5. package/dist/commands/cli/claude-inflight-hook.d.ts +100 -0
  6. package/dist/commands/cli/claude-inflight-hook.d.ts.map +1 -0
  7. package/dist/commands/cli/claude-stop-hook.d.ts +2 -0
  8. package/dist/commands/cli/claude-stop-hook.d.ts.map +1 -1
  9. package/dist/lib/spawn.d.ts +1 -1
  10. package/dist/lib/spawn.d.ts.map +1 -1
  11. package/dist/sdk/providers/claude.d.ts +36 -0
  12. package/dist/sdk/providers/claude.d.ts.map +1 -1
  13. package/dist/sdk/providers/copilot.d.ts +17 -1
  14. package/dist/sdk/providers/copilot.d.ts.map +1 -1
  15. package/dist/sdk/runtime/executor.d.ts.map +1 -1
  16. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts +49 -34
  17. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts.map +1 -1
  18. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts +18 -16
  19. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts.map +1 -1
  20. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/batching.d.ts +43 -0
  21. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/batching.d.ts.map +1 -0
  22. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.d.ts +30 -0
  23. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.d.ts.map +1 -1
  24. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scout.d.ts +2 -1
  25. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scout.d.ts.map +1 -1
  26. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts +18 -16
  27. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts.map +1 -1
  28. package/dist/services/config/additional-instructions.d.ts +67 -0
  29. package/dist/services/config/additional-instructions.d.ts.map +1 -0
  30. package/package.json +3 -1
  31. package/src/cli.ts +18 -1
  32. package/src/commands/cli/chat/index.ts +52 -2
  33. package/src/commands/cli/claude-inflight-hook.test.ts +598 -0
  34. package/src/commands/cli/claude-inflight-hook.ts +359 -0
  35. package/src/commands/cli/claude-stop-hook.ts +40 -4
  36. package/src/commands/cli/init/index.ts +9 -0
  37. package/src/lib/spawn.ts +6 -2
  38. package/src/sdk/providers/claude.ts +131 -0
  39. package/src/sdk/providers/copilot.ts +30 -1
  40. package/src/sdk/runtime/executor.ts +43 -2
  41. package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +318 -158
  42. package/src/sdk/workflows/builtin/deep-research-codebase/copilot/index.ts +253 -129
  43. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/batching.ts +65 -0
  44. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/ignore-by-default.d.ts +8 -0
  45. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.ts +203 -12
  46. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/scout.ts +248 -78
  47. package/src/sdk/workflows/builtin/deep-research-codebase/opencode/index.ts +258 -146
  48. package/src/services/config/additional-instructions.ts +273 -0
  49. package/src/services/system/auto-sync.ts +10 -1
@@ -49,6 +49,14 @@ const TRAILING_PROSE_REMINDER =
49
49
  "Do NOT end the turn on a tool call — downstream stages read your assistant " +
50
50
  "transcript and will see nothing if the final message is a tool invocation.";
51
51
 
52
+ const AST_GREP_ENV_NOTICE =
53
+ "You are operating in an environment where ast-grep is installed. For any " +
54
+ "code search that requires understanding of syntax or code structure, you " +
55
+ "should default to using `ast-grep --lang [language] -p '<pattern>'`. Rely " +
56
+ "on your ast-grep skill for best practices. Adjust the --lang flag as " +
57
+ "needed for the specific programming language. Avoid using text-only " +
58
+ "search tools unless a plain-text search is explicitly requested.";
59
+
52
60
  /** Slugify the user's prompt for use in the final research filename. */
53
61
  export function slugifyPrompt(prompt: string): string {
54
62
  const slug = prompt
@@ -78,8 +86,15 @@ function renderPartitionDirs(partition: PartitionUnit[]): string {
78
86
  }
79
87
 
80
88
  // ─────────────────────────────────────────────────────────────────────────────
81
- // Stage 1a — codebase-scout (single LLM orientation call)
89
+ // Stage 1a — codebase-scout + query planner (single LLM call)
82
90
  // ─────────────────────────────────────────────────────────────────────────────
91
+ //
92
+ // The scout produces both a ≤300-word architectural orientation AND a list of
93
+ // per-partition ast-grep query seeds. The combined output is embedded verbatim
94
+ // into each specialist sub-agent's prompt as a single <ARCHITECTURAL_ORIENTATION>
95
+ // block — no JSON envelope, no deterministic parser. Specialists locate "their"
96
+ // partition's seeds by searching for the matching section header inside the
97
+ // block and treat them as starting points, not commands.
83
98
 
84
99
  export function buildScoutPrompt(opts: {
85
100
  question: string;
@@ -103,8 +118,9 @@ export function buildScoutPrompt(opts: {
103
118
  `</RESEARCH_QUESTION>`,
104
119
  ``,
105
120
  `<CONTEXT>`,
106
- `You are the codebase scout for the deep-research-codebase workflow. The`,
107
- `workflow has already computed the codebase layout deterministically:`,
121
+ `You are the codebase scout AND query planner for the deep-research-codebase`,
122
+ `workflow. The workflow has already computed the codebase layout`,
123
+ `deterministically:`,
108
124
  ``,
109
125
  `- Total source files: ${opts.totalFiles.toLocaleString()}`,
110
126
  `- Total LOC: ${opts.totalLoc.toLocaleString()}`,
@@ -119,25 +135,49 @@ export function buildScoutPrompt(opts: {
119
135
  "```",
120
136
  `</CONTEXT>`,
121
137
  ``,
138
+ `<TOOLING>`,
139
+ AST_GREP_ENV_NOTICE,
140
+ `Consult https://ast-grep.github.io/reference/languages.html for the`,
141
+ `canonical language list, and https://ast-grep.github.io/llms-full.txt for`,
142
+ `the full rule reference, when you need them.`,
143
+ `</TOOLING>`,
144
+ ``,
122
145
  `<TASK>`,
123
- `Read the tree above and produce a brief architectural orientation that`,
124
- `the downstream specialist sub-agents will use to anchor their searches.`,
146
+ `Produce TWO sections — both will be embedded verbatim into the specialist`,
147
+ `sub-agents' prompts. Use the markdown headers shown so specialists can`,
148
+ `find their partition's seeds.`,
125
149
  ``,
126
- `Cover, in ≤300 words:`,
150
+ `## Orientation`,
151
+ `In ≤300 words, cover:`,
127
152
  ` 1. The repo's overall shape (monorepo vs single package, polyglot or not)`,
128
153
  ` 2. The 3-5 most important top-level directories and what each contains`,
129
- ` 3. Architectural boundaries / layering you can see from the tree`,
154
+ ` 3. Architectural boundaries / layering visible from the tree`,
130
155
  ` 4. Where entry points or main modules likely live`,
131
156
  ``,
132
- `Do NOT attempt to answer the research question yet — your job is`,
133
- `orientation for downstream specialists, not investigation. You may use`,
134
- `Read/Glob/Grep sparingly to verify guesses about a few key files,`,
135
- `but keep the output short.`,
157
+ `## Query Seeds`,
158
+ `For each of the ${opts.explorerCount} partitions, suggest 2-4 ast-grep`,
159
+ `query seeds the specialists could start from. Format each seed as:`,
160
+ ``,
161
+ `### Partition <n>`,
162
+ `- Query: \`ast-grep --lang <language> -p '<pattern>'\``,
163
+ ` Why: <one sentence>`,
164
+ ``,
165
+ `For structural rules (kind + has/inside), use a fenced YAML block instead`,
166
+ `of the \`-p\` form, with the same Why line.`,
167
+ ``,
168
+ `Seeds are starting points, not commands — specialists adapt as they find`,
169
+ `things. If a partition is clearly irrelevant to the question, write a`,
170
+ `single-line note explaining why and skip its seeds.`,
136
171
  `</TASK>`,
137
172
  ``,
138
173
  `<CONSTRAINTS>`,
139
174
  DOCUMENTARIAN_DISCLAIMER,
140
- `Stay under 300 words. No bullet lists longer than 5 items.`,
175
+ `Do NOT investigate the codebase to answer the question yourself — your`,
176
+ `job is orientation + seeding, not investigation. You may use Read/Glob/`,
177
+ `Grep/ast-grep sparingly to verify guesses about a few key files or to`,
178
+ `confirm a pattern parses, but keep output focused.`,
179
+ `Stay under 300 words for the Orientation section. Plain markdown only —`,
180
+ `no JSON envelope, no structured output.`,
141
181
  TRAILING_PROSE_REMINDER,
142
182
  `</CONSTRAINTS>`,
143
183
  ``,
@@ -186,7 +226,16 @@ export function buildLocatorPrompt(opts: {
186
226
  `relates to the research question, and return a categorized index.`,
187
227
  `</MISSION>`,
188
228
  ``,
229
+ `<TOOLING>`,
230
+ AST_GREP_ENV_NOTICE,
231
+ `</TOOLING>`,
232
+ ``,
189
233
  `<ARCHITECTURAL_ORIENTATION>`,
234
+ `The briefing below contains both a high-level orientation AND per-partition`,
235
+ `ast-grep query seeds. Find the **Partition ${opts.index}** section for the`,
236
+ `seeds scoped to your investigation — treat them as starting points, not`,
237
+ `commands. Adapt or skip seeds that don't fit what you actually find.`,
238
+ ``,
190
239
  orientation,
191
240
  `</ARCHITECTURAL_ORIENTATION>`,
192
241
  ``,
@@ -267,7 +316,16 @@ export function buildPatternFinderPrompt(opts: {
267
316
  `Return runnable-looking snippets, not abstract descriptions.`,
268
317
  `</MISSION>`,
269
318
  ``,
319
+ `<TOOLING>`,
320
+ AST_GREP_ENV_NOTICE,
321
+ `</TOOLING>`,
322
+ ``,
270
323
  `<ARCHITECTURAL_ORIENTATION>`,
324
+ `The briefing below contains both a high-level orientation AND per-partition`,
325
+ `ast-grep query seeds. Find the **Partition ${opts.index}** section for the`,
326
+ `seeds scoped to your investigation — treat them as starting points, not`,
327
+ `commands. Adapt or skip seeds that don't fit what you actually find.`,
328
+ ``,
271
329
  orientation,
272
330
  `</ARCHITECTURAL_ORIENTATION>`,
273
331
  ``,
@@ -337,7 +395,16 @@ export function buildAnalyzerPrompt(opts: {
337
395
  `precise \`file.ts:line\` references throughout.`,
338
396
  `</MISSION>`,
339
397
  ``,
398
+ `<TOOLING>`,
399
+ AST_GREP_ENV_NOTICE,
400
+ `</TOOLING>`,
401
+ ``,
340
402
  `<ARCHITECTURAL_ORIENTATION>`,
403
+ `The briefing below contains both a high-level orientation AND per-partition`,
404
+ `ast-grep query seeds. Find the **Partition ${opts.index}** section for the`,
405
+ `seeds scoped to your investigation — treat them as starting points, not`,
406
+ `commands. Adapt or skip seeds that don't fit what you actually find.`,
407
+ ``,
341
408
  orientation,
342
409
  `</ARCHITECTURAL_ORIENTATION>`,
343
410
  ``,
@@ -765,3 +832,127 @@ export function buildAggregatorPrompt(opts: {
765
832
  `</RESEARCH_QUESTION_REMINDER>`,
766
833
  ].join("\n");
767
834
  }
835
+
836
+ // ─────────────────────────────────────────────────────────────────────────────
837
+ // Stage 2 — batched specialist dispatch (Task-tool fan-out)
838
+ // ─────────────────────────────────────────────────────────────────────────────
839
+ //
840
+ // To cap parallel SDK subprocesses, specialist invocations are grouped into
841
+ // "batch sessions" (see helpers/batching.ts). Each batch session is a single
842
+ // Claude Agent SDK call whose main thread dispatches up to N sub-agents via
843
+ // the Task tool. The sub-agents write their verbatim findings to per-task
844
+ // scratch files and reply with a single confirmation token, so the
845
+ // orchestrator's context grows by N short lines rather than N transcripts
846
+ // (filesystem-context skill).
847
+
848
+ /**
849
+ * Wrap a specialist prompt with the "write to file, reply with token only"
850
+ * envelope. The envelope is what the orchestrator hands to the Task tool's
851
+ * `prompt` parameter — the inner specialist prompt is built by the existing
852
+ * `buildLocatorPrompt` / `buildPatternFinderPrompt` / etc. and embedded
853
+ * verbatim so prompt semantics stay identical to the unbatched workflow.
854
+ */
855
+ export function wrapPromptForTaskDispatch(opts: {
856
+ specialistPrompt: string;
857
+ outputPath: string;
858
+ agentLabel: string;
859
+ }): string {
860
+ return [
861
+ `<TASK_OUTPUT_CONTRACT>`,
862
+ `Write your COMPLETE response — the verbatim markdown findings exactly as`,
863
+ `the prompt below specifies — to this absolute path using the Write tool:`,
864
+ ``,
865
+ ` ${opts.outputPath}`,
866
+ ``,
867
+ `Then reply with exactly the single token "DONE" and nothing else. Your`,
868
+ `parent only needs confirmation; the file is the real output. Do NOT`,
869
+ `inline your findings into your reply, do NOT add commentary, do NOT`,
870
+ `summarise — just write the file, then reply "DONE".`,
871
+ ``,
872
+ `If you cannot produce useful findings (e.g. the partition has nothing`,
873
+ `relevant to the question), write a one-line sentinel to the file`,
874
+ `explaining why, then still reply "DONE". Reply with`,
875
+ `"FAILED: <one-line reason>" only if you could not even write the file.`,
876
+ `</TASK_OUTPUT_CONTRACT>`,
877
+ ``,
878
+ `<${opts.agentLabel}_TASK>`,
879
+ opts.specialistPrompt,
880
+ `</${opts.agentLabel}_TASK>`,
881
+ ].join("\n");
882
+ }
883
+
884
+ /**
885
+ * Build the orchestrator prompt for a batch session. The orchestrator's job
886
+ * is purely deterministic dispatch — fire one Task tool call per task in
887
+ * **a single assistant message** so they execute in parallel, then report a
888
+ * one-line tally. It must NOT inline sub-agent findings, paraphrase the
889
+ * embedded prompts, or retry failures — siblings still run and synthesis
890
+ * tolerates missing files.
891
+ */
892
+ export function buildBatchOrchestratorPrompt(opts: {
893
+ wave: 1 | 2;
894
+ batchIndex: number;
895
+ totalBatches: number;
896
+ tasks: Array<{
897
+ subagentType: string;
898
+ prompt: string;
899
+ outputPath: string;
900
+ }>;
901
+ }): string {
902
+ const taskBlocks = opts.tasks
903
+ .map((t, i) =>
904
+ [
905
+ `### Task ${i + 1} of ${opts.tasks.length} — \`${t.subagentType}\``,
906
+ `Output path the sub-agent will write to: \`${t.outputPath}\``,
907
+ ``,
908
+ `Verbatim prompt to pass as the Task tool's \`prompt\` parameter:`,
909
+ ``,
910
+ "````",
911
+ t.prompt,
912
+ "````",
913
+ ``,
914
+ ].join("\n"),
915
+ )
916
+ .join("\n");
917
+
918
+ return [
919
+ `<BATCH_DISPATCH_MISSION>`,
920
+ `You are the deterministic dispatcher for batch ${opts.batchIndex} of`,
921
+ `${opts.totalBatches} in wave ${opts.wave} of the deep-research-codebase`,
922
+ `workflow. Your sole job is to spawn the ${opts.tasks.length} sub-agent`,
923
+ `task${opts.tasks.length === 1 ? "" : "s"} listed below using the Task tool.`,
924
+ `</BATCH_DISPATCH_MISSION>`,
925
+ ``,
926
+ `<DISPATCH_RULES>`,
927
+ `1. Issue ALL ${opts.tasks.length} Task tool calls in a SINGLE assistant`,
928
+ ` message (parallel tool use), not sequentially across multiple turns.`,
929
+ ` Parallel dispatch is the only reason this batch exists — sequential`,
930
+ ` calls defeat its purpose.`,
931
+ `2. For each task: set \`subagent_type\` to the value shown, set \`prompt\``,
932
+ ` to the verbatim text inside the fenced block (no paraphrasing,`,
933
+ ` truncating, or added framing), and set \`description\` to a short`,
934
+ ` 3–5 word label.`,
935
+ `3. Dispatch every task even if some look similar to others. Tasks here`,
936
+ ` cover DIFFERENT codebase partitions or DIFFERENT specialist roles —`,
937
+ ` apparent overlap is not real overlap. Do NOT merge, skip, or combine.`,
938
+ `4. Do NOT inline any sub-agent's findings into your reply. The sub-agents`,
939
+ ` write their output to disk; downstream stages read those files.`,
940
+ `5. Do NOT retry failed sub-agents. Siblings still run and the synthesis`,
941
+ ` step tolerates missing files.`,
942
+ `</DISPATCH_RULES>`,
943
+ ``,
944
+ `<FINAL_REPLY_FORMAT>`,
945
+ `After all sub-agents complete, your final assistant message must be`,
946
+ `exactly one line of the form:`,
947
+ ``,
948
+ ` BATCH ${opts.batchIndex} COMPLETE: <ok>/${opts.tasks.length} ok, <failed> failed`,
949
+ ``,
950
+ `where <ok> is the count that replied "DONE" and <failed> is the count`,
951
+ `that replied "FAILED" or otherwise did not produce a file.`,
952
+ `</FINAL_REPLY_FORMAT>`,
953
+ ``,
954
+ `---`,
955
+ ``,
956
+ taskBlocks,
957
+ ].join("\n");
958
+ }
@@ -3,7 +3,8 @@
3
3
  *
4
4
  * Responsibilities:
5
5
  * 1. Discover the codebase root (git toplevel, falling back to cwd).
6
- * 2. List all source files, respecting .gitignore when in a git repo.
6
+ * 2. List all source files, honoring `.gitignore` via git ls-files in repos
7
+ * and via `rg --files` in non-repo directories that still have one.
7
8
  * 3. Count lines of code per file using batched `wc -l`.
8
9
  * 4. Render a compact directory tree (depth-bounded) for prompt context.
9
10
  * 5. Build "partition units" by aggregating LOC at depth-1, then drilling
@@ -15,32 +16,157 @@
15
16
 
16
17
  // Use Bun.spawnSync instead of node:child_process for consistency with the rest of the codebase.
17
18
 
18
- /** Source-file extensions we treat as "code" for LOC accounting. */
19
- const CODE_EXTENSIONS = new Set<string>([
20
- // Web / TS / JS
21
- "ts", "tsx", "js", "jsx", "mjs", "cjs",
22
- "vue", "svelte", "astro",
23
- // Systems
24
- "c", "cc", "cpp", "cxx", "h", "hpp", "rs", "go", "zig",
25
- // JVM / .NET
26
- "java", "kt", "kts", "scala", "groovy", "cs", "fs",
27
- // Scripting
28
- "py", "rb", "php", "pl", "lua", "sh", "bash", "zsh", "fish",
29
- // Mobile
30
- "swift", "m", "mm",
31
- // Functional / niche
32
- "ex", "exs", "erl", "elm", "hs", "ml", "clj", "cljs", "edn",
33
- "r", "jl", "dart", "nim",
34
- // Schemas / DSLs that materially shape behavior
35
- "sql", "graphql", "proto",
19
+ import * as linguistLanguages from "linguist-languages";
20
+ import type { Language } from "linguist-languages";
21
+ import ignore, { type Ignore } from "ignore";
22
+ import ignoreByDefault from "ignore-by-default";
23
+ import { readdirSync, readFileSync } from "node:fs";
24
+ import { join, posix as posixPath, relative, sep } from "node:path";
25
+
26
+ /**
27
+ * Source-file extensions we treat as "code" for LOC accounting.
28
+ *
29
+ * Derived from GitHub Linguist (`linguist-languages`), filtered to
30
+ * `type === "programming"`. Linguist tracks 500+ programming languages and
31
+ * keeps the canonical extension list per language up to date — using it
32
+ * removes a maintenance burden and picks up obscure-but-legitimate
33
+ * languages we'd never enumerate by hand.
34
+ *
35
+ * Three modifications layered on top of the raw linguist data:
36
+ *
37
+ * 1. **Multi-segment extensions are skipped.** Linguist lists entries like
38
+ * `.coffee.md` (Literate CoffeeScript) and `.gradle.kts` (Gradle Kotlin
39
+ * DSL). Our `isCodeFile()` only sees the tail after the final dot, so
40
+ * collapsing `.coffee.md` to `md` would mis-classify Markdown as code.
41
+ * Skipping them is safe because the base languages they extend always
42
+ * list a single-segment extension as well (`.coffee`, `.kts`).
43
+ * 2. **EXCLUDE_EXTENSIONS denylist.** A handful of single-segment
44
+ * extensions that programming-typed languages claim but which in
45
+ * practice almost always mean a non-code file (`.md` is claimed by
46
+ * GCC Machine Description but means Markdown 99.9% of the time).
47
+ * 3. **SCHEMA_EXTENSIONS allowlist.** Schemas/DSLs that linguist
48
+ * categorises as `type: "data"` but which materially shape codebase
49
+ * behaviour and belong in research scope.
50
+ */
51
+ const SCHEMA_EXTENSIONS = ["sql", "graphql", "proto"] as const;
52
+
53
+ /**
54
+ * Single-segment extensions that linguist's `programming`-typed languages
55
+ * claim but which in real-world codebases almost always mean a non-code
56
+ * file. Each entry needs a one-line justification.
57
+ */
58
+ const EXCLUDE_EXTENSIONS = new Set<string>([
59
+ "md", // claimed by "GCC Machine Description"; almost always Markdown.
36
60
  ]);
37
61
 
38
- /** Directories we always exclude even when not using git ls-files. */
39
- const FIND_IGNORE_PATTERNS = [
40
- "node_modules", ".git", "dist", "build", "out",
41
- ".next", ".nuxt", ".turbo", ".vercel", ".cache",
42
- "target", "vendor", "__pycache__", ".venv", "venv", "coverage",
43
- ];
62
+ const CODE_EXTENSIONS: Set<string> = (() => {
63
+ const out = new Set<string>();
64
+ // Each named export of `linguist-languages` is a `Language`; the namespace
65
+ // import has no other shape, so casting `Object.values(...)` to `Language[]`
66
+ // is sound and removes the need for an `unknown` intermediary.
67
+ for (const lang of Object.values(linguistLanguages) as Language[]) {
68
+ if (lang.type !== "programming") continue;
69
+ for (const ext of lang.extensions ?? []) {
70
+ const cleaned = ext.replace(/^\./, "").toLowerCase();
71
+ // Skip multi-segment extensions — see file-level comment.
72
+ if (cleaned.includes(".")) continue;
73
+ if (EXCLUDE_EXTENSIONS.has(cleaned)) continue;
74
+ out.add(cleaned);
75
+ }
76
+ }
77
+ for (const ext of SCHEMA_EXTENSIONS) out.add(ext);
78
+ return out;
79
+ })();
80
+
81
+ /**
82
+ * Recursively walk a directory tree, honoring nested `.gitignore` files at
83
+ * every level and seeding with `ignore-by-default`'s minimal universal set
84
+ * (`node_modules`, `.git`, `coverage`, etc.). Returns repo-relative paths.
85
+ *
86
+ * Used as the last-resort discovery fallback when neither `git ls-files` nor
87
+ * `rg --files` is available. The walker matches `.gitignore` semantics:
88
+ * • Patterns from a `.gitignore` only apply to files at or below the
89
+ * `.gitignore`'s directory.
90
+ * • Inherited rules from ancestor directories continue to apply.
91
+ * • Negations and the rest of gitignore syntax come from the `ignore`
92
+ * package, which is the de facto JS implementation.
93
+ *
94
+ * Symlinks are intentionally not followed (avoids cycles).
95
+ */
96
+ function walkWithIgnore(root: string): string[] {
97
+ const out: string[] = [];
98
+
99
+ const baseline: Ignore = ignore().add(ignoreByDefault.directories());
100
+ walk(root, [{ basePath: "", matcher: baseline }]);
101
+
102
+ function walk(
103
+ dir: string,
104
+ inheritedScopes: ReadonlyArray<{ basePath: string; matcher: Ignore }>,
105
+ ): void {
106
+ let scopes = inheritedScopes;
107
+ try {
108
+ const content = readFileSync(join(dir, ".gitignore"), "utf8");
109
+ const here = ignore().add(content);
110
+ // Normalize basePath to posix so it can be combined with `posix`
111
+ // (forward-slash) entry paths via `posix.relative` below — mixing
112
+ // separators in `path.relative` is undefined behaviour on Windows.
113
+ const basePathRel = relative(root, dir);
114
+ const basePath =
115
+ sep === "/" ? basePathRel : basePathRel.split(sep).join("/");
116
+ scopes = [
117
+ ...inheritedScopes,
118
+ { basePath, matcher: here },
119
+ ];
120
+ } catch {
121
+ // No .gitignore at this level — keep inherited scopes.
122
+ }
123
+
124
+ let entries;
125
+ try {
126
+ entries = readdirSync(dir, { withFileTypes: true });
127
+ } catch {
128
+ return;
129
+ }
130
+
131
+ for (const entry of entries) {
132
+ // Skip everything that isn't a regular file or a regular directory —
133
+ // most importantly, skip symlinks so we don't follow cycles.
134
+ if (!entry.isFile() && !entry.isDirectory()) continue;
135
+
136
+ const full = join(dir, entry.name);
137
+ const rel = relative(root, full);
138
+ // The `ignore` package requires forward-slash paths.
139
+ const posix = sep === "/" ? rel : rel.split(sep).join("/");
140
+ // Trailing slash so directory-only patterns (`dist/`) match.
141
+ const probe = entry.isDirectory() ? `${posix}/` : posix;
142
+
143
+ let ignored = false;
144
+ for (const scope of scopes) {
145
+ const within =
146
+ scope.basePath === ""
147
+ ? probe
148
+ : posixPath.relative(scope.basePath, posix) +
149
+ (entry.isDirectory() ? "/" : "");
150
+ // If `within` escapes the scope (starts with `..`), the file isn't
151
+ // under this .gitignore's reach — skip the check.
152
+ if (within.startsWith("..")) continue;
153
+ if (scope.matcher.ignores(within)) {
154
+ ignored = true;
155
+ break;
156
+ }
157
+ }
158
+ if (ignored) continue;
159
+
160
+ if (entry.isDirectory()) {
161
+ walk(full, scopes);
162
+ } else {
163
+ out.push(rel);
164
+ }
165
+ }
166
+ }
167
+
168
+ return out;
169
+ }
44
170
 
45
171
  /** Per-file LOC + path. */
46
172
  export type FileStats = { path: string; loc: number };
@@ -72,14 +198,19 @@ export type CodebaseScout = {
72
198
 
73
199
  /** Resolve the project root. Prefers `git rev-parse --show-toplevel`. */
74
200
  export function getCodebaseRoot(): string {
75
- const r = Bun.spawnSync({
76
- cmd: ["git", "rev-parse", "--show-toplevel"],
77
- stdout: "pipe",
78
- stderr: "pipe",
79
- });
80
- if (r.success && r.stdout) {
81
- return r.stdout.toString().trim();
82
- }
201
+ // Bun.spawnSync throws (rather than returning success:false) when the
202
+ // executable is missing from PATH — wrap so the documented "falls back to
203
+ // cwd" contract holds even on machines without git installed.
204
+ try {
205
+ const r = Bun.spawnSync({
206
+ cmd: ["git", "rev-parse", "--show-toplevel"],
207
+ stdout: "pipe",
208
+ stderr: "pipe",
209
+ });
210
+ if (r.success && r.stdout) {
211
+ return r.stdout.toString().trim();
212
+ }
213
+ } catch { /* git not on PATH — fall back to cwd */ }
83
214
  return process.cwd();
84
215
  }
85
216
 
@@ -90,36 +221,53 @@ function isCodeFile(p: string): boolean {
90
221
  return CODE_EXTENSIONS.has(ext);
91
222
  }
92
223
 
93
- /** List all files in the repository. Prefers git ls-files (respects .gitignore). */
224
+ /**
225
+ * List all files in the repository, honoring `.gitignore` whenever possible.
226
+ *
227
+ * Three discovery paths, tried in order — every path respects `.gitignore`:
228
+ *
229
+ * 1. **git ls-files** — for git repos. Combines `--cached` (tracked) with
230
+ * `--others --exclude-standard` (untracked-but-not-ignored) so a freshly
231
+ * created file the user hasn't `git add`-ed yet still appears, while
232
+ * anything matching `.gitignore` / `.git/info/exclude` is excluded.
233
+ * 2. **ripgrep `rg --files --hidden`** — for non-git directories that still
234
+ * have a `.gitignore` (or `.ignore`). `rg` honors both without needing
235
+ * a repo, and always excludes `.git/`. `--hidden` keeps tracked dotfiles
236
+ * like `.github/`, `.claude/` visible (matching git's behavior).
237
+ * 3. **In-process walker** — last-resort fallback when neither git nor rg
238
+ * is available. Uses the `ignore` package to honor every `.gitignore`
239
+ * it encounters (including nested ones), seeded with `ignore-by-default`
240
+ * for the universal-ignore baseline (`node_modules`, `.git`, etc.).
241
+ */
94
242
  function listAllFiles(root: string): string[] {
95
- const git = Bun.spawnSync({
96
- cmd: ["git", "ls-files"],
97
- cwd: root,
98
- stdout: "pipe",
99
- stderr: "pipe",
100
- });
101
- if (git.success && git.stdout) {
102
- return git.stdout.toString().split("\n").filter((l) => l.length > 0);
103
- }
243
+ // Bun.spawnSync throws (rather than returning success:false) when the
244
+ // executable is missing from PATH, so each branch is wrapped in try/catch
245
+ // and falls through to the next discovery strategy on error.
246
+ try {
247
+ const git = Bun.spawnSync({
248
+ cmd: ["git", "ls-files", "--cached", "--others", "--exclude-standard"],
249
+ cwd: root,
250
+ stdout: "pipe",
251
+ stderr: "pipe",
252
+ });
253
+ if (git.success && git.stdout) {
254
+ return git.stdout.toString().split("\n").filter((l) => l.length > 0);
255
+ }
256
+ } catch { /* git not on PATH — fall through to rg */ }
104
257
 
105
- // Fallback: shell out to find with the standard ignore patterns.
106
- const args: string[] = ["find", ".", "-type", "f"];
107
- for (const pattern of FIND_IGNORE_PATTERNS) {
108
- args.push("-not", "-path", `*/${pattern}/*`);
109
- }
110
- const find = Bun.spawnSync({
111
- cmd: args,
112
- cwd: root,
113
- stdout: "pipe",
114
- stderr: "pipe",
115
- });
116
- if (find.success && find.stdout) {
117
- return find.stdout.toString()
118
- .split("\n")
119
- .map((p) => p.replace(/^\.\//, ""))
120
- .filter((p) => p.length > 0);
121
- }
122
- return [];
258
+ try {
259
+ const rg = Bun.spawnSync({
260
+ cmd: ["rg", "--files", "--hidden"],
261
+ cwd: root,
262
+ stdout: "pipe",
263
+ stderr: "pipe",
264
+ });
265
+ if (rg.success && rg.stdout) {
266
+ return rg.stdout.toString().split("\n").filter((l) => l.length > 0);
267
+ }
268
+ } catch { /* rg not on PATH — fall through to in-process walker */ }
269
+
270
+ return walkWithIgnore(root);
123
271
  }
124
272
 
125
273
  /**
@@ -127,7 +275,11 @@ function listAllFiles(root: string): string[] {
127
275
  * " N filename"
128
276
  * " N total" (when more than one file is passed)
129
277
  *
130
- * We batch to avoid command-line length limits.
278
+ * We batch to avoid command-line length limits. When `wc` is missing from
279
+ * PATH (typical on Windows) `Bun.spawnSync` throws ENOENT — each batch is
280
+ * wrapped so we can fall back to an in-process newline counter rather than
281
+ * aborting the workflow or silently zeroing every file's LOC (which would
282
+ * collapse the partition bin-packer).
131
283
  */
132
284
  function countLines(root: string, files: string[]): Map<string, number> {
133
285
  const result = new Map<string, number>();
@@ -136,22 +288,40 @@ function countLines(root: string, files: string[]): Map<string, number> {
136
288
  const BATCH = 200;
137
289
  for (let i = 0; i < files.length; i += BATCH) {
138
290
  const batch = files.slice(i, i + BATCH);
139
- const r = Bun.spawnSync({
140
- cmd: ["wc", "-l", "--", ...batch],
141
- cwd: root,
142
- stdout: "pipe",
143
- stderr: "pipe",
144
- });
145
- if (!r.stdout) continue;
146
- for (const line of r.stdout.toString().split("\n")) {
147
- const m = line.match(/^\s*(\d+)\s+(.+)$/);
148
- // Regex groups are typed `string | undefined` under strict mode even
149
- // when the whole match succeeded — guard explicitly.
150
- const countStr = m?.[1];
151
- const filename = m?.[2]?.trim();
152
- if (countStr === undefined || filename === undefined) continue;
153
- if (filename === "total") continue;
154
- result.set(filename, parseInt(countStr, 10));
291
+ let wcOk = false;
292
+ try {
293
+ const r = Bun.spawnSync({
294
+ cmd: ["wc", "-l", "--", ...batch],
295
+ cwd: root,
296
+ stdout: "pipe",
297
+ stderr: "pipe",
298
+ });
299
+ if (r.stdout) {
300
+ wcOk = true;
301
+ for (const line of r.stdout.toString().split("\n")) {
302
+ const m = line.match(/^\s*(\d+)\s+(.+)$/);
303
+ // Regex groups are typed `string | undefined` under strict mode even
304
+ // when the whole match succeeded — guard explicitly.
305
+ const countStr = m?.[1];
306
+ const filename = m?.[2]?.trim();
307
+ if (countStr === undefined || filename === undefined) continue;
308
+ if (filename === "total") continue;
309
+ result.set(filename, parseInt(countStr, 10));
310
+ }
311
+ }
312
+ } catch { /* wc not on PATH — fall through to in-process counter */ }
313
+ if (wcOk) continue;
314
+ // In-process fallback: count newline bytes. Matches `wc -l` semantics
315
+ // (a final line without a trailing `\n` is not counted).
316
+ for (const f of batch) {
317
+ try {
318
+ const content = readFileSync(join(root, f), "utf8");
319
+ let count = 0;
320
+ for (let j = 0; j < content.length; j++) {
321
+ if (content.charCodeAt(j) === 10) count++;
322
+ }
323
+ result.set(f, count);
324
+ } catch { /* unreadable — leave unset; consumer treats as 0 */ }
155
325
  }
156
326
  }
157
327
  return result;