sentinelayer-cli 0.6.2 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (280) hide show
  1. package/README.md +1009 -996
  2. package/bin/create-sentinelayer.js +5 -5
  3. package/bin/sentinelayer-cli.js +4 -4
  4. package/bin/sl.js +5 -5
  5. package/package.json +64 -63
  6. package/src/agents/ai-governance/index.js +12 -0
  7. package/src/agents/ai-governance/tools/base.js +171 -0
  8. package/src/agents/ai-governance/tools/eval-regression.js +47 -0
  9. package/src/agents/ai-governance/tools/hitl-audit.js +81 -0
  10. package/src/agents/ai-governance/tools/index.js +52 -0
  11. package/src/agents/ai-governance/tools/prompt-drift.js +42 -0
  12. package/src/agents/ai-governance/tools/provenance-check.js +69 -0
  13. package/src/agents/backend/index.js +12 -0
  14. package/src/agents/backend/tools/base.js +189 -0
  15. package/src/agents/backend/tools/circuit-breaker-check.js +123 -0
  16. package/src/agents/backend/tools/idempotency-audit.js +105 -0
  17. package/src/agents/backend/tools/index.js +87 -0
  18. package/src/agents/backend/tools/retry-audit.js +132 -0
  19. package/src/agents/backend/tools/timeout-audit.js +144 -0
  20. package/src/agents/code-quality/index.js +12 -0
  21. package/src/agents/code-quality/tools/base.js +159 -0
  22. package/src/agents/code-quality/tools/complexity-measure.js +197 -0
  23. package/src/agents/code-quality/tools/coupling-analysis.js +81 -0
  24. package/src/agents/code-quality/tools/cycle-detect.js +49 -0
  25. package/src/agents/code-quality/tools/dep-graph.js +196 -0
  26. package/src/agents/code-quality/tools/index.js +89 -0
  27. package/src/agents/data-layer/index.js +12 -0
  28. package/src/agents/data-layer/tools/base.js +181 -0
  29. package/src/agents/data-layer/tools/index-audit.js +165 -0
  30. package/src/agents/data-layer/tools/index.js +83 -0
  31. package/src/agents/data-layer/tools/migration-scan.js +135 -0
  32. package/src/agents/data-layer/tools/query-explain.js +120 -0
  33. package/src/agents/data-layer/tools/tenancy-scan.js +166 -0
  34. package/src/agents/documentation/index.js +12 -0
  35. package/src/agents/documentation/tools/api-diff.js +91 -0
  36. package/src/agents/documentation/tools/base.js +151 -0
  37. package/src/agents/documentation/tools/dead-link-check.js +58 -0
  38. package/src/agents/documentation/tools/docstring-coverage.js +78 -0
  39. package/src/agents/documentation/tools/index.js +52 -0
  40. package/src/agents/documentation/tools/readme-freshness.js +61 -0
  41. package/src/agents/envelope/fix-cycle.js +45 -0
  42. package/src/agents/envelope/index.js +31 -0
  43. package/src/agents/envelope/loop.js +150 -0
  44. package/src/agents/envelope/pulse.js +18 -0
  45. package/src/agents/envelope/stream.js +40 -0
  46. package/src/agents/infrastructure/index.js +12 -0
  47. package/src/agents/infrastructure/tools/base.js +171 -0
  48. package/src/agents/infrastructure/tools/checkov-run.js +32 -0
  49. package/src/agents/infrastructure/tools/drift-detect.js +59 -0
  50. package/src/agents/infrastructure/tools/iam-least-priv-check.js +78 -0
  51. package/src/agents/infrastructure/tools/index.js +52 -0
  52. package/src/agents/infrastructure/tools/tflint-run.js +31 -0
  53. package/src/agents/jules/config/definition.js +160 -160
  54. package/src/agents/jules/config/system-prompt.js +182 -182
  55. package/src/agents/jules/error-intake.js +51 -51
  56. package/src/agents/jules/fix-cycle.js +17 -17
  57. package/src/agents/jules/loop.js +460 -450
  58. package/src/agents/jules/pulse.js +10 -10
  59. package/src/agents/jules/stream.js +187 -186
  60. package/src/agents/jules/swarm/file-scanner.js +74 -74
  61. package/src/agents/jules/swarm/index.js +11 -11
  62. package/src/agents/jules/swarm/orchestrator.js +362 -362
  63. package/src/agents/jules/swarm/pattern-hunter.js +123 -123
  64. package/src/agents/jules/swarm/sub-agent.js +315 -309
  65. package/src/agents/jules/tools/aidenid-email.js +189 -189
  66. package/src/agents/jules/tools/auth-audit.js +1708 -1691
  67. package/src/agents/jules/tools/dispatch.js +340 -335
  68. package/src/agents/jules/tools/file-edit.js +2 -2
  69. package/src/agents/jules/tools/file-read.js +2 -2
  70. package/src/agents/jules/tools/frontend-analyze.js +570 -570
  71. package/src/agents/jules/tools/glob.js +2 -2
  72. package/src/agents/jules/tools/grep.js +2 -2
  73. package/src/agents/jules/tools/index.js +29 -29
  74. package/src/agents/jules/tools/path-guards.js +2 -2
  75. package/src/agents/jules/tools/runtime-audit.js +507 -507
  76. package/src/agents/jules/tools/shell.js +2 -2
  77. package/src/agents/jules/tools/url-policy.js +100 -100
  78. package/src/agents/mode.js +113 -0
  79. package/src/agents/observability/index.js +12 -0
  80. package/src/agents/observability/tools/alert-audit.js +39 -0
  81. package/src/agents/observability/tools/base.js +181 -0
  82. package/src/agents/observability/tools/dashboard-gap.js +42 -0
  83. package/src/agents/observability/tools/index.js +54 -0
  84. package/src/agents/observability/tools/log-schema-check.js +74 -0
  85. package/src/agents/observability/tools/span-coverage.js +74 -0
  86. package/src/agents/persona-visuals.js +102 -61
  87. package/src/agents/release/index.js +12 -0
  88. package/src/agents/release/tools/base.js +181 -0
  89. package/src/agents/release/tools/changelog-diff.js +86 -0
  90. package/src/agents/release/tools/feature-flag-audit.js +126 -0
  91. package/src/agents/release/tools/index.js +61 -0
  92. package/src/agents/release/tools/rollback-verify.js +129 -0
  93. package/src/agents/release/tools/semver-check.js +109 -0
  94. package/src/agents/reliability/index.js +12 -0
  95. package/src/agents/reliability/tools/backpressure-check.js +129 -0
  96. package/src/agents/reliability/tools/base.js +181 -0
  97. package/src/agents/reliability/tools/chaos-probe.js +109 -0
  98. package/src/agents/reliability/tools/graceful-degradation-check.js +114 -0
  99. package/src/agents/reliability/tools/health-check-audit.js +111 -0
  100. package/src/agents/reliability/tools/index.js +87 -0
  101. package/src/agents/run-persona.js +109 -0
  102. package/src/agents/security/index.js +12 -0
  103. package/src/agents/security/tools/authz-audit.js +134 -0
  104. package/src/agents/security/tools/base.js +190 -0
  105. package/src/agents/security/tools/crypto-review.js +175 -0
  106. package/src/agents/security/tools/index.js +97 -0
  107. package/src/agents/security/tools/sast-scan.js +175 -0
  108. package/src/agents/security/tools/secrets-scan.js +216 -0
  109. package/src/agents/shared-tools/dispatch-core.js +320 -315
  110. package/src/agents/shared-tools/file-edit.js +180 -180
  111. package/src/agents/shared-tools/file-read.js +100 -100
  112. package/src/agents/shared-tools/glob.js +168 -168
  113. package/src/agents/shared-tools/grep.js +228 -228
  114. package/src/agents/shared-tools/index.js +46 -46
  115. package/src/agents/shared-tools/path-guards.js +161 -161
  116. package/src/agents/shared-tools/shell.js +383 -383
  117. package/src/agents/supply-chain/index.js +12 -0
  118. package/src/agents/supply-chain/tools/attestation-check.js +42 -0
  119. package/src/agents/supply-chain/tools/base.js +151 -0
  120. package/src/agents/supply-chain/tools/index.js +52 -0
  121. package/src/agents/supply-chain/tools/lockfile-integrity.js +73 -0
  122. package/src/agents/supply-chain/tools/package-verify.js +56 -0
  123. package/src/agents/supply-chain/tools/sbom-diff.js +34 -0
  124. package/src/agents/testing/index.js +12 -0
  125. package/src/agents/testing/tools/base.js +202 -0
  126. package/src/agents/testing/tools/coverage-gap.js +144 -0
  127. package/src/agents/testing/tools/flake-detect.js +125 -0
  128. package/src/agents/testing/tools/index.js +85 -0
  129. package/src/agents/testing/tools/mutation-test.js +143 -0
  130. package/src/agents/testing/tools/snapshot-diff.js +103 -0
  131. package/src/ai/aidenid.js +1021 -1009
  132. package/src/ai/client.js +553 -553
  133. package/src/ai/domain-target-store.js +268 -268
  134. package/src/ai/identity-store.js +270 -270
  135. package/src/ai/proxy.js +137 -137
  136. package/src/ai/site-store.js +145 -145
  137. package/src/audit/agents/architecture.js +180 -180
  138. package/src/audit/agents/compliance.js +179 -179
  139. package/src/audit/agents/documentation.js +165 -165
  140. package/src/audit/agents/performance.js +145 -145
  141. package/src/audit/agents/security.js +215 -215
  142. package/src/audit/agents/testing.js +172 -172
  143. package/src/audit/orchestrator.js +557 -557
  144. package/src/audit/package.js +204 -204
  145. package/src/audit/registry.js +284 -284
  146. package/src/audit/replay.js +103 -103
  147. package/src/auth/gate.js +428 -371
  148. package/src/auth/http.js +681 -611
  149. package/src/auth/service.js +1106 -1106
  150. package/src/auth/session-store.js +813 -813
  151. package/src/cli.js +257 -252
  152. package/src/commands/ai/identity-lifecycle.js +1338 -1338
  153. package/src/commands/ai/provision-governance.js +1272 -1272
  154. package/src/commands/ai/shared.js +147 -147
  155. package/src/commands/ai.js +11 -11
  156. package/src/commands/apply.js +12 -12
  157. package/src/commands/audit.js +1171 -1166
  158. package/src/commands/auth.js +419 -419
  159. package/src/commands/chat.js +184 -191
  160. package/src/commands/config.js +184 -184
  161. package/src/commands/cost.js +311 -311
  162. package/src/commands/daemon/core.js +850 -850
  163. package/src/commands/daemon/extended.js +1048 -1048
  164. package/src/commands/daemon/shared.js +213 -213
  165. package/src/commands/daemon.js +11 -11
  166. package/src/commands/guide.js +174 -174
  167. package/src/commands/ingest.js +58 -58
  168. package/src/commands/init.js +55 -55
  169. package/src/commands/legacy-args.js +20 -10
  170. package/src/commands/mcp.js +461 -461
  171. package/src/commands/omargate.js +63 -29
  172. package/src/commands/persona.js +65 -20
  173. package/src/commands/plugin.js +260 -260
  174. package/src/commands/policy.js +132 -132
  175. package/src/commands/prompt.js +238 -238
  176. package/src/commands/review.js +704 -704
  177. package/src/commands/scan.js +865 -872
  178. package/src/commands/session.js +1238 -0
  179. package/src/commands/spec.js +771 -716
  180. package/src/commands/swarm.js +651 -651
  181. package/src/commands/telemetry.js +202 -202
  182. package/src/commands/watch.js +511 -511
  183. package/src/config/agent-dictionary.js +182 -182
  184. package/src/config/io.js +56 -56
  185. package/src/config/paths.js +18 -18
  186. package/src/config/schema.js +55 -55
  187. package/src/config/service.js +184 -184
  188. package/src/coord/events-log.js +141 -0
  189. package/src/coord/handshake.js +719 -0
  190. package/src/coord/index.js +35 -0
  191. package/src/coord/paths.js +84 -0
  192. package/src/coord/priority.js +62 -0
  193. package/src/coord/tarjan.js +157 -0
  194. package/src/cost/budget.js +235 -235
  195. package/src/cost/history.js +188 -188
  196. package/src/cost/tokenizer.js +160 -0
  197. package/src/cost/tracker.js +232 -171
  198. package/src/daemon/artifact-lineage.js +896 -534
  199. package/src/daemon/assignment-ledger.js +1083 -770
  200. package/src/daemon/ast-drift.js +496 -0
  201. package/src/daemon/ast-parser-layer.js +258 -258
  202. package/src/daemon/budget-governor.js +633 -633
  203. package/src/daemon/callgraph-overlay.js +646 -646
  204. package/src/daemon/error-worker.js +1209 -626
  205. package/src/daemon/fix-cycle.js +384 -377
  206. package/src/daemon/hybrid-mapper.js +929 -929
  207. package/src/daemon/ingest-refresh.js +79 -11
  208. package/src/daemon/jira-lifecycle.js +767 -632
  209. package/src/daemon/operator-control.js +657 -657
  210. package/src/daemon/pulse.js +327 -327
  211. package/src/daemon/reliability-lane.js +471 -471
  212. package/src/daemon/scope-engine.js +1068 -0
  213. package/src/daemon/watchdog.js +971 -971
  214. package/src/events/schema.js +190 -0
  215. package/src/guide/generator.js +316 -316
  216. package/src/ingest/engine.js +933 -918
  217. package/src/ingest/ownership.js +380 -0
  218. package/src/interactive/index.js +97 -97
  219. package/src/legacy-cli.js +3228 -2994
  220. package/src/mcp/registry.js +695 -695
  221. package/src/memory/blackboard.js +301 -301
  222. package/src/memory/retrieval.js +581 -581
  223. package/src/orchestrator/kai-chen.js +126 -0
  224. package/src/plugin/manifest.js +553 -553
  225. package/src/policy/packs.js +144 -144
  226. package/src/prompt/generator.js +136 -118
  227. package/src/review/ai-review.js +672 -679
  228. package/src/review/compliance-pack.js +389 -0
  229. package/src/review/investor-dd-config.js +54 -0
  230. package/src/review/investor-dd-file-loop.js +303 -0
  231. package/src/review/investor-dd-file-router.js +406 -0
  232. package/src/review/investor-dd-html-report.js +233 -0
  233. package/src/review/investor-dd-notification.js +120 -0
  234. package/src/review/investor-dd-orchestrator.js +405 -0
  235. package/src/review/investor-dd-persona-runner.js +275 -0
  236. package/src/review/live-validator.js +253 -0
  237. package/src/review/local-review.js +1351 -1305
  238. package/src/review/omargate-interactive.js +68 -68
  239. package/src/review/omargate-orchestrator.js +492 -300
  240. package/src/review/persona-prompts.js +484 -296
  241. package/src/review/reconciliation-rules.js +329 -0
  242. package/src/review/replay.js +235 -235
  243. package/src/review/report.js +664 -664
  244. package/src/review/reproducibility-chain.js +136 -0
  245. package/src/review/scan-modes.js +147 -42
  246. package/src/review/spec-binding.js +487 -487
  247. package/src/scaffold/generator.js +67 -67
  248. package/src/scaffold/templates.js +150 -150
  249. package/src/scan/generator.js +418 -418
  250. package/src/scan/gh-secrets.js +107 -107
  251. package/src/session/agent-registry.js +359 -0
  252. package/src/session/analytics.js +479 -0
  253. package/src/session/daemon.js +1396 -0
  254. package/src/session/file-locks.js +666 -0
  255. package/src/session/paths.js +37 -0
  256. package/src/session/recap.js +567 -0
  257. package/src/session/redact.js +82 -0
  258. package/src/session/runtime-bridge.js +762 -0
  259. package/src/session/scoring.js +406 -0
  260. package/src/session/setup-guides.js +304 -0
  261. package/src/session/store.js +704 -0
  262. package/src/session/stream.js +333 -0
  263. package/src/session/sync.js +753 -0
  264. package/src/session/tasks.js +1054 -0
  265. package/src/session/templates.js +188 -0
  266. package/src/spec/generator.js +619 -519
  267. package/src/spec/regenerate.js +237 -237
  268. package/src/spec/templates.js +91 -91
  269. package/src/swarm/dashboard.js +247 -247
  270. package/src/swarm/factory.js +363 -363
  271. package/src/swarm/pentest.js +934 -934
  272. package/src/swarm/registry.js +419 -419
  273. package/src/swarm/report.js +158 -158
  274. package/src/swarm/runtime.js +569 -576
  275. package/src/swarm/scenario-dsl.js +272 -272
  276. package/src/telemetry/ledger.js +302 -302
  277. package/src/telemetry/session-tracker.js +234 -234
  278. package/src/telemetry/sync.js +203 -203
  279. package/src/ui/command-hints.js +13 -13
  280. package/src/ui/markdown.js +220 -220
@@ -1,918 +1,933 @@
1
- import { execFile } from "node:child_process";
2
- import { createHash } from "node:crypto";
3
- import fs from "node:fs";
4
- import fsp from "node:fs/promises";
5
- import path from "node:path";
6
- import process from "node:process";
7
- import { promisify } from "node:util";
8
-
9
- import ignore from "ignore";
10
-
11
- import { resolveOutputRoot } from "../config/service.js";
12
-
13
- const DEFAULT_IGNORED_DIRS = new Set([
14
- ".git",
15
- "node_modules",
16
- ".venv",
17
- ".next",
18
- "dist",
19
- "build",
20
- "coverage",
21
- ".sentinelayer",
22
- ".turbo",
23
- ".idea",
24
- ".vscode",
25
- ]);
26
-
27
- const MAX_FILE_SIZE_BYTES = 1024 * 1024;
28
- const FILE_INDEX_LIMIT = 5000;
29
- const execFileAsync = promisify(execFile);
30
- const INGEST_CACHE_SCHEMA = "path-size-mtime-sha256-v1";
31
-
32
- const LANGUAGE_BY_EXTENSION = {
33
- ".js": "JavaScript",
34
- ".cjs": "JavaScript",
35
- ".mjs": "JavaScript",
36
- ".ts": "TypeScript",
37
- ".tsx": "TypeScript",
38
- ".jsx": "JavaScript",
39
- ".py": "Python",
40
- ".go": "Go",
41
- ".rs": "Rust",
42
- ".java": "Java",
43
- ".kt": "Kotlin",
44
- ".swift": "Swift",
45
- ".rb": "Ruby",
46
- ".php": "PHP",
47
- ".cs": "C#",
48
- ".cpp": "C++",
49
- ".cc": "C++",
50
- ".c": "C",
51
- ".h": "C/C++ Header",
52
- ".hpp": "C/C++ Header",
53
- ".sql": "SQL",
54
- ".md": "Markdown",
55
- ".yml": "YAML",
56
- ".yaml": "YAML",
57
- ".json": "JSON",
58
- ".toml": "TOML",
59
- ".tf": "Terraform",
60
- ".sh": "Shell",
61
- ".bash": "Shell",
62
- ".ps1": "PowerShell",
63
- ".dockerfile": "Docker",
64
- };
65
-
66
- const MANIFEST_CANDIDATES = new Set([
67
- "package.json",
68
- "requirements.txt",
69
- "pyproject.toml",
70
- "go.mod",
71
- "Cargo.toml",
72
- "Gemfile",
73
- "pom.xml",
74
- "build.gradle",
75
- "build.gradle.kts",
76
- "composer.json",
77
- ]);
78
-
79
- const ENTRY_POINT_CANDIDATES = [
80
- "src/index.ts",
81
- "src/index.tsx",
82
- "src/index.js",
83
- "src/main.ts",
84
- "src/main.js",
85
- "src/server.ts",
86
- "src/server.js",
87
- "index.ts",
88
- "index.js",
89
- "main.ts",
90
- "main.js",
91
- "main.py",
92
- "app.py",
93
- "server.py",
94
- "cmd/main.go",
95
- "src/main.rs",
96
- ];
97
-
98
- function toPosixPath(value) {
99
- return String(value || "").replace(/\\/g, "/");
100
- }
101
-
102
- function parseIsoToEpoch(value) {
103
- const normalized = String(value || "").trim();
104
- if (!normalized) {
105
- return null;
106
- }
107
- const epoch = Date.parse(normalized);
108
- if (!Number.isFinite(epoch)) {
109
- return null;
110
- }
111
- return epoch;
112
- }
113
-
114
- function normalizeMtimeMs(value) {
115
- const normalized = Number(value || 0);
116
- if (!Number.isFinite(normalized) || normalized < 0) {
117
- return 0;
118
- }
119
- return Math.floor(normalized);
120
- }
121
-
122
- function appendFingerprintInput(hasher, relativePath, sizeBytes, mtimeMs) {
123
- hasher.update(
124
- `${toPosixPath(relativePath)}\u001f${String(Number(sizeBytes || 0))}\u001f${normalizeMtimeMs(
125
- mtimeMs
126
- )}\n`,
127
- "utf-8"
128
- );
129
- }
130
-
131
- function countLoc(text) {
132
- return String(text || "")
133
- .split(/\r?\n/)
134
- .filter((line) => line.trim().length > 0).length;
135
- }
136
-
137
- function detectLanguage(relativePath) {
138
- const normalized = toPosixPath(relativePath);
139
- const baseName = path.basename(normalized).toLowerCase();
140
- if (baseName === "dockerfile") {
141
- return "Docker";
142
- }
143
- const extension = path.extname(baseName);
144
- return LANGUAGE_BY_EXTENSION[extension] || "Other";
145
- }
146
-
147
- async function readIgnorePatterns(filePath) {
148
- try {
149
- const raw = await fsp.readFile(filePath, "utf-8");
150
- return String(raw || "")
151
- .split(/\r?\n/)
152
- .map((line) => line.trim())
153
- .filter((line) => line && !line.startsWith("#"));
154
- } catch (error) {
155
- if (error && error.code === "ENOENT") {
156
- return [];
157
- }
158
- throw error;
159
- }
160
- }
161
-
162
- async function createIgnoreMatcher(rootPath) {
163
- const matcher = ignore();
164
- const gitignorePatterns = await readIgnorePatterns(path.join(rootPath, ".gitignore"));
165
- const sentinelPatterns = await readIgnorePatterns(path.join(rootPath, ".sentinelayerignore"));
166
- matcher.add([...gitignorePatterns, ...sentinelPatterns]);
167
-
168
- return {
169
- ignores(relativePath, isDirectory) {
170
- const normalized = toPosixPath(relativePath);
171
- if (!normalized) {
172
- return false;
173
- }
174
- const candidate = isDirectory ? `${normalized}/` : normalized;
175
- return matcher.ignores(candidate);
176
- },
177
- };
178
- }
179
-
180
- async function computeCodebaseContentFingerprint({ rootPath }) {
181
- const resolvedRoot = path.resolve(rootPath || process.cwd());
182
- const ignoreMatcher = await createIgnoreMatcher(resolvedRoot);
183
- const stack = [resolvedRoot];
184
- const hasher = createHash("sha256");
185
- let filesCount = 0;
186
- let latestFileMtimeMs = 0;
187
-
188
- while (stack.length > 0) {
189
- const current = stack.pop();
190
- if (!current) {
191
- continue;
192
- }
193
-
194
- let entries = [];
195
- try {
196
- entries = await fsp.readdir(current, { withFileTypes: true });
197
- } catch {
198
- continue;
199
- }
200
-
201
- for (const entry of entries) {
202
- const fullPath = path.join(current, entry.name);
203
- const relativePath = toPosixPath(path.relative(resolvedRoot, fullPath));
204
-
205
- if (entry.isDirectory()) {
206
- if (!relativePath) {
207
- continue;
208
- }
209
- if (DEFAULT_IGNORED_DIRS.has(entry.name)) {
210
- continue;
211
- }
212
- if (ignoreMatcher.ignores(relativePath, true)) {
213
- continue;
214
- }
215
- stack.push(fullPath);
216
- continue;
217
- }
218
-
219
- if (!entry.isFile()) {
220
- continue;
221
- }
222
- if (ignoreMatcher.ignores(relativePath, false)) {
223
- continue;
224
- }
225
-
226
- let stat = null;
227
- try {
228
- stat = await fsp.stat(fullPath);
229
- } catch {
230
- stat = null;
231
- }
232
- if (!stat || stat.size > MAX_FILE_SIZE_BYTES) {
233
- continue;
234
- }
235
-
236
- filesCount += 1;
237
- latestFileMtimeMs = Math.max(latestFileMtimeMs, normalizeMtimeMs(stat.mtimeMs));
238
- appendFingerprintInput(hasher, relativePath, stat.size, stat.mtimeMs);
239
- }
240
- }
241
-
242
- return {
243
- schema: INGEST_CACHE_SCHEMA,
244
- contentHash: hasher.digest("hex"),
245
- filesCount,
246
- latestFileMtimeMs,
247
- };
248
- }
249
-
250
- async function readExistingIngest(outputPath) {
251
- try {
252
- const parsed = JSON.parse(await fsp.readFile(outputPath, "utf-8"));
253
- return parsed && typeof parsed === "object" ? parsed : null;
254
- } catch (error) {
255
- if (error && error.code === "ENOENT") {
256
- return null;
257
- }
258
- throw error;
259
- }
260
- }
261
-
262
- async function resolveIngestOutputPath({ rootPath, outputFile = "", outputDir = "" }) {
263
- const resolvedRoot = path.resolve(rootPath || process.cwd());
264
- const explicitOutputFile = String(outputFile || "").trim();
265
- if (explicitOutputFile) {
266
- return path.resolve(resolvedRoot, explicitOutputFile);
267
- }
268
- const outputRoot = await resolveOutputRoot({
269
- cwd: resolvedRoot,
270
- outputDirOverride: outputDir,
271
- });
272
- return path.join(outputRoot, "CODEBASE_INGEST.json");
273
- }
274
-
275
- async function readGitLastCommitAt(rootPath) {
276
- const resolvedRoot = path.resolve(rootPath || process.cwd());
277
- try {
278
- const { stdout } = await execFileAsync("git", [
279
- "-C",
280
- resolvedRoot,
281
- "log",
282
- "-1",
283
- "--format=%cI",
284
- ]);
285
- const normalized = String(stdout || "").trim();
286
- return parseIsoToEpoch(normalized) === null ? "" : normalized;
287
- } catch {
288
- return "";
289
- }
290
- }
291
-
292
- function buildIngestStaleness({ existingIngest, fingerprint, lastCommitAt }) {
293
- if (!existingIngest) {
294
- return {
295
- stale: true,
296
- reasons: ["missing_ingest"],
297
- };
298
- }
299
-
300
- const reasons = [];
301
- const generatedAtEpoch = parseIsoToEpoch(existingIngest.generatedAt);
302
- const lastCommitEpoch = parseIsoToEpoch(lastCommitAt);
303
- if (generatedAtEpoch === null) {
304
- reasons.push("invalid_generated_at");
305
- } else if (lastCommitEpoch !== null && generatedAtEpoch < lastCommitEpoch) {
306
- reasons.push("older_than_last_commit");
307
- }
308
-
309
- const existingContentHash = String(existingIngest.cache?.contentHash || "").trim();
310
- if (existingContentHash && existingContentHash !== fingerprint.contentHash) {
311
- reasons.push("content_hash_mismatch");
312
- } else if (!existingContentHash) {
313
- reasons.push("missing_content_hash");
314
- }
315
-
316
- return {
317
- stale: reasons.length > 0,
318
- reasons,
319
- };
320
- }
321
-
322
- export function formatIngestResolutionNotice(resolution = {}) {
323
- const reasons = Array.isArray(resolution.reasons) ? resolution.reasons : [];
324
- if (resolution.refreshed) {
325
- return `ingest refreshed (${reasons.join(", ") || "requested"})`;
326
- }
327
- if (resolution.stale) {
328
- return `ingest stale (${reasons.join(", ") || "unknown"}); re-run with --refresh`;
329
- }
330
- return "ingest cache hit";
331
- }
332
-
333
- function safeJsonParse(raw) {
334
- try {
335
- return JSON.parse(raw);
336
- } catch {
337
- return null;
338
- }
339
- }
340
-
341
- function normalizeDependencySet(dependencies) {
342
- if (!dependencies || typeof dependencies !== "object") {
343
- return new Set();
344
- }
345
- return new Set(Object.keys(dependencies).map((value) => String(value || "").toLowerCase()));
346
- }
347
-
348
- function detectFrameworks(manifests) {
349
- const frameworks = new Set();
350
-
351
- const packageJson = manifests["package.json"] ? safeJsonParse(manifests["package.json"]) : null;
352
- if (packageJson) {
353
- const deps = normalizeDependencySet({
354
- ...(packageJson.dependencies || {}),
355
- ...(packageJson.devDependencies || {}),
356
- ...(packageJson.peerDependencies || {}),
357
- });
358
- if (deps.has("next")) frameworks.add("nextjs");
359
- if (deps.has("react")) frameworks.add("react");
360
- if (deps.has("vue")) frameworks.add("vue");
361
- if (deps.has("svelte")) frameworks.add("svelte");
362
- if (deps.has("express")) frameworks.add("express");
363
- if (deps.has("fastify")) frameworks.add("fastify");
364
- if (deps.has("hono")) frameworks.add("hono");
365
- if (deps.has("@nestjs/core")) frameworks.add("nestjs");
366
- if (deps.has("prisma")) frameworks.add("prisma");
367
- if (deps.has("typeorm")) frameworks.add("typeorm");
368
- if (deps.has("drizzle-orm")) frameworks.add("drizzle");
369
- if (deps.has("playwright")) frameworks.add("playwright");
370
- if (deps.has("jest")) frameworks.add("jest");
371
- if (deps.has("vitest")) frameworks.add("vitest");
372
- if (deps.has("@opentelemetry/api") || deps.has("@sentry/node")) frameworks.add("observability-js");
373
- }
374
-
375
- const requirementsText = String(manifests["requirements.txt"] || "").toLowerCase();
376
- if (/\bfastapi\b/.test(requirementsText)) frameworks.add("fastapi");
377
- if (/\bdjango\b/.test(requirementsText)) frameworks.add("django");
378
- if (/\bflask\b/.test(requirementsText)) frameworks.add("flask");
379
-
380
- const pyprojectText = String(manifests["pyproject.toml"] || "").toLowerCase();
381
- if (/\bfastapi\b/.test(pyprojectText)) frameworks.add("fastapi");
382
- if (/\bdjango\b/.test(pyprojectText)) frameworks.add("django");
383
- if (/\bflask\b/.test(pyprojectText)) frameworks.add("flask");
384
-
385
- const goModText = String(manifests["go.mod"] || "").toLowerCase();
386
- if (/gin-gonic\/gin/.test(goModText)) frameworks.add("gin");
387
- if (/gofiber\/fiber/.test(goModText)) frameworks.add("fiber");
388
- if (/labstack\/echo/.test(goModText)) frameworks.add("echo");
389
-
390
- const cargoText = String(manifests["Cargo.toml"] || "").toLowerCase();
391
- if (/\baxum\b/.test(cargoText)) frameworks.add("axum");
392
- if (/\bactix-web\b/.test(cargoText)) frameworks.add("actix-web");
393
-
394
- const gemfileText = String(manifests.Gemfile || "").toLowerCase();
395
- if (/\brails\b/.test(gemfileText)) frameworks.add("rails");
396
-
397
- return [...frameworks].sort((left, right) => left.localeCompare(right));
398
- }
399
-
400
- function derivePackageMetadata(manifests) {
401
- const packageJson = manifests["package.json"] ? safeJsonParse(manifests["package.json"]) : null;
402
- if (!packageJson || typeof packageJson !== "object") {
403
- return {
404
- name: "",
405
- scripts: [],
406
- };
407
- }
408
-
409
- const scripts =
410
- packageJson.scripts && typeof packageJson.scripts === "object"
411
- ? Object.keys(packageJson.scripts)
412
- .map((value) => String(value || "").trim())
413
- .filter(Boolean)
414
- .sort((left, right) => left.localeCompare(right))
415
- : [];
416
-
417
- return {
418
- name: String(packageJson.name || "").trim(),
419
- scripts,
420
- };
421
- }
422
-
423
- function deriveEntryPoints(fileSet, manifests) {
424
- const entryPoints = new Set();
425
- for (const candidate of ENTRY_POINT_CANDIDATES) {
426
- if (fileSet.has(candidate)) {
427
- entryPoints.add(candidate);
428
- }
429
- }
430
-
431
- const packageJson = manifests["package.json"] ? safeJsonParse(manifests["package.json"]) : null;
432
- if (packageJson) {
433
- if (typeof packageJson.main === "string" && packageJson.main.trim()) {
434
- entryPoints.add(packageJson.main.trim());
435
- }
436
- if (packageJson.bin && typeof packageJson.bin === "object") {
437
- for (const value of Object.values(packageJson.bin)) {
438
- const normalized = String(value || "").trim();
439
- if (normalized) {
440
- entryPoints.add(normalized);
441
- }
442
- }
443
- }
444
- }
445
-
446
- return [...entryPoints].sort((left, right) => left.localeCompare(right));
447
- }
448
-
449
- function deriveRiskSurfaces({ fileSet, frameworks, manifests, languageStats }) {
450
- const surfaces = new Map();
451
-
452
- const addSurface = (surface, reason) => {
453
- if (!surfaces.has(surface)) {
454
- surfaces.set(surface, reason);
455
- }
456
- };
457
-
458
- const hasFile = (predicate) => [...fileSet].some(predicate);
459
- const hasFramework = (name) => frameworks.includes(name);
460
-
461
- addSurface("code_quality", "Source files detected.");
462
- addSurface("security_overlay", "Credential/policy scanning is applicable for any repository ingest.");
463
-
464
- const hasTests = hasFile((file) => /(^|\/)(test|tests|__tests__)\//.test(file) || /\.(test|spec)\./.test(file));
465
- if (hasTests || hasFramework("jest") || hasFramework("vitest") || hasFramework("playwright")) {
466
- addSurface("testing_correctness", "Test assets detected.");
467
- }
468
-
469
- const hasFrontend =
470
- hasFramework("nextjs") ||
471
- hasFramework("react") ||
472
- hasFramework("vue") ||
473
- hasFramework("svelte") ||
474
- languageStats.JavaScript ||
475
- languageStats.TypeScript;
476
- if (hasFrontend) {
477
- addSurface("frontend_runtime", "Frontend/runtime JavaScript stack detected.");
478
- }
479
-
480
- const hasBackend =
481
- hasFramework("express") ||
482
- hasFramework("nestjs") ||
483
- hasFramework("fastify") ||
484
- hasFramework("hono") ||
485
- hasFramework("fastapi") ||
486
- hasFramework("django") ||
487
- hasFramework("flask") ||
488
- hasFramework("gin") ||
489
- hasFramework("fiber") ||
490
- hasFramework("echo");
491
- if (hasBackend) {
492
- addSurface("backend_runtime", "Backend framework/runtime hints detected.");
493
- }
494
-
495
- const hasData =
496
- hasFramework("prisma") ||
497
- hasFramework("typeorm") ||
498
- hasFramework("drizzle") ||
499
- hasFile((file) => /(^|\/)(migrations|db|database|sql)\//.test(file) || file.endsWith(".sql"));
500
- if (hasData) {
501
- addSurface("data_layer", "Data-model or migration assets detected.");
502
- }
503
-
504
- const hasInfra =
505
- hasFile(
506
- (file) =>
507
- file.endsWith(".tf") ||
508
- file.includes("docker-compose") ||
509
- file.endsWith("Dockerfile") ||
510
- /(^|\/)(k8s|helm|terraform)\//.test(file)
511
- );
512
- if (hasInfra) {
513
- addSurface("infrastructure", "Infrastructure-as-code or container orchestration assets detected.");
514
- }
515
-
516
- const hasRelease = hasFile((file) => file.startsWith(".github/workflows/") || file.startsWith(".gitlab-ci"));
517
- if (hasRelease) {
518
- addSurface("release_engineering", "CI/CD workflow definitions detected.");
519
- }
520
-
521
- const hasSupplyChain =
522
- Object.keys(manifests).length > 0 ||
523
- hasFile((file) =>
524
- [
525
- "package-lock.json",
526
- "pnpm-lock.yaml",
527
- "yarn.lock",
528
- "poetry.lock",
529
- "Pipfile.lock",
530
- "Cargo.lock",
531
- ].some((candidate) => file.endsWith(candidate))
532
- );
533
- if (hasSupplyChain) {
534
- addSurface("supply_chain", "Dependency manifests/lockfiles detected.");
535
- }
536
-
537
- const hasObservability =
538
- hasFramework("observability-js") ||
539
- hasFile((file) => /sentry|opentelemetry|prometheus|grafana/i.test(file));
540
- if (hasObservability) {
541
- addSurface("observability", "Observability tooling indicators detected.");
542
- }
543
-
544
- const hasAiPipeline = hasFile((file) => /(^|\/)(prompts|models|llm|agents?)\//i.test(file));
545
- if (hasAiPipeline) {
546
- addSurface("ai_pipeline", "AI/agent pipeline assets detected.");
547
- }
548
-
549
- const hasDocs = hasFile((file) => file.endsWith(".md") || file.startsWith("docs/"));
550
- if (hasDocs) {
551
- addSurface("docs_knowledge", "Documentation assets detected.");
552
- }
553
-
554
- if (hasInfra || hasObservability || hasRelease) {
555
- addSurface("reliability_sre", "Operational and deployment assets detected.");
556
- }
557
-
558
- return [...surfaces.entries()]
559
- .map(([surface, reason]) => ({ surface, reason }))
560
- .sort((left, right) => left.surface.localeCompare(right.surface));
561
- }
562
-
563
/**
 * Converts the per-language counters into a sorted report.
 *
 * @param {Record<string, {files: number, loc: number}>} languageStats - Counters keyed by language name.
 * @param {number} totalLoc - Total lines of code across all languages.
 * @returns {Array<{language: string, files: number, loc: number, locShare: number}>}
 *   Rows ordered by descending LOC, ties broken by language name.
 */
function summarizeLanguageStats(languageStats, totalLoc) {
  const rows = [];
  for (const [language, stats] of Object.entries(languageStats)) {
    // Share is rounded to four decimals; a zero total yields a share of 0.
    const locShare = totalLoc > 0 ? Number((stats.loc / totalLoc).toFixed(4)) : 0;
    rows.push({ language, files: stats.files, loc: stats.loc, locShare });
  }
  rows.sort((left, right) => right.loc - left.loc || left.language.localeCompare(right.language));
  return rows;
}
573
-
574
/**
 * Lists the immediate children of the workspace root.
 *
 * The built-in ignored-directory set is applied to directories only, matching
 * the recursive scanners in this file; a top-level *file* that happens to be
 * called e.g. `build` is still reported.
 *
 * @param {string} rootPath - Directory to list.
 * @param {{ignores(relativePath: string, isDirectory: boolean): boolean}} ignoreMatcher
 *   Matcher consulted for every entry name.
 * @returns {Promise<{directories: string[], files: string[]}>} Name-sorted
 *   lists, each capped at 200 entries. Both lists are empty when the root
 *   cannot be read.
 */
async function listTopLevel(rootPath, ignoreMatcher) {
  let entries;
  try {
    entries = await fsp.readdir(rootPath, { withFileTypes: true });
  } catch {
    // Best effort: an unreadable root simply has no visible children.
    return { directories: [], files: [] };
  }

  const directories = [];
  const files = [];
  for (const entry of entries) {
    const name = String(entry.name || "");
    if (!name) {
      continue;
    }
    const isDirectory = entry.isDirectory();
    if (isDirectory && DEFAULT_IGNORED_DIRS.has(name)) {
      continue;
    }
    if (ignoreMatcher.ignores(name, isDirectory)) {
      continue;
    }
    if (isDirectory) {
      directories.push(name);
    } else if (entry.isFile()) {
      files.push(name);
    }
  }

  const byName = (left, right) => left.localeCompare(right);
  return {
    directories: directories.sort(byName).slice(0, 200),
    files: files.sort(byName).slice(0, 200),
  };
}
601
-
602
/**
 * Walks the workspace and builds the full ingest document.
 *
 * Directories in the built-in ignore set or matched by the ignore files are
 * not descended into; files matched by the ignore files, larger than
 * MAX_FILE_SIZE_BYTES, or unreadable are skipped. Every file that passes the
 * stat/size check contributes to the cache fingerprint, even if reading its
 * text subsequently fails.
 *
 * When several manifests share a base name (for example a root
 * `package.json` and `packages/x/package.json`), the one closest to the root
 * is kept, so package metadata and framework detection describe the
 * workspace itself rather than whichever nested package was visited last.
 *
 * @param {{rootPath?: string}} [options] - `rootPath` defaults to the current working directory.
 * @returns {Promise<object>} Ingest document containing summary counters,
 *   top-level listing, detected manifests, language stats, frameworks,
 *   package metadata, entry points, risk surfaces, the file index and cache
 *   fingerprint data.
 */
export async function collectCodebaseIngest({ rootPath = process.cwd() } = {}) {
  const resolvedRoot = path.resolve(rootPath);
  const ignoreMatcher = await createIgnoreMatcher(resolvedRoot);
  const topLevel = await listTopLevel(resolvedRoot, ignoreMatcher);
  const fingerprintHasher = createHash("sha256");
  let fingerprintFilesCount = 0;
  let latestFileMtimeMs = 0;

  const stack = [resolvedRoot];
  const fileSet = new Set();
  const languageStats = {};
  const manifests = {};
  // Path depth (segment count) of the manifest currently stored per base name.
  const manifestDepths = new Map();

  const indexedFiles = [];
  let indexedOmittedCount = 0;
  let filesScanned = 0;
  let directoriesScanned = 0;
  let totalLoc = 0;
  let totalBytes = 0;

  while (stack.length > 0) {
    const current = stack.pop();
    if (!current) continue;

    let entries = [];
    try {
      entries = await fsp.readdir(current, { withFileTypes: true });
    } catch {
      // Unreadable directories are skipped rather than aborting the scan.
      continue;
    }

    directoriesScanned += 1;

    for (const entry of entries) {
      const fullPath = path.join(current, entry.name);
      const relativePath = toPosixPath(path.relative(resolvedRoot, fullPath));

      if (entry.isDirectory()) {
        if (!relativePath) {
          continue;
        }
        if (DEFAULT_IGNORED_DIRS.has(entry.name)) {
          continue;
        }
        if (ignoreMatcher.ignores(relativePath, true)) {
          continue;
        }
        stack.push(fullPath);
        continue;
      }

      if (!entry.isFile()) {
        continue;
      }
      if (ignoreMatcher.ignores(relativePath, false)) {
        continue;
      }

      let stat;
      try {
        stat = await fsp.stat(fullPath);
      } catch {
        continue;
      }
      if (!stat || stat.size > MAX_FILE_SIZE_BYTES) {
        continue;
      }

      // Fingerprint input is recorded before the read so that it stays in
      // step with the stat-only fingerprint computed elsewhere in this file.
      appendFingerprintInput(fingerprintHasher, relativePath, stat.size, stat.mtimeMs);
      fingerprintFilesCount += 1;
      latestFileMtimeMs = Math.max(latestFileMtimeMs, normalizeMtimeMs(stat.mtimeMs));

      let text = "";
      try {
        text = await fsp.readFile(fullPath, "utf-8");
      } catch {
        continue;
      }

      const loc = countLoc(text);
      const language = detectLanguage(relativePath);

      filesScanned += 1;
      totalLoc += loc;
      totalBytes += stat.size;
      fileSet.add(relativePath);

      if (!languageStats[language]) {
        languageStats[language] = { files: 0, loc: 0 };
      }
      languageStats[language].files += 1;
      languageStats[language].loc += loc;

      const baseName = path.basename(relativePath);
      if (MANIFEST_CANDIDATES.has(baseName)) {
        // Keep the shallowest manifest; on equal depth the first one seen wins.
        const depth = relativePath.split("/").length;
        const knownDepth = manifestDepths.get(baseName);
        if (knownDepth === undefined || depth < knownDepth) {
          manifestDepths.set(baseName, depth);
          manifests[baseName] = text;
        }
      }

      if (indexedFiles.length < FILE_INDEX_LIMIT) {
        indexedFiles.push({
          path: relativePath,
          language,
          loc,
          sizeBytes: stat.size,
        });
      } else {
        indexedOmittedCount += 1;
      }
    }
  }

  const frameworks = detectFrameworks(manifests);
  const packageMetadata = derivePackageMetadata(manifests);
  const entryPoints = deriveEntryPoints(fileSet, manifests);
  const riskSurfaces = deriveRiskSurfaces({
    fileSet,
    frameworks,
    manifests,
    languageStats,
  });

  return {
    schemaVersion: "1.0.0",
    generatedAt: new Date().toISOString(),
    rootPath: resolvedRoot,
    summary: {
      filesScanned,
      directoriesScanned,
      totalLoc,
      totalBytes,
    },
    topLevel,
    manifests: {
      detected: Object.keys(manifests).sort((left, right) => left.localeCompare(right)),
    },
    languages: summarizeLanguageStats(languageStats, totalLoc),
    frameworks,
    packageMetadata,
    entryPoints,
    riskSurfaces,
    indexedFiles: {
      limit: FILE_INDEX_LIMIT,
      omitted: indexedOmittedCount,
      files: indexedFiles,
    },
    cache: {
      schema: INGEST_CACHE_SCHEMA,
      contentHash: fingerprintHasher.digest("hex"),
      filesCount: fingerprintFilesCount,
      latestFileMtimeMs,
    },
  };
}
755
-
756
/**
 * Renders a short, human-readable summary of an ingest document.
 *
 * Missing sections fall back to "none" / 0, and a null or non-object ingest
 * is treated as an empty document instead of throwing.
 *
 * @param {object} ingest - Ingest document as produced by the collector.
 * @returns {string} Newline-separated summary lines.
 */
export function formatIngestSummary(ingest) {
  const source = ingest && typeof ingest === "object" ? ingest : {};
  const summary = source.summary || {};

  // Only the five leading languages are shown; an empty list renders as "none".
  const languageHead = Array.isArray(source.languages)
    ? source.languages
        .slice(0, 5)
        .map((item) => `${item.language}(${item.files} files/${item.loc} LOC)`)
        .join(", ") || "none"
    : "none";
  const frameworks =
    Array.isArray(source.frameworks) && source.frameworks.length ? source.frameworks.join(", ") : "none";
  const entryPoints =
    Array.isArray(source.entryPoints) && source.entryPoints.length ? source.entryPoints.join(", ") : "none";
  const packageName = String(source.packageMetadata?.name || "").trim();
  const packageScripts = Array.isArray(source.packageMetadata?.scripts) ? source.packageMetadata.scripts : [];

  const lines = [
    `Workspace path: ${source.rootPath ?? "unknown"}`,
    `Top-level directories: ${(source.topLevel?.directories || []).slice(0, 20).join(", ") || "none"}`,
    `Top-level files: ${(source.topLevel?.files || []).slice(0, 20).join(", ") || "none"}`,
    `Files scanned: ${summary.filesScanned || 0}`,
    `Total LOC: ${summary.totalLoc || 0}`,
    `Languages: ${languageHead}`,
    `Frameworks: ${frameworks}`,
    `Entry points: ${entryPoints}`,
  ];

  if (packageName) {
    lines.push(`package.json name: ${packageName}`);
  }
  if (packageScripts.length > 0) {
    lines.push(`package scripts: ${packageScripts.slice(0, 15).join(", ")}`);
  }

  return lines.join("\n");
}
795
-
796
/**
 * Serialises an ingest document to disk as pretty-printed JSON.
 *
 * An explicit `outputFile` is resolved against the workspace root; otherwise
 * the file is named `CODEBASE_INGEST.json` inside the directory returned by
 * `resolveOutputRoot`. Parent directories are created as needed.
 *
 * @param {{ingest: object, rootPath?: string, outputFile?: string, outputDir?: string}} [options]
 * @returns {Promise<string>} Absolute path of the written file.
 */
export async function writeCodebaseIngest({ ingest, rootPath, outputFile = "", outputDir = "" } = {}) {
  const baseDir = path.resolve(rootPath || process.cwd());
  const requestedFile = String(outputFile || "").trim();

  let outputPath;
  if (requestedFile) {
    outputPath = path.resolve(baseDir, requestedFile);
  } else {
    const outputRoot = await resolveOutputRoot({
      cwd: baseDir,
      outputDirOverride: outputDir,
    });
    outputPath = path.join(outputRoot, "CODEBASE_INGEST.json");
  }

  await fsp.mkdir(path.dirname(outputPath), { recursive: true });
  const serialized = `${JSON.stringify(ingest, null, 2)}\n`;
  await fsp.writeFile(outputPath, serialized, "utf-8");
  return outputPath;
}
813
-
814
/**
 * Loads the cached ingest, reports whether it is stale, and regenerates it
 * when it is missing or a refresh was explicitly requested.
 *
 * A stale-but-present ingest is NOT regenerated automatically; it is returned
 * with `stale: true` so the caller can decide what to do.
 *
 * @param {{rootPath?: string, outputFile?: string, outputDir?: string, refresh?: boolean}} [options]
 * @returns {Promise<object>} Resolution result with the ingest, its output
 *   path, refresh/staleness flags, the de-duplicated list of reasons, the
 *   current fingerprint, the last commit timestamp, and an `event` payload
 *   (null on a clean cache hit).
 */
export async function resolveCodebaseIngest({
  rootPath = process.cwd(),
  outputFile = "",
  outputDir = "",
  refresh = false,
} = {}) {
  const resolvedRoot = path.resolve(rootPath || process.cwd());
  const outputPath = await resolveIngestOutputPath({
    rootPath: resolvedRoot,
    outputFile,
    outputDir,
  });
  const existingIngest = await readExistingIngest(outputPath);
  const fingerprint = await computeCodebaseContentFingerprint({
    rootPath: resolvedRoot,
  });
  const lastCommitAt = await readGitLastCommitAt(resolvedRoot);
  const staleness = buildIngestStaleness({
    existingIngest,
    fingerprint,
    lastCommitAt,
  });
  const staleBeforeRefresh = staleness.stale;

  let refreshedBecause = "";
  if (!existingIngest) {
    refreshedBecause = "missing_ingest";
  } else if (refresh) {
    refreshedBecause = "refresh_requested";
  }
  const refreshed = refreshedBecause !== "";

  let ingest = existingIngest;
  if (refreshed) {
    ingest = await collectCodebaseIngest({
      rootPath: resolvedRoot,
    });
    ingest.generatedAt = new Date().toISOString();
    if (!ingest.cache || typeof ingest.cache !== "object") {
      ingest.cache = {};
    }
    // The stored cache data is the fingerprint computed above, so the next
    // staleness check compares like with like.
    ingest.cache.schema = INGEST_CACHE_SCHEMA;
    ingest.cache.contentHash = fingerprint.contentHash;
    ingest.cache.filesCount = fingerprint.filesCount;
    ingest.cache.latestFileMtimeMs = fingerprint.latestFileMtimeMs;
    await writeCodebaseIngest({
      ingest,
      rootPath: resolvedRoot,
      outputFile,
      outputDir,
    });
  }

  // The refresh cause can also appear among the staleness reasons (a missing
  // ingest is reported by both), so duplicates are removed, order preserved.
  const resolutionReasons = refreshed
    ? [...new Set([refreshedBecause, ...staleness.reasons].filter(Boolean))]
    : staleness.reasons;
  const stale = refreshed ? false : staleness.stale;

  return {
    ingest,
    outputPath,
    refreshed,
    stale,
    staleBeforeRefresh,
    reasons: resolutionReasons,
    refreshedBecause,
    refreshRequested: Boolean(refresh),
    lastCommitAt,
    fingerprint,
    event:
      refreshed || staleBeforeRefresh
        ? {
            event: "ingest_refresh",
            payload: {
              refreshed,
              stale,
              reason:
                refreshedBecause || (staleness.reasons.length > 0 ? staleness.reasons.join(",") : "cache_hit"),
              contentHash: fingerprint.contentHash,
              filesCount: fingerprint.filesCount,
              lastCommitAt,
            },
          }
        : null,
  };
}
901
-
902
/**
 * Collects a fresh ingest for the workspace and writes it to disk in one step.
 *
 * @param {{rootPath?: string, outputFile?: string, outputDir?: string}} [options]
 *   `rootPath` defaults to the current working directory.
 * @returns {Promise<{ingest: object, outputPath: string}>} The generated
 *   document and the path it was written to.
 */
export async function generateCodebaseIngest({
  rootPath = process.cwd(),
  outputFile = "",
  outputDir = "",
} = {}) {
  const ingest = await collectCodebaseIngest({ rootPath });
  const writeRequest = { ingest, rootPath, outputFile, outputDir };
  const outputPath = await writeCodebaseIngest(writeRequest);
  return { ingest, outputPath };
}
1
+ import { execFile } from "node:child_process";
2
+ import { createHash } from "node:crypto";
3
+ import fs from "node:fs";
4
+ import fsp from "node:fs/promises";
5
+ import path from "node:path";
6
+ import process from "node:process";
7
+ import { promisify } from "node:util";
8
+
9
+ import ignore from "ignore";
10
+
11
+ import { resolveOutputRoot } from "../config/service.js";
12
+
13
// Directory names the scanners never descend into, independent of ignore files.
const DEFAULT_IGNORED_DIRS = new Set([
  ".git",
  "node_modules",
  ".venv",
  ".next",
  "dist",
  "build",
  "coverage",
  ".sentinelayer",
  ".turbo",
  ".idea",
  ".vscode",
]);

// Files larger than this many bytes (1 MiB) are skipped by the scanners.
const MAX_FILE_SIZE_BYTES = 2 ** 20;
// Maximum number of per-file records kept in the ingest's file index.
const FILE_INDEX_LIMIT = 5000;
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);
// Identifier stored alongside the fingerprint in the ingest cache section.
const INGEST_CACHE_SCHEMA = "path-size-mtime-sha256-v1";

// Lower-cased file extension -> language label, expanded from a table grouped
// by language.
const LANGUAGE_BY_EXTENSION = Object.fromEntries(
  [
    ["JavaScript", [".js", ".cjs", ".mjs", ".jsx"]],
    ["TypeScript", [".ts", ".tsx"]],
    ["Python", [".py"]],
    ["Go", [".go"]],
    ["Rust", [".rs"]],
    ["Java", [".java"]],
    ["Kotlin", [".kt"]],
    ["Swift", [".swift"]],
    ["Ruby", [".rb"]],
    ["PHP", [".php"]],
    ["C#", [".cs"]],
    ["C++", [".cpp", ".cc"]],
    ["C", [".c"]],
    ["C/C++ Header", [".h", ".hpp"]],
    ["SQL", [".sql"]],
    ["Markdown", [".md"]],
    ["YAML", [".yml", ".yaml"]],
    ["JSON", [".json"]],
    ["TOML", [".toml"]],
    ["Terraform", [".tf"]],
    ["Shell", [".sh", ".bash"]],
    ["PowerShell", [".ps1"]],
    ["Docker", [".dockerfile"]],
  ].flatMap(([language, extensions]) => extensions.map((extension) => [extension, language]))
);

// Base names whose contents are captured as dependency manifests.
const MANIFEST_CANDIDATES = new Set([
  "package.json",
  "requirements.txt",
  "pyproject.toml",
  "go.mod",
  "Cargo.toml",
  "Gemfile",
  "pom.xml",
  "build.gradle",
  "build.gradle.kts",
  "composer.json",
]);

// Conventional entry-point paths, relative to the workspace root.
const ENTRY_POINT_CANDIDATES = [
  "src/index.ts",
  "src/index.tsx",
  "src/index.js",
  "src/main.ts",
  "src/main.js",
  "src/server.ts",
  "src/server.js",
  "index.ts",
  "index.js",
  "main.ts",
  "main.js",
  "main.py",
  "app.py",
  "server.py",
  "cmd/main.go",
  "src/main.rs",
];
97
+
98
/**
 * Rewrites every backslash in a path as a forward slash.
 *
 * @param {unknown} value - Path-like value; falsy input is treated as "".
 * @returns {string} The path using "/" separators only.
 */
function toPosixPath(value) {
  const text = String(value || "");
  return text.split("\\").join("/");
}
101
+
102
/**
 * Parses a timestamp string into epoch milliseconds.
 *
 * @param {unknown} value - Timestamp text; surrounding whitespace is ignored.
 * @returns {number|null} Epoch milliseconds, or null when the value is empty
 *   or `Date.parse` cannot interpret it.
 */
function parseIsoToEpoch(value) {
  const text = String(value || "").trim();
  const epoch = text ? Date.parse(text) : Number.NaN;
  return Number.isFinite(epoch) ? epoch : null;
}
113
+
114
/**
 * Normalises a modification time to a whole, non-negative millisecond count.
 *
 * @param {unknown} value - Raw mtime value.
 * @returns {number} The value rounded down, or 0 when it is missing,
 *   non-finite or negative.
 */
function normalizeMtimeMs(value) {
  const millis = Number(value || 0);
  const usable = Number.isFinite(millis) && millis >= 0;
  return usable ? Math.floor(millis) : 0;
}
121
+
122
/**
 * Feeds one file's identity record into the fingerprint hasher.
 *
 * The record is `path<US>size<US>mtime` followed by a newline, where <US> is
 * the unit separator character (U+001F).
 *
 * @param {{update(data: string, encoding: string): unknown}} hasher - Hash object being accumulated.
 * @param {string} relativePath - File path relative to the workspace root.
 * @param {number} sizeBytes - File size in bytes.
 * @param {number} mtimeMs - File modification time in milliseconds.
 * @returns {void}
 */
function appendFingerprintInput(hasher, relativePath, sizeBytes, mtimeMs) {
  const fields = [
    toPosixPath(relativePath),
    String(Number(sizeBytes || 0)),
    normalizeMtimeMs(mtimeMs),
  ];
  hasher.update(`${fields.join("\u001f")}\n`, "utf-8");
}
130
+
131
/**
 * Counts the lines of a text that contain at least one non-whitespace character.
 *
 * @param {unknown} text - File contents; falsy input counts as empty.
 * @returns {number} Number of non-blank lines.
 */
function countLoc(text) {
  let nonBlank = 0;
  for (const line of String(text || "").split(/\r?\n/)) {
    if (line.trim().length > 0) {
      nonBlank += 1;
    }
  }
  return nonBlank;
}
136
+
137
/**
 * Maps a file path to a language label.
 *
 * A file whose base name is `dockerfile` (case-insensitive) is labelled
 * "Docker"; otherwise the lower-cased extension is looked up in
 * LANGUAGE_BY_EXTENSION, and unknown extensions yield "Other".
 *
 * @param {string} relativePath - File path relative to the workspace root.
 * @returns {string} Language label.
 */
function detectLanguage(relativePath) {
  const fileName = path.basename(toPosixPath(relativePath)).toLowerCase();
  if (fileName === "dockerfile") {
    return "Docker";
  }
  return LANGUAGE_BY_EXTENSION[path.extname(fileName)] || "Other";
}
146
+
147
/**
 * Reads an ignore file and returns its effective pattern lines.
 *
 * Lines are trimmed; blank lines and lines starting with "#" are dropped.
 *
 * @param {string} filePath - Path of the ignore file.
 * @returns {Promise<string[]>} Pattern lines in file order; an empty list
 *   when the file does not exist.
 * @throws Any read error other than ENOENT is rethrown.
 */
async function readIgnorePatterns(filePath) {
  let raw;
  try {
    raw = await fsp.readFile(filePath, "utf-8");
  } catch (error) {
    if (error && error.code === "ENOENT") {
      return [];
    }
    throw error;
  }

  const patterns = [];
  for (const line of String(raw || "").split(/\r?\n/)) {
    const trimmed = line.trim();
    if (trimmed && !trimmed.startsWith("#")) {
      patterns.push(trimmed);
    }
  }
  return patterns;
}
161
+
162
/**
 * Builds a path matcher from the workspace's `.gitignore` and
 * `.sentinelayerignore` files (both read from the root directory only).
 *
 * @param {string} rootPath - Workspace root.
 * @returns {Promise<{ignores(relativePath: string, isDirectory: boolean): boolean}>}
 *   Matcher whose `ignores` returns false for an empty path and otherwise
 *   defers to the matcher created by the imported `ignore` factory;
 *   directories are tested with a trailing "/".
 */
async function createIgnoreMatcher(rootPath) {
  const patterns = [];
  for (const fileName of [".gitignore", ".sentinelayerignore"]) {
    patterns.push(...(await readIgnorePatterns(path.join(rootPath, fileName))));
  }

  const matcher = ignore();
  matcher.add(patterns);

  const ignores = (relativePath, isDirectory) => {
    const normalized = toPosixPath(relativePath);
    if (!normalized) {
      return false;
    }
    return matcher.ignores(isDirectory ? `${normalized}/` : normalized);
  };

  return { ignores };
}
179
+
180
/**
 * Computes a cheap fingerprint of the workspace from file paths, sizes and
 * modification times (file contents are not read).
 *
 * The walk skips directories in the built-in ignore set or matched by the
 * ignore files, files matched by the ignore files, files that cannot be
 * stat'ed, and files larger than MAX_FILE_SIZE_BYTES.
 *
 * @param {{rootPath: string}} options - Workspace root (defaults to cwd when empty).
 * @returns {Promise<{schema: string, contentHash: string, filesCount: number, latestFileMtimeMs: number}>}
 *   SHA-256 hex digest over the visited file records, plus the number of
 *   files hashed and the newest normalised mtime seen.
 */
async function computeCodebaseContentFingerprint({ rootPath }) {
  const root = path.resolve(rootPath || process.cwd());
  const matcher = await createIgnoreMatcher(root);
  const hasher = createHash("sha256");
  const pending = [root];
  let filesCount = 0;
  let latestFileMtimeMs = 0;

  while (pending.length > 0) {
    const directory = pending.pop();
    if (!directory) {
      continue;
    }

    let children;
    try {
      children = await fsp.readdir(directory, { withFileTypes: true });
    } catch {
      // Unreadable directories contribute nothing to the fingerprint.
      continue;
    }

    for (const child of children) {
      const absolutePath = path.join(directory, child.name);
      const relativePath = toPosixPath(path.relative(root, absolutePath));

      if (child.isDirectory()) {
        const skipped =
          !relativePath ||
          DEFAULT_IGNORED_DIRS.has(child.name) ||
          matcher.ignores(relativePath, true);
        if (!skipped) {
          pending.push(absolutePath);
        }
        continue;
      }

      if (!child.isFile() || matcher.ignores(relativePath, false)) {
        continue;
      }

      let stat = null;
      try {
        stat = await fsp.stat(absolutePath);
      } catch {
        stat = null;
      }
      if (!stat || stat.size > MAX_FILE_SIZE_BYTES) {
        continue;
      }

      filesCount += 1;
      latestFileMtimeMs = Math.max(latestFileMtimeMs, normalizeMtimeMs(stat.mtimeMs));
      appendFingerprintInput(hasher, relativePath, stat.size, stat.mtimeMs);
    }
  }

  return {
    schema: INGEST_CACHE_SCHEMA,
    contentHash: hasher.digest("hex"),
    filesCount,
    latestFileMtimeMs,
  };
}
249
+
250
/**
 * Loads a previously written ingest document, if a usable one exists.
 *
 * A file that is missing, is not valid JSON (e.g. truncated by an interrupted
 * write), or does not contain a JSON object is treated as "no cached ingest"
 * so the caller regenerates it instead of failing on every run.
 *
 * @param {string} outputPath - Path of the ingest JSON file.
 * @returns {Promise<object|null>} The parsed document, or null when no usable
 *   cache exists.
 * @throws Read errors other than ENOENT (for example permission errors) are rethrown.
 */
async function readExistingIngest(outputPath) {
  let raw;
  try {
    raw = await fsp.readFile(outputPath, "utf-8");
  } catch (error) {
    if (error && error.code === "ENOENT") {
      return null;
    }
    throw error;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Corrupt cache content: behave as if the cache were absent.
    return null;
  }

  const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isRecord ? parsed : null;
}
261
+
262
/**
 * Determines where the ingest JSON file lives.
 *
 * @param {{rootPath: string, outputFile?: string, outputDir?: string}} options
 *   An explicit `outputFile` is resolved against the workspace root;
 *   otherwise the file is `CODEBASE_INGEST.json` inside the directory
 *   returned by `resolveOutputRoot`.
 * @returns {Promise<string>} Path of the ingest file.
 */
async function resolveIngestOutputPath({ rootPath, outputFile = "", outputDir = "" }) {
  const baseDir = path.resolve(rootPath || process.cwd());
  const requestedFile = String(outputFile || "").trim();

  if (!requestedFile) {
    const outputRoot = await resolveOutputRoot({
      cwd: baseDir,
      outputDirOverride: outputDir,
    });
    return path.join(outputRoot, "CODEBASE_INGEST.json");
  }

  return path.resolve(baseDir, requestedFile);
}
274
+
275
/**
 * Asks git for the committer date of the most recent commit.
 *
 * @param {string} rootPath - Directory passed to `git -C`.
 * @returns {Promise<string>} The trimmed `%cI` output of `git log -1`, or ""
 *   when git fails or its output is not a parseable timestamp.
 */
async function readGitLastCommitAt(rootPath) {
  const repoDir = path.resolve(rootPath || process.cwd());
  const gitArgs = ["-C", repoDir, "log", "-1", "--format=%cI"];

  try {
    const result = await execFileAsync("git", gitArgs);
    const stamp = String(result.stdout || "").trim();
    if (parseIsoToEpoch(stamp) === null) {
      return "";
    }
    return stamp;
  } catch {
    // Any git failure is reported as "no commit information".
    return "";
  }
}
291
+
292
/**
 * Decides whether a cached ingest is out of date and why.
 *
 * Possible reasons, in the order they are reported: `missing_ingest`,
 * `invalid_generated_at`, `older_than_last_commit`, `content_hash_mismatch`,
 * `missing_content_hash`.
 *
 * @param {{existingIngest: object|null, fingerprint: {contentHash: string}, lastCommitAt: string}} input
 * @returns {{stale: boolean, reasons: string[]}} `stale` is true when at least one reason applies.
 */
function buildIngestStaleness({ existingIngest, fingerprint, lastCommitAt }) {
  if (!existingIngest) {
    return { stale: true, reasons: ["missing_ingest"] };
  }

  const generatedAtEpoch = parseIsoToEpoch(existingIngest.generatedAt);
  const lastCommitEpoch = parseIsoToEpoch(lastCommitAt);
  const storedHash = String(existingIngest.cache?.contentHash || "").trim();

  // Each row pairs a condition with the reason it reports.
  const checks = [
    [generatedAtEpoch === null, "invalid_generated_at"],
    [
      generatedAtEpoch !== null && lastCommitEpoch !== null && generatedAtEpoch < lastCommitEpoch,
      "older_than_last_commit",
    ],
    [storedHash !== "" && storedHash !== fingerprint.contentHash, "content_hash_mismatch"],
    [storedHash === "", "missing_content_hash"],
  ];

  const reasons = checks.filter(([applies]) => applies).map(([, reason]) => reason);
  return { stale: reasons.length > 0, reasons };
}
321
+
322
/**
 * Produces the one-line status message for an ingest resolution.
 *
 * @param {{refreshed?: boolean, stale?: boolean, reasons?: string[]}} [resolution]
 * @returns {string} "ingest refreshed (…)", "ingest stale (…); re-run with
 *   --refresh", or "ingest cache hit".
 */
export function formatIngestResolutionNotice(resolution = {}) {
  const reasonList = Array.isArray(resolution.reasons) ? resolution.reasons : [];
  const reasonText = reasonList.join(", ");

  if (resolution.refreshed) {
    return `ingest refreshed (${reasonText || "requested"})`;
  }
  return resolution.stale
    ? `ingest stale (${reasonText || "unknown"}); re-run with --refresh`
    : "ingest cache hit";
}
332
+
333
/**
 * Parses JSON without throwing.
 *
 * @param {string} raw - JSON text.
 * @returns {unknown} The parsed value, or null when parsing fails.
 */
function safeJsonParse(raw) {
  let parsed = null;
  try {
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
340
+
341
/**
 * Collects the lower-cased keys of a dependency map.
 *
 * @param {unknown} dependencies - Object keyed by package name.
 * @returns {Set<string>} Lower-cased package names; empty when the input is
 *   not an object.
 */
function normalizeDependencySet(dependencies) {
  const names = new Set();
  if (dependencies && typeof dependencies === "object") {
    for (const key of Object.keys(dependencies)) {
      names.add(String(key || "").toLowerCase());
    }
  }
  return names;
}
347
+
348
/**
 * Infers framework/tooling identifiers from the captured manifests.
 *
 * package.json is inspected through its dependencies, devDependencies and
 * peerDependencies; the other manifests are matched textually after
 * lower-casing. Prisma and Playwright are recognised through either of their
 * usual package names (`prisma` / `@prisma/client`, `playwright` /
 * `@playwright/test`).
 *
 * @param {Record<string, string>} manifests - Manifest text keyed by base file name.
 * @returns {string[]} Alphabetically sorted, de-duplicated framework identifiers.
 */
function detectFrameworks(manifests) {
  const frameworks = new Set();

  const packageJson = manifests["package.json"] ? safeJsonParse(manifests["package.json"]) : null;
  if (packageJson && typeof packageJson === "object") {
    const deps = normalizeDependencySet({
      ...(packageJson.dependencies || {}),
      ...(packageJson.devDependencies || {}),
      ...(packageJson.peerDependencies || {}),
    });
    // framework identifier -> npm packages, any one of which implies it.
    const nodeRules = [
      ["nextjs", ["next"]],
      ["react", ["react"]],
      ["vue", ["vue"]],
      ["svelte", ["svelte"]],
      ["express", ["express"]],
      ["fastify", ["fastify"]],
      ["hono", ["hono"]],
      ["nestjs", ["@nestjs/core"]],
      ["prisma", ["prisma", "@prisma/client"]],
      ["typeorm", ["typeorm"]],
      ["drizzle", ["drizzle-orm"]],
      ["playwright", ["playwright", "@playwright/test"]],
      ["jest", ["jest"]],
      ["vitest", ["vitest"]],
      ["observability-js", ["@opentelemetry/api", "@sentry/node"]],
    ];
    for (const [framework, packages] of nodeRules) {
      if (packages.some((name) => deps.has(name))) {
        frameworks.add(framework);
      }
    }
  }

  // manifest base name -> [pattern, framework identifier] pairs.
  const pythonRules = [
    [/\bfastapi\b/, "fastapi"],
    [/\bdjango\b/, "django"],
    [/\bflask\b/, "flask"],
  ];
  const textRules = [
    ["requirements.txt", pythonRules],
    ["pyproject.toml", pythonRules],
    [
      "go.mod",
      [
        [/gin-gonic\/gin/, "gin"],
        [/gofiber\/fiber/, "fiber"],
        [/labstack\/echo/, "echo"],
      ],
    ],
    [
      "Cargo.toml",
      [
        [/\baxum\b/, "axum"],
        [/\bactix-web\b/, "actix-web"],
      ],
    ],
    ["Gemfile", [[/\brails\b/, "rails"]]],
  ];
  for (const [manifestName, rules] of textRules) {
    const text = String(manifests[manifestName] || "").toLowerCase();
    for (const [pattern, framework] of rules) {
      if (pattern.test(text)) {
        frameworks.add(framework);
      }
    }
  }

  return [...frameworks].sort((left, right) => left.localeCompare(right));
}
399
+
400
/**
 * Extracts the package name and script names from the captured package.json.
 *
 * @param {Record<string, string>} manifests - Manifest text keyed by base file name.
 * @returns {{name: string, scripts: string[]}} Trimmed package name and the
 *   sorted, non-empty script names; both empty when package.json is absent
 *   or unparseable.
 */
function derivePackageMetadata(manifests) {
  const rawManifest = manifests["package.json"];
  const pkg = rawManifest ? safeJsonParse(rawManifest) : null;
  if (!pkg || typeof pkg !== "object") {
    return { name: "", scripts: [] };
  }

  const scripts = [];
  if (pkg.scripts && typeof pkg.scripts === "object") {
    for (const key of Object.keys(pkg.scripts)) {
      const scriptName = String(key || "").trim();
      if (scriptName) {
        scripts.push(scriptName);
      }
    }
    scripts.sort((left, right) => left.localeCompare(right));
  }

  return { name: String(pkg.name || "").trim(), scripts };
}
422
+
423
/**
 * Collects likely entry points for the workspace.
 *
 * Sources are the conventional paths in ENTRY_POINT_CANDIDATES that exist in
 * the scanned file set, plus package.json's `main` and `bin` targets. `bin`
 * is accepted both as a single string and as a name-to-path map.
 *
 * @param {Set<string>} fileSet - Scanned file paths relative to the workspace root.
 * @param {Record<string, string>} manifests - Manifest text keyed by base file name.
 * @returns {string[]} Sorted, de-duplicated entry-point paths (package.json
 *   values are kept as written, only trimmed).
 */
function deriveEntryPoints(fileSet, manifests) {
  const entryPoints = new Set(ENTRY_POINT_CANDIDATES.filter((candidate) => fileSet.has(candidate)));

  const packageJson = manifests["package.json"] ? safeJsonParse(manifests["package.json"]) : null;
  if (packageJson && typeof packageJson === "object") {
    if (typeof packageJson.main === "string" && packageJson.main.trim()) {
      entryPoints.add(packageJson.main.trim());
    }

    const { bin } = packageJson;
    let binTargets = [];
    if (typeof bin === "string") {
      // Single-executable shorthand: "bin": "./cli.js".
      binTargets = [bin];
    } else if (bin && typeof bin === "object") {
      binTargets = Object.values(bin);
    }
    for (const target of binTargets) {
      const normalized = String(target || "").trim();
      if (normalized) {
        entryPoints.add(normalized);
      }
    }
  }

  return [...entryPoints].sort((left, right) => left.localeCompare(right));
}
448
+
449
/**
 * Works out which review surfaces apply to the scanned repository.
 *
 * `code_quality` and `security_overlay` are always present; every other
 * surface is included when its file-path, framework, manifest or language
 * signal fires.
 *
 * @param {{fileSet: Set<string>, frameworks: string[], manifests: Record<string, string>, languageStats: Record<string, object>}} input
 * @returns {Array<{surface: string, reason: string}>} Applicable surfaces sorted by surface name.
 */
function deriveRiskSurfaces({ fileSet, frameworks, manifests, languageStats }) {
  // The file list is materialised once and shared by every path predicate.
  const files = [...fileSet];
  const anyFile = (predicate) => files.some(predicate);
  const anyFramework = (...names) => names.some((name) => frameworks.includes(name));

  const lockfiles = [
    "package-lock.json",
    "pnpm-lock.yaml",
    "yarn.lock",
    "poetry.lock",
    "Pipfile.lock",
    "Cargo.lock",
  ];

  const hasTests =
    anyFile((file) => /(^|\/)(test|tests|__tests__)\//.test(file) || /\.(test|spec)\./.test(file)) ||
    anyFramework("jest", "vitest", "playwright");

  const hasFrontend =
    anyFramework("nextjs", "react", "vue", "svelte") ||
    Boolean(languageStats.JavaScript) ||
    Boolean(languageStats.TypeScript);

  const hasBackend = anyFramework(
    "express",
    "nestjs",
    "fastify",
    "hono",
    "fastapi",
    "django",
    "flask",
    "gin",
    "fiber",
    "echo"
  );

  const hasData =
    anyFramework("prisma", "typeorm", "drizzle") ||
    anyFile((file) => /(^|\/)(migrations|db|database|sql)\//.test(file) || file.endsWith(".sql"));

  const hasInfra = anyFile(
    (file) =>
      file.endsWith(".tf") ||
      file.includes("docker-compose") ||
      file.endsWith("Dockerfile") ||
      /(^|\/)(k8s|helm|terraform)\//.test(file)
  );

  const hasRelease = anyFile(
    (file) => file.startsWith(".github/workflows/") || file.startsWith(".gitlab-ci")
  );

  const hasSupplyChain =
    Object.keys(manifests).length > 0 ||
    anyFile((file) => lockfiles.some((candidate) => file.endsWith(candidate)));

  const hasObservability =
    anyFramework("observability-js") ||
    anyFile((file) => /sentry|opentelemetry|prometheus|grafana/i.test(file));

  const hasAiPipeline = anyFile((file) => /(^|\/)(prompts|models|llm|agents?)\//i.test(file));

  const hasDocs = anyFile((file) => file.endsWith(".md") || file.startsWith("docs/"));

  // [surface, reason, applies]; surface names are unique, so the final sort
  // fully determines the output order.
  const candidates = [
    ["code_quality", "Source files detected.", true],
    ["security_overlay", "Credential/policy scanning is applicable for any repository ingest.", true],
    ["testing_correctness", "Test assets detected.", hasTests],
    ["frontend_runtime", "Frontend/runtime JavaScript stack detected.", hasFrontend],
    ["backend_runtime", "Backend framework/runtime hints detected.", hasBackend],
    ["data_layer", "Data-model or migration assets detected.", hasData],
    ["infrastructure", "Infrastructure-as-code or container orchestration assets detected.", hasInfra],
    ["release_engineering", "CI/CD workflow definitions detected.", hasRelease],
    ["supply_chain", "Dependency manifests/lockfiles detected.", hasSupplyChain],
    ["observability", "Observability tooling indicators detected.", hasObservability],
    ["ai_pipeline", "AI/agent pipeline assets detected.", hasAiPipeline],
    ["docs_knowledge", "Documentation assets detected.", hasDocs],
    ["reliability_sre", "Operational and deployment assets detected.", hasInfra || hasObservability || hasRelease],
  ];

  return candidates
    .filter(([, , applies]) => applies)
    .map(([surface, reason]) => ({ surface, reason }))
    .sort((left, right) => left.surface.localeCompare(right.surface));
}
562
+
563
/**
 * Converts the per-language counters into a sorted report.
 *
 * @param {Record<string, {files: number, loc: number}>} languageStats - Counters keyed by language name.
 * @param {number} totalLoc - Total lines of code across all languages.
 * @returns {Array<{language: string, files: number, loc: number, locShare: number}>}
 *   Rows ordered by descending LOC, ties broken by language name.
 */
function summarizeLanguageStats(languageStats, totalLoc) {
  const rows = [];
  for (const [language, stats] of Object.entries(languageStats)) {
    // Share is rounded to four decimals; a zero total yields a share of 0.
    const locShare = totalLoc > 0 ? Number((stats.loc / totalLoc).toFixed(4)) : 0;
    rows.push({ language, files: stats.files, loc: stats.loc, locShare });
  }
  rows.sort((left, right) => right.loc - left.loc || left.language.localeCompare(right.language));
  return rows;
}
573
+
574
/**
 * Lists the immediate children of the workspace root.
 *
 * The built-in ignored-directory set is applied to directories only, matching
 * the recursive scanners in this file; a top-level *file* that happens to be
 * called e.g. `build` is still reported.
 *
 * @param {string} rootPath - Directory to list.
 * @param {{ignores(relativePath: string, isDirectory: boolean): boolean}} ignoreMatcher
 *   Matcher consulted for every entry name.
 * @returns {Promise<{directories: string[], files: string[]}>} Name-sorted
 *   lists, each capped at 200 entries. Both lists are empty when the root
 *   cannot be read.
 */
async function listTopLevel(rootPath, ignoreMatcher) {
  let entries;
  try {
    entries = await fsp.readdir(rootPath, { withFileTypes: true });
  } catch {
    // Best effort: an unreadable root simply has no visible children.
    return { directories: [], files: [] };
  }

  const directories = [];
  const files = [];
  for (const entry of entries) {
    const name = String(entry.name || "");
    if (!name) {
      continue;
    }
    const isDirectory = entry.isDirectory();
    if (isDirectory && DEFAULT_IGNORED_DIRS.has(name)) {
      continue;
    }
    if (ignoreMatcher.ignores(name, isDirectory)) {
      continue;
    }
    if (isDirectory) {
      directories.push(name);
    } else if (entry.isFile()) {
      files.push(name);
    }
  }

  const byName = (left, right) => left.localeCompare(right);
  return {
    directories: directories.sort(byName).slice(0, 200),
    files: files.sort(byName).slice(0, 200),
  };
}
601
+
602
/**
 * Walk the workspace and build the codebase ingest document.
 *
 * The walk is an iterative depth-first traversal from `rootPath`. Directories
 * named in DEFAULT_IGNORED_DIRS or rejected by the ignore matcher are not
 * entered. Files rejected by the matcher, files that cannot be stat'ed, and
 * files larger than MAX_FILE_SIZE_BYTES are skipped. Every remaining file is
 * fed into the content fingerprint; files that can also be read as UTF-8
 * contribute to the LOC/language statistics and the file index.
 *
 * Manifest handling: when several files share a manifest basename (for
 * example a root `package.json` plus nested ones), the one closest to the
 * workspace root is kept. Ties at the same depth go to the path that sorts
 * first, so the result does not depend on directory enumeration order.
 *
 * @param {Object} [options]
 * @param {string} [options.rootPath=process.cwd()] - Workspace root to scan.
 * @returns {Promise<Object>} Ingest document with `summary`, `topLevel`,
 *   `manifests`, `languages`, `frameworks`, `packageMetadata`, `entryPoints`,
 *   `riskSurfaces`, `indexedFiles` and `cache` sections.
 */
export async function collectCodebaseIngest({ rootPath = process.cwd() } = {}) {
  const resolvedRoot = path.resolve(rootPath);
  const ignoreMatcher = await createIgnoreMatcher(resolvedRoot);
  const topLevel = await listTopLevel(resolvedRoot, ignoreMatcher);
  const fingerprintHasher = createHash("sha256");
  let fingerprintFilesCount = 0;
  let latestFileMtimeMs = 0;

  const stack = [resolvedRoot];
  const fileSet = new Set();
  const languageStats = {};
  const manifests = {};
  // basename -> { depth, path } of the file whose text is stored in `manifests`.
  const manifestOrigins = new Map();

  const indexedFiles = [];
  let indexedOmittedCount = 0;
  let filesScanned = 0;
  let directoriesScanned = 0;
  let totalLoc = 0;
  let totalBytes = 0;

  while (stack.length > 0) {
    const current = stack.pop();
    if (!current) continue;

    let entries = [];
    try {
      entries = await fsp.readdir(current, { withFileTypes: true });
    } catch {
      // Unreadable directory: skip it and keep walking.
      continue;
    }

    directoriesScanned += 1;

    for (const entry of entries) {
      const fullPath = path.join(current, entry.name);
      const relativePath = toPosixPath(path.relative(resolvedRoot, fullPath));

      if (entry.isDirectory()) {
        if (!relativePath) {
          continue;
        }
        if (DEFAULT_IGNORED_DIRS.has(entry.name)) {
          continue;
        }
        if (ignoreMatcher.ignores(relativePath, true)) {
          continue;
        }
        stack.push(fullPath);
        continue;
      }

      if (!entry.isFile()) {
        continue;
      }
      if (ignoreMatcher.ignores(relativePath, false)) {
        continue;
      }

      let stat;
      try {
        stat = await fsp.stat(fullPath);
      } catch {
        continue;
      }
      if (!stat || stat.size > MAX_FILE_SIZE_BYTES) {
        continue;
      }

      // Fingerprinting uses only path/size/mtime, so it happens before (and
      // independently of) whether the file content can be read below.
      appendFingerprintInput(fingerprintHasher, relativePath, stat.size, stat.mtimeMs);
      fingerprintFilesCount += 1;
      latestFileMtimeMs = Math.max(latestFileMtimeMs, normalizeMtimeMs(stat.mtimeMs));

      let text = "";
      try {
        text = await fsp.readFile(fullPath, "utf-8");
      } catch {
        continue;
      }

      const loc = countLoc(text);
      const language = detectLanguage(relativePath);

      filesScanned += 1;
      totalLoc += loc;
      totalBytes += stat.size;
      fileSet.add(relativePath);

      if (!languageStats[language]) {
        languageStats[language] = { files: 0, loc: 0 };
      }
      languageStats[language].files += 1;
      languageStats[language].loc += loc;

      const baseName = path.basename(relativePath);
      if (MANIFEST_CANDIDATES.has(baseName)) {
        // Root files are visited before any subdirectory is descended into,
        // so a plain assignment here would let nested manifests overwrite the
        // root one. Keep the shallowest instead.
        // NOTE(review): assumes toPosixPath yields "/"-separated paths.
        const depth = relativePath.split("/").length;
        const known = manifestOrigins.get(baseName);
        const isPreferred =
          !known ||
          depth < known.depth ||
          (depth === known.depth && relativePath < known.path);
        if (isPreferred) {
          manifestOrigins.set(baseName, { depth, path: relativePath });
          manifests[baseName] = text;
        }
      }

      if (indexedFiles.length < FILE_INDEX_LIMIT) {
        indexedFiles.push({
          path: relativePath,
          language,
          loc,
          sizeBytes: stat.size,
        });
      } else {
        // Index is capped; only count what did not fit.
        indexedOmittedCount += 1;
      }
    }
  }

  const frameworks = detectFrameworks(manifests);
  const packageMetadata = derivePackageMetadata(manifests);
  const entryPoints = deriveEntryPoints(fileSet, manifests);
  const riskSurfaces = deriveRiskSurfaces({
    fileSet,
    frameworks,
    manifests,
    languageStats,
  });

  return {
    schemaVersion: "1.0.0",
    generatedAt: new Date().toISOString(),
    rootPath: resolvedRoot,
    summary: {
      filesScanned,
      directoriesScanned,
      totalLoc,
      totalBytes,
    },
    topLevel,
    manifests: {
      detected: Object.keys(manifests).sort((left, right) => left.localeCompare(right)),
    },
    languages: summarizeLanguageStats(languageStats, totalLoc),
    frameworks,
    packageMetadata,
    entryPoints,
    riskSurfaces,
    indexedFiles: {
      limit: FILE_INDEX_LIMIT,
      omitted: indexedOmittedCount,
      files: indexedFiles,
    },
    cache: {
      schema: INGEST_CACHE_SCHEMA,
      contentHash: fingerprintHasher.digest("hex"),
      filesCount: fingerprintFilesCount,
      latestFileMtimeMs,
    },
  };
}
755
+
756
/**
 * Render an ingest document as a short, human-readable multi-line summary.
 *
 * Tolerates a missing or partial ingest: a null/undefined `ingest` is treated
 * as an empty document, and every list section (languages, frameworks, entry
 * points, top-level entries) prints "none" when it is absent or empty. A
 * missing `rootPath` prints "unknown".
 *
 * @param {Object|null|undefined} ingest - Ingest document as produced by
 *   `collectCodebaseIngest`.
 * @returns {string} Newline-joined summary lines.
 */
export function formatIngestSummary(ingest) {
  // Normalize once so every access below is safe for null/undefined input.
  const source = ingest && typeof ingest === "object" ? ingest : {};
  const summary = source.summary || {};

  // Only the five largest languages are shown.
  const languageHead = Array.isArray(source.languages) && source.languages.length
    ? source.languages
        .slice(0, 5)
        .map((item) => `${item.language}(${item.files} files/${item.loc} LOC)`)
        .join(", ")
    : "none";
  const frameworks = Array.isArray(source.frameworks) && source.frameworks.length
    ? source.frameworks.join(", ")
    : "none";
  const entryPoints = Array.isArray(source.entryPoints) && source.entryPoints.length
    ? source.entryPoints.join(", ")
    : "none";
  const packageName = String(source.packageMetadata?.name || "").trim();
  const packageScripts = Array.isArray(source.packageMetadata?.scripts)
    ? source.packageMetadata.scripts
    : [];

  const lines = [
    `Workspace path: ${source.rootPath ?? "unknown"}`,
    `Top-level directories: ${(source.topLevel?.directories || []).slice(0, 20).join(", ") || "none"}`,
    `Top-level files: ${(source.topLevel?.files || []).slice(0, 20).join(", ") || "none"}`,
    `Files scanned: ${summary.filesScanned || 0}`,
    `Total LOC: ${summary.totalLoc || 0}`,
    `Languages: ${languageHead}`,
    `Frameworks: ${frameworks}`,
    `Entry points: ${entryPoints}`,
  ];

  // Package details are optional and only appended when present.
  if (packageName) {
    lines.push(`package.json name: ${packageName}`);
  }
  if (packageScripts.length > 0) {
    lines.push(`package scripts: ${packageScripts.slice(0, 15).join(", ")}`);
  }

  return lines.join("\n");
}
795
+
796
/**
 * Persist an ingest document to disk as pretty-printed JSON.
 *
 * When `outputFile` is given (non-blank), it is resolved against the
 * workspace root. Otherwise the file is named `CODEBASE_INGEST.json` inside
 * the directory returned by `resolveOutputRoot`. Parent directories are
 * created as needed.
 *
 * @param {Object} [options]
 * @param {Object} options.ingest - Document to serialize.
 * @param {string} [options.rootPath] - Workspace root; defaults to the
 *   current working directory.
 * @param {string} [options.outputFile=""] - Explicit output file path.
 * @param {string} [options.outputDir=""] - Output directory override passed
 *   to `resolveOutputRoot`.
 * @returns {Promise<string>} Absolute path of the written file.
 */
export async function writeCodebaseIngest({ ingest, rootPath, outputFile = "", outputDir = "" } = {}) {
  const workspaceRoot = path.resolve(rootPath || process.cwd());
  const explicitFile = String(outputFile || "").trim();

  let targetPath;
  if (explicitFile) {
    targetPath = path.resolve(workspaceRoot, explicitFile);
  } else {
    const outputRoot = await resolveOutputRoot({
      cwd: workspaceRoot,
      outputDirOverride: outputDir,
    });
    targetPath = path.join(outputRoot, "CODEBASE_INGEST.json");
  }

  const parentDir = path.dirname(targetPath);
  await fsp.mkdir(parentDir, { recursive: true });

  // Two-space indentation with a trailing newline.
  const payload = `${JSON.stringify(ingest, null, 2)}\n`;
  await fsp.writeFile(targetPath, payload, "utf-8");
  return targetPath;
}
813
+
814
/**
 * Return the ingest document for a workspace, regenerating it when needed.
 *
 * The stored ingest is re-collected and rewritten only when it is missing or
 * when `refresh` is set. A stale stored ingest is otherwise returned as-is,
 * with `stale: true` and the staleness reasons, so the caller can decide what
 * to do. An `ingest_refresh` event descriptor is attached whenever a refresh
 * happened or the stored ingest was stale; otherwise `event` is null.
 *
 * @param {Object} [options]
 * @param {string} [options.rootPath=process.cwd()] - Workspace root.
 * @param {string} [options.outputFile=""] - Explicit ingest file path.
 * @param {string} [options.outputDir=""] - Output directory override.
 * @param {boolean} [options.refresh=false] - Force regeneration.
 * @returns {Promise<Object>} Resolution result: `ingest`, `outputPath`,
 *   `refreshed`, `stale`, `staleBeforeRefresh`, `reasons`, `refreshedBecause`,
 *   `refreshRequested`, `lastCommitAt`, `fingerprint` and `event`.
 */
export async function resolveCodebaseIngest({
  rootPath = process.cwd(),
  outputFile = "",
  outputDir = "",
  refresh = false,
} = {}) {
  const workspaceRoot = path.resolve(rootPath || process.cwd());
  const outputPath = await resolveIngestOutputPath({
    rootPath: workspaceRoot,
    outputFile,
    outputDir,
  });
  const existingIngest = await readExistingIngest(outputPath);
  const fingerprint = await computeCodebaseContentFingerprint({ rootPath: workspaceRoot });
  const lastCommitAt = await readGitLastCommitAt(workspaceRoot);
  const staleness = buildIngestStaleness({ existingIngest, fingerprint, lastCommitAt });
  const staleBeforeRefresh = staleness.stale;

  // A refresh is triggered by exactly one of two causes; staleness alone is not one.
  let refreshedBecause = "";
  if (!existingIngest) {
    refreshedBecause = "missing_ingest";
  } else if (refresh) {
    refreshedBecause = "refresh_requested";
  }
  const refreshed = refreshedBecause !== "";

  let ingest = existingIngest;
  if (refreshed) {
    ingest = await collectCodebaseIngest({ rootPath: workspaceRoot });
    ingest.generatedAt = new Date().toISOString();
    if (!ingest.cache || typeof ingest.cache !== "object") {
      ingest.cache = {};
    }
    // Stamp the cache block with the fingerprint computed above.
    Object.assign(ingest.cache, {
      schema: INGEST_CACHE_SCHEMA,
      contentHash: fingerprint.contentHash,
      filesCount: fingerprint.filesCount,
      latestFileMtimeMs: fingerprint.latestFileMtimeMs,
    });
    await writeCodebaseIngest({
      ingest,
      rootPath: workspaceRoot,
      outputFile,
      outputDir,
    });
  }

  // After a refresh the document is current by definition.
  const stale = refreshed ? false : staleness.stale;
  const reasons = refreshed
    ? [refreshedBecause, ...staleness.reasons].filter(Boolean)
    : staleness.reasons;

  let event = null;
  if (refreshed || staleBeforeRefresh) {
    event = {
      event: "ingest_refresh",
      payload: {
        refreshed,
        stale,
        reason:
          refreshedBecause || (staleness.reasons.length > 0 ? staleness.reasons.join(",") : "cache_hit"),
        contentHash: fingerprint.contentHash,
        filesCount: fingerprint.filesCount,
        lastCommitAt,
      },
    };
  }

  return {
    ingest,
    outputPath,
    refreshed,
    stale,
    staleBeforeRefresh,
    reasons,
    refreshedBecause,
    refreshRequested: Boolean(refresh),
    lastCommitAt,
    fingerprint,
    event,
  };
}
901
+
902
/**
 * Collect a fresh ingest for the workspace and write it to disk in one step.
 *
 * @param {Object} [options]
 * @param {string} [options.rootPath=process.cwd()] - Workspace root to scan.
 * @param {string} [options.outputFile=""] - Explicit output file path.
 * @param {string} [options.outputDir=""] - Output directory override.
 * @returns {Promise<{ingest: Object, outputPath: string}>} The collected
 *   document and the path it was written to.
 */
export async function generateCodebaseIngest({
  rootPath = process.cwd(),
  outputFile = "",
  outputDir = "",
} = {}) {
  const ingest = await collectCodebaseIngest({ rootPath });
  const writeOptions = { ingest, rootPath, outputFile, outputDir };
  const outputPath = await writeCodebaseIngest(writeOptions);
  return { ingest, outputPath };
}
919
+
920
+ // File → persona ownership routing (#A10, spec §5.7). Implementation lives
921
+ // in ./ownership.js to keep this 918-LOC module from ballooning; re-exported
922
+ // here so existing callers that already import from ingest/engine.js can
923
+ // reach the new API without extra plumbing.
924
+ export {
925
+ buildOwnershipMap,
926
+ computeRoutingStats,
927
+ loadScaffoldConfig,
928
+ parseScaffoldYaml,
929
+ routeFileHeuristic,
930
+ routeFindingsToPersonas,
931
+ DEFAULT_HEURISTIC_FALLBACK,
932
+ SCAFFOLD_RELATIVE_PATH,
933
+ } from "./ownership.js";