@activemind/scd 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/LICENSE.md +35 -0
  2. package/README.md +417 -0
  3. package/bin/scd.js +140 -0
  4. package/lib/audit-report.js +93 -0
  5. package/lib/audit-sync.js +172 -0
  6. package/lib/audit.js +356 -0
  7. package/lib/cli-helpers.js +108 -0
  8. package/lib/commands/accept.js +28 -0
  9. package/lib/commands/audit.js +17 -0
  10. package/lib/commands/configure.js +200 -0
  11. package/lib/commands/doctor.js +14 -0
  12. package/lib/commands/exceptions.js +19 -0
  13. package/lib/commands/export-findings.js +46 -0
  14. package/lib/commands/findings.js +306 -0
  15. package/lib/commands/ignore.js +28 -0
  16. package/lib/commands/init.js +16 -0
  17. package/lib/commands/insights.js +24 -0
  18. package/lib/commands/install.js +15 -0
  19. package/lib/commands/list.js +109 -0
  20. package/lib/commands/remove.js +16 -0
  21. package/lib/commands/repo.js +862 -0
  22. package/lib/commands/report.js +234 -0
  23. package/lib/commands/resolve.js +25 -0
  24. package/lib/commands/rules.js +185 -0
  25. package/lib/commands/scan.js +519 -0
  26. package/lib/commands/scope.js +341 -0
  27. package/lib/commands/sync.js +40 -0
  28. package/lib/commands/uninstall.js +15 -0
  29. package/lib/commands/version.js +33 -0
  30. package/lib/comment-map.js +388 -0
  31. package/lib/config.js +325 -0
  32. package/lib/context-modifiers.js +211 -0
  33. package/lib/deep-analyzer.js +225 -0
  34. package/lib/doctor.js +236 -0
  35. package/lib/exception-manager.js +675 -0
  36. package/lib/export-findings.js +376 -0
  37. package/lib/file-context.js +380 -0
  38. package/lib/file-filter.js +204 -0
  39. package/lib/file-manifest.js +145 -0
  40. package/lib/git-utils.js +102 -0
  41. package/lib/global-config.js +239 -0
  42. package/lib/hooks-manager.js +130 -0
  43. package/lib/init-repo.js +147 -0
  44. package/lib/insights-analyzer.js +416 -0
  45. package/lib/insights-output.js +160 -0
  46. package/lib/installer.js +128 -0
  47. package/lib/output-constants.js +32 -0
  48. package/lib/output-terminal.js +407 -0
  49. package/lib/push-queue.js +322 -0
  50. package/lib/remove-repo.js +108 -0
  51. package/lib/repo-context.js +187 -0
  52. package/lib/report-html.js +1154 -0
  53. package/lib/report-index.js +157 -0
  54. package/lib/report-json.js +136 -0
  55. package/lib/report-markdown.js +250 -0
  56. package/lib/resolve-manager.js +148 -0
  57. package/lib/rule-registry.js +205 -0
  58. package/lib/scan-cache.js +171 -0
  59. package/lib/scan-context.js +312 -0
  60. package/lib/scan-schema.js +67 -0
  61. package/lib/scanner-full.js +681 -0
  62. package/lib/scanner-manual.js +348 -0
  63. package/lib/scanner-secrets.js +83 -0
  64. package/lib/scope.js +331 -0
  65. package/lib/store-verify.js +395 -0
  66. package/lib/store.js +310 -0
  67. package/lib/taint-register.js +196 -0
  68. package/lib/version-check.js +46 -0
  69. package/package.json +37 -0
  70. package/rules/rule-loader.js +324 -0
  71. package/rules/rules-aspx-cs.json +399 -0
  72. package/rules/rules-aspx.json +222 -0
  73. package/rules/rules-infra-leakage.json +434 -0
  74. package/rules/rules-js.json +664 -0
  75. package/rules/rules-php.json +521 -0
  76. package/rules/rules-python.json +466 -0
  77. package/rules/rules-secrets.json +99 -0
  78. package/rules/rules-sensitive-files.json +475 -0
  79. package/rules/rules-ts.json +76 -0
@@ -0,0 +1,380 @@
1
+ /**
2
+ * file-context.js
3
+ * Builds a context object for a file before rules are applied.
4
+ *
5
+ * Detection is two-layer:
6
+ * Layer 1 — path/filename signals (always evaluated, fast)
7
+ * Layer 2 — content signals (first 50 lines, always run when content available)
8
+ *
9
+ * Vendor and generated files are classified definitively from path/filename alone.
10
+ * All other test/fixture classifications are tentative — content must confirm them.
11
+ * If content is unavailable (e.g. secrets scanner path), tentative type is used as-is.
12
+ *
13
+ * The returned context is passed to applyContextModifiers() in context-modifiers.js.
14
+ * Rules themselves are not modified — file context is a purely additive layer.
15
+ */
16
+
17
+ 'use strict';
18
+
19
+ const path = require('path');
20
+
21
+ // ── File type constants ────────────────────────────────────────────────────
22
// Every value a FileContext's `fileType` can take. The strings are emitted in
// results, so they are part of the module's output contract.
const FILE_TYPES = {
  FIXTURE: 'fixture',     // test fixtures, mocks and stubs
  VENDOR: 'vendor',       // third-party code checked into the tree
  GENERATED: 'generated', // build output and other machine-written files
  TEST: 'test',           // test code
  CONFIG: 'config',       // configuration files
  DOCS: 'docs',           // documentation
  SOURCE: 'source',       // default when nothing else applies
};
31
+
32
+ // ── Path/filename signal patterns ──────────────────────────────────────────
33
+
34
// All segment lists are matched as plain substrings of the normalised path
// (lower-case, forward slashes, leading '/').

// Fixture/mock/stub directories. Tentative: when file content is available it
// must confirm the classification.
const FIXTURE_PATH_SEGMENTS = [
  '/fixtures/',
  '/fixture/',
  '/mocks/',
  '/mock/',
  '/stubs/',
  '/stub/',
  '/__fixtures__/',
  '/__mocks__/',
];

// Test directories. Tentative: when file content is available it must confirm
// the classification.
const TEST_PATH_SEGMENTS = [
  '/test/',
  '/tests/',
  '/spec/',
  '/specs/',
  '/__tests__/',
  '/__specs__/',
  '/e2e/',
  '/integration-tests/',
  '/unit/',
];

// Third-party code directories. Definitive: classified as vendor regardless of
// content.
const VENDOR_PATH_SEGMENTS = [
  '/vendor/',
  '/node_modules/',
  '/bower_components/',
  '/site-packages/',
  '/lib/python',
  '/packages/',
  '/.venv/',
  '/venv/',
];

// Build and tool output directories. Definitive: machine output, findings are
// not actionable.
const GENERATED_PATH_SEGMENTS = [
  '/dist/',
  '/build/',
  '/out/',
  '/.next/',
  '/.nuxt/',
  '/generated/',
  '/gen/',
  '/auto-generated/',
  '/coverage/',
  '/.nyc_output/',
  '/__pycache__/',
];

// Documentation directories. Tentative: when file content is available it must
// confirm the classification.
const DOCS_PATH_SEGMENTS = [
  '/docs/',
  '/doc/',
  '/documentation/',
  '/wiki/',
];
64
+
65
// Filename patterns. All are tested against the lower-cased basename.

// Language-standard test names: *.test.EXT, *.spec.EXT, *_test.EXT, test_*.EXT.
// buildFileContext treats a match as definitive only when the path also
// contains a test directory segment; on its own it is just a tentative signal.
const STRONG_TEST_FILENAME_RE = /(?:\.(?:test|spec)\.[a-z]+$|_test\.[a-z]+$|^test_[^/]+\.[a-z]+$)/i;

// Wider test-name net (adds *Test.EXT and bare .test / .spec suffixes).
// Always tentative.
const TEST_FILENAME_RE = /(?:\.(?:test|spec)\.[a-z]+$|_test\.[a-z]+$|^test_.*\.[a-z]+$|Test\.[a-z]+$|\.test$|\.spec$)/i;

// Minified bundles, lock files and TypeScript declaration files. Definitive.
const GENERATED_FILE_RE = /(?:\.min\.(?:js|css)$|package-lock\.json$|yarn\.lock$|composer\.lock$|Pipfile\.lock$|\.lock$|\.d\.ts$)/i;

// Configuration files, by extension or well-known tool name. Committed directly
// (no content confirmation).
const CONFIG_FILE_RE = /(?:\.(?:env|config|conf|ini|cfg|properties|yml|yaml|toml|json)$|^\.env(?:\.[a-z]+)?$|webpack\.config\.|vite\.config\.|babel\.config\.|jest\.config\.|karma\.conf\.|rollup\.config\.|tsconfig\.|\.eslintrc|\.prettierrc|\.stylelintrc)/i;
76
+
77
+ // ── Test framework content signals ─────────────────────────────────────────
78
+ // Checked against the first 50 lines of the file.
79
+ // Ordered by specificity — more specific signals first.
80
+
81
// Content patterns that indicate test code, grouped per framework and then
// flattened into a list of { framework, re } records. Order is significant:
// consumers take the first match, so specific frameworks come first and the
// generic (framework: null) pattern comes last.
const FRAMEWORK_CONTENT_SIGNALS = [
  ['jest', [
    /\bjest\.(?:mock|fn|spyOn|setTimeout|useFakeTimers)\b/,
    /from\s+['"](?:@jest\/globals|jest-each)['"]/,
  ]],
  ['vitest', [
    /from\s+['"]vitest['"]/,
    /\bvi\.(?:mock|fn|spyOn)\b/,
  ]],
  // @playwright/test, widely used for E2E
  ['playwright', [
    /from\s+['"]@playwright\/test['"]/,
    /\btest\.(?:describe|beforeAll|afterAll|beforeEach|afterEach)\b/,
  ]],
  // Mocha / Chai
  ['mocha', [
    /\b(?:before|after|beforeEach|afterEach)\s*\(/,
    /\bassert\.[a-z]+\s*\(/,
  ]],
  ['pytest', [
    /\bimport\s+pytest\b/,
    /\bdef\s+test_[a-z_]+\s*\(/,
    /@pytest\.fixture\b/,
  ]],
  // Python stdlib unittest
  ['unittest', [
    /\bimport\s+unittest\b/,
    /\bfrom\s+unittest\b/,
    /\bclass\s+\w+\s*\(\s*unittest\.TestCase\s*\)/,
  ]],
  // PHPUnit: direct `extends`/test methods first, then import/namespace
  // signals that also catch indirect inheritance chains
  ['phpunit', [
    /\bextends\s+(?:TestCase|PHPUnit[\\]Framework[\\]TestCase)\b/,
    /public\s+function\s+test[A-Z]/,
    /\buse\s+PHPUnit\\/,
    /\bnamespace\s+\S+\\Tests?\\/,
  ]],
  // Pest (PHP)
  ['pest', [
    /\buses\s*\(\s*\w+::class\s*\)/,
    /\bit\s*\(\s*['"]/,
  ]],
  // C# frameworks are attribute-based, so no import is needed to detect them
  ['nunit', [
    /\[(?:Test|TestFixture|SetUp|TearDown|OneTimeSetUp)\]/,
  ]],
  ['xunit', [
    /\[(?:Fact|Theory|InlineData|ClassData)\]/,
  ]],
  ['mstest', [
    /\[(?:TestMethod|TestClass|TestInitialize|TestCleanup)\]/,
  ]],
  ['rspec', [
    /\b(?:describe|context|it)\s+['"].*['"],?\s*do\b/,
    /\bexpect\s*\(.*\)\.to\s/,
  ]],
  // Ruby Minitest
  ['minitest', [
    /require\s+['"]minitest\/autorun['"]/,
    /\bclass\s+\w+\s*<\s*Minitest::Test\b/,
  ]],
  // Node.js built-in runner (node:test)
  ['node-test', [
    /require\s*\(\s*['"]node:test['"]\s*\)/,
    /from\s+['"]node:test['"]/,
  ]],
  // Bun runner (bun:test)
  ['bun-test', [
    /from\s+['"]bun:test['"]/,
    /require\s*\(\s*['"]bun:test['"]\s*\)/,
  ]],
  // Generic test vocabulary: lowest specificity, no framework attributed
  [null, [
    /\b(?:describe|it|expect|assert|should)\s*[\.(]/,
  ]],
].flatMap(([framework, patterns]) => patterns.map((re) => ({ framework, re })));
130
+
131
+ // ── Language detection from extension ─────────────────────────────────────
132
// Lower-case file extension (without the dot) → language label reported in
// FileContext.language. Extensions not listed here yield no language.
const EXT_TO_LANGUAGE = {
  // JavaScript / TypeScript
  js: 'javascript',
  mjs: 'javascript',
  cjs: 'javascript',
  jsx: 'javascript',
  ts: 'typescript',
  tsx: 'typescript',

  // Other programming languages
  py: 'python',
  php: 'php',
  cs: 'csharp',
  aspx: 'aspnet',
  ascx: 'aspnet',
  master: 'aspnet',
  rb: 'ruby',
  go: 'go',
  java: 'java',
  kt: 'kotlin',
  rs: 'rust',

  // Scripts
  sh: 'shell',
  bash: 'shell',
  ps1: 'powershell',
  bat: 'batch',
  cmd: 'batch',

  // Data and configuration
  yml: 'yaml',
  yaml: 'yaml',
  json: 'json',
  xml: 'xml',
  sql: 'sql',
  env: 'env',
  ini: 'ini',
  cfg: 'ini',
  conf: 'ini',
  properties: 'ini',

  // Text
  txt: 'text',
  log: 'text',
  md: 'markdown',

  // Keys and certificates
  pem: 'pem',
  key: 'pem',
  pfx: 'pem',
  p12: 'pem',
};
159
+
160
+ // ── Helpers ────────────────────────────────────────────────────────────────
161
+
162
/**
 * Put a path into the canonical form the segment lists are matched against:
 * forward slashes only, lower-case, and always starting with '/'.
 *
 * @param {string} filePath - Relative or absolute path, any separator style.
 * @returns {string} The canonical path.
 */
function normalisePath(filePath) {
  const unified = filePath.split('\\').join('/').toLowerCase();
  if (unified.startsWith('/')) {
    return unified;
  }
  return `/${unified}`;
}
166
+
167
/**
 * Report whether a normalised path contains any of the given segments.
 * Matching is plain substring containment.
 *
 * @param {string} normPath - Path as produced by normalisePath().
 * @param {string[]} segments - Candidate segments such as '/tests/'.
 * @returns {boolean} True when at least one segment occurs in the path.
 */
function hasPathSegment(normPath, segments) {
  for (const segment of segments) {
    if (normPath.includes(segment)) {
      return true;
    }
  }
  return false;
}
170
+
171
/**
 * Return the leading portion of `content` covering at most `n` lines.
 * The cut is made just after the n-th newline, so that newline is included.
 * Content with fewer than `n` newlines is returned whole.
 *
 * @param {string} [content] - Text to truncate; falsy input yields ''.
 * @param {number} [n=50] - Maximum number of lines to keep.
 * @returns {string} The first `n` lines of `content`.
 */
function firstLines(content, n = 50) {
  if (!content) return '';

  let end = 0;
  for (let seen = 0; seen < n; seen++) {
    const newlineAt = content.indexOf('\n', end);
    if (newlineAt === -1) {
      // Ran out of newlines before reaching the limit: keep everything.
      return content.slice(0, content.length);
    }
    end = newlineAt + 1;
  }
  return content.slice(0, end);
}
181
+
182
+ // ── Main export ────────────────────────────────────────────────────────────
183
+
184
// Extensions that hold data or configuration rather than code. Such files
// cannot contain test-framework imports, so Layer 2 content confirmation can
// never succeed for them; a path-based tentative type is committed directly.
const DATA_EXTS_NO_CONFIRM = new Set([
  'json', 'yaml', 'yml', 'xml', 'txt', 'log', 'sql',
  'sqlite', 'sqlite3', 'db', 'pem', 'key', 'pfx', 'p12',
  'csv', 'tsv', 'toml', 'ini', 'cfg', 'conf', 'properties',
]);

/**
 * Build a context object for a file before rules are evaluated.
 *
 * Classification order:
 *   1. Definitive path/filename signals: vendor, generated, strong test
 *      filename inside a test directory, config filename.
 *   2. Tentative signals (fixture/test/docs path, test-like filename). With
 *      content they must be confirmed by a test-framework pattern in the first
 *      50 lines, otherwise the file is treated as source. Without content only
 *      path-based tentative signals are committed.
 *   3. Default: source.
 *
 * @param {string} filePath  - Relative or absolute path to the file.
 * @param {string} [content] - File content (optional; used for content signals).
 * @returns {FileContext}
 *
 * @typedef {Object} FileContext
 * @property {string}      filePath      - As supplied.
 * @property {string}      fileType      - One of: source | test | fixture | vendor | generated | config | docs
 * @property {string|null} testFramework - Detected test framework, or null.
 * @property {string|null} language      - Detected language from extension, or null.
 * @property {string[]}    signals       - Human-readable list of signals that drove classification.
 */
function buildFileContext(filePath, content) {
  const normPath = normalisePath(filePath);

  // Take the basename from the normalised path so that backslash-separated
  // paths are split correctly on every platform (normPath is already
  // lower-case with forward slashes).
  const basename = path.posix.basename(normPath);

  // The extension is whatever follows the last dot of the basename. A name
  // without a dot has no extension, and dots in directory names are ignored.
  const dotIndex = basename.lastIndexOf('.');
  const ext = dotIndex === -1 ? '' : basename.slice(dotIndex + 1);

  // Own-property lookup only: a file such as "x.constructor" must not pick up
  // a value inherited from Object.prototype.
  const language = Object.prototype.hasOwnProperty.call(EXT_TO_LANGUAGE, ext)
    ? EXT_TO_LANGUAGE[ext]
    : null;

  const signals = [];
  let fileType = null;
  let testFramework = null;

  // ── Layer 1: path/filename signals ────────────────────────────────────────

  // Definitive: vendor
  if (!fileType && hasPathSegment(normPath, VENDOR_PATH_SEGMENTS)) {
    fileType = FILE_TYPES.VENDOR;
    signals.push(`path: vendor segment in ${normPath}`);
  }

  // Definitive: generated path
  if (!fileType && hasPathSegment(normPath, GENERATED_PATH_SEGMENTS)) {
    fileType = FILE_TYPES.GENERATED;
    signals.push(`path: generated segment in ${normPath}`);
  }

  // Definitive: generated filename
  if (!fileType && GENERATED_FILE_RE.test(basename)) {
    fileType = FILE_TYPES.GENERATED;
    signals.push(`filename: generated pattern (${basename})`);
  }

  // Tentative classifications are stored, not committed, until content
  // confirms them. tentativeIsPathBased records whether the signal came from a
  // directory segment (committed when content is unavailable) or from the
  // filename alone (never committed without content).
  let tentativeType = null;
  let tentativeSignal = null;
  let tentativeIsPathBased = false;

  if (!fileType && hasPathSegment(normPath, FIXTURE_PATH_SEGMENTS)) {
    tentativeType = FILE_TYPES.FIXTURE;
    tentativeSignal = `path: fixture segment in ${normPath}`;
    tentativeIsPathBased = true;
  }

  // Definitive: strong test filename AND a known test path, both required.
  // A filename alone is not enough: renaming a production file to *.test.js
  // must not let it bypass scanning.
  if (!fileType && !tentativeType
      && STRONG_TEST_FILENAME_RE.test(basename)
      && hasPathSegment(normPath, TEST_PATH_SEGMENTS)) {
    fileType = FILE_TYPES.TEST;
    signals.push(`filename+path: strong test signal (${basename}) — definitive`);
  }

  // Tentative: test-like filename outside a test path.
  if (!fileType && !tentativeType && TEST_FILENAME_RE.test(basename)) {
    tentativeType = FILE_TYPES.TEST;
    tentativeSignal = `filename: test pattern (${basename})`;
    tentativeIsPathBased = false;
  }

  // Tentative: test directory without a filename signal.
  if (!fileType && !tentativeType && hasPathSegment(normPath, TEST_PATH_SEGMENTS)) {
    tentativeType = FILE_TYPES.TEST;
    tentativeSignal = `path: test segment in ${normPath}`;
    tentativeIsPathBased = true;
  }

  // Config: committed directly.
  if (!fileType && !tentativeType && CONFIG_FILE_RE.test(basename)) {
    fileType = FILE_TYPES.CONFIG;
    signals.push(`filename: config pattern (${basename})`);
  }

  // Docs: tentative (path-based)
  if (!fileType && !tentativeType && hasPathSegment(normPath, DOCS_PATH_SEGMENTS)) {
    tentativeType = FILE_TYPES.DOCS;
    tentativeSignal = `path: docs segment in ${normPath}`;
    tentativeIsPathBased = true;
  }

  // ── Early commit for data/config extensions in test/fixture paths ─────────
  // A data file inside a test/fixture/docs directory is committed without
  // content confirmation. This applies to path-based signals only: a
  // filename-only match (e.g. "latest.json" or "users.test.json" in /src/)
  // stays tentative so a production data file is not classified as test
  // merely because of its name.
  if (!fileType && tentativeType && tentativeIsPathBased && DATA_EXTS_NO_CONFIRM.has(ext)) {
    fileType = tentativeType;
    signals.push(tentativeSignal);
    signals.push(`data/config extension in test path — committed without content confirmation`);
    tentativeType = null;
    tentativeSignal = null;
  }

  // ── Layer 1.5: framework detection for definitively classified tests ──────
  // When Layer 1 already committed fileType=TEST, Layer 2 is skipped, so the
  // framework has to be detected here. The generic (framework: null) pattern
  // is ignored because it names no framework.
  if (fileType === FILE_TYPES.TEST && !testFramework && content) {
    const head = firstLines(content, 50);
    for (const { framework, re } of FRAMEWORK_CONTENT_SIGNALS) {
      if (framework && re.test(head)) {
        testFramework = framework;
        signals.push(`content: framework=${framework}`);
        break;
      }
    }
  }

  // ── Layer 2: content signals ──────────────────────────────────────────────
  // Runs only when Layer 1 did not commit a type.
  if (!fileType) {
    if (content) {
      const head = firstLines(content, 50);

      for (const { framework, re } of FRAMEWORK_CONTENT_SIGNALS) {
        if (re.test(head)) {
          if (!testFramework && framework) {
            testFramework = framework;
            signals.push(`content: framework=${framework}`);
          }

          if (!fileType) {
            if (tentativeType) {
              // Content confirms the tentative path/filename classification.
              fileType = tentativeType;
              signals.push(tentativeSignal);
              signals.push(`content: confirmed (${re.source.slice(0, 40)})`);
            } else {
              // No path/filename hint, but the content looks like test code.
              fileType = FILE_TYPES.TEST;
              signals.push(`content: test signal (${re.source.slice(0, 40)})`);
            }
          }

          // Keep scanning only while a named framework is still missing.
          if (fileType && testFramework) break;
        }
      }

      // Tentative not confirmed by content → source
      if (!fileType && tentativeType) {
        signals.push(`path/filename: ${tentativeSignal} — not confirmed by content, treated as source`);
        fileType = FILE_TYPES.SOURCE;
      }
    } else if (tentativeType && tentativeIsPathBased) {
      // No content: a directory signal is committed as the best available
      // evidence; a filename-only signal is not, and falls through to source.
      fileType = tentativeType;
      signals.push(tentativeSignal);
      signals.push('content: unavailable — path signal committed without confirmation');
    }
  }

  // ── Default ───────────────────────────────────────────────────────────────
  if (!fileType) {
    fileType = FILE_TYPES.SOURCE;
  }

  return {
    filePath,
    fileType,
    testFramework,
    language,
    signals,
  };
}
379
+
380
+ module.exports = { buildFileContext, FILE_TYPES };
@@ -0,0 +1,204 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * lib/file-filter.js
5
+ *
6
+ * Builds a file filter for use during scan file discovery.
7
+ * Applies .gitignore rules and scd scope.yml file_excludes patterns.
8
+ *
9
+ * Previously named gitignore-filter.js — renamed as scope.yml support
10
+ * extends filtering beyond git-related exclusions.
11
+ *
12
+ * Strategy:
13
+ * Level 1 (git available): `git ls-files --cached --others --exclude-standard`
14
+ * Returns exactly the files git tracks or would track — free and correct.
15
+ * Level 2 (fallback, no git): Parse .gitignore files manually.
16
+ * Handles the common case: someone downloaded a repo without git clone,
17
+ * or git is not installed on the machine.
18
+ *
19
+ * Usage:
20
+ * const { buildIgnoreFilter } = require('./file-filter');
21
+ * const shouldIgnore = buildIgnoreFilter(repoRoot);
22
+ * if (shouldIgnore(filePath)) // skip this file
23
+ */
24
+
25
+ const fs = require('fs');
26
+ const path = require('path');
27
+ const { execSync } = require('child_process');
28
+
29
+ // ── Level 1: git ls-files ────────────────────────────────────────────────
30
+
31
/**
 * List the files git tracks, plus untracked files that are not ignored.
 *
 * Runs `git ls-files -z --cached --others --exclude-standard` in `repoRoot`.
 * `-z` makes git emit NUL-terminated, unquoted paths, so names containing
 * non-ASCII characters, quotes or leading/trailing whitespace survive intact.
 * maxBuffer is raised well above Node's 1 MiB default so that a large
 * repository does not abort the call.
 *
 * @param {string} repoRoot - Directory to run git in.
 * @returns {Set<string>|null} Absolute paths, or null when the git command
 *   fails for any reason (git missing, not a repository, bad directory, ...).
 */
function getGitTrackedFiles(repoRoot) {
  try {
    const output = execSync(
      'git ls-files -z --cached --others --exclude-standard',
      {
        cwd: repoRoot,
        encoding: 'utf8',
        stdio: ['pipe', 'pipe', 'pipe'],
        maxBuffer: 256 * 1024 * 1024,
      }
    );
    return new Set(
      output.split('\0')
        .filter(Boolean)
        .map(f => path.resolve(repoRoot, f))
    );
  } catch {
    // Deliberate best-effort: the caller treats null as "git unavailable" and
    // falls back to manual .gitignore parsing.
    return null;
  }
}
52
+
53
+ // ── Level 2: manual .gitignore parsing ──────────────────────────────────
54
+
55
/**
 * Convert one '/'-free segment of a gitignore pattern into regex source.
 * `*` matches any run of non-slash characters, `?` matches one non-slash
 * character, and every other character is matched literally.
 */
function gitignoreSegmentToRegex(segment) {
  let out = '';
  for (const ch of segment) {
    if (ch === '*') {
      out += '[^/]*';
    } else if (ch === '?') {
      out += '[^/]';
    } else {
      out += ch.replace(/[.+^${}()|[\]\\]/g, '\\$&');
    }
  }
  return out;
}

/**
 * Parse a single .gitignore file into a list of pattern objects.
 *
 * The regexes are meant to be tested against a file path that is relative to
 * the .gitignore's directory and uses '/' separators.
 *
 * Supported syntax: comments, `!` negation, `\#` / `\!` escapes, leading or
 * interior `/` anchoring, trailing `/` (directory-only), `*`, `?`, and `**`
 * as a whole path segment. Bracket expressions are matched literally.
 *
 * @param {string} filePath - Path of the .gitignore file.
 * @returns {Array<{regex: RegExp, negated: boolean, dirOnly: boolean}>}
 *   Patterns in file order; empty when the file does not exist.
 */
function parseGitignoreFile(filePath) {
  if (!fs.existsSync(filePath)) return [];
  const lines = fs.readFileSync(filePath, 'utf8').split('\n');
  const patterns = [];

  for (const rawLine of lines) {
    let line = rawLine.trim();
    // Skip empty lines and comments
    if (!line || line.startsWith('#')) continue;

    const negated = line.startsWith('!');
    if (negated) line = line.slice(1);

    // A leading backslash escapes a literal '#' or '!' at the start.
    if (line.startsWith('\\#') || line.startsWith('\\!')) line = line.slice(1);

    // Trailing slash: the pattern names a directory.
    const dirOnly = line.endsWith('/');
    if (dirOnly) line = line.replace(/\/+$/, '');

    // A slash at the start or in the middle ties the pattern to the
    // .gitignore's own directory; otherwise it may match at any depth.
    const anchored = line.includes('/');
    if (line.startsWith('/')) line = line.slice(1);
    if (!line) continue;

    const segments = line.split('/');
    let body = '';
    segments.forEach((segment, index) => {
      const isLast = index === segments.length - 1;
      if (segment === '**') {
        // '**/' spans zero or more directories; a final '/**' matches
        // everything below the preceding path.
        body += isLast ? '.*' : '(?:.*/)?';
        return;
      }
      body += gitignoreSegmentToRegex(segment);
      if (!isLast) body += '/';
    });

    // The regex is tested against file paths, so a directory-only pattern must
    // be followed by '/': it matches files inside the directory but never a
    // plain file of the same name.
    const prefix = anchored ? '^' : '(^|/)';
    const suffix = dirOnly ? '/' : '(/|$)';

    patterns.push({
      regex: new RegExp(prefix + body + suffix),
      negated,
      dirOnly,
    });
  }

  return patterns;
}
101
+
102
/**
 * Gather the patterns of every .gitignore under `repoRoot`.
 *
 * Directories are visited depth-first, parent before children, and
 * directories whose name starts with '.' are not entered. Only .gitignore
 * files that yield at least one pattern are included.
 *
 * @param {string} repoRoot - Directory to start from.
 * @returns {Array<{dir: string, patterns: Array}>} One entry per .gitignore,
 *   with `dir` being the directory that contains it.
 */
function collectGitignorePatterns(repoRoot) {
  const collected = [];

  const visit = (currentDir) => {
    const rules = parseGitignoreFile(path.join(currentDir, '.gitignore'));
    if (rules.length > 0) {
      collected.push({ dir: currentDir, patterns: rules });
    }

    try {
      fs.readdirSync(currentDir, { withFileTypes: true })
        .filter((entry) => entry.isDirectory() && !entry.name.startsWith('.'))
        .forEach((entry) => visit(path.join(currentDir, entry.name)));
    } catch {
      // Unreadable directory (permissions etc.): skip it.
    }
  };

  visit(repoRoot);
  return collected;
}
127
+
128
/**
 * Decide whether `filePath` is ignored by the collected .gitignore rules.
 *
 * Rule sets are applied in the order given, and only those whose directory is
 * the file itself or one of its ancestors. Within a set, patterns are applied
 * in order and the last matching pattern wins: a plain pattern ignores the
 * file, a negated one un-ignores it.
 *
 * @param {string} filePath - Path of the file, in the same form as the `dir`
 *   values in `gitignoreRules`.
 * @param {Array<{dir: string, patterns: Array<{regex: RegExp, negated: boolean}>}>} gitignoreRules
 * @returns {boolean} True when the file ends up ignored.
 */
function isIgnoredByPatterns(filePath, gitignoreRules) {
  return gitignoreRules.reduce((verdict, { dir, patterns }) => {
    const inScope = filePath === dir || filePath.startsWith(dir + path.sep);
    if (!inScope) return verdict;

    // Patterns are written relative to their .gitignore, with '/' separators.
    const relative = path.relative(dir, filePath).split(path.sep).join('/');

    return patterns.reduce(
      (state, { regex, negated }) => (regex.test(relative) ? !negated : state),
      verdict
    );
  }, false);
}
150
+
151
+ // ── Public API ───────────────────────────────────────────────────────────
152
+
153
/**
 * Build a filter function for the given repo root.
 *
 * Returns: shouldIgnore(absoluteFilePath) → boolean
 *
 *   Level 1 (git available): asks git for the files it ignores and answers by
 *                            set lookup.
 *   Level 2 (fallback):      parses .gitignore files and matches patterns per
 *                            file.
 *   Level 3 (no .gitignore): returns () => false, so nothing is ignored.
 *
 * @param {string} repoRoot           Absolute path to the repo root
 * @param {Object}  [options]
 * @param {boolean} [options.debug=false] Log to stderr which strategy was used
 * @returns {function(string): boolean} Predicate telling whether a file should
 *   be skipped.
 */
function buildIgnoreFilter(repoRoot, { debug = false } = {}) {
  // Level 1: try git ls-files
  const trackedFiles = getGitTrackedFiles(repoRoot);
  if (trackedFiles !== null) {
    if (debug) console.error(`[gitignore] Using git ls-files (${trackedFiles.size} files tracked)`);
    // The tracked set only shows that git works here. Filtering on "not in
    // the tracked set" would be wrong, so ask git directly for the files it
    // explicitly ignores.
    try {
      // -z: NUL-terminated, unquoted paths, so unusual file names survive.
      // maxBuffer: this listing contains every ignored file (for example all
      // of an ignored dependency directory) and easily exceeds Node's 1 MiB
      // default, which would otherwise make the call throw.
      const ignoredOutput = execSync(
        'git ls-files -z --others --ignored --exclude-standard',
        {
          cwd: repoRoot,
          encoding: 'utf8',
          stdio: ['pipe', 'pipe', 'pipe'],
          maxBuffer: 256 * 1024 * 1024,
        }
      );
      const ignoredFiles = new Set(
        ignoredOutput.split('\0')
          .filter(Boolean)
          .map(f => path.resolve(repoRoot, f))
      );
      if (debug) console.error(`[gitignore] ${ignoredFiles.size} files explicitly ignored by git`);
      return (filePath) => ignoredFiles.has(filePath);
    } catch (err) {
      // git available but ls-files --ignored failed — fall through to Level 2
      if (debug) console.error(`[gitignore] git ls-files --ignored failed (${err.message}) — falling back to manual parser`);
    }
  }

  // Level 2: parse .gitignore files manually
  const gitignoreRules = collectGitignorePatterns(repoRoot);
  if (gitignoreRules.length > 0) {
    if (debug) console.error(`[gitignore] Using manual parser (${gitignoreRules.length} .gitignore file(s))`);
    return (filePath) => isIgnoredByPatterns(filePath, gitignoreRules);
  }

  // Level 3: no .gitignore found — nothing to filter
  if (debug) console.error('[gitignore] No .gitignore found — no files filtered');
  return () => false;
}
203
+
204
+ module.exports = { buildIgnoreFilter };