sealcode 1.3.5 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1004 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * sealcode@1.4.0 — universal project discovery ("auto" preset).
5
+ *
6
+ * The original preset model assumed a single marker file picked the right
7
+ * include/exclude template. In practice that fails the moment a project
8
+ * deviates from the textbook layout — Next.js App Router, Django + extra
9
+ * apps, monorepos, .NET, Flutter, Elixir, polyglot repos. The user then
10
+ * runs `sealcode init`, sees "Detected ecosystem: Node.js / TypeScript",
11
+ * hits Enter, and only a fraction of the source actually gets locked.
12
+ *
13
+ * Discovery solves this without abandoning the preset model:
14
+ *
15
+ * 1. If the project is a git repo, `git ls-files` is the source of truth
16
+ * for "what matters". It already respects `.gitignore`, captures every
17
+ * tracked dir, and works on every stack.
18
+ *
19
+ * 2. If git is unavailable (not a repo, or `git ls-files` fails or times
20
+ * out), fall back to a filesystem walk of every non-excluded file.
21
+ *
22
+ * 3. Apply universal exclusions (binaries, build outputs, lockfiles too
23
+ * noisy to encrypt, secrets, anything > 5 MB).
24
+ *
25
+ * 4. Persist a deterministic `.sealcoderc.json` with `include: ["**\/*"]`
26
+ * and a comprehensive `exclude` list, so every collaborator and CI
27
+ * job sees the exact same file set on `sealcode lock`. No implicit
28
+ * re-detection drift.
29
+ *
30
+ * `scanProject` returns a structured report the CLI (`sealcode scan`) and
31
+ * the init wizard both consume.
32
+ */
33
+
34
+ const fs = require('fs');
35
+ const path = require('path');
36
+ const { spawnSync } = require('child_process');
37
+ const fg = require('fast-glob');
38
+ const { normPath } = require('./util');
39
+
40
+ // --------------------------------------------------------------------------
41
+ // Universal exclusions. These never make it into the vault by default, no
42
+ // matter what the include patterns say.
43
+ //
44
+ // Categories matter for the scan report ("excluded: 41 binary, 11 build, ...")
45
+ // so we keep them as named groups instead of one flat array.
46
+ // --------------------------------------------------------------------------
47
+
48
// Version-control metadata directories.
const EXCLUDE_VCS = ['.git/**', '.hg/**', '.svn/**'];

// Files that operating systems drop into folders on their own.
const EXCLUDE_OS = ['.DS_Store', 'Thumbs.db', 'desktop.ini'];

// Editor / IDE state: settings directories, swap files, backup files.
const EXCLUDE_EDITOR = ['.idea/**', '.vscode/**', '*.swp', '*.swo', '*~', '.history/**'];
60
+
61
// Dependency caches and language virtual environments.
const EXCLUDE_DEPS = [
  // JavaScript package managers
  'node_modules/**', 'bower_components/**', 'jspm_packages/**', '.pnpm-store/**', '.yarn/**',
  // Python environments, bytecode caches and packaging leftovers
  'venv/**', '.venv/**', 'env/**', '__pycache__/**', '**/__pycache__/**',
  '.tox/**', '.eggs/**', '*.egg-info/**',
  // Ruby
  'vendor/bundle/**', '.bundle/**',
];
78
+
79
// Build outputs, tool caches, coverage reports and compiled objects.
const EXCLUDE_BUILD = [
  // Whole directories: each name below becomes `<name>/**`.
  ...[
    'dist', 'build', 'out', 'target',
    '.next', '.nuxt', '.turbo', '.svelte-kit', '.astro', '.output',
    '.vercel', '.netlify', '.serverless', '.fusebox',
    '.parcel-cache', '.cache', '.gradle', '.expo',
    'coverage', '.nyc_output', '.pytest_cache', '.mypy_cache',
  ].map((dir) => `${dir}/**`),
  // Compiled artifacts matched by file name.
  '*.pyc', '*.pyo', '*.class', '*.o', '*.obj',
  // Nested runtime / cache directories.
  'storage/framework/**', 'storage/logs/**', 'bootstrap/cache/**',
];
111
+
112
// Log files and scratch space.
const EXCLUDE_LOGS_TMP = [
  '*.log',
  'logs/**',
  '*.tmp',
  'tmp/**',
];

// Key material, certificates, password databases, and the tool's own
// config / key files (current and `.vaultline*` names).
const EXCLUDE_SECRETS = [
  // Certificates and private keys
  '*.pem', '*.crt', '*.cer', '*.key', '*.p12', '*.pfx',
  // SSH identities (the trailing `*` also covers `.pub` companions)
  'id_rsa*', 'id_dsa*', 'id_ecdsa*', 'id_ed25519*',
  // Password database
  '*.kdbx',
  // Tool config and key files
  '.sealcoderc.json', '.sealcode.key', '.vaultlinerc.json', '.vaultline.key',
];
131
+
132
// Binary file types that are left out of the vault. Textual formats
// (json, yaml, toml, md, sh, ...) are deliberately absent from this list.
// Each bare extension below is turned into a `*.<ext>` glob.
const EXCLUDE_BINARY = [
  // Images
  'png', 'jpg', 'jpeg', 'gif', 'webp', 'ico', 'bmp', 'tif', 'tiff', 'psd', 'ai', 'heic', 'heif',
  // Fonts
  'woff', 'woff2', 'ttf', 'otf', 'eot',
  // Audio / video
  'mp3', 'mp4', 'mov', 'avi', 'webm', 'ogg', 'oga', 'ogv', 'wav', 'flac', 'aac', 'm4a', 'm4v', 'mkv',
  // Binary documents
  'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx',
  // Archives
  'zip', 'tar', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'gz', 'bz2', '7z', 'rar', 'xz',
  // Compiled / packaged artifacts
  'so', 'dll', 'dylib', 'exe', 'bin', 'iso', 'dmg', 'deb', 'rpm', 'apk', 'aab', 'ipa', 'wasm',
].map((ext) => `*.${ext}`);
206
+
207
// Category name -> glob list, so callers can report exclusions per group.
const EXCLUDE_CATEGORIES = {
  vcs: EXCLUDE_VCS, os: EXCLUDE_OS, editor: EXCLUDE_EDITOR,
  deps: EXCLUDE_DEPS, build: EXCLUDE_BUILD, logs: EXCLUDE_LOGS_TMP,
  secrets: EXCLUDE_SECRETS, binary: EXCLUDE_BINARY,
};

// Files larger than this many bytes (5 MiB) are reported as oversize
// instead of being locked.
const SIZE_CAP_BYTES = 5 * 1024 ** 2;
219
+
220
// Placeholder bodies for well-known manifest files. `buildStubs` copies
// the entries whose file exists in the project, so a locked repo still
// carries a plausible manifest for build tools, IDEs and humans.
// Each template is written as its lines joined by '\n'; the trailing ''
// yields the final newline.
const STUB_TEMPLATES = {
  'package.json': ['{', ' "name": "app",', ' "version": "1.0.0",', ' "private": true', '}', ''].join('\n'),
  'composer.json': ['{', ' "name": "app/app",', ' "type": "project"', '}', ''].join('\n'),
  'go.mod': ['module app', '', 'go 1.21', ''].join('\n'),
  'Cargo.toml': ['[package]', 'name = "app"', 'version = "0.1.0"', 'edition = "2021"', ''].join('\n'),
  'pyproject.toml': ['[project]', 'name = "app"', 'version = "0.1.0"', ''].join('\n'),
};
230
+
231
// --------------------------------------------------------------------------
// Source extension allowlist consulted by `looksLikeSource`. Kept broad on
// purpose — better to flag one extra README than to miss a source file.
// Entries are lower-case and include the leading dot; a few are compound
// (`.blade.php`, `.env.example`).
// --------------------------------------------------------------------------

const SOURCE_EXTS = new Set(
  [
    // JS / TS
    'js', 'jsx', 'ts', 'tsx', 'mjs', 'cjs', 'mts', 'cts', 'vue', 'svelte', 'astro',
    // Python
    'py', 'pyi', 'ipynb',
    // Ruby
    'rb', 'erb', 'rake',
    // PHP
    'php', 'phtml', 'blade.php',
    // Go / Rust / C-family (incl. Objective-C and C#)
    'go', 'rs', 'c', 'h', 'cc', 'cpp', 'hpp', 'cxx', 'hxx', 'm', 'mm', 'cs',
    // JVM
    'java', 'kt', 'kts', 'scala', 'groovy', 'clj', 'cljs',
    // Swift
    'swift',
    // Web
    'html', 'htm', 'xhtml', 'css', 'scss', 'sass', 'less', 'styl',
    // Data / config (textual)
    'json', 'json5', 'jsonc', 'yaml', 'yml', 'toml', 'ini', 'cfg', 'conf',
    'env', 'env.example', 'env.template', 'xml', 'csv', 'tsv',
    // Docs
    'md', 'mdx', 'rst', 'txt', 'tex',
    // Shell / scripts
    'sh', 'bash', 'zsh', 'fish', 'ps1', 'psm1', 'bat', 'cmd',
    // SQL, GraphQL, protobuf
    'sql', 'graphql', 'gql', 'proto',
    // Other languages and infrastructure-as-code
    'lua', 'dart', 'r', 'jl', 'ex', 'exs', 'eex', 'heex', 'leex',
    'tf', 'tfvars', 'hcl', 'nomad', 'nix', 'dockerfile',
    // SVG is textual, and EXCLUDE_BINARY does not list it either.
    'svg',
  ].map((ext) => `.${ext}`)
);
347
+
348
// Exact file names that count as source even though they have no (useful)
// extension. `looksLikeSource` also accepts `<name>.<suffix>` and
// `<name>_<suffix>` variants of every entry.
const SOURCE_BASENAMES = new Set([
  // Build / container / task-runner manifests
  'Dockerfile', 'Containerfile', 'Makefile', 'GNUmakefile', 'Rakefile',
  'Gemfile', 'Procfile', 'Brewfile', 'Vagrantfile', 'Jenkinsfile',
  'CMakeLists.txt', 'CMakeLists.cmake',
  // Repository and tooling dotfiles
  '.gitignore', '.gitattributes', '.editorconfig', '.dockerignore',
  '.npmrc', '.yarnrc', '.prettierrc', '.prettierignore',
  '.eslintrc', '.eslintignore', '.stylelintrc', '.babelrc',
  '.nvmrc', '.tool-versions',
]);
376
+
377
+ // --------------------------------------------------------------------------
378
+ // Git layer
379
+ // --------------------------------------------------------------------------
380
+
381
// Timeout, in milliseconds, passed to every git child process in this
// module. A hung index (antivirus, NFS, corrupted .git) must not stall the
// caller; when git does not answer in time the callers treat it as
// "git unavailable".
const GIT_TIMEOUT_MS = 5 * 1000;

// Upper bound on the number of paths `gitListFiles` returns; longer
// listings are cut down to this many entries to keep memory bounded.
const GIT_MAX_FILES = 200 * 1000;
391
+
392
/**
 * Report whether `projectRoot` is inside a git work tree.
 *
 * Runs `git rev-parse --is-inside-work-tree` with the module-wide timeout
 * and answers true only when the command exits 0 and prints `true`.
 * Any other outcome — non-zero exit, missing stdout, a thrown error —
 * is reported as false.
 *
 * @param {string} projectRoot directory used as the command's cwd
 * @returns {boolean}
 */
function isGitRepo(projectRoot) {
  try {
    const { status, stdout } = spawnSync(
      'git',
      ['rev-parse', '--is-inside-work-tree'],
      { cwd: projectRoot, encoding: 'utf8', timeout: GIT_TIMEOUT_MS }
    );
    if (status !== 0) return false;
    return stdout.trim() === 'true';
  } catch (_) {
    return false;
  }
}
404
+
405
/**
 * Return the files git considers part of the project: tracked files plus
 * untracked-but-not-ignored files (`git ls-files --cached --others
 * --exclude-standard`), normalised with `normPath`, de-duplicated and
 * sorted.
 *
 * The command's output is collected with an explicit 1 GiB `maxBuffer`
 * (spawnSync's own default is 1 MiB, which a large listing would
 * overflow). If the listing holds more than GIT_MAX_FILES paths, only the
 * first GIT_MAX_FILES paths *in sorted order* are kept, so the retained
 * subset does not depend on the order git printed them in.
 *
 * The returned array carries a non-enumerable boolean `truncated`
 * property, so callers can tell a complete list from a cut one while the
 * value still behaves like a plain string array.
 *
 * @param {string} projectRoot directory used as the command's cwd
 * @returns {string[] | null} the path list, or null when `projectRoot` is
 *   not a git work tree or the git command fails, is killed, or times out
 */
function gitListFiles(projectRoot) {
  if (!isGitRepo(projectRoot)) return null;

  let result;
  try {
    result = spawnSync(
      'git',
      // -z separates paths with NUL, the only byte a path cannot contain.
      ['ls-files', '-z', '--cached', '--others', '--exclude-standard'],
      {
        cwd: projectRoot,
        encoding: 'buffer',
        timeout: GIT_TIMEOUT_MS,
        maxBuffer: 1024 * 1024 * 1024,
      }
    );
  } catch (_) {
    return null;
  }
  // `error` covers spawn failures, timeouts and maxBuffer overflow;
  // `signal` covers a child killed from outside.
  if (result.error || result.signal || result.status !== 0) return null;

  const raw = result.stdout ? result.stdout.toString('utf8') : '';
  // The Set drops repeated paths (git can print one path several times,
  // e.g. for unmerged index entries) so counts are not inflated.
  const list = [...new Set(raw.split('\0').filter(Boolean).map(normPath))];

  // Sort BEFORE cutting so the kept subset is deterministic.
  list.sort();
  const truncated = list.length > GIT_MAX_FILES;
  if (truncated) list.length = GIT_MAX_FILES;

  Object.defineProperty(list, 'truncated', {
    value: truncated,
    enumerable: false,
    writable: false,
    configurable: true,
  });
  return list;
}
452
+
453
+ // --------------------------------------------------------------------------
454
+ // Filesystem fallback (used when git is unavailable)
455
+ // --------------------------------------------------------------------------
456
+
457
/**
 * List every file under `projectRoot` that is not matched by `exclude`,
 * by globbing `**\/*` with fast-glob. Dotfiles are included, symbolic
 * links are not followed, and filesystem errors are suppressed.
 *
 * @param {string} projectRoot directory to walk
 * @param {string[]} exclude glob patterns handed to fast-glob as `ignore`
 * @returns {string[]} `normPath`-normalised relative paths, sorted
 */
function filesystemListFiles(projectRoot, exclude) {
  const globOptions = {
    cwd: projectRoot,
    ignore: exclude,
    dot: true,
    onlyFiles: true,
    followSymbolicLinks: false,
    suppressErrors: true,
  };
  const found = fg.sync(['**/*'], globOptions);
  const normalized = found.map(normPath);
  normalized.sort();
  return normalized;
}
470
+
471
/**
 * Heuristic: does this path look like a source file?
 *
 * Used to flag "suspicious excluded" files in the scan report. Liberal on
 * purpose — a false positive costs one extra hint line, while a false
 * negative hides the fact that a source file is not being locked.
 *
 * A path matches when its basename
 *   - is listed in SOURCE_BASENAMES, or is `<listed>.<x>` / `<listed>_<x>`
 *     (Dockerfile.dev, Makefile_local);
 *   - starts with `dockerfile`, `makefile` or `rakefile` in any letter case;
 *   - ends in any dotted suffix listed in SOURCE_EXTS, compared in lower
 *     case. Every suffix is tried, so `foo.spec.ts` matches through `.ts`,
 *     `view.blade.php` through `.blade.php`, and dotfiles whose whole name
 *     is a listed entry (`.env`, `.env.example`) match as well.
 *
 * Files without any dot that are not well-known basenames are reported as
 * non-source, so extension-less binaries do not flood the warning list.
 *
 * @param {string} rel project-relative path
 * @returns {boolean}
 */
function looksLikeSource(rel) {
  const base = path.basename(rel);
  if (SOURCE_BASENAMES.has(base)) return true;

  // Compound markers (Dockerfile.dev, Makefile.local, etc.)
  for (const marker of SOURCE_BASENAMES) {
    if (base.startsWith(`${marker}.`) || base.startsWith(`${marker}_`)) {
      return true;
    }
  }

  const lowerBase = base.toLowerCase();
  if (['dockerfile', 'makefile', 'rakefile'].some((prefix) => lowerBase.startsWith(prefix))) {
    return true;
  }

  // Try every dotted suffix of the basename. Starting at index 1 means a
  // dotfile's full name (`.env`) is tested too — `path.extname` returns ''
  // for such names, so an extname-based check can never match them.
  const parts = lowerBase.split('.');
  for (let i = 1; i < parts.length; i += 1) {
    if (SOURCE_EXTS.has(`.${parts.slice(i).join('.')}`)) return true;
  }
  return false;
}
514
+
515
+ // --------------------------------------------------------------------------
516
+ // Locked-dir selection
517
+ // --------------------------------------------------------------------------
518
+
519
// Ecosystem marker file -> directory the vault is written to. Listed in
// priority order: `chooseLockedDir` uses the first marker that exists.
const LOCKED_DIR_RULES = [
  ['go.mod', 'internal/sealed'],
  ['Cargo.toml', 'target/.cache'],
  ['composer.json', 'storage/sealed'],
  ['Gemfile', 'vendor/sealed'],
  ['pom.xml', '.sealed'],
  ['build.gradle', '.sealed'],
  ['build.gradle.kts', '.sealed'],
].map(([marker, lockedDir]) => ({ marker, lockedDir }));

/**
 * Pick the locked directory for a project from the marker files present
 * directly in `projectRoot`.
 *
 * @param {string} projectRoot
 * @returns {string} the `lockedDir` of the first matching rule, or
 *   `'vendor'` when no marker file exists
 */
function chooseLockedDir(projectRoot) {
  const hit = LOCKED_DIR_RULES.find(({ marker }) =>
    fs.existsSync(path.join(projectRoot, marker))
  );
  return hit ? hit.lockedDir : 'vendor';
}
540
+
541
+ // --------------------------------------------------------------------------
542
+ // Stubs (per-ecosystem hint files left on disk after lock)
543
+ // --------------------------------------------------------------------------
544
+
545
/**
 * Select the stub templates that apply to this project: one entry per
 * STUB_TEMPLATES file name that exists directly in `projectRoot`.
 *
 * @param {string} projectRoot
 * @returns {Object<string, string>} file name -> stub body, in
 *   STUB_TEMPLATES order; empty when no known manifest is present
 */
function buildStubs(projectRoot) {
  const present = Object.keys(STUB_TEMPLATES).filter((file) =>
    fs.existsSync(path.join(projectRoot, file))
  );
  return Object.fromEntries(present.map((file) => [file, STUB_TEMPLATES[file]]));
}
554
+
555
+ // --------------------------------------------------------------------------
556
+ // Putting it together
557
+ // --------------------------------------------------------------------------
558
+
559
/**
 * Assemble the full exclude list: every universal category in a fixed
 * order (vcs, os, editor, deps, build, logs/tmp, secrets, binary),
 * followed by `<lockedDir>/**` unless that glob is already present. The
 * locked directory holds the vault itself and must never be locked.
 *
 * NOTE(review): most entries (`*.png`, `dist/**`, ...) have no `**\/`
 * prefix. With fast-glob's default `baseNameMatch: false` such patterns
 * are expected to match at the project root only — confirm that nested
 * matches are not intended.
 *
 * @param {string} lockedDir locked directory; trailing slashes are ignored
 * @returns {string[]} a new array on every call
 */
function buildExcludeList(lockedDir) {
  const patterns = [].concat(
    EXCLUDE_VCS,
    EXCLUDE_OS,
    EXCLUDE_EDITOR,
    EXCLUDE_DEPS,
    EXCLUDE_BUILD,
    EXCLUDE_LOGS_TMP,
    EXCLUDE_SECRETS,
    EXCLUDE_BINARY
  );
  const trimmedDir = lockedDir.replace(/\/+$/, '');
  const lockedGlob = `${trimmedDir}/**`;
  return patterns.includes(lockedGlob) ? patterns : [...patterns, lockedGlob];
}
575
+
576
// Lower-case directory prefixes / file-name fragments used by
// `classifyExclusion`. They mirror the directory and file globs of the
// EXCLUDE_* lists so that every excluded file lands in a named bucket.
const CLASSIFY_DEPS_PREFIXES = [
  'venv/', '.venv/', 'env/', '.yarn/', '.pnpm-store/', 'vendor/bundle/', '.bundle/',
  'bower_components/', 'jspm_packages/', '.tox/', '.eggs/',
];
const CLASSIFY_BUILD_PREFIXES = [
  'dist/', 'build/', 'out/', 'target/', '.next/', '.nuxt/', '.turbo/', '.svelte-kit/',
  '.astro/', '.output/', '.vercel/', '.netlify/', '.serverless/', '.fusebox/',
  '.parcel-cache/', '.cache/', '.gradle/', '.expo/', 'coverage/', '.nyc_output/',
  '.pytest_cache/', '.mypy_cache/', 'storage/framework/', 'storage/logs/', 'bootstrap/cache/',
];
const CLASSIFY_BUILD_SUFFIXES = ['.pyc', '.pyo', '.class', '.o', '.obj'];
const CLASSIFY_SECRET_SUFFIXES = ['.pem', '.crt', '.cer', '.key', '.p12', '.pfx', '.kdbx'];
const CLASSIFY_SECRET_PREFIXES = ['id_rsa', 'id_dsa', 'id_ecdsa', 'id_ed25519'];
const CLASSIFY_SECRET_NAMES = ['.sealcoderc.json', '.vaultlinerc.json'];

// Lower-cased extensions derived from EXCLUDE_BINARY; built on first use
// and then reused, instead of being rebuilt for every classified path.
let classifyBinaryExts = null;

/**
 * Name the exclusion category a path belongs to, for the scan report.
 *
 * Categories are tested in this order and the first match wins:
 * `vcs`, `os`, `editor`, `deps`, `build`, `logs`, `secrets`, `binary`;
 * anything else is `other`. Directory tests use the lower-cased path;
 * file-name tests use the basename as given (case-sensitive), except the
 * binary test, which lower-cases the extension.
 *
 * The deps, build and secrets tests cover every entry of the matching
 * EXCLUDE_* list. That keeps files such as `.sealcoderc.json` or
 * `storage/logs/*` out of `other`, the bucket callers treat as
 * "excluded for no known reason".
 *
 * @param {string} rel project-relative path using `/` separators
 * @returns {'vcs'|'os'|'editor'|'deps'|'build'|'logs'|'secrets'|'binary'|'other'}
 */
function classifyExclusion(rel) {
  const base = path.basename(rel);
  const lower = rel.toLowerCase();
  const underAny = (prefixes) => prefixes.some((prefix) => lower.startsWith(prefix));
  const baseEndsWithAny = (suffixes) => suffixes.some((suffix) => base.endsWith(suffix));

  if (underAny(['.git/', '.hg/', '.svn/'])) return 'vcs';

  if (EXCLUDE_OS.includes(base)) return 'os';

  if (underAny(['.idea/', '.vscode/', '.history/']) || baseEndsWithAny(['.swp', '.swo', '~'])) {
    return 'editor';
  }

  // `*.egg-info/**`: a top-level directory whose name ends in `.egg-info`.
  const slash = lower.indexOf('/');
  const topDir = slash === -1 ? '' : lower.slice(0, slash);
  if (
    lower.includes('node_modules/') ||
    lower.includes('__pycache__/') ||
    underAny(CLASSIFY_DEPS_PREFIXES) ||
    topDir.endsWith('.egg-info')
  ) {
    return 'deps';
  }

  if (underAny(CLASSIFY_BUILD_PREFIXES) || baseEndsWithAny(CLASSIFY_BUILD_SUFFIXES)) {
    return 'build';
  }

  if (baseEndsWithAny(['.log', '.tmp']) || underAny(['logs/', 'tmp/'])) {
    return 'logs';
  }

  if (
    baseEndsWithAny(CLASSIFY_SECRET_SUFFIXES) ||
    CLASSIFY_SECRET_PREFIXES.some((prefix) => base.startsWith(prefix)) ||
    CLASSIFY_SECRET_NAMES.includes(base)
  ) {
    return 'secrets';
  }

  if (classifyBinaryExts === null) {
    // '*.png' -> '.png'; patterns without a leading '*' are kept whole.
    classifyBinaryExts = new Set(
      EXCLUDE_BINARY.map((glob) => (glob.startsWith('*') ? glob.slice(1) : glob).toLowerCase())
    );
  }
  if (classifyBinaryExts.has(path.extname(rel).toLowerCase())) return 'binary';

  return 'other';
}
650
+
651
/**
 * Top-level discovery entry point. Returns a structured report; callers
 * (scan, init) decide what to do with it.
 *
 * Steps:
 *   1. Resolve the locked directory (`opts.lockedDir`, else
 *      `chooseLockedDir`) and build the exclude list for it.
 *   2. Collect `candidates`: the git file list when `gitListFiles`
 *      returns one (minus anything inside the locked directory, which
 *      holds ciphertext rather than source), otherwise a filesystem walk
 *      of every non-excluded file.
 *   3. Glob `**\/*` with the exclude list — the set lock would use — and
 *      split it by size into `wouldLock` and `oversize`
 *      (> SIZE_CAP_BYTES). Files that cannot be stat'ed are skipped.
 *   4. When git was the source, compare both sets (`gitCoverage`).
 *
 * Report fields:
 *   - `source`      `'git'` or `'filesystem'`
 *   - `lockedDir`, `include`, `exclude`  the configuration that was used
 *   - `wouldLock`   paths that pass include/exclude and the size cap
 *   - `oversize`    `{ path, size }` for matched files above the cap
 *   - `gitCoverage` null unless `source === 'git'`; otherwise
 *       `{ tracked, excluded, excludedByReason, suspiciousExcluded,
 *          lockedNotInGit }`, where `suspiciousExcluded` lists git files
 *       that look like source yet are excluded for no categorised reason
 *   - `byTopLevel`  count of `wouldLock` entries per top-level directory
 *       (`'(root files)'` for files in the root)
 *   - `totalBytes`  summed size of `wouldLock`
 *   - `candidates`  see step 2
 *   - `truncated`   true when the git listing reached GIT_MAX_FILES (or
 *       `gitListFiles` flagged it), i.e. `candidates` may be incomplete
 *
 * @param {string} projectRoot
 * @param {{ lockedDir?: string }} [opts]
 * @returns {object} the report described above
 */
function scanProject(projectRoot, opts = {}) {
  // `opts?.` tolerates an explicit null as well as an omitted argument.
  const lockedDir = opts?.lockedDir || chooseLockedDir(projectRoot);
  const exclude = buildExcludeList(lockedDir);
  const lockedPrefix = `${lockedDir.replace(/\/+$/, '')}/`;

  const fromGit = gitListFiles(projectRoot);
  const source = fromGit ? 'git' : 'filesystem';
  const candidates = fromGit
    ? fromGit.filter((p) => !p.startsWith(lockedPrefix))
    : filesystemListFiles(projectRoot, exclude);
  const truncated =
    Boolean(fromGit) && (fromGit.truncated === true || fromGit.length >= GIT_MAX_FILES);

  // Run the actual include/exclude that lock will use, so the report
  // matches reality. The comment this replaces says this is the same
  // fast-glob call as seal.js — presumably still true; seal.js is not
  // visible here.
  const includeGlobs = ['**/*'];
  const matched = new Set(
    fg
      .sync(includeGlobs, {
        cwd: projectRoot,
        ignore: exclude,
        dot: true,
        onlyFiles: true,
        followSymbolicLinks: false,
        suppressErrors: true,
      })
      .map(normPath)
  );

  // One stat per file: apply the size cap and accumulate the byte total
  // in the same pass.
  const oversize = [];
  const wouldLock = [];
  let totalBytes = 0;
  for (const rel of matched) {
    let size;
    try {
      size = fs.statSync(path.join(projectRoot, rel)).size;
    } catch (_) {
      continue; // vanished or unreadable — neither locked nor oversize
    }
    if (size > SIZE_CAP_BYTES) {
      oversize.push({ path: rel, size });
      continue;
    }
    wouldLock.push(rel);
    totalBytes += size;
  }

  // Coverage analysis (only meaningful when git is the source).
  let gitCoverage = null;
  if (source === 'git') {
    const lockedSet = new Set(wouldLock);
    const gitSet = new Set(candidates);
    const inGitNotLocked = candidates.filter((p) => !lockedSet.has(p));
    const lockedNotInGit = wouldLock.filter((p) => !gitSet.has(p));

    // Classify each excluded git file once: bucket it by reason and, when
    // no category explains the exclusion but the file looks like source,
    // flag it — those are the likeliest false negatives.
    const excludedByReason = {};
    const suspiciousExcluded = [];
    for (const p of inGitNotLocked) {
      const reason = classifyExclusion(p);
      excludedByReason[reason] = (excludedByReason[reason] || 0) + 1;
      if (reason === 'other' && looksLikeSource(p)) suspiciousExcluded.push(p);
    }

    gitCoverage = {
      tracked: candidates.length,
      excluded: inGitNotLocked.length,
      excludedByReason,
      suspiciousExcluded,
      lockedNotInGit,
    };
  }

  // Per-directory breakdown of the locked set.
  const byTopLevel = {};
  for (const rel of wouldLock) {
    const slash = rel.indexOf('/');
    const top = slash === -1 ? '(root files)' : `${rel.slice(0, slash)}/`;
    byTopLevel[top] = (byTopLevel[top] || 0) + 1;
  }

  return {
    source,
    lockedDir,
    include: includeGlobs,
    exclude,
    wouldLock,
    oversize,
    gitCoverage,
    byTopLevel,
    totalBytes,
    candidates,
    truncated,
  };
}
771
+
772
/**
 * Produce the config object persisted as `.sealcoderc.json` for the
 * `auto` preset, together with the scan report it was derived from.
 *
 * `lockedDir`, `include` and `exclude` are taken unchanged from
 * `scanProject(projectRoot)`; `stubs` comes from `buildStubs`.
 *
 * @param {string} projectRoot
 * @returns {{ cfg: object, report: object }}
 */
function buildAutoConfig(projectRoot) {
  const report = scanProject(projectRoot);
  const { lockedDir, include, exclude } = report;
  const stubs = buildStubs(projectRoot);
  const cfg = { version: 1, preset: 'auto', lockedDir, include, exclude, stubs };
  return { cfg, report };
}
791
+
792
+ // --------------------------------------------------------------------------
793
+ // Monorepo / multi-microservice detection (sealcode@1.4.0)
794
+ //
795
+ // Sealcode is licensed per project. A folder that's clearly a monorepo —
796
+ // `services/auth/package.json` + `services/billing/package.json` + a root
797
+ // `docker-compose.yml`, or a `pnpm-workspace.yaml` listing N packages —
798
+ // should NOT be sealed as one giant vault by default. The owner is meant
799
+ // to `cd` into each service and run `sealcode init` separately so each
800
+ // gets its own keys, its own grants, its own billing line item.
801
+ //
802
+ // detectMicroservices() returns:
803
+ //
804
+ // {
805
+ // isMonorepo: boolean, // true if we'd refuse without override
806
+ // reason: string, // human-readable headline
807
+ // services: [{ relPath, marker, label }],
808
+ // workspaces: [{ file, kind }], // explicit workspace declarations
809
+ // }
810
+ //
811
+ // Heuristic, in order:
812
+ // 1. Explicit workspace declarations (lerna.json, pnpm-workspace.yaml,
813
+ // nx.json, turbo.json, rush.json, root package.json with a
814
+ // `workspaces` field) — always treated as a monorepo.
815
+ // 2. ≥ 2 directories (depth 1 to 3 below the root) containing a project
816
+ // marker (package.json, pyproject.toml, requirements.txt, setup.py,
817
+ // manage.py, go.mod, Cargo.toml, Gemfile, composer.json, pom.xml,
818
+ // build.gradle, build.gradle.kts, mix.exs, pubspec.yaml).
819
+ // 3. The CLI refuses to init at the root; the user is told which
820
+ // services were detected and how to lock each one. `--allow-monorepo`
821
+ // overrides for the rare case where one big vault is desired.
822
+
823
// Manifest file names whose presence marks a directory as a project of
// its own during service detection.
const PROJECT_MARKERS = new Set([
  // Node.js
  'package.json',
  // Python
  'pyproject.toml', 'requirements.txt', 'setup.py', 'manage.py',
  // Go, Rust, Ruby, PHP
  'go.mod', 'Cargo.toml', 'Gemfile', 'composer.json',
  // JVM
  'pom.xml', 'build.gradle', 'build.gradle.kts',
  // Elixir, Dart / Flutter
  'mix.exs', 'pubspec.yaml',
]);
839
+
840
// Directory names the service hunt never descends into: dependency
// caches, build outputs, VCS and editor metadata, scratch space, and
// sealcode's own locked directories. Many of these ship manifests of
// their own, and counting them would bury real services under false
// positives.
const SKIP_DIRS_FOR_DETECT = new Set([
  // Dependencies and virtual environments
  'node_modules', 'vendor', 'venv', '.venv', 'env', '.env', '__pycache__',
  // Version control
  '.git', '.hg', '.svn',
  // Build outputs and tool caches
  'dist', 'build', 'target', '.next', '.nuxt', '.output',
  '.cache', '.parcel-cache', '.turbo', '.gradle',
  // Editors
  '.idea', '.vscode',
  // Scratch and coverage
  'tmp', '.tmp', 'coverage', '.nyc_output',
  // sealcode's own locked dirs
  'sealed', '_site_packages', '_sealed', '.sealed',
]);
877
+
878
/**
 * Read and parse a JSON file without ever throwing.
 *
 * @param {string} absPath path of the file to read (as UTF-8)
 * @returns {*} the parsed value, or null when the file cannot be read or
 *   does not contain valid JSON
 */
function readJsonSafe(absPath) {
  let parsed = null;
  try {
    const text = fs.readFileSync(absPath, 'utf8');
    parsed = JSON.parse(text);
  } catch (_) {
    parsed = null;
  }
  return parsed;
}
885
+
886
/**
 * Find explicit workspace declarations in the project root.
 *
 * Checks for the dedicated workspace files (lerna, pnpm, nx, turbo, rush)
 * by existence only, then for a root `package.json` whose `workspaces`
 * field is either an array or an object with a `packages` array (the
 * npm / yarn / bun forms).
 *
 * @param {string} projectRoot
 * @returns {{ file: string, kind: string }[]} one entry per declaration,
 *   dedicated files first (in the order listed below), `package.json` last
 */
function detectWorkspaceDeclarations(projectRoot) {
  const inRoot = (name) => path.join(projectRoot, name);

  const declarations = [
    ['lerna.json', 'lerna'],
    ['pnpm-workspace.yaml', 'pnpm'],
    ['nx.json', 'nx'],
    ['turbo.json', 'turbo'],
    ['rush.json', 'rush'],
  ]
    .filter(([name]) => fs.existsSync(inRoot(name)))
    .map(([file, kind]) => ({ file, kind }));

  const rootPkgPath = inRoot('package.json');
  if (!fs.existsSync(rootPkgPath)) return declarations;

  const pkg = readJsonSafe(rootPkgPath);
  const workspaces = pkg && pkg.workspaces;
  const declaresWorkspaces =
    Boolean(workspaces) &&
    (Array.isArray(workspaces) ||
      (typeof workspaces === 'object' && Array.isArray(workspaces.packages)));
  if (declaresWorkspaces) {
    declarations.push({ file: 'package.json', kind: 'npm-workspaces' });
  }
  return declarations;
}
917
+
918
/**
 * Walk at most `maxDepth` directory levels below `projectRoot` looking
 * for directories that contain a PROJECT_MARKERS file.
 *
 * - The root itself (depth 0) is never reported, even if it has a marker.
 * - A directory that has a marker is reported once and not descended
 *   into, so a service's nested packages are not counted separately.
 * - Directories named in SKIP_DIRS_FOR_DETECT and dot-directories other
 *   than `.github` are not entered. Unreadable directories are skipped.
 *
 * @param {string} projectRoot
 * @param {number} [maxDepth=3] deepest level examined (root is level 0)
 * @returns {{ relPath: string, marker: string, label: string }[]}
 *   `relPath` is relative to the root using the platform separator,
 *   `marker` is the first marker file found in the directory listing, and
 *   `label` is the directory's own name
 */
function findServiceDirs(projectRoot, maxDepth = 3) {
  const services = [];

  const visit = (rel, depth) => {
    if (depth > maxDepth) return;

    let entries;
    try {
      entries = fs.readdirSync(path.join(projectRoot, rel), { withFileTypes: true });
    } catch (_) {
      return;
    }

    const markerEntry = entries.find(
      (entry) => entry.isFile() && PROJECT_MARKERS.has(entry.name)
    );
    if (markerEntry && depth >= 1) {
      services.push({
        relPath: rel || '.',
        marker: markerEntry.name,
        label: rel.split(path.sep).pop() || rel,
      });
      return; // do not look for services inside a service
    }

    for (const entry of entries) {
      const skip =
        !entry.isDirectory() ||
        SKIP_DIRS_FOR_DETECT.has(entry.name) ||
        (entry.name.startsWith('.') && entry.name !== '.github');
      if (!skip) visit(path.join(rel, entry.name), depth + 1);
    }
  };

  visit('', 0);
  return services;
}
963
+
964
/**
 * Decide whether `projectRoot` looks like a monorepo.
 *
 * It does when either (a) the root carries an explicit workspace
 * declaration, or (b) the directory walk finds at least two service
 * directories. Case (a) takes precedence when choosing the `reason` text.
 *
 * @param {string} projectRoot
 * @returns {{ isMonorepo: boolean, reason: string, services: object[],
 *   workspaces: object[] }} `reason` is '' when `isMonorepo` is false;
 *   `services` and `workspaces` are always the full detection results
 */
function detectMicroservices(projectRoot) {
  const workspaces = detectWorkspaceDeclarations(projectRoot);
  const services = findServiceDirs(projectRoot);

  if (workspaces.length > 0) {
    return {
      isMonorepo: true,
      reason: `Detected an explicit workspace declaration (${workspaces.map((w) => w.file).join(', ')}).`,
      services,
      workspaces,
    };
  }
  if (services.length >= 2) {
    return {
      isMonorepo: true,
      reason: `Detected ${services.length} project directories with their own build manifests.`,
      services,
      workspaces,
    };
  }
  return { isMonorepo: false, reason: '', services, workspaces };
}
983
+
984
+ module.exports = {
985
+ // primary API
986
+ scanProject,
987
+ buildAutoConfig,
988
+ detectMicroservices,
989
+ // helpers (exported for tests / reuse)
990
+ chooseLockedDir,
991
+ buildExcludeList,
992
+ buildStubs,
993
+ classifyExclusion,
994
+ looksLikeSource,
995
+ isGitRepo,
996
+ gitListFiles,
997
+ // constants
998
+ EXCLUDE_CATEGORIES,
999
+ SOURCE_EXTS,
1000
+ SOURCE_BASENAMES,
1001
+ STUB_TEMPLATES,
1002
+ SIZE_CAP_BYTES,
1003
+ PROJECT_MARKERS,
1004
+ };