@agentique.io/validator 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1016 @@
1
+ import { createHash } from "node:crypto";
2
+ import { promises as fs } from "node:fs";
3
+ import path from "node:path";
4
+
5
/** Schema identifier stamped on every external-intake report. */
export const EXTERNAL_INTAKE_SCHEMA_VERSION = "agentique.externalIntake.v1";

// Directory names the tree walker never descends into.
const DEFAULT_SKIP_DIRS = new Set([".git", "node_modules"]);

// Repository-wide limits used when the caller supplies no override.
const DEFAULT_INTAKE_POLICY = Object.freeze({
  maxFiles: 10000,
  maxBytes: 100 * 1024 * 1024
});

// Upper bounds (in bytes) on how much of a file each inspection reads.
const GITATTRIBUTES_READ_LIMIT_BYTES = 64 * 1024;
const LFS_POINTER_READ_LIMIT_BYTES = 2 * 1024;
const LFS_POINTER_HEADER = "version https://git-lfs.github.com/spec/v1";
const PAYLOAD_PREFIX_READ_LIMIT_BYTES = 4 * 1024;
const SCRIPT_TEXT_READ_LIMIT_BYTES = 32 * 1024;
const DANGEROUS_TEXT_READ_LIMIT_BYTES = 32 * 1024;
const SECRET_TEXT_READ_LIMIT_BYTES = 64 * 1024;
const LICENSE_TEXT_READ_LIMIT_BYTES = 64 * 1024;

// Path suffixes (lower-case) that mark a file as an archive payload.
const ARCHIVE_EXTENSIONS = new Set([
  ".7z",
  ".gz",
  ".rar",
  ".tar",
  ".tar.gz",
  ".tgz",
  ".zip"
]);

// Path suffixes (lower-case) that mark a file as an executable payload.
const EXECUTABLE_EXTENSIONS = new Set([
  ".bat", ".cmd", ".com", ".dll", ".dylib", ".exe", ".jar",
  ".msi", ".node", ".ps1", ".sh", ".so", ".wasm"
]);

// package.json script names (lower-case) that are reported as lifecycle scripts.
const PACKAGE_LIFECYCLE_SCRIPTS = new Set([
  "preinstall",
  "install",
  "postinstall",
  "prepare",
  "prepublish",
  "prepublishonly",
  "prepack",
  "postpack"
]);

// Script-file suffixes (lower-case) reported as an executable surface.
const EXECUTABLE_SURFACE_EXTENSIONS = new Set([".bash", ".ps1", ".sh", ".zsh"]);
38
/**
 * Heuristic patterns for risky capabilities found in file text.
 *
 * Each rule pairs a report `category` with a case-insensitive, non-global pattern.
 *
 * Word boundaries are attached to individual alternatives rather than wrapped around the whole group:
 * a `\b` placed after `(` only matches when a word character follows, which would skip the common
 * `getenv("NAME")` / `atob("...")` forms, and a `\b` placed before `.` would skip a standalone `.env`.
 */
const DANGEROUS_CAPABILITY_RULES = Object.freeze([
  Object.freeze({
    category: "download-pipe-execute",
    pattern: /\b(?:curl|wget|iwr|invoke-webrequest)\b[\s\S]{0,120}\|\s*(?:bash|sh|zsh|powershell|pwsh|iex|invoke-expression)\b/i
  }),
  Object.freeze({
    category: "destructive-filesystem",
    pattern: /\b(?:rm\s+-rf|rmdir\s+\/s|remove-item\b[\s\S]{0,80}-recurse|del\s+\/[fqsa])\b/i
  }),
  Object.freeze({
    category: "credential-environment-access",
    // `getenv(` carries no trailing boundary and `.env` no leading one (see the note above).
    pattern: /(?:\b(?:process\.env|os\.environ|GITHUB_TOKEN|AWS_SECRET_ACCESS_KEY|npm_token|pypi_token)\b|\bgetenv\(|\.env\b)/i
  }),
  Object.freeze({
    category: "encoded-payload",
    // `atob(` carries no trailing boundary so quoted arguments are matched too.
    pattern: /\b(?:base64\s+(?:-d|--decode)\b|frombase64string\b|atob\(|Buffer\.from\([^)]{0,80}base64\b)/i
  }),
  Object.freeze({
    category: "process-spawn",
    pattern: /\b(?:child_process|execSync|spawnSync|execFileSync|subprocess\.(?:run|popen|call)|ProcessBuilder)\b/i
  }),
  Object.freeze({
    category: "unpinned-reference",
    pattern: /\b(?:uses\s*:\s*[^@\s]+@(?:main|master|latest)|image\s*:\s*[^:\s]+:latest)\b/i
  }),
  Object.freeze({
    category: "self-hosted-runner",
    pattern: /\bruns-on\s*:\s*\[?[^\n]*self-hosted\b/i
  })
]);
68
/**
 * Secret detectors: each entry is a frozen `{ id, pattern }` pair.
 * Every pattern is global so that all occurrences in a text can be enumerated.
 */
const SECRET_RULES = Object.freeze(
  [
    ["private-key", /-----BEGIN [A-Z ]*PRIVATE KEY-----/gi],
    ["openai-key", /\bsk-[A-Za-z0-9_-]{16,}\b/g],
    ["github-token", /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/g],
    ["aws-access-key", /\bAKIA[0-9A-Z]{16}\b/g],
    ["npm-token", /\bnpm_[A-Za-z0-9]{20,}\b/g],
    ["pypi-token", /\bpypi-[A-Za-z0-9_-]{20,}\b/g],
    ["jwt-token", /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/g],
    ["bearer-token", /\bAuthorization\s*:\s*Bearer\s+[A-Za-z0-9._-]{16,}\b/gi],
    ["database-url", /\b(?:postgres|postgresql|mysql|mongodb):\/\/[^:\s/]+:[^@\s]+@[^\s]+/gi],
    ["credential-url", /\bhttps?:\/\/[^:\s/]+:[^@\s]+@[^\s]+/gi],
    ["assignment-secret", /\b(?:api[_-]?key|api[_-]?token|secret|password|token)\s*[:=]\s*["'][^"'\s]{8,}["']/gi]
  ].map(([id, pattern]) => Object.freeze({ id, pattern }))
);
81
+
82
/**
 * Scans a directory tree and returns a frozen external-intake report.
 *
 * The tree under `options.sourceDir` is walked first; repository limit gates and license gates are
 * applied afterwards. The report's `decision` is "blocked" when at least one finding is blocking and
 * "passed" otherwise.
 *
 * @param {object} options
 * @param {string} options.sourceDir - Directory to scan (resolved to an absolute path).
 * @param {string} [options.command] - Label echoed in the report; defaults to "external-intake".
 * @param {number} [options.maxFiles] - Optional override of the file-count limit.
 * @param {number} [options.maxBytes] - Optional override of the total-size limit.
 * @returns {Promise<object>} Frozen report with schemaVersion, command, source, summary, decision,
 *   inventory, licenses and findings.
 * @throws {Error} If the source cannot be stat'ed, is not a directory, or a limit override is invalid.
 */
export async function scanExternalIntake(options) {
  const command = options.command ?? "external-intake";
  const sourceDir = path.resolve(options.sourceDir);
  const policy = normalizePolicy(options);
  const collected = { findings: [], inventory: [], licenses: [] };
  const { findings, inventory, licenses } = collected;

  const rootStat = await fs.stat(sourceDir).catch((error) => {
    throw new Error(`Unable to read source directory: ${safeErrorCode(error)}`);
  });
  if (!rootStat.isDirectory()) {
    throw new Error("Source must be a directory.");
  }

  await walkDirectory({ root: sourceDir, current: sourceDir, ...collected });

  // Inventory is ordered by code-unit comparison of paths; licenses by locale-aware path, then source.
  inventory.sort((a, b) => {
    if (a.path < b.path) {
      return -1;
    }
    return a.path > b.path ? 1 : 0;
  });
  licenses.sort((a, b) => a.path.localeCompare(b.path) || a.source.localeCompare(b.source));

  applyRepositoryLimitGates({ inventory, findings, policy });
  applyLicenseGates({ licenses, findings });

  let blockingCount = 0;
  for (const finding of findings) {
    if (finding.blocking) {
      blockingCount += 1;
    }
  }

  let totalBytes = 0;
  for (const item of inventory) {
    totalBytes += item.bytes;
  }

  return freezeReport({
    schemaVersion: EXTERNAL_INTAKE_SCHEMA_VERSION,
    command,
    source: { label: path.basename(sourceDir) },
    summary: {
      files: inventory.length,
      bytes: totalBytes,
      findings: findings.length,
      blockingFindings: blockingCount
    },
    decision: blockingCount > 0 ? "blocked" : "passed",
    inventory,
    licenses,
    findings
  });
}
128
+
129
/**
 * Recursively walks `current`, recording every regular file in `inventory` and running the per-file
 * gates on it. Entries are visited in locale-sorted name order.
 *
 * - Directories named in DEFAULT_SKIP_DIRS are skipped; other directories are descended into.
 * - Entries that are neither directories nor regular files produce a blocking
 *   "intake.unsupported-entry" finding.
 * - A directory that cannot be listed produces a blocking "intake.read-directory" finding.
 * - Any error thrown while stat'ing or inspecting a file produces a blocking "intake.read-file" finding.
 *
 * @param {object} args
 * @param {string} args.root - Absolute scan root, used to compute report-relative paths.
 * @param {string} args.current - Absolute directory being listed.
 * @param {Array} args.inventory - Receives `{ path, bytes }` records.
 * @param {Array} args.findings - Receives findings.
 * @param {Array} args.licenses - Receives license signals.
 * @returns {Promise<void>}
 */
async function walkDirectory({ root, current, inventory, findings, licenses }) {
  const entries = await fs.readdir(current, { withFileTypes: true }).catch((error) => {
    findings.push(
      createFinding({
        code: "intake.read-directory",
        severity: "high",
        message: "Unable to read directory.",
        path: relativePath(root, current),
        blocking: true,
        details: { reason: safeErrorCode(error) }
      })
    );
    return null;
  });
  if (entries === null) {
    return;
  }

  entries.sort((a, b) => a.name.localeCompare(b.name));

  for (const entry of entries) {
    const absolutePath = path.join(current, entry.name);
    const rel = relativePath(root, absolutePath);

    if (entry.isDirectory()) {
      if (!DEFAULT_SKIP_DIRS.has(entry.name)) {
        await walkDirectory({ root, current: absolutePath, inventory, findings, licenses });
      }
      continue;
    }

    if (!entry.isFile()) {
      findings.push(
        createFinding({
          code: "intake.unsupported-entry",
          severity: "medium",
          message: "Only regular files are supported in external intake inventory.",
          path: rel,
          blocking: true
        })
      );
      continue;
    }

    try {
      const stat = await fs.stat(absolutePath);
      inventory.push({ path: rel, bytes: stat.size });

      // Gates run strictly in this order so findings for one file keep a stable sequence.
      const target = { filePath: absolutePath, rel, findings };
      await applyRepositoryMetadataGates({ ...target, stat });
      await applyPayloadClassifier(target);
      await applyScriptWorkflowInventory(target);
      await applyDangerousCapabilityClassifier(target);
      await applySecretScanner(target);
      await applyLicenseInventory({ ...target, licenses });
    } catch (error) {
      findings.push(
        createFinding({
          code: "intake.read-file",
          severity: "high",
          message: "Unable to stat file.",
          path: rel,
          blocking: true,
          details: { reason: safeErrorCode(error) }
        })
      );
    }
  }
}
200
+
201
/**
 * Collects license signals from one file.
 *
 * A file named `package.json` (case-insensitive) is handed to collectPackageLicense. A file whose name
 * looks like a license file has its text prefix classified and is recorded as a "license-file" signal
 * with status "recognized" or "unknown".
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path (forward slashes).
 * @param {Array} args.findings - Receives read-failure findings.
 * @param {Array} args.licenses - Receives frozen license records.
 * @returns {Promise<void>}
 */
async function applyLicenseInventory({ filePath, rel, findings, licenses }) {
  const lowerName = path.posix.basename(rel).toLowerCase();

  if (lowerName === "package.json") {
    await collectPackageLicense({ filePath, rel, findings, licenses });
  }

  if (isLicenseFileName(lowerName)) {
    const text = await readTextPrefix({
      filePath,
      rel,
      maxBytes: LICENSE_TEXT_READ_LIMIT_BYTES,
      findings,
      purpose: "license-inventory"
    });
    const normalized = normalizeLicenseText(text);
    const status = normalized ? "recognized" : "unknown";
    licenses.push(Object.freeze({ path: rel, source: "license-file", expression: null, normalized, status }));
  }
}
229
+
230
/**
 * Records the `license` field of a package.json as a "package-json" license signal.
 *
 * Nothing is recorded when the text prefix is not valid JSON or when no usable license expression is
 * present; neither case produces a finding here.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the package.json.
 * @param {string} args.rel - Report-relative path.
 * @param {Array} args.findings - Receives read-failure findings.
 * @param {Array} args.licenses - Receives the frozen license record.
 * @returns {Promise<void>}
 */
async function collectPackageLicense({ filePath, rel, findings, licenses }) {
  const raw = await readTextPrefix({
    filePath,
    rel,
    maxBytes: LICENSE_TEXT_READ_LIMIT_BYTES,
    findings,
    purpose: "license-package-json"
  });

  let manifest;
  try {
    manifest = JSON.parse(raw);
  } catch {
    // Unparseable manifests are silently ignored by the license inventory.
    return;
  }

  const expression = packageLicenseExpression(manifest?.license);
  if (expression) {
    const normalized = normalizeLicenseExpression(expression);
    const status = normalized ? "recognized" : "unknown";
    licenses.push(Object.freeze({ path: rel, source: "package-json", expression, normalized, status }));
  }
}
262
+
263
/**
 * Extracts a license expression from a package.json `license` value.
 *
 * Accepts either a string or an object with a string `type` property.
 *
 * @param {*} value - The raw `license` field.
 * @returns {string|null} The trimmed expression, or null when absent or blank.
 */
function packageLicenseExpression(value) {
  let candidate = null;
  if (typeof value === "string") {
    candidate = value;
  } else if (value && typeof value === "object" && typeof value.type === "string") {
    candidate = value.type;
  }

  if (candidate === null) {
    return null;
  }
  const trimmed = candidate.trim();
  return trimmed === "" ? null : trimmed;
}
272
+
273
/**
 * Tells whether a lower-cased file name looks like a license file:
 * "license" / "licence" (optionally followed by ".something") or "copying".
 *
 * @param {string} basename - Lower-cased file name without directories.
 * @returns {boolean}
 */
function isLicenseFileName(basename) {
  if (basename === "copying") {
    return true;
  }
  return ["license", "licence"].some((stem) => basename === stem || basename.startsWith(`${stem}.`));
}
276
+
277
/**
 * Maps a license expression to one of the recognised identifiers.
 *
 * The expression is trimmed, parentheses are removed and the comparison is case-insensitive.
 * Compound expressions (for example "MIT OR Apache-2.0") are not recognised.
 *
 * @param {string} expression - License expression, typically from package.json.
 * @returns {string|null} Canonical identifier, or null when not recognised.
 */
function normalizeLicenseExpression(expression) {
  const key = expression.trim().replace(/[()]/g, "").toUpperCase();
  switch (key) {
    case "MIT":
      return "MIT";
    case "APACHE-2.0":
      return "Apache-2.0";
    case "GPL-2.0":
    case "GPL-2.0-ONLY":
      return "GPL-2.0";
    case "GPL-3.0":
    case "GPL-3.0-ONLY":
      return "GPL-3.0";
    case "BSD-3-CLAUSE":
      return "BSD-3-Clause";
    case "ISC":
      return "ISC";
    case "MPL-2.0":
      return "MPL-2.0";
    default:
      return null;
  }
}
292
+
293
/**
 * Classifies license file text by looking for well-known phrases.
 *
 * Signatures are tried in a fixed order and the first match wins, so text containing several
 * phrases resolves to the earliest entry in the table.
 *
 * @param {string} content - License file text (possibly only a prefix of the file).
 * @returns {string|null} Recognised license identifier, or null.
 */
function normalizeLicenseText(content) {
  const signatures = [
    ["MIT", (text) => /MIT License/i.test(text)],
    ["Apache-2.0", (text) => /Apache License[\s\S]{0,400}Version 2\.0/i.test(text)],
    ["GPL-3.0", (text) => /GNU GENERAL PUBLIC LICENSE[\s\S]{0,800}Version 3/i.test(text)],
    ["GPL-2.0", (text) => /GNU GENERAL PUBLIC LICENSE[\s\S]{0,800}Version 2/i.test(text)],
    [
      "BSD-3-Clause",
      (text) => /Redistribution and use in source and binary forms/i.test(text) && /Neither the name/i.test(text)
    ],
    ["ISC", (text) => /ISC License/i.test(text)],
    ["MPL-2.0", (text) => /Mozilla Public License Version 2\.0/i.test(text)]
  ];

  const hit = signatures.find(([, matches]) => matches(content));
  return hit ? hit[0] : null;
}
317
+
318
/**
 * Turns collected license signals into findings.
 *
 * - No signals at all: one blocking "license.missing" finding, then return.
 * - Each signal: a non-blocking "license.detected" finding when recognised, otherwise a blocking
 *   "license.unknown" finding.
 * - More than one distinct recognised license: one blocking "license.conflict" finding listing them
 *   in sorted order.
 *
 * @param {object} args
 * @param {Array} args.licenses - License records with path, source, expression, normalized, status.
 * @param {Array} args.findings - Receives the findings.
 * @returns {void}
 */
function applyLicenseGates({ licenses, findings }) {
  if (licenses.length === 0) {
    findings.push(
      createFinding({
        code: "license.missing",
        severity: "high",
        message: "No license signal was found in external intake.",
        blocking: true
      })
    );
    return;
  }

  const distinct = new Set();
  for (const item of licenses) {
    const recognized = item.status === "recognized";
    findings.push(
      createFinding({
        code: recognized ? "license.detected" : "license.unknown",
        severity: recognized ? "low" : "high",
        message: recognized ? "License signal was detected." : "Unknown license signal requires manual review.",
        path: item.path,
        blocking: !recognized,
        details: {
          source: item.source,
          expression: item.expression ?? undefined,
          normalized: item.normalized ?? undefined
        }
      })
    );
    if (item.normalized) {
      distinct.add(item.normalized);
    }
  }

  if (distinct.size <= 1) {
    return;
  }
  findings.push(
    createFinding({
      code: "license.conflict",
      severity: "high",
      message: "Conflicting license signals require manual review.",
      blocking: true,
      details: { normalized: Array.from(distinct).sort() }
    })
  );
}
363
+
364
/**
 * Scans a file's text prefix with every SECRET_RULES pattern and records one blocking, critical
 * "secret.detected" finding per match. The matched text itself is never stored; only a fixed
 * redaction marker and a SHA-256 fingerprint are.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path.
 * @param {Array} args.findings - Receives the findings.
 * @returns {Promise<void>}
 */
async function applySecretScanner({ filePath, rel, findings }) {
  const content = await readTextPrefix({
    filePath,
    rel,
    maxBytes: SECRET_TEXT_READ_LIMIT_BYTES,
    findings,
    purpose: "secret-scan"
  });
  if (!content) {
    return;
  }

  const reported = new Set();
  for (const { id, pattern } of SECRET_RULES) {
    // The patterns are shared global regexes; start every scan from the beginning.
    pattern.lastIndex = 0;

    for (const hit of content.matchAll(pattern)) {
      const offset = hit.index ?? 0;
      const line = lineNumberAt(content, offset);

      const key = [id, line, offset].join("\0");
      if (reported.has(key)) {
        continue;
      }
      reported.add(key);

      findings.push(
        createFinding({
          code: "secret.detected",
          severity: "critical",
          message: "Potential secret is present in external intake.",
          path: rel,
          blocking: true,
          details: {
            rule: id,
            line,
            redacted: `[redacted:${id}]`,
            fingerprint: fingerprintSecret({ rel, ruleId: id, line, matchText: hit[0] })
          }
        })
      );
    }
  }
}
406
+
407
/**
 * Returns the 1-based line number of the character at `index`, counting "\n" characters that occur
 * strictly before that position.
 *
 * @param {string} content - Text being indexed.
 * @param {number} index - Character offset into `content`.
 * @returns {number} Line number, starting at 1.
 */
function lineNumberAt(content, index) {
  let line = 1;
  let newlineAt = content.indexOf("\n");
  while (newlineAt !== -1 && newlineAt < index) {
    line += 1;
    newlineAt = content.indexOf("\n", newlineAt + 1);
  }
  return line;
}
416
+
417
/**
 * Builds a fingerprint for a detected secret so it can be identified without storing the secret.
 *
 * The digest covers the relative path, rule id, line number and matched text, joined by NUL
 * characters.
 *
 * @param {object} args
 * @param {string} args.rel - Report-relative path of the file.
 * @param {string} args.ruleId - Id of the rule that matched.
 * @param {number} args.line - 1-based line number of the match.
 * @param {string} args.matchText - The matched text.
 * @returns {string} "sha256:" followed by the hex digest.
 */
function fingerprintSecret({ rel, ruleId, line, matchText }) {
  const material = `${rel}\0${ruleId}\0${line}\0${matchText}`;
  const hasher = createHash("sha256");
  hasher.update(material);
  return `sha256:${hasher.digest("hex")}`;
}
420
+
421
/**
 * Checks a file's text prefix against DANGEROUS_CAPABILITY_RULES and records at most one blocking
 * "dangerous.capability" finding per category, using the first match of each pattern. The finding
 * carries a redacted snippet of the text surrounding the match.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path.
 * @param {Array} args.findings - Receives the findings.
 * @returns {Promise<void>}
 */
async function applyDangerousCapabilityClassifier({ filePath, rel, findings }) {
  const content = await readTextPrefix({
    filePath,
    rel,
    maxBytes: DANGEROUS_TEXT_READ_LIMIT_BYTES,
    findings,
    purpose: "dangerous-capability"
  });
  if (!content) {
    return;
  }

  const reportedCategories = new Set();
  for (const { category, pattern } of DANGEROUS_CAPABILITY_RULES) {
    if (reportedCategories.has(category)) {
      continue;
    }

    const found = content.match(pattern);
    if (found) {
      reportedCategories.add(category);
      const context = extractSnippet(content, found.index ?? 0, found[0].length);
      findings.push(
        createFinding({
          code: "dangerous.capability",
          severity: "high",
          message: "Dangerous capability pattern is present in external intake.",
          path: rel,
          blocking: true,
          details: { category, snippet: redactSnippet(context) }
        })
      );
    }
  }
}
458
+
459
/**
 * Returns the matched span plus up to 40 characters of context on each side, clamped to the bounds
 * of `content`.
 *
 * @param {string} content - Full text.
 * @param {number} index - Start offset of the match.
 * @param {number} length - Length of the match.
 * @returns {string} The surrounding slice of `content`.
 */
function extractSnippet(content, index, length) {
  const contextChars = 40;
  const from = Math.max(0, index - contextChars);
  const to = Math.min(content.length, index + length + contextChars);
  return content.slice(from, to);
}
464
+
465
/**
 * Inventories script-like surfaces of one file:
 * package.json scripts, GitHub workflow files directly under `.github/workflows/`, action.yml /
 * action.yaml files, and executable surfaces (Dockerfiles, Makefiles, shell and PowerShell scripts).
 * An executable surface always yields a blocking "script.executable-surface" finding with a redacted
 * snippet of the file's text prefix.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path.
 * @param {Array} args.findings - Receives the findings.
 * @returns {Promise<void>}
 */
async function applyScriptWorkflowInventory({ filePath, rel, findings }) {
  const lowerPath = rel.toLowerCase();
  const fileName = path.posix.basename(lowerPath);
  const target = { filePath, rel, findings };

  if (fileName === "package.json") {
    await inspectPackageScripts(target);
  }

  if (/^\.github\/workflows\/[^/]+\.ya?ml$/i.test(rel)) {
    await inspectWorkflowFile(target);
  }

  if (["action.yml", "action.yaml"].includes(fileName)) {
    await inspectCompositeAction(target);
  }

  if (!isExecutableSurfacePath(lowerPath)) {
    return;
  }

  const text = await readTextPrefix({
    filePath,
    rel,
    maxBytes: SCRIPT_TEXT_READ_LIMIT_BYTES,
    findings,
    purpose: "script-inventory"
  });
  findings.push(
    createFinding({
      code: "script.executable-surface",
      severity: "high",
      message: "Executable file surface is present in external intake.",
      path: rel,
      blocking: true,
      details: {
        surface: executableSurfaceKind(lowerPath),
        snippet: redactSnippet(text)
      }
    })
  );
}
504
+
505
/**
 * Reports every string-valued entry of a package.json `scripts` object, in locale-sorted name order.
 *
 * Lifecycle scripts (names in PACKAGE_LIFECYCLE_SCRIPTS, compared case-insensitively) produce blocking
 * "script.lifecycle" findings; all other scripts produce non-blocking "script.package-script" findings.
 * A manifest prefix that is not valid JSON produces a blocking "script.package-json-parse" finding.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the package.json.
 * @param {string} args.rel - Report-relative path.
 * @param {Array} args.findings - Receives the findings.
 * @returns {Promise<void>}
 */
async function inspectPackageScripts({ filePath, rel, findings }) {
  const raw = await readTextPrefix({
    filePath,
    rel,
    maxBytes: SCRIPT_TEXT_READ_LIMIT_BYTES,
    findings,
    purpose: "script-package-json"
  });

  let manifest;
  try {
    manifest = JSON.parse(raw);
  } catch {
    findings.push(
      createFinding({
        code: "script.package-json-parse",
        severity: "high",
        message: "Unable to parse package.json for script inventory.",
        path: rel,
        blocking: true
      })
    );
    return;
  }

  const scripts = manifest && typeof manifest === "object" ? manifest.scripts : undefined;
  if (!scripts || typeof scripts !== "object") {
    return;
  }

  const names = Object.keys(scripts).sort((a, b) => a.localeCompare(b));
  for (const name of names) {
    const command = scripts[name];
    if (typeof command !== "string") {
      continue;
    }

    const isLifecycle = PACKAGE_LIFECYCLE_SCRIPTS.has(name.toLowerCase());
    findings.push(
      createFinding({
        code: isLifecycle ? "script.lifecycle" : "script.package-script",
        severity: isLifecycle ? "high" : "medium",
        message: isLifecycle
          ? "Package lifecycle script is present in external intake."
          : "Package script is present in external intake inventory.",
        path: rel,
        blocking: isLifecycle,
        details: { name, snippet: redactSnippet(command) }
      })
    );
  }
}
554
+
555
/**
 * Looks for the first `run:` line in a workflow file's text prefix and, when one exists, records a
 * single blocking "script.workflow-run" finding with a redacted copy of that line.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the workflow file.
 * @param {string} args.rel - Report-relative path.
 * @param {Array} args.findings - Receives the finding.
 * @returns {Promise<void>}
 */
async function inspectWorkflowFile({ filePath, rel, findings }) {
  const text = await readTextPrefix({
    filePath,
    rel,
    maxBytes: SCRIPT_TEXT_READ_LIMIT_BYTES,
    findings,
    purpose: "script-workflow"
  });

  let firstRunLine;
  for (const line of text.split(/\r?\n/)) {
    if (/^\s*(?:-\s*)?run\s*:/.test(line)) {
      firstRunLine = line;
      break;
    }
  }
  if (!firstRunLine) {
    return;
  }

  findings.push(
    createFinding({
      code: "script.workflow-run",
      severity: "high",
      message: "GitHub workflow run step is present in external intake.",
      path: rel,
      blocking: true,
      details: { snippet: redactSnippet(firstRunLine) }
    })
  );
}
581
+
582
/**
 * Records a blocking "script.composite-action" finding when an action file's text prefix contains a
 * `runs:` key followed somewhere later by `using: composite` (optionally quoted).
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the action file.
 * @param {string} args.rel - Report-relative path.
 * @param {Array} args.findings - Receives the finding.
 * @returns {Promise<void>}
 */
async function inspectCompositeAction({ filePath, rel, findings }) {
  const text = await readTextPrefix({
    filePath,
    rel,
    maxBytes: SCRIPT_TEXT_READ_LIMIT_BYTES,
    findings,
    purpose: "script-composite-action"
  });

  const isComposite = /runs\s*:[\s\S]*using\s*:\s*['"]?composite['"]?/i.test(text);
  if (isComposite) {
    findings.push(
      createFinding({
        code: "script.composite-action",
        severity: "high",
        message: "Composite action entrypoint is present in external intake.",
        path: rel,
        blocking: true,
        details: { snippet: redactSnippet(text) }
      })
    );
  }
}
607
+
608
/**
 * Tells whether a lower-cased relative path names an executable surface: a Dockerfile
 * ("dockerfile" or "*.dockerfile"), a Makefile ("makefile" / "gnumakefile"), or a file ending in one
 * of EXECUTABLE_SURFACE_EXTENSIONS.
 *
 * @param {string} lowerPath - Lower-cased, forward-slash relative path.
 * @returns {boolean}
 */
function isExecutableSurfacePath(lowerPath) {
  const fileName = path.posix.basename(lowerPath);
  const isNamedSurface = fileName === "dockerfile" || fileName === "makefile" || fileName === "gnumakefile";
  if (isNamedSurface || lowerPath.endsWith(".dockerfile")) {
    return true;
  }
  return Array.from(EXECUTABLE_SURFACE_EXTENSIONS).some((extension) => lowerPath.endsWith(extension));
}
623
+
624
/**
 * Names the kind of executable surface for a lower-cased relative path.
 *
 * @param {string} lowerPath - Lower-cased, forward-slash relative path.
 * @returns {"dockerfile"|"makefile"|"powershell"|"shell"} "shell" is the fallback for every path
 *   that is not a Dockerfile, Makefile or `.ps1` file.
 */
function executableSurfaceKind(lowerPath) {
  const fileName = path.posix.basename(lowerPath);
  switch (true) {
    case fileName === "dockerfile":
    case lowerPath.endsWith(".dockerfile"):
      return "dockerfile";
    case fileName === "makefile":
    case fileName === "gnumakefile":
      return "makefile";
    case lowerPath.endsWith(".ps1"):
      return "powershell";
    default:
      return "shell";
  }
}
637
+
638
/**
 * Produces a short, single-line, secret-free excerpt suitable for inclusion in a report.
 *
 * Redactions are applied before whitespace is collapsed and the result is trimmed and cut to 160
 * characters. Besides bearer tokens, well-known token prefixes and `name = value` style assignments,
 * this also masks private-key blocks, credentials embedded in URLs, bare JWTs and npm / PyPI tokens,
 * so that snippets do not leak secrets of the kinds the secret scanner reports.
 *
 * @param {*} value - Text to excerpt; coerced with String().
 * @returns {string} Redacted excerpt of at most 160 characters.
 */
function redactSnippet(value) {
  return (
    String(value)
      // Private key material: mask from the BEGIN marker to the END marker (or to the end of the text).
      .replace(
        /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?(?:-----END [A-Z ]*PRIVATE KEY-----|$)/gi,
        "[redacted-private-key]"
      )
      // scheme://user:password@host -> scheme://[redacted]@host
      .replace(/\b([a-z][a-z0-9+.-]*:\/\/)[^:@\s/]+:[^@\s/]+@/gi, "$1[redacted]@")
      .replace(/(bearer\s+)[A-Za-z0-9._-]{8,}/gi, "$1[redacted-token]")
      // Bare JSON Web Tokens (three base64url segments starting with "eyJ").
      .replace(/\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/g, "[redacted-token]")
      .replace(
        /\b(sk-[A-Za-z0-9_-]{8,}|gh[pousr]_[A-Za-z0-9_]{8,}|AKIA[0-9A-Z]{12,}|npm_[A-Za-z0-9]{8,}|pypi-[A-Za-z0-9_-]{8,})\b/g,
        "[redacted-token]"
      )
      .replace(/\b(token|secret|password|api[_-]?key)\s*[:=]\s*["']?[^"',\s]+/gi, "$1=[redacted]")
      .replace(/\s+/g, " ")
      .trim()
      .slice(0, 160)
  );
}
647
+
648
/**
 * Classifies a file as an archive, executable or binary payload from its path suffix and the first
 * bytes of its content, and records a blocking "payload.<category>" finding when any signal fires.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path.
 * @param {Array} args.findings - Receives the finding.
 * @returns {Promise<void>}
 */
async function applyPayloadClassifier({ filePath, rel, findings }) {
  const lowerPath = rel.toLowerCase();

  const signals = [];
  if (hasCompoundExtension(lowerPath, ARCHIVE_EXTENSIONS)) {
    signals.push("archive-extension");
  }
  if (hasCompoundExtension(lowerPath, EXECUTABLE_EXTENSIONS)) {
    signals.push("executable-extension");
  }

  const prefix = await readBufferPrefix({
    filePath,
    rel,
    maxBytes: PAYLOAD_PREFIX_READ_LIMIT_BYTES,
    findings,
    purpose: "payload-classifier"
  });
  if (!prefix) {
    // The read failure has already been reported by readBufferPrefix.
    return;
  }

  signals.push(...detectMagicSignals(prefix));
  if (isBinaryLike(prefix) && !signals.includes("binary-heuristic")) {
    signals.push("binary-heuristic");
  }

  const category = classifyPayloadSignals(signals);
  if (category) {
    findings.push(
      createFinding({
        code: `payload.${category}`,
        severity: "high",
        message: `External intake does not allow ${category} payloads.`,
        path: rel,
        blocking: true,
        details: { category, signals: stableUnique(signals) }
      })
    );
  }
}
694
+
695
/**
 * Tells whether `lowerPath` ends with any suffix in `extensionSet`. Suffix matching (rather than
 * taking the last extension) lets multi-part extensions such as ".tar.gz" be listed directly.
 *
 * @param {string} lowerPath - Lower-cased path.
 * @param {Iterable<string>} extensionSet - Suffixes to test.
 * @returns {boolean}
 */
function hasCompoundExtension(lowerPath, extensionSet) {
  return Array.from(extensionSet).some((suffix) => lowerPath.endsWith(suffix));
}
703
+
704
/**
 * Detects well-known file signatures at the start of `buffer`.
 *
 * @param {Buffer} buffer - Leading bytes of a file.
 * @returns {string[]} Matching signal names, in the fixed order zip, gzip, pe, elf, macho, pdf, sqlite.
 */
function detectMagicSignals(buffer) {
  // Each signal fires when the buffer starts with any one of its byte prefixes.
  const signatureTable = [
    [
      "zip-magic",
      [
        [0x50, 0x4b, 0x03, 0x04],
        [0x50, 0x4b, 0x05, 0x06],
        [0x50, 0x4b, 0x07, 0x08]
      ]
    ],
    ["gzip-magic", [[0x1f, 0x8b]]],
    ["pe-magic", [[0x4d, 0x5a]]],
    ["elf-magic", [[0x7f, 0x45, 0x4c, 0x46]]],
    [
      "macho-magic",
      [
        [0xfe, 0xed, 0xfa, 0xce],
        [0xfe, 0xed, 0xfa, 0xcf],
        [0xce, 0xfa, 0xed, 0xfe],
        [0xcf, 0xfa, 0xed, 0xfe],
        [0xca, 0xfe, 0xba, 0xbe]
      ]
    ],
    ["pdf-magic", [[0x25, 0x50, 0x44, 0x46, 0x2d]]]
  ];

  const signals = signatureTable
    .filter(([, prefixes]) => prefixes.some((prefix) => hasBytes(buffer, prefix)))
    .map(([name]) => name);

  // SQLite databases start with a 16-byte ASCII header that includes a trailing NUL.
  if (buffer.subarray(0, 16).toString("ascii") === "SQLite format 3\0") {
    signals.push("sqlite-magic");
  }
  return signals;
}
735
+
736
/**
 * Tells whether `buffer` starts with the given byte sequence.
 *
 * @param {Buffer|Uint8Array|number[]} buffer - Bytes to inspect.
 * @param {number[]} bytes - Expected leading bytes.
 * @returns {boolean} False when the buffer is shorter than `bytes`.
 */
function hasBytes(buffer, bytes) {
  const wanted = bytes.length;
  if (wanted > buffer.length) {
    return false;
  }
  for (let position = 0; position < wanted; position += 1) {
    if (buffer[position] !== bytes[position]) {
      return false;
    }
  }
  return true;
}
742
+
743
/**
 * Heuristically decides whether bytes look like binary rather than text.
 *
 * Any NUL byte means binary. Otherwise the buffer is binary when more than 10% of its bytes are
 * control characters, where tab through carriage return (0x09-0x0d) and ESC (0x1b) do not count.
 *
 * @param {Buffer} buffer - Leading bytes of a file.
 * @returns {boolean} False for an empty buffer.
 */
function isBinaryLike(buffer) {
  const total = buffer.length;
  if (total === 0) {
    return false;
  }

  let suspicious = 0;
  for (let position = 0; position < total; position += 1) {
    const byte = buffer[position];
    if (byte === 0) {
      return true;
    }
    const isControl = byte < 0x09 || (byte > 0x0d && byte < 0x20);
    if (isControl && byte !== 0x1b) {
      suspicious += 1;
    }
  }

  return suspicious / total > 0.1;
}
760
+
761
/**
 * Reduces payload signals to a single category. Categories are checked in priority order
 * archive, executable, binary; the first category with any matching signal wins.
 *
 * @param {string[]} signals - Signal names gathered for one file.
 * @returns {"archive"|"executable"|"binary"|null} Null when no signal maps to a category.
 */
function classifyPayloadSignals(signals) {
  const categories = [
    ["archive", new Set(["archive-extension", "zip-magic", "gzip-magic"])],
    ["executable", new Set(["executable-extension", "pe-magic", "elf-magic", "macho-magic"])],
    ["binary", new Set(["binary-heuristic", "pdf-magic", "sqlite-magic"])]
  ];

  for (const [category, members] of categories) {
    if (signals.some((signal) => members.has(signal))) {
      return category;
    }
  }
  return null;
}
773
+
774
/**
 * Returns the distinct values of `values` as a new array in default sort order.
 *
 * @param {Iterable<string>} values - Values that may contain duplicates.
 * @returns {string[]} New sorted array without duplicates.
 */
function stableUnique(values) {
  const distinct = new Set(values);
  return Array.from(distinct).sort();
}
777
+
778
/**
 * Builds the frozen intake policy from caller options, falling back to DEFAULT_INTAKE_POLICY for
 * limits that are not supplied.
 *
 * @param {object} options - Caller options; only `maxFiles` and `maxBytes` are read.
 * @returns {{maxFiles: number, maxBytes: number}} Frozen policy.
 * @throws {Error} When a supplied limit is not a positive safe integer.
 */
function normalizePolicy(options) {
  const maxFiles = normalizePositiveInteger(options.maxFiles, DEFAULT_INTAKE_POLICY.maxFiles, "maxFiles");
  const maxBytes = normalizePositiveInteger(options.maxBytes, DEFAULT_INTAKE_POLICY.maxBytes, "maxBytes");
  return Object.freeze({ maxFiles, maxBytes });
}
784
+
785
/**
 * Validates an optional positive-integer option.
 *
 * @param {*} value - Caller-supplied value; null and undefined mean "not supplied".
 * @param {number} fallback - Value returned when nothing was supplied.
 * @param {string} name - Option name used in the error message.
 * @returns {number} `value` when valid, otherwise `fallback`.
 * @throws {Error} When a supplied value is not a safe integer of at least 1.
 */
function normalizePositiveInteger(value, fallback, name) {
  // `== null` deliberately covers both null and undefined.
  if (value == null) {
    return fallback;
  }

  const isValid = Number.isSafeInteger(value) && value >= 1;
  if (isValid) {
    return value;
  }
  throw new Error(`${name} must be a positive safe integer.`);
}
796
+
797
/**
 * Applies repository-metadata gates to one file: `.gitmodules` is always blocked, `.gitattributes`
 * is inspected for LFS filter rules, and every file is checked for being a Git LFS pointer.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path.
 * @param {object} args.stat - Stat result for the file; forwarded to the inspectors.
 * @param {Array} args.findings - Receives the findings.
 * @returns {Promise<void>}
 */
async function applyRepositoryMetadataGates({ filePath, rel, stat, findings }) {
  const fileName = path.posix.basename(rel);
  const target = { filePath, rel, stat, findings };

  switch (fileName) {
    case ".gitmodules":
      findings.push(
        createFinding({
          code: "repo.submodule-config",
          severity: "high",
          message: "Submodule configuration is not allowed for external intake.",
          path: rel,
          blocking: true
        })
      );
      break;
    case ".gitattributes":
      await inspectGitAttributes(target);
      break;
    default:
      break;
  }

  await inspectLfsPointer(target);
}
817
+
818
/**
 * Inspects a `.gitattributes` file.
 *
 * Records a blocking "repo.metadata-truncated" finding when the file is larger than the bounded read
 * limit, and a blocking "repo.lfs-attributes" finding when the text that was read contains a
 * `filter=lfs` rule.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path.
 * @param {object} args.stat - Stat result; `size` is compared against the read limit.
 * @param {Array} args.findings - Receives the findings.
 * @returns {Promise<void>}
 */
async function inspectGitAttributes({ filePath, rel, stat, findings }) {
  // Read first so that a read-failure finding precedes the findings below.
  const text = await readTextPrefix({
    filePath,
    rel,
    maxBytes: GITATTRIBUTES_READ_LIMIT_BYTES,
    findings,
    purpose: "gitattributes"
  });

  const isTruncated = stat.size > GITATTRIBUTES_READ_LIMIT_BYTES;
  if (isTruncated) {
    findings.push(
      createFinding({
        code: "repo.metadata-truncated",
        severity: "high",
        message: "Repository metadata file exceeds bounded read limit.",
        path: rel,
        blocking: true,
        details: { bytes: stat.size, maxBytes: GITATTRIBUTES_READ_LIMIT_BYTES }
      })
    );
  }

  const declaresLfsFilter = /\bfilter\s*=\s*lfs\b/i.test(text);
  if (declaresLfsFilter) {
    findings.push(
      createFinding({
        code: "repo.lfs-attributes",
        severity: "high",
        message: "Git LFS filter rules are not allowed for external intake.",
        path: rel,
        blocking: true
      })
    );
  }
}
855
+
856
/**
 * Records a blocking "repo.lfs-pointer" finding when a small file is a Git LFS pointer: it starts
 * with the LFS spec header and has both an `oid sha256:<64 hex>` line and a `size <digits>` line.
 * Files larger than the pointer read limit are not read at all.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path.
 * @param {object} args.stat - Stat result; `size` decides whether the file is read.
 * @param {Array} args.findings - Receives the finding.
 * @returns {Promise<void>}
 */
async function inspectLfsPointer({ filePath, rel, stat, findings }) {
  if (stat.size > LFS_POINTER_READ_LIMIT_BYTES) {
    return;
  }

  const text = await readTextPrefix({
    filePath,
    rel,
    maxBytes: LFS_POINTER_READ_LIMIT_BYTES,
    findings,
    purpose: "lfs-pointer"
  });

  const hasHeader = text.startsWith(LFS_POINTER_HEADER);
  const isPointer = hasHeader && /\noid sha256:[a-f0-9]{64}\b/i.test(text) && /\nsize \d+\b/i.test(text);
  if (!isPointer) {
    return;
  }

  findings.push(
    createFinding({
      code: "repo.lfs-pointer",
      severity: "high",
      message: "Git LFS pointer files are not allowed for external intake.",
      path: rel,
      blocking: true
    })
  );
}
881
+
882
/**
 * Reads up to `maxBytes` from the start of a file and decodes them as UTF-8.
 *
 * @param {object} args - Forwarded unchanged to readBufferPrefix (filePath, rel, maxBytes, findings, purpose).
 * @returns {Promise<string>} Decoded text, or "" when the read failed (the failure is reported as a
 *   finding by readBufferPrefix).
 */
async function readTextPrefix({ filePath, rel, maxBytes, findings, purpose }) {
  const bytes = await readBufferPrefix({ filePath, rel, maxBytes, findings, purpose });
  if (!bytes) {
    return "";
  }
  return bytes.toString("utf8");
}
886
+
887
/**
 * Reads up to `maxBytes` from the start of a file.
 *
 * On failure a blocking, high-severity finding is recorded whose code and message depend on
 * `purpose` ("payload-classifier", "script-*", "dangerous-capability", "secret-scan", "license-*",
 * anything else being treated as repository metadata), and null is returned. The file handle is
 * closed in every case.
 *
 * @param {object} args
 * @param {string} args.filePath - Absolute path of the file.
 * @param {string} args.rel - Report-relative path used in the finding.
 * @param {number} args.maxBytes - Maximum number of bytes to read.
 * @param {Array} args.findings - Receives the read-failure finding.
 * @param {string} args.purpose - Why the file is being read; selects the finding code and message.
 * @returns {Promise<Buffer|null>} The bytes actually read, or null when the read failed.
 */
async function readBufferPrefix({ filePath, rel, maxBytes, findings, purpose }) {
  let handle;
  try {
    handle = await fs.open(filePath, "r");
    const scratch = Buffer.alloc(maxBytes);
    const { bytesRead } = await handle.read(scratch, 0, maxBytes, 0);
    return scratch.subarray(0, bytesRead);
  } catch (error) {
    // Ordered table: the first entry whose condition holds decides the finding's code and message.
    const failureKinds = [
      [purpose === "payload-classifier", "payload.read-file", "Unable to read file prefix for payload classification."],
      [purpose.startsWith("script-"), "script.read-file", "Unable to read file prefix for script inventory."],
      [
        purpose === "dangerous-capability",
        "dangerous.read-file",
        "Unable to read file prefix for dangerous capability classification."
      ],
      [purpose === "secret-scan", "secret.read-file", "Unable to read file prefix for secret scanning."],
      [purpose.startsWith("license-"), "license.read-file", "Unable to read file prefix for license inventory."]
    ];
    const matched = failureKinds.find(([applies]) => applies);
    const [, code, message] = matched ?? [true, "repo.metadata-read", "Unable to read repository metadata."];

    findings.push(
      createFinding({
        code,
        severity: "high",
        message,
        path: rel,
        blocking: true,
        details: { purpose, reason: safeErrorCode(error) }
      })
    );
    return null;
  } finally {
    await handle?.close();
  }
}
938
+
939
/**
 * Enforces the repository-wide limits: a blocking "repo.max-files" finding when the inventory holds
 * more files than `policy.maxFiles`, and a blocking "repo.max-bytes" finding when the summed file
 * sizes exceed `policy.maxBytes`.
 *
 * @param {object} args
 * @param {Array<{bytes: number}>} args.inventory - Collected file records.
 * @param {Array} args.findings - Receives the findings.
 * @param {{maxFiles: number, maxBytes: number}} args.policy - Limits to enforce.
 * @returns {void}
 */
function applyRepositoryLimitGates({ inventory, findings, policy }) {
  const reportExceeded = (code, message, details) => {
    findings.push(createFinding({ code, severity: "high", message, blocking: true, details }));
  };

  let totalBytes = 0;
  for (const item of inventory) {
    totalBytes += item.bytes;
  }

  const fileCount = inventory.length;
  if (fileCount > policy.maxFiles) {
    reportExceeded("repo.max-files", "Repository file count exceeds external intake policy.", {
      files: fileCount,
      maxFiles: policy.maxFiles
    });
  }

  if (totalBytes > policy.maxBytes) {
    reportExceeded("repo.max-bytes", "Repository byte count exceeds external intake policy.", {
      bytes: totalBytes,
      maxBytes: policy.maxBytes
    });
  }
}
971
+
972
/**
 * Builds a frozen finding record.
 *
 * @param {object} args
 * @param {string} args.code - Machine-readable finding code.
 * @param {string} args.severity - Severity label.
 * @param {string} args.message - Human-readable message.
 * @param {string} [args.path="."] - Path the finding refers to; backslashes are normalised to "/".
 * @param {boolean} [args.blocking=false] - Whether the finding blocks the intake.
 * @param {object} [args.details={}] - Extra data; shallow-copied and frozen.
 * @returns {object} Frozen finding with code, severity, message, path, blocking and details.
 */
function createFinding({ code, severity, message, path: findingPath = ".", blocking = false, details = {} }) {
  const finding = {
    code,
    severity,
    message,
    path: normalizeReportPath(findingPath),
    blocking,
    details: freezePlainObject(details)
  };
  return Object.freeze(finding);
}
982
+
983
/**
 * Computes the report path of `absolutePath` relative to `root`, using forward slashes.
 * The root itself is reported as ".".
 *
 * @param {string} root - Absolute scan root.
 * @param {string} absolutePath - Absolute path inside the scan.
 * @returns {string} Normalised relative path.
 */
function relativePath(root, absolutePath) {
  const relative = path.relative(root, absolutePath);
  return normalizeReportPath(relative || ".");
}
987
+
988
/**
 * Converts a path to report form by replacing every backslash with a forward slash.
 *
 * @param {*} value - Path-like value; coerced with String().
 * @returns {string}
 */
function normalizeReportPath(value) {
  return String(value).split("\\").join("/");
}
991
+
992
/**
 * Extracts a reportable error code without exposing messages or stack traces.
 *
 * @param {*} error - Caught value of any type.
 * @returns {string} `error.code` when it is a string, otherwise "unknown".
 */
function safeErrorCode(error) {
  if (typeof error?.code === "string") {
    return error.code;
  }
  return "unknown";
}
995
+
996
/**
 * Returns a frozen copy of a report in which the `source` and `summary` objects, the `inventory`,
 * `licenses` and `findings` arrays, and every record inside those arrays are frozen copies too.
 * A missing `licenses` list is treated as empty.
 *
 * @param {object} report - Report with source, summary, inventory, findings and optional licenses.
 * @returns {object} Frozen report.
 */
function freezeReport(report) {
  const freezeCopy = (record) => Object.freeze({ ...record });
  const freezeList = (records, freezeItem) => Object.freeze(records.map((record) => freezeItem(record)));

  return Object.freeze({
    ...report,
    source: freezeCopy(report.source),
    summary: freezeCopy(report.summary),
    inventory: freezeList(report.inventory, freezeCopy),
    licenses: freezeList(report.licenses ?? [], freezeCopy),
    findings: freezeList(report.findings, (item) => freezeFinding(item))
  });
}
1006
+
1007
/**
 * Returns a frozen copy of a finding whose `details` is a frozen shallow copy as well.
 * Missing details become an empty object.
 *
 * @param {object} finding - Finding record.
 * @returns {object} Frozen copy of the finding.
 */
function freezeFinding(finding) {
  const details = freezePlainObject(finding.details ?? {});
  return Object.freeze({ ...finding, details });
}
1013
+
1014
/**
 * Returns a frozen shallow copy of `value`; the input object itself is left unfrozen.
 *
 * @param {object} value - Object whose own enumerable properties are copied.
 * @returns {object} Frozen shallow copy.
 */
function freezePlainObject(value) {
  const shallowCopy = { ...value };
  return Object.freeze(shallowCopy);
}