codeprobe-scanner 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/.claude/settings.local.json +19 -0
  2. package/.dockerignore +17 -0
  3. package/.env.development +8 -0
  4. package/.env.example +20 -0
  5. package/.env.setup +214 -0
  6. package/.github/workflows/codeprobe-scan.yml +137 -0
  7. package/.github/workflows/codeprobe.yml +84 -0
  8. package/.github/workflows/scan-schedule.yml +28 -0
  9. package/ANALYSIS_SUMMARY.md +365 -0
  10. package/API_INTEGRATIONS.md +469 -0
  11. package/BUILD_PLAYBOOK.md +349 -0
  12. package/CLAUDE.md +106 -0
  13. package/DEPLOY.md +452 -0
  14. package/DEPLOYMENT_STATUS.md +240 -0
  15. package/DEPLOY_CHECKLIST.md +316 -0
  16. package/Dockerfile +24 -0
  17. package/EXECUTION_PLAN.html +1086 -0
  18. package/IMPLEMENTATION_COMPLETE.md +288 -0
  19. package/IMPLEMENTATION_SUMMARY.md +443 -0
  20. package/INTERACTIVE_FIX_FLOW.md +308 -0
  21. package/MIGRATION_COMPLETE.md +327 -0
  22. package/ORCHESTRATOR_SYNTHESIS.json +80 -0
  23. package/PENDING_WORK.md +308 -0
  24. package/PREFLIGHT_PLAN.md +182 -0
  25. package/QUICKSTART.md +305 -0
  26. package/README.md +15 -0
  27. package/STAGE_1_SETUP_ENGINE.md +245 -0
  28. package/STAGE_2_ARCHITECTURE.md +714 -0
  29. package/STAGE_2_CLI_VERIFICATION.md +269 -0
  30. package/STAGE_2_COMPLETE.md +332 -0
  31. package/STAGE_2_IMPLEMENTATION_PLAN.md +679 -0
  32. package/STAGE_3_COMPLETE.md +246 -0
  33. package/STAGE_3_DASHBOARD_POLISH.md +371 -0
  34. package/STAGE_3_SETUP.md +155 -0
  35. package/VIDEODB_INTEGRATION.md +237 -0
  36. package/archived/DASHBOARD_UI_WALKTHROUGH.md +392 -0
  37. package/archived/FRONTEND_SETUP.md +236 -0
  38. package/archived/auth.ts +40 -0
  39. package/archived/dashboard/components/BusinessImpactCard.tsx +48 -0
  40. package/archived/dashboard/components/CVETable.tsx +104 -0
  41. package/archived/dashboard/components/ErrorBoundary.tsx +48 -0
  42. package/archived/dashboard/components/PatchDiffViewer.tsx +43 -0
  43. package/archived/dashboard/components/RiskGauge.tsx +64 -0
  44. package/archived/dashboard/frontend.tsx +104 -0
  45. package/archived/dashboard/hooks/useAuth.ts +32 -0
  46. package/archived/dashboard/hooks/useScan.ts +65 -0
  47. package/archived/dashboard/index.html +15 -0
  48. package/archived/dashboard/pages/LoginPage.tsx +28 -0
  49. package/archived/dashboard/pages/ScanDetailPage.tsx +143 -0
  50. package/archived/dashboard/pages/ScansListPage.tsx +160 -0
  51. package/bin/install-and-run.sh +91 -0
  52. package/bun.lock +603 -0
  53. package/codeprobe-prd.md +674 -0
  54. package/cve-cache.json +25 -0
  55. package/demo-vulnerable-app/.github/workflows/codeprobe.yml +32 -0
  56. package/demo-vulnerable-app/README.md +70 -0
  57. package/demo-vulnerable-app/package-lock.json +27 -0
  58. package/demo-vulnerable-app/package.json +15 -0
  59. package/demo-vulnerable-app/server.js +34 -0
  60. package/demo.sh +45 -0
  61. package/index.ts +19 -0
  62. package/package.json +28 -0
  63. package/patches.json +12 -0
  64. package/serve-dashboard.ts +23 -0
  65. package/src/api/server-cli.ts +270 -0
  66. package/src/api/server.ts +293 -0
  67. package/src/bot/server.ts +113 -0
  68. package/src/cli/commands/report.ts +92 -0
  69. package/src/cli/commands/scan-with-fix.ts +123 -0
  70. package/src/cli/commands/scan.ts +137 -0
  71. package/src/cli/config.ts +188 -0
  72. package/src/cli/errors.ts +120 -0
  73. package/src/cli/index.ts +137 -0
  74. package/src/cli/progress.ts +119 -0
  75. package/src/cli-server.ts +523 -0
  76. package/src/engine/index.ts +90 -0
  77. package/src/engine/matcher.ts +115 -0
  78. package/src/engine/parser.ts +91 -0
  79. package/src/engine/patcher.ts +280 -0
  80. package/src/engine/report.ts +137 -0
  81. package/src/engine/sandbox.ts +222 -0
  82. package/src/engine/scraper.ts +122 -0
  83. package/src/integrations/videodb.ts +153 -0
  84. package/src/mcp/server.ts +149 -0
  85. package/src/scraper-cron.ts +103 -0
  86. package/src/shared/constants.ts +88 -0
  87. package/src/shared/types.ts +123 -0
  88. package/src/shared/utils.ts +80 -0
  89. package/src/test/cli.test.ts +211 -0
  90. package/src/test/dashboard.test.ts +38 -0
  91. package/src/test/demo-scan.json +32 -0
  92. package/src/test/engine.test.ts +157 -0
  93. package/tailwind.config.js +11 -0
  94. package/tsconfig.json +30 -0
  95. package/verify-dashboard.ts +87 -0
  96. package/verify-env.sh +98 -0
@@ -0,0 +1,674 @@
1
+ # CodeProbe - Product Requirements Document
2
+
3
+ ## 1. Overview
4
+
5
+ **Feature Name:** CodeProbe
6
+ **Status:** Draft (Hackathon MVP)
7
+ **Date:** June 2026
8
+ **Version:** 1.0
9
+ **Author:** Nachiketh Reddy
10
+ **Target Event:** AgentForge SG Super AI Edition, SMU School of Economics, June 2026
11
+
12
+ **Problem:** 60% of data breaches in 2026 involve vulnerabilities where a patch was already available and not applied. Dependency scanners (SCA tools) like Snyk and Dependabot identify CVEs theoretically present in dependencies, but cannot confirm whether a specific codebase is actually exploitable. Because the reported risk is only theoretical, engineers deprioritize patches, leading to breaches.
13
+
14
+ **Proposed Solution:** An autonomous AI agent that confirms which CVEs are actually exploitable in a target codebase by running known proof-of-concept exploits in isolated sandboxes, then generates verified patches. Accessible via CLI, GitHub bot, CI/CD integration, and MCP.
15
+
16
+ **Primary Goal:** Win first place at AgentForge SG hackathon.
17
+ **Secondary Goal:** Demonstrate production-ready architecture that could scale post-hackathon.
18
+
19
+ ---
20
+
21
+ ## 2. Background & Context
22
+
23
+ **Problem Statement:**
24
+ Modern development teams face vulnerability overload. Dependency scanners report hundreds of theoretical CVEs per project. The median time to patch a critical vulnerability is 60+ days, while exploit-to-breach time has dropped to hours. The gap between "vulnerability exists" and "vulnerability is exploitable in this specific codebase" is where engineering teams lose prioritization signal.
25
+
26
+ **Justification:**
27
+ - Verizon DBIR 2026: 60% of breaches exploited known CVEs for which a patch was already available
28
+ - IBM Cost of Breach 2026: Average breach costs $4.9M
29
+ - 290k+ CVEs indexed globally as of early 2026, growing 25% YoY
30
+ - No existing product confirms exploitability through live sandbox execution
31
+
32
+ **Goals:**
33
+ - Ship a working end-to-end MVP within the 5-hour build window
34
+ - Demonstrate all three sponsor APIs (Daytona, Bright Data, Nosana) with meaningful integration
35
+ - Build three interfaces (CLI, GitHub bot, CI/CD) to show product flexibility
36
+ - Pass the hackathon judging criteria: Completeness (working MVP), Innovation (live exploit verification), Real-Life Problem Solving ($4.9M breach cost), Sponsored Product Usage (deep, natural integration)
37
+
38
+ ---
39
+
40
+ ## 3. User Personas
41
+
42
+ **Primary Persona: Security-Conscious Developer**
43
+ Name: Alex, Senior Backend Engineer at a 50-person startup
44
+ Technical Level: High
45
+ Pain: Receives 200+ Dependabot alerts weekly. Cannot tell which ones matter. Spends hours triaging theoretical risks.
46
+ Goal: Ship code fast without missing the vulnerability that actually matters.
47
+
48
+ **Secondary Persona: DevOps Engineer**
49
+ Name: Priya, DevOps Lead at a mid-size SaaS company
50
+ Technical Level: High
51
+ Pain: Manages CI/CD pipelines, responsible for security gates. Needs automated tools that don't slow down deployments with false positives.
52
+ Goal: Integrate security checks that give clear go/no-go signals.
53
+
54
+ **Tertiary Persona: Technical Founder/CTO**
55
+ Name: Marcus, CTO of an early-stage startup
56
+ Technical Level: Medium-High
57
+ Pain: Needs to understand security posture for investor due diligence. Non-technical board members ask "are we secure?" and he has no concrete answer.
58
+ Goal: A clear, visual report showing confirmed vs theoretical risks in business terms.
59
+
60
+ ---
61
+
62
+ ## 4. User Stories
63
+
64
+ **As a developer running the CLI,**
65
+ I want to scan my local repository for vulnerabilities,
66
+ So that I can see which CVEs are confirmed exploitable before I commit.
67
+
68
+ **As a developer running the CLI with --fix,**
69
+ I want CodeProbe to generate patches and push them to a new branch on GitHub,
70
+ So that I can open a PR with verified fixes.
71
+
72
+ **As a developer opening a pull request,**
73
+ I want the GitHub bot to automatically scan my code,
74
+ So that I get immediate feedback on whether my changes introduce exploitable vulnerabilities.
75
+
76
+ **As a developer reviewing bot findings,**
77
+ I want to confirm the suggested fix,
78
+ So that the bot creates a PR with patches I can review and merge.
79
+
80
+ **As a DevOps engineer,**
81
+ I want to integrate CodeProbe into our CI/CD pipeline,
82
+ So that vulnerable code is blocked before deployment.
83
+
84
+ **As a technical founder,**
85
+ I want a visual dashboard showing confirmed vs theoretical risks,
86
+ So that I can communicate security posture to non-technical stakeholders.
87
+
88
+ **As a security researcher,**
89
+ I want to use CodeProbe via MCP from Claude Desktop,
90
+ So that I can integrate vulnerability scanning into my AI-assisted workflow.
91
+
92
+ ---
93
+
94
+ ## 5. Functional Requirements
95
+
96
+ ### Core Engine: Vulnerability Scanner
97
+
98
+ **Req 5.1: Repository Input**
99
+ The system must accept public GitHub repository URLs as input. For CLI mode, it must also accept local repository paths. The system must parse dependency manifests (package.json, package-lock.json, requirements.txt, Cargo.toml) to extract dependency versions.
100
+
101
+ **Req 5.2: CVE Database Scraping**
102
+ The system must use Bright Data to scrape live CVE data from NVD, Exploit-DB, GitHub Security Advisories, and Snyk Vulnerability Database. Scraping must run in parallel for performance. If Bright Data is blocked or fails, the system must fall back to a cached CVE database and display a warning to the user.
103
+
104
+ **Req 5.3: Exploit Verification (Core Innovation)**
105
+ The system must spawn isolated Daytona sandboxes, one per critical CVE. Each sandbox must:
106
+ - Install the vulnerable dependency version
107
+ - Inject a known proof-of-concept exploit script (e.g., for log4shell)
108
+ - Execute the exploit
109
+ - Capture output, network calls, and filesystem changes
110
+ - Determine if the exploit succeeded (Confirmed Exploitable) or failed (Theoretical Risk)
111
+
112
+ **Req 5.4: Patch Generation**
113
+ The system must use Nosana-hosted models (CodeBERT for security analysis, StarCoder2 for patch generation) to generate exact code diffs that fix confirmed vulnerabilities. The LLM must analyze the vulnerable code path and produce a minimal, targeted patch. If Nosana container startup exceeds 60 seconds, the system must fall back to Claude API (Anthropic) while keeping Nosana branding in the UI.
114
+
115
+ **Req 5.5: Report Generation**
116
+ The system must generate a detailed report containing:
117
+ - Executive summary: Confirmed exploitable count, total theoretical count, overall risk score, business impact estimate ($)
118
+ - Per-CVE breakdown: CVE ID, severity, CVSS score, affected package + version, exploit status (Confirmed/Theoretical), proof-of-concept evidence (sandbox logs, output capture)
119
+ - Patch details: Exact code diff, upgrade version (if available), breaking change warnings
120
+ - Remediation timeline estimate
121
+ - Supply chain attack warnings (if any reported in the wild for scanned dependencies)
122
+
123
+ **Req 5.6: Supply Chain Attack Detection**
124
+ The dashboard must warn about any supply chain attacks reported for scanned dependencies (e.g., compromised maintainer, malicious version published).
125
+
126
+ ### Interface 1: CLI Tool
127
+
128
+ **Req 5.7: CLI Commands**
129
+ The CLI must support:
130
+ - `codeprobe init` - Connect GitHub account via OAuth
131
+ - `codeprobe scan` - Scan current directory, output report to terminal
132
+ - `codeprobe scan --fix` - Scan, generate patches, commit to new branch, push to GitHub
133
+ - `codeprobe scan <repo-url>` - Scan a public repo without cloning
134
+ - `codeprobe apply <cve-id>` - Apply a specific patch from previous scan
135
+ - `codeprobe report` - Display last scan results in formatted terminal output
136
+
137
+ **Req 5.8: CLI Output Format**
138
+ The CLI must display a color-coded table showing:
139
+ - Green rows: Confirmed Exploitable (with PoC evidence)
140
+ - Yellow rows: Theoretical Risk (no PoC available or exploit failed)
141
+ - Red rows: Supply Chain Attack Warning
142
+
143
+ **Req 5.9: Commit Messages**
144
+ When auto-fixing via `--fix`, the CLI must create commits with format:
145
+ `[CodeProbe] Fix CVE-2021-44228 (log4shell) in package.json`
146
+ Body must include: CVE description, severity, exploit verification status, link to full report.
147
+
148
+ ### Interface 2: GitHub Bot
149
+
150
+ **Req 5.10: Bot Trigger**
151
+ The bot must run automatically on every pull request opened against the main branch. The bot must comment on the PR within 2 minutes with initial scan results. The bot must update the comment when the scan completes.
152
+
153
+ **Req 5.11: Bot Permissions**
154
+ The bot must have read access to the repository contents, plus the minimum write access needed to create branches and open pull requests. The bot must open pull requests with patches but must not push directly to protected branches. Users must manually merge the bot's PR.
155
+
156
+ **Req 5.12: Bot Comment Format**
157
+ The bot comment must show:
158
+ - Scan status (running, complete, failed)
159
+ - Summary: "Found X confirmed exploitable, Y theoretical"
160
+ - List of CVEs with severity badges
161
+ - Link to full dashboard
162
+ - "Auto-fix available" button if patches can be generated
163
+
164
+ **Req 5.13: Auto-Fix PR**
165
+ When a user clicks "Auto-fix available", the bot must:
166
+ - Generate patches for confirmed exploitable CVEs
167
+ - Create a new branch (e.g., `codeprobe-fix-pr-123`)
168
+ - Commit patches with proper commit messages
169
+ - Open a new PR linking back to the original
170
+ - Request review from the original PR author
171
+
172
+ ### Interface 3: CI/CD Integration
173
+
174
+ **Req 5.14: GitHub Action**
175
+ The system must provide a GitHub Action that can be added to `.github/workflows/`. The action must accept inputs: `fail-on-confirmed-exploitable` (boolean), `fail-on-theoretical` (boolean), `report-format` (json/markdown/sarif).
176
+
177
+ **Req 5.15: Exit Codes**
178
+ The action must exit with code 0 (success), 1 (vulnerabilities found), or 2 (scan failed). Exit codes must be configurable based on user policy.
179
+
180
+ **Req 5.16: SARIF Output**
181
+ The action must support SARIF format for integration with GitHub Code Scanning. Results must include exploit verification status in the SARIF properties.
182
+
183
+ ### Interface 4: MCP Server
184
+
185
+ **Req 5.17: MCP Tools**
186
+ The system must expose MCP tools:
187
+ - `scan_repository(repo_url)` - Returns scan ID
188
+ - `get_scan_status(scan_id)` - Returns current status
189
+ - `get_scan_results(scan_id)` - Returns full report
190
+ - `apply_fix(scan_id, cve_id)` - Applies patch to repo (requires auth)
191
+
192
+ **Req 5.18: MCP Resources**
193
+ The system must expose MCP resources for cached CVE data and exploit PoC scripts.
194
+
195
+ ### Dashboard
196
+
197
+ **Req 5.19: Dashboard Views**
198
+ The dashboard must support two views:
199
+ - Technical view (default): Full CVE details, sandbox logs, code diffs, severity scores
200
+ - Executive view: Business impact estimate, risk score gauge, count of confirmed vs theoretical, supply chain warnings
201
+
202
+ **Req 5.20: Business Impact Translation**
203
+ The executive view must translate "confirmed exploitable" into business terms: "1 critical RCE vulnerability found. If exploited: attacker can run code on your server → data breach → $4.9M average cost."
204
+
205
+ ---
206
+
207
+ ## 6. Non-Functional Requirements
208
+
209
+ **Performance:**
210
+ - CLI scan must complete in under 3 minutes for a typical Node.js project (50 dependencies)
211
+ - CVE scraping must run in parallel (target: 10 sources in 30 seconds)
212
+ - Sandbox verification must run 3 CVEs in parallel (target: 90 seconds for 3 critical CVEs)
213
+ - Dashboard must load in under 2 seconds
214
+ - GitHub bot must post initial comment within 2 minutes of PR open
215
+
216
+ **Security:**
217
+ - All exploit code must run in isolated Daytona sandboxes with no network access to host
218
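+ - User code must never be sent to third-party APIs on the primary path (Nosana runs models locally); the Claude API fallback is the only exception and must be indicated to the user when active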
219
+ - Bright Data scraping must use residential proxies to avoid rate limiting
220
+ - OAuth tokens must be stored encrypted at rest
221
+ - Patch generation must not introduce new vulnerabilities (LLM output must be validated)
222
+
223
+ **Reliability:**
224
+ - System must gracefully degrade if Bright Data fails (use cached CVE data)
225
+ - System must gracefully degrade if Nosana fails (fall back to Claude API)
226
+ - System must handle Daytona sandbox crashes (retry with fresh sandbox)
227
+ - System must handle partial scan failures (continue scanning remaining CVEs, report partial results)
228
+
229
+ **Scalability:**
230
+ - Architecture must support horizontal scaling (multiple Daytona sandboxes, parallel CVE scraping)
231
+ - Dashboard must support 100+ concurrent users
232
+ - MCP server must handle 50+ concurrent tool calls
233
+
234
+ **Usability:**
235
+ - CLI output must be readable in standard terminal (80 columns)
236
+ - Dashboard must be responsive (mobile, tablet, desktop)
237
+ - Error messages must be actionable (e.g., "Bright Data API key invalid. Set BRIGHT_DATA_API_KEY in ~/.codeprobe/config")
238
+
239
+ **Compatibility:**
240
+ - CLI must work on macOS, Linux, and Windows (WSL)
241
+ - GitHub bot must work with GitHub.com and GitHub Enterprise
242
+ - MCP server must work with Claude Desktop and other MCP clients
243
+
244
+ ---
245
+
246
+ ## 7. UX/UI Design
247
+
248
+ ### CLI Interface
249
+
250
+ ```
251
+ $ codeprobe scan https://github.com/user/repo
252
+
253
+ ⚡ CodeProbe v1.0.0
254
+ Scanning repository... https://github.com/user/repo
255
+
256
+ [12:34:56] Bright Data: Scraping CVE databases (NVD, Exploit-DB, Snyk)...
257
+ [12:35:14] Found 14 vulnerabilities across 8 dependencies
258
+
259
+ [12:35:15] Daytona: Spinning up 3 isolated sandboxes for CRITICAL CVEs...
260
+ [12:35:15] ├─ Sandbox 1: CVE-2021-44228 (log4shell)
261
+ [12:35:15] ├─ Sandbox 2: CVE-2022-22965 (Spring4Shell)
262
+ [12:35:15] └─ Sandbox 3: CVE-2023-44487 (HTTP/2 Rapid Reset)
263
+
264
+ [12:36:45] Exploit verification complete:
265
+ ✓ CVE-2021-44228: CONFIRMED EXPLOITABLE (RCE achieved in 3.2s)
266
+ ✗ CVE-2022-22965: THEORETICAL (PoC failed - vulnerable code path not reachable)
267
+ ✓ CVE-2023-44487: CONFIRMED EXPLOITABLE (DoS achieved in 1.8s)
268
+
269
+ [12:36:50] Nosana: Generating patches for confirmed vulnerabilities...
270
+ [12:37:20] Patches generated
271
+
272
+ ────────────────────────────────────────────────────────────
273
+ SCAN COMPLETE
274
+
275
+ Risk Score: 9.2/10 (CRITICAL)
276
+ Confirmed Exploitable: 2
277
+ Theoretical Risk: 12
278
+ Supply Chain Warnings: 0
279
+
280
+ Business Impact Estimate: $9.8M potential breach cost
281
+
282
+ View full report: https://codeprobe.dev/r/scan_abc123
283
+ ────────────────────────────────────────────────────────────
284
+ ```
285
+
286
+ ### GitHub Bot Comment
287
+
288
+ ```markdown
289
+ ## ⚡ CodeProbe Security Scan
290
+
291
+ **Status:** ✅ Complete
292
+ **Risk Score:** 9.2/10 (CRITICAL)
293
+ **Scan Duration:** 2m 34s
294
+
295
+ ### Summary
296
+ - **2** Confirmed Exploitable (PoC verified in sandbox)
297
+ - **12** Theoretical Risk (CVE exists, exploit not verified)
298
+ - **0** Supply Chain Warnings
299
+
300
+ ### Critical Findings
301
+
302
+ | CVE | Severity | Package | Status |
303
+ |-----|----------|---------|--------|
304
+ | [CVE-2021-44228](link) | CRITICAL | log4j@2.14.1 | ✅ Confirmed Exploitable |
305
+ | [CVE-2023-44487](link) | HIGH | http2-server@1.0.0 | ✅ Confirmed Exploitable |
306
+
307
+ ### Business Impact
308
+ If exploited, these vulnerabilities could lead to:
309
+ - Remote code execution on your server
310
+ - Data breach ($4.9M average cost)
311
+ - Complete system compromise
312
+
313
+ ### Recommended Actions
314
+ - [🔧 Auto-fix available](link) - Generate patches and open PR
315
+ - [📊 View full report](link) - Detailed analysis
316
+ - [📖 Learn more](link) - About these CVEs
317
+ ```
318
+
319
+ ### Dashboard Layout
320
+
321
+ **Header:**
322
+ - CodeProbe logo + scan ID
323
+ - Risk score gauge (0-10)
324
+ - Action buttons: Export Report, Share, Re-scan
325
+
326
+ **Left Sidebar:**
327
+ - Executive Summary (default)
328
+ - Technical Details
329
+ - Patch Diff
330
+ - Sandbox Logs
331
+ - Supply Chain Monitor
332
+
333
+ **Main Content:**
334
+ - CVE cards grouped by severity
335
+ - Each card shows: CVE ID, package, exploit status, expandable details
336
+ - Code diff viewer for patches
337
+ - "Business Impact" callout box for non-technical viewers
338
+
339
+ **Footer:**
340
+ - Last updated timestamp
341
+ - Powered by Daytona | Bright Data | Nosana
342
+
343
+ ---
344
+
345
+ ## 8. Edge Cases
346
+
347
+ **8.1: Bright Data Scraping Fails**
348
+ System must display warning: "⚠️ Using cached CVE data (last updated: 2 hours ago). Live scraping unavailable." Scan continues with cached data. Dashboard shows a "data freshness" indicator.
349
+
350
+ **8.2: Daytona Sandbox Crashes**
351
+ System must retry with a fresh sandbox (max 2 retries). If it still fails, system must mark that CVE as "Verification Failed" and continue scanning others. User sees: "⚠️ Could not verify CVE-2021-44228 (sandbox crashed). Manual verification recommended."
352
+
353
+ **8.3: Nosana Container Startup Timeout**
354
+ If Nosana GPU provisioning exceeds 60 seconds, system must automatically fall back to Claude API (Anthropic). UI must still show "Powered by Nosana" branding. A subtle indicator must show "Using Claude API fallback (Nosana unavailable)".
355
+
356
+ **8.4: No Exploitable CVEs Found**
357
+ System must display: "✅ Good news! No exploitable vulnerabilities found. 14 theoretical CVEs detected, but all exploit attempts failed in isolated sandboxes. Your code is safe from known exploits." (Not an error case—positive outcome.)
358
+
359
+ **8.5: Malicious Package Detected**
360
+ If a scanned dependency has known supply chain attack indicators (e.g., compromised maintainer, known typosquatting), system must display a RED banner: "🚨 SUPPLY CHAIN WARNING: package@1.2.3 was compromised on 2024-03-15. Remove immediately."
361
+
362
+ **8.6: Repository Too Large**
363
+ If a repository has 500+ dependencies, system must warn: "⚠️ Large repository detected (523 dependencies). Scan will focus on CRITICAL severity CVEs first. Estimated time: 8 minutes."
364
+
365
+ **8.7: Network Connectivity Lost**
366
+ CLI must cache scan results locally. When connectivity is restored, results can be uploaded to dashboard. System must display: "📡 Offline mode. Results saved to ~/.codeprobe/last-scan.json"
367
+
368
+ **8.8: Invalid GitHub Token**
369
+ CLI must display clear error: "❌ GitHub authentication failed. Run `codeprobe logout` then `codeprobe init` to re-authenticate."
370
+
371
+ **8.9: Conflicting Patches**
372
+ If auto-fixing multiple CVEs creates merge conflicts, system must create separate commits per CVE and open individual PRs. User sees: "⚠️ 3 patches opened as separate PRs due to conflicts. Review each individually."
373
+
374
+ **8.10: LLM Generates Invalid Patch**
375
+ If Nosana/Claude generates a patch that doesn't compile, system must retry generation with a different prompt (max 2 retries). If it still fails, system must mark patch as "manual review required" and provide CVE details for human fix.
376
+
377
+ ---
378
+
379
+ ## 9. Analytics & Success Metrics
380
+
381
+ **Hackathon Judging Criteria (Primary):**
382
+ - Completeness (working MVP): Pass/Fail binary
383
+ - Innovation (live exploit verification): 1-10 scale
384
+ - Real-Life Problem Solving ($4.9M breach cost, 60% of breaches): 1-10 scale
385
+ - Sponsored Product Usage (Daytona, Bright Data, Nosana deep integration): 1-10 scale
386
+
387
+ **Technical Performance Metrics:**
388
+ - Time to scan a typical Node.js project: Target < 3 minutes
389
+ - CVE scraping success rate: Target > 95%
390
+ - Sandbox verification success rate: Target > 90%
391
+ - Patch generation success rate: Target > 80% (i.e., more than 80% of generated patches compile and fix the vulnerability)
392
+
393
+ **Demo Success Metrics:**
394
+ - Demo runs without errors: Target 100%
395
+ - All three interfaces (CLI, GitHub bot, CI/CD) demonstrated: Target 100%
396
+ - Live exploit verification shown in real-time: Target 100%
397
+ - Business impact message delivered clearly: Target 100%
398
+
399
+ **User Experience Metrics (Post-Hackathon):**
400
+ - CLI install time: Target < 30 seconds
401
+ - Time from `codeprobe scan` to first result: Target < 2 minutes
402
+ - Dashboard load time: Target < 2 seconds
403
+ - GitHub bot comment latency: Target < 2 minutes from PR open
404
+
405
+ ---
406
+
407
+ ## 10. Technical Architecture
408
+
409
+ ### System Components
410
+
411
+ **Core Engine (Bun runtime):**
412
+ - Repository parser (extracts dependencies from manifests)
413
+ - CVE matcher (maps dependencies to known CVEs using semver)
414
+ - Exploit orchestrator (manages Daytona sandboxes)
415
+ - Patch generator (calls Nosana LLM)
416
+ - Report builder (formats output for CLI/dashboard/CI)
417
+
418
+ **Bright Data Integration:**
419
+ - Web scraper for NVD, Exploit-DB, Snyk, GitHub Security Advisories
420
+ - Parallel scraping with residential proxies
421
+ - Caching layer for offline/fallback scenarios
422
+
423
+ **Daytona Integration:**
424
+ - Sandbox pool manager (creates isolated containers)
425
+ - Exploit runner (executes PoC scripts in sandboxes)
426
+ - Output capture (logs, network calls, filesystem changes)
427
+ - Verification logic (determines Confirmed vs Theoretical)
428
+
429
+ **Nosana Integration:**
430
+ - Local LLM inference (CodeBERT for security analysis, StarCoder2 for patch generation)
431
+ - GPU container orchestration
432
+ - Fallback to Claude API if Nosana unavailable
433
+
434
+ **Dashboard (React):**
435
+ - Technical view (full CVE details, code diffs, sandbox logs)
436
+ - Executive view (business impact, risk score, warnings)
437
+ - Real-time scan progress updates
438
+
439
+ **GitHub Bot:**
440
+ - Webhook handler for PR events
441
+ - Comment updater (posts/edits PR comments)
442
+ - PR creator (opens auto-fix PRs with patches)
443
+
444
+ **MCP Server:**
445
+ - Tool implementations (scan, get_status, get_results, apply_fix)
446
+ - Resource providers (cached CVE data, PoC scripts)
447
+ - OAuth/auth handling
448
+
449
+ ### Data Flow
450
+
451
+ ```
452
+ User Input (Repo URL)
453
+
454
+ Core Engine → Parse Dependencies
455
+
456
+ Bright Data → Scrape CVE Databases (parallel)
457
+
458
+ Core Engine → Match Dependencies to CVEs (semver)
459
+
460
+ Daytona → Spawn Sandboxes for CRITICAL CVEs
461
+
462
+ Daytona → Run PoC Exploits in Sandboxes
463
+
464
+ Core Engine → Verify Exploit Results
465
+
466
+ Nosana → Generate Patches for Confirmed CVEs
467
+
468
+ Core Engine → Build Report
469
+
470
+ Output → CLI / Dashboard / GitHub Bot / MCP
471
+ ```
472
+
473
+ ---
474
+
475
+ ## 11. Pre-Hackathon Preparation
476
+
477
+ **P1: Demo CVE Selection**
478
+ Pre-select log4shell (CVE-2021-44228) and HTTP/2 Rapid Reset (CVE-2023-44487) as the demo CVEs. Both are extremely well-documented with thousands of PoC scripts available. Test both against a demo Node.js app and have working exploits ready.
479
+
480
+ **P2: Demo Repository Preparation**
481
+ Create a demo GitHub repository with intentionally vulnerable dependencies. Ensure the repository is public so the CLI can scan it without auth. Include 2-3 critical CVEs and 5-10 theoretical ones for a realistic demo.
482
+
483
+ **P3: Bright Data API Key**
484
+ Sign up for Bright Data, get API key, test residential proxy scraping. Verify it can scrape NVD and Exploit-DB without rate limiting.
485
+
486
+ **P4: Daytona Sandbox Testing**
487
+ Test spawning a Daytona sandbox, installing a vulnerable package, running a PoC exploit, and capturing output. Verify network isolation (sandbox cannot reach host or internet).
488
+
489
+ **P5: Nosana GPU Container**
490
+ Test Nosana container startup time with CodeBERT or StarCoder2 model. Measure cold start time. Prepare Claude API fallback credentials.
491
+
492
+ **P6: CLI Installation**
493
+ Build and test the CLI binary for macOS, Linux, and Windows (WSL). Verify it works on a fresh machine with just the binary + config file.
494
+
495
+ **P7: GitHub Bot Setup**
496
+ Create a GitHub App, configure webhooks, test PR comment posting. Verify OAuth flow works.
497
+
498
+ **P8: Demo Script Rehearsal**
499
+ Rehearse the 2-minute demo at least 5 times. Time each section. Practice the live exploit verification moment. Prepare for Q&A from judges.
500
+
501
+ ---
502
+
503
+ ## 12. 5-Hour Build Plan
504
+
505
+ **10:00-10:30: Kickoff + Workshop**
506
+ Team formation, sponsor API introductions, credential provisioning.
507
+
508
+ **10:30-11:30: Architecture + Setup**
509
+ - Initialize Bun project
510
+ - Set up Bright Data scraper (test with NVD)
511
+ - Set up Daytona sandbox (test exploit execution)
512
+ - Set up Nosana LLM (test patch generation)
513
+ - Create demo repository
514
+
515
+ **11:30-13:00: Core Engine**
516
+ - Dependency parser (Node.js only for MVP)
517
+ - CVE matcher (exact version matching from package-lock.json)
518
+ - Sandbox orchestrator (parallel execution, 3 CVEs at a time)
519
+ - Report builder (JSON format)
520
+
521
+ **13:00-14:00: CLI Interface**
522
+ - Implement `codeprobe scan` command
523
+ - Add `--fix` flag for auto-patching
524
+ - Format terminal output with colors and tables
525
+ - Test end-to-end on demo repo
526
+
527
+ **14:00-15:00: Dashboard**
528
+ - React app with Technical and Executive views
529
+ - Real-time scan progress (WebSocket or polling)
530
+ - Code diff viewer for patches
531
+ - Business impact calculator
532
+
533
+ **15:00-16:00: GitHub Bot + MCP**
534
+ - GitHub App webhook handler
535
+ - PR comment posting
536
+ - Auto-fix PR creation
537
+ - MCP server with scan/get_status/get_results/apply_fix tools
538
+
539
+ **16:00-16:30: Polish + Rehearsal**
540
+ - Fix bugs from integration testing
541
+ - Polish dashboard UI
542
+ - Rehearse demo (3-5 times)
543
+ - Prepare backup plans for failure modes
544
+
545
+ **16:30: Submit**
546
+
547
+ ---
548
+
549
+ ## 13. Risks & Mitigations
550
+
551
+ | Risk | Impact | Mitigation |
552
+ |------|--------|------------|
553
+ | Bright Data scraping blocked | High | Use cached CVE database as fallback, display warning |
554
+ | Daytona sandbox crash | High | Retry with fresh sandbox (max 2 retries), mark CVE as "Verification Failed" |
555
+ | Nosana GPU cold start > 60s | Medium | Auto-fallback to Claude API, keep Nosana branding in UI |
556
+ | Demo CVE not exploitable in our test repo | Critical | Pre-test log4shell + HTTP/2 Rapid Reset before hackathon, have backup CVEs ready |
557
+ | GitHub bot OAuth issues | Medium | Test auth flow pre-hackathon, have manual token fallback |
558
+ | Patch generation produces invalid code | Medium | Validate patches compile, retry with different prompt, mark as "manual review" |
559
+ | Scope creep | High | Strict V0 definition: Node.js only, 2 demo CVEs, 3 interfaces (CLI/GitHub/CI), MCP stretch |
560
+ | Non-technical judge confusion | Medium | Add "Business Impact" screen with $4.9M breach cost framing |
561
+ | Network issues during demo | High | Pre-record video of working demo as backup, have offline mode in CLI |
562
+ | Time overrun | High | Cut MCP if needed, cut CI/CD integration if needed, prioritize CLI + GitHub bot |
563
+
564
+ ---
565
+
566
+ ## 14. Success Criteria
567
+
568
+ **Must Have (Demo Will Not Work Without):**
569
+ - Working CLI that scans a public GitHub repo
570
+ - Live Bright Data CVE scraping
571
+ - Daytona sandbox spawning and exploit execution
572
+ - Nosana LLM patch generation
573
+ - Detailed report output
574
+ - At least 2 confirmed exploitable CVEs in demo
575
+
576
+ **Should Have (Strong Demo):**
577
+ - GitHub bot with auto-fix PR creation
578
+ - Dashboard with Technical + Executive views
579
+ - Business impact translation
580
+ - Supply chain attack warnings
581
+
582
+ **Nice to Have (Impressive Demo):**
583
+ - CI/CD GitHub Action
584
+ - MCP server
585
+ - SARIF output support
586
+ - Offline mode with cached results
587
+
588
+ ---
589
+
590
+ ## 15. Open Questions
591
+
592
+ None at this time. All decisions have been made based on hackathon constraints and judging criteria.
593
+
594
+ ---
595
+
596
+ ## 16. Post-Hackathon (If Idea Proceeds)
597
+
598
+ **Note:** This is a stretch section. It is relevant only if the team decides to continue development after the hackathon, and it is not part of the hackathon scope.
599
+
600
+ **Potential Next Steps:**
601
+ - Multi-language support (Python, Rust, Java)
602
+ - Enterprise SSO integration
603
+ - Self-hosted deployment option
604
+ - Custom PoC upload for private exploits
605
+ - Slack/Teams notifications
606
+ - Jira/Linear integration for vulnerability tracking
607
+ - Historical scan tracking and trends
608
+ - Team collaboration features (assign vulnerabilities, comment threads)
609
+ - Custom security policies (fail build if > 5 critical CVEs)
610
+ - White-label offering for security consultancies
611
+ - Enterprise pricing tiers (based on repos, users, scan frequency)
612
+
613
+ ---
614
+
615
+ ## 17. Business Model (For Presentation)
616
+
617
+ - **Pricing Model:** Enterprise license
618
+ - **Contact:** "Contact us for pricing details"
619
+ - **Target Market:** Mid-size to large companies with 10+ developers, security-conscious industries (fintech, healthcare, SaaS)
620
+ - **Value Proposition:** Reduce breach risk ($4.9M average cost) by confirming exploitability before patches are prioritized. Ship code 10x faster with clear security signals.
621
+ - **Sales Motion:** Direct sales to CTOs and DevOps leads, land-and-expand from single team to org-wide deployment.
622
+
623
+ ---
624
+
625
+ ## 18. Appendix
626
+
627
+ **A. Demo Repository Structure**
628
+ ```
629
+ demo-vulnerable-app/
630
+ ├── package.json (intentionally vulnerable dependencies)
631
+ ├── package-lock.json
632
+ ├── README.md (explains demo purpose)
633
+ ├── src/
634
+ │ ├── server.js (uses vulnerable packages)
635
+ │ └── routes/
636
+ └── .github/
637
+ └── workflows/
638
+ └── codeprobe.yml
639
+ ```
640
+
641
+ **B. Demo CVEs**
642
+ - CVE-2021-44228 (Log4Shell; Apache Log4j 2, Java) - CRITICAL, RCE, 10.0 CVSS
643
+ - CVE-2023-44487 (HTTP/2 Rapid Reset) - HIGH, DoS, 7.5 CVSS
644
+ - CVE-2022-22965 (Spring4Shell) - CRITICAL, RCE, 9.8 CVSS (backup)
645
+
646
+ **C. Bright Data Scraping Targets**
647
+ - NVD: https://nvd.nist.gov/vuln/detail/{CVE-ID}
648
+ - Exploit-DB: https://www.exploit-db.com/exploits/{ID}
649
+ - Snyk: https://security.snyk.io/vuln/{SNYK-ID}
650
+ - GitHub Security Advisories: https://github.com/advisories/{GHSA-ID}
651
+
652
+ **D. Nosana Models**
653
+ - CodeBERT: For security analysis and vulnerability classification
654
+ - StarCoder2 15B: For patch generation and code fixes
655
+ - DeepSeek-Coder: Alternative for code generation
656
+
657
+ **E. Daytona Sandbox Configuration**
658
+ - Base image: Node.js 20
659
+ - Network: Isolated (no host access)
660
+ - Resources: 1 CPU, 512MB RAM, 5GB disk
661
+ - Timeout: 60 seconds per exploit
662
+ - Cleanup: Automatic after scan completes
663
+
664
+ **F. References**
665
+ - Verizon DBIR 2026: https://www.verizon.com/business/resources/reports/dbir/
666
+ - IBM Cost of a Data Breach Report 2026: https://www.ibm.com/security/data-breach
667
+ - NVD: https://nvd.nist.gov/
668
+ - Daytona Docs: https://daytona.io/docs
669
+ - Bright Data Docs: https://brightdata.com/docs
670
+ - Nosana Docs: https://docs.nosana.io
671
+
672
+ ---
673
+
674
+ **End of PRD**
package/cve-cache.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "timestamp": "2026-06-13T00:00:00Z",
3
+ "source": "cached",
4
+ "cves": [
5
+ {
6
+ "id": "CVE-2022-29078",
7
+ "package": "ejs",
8
+ "affected_versions": [
9
+ "3.1.0",
10
+ "3.1.1",
11
+ "3.1.2",
12
+ "3.1.3",
13
+ "3.1.4",
14
+ "3.1.5",
15
+ "3.1.6"
16
+ ],
17
+ "fixed_version": "3.1.7",
18
+ "severity": "CRITICAL",
19
+ "cvss": 9.8,
20
+ "description": "EJS before 3.1.7 allows template injection attacks with arbitrary code execution",
21
+ "cwe": "CWE-94",
22
+ "exploit_url": "https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29078"
23
+ }
24
+ ]
25
+ }