codeprobe-scanner 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/.claude/settings.local.json +19 -0
  2. package/.dockerignore +17 -0
  3. package/.env.development +8 -0
  4. package/.env.example +20 -0
  5. package/.env.setup +214 -0
  6. package/.github/workflows/codeprobe-scan.yml +137 -0
  7. package/.github/workflows/codeprobe.yml +84 -0
  8. package/.github/workflows/scan-schedule.yml +28 -0
  9. package/ANALYSIS_SUMMARY.md +365 -0
  10. package/API_INTEGRATIONS.md +469 -0
  11. package/BUILD_PLAYBOOK.md +349 -0
  12. package/CLAUDE.md +106 -0
  13. package/DEPLOY.md +452 -0
  14. package/DEPLOYMENT_STATUS.md +240 -0
  15. package/DEPLOY_CHECKLIST.md +316 -0
  16. package/Dockerfile +24 -0
  17. package/EXECUTION_PLAN.html +1086 -0
  18. package/IMPLEMENTATION_COMPLETE.md +288 -0
  19. package/IMPLEMENTATION_SUMMARY.md +443 -0
  20. package/INTERACTIVE_FIX_FLOW.md +308 -0
  21. package/MIGRATION_COMPLETE.md +327 -0
  22. package/ORCHESTRATOR_SYNTHESIS.json +80 -0
  23. package/PENDING_WORK.md +308 -0
  24. package/PREFLIGHT_PLAN.md +182 -0
  25. package/QUICKSTART.md +305 -0
  26. package/README.md +15 -0
  27. package/STAGE_1_SETUP_ENGINE.md +245 -0
  28. package/STAGE_2_ARCHITECTURE.md +714 -0
  29. package/STAGE_2_CLI_VERIFICATION.md +269 -0
  30. package/STAGE_2_COMPLETE.md +332 -0
  31. package/STAGE_2_IMPLEMENTATION_PLAN.md +679 -0
  32. package/STAGE_3_COMPLETE.md +246 -0
  33. package/STAGE_3_DASHBOARD_POLISH.md +371 -0
  34. package/STAGE_3_SETUP.md +155 -0
  35. package/VIDEODB_INTEGRATION.md +237 -0
  36. package/archived/DASHBOARD_UI_WALKTHROUGH.md +392 -0
  37. package/archived/FRONTEND_SETUP.md +236 -0
  38. package/archived/auth.ts +40 -0
  39. package/archived/dashboard/components/BusinessImpactCard.tsx +48 -0
  40. package/archived/dashboard/components/CVETable.tsx +104 -0
  41. package/archived/dashboard/components/ErrorBoundary.tsx +48 -0
  42. package/archived/dashboard/components/PatchDiffViewer.tsx +43 -0
  43. package/archived/dashboard/components/RiskGauge.tsx +64 -0
  44. package/archived/dashboard/frontend.tsx +104 -0
  45. package/archived/dashboard/hooks/useAuth.ts +32 -0
  46. package/archived/dashboard/hooks/useScan.ts +65 -0
  47. package/archived/dashboard/index.html +15 -0
  48. package/archived/dashboard/pages/LoginPage.tsx +28 -0
  49. package/archived/dashboard/pages/ScanDetailPage.tsx +143 -0
  50. package/archived/dashboard/pages/ScansListPage.tsx +160 -0
  51. package/bin/install-and-run.sh +91 -0
  52. package/bun.lock +603 -0
  53. package/codeprobe-prd.md +674 -0
  54. package/cve-cache.json +25 -0
  55. package/demo-vulnerable-app/.github/workflows/codeprobe.yml +32 -0
  56. package/demo-vulnerable-app/README.md +70 -0
  57. package/demo-vulnerable-app/package-lock.json +27 -0
  58. package/demo-vulnerable-app/package.json +15 -0
  59. package/demo-vulnerable-app/server.js +34 -0
  60. package/demo.sh +45 -0
  61. package/index.ts +19 -0
  62. package/package.json +28 -0
  63. package/patches.json +12 -0
  64. package/serve-dashboard.ts +23 -0
  65. package/src/api/server-cli.ts +270 -0
  66. package/src/api/server.ts +293 -0
  67. package/src/bot/server.ts +113 -0
  68. package/src/cli/commands/report.ts +92 -0
  69. package/src/cli/commands/scan-with-fix.ts +123 -0
  70. package/src/cli/commands/scan.ts +137 -0
  71. package/src/cli/config.ts +188 -0
  72. package/src/cli/errors.ts +120 -0
  73. package/src/cli/index.ts +137 -0
  74. package/src/cli/progress.ts +119 -0
  75. package/src/cli-server.ts +523 -0
  76. package/src/engine/index.ts +90 -0
  77. package/src/engine/matcher.ts +115 -0
  78. package/src/engine/parser.ts +91 -0
  79. package/src/engine/patcher.ts +280 -0
  80. package/src/engine/report.ts +137 -0
  81. package/src/engine/sandbox.ts +222 -0
  82. package/src/engine/scraper.ts +122 -0
  83. package/src/integrations/videodb.ts +153 -0
  84. package/src/mcp/server.ts +149 -0
  85. package/src/scraper-cron.ts +103 -0
  86. package/src/shared/constants.ts +88 -0
  87. package/src/shared/types.ts +123 -0
  88. package/src/shared/utils.ts +80 -0
  89. package/src/test/cli.test.ts +211 -0
  90. package/src/test/dashboard.test.ts +38 -0
  91. package/src/test/demo-scan.json +32 -0
  92. package/src/test/engine.test.ts +157 -0
  93. package/tailwind.config.js +11 -0
  94. package/tsconfig.json +30 -0
  95. package/verify-dashboard.ts +87 -0
  96. package/verify-env.sh +98 -0
@@ -0,0 +1,80 @@
1
+ {
2
+ "tldr": "CodeProbe MVP concept is sound (exploit verification differentiates from Snyk), but plan contains critical contradictions (Log4Shell cannot work in Node.js), unrealistic timeline (5h vs. 12-24h needed), and zero test infrastructure—recommend human-in-loop revision before build.",
3
+ "overall_confidence": 75,
4
+ "min_confidence": 75,
5
+ "confidence_details": {
6
+ "eng": 85,
7
+ "design": 82,
8
+ "qa": 75,
9
+ "security": 85
10
+ },
11
+ "overall_risk": "high",
12
+ "has_high_risk": true,
13
+ "risk_details": {
14
+ "eng": "high",
15
+ "design": "high",
16
+ "qa": "high",
17
+ "security": "high"
18
+ },
19
+ "consensus": "conflict",
20
+ "consensus_details": {
21
+ "eng": "revise",
22
+ "design": "revise",
23
+ "qa": "revise",
24
+ "security": "revise"
25
+ },
26
+ "any_block": false,
27
+ "decision": "human_in_loop",
28
+ "decision_reason": "min_confidence (75) is below auto-approve threshold (8/10). ALL four agents flagged HIGH risk. No agent recommended 'block,' but unanimous 'revise' indicates plan requires material changes before implementation. Specific blockers: (1) Log4Shell CVE incompatible with Node.js demo repo; (2) Sandbox isolation requirement contradicts Log4Shell PoC mechanism (requires LDAP/RMI callback); (3) Timeline is 5h, realistic delivery is 12-24h; (4) Zero test coverage with critical live paths (exploit execution, fallbacks) untested; (5) Security claims about 'user code never third-party' violated by Claude API fallback.",
29
+ "unanimous_high_confidence": false,
30
+ "summary_by_agent": {
31
+ "eng": "Sound concept, but Log4Shell cannot be exploited in Node.js; sandbox isolation vs. exploit mechanism are mutually exclusive; timeline severely underestimated.",
32
+ "design": "Strong wow moment, but interaction states dangerously incomplete (error, partial, empty, loading duration undefined); dashboard scope (2 full React views) unrealistic for 1 hour; accessibility gaps (color-only, no ARIA); auth/sharing model undefined.",
33
+ "qa": "MVP has zero test infrastructure; five untested fallback mechanisms (Bright Data, Daytona, Nosana, patch, OAuth) are single-points-of-failure; demo-day relies entirely on pre-recorded video backup and manual rehearsal.",
34
+ "security": "High-risk findings: (1) Dashboard public without authentication; (2) LLM patch output lacks validation gate; (3) OAuth token encryption key derivation undefined; (4) Sandbox escape risk; (5) Bright Data scraper integrity unchecked. All recommend revise + external security review."
35
+ },
36
+ "key_findings": [
37
+ "CRITICAL: Log4Shell (CVE-2021-44228) is Java/log4j, not Node.js npm-compatible. Demo repo is Node.js-only. Cannot genuinely exploit Log4Shell despite being locked as primary demo CVE. Breaks 50% of 'live exploit verification' wow moment.",
38
+ "CRITICAL: Sandbox network isolation requirement contradicts Log4Shell PoC mechanism. Log4Shell requires outbound LDAP/RMI callback. Fully isolated sandbox cannot execute. HTTP/2 Rapid Reset (CVE-2023-44487) is only viable demo CVE.",
39
+ "CRITICAL: Security claim 'user code never sent to third-party APIs (Nosana runs locally)' violated by Claude API fallback. On Nosana unavailability, user code uploads to Anthropic. Dashboard shows 'Powered by Nosana' during fallback, misleading judges.",
40
+ "CRITICAL: Timeline 5h vs. realistic 12-24h. GitHub App OAuth (2-3h) + Daytona orchestration (2-3h) + dashboard (1-2h) exceeds 5h allocation. Cuts to scope inevitable.",
41
+ "HIGH: Zero automated tests. Live exploit execution (the core 'wow moment') untested. All five fallback mechanisms (Bright Data, Daytona, Nosana, patch, OAuth) untested. Demo-day failure risk is high if external APIs fail.",
42
+ "HIGH: Dashboard auth model undefined. Scan results accessible via URL without authentication. Any person with scan URL can view complete CVE details, patches, PoC evidence. Security vulnerability (IDOR/AuthN failures).",
43
+ "HIGH: Interaction states dangerously incomplete. Error state undefined (Bright Data fails?), partial-result state undefined (sandbox crashes?), empty state undefined (no vulns found?). In security tools, silence is dangerous.",
44
+ "HIGH: Dashboard scope unrealistic. Two full React views (Technical + Executive) with responsive design, accessibility, error handling in 1 hour. Realistic: 3-5 days for production-ready dashboard.",
45
+ "HIGH: Patch generation validation missing. Plan targets 80% success rate (PRD §9) with zero harness. No automated check that generated patches compile or actually fix CVE. Nosana prompt uses CodeBERT (encoder, not generative—wrong model named).",
46
+ "MEDIUM: GitHub bot auto-creates PRs without user approval gate. Plan says 'read access' (Req 5.11) but auto-fix requires write + branch creation. Contradiction in scope.",
47
+ "MEDIUM: LLM eval suite missing. Nosana/Claude patch generation requires (1) compilation check, (2) vulnerability fix verification (re-run PoC against patched code). Zero test harness.",
48
+ "MEDIUM: PREFLIGHT_PLAN.md says 'Skip MCP,' but codeprobe-prd.md schedules 'GitHub Bot + MCP' in 15:00-16:00 block. Binding docs contradict.",
49
+ "MEDIUM: Accessibility gaps. Risk gauge uses color-only encoding (red=high). CVE table has no semantic ARIA. No mobile responsive strategy. Red-green colorblindness breaks usability."
50
+ ],
51
+ "auto_approve_eligible": false,
52
+ "auto_approve_reason_if_eligible": null,
53
+ "human_in_loop_reason_if_applicable": "Min confidence 75 < 80. Has high risk: YES (all four agents flagged HIGH). Consensus: unanimous 'revise' (no unanimous_approve). Specific blockers require material revision: (1) Fix CVE/repo mismatch, (2) Revise timeline estimate or cut scope, (3) Define critical interaction states (error, partial, empty, loading %), (4) Implement dashboard auth model, (5) Add patch generation validation harness, (6) Prioritize HTTP/2 Rapid Reset over Log4Shell, (7) Resolve security claims about third-party code sending. Recommend human review of revised plan before proceeding to implementation.",
54
+ "agent_outputs": [
55
+ {
56
+ "agent": "Eng Reviewer",
57
+ "confidence": 85,
58
+ "risk": "high",
59
+ "recommendation": "revise"
60
+ },
61
+ {
62
+ "agent": "Design Reviewer",
63
+ "confidence": 82,
64
+ "risk": "high",
65
+ "recommendation": "revise"
66
+ },
67
+ {
68
+ "agent": "QA Reviewer",
69
+ "confidence": 75,
70
+ "risk": "high",
71
+ "recommendation": "revise"
72
+ },
73
+ {
74
+ "agent": "Security Reviewer",
75
+ "confidence": 85,
76
+ "risk": "high",
77
+ "recommendation": "revise"
78
+ }
79
+ ]
80
+ }
@@ -0,0 +1,308 @@
1
+ # CodeProbe: Pending Work Inventory
2
+
3
+ ## Quick Status Summary
4
+
5
+ ```
6
+ FULLY WORKING (Green): CLI scan, Dashboard views, API server, Parser
7
+ PARTIALLY WORKING (Yellow): Patch application, Bot framework, Scraper (ejs only)
8
+ NOT WORKING (Red): Real Daytona, Real Nosana, Real Bright Data, Bot scanning
9
+ MOCKED/SIMULATED: Sandbox exploits (realistic demo), Patch generation
10
+ ```
11
+
12
+ **Overall: 65% Complete** — Ready for demo, needs work for production
13
+
14
+ ---
15
+
16
+ ## WHAT'S WORKING ✅
17
+
18
+ ### Frontend (100% Functional)
19
+ - ✅ Dashboard loads at http://localhost:3000
20
+ - ✅ GitHub OAuth login flow
21
+ - ✅ Scans list page (paginated, filtered, sorted)
22
+ - ✅ Scan detail page (risk gauge, CVE list, patch diffs)
23
+ - ✅ Business impact card ($4.9M)
24
+ - ✅ All components render correctly
25
+ - ✅ API calls working (GET /api/scans, /api/scans/{id})
26
+
27
+ ### CLI (95% Functional)
28
+ - ✅ `scan` command — End-to-end working
29
+ - ✅ `report` command — Works
30
+ - ✅ `config` command — Works
31
+ - ✅ JSON output — Works
32
+ - ✅ Verbose logging — Works
33
+ - ❌ `--fix` flag — Creates branch but doesn't apply patches to files
34
+
35
+ ### Backend/API (90% Functional)
36
+ - ✅ API server serves dashboard HTML
37
+ - ✅ API endpoints working (/api/scans, /api/auth)
38
+ - ✅ File-based scan storage working
39
+ - ✅ OAuth integration working
40
+ - ✅ Error handling working
41
+
42
+ ### Engine (Core Pipeline) (80% Functional)
43
+ - ✅ Parser — Reads package.json correctly
44
+ - ✅ Matcher — Matches CVEs to dependencies
45
+ - ✅ Risk scoring — Calculates 0-10 scale
46
+ - ⚠️ Scraper — Works for ejs, empty for others
47
+ - ⚠️ Sandbox — Simulates ejs RCE, realistic output
48
+ - ⚠️ Patcher — Returns pre-baked patches only
49
+
50
+ ### Tests (100% Passing)
51
+ - ✅ 25/25 tests pass
52
+ - ✅ Engine tests pass
53
+ - ✅ CLI tests pass
54
+ - ✅ Dashboard tests pass
55
+
56
+ ---
57
+
58
+ ## WHAT'S PENDING (INCOMPLETE) 🔴
59
+
60
+ ### High Priority (Blocks Demo)
61
+
62
+ | Feature | Issue | Impact | Fix Time |
63
+ |---------|-------|--------|----------|
64
+ | **Patch Application** | --fix creates branch but doesn't modify files | Users can't apply patches | 30 min |
65
+ | **GitHub Bot Scanning** | Bot receives webhooks but doesn't scan repos | Can't scan PRs automatically | 2 hours |
66
+ | **API Authentication** | Dev mode accepts any token; no production auth | Can't deploy to prod securely | 1 hour |
67
+
68
+ ### Medium Priority (Nice to Have for Demo)
69
+
70
+ | Feature | Issue | Impact | Fix Time |
71
+ |---------|-------|--------|----------|
72
+ | **Real Daytona Integration** | Sandbox exploits are simulated | Can't verify real vulnerabilities | 4 hours |
73
+ | **Real Nosana Integration** | Patches are pre-baked only | No LLM-generated fixes | 4 hours |
74
+ | **Multi-Language Support** | Only Node.js (npm) works | Can't scan Python/Rust/Go/Java repos | 2+ days |
75
+ | **WebSocket Updates** | Dashboard doesn't auto-refresh | No real-time progress | 2 hours |
76
+
77
+ ### Low Priority (Post-Hackathon)
78
+
79
+ | Feature | Issue | Impact | Fix Time |
80
+ |---------|-------|--------|----------|
81
+ | **Database** | File-based storage only | No persistent history, no scaling | 1 day |
82
+ | **MCP Full Integration** | Framework exists, no repo ops | Can't use from Claude Desktop | 2 hours |
83
+ | **Production Deployment** | Hardcoded localhost everywhere | Can't run on servers | 2 hours |
84
+ | **Audit Logs** | No logging of actions | Can't track who did what | 3 hours |
85
+
86
+ ---
87
+
88
+ ## WHAT'S MOCKED (INTENTIONAL) 🎭
89
+
90
+ ### For MVP Demo (Acceptable)
91
+
92
+ 1. **Daytona Sandbox** → Simulates ejs RCE exploit
93
+ - Returns realistic output
94
+ - Works for demo, not production
95
+ - **To use real Daytona**: Requires Docker/K8s, ~4 hours
96
+
97
+ 2. **Nosana Patch Generation** → Returns pre-baked patches
98
+ - Demonstrates what patches would look like
99
+ - Only ejs@3.1.6 → 3.1.7 defined
100
+ - **To use real Nosana**: Requires GPU account, ~4 hours
101
+
102
+ 3. **Bright Data Scraping** → Falls back to NVD API
103
+ - Demonstrates what Bright Data would do
104
+ - Only ejs CVE-2022-29078 tested
105
+ - **To use real Bright Data**: Requires account, ~2 hours
106
+
107
+ ---
108
+
109
+ ## SPECIFIC PENDING ITEMS BY COMPONENT
110
+
111
+ ### Frontend (src/dashboard/) — 95% DONE
112
+
113
+ **Working:**
114
+ ```
115
+ ✅ frontend.tsx — Multi-page SPA
116
+ ✅ LoginPage.tsx — GitHub OAuth
117
+ ✅ ScansListPage.tsx — Lists scans
118
+ ✅ ScanDetailPage.tsx — Shows details
119
+ ✅ RiskGauge.tsx — Draws gauge
120
+ ✅ CVETable.tsx — Lists CVEs
121
+ ✅ BusinessImpactCard.tsx — Shows $4.9M
122
+ ✅ PatchDiffViewer.tsx — Shows diffs
123
+ ✅ useAuth.ts — Token management
124
+ ✅ useScan.ts — API calls
125
+ ```
126
+
127
+ **Pending:**
128
+ ```
129
+ ❌ Executive/Technical view toggle
130
+ ❌ Real-time WebSocket updates
131
+ ❌ PDF export
132
+ ❌ Trend analysis (historical)
133
+ ⚠️ Hardcoded GitHub client ID (should be env var)
134
+ ⚠️ Hardcoded CORS origin
135
+ ```
136
+
137
+ ### Backend (src/api/) — 90% DONE
138
+
139
+ **Working:**
140
+ ```
141
+ ✅ server.ts — Serves HTML + API
142
+ ✅ /api/scans — Lists scans
143
+ ✅ /api/scans/{id} — Gets scan
144
+ ✅ /api/auth/github — OAuth callback
145
+ ✅ CORS headers
146
+ ```
147
+
148
+ **Pending:**
149
+ ```
150
+ ❌ Production authentication (JWT)
151
+ ❌ Webhook signature verification
152
+ ❌ Database integration
153
+ ⚠️ Hardcoded dev auth bypass
154
+ ```
155
+
156
+ ### Engine (src/engine/) — 80% DONE
157
+
158
+ **Working:**
159
+ ```
160
+ ✅ parser.ts — Reads package.json
161
+ ✅ matcher.ts — Matches CVEs
162
+ ✅ report.ts — Saves scans
163
+ ```
164
+
165
+ **Partially Working:**
166
+ ```
167
+ ⚠️ scraper.ts — Only ejs works, others empty
168
+ ⚠️ sandbox.ts — Simulates ejs, not real Daytona
169
+ ⚠️ patcher.ts — Pre-baked only, no LLM
170
+ ```
171
+
172
+ **Pending:**
173
+ ```
174
+ ❌ Multi-language support (Python, Rust, etc.)
175
+ ❌ Real Daytona integration
176
+ ❌ Real Nosana integration
177
+ ❌ Real Bright Data integration
178
+ ```
179
+
180
+ ### CLI (src/cli/) — 95% DONE
181
+
182
+ **Working:**
183
+ ```
184
+ ✅ index.ts — Command dispatch
185
+ ✅ commands/scan.ts — Scan execution
186
+ ✅ commands/report.ts — Display results
187
+ ✅ config.ts — Token management
188
+ ✅ progress.ts — Progress logging
189
+ ```
190
+
191
+ **Partially Working:**
192
+ ```
193
+ ⚠️ commands/scan-with-fix.ts — Creates branch but doesn't apply patches
194
+ ```
195
+
196
+ **Pending:**
197
+ ```
198
+ ❌ Actual file modification for --fix
199
+ ❌ Git push integration
200
+ ```
201
+
202
+ ### Bot (src/bot/) — 30% DONE
203
+
204
+ **Working:**
205
+ ```
206
+ ✅ server.ts — Webhook listener
207
+ ✅ Posts initial comment
208
+ ```
209
+
210
+ **Pending:**
211
+ ```
212
+ ❌ Clone repository
213
+ ❌ Run engine.scan()
214
+ ❌ Update PR comment with results
215
+ ❌ Create auto-fix PR
216
+ ❌ Webhook signature verification
217
+ ```
218
+
219
+ ### MCP (src/mcp/) — 30% DONE
220
+
221
+ **Working:**
222
+ ```
223
+ ✅ server.ts — JSON-RPC listener
224
+ ✅ Tool definitions (scan_repository, get_scan_results, etc.)
225
+ ```
226
+
227
+ **Pending:**
228
+ ```
229
+ ❌ Actual repo cloning
230
+ ❌ Scan execution
231
+ ❌ Patch application
232
+ ❌ Full integration with Claude Desktop
233
+ ```
234
+
235
+ ### CI/CD (.github/workflows/) — 100% DONE
236
+
237
+ ```
238
+ ✅ Workflow runs on every PR
239
+ ✅ Executes scan
240
+ ✅ Uploads SARIF
241
+ ✅ Posts results in PR comment
242
+ ```
243
+
244
+ ---
245
+
246
+ ## QUESTIONS BEFORE I PROCEED ❓
247
+
248
+ Before I start fixing everything, I need to clarify your priorities:
249
+
250
+ ### 1. **Demo Scope**
251
+ - **Option A**: Fix only what's needed for hackathon judging (60% effort)
252
+ - **Option B**: Make everything production-ready (200% effort)
253
+ - **Option C**: Fix critical path + real integrations (120% effort)
254
+
255
+ ### 2. **Sponsor APIs**
256
+ - **Option A**: Keep mocks, just add branding ✅ (DONE)
257
+ - **Option B**: Actually integrate real Daytona/Nosana/Bright Data APIs (8+ hours)
258
+ - **Option C**: Keep mocks but make them more realistic (2 hours)
259
+
260
+ ### 3. **Backend Features**
261
+ - **Priority 1**: Fix patch application (--fix flag actually modifies files)? (Yes/No)
262
+ - **Priority 2**: Implement bot scanning? (Yes/No)
263
+ - **Priority 3**: Add database instead of files? (Yes/No)
264
+ - **Priority 4**: Multi-language support? (Yes/No)
265
+
266
+ ### 4. **Frontend Enhancements**
267
+ - **Priority 1**: Fix hardcoded values (Client ID, CORS)? (Yes/No)
268
+ - **Priority 2**: Add Executive/Technical view toggle? (Yes/No)
269
+ - **Priority 3**: Add WebSocket updates? (Yes/No)
270
+
271
+ ### 5. **Tests**
272
+ - **Option A**: Keep unit tests only
273
+ - **Option B**: Add E2E tests (Playwright) for dashboard
274
+ - **Option C**: Add integration tests for full pipeline
275
+
276
+ ---
277
+
278
+ ## WHAT I RECOMMEND 💡
279
+
280
+ **For Hackathon (Next 2 hours):**
281
+ 1. ✅ Fix patch application (--fix flag)
282
+ 2. ✅ Remove hardcoded values (env vars)
283
+ 3. ✅ Fix bot framework to actually scan
284
+ 4. ✅ Add simple E2E test showing full flow
285
+ 5. ✅ Clean up demo data
286
+
287
+ **For Production (After hackathon):**
288
+ 1. Real Daytona/Nosana/Bright Data integration
289
+ 2. Database instead of files
290
+ 3. JWT authentication
291
+ 4. Multi-language support
292
+ 5. Monitoring/alerting
293
+
294
+ ---
295
+
296
+ ## Please Answer These 5 Questions:
297
+
298
+ 1. **What's your priority: Demo perfect OR All features working?**
299
+ 2. **Do you want real sponsor APIs or keep the mocks?**
300
+ 3. **How much time do you have? (1 hour? 4 hours? Full day?)**
301
+ 4. **Should I focus on backend or frontend first?**
302
+ 5. **Should I add tests or just fix the code?**
303
+
304
+ Once you answer, I'll:
305
+ - Create a detailed fix plan
306
+ - Implement fixes in order of priority
307
+ - Test everything end-to-end
308
+ - Commit and push to main
@@ -0,0 +1,182 @@
1
+ # CodeProbe MVP — Preflight Plan
2
+
3
+ **Status:** Foundation Discovery Complete
4
+ **Build Window:** 5 hours (hackathon)
5
+ **Target Event:** AgentForge SG Super AI Edition, June 2026
6
+
7
+ ---
8
+
9
+ ## Scope
10
+
11
+ ### In Scope (Must Ship)
12
+ - Working CLI (`codeprobe scan`) with live exploit verification
13
+ - Bright Data CVE scraping (real integration)
14
+ - Daytona sandbox spawning + PoC execution
15
+ - Detailed terminal + JSON report output
16
+ - GitHub bot (real OAuth, PR comments + auto-fix PR creation)
17
+ - React dashboard (Technical + Executive views)
18
+ - Business impact messaging ($4.9M breach cost)
19
+
20
+ ### Out of Scope (Nice to Have / Post-Hackathon)
21
+ - MCP server (too risky time-wise; skip for MVP)
22
+ - CI/CD GitHub Action (cut if time < 1 hour remaining)
23
+ - Multi-language support (Node.js only for MVP)
24
+ - Custom PoC upload
25
+ - Historical scan tracking / audit logs
26
+
27
+ ### Demo Day Visible
28
+ - Live CLI scan of demo repo with real Bright Data + Daytona exploit execution
29
+ - 2 confirmed exploitable CVEs demonstrated
30
+ - GitHub bot commenting on a test PR
31
+ - Dashboard showing business impact
32
+
33
+ ---
34
+
35
+ ## Grill-Me Decisions (Locked)
36
+
37
+ | Decision | Choice | Rationale |
38
+ |----------|--------|-----------|
39
+ | **Demo CVEs** | HTTP/2 Rapid Reset (CVE-2023-44487) ONLY (Log4Shell removed — Java/log4j incompatible with Node.js demo repo) | Log4Shell can't work in Node.js repo + requires outbound callbacks incompatible with isolated sandbox. HTTP/2 is DoS, works in isolation, Node.js compatible, public PoCs exist. |
40
+ | **Time Crunch Fallback** | **Revised priority**: CLI + exploit verification first (non-negotiable). Dashboard minimal second. Bot + GitHub OAuth as stretch. MCP + CI cut. | Exploit verification is the only "wow moment" that matters. Everything else is bonus. |
41
+ | **Wow Moment** | Live sandbox exploit execution (real-time PoC success/failure proof) — HTTP/2 DoS verified in isolated container | Differentiates from theoretical scanning; judges see actual vulnerability confirmation with pre-baked patches as fallback. |
42
+ | **GitHub Bot** | Cut from MVP unless time allows (2-3h for OAuth + webhook setup) | Exploit verification alone is sufficient for hackathon. Bot is nice-to-have, not must-have. |
43
+ | **Patch Generation** | Pre-bake patches for demo CVEs into codebase + validate harness for LLM fallback | Zero failure risk on patches. LLM (Nosana/Claude) generation is stretch goal with validation test. |
44
+ | **Dashboard Auth** | GitHub OAuth required (scan results are sensitive security data) | Without auth, anyone with scan URL can view CVE details, PoCs, patches — IDOR vulnerability. Implement simple login. |
45
+
46
+ ---
47
+
48
+ ## Foundations (Nine Technical Locks)
49
+
50
+ | # | Area | Decision | Notes |
51
+ |----|------|----------|-------|
52
+ | 1 | **Schema** | Simple JSON: `{ scan_id, timestamp, repo_url, cves: [{id, severity, exploitable, patch_diff}], risk_score }` | No database (MVP); filesystem storage `~/.codeprobe/scans/` + S3 for dashboard |
53
+ | 2 | **TypeScript** | TypeScript strict mode + shared types across CLI, engine, bot, dashboard | `src/shared/types.ts` for all data contracts |
54
+ | 3 | **Validation** | Zod for runtime schema validation (repo URLs, CVE data, patch diffs) | Zero runtime overhead post-validation; lightweight for Bun |
55
+ | 4 | **Routing** | REST API: POST `/api/scan` (start), GET `/api/scan/:id` (status), GET `/api/results/:id` (full report) | Stateless, simple webhooks for bot |
56
+ | 5 | **Auth** | GitHub OAuth for bot + CLI (store encrypted in `~/.codeprobe/auth.json`). Sponsor API keys as env vars. | No user accounts (MVP). OAuth flow pre-tested. |
57
+ | 6 | **CSS** | TailwindCSS for dashboard React app | Fast, responsive utilities, no build friction |
58
+ | 7 | **UI Framework** | React 18 + Vite for dashboard. Terminal UI (chalk + table-like output) for CLI. | No heavy Terminal UI framework; keep CLI simple |
59
+ | 8 | **Client-Server** | **Streaming** (Server-Sent Events). CLI spawns local scan engine, polls/streams progress via event emitter. | Event-driven; CLI sees real-time: "Scraping...", "Spinning up...", "Exploit running...", "Done." |
60
+ | 9 | **Folders** | Monorepo: `src/cli`, `src/engine`, `src/dashboard`, `src/bot`, `src/shared`. Each is independently testable. | Clear boundaries; minimal cross-module coupling. |
61
+
62
+ ---
63
+
64
+ ## Architecture Overview
65
+
66
+ ```
67
+ CLI (Bun CLI executable)
68
+
69
+ Local Engine (dependency parser, CVE matcher, sandbox orchestrator)
70
+
71
+ Bright Data (async CVE scraping)
72
+
73
+ Daytona (sandbox pool, exploit runner)
74
+
75
+ Nosana LLM or Claude API (patch generation)
76
+
77
+ Report Builder (JSON + formatted output)
78
+
79
+ Dashboard (React, pulls latest scan from S3/local cache)
80
+
81
+ GitHub Bot (webhook handler, PR comments, auto-fix)
82
+ ```
83
+
84
+ ---
85
+
86
+ ## Data Flow
87
+
88
+ 1. **CLI Input**: `codeprobe scan <repo-url-or-local-path>`
89
+ 2. **Dependency Parsing**: Extract versions from `package.json`, `package-lock.json`
90
+ 3. **CVE Scraping**: Bright Data scrapes NVD, Exploit-DB, Snyk (parallel, 30s target)
91
+ 4. **CVE Matching**: Semver matching of dependencies to known CVEs
92
+ 5. **Sandbox Spawning**: Daytona creates isolated containers for CRITICAL CVEs (3 at a time)
93
+ 6. **Exploit Execution**: PoC script runs in sandbox, captures output/filesystem/network
94
+ 7. **Verification**: Exploit succeeded = "Confirmed Exploitable"; failed = "Theoretical Risk"
95
+ 8. **Patch Generation**: Nosana LLM generates code diffs (or pre-baked fallback)
96
+ 9. **Report Output**: JSON saved locally, uploaded to S3, displayed in dashboard + CLI
97
+ 10. **GitHub Bot**: Webhook fetches latest scan, posts PR comment, offers auto-fix PR
98
+
99
+ ---
100
+
101
+ ## MVP Deliverables
102
+
103
+ ### Hour 0 (Prep, before build): Critical Setup
104
+ - [ ] Bun project with TypeScript strict mode + shared types
105
+ - [ ] Provision Bright Data, Daytona, Nosana API keys
106
+ - [ ] Create demo repo with HTTP/2 vulnerable server
107
+ - [ ] Pre-generate + validate patches for demo CVE
108
+ - [ ] Test Daytona sandbox spawn + exploit execution (offline)
109
+ - [ ] Set up GitHub OAuth test app (if dashboard included)
110
+
111
+ ### Hour 1 (0:00–1:00): Core Engine + CLI Bootstrap
112
+ - [ ] Bun project initialized with TypeScript
113
+ - [ ] Dependency parser (Node.js package.json parsing)
114
+ - [ ] Bright Data scraper (test with NVD — fallback to cached JSON if fails)
115
+ - [ ] Daytona sandbox integration (spawn, install, run PoC)
116
+ - [ ] Report builder (JSON schema: scan_id, CVEs, risk_score, patches)
117
+ - [ ] CLI `codeprobe scan` command skeleton
118
+
119
+ ### Hour 2 (1:00–2:00): Orchestration + Exploit Verification
120
+ - [ ] Sandbox orchestrator (single CVE execution, capture output)
121
+ - [ ] Exploit runner (inject PoC script, timeout + retry logic)
122
+ - [ ] Verification logic (exploit succeeded/failed detection)
123
+ - [ ] CLI end-to-end test on demo repo (live Bright Data + Daytona)
124
+ - [ ] Terminal output (colors, progress, results table)
125
+
126
+ ### Hour 3 (2:00–3:00): Validation + Fallbacks
127
+ - [ ] LLM patch generation (pre-baked patches + Nosana/Claude fallback with validation)
128
+ - [ ] Error handling (Bright Data fails → cached CVE data, Daytona crashes → retry)
129
+ - [ ] Config file (`~/.codeprobe/config`, GitHub auth token storage)
130
+ - [ ] `codeprobe scan --fix` branch creation + commit
131
+ - [ ] Full integration test (CLI start-to-finish on demo repo)
132
+
133
+ ### Hour 4+ (3:00–5:00): Dashboard (if time) + Polish
134
+ - **If 4+ hours available**: React dashboard (Technical view only) + GitHub OAuth login
135
+ - [ ] Scan history list + detail view
136
+ - [ ] Risk score display + CVE table
137
+ - [ ] Patch diff viewer
138
+ - [ ] GitHub OAuth integration
139
+ - **Always by 5:00**: Demo rehearsal (3–5 times), record fallback video, final bug fixes
140
+
141
+ ### Stretch Goals (if time > 5h, include ONLY if time safe):
142
+ - [ ] Executive view (business impact messaging)
143
+ - [ ] GitHub bot webhook (PR comments, auto-fix PR)
144
+ - [ ] SARIF output for CI/CD
145
+
146
+ ---
147
+
148
+ ## Risk Assessment (Pre-Preflight)
149
+
150
+ | Risk | Likelihood | Mitigation |
151
+ |------|------------|-----------|
152
+ | Bright Data rate-limited during demo | Medium | Pre-cache CVE data; have offline mode |
153
+ | Daytona sandbox timeout | Low | Retry logic (max 2 retries); mark as "Verification Failed" |
154
+ | Nosana cold start > 60s | Medium | Pre-test; have Claude API fallback ready |
155
+ | GitHub OAuth fails demo day | Low | Test pre-hackathon; have manual token fallback |
156
+ | Patch generation broken | Medium | Pre-generate 2–3 patches for demo CVEs; bake into dashboard |
157
+ | Scope creep / time overrun | High | **Strict cut order: skip MCP → skip CI → skip dashboard polish → keep CLI + bot + exploit** |
158
+
159
+ ---
160
+
161
+ ## Success Criteria
162
+
163
+ **MVP Demo Must Show:**
164
+ 1. Live Bright Data scraping
165
+ 2. Daytona sandbox spawning
166
+ 3. PoC exploit running in sandbox
167
+ 4. Output: 2 CVEs marked "Confirmed Exploitable"
168
+ 5. Patch generated (or shown as example)
169
+ 6. GitHub bot commenting on a PR
170
+ 7. Business impact messaging (judge understands $4.9M value)
171
+
172
+ **Non-negotiable:**
173
+ - Working CLI
174
+ - Real Daytona exploit verification
175
+ - Real GitHub bot (not mock)
176
+ - Business impact clear
177
+
178
+ ---
179
+
180
+ ## Known Unknowns
181
+
182
+ None. All decisions locked. Sponsor API keys provisioned. Ready to preflight agent review.