rogue-live-redteam 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322)
  1. rogue_live_redteam-1.0.0/.gitignore +237 -0
  2. rogue_live_redteam-1.0.0/LICENSE +21 -0
  3. rogue_live_redteam-1.0.0/PKG-INFO +304 -0
  4. rogue_live_redteam-1.0.0/README.md +256 -0
  5. rogue_live_redteam-1.0.0/pyproject.toml +94 -0
  6. rogue_live_redteam-1.0.0/src/rogue/__init__.py +35 -0
  7. rogue_live_redteam-1.0.0/src/rogue/adapters/__init__.py +38 -0
  8. rogue_live_redteam-1.0.0/src/rogue/adapters/_provider_errors.py +127 -0
  9. rogue_live_redteam-1.0.0/src/rogue/adapters/anthropic.py +221 -0
  10. rogue_live_redteam-1.0.0/src/rogue/adapters/base.py +99 -0
  11. rogue_live_redteam-1.0.0/src/rogue/adapters/custom.py +41 -0
  12. rogue_live_redteam-1.0.0/src/rogue/adapters/gemini.py +255 -0
  13. rogue_live_redteam-1.0.0/src/rogue/adapters/mock.py +143 -0
  14. rogue_live_redteam-1.0.0/src/rogue/adapters/model_specs.py +195 -0
  15. rogue_live_redteam-1.0.0/src/rogue/adapters/openai.py +55 -0
  16. rogue_live_redteam-1.0.0/src/rogue/adapters/openai_compat.py +272 -0
  17. rogue_live_redteam-1.0.0/src/rogue/adapters/openrouter.py +34 -0
  18. rogue_live_redteam-1.0.0/src/rogue/api/__init__.py +1 -0
  19. rogue_live_redteam-1.0.0/src/rogue/api/demo.py +108 -0
  20. rogue_live_redteam-1.0.0/src/rogue/api/main.py +1854 -0
  21. rogue_live_redteam-1.0.0/src/rogue/api/newsletter.py +88 -0
  22. rogue_live_redteam-1.0.0/src/rogue/api/observability.py +162 -0
  23. rogue_live_redteam-1.0.0/src/rogue/api/v1/__init__.py +4 -0
  24. rogue_live_redteam-1.0.0/src/rogue/api/v1/attestation.py +160 -0
  25. rogue_live_redteam-1.0.0/src/rogue/api/v1/deps.py +101 -0
  26. rogue_live_redteam-1.0.0/src/rogue/api/v1/scans.py +287 -0
  27. rogue_live_redteam-1.0.0/src/rogue/api/v1/slack_events.py +109 -0
  28. rogue_live_redteam-1.0.0/src/rogue/api/v1/validate_benchmark.py +158 -0
  29. rogue_live_redteam-1.0.0/src/rogue/attestation/__init__.py +63 -0
  30. rogue_live_redteam-1.0.0/src/rogue/attestation/chain.py +160 -0
  31. rogue_live_redteam-1.0.0/src/rogue/attestation/emit.py +182 -0
  32. rogue_live_redteam-1.0.0/src/rogue/attestation/remediation.py +156 -0
  33. rogue_live_redteam-1.0.0/src/rogue/attestation/replay.py +107 -0
  34. rogue_live_redteam-1.0.0/src/rogue/attestation/schemas.py +83 -0
  35. rogue_live_redteam-1.0.0/src/rogue/attestation/service.py +255 -0
  36. rogue_live_redteam-1.0.0/src/rogue/benchmark.py +131 -0
  37. rogue_live_redteam-1.0.0/src/rogue/cli.py +299 -0
  38. rogue_live_redteam-1.0.0/src/rogue/client.py +194 -0
  39. rogue_live_redteam-1.0.0/src/rogue/config.py +174 -0
  40. rogue_live_redteam-1.0.0/src/rogue/core/ARCHITECTURE.md +194 -0
  41. rogue_live_redteam-1.0.0/src/rogue/core/__init__.py +71 -0
  42. rogue_live_redteam-1.0.0/src/rogue/core/attachment.py +116 -0
  43. rogue_live_redteam-1.0.0/src/rogue/core/capabilities.py +76 -0
  44. rogue_live_redteam-1.0.0/src/rogue/core/conformance.py +292 -0
  45. rogue_live_redteam-1.0.0/src/rogue/core/content_blocks.py +112 -0
  46. rogue_live_redteam-1.0.0/src/rogue/core/errors.py +136 -0
  47. rogue_live_redteam-1.0.0/src/rogue/core/invocation.py +110 -0
  48. rogue_live_redteam-1.0.0/src/rogue/core/message.py +88 -0
  49. rogue_live_redteam-1.0.0/src/rogue/core/registry.py +84 -0
  50. rogue_live_redteam-1.0.0/src/rogue/data/benchmark/advbench_100.jsonl +40 -0
  51. rogue_live_redteam-1.0.0/src/rogue/data/benchmark/jbb_100.jsonl +40 -0
  52. rogue_live_redteam-1.0.0/src/rogue/db/__init__.py +1 -0
  53. rogue_live_redteam-1.0.0/src/rogue/db/bandit_state.py +30 -0
  54. rogue_live_redteam-1.0.0/src/rogue/db/image_cache.py +144 -0
  55. rogue_live_redteam-1.0.0/src/rogue/db/migrations/README +1 -0
  56. rogue_live_redteam-1.0.0/src/rogue/db/migrations/env.py +53 -0
  57. rogue_live_redteam-1.0.0/src/rogue/db/migrations/script.py.mako +28 -0
  58. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0001_initial_schema.py +254 -0
  59. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0002_breach_matrix_view.py +70 -0
  60. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0003_align_bright_data_cost_log.py +139 -0
  61. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0004_add_multi_turn_persona_chain_family.py +57 -0
  62. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0005_add_persona_used_to_breach_results.py +63 -0
  63. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0006_add_synthesized_and_slot_requirements.py +115 -0
  64. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0007_add_pair_refinement_steps.py +128 -0
  65. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0008_add_breach_matrix_materialized.py +105 -0
  66. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0009_breach_matrix_baseline_only.py +95 -0
  67. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0010_add_bandit_state.py +39 -0
  68. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0011_add_fetch_cache.py +60 -0
  69. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0012_add_primitive_images.py +51 -0
  70. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0013_add_attack_strategies.py +81 -0
  71. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0014_strategy_lifecycle.py +144 -0
  72. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0015_add_renderer_capabilities.py +96 -0
  73. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0016_renderer_ladder_strategies.py +34 -0
  74. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0017_add_ladder_attempts.py +67 -0
  75. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0018_split_valid_trials.py +49 -0
  76. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0019_add_ladder_rotation_membership.py +57 -0
  77. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0020_add_technique_provenance.py +42 -0
  78. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0021_add_benchmark_runs.py +60 -0
  79. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0022_platform_tables.py +135 -0
  80. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0023_secrets.py +34 -0
  81. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0024_integrations.py +38 -0
  82. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0025_adaptive_prioritization_columns.py +59 -0
  83. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0026_technique_retrieval_tables.py +101 -0
  84. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0027_primitive_grammar_labels.py +87 -0
  85. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0028_demo_requests.py +48 -0
  86. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0029_newsletter_subscribers.py +50 -0
  87. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0030_reconcile_constraints.py +134 -0
  88. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0031_attestation_entries.py +102 -0
  89. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0032_mitigations.py +82 -0
  90. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0033_slack_registered_agents.py +47 -0
  91. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0034_slack_snapshot_and_policy.py +51 -0
  92. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0035_slack_target_api_key.py +30 -0
  93. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0036_oversight_tables.py +95 -0
  94. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0037_skill_pool.py +166 -0
  95. rogue_live_redteam-1.0.0/src/rogue/db/migrations/versions/0038_add_exfil_method_to_breach_results.py +61 -0
  96. rogue_live_redteam-1.0.0/src/rogue/db/models.py +1278 -0
  97. rogue_live_redteam-1.0.0/src/rogue/db/neon_sync.py +197 -0
  98. rogue_live_redteam-1.0.0/src/rogue/dedupe/__init__.py +24 -0
  99. rogue_live_redteam-1.0.0/src/rogue/dedupe/embeddings.py +158 -0
  100. rogue_live_redteam-1.0.0/src/rogue/dedupe/function_word_divergence.py +126 -0
  101. rogue_live_redteam-1.0.0/src/rogue/dedupe/quarantine.py +65 -0
  102. rogue_live_redteam-1.0.0/src/rogue/diff/__init__.py +1 -0
  103. rogue_live_redteam-1.0.0/src/rogue/diff/bootstrap.py +103 -0
  104. rogue_live_redteam-1.0.0/src/rogue/diff/threat_brief.py +733 -0
  105. rogue_live_redteam-1.0.0/src/rogue/extract/__init__.py +1 -0
  106. rogue_live_redteam-1.0.0/src/rogue/extract/extraction_agent.py +1038 -0
  107. rogue_live_redteam-1.0.0/src/rogue/extract/prompts/extraction_v1.md +270 -0
  108. rogue_live_redteam-1.0.0/src/rogue/extract/prompts/extraction_v2.md +367 -0
  109. rogue_live_redteam-1.0.0/src/rogue/extract/prompts/extraction_v3.md +456 -0
  110. rogue_live_redteam-1.0.0/src/rogue/extract/prompts/extraction_v4.md +511 -0
  111. rogue_live_redteam-1.0.0/src/rogue/governance/__init__.py +86 -0
  112. rogue_live_redteam-1.0.0/src/rogue/governance/assurance.py +483 -0
  113. rogue_live_redteam-1.0.0/src/rogue/governance/coverage.py +231 -0
  114. rogue_live_redteam-1.0.0/src/rogue/governance/decompose.py +258 -0
  115. rogue_live_redteam-1.0.0/src/rogue/governance/reaim.py +196 -0
  116. rogue_live_redteam-1.0.0/src/rogue/governance/report.py +111 -0
  117. rogue_live_redteam-1.0.0/src/rogue/governance/rule_judge.py +301 -0
  118. rogue_live_redteam-1.0.0/src/rogue/governance/scan_runner.py +148 -0
  119. rogue_live_redteam-1.0.0/src/rogue/grammar/__init__.py +21 -0
  120. rogue_live_redteam-1.0.0/src/rogue/grammar/combinations.py +398 -0
  121. rogue_live_redteam-1.0.0/src/rogue/grammar/dataset.py +292 -0
  122. rogue_live_redteam-1.0.0/src/rogue/grammar/labeler.py +299 -0
  123. rogue_live_redteam-1.0.0/src/rogue/grammar/stats.py +300 -0
  124. rogue_live_redteam-1.0.0/src/rogue/grammar/validation.py +807 -0
  125. rogue_live_redteam-1.0.0/src/rogue/harvest/__init__.py +1 -0
  126. rogue_live_redteam-1.0.0/src/rogue/harvest/bandit.py +279 -0
  127. rogue_live_redteam-1.0.0/src/rogue/harvest/bandit_attribution.py +162 -0
  128. rogue_live_redteam-1.0.0/src/rogue/harvest/bandit_serp_phase.py +326 -0
  129. rogue_live_redteam-1.0.0/src/rogue/harvest/bright_data_client.py +1369 -0
  130. rogue_live_redteam-1.0.0/src/rogue/harvest/discovery_agent.py +634 -0
  131. rogue_live_redteam-1.0.0/src/rogue/harvest/fetch_cache.py +134 -0
  132. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/__init__.py +29 -0
  133. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/base.py +162 -0
  134. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/brightdata.py +141 -0
  135. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/capabilities.py +50 -0
  136. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/conformance.py +183 -0
  137. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/crawl4ai.py +282 -0
  138. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/ddg.py +406 -0
  139. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/direct.py +260 -0
  140. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/firecrawl.py +295 -0
  141. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/hf_api.py +307 -0
  142. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/playwright.py +200 -0
  143. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/reddit_oauth.py +439 -0
  144. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/registry.py +216 -0
  145. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/routing.py +114 -0
  146. rogue_live_redteam-1.0.0/src/rogue/harvest/fetchers/x_besteffort.py +404 -0
  147. rogue_live_redteam-1.0.0/src/rogue/harvest/link_extract.py +176 -0
  148. rogue_live_redteam-1.0.0/src/rogue/harvest/link_follow_phase.py +219 -0
  149. rogue_live_redteam-1.0.0/src/rogue/harvest/media_extract.py +254 -0
  150. rogue_live_redteam-1.0.0/src/rogue/harvest/media_fetch.py +198 -0
  151. rogue_live_redteam-1.0.0/src/rogue/harvest/media_ingest.py +294 -0
  152. rogue_live_redteam-1.0.0/src/rogue/harvest/media_pdf.py +126 -0
  153. rogue_live_redteam-1.0.0/src/rogue/harvest/source_date.py +82 -0
  154. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/__init__.py +29 -0
  155. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/arxiv_listing.py +193 -0
  156. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/base.py +128 -0
  157. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/blog_static.py +198 -0
  158. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/community_archive.py +170 -0
  159. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/github_search.py +377 -0
  160. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/huggingface_discussion.py +193 -0
  161. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/leakhub_scrape.py +291 -0
  162. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/obliteratus_hf.py +169 -0
  163. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/pliny_github.py +454 -0
  164. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/reddit_subreddit.py +252 -0
  165. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/x_user_timeline.py +143 -0
  166. rogue_live_redteam-1.0.0/src/rogue/harvest/sources/x_via_unlocker.py +128 -0
  167. rogue_live_redteam-1.0.0/src/rogue/harvest/x_status.py +56 -0
  168. rogue_live_redteam-1.0.0/src/rogue/instrument/__init__.py +49 -0
  169. rogue_live_redteam-1.0.0/src/rogue/instrument/model.py +311 -0
  170. rogue_live_redteam-1.0.0/src/rogue/instrument/project.py +104 -0
  171. rogue_live_redteam-1.0.0/src/rogue/integrations/__init__.py +9 -0
  172. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/__init__.py +74 -0
  173. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/agent_store.py +274 -0
  174. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/change_witness.py +277 -0
  175. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/delivery.py +122 -0
  176. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/diff_post.py +241 -0
  177. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/harvest_hook.py +82 -0
  178. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/inbound.py +99 -0
  179. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/policy.py +106 -0
  180. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/redline_guard.py +341 -0
  181. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/registration.py +220 -0
  182. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/signing.py +70 -0
  183. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/trigger.py +147 -0
  184. rogue_live_redteam-1.0.0/src/rogue/integrations/slack/tripwire.py +304 -0
  185. rogue_live_redteam-1.0.0/src/rogue/mcp_server/__init__.py +1 -0
  186. rogue_live_redteam-1.0.0/src/rogue/mcp_server/scan_tools.py +714 -0
  187. rogue_live_redteam-1.0.0/src/rogue/mcp_server/server.py +671 -0
  188. rogue_live_redteam-1.0.0/src/rogue/mcp_server/slack_tools.py +312 -0
  189. rogue_live_redteam-1.0.0/src/rogue/memory/ARCHITECTURE.md +54 -0
  190. rogue_live_redteam-1.0.0/src/rogue/memory/__init__.py +33 -0
  191. rogue_live_redteam-1.0.0/src/rogue/memory/attestation.py +336 -0
  192. rogue_live_redteam-1.0.0/src/rogue/memory/cohorts.py +196 -0
  193. rogue_live_redteam-1.0.0/src/rogue/memory/combination.py +845 -0
  194. rogue_live_redteam-1.0.0/src/rogue/memory/economics.py +267 -0
  195. rogue_live_redteam-1.0.0/src/rogue/memory/judges.py +588 -0
  196. rogue_live_redteam-1.0.0/src/rogue/memory/leakage.py +678 -0
  197. rogue_live_redteam-1.0.0/src/rogue/memory/pool.py +388 -0
  198. rogue_live_redteam-1.0.0/src/rogue/memory/promotion.py +404 -0
  199. rogue_live_redteam-1.0.0/src/rogue/memory/reverification.py +156 -0
  200. rogue_live_redteam-1.0.0/src/rogue/notify.py +52 -0
  201. rogue_live_redteam-1.0.0/src/rogue/oversight/ARCHITECTURE.md +33 -0
  202. rogue_live_redteam-1.0.0/src/rogue/oversight/__init__.py +4 -0
  203. rogue_live_redteam-1.0.0/src/rogue/oversight/attestation.py +233 -0
  204. rogue_live_redteam-1.0.0/src/rogue/oversight/case_corpus.py +205 -0
  205. rogue_live_redteam-1.0.0/src/rogue/oversight/cockpit.py +161 -0
  206. rogue_live_redteam-1.0.0/src/rogue/oversight/decider.py +362 -0
  207. rogue_live_redteam-1.0.0/src/rogue/oversight/disposition_judge.py +109 -0
  208. rogue_live_redteam-1.0.0/src/rogue/oversight/independence_lint.py +259 -0
  209. rogue_live_redteam-1.0.0/src/rogue/oversight/scorer.py +153 -0
  210. rogue_live_redteam-1.0.0/src/rogue/packs/README.md +9 -0
  211. rogue_live_redteam-1.0.0/src/rogue/packs/__init__.py +66 -0
  212. rogue_live_redteam-1.0.0/src/rogue/packs/aggressive.json +872 -0
  213. rogue_live_redteam-1.0.0/src/rogue/packs/compliance.json +507 -0
  214. rogue_live_redteam-1.0.0/src/rogue/packs/default.json +386 -0
  215. rogue_live_redteam-1.0.0/src/rogue/platform/__init__.py +33 -0
  216. rogue_live_redteam-1.0.0/src/rogue/platform/benchmark_service.py +124 -0
  217. rogue_live_redteam-1.0.0/src/rogue/platform/engine.py +531 -0
  218. rogue_live_redteam-1.0.0/src/rogue/platform/integration_store.py +132 -0
  219. rogue_live_redteam-1.0.0/src/rogue/platform/integrations/__init__.py +35 -0
  220. rogue_live_redteam-1.0.0/src/rogue/platform/integrations/dispatcher.py +132 -0
  221. rogue_live_redteam-1.0.0/src/rogue/platform/integrations/jira.py +252 -0
  222. rogue_live_redteam-1.0.0/src/rogue/platform/integrations/slack.py +103 -0
  223. rogue_live_redteam-1.0.0/src/rogue/platform/interfaces.py +140 -0
  224. rogue_live_redteam-1.0.0/src/rogue/platform/memory.py +113 -0
  225. rogue_live_redteam-1.0.0/src/rogue/platform/models.py +330 -0
  226. rogue_live_redteam-1.0.0/src/rogue/platform/queue.py +164 -0
  227. rogue_live_redteam-1.0.0/src/rogue/platform/repertoire.py +106 -0
  228. rogue_live_redteam-1.0.0/src/rogue/platform/report_service.py +615 -0
  229. rogue_live_redteam-1.0.0/src/rogue/platform/scan_service.py +151 -0
  230. rogue_live_redteam-1.0.0/src/rogue/platform/schemas.py +115 -0
  231. rogue_live_redteam-1.0.0/src/rogue/platform/scoring.py +41 -0
  232. rogue_live_redteam-1.0.0/src/rogue/platform/secrets.py +114 -0
  233. rogue_live_redteam-1.0.0/src/rogue/platform/snapshot_store.py +122 -0
  234. rogue_live_redteam-1.0.0/src/rogue/platform/store.py +216 -0
  235. rogue_live_redteam-1.0.0/src/rogue/platform/tenancy.py +255 -0
  236. rogue_live_redteam-1.0.0/src/rogue/platform/worker.py +314 -0
  237. rogue_live_redteam-1.0.0/src/rogue/remediation/__init__.py +51 -0
  238. rogue_live_redteam-1.0.0/src/rogue/remediation/generate.py +485 -0
  239. rogue_live_redteam-1.0.0/src/rogue/remediation/legit_corpus.py +54 -0
  240. rogue_live_redteam-1.0.0/src/rogue/remediation/loop.py +171 -0
  241. rogue_live_redteam-1.0.0/src/rogue/remediation/over_block_judge.py +57 -0
  242. rogue_live_redteam-1.0.0/src/rogue/remediation/report.py +61 -0
  243. rogue_live_redteam-1.0.0/src/rogue/remediation/retest.py +425 -0
  244. rogue_live_redteam-1.0.0/src/rogue/report.py +831 -0
  245. rogue_live_redteam-1.0.0/src/rogue/reproduce/__init__.py +1 -0
  246. rogue_live_redteam-1.0.0/src/rogue/reproduce/arms_strategies.py +232 -0
  247. rogue_live_redteam-1.0.0/src/rogue/reproduce/calibration/__init__.py +48 -0
  248. rogue_live_redteam-1.0.0/src/rogue/reproduce/calibration/binary_report.py +233 -0
  249. rogue_live_redteam-1.0.0/src/rogue/reproduce/calibration/bootstrap.py +104 -0
  250. rogue_live_redteam-1.0.0/src/rogue/reproduce/calibration_adapters.py +135 -0
  251. rogue_live_redteam-1.0.0/src/rogue/reproduce/calibration_sampling.py +172 -0
  252. rogue_live_redteam-1.0.0/src/rogue/reproduce/coj.py +162 -0
  253. rogue_live_redteam-1.0.0/src/rogue/reproduce/endpoint_scan.py +195 -0
  254. rogue_live_redteam-1.0.0/src/rogue/reproduce/escalation_ladder.py +1568 -0
  255. rogue_live_redteam-1.0.0/src/rogue/reproduce/escalation_planner.py +915 -0
  256. rogue_live_redteam-1.0.0/src/rogue/reproduce/growth_scheduler.py +145 -0
  257. rogue_live_redteam-1.0.0/src/rogue/reproduce/instantiator.py +774 -0
  258. rogue_live_redteam-1.0.0/src/rogue/reproduce/iterative_attacker.py +697 -0
  259. rogue_live_redteam-1.0.0/src/rogue/reproduce/jbb_eval.py +187 -0
  260. rogue_live_redteam-1.0.0/src/rogue/reproduce/judge.py +929 -0
  261. rogue_live_redteam-1.0.0/src/rogue/reproduce/judge_batch.py +201 -0
  262. rogue_live_redteam-1.0.0/src/rogue/reproduce/judge_calibration.py +296 -0
  263. rogue_live_redteam-1.0.0/src/rogue/reproduce/ladder_priors.py +736 -0
  264. rogue_live_redteam-1.0.0/src/rogue/reproduce/llm_cost_log.py +193 -0
  265. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/__init__.py +44 -0
  266. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/audio_styles.py +97 -0
  267. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/audio_tts.py +81 -0
  268. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/exif.py +65 -0
  269. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/mml.py +252 -0
  270. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/ocr.py +46 -0
  271. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/polyjailbreak.py +157 -0
  272. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/typographic.py +147 -0
  273. rogue_live_redteam-1.0.0/src/rogue/reproduce/modality_renderers/vpi.py +299 -0
  274. rogue_live_redteam-1.0.0/src/rogue/reproduce/pair_orchestrator.py +418 -0
  275. rogue_live_redteam-1.0.0/src/rogue/reproduce/persona_wrap.py +458 -0
  276. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/fabricated_sensitive_value_v1.md +326 -0
  277. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/fabricated_sensitive_value_v2.md +512 -0
  278. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/infodisc_v1.md +274 -0
  279. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/infodisc_v2.md +366 -0
  280. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/judge_v1.md +157 -0
  281. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/judge_v2.md +202 -0
  282. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/judge_v3.md +260 -0
  283. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/unauthorized_action_v1.md +295 -0
  284. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/unauthorized_action_v2.md +348 -0
  285. rogue_live_redteam-1.0.0/src/rogue/reproduce/prompts/unauthorized_action_v3.md +457 -0
  286. rogue_live_redteam-1.0.0/src/rogue/reproduce/renderer_registry.py +376 -0
  287. rogue_live_redteam-1.0.0/src/rogue/reproduce/rubrics/__init__.py +122 -0
  288. rogue_live_redteam-1.0.0/src/rogue/reproduce/rubrics/context.py +47 -0
  289. rogue_live_redteam-1.0.0/src/rogue/reproduce/rubrics/label_set.py +261 -0
  290. rogue_live_redteam-1.0.0/src/rogue/reproduce/slot_fill.py +186 -0
  291. rogue_live_redteam-1.0.0/src/rogue/reproduce/strategy_library.py +226 -0
  292. rogue_live_redteam-1.0.0/src/rogue/reproduce/strategy_lifecycle.py +600 -0
  293. rogue_live_redteam-1.0.0/src/rogue/reproduce/strategy_templates.py +173 -0
  294. rogue_live_redteam-1.0.0/src/rogue/reproduce/strongreject_grader.py +418 -0
  295. rogue_live_redteam-1.0.0/src/rogue/reproduce/structured_data.py +160 -0
  296. rogue_live_redteam-1.0.0/src/rogue/reproduce/syntactic_mutation.py +433 -0
  297. rogue_live_redteam-1.0.0/src/rogue/reproduce/target_panel.py +314 -0
  298. rogue_live_redteam-1.0.0/src/rogue/reproduce/verdict_projection.py +107 -0
  299. rogue_live_redteam-1.0.0/src/rogue/reproduce/wildguard_eval.py +350 -0
  300. rogue_live_redteam-1.0.0/src/rogue/retrieval/__init__.py +84 -0
  301. rogue_live_redteam-1.0.0/src/rogue/retrieval/embed.py +103 -0
  302. rogue_live_redteam-1.0.0/src/rogue/retrieval/embedding_text.py +110 -0
  303. rogue_live_redteam-1.0.0/src/rogue/retrieval/evaluation.py +495 -0
  304. rogue_live_redteam-1.0.0/src/rogue/retrieval/retriever.py +116 -0
  305. rogue_live_redteam-1.0.0/src/rogue/retrieval/target_fingerprint.py +189 -0
  306. rogue_live_redteam-1.0.0/src/rogue/retrieval/technique_profile_builder.py +369 -0
  307. rogue_live_redteam-1.0.0/src/rogue/scan.py +127 -0
  308. rogue_live_redteam-1.0.0/src/rogue/schemas/__init__.py +115 -0
  309. rogue_live_redteam-1.0.0/src/rogue/schemas/attack_primitive.py +319 -0
  310. rogue_live_redteam-1.0.0/src/rogue/schemas/breach_result.py +147 -0
  311. rogue_live_redteam-1.0.0/src/rogue/schemas/deployment_config.py +138 -0
  312. rogue_live_redteam-1.0.0/src/rogue/schemas/governance.py +140 -0
  313. rogue_live_redteam-1.0.0/src/rogue/schemas/grammar_node.py +429 -0
  314. rogue_live_redteam-1.0.0/src/rogue/schemas/raw_document.py +112 -0
  315. rogue_live_redteam-1.0.0/src/rogue/schemas/remediation.py +100 -0
  316. rogue_live_redteam-1.0.0/src/rogue/schemas/renderer_manifest.py +140 -0
  317. rogue_live_redteam-1.0.0/src/rogue/schemas/source_provenance.py +67 -0
  318. rogue_live_redteam-1.0.0/src/rogue/schemas/target_fingerprint.py +60 -0
  319. rogue_live_redteam-1.0.0/src/rogue/schemas/technique_profile.py +90 -0
  320. rogue_live_redteam-1.0.0/src/rogue/schemas/technique_spec.py +160 -0
  321. rogue_live_redteam-1.0.0/src/rogue/taxonomy/__init__.py +22 -0
  322. rogue_live_redteam-1.0.0/src/rogue/taxonomy/crosswalk.py +353 -0
@@ -0,0 +1,237 @@
1
+ # secrets
2
+ .env
3
+ .env.local
4
+ .env.*.local
5
+ .neon_url.local
6
+ .env.bak*
7
+
8
+ # python
9
+ __pycache__/
10
+ *.py[cod]
11
+ *$py.class
12
+ *.so
13
+ .Python
14
+ venv/
15
+ .venv/
16
+ env/
17
+ .pytest_cache/
18
+ .mypy_cache/
19
+ .ruff_cache/
20
+ .coverage
21
+ .coverage.*
22
+ coverage.xml
23
+ htmlcov/
24
+
25
+ # uv / poetry caches (lockfiles ARE committed)
26
+ .uv-cache/
27
+ .poetry-cache/
28
+
29
+ # node / next.js
30
+ node_modules/
31
+ .next/
32
+ out/
33
+ .vercel/
34
+ *.tsbuildinfo
35
+
36
+ # db / data
37
+ postgres-data/
38
+ pgdata/
39
+ *.db
40
+ *.sqlite
41
+ *.sqlite3
42
+ dump.sql
43
+
44
+ # editor / OS
45
+ .DS_Store
46
+ .idea/
47
+ .vscode/
48
+ *.swp
49
+ *.swo
50
+
51
+ # bright data / llm cost logs (regenerated per run)
52
+ bright_data_cost_log.csv
53
+ llm_cost_log.csv
54
+ *.log
55
+
56
+ # build artifacts
57
+ dist/
58
+ build/
59
+ *.egg-info/
60
+
61
+ # runtime-generated state (bandit arm stats, dataset exports, threat-brief outputs)
62
+ # Tracked at the directory level via data/.gitkeep — actual contents stay local.
63
+ data/*
64
+ !data/.gitkeep
65
+ # Bandit arm stats — harmless (no secrets/PII); published so /api/bandit/stats
66
+ # renders on a fresh clone + the Render build can COPY it.
67
+ !data/discovery_bandit.json
68
+ # Per-breach-type judge calibration SUMMARIES — Surface-1 RedlineGuard + the per-rule judge read
69
+ # these at runtime (data/calibration/<breach_type>_report.json) to report REAL precision; without
70
+ # them prod degrades to "uncalibrated". Small summary stats only (precision/recall/agreement CIs +
71
+ # gate + one summary line) — NO eval cases / transcripts / PII. Eval corpora, kappa worksheets,
72
+ # over_block benchmark reports, and backups stay local (the JBB / WildGuard / StrongREJECT result reports are re-included below).
73
+ !data/calibration/
74
+ data/calibration/*
75
+ !data/calibration/information_disclosure_report.json
76
+ !data/calibration/unauthorized_action_report.json
77
+ !data/calibration/fabricated_sensitive_value_report.json
78
+ # P2 external-benchmark result reports (back the JBB / WildGuard / StrongREJECT numbers)
79
+ !data/calibration/jbb_judge_report_v3.json
80
+ !data/calibration/wildguard_report.json
81
+ !data/calibration/wildguard_report_harmful.json
82
+ !data/calibration/strongreject_report.json
83
+ # Released derived results behind the papers (derived only — see RESPONSIBLE_RELEASE.md)
84
+ !data/research/
85
+ data/research/*
86
+ !data/research/reproducibility_gap_results.json
87
+ !data/research/reextracted_claims.json
88
+ !data/research/reproducibility_gap_pairs.csv
89
+ !data/research/coverage_validity_results.json
90
+ !data/research/scheduler_results.json
91
+ !data/research/skill_leak_curve_2026-06-13_REDO.log
92
+ !data/research/skill_leak_curve_2026-06-13_DIAGNOSIS.md
93
+
94
+ # LeakHub session capture — Playwright storage_state JSON. Effectively your
95
+ # signed-in session; never commit. See scripts/_capture_leakhub_storage.py.
96
+ leakhub_storage_state.json
97
+
98
+ # notebooks / scratch
99
+ .ipynb_checkpoints/
100
+ scratch/
101
+ tmp/
102
+
103
+ # vendor docs archive — local reference only, not for committing
104
+ website/
105
+
106
+ # research-paper code archive — same pattern as website/ above.
107
+ # Holds shallow clones of PAIR / Crescendo / AutoDAN / PAP reference repos
108
+ # per ROGUE_PLAN.md §10.7 implementation checklist. Research code is not
109
+ # licensed for redistribution in our repo; canonical sources stay on GitHub.
110
+ papers/
111
+
112
+ # Local-only agent instructions
113
+ CLAUDE.md
114
+
115
+ mani.py
116
+
117
+ # AI session files (local-only)
118
+ frontend/AGENTS.md
119
+
120
+ # local test creds + demo flow (plaintext throwaway key — kept out of git history)
121
+ TESTING.md
122
+ frontend/CLAUDE.md
123
+ tasks/
124
+
125
+ # Internal planning / notes — not for the public repo
126
+ ROGUE_PLAN.md
127
+ glossary.md
128
+ answers.md
129
+ new_methods.md
130
+ assets/deck_slide_lines.md
131
+ # rogue v2 spec — internal planning, local-only
132
+ docs/v2/
133
+ # Moat-building method notes (SERP queries, BD harvest recipe, harvest scripts) —
134
+ # the corpus DATA stays committed (tests/runtime load it), but the prose that
135
+ # documents HOW it was harvested is internal-only. Nothing in code/CI reads these.
136
+ tests/fixtures/memory/HARVEST_NOTES.md
137
+ tests/fixtures/oversight/HARVEST_NOTES.md
138
+
139
+ # Dev-scratch scripts — not part of the runnable project
140
+ scripts/_*
141
+ # Unpublished paper teaser figure generator (the sibling research fig scripts are tracked)
142
+ scripts/research/p3_teaser.py
143
+
144
+ # Stray root npm files (accidental `npm install react-markdown` at repo root;
145
+ # real frontend deps live in frontend/package.json)
146
+ /package.json
147
+ /package-lock.json
148
+
149
+ # Internal marketing/positioning notes — not part of the public repo
150
+ /marketing_claims.md
151
+ /monetization_venues.md
152
+ # Internal go-to-market + demo-production playbook (trailer/filming notes,
153
+ # creative briefs, sitemap, positioning) — production process, not product.
154
+ docs/marketing/
155
+ # Founder's internal outbound/sales index (cold-email skeleton, prospect list,
156
+ # pre-send checklist) — never public.
157
+ docs/outbound_package.md
158
+
159
+ # Personal / non-product files (must never reach the auto-deploying repo)
160
+ *_CV.md
161
+ skills-lock.json
162
+ .agents/
163
+ startup/
164
+ video/
165
+ # Content-marketing / demo-video tooling — not part of the red-team product
166
+ scripts/youtube_research.py
167
+ scripts/assemble_video.sh
168
+ scripts/build_trailer_clips.sh
169
+ scripts/build_trailer_cut.sh
170
+ scripts/make_trailer_captions.py
171
+ scripts/trailer/
172
+ assets/trailer/
173
+
174
+ # Local working artifacts + review UIs — not for the public repo
175
+ .claude/
176
+ /19_leakage_labels.json
177
+ /19_net_effect_labels.json
178
+ /oversight_decisions.json
179
+ /leakage_label.html
180
+ /net_effect_label.html
181
+ /oversight_review.html
182
+
183
+ # Local-only research/working docs (WIP; not for the public repo)
184
+ docs/research/adaptive_orchestration_systems.md
185
+ docs/research/scheduler_allocation_study.md
186
+ docs/research/adaptive_orchestration_paper.md
187
+ docs/research/paper_figures.md
188
+ docs/research/figs/
189
+ scripts/paper_figs.py
190
+ scripts/export_paper_data.py
191
+ docs/research/RESEARCH_TODO.md
192
+ # arXiv/workshop submission packages — papers live on arXiv; the repo links to
193
+ # them (see PAPERS.md) rather than shipping the LaTeX sources + internal plan.
194
+ docs/research/publishing/
195
+ # contingent roadmap (build-if-trigger), not implemented system
196
+ docs/3b_v2_renderer_design.md
197
+ .vercel
198
+ .env*
199
+ # ...but .env.example files ARE tracked (documentation, no secrets).
200
+ !.env.example
201
+ !frontend/.env.example
202
+
203
+ # Demo/video working media — large; excluded from deploy via .vercelignore, kept local.
204
+ assets/*.mp4
205
+ assets/*.mp3
206
+ assets/*.pdf
207
+ assets/captures/
208
+ assets/music/
209
+ assets/rogue-*.png
210
+ assets/0531.mp4
211
+
212
+ # Surface-2 oversight: the harvested-page provenance CACHE is bulky (~5.7M) + reproducible from
213
+ # source_refs — keep it local. The answer-key corpus + notes + the lightweight index ARE committed
214
+ # (the corpus is the moat; the CI lint + tests load it).
215
+ tests/fixtures/oversight/_raw/*
216
+ !tests/fixtures/oversight/_raw/_index.json
217
+
218
+ # Surface-3 skill-pool harvested-page provenance CACHE — bulky + reproducible from source_refs; local.
219
+ # The skill pool + canary ground-truth + manifest ARE committed (the moat; tests/lint load them).
220
+ tests/fixtures/memory/_raw/*
221
+ !tests/fixtures/memory/_raw/_index.json
222
+
223
+ # Surface-3 held-out-task harvested-page cache — bulky + reproducible; local. Tasks + manifest committed.
224
+ tests/fixtures/memory/_raw_tasks/*
225
+ !tests/fixtures/memory/_raw_tasks/_index.json
226
+
227
+ # §08 judge-calibration labelable case fixtures — committed so a labeler's labels tie to a fixed set
228
+ # (the harvester's Groq capture is non-deterministic and can't be regenerated identically).
229
+ !data/calibration/leakage_label_cases.json
230
+ !data/calibration/net_effect_label_cases.json
231
+ !data/calibration/net_effect_synthetic_cases.json
232
+ !data/calibration/net_effect_report.json
233
+
234
+ # Brand/marketing process docs (map the private docs/marketing moat) — local only
235
+ PIPELINE_WEBSITE_TO_BRAND.txt
236
+ LESSONS_PROMO_VIDEO_AND_HIGGSFIELD.txt
237
+ PIPELINE_MASTER_PRODUCT_TO_LAUNCH.txt
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Soren Obounou Nguia
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,304 @@
1
+ Metadata-Version: 2.4
2
+ Name: rogue-live-redteam
3
+ Version: 1.0.0
4
+ Summary: Continuous open-web LLM red-team: harvests live jailbreaks from 15+ open-web sources, reproduces them against your model x system-prompt x tools, and serves results over its own MCP server.
5
+ Project-URL: Homepage, https://rogue-eosin.vercel.app
6
+ Project-URL: Repository, https://github.com/nguiaSoren/ROGUE
7
+ Author: Soren Obounou Nguia
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: ai-safety,bright-data,jailbreak,llm,mcp,prompt-injection,red-team,security
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Security
17
+ Requires-Python: <3.12,>=3.11
18
+ Requires-Dist: alembic>=1.13
19
+ Requires-Dist: anthropic>=0.34
20
+ Requires-Dist: datasets>=4.8.5
21
+ Requires-Dist: fastapi<1,>=0.115
22
+ Requires-Dist: httpx>=0.27
23
+ Requires-Dist: mcp>=1.0
24
+ Requires-Dist: openai>=1.40
25
+ Requires-Dist: pgvector>=0.3
26
+ Requires-Dist: pillow>=10.4
27
+ Requires-Dist: playwright>=1.60.0
28
+ Requires-Dist: psycopg[binary]>=3.2
29
+ Requires-Dist: pydantic<3,>=2.7
30
+ Requires-Dist: pypdf>=4.3
31
+ Requires-Dist: python-dotenv>=1.0
32
+ Requires-Dist: reportlab>=4.5.1
33
+ Requires-Dist: sentry-sdk[fastapi]>=2.0
34
+ Requires-Dist: slowapi>=0.1.9
35
+ Requires-Dist: sqlalchemy<3,>=2.0
36
+ Requires-Dist: tenacity>=8.5
37
+ Requires-Dist: ulid-py>=1.1
38
+ Requires-Dist: uvicorn[standard]>=0.30
39
+ Provides-Extra: crawl4ai
40
+ Requires-Dist: crawl4ai>=0.4.0; extra == 'crawl4ai'
41
+ Provides-Extra: dev
42
+ Requires-Dist: mypy>=1.10; extra == 'dev'
43
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
44
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
45
+ Requires-Dist: pytest>=8.0; extra == 'dev'
46
+ Requires-Dist: ruff>=0.5; extra == 'dev'
47
+ Description-Content-Type: text/markdown
48
+
49
+ <p align="center">
50
+ <img src="assets/brand/png/logo-stacked.png" alt="ROGUE" width="300">
51
+ </p>
52
+
53
+ <h1 align="center">ROGUE — Red-team every way a high-stakes AI agent can fail</h1>
54
+ <p align="center"><b><i>The Red-Team That Never Sleeps.</i></b></p>
55
+ <p align="center"><sub>Powered end-to-end by 5 Bright Data products · built for the Bright Data real-time AI-agents hackathon (results pending)</sub></p>
56
+
57
+ ROGUE measures **every place a high-stakes AI agent can go wrong** — whether the **model** can be broken, whether the **human oversight** around it is meaningful, and whether the **knowledge it accumulates** is safe — each against an independent, continuously-refreshed standard, with a reproducible **signed** record. And it closes the loop: it doesn't just find the break, it **generates and verifies the fix** (you own the runtime — ROGUE never sits in your request path). The continuous open-web harvest behind the model surface runs on just **$0.05–$0.30 of Bright Data** a day.
58
+
59
+ > ### 🥇 The first continuous open-web red-team you can query over MCP.
60
+ > ROGUE harvests new jailbreaks **through Bright Data's MCP**, reproduces each one against **your** config, and serves the results **back through its own MCP server** — so you can ask Claude / Cursor *"which live attacks breach my config?"* from your editor. A two-way MCP loop — harvest *and* distribution — that no other red-team tool closes.
61
+
62
+ [![Demo](https://img.shields.io/badge/demo-live-brightgreen)](https://rogue-eosin.vercel.app)
63
+ [![Trailer](https://img.shields.io/badge/%E2%96%B6%20trailer-watch-red)](https://youtu.be/pVOQYJvMC6w)
64
+ [![Dataset](https://img.shields.io/badge/%F0%9F%A4%97%20dataset-gated-yellow)](https://huggingface.co/datasets/soren19/rogue-attacks-2026-05)
65
+ [![Research](https://img.shields.io/badge/research-papers-blueviolet)](PAPERS.md)
66
+ [![License](https://img.shields.io/badge/license-MIT-lightgrey)](LICENSE)
67
+ [![Python](https://img.shields.io/badge/python-3.11-blue)](pyproject.toml)
68
+
69
+ ## See it live
70
+
71
+ - **Dashboard:** https://rogue-eosin.vercel.app — live, deployed.
72
+ - **Trailer:** [watch the 45-second trailer on YouTube](https://youtu.be/pVOQYJvMC6w) (preview below).
73
+ - **Dataset:** [358 attack primitives across 15 families](https://huggingface.co/datasets/soren19/rogue-attacks-2026-05), MIT-licensed and access-gated (defensive-research-only terms — see [`RESPONSIBLE_RELEASE.md`](RESPONSIBLE_RELEASE.md)).
74
+ - **In Slack:** point a Slack incoming webhook at ROGUE and the daily threat brief + every new HIGH/CRITICAL breach post straight to your workspace (the platform integration also files findings to Jira). ROGUE comes to where your team already triages.
75
+
76
+ https://github.com/user-attachments/assets/355df07c-71a1-44e1-8146-e59d93187d24
77
+
78
+ ## Why ROGUE
79
+
80
+ Other LLM red-teams run a *fixed* attack set you have to keep updating. ROGUE is the only one that does all of this together:
81
+
82
+ - **Harvests live, every day** — new jailbreaks and prompt-injections pulled from 15+ open-web sources (via all 5 Bright Data products), so your report is never older than yesterday.
83
+ - **Reproduces against *your* exact config** — your model × system-prompt × tools, not a generic safety benchmark.
84
+ - **Is queryable over MCP, both ways** — it *harvests* through MCP and *serves* results through its own MCP server, so you can ask "what breaches a model like mine?" from inside Cursor or Claude. No other red-team closes that loop.
85
+ - **Measures three surfaces, signed** — the model, the human approval gate, and the shared skill-pool — each scored against an independent answer key and emitted as a tamper-evident attestation.
86
+ - **Runs on the LLM you choose** — the judge and extraction models are configurable (`JUDGE_MODEL`), any provider or a local model (Ollama via `OPENAI_BASE_URL`); not locked to one vendor.
87
+
88
+ Each ingredient exists somewhere; **no competitor does the whole combination** — that's what makes ROGUE a continuous, queryable, multi-surface red-team rather than a one-off scan.
89
+
90
+ ## Use it in 30 seconds
91
+
92
+ ### Query ROGUE from your IDE — hosted MCP, zero setup
93
+ The MCP server is mounted into the live API, so there is nothing to clone or run:
94
+
95
+ ```
96
+ https://rogue-private.onrender.com/mcp/
97
+ ```
98
+
99
+ The [dashboard home](https://rogue-eosin.vercel.app) has one-click **Add to Cursor** / **Add to VS Code** buttons; for Claude Desktop, add it as a custom connector. It exposes ~19 tools — read-only corpus/breach queries plus scan / report / benchmark actions. Full tool list + local install: [MCP integration](#mcp-integration) below.
100
+
101
+ ### Submit an endpoint, get a report — hosted API
102
+ `POST /v1/scans` with a target → ROGUE queues it for the same scan engine behind the dashboard and MCP, returning a scored report as **JSON, HTML, or a CISO-ready PDF** on completion. The hosted `/v1` API is **live and key-authorized today** (private beta), but the background worker that drains the scan queue isn't deployed yet, so a queued scan does not complete on the host. For a graded report today, run it locally (below) or point the SDK at your own target — the identical engine, the identical report.
103
+
104
+ ### Run it locally
105
+ ```bash
106
+ git clone https://github.com/nguiaSoren/ROGUE && cd ROGUE
107
+ cp .env.example .env # add your keys
108
+ docker compose up -d && uv sync --extra dev
109
+ alembic upgrade head && python scripts/ops/seed_demo_data.py
110
+ uvicorn rogue.api.main:app --reload
111
+ ```
112
+
113
+ ### Scan your own model — the SDK
114
+ After cloning, run a **full scan offline with no API key** (a mocked target + judge, end to end → an HTML report):
115
+
116
+ ```bash
117
+ pip install -e . # the `rogue` SDK + CLI
118
+ PYTHONPATH=src python3 examples/sdk_quickstart.py # runs a scan, writes a report — no key
119
+ ```
120
+
121
+ Against a real target it's three lines (plus a judge key — ROGUE grades every response; see [`docs/SDK.md`](docs/SDK.md)):
122
+
123
+ ```python
124
+ from rogue import Client
125
+ client = Client(endpoint="https://api.company.com/v1", api_key="sk-...") # or Client(provider="openai")
126
+ report = client.scan(pack="aggressive", budget=10.0)
127
+ print(report.summary()); report.to_html("scan.html")
128
+ ```
129
+
130
+ *(`pip install rogue` is not live yet — the package isn't on PyPI; install editable from this repo as above.)*
131
+
132
+ ## Integrations
133
+
134
+ ROGUE meets your team where it already works:
135
+
136
+ | Surface | Status | What you get |
137
+ |---|---|---|
138
+ | **Your IDE** — MCP | ✅ **Available now** · keyless | One config block in Claude Desktop / Cursor / Windsurf / VS Code; the editor's agent queries the live threat DB on the spot. Add an account to launch full scans without leaving your work. `https://rogue-private.onrender.com/mcp/` |
139
+ | **Your chat & tracker** — Slack + Jira | ✅ Slack alerts now · ⏳ auto-fan-out rolling out | Point a Slack incoming webhook (`SLACK_WEBHOOK_URL`) at ROGUE and the daily threat brief + new CRITICAL/HIGH breaches post to your workspace automatically — **works today**. Or connect Slack + Jira as per-org integrations (Fernet-encrypted creds) and file findings via the MCP action tools (`send_slack_alert` / `create_jira_ticket`); automatic fan-out on every scan completion is rolling out with the hosted worker. [Setup](docs/platform/integrations/slack-github-jira.md) |
140
+ | **API & SDK** — REST `/v1` + Python | ✅ live · ⏳ hosted scans rolling out | The `/v1` REST API + OpenAPI spec are live and key-authorized at `https://rogue-private.onrender.com/v1`. The **Python SDK runs real scans today** against your own target (`from rogue import Client`; `pip install -e .` — see [`docs/SDK.md`](docs/SDK.md)). *Hosted* scan execution (a `POST /v1/scans` that completes server-side) is rolling out. |
141
+ | **Security tooling** — SOAR / SIEM | 🔜 **Coming soon** | Splunk / Palo Alto Cortex connectors to pipe findings into your existing security stack. On the roadmap, not available today. |
142
+
143
+ ## What ROGUE does
144
+
145
+ Five-layer pipeline: **Harvest → Extract → Dedupe → Reproduce → Diff.**
146
+
147
+ 1. **Harvest** — 19 open-web sources fetched via 5 Bright Data products.
148
+ 2. **Extract** — an LLM agent structures each fetched document into an `AttackPrimitive`.
149
+ 3. **Dedupe** — pgvector cosine similarity clusters near-duplicate attacks.
150
+ 4. **Reproduce** — each canonical primitive runs against your `DeploymentConfig` × 5 trials.
151
+ 5. **Diff** — a separate judge model issues a verdict on each trial; the daily diff ships to Slack, MCP, and the dashboard.
152
+
153
+ > **New to the codebase?** [`docs/PROJECT_STRUCTURE.md`](docs/PROJECT_STRUCTURE.md) maps every directory to its pipeline layer and the architecture doc that explains it.
154
+
155
+ ## What ROGUE red-teams
156
+
157
+ ROGUE measures **every place a high-stakes AI agent can go wrong** — whether the agent can be **broken**, whether the **human oversight** around it is meaningful, and whether the **knowledge it accumulates** is safe — each against an independent, continuously-refreshed standard, and each backed by a result rather than a claim:
158
+
159
+ - **The model.** Does a live jailbreak or prompt-injection break *your* deployment? The daily breach matrix replays open-web attacks against your model × system-prompt × tools, graded by a [human-calibrated judge](docs/judge-calibration.md). Finding: most *claimed* jailbreaks don't even reproduce — [Claimed Potency Does Not Predict Reproduction](PAPERS.md).
160
+ - **The human gate.** When a person "approves" an AI action, does that approval mean anything? ROGUE measures a reviewer's **false-approve rate** against an independent answer key — the rubber-stamping failure mode regulators now care about ([oversight](PAPERS.md)).
161
+ - **The agent's memory.** Does a shared agent skill-pool leak one user's secrets to the next? ROGUE plants canaries in scrubbed skills and measures recovery — 85% leaked on a weak model despite an explicit never-reveal instruction ([Scrubbing Is Not Containment](PAPERS.md)).
162
+
163
+ …and it **closes the loop (assurance-native remediation).** Finding a breach is half the job. ROGUE *generates* a verified mitigation — a system-prompt patch, a tool-permission scope, distilled fine-tuning data — and **re-tests it against the same live corpus to prove it actually closed the breach without over-blocking** (measured with the same calibrated judge). ROGUE generates and verifies the fix; **you own the runtime — it never sits in your request path.**
164
+
165
+ One engine, one independent standard — same operation each time (fire inputs at an AI decision-maker, capture what it does, score it against the standard, emit a reproducible signed record).
166
+
167
+ ## Research
168
+
169
+ ROGUE's findings are written up as papers and posts — **[PAPERS.md](PAPERS.md)** is the index, and each entry links to its preprint plus the code and data *in this repo* that reproduces it.
170
+
171
+ - **Allocation Is a Capability-Growth Mechanism** — in a self-growing red-team, evaluation *allocation* is a capability lever, not an efficiency layer (8 of 20 starved candidates graduate vs 0 of 20; Fisher *p* = 0.003). · *arXiv `cs.CR`×`cs.LG` — preprint posting soon*
172
+ - **Consummation-Gated Breach Judges** — one gate template ("engagement ≠ breach; consummation = breach") calibrates breach judges across classes, validated against human labels four ways. · *arXiv `cs.CR`×`cs.CL` — preprint posting soon*
173
+ - **Claimed Potency Does Not Predict Reproduction** — most open-web jailbreaks don't survive as working carriers in deployment context, and a source's claimed rate carries no usable signal (Spearman −0.10). · *arXiv `cs.CR` (lead paper) — preprint posting soon*
174
+ - **Scrubbing Is Not Containment** — canary leakage from shared agent skill pools tracks *alignment*, not model size. · *workshop paper + Hugging Face blog — posting soon*
175
+
176
+ ## Deep dives
177
+
178
+ The mechanics behind the pipeline, each on its own page:
179
+
180
+ - **Bright Data integration.** Five BD products end-to-end, plus a self-tuning ε-greedy SERP bandit that allocates the daily harvest budget by yield (novel primitives per dollar) at $0.05–$0.30 per harvest. → [docs/bright-data.md](docs/bright-data.md)
181
+ - **Multimodal red-team.** Refused text jailbreaks become real images and audio via deterministic black-box renderers, climbing an autonomous escalation ladder that stops at the first breach; Bright Data sources real carrier images to composite onto. → [docs/multimodal.md](docs/multimodal.md)
182
+ - **Self-growing attack repertoire.** ROGUE harvests reusable *techniques*, not just payloads — classifying, routing, and graduating / retiring / resurrecting them on live breach evidence, with a governed renderer registry and grammar-driven planning (the planner-willingness finding: 22% → 100% by changing only the planner). → [docs/self-growing-repertoire.md](docs/self-growing-repertoire.md)
183
+ - **Judge calibration.** Every breach number is an LLM verdict, so the judge is validated against independent human labels four ways — in-distribution FP 2.56%, WildGuardTest harm 88.5%, StrongREJECT −26% inflation, JBB **91.0%** human agreement (top of field, reproducible from `data/calibration/`), up from a 70.3% v1 judge after a diagnosed recalibration. → [docs/judge-calibration.md](docs/judge-calibration.md)
184
+ - **Benchmark — coverage over time.** Frozen AdvBench / JBB goal sets run through ROGUE's own graduated ladder against a fixed target, to answer "is this month's ROGUE better than last month's?" (honest caveat: still N=1, pre-recalibration). → [docs/benchmark.md](docs/benchmark.md)
185
+ - **Dashboard tour.** A 5-second pitch and a 5-minute deep-dive: cinematic home, `/feed` war room (attacks replayed as ATTACKER → MODEL → JUDGE), `/matrix` breach heatmap, `/brief` threat brief. → [docs/dashboard.md](docs/dashboard.md)
186
+
187
+ ## Capabilities
188
+
189
+ - 15-family attack taxonomy (OWASP LLM Top 10 + MITRE ATLAS aligned) — see [`docs/taxonomy.md`](docs/taxonomy.md).
190
+ - 14-slot payload-template vocabulary for cross-deployment reproduction.
191
+ - 19-source open-web harvest list — see [`docs/sources.md`](docs/sources.md).
192
+ - 8-model target panel (GPT-5.4 Nano, Claude Haiku 4.5, Llama-3.1-8B, Mistral Small, Gemini 3.1 Flash-Lite, Claude Opus 4.8, + two audio targets) — cheap-tier models per lab, an open-weight reliability anchor, a frontier reference, and audio endpoints for multimodal coverage.
193
+ - Judge-model verdict pipeline (REFUSED / EVADED / PARTIAL_BREACH / FULL_BREACH), human-validated four ways — see [Judge calibration](docs/judge-calibration.md).
194
+ - Daily threat brief (markdown + JSON) + Slack webhook.
195
+ - ROGUE-as-MCP-server: query the attack DB from Claude Desktop / Cursor / Windsurf.
196
+ - True multimodal red-team and a self-growing technique repertoire (see [Deep dives](#deep-dives)).
197
+ - External benchmark layer against frozen AdvBench / JailbreakBench goal sets.
198
+
199
+ ## Roadmap
200
+
201
+ - **Expand source coverage** — deeper Web Scraper API integration brings the next ~100 open-web sources online.
202
+ - **Customer SDK** — a drop-in SDK that lands ROGUE verdicts in the workflows teams already run (private beta; SOAR/SIEM connectors planned).
203
+ - **Break bandit** — a second, contextual Thompson-sampling bandit that learns *how to break* (which escalation strategy to try first per attack-family × target); the control surface and reward log are already built and instrumented in prod.
204
+ - **Enterprise** — RBAC, audit logs, and compliance reporting for teams that need them.
205
+
206
+ ---
207
+
208
+ # Run it yourself
209
+
210
+ *Everything below is for builders — connecting ROGUE to your tools, running it locally, or driving the pipeline.*
211
+
212
+ ## Architecture
213
+
214
+ See [`docs/architecture.md`](docs/architecture.md) for the five-layer pipeline diagram and the locked stack table.
215
+
216
+ ## MCP integration
217
+
218
+ ROGUE exposes its threat-intelligence database as a **producer-side MCP server** — Claude Desktop / Cursor / Windsurf users query the live breach matrix from inside their IDE.
219
+
220
+ **Hosted (recommended, zero setup).** The server is mounted into the live API at `https://rogue-private.onrender.com/mcp/`. Use the **Add to Cursor / Add to VS Code** buttons on the [dashboard home](https://rogue-eosin.vercel.app), or add it as a custom connector in Claude Desktop (Settings → Customize → add a custom connector → paste the URL). The hosted server exposes the read-only query tools **and** the action tools (validate / scan / report / benchmark + Level-3 workflow tools) — ~19 in all.
221
+
222
+ **Local (against your own DB), one command:**
223
+
224
+ ```bash
225
+ uv run python scripts/ops/install_mcp.py # Claude Desktop (default)
226
+ uv run python scripts/ops/install_mcp.py --client cursor # or: --client windsurf
227
+ ```
228
+
229
+ This detects the client's config path, merges in the `rogue` server entry pointing at your checkout (preserving every other key), and backs up the old file first. It's idempotent; `--dry-run` previews, `--uninstall` removes. Then restart the client. Requires a populated DB (run `harvest_once.py` + `reproduce_once.py` at least once); the deployed build reads the live Neon DB.
230
+
231
+ **Read-only query tools:** `query_attacks`, `query_diff`, `query_threat_brief`, `query_breaches_for_config`, `query_attack_detail`, `query_worst_attacks`. After connecting, ask Claude *"What new attacks broke our customer-support config in the last 24 hours?"* and it will call `query_diff` + `query_breaches_for_config` and summarize.
232
+
233
+ **Transport.** Stdio by default (the Claude Desktop path). For remote clients, serve over HTTP:
234
+
235
+ ```bash
236
+ ROGUE_MCP_TRANSPORT=streamable-http uv run python -m rogue.mcp_server.server
237
+ # serves http://127.0.0.1:8001/mcp (ROGUE_MCP_HOST / ROGUE_MCP_PORT override the bind)
238
+ ```
239
+
240
+ ## Pipeline CLI reference
241
+
242
+ The two `$`-billed driver scripts spend Bright Data + LLM credit and write to the live DB — run them deliberately. All flags are optional.
243
+
244
+ <details><summary><b><code>harvest_once.py</code> — harvest → extract → dedup → persist</b></summary>
245
+
246
+ ```bash
247
+ uv run python scripts/harvest/harvest_once.py --since 1d
248
+ ```
249
+
250
+ | Flag | Default | What it does |
251
+ |---|---|---|
252
+ | `--since` | `1d` | Harvest window (`1d`, `14d`, `6h`). |
253
+ | `--x-handles` | off | Comma-separated X handles to scrape this run (X is off by default — BD's profile scraper is slow). |
254
+ | `--database-url` | `$DATABASE_URL` | Target SQLAlchemy URL. |
255
+ | `--extraction-model` | Claude Haiku 4.5 | Provider-prefixed extraction model (prompt-cached). |
256
+ | `--embedding-model` | `text-embedding-3-small` | Embedding model for dedup. |
257
+
258
+ Env toggles: `EXTRACTION_CONCURRENCY` · `HARVEST_INGEST_IMAGES=0` · `HARVEST_FOLLOW_LINKS=0`. For a single known-fresh URL, use `scripts/harvest/harvest_url.py --url "https://x.com/.../status/<id>"`.
259
+
260
+ </details>
261
+
262
+ <details><summary><b><code>reproduce_once.py</code> — render → target panel → judge → persist</b></summary>
263
+
264
+ ```bash
265
+ uv run python scripts/reproduce/reproduce_once.py --primitive-limit 50 --judge-batch
266
+ ```
267
+
268
+ | Flag | Default | What it does |
269
+ |---|---|---|
270
+ | `--primitive-limit N` | all | Cap how many primitives are reproduced (top-N by `reproducibility_score`). |
271
+ | `--only-unreproduced` | off | Reproduce only primitives with no `breach_results` yet. |
272
+ | `--primitive-ids A,B,…` | — | Reproduce exactly the named primitives (overrides other filters). |
273
+ | `--n-trials N` | 5 | Trials per (primitive × config) — powers the bootstrap CI. |
274
+ | `--multimodal-only` | off | Only image/audio primitives, rendered as real media. |
275
+ | `--persona NAME` | off | PAP persona wrap (the B side of the A/B). |
276
+ | `--escalate` | off | Inline auto-ladder for panel-wide refusals (costly; bound with `--escalate-max-spend`). |
277
+ | `--candidate-quota N` | 0 | Reserve N guaranteed harvested-candidate attempts before early-stop (scheduler policy). |
278
+ | `--judge-batch` | off | Grade via the Anthropic Batch API (50% off + caching; baseline-only). |
279
+
280
+ `scripts/reproduce/candidate_quota_ab.py` runs the candidate-quota A/B (the empirical baseline for the break-bandit).
281
+
282
+ </details>
283
+
284
+ ## Repository layout
285
+
286
+ ```
287
+ src/rogue/ # Python package (schemas, harvest, extract, dedupe, reproduce, diff, mcp_server, db, api)
288
+ docs/ # architecture, schemas, taxonomy, sources, budget + the deep-dive pages
289
+ tests/ # schema round-trip tests + golden fixtures
290
+ scripts/ # harvest/ (harvest_once.py), reproduce/ (reproduce_once.py), calibration/, ops/
291
+ frontend/ # Next.js dashboard
292
+ ```
293
+
294
+ ## Built by
295
+
296
+ Benaja Soren Obounou Lekogo Nguia — AI Systems Engineer; previously a Grand-Prize winner at Yonsei University for LLM security tooling (GPTFuzz optimization), plus adversarial-ML research at AIM Intelligence (HWARANG red-team series).
297
+
298
+ > "I built ROGUE solo in 6 days because Bright Data abstracted away 5 different anti-bot stacks I'd otherwise have spent weeks on. The MCP Server plus pre-built Reddit / X scrapers turned a 6-week project into a 6-day project."
299
+ >
300
+ > — Benaja Soren Obounou Lekogo Nguia
301
+
302
+ ## License
303
+
304
+ MIT. See [`LICENSE`](LICENSE).