faultray 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. faultray-1.2.0.dist-info/METADATA +705 -0
  2. faultray-1.2.0.dist-info/RECORD +168 -0
  3. faultray-1.2.0.dist-info/WHEEL +4 -0
  4. faultray-1.2.0.dist-info/entry_points.txt +4 -0
  5. faultray-1.2.0.dist-info/licenses/LICENSE +21 -0
  6. infrasim/__init__.py +79 -0
  7. infrasim/ai/__init__.py +0 -0
  8. infrasim/ai/analyzer.py +636 -0
  9. infrasim/ai/architecture_advisor.py +1205 -0
  10. infrasim/ai/nl_to_infra.py +1059 -0
  11. infrasim/api/__init__.py +0 -0
  12. infrasim/api/auth.py +170 -0
  13. infrasim/api/billing.py +190 -0
  14. infrasim/api/database.py +265 -0
  15. infrasim/api/graphql_api.py +386 -0
  16. infrasim/api/insurance_api.py +562 -0
  17. infrasim/api/leaderboard.py +362 -0
  18. infrasim/api/oauth.py +189 -0
  19. infrasim/api/server.py +1736 -0
  20. infrasim/api/static/graph.js +319 -0
  21. infrasim/api/static/style.css +2125 -0
  22. infrasim/api/static/sw.js +66 -0
  23. infrasim/api/teams.py +481 -0
  24. infrasim/api/templates/advisor.html +422 -0
  25. infrasim/api/templates/analyze.html +186 -0
  26. infrasim/api/templates/base.html +261 -0
  27. infrasim/api/templates/blast_radius.html +1141 -0
  28. infrasim/api/templates/compliance.html +265 -0
  29. infrasim/api/templates/components.html +164 -0
  30. infrasim/api/templates/cost.html +121 -0
  31. infrasim/api/templates/dashboard.html +279 -0
  32. infrasim/api/templates/graph.html +89 -0
  33. infrasim/api/templates/landing.html +787 -0
  34. infrasim/api/templates/reports.html +132 -0
  35. infrasim/api/templates/security.html +149 -0
  36. infrasim/api/templates/settings.html +170 -0
  37. infrasim/api/templates/simulation.html +310 -0
  38. infrasim/api/widget.py +120 -0
  39. infrasim/cache.py +177 -0
  40. infrasim/ci/__init__.py +1 -0
  41. infrasim/ci/github_action.py +443 -0
  42. infrasim/ci/sarif_exporter.py +403 -0
  43. infrasim/cli/__init__.py +44 -0
  44. infrasim/cli/admin.py +862 -0
  45. infrasim/cli/advisor_cmd.py +395 -0
  46. infrasim/cli/analyze.py +248 -0
  47. infrasim/cli/auto_fix.py +151 -0
  48. infrasim/cli/autoscale_cmd.py +355 -0
  49. infrasim/cli/backtest.py +106 -0
  50. infrasim/cli/benchmark_cmd.py +281 -0
  51. infrasim/cli/config_cmd.py +93 -0
  52. infrasim/cli/daemon_cmd.py +110 -0
  53. infrasim/cli/diff_cmd.py +118 -0
  54. infrasim/cli/discovery.py +472 -0
  55. infrasim/cli/dna_cmd.py +124 -0
  56. infrasim/cli/drift_cmd.py +341 -0
  57. infrasim/cli/evaluate.py +1094 -0
  58. infrasim/cli/feeds.py +228 -0
  59. infrasim/cli/genome.py +585 -0
  60. infrasim/cli/history_cmd.py +132 -0
  61. infrasim/cli/main.py +412 -0
  62. infrasim/cli/marketplace_cmd.py +464 -0
  63. infrasim/cli/nl_command.py +210 -0
  64. infrasim/cli/ops.py +1292 -0
  65. infrasim/cli/predictive.py +363 -0
  66. infrasim/cli/quickstart.py +179 -0
  67. infrasim/cli/replay_cmd.py +567 -0
  68. infrasim/cli/simulate.py +420 -0
  69. infrasim/cli/sla_cmd.py +337 -0
  70. infrasim/cli/supply_chain_cmd.py +122 -0
  71. infrasim/cli/tf_check.py +422 -0
  72. infrasim/cli/timeline_cmd.py +376 -0
  73. infrasim/cli/twin_cmd.py +138 -0
  74. infrasim/config.py +119 -0
  75. infrasim/daemon.py +206 -0
  76. infrasim/differ.py +149 -0
  77. infrasim/discovery/__init__.py +0 -0
  78. infrasim/discovery/aws_scanner.py +1167 -0
  79. infrasim/discovery/azure_scanner.py +769 -0
  80. infrasim/discovery/flow_analyzer.py +290 -0
  81. infrasim/discovery/gcp_scanner.py +704 -0
  82. infrasim/discovery/k8s_scanner.py +568 -0
  83. infrasim/discovery/metric_calibrator.py +324 -0
  84. infrasim/discovery/prometheus.py +308 -0
  85. infrasim/discovery/prometheus_monitor.py +82 -0
  86. infrasim/discovery/scanner.py +193 -0
  87. infrasim/discovery/terraform.py +600 -0
  88. infrasim/features.py +48 -0
  89. infrasim/feeds/__init__.py +1 -0
  90. infrasim/feeds/analyzer.py +416 -0
  91. infrasim/feeds/fetcher.py +152 -0
  92. infrasim/feeds/sources.py +69 -0
  93. infrasim/feeds/store.py +139 -0
  94. infrasim/history.py +379 -0
  95. infrasim/i18n.py +79 -0
  96. infrasim/integrations/__init__.py +1 -0
  97. infrasim/integrations/datadog.py +122 -0
  98. infrasim/integrations/grafana.py +85 -0
  99. infrasim/integrations/incident_correlator.py +471 -0
  100. infrasim/integrations/issue_tracker.py +110 -0
  101. infrasim/integrations/opsgenie.py +61 -0
  102. infrasim/integrations/slack_bot.py +376 -0
  103. infrasim/integrations/terraform_provider.py +264 -0
  104. infrasim/integrations/webhooks.py +446 -0
  105. infrasim/licensing.py +76 -0
  106. infrasim/log_config.py +33 -0
  107. infrasim/marketplace/__init__.py +19 -0
  108. infrasim/marketplace/builtin_packages.py +1341 -0
  109. infrasim/marketplace/catalog.py +516 -0
  110. infrasim/marketplace.py +266 -0
  111. infrasim/model/__init__.py +0 -0
  112. infrasim/model/components.py +310 -0
  113. infrasim/model/demo.py +123 -0
  114. infrasim/model/dna.py +229 -0
  115. infrasim/model/graph.py +380 -0
  116. infrasim/model/loader.py +282 -0
  117. infrasim/plugins/__init__.py +5 -0
  118. infrasim/plugins/registry.py +129 -0
  119. infrasim/remediation/__init__.py +0 -0
  120. infrasim/remediation/auto_pipeline.py +344 -0
  121. infrasim/remediation/iac_exporter.py +1708 -0
  122. infrasim/remediation/iac_generator.py +863 -0
  123. infrasim/reporter/__init__.py +0 -0
  124. infrasim/reporter/compliance.py +438 -0
  125. infrasim/reporter/evidence_generator.py +513 -0
  126. infrasim/reporter/executive_report.py +625 -0
  127. infrasim/reporter/export.py +368 -0
  128. infrasim/reporter/html_report.py +308 -0
  129. infrasim/reporter/pdf_report.py +215 -0
  130. infrasim/reporter/report.py +144 -0
  131. infrasim/reporter/templates/report.html +509 -0
  132. infrasim/scoring.py +415 -0
  133. infrasim/simulator/__init__.py +0 -0
  134. infrasim/simulator/advisor_engine.py +407 -0
  135. infrasim/simulator/autoscaling_engine.py +339 -0
  136. infrasim/simulator/availability_model.py +485 -0
  137. infrasim/simulator/backtest_engine.py +139 -0
  138. infrasim/simulator/bayesian_model.py +236 -0
  139. infrasim/simulator/benchmarking.py +652 -0
  140. infrasim/simulator/capacity_engine.py +668 -0
  141. infrasim/simulator/carbon_engine.py +274 -0
  142. infrasim/simulator/cascade.py +701 -0
  143. infrasim/simulator/chaos_calendar.py +362 -0
  144. infrasim/simulator/chaos_genome.py +1286 -0
  145. infrasim/simulator/compliance_engine.py +703 -0
  146. infrasim/simulator/cost_engine.py +340 -0
  147. infrasim/simulator/digital_twin.py +286 -0
  148. infrasim/simulator/dr_engine.py +348 -0
  149. infrasim/simulator/drift_detector.py +982 -0
  150. infrasim/simulator/dynamic_engine.py +1091 -0
  151. infrasim/simulator/engine.py +296 -0
  152. infrasim/simulator/financial_risk.py +345 -0
  153. infrasim/simulator/gameday_engine.py +387 -0
  154. infrasim/simulator/incident_db.py +982 -0
  155. infrasim/simulator/incident_replay.py +812 -0
  156. infrasim/simulator/markov_model.py +278 -0
  157. infrasim/simulator/monte_carlo.py +276 -0
  158. infrasim/simulator/ops_engine.py +1914 -0
  159. infrasim/simulator/planner.py +670 -0
  160. infrasim/simulator/predictive_engine.py +347 -0
  161. infrasim/simulator/resilience_timeline.py +612 -0
  162. infrasim/simulator/scenarios.py +932 -0
  163. infrasim/simulator/security_engine.py +525 -0
  164. infrasim/simulator/sla_validator.py +788 -0
  165. infrasim/simulator/supply_chain_engine.py +371 -0
  166. infrasim/simulator/traffic.py +460 -0
  167. infrasim/simulator/whatif_engine.py +858 -0
  168. infrasim/telemetry.py +61 -0
@@ -0,0 +1,705 @@
1
+ Metadata-Version: 2.4
2
+ Name: faultray
3
+ Version: 1.2.0
4
+ Summary: FaultRay — Zero-risk infrastructure chaos engineering. Prove your availability ceiling mathematically.
5
+ Project-URL: Homepage, https://github.com/mattyopon/infrasim
6
+ Project-URL: Documentation, https://github.com/mattyopon/infrasim#readme
7
+ Project-URL: Repository, https://github.com/mattyopon/infrasim
8
+ Project-URL: Issues, https://github.com/mattyopon/infrasim/issues
9
+ Project-URL: Changelog, https://github.com/mattyopon/infrasim#changelog
10
+ Author-email: Yutaro Maeda <mattyopon@gmail.com>
11
+ License: MIT
12
+ License-File: LICENSE
13
+ Keywords: chaos-engineering,devops,infrastructure,reliability,simulation,slo,sre
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: System Administrators
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Software Development :: Testing
22
+ Classifier: Topic :: System :: Networking
23
+ Classifier: Topic :: System :: Systems Administration
24
+ Classifier: Typing :: Typed
25
+ Requires-Python: >=3.11
26
+ Requires-Dist: aiosqlite>=0.20
27
+ Requires-Dist: fastapi>=0.115
28
+ Requires-Dist: httpx>=0.28
29
+ Requires-Dist: jinja2>=3.1
30
+ Requires-Dist: networkx>=3.0
31
+ Requires-Dist: psutil>=6.0
32
+ Requires-Dist: pydantic>=2.0
33
+ Requires-Dist: pyyaml>=6.0
34
+ Requires-Dist: rich>=13.0
35
+ Requires-Dist: sqlalchemy>=2.0
36
+ Requires-Dist: typer>=0.15
37
+ Requires-Dist: uvicorn>=0.34
38
+ Provides-Extra: all-clouds
39
+ Requires-Dist: azure-identity>=1.15; extra == 'all-clouds'
40
+ Requires-Dist: boto3>=1.28; extra == 'all-clouds'
41
+ Requires-Dist: google-cloud-compute>=1.0; extra == 'all-clouds'
42
+ Requires-Dist: kubernetes>=28.0; extra == 'all-clouds'
43
+ Provides-Extra: aws
44
+ Requires-Dist: boto3>=1.28; extra == 'aws'
45
+ Provides-Extra: azure
46
+ Requires-Dist: azure-identity>=1.15; extra == 'azure'
47
+ Requires-Dist: azure-mgmt-compute>=30.0; extra == 'azure'
48
+ Requires-Dist: azure-mgmt-redis>=14.0; extra == 'azure'
49
+ Requires-Dist: azure-mgmt-sql>=3.0; extra == 'azure'
50
+ Provides-Extra: dev
51
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
52
+ Requires-Dist: pytest>=8.0; extra == 'dev'
53
+ Requires-Dist: ruff>=0.8; extra == 'dev'
54
+ Provides-Extra: gcp
55
+ Requires-Dist: google-cloud-compute>=1.0; extra == 'gcp'
56
+ Requires-Dist: google-cloud-redis>=2.0; extra == 'gcp'
57
+ Requires-Dist: google-cloud-sql-admin>=1.0; extra == 'gcp'
58
+ Requires-Dist: google-cloud-storage>=2.0; extra == 'gcp'
59
+ Provides-Extra: k8s
60
+ Requires-Dist: kubernetes>=28.0; extra == 'k8s'
61
+ Description-Content-Type: text/markdown
62
+
63
+ # FaultRay — Zero-Risk Infrastructure Chaos Simulation
64
+
65
+ > **Simulate infrastructure failures without touching production.**
66
+ > **Prove your system's availability ceiling mathematically.**
67
+
68
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
69
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
70
+ [![Tests](https://img.shields.io/badge/tests-89%20passed-brightgreen.svg)]()
71
+ [![Version](https://img.shields.io/badge/version-5.14-blue.svg)]()
72
+ [![Docker](https://img.shields.io/badge/docker-ready-2496ED.svg)](Dockerfile)
73
+ [![PyPI](https://img.shields.io/badge/PyPI-faultray-orange.svg)]()
74
+
75
+ ---
76
+
77
+ ## Why FaultRay?
78
+
79
+ Most chaos engineering tools inject real faults into real infrastructure. FaultRay takes a fundamentally different approach: **pure mathematical simulation** that models your entire dependency graph in memory, runs 150+ failure scenarios, and proves your system's theoretical availability ceiling — all without touching a single server.
80
+
81
+ | | **Gremlin** | **Steadybit** | **AWS FIS** | **FaultRay** |
82
+ |---|---|---|---|---|
83
+ | **Approach** | Fault injection | Fault injection | Fault injection | Mathematical simulation |
84
+ | **Risk to production** | Medium-High | Medium | Medium | **Zero** |
85
+ | **Setup required** | Agent per host | Agent per host | AWS-only | **Single pip install** |
86
+ | **Scenario count** | Manual config | Manual config | AWS services only | **150+ auto-generated** |
87
+ | **Availability proof** | No | No | No | **3-Layer Limit Model** |
88
+ | **Cost** | $$$$ | $$$ | $$ (AWS-only) | **Free / OSS** |
89
+ | **Dependency graph** | No | Limited | No | **Full NetworkX graph** |
90
+ | **Terraform integration** | No | No | Native | **tfstate + plan analysis** |
91
+ | **Security feed** | No | No | No | **Auto CVE scenarios** |
92
+
93
+ **Key differentiators:**
94
+
95
+ - **Zero risk** — Runs entirely in memory. No agents, no sidecars, no production impact.
96
+ - **5 simulation engines** — Cascade, Dynamic, Ops, What-If, and Capacity engines working together.
97
+ - **3-Layer Availability Limit Model** — The only tool that mathematically proves your system's availability ceiling (see below).
98
+
99
+ ---
100
+
101
+ ## Quick Start
102
+
103
+ ### pip
104
+
105
+ ```bash
106
+ # Install
107
+ pip install faultray    # from a source checkout instead: pip install -e .
108
+
109
+ # Run demo (6-component web stack simulation)
110
+ faultray demo
111
+
112
+ # With web dashboard
113
+ faultray demo --web
114
+ ```
115
+
116
+ ### Docker
117
+
118
+ ```bash
119
+ # Web dashboard (http://localhost:8000)
120
+ docker compose up web
121
+
122
+ # Demo mode with dashboard
123
+ docker compose --profile demo up demo
124
+
125
+ # CLI mode
126
+ docker compose --profile cli run cli simulate
127
+
128
+ # Build from source
129
+ docker build -t faultray .
130
+ docker run -p 8000:8000 faultray
131
+ ```
132
+
133
+ ### Demo Output
134
+
135
+ ```
136
+ ╭────────── FaultRay Chaos Simulation Report ──────────╮
137
+ │ Resilience Score: 36/100 │
138
+ │ Scenarios tested: 150 │
139
+ │ Critical: 7 Warning: 66 Passed: 77 │
140
+ ╰──────────────────────────────────────────────────────╯
141
+
142
+ CRITICAL FINDINGS
143
+
144
+ 10.0/10 CRITICAL Traffic spike (10x)
145
+ Cascade path:
146
+ ├── DOWN nginx (LB)
147
+ ├── DOWN api-server-1
148
+ ├── DOWN api-server-2
149
+ ├── DOWN PostgreSQL (primary)
150
+ ├── DOWN Redis (cache)
151
+ └── DOWN RabbitMQ
152
+ ```
153
+
154
+ ---
155
+
156
+ ## Features
157
+
158
+ | | Feature | Description |
159
+ |---|---|---|
160
+ | :shield: | **Zero Risk Simulation** | Runs entirely in memory — no agents, no sidecars, no production impact |
161
+ | :chart_with_upwards_trend: | **150+ Chaos Scenarios** | 30 categories of failure scenarios auto-generated from your topology |
162
+ | :link: | **Dependency Graph Analysis** | NetworkX-powered graph modeling with cascade fault prediction |
163
+ | :triangular_ruler: | **3-Layer Availability Proof** | Mathematically proves your system's theoretical availability ceiling |
164
+ | :dart: | **SLO/SLI Tracking** | Availability, latency, and error rate tracking against SLO targets |
165
+ | :crystal_ball: | **What-If Analysis** | Parameter sweep for fault tolerance sensitivity analysis |
166
+ | :bar_chart: | **Capacity Planning** | Growth forecasting with SLO compliance evaluation |
167
+ | :ocean: | **10 Traffic Models** | DDoS, diurnal, flash crowd, growth trend, and more |
168
+ | :clock1: | **Ops Simulation** | Long-running (days/weeks) operational simulation with SLO tracking |
169
+ | :zap: | **Dynamic Simulation** | Time-stepped simulation with traffic pattern integration |
170
+ | :newspaper: | **Security Feed** | Auto-generates scenarios from CISA, NVD, Krebs, BleepingComputer |
171
+ | :globe_with_meridians: | **Terraform Integration** | Import from tfstate/plan with change impact analysis |
172
+ | :desktop_computer: | **Web Dashboard** | D3.js interactive graph + Grafana-style dashboard |
173
+ | :mag: | **Multiple Discovery Sources** | Local scan, Prometheus, Terraform state/plan, or a YAML definition |
174
+
175
+ ---
176
+
177
+ ## 3-Layer Availability Limit Model
178
+
179
+ **This is FaultRay's unique contribution to chaos engineering.**
180
+
181
+ Traditional chaos tools answer "what breaks?" FaultRay answers **"what is the maximum availability your architecture can physically achieve?"** using a three-layer mathematical model.
182
+
183
+ ```
184
+ ┌─────────────────────────────────────────┐
185
+ │ │
186
+ Layer 3 ──────── │ Theoretical Limit 6.65 nines │ ── Upper bound
187
+ │ (perfect redundancy + perfect failover)│ (unreachable)
188
+ │ │
189
+ Layer 2 ──────── │ Hardware Limit 5.91 nines │ ── Physical ceiling
190
+ │ (component MTBF × redundancy) │ (hard constraint)
191
+ │ │
192
+ Layer 1 ──────── │ Software Limit 4.00 nines │ ── Practical ceiling
193
+ │ (deployment + config + human error) │ (your real target)
194
+ │ │
195
+ └─────────────────────────────────────────┘
196
+ ```
197
+
198
+ ### Layer 1: Software Availability Limit (practical ceiling)
199
+
200
+ Accounts for deployment failures, configuration drift, human error, and software bugs. Most organizations cannot exceed **4.00 nines (99.99%)** at this layer without extreme operational maturity.
201
+
202
+ ### Layer 2: Hardware Availability Limit (physical ceiling)
203
+
204
+ Calculated from component MTBF (Mean Time Between Failures), redundancy factor, and failover time. Even with perfect software, hardware constraints cap availability at approximately **5.91 nines (≈99.99988%)**.
205
+
206
+ ### Layer 3: Theoretical Availability Limit (mathematical upper bound)
207
+
208
+ Assumes perfect redundancy, instant failover, and zero software errors. This is the mathematical ceiling your architecture can never exceed: **6.65 nines (99.99997%)**.
209
+
210
+ **Why this matters:** If your SLO target is 99.99% but your Layer 1 limit is 99.95%, no amount of engineering effort will close the gap without architectural changes. FaultRay tells you this **before** you waste months trying.
211
+
212
+ ---
213
+
214
+ ## 5 Simulation Engines
215
+
216
+ ### 1. Cascade Engine
217
+ Models fault propagation through dependency graphs. Identifies single points of failure, compound failures, and cascade paths.
218
+ ```bash
219
+ faultray load infra.yaml
220
+ faultray simulate --html report.html
221
+ ```
222
+
223
+ ### 2. Dynamic Engine
224
+ Time-stepped simulation with traffic pattern integration. Models real-world load variations over hours or days.
225
+ ```bash
226
+ faultray dynamic infra.yaml --traffic diurnal --duration 24h --step 1min
227
+ ```
228
+
229
+ ### 3. Ops Engine
230
+ Long-running operational simulation (days to weeks) with SLO tracking, incident generation, and deployment events.
231
+ ```bash
232
+ faultray ops-sim infra.yaml --days 7 --step 5min
233
+ ```
234
+
235
+ ### 4. What-If Engine
236
+ Parameter sweep analysis to understand fault tolerance sensitivity across multiple dimensions.
237
+ ```bash
238
+ faultray whatif infra.yaml --parameter mttr_factor --values "0.5,1.0,2.0,4.0"
239
+ ```
240
+
241
+ ### 5. Capacity Engine
242
+ Growth forecasting with resource exhaustion prediction and SLO compliance evaluation.
243
+ ```bash
244
+ faultray capacity infra.yaml --growth 0.15 --slo 99.9
245
+ ```
246
+
247
+ ---
248
+
249
+ ## Usage
250
+
251
+ ### From YAML Definition
252
+
253
+ ```yaml
254
+ # infra.yaml
255
+ components:
256
+ - id: nginx
257
+ type: load_balancer
258
+ port: 443
259
+ replicas: 2
260
+ metrics: { cpu_percent: 25, memory_percent: 30 }
261
+ capacity: { max_connections: 10000 }
262
+
263
+ - id: api
264
+ type: app_server
265
+ port: 8080
266
+ metrics: { cpu_percent: 65, memory_percent: 70 }
267
+ capacity: { max_connections: 500, connection_pool_size: 100 }
268
+
269
+ - id: postgres
270
+ type: database
271
+ port: 5432
272
+ metrics: { cpu_percent: 45, memory_percent: 80, disk_percent: 72 }
273
+ capacity: { max_connections: 100 }
274
+
275
+ dependencies:
276
+ - source: nginx
277
+ target: api
278
+ type: requires
279
+ - source: api
280
+ target: postgres
281
+ type: requires
282
+ ```
283
+
284
+ ```bash
285
+ faultray load infra.yaml
286
+ faultray simulate --html report.html
287
+ ```
288
+
289
+ ### From Terraform
290
+
291
+ ```bash
292
+ # Import from state file
293
+ faultray tf-import --state terraform.tfstate
294
+
295
+ # Import from live terraform
296
+ faultray tf-import --dir ./terraform
297
+
298
+ # Analyze plan impact
299
+ terraform plan -out=plan.out
300
+ faultray tf-plan plan.out --html plan-report.html
301
+ ```
302
+
303
+ ### From Prometheus
304
+
305
+ ```bash
306
+ faultray scan --prometheus-url http://prometheus:9090
307
+ faultray simulate
308
+ ```
309
+
310
+ ### Security News Feed
311
+
312
+ ```bash
313
+ # Fetch latest security news and generate scenarios
314
+ faultray feed-update
315
+
316
+ # View generated scenarios
317
+ faultray feed-list
318
+
319
+ # Simulate with feed scenarios included automatically
320
+ faultray simulate
321
+ ```
322
+
323
+ ### Web Dashboard
324
+
325
+ ```bash
326
+ faultray serve --port 8080
327
+ # Open http://localhost:8080
328
+ ```
329
+
330
+ ### Operational Simulation
331
+
332
+ Simulate long-running operations and track SLO compliance and incident patterns over time.
333
+
334
+ ```bash
335
+ # Run 7-day operational simulation with 5-minute time steps
336
+ faultray ops-sim infra.yaml --days 7 --step 5min
337
+
338
+ # Run with default parameters
339
+ faultray ops-sim --defaults
340
+ ```
341
+
342
+ ### What-If Analysis
343
+
344
+ Sweep parameters to analyze fault tolerance sensitivity across multiple dimensions.
345
+
346
+ ```bash
347
+ # Run with default parameter sweep
348
+ faultray whatif infra.yaml --defaults
349
+
350
+ # Sweep a specific parameter
351
+ faultray whatif --parameter mttr_factor --values "0.5,1.0,2.0,4.0"
352
+ ```
353
+
354
+ ### Capacity Planning
355
+
356
+ Forecast resource exhaustion and evaluate SLO compliance under growth projections.
357
+
358
+ ```bash
359
+ # Capacity planning with 15% annual growth targeting 99.9% SLO
360
+ faultray capacity infra.yaml --growth 0.15 --slo 99.9
361
+ ```
362
+
363
+ ### Traffic Patterns
364
+
365
+ 10 traffic models available for dynamic simulation:
366
+
367
+ | Pattern | Description |
368
+ |---------|-------------|
369
+ | `CONSTANT` | Steady-state constant traffic |
370
+ | `RAMP` | Linear traffic increase |
371
+ | `SPIKE` | Instantaneous traffic spike |
372
+ | `WAVE` | Sinusoidal wave pattern |
373
+ | `DDoS_VOLUMETRIC` | High-volume DDoS attack |
374
+ | `DDoS_SLOWLORIS` | Slowloris-style DDoS attack |
375
+ | `FLASH_CROWD` | Sudden viral popularity surge |
376
+ | `DIURNAL` | Daily cycle (high daytime, low nighttime) |
377
+ | `DIURNAL_WEEKLY` | Weekly cycle (high weekdays, low weekends) |
378
+ | `GROWTH_TREND` | Long-term organic growth trend |
379
+
380
+ ```bash
381
+ # Dynamic simulation with traffic pattern
382
+ faultray dynamic infra.yaml --traffic diurnal --duration 24h --step 1min
383
+ ```
384
+
385
+ ---
386
+
387
+ ## Chaos Scenarios (30 Categories)
388
+
389
+ | Category | Examples |
390
+ |----------|---------|
391
+ | **Single Failures** | Component down, CPU saturation, OOM, disk full, network partition |
392
+ | **Traffic** | 1.5x, 2x, 3x, 5x, 10x (DDoS-level) traffic spikes |
393
+ | **Compound** | All pairwise (C(n,2)) and triple (C(n,3)) simultaneous failures |
394
+ | **DB-Specific** | Log explosion, replication lag, connection storm, lock contention |
395
+ | **Cache-Specific** | Stampede, eviction storm, split brain |
396
+ | **Queue-Specific** | Backpressure, poison message |
397
+ | **LB-Specific** | Health check failure, TLS expiry, config reload failure |
398
+ | **App-Specific** | Memory leak, thread exhaustion, GC pause, bad deployment |
399
+ | **Infrastructure** | Zone failure, cascading timeouts, total meltdown, rolling restart |
400
+ | **Real-World** | Black Friday (10x + cache pressure), noisy neighbor, slow DB at peak |
401
+ | **Security Feed** | Auto-generated from CISA, NVD, Krebs, BleepingComputer, etc. |
402
+
403
+ ---
404
+
405
+ ## Risk Scoring
406
+
407
+ ```
408
+ severity = (impact x spread) x likelihood
409
+
410
+ impact = weighted health status (DOWN=1.0, OVERLOADED=0.5, DEGRADED=0.25)
411
+ spread = affected_components / total_components
412
+ likelihood = proximity to failure threshold (0.2 = unlikely, 1.0 = imminent)
413
+ ```
414
+
415
+ | Level | Score | Meaning |
416
+ |-------|-------|---------|
417
+ | CRITICAL | 7.0-10.0 | Cascading failure, major outage risk |
418
+ | WARNING | 4.0-6.9 | Degradation, limited cascade |
419
+ | PASSED | 0.0-3.9 | Low risk, contained impact |
420
+
421
+ ---
422
+
423
+ ## Architecture
424
+
425
+ ```
426
+ Discovery Layer Model Layer Simulator Layer
427
+ ┌─────────────┐ ┌─────────────────┐ ┌──────────────────┐
428
+ │ Local Scan │ │ InfraGraph │ │ 30-cat Scenarios │
429
+ │ Prometheus │───>│ Components │───>│ Cascade Engine │
430
+ │ Terraform │ │ Dependencies │ │ Dynamic Engine │
431
+ │ YAML Loader │ │ NetworkX Graph │ │ Ops Engine │
432
+ └─────────────┘ └─────────────────┘ │ What-If Engine │
433
+ │ Capacity Engine │
434
+ │ Traffic Models │
435
+ │ Feed Scenarios │
436
+ │ Risk Scoring │
437
+ │ 3-Layer Limits │
438
+ └──────────────────┘
439
+
440
+ ┌─────────────────┐ ┌──────────────────┐
441
+ │ Web Dashboard │<───│ CLI Reporter │
442
+ │ FastAPI + D3.js │ │ HTML Reporter │
443
+ │ Docker Ready │ │ JSON Export │
444
+ └─────────────────┘ └──────────────────┘
445
+ ```
446
+
447
+ ---
448
+
449
+ ## CLI Commands
450
+
451
+ | Command | Description |
452
+ |---------|-------------|
453
+ | `faultray scan` | Discover local system or Prometheus infrastructure |
454
+ | `faultray simulate` | Run chaos simulation (150+ scenarios) |
455
+ | `faultray dynamic` | Run dynamic time-stepped simulation with traffic patterns |
456
+ | `faultray ops-sim` | Long-running operational simulation with SLO tracking |
457
+ | `faultray show` | Display infrastructure model summary |
458
+ | `faultray load <yaml>` | Load infrastructure from YAML |
459
+ | `faultray tf-import` | Import from Terraform state |
460
+ | `faultray tf-plan <plan>` | Analyze Terraform plan impact |
461
+ | `faultray report` | Generate HTML report |
462
+ | `faultray serve` | Launch web dashboard |
463
+ | `faultray demo` | Run demo with sample infrastructure |
464
+ | `faultray feed-update` | Update scenarios from security news |
465
+ | `faultray feed-list` | Show stored feed scenarios |
466
+ | `faultray feed-sources` | Show configured news sources |
467
+ | `faultray feed-clear` | Clear feed scenario store |
468
+ | `faultray whatif` | Run what-if analysis (parameter sweep) |
469
+ | `faultray capacity` | Capacity planning with growth forecasting |
470
+
471
+ ---
472
+
473
+ ## Docker
474
+
475
+ ### Docker Compose Services
476
+
477
+ | Service | Description | Command |
478
+ |---------|-------------|---------|
479
+ | `web` | Web dashboard on port 8000 | `docker compose up web` |
480
+ | `demo` | Demo mode with sample infrastructure | `docker compose --profile demo up demo` |
481
+ | `cli` | CLI mode for running simulations | `docker compose --profile cli run cli <command>` |
482
+
483
+ ### Docker Build
484
+
485
+ ```bash
486
+ # Build
487
+ docker build -t faultray .
488
+
489
+ # Run web dashboard
490
+ docker run -p 8000:8000 faultray
491
+
492
+ # Run CLI command
493
+ docker run --rm faultray faultray simulate
494
+
495
+ # Mount custom infrastructure definition
496
+ docker run --rm -v $(pwd)/infra.yaml:/app/infra.yaml faultray faultray load /app/infra.yaml
497
+ ```
498
+
499
+ ### Docker Compose Examples
500
+
501
+ ```bash
502
+ # Start web dashboard
503
+ docker compose up web
504
+
505
+ # Run a simulation via CLI
506
+ docker compose --profile cli run cli load examples/demo-infra.yaml
507
+
508
+ # Run with Terraform state mounted
509
+ docker compose --profile cli run -v $(pwd)/terraform.tfstate:/app/terraform.tfstate \
510
+ cli tf-import --state /app/terraform.tfstate
511
+ ```
512
+
513
+ ---
514
+
515
+ ## Development
516
+
517
+ ```bash
518
+ # Install in development mode
519
+ pip install -e ".[dev]"
520
+
521
+ # Run tests (89 tests, < 1 second)
522
+ pytest tests/ -v
523
+
524
+ # Lint
525
+ ruff check src/ tests/
526
+
527
+ # Build Docker image
528
+ docker build -t faultray:dev .
529
+ ```
530
+
531
+ ### Test Coverage
532
+
533
+ | Module | Tests | Coverage |
534
+ |--------|-------|----------|
535
+ | Cascade Engine | 14 | Fault propagation, severity scoring, compound failures |
536
+ | Dynamic Engine | 14 | CLI output, severity classification, boundary values |
537
+ | Ops Engine | 9 | SLO tracking, traffic patterns, deployments |
538
+ | Capacity Engine | 8 | Forecasting, right-sizing, SLO targets |
539
+ | Scenarios | 4 | Rolling restart edge cases, scenario generation |
540
+ | Traffic | 11 | All 10 traffic patterns + determinism |
541
+ | Feeds | 11 | Analysis, scoring, store operations |
542
+ | Loader | 10 | YAML parsing, validation, circular dependency detection |
543
+ | Graph | 2 | Cascade paths, critical path limits |
544
+ | **Total** | **89** | **All passing** |
545
+
546
+ ### Requirements
547
+
548
+ - Python 3.11+
549
+ - Dependencies: typer, rich, pydantic, networkx, psutil, fastapi, uvicorn, jinja2, httpx, pyyaml, sqlalchemy, aiosqlite
550
+
551
+ ---
552
+
553
+ ## Changelog
554
+
555
+ ### v5.14 (2026-03-14)
556
+ - 3-Layer Availability Limit Model: mathematical proof of system availability ceiling
557
+ - Layer 1 (Software 4.00 nines), Layer 2 (Hardware 5.91 nines), Layer 3 (Theoretical 6.65 nines)
558
+ - README overhauled to commercial/OSS quality with bilingual EN/JP support
559
+
560
+ ### v5.13 (2026-03-14)
561
+ - Docker Compose multi-service configuration (web, demo, cli profiles)
562
+ - Volume mounts for persistent feed data and report output
563
+
564
+ ### v5.12 (2026-03-14)
565
+ - Dockerfile with Python 3.11-slim base
566
+ - Container-ready web dashboard deployment
567
+
568
+ ### v5.11 (2026-03-14)
569
+ - Competitive positioning against Gremlin, Steadybit, AWS FIS
570
+ - Feature matrix documentation
571
+
572
+ ### v5.10 (2026-03-14)
573
+ - Architecture diagram updated with all 5 engines and 3-Layer Limits
574
+ - JSON export support for simulation results
575
+
576
+ ### v5.9 (2026-03-14)
577
+ - Traffic model descriptions translated to English
578
+ - Bilingual documentation structure (EN/JP)
579
+
580
+ ### v5.8 (2026-03-14)
581
+ - Fix: architecture diagram now labels the Dynamic Engine correctly (it was previously shown as a duplicate "Ops Engine")
582
+ - CLI command table aligned with all registered subcommands
583
+
584
+ ### v5.7 (2026-03-14)
585
+ - Risk scoring formula documentation improvements
586
+ - Severity threshold boundary clarification
587
+
588
+ ### v5.6 (2026-03-14)
589
+ - Fix: Rolling restart scenario now keeps at least 1 server running
590
+ - 4 new scenario edge case tests
591
+
592
+ ### v5.5 (2026-03-14)
593
+ - Fix: Dynamic simulation results always showed 0 critical/0 warning (float vs string comparison)
594
+ - Fix: `dynamic` command passed report object instead of results list
595
+ - Fix: `--deploy-hour` validation (0-23 range)
596
+ - 14 new dynamic CLI tests
597
+
598
+ ### v5.4 (2026-03-14)
599
+ - Pydantic field_validators for input boundary defense
600
+
601
+ ### v5.3 (2026-03-13)
602
+ - Fix TypeError in dynamic CLI command
603
+
604
+ ### v5.2 (2026-03-13)
605
+ - Security hardening and robustness improvements
606
+
607
+ ### v5.1 (2026-03-13)
608
+ - Consistency fixes, test coverage, CLI validation
609
+
610
+ ### v5.0 (2026-03-13)
611
+ - README overhaul, graph fixes, CLI UX improvements
612
+
613
+ ---
614
+
615
+ ## License
616
+
617
+ MIT License - see [LICENSE](LICENSE)
618
+
619
+ ---
620
+
621
+ ---
622
+
623
+ # FaultRay — ゼロリスク・インフラ障害シミュレーション(日本語)
624
+
625
+ > **本番環境に一切触れずにインフラ障害をシミュレーション。**
626
+ > **システムの可用性上限を数学的に証明。**
627
+
628
+ ## なぜ FaultRay なのか?
629
+
630
+ 従来のカオスエンジニアリングツール(Gremlin, Steadybit, AWS FIS)は**実際のインフラに障害を注入**します。FaultRay はまったく異なるアプローチ:**純粋な数学的シミュレーション**で依存関係グラフ全体をメモリ上にモデル化し、150以上の障害シナリオを実行して、システムの理論的可用性上限を証明します。サーバーに一切触れません。
631
+
632
+ | | **Gremlin** | **Steadybit** | **AWS FIS** | **FaultRay** |
633
+ |---|---|---|---|---|
634
+ | **アプローチ** | 障害注入 | 障害注入 | 障害注入 | 数学的シミュレーション |
635
+ | **本番リスク** | 中〜高 | 中 | 中 | **ゼロ** |
636
+ | **セットアップ** | ホスト毎にエージェント | ホスト毎にエージェント | AWSのみ | **pip install のみ** |
637
+ | **シナリオ数** | 手動設定 | 手動設定 | AWSサービスのみ | **150+自動生成** |
638
+ | **可用性証明** | なし | なし | なし | **3層限界モデル** |
639
+ | **コスト** | $$$$ | $$$ | $$ | **無料 / OSS** |
640
+
641
+ ## クイックスタート
642
+
643
+ ### pip
644
+
645
+ ```bash
646
+ # インストール
647
+ pip install faultray    # ソースからインストールする場合: pip install -e .
648
+
649
+ # デモ実行(6コンポーネントWebスタック)
650
+ faultray demo
651
+
652
+ # Web ダッシュボード付き
653
+ faultray demo --web
654
+ ```
655
+
656
+ ### Docker
657
+
658
+ ```bash
659
+ # Web ダッシュボード(http://localhost:8000)
660
+ docker compose up web
661
+
662
+ # デモモード
663
+ docker compose --profile demo up demo
664
+
665
+ # CLI モード
666
+ docker compose --profile cli run cli simulate
667
+ ```
668
+
669
+ ## 主要機能
670
+
671
+ - :shield: **ゼロリスクシミュレーション** — 完全にメモリ上で実行。エージェント不要、本番への影響ゼロ
672
+ - :chart_with_upwards_trend: **150以上のカオスシナリオ** — 30カテゴリの障害シナリオをトポロジーから自動生成
673
+ - :link: **依存関係グラフ解析** — NetworkX によるグラフモデリングと連鎖障害予測
674
+ - :triangular_ruler: **3層可用性限界証明** — システムの理論的可用性上限を数学的に証明
675
+ - :dart: **SLO/SLI 追跡** — 可用性・レイテンシ・エラー率のSLO目標に対する追跡
676
+ - :crystal_ball: **What-If 分析** — パラメータスイープによる障害耐性の感度分析
677
+ - :bar_chart: **キャパシティプランニング** — 成長予測に基づくSLO達成可否の評価
678
+ - :ocean: **10種類のトラフィックモデル** — DDoS・日次変動・フラッシュクラウド等
679
+ - :newspaper: **セキュリティフィード** — CISA, NVD等から最新脅威シナリオを自動追加
680
+ - :globe_with_meridians: **Terraform 連携** — tfstate/plan からインフラ自動インポートと変更影響分析
681
+ - :desktop_computer: **Web ダッシュボード** — D3.js インタラクティブグラフ + Grafana風ダッシュボード
682
+
683
+ ## 3層可用性限界モデル(最大の特徴)
684
+
685
+ FaultRay 独自の理論モデルです。従来のカオスツールが「何が壊れるか?」に答えるのに対し、FaultRay は **「あなたのアーキテクチャが物理的に達成できる最大可用性はいくつか?」** に答えます。
686
+
687
+ | 層 | 名称 | 上限 | 説明 |
688
+ |---|---|---|---|
689
+ | **Layer 3** | 理論限界 | 6.65 nines | 完全な冗長性+瞬時フェイルオーバーを仮定した数学的上限(到達不可) |
690
+ | **Layer 2** | ハードウェア限界 | 5.91 nines | コンポーネントMTBF × 冗長係数から算出される物理的上限 |
691
+ | **Layer 1** | ソフトウェア限界 | 4.00 nines | デプロイ失敗・設定ドリフト・ヒューマンエラーを考慮した実用上限 |
692
+
693
+ **重要な意味:** SLO目標が99.99%でもLayer 1の限界が99.95%なら、どれだけエンジニアリング努力を重ねてもアーキテクチャ変更なしにはギャップを埋められません。FaultRay は**数ヶ月の無駄な努力の前に**それを教えてくれます。
694
+
695
+ ## 5つのシミュレーションエンジン
696
+
697
+ 1. **カスケードエンジン** — 依存関係グラフを通じた障害伝搬モデリング
698
+ 2. **ダイナミックエンジン** — トラフィックパターン連動の時間ステップ型シミュレーション
699
+ 3. **Opsエンジン** — 長期間(数日〜数週間)の運用シミュレーション
700
+ 4. **What-Ifエンジン** — パラメータスイープによる感度分析
701
+ 5. **キャパシティエンジン** — 成長予測とリソース枯渇予測
702
+
703
+ ## ライセンス
704
+
705
+ MIT License - [LICENSE](LICENSE) を参照