flow-doctor 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. flow_doctor-0.1.0/LICENSE +21 -0
  2. flow_doctor-0.1.0/PKG-INFO +365 -0
  3. flow_doctor-0.1.0/README.md +300 -0
  4. flow_doctor-0.1.0/flow_doctor/__init__.py +8 -0
  5. flow_doctor-0.1.0/flow_doctor/core/__init__.py +0 -0
  6. flow_doctor-0.1.0/flow_doctor/core/client.py +723 -0
  7. flow_doctor-0.1.0/flow_doctor/core/config.py +362 -0
  8. flow_doctor-0.1.0/flow_doctor/core/dedup.py +94 -0
  9. flow_doctor-0.1.0/flow_doctor/core/handler.py +105 -0
  10. flow_doctor-0.1.0/flow_doctor/core/models.py +126 -0
  11. flow_doctor-0.1.0/flow_doctor/core/rate_limiter.py +57 -0
  12. flow_doctor-0.1.0/flow_doctor/core/scrubber.py +114 -0
  13. flow_doctor-0.1.0/flow_doctor/diagnosis/__init__.py +13 -0
  14. flow_doctor-0.1.0/flow_doctor/diagnosis/agent_provider.py +180 -0
  15. flow_doctor-0.1.0/flow_doctor/diagnosis/context.py +200 -0
  16. flow_doctor-0.1.0/flow_doctor/diagnosis/git_context.py +94 -0
  17. flow_doctor-0.1.0/flow_doctor/diagnosis/knowledge_base.py +89 -0
  18. flow_doctor-0.1.0/flow_doctor/diagnosis/provider.py +130 -0
  19. flow_doctor-0.1.0/flow_doctor/digest/__init__.py +5 -0
  20. flow_doctor-0.1.0/flow_doctor/digest/generator.py +122 -0
  21. flow_doctor-0.1.0/flow_doctor/fix/__init__.py +1 -0
  22. flow_doctor-0.1.0/flow_doctor/fix/cli.py +439 -0
  23. flow_doctor-0.1.0/flow_doctor/fix/generator.py +96 -0
  24. flow_doctor-0.1.0/flow_doctor/fix/pr_creator.py +188 -0
  25. flow_doctor-0.1.0/flow_doctor/fix/prompts.py +85 -0
  26. flow_doctor-0.1.0/flow_doctor/fix/replay_store.py +41 -0
  27. flow_doctor-0.1.0/flow_doctor/fix/scope_guard.py +56 -0
  28. flow_doctor-0.1.0/flow_doctor/fix/validator.py +39 -0
  29. flow_doctor-0.1.0/flow_doctor/notify/__init__.py +0 -0
  30. flow_doctor-0.1.0/flow_doctor/notify/base.py +29 -0
  31. flow_doctor-0.1.0/flow_doctor/notify/email.py +114 -0
  32. flow_doctor-0.1.0/flow_doctor/notify/github.py +196 -0
  33. flow_doctor-0.1.0/flow_doctor/notify/slack.py +94 -0
  34. flow_doctor-0.1.0/flow_doctor/remediation/__init__.py +0 -0
  35. flow_doctor-0.1.0/flow_doctor/remediation/decision_gate.py +229 -0
  36. flow_doctor-0.1.0/flow_doctor/remediation/executor.py +279 -0
  37. flow_doctor-0.1.0/flow_doctor/remediation/playbook.py +100 -0
  38. flow_doctor-0.1.0/flow_doctor/storage/__init__.py +0 -0
  39. flow_doctor-0.1.0/flow_doctor/storage/base.py +89 -0
  40. flow_doctor-0.1.0/flow_doctor/storage/sqlite.py +540 -0
  41. flow_doctor-0.1.0/flow_doctor.egg-info/PKG-INFO +365 -0
  42. flow_doctor-0.1.0/flow_doctor.egg-info/SOURCES.txt +70 -0
  43. flow_doctor-0.1.0/flow_doctor.egg-info/dependency_links.txt +1 -0
  44. flow_doctor-0.1.0/flow_doctor.egg-info/entry_points.txt +2 -0
  45. flow_doctor-0.1.0/flow_doctor.egg-info/requires.txt +24 -0
  46. flow_doctor-0.1.0/flow_doctor.egg-info/top_level.txt +1 -0
  47. flow_doctor-0.1.0/pyproject.toml +55 -0
  48. flow_doctor-0.1.0/setup.cfg +4 -0
  49. flow_doctor-0.1.0/tests/test_config.py +125 -0
  50. flow_doctor-0.1.0/tests/test_context_assembler.py +132 -0
  51. flow_doctor-0.1.0/tests/test_coverage_gaps.py +314 -0
  52. flow_doctor-0.1.0/tests/test_dedup.py +116 -0
  53. flow_doctor-0.1.0/tests/test_diagnosis_provider.py +165 -0
  54. flow_doctor-0.1.0/tests/test_digest.py +169 -0
  55. flow_doctor-0.1.0/tests/test_fix_cli.py +197 -0
  56. flow_doctor-0.1.0/tests/test_fix_generator.py +155 -0
  57. flow_doctor-0.1.0/tests/test_git_context.py +71 -0
  58. flow_doctor-0.1.0/tests/test_github_notifier.py +122 -0
  59. flow_doctor-0.1.0/tests/test_handler.py +239 -0
  60. flow_doctor-0.1.0/tests/test_knowledge_base.py +148 -0
  61. flow_doctor-0.1.0/tests/test_models.py +81 -0
  62. flow_doctor-0.1.0/tests/test_notifications.py +114 -0
  63. flow_doctor-0.1.0/tests/test_phase2_integration.py +251 -0
  64. flow_doctor-0.1.0/tests/test_pr_creator.py +113 -0
  65. flow_doctor-0.1.0/tests/test_rate_limiter.py +104 -0
  66. flow_doctor-0.1.0/tests/test_remediation_pipeline.py +618 -0
  67. flow_doctor-0.1.0/tests/test_replay_store.py +67 -0
  68. flow_doctor-0.1.0/tests/test_reporter.py +272 -0
  69. flow_doctor-0.1.0/tests/test_scope_guard.py +62 -0
  70. flow_doctor-0.1.0/tests/test_scrubber.py +81 -0
  71. flow_doctor-0.1.0/tests/test_storage.py +147 -0
  72. flow_doctor-0.1.0/tests/test_validator.py +78 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Brian McMahon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,365 @@
1
+ Metadata-Version: 2.4
2
+ Name: flow-doctor
3
+ Version: 0.1.0
4
+ Summary: Pipeline error handler: capture, deduplicate, diagnose, and auto-fix failures.
5
+ Author: Brian McMahon
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Brian McMahon
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/cipher813/flow-doctor
29
+ Project-URL: Repository, https://github.com/cipher813/flow-doctor
30
+ Project-URL: Issues, https://github.com/cipher813/flow-doctor/issues
31
+ Keywords: error-handling,pipeline,monitoring,diagnosis,auto-fix,llm
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.9
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Programming Language :: Python :: 3.13
41
+ Classifier: Topic :: Software Development :: Libraries
42
+ Classifier: Topic :: System :: Monitoring
43
+ Requires-Python: >=3.9
44
+ Description-Content-Type: text/markdown
45
+ License-File: LICENSE
46
+ Requires-Dist: pyyaml>=6.0
47
+ Provides-Extra: slack
48
+ Requires-Dist: requests>=2.28; extra == "slack"
49
+ Provides-Extra: diagnosis
50
+ Requires-Dist: anthropic>=0.40; extra == "diagnosis"
51
+ Provides-Extra: remediation
52
+ Requires-Dist: boto3>=1.26; extra == "remediation"
53
+ Provides-Extra: agent
54
+ Requires-Dist: claude-agent-sdk>=0.1; extra == "agent"
55
+ Requires-Dist: anthropic>=0.40; extra == "agent"
56
+ Provides-Extra: all
57
+ Requires-Dist: requests>=2.28; extra == "all"
58
+ Requires-Dist: anthropic>=0.40; extra == "all"
59
+ Requires-Dist: boto3>=1.26; extra == "all"
60
+ Provides-Extra: dev
61
+ Requires-Dist: pytest>=7.0; extra == "dev"
62
+ Requires-Dist: requests>=2.28; extra == "dev"
63
+ Requires-Dist: anthropic>=0.40; extra == "dev"
64
+ Dynamic: license-file
65
+
66
+ # Flow Doctor
67
+
68
+ [![PyPI version](https://img.shields.io/pypi/v/flow-doctor.svg)](https://pypi.org/project/flow-doctor/)
69
+ [![Python](https://img.shields.io/pypi/pyversions/flow-doctor.svg)](https://pypi.org/project/flow-doctor/)
70
+ [![Tests](https://img.shields.io/github/actions/workflow/status/cipher813/flow-doctor/ci.yml?label=tests)](https://github.com/cipher813/flow-doctor/actions)
71
+ [![Coverage](https://img.shields.io/badge/coverage-81%25-brightgreen)](https://github.com/cipher813/flow-doctor)
72
+ [![License](https://img.shields.io/github/license/cipher813/flow-doctor)](LICENSE)
73
+
74
+ Pipeline error handler for Python. Captures exceptions, diagnoses root causes with LLMs, files GitHub issues, and generates fix PRs.
75
+
76
+ ```python
77
+ import flow_doctor
78
+
79
+ fd = flow_doctor.init(config_path="flow-doctor.yaml")
80
+ handler = flow_doctor.FlowDoctorHandler(fd, level=logging.WARNING)
81
+ logging.getLogger().addHandler(handler)
82
+
83
+ # Every WARNING+ log is now captured, deduplicated, diagnosed, and routed.
84
+ ```
85
+
86
+ ## How It Works
87
+
88
+ ```
89
+ Exception → Capture → Dedup → Diagnose (LLM) → GitHub Issue → Fix PR
90
+ ```
91
+
92
+ 1. **Capture** — exception, traceback, logs, and runtime context
93
+ 2. **Dedup** — same error signature within cooldown window is suppressed
94
+ 3. **Cascade** — if a declared upstream dependency also failed, tag it and skip diagnosis
95
+ 4. **Diagnose** — check the knowledge base (free), then call Claude if rate limit allows
96
+ 5. **Notify** — file a GitHub issue, send Slack/email (rate-limited with daily digest fallback)
97
+ 6. **Fix** — human adds `flow-doctor:fix` label on the issue, triggering automated fix PR generation
98
+
99
+ ## Installation
100
+
101
+ ```bash
102
+ pip install flow-doctor # core only
103
+ pip install "flow-doctor[diagnosis]" # + LLM diagnosis (anthropic SDK)
104
+ pip install "flow-doctor[diagnosis,remediation]" # + auto-remediation (boto3 for SSM/Step Functions)
105
+ pip install "flow-doctor[all]" # slack + diagnosis + remediation extras (the agent extra is installed separately)
106
+ ```
107
+
108
+ ## Quick Start
109
+
110
+ ### Option 1: Logging handler (recommended)
111
+
112
+ Attach to Python's logging system. Zero changes at call sites — any `WARNING+` log triggers the full pipeline.
113
+
114
+ ```python
115
+ import logging
116
+ import flow_doctor
117
+
118
+ fd = flow_doctor.init(config_path="flow-doctor.yaml")
119
+ handler = flow_doctor.FlowDoctorHandler(fd, level=logging.WARNING)
120
+ logging.getLogger().addHandler(handler)
121
+
122
+ # These now trigger dedup, diagnosis, and notifications automatically:
123
+ logger.warning("Upstream data is 48h stale")
124
+ logger.error("S3 backup failed: AccessDenied")
125
+ logger.exception("Pipeline crashed")
126
+ ```
127
+
128
+ The handler is **non-blocking** — `emit()` enqueues work and returns immediately. A background thread calls `fd.report()` asynchronously.
129
+
130
+ ### Option 2: Direct reporting
131
+
132
+ ```python
133
+ fd = flow_doctor.init(config_path="flow-doctor.yaml")
134
+
135
+ try:
136
+ run_pipeline()
137
+ except Exception as e:
138
+ fd.report(e) # never crashes the caller
139
+ ```
140
+
141
+ ### Option 3: Context manager / decorator
142
+
143
+ ```python
144
+ with fd.guard():
145
+ run_pipeline() # exceptions are reported and re-raised
146
+
147
+ @fd.monitor
148
+ def handler(event, context):
149
+ run_pipeline()
150
+ ```
151
+
152
+ ### Log capture
153
+
154
+ Attach recent logs to the next error report for richer diagnosis context:
155
+
156
+ ```python
157
+ with fd.capture_logs(level=logging.INFO):
158
+ logger.info("Starting scan with 900 tickers...")
159
+ run_pipeline()
160
+ # All captured logs are attached to the next fd.report() call
161
+ ```
162
+
163
+ ## Configuration
164
+
165
+ Create a `flow-doctor.yaml` in your project root:
166
+
167
+ ```yaml
168
+ flow_name: my-pipeline
169
+ repo: owner/repo
170
+
171
+ notify:
172
+ - type: github
173
+ repo: owner/repo
174
+ - type: email
175
+ sender: alerts@example.com
176
+ recipients: oncall@example.com
177
+
178
+ store:
179
+ type: sqlite
180
+ path: flow_doctor.db
181
+
182
+ diagnosis:
183
+ enabled: true
184
+ model: claude-sonnet-4-6-20250514
185
+ api_key: ${ANTHROPIC_API_KEY}
186
+ timeout_seconds: 30
187
+ max_daily_cost_usd: 1.00
188
+
189
+ github:
190
+ token: ${GITHUB_TOKEN}
191
+ labels: [flow-doctor]
192
+
193
+ rate_limits:
194
+ max_diagnosed_per_day: 3
195
+ max_issues_per_day: 3
196
+ dedup_cooldown_minutes: 60
197
+
198
+ dependencies:
199
+ - upstream-pipeline
200
+
201
+ remediation:
202
+ enabled: true
203
+ dry_run: true
204
+ auto_remediate_min_confidence: 0.9
205
+ market_hours_lockout: false
206
+
207
+ auto_fix:
208
+ enabled: true
209
+ confidence_threshold: 0.90
210
+ test_command: "python -m pytest tests/ -x -q"
211
+ scope:
212
+ allow: ["src/", "lib/"]
213
+ deny: ["*.yaml", "*.yml"]
214
+ ```
215
+
216
+ Environment variables in `${VAR}` syntax are resolved at load time.
217
+
218
+ Inline configuration (no YAML file):
219
+
220
+ ```python
221
+ fd = flow_doctor.init(
222
+ flow_name="my-pipeline",
223
+ repo="owner/repo",
224
+ store={"type": "sqlite", "path": "flow_doctor.db"},
225
+ notify=["github:owner/repo"],
226
+ )
227
+ ```
228
+
229
+ ## Features
230
+
231
+ ### Error Capture and Dedup
232
+
233
+ - Traceback extraction with frame-based signature hashing
234
+ - Configurable cooldown window (default 60 min) — same error is captured once, not spammed
235
+ - Cascade detection tags downstream failures caused by upstream dependency outages
236
+ - Automatic secret scrubbing (AWS keys, Bearer tokens, passwords in URLs)
237
+
238
+ ### LLM Diagnosis
239
+
240
+ - Structured root cause analysis via Claude: category, confidence, affected files, remediation
241
+ - Six categories: `TRANSIENT`, `DATA`, `CODE`, `CONFIG`, `EXTERNAL`, `INFRA`
242
+ - Knowledge base caching — known patterns are matched for free before calling the LLM
243
+ - Git context assembly (recent commits, changed files) for better diagnosis accuracy
244
+ - Daily cost cap (default $1.00) and rate limiting (default 3 diagnoses/day)
245
+
246
+ ### GitHub Issues
247
+
248
+ - Auto-filed with diagnosis, traceback, and captured logs
249
+ - Machine-readable metadata embedded in HTML comments for downstream automation
250
+ - Rate-limited with graceful degradation to daily digest
251
+
252
+ ### Auto-Fix PRs
253
+
254
+ Human-in-the-loop: a human reviews the diagnosis, adds a `flow-doctor:fix` label, and a GitHub Actions workflow generates a validated fix PR.
255
+
256
+ 1. An error occurs and Flow Doctor creates a GitHub issue with structured diagnosis
257
+ 2. A human reviews the diagnosis and adds the `flow-doctor:fix` label
258
+ 3. GitHub Actions triggers `flow-doctor generate-fix`
259
+ 4. The CLI generates a diff via LLM, validates against scope rules, runs tests
260
+ 5. If tests pass, a PR is opened. If tests fail, a comment explains what went wrong.
261
+
262
+ **Safety gates** — no fix PR is opened when any of the following holds:
263
+ - Confidence below threshold (default 90%)
264
+ - Category is `EXTERNAL` or `INFRA` (nothing to fix in code)
265
+ - Config issue involves credentials/secrets
266
+ - Generated diff touches files outside configured scope
267
+ - Tests fail after applying the fix
268
+
269
+ ### Remediation Playbooks
270
+
271
+ Define patterns that map failure signatures to automated actions:
272
+
273
+ ```python
274
+ from flow_doctor.remediation.playbook import Playbook, PlaybookPattern, RemediationAction, RemediationType
275
+
276
+ my_playbook = Playbook(patterns=[
277
+ PlaybookPattern(
278
+ name="service_down",
279
+ description="App service not responding",
280
+ category="INFRA",
281
+ message_pattern=r"(connection refused|service unavailable)",
282
+ action=RemediationAction(
283
+ action_type=RemediationType.RESTART_SERVICE,
284
+ description="Restart the app service",
285
+ commands=["sudo systemctl restart myapp"],
286
+ ssm_target="app-server",
287
+ ),
288
+ ),
289
+ ])
290
+ ```
291
+
292
+ ### Notifications
293
+
294
+ - **GitHub issues** — primary notification with full diagnosis
295
+ - **Slack** — webhook-based alerts with severity emoji and diagnosis snippet
296
+ - **Email** — SMTP with detailed body (traceback, diagnosis, affected files)
297
+ - **Daily digest** — summarizes rate-limited/suppressed errors at end of day
298
+
299
+ ## Auto-Fix CLI
300
+
301
+ ```bash
302
+ flow-doctor generate-fix \
303
+ --issue-number 42 \
304
+ --repo owner/repo \
305
+ --token $GITHUB_TOKEN \
306
+ --config flow-doctor.yaml \
307
+ --dry-run
308
+ ```
309
+
310
+ GitHub Actions workflow (copy to your repo at `.github/workflows/flow-doctor-fix.yml`):
311
+
312
+ ```yaml
313
+ name: Flow Doctor Fix
314
+ on:
315
+ issues:
316
+ types: [labeled]
317
+ jobs:
318
+ generate-fix:
319
+ if: github.event.label.name == 'flow-doctor:fix'
320
+ runs-on: ubuntu-latest
321
+ steps:
322
+ - uses: actions/checkout@v4
323
+ - uses: actions/setup-python@v5
324
+ with:
325
+ python-version: '3.11'
326
+ - run: pip install "flow-doctor[diagnosis]"
327
+ - run: |
328
+ python -m flow_doctor.fix.cli generate-fix \
329
+ --issue-number ${{ github.event.issue.number }} \
330
+ --repo ${{ github.repository }} \
331
+ --token $GITHUB_TOKEN
332
+ env:
333
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
334
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
335
+ ```
336
+
337
+ ## Architecture
338
+
339
+ ```
340
+ flow_doctor/
341
+ core/ # Client, config, models, dedup, rate limiting, scrubber, logging handler
342
+ diagnosis/ # LLM provider, context assembly, knowledge base, git context
343
+ digest/ # Daily digest generator
344
+ fix/ # Auto-fix: LLM generator, scope guard, test validator, PR creator, CLI
345
+ notify/ # Slack, email, GitHub issue backends
346
+ remediation/ # Decision gate, executor, playbook patterns
347
+ storage/ # SQLite backend (thread-safe, per-thread connections)
348
+ ```
349
+
350
+ ## Development
351
+
352
+ ```bash
353
+ git clone https://github.com/cipher813/flow-doctor.git
354
+ cd flow-doctor
355
+ python -m venv .venv && source .venv/bin/activate
356
+ pip install -e ".[dev]"
357
+
358
+ python -m pytest tests/ -x -q # 212 tests
359
+ python -m pytest tests/ --cov=flow_doctor # coverage report
360
+ python examples/smoke_test.py # end-to-end smoke test
361
+ ```
362
+
363
+ ## License
364
+
365
+ [MIT](LICENSE)
@@ -0,0 +1,300 @@
1
+ # Flow Doctor
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/flow-doctor.svg)](https://pypi.org/project/flow-doctor/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/flow-doctor.svg)](https://pypi.org/project/flow-doctor/)
5
+ [![Tests](https://img.shields.io/github/actions/workflow/status/cipher813/flow-doctor/ci.yml?label=tests)](https://github.com/cipher813/flow-doctor/actions)
6
+ [![Coverage](https://img.shields.io/badge/coverage-81%25-brightgreen)](https://github.com/cipher813/flow-doctor)
7
+ [![License](https://img.shields.io/github/license/cipher813/flow-doctor)](LICENSE)
8
+
9
+ Pipeline error handler for Python. Captures exceptions, diagnoses root causes with LLMs, files GitHub issues, and generates fix PRs.
10
+
11
+ ```python
12
+ import flow_doctor
13
+
14
+ fd = flow_doctor.init(config_path="flow-doctor.yaml")
15
+ handler = flow_doctor.FlowDoctorHandler(fd, level=logging.WARNING)
16
+ logging.getLogger().addHandler(handler)
17
+
18
+ # Every WARNING+ log is now captured, deduplicated, diagnosed, and routed.
19
+ ```
20
+
21
+ ## How It Works
22
+
23
+ ```
24
+ Exception → Capture → Dedup → Diagnose (LLM) → GitHub Issue → Fix PR
25
+ ```
26
+
27
+ 1. **Capture** — exception, traceback, logs, and runtime context
28
+ 2. **Dedup** — same error signature within cooldown window is suppressed
29
+ 3. **Cascade** — if a declared upstream dependency also failed, tag it and skip diagnosis
30
+ 4. **Diagnose** — check the knowledge base (free), then call Claude if rate limit allows
31
+ 5. **Notify** — file a GitHub issue, send Slack/email (rate-limited with daily digest fallback)
32
+ 6. **Fix** — human adds `flow-doctor:fix` label on the issue, triggering automated fix PR generation
33
+
34
+ ## Installation
35
+
36
+ ```bash
37
+ pip install flow-doctor # core only
38
+ pip install "flow-doctor[diagnosis]" # + LLM diagnosis (anthropic SDK)
39
+ pip install "flow-doctor[diagnosis,remediation]" # + auto-remediation (boto3 for SSM/Step Functions)
40
+ pip install "flow-doctor[all]" # slack + diagnosis + remediation extras (the agent extra is installed separately)
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ### Option 1: Logging handler (recommended)
46
+
47
+ Attach to Python's logging system. Zero changes at call sites — any `WARNING+` log triggers the full pipeline.
48
+
49
+ ```python
50
+ import logging
51
+ import flow_doctor
52
+
53
+ fd = flow_doctor.init(config_path="flow-doctor.yaml")
54
+ handler = flow_doctor.FlowDoctorHandler(fd, level=logging.WARNING)
55
+ logging.getLogger().addHandler(handler)
56
+
57
+ # These now trigger dedup, diagnosis, and notifications automatically:
58
+ logger.warning("Upstream data is 48h stale")
59
+ logger.error("S3 backup failed: AccessDenied")
60
+ logger.exception("Pipeline crashed")
61
+ ```
62
+
63
+ The handler is **non-blocking** — `emit()` enqueues work and returns immediately. A background thread calls `fd.report()` asynchronously.
64
+
65
+ ### Option 2: Direct reporting
66
+
67
+ ```python
68
+ fd = flow_doctor.init(config_path="flow-doctor.yaml")
69
+
70
+ try:
71
+ run_pipeline()
72
+ except Exception as e:
73
+ fd.report(e) # never crashes the caller
74
+ ```
75
+
76
+ ### Option 3: Context manager / decorator
77
+
78
+ ```python
79
+ with fd.guard():
80
+ run_pipeline() # exceptions are reported and re-raised
81
+
82
+ @fd.monitor
83
+ def handler(event, context):
84
+ run_pipeline()
85
+ ```
86
+
87
+ ### Log capture
88
+
89
+ Attach recent logs to the next error report for richer diagnosis context:
90
+
91
+ ```python
92
+ with fd.capture_logs(level=logging.INFO):
93
+ logger.info("Starting scan with 900 tickers...")
94
+ run_pipeline()
95
+ # All captured logs are attached to the next fd.report() call
96
+ ```
97
+
98
+ ## Configuration
99
+
100
+ Create a `flow-doctor.yaml` in your project root:
101
+
102
+ ```yaml
103
+ flow_name: my-pipeline
104
+ repo: owner/repo
105
+
106
+ notify:
107
+ - type: github
108
+ repo: owner/repo
109
+ - type: email
110
+ sender: alerts@example.com
111
+ recipients: oncall@example.com
112
+
113
+ store:
114
+ type: sqlite
115
+ path: flow_doctor.db
116
+
117
+ diagnosis:
118
+ enabled: true
119
+ model: claude-sonnet-4-6-20250514
120
+ api_key: ${ANTHROPIC_API_KEY}
121
+ timeout_seconds: 30
122
+ max_daily_cost_usd: 1.00
123
+
124
+ github:
125
+ token: ${GITHUB_TOKEN}
126
+ labels: [flow-doctor]
127
+
128
+ rate_limits:
129
+ max_diagnosed_per_day: 3
130
+ max_issues_per_day: 3
131
+ dedup_cooldown_minutes: 60
132
+
133
+ dependencies:
134
+ - upstream-pipeline
135
+
136
+ remediation:
137
+ enabled: true
138
+ dry_run: true
139
+ auto_remediate_min_confidence: 0.9
140
+ market_hours_lockout: false
141
+
142
+ auto_fix:
143
+ enabled: true
144
+ confidence_threshold: 0.90
145
+ test_command: "python -m pytest tests/ -x -q"
146
+ scope:
147
+ allow: ["src/", "lib/"]
148
+ deny: ["*.yaml", "*.yml"]
149
+ ```
150
+
151
+ Environment variables in `${VAR}` syntax are resolved at load time.
152
+
153
+ Inline configuration (no YAML file):
154
+
155
+ ```python
156
+ fd = flow_doctor.init(
157
+ flow_name="my-pipeline",
158
+ repo="owner/repo",
159
+ store={"type": "sqlite", "path": "flow_doctor.db"},
160
+ notify=["github:owner/repo"],
161
+ )
162
+ ```
163
+
164
+ ## Features
165
+
166
+ ### Error Capture and Dedup
167
+
168
+ - Traceback extraction with frame-based signature hashing
169
+ - Configurable cooldown window (default 60 min) — same error is captured once, not spammed
170
+ - Cascade detection tags downstream failures caused by upstream dependency outages
171
+ - Automatic secret scrubbing (AWS keys, Bearer tokens, passwords in URLs)
172
+
173
+ ### LLM Diagnosis
174
+
175
+ - Structured root cause analysis via Claude: category, confidence, affected files, remediation
176
+ - Six categories: `TRANSIENT`, `DATA`, `CODE`, `CONFIG`, `EXTERNAL`, `INFRA`
177
+ - Knowledge base caching — known patterns are matched for free before calling the LLM
178
+ - Git context assembly (recent commits, changed files) for better diagnosis accuracy
179
+ - Daily cost cap (default $1.00) and rate limiting (default 3 diagnoses/day)
180
+
181
+ ### GitHub Issues
182
+
183
+ - Auto-filed with diagnosis, traceback, and captured logs
184
+ - Machine-readable metadata embedded in HTML comments for downstream automation
185
+ - Rate-limited with graceful degradation to daily digest
186
+
187
+ ### Auto-Fix PRs
188
+
189
+ Human-in-the-loop: a human reviews the diagnosis, adds a `flow-doctor:fix` label, and a GitHub Actions workflow generates a validated fix PR.
190
+
191
+ 1. An error occurs and Flow Doctor creates a GitHub issue with structured diagnosis
192
+ 2. A human reviews the diagnosis and adds the `flow-doctor:fix` label
193
+ 3. GitHub Actions triggers `flow-doctor generate-fix`
194
+ 4. The CLI generates a diff via LLM, validates against scope rules, runs tests
195
+ 5. If tests pass, a PR is opened. If tests fail, a comment explains what went wrong.
196
+
197
+ **Safety gates** — no fix PR is opened when any of the following holds:
198
+ - Confidence below threshold (default 90%)
199
+ - Category is `EXTERNAL` or `INFRA` (nothing to fix in code)
200
+ - Config issue involves credentials/secrets
201
+ - Generated diff touches files outside configured scope
202
+ - Tests fail after applying the fix
203
+
204
+ ### Remediation Playbooks
205
+
206
+ Define patterns that map failure signatures to automated actions:
207
+
208
+ ```python
209
+ from flow_doctor.remediation.playbook import Playbook, PlaybookPattern, RemediationAction, RemediationType
210
+
211
+ my_playbook = Playbook(patterns=[
212
+ PlaybookPattern(
213
+ name="service_down",
214
+ description="App service not responding",
215
+ category="INFRA",
216
+ message_pattern=r"(connection refused|service unavailable)",
217
+ action=RemediationAction(
218
+ action_type=RemediationType.RESTART_SERVICE,
219
+ description="Restart the app service",
220
+ commands=["sudo systemctl restart myapp"],
221
+ ssm_target="app-server",
222
+ ),
223
+ ),
224
+ ])
225
+ ```
226
+
227
+ ### Notifications
228
+
229
+ - **GitHub issues** — primary notification with full diagnosis
230
+ - **Slack** — webhook-based alerts with severity emoji and diagnosis snippet
231
+ - **Email** — SMTP with detailed body (traceback, diagnosis, affected files)
232
+ - **Daily digest** — summarizes rate-limited/suppressed errors at end of day
233
+
234
+ ## Auto-Fix CLI
235
+
236
+ ```bash
237
+ flow-doctor generate-fix \
238
+ --issue-number 42 \
239
+ --repo owner/repo \
240
+ --token $GITHUB_TOKEN \
241
+ --config flow-doctor.yaml \
242
+ --dry-run
243
+ ```
244
+
245
+ GitHub Actions workflow (copy to your repo at `.github/workflows/flow-doctor-fix.yml`):
246
+
247
+ ```yaml
248
+ name: Flow Doctor Fix
249
+ on:
250
+ issues:
251
+ types: [labeled]
252
+ jobs:
253
+ generate-fix:
254
+ if: github.event.label.name == 'flow-doctor:fix'
255
+ runs-on: ubuntu-latest
256
+ steps:
257
+ - uses: actions/checkout@v4
258
+ - uses: actions/setup-python@v5
259
+ with:
260
+ python-version: '3.11'
261
+ - run: pip install "flow-doctor[diagnosis]"
262
+ - run: |
263
+ python -m flow_doctor.fix.cli generate-fix \
264
+ --issue-number ${{ github.event.issue.number }} \
265
+ --repo ${{ github.repository }} \
266
+ --token $GITHUB_TOKEN
267
+ env:
268
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
269
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
270
+ ```
271
+
272
+ ## Architecture
273
+
274
+ ```
275
+ flow_doctor/
276
+ core/ # Client, config, models, dedup, rate limiting, scrubber, logging handler
277
+ diagnosis/ # LLM provider, context assembly, knowledge base, git context
278
+ digest/ # Daily digest generator
279
+ fix/ # Auto-fix: LLM generator, scope guard, test validator, PR creator, CLI
280
+ notify/ # Slack, email, GitHub issue backends
281
+ remediation/ # Decision gate, executor, playbook patterns
282
+ storage/ # SQLite backend (thread-safe, per-thread connections)
283
+ ```
284
+
285
+ ## Development
286
+
287
+ ```bash
288
+ git clone https://github.com/cipher813/flow-doctor.git
289
+ cd flow-doctor
290
+ python -m venv .venv && source .venv/bin/activate
291
+ pip install -e ".[dev]"
292
+
293
+ python -m pytest tests/ -x -q # 212 tests
294
+ python -m pytest tests/ --cov=flow_doctor # coverage report
295
+ python examples/smoke_test.py # end-to-end smoke test
296
+ ```
297
+
298
+ ## License
299
+
300
+ [MIT](LICENSE)
@@ -0,0 +1,8 @@
1
+ """Flow Doctor -- call-site error handler for pipeline reliability."""
2
+
3
+ from flow_doctor.core.client import FlowDoctor, init
4
+ from flow_doctor.core.handler import FlowDoctorHandler
5
+ from flow_doctor.core.models import Severity
6
+
7
+ __all__ = ["FlowDoctor", "FlowDoctorHandler", "Severity", "init"]
8
+ __version__ = "0.1.0"
File without changes