agent-failure-doctor 3.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. agent_failure_doctor-3.2.0/LICENSE +5 -0
  2. agent_failure_doctor-3.2.0/PKG-INFO +595 -0
  3. agent_failure_doctor-3.2.0/README.md +570 -0
  4. agent_failure_doctor-3.2.0/agent_failure_doctor.egg-info/PKG-INFO +595 -0
  5. agent_failure_doctor-3.2.0/agent_failure_doctor.egg-info/SOURCES.txt +190 -0
  6. agent_failure_doctor-3.2.0/agent_failure_doctor.egg-info/dependency_links.txt +1 -0
  7. agent_failure_doctor-3.2.0/agent_failure_doctor.egg-info/entry_points.txt +3 -0
  8. agent_failure_doctor-3.2.0/agent_failure_doctor.egg-info/requires.txt +3 -0
  9. agent_failure_doctor-3.2.0/agent_failure_doctor.egg-info/top_level.txt +4 -0
  10. agent_failure_doctor-3.2.0/failure_doctor/__init__.py +2 -0
  11. agent_failure_doctor-3.2.0/failure_doctor/__main__.py +8 -0
  12. agent_failure_doctor-3.2.0/failure_doctor/agent_invocation.py +243 -0
  13. agent_failure_doctor-3.2.0/failure_doctor/ai_handoff.py +432 -0
  14. agent_failure_doctor-3.2.0/failure_doctor/auto_collect.py +550 -0
  15. agent_failure_doctor-3.2.0/failure_doctor/batch.py +212 -0
  16. agent_failure_doctor-3.2.0/failure_doctor/cli.py +1202 -0
  17. agent_failure_doctor-3.2.0/failure_doctor/run_capture.py +204 -0
  18. agent_failure_doctor-3.2.0/failure_doctor/sanitize_share.py +313 -0
  19. agent_failure_doctor-3.2.0/integrations/__init__.py +2 -0
  20. agent_failure_doctor-3.2.0/integrations/_pack.py +91 -0
  21. agent_failure_doctor-3.2.0/integrations/browser_use/__init__.py +2 -0
  22. agent_failure_doctor-3.2.0/integrations/browser_use/adapter.py +84 -0
  23. agent_failure_doctor-3.2.0/integrations/cross_framework/__init__.py +5 -0
  24. agent_failure_doctor-3.2.0/integrations/cross_framework/common.py +319 -0
  25. agent_failure_doctor-3.2.0/integrations/generic_log_pack/__init__.py +2 -0
  26. agent_failure_doctor-3.2.0/integrations/generic_log_pack/adapter.py +56 -0
  27. agent_failure_doctor-3.2.0/integrations/playwright/__init__.py +2 -0
  28. agent_failure_doctor-3.2.0/integrations/playwright/collector.py +70 -0
  29. agent_failure_doctor-3.2.0/pyproject.toml +63 -0
  30. agent_failure_doctor-3.2.0/setup.cfg +4 -0
  31. agent_failure_doctor-3.2.0/tests/test_agent_frontend_invocation.py +92 -0
  32. agent_failure_doctor-3.2.0/tests/test_ai_handoff_patch_proposal.py +164 -0
  33. agent_failure_doctor-3.2.0/tests/test_applied_scenario_safety.py +92 -0
  34. agent_failure_doctor-3.2.0/tests/test_applied_scenario_validation.py +90 -0
  35. agent_failure_doctor-3.2.0/tests/test_auto_capture_run_cli.py +124 -0
  36. agent_failure_doctor-3.2.0/tests/test_auto_collector_launcher.py +19 -0
  37. agent_failure_doctor-3.2.0/tests/test_auto_collector_manifest.py +44 -0
  38. agent_failure_doctor-3.2.0/tests/test_auto_collector_pipeline.py +37 -0
  39. agent_failure_doctor-3.2.0/tests/test_auto_collector_presets.py +30 -0
  40. agent_failure_doctor-3.2.0/tests/test_auto_collector_safety.py +53 -0
  41. agent_failure_doctor-3.2.0/tests/test_auto_collector_sanitize.py +44 -0
  42. agent_failure_doctor-3.2.0/tests/test_auto_collector_scope.py +70 -0
  43. agent_failure_doctor-3.2.0/tests/test_auto_collector_validation.py +28 -0
  44. agent_failure_doctor-3.2.0/tests/test_auto_collector_watch.py +26 -0
  45. agent_failure_doctor-3.2.0/tests/test_batch_diagnosis_fleet_mode.py +92 -0
  46. agent_failure_doctor-3.2.0/tests/test_benchmark_runner.py +78 -0
  47. agent_failure_doctor-3.2.0/tests/test_benchmark_scoring.py +84 -0
  48. agent_failure_doctor-3.2.0/tests/test_browser_use_adapter.py +59 -0
  49. agent_failure_doctor-3.2.0/tests/test_classifier_evidence_scoring.py +155 -0
  50. agent_failure_doctor-3.2.0/tests/test_composite_p95_engine.py +110 -0
  51. agent_failure_doctor-3.2.0/tests/test_composite_p95_schema_and_cli.py +71 -0
  52. agent_failure_doctor-3.2.0/tests/test_composite_p95_scorecard.py +32 -0
  53. agent_failure_doctor-3.2.0/tests/test_composite_p95_strict_runner.py +34 -0
  54. agent_failure_doctor-3.2.0/tests/test_composite_plan_verify_p95.py +45 -0
  55. agent_failure_doctor-3.2.0/tests/test_composite_showcase_reports.py +40 -0
  56. agent_failure_doctor-3.2.0/tests/test_crawler_failure_coverage_matrix.py +35 -0
  57. agent_failure_doctor-3.2.0/tests/test_credibility_engineering.py +119 -0
  58. agent_failure_doctor-3.2.0/tests/test_cross_framework_common.py +64 -0
  59. agent_failure_doctor-3.2.0/tests/test_cross_framework_p95_validation.py +41 -0
  60. agent_failure_doctor-3.2.0/tests/test_cross_framework_validation_runner.py +27 -0
  61. agent_failure_doctor-3.2.0/tests/test_cypress_adapter.py +18 -0
  62. agent_failure_doctor-3.2.0/tests/test_dashboard_contains_all_p95_tracks.py +33 -0
  63. agent_failure_doctor-3.2.0/tests/test_dashboard_sections.py +46 -0
  64. agent_failure_doctor-3.2.0/tests/test_distribution_feedback_pack.py +72 -0
  65. agent_failure_doctor-3.2.0/tests/test_external_public_reference_pack.py +69 -0
  66. agent_failure_doctor-3.2.0/tests/test_external_validation_pack.py +114 -0
  67. agent_failure_doctor-3.2.0/tests/test_failure_artifact_expansion.py +798 -0
  68. agent_failure_doctor-3.2.0/tests/test_failure_artifacts.py +117 -0
  69. agent_failure_doctor-3.2.0/tests/test_failure_doctor_actionable_report.py +123 -0
  70. agent_failure_doctor-3.2.0/tests/test_failure_doctor_adapt_cli.py +69 -0
  71. agent_failure_doctor-3.2.0/tests/test_failure_doctor_cli.py +129 -0
  72. agent_failure_doctor-3.2.0/tests/test_failure_doctor_inputs.py +89 -0
  73. agent_failure_doctor-3.2.0/tests/test_failure_doctor_plan_cli.py +48 -0
  74. agent_failure_doctor-3.2.0/tests/test_failure_doctor_public_log_rules.py +81 -0
  75. agent_failure_doctor-3.2.0/tests/test_failure_doctor_real_user_input_pack.py +194 -0
  76. agent_failure_doctor-3.2.0/tests/test_failure_doctor_verify_cli.py +41 -0
  77. agent_failure_doctor-3.2.0/tests/test_fix_plan_generation.py +47 -0
  78. agent_failure_doctor-3.2.0/tests/test_generic_log_pack_adapter.py +54 -0
  79. agent_failure_doctor-3.2.0/tests/test_github_action_docs.py +34 -0
  80. agent_failure_doctor-3.2.0/tests/test_hardening_pack.py +104 -0
  81. agent_failure_doctor-3.2.0/tests/test_integration_safety.py +66 -0
  82. agent_failure_doctor-3.2.0/tests/test_knowledge_base_patterns.py +60 -0
  83. agent_failure_doctor-3.2.0/tests/test_open_source_adoption_pack.py +60 -0
  84. agent_failure_doctor-3.2.0/tests/test_open_source_entry.py +80 -0
  85. agent_failure_doctor-3.2.0/tests/test_p95_core_triage_gate.py +40 -0
  86. agent_failure_doctor-3.2.0/tests/test_p98_master_gate.py +50 -0
  87. agent_failure_doctor-3.2.0/tests/test_p98_pillar_validations.py +60 -0
  88. agent_failure_doctor-3.2.0/tests/test_p98_safety_boundary.py +48 -0
  89. agent_failure_doctor-3.2.0/tests/test_p98_scorecard.py +22 -0
  90. agent_failure_doctor-3.2.0/tests/test_p98_track_not_fake_pass.py +48 -0
  91. agent_failure_doctor-3.2.0/tests/test_playwright_collector.py +67 -0
  92. agent_failure_doctor-3.2.0/tests/test_playwright_trace_p95_validation.py +37 -0
  93. agent_failure_doctor-3.2.0/tests/test_positioning_roadmap.py +39 -0
  94. agent_failure_doctor-3.2.0/tests/test_public_failure_corpus.py +55 -0
  95. agent_failure_doctor-3.2.0/tests/test_public_release_cleanup.py +195 -0
  96. agent_failure_doctor-3.2.0/tests/test_puppeteer_adapter.py +18 -0
  97. agent_failure_doctor-3.2.0/tests/test_real_trace_fixture_integrity.py +66 -0
  98. agent_failure_doctor-3.2.0/tests/test_real_trace_semantic_validation.py +297 -0
  99. agent_failure_doctor-3.2.0/tests/test_real_trace_validation_runner.py +33 -0
  100. agent_failure_doctor-3.2.0/tests/test_regression_case_generation.py +20 -0
  101. agent_failure_doctor-3.2.0/tests/test_release_alignment_pack.py +124 -0
  102. agent_failure_doctor-3.2.0/tests/test_release_notes_presence.py +36 -0
  103. agent_failure_doctor-3.2.0/tests/test_release_trust_pack.py +55 -0
  104. agent_failure_doctor-3.2.0/tests/test_resolution_safety_boundary.py +18 -0
  105. agent_failure_doctor-3.2.0/tests/test_resolution_schemas.py +51 -0
  106. agent_failure_doctor-3.2.0/tests/test_resolution_validation_runner.py +29 -0
  107. agent_failure_doctor-3.2.0/tests/test_resolution_verifier.py +47 -0
  108. agent_failure_doctor-3.2.0/tests/test_route_mock_har_diagnosis.py +184 -0
  109. agent_failure_doctor-3.2.0/tests/test_runtime_error_classifier.py +26 -0
  110. agent_failure_doctor-3.2.0/tests/test_sanitize_share_pack.py +161 -0
  111. agent_failure_doctor-3.2.0/tests/test_sanitized_failure_pack_templates.py +90 -0
  112. agent_failure_doctor-3.2.0/tests/test_scrapy_requests_adapter.py +18 -0
  113. agent_failure_doctor-3.2.0/tests/test_selenium_adapter.py +18 -0
  114. agent_failure_doctor-3.2.0/tests/test_shadow_dom_locator_diagnosis.py +199 -0
  115. agent_failure_doctor-3.2.0/tests/test_source_ledger_real_failures.py +67 -0
  116. agent_failure_doctor-3.2.0/tests/test_spiderbuf_feedback_hardening.py +104 -0
  117. agent_failure_doctor-3.2.0/tests/test_spiderbuf_inspired_safety.py +71 -0
  118. agent_failure_doctor-3.2.0/tests/test_spiderbuf_inspired_validation.py +80 -0
  119. agent_failure_doctor-3.2.0/tests/test_storage_state_context_diagnosis.py +180 -0
  120. agent_failure_doctor-3.2.0/tests/test_trace_doctor_cli.py +239 -0
  121. agent_failure_doctor-3.2.0/tests/test_training_challenge_p95_validation.py +38 -0
  122. agent_failure_doctor-3.2.0/tests/test_v06_website_change_antibot.py +192 -0
  123. agent_failure_doctor-3.2.0/tests/test_validation_dashboard_tracks.py +19 -0
  124. agent_failure_doctor-3.2.0/tests/test_validation_hardening_v1_3.py +101 -0
  125. agent_failure_doctor-3.2.0/tests/test_validation_pack.py +86 -0
  126. agent_failure_doctor-3.2.0/tests/test_version_alignment.py +34 -0
  127. agent_failure_doctor-3.2.0/tests/test_warb_cli.py +577 -0
  128. agent_failure_doctor-3.2.0/tools/benchmark/__init__.py +1 -0
  129. agent_failure_doctor-3.2.0/tools/benchmark/run_benchmark.py +291 -0
  130. agent_failure_doctor-3.2.0/tools/benchmark/scoring.py +133 -0
  131. agent_failure_doctor-3.2.0/tools/benchmark/standard_tasks.py +117 -0
  132. agent_failure_doctor-3.2.0/tools/diagnostics/__init__.py +1 -0
  133. agent_failure_doctor-3.2.0/tools/diagnostics/diagnose_failure.py +354 -0
  134. agent_failure_doctor-3.2.0/tools/failure_artifacts/__init__.py +1 -0
  135. agent_failure_doctor-3.2.0/tools/failure_artifacts/adapters.py +978 -0
  136. agent_failure_doctor-3.2.0/tools/failure_artifacts/artifact.py +83 -0
  137. agent_failure_doctor-3.2.0/tools/failure_artifacts/candidates.py +177 -0
  138. agent_failure_doctor-3.2.0/tools/failure_artifacts/causal_policy.py +134 -0
  139. agent_failure_doctor-3.2.0/tools/failure_artifacts/classifier.py +1368 -0
  140. agent_failure_doctor-3.2.0/tools/failure_artifacts/collector.py +50 -0
  141. agent_failure_doctor-3.2.0/tools/failure_artifacts/composite.py +153 -0
  142. agent_failure_doctor-3.2.0/tools/failure_artifacts/diagnose.py +11 -0
  143. agent_failure_doctor-3.2.0/tools/failure_artifacts/doctor.py +134 -0
  144. agent_failure_doctor-3.2.0/tools/failure_artifacts/evidence_graph.py +69 -0
  145. agent_failure_doctor-3.2.0/tools/failure_artifacts/evidence_nodes.py +138 -0
  146. agent_failure_doctor-3.2.0/tools/failure_artifacts/guardrails.py +63 -0
  147. agent_failure_doctor-3.2.0/tools/failure_artifacts/issue.py +120 -0
  148. agent_failure_doctor-3.2.0/tools/failure_artifacts/packager.py +214 -0
  149. agent_failure_doctor-3.2.0/tools/failure_artifacts/regression.py +72 -0
  150. agent_failure_doctor-3.2.0/tools/failure_artifacts/regression_case.py +24 -0
  151. agent_failure_doctor-3.2.0/tools/failure_artifacts/report.py +54 -0
  152. agent_failure_doctor-3.2.0/tools/failure_artifacts/reporter.py +49 -0
  153. agent_failure_doctor-3.2.0/tools/failure_artifacts/resolution.py +486 -0
  154. agent_failure_doctor-3.2.0/tools/failure_artifacts/schema.py +17 -0
  155. agent_failure_doctor-3.2.0/tools/failure_artifacts/templates.py +59 -0
  156. agent_failure_doctor-3.2.0/tools/knowledge_base/__init__.py +2 -0
  157. agent_failure_doctor-3.2.0/tools/knowledge_base/load_patterns.py +39 -0
  158. agent_failure_doctor-3.2.0/tools/knowledge_base/search_patterns.py +38 -0
  159. agent_failure_doctor-3.2.0/tools/knowledge_base/validate_patterns.py +112 -0
  160. agent_failure_doctor-3.2.0/tools/real_trace_generation/__init__.py +2 -0
  161. agent_failure_doctor-3.2.0/tools/real_trace_generation/generate_real_trace_fixtures.py +391 -0
  162. agent_failure_doctor-3.2.0/tools/validation/__init__.py +2 -0
  163. agent_failure_doctor-3.2.0/tools/validation/p98_common.py +33 -0
  164. agent_failure_doctor-3.2.0/tools/validation/run_ai_handoff_p98_validation.py +47 -0
  165. agent_failure_doctor-3.2.0/tools/validation/run_ai_handoff_validation.py +152 -0
  166. agent_failure_doctor-3.2.0/tools/validation/run_applied_scenario_validation.py +195 -0
  167. agent_failure_doctor-3.2.0/tools/validation/run_auto_collector_validation.py +242 -0
  168. agent_failure_doctor-3.2.0/tools/validation/run_batch_diagnosis_p98_validation.py +38 -0
  169. agent_failure_doctor-3.2.0/tools/validation/run_composite_counterfactual_p98_validation.py +48 -0
  170. agent_failure_doctor-3.2.0/tools/validation/run_composite_diagnosis_p95_strict_validation.py +308 -0
  171. agent_failure_doctor-3.2.0/tools/validation/run_crawler_failure_coverage_matrix.py +84 -0
  172. agent_failure_doctor-3.2.0/tools/validation/run_cross_framework_p95_validation.py +113 -0
  173. agent_failure_doctor-3.2.0/tools/validation/run_cross_framework_p98_validation.py +54 -0
  174. agent_failure_doctor-3.2.0/tools/validation/run_cross_framework_validation.py +153 -0
  175. agent_failure_doctor-3.2.0/tools/validation/run_external_public_reference_validation.py +356 -0
  176. agent_failure_doctor-3.2.0/tools/validation/run_external_validation.py +236 -0
  177. agent_failure_doctor-3.2.0/tools/validation/run_p95_core_triage_gate.py +104 -0
  178. agent_failure_doctor-3.2.0/tools/validation/run_p98_master_gate.py +159 -0
  179. agent_failure_doctor-3.2.0/tools/validation/run_playwright_trace_p95_validation.py +85 -0
  180. agent_failure_doctor-3.2.0/tools/validation/run_playwright_trace_p98_validation.py +48 -0
  181. agent_failure_doctor-3.2.0/tools/validation/run_real_trace_validation.py +152 -0
  182. agent_failure_doctor-3.2.0/tools/validation/run_resolution_validation.py +120 -0
  183. agent_failure_doctor-3.2.0/tools/validation/run_sanitize_share_p98_validation.py +37 -0
  184. agent_failure_doctor-3.2.0/tools/validation/run_spiderbuf_inspired_validation.py +178 -0
  185. agent_failure_doctor-3.2.0/tools/validation/run_training_challenge_p98_validation.py +43 -0
  186. agent_failure_doctor-3.2.0/tools/validation/run_training_challenge_validation.py +89 -0
  187. agent_failure_doctor-3.2.0/tools/validation/run_validation_hardening.py +444 -0
  188. agent_failure_doctor-3.2.0/tools/validation/write_composite_showcase_reports.py +148 -0
  189. agent_failure_doctor-3.2.0/tools/warb.py +545 -0
  190. agent_failure_doctor-3.2.0/trace_doctor/__init__.py +2 -0
  191. agent_failure_doctor-3.2.0/trace_doctor/__main__.py +8 -0
  192. agent_failure_doctor-3.2.0/trace_doctor/cli.py +139 -0
@@ -0,0 +1,5 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 WebAgentRuntimeBench
4
+
5
+ Permission is hereby granted... (MIT text)
@@ -0,0 +1,595 @@
1
+ Metadata-Version: 2.4
2
+ Name: agent-failure-doctor
3
+ Version: 3.2.0
4
+ Summary: Local-first failure diagnosis for AI browser automation, Playwright, crawler, and RPA runs.
5
+ Author: sida lin
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/tobybgy-lsd/web-agent-runtime-bench
8
+ Project-URL: Repository, https://github.com/tobybgy-lsd/web-agent-runtime-bench
9
+ Project-URL: Issues, https://github.com/tobybgy-lsd/web-agent-runtime-bench/issues
10
+ Keywords: playwright,browser automation,ai agent,crawler,rpa,debugging,failure diagnosis
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Testing
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Provides-Extra: trace-gen
23
+ Requires-Dist: playwright>=1.45; extra == "trace-gen"
24
+ Dynamic: license-file
25
+
26
+ # Agent Failure Doctor
27
+
28
+ [中文文档](README.zh-CN.md)
29
+
30
+ ![CI](https://github.com/tobybgy-lsd/web-agent-runtime-bench/actions/workflows/benchmark.yml/badge.svg)
31
+ ![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)
32
+ ![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)
33
+
34
+ Local-first failure diagnosis lifecycle tool for AI browser automation,
35
+ Playwright, crawler, RPA, and business automation failures.
36
+
37
+ - Current milestone: Agent Failure Doctor v3.2 Auto Collector P98 Gate.
38
+ - Previous stable line: Agent Failure Doctor v3.1.0 P98 Master Gate.
39
+ - Previous P95 stable line: Agent Failure Doctor v2.4.1 P95 Alignment & Missing Tracks Pack.
40
+
41
+ **Input:** trace.zip / error.log / console.txt / network.json /
42
+ screenshot metadata / user_description.txt
43
+
44
+ **Output:** diagnosis, evidence, next action, repair suggestions,
45
+ GitHub issue draft, Codex fix prompt.
46
+
47
+ ## Quickstart
48
+
49
+ ```powershell
50
+ git clone https://github.com/tobybgy-lsd/web-agent-runtime-bench.git
51
+ cd web-agent-runtime-bench
52
+ python -m pip install -e .
53
+ failure-doctor diagnose .\examples\failed_runs\proxy_network_error --out .\report
54
+ failure-doctor plan .\report --out .\fix_plan
55
+ failure-doctor collect --project . --preset auto --out .\failure_doctor_auto_report `
56
+ --auto-diagnose --auto-handoff --auto-sanitize
57
+ failure-doctor agent-bootstrap --target all --project .
58
+ ```
59
+
60
+ See [validation/dashboard.md](validation/dashboard.md),
61
+ [docs/P98_LIMITS.md](docs/P98_LIMITS.md),
62
+ [docs/AGENT_FRONTEND_INVOCATION.md](docs/AGENT_FRONTEND_INVOCATION.md),
63
+ and [docs/safety_boundary.md](docs/safety_boundary.md).
64
+
65
+ P98 master gate passed with the auto collector pillar included.
66
+
67
+ Advanced commands include `failure-doctor handoff`,
68
+ `failure-doctor agent-bootstrap`, `failure-doctor propose-patch`, and
69
+ `failure-doctor batch`.
70
+
71
+ **Core commands:** `collect` / `diagnose` / `plan` / `verify` / `run` /
72
+ `watch` / `sanitize` / `adapt` / `handoff` / `agent-bootstrap` /
73
+ `propose-patch` / `batch`
74
+
75
+ **Classic lifecycle:** `diagnose` / `plan` / `verify` / `run` /
76
+ `sanitize` / `adapt` -> `diagnose -> plan -> AI handoff / patch proposal
77
+ -> verify -> sanitize/share`
78
+
79
+ **P98 gate:** `knowledge base -> coverage matrix ->
80
+ trace/cross-framework/training/composite/handoff/batch/sanitize/auto-collector
81
+ -> master gate`
82
+
83
+ ## Distribution & Feedback
84
+
85
+ v3.2.0 is the current stable technical baseline. The next phase is distribution
86
+ and real user feedback, not more synthetic feature expansion.
87
+
88
+ - PyPI release runbook: [docs/PYPI_RELEASE.md](docs/PYPI_RELEASE.md)
89
+ - 2-minute demo script: [docs/DEMO_VIDEO_SCRIPT.md](docs/DEMO_VIDEO_SCRIPT.md)
90
+ - Technical article draft: [docs/TECH_ARTICLE_DRAFT.md](docs/TECH_ARTICLE_DRAFT.md)
91
+ - Real user feedback loop: [docs/REAL_USER_FEEDBACK_LOOP.md](docs/REAL_USER_FEEDBACK_LOOP.md)
92
+
93
+ After PyPI publication, the target install command is:
94
+
95
+ ```powershell
96
+ pip install agent-failure-doctor
97
+ ```
98
+
99
+ For non-technical Windows users, double-click
100
+ `scripts/windows/Start-FailureDoctor-Diagnosis.bat` or drag a failed project
101
+ folder onto it.
102
+
103
+ Advanced v3.2 commands include `failure-doctor collect` and `failure-doctor watch`.
104
+
105
+ Agent frontend invocation:
106
+
107
+ ```powershell
108
+ failure-doctor agent-bootstrap --target all --project .
109
+ ```
110
+
111
+ This writes `.failure-doctor/AGENT_ENTRYPOINT.md` plus Codex, Cursor,
112
+ Claude Code, VS Code/Copilot, Antigravity, OpenCode, Qoder, Trae, WorkBuddy,
113
+ OpenClaw, Hermes, and generic agent workflow instructions.
114
+
115
+ Agent Failure Doctor uses a deterministic evidence-based diagnostic engine.
116
+ It does not claim to solve arbitrary failures, but it provides explainable
117
+ classification, evidence, fix plans, and before/after verification for known
118
+ automation failure patterns.
119
+
120
+ Applied scenario demos are local-only mock workflows for commerce automation,
121
+ live monitoring, content publishing, GUI data bridge, and ERP sync failure
122
+ diagnosis.
123
+
124
+ Spiderbuf-inspired challenge demos are local-only mock failure packs inspired
125
+ by public crawler-training challenge categories; they validate diagnosis and
126
+ safe next actions without accessing spiderbuf.cn or publishing private solution
127
+ logic.
128
+
129
+ **Integration commands:** `failure-doctor collect-playwright` / `failure-doctor pack-logs` / `failure-doctor adapt`
130
+
131
+ ## What You Get
132
+
133
+ ```text
134
+ report/
135
+ |-- diagnosis.json
136
+ |-- diagnosis.md
137
+ |-- evidence.json
138
+ |-- input_summary.json
139
+ |-- issue_draft.md
140
+ |-- repair_suggestions.md
141
+ |-- codex_fix_prompt.md
142
+ `-- failure_doctor_report.zip
143
+ ```
144
+
145
+ Agent Failure Doctor turns sanitized automation failure materials into a report
146
+ that explains what likely failed, what evidence supports the diagnosis, what
147
+ evidence is missing, and what to ask Codex or another coding assistant to
148
+ change next.
149
+
150
+ ## One-Minute Start
151
+
152
+ Auto Capture:
153
+
154
+ ```powershell
155
+ failure-doctor run -- python crawler.py
156
+ failure-doctor run -- pytest tests/test_listing.py
157
+ failure-doctor run -- playwright test
158
+ ```
159
+
160
+ This writes a local run folder under `.failure-doctor/runs/<run_id>/`:
161
+
162
+ ```text
163
+ .failure-doctor/runs/<run_id>/
164
+ |-- command.txt
165
+ |-- exit_code.txt
166
+ |-- stdout.log
167
+ |-- stderr.log
168
+ |-- environment.json
169
+ |-- detected_artifacts.json
170
+ |-- input_summary.json
171
+ |-- diagnosis/
172
+ |-- fix_plan/
173
+ |-- verification_hint.md
174
+ `-- shareable_failure_pack.zip
175
+ ```
176
+
177
+ The generated `safe_to_share.json` defaults to `safe_to_share=false`; review and sanitize before sending a pack to anyone else.
178
+
179
+ Sanitize & Share Pack:
180
+
181
+ Sanitize a failed run before sharing it:
182
+
183
+ ```powershell
184
+ failure-doctor sanitize .\.failure-doctor\runs\<run_id> --out .\shareable_failure_pack
185
+ ```
186
+
187
+ This writes redacted logs, redacted network summaries, trace metadata only, a
188
+ redaction report, a review gate, and `shareable_failure_pack.zip`.
189
+
190
+ Raw `trace.zip` archives are not copied into the sanitized pack.
191
+
192
+ Put a failed run in a folder:
193
+
194
+ ```text
195
+ my_failed_run/
196
+ |-- error.log
197
+ |-- console.txt
198
+ |-- network.json
199
+ |-- README.txt
200
+ `-- screenshot.png
201
+ ```
202
+
203
+ Then run:
204
+
205
+ ```powershell
206
+ failure-doctor diagnose .\my_failed_run --out .\report
207
+ ```
208
+
209
+ The tool inventories inputs and uses this evidence priority:
210
+
211
+ ```text
212
+ trace.zip > log > network.json > user description > screenshot metadata
213
+ ```
214
+
215
+ When evidence is too thin, the tool downgrades the diagnosis to `insufficient_evidence` instead of guessing.
216
+
217
+ ## Minimal Demos
218
+
219
+ Proxy/network failure:
220
+
221
+ ```powershell
222
+ failure-doctor diagnose .\examples\failed_runs\proxy_failed --out .\report_proxy
223
+ ```
224
+
225
+ Strict mode locator conflict:
226
+
227
+ ```powershell
228
+ failure-doctor diagnose .\examples\failed_runs\strict_mode_locator --out .\report_locator
229
+ ```
230
+
231
+ Low-evidence screenshot-only run:
232
+
233
+ ```powershell
234
+ failure-doctor diagnose .\examples\failed_runs\low_evidence_screenshot_only --out .\report_low_evidence
235
+ ```
236
+
237
+ Native Playwright trace fixture:
238
+
239
+ ```powershell
240
+ trace-doctor diagnose .\examples\realistic_playwright_traces\02_login_redirect_302\trace.zip --out .\report_login_trace
241
+ ```
242
+
243
+ ## Before / After Report
244
+
245
+ Report structure: conclusion / evidence / why / next action / Codex fix prompt
246
+
247
+ Before:
248
+
249
+ ```text
250
+ page.goto: net::ERR_PROXY_CONNECTION_FAILED while opening https://example.test
251
+ ```
252
+
253
+ After:
254
+
255
+ ```text
256
+ Conclusion: network/proxy setup failed before the page loaded.
257
+ Evidence: Playwright reported net::ERR_PROXY_CONNECTION_FAILED.
258
+ Next action: check proxy settings, DNS, VPN, and CI network configuration.
259
+ Codex fix prompt: add trace/log capture and make proxy configuration explicit.
260
+ ```
261
+
262
+ ## Verify a Fix
263
+
264
+ ```powershell
265
+ failure-doctor diagnose .\failed_run --out .\report
266
+ failure-doctor plan .\report --out .\fix_plan
267
+ failure-doctor verify --before .\failed_run --after .\rerun_after_fix --out .\verification_report
268
+ ```
269
+
270
+ `verify` compares before/after evidence and reports whether the original failure
271
+ is resolved, unchanged, changed into another failure, or insufficiently
272
+ evidenced.
273
+
274
+ ## AI Handoff & Patch Proposal
275
+
276
+ Turn a report into task packs that Codex, Claude Code, or Cursor can execute:
277
+
278
+ ```powershell
279
+ failure-doctor handoff .\report --target codex --out .\ai_handoff
280
+ failure-doctor handoff .\report --target claude_code --out .\ai_handoff
281
+ failure-doctor handoff .\report --target cursor --out .\ai_handoff
282
+ ```
283
+
284
+ This writes:
285
+
286
+ ```text
287
+ ai_handoff/
288
+ |-- ai_handoff.json
289
+ |-- ai_handoff.md
290
+ |-- codex_task.md
291
+ |-- claude_code_task.md
292
+ |-- cursor_task.md
293
+ |-- affected_files.json
294
+ |-- validation_commands.md
295
+ |-- forbidden_actions.md
296
+ |-- token_budget_report.json
297
+ `-- ai_handoff_pack.zip
298
+ ```
299
+
300
+ Generate a dry-run patch proposal without modifying source code:
301
+
302
+ ```powershell
303
+ failure-doctor propose-patch --repo . --report .\report --out .\patch_plan
304
+ ```
305
+
306
+ This writes:
307
+
308
+ ```text
309
+ patch_plan/
310
+ |-- patch_proposal.md
311
+ |-- proposed_changes.json
312
+ |-- affected_files.json
313
+ |-- validation_commands.md
314
+ `-- patch_risk_assessment.json
315
+ ```
316
+
317
+ `propose-patch` is intentionally proposal-only. It does not edit files, apply patches, run tests, or open pull requests.
318
+
319
+ v2.5 validation writes `validation/ai_handoff_validation.json`:
320
+
321
+ ```text
322
+ 20/20 Codex task files generated
323
+ 20/20 Claude Code task files generated
324
+ 20/20 Cursor task files generated
325
+ 18/20 patch proposals generated
326
+ 20/20 required sections present
327
+ 20/20 concise token budget checks pass
328
+ 0 forbidden outputs
329
+ ```
330
+
331
+ ## Batch Diagnosis / Fleet Mode
332
+
333
+ Diagnose many failed runs and get a fleet-level summary:
334
+
335
+ ```powershell
336
+ failure-doctor batch .\runs --out .\batch_report
337
+ ```
338
+
339
+ Input:
340
+
341
+ ```text
342
+ runs/
343
+ |-- run_001/
344
+ |-- run_002/
345
+ |-- run_003/
346
+ `-- ...
347
+ ```
348
+
349
+ Output:
350
+
351
+ ```text
352
+ batch_report/
353
+ |-- summary.json
354
+ |-- summary.md
355
+ |-- failures_by_type.csv
356
+ |-- top_root_causes.md
357
+ |-- repeated_failures.md
358
+ |-- suggested_regression_cases.md
359
+ |-- repair_priority.md
360
+ `-- reports/
361
+ ```
362
+
363
+ Fleet mode answers which failures repeat, which root causes dominate, which runs
364
+ should become regression cases, and which fixes deserve priority.
365
+
366
+ ## P98 Controlled Maturity
367
+
368
+ v3.0 starts the P98 controlled maturity track. This is not an ecosystem score;
369
+ it does not count stars, external PRs, external issues, PyPI downloads, or
370
+ long-term community adoption.
371
+
372
+ Current P98 assets:
373
+
374
+ - [docs/P98_CONTROLLED_MATURITY_SCORECARD.md](docs/P98_CONTROLLED_MATURITY_SCORECARD.md)
375
+ - [knowledge_base/](knowledge_base/)
376
+ - [docs/CRAWLER_FAILURE_COVERAGE_MATRIX.md](docs/CRAWLER_FAILURE_COVERAGE_MATRIX.md)
377
+ - [validation/crawler_failure_coverage_matrix.json](validation/crawler_failure_coverage_matrix.json)
378
+
379
+ Knowledge-base commands:
380
+
381
+ ```powershell
382
+ python -m tools.knowledge_base.validate_patterns
383
+ python -m tools.knowledge_base.search_patterns --query selector_drift
384
+ python -m tools.validation.run_crawler_failure_coverage_matrix
385
+ ```
386
+
387
+ ## Applied Scenario Demos
388
+
389
+ Local-only mock demos show how Agent Failure Doctor can diagnose failures in:
390
+
391
+ - hot product collection
392
+ - live commerce monitoring
393
+ - ecommerce listing automation
394
+ - authorized content publishing workflow
395
+ - GUI / RPA data bridge
396
+ - ERP-to-ecommerce sync
397
+
398
+ Run:
399
+
400
+ ```powershell
401
+ python -m tools.validation.run_applied_scenario_validation
402
+ ```
403
+
404
+ ## Spiderbuf-Inspired Challenge Demos
405
+
406
+ `examples/spiderbuf_inspired_challenges/` contains local-only mock failure packs inspired by public crawler-training challenge categories:
407
+
408
+ - cookie/session required
409
+ - iframe extraction
410
+ - Ajax dynamic loading
411
+ - random CSS selector drift
412
+ - infinite scroll missing items
413
+ - rate limit 429
414
+ - API signature required
415
+ - browser fingerprint risk
416
+ - Selenium detection risk
417
+ - challenge page detected
418
+
419
+ These cases are diagnosis-only. They do not access spiderbuf.cn, do not include
420
+ private solutions, and do not include access-control defeat steps.
421
+
422
+ ```powershell
423
+ python -m tools.validation.run_spiderbuf_inspired_validation
424
+ ```
425
+
426
+ ## Integrations
427
+
428
+ Collect Playwright test-results into a failure pack:
429
+
430
+ ```powershell
431
+ failure-doctor collect-playwright .\examples\mock_playwright_test_results --out .\tmp_failure_pack
432
+ failure-doctor diagnose .\tmp_failure_pack --out .\tmp_collected_report
433
+ ```
434
+
435
+ Normalize a loose log folder:
436
+
437
+ ```powershell
438
+ failure-doctor pack-logs .\examples\mock_raw_logs --out .\tmp_log_pack
439
+ failure-doctor diagnose .\tmp_log_pack --out .\tmp_log_report
440
+ ```
441
+
442
+ Normalize a Selenium, Puppeteer, Cypress, Scrapy, requests, or httpx failure log:
443
+
444
+ ```powershell
445
+ failure-doctor adapt .\examples\cross_framework_fixtures\selenium\no_such_element\raw --framework selenium --out .\tmp_selenium_pack
446
+ failure-doctor diagnose .\tmp_selenium_pack --out .\tmp_selenium_report
447
+ failure-doctor plan .\tmp_selenium_report --out .\tmp_selenium_fix_plan
448
+ ```
449
+
450
+ Supported adapter frameworks:
451
+
452
+ ```text
453
+ selenium | puppeteer | cypress | scrapy | requests | httpx | auto
454
+ ```
455
+
456
+ Playwright remains the deepest native trace backend. Cross-framework adapters
457
+ normalize local logs and metadata into the same failure lifecycle; they do not
458
+ run those frameworks or connect to external platforms.
459
+
460
+ See [docs/INTEGRATIONS.md](docs/INTEGRATIONS.md) and [docs/GITHUB_ACTION_USAGE.md](docs/GITHUB_ACTION_USAGE.md).
461
+
462
+ ## Validation Status
463
+
464
+ Current milestone: Agent Failure Doctor v3.2 Auto Collector P98 Gate.
465
+
466
+ Previous stable line: Agent Failure Doctor v2.4.1 P95 Alignment & Missing Tracks Pack.
467
+
468
+ - 131 source-ledger records with separate `real_public_issue`, `official_doc_pattern`, and `public_inspired_sanitized` labels
469
+ - 50 traceable real public issue records
470
+ - 100 Playwright Trace Doctor P95 fixtures
471
+ - 100/100 Playwright trace reasonable classifications
472
+ - 100/100 Playwright trace exact subtype matches
473
+ - 62 external public reference seeds
474
+ - 20 external public reference held-out records
475
+ - 20/20 external public reference reasonable classifications
476
+ - 20/20 external public reference actionable next actions
477
+ - 12 resolution validation cases
478
+ - 12/12 resolution statuses correct
479
+ - 18 applied scenario validation cases
480
+ - 18/18 applied scenario reasonable classifications
481
+ - 18/18 applied scenario valid fix plans
482
+ - 18/18 applied scenario verification statuses correct
483
+ - Playwright collector, generic log packer, browser-use adapter, and GitHub Actions usage docs
484
+ - v2.0 Auto Capture command wrapper: `failure-doctor run -- <command>`
485
+ - Sanitize & Share command: `failure-doctor sanitize <failed_run> --out <shareable_failure_pack>`
486
+ - Cross-framework adapter command: `failure-doctor adapt <input> --framework <framework> --out <failure_pack>`
487
+ - 100 cross-framework P95 fixtures across Selenium, Puppeteer, Cypress, Scrapy, requests, httpx, browser-use, and generic RPA
488
+ - 100/100 cross-framework P95 reasonable classifications
489
+ - 100/100 cross-framework P95 valid fix plans
490
+ - 0 forbidden outputs in cross-framework P95 validation
491
+ - 40 training challenge P95 local-only validation cases
492
+ - 40/40 training challenge reasonable classifications
493
+ - 40/40 training challenge valid fix plans
494
+ - 40/40 training challenge verification statuses correct
495
+ - 0 forbidden outputs and 0 private solution leaks in training challenge validation
496
+ - 160 composite P95 strict local-only validation cases
497
+ - 160/160 composite primary classifications correct
498
+ - 160/160 composite repair-order checks correct
499
+ - 160/160 composite evidence graphs generated
500
+ - 0 forbidden outputs in composite P95 strict validation
501
+ - P95 Core Triad Gate: pass
502
+ - 3 composite showcase reports under `sample_reports/composite_showcase/`
503
+ - 10 external held-out public-source records
504
+ - 9/10 external held-out reasonable classifications
505
+ - 10/10 external held-out actionable next actions
506
+ - 0 forbidden outputs in generated reports/prompts
507
+ - GitHub Actions green across Ubuntu, macOS, and Windows, plus Windows benchmark/smoke/safety checks
508
+
509
+ See [docs/VALIDATION_REPORT.md](docs/VALIDATION_REPORT.md),
510
+ [docs/EXTERNAL_DATA_SOURCES.md](docs/EXTERNAL_DATA_SOURCES.md), and
511
+ [validation/dashboard.md](validation/dashboard.md) for validation metrics,
512
+ limits, and boundaries.
513
+
514
+ ## Reproduce Validation
515
+
516
+ ```powershell
517
+ python -m tools.real_trace_generation.generate_real_trace_fixtures `
518
+ --out .\examples\realistic_playwright_traces `
519
+ --count 30 `
520
+ --clean
521
+ python -m tools.validation.run_real_trace_validation
522
+ python -m tools.validation.run_playwright_trace_p95_validation
523
+ python -m tools.validation.run_external_public_reference_validation
524
+ python -m tools.validation.run_resolution_validation
525
+ python -m tools.validation.run_spiderbuf_inspired_validation
526
+ python -m tools.validation.run_training_challenge_validation
527
+ python -m tools.validation.run_cross_framework_p95_validation
528
+ python -m tools.validation.run_composite_diagnosis_p95_strict_validation
529
+ python -m tools.validation.run_p95_core_triage_gate
530
+ python scripts\validate_external_heldout.py
531
+ ```
532
+
533
+ ## Safety Boundary
534
+
535
+ This project is for local, sanitized failure diagnosis.
536
+
537
+ It is not:
538
+
539
+ - a challenge-solving tool
540
+ - an access-control circumvention tool
541
+ - a credential extractor
542
+ - a real-platform scraper
543
+ - a tool for unauthorized collection
544
+
545
+ For suspected platform risk cases, the intended output is identification,
546
+ routing, and compliance-oriented next steps such as reducing request volume,
547
+ using an official API, confirming authorization, contacting the platform, or
548
+ stopping unauthorized collection.
549
+
550
+ ## Contributing Failure Cases
551
+
552
+ You do not need to write code. The most useful contribution is a sanitized
553
+ failure case: log snippets, trace metadata, network summaries, screenshot
554
+ metadata, and a short description of what happened.
555
+
556
+ Open an [External failure case issue](.github/ISSUE_TEMPLATE/external_failure_case.yml) and remove secrets before posting:
557
+
558
+ - passwords
559
+ - API keys
560
+ - cookies
561
+ - tokens
562
+ - authorization headers
563
+ - private screenshots
564
+ - private data
565
+ - personal data
566
+
567
+ Accepted input types include sanitized `error.log`, `trace.zip`, `console.txt`,
568
+ `network.json`, screenshot metadata, and `user_description.txt`.
569
+
570
+ With your permission, a sanitized case may be assigned an `EXT-YYYY-NNNN` ID,
570
+ run once against the current released version before any rule changes, and
571
+ added to the external validation dashboard.
573
+
574
+ Templates and author-generated examples are not counted as external cases.
575
+
576
+ See [CONTRIBUTING.md](CONTRIBUTING.md),
577
+ [docs/external_validation_protocol.md](docs/external_validation_protocol.md),
578
+ [docs/REAL_TRACE_CONTRIBUTION_GUIDE.md](docs/REAL_TRACE_CONTRIBUTION_GUIDE.md),
579
+ and [docs/REAL_DATA_SOURCES.md](docs/REAL_DATA_SOURCES.md).
580
+
581
+ ## Commands
582
+
583
+ Run all tests:
584
+
585
+ ```powershell
586
+ python -m unittest discover -s tests -p "test_*.py"
587
+ ```
588
+
589
+ Run smoke and safety checks:
590
+
591
+ ```powershell
592
+ scripts\smoke_test.ps1
593
+ scripts\local_safety_scan.ps1
594
+ ```
595
+