empathy-framework 5.3.0-py3-none-any.whl → 5.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. empathy_framework-5.4.0.dist-info/METADATA +47 -0
  2. empathy_framework-5.4.0.dist-info/RECORD +8 -0
  3. {empathy_framework-5.3.0.dist-info → empathy_framework-5.4.0.dist-info}/top_level.txt +0 -1
  4. empathy_healthcare_plugin/__init__.py +12 -11
  5. empathy_llm_toolkit/__init__.py +12 -26
  6. empathy_os/__init__.py +12 -356
  7. empathy_software_plugin/__init__.py +12 -11
  8. empathy_framework-5.3.0.dist-info/METADATA +0 -1026
  9. empathy_framework-5.3.0.dist-info/RECORD +0 -456
  10. empathy_framework-5.3.0.dist-info/entry_points.txt +0 -26
  11. empathy_framework-5.3.0.dist-info/licenses/LICENSE +0 -201
  12. empathy_framework-5.3.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +0 -101
  13. empathy_healthcare_plugin/monitors/__init__.py +0 -9
  14. empathy_healthcare_plugin/monitors/clinical_protocol_monitor.py +0 -315
  15. empathy_healthcare_plugin/monitors/monitoring/__init__.py +0 -44
  16. empathy_healthcare_plugin/monitors/monitoring/protocol_checker.py +0 -300
  17. empathy_healthcare_plugin/monitors/monitoring/protocol_loader.py +0 -214
  18. empathy_healthcare_plugin/monitors/monitoring/sensor_parsers.py +0 -306
  19. empathy_healthcare_plugin/monitors/monitoring/trajectory_analyzer.py +0 -389
  20. empathy_healthcare_plugin/protocols/cardiac.json +0 -93
  21. empathy_healthcare_plugin/protocols/post_operative.json +0 -92
  22. empathy_healthcare_plugin/protocols/respiratory.json +0 -92
  23. empathy_healthcare_plugin/protocols/sepsis.json +0 -141
  24. empathy_llm_toolkit/README.md +0 -553
  25. empathy_llm_toolkit/agent_factory/__init__.py +0 -53
  26. empathy_llm_toolkit/agent_factory/adapters/__init__.py +0 -85
  27. empathy_llm_toolkit/agent_factory/adapters/autogen_adapter.py +0 -312
  28. empathy_llm_toolkit/agent_factory/adapters/crewai_adapter.py +0 -483
  29. empathy_llm_toolkit/agent_factory/adapters/haystack_adapter.py +0 -298
  30. empathy_llm_toolkit/agent_factory/adapters/langchain_adapter.py +0 -362
  31. empathy_llm_toolkit/agent_factory/adapters/langgraph_adapter.py +0 -333
  32. empathy_llm_toolkit/agent_factory/adapters/native.py +0 -228
  33. empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +0 -423
  34. empathy_llm_toolkit/agent_factory/base.py +0 -305
  35. empathy_llm_toolkit/agent_factory/crews/__init__.py +0 -67
  36. empathy_llm_toolkit/agent_factory/crews/code_review.py +0 -1113
  37. empathy_llm_toolkit/agent_factory/crews/health_check.py +0 -1262
  38. empathy_llm_toolkit/agent_factory/crews/refactoring.py +0 -1128
  39. empathy_llm_toolkit/agent_factory/crews/security_audit.py +0 -1018
  40. empathy_llm_toolkit/agent_factory/decorators.py +0 -287
  41. empathy_llm_toolkit/agent_factory/factory.py +0 -558
  42. empathy_llm_toolkit/agent_factory/framework.py +0 -193
  43. empathy_llm_toolkit/agent_factory/memory_integration.py +0 -328
  44. empathy_llm_toolkit/agent_factory/resilient.py +0 -320
  45. empathy_llm_toolkit/agents_md/__init__.py +0 -22
  46. empathy_llm_toolkit/agents_md/loader.py +0 -218
  47. empathy_llm_toolkit/agents_md/parser.py +0 -271
  48. empathy_llm_toolkit/agents_md/registry.py +0 -307
  49. empathy_llm_toolkit/claude_memory.py +0 -466
  50. empathy_llm_toolkit/cli/__init__.py +0 -8
  51. empathy_llm_toolkit/cli/sync_claude.py +0 -487
  52. empathy_llm_toolkit/code_health.py +0 -1313
  53. empathy_llm_toolkit/commands/__init__.py +0 -51
  54. empathy_llm_toolkit/commands/context.py +0 -375
  55. empathy_llm_toolkit/commands/loader.py +0 -301
  56. empathy_llm_toolkit/commands/models.py +0 -231
  57. empathy_llm_toolkit/commands/parser.py +0 -371
  58. empathy_llm_toolkit/commands/registry.py +0 -429
  59. empathy_llm_toolkit/config/__init__.py +0 -29
  60. empathy_llm_toolkit/config/unified.py +0 -291
  61. empathy_llm_toolkit/context/__init__.py +0 -22
  62. empathy_llm_toolkit/context/compaction.py +0 -455
  63. empathy_llm_toolkit/context/manager.py +0 -434
  64. empathy_llm_toolkit/contextual_patterns.py +0 -361
  65. empathy_llm_toolkit/core.py +0 -907
  66. empathy_llm_toolkit/git_pattern_extractor.py +0 -435
  67. empathy_llm_toolkit/hooks/__init__.py +0 -24
  68. empathy_llm_toolkit/hooks/config.py +0 -306
  69. empathy_llm_toolkit/hooks/executor.py +0 -289
  70. empathy_llm_toolkit/hooks/registry.py +0 -302
  71. empathy_llm_toolkit/hooks/scripts/__init__.py +0 -39
  72. empathy_llm_toolkit/hooks/scripts/evaluate_session.py +0 -201
  73. empathy_llm_toolkit/hooks/scripts/first_time_init.py +0 -285
  74. empathy_llm_toolkit/hooks/scripts/pre_compact.py +0 -207
  75. empathy_llm_toolkit/hooks/scripts/session_end.py +0 -183
  76. empathy_llm_toolkit/hooks/scripts/session_start.py +0 -163
  77. empathy_llm_toolkit/hooks/scripts/suggest_compact.py +0 -225
  78. empathy_llm_toolkit/learning/__init__.py +0 -30
  79. empathy_llm_toolkit/learning/evaluator.py +0 -438
  80. empathy_llm_toolkit/learning/extractor.py +0 -514
  81. empathy_llm_toolkit/learning/storage.py +0 -560
  82. empathy_llm_toolkit/levels.py +0 -227
  83. empathy_llm_toolkit/pattern_confidence.py +0 -414
  84. empathy_llm_toolkit/pattern_resolver.py +0 -272
  85. empathy_llm_toolkit/pattern_summary.py +0 -350
  86. empathy_llm_toolkit/providers.py +0 -967
  87. empathy_llm_toolkit/routing/__init__.py +0 -32
  88. empathy_llm_toolkit/routing/model_router.py +0 -362
  89. empathy_llm_toolkit/security/IMPLEMENTATION_SUMMARY.md +0 -413
  90. empathy_llm_toolkit/security/PHASE2_COMPLETE.md +0 -384
  91. empathy_llm_toolkit/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +0 -271
  92. empathy_llm_toolkit/security/QUICK_REFERENCE.md +0 -316
  93. empathy_llm_toolkit/security/README.md +0 -262
  94. empathy_llm_toolkit/security/__init__.py +0 -62
  95. empathy_llm_toolkit/security/audit_logger.py +0 -929
  96. empathy_llm_toolkit/security/audit_logger_example.py +0 -152
  97. empathy_llm_toolkit/security/pii_scrubber.py +0 -640
  98. empathy_llm_toolkit/security/secrets_detector.py +0 -678
  99. empathy_llm_toolkit/security/secrets_detector_example.py +0 -304
  100. empathy_llm_toolkit/security/secure_memdocs.py +0 -1192
  101. empathy_llm_toolkit/security/secure_memdocs_example.py +0 -278
  102. empathy_llm_toolkit/session_status.py +0 -745
  103. empathy_llm_toolkit/state.py +0 -246
  104. empathy_llm_toolkit/utils/__init__.py +0 -5
  105. empathy_llm_toolkit/utils/tokens.py +0 -349
  106. empathy_os/adaptive/__init__.py +0 -13
  107. empathy_os/adaptive/task_complexity.py +0 -127
  108. empathy_os/agent_monitoring.py +0 -414
  109. empathy_os/cache/__init__.py +0 -117
  110. empathy_os/cache/base.py +0 -166
  111. empathy_os/cache/dependency_manager.py +0 -256
  112. empathy_os/cache/hash_only.py +0 -251
  113. empathy_os/cache/hybrid.py +0 -457
  114. empathy_os/cache/storage.py +0 -285
  115. empathy_os/cache_monitor.py +0 -356
  116. empathy_os/cache_stats.py +0 -298
  117. empathy_os/cli/__init__.py +0 -152
  118. empathy_os/cli/__main__.py +0 -12
  119. empathy_os/cli/commands/__init__.py +0 -1
  120. empathy_os/cli/commands/batch.py +0 -264
  121. empathy_os/cli/commands/cache.py +0 -248
  122. empathy_os/cli/commands/help.py +0 -331
  123. empathy_os/cli/commands/info.py +0 -140
  124. empathy_os/cli/commands/inspect.py +0 -436
  125. empathy_os/cli/commands/inspection.py +0 -57
  126. empathy_os/cli/commands/memory.py +0 -48
  127. empathy_os/cli/commands/metrics.py +0 -92
  128. empathy_os/cli/commands/orchestrate.py +0 -184
  129. empathy_os/cli/commands/patterns.py +0 -207
  130. empathy_os/cli/commands/profiling.py +0 -202
  131. empathy_os/cli/commands/provider.py +0 -98
  132. empathy_os/cli/commands/routing.py +0 -285
  133. empathy_os/cli/commands/setup.py +0 -96
  134. empathy_os/cli/commands/status.py +0 -235
  135. empathy_os/cli/commands/sync.py +0 -166
  136. empathy_os/cli/commands/tier.py +0 -121
  137. empathy_os/cli/commands/utilities.py +0 -114
  138. empathy_os/cli/commands/workflow.py +0 -579
  139. empathy_os/cli/core.py +0 -32
  140. empathy_os/cli/parsers/__init__.py +0 -68
  141. empathy_os/cli/parsers/batch.py +0 -118
  142. empathy_os/cli/parsers/cache.py +0 -65
  143. empathy_os/cli/parsers/help.py +0 -41
  144. empathy_os/cli/parsers/info.py +0 -26
  145. empathy_os/cli/parsers/inspect.py +0 -66
  146. empathy_os/cli/parsers/metrics.py +0 -42
  147. empathy_os/cli/parsers/orchestrate.py +0 -61
  148. empathy_os/cli/parsers/patterns.py +0 -54
  149. empathy_os/cli/parsers/provider.py +0 -40
  150. empathy_os/cli/parsers/routing.py +0 -110
  151. empathy_os/cli/parsers/setup.py +0 -42
  152. empathy_os/cli/parsers/status.py +0 -47
  153. empathy_os/cli/parsers/sync.py +0 -31
  154. empathy_os/cli/parsers/tier.py +0 -33
  155. empathy_os/cli/parsers/workflow.py +0 -77
  156. empathy_os/cli/utils/__init__.py +0 -1
  157. empathy_os/cli/utils/data.py +0 -242
  158. empathy_os/cli/utils/helpers.py +0 -68
  159. empathy_os/cli_legacy.py +0 -3957
  160. empathy_os/cli_minimal.py +0 -1159
  161. empathy_os/cli_router.py +0 -437
  162. empathy_os/cli_unified.py +0 -814
  163. empathy_os/config/__init__.py +0 -66
  164. empathy_os/config/xml_config.py +0 -286
  165. empathy_os/config.py +0 -545
  166. empathy_os/coordination.py +0 -870
  167. empathy_os/core.py +0 -1511
  168. empathy_os/core_modules/__init__.py +0 -15
  169. empathy_os/cost_tracker.py +0 -626
  170. empathy_os/dashboard/__init__.py +0 -41
  171. empathy_os/dashboard/app.py +0 -512
  172. empathy_os/dashboard/simple_server.py +0 -435
  173. empathy_os/dashboard/standalone_server.py +0 -547
  174. empathy_os/discovery.py +0 -306
  175. empathy_os/emergence.py +0 -306
  176. empathy_os/exceptions.py +0 -123
  177. empathy_os/feedback_loops.py +0 -373
  178. empathy_os/hot_reload/README.md +0 -473
  179. empathy_os/hot_reload/__init__.py +0 -62
  180. empathy_os/hot_reload/config.py +0 -83
  181. empathy_os/hot_reload/integration.py +0 -229
  182. empathy_os/hot_reload/reloader.py +0 -298
  183. empathy_os/hot_reload/watcher.py +0 -183
  184. empathy_os/hot_reload/websocket.py +0 -177
  185. empathy_os/levels.py +0 -577
  186. empathy_os/leverage_points.py +0 -441
  187. empathy_os/logging_config.py +0 -261
  188. empathy_os/mcp/__init__.py +0 -10
  189. empathy_os/mcp/server.py +0 -506
  190. empathy_os/memory/__init__.py +0 -237
  191. empathy_os/memory/claude_memory.py +0 -469
  192. empathy_os/memory/config.py +0 -224
  193. empathy_os/memory/control_panel.py +0 -1290
  194. empathy_os/memory/control_panel_support.py +0 -145
  195. empathy_os/memory/cross_session.py +0 -845
  196. empathy_os/memory/edges.py +0 -179
  197. empathy_os/memory/encryption.py +0 -159
  198. empathy_os/memory/file_session.py +0 -770
  199. empathy_os/memory/graph.py +0 -570
  200. empathy_os/memory/long_term.py +0 -913
  201. empathy_os/memory/long_term_types.py +0 -99
  202. empathy_os/memory/mixins/__init__.py +0 -25
  203. empathy_os/memory/mixins/backend_init_mixin.py +0 -249
  204. empathy_os/memory/mixins/capabilities_mixin.py +0 -208
  205. empathy_os/memory/mixins/handoff_mixin.py +0 -208
  206. empathy_os/memory/mixins/lifecycle_mixin.py +0 -49
  207. empathy_os/memory/mixins/long_term_mixin.py +0 -352
  208. empathy_os/memory/mixins/promotion_mixin.py +0 -109
  209. empathy_os/memory/mixins/short_term_mixin.py +0 -182
  210. empathy_os/memory/nodes.py +0 -179
  211. empathy_os/memory/redis_bootstrap.py +0 -540
  212. empathy_os/memory/security/__init__.py +0 -31
  213. empathy_os/memory/security/audit_logger.py +0 -932
  214. empathy_os/memory/security/pii_scrubber.py +0 -640
  215. empathy_os/memory/security/secrets_detector.py +0 -678
  216. empathy_os/memory/short_term.py +0 -2192
  217. empathy_os/memory/simple_storage.py +0 -302
  218. empathy_os/memory/storage/__init__.py +0 -15
  219. empathy_os/memory/storage_backend.py +0 -167
  220. empathy_os/memory/summary_index.py +0 -583
  221. empathy_os/memory/types.py +0 -446
  222. empathy_os/memory/unified.py +0 -182
  223. empathy_os/meta_workflows/__init__.py +0 -74
  224. empathy_os/meta_workflows/agent_creator.py +0 -248
  225. empathy_os/meta_workflows/builtin_templates.py +0 -567
  226. empathy_os/meta_workflows/cli_commands/__init__.py +0 -56
  227. empathy_os/meta_workflows/cli_commands/agent_commands.py +0 -321
  228. empathy_os/meta_workflows/cli_commands/analytics_commands.py +0 -442
  229. empathy_os/meta_workflows/cli_commands/config_commands.py +0 -232
  230. empathy_os/meta_workflows/cli_commands/memory_commands.py +0 -182
  231. empathy_os/meta_workflows/cli_commands/template_commands.py +0 -354
  232. empathy_os/meta_workflows/cli_commands/workflow_commands.py +0 -382
  233. empathy_os/meta_workflows/cli_meta_workflows.py +0 -59
  234. empathy_os/meta_workflows/form_engine.py +0 -292
  235. empathy_os/meta_workflows/intent_detector.py +0 -409
  236. empathy_os/meta_workflows/models.py +0 -569
  237. empathy_os/meta_workflows/pattern_learner.py +0 -738
  238. empathy_os/meta_workflows/plan_generator.py +0 -384
  239. empathy_os/meta_workflows/session_context.py +0 -397
  240. empathy_os/meta_workflows/template_registry.py +0 -229
  241. empathy_os/meta_workflows/workflow.py +0 -984
  242. empathy_os/metrics/__init__.py +0 -12
  243. empathy_os/metrics/collector.py +0 -31
  244. empathy_os/metrics/prompt_metrics.py +0 -194
  245. empathy_os/models/__init__.py +0 -172
  246. empathy_os/models/__main__.py +0 -13
  247. empathy_os/models/adaptive_routing.py +0 -437
  248. empathy_os/models/auth_cli.py +0 -444
  249. empathy_os/models/auth_strategy.py +0 -450
  250. empathy_os/models/cli.py +0 -655
  251. empathy_os/models/empathy_executor.py +0 -354
  252. empathy_os/models/executor.py +0 -257
  253. empathy_os/models/fallback.py +0 -762
  254. empathy_os/models/provider_config.py +0 -282
  255. empathy_os/models/registry.py +0 -472
  256. empathy_os/models/tasks.py +0 -359
  257. empathy_os/models/telemetry/__init__.py +0 -71
  258. empathy_os/models/telemetry/analytics.py +0 -594
  259. empathy_os/models/telemetry/backend.py +0 -196
  260. empathy_os/models/telemetry/data_models.py +0 -431
  261. empathy_os/models/telemetry/storage.py +0 -489
  262. empathy_os/models/token_estimator.py +0 -420
  263. empathy_os/models/validation.py +0 -280
  264. empathy_os/monitoring/__init__.py +0 -52
  265. empathy_os/monitoring/alerts.py +0 -946
  266. empathy_os/monitoring/alerts_cli.py +0 -448
  267. empathy_os/monitoring/multi_backend.py +0 -271
  268. empathy_os/monitoring/otel_backend.py +0 -362
  269. empathy_os/optimization/__init__.py +0 -19
  270. empathy_os/optimization/context_optimizer.py +0 -272
  271. empathy_os/orchestration/__init__.py +0 -67
  272. empathy_os/orchestration/agent_templates.py +0 -707
  273. empathy_os/orchestration/config_store.py +0 -499
  274. empathy_os/orchestration/execution_strategies.py +0 -2111
  275. empathy_os/orchestration/meta_orchestrator.py +0 -1168
  276. empathy_os/orchestration/pattern_learner.py +0 -696
  277. empathy_os/orchestration/real_tools.py +0 -931
  278. empathy_os/pattern_cache.py +0 -187
  279. empathy_os/pattern_library.py +0 -542
  280. empathy_os/patterns/debugging/all_patterns.json +0 -81
  281. empathy_os/patterns/debugging/workflow_20260107_1770825e.json +0 -77
  282. empathy_os/patterns/refactoring_memory.json +0 -89
  283. empathy_os/persistence.py +0 -564
  284. empathy_os/platform_utils.py +0 -265
  285. empathy_os/plugins/__init__.py +0 -28
  286. empathy_os/plugins/base.py +0 -361
  287. empathy_os/plugins/registry.py +0 -268
  288. empathy_os/project_index/__init__.py +0 -32
  289. empathy_os/project_index/cli.py +0 -335
  290. empathy_os/project_index/index.py +0 -667
  291. empathy_os/project_index/models.py +0 -504
  292. empathy_os/project_index/reports.py +0 -474
  293. empathy_os/project_index/scanner.py +0 -777
  294. empathy_os/project_index/scanner_parallel.py +0 -291
  295. empathy_os/prompts/__init__.py +0 -61
  296. empathy_os/prompts/config.py +0 -77
  297. empathy_os/prompts/context.py +0 -177
  298. empathy_os/prompts/parser.py +0 -285
  299. empathy_os/prompts/registry.py +0 -313
  300. empathy_os/prompts/templates.py +0 -208
  301. empathy_os/redis_config.py +0 -302
  302. empathy_os/redis_memory.py +0 -799
  303. empathy_os/resilience/__init__.py +0 -56
  304. empathy_os/resilience/circuit_breaker.py +0 -256
  305. empathy_os/resilience/fallback.py +0 -179
  306. empathy_os/resilience/health.py +0 -300
  307. empathy_os/resilience/retry.py +0 -209
  308. empathy_os/resilience/timeout.py +0 -135
  309. empathy_os/routing/__init__.py +0 -43
  310. empathy_os/routing/chain_executor.py +0 -433
  311. empathy_os/routing/classifier.py +0 -217
  312. empathy_os/routing/smart_router.py +0 -234
  313. empathy_os/routing/workflow_registry.py +0 -343
  314. empathy_os/scaffolding/README.md +0 -589
  315. empathy_os/scaffolding/__init__.py +0 -35
  316. empathy_os/scaffolding/__main__.py +0 -14
  317. empathy_os/scaffolding/cli.py +0 -240
  318. empathy_os/socratic/__init__.py +0 -256
  319. empathy_os/socratic/ab_testing.py +0 -958
  320. empathy_os/socratic/blueprint.py +0 -533
  321. empathy_os/socratic/cli.py +0 -703
  322. empathy_os/socratic/collaboration.py +0 -1114
  323. empathy_os/socratic/domain_templates.py +0 -924
  324. empathy_os/socratic/embeddings.py +0 -738
  325. empathy_os/socratic/engine.py +0 -794
  326. empathy_os/socratic/explainer.py +0 -682
  327. empathy_os/socratic/feedback.py +0 -772
  328. empathy_os/socratic/forms.py +0 -629
  329. empathy_os/socratic/generator.py +0 -732
  330. empathy_os/socratic/llm_analyzer.py +0 -637
  331. empathy_os/socratic/mcp_server.py +0 -702
  332. empathy_os/socratic/session.py +0 -312
  333. empathy_os/socratic/storage.py +0 -667
  334. empathy_os/socratic/success.py +0 -730
  335. empathy_os/socratic/visual_editor.py +0 -860
  336. empathy_os/socratic/web_ui.py +0 -958
  337. empathy_os/telemetry/__init__.py +0 -39
  338. empathy_os/telemetry/agent_coordination.py +0 -475
  339. empathy_os/telemetry/agent_tracking.py +0 -367
  340. empathy_os/telemetry/approval_gates.py +0 -545
  341. empathy_os/telemetry/cli.py +0 -1231
  342. empathy_os/telemetry/commands/__init__.py +0 -14
  343. empathy_os/telemetry/commands/dashboard_commands.py +0 -696
  344. empathy_os/telemetry/event_streaming.py +0 -409
  345. empathy_os/telemetry/feedback_loop.py +0 -567
  346. empathy_os/telemetry/usage_tracker.py +0 -591
  347. empathy_os/templates.py +0 -754
  348. empathy_os/test_generator/__init__.py +0 -38
  349. empathy_os/test_generator/__main__.py +0 -14
  350. empathy_os/test_generator/cli.py +0 -234
  351. empathy_os/test_generator/generator.py +0 -355
  352. empathy_os/test_generator/risk_analyzer.py +0 -216
  353. empathy_os/tier_recommender.py +0 -384
  354. empathy_os/tools.py +0 -183
  355. empathy_os/trust/__init__.py +0 -28
  356. empathy_os/trust/circuit_breaker.py +0 -579
  357. empathy_os/trust_building.py +0 -527
  358. empathy_os/validation/__init__.py +0 -19
  359. empathy_os/validation/xml_validator.py +0 -281
  360. empathy_os/vscode_bridge.py +0 -173
  361. empathy_os/workflow_commands.py +0 -780
  362. empathy_os/workflow_patterns/__init__.py +0 -33
  363. empathy_os/workflow_patterns/behavior.py +0 -249
  364. empathy_os/workflow_patterns/core.py +0 -76
  365. empathy_os/workflow_patterns/output.py +0 -99
  366. empathy_os/workflow_patterns/registry.py +0 -255
  367. empathy_os/workflow_patterns/structural.py +0 -288
  368. empathy_os/workflows/__init__.py +0 -539
  369. empathy_os/workflows/autonomous_test_gen.py +0 -1268
  370. empathy_os/workflows/base.py +0 -2667
  371. empathy_os/workflows/batch_processing.py +0 -342
  372. empathy_os/workflows/bug_predict.py +0 -1084
  373. empathy_os/workflows/builder.py +0 -273
  374. empathy_os/workflows/caching.py +0 -253
  375. empathy_os/workflows/code_review.py +0 -1048
  376. empathy_os/workflows/code_review_adapters.py +0 -312
  377. empathy_os/workflows/code_review_pipeline.py +0 -722
  378. empathy_os/workflows/config.py +0 -645
  379. empathy_os/workflows/dependency_check.py +0 -644
  380. empathy_os/workflows/document_gen/__init__.py +0 -25
  381. empathy_os/workflows/document_gen/config.py +0 -30
  382. empathy_os/workflows/document_gen/report_formatter.py +0 -162
  383. empathy_os/workflows/document_gen/workflow.py +0 -1426
  384. empathy_os/workflows/document_manager.py +0 -216
  385. empathy_os/workflows/document_manager_README.md +0 -134
  386. empathy_os/workflows/documentation_orchestrator.py +0 -1205
  387. empathy_os/workflows/history.py +0 -510
  388. empathy_os/workflows/keyboard_shortcuts/__init__.py +0 -39
  389. empathy_os/workflows/keyboard_shortcuts/generators.py +0 -391
  390. empathy_os/workflows/keyboard_shortcuts/parsers.py +0 -416
  391. empathy_os/workflows/keyboard_shortcuts/prompts.py +0 -295
  392. empathy_os/workflows/keyboard_shortcuts/schema.py +0 -193
  393. empathy_os/workflows/keyboard_shortcuts/workflow.py +0 -509
  394. empathy_os/workflows/llm_base.py +0 -363
  395. empathy_os/workflows/manage_docs.py +0 -87
  396. empathy_os/workflows/manage_docs_README.md +0 -134
  397. empathy_os/workflows/manage_documentation.py +0 -821
  398. empathy_os/workflows/new_sample_workflow1.py +0 -149
  399. empathy_os/workflows/new_sample_workflow1_README.md +0 -150
  400. empathy_os/workflows/orchestrated_health_check.py +0 -849
  401. empathy_os/workflows/orchestrated_release_prep.py +0 -600
  402. empathy_os/workflows/output.py +0 -413
  403. empathy_os/workflows/perf_audit.py +0 -863
  404. empathy_os/workflows/pr_review.py +0 -762
  405. empathy_os/workflows/progress.py +0 -785
  406. empathy_os/workflows/progress_server.py +0 -322
  407. empathy_os/workflows/progressive/README 2.md +0 -454
  408. empathy_os/workflows/progressive/README.md +0 -454
  409. empathy_os/workflows/progressive/__init__.py +0 -82
  410. empathy_os/workflows/progressive/cli.py +0 -219
  411. empathy_os/workflows/progressive/core.py +0 -488
  412. empathy_os/workflows/progressive/orchestrator.py +0 -723
  413. empathy_os/workflows/progressive/reports.py +0 -520
  414. empathy_os/workflows/progressive/telemetry.py +0 -274
  415. empathy_os/workflows/progressive/test_gen.py +0 -495
  416. empathy_os/workflows/progressive/workflow.py +0 -589
  417. empathy_os/workflows/refactor_plan.py +0 -694
  418. empathy_os/workflows/release_prep.py +0 -895
  419. empathy_os/workflows/release_prep_crew.py +0 -969
  420. empathy_os/workflows/research_synthesis.py +0 -404
  421. empathy_os/workflows/routing.py +0 -168
  422. empathy_os/workflows/secure_release.py +0 -593
  423. empathy_os/workflows/security_adapters.py +0 -297
  424. empathy_os/workflows/security_audit.py +0 -1329
  425. empathy_os/workflows/security_audit_phase3.py +0 -355
  426. empathy_os/workflows/seo_optimization.py +0 -633
  427. empathy_os/workflows/step_config.py +0 -234
  428. empathy_os/workflows/telemetry_mixin.py +0 -269
  429. empathy_os/workflows/test5.py +0 -125
  430. empathy_os/workflows/test5_README.md +0 -158
  431. empathy_os/workflows/test_coverage_boost_crew.py +0 -849
  432. empathy_os/workflows/test_gen/__init__.py +0 -52
  433. empathy_os/workflows/test_gen/ast_analyzer.py +0 -249
  434. empathy_os/workflows/test_gen/config.py +0 -88
  435. empathy_os/workflows/test_gen/data_models.py +0 -38
  436. empathy_os/workflows/test_gen/report_formatter.py +0 -289
  437. empathy_os/workflows/test_gen/test_templates.py +0 -381
  438. empathy_os/workflows/test_gen/workflow.py +0 -655
  439. empathy_os/workflows/test_gen.py +0 -54
  440. empathy_os/workflows/test_gen_behavioral.py +0 -477
  441. empathy_os/workflows/test_gen_parallel.py +0 -341
  442. empathy_os/workflows/test_lifecycle.py +0 -526
  443. empathy_os/workflows/test_maintenance.py +0 -627
  444. empathy_os/workflows/test_maintenance_cli.py +0 -590
  445. empathy_os/workflows/test_maintenance_crew.py +0 -840
  446. empathy_os/workflows/test_runner.py +0 -622
  447. empathy_os/workflows/tier_tracking.py +0 -531
  448. empathy_os/workflows/xml_enhanced_crew.py +0 -285
  449. empathy_software_plugin/SOFTWARE_PLUGIN_README.md +0 -57
  450. empathy_software_plugin/cli/__init__.py +0 -120
  451. empathy_software_plugin/cli/inspect.py +0 -362
  452. empathy_software_plugin/cli.py +0 -574
  453. empathy_software_plugin/plugin.py +0 -188
  454. workflow_scaffolding/__init__.py +0 -11
  455. workflow_scaffolding/__main__.py +0 -12
  456. workflow_scaffolding/cli.py +0 -206
  457. workflow_scaffolding/generator.py +0 -265
  458. {empathy_framework-5.3.0.dist-info → empathy_framework-5.4.0.dist-info}/WHEEL +0 -0
@@ -1,1268 +0,0 @@
- """Autonomous Test Generation with Dashboard Integration - Enhanced Edition.
-
- Generates behavioral tests with real-time monitoring via Agent Coordination Dashboard.
-
- ENHANCEMENTS (Phase 1):
- - Extended thinking mode for better test planning
- - Prompt caching for 90% cost reduction
- - Full source code (no truncation)
- - Workflow-specific prompts with mocking templates
- - Few-shot learning with examples
-
- ENHANCEMENTS (Phase 2 - Multi-Turn Refinement):
- - Iterative test generation with validation loop
- - Automatic failure detection and fixing
- - Conversation history for context preservation
-
- ENHANCEMENTS (Phase 3 - Coverage-Guided Generation):
- - Coverage analysis integration
- - Iterative coverage improvement targeting uncovered lines
- - Systematic path to 80%+ coverage
-
- Copyright 2026 Smart-AI-Memory
- Licensed under Apache 2.0
- """
-
- import json
- import logging
- import re
- import subprocess
- import sys
- from dataclasses import dataclass
- from pathlib import Path
- from typing import Any
-
- from empathy_os.memory.short_term import RedisShortTermMemory
- from empathy_os.telemetry.agent_tracking import HeartbeatCoordinator
- from empathy_os.telemetry.event_streaming import EventStreamer
- from empathy_os.telemetry.feedback_loop import FeedbackLoop
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class ValidationResult:
-     """Result of pytest validation."""
-     passed: bool
-     failures: str
-     error_count: int
-     output: str
-
-
- @dataclass
- class CoverageResult:
-     """Result of coverage analysis."""
-     coverage: float
-     missing_lines: list[int]
-     total_statements: int
-     covered_statements: int
-
-
- class AutonomousTestGenerator:
-     """Generate tests autonomously with dashboard monitoring and Anthropic best practices."""
-
-     def __init__(
-         self,
-         agent_id: str,
-         batch_num: int,
-         modules: list[dict[str, Any]],
-         enable_refinement: bool = True,
-         max_refinement_iterations: int = 3,
-         enable_coverage_guided: bool = False,
-         target_coverage: float = 0.80
-     ):
-         """Initialize generator.
-
-         Args:
-             agent_id: Unique agent identifier
-             batch_num: Batch number (1-18)
-             modules: List of modules to generate tests for
-             enable_refinement: Enable Phase 2 multi-turn refinement (default: True)
-             max_refinement_iterations: Max iterations for refinement (default: 3)
-             enable_coverage_guided: Enable Phase 3 coverage-guided generation (default: False)
-             target_coverage: Target coverage percentage (default: 0.80 = 80%)
-         """
-         self.agent_id = agent_id
-         self.batch_num = batch_num
-         self.modules = modules
-
-         # Phase 2 & 3 configuration
-         self.enable_refinement = enable_refinement
-         self.max_refinement_iterations = max_refinement_iterations
-         self.enable_coverage_guided = enable_coverage_guided
-         self.target_coverage = target_coverage
-
-         # Initialize memory backend for dashboard integration
-         try:
-             self.memory = RedisShortTermMemory()
-             self.coordinator = HeartbeatCoordinator(memory=self.memory, enable_streaming=True)
-             self.event_streamer = EventStreamer(memory=self.memory)
-             self.feedback_loop = FeedbackLoop(memory=self.memory)
-         except Exception as e:
-             logger.warning(f"Failed to initialize memory backend: {e}")
-             self.coordinator = HeartbeatCoordinator()
-             self.event_streamer = None
-             self.feedback_loop = None
-
-         self.output_dir = Path(f"tests/behavioral/generated/batch{batch_num}")
-         self.output_dir.mkdir(parents=True, exist_ok=True)
-
-         logger.info(f"Generator initialized: refinement={enable_refinement}, coverage_guided={enable_coverage_guided}")
-
-     def generate_all(self) -> dict[str, Any]:
-         """Generate tests for all modules with progress tracking.
-
-         Returns:
-             Summary of generation results
-         """
-         # Start tracking
-         self.coordinator.start_heartbeat(
-             agent_id=self.agent_id,
-             metadata={
-                 "batch": self.batch_num,
-                 "total_modules": len(self.modules),
-                 "workflow": "autonomous_test_generation",
-             }
-         )
-
-         try:
-             results = {
-                 "batch": self.batch_num,
-                 "total_modules": len(self.modules),
-                 "completed": 0,
-                 "failed": 0,
-                 "tests_generated": 0,
-                 "files_created": [],
-             }
-
-             for i, module in enumerate(self.modules):
-                 progress = (i + 1) / len(self.modules)
-                 module_name = module["file"].replace("src/empathy_os/", "")
-
-                 # Update dashboard
-                 self.coordinator.beat(
-                     status="running",
-                     progress=progress,
-                     current_task=f"Generating tests for {module_name}"
-                 )
-
-                 try:
-                     # Generate tests for this module
-                     test_file = self._generate_module_tests(module)
-                     if test_file:
-                         results["completed"] += 1
-                         results["files_created"].append(str(test_file))
-                         logger.info(f"✅ Generated tests for {module_name}")
-
-                         # Send event to dashboard
-                         if self.event_streamer:
-                             self.event_streamer.publish_event(
-                                 event_type="test_file_created",
-                                 data={
-                                     "agent_id": self.agent_id,
-                                     "module": module_name,
-                                     "test_file": str(test_file),
-                                     "batch": self.batch_num
-                                 }
-                             )
-
-                         # Record quality feedback
-                         if self.feedback_loop:
-                             self.feedback_loop.record_feedback(
-                                 workflow_name="test-generation",
-                                 stage_name="generation",
-                                 tier="capable",
-                                 quality_score=1.0, # Success
-                                 metadata={"module": module_name, "status": "success", "batch": self.batch_num}
-                             )
-                     else:
-                         results["failed"] += 1
-                         logger.warning(f"⚠️ Skipped {module_name} (validation failed)")
-
-                         # Record failure feedback
-                         if self.feedback_loop:
-                             self.feedback_loop.record_feedback(
-                                 workflow_name="test-generation",
-                                 stage_name="validation",
-                                 tier="capable",
-                                 quality_score=0.0, # Failure
-                                 metadata={"module": module_name, "status": "validation_failed", "batch": self.batch_num}
-                             )
-
-                 except Exception as e:
-                     results["failed"] += 1
-                     logger.error(f"❌ Error generating tests for {module_name}: {e}")
-
-                     # Send error event
-                     if self.event_streamer:
-                         self.event_streamer.publish_event(
-                             event_type="test_generation_error",
-                             data={
-                                 "agent_id": self.agent_id,
-                                 "module": module_name,
-                                 "error": str(e),
-                                 "batch": self.batch_num
-                             }
-                         )
-
-             # Count total tests
-             results["tests_generated"] = self._count_tests()
-
-             # Final update
-             self.coordinator.beat(
-                 status="completed",
-                 progress=1.0,
-                 current_task=f"Completed: {results['completed']}/{results['total_modules']} modules"
-             )
-
-             return results
-
-         except Exception as e:
-             # Error tracking
-             self.coordinator.beat(
-                 status="failed",
-                 progress=0.0,
-                 current_task=f"Failed: {str(e)}"
-             )
-             raise
-
-         finally:
-             # Stop heartbeat
-             self.coordinator.stop_heartbeat(
-                 final_status="completed" if results["completed"] > 0 else "failed"
-             )
-
-     def _generate_module_tests(self, module: dict[str, Any]) -> Path | None:
-         """Generate tests for a single module using LLM agent.
-
-         Args:
-             module: Module info dict with 'file', 'total', 'missing', etc.
-
-         Returns:
-             Path to generated test file, or None if skipped
-         """
-         source_file = Path(module["file"])
-         module_name = source_file.stem
-
-         # Skip if module doesn't exist
-         if not source_file.exists():
-             logger.warning(f"Source file not found: {source_file}")
-             return None
-
-         # Read source to understand what needs testing
-         try:
-             source_code = source_file.read_text()
-         except Exception as e:
-             logger.error(f"Cannot read {source_file}: {e}")
-             return None
-
-         # Generate test file path
-         test_file = self.output_dir / f"test_{module_name}_behavioral.py"
-
-         # Extract module path for imports
-         module_path = str(source_file).replace("src/", "").replace(".py", "").replace("/", ".")
-
-         # Generate tests using LLM agent with Anthropic best practices
-         # Phase 1: Basic generation
-         # Phase 2: Multi-turn refinement (if enabled)
-         # Phase 3: Coverage-guided improvement (if enabled)
-
-         if self.enable_refinement:
-             logger.info(f"🔄 Using Phase 2: Multi-turn refinement for {module_name}")
-             test_content = self._generate_with_refinement(module_name, module_path, source_file, source_code, test_file)
-         else:
-             logger.info(f"📝 Using Phase 1: Basic generation for {module_name}")
-             test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)
-
-         if not test_content:
-             logger.warning(f"LLM generation failed for {module_name}")
-             return None
-
-         logger.info(f"LLM generated {len(test_content)} bytes for {module_name}")
-
-         # Phase 3: Coverage-guided improvement (if enabled)
-         if self.enable_coverage_guided:
-             logger.info(f"📊 Applying Phase 3: Coverage-guided improvement for {module_name}")
-             improved_content = self._generate_with_coverage_target(
-                 module_name, module_path, source_file, source_code, test_file, test_content
-             )
-             if improved_content:
-                 test_content = improved_content
-                 logger.info(f"✅ Coverage-guided improvement complete for {module_name}")
-             else:
-                 logger.warning(f"⚠️ Coverage-guided improvement failed, using previous version for {module_name}")
-
-         # Write final test file
-         test_file.write_text(test_content)
-         logger.info(f"Wrote test file: {test_file}")
-
-         # Validate it can be imported
-         if not self._validate_test_file(test_file):
-             test_file.unlink()
-             return None
-
-         return test_file
-
-     def _is_workflow_module(self, source_code: str, module_path: str) -> bool:
-         """Detect if module is a workflow requiring special handling.
-
-         Args:
-             source_code: Source code content
-             module_path: Python import path
-
-         Returns:
-             True if this is a workflow module needing LLM mocking
-         """
-         # Check for workflow indicators
-         indicators = [
-             r"class\s+\w+Workflow",
-             r"async\s+def\s+execute",
-             r"tier_routing",
-             r"LLMProvider",
-             r"TelemetryCollector",
-             r"from\s+anthropic\s+import",
-             r"messages\.create",
-             r"client\.messages"
-         ]
-
-         return any(re.search(pattern, source_code) for pattern in indicators)
-
-     def _get_example_tests(self) -> str:
-         """Get few-shot examples of excellent tests for prompt learning."""
-         return """EXAMPLE 1: Testing a utility function with mocking
- ```python
- import pytest
- from unittest.mock import Mock, patch
- from mymodule import process_data
-
- class TestProcessData:
-     def test_processes_valid_data_successfully(self):
-         \"\"\"Given valid input data, when processing, then returns expected result.\"\"\"
-         # Given
-         input_data = {"key": "value", "count": 42}
-
-         # When
-         result = process_data(input_data)
-
-         # Then
-         assert result is not None
-         assert result["status"] == "success"
-         assert result["processed"] is True
-
-     def test_handles_invalid_data_with_error(self):
-         \"\"\"Given invalid input, when processing, then raises ValueError.\"\"\"
-         # Given
-         invalid_data = {"missing": "key"}
-
-         # When/Then
-         with pytest.raises(ValueError, match="Required key 'key' not found"):
-             process_data(invalid_data)
- ```
-
- EXAMPLE 2: Testing a workflow with LLM mocking
- ```python
- import pytest
- from unittest.mock import Mock, AsyncMock, patch
- from mymodule import MyWorkflow
-
- @pytest.fixture
- def mock_llm_client(mocker):
-     \"\"\"Mock Anthropic LLM client.\"\"\"
-     mock = mocker.patch('anthropic.Anthropic')
-     mock_response = Mock()
-     mock_response.content = [Mock(text="mock LLM response")]
-     mock_response.usage = Mock(input_tokens=100, output_tokens=50)
-     mock_response.stop_reason = "end_turn"
-     mock.return_value.messages.create = AsyncMock(return_value=mock_response)
-     return mock
-
- class TestMyWorkflow:
-     @pytest.mark.asyncio
-     async def test_executes_successfully_with_mocked_llm(self, mock_llm_client):
-         \"\"\"Given valid input, when executing workflow, then completes successfully.\"\"\"
-         # Given
-         workflow = MyWorkflow()
-         input_data = {"prompt": "test prompt"}
-
-         # When
-         result = await workflow.execute(input_data)
-
-         # Then
-         assert result is not None
-         assert "response" in result
-         mock_llm_client.return_value.messages.create.assert_called_once()
-
-     @pytest.mark.asyncio
-     async def test_handles_api_error_gracefully(self, mock_llm_client):
-         \"\"\"Given API failure, when executing, then handles error appropriately.\"\"\"
-         # Given
-         workflow = MyWorkflow()
-         mock_llm_client.return_value.messages.create.side_effect = Exception("API Error")
-
-         # When/Then
-         with pytest.raises(Exception, match="API Error"):
-             await workflow.execute({"prompt": "test"})
- ```
- """
-
-     def _get_workflow_specific_prompt(self, module_name: str, module_path: str, source_code: str) -> str:
-         """Get workflow-specific test generation prompt with comprehensive mocking guidance."""
-         return f"""Generate comprehensive tests for this WORKFLOW module.
-
- ⚠️ CRITICAL: This module makes LLM API calls and requires proper mocking.
-
- MODULE: {module_name}
- IMPORT PATH: {module_path}
-
- SOURCE CODE (COMPLETE - NO TRUNCATION):
- ```python
- {source_code}
- ```
-
- WORKFLOW TESTING REQUIREMENTS:
-
- 1. **Mock LLM API calls** - NEVER make real API calls in tests
-    ```python
-    @pytest.fixture
-    def mock_llm_client(mocker):
-        mock = mocker.patch('anthropic.Anthropic')
-        mock_response = Mock()
-        mock_response.content = [Mock(text="mock response")]
-        mock_response.usage = Mock(input_tokens=100, output_tokens=50)
-        mock_response.stop_reason = "end_turn"
-        mock.return_value.messages.create = AsyncMock(return_value=mock_response)
-        return mock
-    ```
-
- 2. **Test tier routing** - Verify correct model selection (cheap/capable/premium)
- 3. **Test telemetry** - Mock and verify telemetry recording
- 4. **Test cost calculation** - Verify token usage and cost tracking
- 5. **Test error handling** - Mock API failures, timeouts, rate limits
- 6. **Test caching** - Mock cache hits/misses if applicable
-
- TARGET COVERAGE: 40-50% (realistic for workflow classes with proper mocking)
-
- Generate a complete test file with:
- - Copyright header: "Generated by enhanced autonomous test generation system."
- - Proper imports (from {module_path})
- - Mock fixtures for ALL external dependencies (LLM, databases, APIs, file I/O)
- - Given/When/Then structure in docstrings
- - Both success and failure test cases
- - Edge case handling
- - Docstrings for all tests describing behavior
-
- Return ONLY the complete Python test file, no explanations."""
-
-     def _generate_with_llm(self, module_name: str, module_path: str, source_file: Path, source_code: str) -> str | None:
-         """Generate comprehensive tests using LLM with Anthropic best practices.
-
-         ENHANCEMENTS (Phase 1):
-         - Extended thinking (20K token budget) for thorough test planning
-         - Prompt caching for 90% cost reduction
-         - Full source code (NO TRUNCATION)
-         - Workflow-specific prompts when detected
-
-         Args:
-             module_name: Name of module being tested
-             module_path: Python import path (e.g., empathy_os.config)
-             source_file: Path to source file
-             source_code: Source code content (FULL, not truncated)
-
-         Returns:
-             Test file content with comprehensive tests, or None if generation failed
-         """
-         import os
-
-         try:
-             import anthropic
-         except ImportError:
-             logger.error("anthropic package not installed")
-             return None
-
-         # Get API key
-         api_key = os.getenv("ANTHROPIC_API_KEY")
-         if not api_key:
-             logger.error("ANTHROPIC_API_KEY not set")
-             return None
-
-         # Detect if this is a workflow module
-         is_workflow = self._is_workflow_module(source_code, module_path)
-         logger.info(f"Module {module_name}: workflow={is_workflow}, size={len(source_code)} bytes (FULL)")
-
-         # Build appropriate prompt based on module type
-         if is_workflow:
-             generation_prompt = self._get_workflow_specific_prompt(module_name, module_path, source_code)
-         else:
-             generation_prompt = f"""Generate comprehensive behavioral tests for this Python module.
-
- SOURCE FILE: {source_file}
- MODULE PATH: {module_path}
-
- SOURCE CODE (COMPLETE):
- ```python
- {source_code}
- ```
-
- Generate a complete test file that:
- 1. Uses Given/When/Then behavioral test structure
- 2. Tests all public functions and classes
- 3. Includes edge cases and error handling
- 4. Uses proper mocking for external dependencies
- 5. Targets 80%+ code coverage for this module
- 6. Follows pytest conventions
-
- Requirements:
- - Import from {module_path} (not from src/)
- - Use pytest fixtures where appropriate
- - Mock external dependencies (APIs, databases, file I/O)
- - Test both success and failure paths
- - Include docstrings for all tests
- - Use descriptive test names
- - Start with copyright header:
-   \"\"\"Behavioral tests for {module_name}.
-
-   Generated by enhanced autonomous test generation system.
-
-   Copyright 2026 Smart-AI-Memory
-   Licensed under Apache 2.0
-   \"\"\"
-
- Return ONLY the complete Python test file content, no explanations."""
-
-         # Build messages with prompt caching (90% cost reduction on retries)
-         messages = [
-             {
-                 "role": "user",
-                 "content": [
-                     {
-                         "type": "text",
-                         "text": "You are an expert Python test engineer. Here are examples of excellent tests:",
-                         "cache_control": {"type": "ephemeral"}
-                     },
-                     {
-                         "type": "text",
-                         "text": self._get_example_tests(),
-                         "cache_control": {"type": "ephemeral"}
-                     },
-                     {
-                         "type": "text",
-                         "text": generation_prompt
-                     }
-                 ]
-             }
-         ]
-
-         try:
-             # Call Anthropic API with extended thinking and caching
-             logger.info(f"Calling LLM with extended thinking for {module_name} (workflow={is_workflow})")
-             client = anthropic.Anthropic(api_key=api_key)
-             response = client.messages.create(
-                 model="claude-sonnet-4-5", # capable tier
-                 max_tokens=40000, # Very generous total budget for comprehensive tests
-                 thinking={
-                     "type": "enabled",
-                     "budget_tokens": 20000 # Generous thinking budget for thorough planning
-                 },
-                 messages=messages,
-                 timeout=900.0, # 15 minutes timeout for extended thinking + generation
-             )
-
-             if not response.content:
-                 logger.warning(f"Empty LLM response for {module_name}")
-                 return None
-
-             # Extract test content (thinking comes first, then text)
-             test_content = None
-             for block in response.content:
-                 if block.type == "text":
-                     test_content = block.text.strip()
-                     break
-
-             if not test_content:
-                 logger.warning(f"No text content in LLM response for {module_name}")
-                 return None
-
-             logger.info(f"LLM returned {len(test_content)} bytes for {module_name}")
-
-             if len(test_content) < 100:
-                 logger.warning(f"LLM response too short for {module_name}: {test_content[:200]}")
-                 return None
-
-             # Clean up response (remove markdown fences if present)
-             if test_content.startswith("```python"):
-                 test_content = test_content[len("```python"):].strip()
-             if test_content.endswith("```"):
-                 test_content = test_content[:-3].strip()
-
-             # Check for truncation indicators
-             if response.stop_reason == "max_tokens":
-                 logger.warning(f"⚠️ LLM response truncated for {module_name} (hit max_tokens)")
-                 # Response might be incomplete but let validation catch it
-
-             # Quick syntax pre-check before returning
-             try:
-                 import ast
-                 ast.parse(test_content)
-                 logger.info(f"✓ Quick syntax check passed for {module_name}")
-             except SyntaxError as e:
-                 logger.error(f"❌ LLM generated invalid syntax for {module_name}: {e.msg} at line {e.lineno}")
-                 return None
-
-             logger.info(f"Test content cleaned, final size: {len(test_content)} bytes")
-             return test_content
-
-         except Exception as e:
-             logger.error(f"LLM generation error for {module_name}: {e}", exc_info=True)
-             return None
-
-     def _run_pytest_validation(self, test_file: Path) -> ValidationResult:
-         """Run pytest on generated tests and collect failures.
-
-         Args:
-             test_file: Path to test file to validate
-
-         Returns:
-             ValidationResult with test outcomes and failure details
-         """
-         try:
-             result = subprocess.run(
-                 [sys.executable, "-m", "pytest", str(test_file), "-v", "--tb=short"],
-                 capture_output=True,
-                 text=True,
-                 timeout=60,
-             )
-
-             passed = result.returncode == 0
-             output = result.stdout + "\n" + result.stderr
-
-             # Count errors
-             error_count = output.count("FAILED") + output.count("ERROR")
-
-             # Extract failure details
-             failures = ""
-             if not passed:
-                 # Extract relevant failure information
-                 lines = output.split("\n")
-                 failure_lines = []
-                 in_failure = False
-                 for line in lines:
-                     if "FAILED" in line or "ERROR" in line:
-                         in_failure = True
-                     if in_failure:
-                         failure_lines.append(line)
-                         if line.startswith("="): # End of failure section
-                             in_failure = False
-                 failures = "\n".join(failure_lines[:100]) # Limit to 100 lines
-
-             logger.info(f"Pytest validation: passed={passed}, errors={error_count}")
-
-             return ValidationResult(
-                 passed=passed,
-                 failures=failures,
-                 error_count=error_count,
-                 output=output
-             )
-
-         except subprocess.TimeoutExpired:
-             logger.error(f"Pytest validation timeout for {test_file}")
-             return ValidationResult(
-                 passed=False,
-                 failures="Validation timeout after 60 seconds",
-                 error_count=1,
-                 output="Timeout"
-             )
-         except Exception as e:
-             logger.error(f"Pytest validation exception: {e}")
-             return ValidationResult(
-                 passed=False,
-                 failures=f"Validation exception: {e}",
-                 error_count=1,
-                 output=str(e)
-             )
-
-     def _call_llm_with_history(
-         self,
-         conversation_history: list[dict[str, Any]],
-         api_key: str
-     ) -> str | None:
-         """Call LLM with conversation history for refinement.
-
-         Args:
-             conversation_history: List of messages (role + content)
-             api_key: Anthropic API key
-
-         Returns:
-             Refined test content or None if failed
-         """
-         try:
-             import anthropic
-
-             client = anthropic.Anthropic(api_key=api_key)
-             response = client.messages.create(
-                 model="claude-sonnet-4-5",
-                 max_tokens=40000, # Very generous total budget for iterative refinement
-                 thinking={
-                     "type": "enabled",
-                     "budget_tokens": 20000 # Generous thinking budget for thorough analysis
-                 },
-                 messages=conversation_history,
-                 timeout=900.0, # 15 minutes timeout for refinement iterations
-             )
-
-             if not response.content:
-                 logger.warning("Empty LLM response during refinement")
-                 return None
-
-             # Extract text content
-             test_content = None
-             for block in response.content:
-                 if block.type == "text":
-                     test_content = block.text.strip()
-                     break
-
-             if not test_content:
-                 logger.warning("No text content in refinement response")
-                 return None
-
-             # Clean up response
-             if test_content.startswith("```python"):
-                 test_content = test_content[len("```python"):].strip()
-             if test_content.endswith("```"):
-                 test_content = test_content[:-3].strip()
-
-             return test_content
-
-         except Exception as e:
-             logger.error(f"LLM refinement error: {e}", exc_info=True)
-             return None
-
-     def _generate_with_refinement(
-         self,
-         module_name: str,
-         module_path: str,
-         source_file: Path,
-         source_code: str,
-         test_file: Path
-     ) -> str | None:
-         """Generate tests with iterative refinement (Phase 2).
-
-         Process:
-         1. Generate initial tests
-         2. Run pytest validation
-         3. If failures, ask Claude to fix
-         4. Repeat until tests pass or max iterations
-
-         Args:
-             module_name: Name of module being tested
-             module_path: Python import path
-             source_file: Path to source file
-             source_code: Source code content
-             test_file: Path where tests will be written
-
-         Returns:
-             Final test content or None if all attempts failed
-         """
-         import os
-
-         api_key = os.getenv("ANTHROPIC_API_KEY")
-         if not api_key:
-             logger.error("ANTHROPIC_API_KEY not set")
-             return None
-
-         logger.info(f"🔄 Phase 2: Multi-turn refinement enabled for {module_name} (max {self.max_refinement_iterations} iterations)")
-
-         # Step 1: Generate initial tests
-         test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)
-         if not test_content:
-             logger.warning("Initial generation failed")
-             return None
-
-         # Build conversation history for subsequent refinements
-         is_workflow = self._is_workflow_module(source_code, module_path)
-
-         # Initial prompt (for history tracking)
-         if is_workflow:
-             initial_prompt = self._get_workflow_specific_prompt(module_name, module_path, source_code)
-         else:
-             initial_prompt = f"""Generate comprehensive behavioral tests for {module_name}.
-
- SOURCE CODE:
- ```python
- {source_code}
- ```"""
-
-         conversation_history = [
-             {
-                 "role": "user",
-                 "content": [
-                     {"type": "text", "text": "You are an expert Python test engineer. Examples:", "cache_control": {"type": "ephemeral"}},
-                     {"type": "text", "text": self._get_example_tests(), "cache_control": {"type": "ephemeral"}},
-                     {"type": "text", "text": initial_prompt}
-                 ]
-             },
-             {
-                 "role": "assistant",
-                 "content": test_content
-             }
-         ]
-
-         # Step 2: Iterative refinement loop
-         for iteration in range(self.max_refinement_iterations):
-             logger.info(f"📝 Refinement iteration {iteration + 1}/{self.max_refinement_iterations} for {module_name}")
-
-             # Write current version to temp file
-             temp_test_file = test_file.parent / f"_temp_{test_file.name}"
-             temp_test_file.write_text(test_content)
-
-             # Validate with pytest
-             validation_result = self._run_pytest_validation(temp_test_file)
-
-             if validation_result.passed:
-                 logger.info(f"✅ Tests passed on iteration {iteration + 1} for {module_name}")
-                 temp_test_file.unlink() # Clean up
-                 return test_content
-
-             # Tests failed - ask Claude to fix
-             logger.warning(f"⚠️ Tests failed on iteration {iteration + 1}: {validation_result.error_count} errors")
-
-             refinement_prompt = f"""The tests you generated have failures. Please fix these specific issues:
-
- FAILURES:
- {validation_result.failures[:2000]}
-
- Requirements:
- 1. Fix ONLY the failing tests - don't rewrite everything
- 2. Ensure imports are correct
- 3. Ensure mocking is properly configured
- 4. Return the COMPLETE corrected test file (not just the fixes)
- 5. Keep the same structure and copyright header
-
- Return ONLY the complete Python test file, no explanations."""
-
-             # Add to conversation history
-             conversation_history.append({
-                 "role": "user",
-                 "content": refinement_prompt
-             })
-
-             # Call LLM for refinement
-             refined_content = self._call_llm_with_history(conversation_history, api_key)
-
-             if not refined_content:
-                 logger.error(f"❌ Refinement failed on iteration {iteration + 1}")
-                 temp_test_file.unlink()
-                 break
-
-             # Update content and history
-             test_content = refined_content
-             conversation_history.append({
-                 "role": "assistant",
-                 "content": test_content
-             })
-
-             logger.info(f"🔄 Refinement iteration {iteration + 1} complete, retrying validation...")
-
-         # Max iterations reached
-         logger.warning(f"⚠️ Max refinement iterations reached for {module_name} - returning best attempt")
-         return test_content
-
-     def _run_coverage_analysis(self, test_file: Path, source_file: Path) -> CoverageResult:
-         """Run coverage analysis on tests.
-
-         Args:
-             test_file: Path to test file
-             source_file: Path to source file being tested
-
-         Returns:
-             CoverageResult with coverage metrics and missing lines
-         """
-         try:
-             # Run pytest with coverage
-             result = subprocess.run(
-                 [
-                     sys.executable, "-m", "pytest",
-                     str(test_file),
-                     f"--cov={source_file.parent}",
-                     "--cov-report=term-missing",
-                     "--cov-report=json",
-                     "-v"
-                 ],
-                 capture_output=True,
-                 text=True,
-                 timeout=120,
-                 cwd=Path.cwd()
-             )
-
-             # Parse coverage from JSON report
-             coverage_json_path = Path(".coverage.json")
-             if not coverage_json_path.exists():
-                 logger.warning("Coverage JSON not generated")
-                 return CoverageResult(
-                     coverage=0.0,
-                     missing_lines=[],
-                     total_statements=0,
-                     covered_statements=0
-                 )
-
-             with open(coverage_json_path) as f:
-                 coverage_data = json.load(f)
-
-             # Find coverage for our specific source file
-             source_key = str(source_file)
-             file_coverage = None
-             for key in coverage_data.get("files", {}).keys():
-                 if source_file.name in key or source_key in key:
-                     file_coverage = coverage_data["files"][key]
-                     break
-
-             if not file_coverage:
-                 logger.warning(f"No coverage data found for {source_file}")
-                 return CoverageResult(
-                     coverage=0.0,
-                     missing_lines=[],
-                     total_statements=0,
-                     covered_statements=0
-                 )
-
-             # Extract metrics
-             total_statements = file_coverage["summary"]["num_statements"]
-             covered_statements = file_coverage["summary"]["covered_lines"]
-             coverage_pct = file_coverage["summary"]["percent_covered"] / 100.0
-             missing_lines = file_coverage["missing_lines"]
-
-             logger.info(f"Coverage: {coverage_pct:.1%} ({covered_statements}/{total_statements} statements)")
-
-             return CoverageResult(
-                 coverage=coverage_pct,
-                 missing_lines=missing_lines,
-                 total_statements=total_statements,
-                 covered_statements=covered_statements
-             )
-
-         except subprocess.TimeoutExpired:
-             logger.error("Coverage analysis timeout")
-             return CoverageResult(coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0)
-         except Exception as e:
-             logger.error(f"Coverage analysis error: {e}", exc_info=True)
-             return CoverageResult(coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0)
-
949
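`_run_coverage_analysis` reads the JSON report that `--cov-report=json` produces. An unconfigured pytest-cov run writes that report to `coverage.json` in the working directory (the `.coverage.json` path checked above would only exist if the project's coverage configuration points there), and each per-file entry carries the `summary` and `missing_lines` fields the method consumes. A minimal reading sketch, with the default report name assumed and figures invented:

```python
# Minimal illustration of the coverage.py JSON report fields used above.
# The report path assumes pytest-cov's default output name; figures are invented.
import json
from pathlib import Path

report = json.loads(Path("coverage.json").read_text())
for path, data in report["files"].items():
    summary = data["summary"]
    print(
        f"{path}: {summary['percent_covered']:.1f}% "
        f"({summary['covered_lines']}/{summary['num_statements']} statements), "
        f"missing lines: {data['missing_lines']}"
    )
```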
-     def _extract_uncovered_lines(self, source_file: Path, missing_lines: list[int]) -> str:
-         """Extract source code for uncovered lines.
-
-         Args:
-             source_file: Path to source file
-             missing_lines: List of uncovered line numbers
-
-         Returns:
-             Formatted string with uncovered code sections
-         """
-         if not missing_lines:
-             return "No uncovered lines"
-
-         try:
-             source_lines = source_file.read_text().split("\n")
-
-             # Group consecutive lines into ranges
-             ranges = []
-             start = missing_lines[0]
-             end = start
-
-             for line_num in missing_lines[1:]:
-                 if line_num == end + 1:
-                     end = line_num
-                 else:
-                     ranges.append((start, end))
-                     start = line_num
-                     end = start
-             ranges.append((start, end))
-
-             # Extract code for each range with context
-             uncovered_sections = []
-             for start, end in ranges[:10]:  # Limit to 10 ranges
-                 context_start = max(0, start - 3)
-                 context_end = min(len(source_lines), end + 2)
-
-                 section = []
-                 section.append(f"Lines {start}-{end}:")
-                 for i in range(context_start, context_end):
-                     line_marker = ">>>" if start <= i + 1 <= end else "   "
-                     section.append(f"{line_marker} {i + 1:4d}: {source_lines[i]}")
-
-                 uncovered_sections.append("\n".join(section))
-
-             return "\n\n".join(uncovered_sections)
-
-         except Exception as e:
-             logger.error(f"Error extracting uncovered lines: {e}")
-             return f"Error extracting lines: {e}"
-
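The heart of `_extract_uncovered_lines` is the run-length grouping of consecutive line numbers into ranges. A self-contained version of just that step, with a small worked example:

```python
# Standalone version of the consecutive-line grouping used above.
def group_into_ranges(missing_lines: list[int]) -> list[tuple[int, int]]:
    if not missing_lines:
        return []
    ranges = []
    start = end = missing_lines[0]
    for line_num in missing_lines[1:]:
        if line_num == end + 1:
            end = line_num          # extend the current run
        else:
            ranges.append((start, end))
            start = end = line_num  # begin a new run
    ranges.append((start, end))
    return ranges

print(group_into_ranges([3, 4, 5, 9, 10, 14]))  # -> [(3, 5), (9, 10), (14, 14)]
```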
-     def _generate_with_coverage_target(
-         self,
-         module_name: str,
-         module_path: str,
-         source_file: Path,
-         source_code: str,
-         test_file: Path,
-         initial_test_content: str
-     ) -> str | None:
-         """Generate tests iteratively until coverage target met (Phase 3).
-
-         Process:
-         1. Start with initial tests
-         2. Run coverage analysis
-         3. If target not met, identify uncovered lines
-         4. Ask Claude to add tests for uncovered code
-         5. Repeat until target coverage reached or max iterations
-
-         Args:
-             module_name: Name of module being tested
-             module_path: Python import path
-             source_file: Path to source file
-             source_code: Source code content
-             test_file: Path to test file
-             initial_test_content: Initial test content from Phase 1/2
-
-         Returns:
-             Final test content with improved coverage or None if failed
-         """
-         import os
-
-         api_key = os.getenv("ANTHROPIC_API_KEY")
-         if not api_key:
-             logger.error("ANTHROPIC_API_KEY not set")
-             return None
-
-         logger.info(f"📊 Phase 3: Coverage-guided generation enabled (target: {self.target_coverage:.0%})")
-
-         test_content = initial_test_content
-         max_coverage_iterations = 5
-
-         for iteration in range(max_coverage_iterations):
-             logger.info(f"📈 Coverage iteration {iteration + 1}/{max_coverage_iterations} for {module_name}")
-
-             # Write current tests
-             test_file.write_text(test_content)
-
-             # Run coverage analysis
-             coverage_result = self._run_coverage_analysis(test_file, source_file)
-
-             logger.info(f"Current coverage: {coverage_result.coverage:.1%}, target: {self.target_coverage:.0%}")
-
-             # Check if target reached
-             if coverage_result.coverage >= self.target_coverage:
-                 logger.info(f"✅ Coverage target reached: {coverage_result.coverage:.1%}")
-                 return test_content
-
-             # Not enough progress
-             if iteration > 0 and coverage_result.coverage <= 0.05:
-                 logger.warning("⚠️ Coverage not improving, stopping")
-                 break
-
-             # Identify uncovered code
-             uncovered_code = self._extract_uncovered_lines(source_file, coverage_result.missing_lines)
-
-             # Ask Claude to add tests for uncovered lines
-             refinement_prompt = f"""Current coverage: {coverage_result.coverage:.1%}
- Target coverage: {self.target_coverage:.0%}
- Missing: {len(coverage_result.missing_lines)} lines
-
- UNCOVERED CODE:
- {uncovered_code[:3000]}
-
- Please ADD tests to cover these specific uncovered lines. Requirements:
- 1. Focus ONLY on the uncovered lines shown above
- 2. Add new test methods to the existing test classes
- 3. Return the COMPLETE test file with additions (not just new tests)
- 4. Use appropriate mocking for external dependencies
- 5. Keep existing tests intact - just add new ones
-
- Return ONLY the complete Python test file with additions, no explanations."""
-
-             # Build conversation with caching
-             messages = [
-                 {
-                     "role": "user",
-                     "content": [
-                         {"type": "text", "text": "You are an expert Python test engineer. Examples:", "cache_control": {"type": "ephemeral"}},
-                         {"type": "text", "text": self._get_example_tests(), "cache_control": {"type": "ephemeral"}},
-                         {"type": "text", "text": f"Source code:\n```python\n{source_code}\n```", "cache_control": {"type": "ephemeral"}},
-                         {"type": "text", "text": f"Current tests:\n```python\n{test_content}\n```"},
-                         {"type": "text", "text": refinement_prompt}
-                     ]
-                 }
-             ]
-
-             # Call LLM for coverage improvement
-             try:
-                 import anthropic
-                 client = anthropic.Anthropic(api_key=api_key)
-                 response = client.messages.create(
-                     model="claude-sonnet-4-5",
-                     max_tokens=40000,  # Very generous total budget for coverage improvement
-                     thinking={"type": "enabled", "budget_tokens": 20000},  # Thorough thinking for coverage gaps
-                     messages=messages,
-                     timeout=900.0,  # 15 minutes timeout for coverage-guided iterations
-                 )
-
-                 refined_content = None
-                 for block in response.content:
-                     if block.type == "text":
-                         refined_content = block.text.strip()
-                         break
-
-                 if not refined_content:
-                     logger.warning(f"No content in coverage refinement iteration {iteration + 1}")
-                     break
-
-                 # Clean up
-                 if refined_content.startswith("```python"):
-                     refined_content = refined_content[len("```python"):].strip()
-                 if refined_content.endswith("```"):
-                     refined_content = refined_content[:-3].strip()
-
-                 test_content = refined_content
-                 logger.info(f"🔄 Coverage iteration {iteration + 1} complete, retrying analysis...")
-
-             except Exception as e:
-                 logger.error(f"Coverage refinement error on iteration {iteration + 1}: {e}")
-                 break
-
-         # Return best attempt
-         logger.info(f"Coverage-guided generation complete: final coverage ~{coverage_result.coverage:.1%}")
-         return test_content
-
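The three `cache_control` markers in the request above use Anthropic prompt caching so that the example tests and the source code are cached across coverage iterations, and only the changing test file and refinement prompt are re-sent at the full input rate; because extended thinking is enabled, the response also contains thinking blocks, which is why the loop filters for `block.type == "text"`. A small sketch of how one might confirm the cache is actually being reused between iterations (the usage attribute names follow recent versions of the anthropic SDK and should be treated as an assumption):

```python
# Illustrative helper: report prompt-cache activity from an Anthropic messages response.
# Attribute names assume a recent anthropic SDK release.
def log_cache_usage(response) -> None:
    usage = response.usage
    print("fresh input tokens:   ", usage.input_tokens)
    print("cache writes (tokens):", getattr(usage, "cache_creation_input_tokens", "n/a"))
    print("cache reads (tokens): ", getattr(usage, "cache_read_input_tokens", "n/a"))
```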
-     def _validate_test_file(self, test_file: Path) -> bool:
-         """Validate test file can be imported and has valid syntax.
-
-         Args:
-             test_file: Path to test file
-
-         Returns:
-             True if valid, False otherwise
-         """
-         # Step 1: Check for syntax errors with ast.parse (fast)
-         try:
-             import ast
-             content = test_file.read_text()
-             ast.parse(content)
-             logger.info(f"✓ Syntax check passed for {test_file.name}")
-         except SyntaxError as e:
-             logger.error(f"❌ Syntax error in {test_file.name} at line {e.lineno}: {e.msg}")
-             return False
-         except Exception as e:
-             logger.error(f"❌ Cannot parse {test_file.name}: {e}")
-             return False
-
-         # Step 2: Check if pytest can collect the tests
-         try:
-             result = subprocess.run(
-                 [sys.executable, "-m", "pytest", "--collect-only", str(test_file)],
-                 capture_output=True,
-                 text=True,
-                 timeout=10,
-             )
-
-             if result.returncode != 0:
-                 logger.error(f"❌ Pytest collection failed for {test_file.name}")
-                 logger.error(f"   Error: {result.stderr[:500]}")
-                 return False
-
-             logger.info(f"✓ Pytest collection passed for {test_file.name}")
-             return True
-
-         except subprocess.TimeoutExpired:
-             logger.error(f"❌ Validation timeout for {test_file.name}")
-             return False
-         except Exception as e:
-             logger.error(f"❌ Validation exception for {test_file}: {e}")
-             return False
-
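The two-step validation is deliberately cheap: `ast.parse` catches syntax errors without spawning a process, and `pytest --collect-only` additionally catches import-time failures (missing modules, broken fixtures) without running any tests. A standalone equivalent, with a hypothetical test path:

```python
# Rough standalone equivalent of the two-step validation above; the path is a placeholder.
import ast
import subprocess
import sys
from pathlib import Path

test_file = Path("tests/test_example.py")  # hypothetical

try:
    ast.parse(test_file.read_text())        # step 1: syntax only
except SyntaxError as exc:
    sys.exit(f"syntax error at line {exc.lineno}: {exc.msg}")

result = subprocess.run(                     # step 2: can pytest collect it?
    [sys.executable, "-m", "pytest", "--collect-only", str(test_file)],
    capture_output=True, text=True, timeout=10,
)
print("collectable" if result.returncode == 0 else f"collection failed:\n{result.stderr[:500]}")
```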
-     def _count_tests(self) -> int:
-         """Count total tests in generated files.
-
-         Returns:
-             Number of tests
-         """
-         try:
-             result = subprocess.run(
-                 [sys.executable, "-m", "pytest", "--collect-only", "-q", str(self.output_dir)],
-                 capture_output=True,
-                 text=True,
-                 timeout=30,
-             )
-             # Parse output like "123 tests collected"
-             for line in result.stdout.split("\n"):
-                 if "tests collected" in line:
-                     return int(line.split()[0])
-             return 0
-         except Exception:
-             return 0
-
-
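`_count_tests` scrapes the summary line that `pytest --collect-only -q` prints after the node IDs; in recent pytest versions that line looks roughly like `37 tests collected in 0.21s` (counts invented here), and the method takes its first token as the count. A tiny sketch of that parsing step on its own:

```python
# Standalone version of the summary-line parsing used above; sample output is invented.
def parse_collected_count(stdout: str) -> int:
    for line in stdout.split("\n"):
        if "tests collected" in line:
            return int(line.split()[0])
    return 0

sample = "tests/test_parser.py::test_roundtrip\n\n2 tests collected in 0.04s\n"
print(parse_collected_count(sample))  # -> 2
```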
- def run_batch_generation(
-     batch_num: int,
-     modules_json: str,
-     enable_refinement: bool = True,
-     enable_coverage_guided: bool = False
- ) -> None:
-     """Run test generation for a batch.
-
-     Args:
-         batch_num: Batch number
-         modules_json: JSON string of modules to process
-         enable_refinement: Enable Phase 2 multi-turn refinement (default: True)
-         enable_coverage_guided: Enable Phase 3 coverage-guided generation (default: False)
-     """
-     # Parse modules
-     modules = json.loads(modules_json)
-
-     # Create agent with Phase 2 & 3 configuration
-     agent_id = f"test-gen-batch{batch_num}"
-     generator = AutonomousTestGenerator(
-         agent_id,
-         batch_num,
-         modules,
-         enable_refinement=enable_refinement,
-         enable_coverage_guided=enable_coverage_guided
-     )
-
-     # Generate tests
-     print(f"Starting autonomous test generation for batch {batch_num}")
-     print(f"Modules to process: {len(modules)}")
-     print(f"Agent ID: {agent_id}")
-     print("\nENHANCEMENTS:")
-     print("  Phase 1: Extended thinking + Prompt caching + Workflow detection")
-     print(f"  Phase 2: Multi-turn refinement = {'ENABLED' if enable_refinement else 'DISABLED'}")
-     print(f"  Phase 3: Coverage-guided = {'ENABLED' if enable_coverage_guided else 'DISABLED'}")
-     print("\nMonitor at: http://localhost:8000\n")
-
-     results = generator.generate_all()
-
-     # Report results
-     print(f"\n{'='*60}")
-     print(f"Batch {batch_num} Complete!")
-     print(f"{'='*60}")
-     print(f"Modules processed: {results['completed']}/{results['total_modules']}")
-     print(f"Tests generated: {results['tests_generated']}")
-     print(f"Files created: {len(results['files_created'])}")
-     print(f"Failed: {results['failed']}")
-
-
- if __name__ == "__main__":
-     import sys
-
-     if len(sys.argv) < 3:
-         print("Usage: python -m empathy_os.workflows.autonomous_test_gen <batch_num> <modules_json> [--no-refinement] [--coverage-guided]")
-         print("\nOptions:")
-         print("  --no-refinement    Disable Phase 2 multi-turn refinement")
-         print("  --coverage-guided  Enable Phase 3 coverage-guided generation (slower)")
-         sys.exit(1)
-
-     batch_num = int(sys.argv[1])
-     modules_json = sys.argv[2]
-
-     # Parse optional flags
-     enable_refinement = "--no-refinement" not in sys.argv
-     enable_coverage_guided = "--coverage-guided" in sys.argv
-
-     run_batch_generation(batch_num, modules_json, enable_refinement, enable_coverage_guided)
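The entry point above can also be driven directly from Python rather than via `python -m empathy_os.workflows.autonomous_test_gen <batch_num> <modules_json>`. The module list below is a made-up example; the exact JSON schema the generator expects is defined by `AutonomousTestGenerator` elsewhere in the package:

```python
# Hypothetical direct invocation; the modules schema shown is an assumption.
import json

from empathy_os.workflows.autonomous_test_gen import run_batch_generation

modules = [
    {"name": "empathy_os.config", "path": "src/empathy_os/config.py"},  # invented entry
]

run_batch_generation(
    batch_num=1,
    modules_json=json.dumps(modules),
    enable_refinement=True,        # Phase 2 on
    enable_coverage_guided=False,  # Phase 3 off (slower)
)
```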