empathy-framework 5.2.1-py3-none-any.whl → 5.4.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (480)
  1. empathy_framework-5.4.0.dist-info/METADATA +47 -0
  2. empathy_framework-5.4.0.dist-info/RECORD +8 -0
  3. {empathy_framework-5.2.1.dist-info → empathy_framework-5.4.0.dist-info}/top_level.txt +0 -1
  4. empathy_healthcare_plugin/__init__.py +12 -11
  5. empathy_llm_toolkit/__init__.py +12 -26
  6. empathy_os/__init__.py +12 -356
  7. empathy_software_plugin/__init__.py +12 -11
  8. empathy_framework-5.2.1.dist-info/METADATA +0 -1002
  9. empathy_framework-5.2.1.dist-info/RECORD +0 -478
  10. empathy_framework-5.2.1.dist-info/entry_points.txt +0 -26
  11. empathy_framework-5.2.1.dist-info/licenses/LICENSE +0 -201
  12. empathy_framework-5.2.1.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +0 -101
  13. empathy_healthcare_plugin/monitors/__init__.py +0 -9
  14. empathy_healthcare_plugin/monitors/clinical_protocol_monitor.py +0 -315
  15. empathy_healthcare_plugin/monitors/monitoring/__init__.py +0 -44
  16. empathy_healthcare_plugin/monitors/monitoring/protocol_checker.py +0 -300
  17. empathy_healthcare_plugin/monitors/monitoring/protocol_loader.py +0 -214
  18. empathy_healthcare_plugin/monitors/monitoring/sensor_parsers.py +0 -306
  19. empathy_healthcare_plugin/monitors/monitoring/trajectory_analyzer.py +0 -389
  20. empathy_healthcare_plugin/protocols/cardiac.json +0 -93
  21. empathy_healthcare_plugin/protocols/post_operative.json +0 -92
  22. empathy_healthcare_plugin/protocols/respiratory.json +0 -92
  23. empathy_healthcare_plugin/protocols/sepsis.json +0 -141
  24. empathy_llm_toolkit/README.md +0 -553
  25. empathy_llm_toolkit/agent_factory/__init__.py +0 -53
  26. empathy_llm_toolkit/agent_factory/adapters/__init__.py +0 -85
  27. empathy_llm_toolkit/agent_factory/adapters/autogen_adapter.py +0 -312
  28. empathy_llm_toolkit/agent_factory/adapters/crewai_adapter.py +0 -483
  29. empathy_llm_toolkit/agent_factory/adapters/haystack_adapter.py +0 -298
  30. empathy_llm_toolkit/agent_factory/adapters/langchain_adapter.py +0 -362
  31. empathy_llm_toolkit/agent_factory/adapters/langgraph_adapter.py +0 -333
  32. empathy_llm_toolkit/agent_factory/adapters/native.py +0 -228
  33. empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +0 -423
  34. empathy_llm_toolkit/agent_factory/base.py +0 -305
  35. empathy_llm_toolkit/agent_factory/crews/__init__.py +0 -67
  36. empathy_llm_toolkit/agent_factory/crews/code_review.py +0 -1113
  37. empathy_llm_toolkit/agent_factory/crews/health_check.py +0 -1262
  38. empathy_llm_toolkit/agent_factory/crews/refactoring.py +0 -1128
  39. empathy_llm_toolkit/agent_factory/crews/security_audit.py +0 -1018
  40. empathy_llm_toolkit/agent_factory/decorators.py +0 -287
  41. empathy_llm_toolkit/agent_factory/factory.py +0 -558
  42. empathy_llm_toolkit/agent_factory/framework.py +0 -193
  43. empathy_llm_toolkit/agent_factory/memory_integration.py +0 -328
  44. empathy_llm_toolkit/agent_factory/resilient.py +0 -320
  45. empathy_llm_toolkit/agents_md/__init__.py +0 -22
  46. empathy_llm_toolkit/agents_md/loader.py +0 -218
  47. empathy_llm_toolkit/agents_md/parser.py +0 -271
  48. empathy_llm_toolkit/agents_md/registry.py +0 -307
  49. empathy_llm_toolkit/claude_memory.py +0 -466
  50. empathy_llm_toolkit/cli/__init__.py +0 -8
  51. empathy_llm_toolkit/cli/sync_claude.py +0 -487
  52. empathy_llm_toolkit/code_health.py +0 -1313
  53. empathy_llm_toolkit/commands/__init__.py +0 -51
  54. empathy_llm_toolkit/commands/context.py +0 -375
  55. empathy_llm_toolkit/commands/loader.py +0 -301
  56. empathy_llm_toolkit/commands/models.py +0 -231
  57. empathy_llm_toolkit/commands/parser.py +0 -371
  58. empathy_llm_toolkit/commands/registry.py +0 -429
  59. empathy_llm_toolkit/config/__init__.py +0 -29
  60. empathy_llm_toolkit/config/unified.py +0 -291
  61. empathy_llm_toolkit/context/__init__.py +0 -22
  62. empathy_llm_toolkit/context/compaction.py +0 -455
  63. empathy_llm_toolkit/context/manager.py +0 -434
  64. empathy_llm_toolkit/contextual_patterns.py +0 -361
  65. empathy_llm_toolkit/core.py +0 -907
  66. empathy_llm_toolkit/git_pattern_extractor.py +0 -435
  67. empathy_llm_toolkit/hooks/__init__.py +0 -24
  68. empathy_llm_toolkit/hooks/config.py +0 -306
  69. empathy_llm_toolkit/hooks/executor.py +0 -289
  70. empathy_llm_toolkit/hooks/registry.py +0 -302
  71. empathy_llm_toolkit/hooks/scripts/__init__.py +0 -39
  72. empathy_llm_toolkit/hooks/scripts/evaluate_session.py +0 -201
  73. empathy_llm_toolkit/hooks/scripts/first_time_init.py +0 -285
  74. empathy_llm_toolkit/hooks/scripts/pre_compact.py +0 -207
  75. empathy_llm_toolkit/hooks/scripts/session_end.py +0 -183
  76. empathy_llm_toolkit/hooks/scripts/session_start.py +0 -163
  77. empathy_llm_toolkit/hooks/scripts/suggest_compact.py +0 -225
  78. empathy_llm_toolkit/learning/__init__.py +0 -30
  79. empathy_llm_toolkit/learning/evaluator.py +0 -438
  80. empathy_llm_toolkit/learning/extractor.py +0 -514
  81. empathy_llm_toolkit/learning/storage.py +0 -560
  82. empathy_llm_toolkit/levels.py +0 -227
  83. empathy_llm_toolkit/pattern_confidence.py +0 -414
  84. empathy_llm_toolkit/pattern_resolver.py +0 -272
  85. empathy_llm_toolkit/pattern_summary.py +0 -350
  86. empathy_llm_toolkit/providers.py +0 -967
  87. empathy_llm_toolkit/routing/__init__.py +0 -32
  88. empathy_llm_toolkit/routing/model_router.py +0 -362
  89. empathy_llm_toolkit/security/IMPLEMENTATION_SUMMARY.md +0 -413
  90. empathy_llm_toolkit/security/PHASE2_COMPLETE.md +0 -384
  91. empathy_llm_toolkit/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +0 -271
  92. empathy_llm_toolkit/security/QUICK_REFERENCE.md +0 -316
  93. empathy_llm_toolkit/security/README.md +0 -262
  94. empathy_llm_toolkit/security/__init__.py +0 -62
  95. empathy_llm_toolkit/security/audit_logger.py +0 -929
  96. empathy_llm_toolkit/security/audit_logger_example.py +0 -152
  97. empathy_llm_toolkit/security/pii_scrubber.py +0 -640
  98. empathy_llm_toolkit/security/secrets_detector.py +0 -678
  99. empathy_llm_toolkit/security/secrets_detector_example.py +0 -304
  100. empathy_llm_toolkit/security/secure_memdocs.py +0 -1192
  101. empathy_llm_toolkit/security/secure_memdocs_example.py +0 -278
  102. empathy_llm_toolkit/session_status.py +0 -745
  103. empathy_llm_toolkit/state.py +0 -246
  104. empathy_llm_toolkit/utils/__init__.py +0 -5
  105. empathy_llm_toolkit/utils/tokens.py +0 -349
  106. empathy_os/adaptive/__init__.py +0 -13
  107. empathy_os/adaptive/task_complexity.py +0 -127
  108. empathy_os/agent_monitoring.py +0 -414
  109. empathy_os/cache/__init__.py +0 -117
  110. empathy_os/cache/base.py +0 -166
  111. empathy_os/cache/dependency_manager.py +0 -256
  112. empathy_os/cache/hash_only.py +0 -251
  113. empathy_os/cache/hybrid.py +0 -453
  114. empathy_os/cache/storage.py +0 -285
  115. empathy_os/cache_monitor.py +0 -356
  116. empathy_os/cache_stats.py +0 -298
  117. empathy_os/cli/__init__.py +0 -152
  118. empathy_os/cli/__main__.py +0 -12
  119. empathy_os/cli/commands/__init__.py +0 -1
  120. empathy_os/cli/commands/batch.py +0 -256
  121. empathy_os/cli/commands/cache.py +0 -248
  122. empathy_os/cli/commands/help.py +0 -331
  123. empathy_os/cli/commands/info.py +0 -140
  124. empathy_os/cli/commands/inspect.py +0 -436
  125. empathy_os/cli/commands/inspection.py +0 -57
  126. empathy_os/cli/commands/memory.py +0 -48
  127. empathy_os/cli/commands/metrics.py +0 -92
  128. empathy_os/cli/commands/orchestrate.py +0 -184
  129. empathy_os/cli/commands/patterns.py +0 -207
  130. empathy_os/cli/commands/profiling.py +0 -198
  131. empathy_os/cli/commands/provider.py +0 -98
  132. empathy_os/cli/commands/routing.py +0 -285
  133. empathy_os/cli/commands/setup.py +0 -96
  134. empathy_os/cli/commands/status.py +0 -235
  135. empathy_os/cli/commands/sync.py +0 -166
  136. empathy_os/cli/commands/tier.py +0 -121
  137. empathy_os/cli/commands/utilities.py +0 -114
  138. empathy_os/cli/commands/workflow.py +0 -575
  139. empathy_os/cli/core.py +0 -32
  140. empathy_os/cli/parsers/__init__.py +0 -68
  141. empathy_os/cli/parsers/batch.py +0 -118
  142. empathy_os/cli/parsers/cache 2.py +0 -65
  143. empathy_os/cli/parsers/cache.py +0 -65
  144. empathy_os/cli/parsers/help.py +0 -41
  145. empathy_os/cli/parsers/info.py +0 -26
  146. empathy_os/cli/parsers/inspect.py +0 -66
  147. empathy_os/cli/parsers/metrics.py +0 -42
  148. empathy_os/cli/parsers/orchestrate.py +0 -61
  149. empathy_os/cli/parsers/patterns.py +0 -54
  150. empathy_os/cli/parsers/provider.py +0 -40
  151. empathy_os/cli/parsers/routing.py +0 -110
  152. empathy_os/cli/parsers/setup.py +0 -42
  153. empathy_os/cli/parsers/status.py +0 -47
  154. empathy_os/cli/parsers/sync.py +0 -31
  155. empathy_os/cli/parsers/tier.py +0 -33
  156. empathy_os/cli/parsers/workflow.py +0 -77
  157. empathy_os/cli/utils/__init__.py +0 -1
  158. empathy_os/cli/utils/data.py +0 -242
  159. empathy_os/cli/utils/helpers.py +0 -68
  160. empathy_os/cli_legacy.py +0 -3957
  161. empathy_os/cli_minimal.py +0 -1159
  162. empathy_os/cli_router 2.py +0 -416
  163. empathy_os/cli_router.py +0 -437
  164. empathy_os/cli_unified.py +0 -814
  165. empathy_os/config/__init__.py +0 -66
  166. empathy_os/config/xml_config.py +0 -286
  167. empathy_os/config.py +0 -532
  168. empathy_os/coordination.py +0 -870
  169. empathy_os/core.py +0 -1511
  170. empathy_os/core_modules/__init__.py +0 -15
  171. empathy_os/cost_tracker.py +0 -626
  172. empathy_os/dashboard/__init__.py +0 -41
  173. empathy_os/dashboard/app 2.py +0 -512
  174. empathy_os/dashboard/app.py +0 -512
  175. empathy_os/dashboard/simple_server 2.py +0 -403
  176. empathy_os/dashboard/simple_server.py +0 -403
  177. empathy_os/dashboard/standalone_server 2.py +0 -536
  178. empathy_os/dashboard/standalone_server.py +0 -547
  179. empathy_os/discovery.py +0 -306
  180. empathy_os/emergence.py +0 -306
  181. empathy_os/exceptions.py +0 -123
  182. empathy_os/feedback_loops.py +0 -373
  183. empathy_os/hot_reload/README.md +0 -473
  184. empathy_os/hot_reload/__init__.py +0 -62
  185. empathy_os/hot_reload/config.py +0 -83
  186. empathy_os/hot_reload/integration.py +0 -229
  187. empathy_os/hot_reload/reloader.py +0 -298
  188. empathy_os/hot_reload/watcher.py +0 -183
  189. empathy_os/hot_reload/websocket.py +0 -177
  190. empathy_os/levels.py +0 -577
  191. empathy_os/leverage_points.py +0 -441
  192. empathy_os/logging_config.py +0 -261
  193. empathy_os/mcp/__init__.py +0 -10
  194. empathy_os/mcp/server.py +0 -506
  195. empathy_os/memory/__init__.py +0 -237
  196. empathy_os/memory/claude_memory.py +0 -469
  197. empathy_os/memory/config.py +0 -224
  198. empathy_os/memory/control_panel.py +0 -1290
  199. empathy_os/memory/control_panel_support.py +0 -145
  200. empathy_os/memory/cross_session.py +0 -845
  201. empathy_os/memory/edges.py +0 -179
  202. empathy_os/memory/encryption.py +0 -159
  203. empathy_os/memory/file_session.py +0 -770
  204. empathy_os/memory/graph.py +0 -570
  205. empathy_os/memory/long_term.py +0 -913
  206. empathy_os/memory/long_term_types.py +0 -99
  207. empathy_os/memory/mixins/__init__.py +0 -25
  208. empathy_os/memory/mixins/backend_init_mixin.py +0 -244
  209. empathy_os/memory/mixins/capabilities_mixin.py +0 -199
  210. empathy_os/memory/mixins/handoff_mixin.py +0 -208
  211. empathy_os/memory/mixins/lifecycle_mixin.py +0 -49
  212. empathy_os/memory/mixins/long_term_mixin.py +0 -352
  213. empathy_os/memory/mixins/promotion_mixin.py +0 -109
  214. empathy_os/memory/mixins/short_term_mixin.py +0 -182
  215. empathy_os/memory/nodes.py +0 -179
  216. empathy_os/memory/redis_bootstrap.py +0 -540
  217. empathy_os/memory/security/__init__.py +0 -31
  218. empathy_os/memory/security/audit_logger.py +0 -932
  219. empathy_os/memory/security/pii_scrubber.py +0 -640
  220. empathy_os/memory/security/secrets_detector.py +0 -678
  221. empathy_os/memory/short_term.py +0 -2150
  222. empathy_os/memory/simple_storage.py +0 -302
  223. empathy_os/memory/storage/__init__.py +0 -15
  224. empathy_os/memory/storage_backend.py +0 -167
  225. empathy_os/memory/summary_index.py +0 -583
  226. empathy_os/memory/types.py +0 -441
  227. empathy_os/memory/unified.py +0 -182
  228. empathy_os/meta_workflows/__init__.py +0 -74
  229. empathy_os/meta_workflows/agent_creator.py +0 -248
  230. empathy_os/meta_workflows/builtin_templates.py +0 -567
  231. empathy_os/meta_workflows/cli_commands/__init__.py +0 -56
  232. empathy_os/meta_workflows/cli_commands/agent_commands.py +0 -321
  233. empathy_os/meta_workflows/cli_commands/analytics_commands.py +0 -442
  234. empathy_os/meta_workflows/cli_commands/config_commands.py +0 -232
  235. empathy_os/meta_workflows/cli_commands/memory_commands.py +0 -182
  236. empathy_os/meta_workflows/cli_commands/template_commands.py +0 -354
  237. empathy_os/meta_workflows/cli_commands/workflow_commands.py +0 -382
  238. empathy_os/meta_workflows/cli_meta_workflows.py +0 -59
  239. empathy_os/meta_workflows/form_engine.py +0 -292
  240. empathy_os/meta_workflows/intent_detector.py +0 -409
  241. empathy_os/meta_workflows/models.py +0 -569
  242. empathy_os/meta_workflows/pattern_learner.py +0 -738
  243. empathy_os/meta_workflows/plan_generator.py +0 -384
  244. empathy_os/meta_workflows/session_context.py +0 -397
  245. empathy_os/meta_workflows/template_registry.py +0 -229
  246. empathy_os/meta_workflows/workflow.py +0 -984
  247. empathy_os/metrics/__init__.py +0 -12
  248. empathy_os/metrics/collector.py +0 -31
  249. empathy_os/metrics/prompt_metrics.py +0 -194
  250. empathy_os/models/__init__.py +0 -172
  251. empathy_os/models/__main__.py +0 -13
  252. empathy_os/models/adaptive_routing 2.py +0 -437
  253. empathy_os/models/adaptive_routing.py +0 -437
  254. empathy_os/models/auth_cli.py +0 -444
  255. empathy_os/models/auth_strategy.py +0 -450
  256. empathy_os/models/cli.py +0 -655
  257. empathy_os/models/empathy_executor.py +0 -354
  258. empathy_os/models/executor.py +0 -257
  259. empathy_os/models/fallback.py +0 -762
  260. empathy_os/models/provider_config.py +0 -282
  261. empathy_os/models/registry.py +0 -472
  262. empathy_os/models/tasks.py +0 -359
  263. empathy_os/models/telemetry/__init__.py +0 -71
  264. empathy_os/models/telemetry/analytics.py +0 -594
  265. empathy_os/models/telemetry/backend.py +0 -196
  266. empathy_os/models/telemetry/data_models.py +0 -431
  267. empathy_os/models/telemetry/storage.py +0 -489
  268. empathy_os/models/token_estimator.py +0 -420
  269. empathy_os/models/validation.py +0 -280
  270. empathy_os/monitoring/__init__.py +0 -52
  271. empathy_os/monitoring/alerts.py +0 -946
  272. empathy_os/monitoring/alerts_cli.py +0 -448
  273. empathy_os/monitoring/multi_backend.py +0 -271
  274. empathy_os/monitoring/otel_backend.py +0 -362
  275. empathy_os/optimization/__init__.py +0 -19
  276. empathy_os/optimization/context_optimizer.py +0 -272
  277. empathy_os/orchestration/__init__.py +0 -67
  278. empathy_os/orchestration/agent_templates.py +0 -707
  279. empathy_os/orchestration/config_store.py +0 -499
  280. empathy_os/orchestration/execution_strategies.py +0 -2111
  281. empathy_os/orchestration/meta_orchestrator.py +0 -1168
  282. empathy_os/orchestration/pattern_learner.py +0 -696
  283. empathy_os/orchestration/real_tools.py +0 -931
  284. empathy_os/pattern_cache.py +0 -187
  285. empathy_os/pattern_library.py +0 -542
  286. empathy_os/patterns/debugging/all_patterns.json +0 -81
  287. empathy_os/patterns/debugging/workflow_20260107_1770825e.json +0 -77
  288. empathy_os/patterns/refactoring_memory.json +0 -89
  289. empathy_os/persistence.py +0 -564
  290. empathy_os/platform_utils.py +0 -265
  291. empathy_os/plugins/__init__.py +0 -28
  292. empathy_os/plugins/base.py +0 -361
  293. empathy_os/plugins/registry.py +0 -268
  294. empathy_os/project_index/__init__.py +0 -32
  295. empathy_os/project_index/cli.py +0 -335
  296. empathy_os/project_index/index.py +0 -667
  297. empathy_os/project_index/models.py +0 -504
  298. empathy_os/project_index/reports.py +0 -474
  299. empathy_os/project_index/scanner.py +0 -777
  300. empathy_os/project_index/scanner_parallel 2.py +0 -291
  301. empathy_os/project_index/scanner_parallel.py +0 -291
  302. empathy_os/prompts/__init__.py +0 -61
  303. empathy_os/prompts/config.py +0 -77
  304. empathy_os/prompts/context.py +0 -177
  305. empathy_os/prompts/parser.py +0 -285
  306. empathy_os/prompts/registry.py +0 -313
  307. empathy_os/prompts/templates.py +0 -208
  308. empathy_os/redis_config.py +0 -302
  309. empathy_os/redis_memory.py +0 -799
  310. empathy_os/resilience/__init__.py +0 -56
  311. empathy_os/resilience/circuit_breaker.py +0 -256
  312. empathy_os/resilience/fallback.py +0 -179
  313. empathy_os/resilience/health.py +0 -300
  314. empathy_os/resilience/retry.py +0 -209
  315. empathy_os/resilience/timeout.py +0 -135
  316. empathy_os/routing/__init__.py +0 -43
  317. empathy_os/routing/chain_executor.py +0 -433
  318. empathy_os/routing/classifier.py +0 -217
  319. empathy_os/routing/smart_router.py +0 -234
  320. empathy_os/routing/workflow_registry.py +0 -343
  321. empathy_os/scaffolding/README.md +0 -589
  322. empathy_os/scaffolding/__init__.py +0 -35
  323. empathy_os/scaffolding/__main__.py +0 -14
  324. empathy_os/scaffolding/cli.py +0 -240
  325. empathy_os/socratic/__init__.py +0 -256
  326. empathy_os/socratic/ab_testing.py +0 -958
  327. empathy_os/socratic/blueprint.py +0 -533
  328. empathy_os/socratic/cli.py +0 -703
  329. empathy_os/socratic/collaboration.py +0 -1114
  330. empathy_os/socratic/domain_templates.py +0 -924
  331. empathy_os/socratic/embeddings.py +0 -738
  332. empathy_os/socratic/engine.py +0 -794
  333. empathy_os/socratic/explainer.py +0 -682
  334. empathy_os/socratic/feedback.py +0 -772
  335. empathy_os/socratic/forms.py +0 -629
  336. empathy_os/socratic/generator.py +0 -732
  337. empathy_os/socratic/llm_analyzer.py +0 -637
  338. empathy_os/socratic/mcp_server.py +0 -702
  339. empathy_os/socratic/session.py +0 -312
  340. empathy_os/socratic/storage.py +0 -667
  341. empathy_os/socratic/success.py +0 -730
  342. empathy_os/socratic/visual_editor.py +0 -860
  343. empathy_os/socratic/web_ui.py +0 -958
  344. empathy_os/telemetry/__init__.py +0 -39
  345. empathy_os/telemetry/agent_coordination 2.py +0 -478
  346. empathy_os/telemetry/agent_coordination.py +0 -476
  347. empathy_os/telemetry/agent_tracking 2.py +0 -350
  348. empathy_os/telemetry/agent_tracking.py +0 -348
  349. empathy_os/telemetry/approval_gates 2.py +0 -563
  350. empathy_os/telemetry/approval_gates.py +0 -551
  351. empathy_os/telemetry/cli.py +0 -1231
  352. empathy_os/telemetry/commands/__init__.py +0 -14
  353. empathy_os/telemetry/commands/dashboard_commands.py +0 -696
  354. empathy_os/telemetry/event_streaming 2.py +0 -405
  355. empathy_os/telemetry/event_streaming.py +0 -405
  356. empathy_os/telemetry/feedback_loop 2.py +0 -557
  357. empathy_os/telemetry/feedback_loop.py +0 -554
  358. empathy_os/telemetry/usage_tracker.py +0 -591
  359. empathy_os/templates.py +0 -754
  360. empathy_os/test_generator/__init__.py +0 -38
  361. empathy_os/test_generator/__main__.py +0 -14
  362. empathy_os/test_generator/cli.py +0 -234
  363. empathy_os/test_generator/generator.py +0 -355
  364. empathy_os/test_generator/risk_analyzer.py +0 -216
  365. empathy_os/tier_recommender.py +0 -384
  366. empathy_os/tools.py +0 -183
  367. empathy_os/trust/__init__.py +0 -28
  368. empathy_os/trust/circuit_breaker.py +0 -579
  369. empathy_os/trust_building.py +0 -527
  370. empathy_os/validation/__init__.py +0 -19
  371. empathy_os/validation/xml_validator.py +0 -281
  372. empathy_os/vscode_bridge 2.py +0 -173
  373. empathy_os/vscode_bridge.py +0 -173
  374. empathy_os/workflow_commands.py +0 -780
  375. empathy_os/workflow_patterns/__init__.py +0 -33
  376. empathy_os/workflow_patterns/behavior.py +0 -249
  377. empathy_os/workflow_patterns/core.py +0 -76
  378. empathy_os/workflow_patterns/output.py +0 -99
  379. empathy_os/workflow_patterns/registry.py +0 -255
  380. empathy_os/workflow_patterns/structural.py +0 -288
  381. empathy_os/workflows/__init__.py +0 -539
  382. empathy_os/workflows/autonomous_test_gen.py +0 -1268
  383. empathy_os/workflows/base.py +0 -2667
  384. empathy_os/workflows/batch_processing.py +0 -342
  385. empathy_os/workflows/bug_predict.py +0 -1084
  386. empathy_os/workflows/builder.py +0 -273
  387. empathy_os/workflows/caching.py +0 -253
  388. empathy_os/workflows/code_review.py +0 -1048
  389. empathy_os/workflows/code_review_adapters.py +0 -312
  390. empathy_os/workflows/code_review_pipeline.py +0 -722
  391. empathy_os/workflows/config.py +0 -645
  392. empathy_os/workflows/dependency_check.py +0 -644
  393. empathy_os/workflows/document_gen/__init__.py +0 -25
  394. empathy_os/workflows/document_gen/config.py +0 -30
  395. empathy_os/workflows/document_gen/report_formatter.py +0 -162
  396. empathy_os/workflows/document_gen/workflow.py +0 -1426
  397. empathy_os/workflows/document_gen.py +0 -29
  398. empathy_os/workflows/document_manager.py +0 -216
  399. empathy_os/workflows/document_manager_README.md +0 -134
  400. empathy_os/workflows/documentation_orchestrator.py +0 -1205
  401. empathy_os/workflows/history.py +0 -510
  402. empathy_os/workflows/keyboard_shortcuts/__init__.py +0 -39
  403. empathy_os/workflows/keyboard_shortcuts/generators.py +0 -391
  404. empathy_os/workflows/keyboard_shortcuts/parsers.py +0 -416
  405. empathy_os/workflows/keyboard_shortcuts/prompts.py +0 -295
  406. empathy_os/workflows/keyboard_shortcuts/schema.py +0 -193
  407. empathy_os/workflows/keyboard_shortcuts/workflow.py +0 -509
  408. empathy_os/workflows/llm_base.py +0 -363
  409. empathy_os/workflows/manage_docs.py +0 -87
  410. empathy_os/workflows/manage_docs_README.md +0 -134
  411. empathy_os/workflows/manage_documentation.py +0 -821
  412. empathy_os/workflows/new_sample_workflow1.py +0 -149
  413. empathy_os/workflows/new_sample_workflow1_README.md +0 -150
  414. empathy_os/workflows/orchestrated_health_check.py +0 -849
  415. empathy_os/workflows/orchestrated_release_prep.py +0 -600
  416. empathy_os/workflows/output.py +0 -410
  417. empathy_os/workflows/perf_audit.py +0 -863
  418. empathy_os/workflows/pr_review.py +0 -762
  419. empathy_os/workflows/progress.py +0 -779
  420. empathy_os/workflows/progress_server.py +0 -322
  421. empathy_os/workflows/progressive/README 2.md +0 -454
  422. empathy_os/workflows/progressive/README.md +0 -454
  423. empathy_os/workflows/progressive/__init__ 2.py +0 -92
  424. empathy_os/workflows/progressive/__init__.py +0 -82
  425. empathy_os/workflows/progressive/cli 2.py +0 -242
  426. empathy_os/workflows/progressive/cli.py +0 -219
  427. empathy_os/workflows/progressive/core 2.py +0 -488
  428. empathy_os/workflows/progressive/core.py +0 -488
  429. empathy_os/workflows/progressive/orchestrator 2.py +0 -701
  430. empathy_os/workflows/progressive/orchestrator.py +0 -723
  431. empathy_os/workflows/progressive/reports 2.py +0 -528
  432. empathy_os/workflows/progressive/reports.py +0 -520
  433. empathy_os/workflows/progressive/telemetry 2.py +0 -280
  434. empathy_os/workflows/progressive/telemetry.py +0 -274
  435. empathy_os/workflows/progressive/test_gen 2.py +0 -514
  436. empathy_os/workflows/progressive/test_gen.py +0 -495
  437. empathy_os/workflows/progressive/workflow 2.py +0 -628
  438. empathy_os/workflows/progressive/workflow.py +0 -589
  439. empathy_os/workflows/refactor_plan.py +0 -694
  440. empathy_os/workflows/release_prep.py +0 -895
  441. empathy_os/workflows/release_prep_crew.py +0 -969
  442. empathy_os/workflows/research_synthesis.py +0 -404
  443. empathy_os/workflows/routing.py +0 -168
  444. empathy_os/workflows/secure_release.py +0 -593
  445. empathy_os/workflows/security_adapters.py +0 -297
  446. empathy_os/workflows/security_audit.py +0 -1329
  447. empathy_os/workflows/security_audit_phase3.py +0 -355
  448. empathy_os/workflows/seo_optimization.py +0 -633
  449. empathy_os/workflows/step_config.py +0 -234
  450. empathy_os/workflows/telemetry_mixin.py +0 -269
  451. empathy_os/workflows/test5.py +0 -125
  452. empathy_os/workflows/test5_README.md +0 -158
  453. empathy_os/workflows/test_coverage_boost_crew.py +0 -849
  454. empathy_os/workflows/test_gen/__init__.py +0 -52
  455. empathy_os/workflows/test_gen/ast_analyzer.py +0 -249
  456. empathy_os/workflows/test_gen/config.py +0 -88
  457. empathy_os/workflows/test_gen/data_models.py +0 -38
  458. empathy_os/workflows/test_gen/report_formatter.py +0 -289
  459. empathy_os/workflows/test_gen/test_templates.py +0 -381
  460. empathy_os/workflows/test_gen/workflow.py +0 -655
  461. empathy_os/workflows/test_gen.py +0 -54
  462. empathy_os/workflows/test_gen_behavioral.py +0 -477
  463. empathy_os/workflows/test_gen_parallel.py +0 -341
  464. empathy_os/workflows/test_lifecycle.py +0 -526
  465. empathy_os/workflows/test_maintenance.py +0 -627
  466. empathy_os/workflows/test_maintenance_cli.py +0 -590
  467. empathy_os/workflows/test_maintenance_crew.py +0 -840
  468. empathy_os/workflows/test_runner.py +0 -622
  469. empathy_os/workflows/tier_tracking.py +0 -531
  470. empathy_os/workflows/xml_enhanced_crew.py +0 -285
  471. empathy_software_plugin/SOFTWARE_PLUGIN_README.md +0 -57
  472. empathy_software_plugin/cli/__init__.py +0 -120
  473. empathy_software_plugin/cli/inspect.py +0 -362
  474. empathy_software_plugin/cli.py +0 -574
  475. empathy_software_plugin/plugin.py +0 -188
  476. workflow_scaffolding/__init__.py +0 -11
  477. workflow_scaffolding/__main__.py +0 -12
  478. workflow_scaffolding/cli.py +0 -206
  479. workflow_scaffolding/generator.py +0 -265
  480. {empathy_framework-5.2.1.dist-info → empathy_framework-5.4.0.dist-info}/WHEEL +0 -0
empathy_os/workflows/autonomous_test_gen.py
@@ -1,1268 +0,0 @@
- """Autonomous Test Generation with Dashboard Integration - Enhanced Edition.
-
- Generates behavioral tests with real-time monitoring via Agent Coordination Dashboard.
-
- ENHANCEMENTS (Phase 1):
- - Extended thinking mode for better test planning
- - Prompt caching for 90% cost reduction
- - Full source code (no truncation)
- - Workflow-specific prompts with mocking templates
- - Few-shot learning with examples
-
- ENHANCEMENTS (Phase 2 - Multi-Turn Refinement):
- - Iterative test generation with validation loop
- - Automatic failure detection and fixing
- - Conversation history for context preservation
-
- ENHANCEMENTS (Phase 3 - Coverage-Guided Generation):
- - Coverage analysis integration
- - Iterative coverage improvement targeting uncovered lines
- - Systematic path to 80%+ coverage
-
- Copyright 2026 Smart-AI-Memory
- Licensed under Apache 2.0
- """
-
- import json
- import logging
- import re
- import subprocess
- import sys
- from dataclasses import dataclass
- from pathlib import Path
- from typing import Any
-
- from empathy_os.memory.short_term import RedisShortTermMemory
- from empathy_os.telemetry.agent_tracking import HeartbeatCoordinator
- from empathy_os.telemetry.event_streaming import EventStreamer
- from empathy_os.telemetry.feedback_loop import FeedbackLoop
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class ValidationResult:
-     """Result of pytest validation."""
-     passed: bool
-     failures: str
-     error_count: int
-     output: str
-
-
- @dataclass
- class CoverageResult:
-     """Result of coverage analysis."""
-     coverage: float
-     missing_lines: list[int]
-     total_statements: int
-     covered_statements: int
-
-
- class AutonomousTestGenerator:
-     """Generate tests autonomously with dashboard monitoring and Anthropic best practices."""
-
-     def __init__(
-         self,
-         agent_id: str,
-         batch_num: int,
-         modules: list[dict[str, Any]],
-         enable_refinement: bool = True,
-         max_refinement_iterations: int = 3,
-         enable_coverage_guided: bool = False,
-         target_coverage: float = 0.80
-     ):
-         """Initialize generator.
-
-         Args:
-             agent_id: Unique agent identifier
-             batch_num: Batch number (1-18)
-             modules: List of modules to generate tests for
-             enable_refinement: Enable Phase 2 multi-turn refinement (default: True)
-             max_refinement_iterations: Max iterations for refinement (default: 3)
-             enable_coverage_guided: Enable Phase 3 coverage-guided generation (default: False)
-             target_coverage: Target coverage percentage (default: 0.80 = 80%)
-         """
-         self.agent_id = agent_id
-         self.batch_num = batch_num
-         self.modules = modules
-
-         # Phase 2 & 3 configuration
-         self.enable_refinement = enable_refinement
-         self.max_refinement_iterations = max_refinement_iterations
-         self.enable_coverage_guided = enable_coverage_guided
-         self.target_coverage = target_coverage
-
-         # Initialize memory backend for dashboard integration
-         try:
-             self.memory = RedisShortTermMemory()
-             self.coordinator = HeartbeatCoordinator(memory=self.memory, enable_streaming=True)
-             self.event_streamer = EventStreamer(memory=self.memory)
-             self.feedback_loop = FeedbackLoop(memory=self.memory)
-         except Exception as e:
-             logger.warning(f"Failed to initialize memory backend: {e}")
-             self.coordinator = HeartbeatCoordinator()
-             self.event_streamer = None
-             self.feedback_loop = None
-
-         self.output_dir = Path(f"tests/behavioral/generated/batch{batch_num}")
-         self.output_dir.mkdir(parents=True, exist_ok=True)
-
-         logger.info(f"Generator initialized: refinement={enable_refinement}, coverage_guided={enable_coverage_guided}")
-
-     def generate_all(self) -> dict[str, Any]:
-         """Generate tests for all modules with progress tracking.
-
-         Returns:
-             Summary of generation results
-         """
-         # Start tracking
-         self.coordinator.start_heartbeat(
-             agent_id=self.agent_id,
-             metadata={
-                 "batch": self.batch_num,
-                 "total_modules": len(self.modules),
-                 "workflow": "autonomous_test_generation",
-             }
-         )
-
-         try:
-             results = {
-                 "batch": self.batch_num,
-                 "total_modules": len(self.modules),
-                 "completed": 0,
-                 "failed": 0,
-                 "tests_generated": 0,
-                 "files_created": [],
-             }
-
-             for i, module in enumerate(self.modules):
-                 progress = (i + 1) / len(self.modules)
-                 module_name = module["file"].replace("src/empathy_os/", "")
-
-                 # Update dashboard
-                 self.coordinator.beat(
-                     status="running",
-                     progress=progress,
-                     current_task=f"Generating tests for {module_name}"
-                 )
-
-                 try:
-                     # Generate tests for this module
-                     test_file = self._generate_module_tests(module)
-                     if test_file:
-                         results["completed"] += 1
-                         results["files_created"].append(str(test_file))
-                         logger.info(f"✅ Generated tests for {module_name}")
-
-                         # Send event to dashboard
-                         if self.event_streamer:
-                             self.event_streamer.publish_event(
-                                 event_type="test_file_created",
-                                 data={
-                                     "agent_id": self.agent_id,
-                                     "module": module_name,
-                                     "test_file": str(test_file),
-                                     "batch": self.batch_num
-                                 }
-                             )
-
-                         # Record quality feedback
-                         if self.feedback_loop:
-                             self.feedback_loop.record_feedback(
-                                 workflow_name="test-generation",
-                                 stage_name="generation",
-                                 tier="capable",
-                                 quality_score=1.0, # Success
-                                 metadata={"module": module_name, "status": "success", "batch": self.batch_num}
-                             )
-                     else:
-                         results["failed"] += 1
-                         logger.warning(f"⚠️ Skipped {module_name} (validation failed)")
-
-                         # Record failure feedback
-                         if self.feedback_loop:
-                             self.feedback_loop.record_feedback(
-                                 workflow_name="test-generation",
-                                 stage_name="validation",
-                                 tier="capable",
-                                 quality_score=0.0, # Failure
-                                 metadata={"module": module_name, "status": "validation_failed", "batch": self.batch_num}
-                             )
-
-                 except Exception as e:
-                     results["failed"] += 1
-                     logger.error(f"❌ Error generating tests for {module_name}: {e}")
-
-                     # Send error event
-                     if self.event_streamer:
-                         self.event_streamer.publish_event(
-                             event_type="test_generation_error",
-                             data={
-                                 "agent_id": self.agent_id,
-                                 "module": module_name,
-                                 "error": str(e),
-                                 "batch": self.batch_num
-                             }
-                         )
-
-             # Count total tests
-             results["tests_generated"] = self._count_tests()
-
-             # Final update
-             self.coordinator.beat(
-                 status="completed",
-                 progress=1.0,
-                 current_task=f"Completed: {results['completed']}/{results['total_modules']} modules"
-             )
-
-             return results
-
-         except Exception as e:
-             # Error tracking
-             self.coordinator.beat(
-                 status="failed",
-                 progress=0.0,
-                 current_task=f"Failed: {str(e)}"
-             )
-             raise
-
-         finally:
-             # Stop heartbeat
-             self.coordinator.stop_heartbeat(
-                 final_status="completed" if results["completed"] > 0 else "failed"
-             )
-
-     def _generate_module_tests(self, module: dict[str, Any]) -> Path | None:
-         """Generate tests for a single module using LLM agent.
-
-         Args:
-             module: Module info dict with 'file', 'total', 'missing', etc.
-
-         Returns:
-             Path to generated test file, or None if skipped
-         """
-         source_file = Path(module["file"])
-         module_name = source_file.stem
-
-         # Skip if module doesn't exist
-         if not source_file.exists():
-             logger.warning(f"Source file not found: {source_file}")
-             return None
-
-         # Read source to understand what needs testing
-         try:
-             source_code = source_file.read_text()
-         except Exception as e:
-             logger.error(f"Cannot read {source_file}: {e}")
-             return None
-
-         # Generate test file path
-         test_file = self.output_dir / f"test_{module_name}_behavioral.py"
-
-         # Extract module path for imports
-         module_path = str(source_file).replace("src/", "").replace(".py", "").replace("/", ".")
-
-         # Generate tests using LLM agent with Anthropic best practices
-         # Phase 1: Basic generation
-         # Phase 2: Multi-turn refinement (if enabled)
-         # Phase 3: Coverage-guided improvement (if enabled)
-
-         if self.enable_refinement:
-             logger.info(f"🔄 Using Phase 2: Multi-turn refinement for {module_name}")
-             test_content = self._generate_with_refinement(module_name, module_path, source_file, source_code, test_file)
-         else:
-             logger.info(f"📝 Using Phase 1: Basic generation for {module_name}")
-             test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)
-
-         if not test_content:
-             logger.warning(f"LLM generation failed for {module_name}")
-             return None
-
-         logger.info(f"LLM generated {len(test_content)} bytes for {module_name}")
-
-         # Phase 3: Coverage-guided improvement (if enabled)
-         if self.enable_coverage_guided:
-             logger.info(f"📊 Applying Phase 3: Coverage-guided improvement for {module_name}")
-             improved_content = self._generate_with_coverage_target(
-                 module_name, module_path, source_file, source_code, test_file, test_content
-             )
-             if improved_content:
-                 test_content = improved_content
-                 logger.info(f"✅ Coverage-guided improvement complete for {module_name}")
-             else:
-                 logger.warning(f"⚠️ Coverage-guided improvement failed, using previous version for {module_name}")
-
-         # Write final test file
-         test_file.write_text(test_content)
-         logger.info(f"Wrote test file: {test_file}")
-
-         # Validate it can be imported
-         if not self._validate_test_file(test_file):
-             test_file.unlink()
-             return None
-
-         return test_file
-
-     def _is_workflow_module(self, source_code: str, module_path: str) -> bool:
-         """Detect if module is a workflow requiring special handling.
-
-         Args:
-             source_code: Source code content
-             module_path: Python import path
-
-         Returns:
-             True if this is a workflow module needing LLM mocking
-         """
-         # Check for workflow indicators
-         indicators = [
-             r"class\s+\w+Workflow",
-             r"async\s+def\s+execute",
-             r"tier_routing",
-             r"LLMProvider",
-             r"TelemetryCollector",
-             r"from\s+anthropic\s+import",
-             r"messages\.create",
-             r"client\.messages"
-         ]
-
-         return any(re.search(pattern, source_code) for pattern in indicators)
-
-     def _get_example_tests(self) -> str:
-         """Get few-shot examples of excellent tests for prompt learning."""
-         return """EXAMPLE 1: Testing a utility function with mocking
- ```python
- import pytest
- from unittest.mock import Mock, patch
- from mymodule import process_data
-
- class TestProcessData:
-     def test_processes_valid_data_successfully(self):
-         \"\"\"Given valid input data, when processing, then returns expected result.\"\"\"
-         # Given
-         input_data = {"key": "value", "count": 42}
-
-         # When
-         result = process_data(input_data)
-
-         # Then
-         assert result is not None
-         assert result["status"] == "success"
-         assert result["processed"] is True
-
-     def test_handles_invalid_data_with_error(self):
-         \"\"\"Given invalid input, when processing, then raises ValueError.\"\"\"
-         # Given
-         invalid_data = {"missing": "key"}
-
-         # When/Then
-         with pytest.raises(ValueError, match="Required key 'key' not found"):
-             process_data(invalid_data)
- ```
-
- EXAMPLE 2: Testing a workflow with LLM mocking
- ```python
- import pytest
- from unittest.mock import Mock, AsyncMock, patch
- from mymodule import MyWorkflow
-
- @pytest.fixture
- def mock_llm_client(mocker):
-     \"\"\"Mock Anthropic LLM client.\"\"\"
-     mock = mocker.patch('anthropic.Anthropic')
-     mock_response = Mock()
-     mock_response.content = [Mock(text="mock LLM response")]
-     mock_response.usage = Mock(input_tokens=100, output_tokens=50)
-     mock_response.stop_reason = "end_turn"
-     mock.return_value.messages.create = AsyncMock(return_value=mock_response)
-     return mock
-
- class TestMyWorkflow:
-     @pytest.mark.asyncio
-     async def test_executes_successfully_with_mocked_llm(self, mock_llm_client):
-         \"\"\"Given valid input, when executing workflow, then completes successfully.\"\"\"
-         # Given
-         workflow = MyWorkflow()
-         input_data = {"prompt": "test prompt"}
-
-         # When
-         result = await workflow.execute(input_data)
-
-         # Then
-         assert result is not None
-         assert "response" in result
-         mock_llm_client.return_value.messages.create.assert_called_once()
-
-     @pytest.mark.asyncio
-     async def test_handles_api_error_gracefully(self, mock_llm_client):
-         \"\"\"Given API failure, when executing, then handles error appropriately.\"\"\"
-         # Given
-         workflow = MyWorkflow()
-         mock_llm_client.return_value.messages.create.side_effect = Exception("API Error")
-
-         # When/Then
-         with pytest.raises(Exception, match="API Error"):
-             await workflow.execute({"prompt": "test"})
- ```
- """
-
-     def _get_workflow_specific_prompt(self, module_name: str, module_path: str, source_code: str) -> str:
-         """Get workflow-specific test generation prompt with comprehensive mocking guidance."""
-         return f"""Generate comprehensive tests for this WORKFLOW module.
-
- ⚠️ CRITICAL: This module makes LLM API calls and requires proper mocking.
-
- MODULE: {module_name}
- IMPORT PATH: {module_path}
-
- SOURCE CODE (COMPLETE - NO TRUNCATION):
- ```python
- {source_code}
- ```
-
- WORKFLOW TESTING REQUIREMENTS:
-
- 1. **Mock LLM API calls** - NEVER make real API calls in tests
- ```python
- @pytest.fixture
- def mock_llm_client(mocker):
-     mock = mocker.patch('anthropic.Anthropic')
-     mock_response = Mock()
-     mock_response.content = [Mock(text="mock response")]
-     mock_response.usage = Mock(input_tokens=100, output_tokens=50)
-     mock_response.stop_reason = "end_turn"
-     mock.return_value.messages.create = AsyncMock(return_value=mock_response)
-     return mock
- ```
-
- 2. **Test tier routing** - Verify correct model selection (cheap/capable/premium)
- 3. **Test telemetry** - Mock and verify telemetry recording
- 4. **Test cost calculation** - Verify token usage and cost tracking
- 5. **Test error handling** - Mock API failures, timeouts, rate limits
- 6. **Test caching** - Mock cache hits/misses if applicable
-
- TARGET COVERAGE: 40-50% (realistic for workflow classes with proper mocking)
-
- Generate a complete test file with:
- - Copyright header: "Generated by enhanced autonomous test generation system."
- - Proper imports (from {module_path})
- - Mock fixtures for ALL external dependencies (LLM, databases, APIs, file I/O)
- - Given/When/Then structure in docstrings
- - Both success and failure test cases
- - Edge case handling
- - Docstrings for all tests describing behavior
-
- Return ONLY the complete Python test file, no explanations."""
-
-     def _generate_with_llm(self, module_name: str, module_path: str, source_file: Path, source_code: str) -> str | None:
-         """Generate comprehensive tests using LLM with Anthropic best practices.
-
-         ENHANCEMENTS (Phase 1):
-         - Extended thinking (20K token budget) for thorough test planning
-         - Prompt caching for 90% cost reduction
-         - Full source code (NO TRUNCATION)
-         - Workflow-specific prompts when detected
-
-         Args:
-             module_name: Name of module being tested
-             module_path: Python import path (e.g., empathy_os.config)
-             source_file: Path to source file
-             source_code: Source code content (FULL, not truncated)
-
-         Returns:
-             Test file content with comprehensive tests, or None if generation failed
-         """
-         import os
-
-         try:
-             import anthropic
-         except ImportError:
-             logger.error("anthropic package not installed")
-             return None
-
-         # Get API key
-         api_key = os.getenv("ANTHROPIC_API_KEY")
-         if not api_key:
-             logger.error("ANTHROPIC_API_KEY not set")
-             return None
-
-         # Detect if this is a workflow module
-         is_workflow = self._is_workflow_module(source_code, module_path)
-         logger.info(f"Module {module_name}: workflow={is_workflow}, size={len(source_code)} bytes (FULL)")
-
-         # Build appropriate prompt based on module type
-         if is_workflow:
-             generation_prompt = self._get_workflow_specific_prompt(module_name, module_path, source_code)
-         else:
-             generation_prompt = f"""Generate comprehensive behavioral tests for this Python module.
-
- SOURCE FILE: {source_file}
- MODULE PATH: {module_path}
-
- SOURCE CODE (COMPLETE):
- ```python
- {source_code}
- ```
-
- Generate a complete test file that:
- 1. Uses Given/When/Then behavioral test structure
- 2. Tests all public functions and classes
- 3. Includes edge cases and error handling
- 4. Uses proper mocking for external dependencies
- 5. Targets 80%+ code coverage for this module
- 6. Follows pytest conventions
-
- Requirements:
- - Import from {module_path} (not from src/)
- - Use pytest fixtures where appropriate
- - Mock external dependencies (APIs, databases, file I/O)
- - Test both success and failure paths
- - Include docstrings for all tests
- - Use descriptive test names
- - Start with copyright header:
- \"\"\"Behavioral tests for {module_name}.
-
- Generated by enhanced autonomous test generation system.
-
- Copyright 2026 Smart-AI-Memory
- Licensed under Apache 2.0
- \"\"\"
-
- Return ONLY the complete Python test file content, no explanations."""
-
-         # Build messages with prompt caching (90% cost reduction on retries)
-         messages = [
-             {
-                 "role": "user",
-                 "content": [
-                     {
-                         "type": "text",
-                         "text": "You are an expert Python test engineer. Here are examples of excellent tests:",
-                         "cache_control": {"type": "ephemeral"}
-                     },
-                     {
-                         "type": "text",
-                         "text": self._get_example_tests(),
-                         "cache_control": {"type": "ephemeral"}
-                     },
-                     {
-                         "type": "text",
-                         "text": generation_prompt
-                     }
-                 ]
-             }
-         ]
-
-         try:
-             # Call Anthropic API with extended thinking and caching
-             logger.info(f"Calling LLM with extended thinking for {module_name} (workflow={is_workflow})")
-             client = anthropic.Anthropic(api_key=api_key)
-             response = client.messages.create(
-                 model="claude-sonnet-4-5", # capable tier
-                 max_tokens=40000, # Very generous total budget for comprehensive tests
-                 thinking={
-                     "type": "enabled",
-                     "budget_tokens": 20000 # Generous thinking budget for thorough planning
-                 },
-                 messages=messages,
-                 timeout=900.0, # 15 minutes timeout for extended thinking + generation
-             )
-
-             if not response.content:
-                 logger.warning(f"Empty LLM response for {module_name}")
-                 return None
-
-             # Extract test content (thinking comes first, then text)
-             test_content = None
-             for block in response.content:
-                 if block.type == "text":
-                     test_content = block.text.strip()
-                     break
-
-             if not test_content:
-                 logger.warning(f"No text content in LLM response for {module_name}")
-                 return None
-
-             logger.info(f"LLM returned {len(test_content)} bytes for {module_name}")
-
-             if len(test_content) < 100:
-                 logger.warning(f"LLM response too short for {module_name}: {test_content[:200]}")
-                 return None
-
-             # Clean up response (remove markdown fences if present)
-             if test_content.startswith("```python"):
-                 test_content = test_content[len("```python"):].strip()
-             if test_content.endswith("```"):
-                 test_content = test_content[:-3].strip()
-
-             # Check for truncation indicators
-             if response.stop_reason == "max_tokens":
-                 logger.warning(f"⚠️ LLM response truncated for {module_name} (hit max_tokens)")
-                 # Response might be incomplete but let validation catch it
-
-             # Quick syntax pre-check before returning
-             try:
-                 import ast
-                 ast.parse(test_content)
-                 logger.info(f"✓ Quick syntax check passed for {module_name}")
-             except SyntaxError as e:
-                 logger.error(f"❌ LLM generated invalid syntax for {module_name}: {e.msg} at line {e.lineno}")
-                 return None
-
-             logger.info(f"Test content cleaned, final size: {len(test_content)} bytes")
-             return test_content
-
-         except Exception as e:
-             logger.error(f"LLM generation error for {module_name}: {e}", exc_info=True)
-             return None
-
-     def _run_pytest_validation(self, test_file: Path) -> ValidationResult:
-         """Run pytest on generated tests and collect failures.
-
-         Args:
-             test_file: Path to test file to validate
-
-         Returns:
-             ValidationResult with test outcomes and failure details
-         """
-         try:
-             result = subprocess.run(
-                 [sys.executable, "-m", "pytest", str(test_file), "-v", "--tb=short"],
-                 capture_output=True,
-                 text=True,
-                 timeout=60,
-             )
-
-             passed = result.returncode == 0
-             output = result.stdout + "\n" + result.stderr
-
-             # Count errors
-             error_count = output.count("FAILED") + output.count("ERROR")
-
-             # Extract failure details
-             failures = ""
-             if not passed:
-                 # Extract relevant failure information
-                 lines = output.split("\n")
-                 failure_lines = []
-                 in_failure = False
-                 for line in lines:
-                     if "FAILED" in line or "ERROR" in line:
-                         in_failure = True
-                     if in_failure:
-                         failure_lines.append(line)
-                     if line.startswith("="): # End of failure section
-                         in_failure = False
-                 failures = "\n".join(failure_lines[:100]) # Limit to 100 lines
-
-             logger.info(f"Pytest validation: passed={passed}, errors={error_count}")
-
-             return ValidationResult(
-                 passed=passed,
-                 failures=failures,
-                 error_count=error_count,
-                 output=output
-             )
-
-         except subprocess.TimeoutExpired:
-             logger.error(f"Pytest validation timeout for {test_file}")
-             return ValidationResult(
-                 passed=False,
-                 failures="Validation timeout after 60 seconds",
-                 error_count=1,
-                 output="Timeout"
-             )
-         except Exception as e:
-             logger.error(f"Pytest validation exception: {e}")
-             return ValidationResult(
-                 passed=False,
-                 failures=f"Validation exception: {e}",
-                 error_count=1,
-                 output=str(e)
-             )
-
-     def _call_llm_with_history(
-         self,
-         conversation_history: list[dict[str, Any]],
-         api_key: str
-     ) -> str | None:
-         """Call LLM with conversation history for refinement.
-
-         Args:
-             conversation_history: List of messages (role + content)
-             api_key: Anthropic API key
-
-         Returns:
-             Refined test content or None if failed
-         """
-         try:
-             import anthropic
-
-             client = anthropic.Anthropic(api_key=api_key)
-             response = client.messages.create(
-                 model="claude-sonnet-4-5",
-                 max_tokens=40000, # Very generous total budget for iterative refinement
-                 thinking={
-                     "type": "enabled",
-                     "budget_tokens": 20000 # Generous thinking budget for thorough analysis
-                 },
-                 messages=conversation_history,
-                 timeout=900.0, # 15 minutes timeout for refinement iterations
-             )
-
-             if not response.content:
-                 logger.warning("Empty LLM response during refinement")
-                 return None
-
-             # Extract text content
-             test_content = None
-             for block in response.content:
-                 if block.type == "text":
-                     test_content = block.text.strip()
-                     break
-
-             if not test_content:
-                 logger.warning("No text content in refinement response")
-                 return None
-
-             # Clean up response
-             if test_content.startswith("```python"):
-                 test_content = test_content[len("```python"):].strip()
-             if test_content.endswith("```"):
-                 test_content = test_content[:-3].strip()
-
-             return test_content
-
-         except Exception as e:
-             logger.error(f"LLM refinement error: {e}", exc_info=True)
-             return None
-
-
-     def _generate_with_refinement(
-         self,
-         module_name: str,
-         module_path: str,
-         source_file: Path,
-         source_code: str,
-         test_file: Path
-     ) -> str | None:
-         """Generate tests with iterative refinement (Phase 2).
-
-         Process:
-         1. Generate initial tests
-         2. Run pytest validation
-         3. If failures, ask Claude to fix
-         4. Repeat until tests pass or max iterations
-
-         Args:
-             module_name: Name of module being tested
-             module_path: Python import path
-             source_file: Path to source file
-             source_code: Source code content
-             test_file: Path where tests will be written
-
-         Returns:
-             Final test content or None if all attempts failed
-         """
-         import os
-
-         api_key = os.getenv("ANTHROPIC_API_KEY")
-         if not api_key:
-             logger.error("ANTHROPIC_API_KEY not set")
-             return None
-
-         logger.info(f"🔄 Phase 2: Multi-turn refinement enabled for {module_name} (max {self.max_refinement_iterations} iterations)")
-
-         # Step 1: Generate initial tests
-         test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)
-         if not test_content:
-             logger.warning("Initial generation failed")
-             return None
-
-         # Build conversation history for subsequent refinements
-         is_workflow = self._is_workflow_module(source_code, module_path)
-
-         # Initial prompt (for history tracking)
-         if is_workflow:
-             initial_prompt = self._get_workflow_specific_prompt(module_name, module_path, source_code)
-         else:
-             initial_prompt = f"""Generate comprehensive behavioral tests for {module_name}.
-
- SOURCE CODE:
- ```python
- {source_code}
- ```"""
-
-         conversation_history = [
-             {
-                 "role": "user",
-                 "content": [
-                     {"type": "text", "text": "You are an expert Python test engineer. Examples:", "cache_control": {"type": "ephemeral"}},
-                     {"type": "text", "text": self._get_example_tests(), "cache_control": {"type": "ephemeral"}},
-                     {"type": "text", "text": initial_prompt}
-                 ]
-             },
-             {
-                 "role": "assistant",
-                 "content": test_content
-             }
-         ]
-
-         # Step 2: Iterative refinement loop
-         for iteration in range(self.max_refinement_iterations):
-             logger.info(f"📝 Refinement iteration {iteration + 1}/{self.max_refinement_iterations} for {module_name}")
-
-             # Write current version to temp file
-             temp_test_file = test_file.parent / f"_temp_{test_file.name}"
-             temp_test_file.write_text(test_content)
-
-             # Validate with pytest
-             validation_result = self._run_pytest_validation(temp_test_file)
-
-             if validation_result.passed:
-                 logger.info(f"✅ Tests passed on iteration {iteration + 1} for {module_name}")
-                 temp_test_file.unlink() # Clean up
-                 return test_content
-
-             # Tests failed - ask Claude to fix
-             logger.warning(f"⚠️ Tests failed on iteration {iteration + 1}: {validation_result.error_count} errors")
-
-             refinement_prompt = f"""The tests you generated have failures. Please fix these specific issues:
-
- FAILURES:
- {validation_result.failures[:2000]}
-
- Requirements:
- 1. Fix ONLY the failing tests - don't rewrite everything
- 2. Ensure imports are correct
- 3. Ensure mocking is properly configured
- 4. Return the COMPLETE corrected test file (not just the fixes)
- 5. Keep the same structure and copyright header
-
- Return ONLY the complete Python test file, no explanations."""
-
-             # Add to conversation history
-             conversation_history.append({
-                 "role": "user",
-                 "content": refinement_prompt
-             })
-
-             # Call LLM for refinement
-             refined_content = self._call_llm_with_history(conversation_history, api_key)
-
-             if not refined_content:
-                 logger.error(f"❌ Refinement failed on iteration {iteration + 1}")
-                 temp_test_file.unlink()
-                 break
-
-             # Update content and history
-             test_content = refined_content
-             conversation_history.append({
-                 "role": "assistant",
-                 "content": test_content
-             })
-
-             logger.info(f"🔄 Refinement iteration {iteration + 1} complete, retrying validation...")
-
-         # Max iterations reached
-         logger.warning(f"⚠️ Max refinement iterations reached for {module_name} - returning best attempt")
-         return test_content
-
-     def _run_coverage_analysis(self, test_file: Path, source_file: Path) -> CoverageResult:
-         """Run coverage analysis on tests.
-
-         Args:
-             test_file: Path to test file
-             source_file: Path to source file being tested
-
-         Returns:
-             CoverageResult with coverage metrics and missing lines
-         """
-         try:
-             # Run pytest with coverage
-             result = subprocess.run(
-                 [
-                     sys.executable, "-m", "pytest",
-                     str(test_file),
-                     f"--cov={source_file.parent}",
-                     "--cov-report=term-missing",
-                     "--cov-report=json",
-                     "-v"
-                 ],
-                 capture_output=True,
-                 text=True,
-                 timeout=120,
-                 cwd=Path.cwd()
-             )
-
-             # Parse coverage from JSON report
-             coverage_json_path = Path(".coverage.json")
-             if not coverage_json_path.exists():
-                 logger.warning("Coverage JSON not generated")
-                 return CoverageResult(
-                     coverage=0.0,
-                     missing_lines=[],
-                     total_statements=0,
-                     covered_statements=0
-                 )
-
-             with open(coverage_json_path) as f:
-                 coverage_data = json.load(f)
-
-             # Find coverage for our specific source file
-             source_key = str(source_file)
-             file_coverage = None
-             for key in coverage_data.get("files", {}).keys():
-                 if source_file.name in key or source_key in key:
-                     file_coverage = coverage_data["files"][key]
-                     break
-
-             if not file_coverage:
-                 logger.warning(f"No coverage data found for {source_file}")
-                 return CoverageResult(
-                     coverage=0.0,
-                     missing_lines=[],
-                     total_statements=0,
-                     covered_statements=0
-                 )
-
-             # Extract metrics
-             total_statements = file_coverage["summary"]["num_statements"]
-             covered_statements = file_coverage["summary"]["covered_lines"]
-             coverage_pct = file_coverage["summary"]["percent_covered"] / 100.0
-             missing_lines = file_coverage["missing_lines"]
-
-             logger.info(f"Coverage: {coverage_pct:.1%} ({covered_statements}/{total_statements} statements)")
-
-             return CoverageResult(
-                 coverage=coverage_pct,
-                 missing_lines=missing_lines,
-                 total_statements=total_statements,
-                 covered_statements=covered_statements
-             )
-
-         except subprocess.TimeoutExpired:
-             logger.error("Coverage analysis timeout")
-             return CoverageResult(coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0)
-         except Exception as e:
-             logger.error(f"Coverage analysis error: {e}", exc_info=True)
-             return CoverageResult(coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0)
-
949
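For readers following the parsing logic above, the coverage.py JSON report that `_run_coverage_analysis` reads is shaped roughly as below. Only the fields the method actually accesses are shown; the file path and numbers are illustrative, and real reports contain additional fields the method ignores.

```python
# Abbreviated, illustrative shape of the JSON report read above.
coverage_data = {
    "files": {
        "src/empathy_os/example_module.py": {  # hypothetical path
            "summary": {
                "num_statements": 120,
                "covered_lines": 96,
                "percent_covered": 80.0,
            },
            "missing_lines": [42, 43, 44, 87],
        },
    },
}
```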
-     def _extract_uncovered_lines(self, source_file: Path, missing_lines: list[int]) -> str:
-         """Extract source code for uncovered lines.
-
-         Args:
-             source_file: Path to source file
-             missing_lines: List of uncovered line numbers
-
-         Returns:
-             Formatted string with uncovered code sections
-         """
-         if not missing_lines:
-             return "No uncovered lines"
-
-         try:
-             source_lines = source_file.read_text().split("\n")
-
-             # Group consecutive lines into ranges
-             ranges = []
-             start = missing_lines[0]
-             end = start
-
-             for line_num in missing_lines[1:]:
-                 if line_num == end + 1:
-                     end = line_num
-                 else:
-                     ranges.append((start, end))
-                     start = line_num
-                     end = start
-             ranges.append((start, end))
-
-             # Extract code for each range with context
-             uncovered_sections = []
-             for start, end in ranges[:10]:  # Limit to 10 ranges
-                 context_start = max(0, start - 3)
-                 context_end = min(len(source_lines), end + 2)
-
-                 section = []
-                 section.append(f"Lines {start}-{end}:")
-                 for i in range(context_start, context_end):
-                     line_marker = ">>>" if start <= i + 1 <= end else "   "
-                     section.append(f"{line_marker} {i + 1:4d}: {source_lines[i]}")
-
-                 uncovered_sections.append("\n".join(section))
-
-             return "\n\n".join(uncovered_sections)
-
-         except Exception as e:
-             logger.error(f"Error extracting uncovered lines: {e}")
-             return f"Error extracting lines: {e}"
-
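The grouping step in `_extract_uncovered_lines` collapses consecutive missing line numbers into inclusive ranges before rendering them with three lines of leading context and `>>>` markers. A small standalone illustration of that grouping (not part of the module):

```python
# Standalone illustration of the consecutive-line grouping used above.
missing_lines = [10, 11, 12, 20, 21, 30]

ranges = []
start = end = missing_lines[0]
for line_num in missing_lines[1:]:
    if line_num == end + 1:
        end = line_num            # extend the current run
    else:
        ranges.append((start, end))
        start = end = line_num    # start a new run
ranges.append((start, end))

assert ranges == [(10, 12), (20, 21), (30, 30)]
```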
-     def _generate_with_coverage_target(
-         self,
-         module_name: str,
-         module_path: str,
-         source_file: Path,
-         source_code: str,
-         test_file: Path,
-         initial_test_content: str
-     ) -> str | None:
-         """Generate tests iteratively until coverage target met (Phase 3).
-
-         Process:
-         1. Start with initial tests
-         2. Run coverage analysis
-         3. If target not met, identify uncovered lines
-         4. Ask Claude to add tests for uncovered code
-         5. Repeat until target coverage reached or max iterations
-
-         Args:
-             module_name: Name of module being tested
-             module_path: Python import path
-             source_file: Path to source file
-             source_code: Source code content
-             test_file: Path to test file
-             initial_test_content: Initial test content from Phase 1/2
-
-         Returns:
-             Final test content with improved coverage or None if failed
-         """
-         import os
-
-         api_key = os.getenv("ANTHROPIC_API_KEY")
-         if not api_key:
-             logger.error("ANTHROPIC_API_KEY not set")
-             return None
-
-         logger.info(f"📊 Phase 3: Coverage-guided generation enabled (target: {self.target_coverage:.0%})")
-
-         test_content = initial_test_content
-         max_coverage_iterations = 5
-
-         for iteration in range(max_coverage_iterations):
-             logger.info(f"📈 Coverage iteration {iteration + 1}/{max_coverage_iterations} for {module_name}")
-
-             # Write current tests
-             test_file.write_text(test_content)
-
-             # Run coverage analysis
-             coverage_result = self._run_coverage_analysis(test_file, source_file)
-
-             logger.info(f"Current coverage: {coverage_result.coverage:.1%}, target: {self.target_coverage:.0%}")
-
-             # Check if target reached
-             if coverage_result.coverage >= self.target_coverage:
-                 logger.info(f"✅ Coverage target reached: {coverage_result.coverage:.1%}")
-                 return test_content
-
-             # Not enough progress
-             if iteration > 0 and coverage_result.coverage <= 0.05:
-                 logger.warning("⚠️ Coverage not improving, stopping")
-                 break
-
-             # Identify uncovered code
-             uncovered_code = self._extract_uncovered_lines(source_file, coverage_result.missing_lines)
-
-             # Ask Claude to add tests for uncovered lines
-             refinement_prompt = f"""Current coverage: {coverage_result.coverage:.1%}
- Target coverage: {self.target_coverage:.0%}
- Missing: {len(coverage_result.missing_lines)} lines
-
- UNCOVERED CODE:
- {uncovered_code[:3000]}
-
- Please ADD tests to cover these specific uncovered lines. Requirements:
- 1. Focus ONLY on the uncovered lines shown above
- 2. Add new test methods to the existing test classes
- 3. Return the COMPLETE test file with additions (not just new tests)
- 4. Use appropriate mocking for external dependencies
- 5. Keep existing tests intact - just add new ones
-
- Return ONLY the complete Python test file with additions, no explanations."""
-
-             # Build conversation with caching
-             messages = [
-                 {
-                     "role": "user",
-                     "content": [
-                         {"type": "text", "text": "You are an expert Python test engineer. Examples:", "cache_control": {"type": "ephemeral"}},
-                         {"type": "text", "text": self._get_example_tests(), "cache_control": {"type": "ephemeral"}},
-                         {"type": "text", "text": f"Source code:\n```python\n{source_code}\n```", "cache_control": {"type": "ephemeral"}},
-                         {"type": "text", "text": f"Current tests:\n```python\n{test_content}\n```"},
-                         {"type": "text", "text": refinement_prompt}
-                     ]
-                 }
-             ]
-
-             # Call LLM for coverage improvement
-             try:
-                 import anthropic
-                 client = anthropic.Anthropic(api_key=api_key)
-                 response = client.messages.create(
-                     model="claude-sonnet-4-5",
-                     max_tokens=40000,  # Very generous total budget for coverage improvement
-                     thinking={"type": "enabled", "budget_tokens": 20000},  # Thorough thinking for coverage gaps
-                     messages=messages,
-                     timeout=900.0,  # 15 minutes timeout for coverage-guided iterations
-                 )
-
-                 refined_content = None
-                 for block in response.content:
-                     if block.type == "text":
-                         refined_content = block.text.strip()
-                         break
-
-                 if not refined_content:
-                     logger.warning(f"No content in coverage refinement iteration {iteration + 1}")
-                     break
-
-                 # Clean up
-                 if refined_content.startswith("```python"):
-                     refined_content = refined_content[len("```python"):].strip()
-                 if refined_content.endswith("```"):
-                     refined_content = refined_content[:-3].strip()
-
-                 test_content = refined_content
-                 logger.info(f"🔄 Coverage iteration {iteration + 1} complete, retrying analysis...")
-
-             except Exception as e:
-                 logger.error(f"Coverage refinement error on iteration {iteration + 1}: {e}")
-                 break
-
-         # Return best attempt
-         logger.info(f"Coverage-guided generation complete: final coverage ~{coverage_result.coverage:.1%}")
-         return test_content
-
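`CoverageResult` is constructed throughout the two methods above but is defined earlier in this module, outside this hunk. A plausible minimal definition, inferred only from the keyword arguments used here, would look something like the sketch below; the package's actual class may differ or carry extra fields.

```python
# Inferred sketch of CoverageResult; the real definition lives earlier in this
# module and may include additional fields or validation.
from dataclasses import dataclass, field


@dataclass
class CoverageResult:
    coverage: float                   # covered fraction, 0.0-1.0
    missing_lines: list[int] = field(default_factory=list)
    total_statements: int = 0
    covered_statements: int = 0
```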
-     def _validate_test_file(self, test_file: Path) -> bool:
-         """Validate test file can be imported and has valid syntax.
-
-         Args:
-             test_file: Path to test file
-
-         Returns:
-             True if valid, False otherwise
-         """
-         # Step 1: Check for syntax errors with ast.parse (fast)
-         try:
-             import ast
-             content = test_file.read_text()
-             ast.parse(content)
-             logger.info(f"✓ Syntax check passed for {test_file.name}")
-         except SyntaxError as e:
-             logger.error(f"❌ Syntax error in {test_file.name} at line {e.lineno}: {e.msg}")
-             return False
-         except Exception as e:
-             logger.error(f"❌ Cannot parse {test_file.name}: {e}")
-             return False
-
-         # Step 2: Check if pytest can collect the tests
-         try:
-             result = subprocess.run(
-                 [sys.executable, "-m", "pytest", "--collect-only", str(test_file)],
-                 capture_output=True,
-                 text=True,
-                 timeout=10,
-             )
-
-             if result.returncode != 0:
-                 logger.error(f"❌ Pytest collection failed for {test_file.name}")
-                 logger.error(f"   Error: {result.stderr[:500]}")
-                 return False
-
-             logger.info(f"✓ Pytest collection passed for {test_file.name}")
-             return True
-
-         except subprocess.TimeoutExpired:
-             logger.error(f"❌ Validation timeout for {test_file.name}")
-             return False
-         except Exception as e:
-             logger.error(f"❌ Validation exception for {test_file}: {e}")
-             return False
-
-     def _count_tests(self) -> int:
-         """Count total tests in generated files.
-
-         Returns:
-             Number of tests
-         """
-         try:
-             result = subprocess.run(
-                 [sys.executable, "-m", "pytest", "--collect-only", "-q", str(self.output_dir)],
-                 capture_output=True,
-                 text=True,
-                 timeout=30,
-             )
-             # Parse output like "123 tests collected"
-             for line in result.stdout.split("\n"):
-                 if "tests collected" in line:
-                     return int(line.split()[0])
-             return 0
-         except Exception:
-             return 0
-
-
- def run_batch_generation(
-     batch_num: int,
-     modules_json: str,
-     enable_refinement: bool = True,
-     enable_coverage_guided: bool = False
- ) -> None:
-     """Run test generation for a batch.
-
-     Args:
-         batch_num: Batch number
-         modules_json: JSON string of modules to process
-         enable_refinement: Enable Phase 2 multi-turn refinement (default: True)
-         enable_coverage_guided: Enable Phase 3 coverage-guided generation (default: False)
-     """
-     # Parse modules
-     modules = json.loads(modules_json)
-
-     # Create agent with Phase 2 & 3 configuration
-     agent_id = f"test-gen-batch{batch_num}"
-     generator = AutonomousTestGenerator(
-         agent_id,
-         batch_num,
-         modules,
-         enable_refinement=enable_refinement,
-         enable_coverage_guided=enable_coverage_guided
-     )
-
-     # Generate tests
-     print(f"Starting autonomous test generation for batch {batch_num}")
-     print(f"Modules to process: {len(modules)}")
-     print(f"Agent ID: {agent_id}")
-     print("\nENHANCEMENTS:")
-     print("  Phase 1: Extended thinking + Prompt caching + Workflow detection")
-     print(f"  Phase 2: Multi-turn refinement = {'ENABLED' if enable_refinement else 'DISABLED'}")
-     print(f"  Phase 3: Coverage-guided = {'ENABLED' if enable_coverage_guided else 'DISABLED'}")
-     print("\nMonitor at: http://localhost:8000\n")
-
-     results = generator.generate_all()
-
-     # Report results
-     print(f"\n{'='*60}")
-     print(f"Batch {batch_num} Complete!")
-     print(f"{'='*60}")
-     print(f"Modules processed: {results['completed']}/{results['total_modules']}")
-     print(f"Tests generated: {results['tests_generated']}")
-     print(f"Files created: {len(results['files_created'])}")
-     print(f"Failed: {results['failed']}")
-
-
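`run_batch_generation` reads only a handful of keys from the dict returned by `generator.generate_all()`. Inferred from those reads, the result is shaped roughly as below; the real dict may carry additional keys, and the values here are illustrative.

```python
# Illustrative shape of the generate_all() result, inferred from the keys read above.
results = {
    "total_modules": 12,
    "completed": 11,
    "failed": 1,
    "tests_generated": 184,
    "files_created": [
        "tests/generated/test_example_module.py",  # hypothetical path
    ],
}
```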
- if __name__ == "__main__":
-     import sys
-
-     if len(sys.argv) < 3:
-         print("Usage: python -m empathy_os.workflows.autonomous_test_gen <batch_num> <modules_json> [--no-refinement] [--coverage-guided]")
-         print("\nOptions:")
-         print("  --no-refinement    Disable Phase 2 multi-turn refinement")
-         print("  --coverage-guided  Enable Phase 3 coverage-guided generation (slower)")
-         sys.exit(1)
-
-     batch_num = int(sys.argv[1])
-     modules_json = sys.argv[2]
-
-     # Parse optional flags
-     enable_refinement = "--no-refinement" not in sys.argv
-     enable_coverage_guided = "--coverage-guided" in sys.argv
-
-     run_batch_generation(batch_num, modules_json, enable_refinement, enable_coverage_guided)
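The `__main__` block above is the CLI entry point; the same batch can also be kicked off programmatically. A hedged sketch, assuming a hypothetical two-field module descriptor (the real descriptor schema is defined earlier in this module and may differ):

```python
# Hypothetical programmatic invocation; the per-module fields shown are assumptions.
import json

from empathy_os.workflows.autonomous_test_gen import run_batch_generation

modules = [
    {"name": "empathy_os.config", "path": "src/empathy_os/config.py"},  # hypothetical
]

run_batch_generation(
    batch_num=1,
    modules_json=json.dumps(modules),
    enable_refinement=True,
    enable_coverage_guided=False,
)
```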