empathy-framework 5.3.0__py3-none-any.whl → 5.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458) hide show
  1. empathy_framework-5.4.0.dist-info/METADATA +47 -0
  2. empathy_framework-5.4.0.dist-info/RECORD +8 -0
  3. {empathy_framework-5.3.0.dist-info → empathy_framework-5.4.0.dist-info}/top_level.txt +0 -1
  4. empathy_healthcare_plugin/__init__.py +12 -11
  5. empathy_llm_toolkit/__init__.py +12 -26
  6. empathy_os/__init__.py +12 -356
  7. empathy_software_plugin/__init__.py +12 -11
  8. empathy_framework-5.3.0.dist-info/METADATA +0 -1026
  9. empathy_framework-5.3.0.dist-info/RECORD +0 -456
  10. empathy_framework-5.3.0.dist-info/entry_points.txt +0 -26
  11. empathy_framework-5.3.0.dist-info/licenses/LICENSE +0 -201
  12. empathy_framework-5.3.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +0 -101
  13. empathy_healthcare_plugin/monitors/__init__.py +0 -9
  14. empathy_healthcare_plugin/monitors/clinical_protocol_monitor.py +0 -315
  15. empathy_healthcare_plugin/monitors/monitoring/__init__.py +0 -44
  16. empathy_healthcare_plugin/monitors/monitoring/protocol_checker.py +0 -300
  17. empathy_healthcare_plugin/monitors/monitoring/protocol_loader.py +0 -214
  18. empathy_healthcare_plugin/monitors/monitoring/sensor_parsers.py +0 -306
  19. empathy_healthcare_plugin/monitors/monitoring/trajectory_analyzer.py +0 -389
  20. empathy_healthcare_plugin/protocols/cardiac.json +0 -93
  21. empathy_healthcare_plugin/protocols/post_operative.json +0 -92
  22. empathy_healthcare_plugin/protocols/respiratory.json +0 -92
  23. empathy_healthcare_plugin/protocols/sepsis.json +0 -141
  24. empathy_llm_toolkit/README.md +0 -553
  25. empathy_llm_toolkit/agent_factory/__init__.py +0 -53
  26. empathy_llm_toolkit/agent_factory/adapters/__init__.py +0 -85
  27. empathy_llm_toolkit/agent_factory/adapters/autogen_adapter.py +0 -312
  28. empathy_llm_toolkit/agent_factory/adapters/crewai_adapter.py +0 -483
  29. empathy_llm_toolkit/agent_factory/adapters/haystack_adapter.py +0 -298
  30. empathy_llm_toolkit/agent_factory/adapters/langchain_adapter.py +0 -362
  31. empathy_llm_toolkit/agent_factory/adapters/langgraph_adapter.py +0 -333
  32. empathy_llm_toolkit/agent_factory/adapters/native.py +0 -228
  33. empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +0 -423
  34. empathy_llm_toolkit/agent_factory/base.py +0 -305
  35. empathy_llm_toolkit/agent_factory/crews/__init__.py +0 -67
  36. empathy_llm_toolkit/agent_factory/crews/code_review.py +0 -1113
  37. empathy_llm_toolkit/agent_factory/crews/health_check.py +0 -1262
  38. empathy_llm_toolkit/agent_factory/crews/refactoring.py +0 -1128
  39. empathy_llm_toolkit/agent_factory/crews/security_audit.py +0 -1018
  40. empathy_llm_toolkit/agent_factory/decorators.py +0 -287
  41. empathy_llm_toolkit/agent_factory/factory.py +0 -558
  42. empathy_llm_toolkit/agent_factory/framework.py +0 -193
  43. empathy_llm_toolkit/agent_factory/memory_integration.py +0 -328
  44. empathy_llm_toolkit/agent_factory/resilient.py +0 -320
  45. empathy_llm_toolkit/agents_md/__init__.py +0 -22
  46. empathy_llm_toolkit/agents_md/loader.py +0 -218
  47. empathy_llm_toolkit/agents_md/parser.py +0 -271
  48. empathy_llm_toolkit/agents_md/registry.py +0 -307
  49. empathy_llm_toolkit/claude_memory.py +0 -466
  50. empathy_llm_toolkit/cli/__init__.py +0 -8
  51. empathy_llm_toolkit/cli/sync_claude.py +0 -487
  52. empathy_llm_toolkit/code_health.py +0 -1313
  53. empathy_llm_toolkit/commands/__init__.py +0 -51
  54. empathy_llm_toolkit/commands/context.py +0 -375
  55. empathy_llm_toolkit/commands/loader.py +0 -301
  56. empathy_llm_toolkit/commands/models.py +0 -231
  57. empathy_llm_toolkit/commands/parser.py +0 -371
  58. empathy_llm_toolkit/commands/registry.py +0 -429
  59. empathy_llm_toolkit/config/__init__.py +0 -29
  60. empathy_llm_toolkit/config/unified.py +0 -291
  61. empathy_llm_toolkit/context/__init__.py +0 -22
  62. empathy_llm_toolkit/context/compaction.py +0 -455
  63. empathy_llm_toolkit/context/manager.py +0 -434
  64. empathy_llm_toolkit/contextual_patterns.py +0 -361
  65. empathy_llm_toolkit/core.py +0 -907
  66. empathy_llm_toolkit/git_pattern_extractor.py +0 -435
  67. empathy_llm_toolkit/hooks/__init__.py +0 -24
  68. empathy_llm_toolkit/hooks/config.py +0 -306
  69. empathy_llm_toolkit/hooks/executor.py +0 -289
  70. empathy_llm_toolkit/hooks/registry.py +0 -302
  71. empathy_llm_toolkit/hooks/scripts/__init__.py +0 -39
  72. empathy_llm_toolkit/hooks/scripts/evaluate_session.py +0 -201
  73. empathy_llm_toolkit/hooks/scripts/first_time_init.py +0 -285
  74. empathy_llm_toolkit/hooks/scripts/pre_compact.py +0 -207
  75. empathy_llm_toolkit/hooks/scripts/session_end.py +0 -183
  76. empathy_llm_toolkit/hooks/scripts/session_start.py +0 -163
  77. empathy_llm_toolkit/hooks/scripts/suggest_compact.py +0 -225
  78. empathy_llm_toolkit/learning/__init__.py +0 -30
  79. empathy_llm_toolkit/learning/evaluator.py +0 -438
  80. empathy_llm_toolkit/learning/extractor.py +0 -514
  81. empathy_llm_toolkit/learning/storage.py +0 -560
  82. empathy_llm_toolkit/levels.py +0 -227
  83. empathy_llm_toolkit/pattern_confidence.py +0 -414
  84. empathy_llm_toolkit/pattern_resolver.py +0 -272
  85. empathy_llm_toolkit/pattern_summary.py +0 -350
  86. empathy_llm_toolkit/providers.py +0 -967
  87. empathy_llm_toolkit/routing/__init__.py +0 -32
  88. empathy_llm_toolkit/routing/model_router.py +0 -362
  89. empathy_llm_toolkit/security/IMPLEMENTATION_SUMMARY.md +0 -413
  90. empathy_llm_toolkit/security/PHASE2_COMPLETE.md +0 -384
  91. empathy_llm_toolkit/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +0 -271
  92. empathy_llm_toolkit/security/QUICK_REFERENCE.md +0 -316
  93. empathy_llm_toolkit/security/README.md +0 -262
  94. empathy_llm_toolkit/security/__init__.py +0 -62
  95. empathy_llm_toolkit/security/audit_logger.py +0 -929
  96. empathy_llm_toolkit/security/audit_logger_example.py +0 -152
  97. empathy_llm_toolkit/security/pii_scrubber.py +0 -640
  98. empathy_llm_toolkit/security/secrets_detector.py +0 -678
  99. empathy_llm_toolkit/security/secrets_detector_example.py +0 -304
  100. empathy_llm_toolkit/security/secure_memdocs.py +0 -1192
  101. empathy_llm_toolkit/security/secure_memdocs_example.py +0 -278
  102. empathy_llm_toolkit/session_status.py +0 -745
  103. empathy_llm_toolkit/state.py +0 -246
  104. empathy_llm_toolkit/utils/__init__.py +0 -5
  105. empathy_llm_toolkit/utils/tokens.py +0 -349
  106. empathy_os/adaptive/__init__.py +0 -13
  107. empathy_os/adaptive/task_complexity.py +0 -127
  108. empathy_os/agent_monitoring.py +0 -414
  109. empathy_os/cache/__init__.py +0 -117
  110. empathy_os/cache/base.py +0 -166
  111. empathy_os/cache/dependency_manager.py +0 -256
  112. empathy_os/cache/hash_only.py +0 -251
  113. empathy_os/cache/hybrid.py +0 -457
  114. empathy_os/cache/storage.py +0 -285
  115. empathy_os/cache_monitor.py +0 -356
  116. empathy_os/cache_stats.py +0 -298
  117. empathy_os/cli/__init__.py +0 -152
  118. empathy_os/cli/__main__.py +0 -12
  119. empathy_os/cli/commands/__init__.py +0 -1
  120. empathy_os/cli/commands/batch.py +0 -264
  121. empathy_os/cli/commands/cache.py +0 -248
  122. empathy_os/cli/commands/help.py +0 -331
  123. empathy_os/cli/commands/info.py +0 -140
  124. empathy_os/cli/commands/inspect.py +0 -436
  125. empathy_os/cli/commands/inspection.py +0 -57
  126. empathy_os/cli/commands/memory.py +0 -48
  127. empathy_os/cli/commands/metrics.py +0 -92
  128. empathy_os/cli/commands/orchestrate.py +0 -184
  129. empathy_os/cli/commands/patterns.py +0 -207
  130. empathy_os/cli/commands/profiling.py +0 -202
  131. empathy_os/cli/commands/provider.py +0 -98
  132. empathy_os/cli/commands/routing.py +0 -285
  133. empathy_os/cli/commands/setup.py +0 -96
  134. empathy_os/cli/commands/status.py +0 -235
  135. empathy_os/cli/commands/sync.py +0 -166
  136. empathy_os/cli/commands/tier.py +0 -121
  137. empathy_os/cli/commands/utilities.py +0 -114
  138. empathy_os/cli/commands/workflow.py +0 -579
  139. empathy_os/cli/core.py +0 -32
  140. empathy_os/cli/parsers/__init__.py +0 -68
  141. empathy_os/cli/parsers/batch.py +0 -118
  142. empathy_os/cli/parsers/cache.py +0 -65
  143. empathy_os/cli/parsers/help.py +0 -41
  144. empathy_os/cli/parsers/info.py +0 -26
  145. empathy_os/cli/parsers/inspect.py +0 -66
  146. empathy_os/cli/parsers/metrics.py +0 -42
  147. empathy_os/cli/parsers/orchestrate.py +0 -61
  148. empathy_os/cli/parsers/patterns.py +0 -54
  149. empathy_os/cli/parsers/provider.py +0 -40
  150. empathy_os/cli/parsers/routing.py +0 -110
  151. empathy_os/cli/parsers/setup.py +0 -42
  152. empathy_os/cli/parsers/status.py +0 -47
  153. empathy_os/cli/parsers/sync.py +0 -31
  154. empathy_os/cli/parsers/tier.py +0 -33
  155. empathy_os/cli/parsers/workflow.py +0 -77
  156. empathy_os/cli/utils/__init__.py +0 -1
  157. empathy_os/cli/utils/data.py +0 -242
  158. empathy_os/cli/utils/helpers.py +0 -68
  159. empathy_os/cli_legacy.py +0 -3957
  160. empathy_os/cli_minimal.py +0 -1159
  161. empathy_os/cli_router.py +0 -437
  162. empathy_os/cli_unified.py +0 -814
  163. empathy_os/config/__init__.py +0 -66
  164. empathy_os/config/xml_config.py +0 -286
  165. empathy_os/config.py +0 -545
  166. empathy_os/coordination.py +0 -870
  167. empathy_os/core.py +0 -1511
  168. empathy_os/core_modules/__init__.py +0 -15
  169. empathy_os/cost_tracker.py +0 -626
  170. empathy_os/dashboard/__init__.py +0 -41
  171. empathy_os/dashboard/app.py +0 -512
  172. empathy_os/dashboard/simple_server.py +0 -435
  173. empathy_os/dashboard/standalone_server.py +0 -547
  174. empathy_os/discovery.py +0 -306
  175. empathy_os/emergence.py +0 -306
  176. empathy_os/exceptions.py +0 -123
  177. empathy_os/feedback_loops.py +0 -373
  178. empathy_os/hot_reload/README.md +0 -473
  179. empathy_os/hot_reload/__init__.py +0 -62
  180. empathy_os/hot_reload/config.py +0 -83
  181. empathy_os/hot_reload/integration.py +0 -229
  182. empathy_os/hot_reload/reloader.py +0 -298
  183. empathy_os/hot_reload/watcher.py +0 -183
  184. empathy_os/hot_reload/websocket.py +0 -177
  185. empathy_os/levels.py +0 -577
  186. empathy_os/leverage_points.py +0 -441
  187. empathy_os/logging_config.py +0 -261
  188. empathy_os/mcp/__init__.py +0 -10
  189. empathy_os/mcp/server.py +0 -506
  190. empathy_os/memory/__init__.py +0 -237
  191. empathy_os/memory/claude_memory.py +0 -469
  192. empathy_os/memory/config.py +0 -224
  193. empathy_os/memory/control_panel.py +0 -1290
  194. empathy_os/memory/control_panel_support.py +0 -145
  195. empathy_os/memory/cross_session.py +0 -845
  196. empathy_os/memory/edges.py +0 -179
  197. empathy_os/memory/encryption.py +0 -159
  198. empathy_os/memory/file_session.py +0 -770
  199. empathy_os/memory/graph.py +0 -570
  200. empathy_os/memory/long_term.py +0 -913
  201. empathy_os/memory/long_term_types.py +0 -99
  202. empathy_os/memory/mixins/__init__.py +0 -25
  203. empathy_os/memory/mixins/backend_init_mixin.py +0 -249
  204. empathy_os/memory/mixins/capabilities_mixin.py +0 -208
  205. empathy_os/memory/mixins/handoff_mixin.py +0 -208
  206. empathy_os/memory/mixins/lifecycle_mixin.py +0 -49
  207. empathy_os/memory/mixins/long_term_mixin.py +0 -352
  208. empathy_os/memory/mixins/promotion_mixin.py +0 -109
  209. empathy_os/memory/mixins/short_term_mixin.py +0 -182
  210. empathy_os/memory/nodes.py +0 -179
  211. empathy_os/memory/redis_bootstrap.py +0 -540
  212. empathy_os/memory/security/__init__.py +0 -31
  213. empathy_os/memory/security/audit_logger.py +0 -932
  214. empathy_os/memory/security/pii_scrubber.py +0 -640
  215. empathy_os/memory/security/secrets_detector.py +0 -678
  216. empathy_os/memory/short_term.py +0 -2192
  217. empathy_os/memory/simple_storage.py +0 -302
  218. empathy_os/memory/storage/__init__.py +0 -15
  219. empathy_os/memory/storage_backend.py +0 -167
  220. empathy_os/memory/summary_index.py +0 -583
  221. empathy_os/memory/types.py +0 -446
  222. empathy_os/memory/unified.py +0 -182
  223. empathy_os/meta_workflows/__init__.py +0 -74
  224. empathy_os/meta_workflows/agent_creator.py +0 -248
  225. empathy_os/meta_workflows/builtin_templates.py +0 -567
  226. empathy_os/meta_workflows/cli_commands/__init__.py +0 -56
  227. empathy_os/meta_workflows/cli_commands/agent_commands.py +0 -321
  228. empathy_os/meta_workflows/cli_commands/analytics_commands.py +0 -442
  229. empathy_os/meta_workflows/cli_commands/config_commands.py +0 -232
  230. empathy_os/meta_workflows/cli_commands/memory_commands.py +0 -182
  231. empathy_os/meta_workflows/cli_commands/template_commands.py +0 -354
  232. empathy_os/meta_workflows/cli_commands/workflow_commands.py +0 -382
  233. empathy_os/meta_workflows/cli_meta_workflows.py +0 -59
  234. empathy_os/meta_workflows/form_engine.py +0 -292
  235. empathy_os/meta_workflows/intent_detector.py +0 -409
  236. empathy_os/meta_workflows/models.py +0 -569
  237. empathy_os/meta_workflows/pattern_learner.py +0 -738
  238. empathy_os/meta_workflows/plan_generator.py +0 -384
  239. empathy_os/meta_workflows/session_context.py +0 -397
  240. empathy_os/meta_workflows/template_registry.py +0 -229
  241. empathy_os/meta_workflows/workflow.py +0 -984
  242. empathy_os/metrics/__init__.py +0 -12
  243. empathy_os/metrics/collector.py +0 -31
  244. empathy_os/metrics/prompt_metrics.py +0 -194
  245. empathy_os/models/__init__.py +0 -172
  246. empathy_os/models/__main__.py +0 -13
  247. empathy_os/models/adaptive_routing.py +0 -437
  248. empathy_os/models/auth_cli.py +0 -444
  249. empathy_os/models/auth_strategy.py +0 -450
  250. empathy_os/models/cli.py +0 -655
  251. empathy_os/models/empathy_executor.py +0 -354
  252. empathy_os/models/executor.py +0 -257
  253. empathy_os/models/fallback.py +0 -762
  254. empathy_os/models/provider_config.py +0 -282
  255. empathy_os/models/registry.py +0 -472
  256. empathy_os/models/tasks.py +0 -359
  257. empathy_os/models/telemetry/__init__.py +0 -71
  258. empathy_os/models/telemetry/analytics.py +0 -594
  259. empathy_os/models/telemetry/backend.py +0 -196
  260. empathy_os/models/telemetry/data_models.py +0 -431
  261. empathy_os/models/telemetry/storage.py +0 -489
  262. empathy_os/models/token_estimator.py +0 -420
  263. empathy_os/models/validation.py +0 -280
  264. empathy_os/monitoring/__init__.py +0 -52
  265. empathy_os/monitoring/alerts.py +0 -946
  266. empathy_os/monitoring/alerts_cli.py +0 -448
  267. empathy_os/monitoring/multi_backend.py +0 -271
  268. empathy_os/monitoring/otel_backend.py +0 -362
  269. empathy_os/optimization/__init__.py +0 -19
  270. empathy_os/optimization/context_optimizer.py +0 -272
  271. empathy_os/orchestration/__init__.py +0 -67
  272. empathy_os/orchestration/agent_templates.py +0 -707
  273. empathy_os/orchestration/config_store.py +0 -499
  274. empathy_os/orchestration/execution_strategies.py +0 -2111
  275. empathy_os/orchestration/meta_orchestrator.py +0 -1168
  276. empathy_os/orchestration/pattern_learner.py +0 -696
  277. empathy_os/orchestration/real_tools.py +0 -931
  278. empathy_os/pattern_cache.py +0 -187
  279. empathy_os/pattern_library.py +0 -542
  280. empathy_os/patterns/debugging/all_patterns.json +0 -81
  281. empathy_os/patterns/debugging/workflow_20260107_1770825e.json +0 -77
  282. empathy_os/patterns/refactoring_memory.json +0 -89
  283. empathy_os/persistence.py +0 -564
  284. empathy_os/platform_utils.py +0 -265
  285. empathy_os/plugins/__init__.py +0 -28
  286. empathy_os/plugins/base.py +0 -361
  287. empathy_os/plugins/registry.py +0 -268
  288. empathy_os/project_index/__init__.py +0 -32
  289. empathy_os/project_index/cli.py +0 -335
  290. empathy_os/project_index/index.py +0 -667
  291. empathy_os/project_index/models.py +0 -504
  292. empathy_os/project_index/reports.py +0 -474
  293. empathy_os/project_index/scanner.py +0 -777
  294. empathy_os/project_index/scanner_parallel.py +0 -291
  295. empathy_os/prompts/__init__.py +0 -61
  296. empathy_os/prompts/config.py +0 -77
  297. empathy_os/prompts/context.py +0 -177
  298. empathy_os/prompts/parser.py +0 -285
  299. empathy_os/prompts/registry.py +0 -313
  300. empathy_os/prompts/templates.py +0 -208
  301. empathy_os/redis_config.py +0 -302
  302. empathy_os/redis_memory.py +0 -799
  303. empathy_os/resilience/__init__.py +0 -56
  304. empathy_os/resilience/circuit_breaker.py +0 -256
  305. empathy_os/resilience/fallback.py +0 -179
  306. empathy_os/resilience/health.py +0 -300
  307. empathy_os/resilience/retry.py +0 -209
  308. empathy_os/resilience/timeout.py +0 -135
  309. empathy_os/routing/__init__.py +0 -43
  310. empathy_os/routing/chain_executor.py +0 -433
  311. empathy_os/routing/classifier.py +0 -217
  312. empathy_os/routing/smart_router.py +0 -234
  313. empathy_os/routing/workflow_registry.py +0 -343
  314. empathy_os/scaffolding/README.md +0 -589
  315. empathy_os/scaffolding/__init__.py +0 -35
  316. empathy_os/scaffolding/__main__.py +0 -14
  317. empathy_os/scaffolding/cli.py +0 -240
  318. empathy_os/socratic/__init__.py +0 -256
  319. empathy_os/socratic/ab_testing.py +0 -958
  320. empathy_os/socratic/blueprint.py +0 -533
  321. empathy_os/socratic/cli.py +0 -703
  322. empathy_os/socratic/collaboration.py +0 -1114
  323. empathy_os/socratic/domain_templates.py +0 -924
  324. empathy_os/socratic/embeddings.py +0 -738
  325. empathy_os/socratic/engine.py +0 -794
  326. empathy_os/socratic/explainer.py +0 -682
  327. empathy_os/socratic/feedback.py +0 -772
  328. empathy_os/socratic/forms.py +0 -629
  329. empathy_os/socratic/generator.py +0 -732
  330. empathy_os/socratic/llm_analyzer.py +0 -637
  331. empathy_os/socratic/mcp_server.py +0 -702
  332. empathy_os/socratic/session.py +0 -312
  333. empathy_os/socratic/storage.py +0 -667
  334. empathy_os/socratic/success.py +0 -730
  335. empathy_os/socratic/visual_editor.py +0 -860
  336. empathy_os/socratic/web_ui.py +0 -958
  337. empathy_os/telemetry/__init__.py +0 -39
  338. empathy_os/telemetry/agent_coordination.py +0 -475
  339. empathy_os/telemetry/agent_tracking.py +0 -367
  340. empathy_os/telemetry/approval_gates.py +0 -545
  341. empathy_os/telemetry/cli.py +0 -1231
  342. empathy_os/telemetry/commands/__init__.py +0 -14
  343. empathy_os/telemetry/commands/dashboard_commands.py +0 -696
  344. empathy_os/telemetry/event_streaming.py +0 -409
  345. empathy_os/telemetry/feedback_loop.py +0 -567
  346. empathy_os/telemetry/usage_tracker.py +0 -591
  347. empathy_os/templates.py +0 -754
  348. empathy_os/test_generator/__init__.py +0 -38
  349. empathy_os/test_generator/__main__.py +0 -14
  350. empathy_os/test_generator/cli.py +0 -234
  351. empathy_os/test_generator/generator.py +0 -355
  352. empathy_os/test_generator/risk_analyzer.py +0 -216
  353. empathy_os/tier_recommender.py +0 -384
  354. empathy_os/tools.py +0 -183
  355. empathy_os/trust/__init__.py +0 -28
  356. empathy_os/trust/circuit_breaker.py +0 -579
  357. empathy_os/trust_building.py +0 -527
  358. empathy_os/validation/__init__.py +0 -19
  359. empathy_os/validation/xml_validator.py +0 -281
  360. empathy_os/vscode_bridge.py +0 -173
  361. empathy_os/workflow_commands.py +0 -780
  362. empathy_os/workflow_patterns/__init__.py +0 -33
  363. empathy_os/workflow_patterns/behavior.py +0 -249
  364. empathy_os/workflow_patterns/core.py +0 -76
  365. empathy_os/workflow_patterns/output.py +0 -99
  366. empathy_os/workflow_patterns/registry.py +0 -255
  367. empathy_os/workflow_patterns/structural.py +0 -288
  368. empathy_os/workflows/__init__.py +0 -539
  369. empathy_os/workflows/autonomous_test_gen.py +0 -1268
  370. empathy_os/workflows/base.py +0 -2667
  371. empathy_os/workflows/batch_processing.py +0 -342
  372. empathy_os/workflows/bug_predict.py +0 -1084
  373. empathy_os/workflows/builder.py +0 -273
  374. empathy_os/workflows/caching.py +0 -253
  375. empathy_os/workflows/code_review.py +0 -1048
  376. empathy_os/workflows/code_review_adapters.py +0 -312
  377. empathy_os/workflows/code_review_pipeline.py +0 -722
  378. empathy_os/workflows/config.py +0 -645
  379. empathy_os/workflows/dependency_check.py +0 -644
  380. empathy_os/workflows/document_gen/__init__.py +0 -25
  381. empathy_os/workflows/document_gen/config.py +0 -30
  382. empathy_os/workflows/document_gen/report_formatter.py +0 -162
  383. empathy_os/workflows/document_gen/workflow.py +0 -1426
  384. empathy_os/workflows/document_manager.py +0 -216
  385. empathy_os/workflows/document_manager_README.md +0 -134
  386. empathy_os/workflows/documentation_orchestrator.py +0 -1205
  387. empathy_os/workflows/history.py +0 -510
  388. empathy_os/workflows/keyboard_shortcuts/__init__.py +0 -39
  389. empathy_os/workflows/keyboard_shortcuts/generators.py +0 -391
  390. empathy_os/workflows/keyboard_shortcuts/parsers.py +0 -416
  391. empathy_os/workflows/keyboard_shortcuts/prompts.py +0 -295
  392. empathy_os/workflows/keyboard_shortcuts/schema.py +0 -193
  393. empathy_os/workflows/keyboard_shortcuts/workflow.py +0 -509
  394. empathy_os/workflows/llm_base.py +0 -363
  395. empathy_os/workflows/manage_docs.py +0 -87
  396. empathy_os/workflows/manage_docs_README.md +0 -134
  397. empathy_os/workflows/manage_documentation.py +0 -821
  398. empathy_os/workflows/new_sample_workflow1.py +0 -149
  399. empathy_os/workflows/new_sample_workflow1_README.md +0 -150
  400. empathy_os/workflows/orchestrated_health_check.py +0 -849
  401. empathy_os/workflows/orchestrated_release_prep.py +0 -600
  402. empathy_os/workflows/output.py +0 -413
  403. empathy_os/workflows/perf_audit.py +0 -863
  404. empathy_os/workflows/pr_review.py +0 -762
  405. empathy_os/workflows/progress.py +0 -785
  406. empathy_os/workflows/progress_server.py +0 -322
  407. empathy_os/workflows/progressive/README 2.md +0 -454
  408. empathy_os/workflows/progressive/README.md +0 -454
  409. empathy_os/workflows/progressive/__init__.py +0 -82
  410. empathy_os/workflows/progressive/cli.py +0 -219
  411. empathy_os/workflows/progressive/core.py +0 -488
  412. empathy_os/workflows/progressive/orchestrator.py +0 -723
  413. empathy_os/workflows/progressive/reports.py +0 -520
  414. empathy_os/workflows/progressive/telemetry.py +0 -274
  415. empathy_os/workflows/progressive/test_gen.py +0 -495
  416. empathy_os/workflows/progressive/workflow.py +0 -589
  417. empathy_os/workflows/refactor_plan.py +0 -694
  418. empathy_os/workflows/release_prep.py +0 -895
  419. empathy_os/workflows/release_prep_crew.py +0 -969
  420. empathy_os/workflows/research_synthesis.py +0 -404
  421. empathy_os/workflows/routing.py +0 -168
  422. empathy_os/workflows/secure_release.py +0 -593
  423. empathy_os/workflows/security_adapters.py +0 -297
  424. empathy_os/workflows/security_audit.py +0 -1329
  425. empathy_os/workflows/security_audit_phase3.py +0 -355
  426. empathy_os/workflows/seo_optimization.py +0 -633
  427. empathy_os/workflows/step_config.py +0 -234
  428. empathy_os/workflows/telemetry_mixin.py +0 -269
  429. empathy_os/workflows/test5.py +0 -125
  430. empathy_os/workflows/test5_README.md +0 -158
  431. empathy_os/workflows/test_coverage_boost_crew.py +0 -849
  432. empathy_os/workflows/test_gen/__init__.py +0 -52
  433. empathy_os/workflows/test_gen/ast_analyzer.py +0 -249
  434. empathy_os/workflows/test_gen/config.py +0 -88
  435. empathy_os/workflows/test_gen/data_models.py +0 -38
  436. empathy_os/workflows/test_gen/report_formatter.py +0 -289
  437. empathy_os/workflows/test_gen/test_templates.py +0 -381
  438. empathy_os/workflows/test_gen/workflow.py +0 -655
  439. empathy_os/workflows/test_gen.py +0 -54
  440. empathy_os/workflows/test_gen_behavioral.py +0 -477
  441. empathy_os/workflows/test_gen_parallel.py +0 -341
  442. empathy_os/workflows/test_lifecycle.py +0 -526
  443. empathy_os/workflows/test_maintenance.py +0 -627
  444. empathy_os/workflows/test_maintenance_cli.py +0 -590
  445. empathy_os/workflows/test_maintenance_crew.py +0 -840
  446. empathy_os/workflows/test_runner.py +0 -622
  447. empathy_os/workflows/tier_tracking.py +0 -531
  448. empathy_os/workflows/xml_enhanced_crew.py +0 -285
  449. empathy_software_plugin/SOFTWARE_PLUGIN_README.md +0 -57
  450. empathy_software_plugin/cli/__init__.py +0 -120
  451. empathy_software_plugin/cli/inspect.py +0 -362
  452. empathy_software_plugin/cli.py +0 -574
  453. empathy_software_plugin/plugin.py +0 -188
  454. workflow_scaffolding/__init__.py +0 -11
  455. workflow_scaffolding/__main__.py +0 -12
  456. workflow_scaffolding/cli.py +0 -206
  457. workflow_scaffolding/generator.py +0 -265
  458. {empathy_framework-5.3.0.dist-info → empathy_framework-5.4.0.dist-info}/WHEEL +0 -0
@@ -1,958 +0,0 @@
1
- """A/B Testing for Workflow Optimization
2
-
3
- Enables controlled experiments to compare different workflow configurations
4
- and determine which performs better for specific goals or domains.
5
-
6
- Key Features:
7
- - Experiment definition with control and variant groups
8
- - Statistical significance testing
9
- - Automatic traffic allocation
10
- - Multi-armed bandit for adaptive optimization
11
- - Integration with feedback loop
12
-
13
- Copyright 2026 Smart-AI-Memory
14
- Licensed under Fair Source License 0.9
15
- """
16
-
17
- from __future__ import annotations
18
-
19
- import hashlib
20
- import json
21
- import logging
22
- import math
23
- import random # Security Note: For A/B test simulation data, not cryptographic use
24
- import time
25
- from dataclasses import dataclass, field
26
- from datetime import datetime
27
- from enum import Enum
28
- from pathlib import Path
29
- from typing import Any
30
-
31
- logger = logging.getLogger(__name__)
32
-
33
-
34
- # =============================================================================
35
- # DATA STRUCTURES
36
- # =============================================================================
37
-
38
-
39
- class ExperimentStatus(Enum):
40
- """Status of an A/B experiment."""
41
-
42
- DRAFT = "draft"
43
- RUNNING = "running"
44
- PAUSED = "paused"
45
- COMPLETED = "completed"
46
- STOPPED = "stopped"
47
-
48
-
49
- class AllocationStrategy(Enum):
50
- """Strategy for allocating traffic to variants."""
51
-
52
- FIXED = "fixed" # Fixed percentage split
53
- EPSILON_GREEDY = "epsilon_greedy" # Explore vs exploit
54
- THOMPSON_SAMPLING = "thompson_sampling" # Bayesian bandits
55
- UCB = "ucb" # Upper confidence bound
56
-
57
-
58
- @dataclass
59
- class Variant:
60
- """A variant in an A/B experiment."""
61
-
62
- variant_id: str
63
- name: str
64
- description: str
65
- config: dict[str, Any]
66
- is_control: bool = False
67
- traffic_percentage: float = 50.0
68
-
69
- # Statistics
70
- impressions: int = 0
71
- conversions: int = 0
72
- total_success_score: float = 0.0
73
-
74
- @property
75
- def conversion_rate(self) -> float:
76
- """Calculate conversion rate."""
77
- if self.impressions == 0:
78
- return 0.0
79
- return self.conversions / self.impressions
80
-
81
- @property
82
- def avg_success_score(self) -> float:
83
- """Calculate average success score."""
84
- if self.impressions == 0:
85
- return 0.0
86
- return self.total_success_score / self.impressions
87
-
88
- def to_dict(self) -> dict[str, Any]:
89
- """Convert to dictionary."""
90
- return {
91
- "variant_id": self.variant_id,
92
- "name": self.name,
93
- "description": self.description,
94
- "config": self.config,
95
- "is_control": self.is_control,
96
- "traffic_percentage": self.traffic_percentage,
97
- "impressions": self.impressions,
98
- "conversions": self.conversions,
99
- "total_success_score": self.total_success_score,
100
- }
101
-
102
- @classmethod
103
- def from_dict(cls, data: dict[str, Any]) -> Variant:
104
- """Create from dictionary."""
105
- return cls(
106
- variant_id=data["variant_id"],
107
- name=data["name"],
108
- description=data["description"],
109
- config=data["config"],
110
- is_control=data.get("is_control", False),
111
- traffic_percentage=data.get("traffic_percentage", 50.0),
112
- impressions=data.get("impressions", 0),
113
- conversions=data.get("conversions", 0),
114
- total_success_score=data.get("total_success_score", 0.0),
115
- )
116
-
117
-
118
- @dataclass
119
- class Experiment:
120
- """An A/B experiment definition."""
121
-
122
- experiment_id: str
123
- name: str
124
- description: str
125
- hypothesis: str
126
- variants: list[Variant]
127
- domain_filter: str | None = None
128
- goal_filter: str | None = None
129
- allocation_strategy: AllocationStrategy = AllocationStrategy.FIXED
130
- min_sample_size: int = 100
131
- max_duration_days: int = 30
132
- confidence_level: float = 0.95
133
- status: ExperimentStatus = ExperimentStatus.DRAFT
134
- created_at: datetime = field(default_factory=datetime.now)
135
- started_at: datetime | None = None
136
- ended_at: datetime | None = None
137
-
138
- def to_dict(self) -> dict[str, Any]:
139
- """Convert to dictionary."""
140
- return {
141
- "experiment_id": self.experiment_id,
142
- "name": self.name,
143
- "description": self.description,
144
- "hypothesis": self.hypothesis,
145
- "variants": [v.to_dict() for v in self.variants],
146
- "domain_filter": self.domain_filter,
147
- "goal_filter": self.goal_filter,
148
- "allocation_strategy": self.allocation_strategy.value,
149
- "min_sample_size": self.min_sample_size,
150
- "max_duration_days": self.max_duration_days,
151
- "confidence_level": self.confidence_level,
152
- "status": self.status.value,
153
- "created_at": self.created_at.isoformat(),
154
- "started_at": self.started_at.isoformat() if self.started_at else None,
155
- "ended_at": self.ended_at.isoformat() if self.ended_at else None,
156
- }
157
-
158
- @classmethod
159
- def from_dict(cls, data: dict[str, Any]) -> Experiment:
160
- """Create from dictionary."""
161
- return cls(
162
- experiment_id=data["experiment_id"],
163
- name=data["name"],
164
- description=data["description"],
165
- hypothesis=data["hypothesis"],
166
- variants=[Variant.from_dict(v) for v in data["variants"]],
167
- domain_filter=data.get("domain_filter"),
168
- goal_filter=data.get("goal_filter"),
169
- allocation_strategy=AllocationStrategy(data.get("allocation_strategy", "fixed")),
170
- min_sample_size=data.get("min_sample_size", 100),
171
- max_duration_days=data.get("max_duration_days", 30),
172
- confidence_level=data.get("confidence_level", 0.95),
173
- status=ExperimentStatus(data.get("status", "draft")),
174
- created_at=datetime.fromisoformat(data["created_at"]),
175
- started_at=(
176
- datetime.fromisoformat(data["started_at"]) if data.get("started_at") else None
177
- ),
178
- ended_at=(datetime.fromisoformat(data["ended_at"]) if data.get("ended_at") else None),
179
- )
180
-
181
- @property
182
- def total_impressions(self) -> int:
183
- """Total impressions across all variants."""
184
- return sum(v.impressions for v in self.variants)
185
-
186
- @property
187
- def control(self) -> Variant | None:
188
- """Get control variant."""
189
- for v in self.variants:
190
- if v.is_control:
191
- return v
192
- return None
193
-
194
- @property
195
- def treatments(self) -> list[Variant]:
196
- """Get treatment variants (non-control)."""
197
- return [v for v in self.variants if not v.is_control]
198
-
199
-
200
- @dataclass
201
- class ExperimentResult:
202
- """Results and analysis of an experiment."""
203
-
204
- experiment: Experiment
205
- winner: Variant | None
206
- is_significant: bool
207
- p_value: float
208
- confidence_interval: tuple[float, float]
209
- lift: float # Percentage improvement over control
210
- recommendation: str
211
-
212
-
213
- # =============================================================================
214
- # STATISTICAL ANALYSIS
215
- # =============================================================================
216
-
217
-
218
class StatisticalAnalyzer:
    """Statistical analysis helpers for A/B tests.

    All methods are pure static functions over plain numbers; only the
    standard-library ``math`` module is used.
    """

    @staticmethod
    def z_test_proportions(
        n1: int,
        c1: int,
        n2: int,
        c2: int,
    ) -> tuple[float, float]:
        """Two-proportion z-test.

        Args:
            n1: Sample size for group 1
            c1: Conversions for group 1
            n2: Sample size for group 2
            c2: Conversions for group 2

        Returns:
            (z_score, p_value). Degenerate inputs (empty group, pooled
            rate of exactly 0 or 1, zero standard error) yield (0.0, 1.0),
            i.e. "no detectable effect".
        """
        if not n1 or not n2:
            return 0.0, 1.0

        p_pooled = (c1 + c2) / (n1 + n2)
        if p_pooled in (0, 1):
            return 0.0, 1.0

        se = math.sqrt(p_pooled * (1 - p_pooled) * (1 / n1 + 1 / n2))
        if se == 0:
            return 0.0, 1.0

        z = (c1 / n1 - c2 / n2) / se
        # Two-sided p-value from the standard normal upper tail.
        return z, 2 * (1 - StatisticalAnalyzer._normal_cdf(abs(z)))

    @staticmethod
    def t_test_means(
        n1: int,
        mean1: float,
        var1: float,
        n2: int,
        mean2: float,
        var2: float,
    ) -> tuple[float, float]:
        """Welch's t-test for the difference of two means.

        Args:
            n1, mean1, var1: Stats for group 1
            n2, mean2, var2: Stats for group 2

        Returns:
            (t_score, p_value). Groups with fewer than two samples, or a
            zero standard error, yield (0.0, 1.0).
        """
        if min(n1, n2) < 2:
            return 0.0, 1.0

        se = math.sqrt(var1 / n1 + var2 / n2)
        if se == 0:
            return 0.0, 1.0

        t = (mean1 - mean2) / se

        # Welch-Satterthwaite approximation of the degrees of freedom.
        num = (var1 / n1 + var2 / n2) ** 2
        denom = (var1 / n1) ** 2 / (n1 - 1) + (var2 / n2) ** 2 / (n2 - 1)
        df = num / denom if denom > 0 else 1

        return t, 2 * StatisticalAnalyzer._t_cdf(-abs(t), df)

    @staticmethod
    def confidence_interval(
        n: int,
        successes: int,
        confidence: float = 0.95,
    ) -> tuple[float, float]:
        """Wilson score interval for a binomial proportion.

        Args:
            n: Sample size
            successes: Number of successes
            confidence: Confidence level (0.90/0.95/0.99 map to exact
                z-scores; anything else falls back to 1.96)

        Returns:
            (lower, upper) bounds clamped to [0, 1]; (0.0, 1.0) when n == 0.
        """
        if n == 0:
            return 0.0, 1.0

        z = StatisticalAnalyzer._z_score(confidence)
        p = successes / n

        denominator = 1 + z * z / n
        centre = p + z * z / (2 * n)
        adjustment = z * math.sqrt((p * (1 - p) + z * z / (4 * n)) / n)

        return (
            max(0, (centre - adjustment) / denominator),
            min(1, (centre + adjustment) / denominator),
        )

    @staticmethod
    def _normal_cdf(x: float) -> float:
        """Standard normal CDF via the error function."""
        return 0.5 * (1 + math.erf(x / math.sqrt(2)))

    @staticmethod
    def _t_cdf(t: float, df: float) -> float:
        """Approximate Student-t CDF (normal approximation beyond 30 df)."""
        if df > 30:
            return StatisticalAnalyzer._normal_cdf(t)

        # Relate the t CDF to the incomplete beta function at
        # x = df / (df + t^2); callers pass t <= 0 (lower tail).
        x = df / (df + t * t)
        return 0.5 * StatisticalAnalyzer._incomplete_beta(df / 2, 0.5, x)

    @staticmethod
    def _incomplete_beta(a: float, b: float, x: float) -> float:
        """Truncated-series approximation of the incomplete beta function.

        NOTE(review): this is a coarse surrogate (it lacks the usual
        x**a leading factor of the regularized series), so small-df
        p-values are approximate -- acceptable for the coarse
        significance calls made here, but confirm before reuse.
        """
        if x == 0:
            return 0
        if x == 1:
            return 1

        # Sum terms until they fall below 1e-10, capped at 100 terms.
        acc = 0.0
        for k in range(100):
            term = (x**k) * math.gamma(a + k) / (math.gamma(k + 1) * math.gamma(a))
            acc += term * ((1 - x) ** b) / (a + k)
            if abs(term) < 1e-10:
                break

        return acc * math.gamma(a + b) / (math.gamma(a) * math.gamma(b))

    @staticmethod
    def _z_score(confidence: float) -> float:
        """Critical z value for common confidence levels (default 1.96)."""
        return {0.90: 1.645, 0.95: 1.96, 0.99: 2.576}.get(confidence, 1.96)
372
-
373
-
374
- # =============================================================================
375
- # TRAFFIC ALLOCATOR
376
- # =============================================================================
377
-
378
-
379
class TrafficAllocator:
    """Routes users to experiment variants according to the allocation strategy."""

    def __init__(self, experiment: Experiment):
        """Initialize allocator.

        Args:
            experiment: The experiment to allocate for
        """
        self.experiment = experiment
        # Unseeded RNG: bandit strategies are intentionally stochastic.
        self._random = random.Random()

    def allocate(self, user_id: str) -> Variant:
        """Allocate a user to a variant.

        Args:
            user_id: Unique user/session identifier

        Returns:
            Allocated variant
        """
        strategy = self.experiment.allocation_strategy

        if strategy == AllocationStrategy.EPSILON_GREEDY:
            return self._epsilon_greedy(epsilon=0.1)
        if strategy == AllocationStrategy.THOMPSON_SAMPLING:
            return self._thompson_sampling()
        if strategy == AllocationStrategy.UCB:
            return self._ucb_allocation()
        # FIXED -- and any unrecognized strategy -- uses deterministic hashing.
        return self._fixed_allocation(user_id)

    def _fixed_allocation(self, user_id: str) -> Variant:
        """Deterministically map a user into a 0-99 traffic bucket.

        The same (experiment, user) pair always lands in the same bucket,
        so repeat visitors see a stable variant.
        """
        # MD5 is used purely for stable bucketing, never for security.
        key = f"{self.experiment.experiment_id}:{user_id}".encode()
        digest = hashlib.md5(key, usedforsecurity=False).hexdigest()
        bucket = int(digest, 16) % 100

        cumulative = 0.0
        for candidate in self.experiment.variants:
            cumulative += candidate.traffic_percentage
            if bucket < cumulative:
                return candidate

        # Float rounding can leave a sliver unassigned; give it to the last variant.
        return self.experiment.variants[-1]

    def _epsilon_greedy(self, epsilon: float = 0.1) -> Variant:
        """Explore a random variant with probability epsilon, else exploit the best."""
        if self._random.random() < epsilon:
            return self._random.choice(self.experiment.variants)
        return max(self.experiment.variants, key=lambda v: v.avg_success_score)

    def _thompson_sampling(self) -> Variant:
        """Bayesian bandit: draw from each variant's Beta posterior, keep the max."""
        best = None
        best_draw = float("-inf")

        for candidate in self.experiment.variants:
            # Beta(successes + 1, failures + 1): uniform prior over the rate.
            draw = self._random.betavariate(
                candidate.conversions + 1,
                (candidate.impressions - candidate.conversions) + 1,
            )
            if draw > best_draw:
                best_draw, best = draw, candidate

        return best

    def _ucb_allocation(self) -> Variant:
        """Upper Confidence Bound: mean reward plus an exploration bonus."""
        total = self.experiment.total_impressions or 1

        def ucb(variant: Variant) -> float:
            # Never-shown variants get infinite priority so each is tried once.
            if variant.impressions == 0:
                return float("inf")
            bonus = math.sqrt(2 * math.log(total) / variant.impressions)
            return variant.avg_success_score + bonus

        return max(self.experiment.variants, key=ucb)
476
-
477
-
478
- # =============================================================================
479
- # EXPERIMENT MANAGER
480
- # =============================================================================
481
-
482
-
483
class ExperimentManager:
    """Manages A/B experiments lifecycle.

    Owns the experiment registry, per-experiment traffic allocators, and
    JSON persistence. Every mutating operation rewrites the full registry
    at ``storage_path`` (last-writer-wins).

    NOTE(review): the save-on-every-mutation scheme assumes a single
    process owns the storage file -- confirm before sharing the path
    across workers.
    """

    def __init__(self, storage_path: Path | str | None = None):
        """Initialize experiment manager.

        Args:
            storage_path: Path to persist experiments. Defaults to
                ``~/.empathy/socratic/experiments.json``.
        """
        if storage_path is None:
            storage_path = Path.home() / ".empathy" / "socratic" / "experiments.json"
        self.storage_path = Path(storage_path)
        # experiment_id -> Experiment, for all experiments ever created.
        self._experiments: dict[str, Experiment] = {}
        # experiment_id -> TrafficAllocator; populated for running experiments.
        self._allocators: dict[str, TrafficAllocator] = {}

        # Load existing experiments
        self._load()

    def create_experiment(
        self,
        name: str,
        description: str,
        hypothesis: str,
        control_config: dict[str, Any],
        treatment_configs: list[dict[str, Any]],
        domain_filter: str | None = None,
        allocation_strategy: AllocationStrategy = AllocationStrategy.FIXED,
        min_sample_size: int = 100,
    ) -> Experiment:
        """Create a new experiment in DRAFT status and persist it.

        Traffic is split evenly across the control and all treatments.

        Args:
            name: Experiment name
            description: Description
            hypothesis: What we're testing
            control_config: Configuration for control group
            treatment_configs: Configurations for treatment groups; each
                entry may carry "name", "description" and "config" keys
                (a bare entry is used as the config itself)
            domain_filter: Optional domain to filter
            allocation_strategy: How to allocate traffic
            min_sample_size: Minimum samples before analysis

        Returns:
            Created experiment
        """
        # Time-salted id: the same name can be reused for later experiments.
        experiment_id = hashlib.sha256(f"{name}:{time.time()}".encode()).hexdigest()[:12]

        # Create variants
        num_variants = 1 + len(treatment_configs)
        traffic_each = 100.0 / num_variants

        variants = [
            Variant(
                variant_id=f"{experiment_id}_control",
                name="Control",
                description="Control group with existing configuration",
                config=control_config,
                is_control=True,
                traffic_percentage=traffic_each,
            )
        ]

        for i, config in enumerate(treatment_configs):
            variants.append(
                Variant(
                    variant_id=f"{experiment_id}_treatment_{i}",
                    name=config.get("name", f"Treatment {i + 1}"),
                    description=config.get("description", ""),
                    # Accept either {"config": {...}} wrappers or bare configs.
                    config=config.get("config", config),
                    is_control=False,
                    traffic_percentage=traffic_each,
                )
            )

        experiment = Experiment(
            experiment_id=experiment_id,
            name=name,
            description=description,
            hypothesis=hypothesis,
            variants=variants,
            domain_filter=domain_filter,
            allocation_strategy=allocation_strategy,
            min_sample_size=min_sample_size,
        )

        self._experiments[experiment_id] = experiment
        self._save()

        return experiment

    def start_experiment(self, experiment_id: str) -> bool:
        """Start an experiment.

        Only experiments still in DRAFT status can be started.

        Args:
            experiment_id: ID of experiment to start

        Returns:
            True if started successfully
        """
        experiment = self._experiments.get(experiment_id)
        if not experiment:
            return False

        if experiment.status != ExperimentStatus.DRAFT:
            return False

        experiment.status = ExperimentStatus.RUNNING
        experiment.started_at = datetime.now()
        self._allocators[experiment_id] = TrafficAllocator(experiment)
        self._save()

        return True

    def stop_experiment(self, experiment_id: str) -> ExperimentResult | None:
        """Stop an experiment and analyze results.

        NOTE(review): no status guard here -- stopping marks the
        experiment COMPLETED regardless of its current status.

        Args:
            experiment_id: ID of experiment to stop

        Returns:
            Experiment results with analysis
        """
        experiment = self._experiments.get(experiment_id)
        if not experiment:
            return None

        experiment.status = ExperimentStatus.COMPLETED
        experiment.ended_at = datetime.now()
        self._save()

        return self.analyze_experiment(experiment_id)

    def allocate_variant(
        self,
        experiment_id: str,
        user_id: str,
    ) -> Variant | None:
        """Allocate a user to a variant.

        Args:
            experiment_id: Experiment ID
            user_id: User/session ID

        Returns:
            Allocated variant, or None if the experiment is unknown
            or not currently running
        """
        experiment = self._experiments.get(experiment_id)
        if not experiment or experiment.status != ExperimentStatus.RUNNING:
            return None

        # Rebuild the allocator lazily (e.g. after a restart mid-run).
        allocator = self._allocators.get(experiment_id)
        if not allocator:
            allocator = TrafficAllocator(experiment)
            self._allocators[experiment_id] = allocator

        return allocator.allocate(user_id)

    def record_impression(self, experiment_id: str, variant_id: str):
        """Record an impression for a variant.

        Unknown experiment ids are ignored; an unknown variant_id
        records nothing (but still triggers a save).

        Args:
            experiment_id: Experiment ID
            variant_id: Variant ID
        """
        experiment = self._experiments.get(experiment_id)
        if not experiment:
            return

        for variant in experiment.variants:
            if variant.variant_id == variant_id:
                variant.impressions += 1
                break

        self._save()

    def record_conversion(
        self,
        experiment_id: str,
        variant_id: str,
        success_score: float = 1.0,
    ):
        """Record a conversion for a variant.

        Args:
            experiment_id: Experiment ID
            variant_id: Variant ID
            success_score: Score from 0-1, accumulated into the
                variant's total_success_score
        """
        experiment = self._experiments.get(experiment_id)
        if not experiment:
            return

        for variant in experiment.variants:
            if variant.variant_id == variant_id:
                variant.conversions += 1
                variant.total_success_score += success_score
                break

        self._save()

    def analyze_experiment(self, experiment_id: str) -> ExperimentResult | None:
        """Analyze experiment results.

        Compares the control against the best treatment (by conversion
        rate) with a two-proportion z-test at the experiment's
        configured confidence level.

        Args:
            experiment_id: Experiment ID

        Returns:
            Analysis results, or None when the experiment is unknown or
            has no control / no treatments
        """
        experiment = self._experiments.get(experiment_id)
        if not experiment:
            return None

        control = experiment.control
        if not control:
            return None

        treatments = experiment.treatments
        if not treatments:
            return None

        # Find best treatment
        best_treatment = max(treatments, key=lambda v: v.conversion_rate)

        # Statistical test (z_score itself is unused; only the p-value drives the verdict)
        z_score, p_value = StatisticalAnalyzer.z_test_proportions(
            control.impressions,
            control.conversions,
            best_treatment.impressions,
            best_treatment.conversions,
        )

        # Significance threshold alpha = 1 - confidence_level (e.g. 0.05).
        is_significant = p_value < (1 - experiment.confidence_level)

        # Calculate lift (relative % over control; 0 when control never converted)
        if control.conversion_rate > 0:
            lift = (
                (best_treatment.conversion_rate - control.conversion_rate) / control.conversion_rate
            ) * 100
        else:
            lift = 0.0

        # Confidence interval for treatment
        ci = StatisticalAnalyzer.confidence_interval(
            best_treatment.impressions,
            best_treatment.conversions,
            experiment.confidence_level,
        )

        # Determine winner
        winner = None
        recommendation = ""

        if is_significant:
            if best_treatment.conversion_rate > control.conversion_rate:
                winner = best_treatment
                recommendation = (
                    f"Adopt {best_treatment.name}. It shows {lift:.1f}% improvement "
                    f"over control with p-value {p_value:.4f}."
                )
            else:
                winner = control
                recommendation = "Keep control. Treatment did not show improvement."
        else:
            recommendation = (
                f"No significant difference detected (p={p_value:.4f}). "
                f"Consider running longer or increasing sample size."
            )

        return ExperimentResult(
            experiment=experiment,
            winner=winner,
            is_significant=is_significant,
            p_value=p_value,
            confidence_interval=ci,
            lift=lift,
            recommendation=recommendation,
        )

    def get_running_experiments(
        self,
        domain: str | None = None,
    ) -> list[Experiment]:
        """Get all running experiments.

        Args:
            domain: Optional domain filter; experiments without a
                domain_filter match any domain

        Returns:
            List of running experiments
        """
        running = []
        for exp in self._experiments.values():
            if exp.status != ExperimentStatus.RUNNING:
                continue
            if domain and exp.domain_filter and exp.domain_filter != domain:
                continue
            running.append(exp)
        return running

    def get_experiment(self, experiment_id: str) -> Experiment | None:
        """Get experiment by ID, or None if unknown."""
        return self._experiments.get(experiment_id)

    def list_experiments(self) -> list[Experiment]:
        """List all experiments (any status)."""
        return list(self._experiments.values())

    def _save(self):
        """Persist the entire registry as one versioned JSON document."""
        self.storage_path.parent.mkdir(parents=True, exist_ok=True)

        data = {
            "version": 1,
            "experiments": [e.to_dict() for e in self._experiments.values()],
        }

        with self.storage_path.open("w") as f:
            json.dump(data, f, indent=2)

    def _load(self):
        """Load experiments from storage, if the file exists.

        Corrupt or unreadable files are logged and skipped so the
        manager still starts with an empty registry.
        """
        if not self.storage_path.exists():
            return

        try:
            with self.storage_path.open("r") as f:
                data = json.load(f)

            for exp_data in data.get("experiments", []):
                exp = Experiment.from_dict(exp_data)
                self._experiments[exp.experiment_id] = exp

                # Restore allocators for running experiments
                if exp.status == ExperimentStatus.RUNNING:
                    self._allocators[exp.experiment_id] = TrafficAllocator(exp)

        except Exception as e:
            logger.warning(f"Failed to load experiments: {e}")
821
-
822
-
823
- # =============================================================================
824
- # WORKFLOW A/B TESTING INTEGRATION
825
- # =============================================================================
826
-
827
-
828
class WorkflowABTester:
    """High-level API for A/B testing workflow configurations.

    Thin facade over ExperimentManager that speaks in terms of agent
    lists and workflow sessions; integrates with the Socratic workflow
    builder to test different configurations and optimize over time.
    """

    def __init__(self, storage_path: Path | str | None = None):
        """Initialize the tester.

        Args:
            storage_path: Path to persist data
        """
        self.manager = ExperimentManager(storage_path)

    def create_workflow_experiment(
        self,
        name: str,
        hypothesis: str,
        control_agents: list[str],
        treatment_agents_list: list[list[str]],
        domain: str | None = None,
    ) -> str:
        """Create an experiment comparing workflow agent configurations.

        Uses Thompson sampling so traffic shifts toward better-performing
        configurations as results accumulate.

        Args:
            name: Experiment name
            hypothesis: What we're testing
            control_agents: Agent list for control
            treatment_agents_list: Agent lists for treatments
            domain: Domain filter

        Returns:
            Experiment ID
        """
        treatments = []
        for index, agents in enumerate(treatment_agents_list):
            treatments.append(
                {
                    "name": f"Treatment {index + 1}",
                    "config": {"agents": agents},
                }
            )

        experiment = self.manager.create_experiment(
            name=name,
            description=f"Testing different agent configurations for {domain or 'general'} workflows",
            hypothesis=hypothesis,
            control_config={"agents": control_agents},
            treatment_configs=treatments,
            domain_filter=domain,
            allocation_strategy=AllocationStrategy.THOMPSON_SAMPLING,
        )

        return experiment.experiment_id

    def get_workflow_config(
        self,
        session_id: str,
        domain: str | None = None,
    ) -> tuple[dict[str, Any], str | None, str | None]:
        """Get workflow configuration for a session.

        Allocates the session to the first running experiment that
        yields a variant (recording an impression), otherwise returns
        the default empty configuration.

        Args:
            session_id: Session ID for allocation
            domain: Optional domain filter

        Returns:
            (config, experiment_id, variant_id) or (default_config, None, None)
        """
        for exp in self.manager.get_running_experiments(domain):
            variant = self.manager.allocate_variant(exp.experiment_id, session_id)
            if variant is None:
                continue
            self.manager.record_impression(exp.experiment_id, variant.variant_id)
            return (variant.config, exp.experiment_id, variant.variant_id)

        # No experiment matched; callers fall back to their default config.
        return ({}, None, None)

    def record_workflow_result(
        self,
        experiment_id: str,
        variant_id: str,
        success: bool,
        success_score: float = 0.0,
    ):
        """Record the result of a workflow execution.

        Args:
            experiment_id: Experiment ID
            variant_id: Variant ID
            success: Whether workflow succeeded
            success_score: Success score (0-1)
        """
        if not success:
            # Failures add nothing beyond the impression recorded at allocation.
            return
        self.manager.record_conversion(
            experiment_id,
            variant_id,
            success_score,
        )

    def get_best_config(self, domain: str | None = None) -> dict[str, Any]:
        """Get the best known configuration for a domain.

        Scans completed experiments and keeps the winning variant with
        the highest average success score.

        Args:
            domain: Domain filter

        Returns:
            Best configuration based on completed experiments
        """
        best_config: dict[str, Any] = {}
        best_score = 0.0

        for exp in self.manager.list_experiments():
            if exp.status != ExperimentStatus.COMPLETED:
                continue
            if domain and exp.domain_filter != domain:
                continue

            result = self.manager.analyze_experiment(exp.experiment_id)
            if result is None or result.winner is None:
                continue
            if result.winner.avg_success_score > best_score:
                best_score = result.winner.avg_success_score
                best_config = result.winner.config

        return best_config