empathy-framework 2.4.0-py3-none-any.whl → 3.8.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. coach_wizards/__init__.py +13 -12
  2. coach_wizards/accessibility_wizard.py +12 -12
  3. coach_wizards/api_wizard.py +12 -12
  4. coach_wizards/base_wizard.py +26 -20
  5. coach_wizards/cicd_wizard.py +15 -13
  6. coach_wizards/code_reviewer_README.md +60 -0
  7. coach_wizards/code_reviewer_wizard.py +180 -0
  8. coach_wizards/compliance_wizard.py +12 -12
  9. coach_wizards/database_wizard.py +12 -12
  10. coach_wizards/debugging_wizard.py +12 -12
  11. coach_wizards/documentation_wizard.py +12 -12
  12. coach_wizards/generate_wizards.py +1 -2
  13. coach_wizards/localization_wizard.py +101 -19
  14. coach_wizards/migration_wizard.py +12 -12
  15. coach_wizards/monitoring_wizard.py +12 -12
  16. coach_wizards/observability_wizard.py +12 -12
  17. coach_wizards/performance_wizard.py +12 -12
  18. coach_wizards/prompt_engineering_wizard.py +661 -0
  19. coach_wizards/refactoring_wizard.py +12 -12
  20. coach_wizards/scaling_wizard.py +12 -12
  21. coach_wizards/security_wizard.py +12 -12
  22. coach_wizards/testing_wizard.py +12 -12
  23. empathy_framework-3.8.2.dist-info/METADATA +1176 -0
  24. empathy_framework-3.8.2.dist-info/RECORD +333 -0
  25. empathy_framework-3.8.2.dist-info/entry_points.txt +22 -0
  26. {empathy_framework-2.4.0.dist-info → empathy_framework-3.8.2.dist-info}/top_level.txt +5 -1
  27. empathy_healthcare_plugin/__init__.py +1 -2
  28. empathy_healthcare_plugin/monitors/__init__.py +9 -0
  29. empathy_healthcare_plugin/monitors/clinical_protocol_monitor.py +315 -0
  30. empathy_healthcare_plugin/monitors/monitoring/__init__.py +44 -0
  31. empathy_healthcare_plugin/monitors/monitoring/protocol_checker.py +300 -0
  32. empathy_healthcare_plugin/monitors/monitoring/protocol_loader.py +214 -0
  33. empathy_healthcare_plugin/monitors/monitoring/sensor_parsers.py +306 -0
  34. empathy_healthcare_plugin/monitors/monitoring/trajectory_analyzer.py +389 -0
  35. empathy_llm_toolkit/__init__.py +7 -7
  36. empathy_llm_toolkit/agent_factory/__init__.py +53 -0
  37. empathy_llm_toolkit/agent_factory/adapters/__init__.py +85 -0
  38. empathy_llm_toolkit/agent_factory/adapters/autogen_adapter.py +312 -0
  39. empathy_llm_toolkit/agent_factory/adapters/crewai_adapter.py +454 -0
  40. empathy_llm_toolkit/agent_factory/adapters/haystack_adapter.py +298 -0
  41. empathy_llm_toolkit/agent_factory/adapters/langchain_adapter.py +362 -0
  42. empathy_llm_toolkit/agent_factory/adapters/langgraph_adapter.py +333 -0
  43. empathy_llm_toolkit/agent_factory/adapters/native.py +228 -0
  44. empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +426 -0
  45. empathy_llm_toolkit/agent_factory/base.py +305 -0
  46. empathy_llm_toolkit/agent_factory/crews/__init__.py +67 -0
  47. empathy_llm_toolkit/agent_factory/crews/code_review.py +1113 -0
  48. empathy_llm_toolkit/agent_factory/crews/health_check.py +1246 -0
  49. empathy_llm_toolkit/agent_factory/crews/refactoring.py +1128 -0
  50. empathy_llm_toolkit/agent_factory/crews/security_audit.py +1018 -0
  51. empathy_llm_toolkit/agent_factory/decorators.py +286 -0
  52. empathy_llm_toolkit/agent_factory/factory.py +558 -0
  53. empathy_llm_toolkit/agent_factory/framework.py +192 -0
  54. empathy_llm_toolkit/agent_factory/memory_integration.py +324 -0
  55. empathy_llm_toolkit/agent_factory/resilient.py +320 -0
  56. empathy_llm_toolkit/claude_memory.py +14 -15
  57. empathy_llm_toolkit/cli/__init__.py +8 -0
  58. empathy_llm_toolkit/cli/sync_claude.py +487 -0
  59. empathy_llm_toolkit/code_health.py +186 -28
  60. empathy_llm_toolkit/config/__init__.py +29 -0
  61. empathy_llm_toolkit/config/unified.py +295 -0
  62. empathy_llm_toolkit/contextual_patterns.py +11 -12
  63. empathy_llm_toolkit/core.py +168 -53
  64. empathy_llm_toolkit/git_pattern_extractor.py +17 -13
  65. empathy_llm_toolkit/levels.py +6 -13
  66. empathy_llm_toolkit/pattern_confidence.py +14 -18
  67. empathy_llm_toolkit/pattern_resolver.py +10 -12
  68. empathy_llm_toolkit/pattern_summary.py +16 -14
  69. empathy_llm_toolkit/providers.py +194 -28
  70. empathy_llm_toolkit/routing/__init__.py +32 -0
  71. empathy_llm_toolkit/routing/model_router.py +362 -0
  72. empathy_llm_toolkit/security/IMPLEMENTATION_SUMMARY.md +413 -0
  73. empathy_llm_toolkit/security/PHASE2_COMPLETE.md +384 -0
  74. empathy_llm_toolkit/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
  75. empathy_llm_toolkit/security/QUICK_REFERENCE.md +316 -0
  76. empathy_llm_toolkit/security/README.md +262 -0
  77. empathy_llm_toolkit/security/__init__.py +62 -0
  78. empathy_llm_toolkit/security/audit_logger.py +929 -0
  79. empathy_llm_toolkit/security/audit_logger_example.py +152 -0
  80. empathy_llm_toolkit/security/pii_scrubber.py +640 -0
  81. empathy_llm_toolkit/security/secrets_detector.py +678 -0
  82. empathy_llm_toolkit/security/secrets_detector_example.py +304 -0
  83. empathy_llm_toolkit/security/secure_memdocs.py +1192 -0
  84. empathy_llm_toolkit/security/secure_memdocs_example.py +278 -0
  85. empathy_llm_toolkit/session_status.py +20 -22
  86. empathy_llm_toolkit/state.py +28 -21
  87. empathy_llm_toolkit/wizards/__init__.py +38 -0
  88. empathy_llm_toolkit/wizards/base_wizard.py +364 -0
  89. empathy_llm_toolkit/wizards/customer_support_wizard.py +190 -0
  90. empathy_llm_toolkit/wizards/healthcare_wizard.py +362 -0
  91. empathy_llm_toolkit/wizards/patient_assessment_README.md +64 -0
  92. empathy_llm_toolkit/wizards/patient_assessment_wizard.py +193 -0
  93. empathy_llm_toolkit/wizards/technology_wizard.py +194 -0
  94. empathy_os/__init__.py +125 -84
  95. empathy_os/adaptive/__init__.py +13 -0
  96. empathy_os/adaptive/task_complexity.py +127 -0
  97. empathy_os/{monitoring.py → agent_monitoring.py} +28 -28
  98. empathy_os/cache/__init__.py +117 -0
  99. empathy_os/cache/base.py +166 -0
  100. empathy_os/cache/dependency_manager.py +253 -0
  101. empathy_os/cache/hash_only.py +248 -0
  102. empathy_os/cache/hybrid.py +390 -0
  103. empathy_os/cache/storage.py +282 -0
  104. empathy_os/cli.py +1516 -70
  105. empathy_os/cli_unified.py +597 -0
  106. empathy_os/config/__init__.py +63 -0
  107. empathy_os/config/xml_config.py +239 -0
  108. empathy_os/config.py +95 -37
  109. empathy_os/coordination.py +72 -68
  110. empathy_os/core.py +94 -107
  111. empathy_os/cost_tracker.py +74 -55
  112. empathy_os/dashboard/__init__.py +15 -0
  113. empathy_os/dashboard/server.py +743 -0
  114. empathy_os/discovery.py +17 -14
  115. empathy_os/emergence.py +21 -22
  116. empathy_os/exceptions.py +18 -30
  117. empathy_os/feedback_loops.py +30 -33
  118. empathy_os/levels.py +32 -35
  119. empathy_os/leverage_points.py +31 -32
  120. empathy_os/logging_config.py +19 -16
  121. empathy_os/memory/__init__.py +195 -0
  122. empathy_os/memory/claude_memory.py +466 -0
  123. empathy_os/memory/config.py +224 -0
  124. empathy_os/memory/control_panel.py +1298 -0
  125. empathy_os/memory/edges.py +179 -0
  126. empathy_os/memory/graph.py +567 -0
  127. empathy_os/memory/long_term.py +1194 -0
  128. empathy_os/memory/nodes.py +179 -0
  129. empathy_os/memory/redis_bootstrap.py +540 -0
  130. empathy_os/memory/security/__init__.py +31 -0
  131. empathy_os/memory/security/audit_logger.py +930 -0
  132. empathy_os/memory/security/pii_scrubber.py +640 -0
  133. empathy_os/memory/security/secrets_detector.py +678 -0
  134. empathy_os/memory/short_term.py +2119 -0
  135. empathy_os/memory/storage/__init__.py +15 -0
  136. empathy_os/memory/summary_index.py +583 -0
  137. empathy_os/memory/unified.py +619 -0
  138. empathy_os/metrics/__init__.py +12 -0
  139. empathy_os/metrics/prompt_metrics.py +190 -0
  140. empathy_os/models/__init__.py +136 -0
  141. empathy_os/models/__main__.py +13 -0
  142. empathy_os/models/cli.py +655 -0
  143. empathy_os/models/empathy_executor.py +354 -0
  144. empathy_os/models/executor.py +252 -0
  145. empathy_os/models/fallback.py +671 -0
  146. empathy_os/models/provider_config.py +563 -0
  147. empathy_os/models/registry.py +382 -0
  148. empathy_os/models/tasks.py +302 -0
  149. empathy_os/models/telemetry.py +548 -0
  150. empathy_os/models/token_estimator.py +378 -0
  151. empathy_os/models/validation.py +274 -0
  152. empathy_os/monitoring/__init__.py +52 -0
  153. empathy_os/monitoring/alerts.py +23 -0
  154. empathy_os/monitoring/alerts_cli.py +268 -0
  155. empathy_os/monitoring/multi_backend.py +271 -0
  156. empathy_os/monitoring/otel_backend.py +363 -0
  157. empathy_os/optimization/__init__.py +19 -0
  158. empathy_os/optimization/context_optimizer.py +272 -0
  159. empathy_os/pattern_library.py +30 -29
  160. empathy_os/persistence.py +35 -37
  161. empathy_os/platform_utils.py +261 -0
  162. empathy_os/plugins/__init__.py +28 -0
  163. empathy_os/plugins/base.py +361 -0
  164. empathy_os/plugins/registry.py +268 -0
  165. empathy_os/project_index/__init__.py +30 -0
  166. empathy_os/project_index/cli.py +335 -0
  167. empathy_os/project_index/crew_integration.py +430 -0
  168. empathy_os/project_index/index.py +425 -0
  169. empathy_os/project_index/models.py +501 -0
  170. empathy_os/project_index/reports.py +473 -0
  171. empathy_os/project_index/scanner.py +538 -0
  172. empathy_os/prompts/__init__.py +61 -0
  173. empathy_os/prompts/config.py +77 -0
  174. empathy_os/prompts/context.py +177 -0
  175. empathy_os/prompts/parser.py +285 -0
  176. empathy_os/prompts/registry.py +313 -0
  177. empathy_os/prompts/templates.py +208 -0
  178. empathy_os/redis_config.py +144 -58
  179. empathy_os/redis_memory.py +79 -77
  180. empathy_os/resilience/__init__.py +56 -0
  181. empathy_os/resilience/circuit_breaker.py +256 -0
  182. empathy_os/resilience/fallback.py +179 -0
  183. empathy_os/resilience/health.py +300 -0
  184. empathy_os/resilience/retry.py +209 -0
  185. empathy_os/resilience/timeout.py +135 -0
  186. empathy_os/routing/__init__.py +43 -0
  187. empathy_os/routing/chain_executor.py +433 -0
  188. empathy_os/routing/classifier.py +217 -0
  189. empathy_os/routing/smart_router.py +234 -0
  190. empathy_os/routing/wizard_registry.py +307 -0
  191. empathy_os/templates.py +19 -14
  192. empathy_os/trust/__init__.py +28 -0
  193. empathy_os/trust/circuit_breaker.py +579 -0
  194. empathy_os/trust_building.py +67 -58
  195. empathy_os/validation/__init__.py +19 -0
  196. empathy_os/validation/xml_validator.py +281 -0
  197. empathy_os/wizard_factory_cli.py +170 -0
  198. empathy_os/{workflows.py → workflow_commands.py} +131 -37
  199. empathy_os/workflows/__init__.py +360 -0
  200. empathy_os/workflows/base.py +1660 -0
  201. empathy_os/workflows/bug_predict.py +962 -0
  202. empathy_os/workflows/code_review.py +960 -0
  203. empathy_os/workflows/code_review_adapters.py +310 -0
  204. empathy_os/workflows/code_review_pipeline.py +720 -0
  205. empathy_os/workflows/config.py +600 -0
  206. empathy_os/workflows/dependency_check.py +648 -0
  207. empathy_os/workflows/document_gen.py +1069 -0
  208. empathy_os/workflows/documentation_orchestrator.py +1205 -0
  209. empathy_os/workflows/health_check.py +679 -0
  210. empathy_os/workflows/keyboard_shortcuts/__init__.py +39 -0
  211. empathy_os/workflows/keyboard_shortcuts/generators.py +386 -0
  212. empathy_os/workflows/keyboard_shortcuts/parsers.py +414 -0
  213. empathy_os/workflows/keyboard_shortcuts/prompts.py +295 -0
  214. empathy_os/workflows/keyboard_shortcuts/schema.py +193 -0
  215. empathy_os/workflows/keyboard_shortcuts/workflow.py +505 -0
  216. empathy_os/workflows/manage_documentation.py +804 -0
  217. empathy_os/workflows/new_sample_workflow1.py +146 -0
  218. empathy_os/workflows/new_sample_workflow1_README.md +150 -0
  219. empathy_os/workflows/perf_audit.py +687 -0
  220. empathy_os/workflows/pr_review.py +748 -0
  221. empathy_os/workflows/progress.py +445 -0
  222. empathy_os/workflows/progress_server.py +322 -0
  223. empathy_os/workflows/refactor_plan.py +693 -0
  224. empathy_os/workflows/release_prep.py +808 -0
  225. empathy_os/workflows/research_synthesis.py +404 -0
  226. empathy_os/workflows/secure_release.py +585 -0
  227. empathy_os/workflows/security_adapters.py +297 -0
  228. empathy_os/workflows/security_audit.py +1046 -0
  229. empathy_os/workflows/step_config.py +234 -0
  230. empathy_os/workflows/test5.py +125 -0
  231. empathy_os/workflows/test5_README.md +158 -0
  232. empathy_os/workflows/test_gen.py +1855 -0
  233. empathy_os/workflows/test_lifecycle.py +526 -0
  234. empathy_os/workflows/test_maintenance.py +626 -0
  235. empathy_os/workflows/test_maintenance_cli.py +590 -0
  236. empathy_os/workflows/test_maintenance_crew.py +821 -0
  237. empathy_os/workflows/xml_enhanced_crew.py +285 -0
  238. empathy_software_plugin/__init__.py +1 -2
  239. empathy_software_plugin/cli/__init__.py +120 -0
  240. empathy_software_plugin/cli/inspect.py +362 -0
  241. empathy_software_plugin/cli.py +49 -27
  242. empathy_software_plugin/plugin.py +4 -8
  243. empathy_software_plugin/wizards/__init__.py +42 -0
  244. empathy_software_plugin/wizards/advanced_debugging_wizard.py +392 -0
  245. empathy_software_plugin/wizards/agent_orchestration_wizard.py +511 -0
  246. empathy_software_plugin/wizards/ai_collaboration_wizard.py +503 -0
  247. empathy_software_plugin/wizards/ai_context_wizard.py +441 -0
  248. empathy_software_plugin/wizards/ai_documentation_wizard.py +503 -0
  249. empathy_software_plugin/wizards/base_wizard.py +288 -0
  250. empathy_software_plugin/wizards/book_chapter_wizard.py +519 -0
  251. empathy_software_plugin/wizards/code_review_wizard.py +606 -0
  252. empathy_software_plugin/wizards/debugging/__init__.py +50 -0
  253. empathy_software_plugin/wizards/debugging/bug_risk_analyzer.py +414 -0
  254. empathy_software_plugin/wizards/debugging/config_loaders.py +442 -0
  255. empathy_software_plugin/wizards/debugging/fix_applier.py +469 -0
  256. empathy_software_plugin/wizards/debugging/language_patterns.py +383 -0
  257. empathy_software_plugin/wizards/debugging/linter_parsers.py +470 -0
  258. empathy_software_plugin/wizards/debugging/verification.py +369 -0
  259. empathy_software_plugin/wizards/enhanced_testing_wizard.py +537 -0
  260. empathy_software_plugin/wizards/memory_enhanced_debugging_wizard.py +816 -0
  261. empathy_software_plugin/wizards/multi_model_wizard.py +501 -0
  262. empathy_software_plugin/wizards/pattern_extraction_wizard.py +422 -0
  263. empathy_software_plugin/wizards/pattern_retriever_wizard.py +400 -0
  264. empathy_software_plugin/wizards/performance/__init__.py +9 -0
  265. empathy_software_plugin/wizards/performance/bottleneck_detector.py +221 -0
  266. empathy_software_plugin/wizards/performance/profiler_parsers.py +278 -0
  267. empathy_software_plugin/wizards/performance/trajectory_analyzer.py +429 -0
  268. empathy_software_plugin/wizards/performance_profiling_wizard.py +305 -0
  269. empathy_software_plugin/wizards/prompt_engineering_wizard.py +425 -0
  270. empathy_software_plugin/wizards/rag_pattern_wizard.py +461 -0
  271. empathy_software_plugin/wizards/security/__init__.py +32 -0
  272. empathy_software_plugin/wizards/security/exploit_analyzer.py +290 -0
  273. empathy_software_plugin/wizards/security/owasp_patterns.py +241 -0
  274. empathy_software_plugin/wizards/security/vulnerability_scanner.py +604 -0
  275. empathy_software_plugin/wizards/security_analysis_wizard.py +322 -0
  276. empathy_software_plugin/wizards/security_learning_wizard.py +740 -0
  277. empathy_software_plugin/wizards/tech_debt_wizard.py +726 -0
  278. empathy_software_plugin/wizards/testing/__init__.py +27 -0
  279. empathy_software_plugin/wizards/testing/coverage_analyzer.py +459 -0
  280. empathy_software_plugin/wizards/testing/quality_analyzer.py +531 -0
  281. empathy_software_plugin/wizards/testing/test_suggester.py +533 -0
  282. empathy_software_plugin/wizards/testing_wizard.py +274 -0
  283. hot_reload/README.md +473 -0
  284. hot_reload/__init__.py +62 -0
  285. hot_reload/config.py +84 -0
  286. hot_reload/integration.py +228 -0
  287. hot_reload/reloader.py +298 -0
  288. hot_reload/watcher.py +179 -0
  289. hot_reload/websocket.py +176 -0
  290. scaffolding/README.md +589 -0
  291. scaffolding/__init__.py +35 -0
  292. scaffolding/__main__.py +14 -0
  293. scaffolding/cli.py +240 -0
  294. test_generator/__init__.py +38 -0
  295. test_generator/__main__.py +14 -0
  296. test_generator/cli.py +226 -0
  297. test_generator/generator.py +325 -0
  298. test_generator/risk_analyzer.py +216 -0
  299. workflow_patterns/__init__.py +33 -0
  300. workflow_patterns/behavior.py +249 -0
  301. workflow_patterns/core.py +76 -0
  302. workflow_patterns/output.py +99 -0
  303. workflow_patterns/registry.py +255 -0
  304. workflow_patterns/structural.py +288 -0
  305. workflow_scaffolding/__init__.py +11 -0
  306. workflow_scaffolding/__main__.py +12 -0
  307. workflow_scaffolding/cli.py +206 -0
  308. workflow_scaffolding/generator.py +265 -0
  309. agents/code_inspection/patterns/inspection/recurring_B112.json +0 -18
  310. agents/code_inspection/patterns/inspection/recurring_F541.json +0 -16
  311. agents/code_inspection/patterns/inspection/recurring_FORMAT.json +0 -25
  312. agents/code_inspection/patterns/inspection/recurring_bug_20250822_def456.json +0 -16
  313. agents/code_inspection/patterns/inspection/recurring_bug_20250915_abc123.json +0 -16
  314. agents/code_inspection/patterns/inspection/recurring_bug_20251212_3c5b9951.json +0 -16
  315. agents/code_inspection/patterns/inspection/recurring_bug_20251212_97c0f72f.json +0 -16
  316. agents/code_inspection/patterns/inspection/recurring_bug_20251212_a0871d53.json +0 -16
  317. agents/code_inspection/patterns/inspection/recurring_bug_20251212_a9b6ec41.json +0 -16
  318. agents/code_inspection/patterns/inspection/recurring_bug_null_001.json +0 -16
  319. agents/code_inspection/patterns/inspection/recurring_builtin.json +0 -16
  320. agents/compliance_anticipation_agent.py +0 -1427
  321. agents/epic_integration_wizard.py +0 -541
  322. agents/trust_building_behaviors.py +0 -891
  323. empathy_framework-2.4.0.dist-info/METADATA +0 -485
  324. empathy_framework-2.4.0.dist-info/RECORD +0 -102
  325. empathy_framework-2.4.0.dist-info/entry_points.txt +0 -6
  326. empathy_llm_toolkit/htmlcov/status.json +0 -1
  327. empathy_llm_toolkit/security/htmlcov/status.json +0 -1
  328. {empathy_framework-2.4.0.dist-info → empathy_framework-3.8.2.dist-info}/WHEEL +0 -0
  329. {empathy_framework-2.4.0.dist-info → empathy_framework-3.8.2.dist-info}/licenses/LICENSE +0 -0
empathy_os/workflows/test_gen.py
@@ -0,0 +1,1855 @@
+ """Test Generation Workflow
+
+ Generates tests targeting areas with historical bugs and low coverage.
+ Prioritizes test creation for bug-prone code paths.
+
+ Stages:
+ 1. identify (CHEAP) - Identify files with low coverage or historical bugs
+ 2. analyze (CAPABLE) - Analyze code structure and existing test patterns
+ 3. generate (CAPABLE) - Generate test cases focusing on edge cases
+ 4. review (PREMIUM) - Quality review and deduplication (conditional)
+
+ Copyright 2025 Smart-AI-Memory
+ Licensed under Fair Source License 0.9
+ """
+
+ import ast
+ import json
+ import re
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Any
+
+ from .base import BaseWorkflow, ModelTier
+ from .step_config import WorkflowStepConfig
+
+ # =============================================================================
+ # Default Configuration
+ # =============================================================================
+
+ # Directories to skip during file scanning (configurable via input_data["skip_patterns"])
+ DEFAULT_SKIP_PATTERNS = [
+     # Version control
+     ".git",
+     ".hg",
+     ".svn",
+     # Dependencies
+     "node_modules",
+     "bower_components",
+     "vendor",
+     # Python caches
+     "__pycache__",
+     ".mypy_cache",
+     ".pytest_cache",
+     ".ruff_cache",
+     ".hypothesis",
+     # Virtual environments
+     "venv",
+     ".venv",
+     "env",
+     ".env",
+     "virtualenv",
+     ".virtualenv",
+     # Build tools
+     ".tox",
+     ".nox",
+     # Build outputs
+     "build",
+     "dist",
+     "eggs",
+     ".eggs",
+     "site-packages",
+     # IDE
+     ".idea",
+     ".vscode",
+     # Framework-specific
+     "migrations",
+     "alembic",
+     # Documentation
+     "_build",
+     "docs/_build",
+ ]
+
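# --- Editor's note (not part of the diff): DEFAULT_SKIP_PATTERNS is only a
# default; _identify() reads input_data["skip_patterns"] before falling back
# to it. A minimal sketch of overriding the scan scope, assuming the caller
# builds the stage input dict directly:
#
#     input_data = {
#         "path": "src/",
#         "skip_patterns": DEFAULT_SKIP_PATTERNS + ["generated", "fixtures"],
#         "max_file_size_kb": 500,
#     }
#
# Matching is substring-based (any(skip in file_str ...)), so a broad pattern
# like "env" also excludes paths such as "environment/" -- prefer specific names.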
+ # =============================================================================
+ # AST-Based Function Analysis
+ # =============================================================================
+
+
+ @dataclass
+ class FunctionSignature:
+     """Detailed function analysis for test generation."""
+
+     name: str
+     params: list[tuple[str, str, str | None]]  # (name, type_hint, default)
+     return_type: str | None
+     is_async: bool
+     raises: set[str]
+     has_side_effects: bool
+     docstring: str | None
+     complexity: int = 1  # Rough complexity estimate
+     decorators: list[str] = field(default_factory=list)
+
+
+ @dataclass
+ class ClassSignature:
+     """Detailed class analysis for test generation."""
+
+     name: str
+     methods: list[FunctionSignature]
+     init_params: list[tuple[str, str, str | None]]  # Constructor params
+     base_classes: list[str]
+     docstring: str | None
+     is_enum: bool = False  # True if class inherits from Enum
+     is_dataclass: bool = False  # True if class has @dataclass decorator
+     required_init_params: int = 0  # Number of params without defaults
+
+
+ class ASTFunctionAnalyzer(ast.NodeVisitor):
+     """AST-based function analyzer for accurate test generation.
+
+     Extracts:
+     - Function signatures with types
+     - Exception types raised
+     - Side effects detection
+     - Complexity estimation
+
+     Parse errors are tracked in the `last_error` attribute for debugging.
+     """
+
+     def __init__(self):
+         self.functions: list[FunctionSignature] = []
+         self.classes: list[ClassSignature] = []
+         self._current_class: str | None = None
+         self.last_error: str | None = None  # Track parse errors for debugging
+
+     def analyze(
+         self,
+         code: str,
+         file_path: str = "",
+     ) -> tuple[list[FunctionSignature], list[ClassSignature]]:
+         """Analyze code and extract function/class signatures.
+
+         Args:
+             code: Python source code to analyze
+             file_path: Optional file path for error reporting
+
+         Returns:
+             Tuple of (functions, classes) lists. If parsing fails,
+             returns empty lists and sets self.last_error with details.
+
+         """
+         self.last_error = None
+         try:
+             tree = ast.parse(code)
+             self.functions = []
+             self.classes = []
+             self.visit(tree)
+             return self.functions, self.classes
+         except SyntaxError as e:
+             # Track the error for debugging instead of silent failure
+             location = f" at line {e.lineno}" if e.lineno else ""
+             file_info = f" in {file_path}" if file_path else ""
+             self.last_error = f"SyntaxError{file_info}{location}: {e.msg}"
+             return [], []
+
+     def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+         """Extract function signature."""
+         if self._current_class is None:  # Only top-level functions
+             sig = self._extract_function_signature(node)
+             self.functions.append(sig)
+         self.generic_visit(node)
+
+     def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
+         """Extract async function signature."""
+         if self._current_class is None:
+             sig = self._extract_function_signature(node, is_async=True)
+             self.functions.append(sig)
+         self.generic_visit(node)
+
+     def visit_ClassDef(self, node: ast.ClassDef) -> None:
+         """Extract class signature with methods."""
+         self._current_class = node.name
+         methods = []
+         init_params: list[tuple[str, str, str | None]] = []
+
+         # Extract base classes
+         base_classes = []
+         for base in node.bases:
+             if isinstance(base, ast.Name):
+                 base_classes.append(base.id)
+             elif isinstance(base, ast.Attribute):
+                 base_classes.append(ast.unparse(base))
+
+         # Detect if this is an Enum
+         enum_bases = {"Enum", "IntEnum", "StrEnum", "Flag", "IntFlag", "auto"}
+         is_enum = any(b in enum_bases for b in base_classes)
+
+         # Detect if this is a dataclass
+         is_dataclass = False
+         for decorator in node.decorator_list:
+             if isinstance(decorator, ast.Name) and decorator.id == "dataclass":
+                 is_dataclass = True
+             elif isinstance(decorator, ast.Call):
+                 if isinstance(decorator.func, ast.Name) and decorator.func.id == "dataclass":
+                     is_dataclass = True
+
+         # Process methods
+         for item in node.body:
+             if isinstance(item, ast.FunctionDef | ast.AsyncFunctionDef):
+                 method_sig = self._extract_function_signature(
+                     item,
+                     is_async=isinstance(item, ast.AsyncFunctionDef),
+                 )
+                 methods.append(method_sig)
+
+                 # Extract __init__ params
+                 if item.name == "__init__":
+                     init_params = method_sig.params[1:]  # Skip 'self'
+
+         # Count required init params (those without defaults)
+         required_init_params = sum(1 for p in init_params if p[2] is None)
+
+         self.classes.append(
+             ClassSignature(
+                 name=node.name,
+                 methods=methods,
+                 init_params=init_params,
+                 base_classes=base_classes,
+                 docstring=ast.get_docstring(node),
+                 is_enum=is_enum,
+                 is_dataclass=is_dataclass,
+                 required_init_params=required_init_params,
+             ),
+         )
+
+         self._current_class = None
+         # Don't call generic_visit to avoid processing methods again
+
+     def _extract_function_signature(
+         self,
+         node: ast.FunctionDef | ast.AsyncFunctionDef,
+         is_async: bool = False,
+     ) -> FunctionSignature:
+         """Extract detailed signature from function node."""
+         # Extract parameters with types and defaults
+         params = []
+         defaults = list(node.args.defaults)
+         num_defaults = len(defaults)
+         num_args = len(node.args.args)
+
+         for i, arg in enumerate(node.args.args):
+             param_name = arg.arg
+             param_type = ast.unparse(arg.annotation) if arg.annotation else "Any"
+
+             # Calculate default index
+             default_idx = i - (num_args - num_defaults)
+             default_val = None
+             if default_idx >= 0:
+                 try:
+                     default_val = ast.unparse(defaults[default_idx])
+                 except Exception:
+                     default_val = "..."
+
+             params.append((param_name, param_type, default_val))
+
+         # Extract return type
+         return_type = ast.unparse(node.returns) if node.returns else None
+
+         # Find raised exceptions
+         raises: set[str] = set()
+         for child in ast.walk(node):
+             if isinstance(child, ast.Raise) and child.exc:
+                 if isinstance(child.exc, ast.Call):
+                     if isinstance(child.exc.func, ast.Name):
+                         raises.add(child.exc.func.id)
+                     elif isinstance(child.exc.func, ast.Attribute):
+                         raises.add(child.exc.func.attr)
+                 elif isinstance(child.exc, ast.Name):
+                     raises.add(child.exc.id)
+
+         # Detect side effects (simple heuristic)
+         has_side_effects = self._detect_side_effects(node)
+
+         # Estimate complexity
+         complexity = self._estimate_complexity(node)
+
+         # Extract decorators
+         decorators = []
+         for dec in node.decorator_list:
+             if isinstance(dec, ast.Name):
+                 decorators.append(dec.id)
+             elif isinstance(dec, ast.Attribute):
+                 decorators.append(ast.unparse(dec))
+             elif isinstance(dec, ast.Call):
+                 if isinstance(dec.func, ast.Name):
+                     decorators.append(dec.func.id)
+
+         return FunctionSignature(
+             name=node.name,
+             params=params,
+             return_type=return_type,
+             is_async=is_async or isinstance(node, ast.AsyncFunctionDef),
+             raises=raises,
+             has_side_effects=has_side_effects,
+             docstring=ast.get_docstring(node),
+             complexity=complexity,
+             decorators=decorators,
+         )
+
+     def _detect_side_effects(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
+         """Detect if function has side effects (writes to files, global state, etc.)."""
+         side_effect_names = {
+             "print",
+             "write",
+             "open",
+             "save",
+             "delete",
+             "remove",
+             "update",
+             "insert",
+             "execute",
+             "send",
+             "post",
+             "put",
+             "patch",
+         }
+
+         for child in ast.walk(node):
+             if isinstance(child, ast.Call):
+                 if isinstance(child.func, ast.Name):
+                     if child.func.id.lower() in side_effect_names:
+                         return True
+                 elif isinstance(child.func, ast.Attribute):
+                     if child.func.attr.lower() in side_effect_names:
+                         return True
+         return False
+
+     def _estimate_complexity(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> int:
+         """Estimate cyclomatic complexity (simplified)."""
+         complexity = 1
+         for child in ast.walk(node):
+             if isinstance(child, ast.If | ast.While | ast.For | ast.ExceptHandler):
+                 complexity += 1
+             elif isinstance(child, ast.BoolOp):
+                 complexity += len(child.values) - 1
+         return complexity
+
+
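# --- Editor's note (not part of the diff): a minimal usage sketch for
# ASTFunctionAnalyzer, based only on the signatures above. Hypothetical input.
#
#     analyzer = ASTFunctionAnalyzer()
#     funcs, classes = analyzer.analyze(
#         "def div(a: int, b: int) -> float:\n"
#         "    if b == 0:\n"
#         "        raise ValueError('b must be nonzero')\n"
#         "    return a / b\n",
#         file_path="example.py",
#     )
#     # funcs[0].name == "div"; funcs[0].raises == {"ValueError"}
#     # funcs[0].complexity == 2 (base 1 + one `if` branch)
#     # On a syntax error, analyze() returns ([], []) and sets
#     # analyzer.last_error instead of raising.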
+ # Define step configurations for executor-based execution
+ TEST_GEN_STEPS = {
+     "identify": WorkflowStepConfig(
+         name="identify",
+         task_type="triage",  # Cheap tier task
+         tier_hint="cheap",
+         description="Identify files needing tests",
+         max_tokens=2000,
+     ),
+     "analyze": WorkflowStepConfig(
+         name="analyze",
+         task_type="code_analysis",  # Capable tier task
+         tier_hint="capable",
+         description="Analyze code structure for test generation",
+         max_tokens=3000,
+     ),
+     "generate": WorkflowStepConfig(
+         name="generate",
+         task_type="code_generation",  # Capable tier task
+         tier_hint="capable",
+         description="Generate test cases",
+         max_tokens=4000,
+     ),
+     "review": WorkflowStepConfig(
+         name="review",
+         task_type="final_review",  # Premium tier task
+         tier_hint="premium",
+         description="Review and improve generated test suite",
+         max_tokens=3000,
+     ),
+ }
+
+
+ class TestGenerationWorkflow(BaseWorkflow):
+     """Generate tests targeting areas with historical bugs.
+
+     Prioritizes test generation for files that have historically
+     been bug-prone and have low test coverage.
+     """
+
+     name = "test-gen"
+     description = "Generate tests targeting areas with historical bugs"
+     stages = ["identify", "analyze", "generate", "review"]
+     tier_map = {
+         "identify": ModelTier.CHEAP,
+         "analyze": ModelTier.CAPABLE,
+         "generate": ModelTier.CAPABLE,
+         "review": ModelTier.PREMIUM,
+     }
+
+     def __init__(
+         self,
+         patterns_dir: str = "./patterns",
+         min_tests_for_review: int = 10,
+         write_tests: bool = False,
+         output_dir: str = "tests/generated",
+         **kwargs: Any,
+     ):
+         """Initialize test generation workflow.
+
+         Args:
+             patterns_dir: Directory containing learned patterns
+             min_tests_for_review: Minimum tests generated to trigger premium review
+             write_tests: If True, write generated tests to output_dir
+             output_dir: Directory to write generated test files
+             **kwargs: Additional arguments passed to BaseWorkflow
+
+         """
+         super().__init__(**kwargs)
+         self.patterns_dir = patterns_dir
+         self.min_tests_for_review = min_tests_for_review
+         self.write_tests = write_tests
+         self.output_dir = output_dir
+         self._test_count: int = 0
+         self._bug_hotspots: list[str] = []
+         self._load_bug_hotspots()
+
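# --- Editor's note (not part of the diff): constructing the workflow with the
# documented arguments. Extra kwargs go to BaseWorkflow, whose signature is
# not shown in this diff, so they are omitted here.
#
#     wf = TestGenerationWorkflow(
#         patterns_dir="./patterns",        # reads ./patterns/debugging.json
#         min_tests_for_review=10,          # below this, review runs on CAPABLE
#         write_tests=True,                 # persist generated test files
#         output_dir="tests/generated",
#     )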
+     def _load_bug_hotspots(self) -> None:
+         """Load files with historical bugs from pattern library."""
+         debugging_file = Path(self.patterns_dir) / "debugging.json"
+         if debugging_file.exists():
+             try:
+                 with open(debugging_file) as fh:
+                     data = json.load(fh)
+                 patterns = data.get("patterns", [])
+                 # Extract files from bug patterns
+                 files = set()
+                 for p in patterns:
+                     for file_entry in p.get("files_affected", []):
+                         if file_entry is None:
+                             continue
+                         files.add(str(file_entry))
+                 self._bug_hotspots = list(files)
+             except (json.JSONDecodeError, OSError):
+                 pass
+
+     def should_skip_stage(self, stage_name: str, input_data: Any) -> tuple[bool, str | None]:
+         """Downgrade review stage if few tests generated.
+
+         Args:
+             stage_name: Name of the stage to check
+             input_data: Current workflow data
+
+         Returns:
+             Tuple of (should_skip, reason)
+
+         """
+         if stage_name == "review":
+             if self._test_count < self.min_tests_for_review:
+                 # Downgrade to CAPABLE
+                 self.tier_map["review"] = ModelTier.CAPABLE
+             return False, None
+         return False, None
+
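# --- Editor's note (not part of the diff): should_skip_stage() never actually
# skips; it returns (False, None) on every path. Its real effect is the side
# effect above: when fewer than min_tests_for_review tests were generated,
# tier_map["review"] is rewritten from PREMIUM to CAPABLE, so the review stage
# still runs but on the mid-tier model. With the defaults, 9 generated tests
# means review on CAPABLE; 10 or more keeps review on PREMIUM.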
+     async def run_stage(
+         self,
+         stage_name: str,
+         tier: ModelTier,
+         input_data: Any,
+     ) -> tuple[Any, int, int]:
+         """Route to specific stage implementation."""
+         if stage_name == "identify":
+             return await self._identify(input_data, tier)
+         if stage_name == "analyze":
+             return await self._analyze(input_data, tier)
+         if stage_name == "generate":
+             return await self._generate(input_data, tier)
+         if stage_name == "review":
+             return await self._review(input_data, tier)
+         raise ValueError(f"Unknown stage: {stage_name}")
+
+     async def _identify(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
+         """Identify files needing tests.
+
+         Finds files with low coverage, historical bugs, or
+         no existing tests.
+
+         Configurable options via input_data:
+             max_files_to_scan: Maximum files to scan before stopping (default: 1000)
+             max_file_size_kb: Skip files larger than this (default: 200)
+             max_candidates: Maximum candidates to return (default: 50)
+             skip_patterns: List of directory patterns to skip (default: DEFAULT_SKIP_PATTERNS)
+             include_all_files: Include files with priority=0 (default: False)
+         """
+         target_path = input_data.get("path", ".")
+         file_types = input_data.get("file_types", [".py"])
+
+         # Parse configurable limits with sensible defaults
+         max_files_to_scan = input_data.get("max_files_to_scan", 1000)
+         max_file_size_kb = input_data.get("max_file_size_kb", 200)
+         max_candidates = input_data.get("max_candidates", 50)
+         skip_patterns = input_data.get("skip_patterns", DEFAULT_SKIP_PATTERNS)
+         include_all_files = input_data.get("include_all_files", False)
+
+         target = Path(target_path)
+         candidates: list[dict] = []
+
+         # Track project scope for enterprise reporting
+         total_source_files = 0
+         existing_test_files = 0
+
+         # Track scan summary for debugging/visibility
+         # Use separate counters for type safety
+         scan_counts = {
+             "files_scanned": 0,
+             "files_too_large": 0,
+             "files_read_error": 0,
+             "files_excluded_by_pattern": 0,
+         }
+         early_exit_reason: str | None = None
+
+         max_file_size_bytes = max_file_size_kb * 1024
+         scan_limit_reached = False
+
+         if target.exists():
+             for ext in file_types:
+                 if scan_limit_reached:
+                     break
+
+                 for file_path in target.rglob(f"*{ext}"):
+                     # Check if we've hit the scan limit
+                     if scan_counts["files_scanned"] >= max_files_to_scan:
+                         early_exit_reason = f"max_files_to_scan ({max_files_to_scan}) reached"
+                         scan_limit_reached = True
+                         break
+
+                     # Skip non-code directories using configurable patterns
+                     file_str = str(file_path)
+                     if any(skip in file_str for skip in skip_patterns):
+                         scan_counts["files_excluded_by_pattern"] += 1
+                         continue
+
+                     # Count test files separately for scope awareness
+                     if "test_" in file_str or "_test." in file_str or "/tests/" in file_str:
+                         existing_test_files += 1
+                         continue
+
+                     # Check file size before reading
+                     try:
+                         file_size = file_path.stat().st_size
+                         if file_size > max_file_size_bytes:
+                             scan_counts["files_too_large"] += 1
+                             continue
+                     except OSError:
+                         scan_counts["files_read_error"] += 1
+                         continue
+
+                     # Count source files and increment scan counter
+                     total_source_files += 1
+                     scan_counts["files_scanned"] += 1
+
+                     try:
+                         content = file_path.read_text(errors="ignore")
+                         lines = len(content.splitlines())
+
+                         # Check if in bug hotspots
+                         is_hotspot = any(hotspot in file_str for hotspot in self._bug_hotspots)
+
+                         # Check for existing tests
+                         test_file = self._find_test_file(file_path)
+                         has_tests = test_file.exists() if test_file else False
+
+                         # Calculate priority
+                         priority = 0
+                         if is_hotspot:
+                             priority += 50
+                         if not has_tests:
+                             priority += 30
+                         if lines > 100:
+                             priority += 10
+                         if lines > 300:
+                             priority += 10
+
+                         # Include if priority > 0 OR include_all_files is set
+                         if priority > 0 or include_all_files:
+                             candidates.append(
+                                 {
+                                     "file": file_str,
+                                     "lines": lines,
+                                     "is_hotspot": is_hotspot,
+                                     "has_tests": has_tests,
+                                     "priority": priority,
+                                 },
+                             )
+                     except OSError:
+                         scan_counts["files_read_error"] += 1
+                         continue
+
+         # Sort by priority
+         candidates.sort(key=lambda x: -x["priority"])
+
+         input_tokens = len(str(input_data)) // 4
+         output_tokens = len(str(candidates)) // 4
+
+         # Calculate scope metrics for enterprise reporting
+         analyzed_count = min(max_candidates, len(candidates))
+         coverage_pct = (analyzed_count / len(candidates) * 100) if candidates else 100
+
+         return (
+             {
+                 "candidates": candidates[:max_candidates],
+                 "total_candidates": len(candidates),
+                 "hotspot_count": len([c for c in candidates if c["is_hotspot"]]),
+                 "untested_count": len([c for c in candidates if not c["has_tests"]]),
+                 # Scope awareness fields for enterprise reporting
+                 "total_source_files": total_source_files,
+                 "existing_test_files": existing_test_files,
+                 "large_project_warning": len(candidates) > 100,
+                 "analysis_coverage_percent": coverage_pct,
+                 # Scan summary for debugging/visibility
+                 "scan_summary": {**scan_counts, "early_exit_reason": early_exit_reason},
+                 # Pass through config for subsequent stages
+                 "config": {
+                     "max_files_to_analyze": input_data.get("max_files_to_analyze", 20),
+                     "max_functions_per_file": input_data.get("max_functions_per_file", 30),
+                     "max_classes_per_file": input_data.get("max_classes_per_file", 15),
+                     "max_files_to_generate": input_data.get("max_files_to_generate", 15),
+                     "max_functions_to_generate": input_data.get("max_functions_to_generate", 8),
+                     "max_classes_to_generate": input_data.get("max_classes_to_generate", 4),
+                 },
+                 **input_data,
+             },
+             input_tokens,
+             output_tokens,
+         )
+
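# --- Editor's note (not part of the diff): a sketch of the input_data knobs
# _identify() understands, all optional (names taken from the docstring and
# the config pass-through above):
#
#     input_data = {
#         "path": ".",
#         "file_types": [".py"],
#         "max_files_to_scan": 1000,
#         "max_file_size_kb": 200,
#         "max_candidates": 50,
#         "include_all_files": False,
#         # forwarded to later stages via the returned "config" dict:
#         "max_files_to_analyze": 20,
#         "max_functions_per_file": 30,
#     }
#
# Priority scoring: +50 bug hotspot, +30 no test file, +10 if over 100 lines,
# +10 more if over 300 lines; candidates are returned best-first.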
+     def _find_test_file(self, source_file: Path) -> Path | None:
+         """Find corresponding test file for a source file."""
+         name = source_file.stem
+         parent = source_file.parent
+
+         # Check common test locations
+         possible = [
+             parent / f"test_{name}.py",
+             parent / "tests" / f"test_{name}.py",
+             parent.parent / "tests" / f"test_{name}.py",
+         ]
+
+         for p in possible:
+             if p.exists():
+                 return p
+
+         return possible[0]  # Return expected location even if it doesn't exist
+
+     async def _analyze(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
+         """Analyze code structure for test generation.
+
+         Examines functions, classes, and patterns to determine
+         what tests should be generated.
+
+         Uses config from _identify stage for limits:
+             max_files_to_analyze: Maximum files to analyze (default: 20)
+             max_functions_per_file: Maximum functions per file (default: 30)
+             max_classes_per_file: Maximum classes per file (default: 15)
+         """
+         # Get config from previous stage or use defaults
+         config = input_data.get("config", {})
+         max_files_to_analyze = config.get("max_files_to_analyze", 20)
+         max_functions_per_file = config.get("max_functions_per_file", 30)
+         max_classes_per_file = config.get("max_classes_per_file", 15)
+
+         candidates = input_data.get("candidates", [])[:max_files_to_analyze]
+         analysis: list[dict] = []
+         parse_errors: list[str] = []  # Track files that failed to parse
+
+         for candidate in candidates:
+             file_path = Path(candidate["file"])
+             if not file_path.exists():
+                 continue
+
+             try:
+                 content = file_path.read_text(errors="ignore")
+
+                 # Extract testable items with configurable limits and error tracking
+                 functions, func_error = self._extract_functions(
+                     content,
+                     candidate["file"],
+                     max_functions_per_file,
+                 )
+                 classes, class_error = self._extract_classes(
+                     content,
+                     candidate["file"],
+                     max_classes_per_file,
+                 )
+
+                 # Track parse errors for visibility
+                 if func_error:
+                     parse_errors.append(func_error)
+                 if class_error and class_error != func_error:
+                     parse_errors.append(class_error)
+
+                 analysis.append(
+                     {
+                         "file": candidate["file"],
+                         "priority": candidate["priority"],
+                         "functions": functions,
+                         "classes": classes,
+                         "function_count": len(functions),
+                         "class_count": len(classes),
+                         "test_suggestions": self._generate_suggestions(functions, classes),
+                     },
+                 )
+             except OSError:
+                 continue
+
+         input_tokens = len(str(input_data)) // 4
+         output_tokens = len(str(analysis)) // 4
+
+         return (
+             {
+                 "analysis": analysis,
+                 "total_functions": sum(a["function_count"] for a in analysis),
+                 "total_classes": sum(a["class_count"] for a in analysis),
+                 "parse_errors": parse_errors,  # Expose errors for debugging
+                 **input_data,
+             },
+             input_tokens,
+             output_tokens,
+         )
+
+     def _extract_functions(
+         self,
+         content: str,
+         file_path: str = "",
+         max_functions: int = 30,
+     ) -> tuple[list[dict], str | None]:
+         """Extract function definitions from Python code using AST analysis.
+
+         Args:
+             content: Python source code
+             file_path: File path for error reporting
+             max_functions: Maximum functions to extract (configurable)
+
+         Returns:
+             Tuple of (functions list, error message or None)
+
+         """
+         analyzer = ASTFunctionAnalyzer()
+         functions, _ = analyzer.analyze(content, file_path)
+
+         result = []
+         for sig in functions[:max_functions]:
+             if not sig.name.startswith("_") or sig.name.startswith("__"):
+                 result.append(
+                     {
+                         "name": sig.name,
+                         "params": [(p[0], p[1], p[2]) for p in sig.params],
+                         "param_names": [p[0] for p in sig.params],
+                         "is_async": sig.is_async,
+                         "return_type": sig.return_type,
+                         "raises": list(sig.raises),
+                         "has_side_effects": sig.has_side_effects,
+                         "complexity": sig.complexity,
+                         "docstring": sig.docstring,
+                     },
+                 )
+         return result, analyzer.last_error
+
+     def _extract_classes(
+         self,
+         content: str,
+         file_path: str = "",
+         max_classes: int = 15,
+     ) -> tuple[list[dict], str | None]:
+         """Extract class definitions from Python code using AST analysis.
+
+         Args:
+             content: Python source code
+             file_path: File path for error reporting
+             max_classes: Maximum classes to extract (configurable)
+
+         Returns:
+             Tuple of (classes list, error message or None)
+
+         """
+         analyzer = ASTFunctionAnalyzer()
+         _, classes = analyzer.analyze(content, file_path)
+
+         result = []
+         for sig in classes[:max_classes]:
+             # Skip enums - they don't need traditional class tests
+             if sig.is_enum:
+                 continue
+
+             methods = [
+                 {
+                     "name": m.name,
+                     "params": [(p[0], p[1], p[2]) for p in m.params],
+                     "is_async": m.is_async,
+                     "raises": list(m.raises),
+                 }
+                 for m in sig.methods
+                 if not m.name.startswith("_") or m.name == "__init__"
+             ]
+             result.append(
+                 {
+                     "name": sig.name,
+                     "init_params": [(p[0], p[1], p[2]) for p in sig.init_params],
+                     "methods": methods,
+                     "base_classes": sig.base_classes,
+                     "docstring": sig.docstring,
+                     "is_dataclass": sig.is_dataclass,
+                     "required_init_params": sig.required_init_params,
+                 },
+             )
+         return result, analyzer.last_error
+
+     def _generate_suggestions(self, functions: list[dict], classes: list[dict]) -> list[str]:
+         """Generate test suggestions based on code structure."""
+         suggestions = []
+
+         for func in functions[:5]:
+             if func["params"]:
+                 suggestions.append(f"Test {func['name']} with valid inputs")
+                 suggestions.append(f"Test {func['name']} with edge cases")
+             if func["is_async"]:
+                 suggestions.append(f"Test {func['name']} async behavior")
+
+         for cls in classes[:3]:
+             suggestions.append(f"Test {cls['name']} initialization")
+             suggestions.append(f"Test {cls['name']} methods")
+
+         return suggestions
+
+     async def _generate(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
+         """Generate test cases.
+
+         Creates test code targeting identified functions
+         and classes, focusing on edge cases.
+
+         Uses config from _identify stage for limits:
+             max_files_to_generate: Maximum files to generate tests for (default: 15)
+             max_functions_to_generate: Maximum functions per file (default: 8)
+             max_classes_to_generate: Maximum classes per file (default: 4)
+         """
+         # Get config from previous stages or use defaults
+         config = input_data.get("config", {})
+         max_files_to_generate = config.get("max_files_to_generate", 15)
+         max_functions_to_generate = config.get("max_functions_to_generate", 8)
+         max_classes_to_generate = config.get("max_classes_to_generate", 4)
+
+         analysis = input_data.get("analysis", [])
+         generated_tests: list[dict] = []
+
+         for item in analysis[:max_files_to_generate]:
+             file_path = item["file"]
+             module_name = Path(file_path).stem
+
+             tests = []
+             for func in item.get("functions", [])[:max_functions_to_generate]:
+                 test_code = self._generate_test_for_function(module_name, func)
+                 tests.append(
+                     {
+                         "target": func["name"],
+                         "type": "function",
+                         "code": test_code,
+                     },
+                 )
+
+             for cls in item.get("classes", [])[:max_classes_to_generate]:
+                 test_code = self._generate_test_for_class(module_name, cls)
+                 tests.append(
+                     {
+                         "target": cls["name"],
+                         "type": "class",
+                         "code": test_code,
+                     },
+                 )
+
+             if tests:
+                 generated_tests.append(
+                     {
+                         "source_file": file_path,
+                         "test_file": f"test_{module_name}.py",
+                         "tests": tests,
+                         "test_count": len(tests),
+                     },
+                 )
+
+         self._test_count = sum(t["test_count"] for t in generated_tests)
+
+         # Write tests to files if enabled (via input_data or instance config)
+         write_tests = input_data.get("write_tests", self.write_tests)
+         output_dir = input_data.get("output_dir", self.output_dir)
+         written_files: list[str] = []
+
+         if write_tests and generated_tests:
+             output_path = Path(output_dir)
+             output_path.mkdir(parents=True, exist_ok=True)
+
+             for test_item in generated_tests:
+                 test_filename = test_item["test_file"]
+                 test_file_path = output_path / test_filename
+
+                 # Combine all test code for this file
+                 combined_code = []
+                 imports_added = set()
+
+                 for test in test_item["tests"]:
+                     code = test["code"]
+                     # Extract and dedupe imports
+                     for line in code.split("\n"):
+                         if line.startswith("import ") or line.startswith("from "):
+                             if line not in imports_added:
+                                 imports_added.add(line)
+                         elif line.strip():
+                             combined_code.append(line)
+
+                 # Write the combined test file
+                 final_code = "\n".join(sorted(imports_added)) + "\n\n" + "\n".join(combined_code)
+                 test_file_path.write_text(final_code)
+                 written_files.append(str(test_file_path))
+                 test_item["written_to"] = str(test_file_path)
+
+         input_tokens = len(str(input_data)) // 4
+         output_tokens = sum(len(str(t)) for t in generated_tests) // 4
+
+         return (
+             {
+                 "generated_tests": generated_tests,
+                 "total_tests_generated": self._test_count,
+                 "written_files": written_files,
+                 "tests_written": len(written_files) > 0,
+                 **input_data,
+             },
+             input_tokens,
+             output_tokens,
+         )
+
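# --- Editor's note (not part of the diff): when write_tests is enabled, each
# output file is assembled by hoisting and deduplicating every
# "import ..."/"from ..." line across that file's test snippets, then writing
# the sorted imports, a blank line, and the remaining code. Note the merge
# drops blank lines inside snippets (the `elif line.strip()` branch), so the
# written file is valid Python but densely packed.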
+     def _generate_test_for_function(self, module: str, func: dict) -> str:
+         """Generate executable tests for a function based on AST analysis."""
+         name = func["name"]
+         params = func.get("params", [])  # List of (name, type, default) tuples
+         param_names = func.get("param_names", [p[0] if isinstance(p, tuple) else p for p in params])
+         is_async = func.get("is_async", False)
+         return_type = func.get("return_type")
+         raises = func.get("raises", [])
+         has_side_effects = func.get("has_side_effects", False)
+
+         # Generate test values based on parameter types
+         test_cases = self._generate_test_cases_for_params(params)
+         param_str = ", ".join(test_cases.get("valid_args", [""] * len(params)))
+
+         # Build parametrized test if we have multiple test cases
+         parametrize_cases = test_cases.get("parametrize_cases", [])
+
+         tests = []
+         tests.append(f"import pytest\nfrom {module} import {name}\n")
+
+         # Generate parametrized test if we have cases
+         if parametrize_cases and len(parametrize_cases) > 1:
+             param_names_str = ", ".join(param_names) if param_names else "value"
+             cases_str = ",\n    ".join(parametrize_cases)
+
+             if is_async:
+                 tests.append(
+                     f'''
+ @pytest.mark.parametrize("{param_names_str}", [
+     {cases_str},
+ ])
+ @pytest.mark.asyncio
+ async def test_{name}_with_various_inputs({param_names_str}):
+     """Test {name} with various input combinations."""
+     result = await {name}({", ".join(param_names)})
+     assert result is not None
+ ''',
+                 )
+             else:
+                 tests.append(
+                     f'''
+ @pytest.mark.parametrize("{param_names_str}", [
+     {cases_str},
+ ])
+ def test_{name}_with_various_inputs({param_names_str}):
+     """Test {name} with various input combinations."""
+     result = {name}({", ".join(param_names)})
+     assert result is not None
+ ''',
+                 )
+         # Simple valid input test
+         elif is_async:
+             tests.append(
+                 f'''
+ @pytest.mark.asyncio
+ async def test_{name}_returns_value():
+     """Test that {name} returns a value with valid inputs."""
+     result = await {name}({param_str})
+     assert result is not None
+ ''',
+             )
+         else:
+             tests.append(
+                 f'''
+ def test_{name}_returns_value():
+     """Test that {name} returns a value with valid inputs."""
+     result = {name}({param_str})
+     assert result is not None
+ ''',
+             )
+
+         # Generate edge case tests based on parameter types
+         edge_cases = test_cases.get("edge_cases", [])
+         if edge_cases:
+             edge_cases_str = ",\n    ".join(edge_cases)
+             if is_async:
+                 tests.append(
+                     f'''
+ @pytest.mark.parametrize("edge_input", [
+     {edge_cases_str},
+ ])
+ @pytest.mark.asyncio
+ async def test_{name}_edge_cases(edge_input):
+     """Test {name} with edge case inputs."""
+     try:
+         result = await {name}(edge_input)
+         # Function should either return a value or raise an expected error
+         assert result is not None or result == 0 or result == "" or result == []
+     except (ValueError, TypeError, KeyError) as e:
+         # Expected error for edge cases
+         assert str(e)  # Error message should not be empty
+ ''',
+                 )
+             else:
+                 tests.append(
+                     f'''
+ @pytest.mark.parametrize("edge_input", [
+     {edge_cases_str},
+ ])
+ def test_{name}_edge_cases(edge_input):
+     """Test {name} with edge case inputs."""
+     try:
+         result = {name}(edge_input)
+         # Function should either return a value or raise an expected error
+         assert result is not None or result == 0 or result == "" or result == []
+     except (ValueError, TypeError, KeyError) as e:
+         # Expected error for edge cases
+         assert str(e)  # Error message should not be empty
+ ''',
+                 )
+
+         # Generate exception tests for each raised exception
+         for exc_type in raises[:3]:  # Limit to 3 exception types
+             if is_async:
+                 tests.append(
+                     f'''
+ @pytest.mark.asyncio
+ async def test_{name}_raises_{exc_type.lower()}():
+     """Test that {name} raises {exc_type} for invalid inputs."""
+     with pytest.raises({exc_type}):
+         await {name}(None)  # Adjust input to trigger {exc_type}
+ ''',
+                 )
+             else:
+                 tests.append(
+                     f'''
+ def test_{name}_raises_{exc_type.lower()}():
+     """Test that {name} raises {exc_type} for invalid inputs."""
+     with pytest.raises({exc_type}):
+         {name}(None)  # Adjust input to trigger {exc_type}
+ ''',
+                 )
+
+         # Add return type assertion if we know the type
+         if return_type and return_type not in ("None", "Any"):
+             type_check = self._get_type_assertion(return_type)
+             if type_check and not has_side_effects:
+                 if is_async:
+                     tests.append(
+                         f'''
+ @pytest.mark.asyncio
+ async def test_{name}_returns_correct_type():
+     """Test that {name} returns the expected type."""
+     result = await {name}({param_str})
+     {type_check}
+ ''',
+                     )
+                 else:
+                     tests.append(
+                         f'''
+ def test_{name}_returns_correct_type():
+     """Test that {name} returns the expected type."""
+     result = {name}({param_str})
+     {type_check}
+ ''',
+                     )
+
+         return "\n".join(tests)
+
+     def _generate_test_cases_for_params(self, params: list) -> dict:
+         """Generate test cases based on parameter types."""
+         valid_args = []
+         parametrize_cases = []
+         edge_cases = []
+
+         for param in params:
+             if isinstance(param, tuple) and len(param) >= 2:
+                 _name, type_hint, default = param[0], param[1], param[2] if len(param) > 2 else None
+             else:
+                 _name = param if isinstance(param, str) else str(param)
+                 type_hint = "Any"
+                 default = None
+
+             # Generate valid value based on type
+             if "str" in type_hint.lower():
+                 valid_args.append('"test_value"')
+                 parametrize_cases.extend(['"hello"', '"world"', '"test_string"'])
+                 edge_cases.extend(['""', '" "', '"a" * 1000'])
+             elif "int" in type_hint.lower():
+                 valid_args.append("42")
+                 parametrize_cases.extend(["0", "1", "100", "-1"])
+                 edge_cases.extend(["0", "-1", "2**31 - 1"])
+             elif "float" in type_hint.lower():
+                 valid_args.append("3.14")
+                 parametrize_cases.extend(["0.0", "1.0", "-1.5", "100.5"])
+                 edge_cases.extend(["0.0", "-0.0", "float('inf')"])
+             elif "bool" in type_hint.lower():
+                 valid_args.append("True")
+                 parametrize_cases.extend(["True", "False"])
+             elif "list" in type_hint.lower():
+                 valid_args.append("[1, 2, 3]")
+                 parametrize_cases.extend(["[]", "[1]", "[1, 2, 3]"])
+                 edge_cases.extend(["[]", "[None]"])
+             elif "dict" in type_hint.lower():
+                 valid_args.append('{"key": "value"}')
+                 parametrize_cases.extend(["{}", '{"a": 1}', '{"key": "value"}'])
+                 edge_cases.extend(["{}"])
+             elif default is not None:
+                 valid_args.append(str(default))
+             else:
+                 valid_args.append("None")
+                 edge_cases.append("None")
+
+         return {
+             "valid_args": valid_args,
+             "parametrize_cases": parametrize_cases[:5],  # Limit cases
+             "edge_cases": list(set(edge_cases))[:5],  # Unique edge cases
+         }
+
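# --- Editor's note (not part of the diff): expected output for a simple
# parameter list, derived from the branches above. Hypothetical input:
#
#     self._generate_test_cases_for_params([("count", "int", None)])
#     # -> {
#     #   "valid_args": ["42"],
#     #   "parametrize_cases": ["0", "1", "100", "-1"],
#     #   "edge_cases": ["0", "-1", "2**31 - 1"],  # order varies: set() dedupe
#     # }
#
# Type matching is substring-based on the hint ("int" also matches
# "Optional[int]" or even "print"), so unusual hint names can pick odd values.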
1136
+ def _get_type_assertion(self, return_type: str) -> str | None:
1137
+ """Generate assertion for return type checking."""
1138
+ type_map = {
1139
+ "str": "assert isinstance(result, str)",
1140
+ "int": "assert isinstance(result, int)",
1141
+ "float": "assert isinstance(result, (int, float))",
1142
+ "bool": "assert isinstance(result, bool)",
1143
+ "list": "assert isinstance(result, list)",
1144
+ "dict": "assert isinstance(result, dict)",
1145
+ "tuple": "assert isinstance(result, tuple)",
1146
+ }
1147
+ for type_name, assertion in type_map.items():
1148
+ if type_name in return_type.lower():
1149
+ return assertion
1150
+ return None
1151
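Illustrative calls showing the substring matching (hypothetical):

    workflow._get_type_assertion("list[str]")      # 'assert isinstance(result, list)'
    workflow._get_type_assertion("Optional[int]")  # 'assert isinstance(result, int)'
    workflow._get_type_assertion("MyModel")        # None - unknown types get no assertion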
+
+    def _get_param_test_values(self, type_hint: str) -> list[str]:
+        """Get test values for a single parameter based on its type."""
+        type_hint_lower = type_hint.lower()
+        # Check container types before scalars so "list[int]" yields lists.
+        if "list" in type_hint_lower:
+            return ["[]", "[1, 2, 3]"]
+        if "dict" in type_hint_lower:
+            return ["{}", '{"key": "value"}']
+        if "str" in type_hint_lower:
+            return ['"hello"', '"world"', '"test_string"']
+        if "int" in type_hint_lower:
+            return ["0", "1", "42", "-1"]
+        if "float" in type_hint_lower:
+            return ["0.0", "1.0", "3.14"]
+        if "bool" in type_hint_lower:
+            return ["True", "False"]
+        return ['"test_value"']
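The per-parameter variant behaves the same way (hypothetical calls):

    workflow._get_param_test_values("list[int]")  # ['[]', '[1, 2, 3]']
    workflow._get_param_test_values("bool")       # ['True', 'False']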
+
+    def _generate_test_for_class(self, module: str, cls: dict) -> str:
+        """Generate executable test class based on AST analysis."""
+        name = cls["name"]
+        init_params = cls.get("init_params", [])
+        methods = cls.get("methods", [])
+        required_params = cls.get("required_init_params", 0)
+        _docstring = cls.get("docstring", "")  # Reserved for future use
+
+        # Generate constructor arguments - ensure we have values for ALL required params
+        init_args = self._generate_test_cases_for_params(init_params)
+        valid_args = init_args.get("valid_args", [])
+
+        # Ensure we have enough args for required params
+        while len(valid_args) < required_params:
+            valid_args.append('"test_value"')
+
+        init_arg_str = ", ".join(valid_args)
+
+        tests = []
+        tests.append(f"import pytest\nfrom {module} import {name}\n")
+
+        # Fixture for class instance
+        tests.append(
+            f'''
+@pytest.fixture
+def {name.lower()}_instance():
+    """Create a {name} instance for testing."""
+    return {name}({init_arg_str})
+''',
+        )
+
+        # Test initialization
+        tests.append(
+            f'''
+class Test{name}:
+    """Tests for {name} class."""
+
+    def test_initialization(self):
+        """Test that {name} can be instantiated."""
+        instance = {name}({init_arg_str})
+        assert instance is not None
+''',
+        )
+
+        # Only generate parametrized tests for single-param classes to avoid tuple mismatches
+        if (
+            len(init_params) == 1
+            and isinstance(init_params[0], tuple)
+            and len(init_params[0]) >= 3
+            and init_params[0][2] is None
+        ):
+            # Single required param - safe to parametrize
+            param_name = init_params[0][0]
+            param_type = init_params[0][1]
+            cases = self._get_param_test_values(param_type)
+            if len(cases) > 1:
+                cases_str = ",\n        ".join(cases)
+                tests.append(
+                    f'''
+    @pytest.mark.parametrize("{param_name}", [
+        {cases_str},
+    ])
+    def test_initialization_with_various_args(self, {param_name}):
+        """Test {name} initialization with various arguments."""
+        instance = {name}({param_name})
+        assert instance is not None
+''',
+                )
+
+        # Generate tests for each public method
+        for method in methods[:5]:  # Limit to 5 methods
+            method_name = method.get("name", "")
+            if method_name.startswith("_"):
+                # Skip private and dunder methods (__init__ is covered above)
+                continue
+
+            method_params = method.get("params", [])[1:]  # Skip self
+            is_async = method.get("is_async", False)
+            raises = method.get("raises", [])
+
+            # Generate method call args
+            method_args = self._generate_test_cases_for_params(method_params)
+            method_arg_str = ", ".join(method_args.get("valid_args", []))
+
+            if is_async:
+                tests.append(
+                    f'''
+    @pytest.mark.asyncio
+    async def test_{method_name}_returns_value(self, {name.lower()}_instance):
+        """Test that {method_name} returns a value."""
+        result = await {name.lower()}_instance.{method_name}({method_arg_str})
+        assert result is not None  # Falsy returns such as 0, "", or [] still pass
+''',
+                )
+            else:
+                tests.append(
+                    f'''
+    def test_{method_name}_returns_value(self, {name.lower()}_instance):
+        """Test that {method_name} returns a value."""
+        result = {name.lower()}_instance.{method_name}({method_arg_str})
+        assert result is not None  # Falsy returns such as 0, "", or [] still pass
+''',
+                )
+
+            # Add exception tests for methods that raise
+            for exc_type in raises[:2]:
+                if is_async:
+                    tests.append(
+                        f'''
+    @pytest.mark.asyncio
+    async def test_{method_name}_raises_{exc_type.lower()}(self, {name.lower()}_instance):
+        """Test that {method_name} raises {exc_type} for invalid inputs."""
+        with pytest.raises({exc_type}):
+            await {name.lower()}_instance.{method_name}(None)
+''',
+                    )
+                else:
+                    tests.append(
+                        f'''
+    def test_{method_name}_raises_{exc_type.lower()}(self, {name.lower()}_instance):
+        """Test that {method_name} raises {exc_type} for invalid inputs."""
+        with pytest.raises({exc_type}):
+            {name.lower()}_instance.{method_name}(None)
+''',
+                    )
+
+        return "\n".join(tests)
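Put together, the class path above produces a skeleton like the following for a hypothetical `Cache` class with one required `str` parameter (the module path is invented for illustration):

    import pytest
    from mypkg.cache import Cache  # hypothetical import

    @pytest.fixture
    def cache_instance():
        """Create a Cache instance for testing."""
        return Cache("test_value")

    class TestCache:
        """Tests for Cache class."""

        def test_initialization(self):
            """Test that Cache can be instantiated."""
            instance = Cache("test_value")
            assert instance is not None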
+
+    async def _review(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
+        """Review and improve generated tests using the LLM.
+
+        This stage receives the generated test code and uses the LLM
+        to create the final analysis report.
+        """
+        # Get the generated tests from the previous stage
+        generated_tests = input_data.get("generated_tests", [])
+        if not generated_tests:
+            # If no tests were generated, return the input data as is.
+            return input_data, 0, 0
+
+        # Prepare the context for the LLM by formatting the generated test code
+        test_context = "<generated_tests>\n"
+        total_test_count = 0
+        for test_item in generated_tests:
+            test_context += f'  <file path="{test_item["source_file"]}">\n'
+            for test in test_item["tests"]:
+                # Extract ALL test names from the code (not just the first one)
+                test_names = []
+                try:
+                    # Use findall to get ALL test functions
+                    matches = re.findall(r"def\s+(test_\w+)", test["code"])
+                    test_names = matches if matches else ["unnamed"]
+                except Exception:
+                    test_names = ["unnamed"]
+
+                # Report each test function found
+                for test_name in test_names:
+                    test_context += f'    <test name="{test_name}" target="{test["target"]}" type="{test.get("type", "unknown")}" />\n'
+                    total_test_count += 1
+            test_context += "  </file>\n"
+        test_context += "</generated_tests>\n"
+        test_context += f"\n<summary>Total test functions: {total_test_count}</summary>\n"
+
+        # Build the prompt using XML if enabled
+        target_files = [item["source_file"] for item in generated_tests]
+        file_list = "\n".join(f"  - {f}" for f in target_files)
+
+        # Check if XML prompts are enabled
+        if self._is_xml_enabled():
+            # Use XML-enhanced prompt for better structure and reliability
+            user_message = self._render_xml_prompt(
+                role="test automation engineer and quality analyst",
+                goal="Analyze generated test suite and identify coverage gaps",
+                instructions=[
+                    "Count total test functions generated across all files",
+                    "Identify which classes and functions are tested",
+                    "Find critical gaps in test coverage (untested edge cases, error paths)",
+                    "Assess quality of existing tests (assertions, test data, completeness)",
+                    "Prioritize missing tests by impact and risk",
+                    "Generate specific, actionable test recommendations",
+                ],
+                constraints=[
+                    "Output ONLY the structured report - no conversation or questions",
+                    "START with '# Test Gap Analysis Report' - no preamble",
+                    "Use markdown tables for metrics and coverage",
+                    "Classify gaps by severity (HIGH/MEDIUM/LOW)",
+                    "Provide numbered prioritized recommendations",
+                ],
+                input_type="generated_tests",
+                input_payload=test_context,
+                extra={
+                    "total_test_count": total_test_count,
+                    "files_covered": len(generated_tests),
+                    "target_files": ", ".join(target_files),
+                },
+            )
+            system_prompt = None  # The XML prompt includes all context
+        else:
+            # Use legacy plain-text prompts
+            system_prompt = f"""You are an automated test coverage analysis tool. You MUST output a report directly - no conversation, no questions, no preamble.
+
+CRITICAL RULES (VIOLATIONS WILL CAUSE SYSTEM FAILURE):
+1. START your response with "# Test Gap Analysis Report" - no other text before this
+2. NEVER ask questions or seek clarification
+3. NEVER use phrases like "let me ask", "what's your goal", "would you like"
+4. NEVER offer to expand or provide more information
+5. Output ONLY the structured report - nothing else
+
+Target files ({len(generated_tests)}):
+{file_list}
+
+REQUIRED OUTPUT FORMAT (follow exactly):
+
+# Test Gap Analysis Report
+
+## Executive Summary
+| Metric | Value |
+|--------|-------|
+| **Total Test Functions** | [count] |
+| **Files Covered** | [count] |
+| **Classes Tested** | [count] |
+| **Functions Tested** | [count] |
+
+## Coverage by File
+[For each file, show a table with Target, Type, Tests count, and Gap Assessment]
+
+## Identified Gaps
+[List specific missing tests with severity: HIGH/MEDIUM/LOW]
+
+## Prioritized Recommendations
+[Numbered list of specific tests to add, ordered by priority]
+
+END OF REQUIRED FORMAT - output nothing after recommendations."""
+
+            user_message = f"Generate the test gap analysis report for:\n{test_context}"
+
+        # Call the LLM using the provider-agnostic executor from BaseWorkflow
+        step_config = TEST_GEN_STEPS["review"]
+        report, in_tokens, out_tokens, _cost = await self.run_step_with_executor(
+            step=step_config,
+            prompt=user_message,
+            system=system_prompt,
+        )
+
+        # Validate the response - check for question patterns that indicate non-compliance
+        total_in = in_tokens
+        total_out = out_tokens
+
+        if self._response_contains_questions(report):
+            # Retry with an even stricter prompt
+            retry_prompt = f"""OUTPUT ONLY THIS EXACT FORMAT - NO OTHER TEXT:
+
+# Test Gap Analysis Report
+
+## Executive Summary
+| Metric | Value |
+|--------|-------|
+| **Total Test Functions** | {total_test_count} |
+| **Files Covered** | {len(generated_tests)} |
+
+## Coverage by File
+
+{self._generate_coverage_table(generated_tests)}
+
+## Identified Gaps
+- Missing error handling tests
+- Missing edge case tests
+- Missing integration tests
+
+## Prioritized Recommendations
+1. Add exception/error tests for each class
+2. Add boundary condition tests
+3. Add integration tests between components"""
+
+            report, retry_in, retry_out, _ = await self.run_step_with_executor(
+                step=step_config,
+                prompt=retry_prompt,
+                system="You are a report formatter. Output ONLY the text provided. Do not add any commentary.",
+            )
+            total_in += retry_in
+            total_out += retry_out
+
+            # If it is still asking questions, use the fallback programmatic report
+            if self._response_contains_questions(report):
+                report = self._generate_fallback_report(generated_tests, total_test_count)
+
+        # Replace the previous analysis with the final, accurate report
+        input_data["analysis_report"] = report
+        return input_data, total_in, total_out
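The control flow here is a validate-retry-fallback cascade; a minimal sketch of the same pattern in isolation (all names hypothetical):

    async def ask_with_fallback(llm, prompt, retry_prompt, fallback):
        """Try the LLM once, retry with a stricter prompt, then go deterministic."""
        report = await llm(prompt)
        if looks_noncompliant(report):          # e.g. a question-pattern check
            report = await llm(retry_prompt)    # second, stricter attempt
            if looks_noncompliant(report):
                report = fallback()             # programmatic report, always compliant
        return report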
+
+    def _response_contains_questions(self, response: str) -> bool:
+        """Check if a response contains question patterns indicating non-compliance."""
+        if not response:
+            return True
+
+        # Check the first 500 chars for question patterns
+        first_part = response[:500].lower()
+
+        question_patterns = [
+            "let me ask",
+            "what's your",
+            "what is your",
+            "would you like",
+            "do you have",
+            "could you",
+            "can you",
+            "clarifying question",
+            "before i generate",
+            "before generating",
+            "i need to know",
+            "please provide",
+            "please clarify",
+            "?",  # A question mark in the first 500 chars is suspicious
+        ]
+
+        # Also flag responses that don't start with the expected markdown heading
+        if not response.strip().startswith("#"):
+            return True
+
+        return any(pattern in first_part for pattern in question_patterns)
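Illustrative behavior (hypothetical calls; `wf` is a workflow instance):

    wf._response_contains_questions("# Test Gap Analysis Report\n| ... |")    # False
    wf._response_contains_questions("Before I generate, what's your goal?")   # True (no heading)
    wf._response_contains_questions("")                                       # True (empty is non-compliant)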
+
+    def _generate_coverage_table(self, generated_tests: list[dict]) -> str:
+        """Generate a simple coverage table for the retry prompt."""
+        lines = []
+        for item in generated_tests[:10]:
+            file_name = Path(item["source_file"]).name
+            test_count = item.get("test_count", 0)
+            lines.append(f"| {file_name} | {test_count} tests | Basic coverage |")
+        return "| File | Tests | Coverage |\n|------|-------|----------|\n" + "\n".join(lines)
+
+    def _generate_fallback_report(self, generated_tests: list[dict], total_test_count: int) -> str:
+        """Generate a programmatic fallback report when the LLM fails to comply."""
+        lines = ["# Test Gap Analysis Report", ""]
+        lines.append("## Executive Summary")
+        lines.append("| Metric | Value |")
+        lines.append("|--------|-------|")
+        lines.append(f"| **Total Test Functions** | {total_test_count} |")
+        lines.append(f"| **Files Covered** | {len(generated_tests)} |")
+
+        # Count classes and functions
+        total_classes = sum(
+            len([t for t in item.get("tests", []) if t.get("type") == "class"])
+            for item in generated_tests
+        )
+        total_functions = sum(
+            len([t for t in item.get("tests", []) if t.get("type") == "function"])
+            for item in generated_tests
+        )
+        lines.append(f"| **Classes Tested** | {total_classes} |")
+        lines.append(f"| **Functions Tested** | {total_functions} |")
+        lines.append("")
+
+        lines.append("## Coverage by File")
+        lines.append("| File | Tests | Targets |")
+        lines.append("|------|-------|---------|")
+        for item in generated_tests:
+            file_name = Path(item["source_file"]).name
+            test_count = item.get("test_count", 0)
+            targets = ", ".join(t.get("target", "?") for t in item.get("tests", [])[:3])
+            if len(item.get("tests", [])) > 3:
+                targets += "..."
+            lines.append(f"| {file_name} | {test_count} | {targets} |")
+        lines.append("")
+
+        lines.append("## Identified Gaps")
+        lines.append("- **HIGH**: Missing error/exception handling tests")
+        lines.append("- **MEDIUM**: Missing boundary condition tests")
+        lines.append("- **MEDIUM**: Missing async behavior tests")
+        lines.append("- **LOW**: Missing integration tests")
+        lines.append("")
+
+        lines.append("## Prioritized Recommendations")
+        lines.append("1. Add `pytest.raises` tests for each function that can throw exceptions")
+        lines.append("2. Add edge case tests (empty inputs, None values, large data)")
+        lines.append("3. Add concurrent/async tests for async functions")
+        lines.append("4. Add integration tests between related classes")
+
+        return "\n".join(lines)
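For example, with a single analyzed file (hypothetical data) the fallback report opens like this:

    report = wf._generate_fallback_report(
        [{"source_file": "src/cache.py", "test_count": 2,
          "tests": [{"type": "class", "target": "Cache"}]}],
        total_test_count=2,
    )
    # report begins:
    # # Test Gap Analysis Report
    #
    # ## Executive Summary
    # | Metric | Value |
    # |--------|-------|
    # | **Total Test Functions** | 2 |
    # | **Files Covered** | 1 |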
+
+    def get_max_tokens(self, stage_name: str) -> int:
+        """Get the maximum token limit for a stage."""
+        # All stages currently share the same 4096-token cap; stage_name is
+        # accepted for interface compatibility but not yet used.
+        return 4096
+
+
+def format_test_gen_report(result: dict, input_data: dict) -> str:
+    """Format test generation output as a human-readable report.
+
+    Args:
+        result: The review stage result
+        input_data: Input data from previous stages
+
+    Returns:
+        Formatted report string
+
+    """
+    lines = []
+
+    # Header
+    total_tests = result.get("total_tests", 0)
+    files_covered = result.get("files_covered", 0)
+
+    lines.append("=" * 60)
+    lines.append("TEST GAP ANALYSIS REPORT")
+    lines.append("=" * 60)
+    lines.append("")
+
+    # Summary stats
+    total_candidates = input_data.get("total_candidates", 0)
+    hotspot_count = input_data.get("hotspot_count", 0)
+    untested_count = input_data.get("untested_count", 0)
+
+    lines.append("-" * 60)
+    lines.append("SUMMARY")
+    lines.append("-" * 60)
+    lines.append(f"Tests Generated: {total_tests}")
+    lines.append(f"Files Covered: {files_covered}")
+    lines.append(f"Total Candidates: {total_candidates}")
+    lines.append(f"Bug Hotspots Found: {hotspot_count}")
+    lines.append(f"Untested Files: {untested_count}")
+    lines.append("")
+
+    # Status indicator
+    if total_tests == 0:
+        lines.append("⚠️ No tests were generated")
+    elif total_tests < 5:
+        lines.append(f"🟡 Generated {total_tests} test(s) - consider adding more coverage")
+    elif total_tests < 20:
+        lines.append(f"🟢 Generated {total_tests} tests - good coverage")
+    else:
+        lines.append(f"✅ Generated {total_tests} tests - excellent coverage")
+    lines.append("")
+
+    # Scope notice for enterprise clarity
+    total_source = input_data.get("total_source_files", 0)
+    existing_tests = input_data.get("existing_test_files", 0)
+    coverage_pct = input_data.get("analysis_coverage_percent", 100)
+    large_project = input_data.get("large_project_warning", False)
+
+    if total_source > 0 or existing_tests > 0:
+        lines.append("-" * 60)
+        lines.append("SCOPE NOTICE")
+        lines.append("-" * 60)
+
+        if large_project:
+            lines.append("⚠️ LARGE PROJECT: Only high-priority files analyzed")
+            lines.append(f" Coverage: {coverage_pct:.0f}% of candidate files")
+            lines.append("")
+
+        lines.append(f"Source Files Found: {total_source}")
+        lines.append(f"Existing Test Files: {existing_tests}")
+        lines.append(f"Files Analyzed: {files_covered}")
+
+        if existing_tests > 0:
+            lines.append("")
+            lines.append("Note: This report identifies gaps in untested files.")
+            lines.append("Run 'pytest --co -q' for full test suite statistics.")
+        lines.append("")
+
+    # Parse XML review feedback if present
+    review = result.get("review_feedback", "")
+    xml_summary = ""
+    xml_findings = []
+    xml_tests = []
+    coverage_improvement = ""
+
+    if review and "<response>" in review:
+        # Extract summary
+        summary_match = re.search(r"<summary>(.*?)</summary>", review, re.DOTALL)
+        if summary_match:
+            xml_summary = summary_match.group(1).strip()
+
+        # Extract coverage improvement
+        coverage_match = re.search(
+            r"<coverage-improvement>(.*?)</coverage-improvement>",
+            review,
+            re.DOTALL,
+        )
+        if coverage_match:
+            coverage_improvement = coverage_match.group(1).strip()
+
+        # Extract findings
+        for finding_match in re.finditer(
+            r'<finding severity="(\w+)">(.*?)</finding>',
+            review,
+            re.DOTALL,
+        ):
+            severity = finding_match.group(1)
+            finding_content = finding_match.group(2)
+
+            title_match = re.search(r"<title>(.*?)</title>", finding_content, re.DOTALL)
+            location_match = re.search(r"<location>(.*?)</location>", finding_content, re.DOTALL)
+            fix_match = re.search(r"<fix>(.*?)</fix>", finding_content, re.DOTALL)
+
+            xml_findings.append(
+                {
+                    "severity": severity,
+                    "title": title_match.group(1).strip() if title_match else "Unknown",
+                    "location": location_match.group(1).strip() if location_match else "",
+                    "fix": fix_match.group(1).strip() if fix_match else "",
+                },
+            )
+
+        # Extract suggested tests
+        for test_match in re.finditer(r'<test target="([^"]+)">(.*?)</test>', review, re.DOTALL):
+            target = test_match.group(1)
+            test_content = test_match.group(2)
+
+            type_match = re.search(r"<type>(.*?)</type>", test_content, re.DOTALL)
+            desc_match = re.search(r"<description>(.*?)</description>", test_content, re.DOTALL)
+
+            xml_tests.append(
+                {
+                    "target": target,
+                    "type": type_match.group(1).strip() if type_match else "unit",
+                    "description": desc_match.group(1).strip() if desc_match else "",
+                },
+            )
+
+    # Show parsed summary
+    if xml_summary:
+        lines.append("-" * 60)
+        lines.append("QUALITY ASSESSMENT")
+        lines.append("-" * 60)
+        # Word-wrap the summary to the 60-column report width
+        words = xml_summary.split()
+        current_line = ""
+        for word in words:
+            if len(current_line) + len(word) + 1 <= 58:
+                current_line += (" " if current_line else "") + word
+            else:
+                lines.append(current_line)
+                current_line = word
+        if current_line:
+            lines.append(current_line)
+        lines.append("")
+
+    if coverage_improvement:
+        lines.append(f"📈 {coverage_improvement}")
+        lines.append("")
+
+    # Show findings by severity
+    if xml_findings:
+        lines.append("-" * 60)
+        lines.append("QUALITY FINDINGS")
+        lines.append("-" * 60)
+
+        severity_emoji = {"high": "🔴", "medium": "🟠", "low": "🟡", "info": "🔵"}
+        severity_order = {"high": 0, "medium": 1, "low": 2, "info": 3}
+
+        sorted_findings = sorted(xml_findings, key=lambda f: severity_order.get(f["severity"], 4))
+
+        for finding in sorted_findings:
+            emoji = severity_emoji.get(finding["severity"], "⚪")
+            lines.append(f"{emoji} [{finding['severity'].upper()}] {finding['title']}")
+            if finding["location"]:
+                lines.append(f" Location: {finding['location']}")
+            if finding["fix"]:
+                # Truncate long fix recommendations
+                fix_text = finding["fix"]
+                if len(fix_text) > 70:
+                    fix_text = fix_text[:67] + "..."
+                lines.append(f" Fix: {fix_text}")
+            lines.append("")
+
+    # Show suggested tests
+    if xml_tests:
+        lines.append("-" * 60)
+        lines.append("SUGGESTED TESTS TO ADD")
+        lines.append("-" * 60)
+
+        for i, test in enumerate(xml_tests[:5], 1):  # Limit to 5
+            lines.append(f"{i}. {test['target']} ({test['type']})")
+            if test["description"]:
+                desc = test["description"]
+                if len(desc) > 55:
+                    desc = desc[:52] + "..."
+                lines.append(f" {desc}")
+            lines.append("")
+
+        if len(xml_tests) > 5:
+            lines.append(f" ... and {len(xml_tests) - 5} more suggested tests")
+            lines.append("")
+
+    # Generated tests breakdown (if no XML data)
+    generated_tests = input_data.get("generated_tests", [])
+    if generated_tests and not xml_findings:
+        lines.append("-" * 60)
+        lines.append("GENERATED TESTS BY FILE")
+        lines.append("-" * 60)
+        for test_file in generated_tests[:10]:  # Limit display
+            source = test_file.get("source_file", "unknown")
+            test_count = test_file.get("test_count", 0)
+            # Shorten the path for display
+            if len(source) > 50:
+                source = "..." + source[-47:]
+            lines.append(f" 📁 {source}")
+            lines.append(
+                f" └─ {test_count} test(s) → {test_file.get('test_file', 'test_*.py')}",
+            )
+        if len(generated_tests) > 10:
+            lines.append(f" ... and {len(generated_tests) - 10} more files")
+        lines.append("")
+
+    # Written files section
+    written_files = input_data.get("written_files", [])
+    if written_files:
+        lines.append("-" * 60)
+        lines.append("TESTS WRITTEN TO DISK")
+        lines.append("-" * 60)
+        for file_path in written_files[:10]:
+            # Shorten the path for display
+            if len(file_path) > 55:
+                file_path = "..." + file_path[-52:]
+            lines.append(f" ✅ {file_path}")
+        if len(written_files) > 10:
+            lines.append(f" ... and {len(written_files) - 10} more files")
+        lines.append("")
+        lines.append(" Run: pytest <file> to execute these tests")
+        lines.append("")
+    elif input_data.get("tests_written") is False and total_tests > 0:
+        lines.append("-" * 60)
+        lines.append("GENERATED TESTS (NOT WRITTEN)")
+        lines.append("-" * 60)
+        lines.append(" ⚠️ Tests were generated but not written to disk.")
+        lines.append(" To write tests, run with: write_tests=True")
+        lines.append("")
+
+    # Recommendations
+    lines.append("-" * 60)
+    lines.append("NEXT STEPS")
+    lines.append("-" * 60)
+
+    high_findings = len([f for f in xml_findings if f["severity"] == "high"])
+    medium_findings = len([f for f in xml_findings if f["severity"] == "medium"])
+
+    if high_findings > 0:
+        lines.append(f" 🔴 Address {high_findings} high-priority finding(s) first")
+
+    if medium_findings > 0:
+        lines.append(f" 🟠 Review {medium_findings} medium-priority finding(s)")
+
+    if xml_tests:
+        lines.append(f" 📝 Consider adding {len(xml_tests)} suggested test(s)")
+
+    if hotspot_count > 0:
+        lines.append(f" 🔥 {hotspot_count} bug hotspot file(s) need priority testing")
+
+    if untested_count > 0:
+        lines.append(f" 📁 {untested_count} file(s) have no existing tests")
+
+    if not any([high_findings, medium_findings, xml_tests, hotspot_count, untested_count]):
+        lines.append(" ✅ Test suite is in good shape!")
+
+    lines.append("")
+
+    # Footer
+    lines.append("=" * 60)
+    model_tier = result.get("model_tier_used", "unknown")
+    lines.append(f"Review completed using {model_tier} tier model")
+    lines.append("=" * 60)
+
+    return "\n".join(lines)
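A minimal usage sketch (the dict shapes are inferred from the keys read above; all values are hypothetical):

    report = format_test_gen_report(
        result={"total_tests": 7, "files_covered": 2, "model_tier_used": "premium"},
        input_data={"total_candidates": 4, "hotspot_count": 1, "untested_count": 2},
    )
    print(report)  # 60-column plain-text report: SUMMARY, NEXT STEPS, footer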
+
+
+def main():
+    """CLI entry point for test generation workflow."""
+    import asyncio
+
+    async def run():
+        workflow = TestGenerationWorkflow()
+        result = await workflow.execute(path=".", file_types=[".py"])
+
+        print("\nTest Generation Results")
+        print("=" * 50)
+        print(f"Provider: {result.provider}")
+        print(f"Success: {result.success}")
+        print(f"Tests Generated: {result.final_output.get('total_tests', 0)}")
+        print("\nCost Report:")
+        print(f"  Total Cost: ${result.cost_report.total_cost:.4f}")
+        savings = result.cost_report.savings
+        pct = result.cost_report.savings_percent
+        print(f"  Savings: ${savings:.4f} ({pct:.1f}%)")
+
+    asyncio.run(run())
+
+
+if __name__ == "__main__":
+    main()
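Beyond the CLI hook, the workflow can also be driven programmatically; a sketch reusing the same calls as `main()` (the `path` value is illustrative):

    import asyncio

    workflow = TestGenerationWorkflow()
    result = asyncio.run(workflow.execute(path="src/", file_types=[".py"]))
    if result.success:
        print(result.final_output.get("analysis_report", ""))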