attune_ai-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- attune/__init__.py +358 -0
- attune/adaptive/__init__.py +13 -0
- attune/adaptive/task_complexity.py +127 -0
- attune/agent_monitoring.py +414 -0
- attune/cache/__init__.py +117 -0
- attune/cache/base.py +166 -0
- attune/cache/dependency_manager.py +256 -0
- attune/cache/hash_only.py +251 -0
- attune/cache/hybrid.py +457 -0
- attune/cache/storage.py +285 -0
- attune/cache_monitor.py +356 -0
- attune/cache_stats.py +298 -0
- attune/cli/__init__.py +152 -0
- attune/cli/__main__.py +12 -0
- attune/cli/commands/__init__.py +1 -0
- attune/cli/commands/batch.py +264 -0
- attune/cli/commands/cache.py +248 -0
- attune/cli/commands/help.py +331 -0
- attune/cli/commands/info.py +140 -0
- attune/cli/commands/inspect.py +436 -0
- attune/cli/commands/inspection.py +57 -0
- attune/cli/commands/memory.py +48 -0
- attune/cli/commands/metrics.py +92 -0
- attune/cli/commands/orchestrate.py +184 -0
- attune/cli/commands/patterns.py +207 -0
- attune/cli/commands/profiling.py +202 -0
- attune/cli/commands/provider.py +98 -0
- attune/cli/commands/routing.py +285 -0
- attune/cli/commands/setup.py +96 -0
- attune/cli/commands/status.py +235 -0
- attune/cli/commands/sync.py +166 -0
- attune/cli/commands/tier.py +121 -0
- attune/cli/commands/utilities.py +114 -0
- attune/cli/commands/workflow.py +579 -0
- attune/cli/core.py +32 -0
- attune/cli/parsers/__init__.py +68 -0
- attune/cli/parsers/batch.py +118 -0
- attune/cli/parsers/cache.py +65 -0
- attune/cli/parsers/help.py +41 -0
- attune/cli/parsers/info.py +26 -0
- attune/cli/parsers/inspect.py +66 -0
- attune/cli/parsers/metrics.py +42 -0
- attune/cli/parsers/orchestrate.py +61 -0
- attune/cli/parsers/patterns.py +54 -0
- attune/cli/parsers/provider.py +40 -0
- attune/cli/parsers/routing.py +110 -0
- attune/cli/parsers/setup.py +42 -0
- attune/cli/parsers/status.py +47 -0
- attune/cli/parsers/sync.py +31 -0
- attune/cli/parsers/tier.py +33 -0
- attune/cli/parsers/workflow.py +77 -0
- attune/cli/utils/__init__.py +1 -0
- attune/cli/utils/data.py +242 -0
- attune/cli/utils/helpers.py +68 -0
- attune/cli_legacy.py +3957 -0
- attune/cli_minimal.py +1159 -0
- attune/cli_router.py +437 -0
- attune/cli_unified.py +814 -0
- attune/config/__init__.py +66 -0
- attune/config/xml_config.py +286 -0
- attune/config.py +545 -0
- attune/coordination.py +870 -0
- attune/core.py +1511 -0
- attune/core_modules/__init__.py +15 -0
- attune/cost_tracker.py +626 -0
- attune/dashboard/__init__.py +41 -0
- attune/dashboard/app.py +512 -0
- attune/dashboard/simple_server.py +435 -0
- attune/dashboard/standalone_server.py +547 -0
- attune/discovery.py +306 -0
- attune/emergence.py +306 -0
- attune/exceptions.py +123 -0
- attune/feedback_loops.py +373 -0
- attune/hot_reload/README.md +473 -0
- attune/hot_reload/__init__.py +62 -0
- attune/hot_reload/config.py +83 -0
- attune/hot_reload/integration.py +229 -0
- attune/hot_reload/reloader.py +298 -0
- attune/hot_reload/watcher.py +183 -0
- attune/hot_reload/websocket.py +177 -0
- attune/levels.py +577 -0
- attune/leverage_points.py +441 -0
- attune/logging_config.py +261 -0
- attune/mcp/__init__.py +10 -0
- attune/mcp/server.py +506 -0
- attune/memory/__init__.py +237 -0
- attune/memory/claude_memory.py +469 -0
- attune/memory/config.py +224 -0
- attune/memory/control_panel.py +1290 -0
- attune/memory/control_panel_support.py +145 -0
- attune/memory/cross_session.py +845 -0
- attune/memory/edges.py +179 -0
- attune/memory/encryption.py +159 -0
- attune/memory/file_session.py +770 -0
- attune/memory/graph.py +570 -0
- attune/memory/long_term.py +913 -0
- attune/memory/long_term_types.py +99 -0
- attune/memory/mixins/__init__.py +25 -0
- attune/memory/mixins/backend_init_mixin.py +249 -0
- attune/memory/mixins/capabilities_mixin.py +208 -0
- attune/memory/mixins/handoff_mixin.py +208 -0
- attune/memory/mixins/lifecycle_mixin.py +49 -0
- attune/memory/mixins/long_term_mixin.py +352 -0
- attune/memory/mixins/promotion_mixin.py +109 -0
- attune/memory/mixins/short_term_mixin.py +182 -0
- attune/memory/nodes.py +179 -0
- attune/memory/redis_bootstrap.py +540 -0
- attune/memory/security/__init__.py +31 -0
- attune/memory/security/audit_logger.py +932 -0
- attune/memory/security/pii_scrubber.py +640 -0
- attune/memory/security/secrets_detector.py +678 -0
- attune/memory/short_term.py +2192 -0
- attune/memory/simple_storage.py +302 -0
- attune/memory/storage/__init__.py +15 -0
- attune/memory/storage_backend.py +167 -0
- attune/memory/summary_index.py +583 -0
- attune/memory/types.py +446 -0
- attune/memory/unified.py +182 -0
- attune/meta_workflows/__init__.py +74 -0
- attune/meta_workflows/agent_creator.py +248 -0
- attune/meta_workflows/builtin_templates.py +567 -0
- attune/meta_workflows/cli_commands/__init__.py +56 -0
- attune/meta_workflows/cli_commands/agent_commands.py +321 -0
- attune/meta_workflows/cli_commands/analytics_commands.py +442 -0
- attune/meta_workflows/cli_commands/config_commands.py +232 -0
- attune/meta_workflows/cli_commands/memory_commands.py +182 -0
- attune/meta_workflows/cli_commands/template_commands.py +354 -0
- attune/meta_workflows/cli_commands/workflow_commands.py +382 -0
- attune/meta_workflows/cli_meta_workflows.py +59 -0
- attune/meta_workflows/form_engine.py +292 -0
- attune/meta_workflows/intent_detector.py +409 -0
- attune/meta_workflows/models.py +569 -0
- attune/meta_workflows/pattern_learner.py +738 -0
- attune/meta_workflows/plan_generator.py +384 -0
- attune/meta_workflows/session_context.py +397 -0
- attune/meta_workflows/template_registry.py +229 -0
- attune/meta_workflows/workflow.py +984 -0
- attune/metrics/__init__.py +12 -0
- attune/metrics/collector.py +31 -0
- attune/metrics/prompt_metrics.py +194 -0
- attune/models/__init__.py +172 -0
- attune/models/__main__.py +13 -0
- attune/models/adaptive_routing.py +437 -0
- attune/models/auth_cli.py +444 -0
- attune/models/auth_strategy.py +450 -0
- attune/models/cli.py +655 -0
- attune/models/empathy_executor.py +354 -0
- attune/models/executor.py +257 -0
- attune/models/fallback.py +762 -0
- attune/models/provider_config.py +282 -0
- attune/models/registry.py +472 -0
- attune/models/tasks.py +359 -0
- attune/models/telemetry/__init__.py +71 -0
- attune/models/telemetry/analytics.py +594 -0
- attune/models/telemetry/backend.py +196 -0
- attune/models/telemetry/data_models.py +431 -0
- attune/models/telemetry/storage.py +489 -0
- attune/models/token_estimator.py +420 -0
- attune/models/validation.py +280 -0
- attune/monitoring/__init__.py +52 -0
- attune/monitoring/alerts.py +946 -0
- attune/monitoring/alerts_cli.py +448 -0
- attune/monitoring/multi_backend.py +271 -0
- attune/monitoring/otel_backend.py +362 -0
- attune/optimization/__init__.py +19 -0
- attune/optimization/context_optimizer.py +272 -0
- attune/orchestration/__init__.py +67 -0
- attune/orchestration/agent_templates.py +707 -0
- attune/orchestration/config_store.py +499 -0
- attune/orchestration/execution_strategies.py +2111 -0
- attune/orchestration/meta_orchestrator.py +1168 -0
- attune/orchestration/pattern_learner.py +696 -0
- attune/orchestration/real_tools.py +931 -0
- attune/pattern_cache.py +187 -0
- attune/pattern_library.py +542 -0
- attune/patterns/debugging/all_patterns.json +81 -0
- attune/patterns/debugging/workflow_20260107_1770825e.json +77 -0
- attune/patterns/refactoring_memory.json +89 -0
- attune/persistence.py +564 -0
- attune/platform_utils.py +265 -0
- attune/plugins/__init__.py +28 -0
- attune/plugins/base.py +361 -0
- attune/plugins/registry.py +268 -0
- attune/project_index/__init__.py +32 -0
- attune/project_index/cli.py +335 -0
- attune/project_index/index.py +667 -0
- attune/project_index/models.py +504 -0
- attune/project_index/reports.py +474 -0
- attune/project_index/scanner.py +777 -0
- attune/project_index/scanner_parallel.py +291 -0
- attune/prompts/__init__.py +61 -0
- attune/prompts/config.py +77 -0
- attune/prompts/context.py +177 -0
- attune/prompts/parser.py +285 -0
- attune/prompts/registry.py +313 -0
- attune/prompts/templates.py +208 -0
- attune/redis_config.py +302 -0
- attune/redis_memory.py +799 -0
- attune/resilience/__init__.py +56 -0
- attune/resilience/circuit_breaker.py +256 -0
- attune/resilience/fallback.py +179 -0
- attune/resilience/health.py +300 -0
- attune/resilience/retry.py +209 -0
- attune/resilience/timeout.py +135 -0
- attune/routing/__init__.py +43 -0
- attune/routing/chain_executor.py +433 -0
- attune/routing/classifier.py +217 -0
- attune/routing/smart_router.py +234 -0
- attune/routing/workflow_registry.py +343 -0
- attune/scaffolding/README.md +589 -0
- attune/scaffolding/__init__.py +35 -0
- attune/scaffolding/__main__.py +14 -0
- attune/scaffolding/cli.py +240 -0
- attune/scaffolding/templates/base_wizard.py.jinja2 +121 -0
- attune/scaffolding/templates/coach_wizard.py.jinja2 +321 -0
- attune/scaffolding/templates/domain_wizard.py.jinja2 +408 -0
- attune/scaffolding/templates/linear_flow_wizard.py.jinja2 +203 -0
- attune/socratic/__init__.py +256 -0
- attune/socratic/ab_testing.py +958 -0
- attune/socratic/blueprint.py +533 -0
- attune/socratic/cli.py +703 -0
- attune/socratic/collaboration.py +1114 -0
- attune/socratic/domain_templates.py +924 -0
- attune/socratic/embeddings.py +738 -0
- attune/socratic/engine.py +794 -0
- attune/socratic/explainer.py +682 -0
- attune/socratic/feedback.py +772 -0
- attune/socratic/forms.py +629 -0
- attune/socratic/generator.py +732 -0
- attune/socratic/llm_analyzer.py +637 -0
- attune/socratic/mcp_server.py +702 -0
- attune/socratic/session.py +312 -0
- attune/socratic/storage.py +667 -0
- attune/socratic/success.py +730 -0
- attune/socratic/visual_editor.py +860 -0
- attune/socratic/web_ui.py +958 -0
- attune/telemetry/__init__.py +39 -0
- attune/telemetry/agent_coordination.py +475 -0
- attune/telemetry/agent_tracking.py +367 -0
- attune/telemetry/approval_gates.py +545 -0
- attune/telemetry/cli.py +1231 -0
- attune/telemetry/commands/__init__.py +14 -0
- attune/telemetry/commands/dashboard_commands.py +696 -0
- attune/telemetry/event_streaming.py +409 -0
- attune/telemetry/feedback_loop.py +567 -0
- attune/telemetry/usage_tracker.py +591 -0
- attune/templates.py +754 -0
- attune/test_generator/__init__.py +38 -0
- attune/test_generator/__main__.py +14 -0
- attune/test_generator/cli.py +234 -0
- attune/test_generator/generator.py +355 -0
- attune/test_generator/risk_analyzer.py +216 -0
- attune/test_generator/templates/unit_test.py.jinja2 +272 -0
- attune/tier_recommender.py +384 -0
- attune/tools.py +183 -0
- attune/trust/__init__.py +28 -0
- attune/trust/circuit_breaker.py +579 -0
- attune/trust_building.py +527 -0
- attune/validation/__init__.py +19 -0
- attune/validation/xml_validator.py +281 -0
- attune/vscode_bridge.py +173 -0
- attune/workflow_commands.py +780 -0
- attune/workflow_patterns/__init__.py +33 -0
- attune/workflow_patterns/behavior.py +249 -0
- attune/workflow_patterns/core.py +76 -0
- attune/workflow_patterns/output.py +99 -0
- attune/workflow_patterns/registry.py +255 -0
- attune/workflow_patterns/structural.py +288 -0
- attune/workflows/__init__.py +539 -0
- attune/workflows/autonomous_test_gen.py +1268 -0
- attune/workflows/base.py +2667 -0
- attune/workflows/batch_processing.py +342 -0
- attune/workflows/bug_predict.py +1084 -0
- attune/workflows/builder.py +273 -0
- attune/workflows/caching.py +253 -0
- attune/workflows/code_review.py +1048 -0
- attune/workflows/code_review_adapters.py +312 -0
- attune/workflows/code_review_pipeline.py +722 -0
- attune/workflows/config.py +645 -0
- attune/workflows/dependency_check.py +644 -0
- attune/workflows/document_gen/__init__.py +25 -0
- attune/workflows/document_gen/config.py +30 -0
- attune/workflows/document_gen/report_formatter.py +162 -0
- attune/workflows/document_gen/workflow.py +1426 -0
- attune/workflows/document_manager.py +216 -0
- attune/workflows/document_manager_README.md +134 -0
- attune/workflows/documentation_orchestrator.py +1205 -0
- attune/workflows/history.py +510 -0
- attune/workflows/keyboard_shortcuts/__init__.py +39 -0
- attune/workflows/keyboard_shortcuts/generators.py +391 -0
- attune/workflows/keyboard_shortcuts/parsers.py +416 -0
- attune/workflows/keyboard_shortcuts/prompts.py +295 -0
- attune/workflows/keyboard_shortcuts/schema.py +193 -0
- attune/workflows/keyboard_shortcuts/workflow.py +509 -0
- attune/workflows/llm_base.py +363 -0
- attune/workflows/manage_docs.py +87 -0
- attune/workflows/manage_docs_README.md +134 -0
- attune/workflows/manage_documentation.py +821 -0
- attune/workflows/new_sample_workflow1.py +149 -0
- attune/workflows/new_sample_workflow1_README.md +150 -0
- attune/workflows/orchestrated_health_check.py +849 -0
- attune/workflows/orchestrated_release_prep.py +600 -0
- attune/workflows/output.py +413 -0
- attune/workflows/perf_audit.py +863 -0
- attune/workflows/pr_review.py +762 -0
- attune/workflows/progress.py +785 -0
- attune/workflows/progress_server.py +322 -0
- attune/workflows/progressive/README 2.md +454 -0
- attune/workflows/progressive/README.md +454 -0
- attune/workflows/progressive/__init__.py +82 -0
- attune/workflows/progressive/cli.py +219 -0
- attune/workflows/progressive/core.py +488 -0
- attune/workflows/progressive/orchestrator.py +723 -0
- attune/workflows/progressive/reports.py +520 -0
- attune/workflows/progressive/telemetry.py +274 -0
- attune/workflows/progressive/test_gen.py +495 -0
- attune/workflows/progressive/workflow.py +589 -0
- attune/workflows/refactor_plan.py +694 -0
- attune/workflows/release_prep.py +895 -0
- attune/workflows/release_prep_crew.py +969 -0
- attune/workflows/research_synthesis.py +404 -0
- attune/workflows/routing.py +168 -0
- attune/workflows/secure_release.py +593 -0
- attune/workflows/security_adapters.py +297 -0
- attune/workflows/security_audit.py +1329 -0
- attune/workflows/security_audit_phase3.py +355 -0
- attune/workflows/seo_optimization.py +633 -0
- attune/workflows/step_config.py +234 -0
- attune/workflows/telemetry_mixin.py +269 -0
- attune/workflows/test5.py +125 -0
- attune/workflows/test5_README.md +158 -0
- attune/workflows/test_coverage_boost_crew.py +849 -0
- attune/workflows/test_gen/__init__.py +52 -0
- attune/workflows/test_gen/ast_analyzer.py +249 -0
- attune/workflows/test_gen/config.py +88 -0
- attune/workflows/test_gen/data_models.py +38 -0
- attune/workflows/test_gen/report_formatter.py +289 -0
- attune/workflows/test_gen/test_templates.py +381 -0
- attune/workflows/test_gen/workflow.py +655 -0
- attune/workflows/test_gen.py +54 -0
- attune/workflows/test_gen_behavioral.py +477 -0
- attune/workflows/test_gen_parallel.py +341 -0
- attune/workflows/test_lifecycle.py +526 -0
- attune/workflows/test_maintenance.py +627 -0
- attune/workflows/test_maintenance_cli.py +590 -0
- attune/workflows/test_maintenance_crew.py +840 -0
- attune/workflows/test_runner.py +622 -0
- attune/workflows/tier_tracking.py +531 -0
- attune/workflows/xml_enhanced_crew.py +285 -0
- attune_ai-2.0.0.dist-info/METADATA +1026 -0
- attune_ai-2.0.0.dist-info/RECORD +457 -0
- attune_ai-2.0.0.dist-info/WHEEL +5 -0
- attune_ai-2.0.0.dist-info/entry_points.txt +26 -0
- attune_ai-2.0.0.dist-info/licenses/LICENSE +201 -0
- attune_ai-2.0.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
- attune_ai-2.0.0.dist-info/top_level.txt +5 -0
- attune_healthcare/__init__.py +13 -0
- attune_healthcare/monitors/__init__.py +9 -0
- attune_healthcare/monitors/clinical_protocol_monitor.py +315 -0
- attune_healthcare/monitors/monitoring/__init__.py +44 -0
- attune_healthcare/monitors/monitoring/protocol_checker.py +300 -0
- attune_healthcare/monitors/monitoring/protocol_loader.py +214 -0
- attune_healthcare/monitors/monitoring/sensor_parsers.py +306 -0
- attune_healthcare/monitors/monitoring/trajectory_analyzer.py +389 -0
- attune_llm/README.md +553 -0
- attune_llm/__init__.py +28 -0
- attune_llm/agent_factory/__init__.py +53 -0
- attune_llm/agent_factory/adapters/__init__.py +85 -0
- attune_llm/agent_factory/adapters/autogen_adapter.py +312 -0
- attune_llm/agent_factory/adapters/crewai_adapter.py +483 -0
- attune_llm/agent_factory/adapters/haystack_adapter.py +298 -0
- attune_llm/agent_factory/adapters/langchain_adapter.py +362 -0
- attune_llm/agent_factory/adapters/langgraph_adapter.py +333 -0
- attune_llm/agent_factory/adapters/native.py +228 -0
- attune_llm/agent_factory/adapters/wizard_adapter.py +423 -0
- attune_llm/agent_factory/base.py +305 -0
- attune_llm/agent_factory/crews/__init__.py +67 -0
- attune_llm/agent_factory/crews/code_review.py +1113 -0
- attune_llm/agent_factory/crews/health_check.py +1262 -0
- attune_llm/agent_factory/crews/refactoring.py +1128 -0
- attune_llm/agent_factory/crews/security_audit.py +1018 -0
- attune_llm/agent_factory/decorators.py +287 -0
- attune_llm/agent_factory/factory.py +558 -0
- attune_llm/agent_factory/framework.py +193 -0
- attune_llm/agent_factory/memory_integration.py +328 -0
- attune_llm/agent_factory/resilient.py +320 -0
- attune_llm/agents_md/__init__.py +22 -0
- attune_llm/agents_md/loader.py +218 -0
- attune_llm/agents_md/parser.py +271 -0
- attune_llm/agents_md/registry.py +307 -0
- attune_llm/claude_memory.py +466 -0
- attune_llm/cli/__init__.py +8 -0
- attune_llm/cli/sync_claude.py +487 -0
- attune_llm/code_health.py +1313 -0
- attune_llm/commands/__init__.py +51 -0
- attune_llm/commands/context.py +375 -0
- attune_llm/commands/loader.py +301 -0
- attune_llm/commands/models.py +231 -0
- attune_llm/commands/parser.py +371 -0
- attune_llm/commands/registry.py +429 -0
- attune_llm/config/__init__.py +29 -0
- attune_llm/config/unified.py +291 -0
- attune_llm/context/__init__.py +22 -0
- attune_llm/context/compaction.py +455 -0
- attune_llm/context/manager.py +434 -0
- attune_llm/contextual_patterns.py +361 -0
- attune_llm/core.py +907 -0
- attune_llm/git_pattern_extractor.py +435 -0
- attune_llm/hooks/__init__.py +24 -0
- attune_llm/hooks/config.py +306 -0
- attune_llm/hooks/executor.py +289 -0
- attune_llm/hooks/registry.py +302 -0
- attune_llm/hooks/scripts/__init__.py +39 -0
- attune_llm/hooks/scripts/evaluate_session.py +201 -0
- attune_llm/hooks/scripts/first_time_init.py +285 -0
- attune_llm/hooks/scripts/pre_compact.py +207 -0
- attune_llm/hooks/scripts/session_end.py +183 -0
- attune_llm/hooks/scripts/session_start.py +163 -0
- attune_llm/hooks/scripts/suggest_compact.py +225 -0
- attune_llm/learning/__init__.py +30 -0
- attune_llm/learning/evaluator.py +438 -0
- attune_llm/learning/extractor.py +514 -0
- attune_llm/learning/storage.py +560 -0
- attune_llm/levels.py +227 -0
- attune_llm/pattern_confidence.py +414 -0
- attune_llm/pattern_resolver.py +272 -0
- attune_llm/pattern_summary.py +350 -0
- attune_llm/providers.py +967 -0
- attune_llm/routing/__init__.py +32 -0
- attune_llm/routing/model_router.py +362 -0
- attune_llm/security/IMPLEMENTATION_SUMMARY.md +413 -0
- attune_llm/security/PHASE2_COMPLETE.md +384 -0
- attune_llm/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
- attune_llm/security/QUICK_REFERENCE.md +316 -0
- attune_llm/security/README.md +262 -0
- attune_llm/security/__init__.py +62 -0
- attune_llm/security/audit_logger.py +929 -0
- attune_llm/security/audit_logger_example.py +152 -0
- attune_llm/security/pii_scrubber.py +640 -0
- attune_llm/security/secrets_detector.py +678 -0
- attune_llm/security/secrets_detector_example.py +304 -0
- attune_llm/security/secure_memdocs.py +1192 -0
- attune_llm/security/secure_memdocs_example.py +278 -0
- attune_llm/session_status.py +745 -0
- attune_llm/state.py +246 -0
- attune_llm/utils/__init__.py +5 -0
- attune_llm/utils/tokens.py +349 -0
- attune_software/SOFTWARE_PLUGIN_README.md +57 -0
- attune_software/__init__.py +13 -0
- attune_software/cli/__init__.py +120 -0
- attune_software/cli/inspect.py +362 -0
- attune_software/cli.py +574 -0
- attune_software/plugin.py +188 -0
- workflow_scaffolding/__init__.py +11 -0
- workflow_scaffolding/__main__.py +12 -0
- workflow_scaffolding/cli.py +206 -0
- workflow_scaffolding/generator.py +265 -0
@@ -0,0 +1,1268 @@
"""Autonomous Test Generation with Dashboard Integration - Enhanced Edition.

Generates behavioral tests with real-time monitoring via Agent Coordination Dashboard.

ENHANCEMENTS (Phase 1):
- Extended thinking mode for better test planning
- Prompt caching for 90% cost reduction
- Full source code (no truncation)
- Workflow-specific prompts with mocking templates
- Few-shot learning with examples

ENHANCEMENTS (Phase 2 - Multi-Turn Refinement):
- Iterative test generation with validation loop
- Automatic failure detection and fixing
- Conversation history for context preservation

ENHANCEMENTS (Phase 3 - Coverage-Guided Generation):
- Coverage analysis integration
- Iterative coverage improvement targeting uncovered lines
- Systematic path to 80%+ coverage

Copyright 2026 Smart-AI-Memory
Licensed under Apache 2.0
"""
import json
import logging
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from attune.memory.short_term import RedisShortTermMemory
from attune.telemetry.agent_tracking import HeartbeatCoordinator
from attune.telemetry.event_streaming import EventStreamer
from attune.telemetry.feedback_loop import FeedbackLoop

logger = logging.getLogger(__name__)
@dataclass
class ValidationResult:
    """Result of pytest validation."""
    passed: bool
    failures: str
    error_count: int
    output: str


@dataclass
class CoverageResult:
    """Result of coverage analysis."""
    coverage: float
    missing_lines: list[int]
    total_statements: int
    covered_statements: int
class AutonomousTestGenerator:
    """Generate tests autonomously with dashboard monitoring and Anthropic best practices."""

    def __init__(
        self,
        agent_id: str,
        batch_num: int,
        modules: list[dict[str, Any]],
        enable_refinement: bool = True,
        max_refinement_iterations: int = 3,
        enable_coverage_guided: bool = False,
        target_coverage: float = 0.80
    ):
        """Initialize generator.

        Args:
            agent_id: Unique agent identifier
            batch_num: Batch number (1-18)
            modules: List of modules to generate tests for
            enable_refinement: Enable Phase 2 multi-turn refinement (default: True)
            max_refinement_iterations: Max iterations for refinement (default: 3)
            enable_coverage_guided: Enable Phase 3 coverage-guided generation (default: False)
            target_coverage: Target coverage percentage (default: 0.80 = 80%)
        """
        self.agent_id = agent_id
        self.batch_num = batch_num
        self.modules = modules

        # Phase 2 & 3 configuration
        self.enable_refinement = enable_refinement
        self.max_refinement_iterations = max_refinement_iterations
        self.enable_coverage_guided = enable_coverage_guided
        self.target_coverage = target_coverage

        # Initialize memory backend for dashboard integration
        try:
            self.memory = RedisShortTermMemory()
            self.coordinator = HeartbeatCoordinator(memory=self.memory, enable_streaming=True)
            self.event_streamer = EventStreamer(memory=self.memory)
            self.feedback_loop = FeedbackLoop(memory=self.memory)
        except Exception as e:
            logger.warning(f"Failed to initialize memory backend: {e}")
            self.coordinator = HeartbeatCoordinator()
            self.event_streamer = None
            self.feedback_loop = None

        self.output_dir = Path(f"tests/behavioral/generated/batch{batch_num}")
        self.output_dir.mkdir(parents=True, exist_ok=True)

        logger.info(f"Generator initialized: refinement={enable_refinement}, coverage_guided={enable_coverage_guided}")
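    # Usage, as a minimal illustrative sketch (not part of the module): the
    # modules payload shape is inferred from _generate_module_tests below,
    # which reads module["file"]; the agent_id and batch values are made up.
    #
    #   generator = AutonomousTestGenerator(
    #       agent_id="test-gen-batch1",
    #       batch_num=1,
    #       modules=[{"file": "src/attune/config.py"}],
    #       enable_refinement=True,
    #   )
    #   results = generator.generate_all()
    #   print(f"{results['completed']}/{results['total_modules']} modules done")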
    def generate_all(self) -> dict[str, Any]:
        """Generate tests for all modules with progress tracking.

        Returns:
            Summary of generation results
        """
        # Start tracking
        self.coordinator.start_heartbeat(
            agent_id=self.agent_id,
            metadata={
                "batch": self.batch_num,
                "total_modules": len(self.modules),
                "workflow": "autonomous_test_generation",
            }
        )

        try:
            results = {
                "batch": self.batch_num,
                "total_modules": len(self.modules),
                "completed": 0,
                "failed": 0,
                "tests_generated": 0,
                "files_created": [],
            }

            for i, module in enumerate(self.modules):
                progress = (i + 1) / len(self.modules)
                module_name = module["file"].replace("src/attune/", "")

                # Update dashboard
                self.coordinator.beat(
                    status="running",
                    progress=progress,
                    current_task=f"Generating tests for {module_name}"
                )

                try:
                    # Generate tests for this module
                    test_file = self._generate_module_tests(module)
                    if test_file:
                        results["completed"] += 1
                        results["files_created"].append(str(test_file))
                        logger.info(f"✅ Generated tests for {module_name}")

                        # Send event to dashboard
                        if self.event_streamer:
                            self.event_streamer.publish_event(
                                event_type="test_file_created",
                                data={
                                    "agent_id": self.agent_id,
                                    "module": module_name,
                                    "test_file": str(test_file),
                                    "batch": self.batch_num
                                }
                            )

                        # Record quality feedback
                        if self.feedback_loop:
                            self.feedback_loop.record_feedback(
                                workflow_name="test-generation",
                                stage_name="generation",
                                tier="capable",
                                quality_score=1.0,  # Success
                                metadata={"module": module_name, "status": "success", "batch": self.batch_num}
                            )
                    else:
                        results["failed"] += 1
                        logger.warning(f"⚠️ Skipped {module_name} (validation failed)")

                        # Record failure feedback
                        if self.feedback_loop:
                            self.feedback_loop.record_feedback(
                                workflow_name="test-generation",
                                stage_name="validation",
                                tier="capable",
                                quality_score=0.0,  # Failure
                                metadata={"module": module_name, "status": "validation_failed", "batch": self.batch_num}
                            )

                except Exception as e:
                    results["failed"] += 1
                    logger.error(f"❌ Error generating tests for {module_name}: {e}")

                    # Send error event
                    if self.event_streamer:
                        self.event_streamer.publish_event(
                            event_type="test_generation_error",
                            data={
                                "agent_id": self.agent_id,
                                "module": module_name,
                                "error": str(e),
                                "batch": self.batch_num
                            }
                        )

            # Count total tests
            results["tests_generated"] = self._count_tests()

            # Final update
            self.coordinator.beat(
                status="completed",
                progress=1.0,
                current_task=f"Completed: {results['completed']}/{results['total_modules']} modules"
            )

            return results

        except Exception as e:
            # Error tracking
            self.coordinator.beat(
                status="failed",
                progress=0.0,
                current_task=f"Failed: {str(e)}"
            )
            raise

        finally:
            # Stop heartbeat
            self.coordinator.stop_heartbeat(
                final_status="completed" if results["completed"] > 0 else "failed"
            )
    def _generate_module_tests(self, module: dict[str, Any]) -> Path | None:
        """Generate tests for a single module using LLM agent.

        Args:
            module: Module info dict with 'file', 'total', 'missing', etc.

        Returns:
            Path to generated test file, or None if skipped
        """
        source_file = Path(module["file"])
        module_name = source_file.stem

        # Skip if module doesn't exist
        if not source_file.exists():
            logger.warning(f"Source file not found: {source_file}")
            return None

        # Read source to understand what needs testing
        try:
            source_code = source_file.read_text()
        except Exception as e:
            logger.error(f"Cannot read {source_file}: {e}")
            return None

        # Generate test file path
        test_file = self.output_dir / f"test_{module_name}_behavioral.py"

        # Extract module path for imports
        module_path = str(source_file).replace("src/", "").replace(".py", "").replace("/", ".")

        # Generate tests using LLM agent with Anthropic best practices
        # Phase 1: Basic generation
        # Phase 2: Multi-turn refinement (if enabled)
        # Phase 3: Coverage-guided improvement (if enabled)

        if self.enable_refinement:
            logger.info(f"🔄 Using Phase 2: Multi-turn refinement for {module_name}")
            test_content = self._generate_with_refinement(module_name, module_path, source_file, source_code, test_file)
        else:
            logger.info(f"📝 Using Phase 1: Basic generation for {module_name}")
            test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)

        if not test_content:
            logger.warning(f"LLM generation failed for {module_name}")
            return None

        logger.info(f"LLM generated {len(test_content)} bytes for {module_name}")

        # Phase 3: Coverage-guided improvement (if enabled)
        if self.enable_coverage_guided:
            logger.info(f"📊 Applying Phase 3: Coverage-guided improvement for {module_name}")
            improved_content = self._generate_with_coverage_target(
                module_name, module_path, source_file, source_code, test_file, test_content
            )
            if improved_content:
                test_content = improved_content
                logger.info(f"✅ Coverage-guided improvement complete for {module_name}")
            else:
                logger.warning(f"⚠️ Coverage-guided improvement failed, using previous version for {module_name}")

        # Write final test file
        test_file.write_text(test_content)
        logger.info(f"Wrote test file: {test_file}")

        # Validate it can be imported
        if not self._validate_test_file(test_file):
            test_file.unlink()
            return None

        return test_file
    def _is_workflow_module(self, source_code: str, module_path: str) -> bool:
        """Detect if module is a workflow requiring special handling.

        Args:
            source_code: Source code content
            module_path: Python import path

        Returns:
            True if this is a workflow module needing LLM mocking
        """
        # Check for workflow indicators
        indicators = [
            r"class\s+\w+Workflow",
            r"async\s+def\s+execute",
            r"tier_routing",
            r"LLMProvider",
            r"TelemetryCollector",
            r"from\s+anthropic\s+import",
            r"messages\.create",
            r"client\.messages"
        ]

        return any(re.search(pattern, source_code) for pattern in indicators)
    def _get_example_tests(self) -> str:
        """Get few-shot examples of excellent tests for prompt learning."""
        return """EXAMPLE 1: Testing a utility function with mocking
```python
import pytest
from unittest.mock import Mock, patch
from mymodule import process_data

class TestProcessData:
    def test_processes_valid_data_successfully(self):
        \"\"\"Given valid input data, when processing, then returns expected result.\"\"\"
        # Given
        input_data = {"key": "value", "count": 42}

        # When
        result = process_data(input_data)

        # Then
        assert result is not None
        assert result["status"] == "success"
        assert result["processed"] is True

    def test_handles_invalid_data_with_error(self):
        \"\"\"Given invalid input, when processing, then raises ValueError.\"\"\"
        # Given
        invalid_data = {"missing": "key"}

        # When/Then
        with pytest.raises(ValueError, match="Required key 'key' not found"):
            process_data(invalid_data)
```

EXAMPLE 2: Testing a workflow with LLM mocking
```python
import pytest
from unittest.mock import Mock, AsyncMock, patch
from mymodule import MyWorkflow

@pytest.fixture
def mock_llm_client(mocker):
    \"\"\"Mock Anthropic LLM client.\"\"\"
    mock = mocker.patch('anthropic.Anthropic')
    mock_response = Mock()
    mock_response.content = [Mock(text="mock LLM response")]
    mock_response.usage = Mock(input_tokens=100, output_tokens=50)
    mock_response.stop_reason = "end_turn"
    mock.return_value.messages.create = AsyncMock(return_value=mock_response)
    return mock

class TestMyWorkflow:
    @pytest.mark.asyncio
    async def test_executes_successfully_with_mocked_llm(self, mock_llm_client):
        \"\"\"Given valid input, when executing workflow, then completes successfully.\"\"\"
        # Given
        workflow = MyWorkflow()
        input_data = {"prompt": "test prompt"}

        # When
        result = await workflow.execute(input_data)

        # Then
        assert result is not None
        assert "response" in result
        mock_llm_client.return_value.messages.create.assert_called_once()

    @pytest.mark.asyncio
    async def test_handles_api_error_gracefully(self, mock_llm_client):
        \"\"\"Given API failure, when executing, then handles error appropriately.\"\"\"
        # Given
        workflow = MyWorkflow()
        mock_llm_client.return_value.messages.create.side_effect = Exception("API Error")

        # When/Then
        with pytest.raises(Exception, match="API Error"):
            await workflow.execute({"prompt": "test"})
```
"""
    def _get_workflow_specific_prompt(self, module_name: str, module_path: str, source_code: str) -> str:
        """Get workflow-specific test generation prompt with comprehensive mocking guidance."""
        return f"""Generate comprehensive tests for this WORKFLOW module.

⚠️ CRITICAL: This module makes LLM API calls and requires proper mocking.

MODULE: {module_name}
IMPORT PATH: {module_path}

SOURCE CODE (COMPLETE - NO TRUNCATION):
```python
{source_code}
```

WORKFLOW TESTING REQUIREMENTS:

1. **Mock LLM API calls** - NEVER make real API calls in tests
   ```python
   @pytest.fixture
   def mock_llm_client(mocker):
       mock = mocker.patch('anthropic.Anthropic')
       mock_response = Mock()
       mock_response.content = [Mock(text="mock response")]
       mock_response.usage = Mock(input_tokens=100, output_tokens=50)
       mock_response.stop_reason = "end_turn"
       mock.return_value.messages.create = AsyncMock(return_value=mock_response)
       return mock
   ```

2. **Test tier routing** - Verify correct model selection (cheap/capable/premium)
3. **Test telemetry** - Mock and verify telemetry recording
4. **Test cost calculation** - Verify token usage and cost tracking
5. **Test error handling** - Mock API failures, timeouts, rate limits
6. **Test caching** - Mock cache hits/misses if applicable

TARGET COVERAGE: 40-50% (realistic for workflow classes with proper mocking)

Generate a complete test file with:
- Copyright header: "Generated by enhanced autonomous test generation system."
- Proper imports (from {module_path})
- Mock fixtures for ALL external dependencies (LLM, databases, APIs, file I/O)
- Given/When/Then structure in docstrings
- Both success and failure test cases
- Edge case handling
- Docstrings for all tests describing behavior

Return ONLY the complete Python test file, no explanations."""
    def _generate_with_llm(self, module_name: str, module_path: str, source_file: Path, source_code: str) -> str | None:
        """Generate comprehensive tests using LLM with Anthropic best practices.

        ENHANCEMENTS (Phase 1):
        - Extended thinking (20K token budget) for thorough test planning
        - Prompt caching for 90% cost reduction
        - Full source code (NO TRUNCATION)
        - Workflow-specific prompts when detected

        Args:
            module_name: Name of module being tested
            module_path: Python import path (e.g., attune.config)
            source_file: Path to source file
            source_code: Source code content (FULL, not truncated)

        Returns:
            Test file content with comprehensive tests, or None if generation failed
        """
        import os

        try:
            import anthropic
        except ImportError:
            logger.error("anthropic package not installed")
            return None

        # Get API key
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            logger.error("ANTHROPIC_API_KEY not set")
            return None

        # Detect if this is a workflow module
        is_workflow = self._is_workflow_module(source_code, module_path)
        logger.info(f"Module {module_name}: workflow={is_workflow}, size={len(source_code)} bytes (FULL)")

        # Build appropriate prompt based on module type
        if is_workflow:
            generation_prompt = self._get_workflow_specific_prompt(module_name, module_path, source_code)
        else:
            generation_prompt = f"""Generate comprehensive behavioral tests for this Python module.

SOURCE FILE: {source_file}
MODULE PATH: {module_path}

SOURCE CODE (COMPLETE):
```python
{source_code}
```

Generate a complete test file that:
1. Uses Given/When/Then behavioral test structure
2. Tests all public functions and classes
3. Includes edge cases and error handling
4. Uses proper mocking for external dependencies
5. Targets 80%+ code coverage for this module
6. Follows pytest conventions

Requirements:
- Import from {module_path} (not from src/)
- Use pytest fixtures where appropriate
- Mock external dependencies (APIs, databases, file I/O)
- Test both success and failure paths
- Include docstrings for all tests
- Use descriptive test names
- Start with copyright header:
\"\"\"Behavioral tests for {module_name}.

Generated by enhanced autonomous test generation system.

Copyright 2026 Smart-AI-Memory
Licensed under Apache 2.0
\"\"\"

Return ONLY the complete Python test file content, no explanations."""

        # Build messages with prompt caching (90% cost reduction on retries)
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "You are an expert Python test engineer. Here are examples of excellent tests:",
                        "cache_control": {"type": "ephemeral"}
                    },
                    {
                        "type": "text",
                        "text": self._get_example_tests(),
                        "cache_control": {"type": "ephemeral"}
                    },
                    {
                        "type": "text",
                        "text": generation_prompt
                    }
                ]
            }
        ]

        try:
            # Call Anthropic API with extended thinking and caching
            logger.info(f"Calling LLM with extended thinking for {module_name} (workflow={is_workflow})")
            client = anthropic.Anthropic(api_key=api_key)
            response = client.messages.create(
                model="claude-sonnet-4-5",  # capable tier
                max_tokens=40000,  # Very generous total budget for comprehensive tests
                thinking={
                    "type": "enabled",
                    "budget_tokens": 20000  # Generous thinking budget for thorough planning
                },
                messages=messages,
                timeout=900.0,  # 15 minutes timeout for extended thinking + generation
            )

            if not response.content:
                logger.warning(f"Empty LLM response for {module_name}")
                return None

            # Extract test content (thinking comes first, then text)
            test_content = None
            for block in response.content:
                if block.type == "text":
                    test_content = block.text.strip()
                    break

            if not test_content:
                logger.warning(f"No text content in LLM response for {module_name}")
                return None

            logger.info(f"LLM returned {len(test_content)} bytes for {module_name}")

            if len(test_content) < 100:
                logger.warning(f"LLM response too short for {module_name}: {test_content[:200]}")
                return None

            # Clean up response (remove markdown fences if present)
            if test_content.startswith("```python"):
                test_content = test_content[len("```python"):].strip()
            if test_content.endswith("```"):
                test_content = test_content[:-3].strip()

            # Check for truncation indicators
            if response.stop_reason == "max_tokens":
                logger.warning(f"⚠️ LLM response truncated for {module_name} (hit max_tokens)")
                # Response might be incomplete but let validation catch it

            # Quick syntax pre-check before returning
            try:
                import ast
                ast.parse(test_content)
                logger.info(f"✓ Quick syntax check passed for {module_name}")
            except SyntaxError as e:
                logger.error(f"❌ LLM generated invalid syntax for {module_name}: {e.msg} at line {e.lineno}")
                return None

            logger.info(f"Test content cleaned, final size: {len(test_content)} bytes")
            return test_content

        except Exception as e:
            logger.error(f"LLM generation error for {module_name}: {e}", exc_info=True)
            return None
    def _run_pytest_validation(self, test_file: Path) -> ValidationResult:
        """Run pytest on generated tests and collect failures.

        Args:
            test_file: Path to test file to validate

        Returns:
            ValidationResult with test outcomes and failure details
        """
        try:
            result = subprocess.run(
                [sys.executable, "-m", "pytest", str(test_file), "-v", "--tb=short"],
                capture_output=True,
                text=True,
                timeout=60,
            )

            passed = result.returncode == 0
            output = result.stdout + "\n" + result.stderr

            # Count errors
            error_count = output.count("FAILED") + output.count("ERROR")

            # Extract failure details
            failures = ""
            if not passed:
                # Extract relevant failure information
                lines = output.split("\n")
                failure_lines = []
                in_failure = False
                for line in lines:
                    if "FAILED" in line or "ERROR" in line:
                        in_failure = True
                    if in_failure:
                        failure_lines.append(line)
                        if line.startswith("="):  # End of failure section
                            in_failure = False
                failures = "\n".join(failure_lines[:100])  # Limit to 100 lines

            logger.info(f"Pytest validation: passed={passed}, errors={error_count}")

            return ValidationResult(
                passed=passed,
                failures=failures,
                error_count=error_count,
                output=output
            )

        except subprocess.TimeoutExpired:
            logger.error(f"Pytest validation timeout for {test_file}")
            return ValidationResult(
                passed=False,
                failures="Validation timeout after 60 seconds",
                error_count=1,
                output="Timeout"
            )
        except Exception as e:
            logger.error(f"Pytest validation exception: {e}")
            return ValidationResult(
                passed=False,
                failures=f"Validation exception: {e}",
                error_count=1,
                output=str(e)
            )
    def _call_llm_with_history(
        self,
        conversation_history: list[dict[str, Any]],
        api_key: str
    ) -> str | None:
        """Call LLM with conversation history for refinement.

        Args:
            conversation_history: List of messages (role + content)
            api_key: Anthropic API key

        Returns:
            Refined test content or None if failed
        """
        try:
            import anthropic

            client = anthropic.Anthropic(api_key=api_key)
            response = client.messages.create(
                model="claude-sonnet-4-5",
                max_tokens=40000,  # Very generous total budget for iterative refinement
                thinking={
                    "type": "enabled",
                    "budget_tokens": 20000  # Generous thinking budget for thorough analysis
                },
                messages=conversation_history,
                timeout=900.0,  # 15 minutes timeout for refinement iterations
            )

            if not response.content:
                logger.warning("Empty LLM response during refinement")
                return None

            # Extract text content
            test_content = None
            for block in response.content:
                if block.type == "text":
                    test_content = block.text.strip()
                    break

            if not test_content:
                logger.warning("No text content in refinement response")
                return None

            # Clean up response
            if test_content.startswith("```python"):
                test_content = test_content[len("```python"):].strip()
            if test_content.endswith("```"):
                test_content = test_content[:-3].strip()

            return test_content

        except Exception as e:
            logger.error(f"LLM refinement error: {e}", exc_info=True)
            return None
    def _generate_with_refinement(
        self,
        module_name: str,
        module_path: str,
        source_file: Path,
        source_code: str,
        test_file: Path
    ) -> str | None:
        """Generate tests with iterative refinement (Phase 2).

        Process:
        1. Generate initial tests
        2. Run pytest validation
        3. If failures, ask Claude to fix
        4. Repeat until tests pass or max iterations

        Args:
            module_name: Name of module being tested
            module_path: Python import path
            source_file: Path to source file
            source_code: Source code content
            test_file: Path where tests will be written

        Returns:
            Final test content or None if all attempts failed
        """
        import os

        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            logger.error("ANTHROPIC_API_KEY not set")
            return None

        logger.info(f"🔄 Phase 2: Multi-turn refinement enabled for {module_name} (max {self.max_refinement_iterations} iterations)")

        # Step 1: Generate initial tests
        test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)
        if not test_content:
            logger.warning("Initial generation failed")
            return None

        # Build conversation history for subsequent refinements
        is_workflow = self._is_workflow_module(source_code, module_path)

        # Initial prompt (for history tracking)
        if is_workflow:
            initial_prompt = self._get_workflow_specific_prompt(module_name, module_path, source_code)
        else:
            initial_prompt = f"""Generate comprehensive behavioral tests for {module_name}.

SOURCE CODE:
```python
{source_code}
```"""

        conversation_history = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "You are an expert Python test engineer. Examples:", "cache_control": {"type": "ephemeral"}},
                    {"type": "text", "text": self._get_example_tests(), "cache_control": {"type": "ephemeral"}},
                    {"type": "text", "text": initial_prompt}
                ]
            },
            {
                "role": "assistant",
                "content": test_content
            }
        ]

        # Step 2: Iterative refinement loop
        for iteration in range(self.max_refinement_iterations):
            logger.info(f"📝 Refinement iteration {iteration + 1}/{self.max_refinement_iterations} for {module_name}")

            # Write current version to temp file
            temp_test_file = test_file.parent / f"_temp_{test_file.name}"
            temp_test_file.write_text(test_content)

            # Validate with pytest
            validation_result = self._run_pytest_validation(temp_test_file)

            if validation_result.passed:
                logger.info(f"✅ Tests passed on iteration {iteration + 1} for {module_name}")
                temp_test_file.unlink()  # Clean up
                return test_content

            # Tests failed - ask Claude to fix
            logger.warning(f"⚠️ Tests failed on iteration {iteration + 1}: {validation_result.error_count} errors")

            refinement_prompt = f"""The tests you generated have failures. Please fix these specific issues:

FAILURES:
{validation_result.failures[:2000]}

Requirements:
1. Fix ONLY the failing tests - don't rewrite everything
2. Ensure imports are correct
3. Ensure mocking is properly configured
4. Return the COMPLETE corrected test file (not just the fixes)
5. Keep the same structure and copyright header

Return ONLY the complete Python test file, no explanations."""

            # Add to conversation history
            conversation_history.append({
                "role": "user",
                "content": refinement_prompt
            })

            # Call LLM for refinement
            refined_content = self._call_llm_with_history(conversation_history, api_key)

            if not refined_content:
                logger.error(f"❌ Refinement failed on iteration {iteration + 1}")
                temp_test_file.unlink()
                break

            # Update content and history
            test_content = refined_content
            conversation_history.append({
                "role": "assistant",
                "content": test_content
            })

            logger.info(f"🔄 Refinement iteration {iteration + 1} complete, retrying validation...")

        # Max iterations reached - remove any temp file left by the final
        # iteration, then return the best attempt
        (test_file.parent / f"_temp_{test_file.name}").unlink(missing_ok=True)
        logger.warning(f"⚠️ Max refinement iterations reached for {module_name} - returning best attempt")
        return test_content
    def _run_coverage_analysis(self, test_file: Path, source_file: Path) -> CoverageResult:
        """Run coverage analysis on tests.

        Args:
            test_file: Path to test file
            source_file: Path to source file being tested

        Returns:
            CoverageResult with coverage metrics and missing lines
        """
        try:
            # Run pytest with coverage, writing the JSON report to an explicit
            # path (coverage.py's default JSON output is coverage.json, not
            # .coverage.json, so pin the filename to match the read below)
            result = subprocess.run(
                [
                    sys.executable, "-m", "pytest",
                    str(test_file),
                    f"--cov={source_file.parent}",
                    "--cov-report=term-missing",
                    "--cov-report=json:coverage.json",
                    "-v"
                ],
                capture_output=True,
                text=True,
                timeout=120,
                cwd=Path.cwd()
            )

            # Parse coverage from JSON report
            coverage_json_path = Path("coverage.json")
            if not coverage_json_path.exists():
                logger.warning("Coverage JSON not generated")
                return CoverageResult(
                    coverage=0.0,
                    missing_lines=[],
                    total_statements=0,
                    covered_statements=0
                )

            with open(coverage_json_path) as f:
                coverage_data = json.load(f)

            # Find coverage for our specific source file
            source_key = str(source_file)
            file_coverage = None
            for key in coverage_data.get("files", {}).keys():
                if source_file.name in key or source_key in key:
                    file_coverage = coverage_data["files"][key]
                    break

            if not file_coverage:
                logger.warning(f"No coverage data found for {source_file}")
                return CoverageResult(
                    coverage=0.0,
                    missing_lines=[],
                    total_statements=0,
                    covered_statements=0
                )

            # Extract metrics
            total_statements = file_coverage["summary"]["num_statements"]
            covered_statements = file_coverage["summary"]["covered_lines"]
            coverage_pct = file_coverage["summary"]["percent_covered"] / 100.0
            missing_lines = file_coverage["missing_lines"]

            logger.info(f"Coverage: {coverage_pct:.1%} ({covered_statements}/{total_statements} statements)")

            return CoverageResult(
                coverage=coverage_pct,
                missing_lines=missing_lines,
                total_statements=total_statements,
                covered_statements=covered_statements
            )

        except subprocess.TimeoutExpired:
            logger.error("Coverage analysis timeout")
            return CoverageResult(coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0)
        except Exception as e:
            logger.error(f"Coverage analysis error: {e}", exc_info=True)
            return CoverageResult(coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0)

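    # For reference, coverage.py's JSON report (which pytest-cov delegates to)
    # is shaped roughly like this -- a trimmed illustration, not the full
    # schema; these are exactly the keys the parser above reads:
    #
    #     {
    #         "files": {
    #             "attune/example.py": {
    #                 "summary": {
    #                     "num_statements": 120,
    #                     "covered_lines": 90,
    #                     "percent_covered": 75.0
    #                 },
    #                 "missing_lines": [14, 15, 42]
    #             }
    #         },
    #         "totals": {"percent_covered": 75.0}
    #     }
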
    def _extract_uncovered_lines(self, source_file: Path, missing_lines: list[int]) -> str:
        """Extract source code for uncovered lines.

        Args:
            source_file: Path to source file
            missing_lines: List of uncovered line numbers

        Returns:
            Formatted string with uncovered code sections
        """
        if not missing_lines:
            return "No uncovered lines"

        try:
            source_lines = source_file.read_text().split("\n")

            # Group consecutive lines into ranges
            ranges = []
            start = missing_lines[0]
            end = start

            for line_num in missing_lines[1:]:
                if line_num == end + 1:
                    end = line_num
                else:
                    ranges.append((start, end))
                    start = line_num
                    end = start
            ranges.append((start, end))

            # Extract code for each range with context
            uncovered_sections = []
            for start, end in ranges[:10]:  # Limit to 10 ranges
                context_start = max(0, start - 3)
                context_end = min(len(source_lines), end + 2)

                section = []
                section.append(f"Lines {start}-{end}:")
                for i in range(context_start, context_end):
                    line_marker = ">>>" if start <= i + 1 <= end else "   "
                    section.append(f"{line_marker} {i + 1:4d}: {source_lines[i]}")

                uncovered_sections.append("\n".join(section))

            return "\n\n".join(uncovered_sections)

        except Exception as e:
            logger.error(f"Error extracting uncovered lines: {e}")
            return f"Error extracting lines: {e}"

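    # Editor's sketch (not part of the package): the grouping loop above,
    # isolated as a standalone helper for clarity. Sorted missing lines
    # collapse into inclusive (start, end) ranges, e.g.
    # [10, 11, 12, 40, 41, 77] -> [(10, 12), (40, 41), (77, 77)],
    # so the prompt shows three compact snippets instead of six stray lines.
    @staticmethod
    def _group_ranges_sketch(lines: list[int]) -> list[tuple[int, int]]:
        """Collapse sorted line numbers into inclusive (start, end) ranges."""
        ranges: list[tuple[int, int]] = []
        for n in lines:
            if ranges and n == ranges[-1][1] + 1:
                # Extend the current run by one line
                ranges[-1] = (ranges[-1][0], n)
            else:
                # Start a new run
                ranges.append((n, n))
        return ranges
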
    def _generate_with_coverage_target(
        self,
        module_name: str,
        module_path: str,
        source_file: Path,
        source_code: str,
        test_file: Path,
        initial_test_content: str
    ) -> str | None:
        """Generate tests iteratively until coverage target met (Phase 3).

        Process:
        1. Start with initial tests
        2. Run coverage analysis
        3. If target not met, identify uncovered lines
        4. Ask Claude to add tests for uncovered code
        5. Repeat until target coverage reached or max iterations

        Args:
            module_name: Name of module being tested
            module_path: Python import path
            source_file: Path to source file
            source_code: Source code content
            test_file: Path to test file
            initial_test_content: Initial test content from Phase 1/2

        Returns:
            Final test content with improved coverage or None if failed
        """
        import os

        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            logger.error("ANTHROPIC_API_KEY not set")
            return None

        logger.info(f"📊 Phase 3: Coverage-guided generation enabled (target: {self.target_coverage:.0%})")

        test_content = initial_test_content
        max_coverage_iterations = 5

        for iteration in range(max_coverage_iterations):
            logger.info(f"📈 Coverage iteration {iteration + 1}/{max_coverage_iterations} for {module_name}")

            # Write current tests
            test_file.write_text(test_content)

            # Run coverage analysis
            coverage_result = self._run_coverage_analysis(test_file, source_file)

            logger.info(f"Current coverage: {coverage_result.coverage:.1%}, target: {self.target_coverage:.0%}")

            # Check if target reached
            if coverage_result.coverage >= self.target_coverage:
                logger.info(f"✅ Coverage target reached: {coverage_result.coverage:.1%}")
                return test_content

            # Bail out if coverage is still near zero after at least one
            # refinement pass - further iterations are unlikely to help
            if iteration > 0 and coverage_result.coverage <= 0.05:
                logger.warning("⚠️ Coverage not improving, stopping")
                break

            # Identify uncovered code
            uncovered_code = self._extract_uncovered_lines(source_file, coverage_result.missing_lines)

            # Ask Claude to add tests for uncovered lines
            refinement_prompt = f"""Current coverage: {coverage_result.coverage:.1%}
Target coverage: {self.target_coverage:.0%}
Missing: {len(coverage_result.missing_lines)} lines

UNCOVERED CODE:
{uncovered_code[:3000]}

Please ADD tests to cover these specific uncovered lines. Requirements:
1. Focus ONLY on the uncovered lines shown above
2. Add new test methods to the existing test classes
3. Return the COMPLETE test file with additions (not just new tests)
4. Use appropriate mocking for external dependencies
5. Keep existing tests intact - just add new ones

Return ONLY the complete Python test file with additions, no explanations."""

            # Build conversation with caching
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "You are an expert Python test engineer. Examples:", "cache_control": {"type": "ephemeral"}},
                        {"type": "text", "text": self._get_example_tests(), "cache_control": {"type": "ephemeral"}},
                        {"type": "text", "text": f"Source code:\n```python\n{source_code}\n```", "cache_control": {"type": "ephemeral"}},
                        {"type": "text", "text": f"Current tests:\n```python\n{test_content}\n```"},
                        {"type": "text", "text": refinement_prompt}
                    ]
                }
            ]

            # Call LLM for coverage improvement
            try:
                import anthropic
                client = anthropic.Anthropic(api_key=api_key)
                response = client.messages.create(
                    model="claude-sonnet-4-5",
                    max_tokens=40000,  # Generous total budget for coverage improvement
                    thinking={"type": "enabled", "budget_tokens": 20000},  # Thorough thinking for coverage gaps
                    messages=messages,
                    timeout=900.0,  # 15-minute timeout for coverage-guided iterations
                )

                refined_content = None
                for block in response.content:
                    if block.type == "text":
                        refined_content = block.text.strip()
                        break

                if not refined_content:
                    logger.warning(f"No content in coverage refinement iteration {iteration + 1}")
                    break

                # Strip markdown code fences if the model wrapped its answer
                if refined_content.startswith("```python"):
                    refined_content = refined_content[len("```python"):].strip()
                if refined_content.endswith("```"):
                    refined_content = refined_content[:-3].strip()

                test_content = refined_content
                logger.info(f"🔄 Coverage iteration {iteration + 1} complete, retrying analysis...")

            except Exception as e:
                logger.error(f"Coverage refinement error on iteration {iteration + 1}: {e}")
                break

        # Return best attempt
        logger.info(f"Coverage-guided generation complete: final coverage ~{coverage_result.coverage:.1%}")
        return test_content

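    # Editor's sketch (hypothetical helper, not in the package): the inline
    # fence stripping above handles the common "```python ... ```" wrapper; a
    # slightly more defensive variant also tolerates a bare "```" opener and
    # stray prose around the fenced block:
    @staticmethod
    def _strip_code_fences_sketch(text: str) -> str:
        """Return the body of the first fenced code block, or the text as-is."""
        import re

        match = re.search(r"```(?:python)?\s*\n(.*?)```", text, re.DOTALL)
        return match.group(1).strip() if match else text.strip()
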
    def _validate_test_file(self, test_file: Path) -> bool:
        """Validate test file can be imported and has valid syntax.

        Args:
            test_file: Path to test file

        Returns:
            True if valid, False otherwise
        """
        # Step 1: Check for syntax errors with ast.parse (fast)
        try:
            import ast
            content = test_file.read_text()
            ast.parse(content)
            logger.info(f"✓ Syntax check passed for {test_file.name}")
        except SyntaxError as e:
            logger.error(f"❌ Syntax error in {test_file.name} at line {e.lineno}: {e.msg}")
            return False
        except Exception as e:
            logger.error(f"❌ Cannot parse {test_file.name}: {e}")
            return False

        # Step 2: Check if pytest can collect the tests
        try:
            result = subprocess.run(
                [sys.executable, "-m", "pytest", "--collect-only", str(test_file)],
                capture_output=True,
                text=True,
                timeout=10,
            )

            if result.returncode != 0:
                logger.error(f"❌ Pytest collection failed for {test_file.name}")
                logger.error(f"   Error: {result.stderr[:500]}")
                return False

            logger.info(f"✓ Pytest collection passed for {test_file.name}")
            return True

        except subprocess.TimeoutExpired:
            logger.error(f"❌ Validation timeout for {test_file.name}")
            return False
        except Exception as e:
            logger.error(f"❌ Validation exception for {test_file}: {e}")
            return False

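    # Why both steps matter: ast.parse catches pure syntax errors without
    # executing anything, while pytest collection also surfaces import-time
    # failures (missing dependencies, broken fixtures). Minimal illustration:
    #
    #     import ast
    #     try:
    #         ast.parse("def broken(:\n    pass")
    #     except SyntaxError as err:
    #         print(f"caught at line {err.lineno}: {err.msg}")
    #
    # A file that parses cleanly but does `import nonexistent_pkg` passes the
    # first check and only fails at `pytest --collect-only`.
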
    def _count_tests(self) -> int:
        """Count total tests in generated files.

        Returns:
            Number of tests
        """
        try:
            import re

            result = subprocess.run(
                [sys.executable, "-m", "pytest", "--collect-only", "-q", str(self.output_dir)],
                capture_output=True,
                text=True,
                timeout=30,
            )
            # Parse the collection summary line (singular or plural form)
            match = re.search(r"(\d+) tests? collected", result.stdout)
            return int(match.group(1)) if match else 0
        except Exception:
            return 0


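# For reference, `pytest --collect-only -q` ends with a summary line whose
# wording varies with the count and pytest version, e.g.:
#
#     1 test collected in 0.03s
#     42 tests collected in 0.51s
#
# which is why _count_tests above accepts both the singular and plural forms.

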
def run_batch_generation(
    batch_num: int,
    modules_json: str,
    enable_refinement: bool = True,
    enable_coverage_guided: bool = False
) -> None:
    """Run test generation for a batch.

    Args:
        batch_num: Batch number
        modules_json: JSON string of modules to process
        enable_refinement: Enable Phase 2 multi-turn refinement (default: True)
        enable_coverage_guided: Enable Phase 3 coverage-guided generation (default: False)
    """
    # Parse modules
    modules = json.loads(modules_json)

    # Create agent with Phase 2 & 3 configuration
    agent_id = f"test-gen-batch{batch_num}"
    generator = AutonomousTestGenerator(
        agent_id,
        batch_num,
        modules,
        enable_refinement=enable_refinement,
        enable_coverage_guided=enable_coverage_guided
    )

    # Generate tests
    print(f"Starting autonomous test generation for batch {batch_num}")
    print(f"Modules to process: {len(modules)}")
    print(f"Agent ID: {agent_id}")
    print("\nENHANCEMENTS:")
    print("  Phase 1: Extended thinking + Prompt caching + Workflow detection")
    print(f"  Phase 2: Multi-turn refinement = {'ENABLED' if enable_refinement else 'DISABLED'}")
    print(f"  Phase 3: Coverage-guided = {'ENABLED' if enable_coverage_guided else 'DISABLED'}")
    print("\nMonitor at: http://localhost:8000\n")

    results = generator.generate_all()

    # Report results
    print(f"\n{'='*60}")
    print(f"Batch {batch_num} Complete!")
    print(f"{'='*60}")
    print(f"Modules processed: {results['completed']}/{results['total_modules']}")
    print(f"Tests generated: {results['tests_generated']}")
    print(f"Files created: {len(results['files_created'])}")
    print(f"Failed: {results['failed']}")

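# Editor's note: judging from the prints above, `generate_all()` returns a
# dict shaped roughly like the following (inferred from usage here, not a
# documented contract):
#
#     {
#         "total_modules": 12,
#         "completed": 11,
#         "failed": 1,
#         "tests_generated": 240,
#         "files_created": ["tests/test_batch1_storage.py", ...],
#     }

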
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 3:
        print("Usage: python -m attune.workflows.autonomous_test_gen <batch_num> <modules_json> [--no-refinement] [--coverage-guided]")
        print("\nOptions:")
        print("  --no-refinement    Disable Phase 2 multi-turn refinement")
        print("  --coverage-guided  Enable Phase 3 coverage-guided generation (slower)")
        sys.exit(1)

    batch_num = int(sys.argv[1])
    modules_json = sys.argv[2]

    # Parse optional flags
    enable_refinement = "--no-refinement" not in sys.argv
    enable_coverage_guided = "--coverage-guided" in sys.argv

    run_batch_generation(batch_num, modules_json, enable_refinement, enable_coverage_guided)
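
# Example invocation (the shape of the modules JSON is an assumption inferred
# from the usage above; adjust to whatever descriptors your pipeline emits):
#
#     python -m attune.workflows.autonomous_test_gen 1 \
#         '[{"name": "cache.storage", "path": "attune/cache/storage.py"}]' \
#         --coverage-guided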