attune-ai 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- attune/__init__.py +358 -0
- attune/adaptive/__init__.py +13 -0
- attune/adaptive/task_complexity.py +127 -0
- attune/agent_monitoring.py +414 -0
- attune/cache/__init__.py +117 -0
- attune/cache/base.py +166 -0
- attune/cache/dependency_manager.py +256 -0
- attune/cache/hash_only.py +251 -0
- attune/cache/hybrid.py +457 -0
- attune/cache/storage.py +285 -0
- attune/cache_monitor.py +356 -0
- attune/cache_stats.py +298 -0
- attune/cli/__init__.py +152 -0
- attune/cli/__main__.py +12 -0
- attune/cli/commands/__init__.py +1 -0
- attune/cli/commands/batch.py +264 -0
- attune/cli/commands/cache.py +248 -0
- attune/cli/commands/help.py +331 -0
- attune/cli/commands/info.py +140 -0
- attune/cli/commands/inspect.py +436 -0
- attune/cli/commands/inspection.py +57 -0
- attune/cli/commands/memory.py +48 -0
- attune/cli/commands/metrics.py +92 -0
- attune/cli/commands/orchestrate.py +184 -0
- attune/cli/commands/patterns.py +207 -0
- attune/cli/commands/profiling.py +202 -0
- attune/cli/commands/provider.py +98 -0
- attune/cli/commands/routing.py +285 -0
- attune/cli/commands/setup.py +96 -0
- attune/cli/commands/status.py +235 -0
- attune/cli/commands/sync.py +166 -0
- attune/cli/commands/tier.py +121 -0
- attune/cli/commands/utilities.py +114 -0
- attune/cli/commands/workflow.py +579 -0
- attune/cli/core.py +32 -0
- attune/cli/parsers/__init__.py +68 -0
- attune/cli/parsers/batch.py +118 -0
- attune/cli/parsers/cache.py +65 -0
- attune/cli/parsers/help.py +41 -0
- attune/cli/parsers/info.py +26 -0
- attune/cli/parsers/inspect.py +66 -0
- attune/cli/parsers/metrics.py +42 -0
- attune/cli/parsers/orchestrate.py +61 -0
- attune/cli/parsers/patterns.py +54 -0
- attune/cli/parsers/provider.py +40 -0
- attune/cli/parsers/routing.py +110 -0
- attune/cli/parsers/setup.py +42 -0
- attune/cli/parsers/status.py +47 -0
- attune/cli/parsers/sync.py +31 -0
- attune/cli/parsers/tier.py +33 -0
- attune/cli/parsers/workflow.py +77 -0
- attune/cli/utils/__init__.py +1 -0
- attune/cli/utils/data.py +242 -0
- attune/cli/utils/helpers.py +68 -0
- attune/cli_legacy.py +3957 -0
- attune/cli_minimal.py +1159 -0
- attune/cli_router.py +437 -0
- attune/cli_unified.py +814 -0
- attune/config/__init__.py +66 -0
- attune/config/xml_config.py +286 -0
- attune/config.py +545 -0
- attune/coordination.py +870 -0
- attune/core.py +1511 -0
- attune/core_modules/__init__.py +15 -0
- attune/cost_tracker.py +626 -0
- attune/dashboard/__init__.py +41 -0
- attune/dashboard/app.py +512 -0
- attune/dashboard/simple_server.py +435 -0
- attune/dashboard/standalone_server.py +547 -0
- attune/discovery.py +306 -0
- attune/emergence.py +306 -0
- attune/exceptions.py +123 -0
- attune/feedback_loops.py +373 -0
- attune/hot_reload/README.md +473 -0
- attune/hot_reload/__init__.py +62 -0
- attune/hot_reload/config.py +83 -0
- attune/hot_reload/integration.py +229 -0
- attune/hot_reload/reloader.py +298 -0
- attune/hot_reload/watcher.py +183 -0
- attune/hot_reload/websocket.py +177 -0
- attune/levels.py +577 -0
- attune/leverage_points.py +441 -0
- attune/logging_config.py +261 -0
- attune/mcp/__init__.py +10 -0
- attune/mcp/server.py +506 -0
- attune/memory/__init__.py +237 -0
- attune/memory/claude_memory.py +469 -0
- attune/memory/config.py +224 -0
- attune/memory/control_panel.py +1290 -0
- attune/memory/control_panel_support.py +145 -0
- attune/memory/cross_session.py +845 -0
- attune/memory/edges.py +179 -0
- attune/memory/encryption.py +159 -0
- attune/memory/file_session.py +770 -0
- attune/memory/graph.py +570 -0
- attune/memory/long_term.py +913 -0
- attune/memory/long_term_types.py +99 -0
- attune/memory/mixins/__init__.py +25 -0
- attune/memory/mixins/backend_init_mixin.py +249 -0
- attune/memory/mixins/capabilities_mixin.py +208 -0
- attune/memory/mixins/handoff_mixin.py +208 -0
- attune/memory/mixins/lifecycle_mixin.py +49 -0
- attune/memory/mixins/long_term_mixin.py +352 -0
- attune/memory/mixins/promotion_mixin.py +109 -0
- attune/memory/mixins/short_term_mixin.py +182 -0
- attune/memory/nodes.py +179 -0
- attune/memory/redis_bootstrap.py +540 -0
- attune/memory/security/__init__.py +31 -0
- attune/memory/security/audit_logger.py +932 -0
- attune/memory/security/pii_scrubber.py +640 -0
- attune/memory/security/secrets_detector.py +678 -0
- attune/memory/short_term.py +2192 -0
- attune/memory/simple_storage.py +302 -0
- attune/memory/storage/__init__.py +15 -0
- attune/memory/storage_backend.py +167 -0
- attune/memory/summary_index.py +583 -0
- attune/memory/types.py +446 -0
- attune/memory/unified.py +182 -0
- attune/meta_workflows/__init__.py +74 -0
- attune/meta_workflows/agent_creator.py +248 -0
- attune/meta_workflows/builtin_templates.py +567 -0
- attune/meta_workflows/cli_commands/__init__.py +56 -0
- attune/meta_workflows/cli_commands/agent_commands.py +321 -0
- attune/meta_workflows/cli_commands/analytics_commands.py +442 -0
- attune/meta_workflows/cli_commands/config_commands.py +232 -0
- attune/meta_workflows/cli_commands/memory_commands.py +182 -0
- attune/meta_workflows/cli_commands/template_commands.py +354 -0
- attune/meta_workflows/cli_commands/workflow_commands.py +382 -0
- attune/meta_workflows/cli_meta_workflows.py +59 -0
- attune/meta_workflows/form_engine.py +292 -0
- attune/meta_workflows/intent_detector.py +409 -0
- attune/meta_workflows/models.py +569 -0
- attune/meta_workflows/pattern_learner.py +738 -0
- attune/meta_workflows/plan_generator.py +384 -0
- attune/meta_workflows/session_context.py +397 -0
- attune/meta_workflows/template_registry.py +229 -0
- attune/meta_workflows/workflow.py +984 -0
- attune/metrics/__init__.py +12 -0
- attune/metrics/collector.py +31 -0
- attune/metrics/prompt_metrics.py +194 -0
- attune/models/__init__.py +172 -0
- attune/models/__main__.py +13 -0
- attune/models/adaptive_routing.py +437 -0
- attune/models/auth_cli.py +444 -0
- attune/models/auth_strategy.py +450 -0
- attune/models/cli.py +655 -0
- attune/models/empathy_executor.py +354 -0
- attune/models/executor.py +257 -0
- attune/models/fallback.py +762 -0
- attune/models/provider_config.py +282 -0
- attune/models/registry.py +472 -0
- attune/models/tasks.py +359 -0
- attune/models/telemetry/__init__.py +71 -0
- attune/models/telemetry/analytics.py +594 -0
- attune/models/telemetry/backend.py +196 -0
- attune/models/telemetry/data_models.py +431 -0
- attune/models/telemetry/storage.py +489 -0
- attune/models/token_estimator.py +420 -0
- attune/models/validation.py +280 -0
- attune/monitoring/__init__.py +52 -0
- attune/monitoring/alerts.py +946 -0
- attune/monitoring/alerts_cli.py +448 -0
- attune/monitoring/multi_backend.py +271 -0
- attune/monitoring/otel_backend.py +362 -0
- attune/optimization/__init__.py +19 -0
- attune/optimization/context_optimizer.py +272 -0
- attune/orchestration/__init__.py +67 -0
- attune/orchestration/agent_templates.py +707 -0
- attune/orchestration/config_store.py +499 -0
- attune/orchestration/execution_strategies.py +2111 -0
- attune/orchestration/meta_orchestrator.py +1168 -0
- attune/orchestration/pattern_learner.py +696 -0
- attune/orchestration/real_tools.py +931 -0
- attune/pattern_cache.py +187 -0
- attune/pattern_library.py +542 -0
- attune/patterns/debugging/all_patterns.json +81 -0
- attune/patterns/debugging/workflow_20260107_1770825e.json +77 -0
- attune/patterns/refactoring_memory.json +89 -0
- attune/persistence.py +564 -0
- attune/platform_utils.py +265 -0
- attune/plugins/__init__.py +28 -0
- attune/plugins/base.py +361 -0
- attune/plugins/registry.py +268 -0
- attune/project_index/__init__.py +32 -0
- attune/project_index/cli.py +335 -0
- attune/project_index/index.py +667 -0
- attune/project_index/models.py +504 -0
- attune/project_index/reports.py +474 -0
- attune/project_index/scanner.py +777 -0
- attune/project_index/scanner_parallel.py +291 -0
- attune/prompts/__init__.py +61 -0
- attune/prompts/config.py +77 -0
- attune/prompts/context.py +177 -0
- attune/prompts/parser.py +285 -0
- attune/prompts/registry.py +313 -0
- attune/prompts/templates.py +208 -0
- attune/redis_config.py +302 -0
- attune/redis_memory.py +799 -0
- attune/resilience/__init__.py +56 -0
- attune/resilience/circuit_breaker.py +256 -0
- attune/resilience/fallback.py +179 -0
- attune/resilience/health.py +300 -0
- attune/resilience/retry.py +209 -0
- attune/resilience/timeout.py +135 -0
- attune/routing/__init__.py +43 -0
- attune/routing/chain_executor.py +433 -0
- attune/routing/classifier.py +217 -0
- attune/routing/smart_router.py +234 -0
- attune/routing/workflow_registry.py +343 -0
- attune/scaffolding/README.md +589 -0
- attune/scaffolding/__init__.py +35 -0
- attune/scaffolding/__main__.py +14 -0
- attune/scaffolding/cli.py +240 -0
- attune/scaffolding/templates/base_wizard.py.jinja2 +121 -0
- attune/scaffolding/templates/coach_wizard.py.jinja2 +321 -0
- attune/scaffolding/templates/domain_wizard.py.jinja2 +408 -0
- attune/scaffolding/templates/linear_flow_wizard.py.jinja2 +203 -0
- attune/socratic/__init__.py +256 -0
- attune/socratic/ab_testing.py +958 -0
- attune/socratic/blueprint.py +533 -0
- attune/socratic/cli.py +703 -0
- attune/socratic/collaboration.py +1114 -0
- attune/socratic/domain_templates.py +924 -0
- attune/socratic/embeddings.py +738 -0
- attune/socratic/engine.py +794 -0
- attune/socratic/explainer.py +682 -0
- attune/socratic/feedback.py +772 -0
- attune/socratic/forms.py +629 -0
- attune/socratic/generator.py +732 -0
- attune/socratic/llm_analyzer.py +637 -0
- attune/socratic/mcp_server.py +702 -0
- attune/socratic/session.py +312 -0
- attune/socratic/storage.py +667 -0
- attune/socratic/success.py +730 -0
- attune/socratic/visual_editor.py +860 -0
- attune/socratic/web_ui.py +958 -0
- attune/telemetry/__init__.py +39 -0
- attune/telemetry/agent_coordination.py +475 -0
- attune/telemetry/agent_tracking.py +367 -0
- attune/telemetry/approval_gates.py +545 -0
- attune/telemetry/cli.py +1231 -0
- attune/telemetry/commands/__init__.py +14 -0
- attune/telemetry/commands/dashboard_commands.py +696 -0
- attune/telemetry/event_streaming.py +409 -0
- attune/telemetry/feedback_loop.py +567 -0
- attune/telemetry/usage_tracker.py +591 -0
- attune/templates.py +754 -0
- attune/test_generator/__init__.py +38 -0
- attune/test_generator/__main__.py +14 -0
- attune/test_generator/cli.py +234 -0
- attune/test_generator/generator.py +355 -0
- attune/test_generator/risk_analyzer.py +216 -0
- attune/test_generator/templates/unit_test.py.jinja2 +272 -0
- attune/tier_recommender.py +384 -0
- attune/tools.py +183 -0
- attune/trust/__init__.py +28 -0
- attune/trust/circuit_breaker.py +579 -0
- attune/trust_building.py +527 -0
- attune/validation/__init__.py +19 -0
- attune/validation/xml_validator.py +281 -0
- attune/vscode_bridge.py +173 -0
- attune/workflow_commands.py +780 -0
- attune/workflow_patterns/__init__.py +33 -0
- attune/workflow_patterns/behavior.py +249 -0
- attune/workflow_patterns/core.py +76 -0
- attune/workflow_patterns/output.py +99 -0
- attune/workflow_patterns/registry.py +255 -0
- attune/workflow_patterns/structural.py +288 -0
- attune/workflows/__init__.py +539 -0
- attune/workflows/autonomous_test_gen.py +1268 -0
- attune/workflows/base.py +2667 -0
- attune/workflows/batch_processing.py +342 -0
- attune/workflows/bug_predict.py +1084 -0
- attune/workflows/builder.py +273 -0
- attune/workflows/caching.py +253 -0
- attune/workflows/code_review.py +1048 -0
- attune/workflows/code_review_adapters.py +312 -0
- attune/workflows/code_review_pipeline.py +722 -0
- attune/workflows/config.py +645 -0
- attune/workflows/dependency_check.py +644 -0
- attune/workflows/document_gen/__init__.py +25 -0
- attune/workflows/document_gen/config.py +30 -0
- attune/workflows/document_gen/report_formatter.py +162 -0
- attune/workflows/document_gen/workflow.py +1426 -0
- attune/workflows/document_manager.py +216 -0
- attune/workflows/document_manager_README.md +134 -0
- attune/workflows/documentation_orchestrator.py +1205 -0
- attune/workflows/history.py +510 -0
- attune/workflows/keyboard_shortcuts/__init__.py +39 -0
- attune/workflows/keyboard_shortcuts/generators.py +391 -0
- attune/workflows/keyboard_shortcuts/parsers.py +416 -0
- attune/workflows/keyboard_shortcuts/prompts.py +295 -0
- attune/workflows/keyboard_shortcuts/schema.py +193 -0
- attune/workflows/keyboard_shortcuts/workflow.py +509 -0
- attune/workflows/llm_base.py +363 -0
- attune/workflows/manage_docs.py +87 -0
- attune/workflows/manage_docs_README.md +134 -0
- attune/workflows/manage_documentation.py +821 -0
- attune/workflows/new_sample_workflow1.py +149 -0
- attune/workflows/new_sample_workflow1_README.md +150 -0
- attune/workflows/orchestrated_health_check.py +849 -0
- attune/workflows/orchestrated_release_prep.py +600 -0
- attune/workflows/output.py +413 -0
- attune/workflows/perf_audit.py +863 -0
- attune/workflows/pr_review.py +762 -0
- attune/workflows/progress.py +785 -0
- attune/workflows/progress_server.py +322 -0
- attune/workflows/progressive/README 2.md +454 -0
- attune/workflows/progressive/README.md +454 -0
- attune/workflows/progressive/__init__.py +82 -0
- attune/workflows/progressive/cli.py +219 -0
- attune/workflows/progressive/core.py +488 -0
- attune/workflows/progressive/orchestrator.py +723 -0
- attune/workflows/progressive/reports.py +520 -0
- attune/workflows/progressive/telemetry.py +274 -0
- attune/workflows/progressive/test_gen.py +495 -0
- attune/workflows/progressive/workflow.py +589 -0
- attune/workflows/refactor_plan.py +694 -0
- attune/workflows/release_prep.py +895 -0
- attune/workflows/release_prep_crew.py +969 -0
- attune/workflows/research_synthesis.py +404 -0
- attune/workflows/routing.py +168 -0
- attune/workflows/secure_release.py +593 -0
- attune/workflows/security_adapters.py +297 -0
- attune/workflows/security_audit.py +1329 -0
- attune/workflows/security_audit_phase3.py +355 -0
- attune/workflows/seo_optimization.py +633 -0
- attune/workflows/step_config.py +234 -0
- attune/workflows/telemetry_mixin.py +269 -0
- attune/workflows/test5.py +125 -0
- attune/workflows/test5_README.md +158 -0
- attune/workflows/test_coverage_boost_crew.py +849 -0
- attune/workflows/test_gen/__init__.py +52 -0
- attune/workflows/test_gen/ast_analyzer.py +249 -0
- attune/workflows/test_gen/config.py +88 -0
- attune/workflows/test_gen/data_models.py +38 -0
- attune/workflows/test_gen/report_formatter.py +289 -0
- attune/workflows/test_gen/test_templates.py +381 -0
- attune/workflows/test_gen/workflow.py +655 -0
- attune/workflows/test_gen.py +54 -0
- attune/workflows/test_gen_behavioral.py +477 -0
- attune/workflows/test_gen_parallel.py +341 -0
- attune/workflows/test_lifecycle.py +526 -0
- attune/workflows/test_maintenance.py +627 -0
- attune/workflows/test_maintenance_cli.py +590 -0
- attune/workflows/test_maintenance_crew.py +840 -0
- attune/workflows/test_runner.py +622 -0
- attune/workflows/tier_tracking.py +531 -0
- attune/workflows/xml_enhanced_crew.py +285 -0
- attune_ai-2.0.0.dist-info/METADATA +1026 -0
- attune_ai-2.0.0.dist-info/RECORD +457 -0
- attune_ai-2.0.0.dist-info/WHEEL +5 -0
- attune_ai-2.0.0.dist-info/entry_points.txt +26 -0
- attune_ai-2.0.0.dist-info/licenses/LICENSE +201 -0
- attune_ai-2.0.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
- attune_ai-2.0.0.dist-info/top_level.txt +5 -0
- attune_healthcare/__init__.py +13 -0
- attune_healthcare/monitors/__init__.py +9 -0
- attune_healthcare/monitors/clinical_protocol_monitor.py +315 -0
- attune_healthcare/monitors/monitoring/__init__.py +44 -0
- attune_healthcare/monitors/monitoring/protocol_checker.py +300 -0
- attune_healthcare/monitors/monitoring/protocol_loader.py +214 -0
- attune_healthcare/monitors/monitoring/sensor_parsers.py +306 -0
- attune_healthcare/monitors/monitoring/trajectory_analyzer.py +389 -0
- attune_llm/README.md +553 -0
- attune_llm/__init__.py +28 -0
- attune_llm/agent_factory/__init__.py +53 -0
- attune_llm/agent_factory/adapters/__init__.py +85 -0
- attune_llm/agent_factory/adapters/autogen_adapter.py +312 -0
- attune_llm/agent_factory/adapters/crewai_adapter.py +483 -0
- attune_llm/agent_factory/adapters/haystack_adapter.py +298 -0
- attune_llm/agent_factory/adapters/langchain_adapter.py +362 -0
- attune_llm/agent_factory/adapters/langgraph_adapter.py +333 -0
- attune_llm/agent_factory/adapters/native.py +228 -0
- attune_llm/agent_factory/adapters/wizard_adapter.py +423 -0
- attune_llm/agent_factory/base.py +305 -0
- attune_llm/agent_factory/crews/__init__.py +67 -0
- attune_llm/agent_factory/crews/code_review.py +1113 -0
- attune_llm/agent_factory/crews/health_check.py +1262 -0
- attune_llm/agent_factory/crews/refactoring.py +1128 -0
- attune_llm/agent_factory/crews/security_audit.py +1018 -0
- attune_llm/agent_factory/decorators.py +287 -0
- attune_llm/agent_factory/factory.py +558 -0
- attune_llm/agent_factory/framework.py +193 -0
- attune_llm/agent_factory/memory_integration.py +328 -0
- attune_llm/agent_factory/resilient.py +320 -0
- attune_llm/agents_md/__init__.py +22 -0
- attune_llm/agents_md/loader.py +218 -0
- attune_llm/agents_md/parser.py +271 -0
- attune_llm/agents_md/registry.py +307 -0
- attune_llm/claude_memory.py +466 -0
- attune_llm/cli/__init__.py +8 -0
- attune_llm/cli/sync_claude.py +487 -0
- attune_llm/code_health.py +1313 -0
- attune_llm/commands/__init__.py +51 -0
- attune_llm/commands/context.py +375 -0
- attune_llm/commands/loader.py +301 -0
- attune_llm/commands/models.py +231 -0
- attune_llm/commands/parser.py +371 -0
- attune_llm/commands/registry.py +429 -0
- attune_llm/config/__init__.py +29 -0
- attune_llm/config/unified.py +291 -0
- attune_llm/context/__init__.py +22 -0
- attune_llm/context/compaction.py +455 -0
- attune_llm/context/manager.py +434 -0
- attune_llm/contextual_patterns.py +361 -0
- attune_llm/core.py +907 -0
- attune_llm/git_pattern_extractor.py +435 -0
- attune_llm/hooks/__init__.py +24 -0
- attune_llm/hooks/config.py +306 -0
- attune_llm/hooks/executor.py +289 -0
- attune_llm/hooks/registry.py +302 -0
- attune_llm/hooks/scripts/__init__.py +39 -0
- attune_llm/hooks/scripts/evaluate_session.py +201 -0
- attune_llm/hooks/scripts/first_time_init.py +285 -0
- attune_llm/hooks/scripts/pre_compact.py +207 -0
- attune_llm/hooks/scripts/session_end.py +183 -0
- attune_llm/hooks/scripts/session_start.py +163 -0
- attune_llm/hooks/scripts/suggest_compact.py +225 -0
- attune_llm/learning/__init__.py +30 -0
- attune_llm/learning/evaluator.py +438 -0
- attune_llm/learning/extractor.py +514 -0
- attune_llm/learning/storage.py +560 -0
- attune_llm/levels.py +227 -0
- attune_llm/pattern_confidence.py +414 -0
- attune_llm/pattern_resolver.py +272 -0
- attune_llm/pattern_summary.py +350 -0
- attune_llm/providers.py +967 -0
- attune_llm/routing/__init__.py +32 -0
- attune_llm/routing/model_router.py +362 -0
- attune_llm/security/IMPLEMENTATION_SUMMARY.md +413 -0
- attune_llm/security/PHASE2_COMPLETE.md +384 -0
- attune_llm/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
- attune_llm/security/QUICK_REFERENCE.md +316 -0
- attune_llm/security/README.md +262 -0
- attune_llm/security/__init__.py +62 -0
- attune_llm/security/audit_logger.py +929 -0
- attune_llm/security/audit_logger_example.py +152 -0
- attune_llm/security/pii_scrubber.py +640 -0
- attune_llm/security/secrets_detector.py +678 -0
- attune_llm/security/secrets_detector_example.py +304 -0
- attune_llm/security/secure_memdocs.py +1192 -0
- attune_llm/security/secure_memdocs_example.py +278 -0
- attune_llm/session_status.py +745 -0
- attune_llm/state.py +246 -0
- attune_llm/utils/__init__.py +5 -0
- attune_llm/utils/tokens.py +349 -0
- attune_software/SOFTWARE_PLUGIN_README.md +57 -0
- attune_software/__init__.py +13 -0
- attune_software/cli/__init__.py +120 -0
- attune_software/cli/inspect.py +362 -0
- attune_software/cli.py +574 -0
- attune_software/plugin.py +188 -0
- workflow_scaffolding/__init__.py +11 -0
- workflow_scaffolding/__main__.py +12 -0
- workflow_scaffolding/cli.py +206 -0
- workflow_scaffolding/generator.py +265 -0
|
@@ -0,0 +1,730 @@
|
|
|
1
|
+
"""Success Criteria and Measurement System
|
|
2
|
+
|
|
3
|
+
Define and measure success for generated workflows.
|
|
4
|
+
|
|
5
|
+
Success criteria allow users to:
|
|
6
|
+
1. Define what "done" looks like for their workflow
|
|
7
|
+
2. Track progress toward goals
|
|
8
|
+
3. Measure effectiveness over time
|
|
9
|
+
4. Iterate and improve workflows
|
|
10
|
+
|
|
11
|
+
Copyright 2026 Smart-AI-Memory
|
|
12
|
+
Licensed under Fair Source License 0.9
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from collections.abc import Callable
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class MetricType(Enum):
    """Enumerates the kinds of values a success metric can measure."""

    COUNT = "count"  # integer tally, e.g. number of issues found
    PERCENTAGE = "percentage"  # value expressed on a 0-100 scale
    RATIO = "ratio"  # value expressed on a 0-1 scale
    DURATION = "duration"  # elapsed time, in seconds
    BOOLEAN = "boolean"  # simple pass/fail flag
    IMPROVEMENT = "improvement"  # delta against a before/after baseline
    THRESHOLD = "threshold"  # above-or-below a configured cutoff
    SCORE = "score"  # quality score on a 0-10 scale
    RATING = "rating"  # categorical label (good / moderate / poor)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class MetricDirection(Enum):
    """Encodes how a measured value relates to success."""

    HIGHER_IS_BETTER = "higher"  # larger values win (e.g. more issues caught)
    LOWER_IS_BETTER = "lower"  # smaller values win (e.g. faster runs)
    TARGET_VALUE = "target"  # closest to a specific value wins
    RANGE = "range"  # anywhere inside [min, max] counts as success
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class SuccessMetric:
    """A single success metric definition.

    Describes what is measured, which direction (or range/target) counts
    as success, and how to score a measured value against that criterion.

    Example:
        >>> metric = SuccessMetric(
        ...     id="security_issues_found",
        ...     name="Security Issues Detected",
        ...     description="Number of security vulnerabilities identified",
        ...     metric_type=MetricType.COUNT,
        ...     direction=MetricDirection.HIGHER_IS_BETTER,
        ...     target_value=None,  # No specific target
        ...     minimum_value=0,
        ...     unit="issues"
        ... )
    """

    # Unique metric identifier
    id: str

    # Display name
    name: str

    # Description of what this measures
    description: str

    # Type of metric
    metric_type: MetricType

    # Which direction indicates success
    direction: MetricDirection = MetricDirection.HIGHER_IS_BETTER

    # Target value (for TARGET_VALUE direction)
    target_value: float | None = None

    # Minimum acceptable value
    minimum_value: float | None = None

    # Maximum acceptable value
    maximum_value: float | None = None

    # Unit of measurement
    unit: str = ""

    # Weight for composite scoring (0-1)
    weight: float = 1.0

    # Whether this is a primary success indicator
    is_primary: bool = False

    # How to extract this metric from workflow output
    extraction_path: str = ""  # JSONPath-like expression

    # Custom extraction function
    extractor: Callable[[dict], float | bool] | None = None

    def evaluate(
        self,
        value: float | bool,
        baseline: float | bool | None = None,
    ) -> tuple[bool, float, str]:
        """Evaluate if a value meets this metric's success criteria.

        Args:
            value: The measured value
            baseline: Optional baseline for comparison (affects only the
                explanation text, not the pass/fail result)

        Returns:
            Tuple of (met_criteria, score 0-1, explanation)
        """
        # Boolean metrics short-circuit: the value itself is the verdict.
        if self.metric_type == MetricType.BOOLEAN:
            if isinstance(value, bool):
                met = value
                score = 1.0 if value else 0.0
                explanation = "Criteria met" if met else "Criteria not met"
                return met, score, explanation

        # Ensure numeric value for other types
        if not isinstance(value, (int, float)):
            return False, 0.0, f"Expected numeric value, got {type(value)}"

        # Calculate score based on direction
        if self.direction == MetricDirection.HIGHER_IS_BETTER:
            if self.minimum_value is not None:
                met = value >= self.minimum_value
                # Score is ratio of value to minimum (capped at 1.0)
                score = min(value / self.minimum_value, 1.0) if self.minimum_value > 0 else 1.0
            else:
                met = True  # No minimum, always met
                score = 1.0

        elif self.direction == MetricDirection.LOWER_IS_BETTER:
            if self.maximum_value is not None:
                met = value <= self.maximum_value
                # Score is inverse ratio (lower is better)
                score = (
                    max(1.0 - (value / self.maximum_value), 0.0) if self.maximum_value > 0 else 1.0
                )
            else:
                met = True
                score = 1.0

        elif self.direction == MetricDirection.TARGET_VALUE:
            if self.target_value is not None:
                deviation = abs(value - self.target_value)
                # Allow 10% tolerance by default
                tolerance = self.target_value * 0.1 if self.target_value > 0 else 1.0
                met = deviation <= tolerance
                score = max(1.0 - (deviation / max(tolerance, 0.001)), 0.0)
            else:
                met = True
                score = 1.0

        elif self.direction == MetricDirection.RANGE:
            # FIX: use explicit None checks rather than `or`, so a bound of 0
            # (which is falsy) is honored instead of silently widening the
            # range to +/- infinity.
            min_val = self.minimum_value if self.minimum_value is not None else float("-inf")
            max_val = self.maximum_value if self.maximum_value is not None else float("inf")
            met = min_val <= value <= max_val
            if met:
                # Score based on position in range (center = best)
                range_size = max_val - min_val
                if range_size > 0 and range_size != float("inf"):
                    center = (min_val + max_val) / 2
                    distance_from_center = abs(value - center)
                    score = 1.0 - (distance_from_center / (range_size / 2))
                else:
                    score = 1.0
            else:
                score = 0.0
        else:
            met = True
            score = 1.0

        # Generate explanation
        explanation = self._generate_explanation(value, met, score, baseline)

        return met, score, explanation

    def _generate_explanation(
        self,
        value: float | bool,
        met: bool,
        score: float,
        baseline: float | bool | None,
    ) -> str:
        """Generate human-readable explanation of the evaluation."""
        parts = []

        # Value statement
        if self.unit:
            parts.append(f"Measured: {value} {self.unit}")
        else:
            parts.append(f"Measured: {value}")

        # Comparison to baseline (percentage change, arrow shows direction)
        if (
            baseline is not None
            and isinstance(value, (int, float))
            and isinstance(baseline, (int, float))
        ):
            diff = value - baseline
            pct_change = (diff / baseline * 100) if baseline != 0 else 0
            direction = "↑" if diff > 0 else "↓" if diff < 0 else "→"
            parts.append(f"vs baseline: {direction} {abs(pct_change):.1f}%")

        # Target comparison
        if self.direction == MetricDirection.TARGET_VALUE and self.target_value is not None:
            parts.append(f"Target: {self.target_value} {self.unit}".strip())

        # Result
        result = "✓ Met" if met else "✗ Not met"
        parts.append(f"{result} (score: {score:.1%})")

        return " | ".join(parts)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to dictionary.

        Note: `extractor` is intentionally omitted — callables are not
        JSON-serializable.
        """
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "metric_type": self.metric_type.value,
            "direction": self.direction.value,
            "target_value": self.target_value,
            "minimum_value": self.minimum_value,
            "maximum_value": self.maximum_value,
            "unit": self.unit,
            "weight": self.weight,
            "is_primary": self.is_primary,
            "extraction_path": self.extraction_path,
        }
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
@dataclass
class MetricResult:
    """Result of evaluating a single metric.

    Produced by ``SuccessCriteria.evaluate`` for each ``SuccessMetric``;
    pairs the measured value with its pass/fail verdict, normalized score,
    and a human-readable explanation.
    """

    # Identifier of the SuccessMetric this result belongs to
    metric_id: str
    # The measured value that was evaluated
    value: float | bool
    # Whether the value satisfied the metric's success criteria
    met_criteria: bool
    # Normalized score in [0, 1] (1.0 = fully met)
    score: float
    # Human-readable summary of the evaluation
    explanation: str
    # Baseline value used for comparison, if one was supplied
    baseline: float | bool | None = None
    # ISO-format timestamp of when the evaluation ran
    timestamp: str = ""
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@dataclass
|
|
261
|
+
class SuccessCriteria:
|
|
262
|
+
"""Complete success criteria for a workflow.
|
|
263
|
+
|
|
264
|
+
Example:
|
|
265
|
+
>>> criteria = SuccessCriteria(
|
|
266
|
+
... id="code_review_success",
|
|
267
|
+
... name="Code Review Success Criteria",
|
|
268
|
+
... description="Measures effectiveness of automated code review",
|
|
269
|
+
... metrics=[
|
|
270
|
+
... SuccessMetric(
|
|
271
|
+
... id="issues_found",
|
|
272
|
+
... name="Issues Found",
|
|
273
|
+
... metric_type=MetricType.COUNT,
|
|
274
|
+
... is_primary=True
|
|
275
|
+
... ),
|
|
276
|
+
... SuccessMetric(
|
|
277
|
+
... id="review_time",
|
|
278
|
+
... name="Review Time",
|
|
279
|
+
... metric_type=MetricType.DURATION,
|
|
280
|
+
... direction=MetricDirection.LOWER_IS_BETTER,
|
|
281
|
+
... maximum_value=60, # seconds
|
|
282
|
+
... ),
|
|
283
|
+
... ],
|
|
284
|
+
... success_threshold=0.7 # 70% overall score = success
|
|
285
|
+
... )
|
|
286
|
+
"""
|
|
287
|
+
|
|
288
|
+
# Unique identifier
|
|
289
|
+
id: str = ""
|
|
290
|
+
|
|
291
|
+
# Display name
|
|
292
|
+
name: str = ""
|
|
293
|
+
|
|
294
|
+
# Description
|
|
295
|
+
description: str = ""
|
|
296
|
+
|
|
297
|
+
# List of metrics
|
|
298
|
+
metrics: list[SuccessMetric] = field(default_factory=list)
|
|
299
|
+
|
|
300
|
+
# Threshold for overall success (0-1)
|
|
301
|
+
success_threshold: float = 0.7
|
|
302
|
+
|
|
303
|
+
# Whether ALL metrics must be met (vs weighted average)
|
|
304
|
+
require_all: bool = False
|
|
305
|
+
|
|
306
|
+
# Minimum primary metrics that must pass
|
|
307
|
+
min_primary_metrics: int = 1
|
|
308
|
+
|
|
309
|
+
# Custom success evaluator
|
|
310
|
+
custom_evaluator: Callable[[dict[str, MetricResult]], bool] | None = None
|
|
311
|
+
|
|
312
|
+
    def add_metric(self, metric: SuccessMetric) -> None:
        """Add a metric to the criteria.

        Args:
            metric: The metric definition to append to this criteria's
                ``metrics`` list.
        """
        self.metrics.append(metric)
|
|
315
|
+
|
|
316
|
+
def get_primary_metrics(self) -> list[SuccessMetric]:
|
|
317
|
+
"""Get all primary success indicators."""
|
|
318
|
+
return [m for m in self.metrics if m.is_primary]
|
|
319
|
+
|
|
320
|
+
def evaluate(
    self,
    workflow_output: dict[str, Any],
    baselines: dict[str, float | bool] | None = None,
) -> SuccessEvaluation:
    """Evaluate workflow output against success criteria.

    Args:
        workflow_output: The workflow's output to evaluate
        baselines: Optional baseline values for comparison

    Returns:
        SuccessEvaluation with detailed results
    """
    known_baselines = baselines if baselines is not None else {}
    now = datetime.now().isoformat()
    outcomes: list[MetricResult] = []

    for metric in self.metrics:
        extracted = self._extract_metric_value(metric, workflow_output)

        if extracted is None:
            # A metric missing from the output counts as a hard failure.
            outcomes.append(
                MetricResult(
                    metric_id=metric.id,
                    value=0,
                    met_criteria=False,
                    score=0.0,
                    explanation=f"Metric '{metric.name}' not found in output",
                    timestamp=now,
                )
            )
        else:
            # Score the extracted value, comparing to a baseline if one exists.
            baseline = known_baselines.get(metric.id)
            met, score, explanation = metric.evaluate(extracted, baseline)
            outcomes.append(
                MetricResult(
                    metric_id=metric.id,
                    value=extracted,
                    met_criteria=met,
                    score=score,
                    explanation=explanation,
                    baseline=baseline,
                    timestamp=now,
                )
            )

    # Fold per-metric outcomes into an overall verdict.
    return self._calculate_overall_success(outcomes)
|
|
377
|
+
|
|
378
|
+
def _extract_metric_value(
    self,
    metric: SuccessMetric,
    output: dict[str, Any],
) -> float | bool | None:
    """Pull the raw value for *metric* out of the workflow output.

    Resolution order: custom extractor callable, dotted extraction path,
    direct top-level key, then a nested ``output["metrics"]`` dict.
    Returns None when the value cannot be located.
    """
    # 1. Custom extractor callable takes precedence.
    if metric.extractor:
        try:
            return metric.extractor(output)
        except (KeyError, TypeError, ValueError):
            return None

    # 2. Dotted path, descending through dicts and (numeric-key) lists.
    if metric.extraction_path:
        node = output
        try:
            for part in metric.extraction_path.split("."):
                if isinstance(node, dict):
                    node = node[part]
                elif isinstance(node, list) and part.isdigit():
                    node = node[int(part)]
                else:
                    return None
        except (KeyError, IndexError, TypeError):
            return None
        return node

    # 3. Direct top-level key match on the metric id.
    if metric.id in output:
        return output[metric.id]

    # 4. Fall back to a nested "metrics" dict.
    nested = output.get("metrics")
    if isinstance(nested, dict) and metric.id in nested:
        return nested[metric.id]

    return None
|
|
416
|
+
|
|
417
|
+
def _calculate_overall_success(
    self,
    results: list[MetricResult],
) -> SuccessEvaluation:
    """Roll individual metric results up into one SuccessEvaluation."""
    if not results:
        return SuccessEvaluation(
            overall_success=False,
            overall_score=0.0,
            metric_results=results,
            summary="No metrics to evaluate",
        )

    # Identify results belonging to primary metrics and count passes.
    primary_ids = {m.id for m in self.metrics if m.is_primary}
    primary_results = [r for r in results if r.metric_id in primary_ids]
    primary_passed = len([r for r in primary_results if r.met_criteria])
    primary_check = primary_passed >= self.min_primary_metrics

    mean_score = sum(r.score for r in results) / len(results)

    if self.require_all:
        # Strict mode: every metric must pass (plus the primary quota).
        everything_met = all(r.met_criteria for r in results)
        overall_success = everything_met and primary_check
        overall_score = 1.0 if overall_success else mean_score
    else:
        # Weighted-average mode against the success threshold.
        evaluated_ids = {r.metric_id for r in results}
        total_weight = sum(m.weight for m in self.metrics if m.id in evaluated_ids)

        def _weight_of(metric_id):
            # First matching metric's weight; 1.0 when the id is unknown.
            for m in self.metrics:
                if m.id == metric_id:
                    return m.weight
            return 1.0

        if total_weight > 0:
            overall_score = (
                sum(r.score * _weight_of(r.metric_id) for r in results) / total_weight
            )
        else:
            overall_score = mean_score
        overall_success = overall_score >= self.success_threshold and primary_check

    # A custom evaluator, when provided, has the final say on success.
    if self.custom_evaluator:
        overall_success = self.custom_evaluator({r.metric_id: r for r in results})

    summary = self._generate_summary(results, overall_success, overall_score)

    return SuccessEvaluation(
        overall_success=overall_success,
        overall_score=overall_score,
        metric_results=results,
        summary=summary,
        primary_metrics_passed=primary_passed,
        total_primary_metrics=len(primary_results),
    )
|
|
480
|
+
|
|
481
|
+
def _generate_summary(
    self,
    results: list[MetricResult],
    success: bool,
    score: float,
) -> str:
    """Build the human-readable multi-line summary text."""
    met_count = sum(1 for r in results if r.met_criteria)
    status = "✓ SUCCESS" if success else "✗ NOT MET"

    # Map metric id -> display name; setdefault keeps the first match.
    display_names: dict[str, str] = {}
    for m in self.metrics:
        display_names.setdefault(m.id, m.name)

    header = [
        f"{status} - Overall score: {score:.1%}",
        f"Metrics: {met_count}/{len(results)} met criteria",
        "",
        "Details:",
    ]

    details = []
    for result in results:
        # Fall back to the raw metric id when no metric definition matches.
        name = display_names.get(result.metric_id, result.metric_id)
        indicator = "✓" if result.met_criteria else "✗"
        details.append(f"  {indicator} {name}: {result.explanation}")

    return "\n".join(header + details)
|
|
505
|
+
|
|
506
|
+
def to_dict(self) -> dict[str, Any]:
    """Serialize the criteria definition (metrics included) to a plain dict."""
    serialized: dict[str, Any] = {}
    serialized["id"] = self.id
    serialized["name"] = self.name
    serialized["description"] = self.description
    serialized["metrics"] = [metric.to_dict() for metric in self.metrics]
    serialized["success_threshold"] = self.success_threshold
    serialized["require_all"] = self.require_all
    serialized["min_primary_metrics"] = self.min_primary_metrics
    return serialized
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
@dataclass
class SuccessEvaluation:
    """Outcome of checking workflow results against a SuccessCriteria."""

    # Did the criteria pass overall?
    overall_success: bool

    # Aggregate score in the range 0-1
    overall_score: float

    # Per-metric evaluation results
    metric_results: list[MetricResult]

    # Human-readable summary text
    summary: str

    # Number of primary metrics that passed
    primary_metrics_passed: int = 0

    # Number of primary metrics that were evaluated
    total_primary_metrics: int = 0

    # ISO-format timestamp captured when this evaluation was created
    evaluated_at: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        """Serialize the evaluation, including each metric result, to a dict."""

        def _result_entry(r):
            # One serialized row per MetricResult.
            return {
                "metric_id": r.metric_id,
                "value": r.value,
                "met_criteria": r.met_criteria,
                "score": r.score,
                "explanation": r.explanation,
                "baseline": r.baseline,
            }

        return {
            "overall_success": self.overall_success,
            "overall_score": self.overall_score,
            "metric_results": [_result_entry(r) for r in self.metric_results],
            "summary": self.summary,
            "primary_metrics_passed": self.primary_metrics_passed,
            "total_primary_metrics": self.total_primary_metrics,
            "evaluated_at": self.evaluated_at,
        }
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
# =============================================================================
|
|
568
|
+
# PREDEFINED SUCCESS CRITERIA TEMPLATES
|
|
569
|
+
# =============================================================================
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def code_review_criteria() -> SuccessCriteria:
    """Create standard success criteria for code review workflows."""
    review_metrics = [
        SuccessMetric(
            id="issues_found",
            name="Issues Found",
            description="Number of issues identified",
            metric_type=MetricType.COUNT,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="findings_count",
            is_primary=True,
            weight=1.0,
        ),
        SuccessMetric(
            id="severity_coverage",
            name="Severity Coverage",
            description="Percentage of severity levels covered",
            metric_type=MetricType.PERCENTAGE,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="severity_coverage",
            minimum_value=50,
            weight=0.8,
        ),
        SuccessMetric(
            id="review_time",
            name="Review Duration",
            description="Time to complete review",
            metric_type=MetricType.DURATION,
            direction=MetricDirection.LOWER_IS_BETTER,
            extraction_path="duration_seconds",
            maximum_value=120,  # 2 minutes
            unit="seconds",
            weight=0.6,
        ),
        SuccessMetric(
            id="actionable_recommendations",
            name="Actionable Recommendations",
            description="Whether recommendations are actionable",
            metric_type=MetricType.BOOLEAN,
            extraction_path="has_recommendations",
            is_primary=True,
            weight=1.0,
        ),
    ]
    return SuccessCriteria(
        id="code_review_success",
        name="Code Review Success",
        description="Standard metrics for code review effectiveness",
        metrics=review_metrics,
        success_threshold=0.7,
        min_primary_metrics=1,
    )
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def security_audit_criteria() -> SuccessCriteria:
    """Create success criteria for security audit workflows."""
    audit_metrics = [
        SuccessMetric(
            id="vulnerabilities_found",
            name="Vulnerabilities Found",
            description="Security vulnerabilities identified",
            metric_type=MetricType.COUNT,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="vulnerabilities.count",
            is_primary=True,
            weight=1.0,
        ),
        SuccessMetric(
            id="critical_issues",
            name="Critical Issues",
            description="High/critical severity issues found",
            metric_type=MetricType.COUNT,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="vulnerabilities.critical_count",
            is_primary=True,
            weight=1.2,  # Extra weight for critical issues
        ),
        SuccessMetric(
            id="owasp_coverage",
            name="OWASP Coverage",
            description="OWASP Top 10 categories checked",
            metric_type=MetricType.PERCENTAGE,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="owasp_coverage_percent",
            minimum_value=80,
            weight=0.9,
        ),
        SuccessMetric(
            id="false_positive_rate",
            name="False Positive Rate",
            description="Estimated false positive rate",
            metric_type=MetricType.PERCENTAGE,
            direction=MetricDirection.LOWER_IS_BETTER,
            extraction_path="estimated_fp_rate",
            maximum_value=20,
            weight=0.7,
        ),
    ]
    return SuccessCriteria(
        id="security_audit_success",
        name="Security Audit Success",
        description="Metrics for security audit effectiveness",
        metrics=audit_metrics,
        success_threshold=0.75,
        min_primary_metrics=1,
    )
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def test_generation_criteria() -> SuccessCriteria:
    """Create success criteria for test generation workflows."""
    generation_metrics = [
        SuccessMetric(
            id="tests_generated",
            name="Tests Generated",
            description="Number of test cases generated",
            metric_type=MetricType.COUNT,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="tests.count",
            minimum_value=1,
            is_primary=True,
            weight=1.0,
        ),
        SuccessMetric(
            id="coverage_increase",
            name="Coverage Increase",
            description="Increase in code coverage",
            metric_type=MetricType.IMPROVEMENT,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="coverage.increase_percent",
            minimum_value=5,  # At least 5% increase
            unit="%",
            weight=1.0,
        ),
        SuccessMetric(
            id="tests_passing",
            name="Tests Passing",
            description="Percentage of generated tests that pass",
            metric_type=MetricType.PERCENTAGE,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="tests.pass_rate",
            minimum_value=80,
            is_primary=True,
            weight=1.2,
        ),
        SuccessMetric(
            id="edge_cases_covered",
            name="Edge Cases Covered",
            description="Number of edge cases with tests",
            metric_type=MetricType.COUNT,
            direction=MetricDirection.HIGHER_IS_BETTER,
            extraction_path="edge_cases.count",
            weight=0.8,
        ),
    ]
    return SuccessCriteria(
        id="test_generation_success",
        name="Test Generation Success",
        description="Metrics for test generation effectiveness",
        metrics=generation_metrics,
        success_threshold=0.7,
        min_primary_metrics=2,
    )
|