attune-ai 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (457)
  1. attune/__init__.py +358 -0
  2. attune/adaptive/__init__.py +13 -0
  3. attune/adaptive/task_complexity.py +127 -0
  4. attune/agent_monitoring.py +414 -0
  5. attune/cache/__init__.py +117 -0
  6. attune/cache/base.py +166 -0
  7. attune/cache/dependency_manager.py +256 -0
  8. attune/cache/hash_only.py +251 -0
  9. attune/cache/hybrid.py +457 -0
  10. attune/cache/storage.py +285 -0
  11. attune/cache_monitor.py +356 -0
  12. attune/cache_stats.py +298 -0
  13. attune/cli/__init__.py +152 -0
  14. attune/cli/__main__.py +12 -0
  15. attune/cli/commands/__init__.py +1 -0
  16. attune/cli/commands/batch.py +264 -0
  17. attune/cli/commands/cache.py +248 -0
  18. attune/cli/commands/help.py +331 -0
  19. attune/cli/commands/info.py +140 -0
  20. attune/cli/commands/inspect.py +436 -0
  21. attune/cli/commands/inspection.py +57 -0
  22. attune/cli/commands/memory.py +48 -0
  23. attune/cli/commands/metrics.py +92 -0
  24. attune/cli/commands/orchestrate.py +184 -0
  25. attune/cli/commands/patterns.py +207 -0
  26. attune/cli/commands/profiling.py +202 -0
  27. attune/cli/commands/provider.py +98 -0
  28. attune/cli/commands/routing.py +285 -0
  29. attune/cli/commands/setup.py +96 -0
  30. attune/cli/commands/status.py +235 -0
  31. attune/cli/commands/sync.py +166 -0
  32. attune/cli/commands/tier.py +121 -0
  33. attune/cli/commands/utilities.py +114 -0
  34. attune/cli/commands/workflow.py +579 -0
  35. attune/cli/core.py +32 -0
  36. attune/cli/parsers/__init__.py +68 -0
  37. attune/cli/parsers/batch.py +118 -0
  38. attune/cli/parsers/cache.py +65 -0
  39. attune/cli/parsers/help.py +41 -0
  40. attune/cli/parsers/info.py +26 -0
  41. attune/cli/parsers/inspect.py +66 -0
  42. attune/cli/parsers/metrics.py +42 -0
  43. attune/cli/parsers/orchestrate.py +61 -0
  44. attune/cli/parsers/patterns.py +54 -0
  45. attune/cli/parsers/provider.py +40 -0
  46. attune/cli/parsers/routing.py +110 -0
  47. attune/cli/parsers/setup.py +42 -0
  48. attune/cli/parsers/status.py +47 -0
  49. attune/cli/parsers/sync.py +31 -0
  50. attune/cli/parsers/tier.py +33 -0
  51. attune/cli/parsers/workflow.py +77 -0
  52. attune/cli/utils/__init__.py +1 -0
  53. attune/cli/utils/data.py +242 -0
  54. attune/cli/utils/helpers.py +68 -0
  55. attune/cli_legacy.py +3957 -0
  56. attune/cli_minimal.py +1159 -0
  57. attune/cli_router.py +437 -0
  58. attune/cli_unified.py +814 -0
  59. attune/config/__init__.py +66 -0
  60. attune/config/xml_config.py +286 -0
  61. attune/config.py +545 -0
  62. attune/coordination.py +870 -0
  63. attune/core.py +1511 -0
  64. attune/core_modules/__init__.py +15 -0
  65. attune/cost_tracker.py +626 -0
  66. attune/dashboard/__init__.py +41 -0
  67. attune/dashboard/app.py +512 -0
  68. attune/dashboard/simple_server.py +435 -0
  69. attune/dashboard/standalone_server.py +547 -0
  70. attune/discovery.py +306 -0
  71. attune/emergence.py +306 -0
  72. attune/exceptions.py +123 -0
  73. attune/feedback_loops.py +373 -0
  74. attune/hot_reload/README.md +473 -0
  75. attune/hot_reload/__init__.py +62 -0
  76. attune/hot_reload/config.py +83 -0
  77. attune/hot_reload/integration.py +229 -0
  78. attune/hot_reload/reloader.py +298 -0
  79. attune/hot_reload/watcher.py +183 -0
  80. attune/hot_reload/websocket.py +177 -0
  81. attune/levels.py +577 -0
  82. attune/leverage_points.py +441 -0
  83. attune/logging_config.py +261 -0
  84. attune/mcp/__init__.py +10 -0
  85. attune/mcp/server.py +506 -0
  86. attune/memory/__init__.py +237 -0
  87. attune/memory/claude_memory.py +469 -0
  88. attune/memory/config.py +224 -0
  89. attune/memory/control_panel.py +1290 -0
  90. attune/memory/control_panel_support.py +145 -0
  91. attune/memory/cross_session.py +845 -0
  92. attune/memory/edges.py +179 -0
  93. attune/memory/encryption.py +159 -0
  94. attune/memory/file_session.py +770 -0
  95. attune/memory/graph.py +570 -0
  96. attune/memory/long_term.py +913 -0
  97. attune/memory/long_term_types.py +99 -0
  98. attune/memory/mixins/__init__.py +25 -0
  99. attune/memory/mixins/backend_init_mixin.py +249 -0
  100. attune/memory/mixins/capabilities_mixin.py +208 -0
  101. attune/memory/mixins/handoff_mixin.py +208 -0
  102. attune/memory/mixins/lifecycle_mixin.py +49 -0
  103. attune/memory/mixins/long_term_mixin.py +352 -0
  104. attune/memory/mixins/promotion_mixin.py +109 -0
  105. attune/memory/mixins/short_term_mixin.py +182 -0
  106. attune/memory/nodes.py +179 -0
  107. attune/memory/redis_bootstrap.py +540 -0
  108. attune/memory/security/__init__.py +31 -0
  109. attune/memory/security/audit_logger.py +932 -0
  110. attune/memory/security/pii_scrubber.py +640 -0
  111. attune/memory/security/secrets_detector.py +678 -0
  112. attune/memory/short_term.py +2192 -0
  113. attune/memory/simple_storage.py +302 -0
  114. attune/memory/storage/__init__.py +15 -0
  115. attune/memory/storage_backend.py +167 -0
  116. attune/memory/summary_index.py +583 -0
  117. attune/memory/types.py +446 -0
  118. attune/memory/unified.py +182 -0
  119. attune/meta_workflows/__init__.py +74 -0
  120. attune/meta_workflows/agent_creator.py +248 -0
  121. attune/meta_workflows/builtin_templates.py +567 -0
  122. attune/meta_workflows/cli_commands/__init__.py +56 -0
  123. attune/meta_workflows/cli_commands/agent_commands.py +321 -0
  124. attune/meta_workflows/cli_commands/analytics_commands.py +442 -0
  125. attune/meta_workflows/cli_commands/config_commands.py +232 -0
  126. attune/meta_workflows/cli_commands/memory_commands.py +182 -0
  127. attune/meta_workflows/cli_commands/template_commands.py +354 -0
  128. attune/meta_workflows/cli_commands/workflow_commands.py +382 -0
  129. attune/meta_workflows/cli_meta_workflows.py +59 -0
  130. attune/meta_workflows/form_engine.py +292 -0
  131. attune/meta_workflows/intent_detector.py +409 -0
  132. attune/meta_workflows/models.py +569 -0
  133. attune/meta_workflows/pattern_learner.py +738 -0
  134. attune/meta_workflows/plan_generator.py +384 -0
  135. attune/meta_workflows/session_context.py +397 -0
  136. attune/meta_workflows/template_registry.py +229 -0
  137. attune/meta_workflows/workflow.py +984 -0
  138. attune/metrics/__init__.py +12 -0
  139. attune/metrics/collector.py +31 -0
  140. attune/metrics/prompt_metrics.py +194 -0
  141. attune/models/__init__.py +172 -0
  142. attune/models/__main__.py +13 -0
  143. attune/models/adaptive_routing.py +437 -0
  144. attune/models/auth_cli.py +444 -0
  145. attune/models/auth_strategy.py +450 -0
  146. attune/models/cli.py +655 -0
  147. attune/models/empathy_executor.py +354 -0
  148. attune/models/executor.py +257 -0
  149. attune/models/fallback.py +762 -0
  150. attune/models/provider_config.py +282 -0
  151. attune/models/registry.py +472 -0
  152. attune/models/tasks.py +359 -0
  153. attune/models/telemetry/__init__.py +71 -0
  154. attune/models/telemetry/analytics.py +594 -0
  155. attune/models/telemetry/backend.py +196 -0
  156. attune/models/telemetry/data_models.py +431 -0
  157. attune/models/telemetry/storage.py +489 -0
  158. attune/models/token_estimator.py +420 -0
  159. attune/models/validation.py +280 -0
  160. attune/monitoring/__init__.py +52 -0
  161. attune/monitoring/alerts.py +946 -0
  162. attune/monitoring/alerts_cli.py +448 -0
  163. attune/monitoring/multi_backend.py +271 -0
  164. attune/monitoring/otel_backend.py +362 -0
  165. attune/optimization/__init__.py +19 -0
  166. attune/optimization/context_optimizer.py +272 -0
  167. attune/orchestration/__init__.py +67 -0
  168. attune/orchestration/agent_templates.py +707 -0
  169. attune/orchestration/config_store.py +499 -0
  170. attune/orchestration/execution_strategies.py +2111 -0
  171. attune/orchestration/meta_orchestrator.py +1168 -0
  172. attune/orchestration/pattern_learner.py +696 -0
  173. attune/orchestration/real_tools.py +931 -0
  174. attune/pattern_cache.py +187 -0
  175. attune/pattern_library.py +542 -0
  176. attune/patterns/debugging/all_patterns.json +81 -0
  177. attune/patterns/debugging/workflow_20260107_1770825e.json +77 -0
  178. attune/patterns/refactoring_memory.json +89 -0
  179. attune/persistence.py +564 -0
  180. attune/platform_utils.py +265 -0
  181. attune/plugins/__init__.py +28 -0
  182. attune/plugins/base.py +361 -0
  183. attune/plugins/registry.py +268 -0
  184. attune/project_index/__init__.py +32 -0
  185. attune/project_index/cli.py +335 -0
  186. attune/project_index/index.py +667 -0
  187. attune/project_index/models.py +504 -0
  188. attune/project_index/reports.py +474 -0
  189. attune/project_index/scanner.py +777 -0
  190. attune/project_index/scanner_parallel.py +291 -0
  191. attune/prompts/__init__.py +61 -0
  192. attune/prompts/config.py +77 -0
  193. attune/prompts/context.py +177 -0
  194. attune/prompts/parser.py +285 -0
  195. attune/prompts/registry.py +313 -0
  196. attune/prompts/templates.py +208 -0
  197. attune/redis_config.py +302 -0
  198. attune/redis_memory.py +799 -0
  199. attune/resilience/__init__.py +56 -0
  200. attune/resilience/circuit_breaker.py +256 -0
  201. attune/resilience/fallback.py +179 -0
  202. attune/resilience/health.py +300 -0
  203. attune/resilience/retry.py +209 -0
  204. attune/resilience/timeout.py +135 -0
  205. attune/routing/__init__.py +43 -0
  206. attune/routing/chain_executor.py +433 -0
  207. attune/routing/classifier.py +217 -0
  208. attune/routing/smart_router.py +234 -0
  209. attune/routing/workflow_registry.py +343 -0
  210. attune/scaffolding/README.md +589 -0
  211. attune/scaffolding/__init__.py +35 -0
  212. attune/scaffolding/__main__.py +14 -0
  213. attune/scaffolding/cli.py +240 -0
  214. attune/scaffolding/templates/base_wizard.py.jinja2 +121 -0
  215. attune/scaffolding/templates/coach_wizard.py.jinja2 +321 -0
  216. attune/scaffolding/templates/domain_wizard.py.jinja2 +408 -0
  217. attune/scaffolding/templates/linear_flow_wizard.py.jinja2 +203 -0
  218. attune/socratic/__init__.py +256 -0
  219. attune/socratic/ab_testing.py +958 -0
  220. attune/socratic/blueprint.py +533 -0
  221. attune/socratic/cli.py +703 -0
  222. attune/socratic/collaboration.py +1114 -0
  223. attune/socratic/domain_templates.py +924 -0
  224. attune/socratic/embeddings.py +738 -0
  225. attune/socratic/engine.py +794 -0
  226. attune/socratic/explainer.py +682 -0
  227. attune/socratic/feedback.py +772 -0
  228. attune/socratic/forms.py +629 -0
  229. attune/socratic/generator.py +732 -0
  230. attune/socratic/llm_analyzer.py +637 -0
  231. attune/socratic/mcp_server.py +702 -0
  232. attune/socratic/session.py +312 -0
  233. attune/socratic/storage.py +667 -0
  234. attune/socratic/success.py +730 -0
  235. attune/socratic/visual_editor.py +860 -0
  236. attune/socratic/web_ui.py +958 -0
  237. attune/telemetry/__init__.py +39 -0
  238. attune/telemetry/agent_coordination.py +475 -0
  239. attune/telemetry/agent_tracking.py +367 -0
  240. attune/telemetry/approval_gates.py +545 -0
  241. attune/telemetry/cli.py +1231 -0
  242. attune/telemetry/commands/__init__.py +14 -0
  243. attune/telemetry/commands/dashboard_commands.py +696 -0
  244. attune/telemetry/event_streaming.py +409 -0
  245. attune/telemetry/feedback_loop.py +567 -0
  246. attune/telemetry/usage_tracker.py +591 -0
  247. attune/templates.py +754 -0
  248. attune/test_generator/__init__.py +38 -0
  249. attune/test_generator/__main__.py +14 -0
  250. attune/test_generator/cli.py +234 -0
  251. attune/test_generator/generator.py +355 -0
  252. attune/test_generator/risk_analyzer.py +216 -0
  253. attune/test_generator/templates/unit_test.py.jinja2 +272 -0
  254. attune/tier_recommender.py +384 -0
  255. attune/tools.py +183 -0
  256. attune/trust/__init__.py +28 -0
  257. attune/trust/circuit_breaker.py +579 -0
  258. attune/trust_building.py +527 -0
  259. attune/validation/__init__.py +19 -0
  260. attune/validation/xml_validator.py +281 -0
  261. attune/vscode_bridge.py +173 -0
  262. attune/workflow_commands.py +780 -0
  263. attune/workflow_patterns/__init__.py +33 -0
  264. attune/workflow_patterns/behavior.py +249 -0
  265. attune/workflow_patterns/core.py +76 -0
  266. attune/workflow_patterns/output.py +99 -0
  267. attune/workflow_patterns/registry.py +255 -0
  268. attune/workflow_patterns/structural.py +288 -0
  269. attune/workflows/__init__.py +539 -0
  270. attune/workflows/autonomous_test_gen.py +1268 -0
  271. attune/workflows/base.py +2667 -0
  272. attune/workflows/batch_processing.py +342 -0
  273. attune/workflows/bug_predict.py +1084 -0
  274. attune/workflows/builder.py +273 -0
  275. attune/workflows/caching.py +253 -0
  276. attune/workflows/code_review.py +1048 -0
  277. attune/workflows/code_review_adapters.py +312 -0
  278. attune/workflows/code_review_pipeline.py +722 -0
  279. attune/workflows/config.py +645 -0
  280. attune/workflows/dependency_check.py +644 -0
  281. attune/workflows/document_gen/__init__.py +25 -0
  282. attune/workflows/document_gen/config.py +30 -0
  283. attune/workflows/document_gen/report_formatter.py +162 -0
  284. attune/workflows/document_gen/workflow.py +1426 -0
  285. attune/workflows/document_manager.py +216 -0
  286. attune/workflows/document_manager_README.md +134 -0
  287. attune/workflows/documentation_orchestrator.py +1205 -0
  288. attune/workflows/history.py +510 -0
  289. attune/workflows/keyboard_shortcuts/__init__.py +39 -0
  290. attune/workflows/keyboard_shortcuts/generators.py +391 -0
  291. attune/workflows/keyboard_shortcuts/parsers.py +416 -0
  292. attune/workflows/keyboard_shortcuts/prompts.py +295 -0
  293. attune/workflows/keyboard_shortcuts/schema.py +193 -0
  294. attune/workflows/keyboard_shortcuts/workflow.py +509 -0
  295. attune/workflows/llm_base.py +363 -0
  296. attune/workflows/manage_docs.py +87 -0
  297. attune/workflows/manage_docs_README.md +134 -0
  298. attune/workflows/manage_documentation.py +821 -0
  299. attune/workflows/new_sample_workflow1.py +149 -0
  300. attune/workflows/new_sample_workflow1_README.md +150 -0
  301. attune/workflows/orchestrated_health_check.py +849 -0
  302. attune/workflows/orchestrated_release_prep.py +600 -0
  303. attune/workflows/output.py +413 -0
  304. attune/workflows/perf_audit.py +863 -0
  305. attune/workflows/pr_review.py +762 -0
  306. attune/workflows/progress.py +785 -0
  307. attune/workflows/progress_server.py +322 -0
  308. attune/workflows/progressive/README 2.md +454 -0
  309. attune/workflows/progressive/README.md +454 -0
  310. attune/workflows/progressive/__init__.py +82 -0
  311. attune/workflows/progressive/cli.py +219 -0
  312. attune/workflows/progressive/core.py +488 -0
  313. attune/workflows/progressive/orchestrator.py +723 -0
  314. attune/workflows/progressive/reports.py +520 -0
  315. attune/workflows/progressive/telemetry.py +274 -0
  316. attune/workflows/progressive/test_gen.py +495 -0
  317. attune/workflows/progressive/workflow.py +589 -0
  318. attune/workflows/refactor_plan.py +694 -0
  319. attune/workflows/release_prep.py +895 -0
  320. attune/workflows/release_prep_crew.py +969 -0
  321. attune/workflows/research_synthesis.py +404 -0
  322. attune/workflows/routing.py +168 -0
  323. attune/workflows/secure_release.py +593 -0
  324. attune/workflows/security_adapters.py +297 -0
  325. attune/workflows/security_audit.py +1329 -0
  326. attune/workflows/security_audit_phase3.py +355 -0
  327. attune/workflows/seo_optimization.py +633 -0
  328. attune/workflows/step_config.py +234 -0
  329. attune/workflows/telemetry_mixin.py +269 -0
  330. attune/workflows/test5.py +125 -0
  331. attune/workflows/test5_README.md +158 -0
  332. attune/workflows/test_coverage_boost_crew.py +849 -0
  333. attune/workflows/test_gen/__init__.py +52 -0
  334. attune/workflows/test_gen/ast_analyzer.py +249 -0
  335. attune/workflows/test_gen/config.py +88 -0
  336. attune/workflows/test_gen/data_models.py +38 -0
  337. attune/workflows/test_gen/report_formatter.py +289 -0
  338. attune/workflows/test_gen/test_templates.py +381 -0
  339. attune/workflows/test_gen/workflow.py +655 -0
  340. attune/workflows/test_gen.py +54 -0
  341. attune/workflows/test_gen_behavioral.py +477 -0
  342. attune/workflows/test_gen_parallel.py +341 -0
  343. attune/workflows/test_lifecycle.py +526 -0
  344. attune/workflows/test_maintenance.py +627 -0
  345. attune/workflows/test_maintenance_cli.py +590 -0
  346. attune/workflows/test_maintenance_crew.py +840 -0
  347. attune/workflows/test_runner.py +622 -0
  348. attune/workflows/tier_tracking.py +531 -0
  349. attune/workflows/xml_enhanced_crew.py +285 -0
  350. attune_ai-2.0.0.dist-info/METADATA +1026 -0
  351. attune_ai-2.0.0.dist-info/RECORD +457 -0
  352. attune_ai-2.0.0.dist-info/WHEEL +5 -0
  353. attune_ai-2.0.0.dist-info/entry_points.txt +26 -0
  354. attune_ai-2.0.0.dist-info/licenses/LICENSE +201 -0
  355. attune_ai-2.0.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
  356. attune_ai-2.0.0.dist-info/top_level.txt +5 -0
  357. attune_healthcare/__init__.py +13 -0
  358. attune_healthcare/monitors/__init__.py +9 -0
  359. attune_healthcare/monitors/clinical_protocol_monitor.py +315 -0
  360. attune_healthcare/monitors/monitoring/__init__.py +44 -0
  361. attune_healthcare/monitors/monitoring/protocol_checker.py +300 -0
  362. attune_healthcare/monitors/monitoring/protocol_loader.py +214 -0
  363. attune_healthcare/monitors/monitoring/sensor_parsers.py +306 -0
  364. attune_healthcare/monitors/monitoring/trajectory_analyzer.py +389 -0
  365. attune_llm/README.md +553 -0
  366. attune_llm/__init__.py +28 -0
  367. attune_llm/agent_factory/__init__.py +53 -0
  368. attune_llm/agent_factory/adapters/__init__.py +85 -0
  369. attune_llm/agent_factory/adapters/autogen_adapter.py +312 -0
  370. attune_llm/agent_factory/adapters/crewai_adapter.py +483 -0
  371. attune_llm/agent_factory/adapters/haystack_adapter.py +298 -0
  372. attune_llm/agent_factory/adapters/langchain_adapter.py +362 -0
  373. attune_llm/agent_factory/adapters/langgraph_adapter.py +333 -0
  374. attune_llm/agent_factory/adapters/native.py +228 -0
  375. attune_llm/agent_factory/adapters/wizard_adapter.py +423 -0
  376. attune_llm/agent_factory/base.py +305 -0
  377. attune_llm/agent_factory/crews/__init__.py +67 -0
  378. attune_llm/agent_factory/crews/code_review.py +1113 -0
  379. attune_llm/agent_factory/crews/health_check.py +1262 -0
  380. attune_llm/agent_factory/crews/refactoring.py +1128 -0
  381. attune_llm/agent_factory/crews/security_audit.py +1018 -0
  382. attune_llm/agent_factory/decorators.py +287 -0
  383. attune_llm/agent_factory/factory.py +558 -0
  384. attune_llm/agent_factory/framework.py +193 -0
  385. attune_llm/agent_factory/memory_integration.py +328 -0
  386. attune_llm/agent_factory/resilient.py +320 -0
  387. attune_llm/agents_md/__init__.py +22 -0
  388. attune_llm/agents_md/loader.py +218 -0
  389. attune_llm/agents_md/parser.py +271 -0
  390. attune_llm/agents_md/registry.py +307 -0
  391. attune_llm/claude_memory.py +466 -0
  392. attune_llm/cli/__init__.py +8 -0
  393. attune_llm/cli/sync_claude.py +487 -0
  394. attune_llm/code_health.py +1313 -0
  395. attune_llm/commands/__init__.py +51 -0
  396. attune_llm/commands/context.py +375 -0
  397. attune_llm/commands/loader.py +301 -0
  398. attune_llm/commands/models.py +231 -0
  399. attune_llm/commands/parser.py +371 -0
  400. attune_llm/commands/registry.py +429 -0
  401. attune_llm/config/__init__.py +29 -0
  402. attune_llm/config/unified.py +291 -0
  403. attune_llm/context/__init__.py +22 -0
  404. attune_llm/context/compaction.py +455 -0
  405. attune_llm/context/manager.py +434 -0
  406. attune_llm/contextual_patterns.py +361 -0
  407. attune_llm/core.py +907 -0
  408. attune_llm/git_pattern_extractor.py +435 -0
  409. attune_llm/hooks/__init__.py +24 -0
  410. attune_llm/hooks/config.py +306 -0
  411. attune_llm/hooks/executor.py +289 -0
  412. attune_llm/hooks/registry.py +302 -0
  413. attune_llm/hooks/scripts/__init__.py +39 -0
  414. attune_llm/hooks/scripts/evaluate_session.py +201 -0
  415. attune_llm/hooks/scripts/first_time_init.py +285 -0
  416. attune_llm/hooks/scripts/pre_compact.py +207 -0
  417. attune_llm/hooks/scripts/session_end.py +183 -0
  418. attune_llm/hooks/scripts/session_start.py +163 -0
  419. attune_llm/hooks/scripts/suggest_compact.py +225 -0
  420. attune_llm/learning/__init__.py +30 -0
  421. attune_llm/learning/evaluator.py +438 -0
  422. attune_llm/learning/extractor.py +514 -0
  423. attune_llm/learning/storage.py +560 -0
  424. attune_llm/levels.py +227 -0
  425. attune_llm/pattern_confidence.py +414 -0
  426. attune_llm/pattern_resolver.py +272 -0
  427. attune_llm/pattern_summary.py +350 -0
  428. attune_llm/providers.py +967 -0
  429. attune_llm/routing/__init__.py +32 -0
  430. attune_llm/routing/model_router.py +362 -0
  431. attune_llm/security/IMPLEMENTATION_SUMMARY.md +413 -0
  432. attune_llm/security/PHASE2_COMPLETE.md +384 -0
  433. attune_llm/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
  434. attune_llm/security/QUICK_REFERENCE.md +316 -0
  435. attune_llm/security/README.md +262 -0
  436. attune_llm/security/__init__.py +62 -0
  437. attune_llm/security/audit_logger.py +929 -0
  438. attune_llm/security/audit_logger_example.py +152 -0
  439. attune_llm/security/pii_scrubber.py +640 -0
  440. attune_llm/security/secrets_detector.py +678 -0
  441. attune_llm/security/secrets_detector_example.py +304 -0
  442. attune_llm/security/secure_memdocs.py +1192 -0
  443. attune_llm/security/secure_memdocs_example.py +278 -0
  444. attune_llm/session_status.py +745 -0
  445. attune_llm/state.py +246 -0
  446. attune_llm/utils/__init__.py +5 -0
  447. attune_llm/utils/tokens.py +349 -0
  448. attune_software/SOFTWARE_PLUGIN_README.md +57 -0
  449. attune_software/__init__.py +13 -0
  450. attune_software/cli/__init__.py +120 -0
  451. attune_software/cli/inspect.py +362 -0
  452. attune_software/cli.py +574 -0
  453. attune_software/plugin.py +188 -0
  454. workflow_scaffolding/__init__.py +11 -0
  455. workflow_scaffolding/__main__.py +12 -0
  456. workflow_scaffolding/cli.py +206 -0
  457. workflow_scaffolding/generator.py +265 -0
attune_llm/providers.py (new file)
@@ -0,0 +1,967 @@
+ """LLM Provider Adapters
+
+ Unified interface for different LLM providers (OpenAI, Anthropic, local models).
+
+ Copyright 2025 Smart AI Memory, LLC
+ Licensed under Fair Source 0.9
+ """
+
+ import asyncio
+ import logging
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+ from datetime import datetime
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class LLMResponse:
+     """Standardized response from any LLM provider"""
+
+     content: str
+     model: str
+     tokens_used: int
+     finish_reason: str
+     metadata: dict[str, Any]
+
+
+ class BaseLLMProvider(ABC):
+     """Base class for all LLM providers.
+
+     Provides unified interface regardless of backend.
+     """
+
+     def __init__(self, api_key: str | None = None, **kwargs):
+         self.api_key = api_key
+         self.config = kwargs
+
+     @abstractmethod
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response from LLM.
+
+         Args:
+             messages: List of {"role": "user/assistant", "content": "..."}
+             system_prompt: Optional system prompt
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens in response
+             **kwargs: Provider-specific options
+
+         Returns:
+             LLMResponse with standardized format
+
+         """
+
+     @abstractmethod
+     def get_model_info(self) -> dict[str, Any]:
+         """Get information about the model being used"""
+
+     def estimate_tokens(self, text: str) -> int:
+         """Estimate token count for text.
+
+         Rough approximation: ~4 chars per token
+         """
+         return len(text) // 4
+
+
+ class AnthropicProvider(BaseLLMProvider):
+     """Anthropic (Claude) provider with enhanced features.
+
+     Supports Claude 3 family models with advanced capabilities:
+     - Extended context windows (200K tokens)
+     - Prompt caching for faster repeated queries
+     - Thinking mode for complex reasoning
+     - Batch processing for cost optimization
+     """
+
+     def __init__(
+         self,
+         api_key: str | None = None,
+         model: str = "claude-sonnet-4-5-20250929",
+         use_prompt_caching: bool = True,  # CHANGED: Default to True for 20-30% cost savings
+         use_thinking: bool = False,
+         use_batch: bool = False,
+         **kwargs,
+     ):
+         super().__init__(api_key, **kwargs)
+         self.model = model
+         self.use_prompt_caching = use_prompt_caching
+         self.use_thinking = use_thinking
+         self.use_batch = use_batch
+
+         # Validate API key is provided
+         if not api_key or not api_key.strip():
+             raise ValueError(
+                 "API key is required for Anthropic provider. "
+                 "Provide via api_key parameter or ANTHROPIC_API_KEY environment variable",
+             )
+
+         # Lazy import to avoid requiring anthropic if not used
+         # v4.6.3: Use AsyncAnthropic for true async I/O (prevents event loop blocking)
+         try:
+             import anthropic
+
+             self.client = anthropic.AsyncAnthropic(api_key=api_key)
+         except ImportError as e:
+             raise ImportError(
+                 "anthropic package required. Install with: pip install anthropic",
+             ) from e
+
+         # Initialize batch provider if needed
+         if use_batch:
+             self.batch_provider = AnthropicBatchProvider(api_key=api_key)
+         else:
+             self.batch_provider = None
+
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response using Anthropic API with enhanced features.
+
+         Claude-specific enhancements:
+         - Prompt caching for repeated system prompts (90% cost reduction)
+         - Extended context (200K tokens) for large codebase analysis
+         - Thinking mode for complex reasoning tasks
+
+         Prompt caching is enabled by default (use_prompt_caching=True).
+         This marks system prompts with cache_control for Anthropic's cache.
+         Break-even: ~3 requests with same context, 5-minute TTL.
+         """
+         # Build kwargs for Anthropic
+         api_kwargs = {
+             "model": self.model,
+             "max_tokens": max_tokens,
+             "temperature": temperature,
+             "messages": messages,
+         }
+
+         # Enable prompt caching for system prompts (Claude-specific)
+         if system_prompt and self.use_prompt_caching:
+             api_kwargs["system"] = [
+                 {
+                     "type": "text",
+                     "text": system_prompt,
+                     "cache_control": {"type": "ephemeral"},  # Cache for 5 minutes
+                 },
+             ]
+         elif system_prompt:
+             api_kwargs["system"] = system_prompt
+
+         # Enable extended thinking for complex tasks (Claude-specific)
+         if self.use_thinking:
+             api_kwargs["thinking"] = {
+                 "type": "enabled",
+                 "budget_tokens": 2000,  # Allow 2K tokens for reasoning
+             }
+
+         # Add any additional kwargs
+         api_kwargs.update(kwargs)
+
+         # Call Anthropic API (async with AsyncAnthropic)
+         response = await self.client.messages.create(**api_kwargs)  # type: ignore[call-overload]
+
+         # Extract thinking content if present
+         thinking_content = None
+         response_content = ""
+
+         for block in response.content:
+             if hasattr(block, "type"):
+                 if block.type == "thinking":
+                     thinking_content = block.thinking
+                 elif block.type == "text":
+                     response_content = block.text
+             else:
+                 response_content = block.text
+
+         # Convert to standardized format
+         metadata = {
+             "input_tokens": response.usage.input_tokens,
+             "output_tokens": response.usage.output_tokens,
+             "provider": "anthropic",
+             "model_family": "claude-3",
+         }
+
+         # Add cache performance metrics if available
+         if hasattr(response.usage, "cache_creation_input_tokens"):
+             cache_creation = getattr(response.usage, "cache_creation_input_tokens", 0)
+             cache_read = getattr(response.usage, "cache_read_input_tokens", 0)
+
+             # Ensure values are numeric (handle mock objects in tests)
+             if isinstance(cache_creation, int) and isinstance(cache_read, int):
+                 metadata["cache_creation_tokens"] = cache_creation
+                 metadata["cache_read_tokens"] = cache_read
+
+                 # Log cache performance for monitoring with detailed cost savings
+                 # Cache reads cost 90% less than regular input tokens
+                 # Cache writes cost 25% more than regular input tokens
+                 if cache_read > 0:
+                     # Sonnet 4.5 input: $3/M tokens, cache read: $0.30/M tokens (90% discount)
+                     savings_per_token = 0.003 / 1000 * 0.9  # 90% of regular cost
+                     total_savings = cache_read * savings_per_token
+                     logger.info(
+                         f"Cache HIT: {cache_read:,} tokens read from cache "
+                         f"(saved ${total_savings:.4f} vs full price)"
+                     )
+                 if cache_creation > 0:
+                     # Cache write cost: $3.75/M tokens (25% markup)
+                     write_cost = cache_creation * 0.00375 / 1000
+                     logger.debug(
+                         f"Cache WRITE: {cache_creation:,} tokens written to cache "
+                         f"(cost ${write_cost:.4f})"
+                     )
+
+         # Add thinking content if present
+         if thinking_content:
+             metadata["thinking"] = thinking_content
+
+         return LLMResponse(
+             content=response_content,
+             model=response.model,
+             tokens_used=response.usage.input_tokens + response.usage.output_tokens,
+             finish_reason=response.stop_reason,
+             metadata=metadata,
+         )
+
+     async def analyze_large_codebase(
+         self,
+         codebase_files: list[dict[str, str]],
+         analysis_prompt: str,
+         **kwargs,
+     ) -> LLMResponse:
+         """Analyze large codebases using Claude's 200K context window.
+
+         Claude-specific feature: Can process entire repositories in one call.
+
+         Args:
+             codebase_files: List of {"path": "...", "content": "..."} dicts
+             analysis_prompt: What to analyze for
+             **kwargs: Additional generation parameters
+
+         Returns:
+             LLMResponse with analysis results
+
+         """
+         # Build context from all files
+         file_context = "\n\n".join(
+             [f"# File: {file['path']}\n{file['content']}" for file in codebase_files],
+         )
+
+         # Create system prompt with caching for file context
+         system_parts = [
+             {
+                 "type": "text",
+                 "text": "You are a code analysis expert using the Empathy Framework.",
+             },
+             {
+                 "type": "text",
+                 "text": f"Codebase files:\n\n{file_context}",
+                 "cache_control": {"type": "ephemeral"},  # Cache the codebase
+             },
+         ]
+
+         messages = [{"role": "user", "content": analysis_prompt}]
+
+         # Use extended max_tokens for comprehensive analysis
+         return await self.generate(
+             messages=messages,
+             system_prompt=None,  # We'll pass it directly in api_kwargs
+             max_tokens=kwargs.pop("max_tokens", 4096),
+             **{**kwargs, "system": system_parts},
+         )
+
+     def get_model_info(self) -> dict[str, Any]:
+         """Get Claude model information with extended context capabilities"""
+         model_info = {
+             "claude-3-opus-20240229": {
+                 "max_tokens": 200000,
+                 "cost_per_1m_input": 15.00,
+                 "cost_per_1m_output": 75.00,
+                 "supports_prompt_caching": True,
+                 "supports_thinking": True,
+                 "ideal_for": "Complex reasoning, large codebases",
+             },
+             "claude-3-5-sonnet-20241022": {
+                 "max_tokens": 200000,
+                 "cost_per_1m_input": 3.00,
+                 "cost_per_1m_output": 15.00,
+                 "supports_prompt_caching": True,
+                 "supports_thinking": True,
+                 "ideal_for": "General development, balanced cost/performance",
+             },
+             "claude-3-haiku-20240307": {
+                 "max_tokens": 200000,
+                 "cost_per_1m_input": 0.25,
+                 "cost_per_1m_output": 1.25,
+                 "supports_prompt_caching": True,
+                 "supports_thinking": False,
+                 "ideal_for": "Fast responses, simple tasks",
+             },
+         }
+
+         return model_info.get(
+             self.model,
+             {
+                 "max_tokens": 200000,
+                 "cost_per_1m_input": 3.00,
+                 "cost_per_1m_output": 15.00,
+                 "supports_prompt_caching": True,
+                 "supports_thinking": True,
+             },
+         )
+
+     def estimate_tokens(self, text: str) -> int:
+         """Estimate token count using accurate token counter (overrides base class).
+
+         Uses tiktoken for fast local estimation (~98% accurate).
+         Falls back to heuristic if tiktoken unavailable.
+
+         Args:
+             text: Text to count tokens for
+
+         Returns:
+             Estimated token count
+         """
+         try:
+             from .utils.tokens import count_tokens
+
+             return count_tokens(text, model=self.model, use_api=False)
+         except ImportError:
+             # Fallback to base class heuristic if utils not available
+             return super().estimate_tokens(text)
+
+     def calculate_actual_cost(
+         self,
+         input_tokens: int,
+         output_tokens: int,
+         cache_creation_tokens: int = 0,
+         cache_read_tokens: int = 0,
+     ) -> dict[str, Any]:
+         """Calculate actual cost based on precise token counts.
+
+         Includes Anthropic prompt caching cost adjustments:
+         - Cache writes: 25% markup over standard input pricing
+         - Cache reads: 90% discount from standard input pricing
+
+         Args:
+             input_tokens: Regular input tokens (not cached)
+             output_tokens: Output tokens
+             cache_creation_tokens: Tokens written to cache
+             cache_read_tokens: Tokens read from cache
+
+         Returns:
+             Dictionary with cost breakdown:
+             - base_cost: Cost for regular input/output tokens
+             - cache_write_cost: Cost for cache creation (if any)
+             - cache_read_cost: Cost for cache reads (if any)
+             - total_cost: Total cost including all components
+             - savings: Amount saved by cache reads vs. full price
+
+         Example:
+             >>> provider = AnthropicProvider(api_key="...")
+             >>> cost = provider.calculate_actual_cost(
+             ...     input_tokens=1000,
+             ...     output_tokens=500,
+             ...     cache_read_tokens=10000
+             ... )
+             >>> cost["total_cost"]
+             0.0105  # Significantly less than without cache
+         """
+         # Get pricing for this model
+         model_info = self.get_model_info()
+         input_price_per_million = model_info["cost_per_1m_input"]
+         output_price_per_million = model_info["cost_per_1m_output"]
+
+         # Base cost (non-cached tokens)
+         base_cost = (input_tokens / 1_000_000) * input_price_per_million
+         base_cost += (output_tokens / 1_000_000) * output_price_per_million
+
+         # Cache write cost (25% markup)
+         cache_write_price = input_price_per_million * 1.25
+         cache_write_cost = (cache_creation_tokens / 1_000_000) * cache_write_price
+
+         # Cache read cost (90% discount = 10% of input price)
+         cache_read_price = input_price_per_million * 0.1
+         cache_read_cost = (cache_read_tokens / 1_000_000) * cache_read_price
+
+         # Calculate savings from cache reads
+         full_price_for_cached = (cache_read_tokens / 1_000_000) * input_price_per_million
+         savings = full_price_for_cached - cache_read_cost
+
+         return {
+             "base_cost": round(base_cost, 6),
+             "cache_write_cost": round(cache_write_cost, 6),
+             "cache_read_cost": round(cache_read_cost, 6),
+             "total_cost": round(base_cost + cache_write_cost + cache_read_cost, 6),
+             "savings": round(savings, 6),
+             "currency": "USD",
+         }
+
+
414
+ """Provider for Anthropic Batch API (50% cost reduction).
415
+
416
+ The Batch API processes requests asynchronously within 24 hours
417
+ at 50% of the standard API cost. Ideal for non-urgent, bulk tasks.
418
+
419
+ Example:
420
+ >>> provider = AnthropicBatchProvider(api_key="sk-ant-...")
421
+ >>> requests = [
422
+ ... {
423
+ ... "custom_id": "task_1",
424
+ ... "model": "claude-sonnet-4-5",
425
+ ... "messages": [{"role": "user", "content": "Analyze X"}],
426
+ ... "max_tokens": 1024
427
+ ... }
428
+ ... ]
429
+ >>> batch_id = provider.create_batch(requests)
430
+ >>> # Wait for processing (up to 24 hours)
431
+ >>> results = await provider.wait_for_batch(batch_id)
432
+ """
433
+
434
+ def __init__(self, api_key: str | None = None):
435
+ """Initialize batch provider.
436
+
437
+ Args:
438
+ api_key: Anthropic API key (defaults to ANTHROPIC_API_KEY env var)
439
+ """
440
+ if not api_key or not api_key.strip():
441
+ raise ValueError(
442
+ "API key is required for Anthropic Batch API. "
443
+ "Provide via api_key parameter or ANTHROPIC_API_KEY environment variable"
444
+ )
445
+
446
+ try:
447
+ import anthropic
448
+
449
+ self.client = anthropic.Anthropic(api_key=api_key)
450
+ self._batch_jobs: dict[str, Any] = {}
451
+ except ImportError as e:
452
+ raise ImportError(
453
+ "anthropic package required for Batch API. Install with: pip install anthropic"
454
+ ) from e
455
+
456
+ def create_batch(self, requests: list[dict[str, Any]], job_id: str | None = None) -> str:
457
+ """Create a batch job.
458
+
459
+ Args:
460
+ requests: List of request dicts with 'custom_id' and 'params' containing message creation parameters.
461
+ Format: [{"custom_id": "id1", "params": {"model": "...", "messages": [...], "max_tokens": 1024}}]
462
+ job_id: Optional job identifier for tracking (unused, for API compatibility)
463
+
464
+ Returns:
465
+ Batch job ID for polling status
466
+
467
+ Raises:
468
+ ValueError: If requests is empty or invalid
469
+ RuntimeError: If API call fails
470
+
471
+ Example:
472
+ >>> requests = [
473
+ ... {
474
+ ... "custom_id": "task_1",
475
+ ... "params": {
476
+ ... "model": "claude-sonnet-4-5-20250929",
477
+ ... "messages": [{"role": "user", "content": "Test"}],
478
+ ... "max_tokens": 1024
479
+ ... }
480
+ ... }
481
+ ... ]
482
+ >>> batch_id = provider.create_batch(requests)
483
+ >>> print(f"Batch created: {batch_id}")
484
+ Batch created: msgbatch_abc123
485
+ """
486
+ if not requests:
487
+ raise ValueError("requests cannot be empty")
488
+
489
+ # Validate and convert old format to new format if needed
490
+ formatted_requests = []
491
+ for req in requests:
492
+ if "params" not in req:
493
+ # Old format: convert to new format with params wrapper
494
+ formatted_req = {
495
+ "custom_id": req.get("custom_id", f"req_{id(req)}"),
496
+ "params": {
497
+ "model": req.get("model", "claude-sonnet-4-5-20250929"),
498
+ "messages": req.get("messages", []),
499
+ "max_tokens": req.get("max_tokens", 4096),
500
+ },
501
+ }
502
+ # Copy other optional params
503
+ for key in ["temperature", "system", "stop_sequences"]:
504
+ if key in req:
505
+ formatted_req["params"][key] = req[key]
506
+ formatted_requests.append(formatted_req)
507
+ else:
508
+ formatted_requests.append(req)
509
+
510
+ try:
511
+ # Use correct Message Batches API endpoint
512
+ batch = self.client.messages.batches.create(requests=formatted_requests)
513
+ self._batch_jobs[batch.id] = batch
514
+ logger.info(f"Created batch {batch.id} with {len(formatted_requests)} requests")
515
+ return batch.id
516
+ except Exception as e:
517
+ logger.error(f"Failed to create batch: {e}")
518
+ raise RuntimeError(f"Batch creation failed: {e}") from e
519
+
520
+ def get_batch_status(self, batch_id: str) -> Any:
521
+ """Get status of batch job.
522
+
523
+ Args:
524
+ batch_id: Batch job ID
525
+
526
+ Returns:
527
+ MessageBatch object with processing_status field:
528
+ - "in_progress": Batch is being processed
529
+ - "canceling": Cancellation initiated
530
+ - "ended": Batch processing ended (check request_counts for success/errors)
531
+
532
+ Example:
533
+ >>> status = provider.get_batch_status("msgbatch_abc123")
534
+ >>> print(status.processing_status)
535
+ in_progress
536
+ >>> print(f"Succeeded: {status.request_counts.succeeded}")
537
+ """
538
+ try:
539
+ # Use correct Message Batches API endpoint
540
+ batch = self.client.messages.batches.retrieve(batch_id)
541
+ self._batch_jobs[batch_id] = batch
542
+ return batch
543
+ except Exception as e:
544
+ logger.error(f"Failed to get batch status for {batch_id}: {e}")
545
+ raise RuntimeError(f"Failed to get batch status: {e}") from e
546
+
547
+ def get_batch_results(self, batch_id: str) -> list[dict[str, Any]]:
548
+ """Get results from completed batch.
549
+
550
+ Args:
551
+ batch_id: Batch job ID
552
+
553
+ Returns:
554
+ List of result dicts. Each dict contains:
555
+ - custom_id: Request identifier
556
+ - result: Either {"type": "succeeded", "message": {...}} or {"type": "errored", "error": {...}}
557
+
558
+ Raises:
559
+ ValueError: If batch has not ended processing
560
+ RuntimeError: If API call fails
561
+
562
+ Example:
563
+ >>> results = provider.get_batch_results("msgbatch_abc123")
564
+ >>> for result in results:
565
+ ... if result['result']['type'] == 'succeeded':
566
+ ... message = result['result']['message']
567
+ ... print(f"{result['custom_id']}: {message.content[0].text}")
568
+ ... else:
569
+ ... error = result['result']['error']
570
+ ... print(f"{result['custom_id']}: Error {error['type']}")
571
+ """
572
+ status = self.get_batch_status(batch_id)
573
+
574
+ # Check processing_status instead of status
575
+ if status.processing_status != "ended":
576
+ raise ValueError(
577
+ f"Batch {batch_id} has not ended processing (status: {status.processing_status})"
578
+ )
579
+
580
+ try:
581
+ # Use correct Message Batches API endpoint
582
+ # results() returns an iterator, convert to list
583
+ results_iterator = self.client.messages.batches.results(batch_id)
584
+ return list(results_iterator)
585
+ except Exception as e:
586
+ logger.error(f"Failed to get batch results for {batch_id}: {e}")
587
+ raise RuntimeError(f"Failed to get batch results: {e}") from e
588
+
589
+ async def wait_for_batch(
590
+ self,
591
+ batch_id: str,
592
+ poll_interval: int = 60,
593
+ timeout: int = 86400, # 24 hours
594
+ ) -> list[dict[str, Any]]:
595
+ """Wait for batch to complete with polling.
596
+
597
+ Args:
598
+ batch_id: Batch job ID
599
+ poll_interval: Seconds between status checks (default: 60)
600
+ timeout: Maximum wait time in seconds (default: 86400 = 24 hours)
601
+
602
+ Returns:
603
+ Batch results when processing ends
604
+
605
+ Raises:
606
+ TimeoutError: If batch doesn't complete within timeout
607
+ RuntimeError: If batch had errors during processing
608
+
609
+ Example:
610
+ >>> results = await provider.wait_for_batch(
611
+ ... "msgbatch_abc123",
612
+ ... poll_interval=300, # Check every 5 minutes
613
+ ... )
614
+ >>> print(f"Batch completed: {len(results)} results")
615
+ """
616
+
617
+ start_time = datetime.now()
618
+
619
+ while True:
620
+ status = self.get_batch_status(batch_id)
621
+
622
+ # Check if batch processing has ended
623
+ if status.processing_status == "ended":
624
+ # Check request counts to see if there were errors
625
+ counts = status.request_counts
626
+ logger.info(
627
+ f"Batch {batch_id} ended: "
628
+ f"{counts.succeeded} succeeded, {counts.errored} errored, "
629
+ f"{counts.canceled} canceled, {counts.expired} expired"
630
+ )
631
+
632
+ # Return results even if some requests failed
633
+ # The caller can inspect individual results for errors
634
+ return self.get_batch_results(batch_id)
635
+
636
+ # Check timeout
637
+ elapsed = (datetime.now() - start_time).total_seconds()
638
+ if elapsed > timeout:
639
+ raise TimeoutError(f"Batch {batch_id} did not complete within {timeout}s")
640
+
641
+ # Log progress with request counts
642
+ try:
643
+ counts = status.request_counts
644
+ logger.debug(
645
+ f"Batch {batch_id} status: {status.processing_status} "
646
+ f"(processing: {counts.processing}, elapsed: {elapsed:.0f}s)"
647
+ )
648
+ except AttributeError:
649
+ logger.debug(
650
+ f"Batch {batch_id} status: {status.processing_status} (elapsed: {elapsed:.0f}s)"
651
+ )
652
+
653
+ # Wait before next poll
654
+ await asyncio.sleep(poll_interval)
655
+
656
+
657
+ class OpenAIProvider(BaseLLMProvider):
+     """OpenAI provider.
+
+     Supports GPT-4, GPT-3.5, and other OpenAI models.
+     """
+
+     def __init__(self, api_key: str | None = None, model: str = "gpt-4-turbo-preview", **kwargs):
+         super().__init__(api_key, **kwargs)
+         self.model = model
+
+         # Validate API key is provided
+         if not api_key or not api_key.strip():
+             raise ValueError(
+                 "API key is required for OpenAI provider. "
+                 "Provide via api_key parameter or OPENAI_API_KEY environment variable",
+             )
+
+         # Lazy import
+         try:
+             import openai
+
+             self.client = openai.AsyncOpenAI(api_key=api_key)
+         except ImportError as e:
+             raise ImportError("openai package required. Install with: pip install openai") from e
+
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response using OpenAI API"""
+         # Add system prompt if provided
+         if system_prompt:
+             messages = [{"role": "system", "content": system_prompt}] + messages
+
+         # Call OpenAI API
+         response = await self.client.chat.completions.create(
+             model=self.model,
+             messages=messages,  # type: ignore[arg-type]
+             temperature=temperature,
+             max_tokens=max_tokens,
+             **kwargs,
+         )
+
+         # Convert to standardized format
+         content = response.choices[0].message.content or ""
+         usage = response.usage
+         return LLMResponse(
+             content=content,
+             model=response.model,
+             tokens_used=usage.total_tokens if usage else 0,
+             finish_reason=response.choices[0].finish_reason,
+             metadata={
+                 "input_tokens": usage.prompt_tokens if usage else 0,
+                 "output_tokens": usage.completion_tokens if usage else 0,
+                 "provider": "openai",
+             },
+         )
+
+     def get_model_info(self) -> dict[str, Any]:
+         """Get OpenAI model information"""
+         model_info = {
+             "gpt-4-turbo-preview": {
+                 "max_tokens": 128000,
+                 "cost_per_1m_input": 10.00,
+                 "cost_per_1m_output": 30.00,
+             },
+             "gpt-4": {"max_tokens": 8192, "cost_per_1m_input": 30.00, "cost_per_1m_output": 60.00},
+             "gpt-3.5-turbo": {
+                 "max_tokens": 16385,
+                 "cost_per_1m_input": 0.50,
+                 "cost_per_1m_output": 1.50,
+             },
+         }
+
+         return model_info.get(
+             self.model,
+             {"max_tokens": 128000, "cost_per_1m_input": 10.00, "cost_per_1m_output": 30.00},
+         )
+
+
+ class GeminiProvider(BaseLLMProvider):
+     """Google Gemini provider with cost tracking integration.
+
+     Supports Gemini models:
+     - gemini-2.0-flash-exp: Fast, cheap tier (1M context)
+     - gemini-1.5-pro: Balanced, capable tier (2M context)
+     - gemini-2.5-pro: Premium reasoning tier
+     """
+
+     def __init__(
+         self,
+         api_key: str | None = None,
+         model: str = "gemini-1.5-pro",
+         **kwargs,
+     ):
+         super().__init__(api_key, **kwargs)
+         self.model = model
+
+         # Validate API key is provided
+         if not api_key or not api_key.strip():
+             raise ValueError(
+                 "API key is required for Gemini provider. "
+                 "Provide via api_key parameter or GOOGLE_API_KEY environment variable",
+             )
+
+         # Lazy import to avoid requiring google-generativeai if not used
+         try:
+             import google.generativeai as genai
+
+             genai.configure(api_key=api_key)
+             self.genai = genai
+             self.client = genai.GenerativeModel(model)
+         except ImportError as e:
+             raise ImportError(
+                 "google-generativeai package required. Install with: pip install google-generativeai",
+             ) from e
+
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response using Google Gemini API.
+
+         Gemini-specific features:
+         - Large context windows (1M-2M tokens)
+         - Multimodal support
+         - Grounding with Google Search
+         """
+         import asyncio
+
+         # Convert messages to Gemini format
+         gemini_messages = []
+         for msg in messages:
+             role = "user" if msg["role"] == "user" else "model"
+             gemini_messages.append({"role": role, "parts": [msg["content"]]})
+
+         # Build generation config
+         generation_config = self.genai.GenerationConfig(
+             temperature=temperature,
+             max_output_tokens=max_tokens,
+         )
+
+         # Create model with system instruction if provided
+         if system_prompt:
+             model = self.genai.GenerativeModel(
+                 self.model,
+                 system_instruction=system_prompt,
+             )
+         else:
+             model = self.client
+
+         # Call Gemini API (run sync in thread pool for async compatibility)
+         loop = asyncio.get_event_loop()
+         response = await loop.run_in_executor(
+             None,
+             lambda: model.generate_content(
+                 gemini_messages,  # type: ignore[arg-type]
+                 generation_config=generation_config,
+             ),
+         )
+
+         # Extract token counts from usage metadata
+         input_tokens = 0
+         output_tokens = 0
+         if hasattr(response, "usage_metadata"):
+             input_tokens = getattr(response.usage_metadata, "prompt_token_count", 0)
+             output_tokens = getattr(response.usage_metadata, "candidates_token_count", 0)
+
+         # Log to cost tracker
+         try:
+             from attune.cost_tracker import log_request
+
+             tier = self._get_tier()
+             log_request(
+                 model=self.model,
+                 input_tokens=input_tokens,
+                 output_tokens=output_tokens,
+                 task_type=kwargs.get("task_type", "gemini_generate"),
+                 tier=tier,
+             )
+         except ImportError:
+             pass  # Cost tracking not available
+
+         # Convert to standardized format
+         content = ""
+         if response.candidates:
+             content = response.candidates[0].content.parts[0].text
+
+         finish_reason = "stop"
+         if response.candidates and hasattr(response.candidates[0], "finish_reason"):
+             finish_reason = str(response.candidates[0].finish_reason.name).lower()
+
+         return LLMResponse(
+             content=content,
+             model=self.model,
+             tokens_used=input_tokens + output_tokens,
+             finish_reason=finish_reason,
+             metadata={
+                 "input_tokens": input_tokens,
+                 "output_tokens": output_tokens,
+                 "provider": "google",
+                 "model_family": "gemini",
+             },
+         )
+
+     def _get_tier(self) -> str:
+         """Determine tier from model name."""
+         if "flash" in self.model.lower():
+             return "cheap"
+         if "2.5" in self.model or "ultra" in self.model.lower():
+             return "premium"
+         return "capable"
+
+     def get_model_info(self) -> dict[str, Any]:
+         """Get Gemini model information"""
+         model_info = {
+             "gemini-2.0-flash-exp": {
+                 "max_tokens": 1000000,
+                 "cost_per_1m_input": 0.075,
+                 "cost_per_1m_output": 0.30,
+                 "supports_vision": True,
+                 "ideal_for": "Fast responses, simple tasks, large context",
+             },
+             "gemini-1.5-pro": {
+                 "max_tokens": 2000000,
+                 "cost_per_1m_input": 1.25,
+                 "cost_per_1m_output": 5.00,
+                 "supports_vision": True,
+                 "ideal_for": "Complex reasoning, large codebases",
+             },
+             "gemini-2.5-pro": {
+                 "max_tokens": 1000000,
+                 "cost_per_1m_input": 2.50,
+                 "cost_per_1m_output": 10.00,
+                 "supports_vision": True,
+                 "ideal_for": "Advanced reasoning, complex tasks",
+             },
+         }
+
+         return model_info.get(
+             self.model,
+             {
+                 "max_tokens": 1000000,
+                 "cost_per_1m_input": 1.25,
+                 "cost_per_1m_output": 5.00,
+                 "supports_vision": True,
+             },
+         )
+
+
+ class LocalProvider(BaseLLMProvider):
+     """Local model provider (Ollama, LM Studio, etc.).
+
+     For running models locally.
+     """
+
+     def __init__(self, endpoint: str = "http://localhost:11434", model: str = "llama2", **kwargs):
+         super().__init__(api_key=None, **kwargs)
+         self.endpoint = endpoint
+         self.model = model
+
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response using local model"""
+         import aiohttp
+
+         # Format for Ollama-style API
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "stream": False,
+             "options": {"temperature": temperature, "num_predict": max_tokens},
+         }
+
+         if system_prompt:
+             payload["system"] = system_prompt
+
+         async with aiohttp.ClientSession() as session:
+             async with session.post(f"{self.endpoint}/api/chat", json=payload) as response:
+                 result = await response.json()
+
+         return LLMResponse(
+             content=result.get("message", {}).get("content", ""),
+             model=self.model,
+             tokens_used=result.get("eval_count", 0) + result.get("prompt_eval_count", 0),
+             finish_reason="stop",
+             metadata={"provider": "local", "endpoint": self.endpoint},
+         )
+
+     def get_model_info(self) -> dict[str, Any]:
+         """Get local model information"""
+         return {
+             "max_tokens": 4096,  # Depends on model
+             "cost_per_1m_input": 0.0,  # Free (local)
+             "cost_per_1m_output": 0.0,
+             "endpoint": self.endpoint,
+         }
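
For orientation, a minimal usage sketch of the provider interface defined above. It assumes the wheel is installed alongside the anthropic package and that ANTHROPIC_API_KEY is set; the import path mirrors the file layout shown in this diff (attune_llm/providers.py) and is an assumption rather than documented API.

# Usage sketch (not part of the package): call AnthropicProvider through the
# BaseLLMProvider interface. Assumes `pip install attune-ai anthropic` and a
# valid ANTHROPIC_API_KEY in the environment.
import asyncio
import os

from attune_llm.providers import AnthropicProvider, LLMResponse


async def main() -> None:
    provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"])

    # System prompts are cached by default (use_prompt_caching=True), so
    # repeated calls with the same prompt take the discounted cache-read path.
    response: LLMResponse = await provider.generate(
        messages=[{"role": "user", "content": "Summarize this release in one sentence."}],
        system_prompt="You are a concise release-notes assistant.",
        max_tokens=256,
    )
    print(response.content)
    print(response.metadata)  # input/output token counts, cache metrics when present


if __name__ == "__main__":
    asyncio.run(main())

The same generate() call shape works for OpenAIProvider, GeminiProvider, and LocalProvider, since all of them return the standardized LLMResponse dataclass.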
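
The cache pricing in calculate_actual_cost can also be checked by hand. A short worked example follows, using the fallback Sonnet pricing from get_model_info ($3.00 per million input tokens, $15.00 per million output tokens, 25% markup on cache writes, 90% discount on cache reads); the token counts are illustrative only.

# Worked example of the cache-pricing arithmetic (illustrative numbers).
input_tokens = 1_000        # regular, uncached input
output_tokens = 500
cache_read_tokens = 10_000  # context served from the prompt cache

base_cost = (input_tokens / 1_000_000) * 3.00 + (output_tokens / 1_000_000) * 15.00
# = 0.003 + 0.0075 = 0.0105

cache_read_cost = (cache_read_tokens / 1_000_000) * (3.00 * 0.1)  # 90% discount
# = 0.003

savings = (cache_read_tokens / 1_000_000) * 3.00 - cache_read_cost
# = 0.030 - 0.003 = 0.027 saved versus sending the same 10K tokens uncached

total_cost = base_cost + cache_read_cost
# = 0.0135 for the request, versus 0.0405 with no cache at all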