attune-ai 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (457)
  1. attune/__init__.py +358 -0
  2. attune/adaptive/__init__.py +13 -0
  3. attune/adaptive/task_complexity.py +127 -0
  4. attune/agent_monitoring.py +414 -0
  5. attune/cache/__init__.py +117 -0
  6. attune/cache/base.py +166 -0
  7. attune/cache/dependency_manager.py +256 -0
  8. attune/cache/hash_only.py +251 -0
  9. attune/cache/hybrid.py +457 -0
  10. attune/cache/storage.py +285 -0
  11. attune/cache_monitor.py +356 -0
  12. attune/cache_stats.py +298 -0
  13. attune/cli/__init__.py +152 -0
  14. attune/cli/__main__.py +12 -0
  15. attune/cli/commands/__init__.py +1 -0
  16. attune/cli/commands/batch.py +264 -0
  17. attune/cli/commands/cache.py +248 -0
  18. attune/cli/commands/help.py +331 -0
  19. attune/cli/commands/info.py +140 -0
  20. attune/cli/commands/inspect.py +436 -0
  21. attune/cli/commands/inspection.py +57 -0
  22. attune/cli/commands/memory.py +48 -0
  23. attune/cli/commands/metrics.py +92 -0
  24. attune/cli/commands/orchestrate.py +184 -0
  25. attune/cli/commands/patterns.py +207 -0
  26. attune/cli/commands/profiling.py +202 -0
  27. attune/cli/commands/provider.py +98 -0
  28. attune/cli/commands/routing.py +285 -0
  29. attune/cli/commands/setup.py +96 -0
  30. attune/cli/commands/status.py +235 -0
  31. attune/cli/commands/sync.py +166 -0
  32. attune/cli/commands/tier.py +121 -0
  33. attune/cli/commands/utilities.py +114 -0
  34. attune/cli/commands/workflow.py +579 -0
  35. attune/cli/core.py +32 -0
  36. attune/cli/parsers/__init__.py +68 -0
  37. attune/cli/parsers/batch.py +118 -0
  38. attune/cli/parsers/cache.py +65 -0
  39. attune/cli/parsers/help.py +41 -0
  40. attune/cli/parsers/info.py +26 -0
  41. attune/cli/parsers/inspect.py +66 -0
  42. attune/cli/parsers/metrics.py +42 -0
  43. attune/cli/parsers/orchestrate.py +61 -0
  44. attune/cli/parsers/patterns.py +54 -0
  45. attune/cli/parsers/provider.py +40 -0
  46. attune/cli/parsers/routing.py +110 -0
  47. attune/cli/parsers/setup.py +42 -0
  48. attune/cli/parsers/status.py +47 -0
  49. attune/cli/parsers/sync.py +31 -0
  50. attune/cli/parsers/tier.py +33 -0
  51. attune/cli/parsers/workflow.py +77 -0
  52. attune/cli/utils/__init__.py +1 -0
  53. attune/cli/utils/data.py +242 -0
  54. attune/cli/utils/helpers.py +68 -0
  55. attune/cli_legacy.py +3957 -0
  56. attune/cli_minimal.py +1159 -0
  57. attune/cli_router.py +437 -0
  58. attune/cli_unified.py +814 -0
  59. attune/config/__init__.py +66 -0
  60. attune/config/xml_config.py +286 -0
  61. attune/config.py +545 -0
  62. attune/coordination.py +870 -0
  63. attune/core.py +1511 -0
  64. attune/core_modules/__init__.py +15 -0
  65. attune/cost_tracker.py +626 -0
  66. attune/dashboard/__init__.py +41 -0
  67. attune/dashboard/app.py +512 -0
  68. attune/dashboard/simple_server.py +435 -0
  69. attune/dashboard/standalone_server.py +547 -0
  70. attune/discovery.py +306 -0
  71. attune/emergence.py +306 -0
  72. attune/exceptions.py +123 -0
  73. attune/feedback_loops.py +373 -0
  74. attune/hot_reload/README.md +473 -0
  75. attune/hot_reload/__init__.py +62 -0
  76. attune/hot_reload/config.py +83 -0
  77. attune/hot_reload/integration.py +229 -0
  78. attune/hot_reload/reloader.py +298 -0
  79. attune/hot_reload/watcher.py +183 -0
  80. attune/hot_reload/websocket.py +177 -0
  81. attune/levels.py +577 -0
  82. attune/leverage_points.py +441 -0
  83. attune/logging_config.py +261 -0
  84. attune/mcp/__init__.py +10 -0
  85. attune/mcp/server.py +506 -0
  86. attune/memory/__init__.py +237 -0
  87. attune/memory/claude_memory.py +469 -0
  88. attune/memory/config.py +224 -0
  89. attune/memory/control_panel.py +1290 -0
  90. attune/memory/control_panel_support.py +145 -0
  91. attune/memory/cross_session.py +845 -0
  92. attune/memory/edges.py +179 -0
  93. attune/memory/encryption.py +159 -0
  94. attune/memory/file_session.py +770 -0
  95. attune/memory/graph.py +570 -0
  96. attune/memory/long_term.py +913 -0
  97. attune/memory/long_term_types.py +99 -0
  98. attune/memory/mixins/__init__.py +25 -0
  99. attune/memory/mixins/backend_init_mixin.py +249 -0
  100. attune/memory/mixins/capabilities_mixin.py +208 -0
  101. attune/memory/mixins/handoff_mixin.py +208 -0
  102. attune/memory/mixins/lifecycle_mixin.py +49 -0
  103. attune/memory/mixins/long_term_mixin.py +352 -0
  104. attune/memory/mixins/promotion_mixin.py +109 -0
  105. attune/memory/mixins/short_term_mixin.py +182 -0
  106. attune/memory/nodes.py +179 -0
  107. attune/memory/redis_bootstrap.py +540 -0
  108. attune/memory/security/__init__.py +31 -0
  109. attune/memory/security/audit_logger.py +932 -0
  110. attune/memory/security/pii_scrubber.py +640 -0
  111. attune/memory/security/secrets_detector.py +678 -0
  112. attune/memory/short_term.py +2192 -0
  113. attune/memory/simple_storage.py +302 -0
  114. attune/memory/storage/__init__.py +15 -0
  115. attune/memory/storage_backend.py +167 -0
  116. attune/memory/summary_index.py +583 -0
  117. attune/memory/types.py +446 -0
  118. attune/memory/unified.py +182 -0
  119. attune/meta_workflows/__init__.py +74 -0
  120. attune/meta_workflows/agent_creator.py +248 -0
  121. attune/meta_workflows/builtin_templates.py +567 -0
  122. attune/meta_workflows/cli_commands/__init__.py +56 -0
  123. attune/meta_workflows/cli_commands/agent_commands.py +321 -0
  124. attune/meta_workflows/cli_commands/analytics_commands.py +442 -0
  125. attune/meta_workflows/cli_commands/config_commands.py +232 -0
  126. attune/meta_workflows/cli_commands/memory_commands.py +182 -0
  127. attune/meta_workflows/cli_commands/template_commands.py +354 -0
  128. attune/meta_workflows/cli_commands/workflow_commands.py +382 -0
  129. attune/meta_workflows/cli_meta_workflows.py +59 -0
  130. attune/meta_workflows/form_engine.py +292 -0
  131. attune/meta_workflows/intent_detector.py +409 -0
  132. attune/meta_workflows/models.py +569 -0
  133. attune/meta_workflows/pattern_learner.py +738 -0
  134. attune/meta_workflows/plan_generator.py +384 -0
  135. attune/meta_workflows/session_context.py +397 -0
  136. attune/meta_workflows/template_registry.py +229 -0
  137. attune/meta_workflows/workflow.py +984 -0
  138. attune/metrics/__init__.py +12 -0
  139. attune/metrics/collector.py +31 -0
  140. attune/metrics/prompt_metrics.py +194 -0
  141. attune/models/__init__.py +172 -0
  142. attune/models/__main__.py +13 -0
  143. attune/models/adaptive_routing.py +437 -0
  144. attune/models/auth_cli.py +444 -0
  145. attune/models/auth_strategy.py +450 -0
  146. attune/models/cli.py +655 -0
  147. attune/models/empathy_executor.py +354 -0
  148. attune/models/executor.py +257 -0
  149. attune/models/fallback.py +762 -0
  150. attune/models/provider_config.py +282 -0
  151. attune/models/registry.py +472 -0
  152. attune/models/tasks.py +359 -0
  153. attune/models/telemetry/__init__.py +71 -0
  154. attune/models/telemetry/analytics.py +594 -0
  155. attune/models/telemetry/backend.py +196 -0
  156. attune/models/telemetry/data_models.py +431 -0
  157. attune/models/telemetry/storage.py +489 -0
  158. attune/models/token_estimator.py +420 -0
  159. attune/models/validation.py +280 -0
  160. attune/monitoring/__init__.py +52 -0
  161. attune/monitoring/alerts.py +946 -0
  162. attune/monitoring/alerts_cli.py +448 -0
  163. attune/monitoring/multi_backend.py +271 -0
  164. attune/monitoring/otel_backend.py +362 -0
  165. attune/optimization/__init__.py +19 -0
  166. attune/optimization/context_optimizer.py +272 -0
  167. attune/orchestration/__init__.py +67 -0
  168. attune/orchestration/agent_templates.py +707 -0
  169. attune/orchestration/config_store.py +499 -0
  170. attune/orchestration/execution_strategies.py +2111 -0
  171. attune/orchestration/meta_orchestrator.py +1168 -0
  172. attune/orchestration/pattern_learner.py +696 -0
  173. attune/orchestration/real_tools.py +931 -0
  174. attune/pattern_cache.py +187 -0
  175. attune/pattern_library.py +542 -0
  176. attune/patterns/debugging/all_patterns.json +81 -0
  177. attune/patterns/debugging/workflow_20260107_1770825e.json +77 -0
  178. attune/patterns/refactoring_memory.json +89 -0
  179. attune/persistence.py +564 -0
  180. attune/platform_utils.py +265 -0
  181. attune/plugins/__init__.py +28 -0
  182. attune/plugins/base.py +361 -0
  183. attune/plugins/registry.py +268 -0
  184. attune/project_index/__init__.py +32 -0
  185. attune/project_index/cli.py +335 -0
  186. attune/project_index/index.py +667 -0
  187. attune/project_index/models.py +504 -0
  188. attune/project_index/reports.py +474 -0
  189. attune/project_index/scanner.py +777 -0
  190. attune/project_index/scanner_parallel.py +291 -0
  191. attune/prompts/__init__.py +61 -0
  192. attune/prompts/config.py +77 -0
  193. attune/prompts/context.py +177 -0
  194. attune/prompts/parser.py +285 -0
  195. attune/prompts/registry.py +313 -0
  196. attune/prompts/templates.py +208 -0
  197. attune/redis_config.py +302 -0
  198. attune/redis_memory.py +799 -0
  199. attune/resilience/__init__.py +56 -0
  200. attune/resilience/circuit_breaker.py +256 -0
  201. attune/resilience/fallback.py +179 -0
  202. attune/resilience/health.py +300 -0
  203. attune/resilience/retry.py +209 -0
  204. attune/resilience/timeout.py +135 -0
  205. attune/routing/__init__.py +43 -0
  206. attune/routing/chain_executor.py +433 -0
  207. attune/routing/classifier.py +217 -0
  208. attune/routing/smart_router.py +234 -0
  209. attune/routing/workflow_registry.py +343 -0
  210. attune/scaffolding/README.md +589 -0
  211. attune/scaffolding/__init__.py +35 -0
  212. attune/scaffolding/__main__.py +14 -0
  213. attune/scaffolding/cli.py +240 -0
  214. attune/scaffolding/templates/base_wizard.py.jinja2 +121 -0
  215. attune/scaffolding/templates/coach_wizard.py.jinja2 +321 -0
  216. attune/scaffolding/templates/domain_wizard.py.jinja2 +408 -0
  217. attune/scaffolding/templates/linear_flow_wizard.py.jinja2 +203 -0
  218. attune/socratic/__init__.py +256 -0
  219. attune/socratic/ab_testing.py +958 -0
  220. attune/socratic/blueprint.py +533 -0
  221. attune/socratic/cli.py +703 -0
  222. attune/socratic/collaboration.py +1114 -0
  223. attune/socratic/domain_templates.py +924 -0
  224. attune/socratic/embeddings.py +738 -0
  225. attune/socratic/engine.py +794 -0
  226. attune/socratic/explainer.py +682 -0
  227. attune/socratic/feedback.py +772 -0
  228. attune/socratic/forms.py +629 -0
  229. attune/socratic/generator.py +732 -0
  230. attune/socratic/llm_analyzer.py +637 -0
  231. attune/socratic/mcp_server.py +702 -0
  232. attune/socratic/session.py +312 -0
  233. attune/socratic/storage.py +667 -0
  234. attune/socratic/success.py +730 -0
  235. attune/socratic/visual_editor.py +860 -0
  236. attune/socratic/web_ui.py +958 -0
  237. attune/telemetry/__init__.py +39 -0
  238. attune/telemetry/agent_coordination.py +475 -0
  239. attune/telemetry/agent_tracking.py +367 -0
  240. attune/telemetry/approval_gates.py +545 -0
  241. attune/telemetry/cli.py +1231 -0
  242. attune/telemetry/commands/__init__.py +14 -0
  243. attune/telemetry/commands/dashboard_commands.py +696 -0
  244. attune/telemetry/event_streaming.py +409 -0
  245. attune/telemetry/feedback_loop.py +567 -0
  246. attune/telemetry/usage_tracker.py +591 -0
  247. attune/templates.py +754 -0
  248. attune/test_generator/__init__.py +38 -0
  249. attune/test_generator/__main__.py +14 -0
  250. attune/test_generator/cli.py +234 -0
  251. attune/test_generator/generator.py +355 -0
  252. attune/test_generator/risk_analyzer.py +216 -0
  253. attune/test_generator/templates/unit_test.py.jinja2 +272 -0
  254. attune/tier_recommender.py +384 -0
  255. attune/tools.py +183 -0
  256. attune/trust/__init__.py +28 -0
  257. attune/trust/circuit_breaker.py +579 -0
  258. attune/trust_building.py +527 -0
  259. attune/validation/__init__.py +19 -0
  260. attune/validation/xml_validator.py +281 -0
  261. attune/vscode_bridge.py +173 -0
  262. attune/workflow_commands.py +780 -0
  263. attune/workflow_patterns/__init__.py +33 -0
  264. attune/workflow_patterns/behavior.py +249 -0
  265. attune/workflow_patterns/core.py +76 -0
  266. attune/workflow_patterns/output.py +99 -0
  267. attune/workflow_patterns/registry.py +255 -0
  268. attune/workflow_patterns/structural.py +288 -0
  269. attune/workflows/__init__.py +539 -0
  270. attune/workflows/autonomous_test_gen.py +1268 -0
  271. attune/workflows/base.py +2667 -0
  272. attune/workflows/batch_processing.py +342 -0
  273. attune/workflows/bug_predict.py +1084 -0
  274. attune/workflows/builder.py +273 -0
  275. attune/workflows/caching.py +253 -0
  276. attune/workflows/code_review.py +1048 -0
  277. attune/workflows/code_review_adapters.py +312 -0
  278. attune/workflows/code_review_pipeline.py +722 -0
  279. attune/workflows/config.py +645 -0
  280. attune/workflows/dependency_check.py +644 -0
  281. attune/workflows/document_gen/__init__.py +25 -0
  282. attune/workflows/document_gen/config.py +30 -0
  283. attune/workflows/document_gen/report_formatter.py +162 -0
  284. attune/workflows/document_gen/workflow.py +1426 -0
  285. attune/workflows/document_manager.py +216 -0
  286. attune/workflows/document_manager_README.md +134 -0
  287. attune/workflows/documentation_orchestrator.py +1205 -0
  288. attune/workflows/history.py +510 -0
  289. attune/workflows/keyboard_shortcuts/__init__.py +39 -0
  290. attune/workflows/keyboard_shortcuts/generators.py +391 -0
  291. attune/workflows/keyboard_shortcuts/parsers.py +416 -0
  292. attune/workflows/keyboard_shortcuts/prompts.py +295 -0
  293. attune/workflows/keyboard_shortcuts/schema.py +193 -0
  294. attune/workflows/keyboard_shortcuts/workflow.py +509 -0
  295. attune/workflows/llm_base.py +363 -0
  296. attune/workflows/manage_docs.py +87 -0
  297. attune/workflows/manage_docs_README.md +134 -0
  298. attune/workflows/manage_documentation.py +821 -0
  299. attune/workflows/new_sample_workflow1.py +149 -0
  300. attune/workflows/new_sample_workflow1_README.md +150 -0
  301. attune/workflows/orchestrated_health_check.py +849 -0
  302. attune/workflows/orchestrated_release_prep.py +600 -0
  303. attune/workflows/output.py +413 -0
  304. attune/workflows/perf_audit.py +863 -0
  305. attune/workflows/pr_review.py +762 -0
  306. attune/workflows/progress.py +785 -0
  307. attune/workflows/progress_server.py +322 -0
  308. attune/workflows/progressive/README 2.md +454 -0
  309. attune/workflows/progressive/README.md +454 -0
  310. attune/workflows/progressive/__init__.py +82 -0
  311. attune/workflows/progressive/cli.py +219 -0
  312. attune/workflows/progressive/core.py +488 -0
  313. attune/workflows/progressive/orchestrator.py +723 -0
  314. attune/workflows/progressive/reports.py +520 -0
  315. attune/workflows/progressive/telemetry.py +274 -0
  316. attune/workflows/progressive/test_gen.py +495 -0
  317. attune/workflows/progressive/workflow.py +589 -0
  318. attune/workflows/refactor_plan.py +694 -0
  319. attune/workflows/release_prep.py +895 -0
  320. attune/workflows/release_prep_crew.py +969 -0
  321. attune/workflows/research_synthesis.py +404 -0
  322. attune/workflows/routing.py +168 -0
  323. attune/workflows/secure_release.py +593 -0
  324. attune/workflows/security_adapters.py +297 -0
  325. attune/workflows/security_audit.py +1329 -0
  326. attune/workflows/security_audit_phase3.py +355 -0
  327. attune/workflows/seo_optimization.py +633 -0
  328. attune/workflows/step_config.py +234 -0
  329. attune/workflows/telemetry_mixin.py +269 -0
  330. attune/workflows/test5.py +125 -0
  331. attune/workflows/test5_README.md +158 -0
  332. attune/workflows/test_coverage_boost_crew.py +849 -0
  333. attune/workflows/test_gen/__init__.py +52 -0
  334. attune/workflows/test_gen/ast_analyzer.py +249 -0
  335. attune/workflows/test_gen/config.py +88 -0
  336. attune/workflows/test_gen/data_models.py +38 -0
  337. attune/workflows/test_gen/report_formatter.py +289 -0
  338. attune/workflows/test_gen/test_templates.py +381 -0
  339. attune/workflows/test_gen/workflow.py +655 -0
  340. attune/workflows/test_gen.py +54 -0
  341. attune/workflows/test_gen_behavioral.py +477 -0
  342. attune/workflows/test_gen_parallel.py +341 -0
  343. attune/workflows/test_lifecycle.py +526 -0
  344. attune/workflows/test_maintenance.py +627 -0
  345. attune/workflows/test_maintenance_cli.py +590 -0
  346. attune/workflows/test_maintenance_crew.py +840 -0
  347. attune/workflows/test_runner.py +622 -0
  348. attune/workflows/tier_tracking.py +531 -0
  349. attune/workflows/xml_enhanced_crew.py +285 -0
  350. attune_ai-2.0.0.dist-info/METADATA +1026 -0
  351. attune_ai-2.0.0.dist-info/RECORD +457 -0
  352. attune_ai-2.0.0.dist-info/WHEEL +5 -0
  353. attune_ai-2.0.0.dist-info/entry_points.txt +26 -0
  354. attune_ai-2.0.0.dist-info/licenses/LICENSE +201 -0
  355. attune_ai-2.0.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
  356. attune_ai-2.0.0.dist-info/top_level.txt +5 -0
  357. attune_healthcare/__init__.py +13 -0
  358. attune_healthcare/monitors/__init__.py +9 -0
  359. attune_healthcare/monitors/clinical_protocol_monitor.py +315 -0
  360. attune_healthcare/monitors/monitoring/__init__.py +44 -0
  361. attune_healthcare/monitors/monitoring/protocol_checker.py +300 -0
  362. attune_healthcare/monitors/monitoring/protocol_loader.py +214 -0
  363. attune_healthcare/monitors/monitoring/sensor_parsers.py +306 -0
  364. attune_healthcare/monitors/monitoring/trajectory_analyzer.py +389 -0
  365. attune_llm/README.md +553 -0
  366. attune_llm/__init__.py +28 -0
  367. attune_llm/agent_factory/__init__.py +53 -0
  368. attune_llm/agent_factory/adapters/__init__.py +85 -0
  369. attune_llm/agent_factory/adapters/autogen_adapter.py +312 -0
  370. attune_llm/agent_factory/adapters/crewai_adapter.py +483 -0
  371. attune_llm/agent_factory/adapters/haystack_adapter.py +298 -0
  372. attune_llm/agent_factory/adapters/langchain_adapter.py +362 -0
  373. attune_llm/agent_factory/adapters/langgraph_adapter.py +333 -0
  374. attune_llm/agent_factory/adapters/native.py +228 -0
  375. attune_llm/agent_factory/adapters/wizard_adapter.py +423 -0
  376. attune_llm/agent_factory/base.py +305 -0
  377. attune_llm/agent_factory/crews/__init__.py +67 -0
  378. attune_llm/agent_factory/crews/code_review.py +1113 -0
  379. attune_llm/agent_factory/crews/health_check.py +1262 -0
  380. attune_llm/agent_factory/crews/refactoring.py +1128 -0
  381. attune_llm/agent_factory/crews/security_audit.py +1018 -0
  382. attune_llm/agent_factory/decorators.py +287 -0
  383. attune_llm/agent_factory/factory.py +558 -0
  384. attune_llm/agent_factory/framework.py +193 -0
  385. attune_llm/agent_factory/memory_integration.py +328 -0
  386. attune_llm/agent_factory/resilient.py +320 -0
  387. attune_llm/agents_md/__init__.py +22 -0
  388. attune_llm/agents_md/loader.py +218 -0
  389. attune_llm/agents_md/parser.py +271 -0
  390. attune_llm/agents_md/registry.py +307 -0
  391. attune_llm/claude_memory.py +466 -0
  392. attune_llm/cli/__init__.py +8 -0
  393. attune_llm/cli/sync_claude.py +487 -0
  394. attune_llm/code_health.py +1313 -0
  395. attune_llm/commands/__init__.py +51 -0
  396. attune_llm/commands/context.py +375 -0
  397. attune_llm/commands/loader.py +301 -0
  398. attune_llm/commands/models.py +231 -0
  399. attune_llm/commands/parser.py +371 -0
  400. attune_llm/commands/registry.py +429 -0
  401. attune_llm/config/__init__.py +29 -0
  402. attune_llm/config/unified.py +291 -0
  403. attune_llm/context/__init__.py +22 -0
  404. attune_llm/context/compaction.py +455 -0
  405. attune_llm/context/manager.py +434 -0
  406. attune_llm/contextual_patterns.py +361 -0
  407. attune_llm/core.py +907 -0
  408. attune_llm/git_pattern_extractor.py +435 -0
  409. attune_llm/hooks/__init__.py +24 -0
  410. attune_llm/hooks/config.py +306 -0
  411. attune_llm/hooks/executor.py +289 -0
  412. attune_llm/hooks/registry.py +302 -0
  413. attune_llm/hooks/scripts/__init__.py +39 -0
  414. attune_llm/hooks/scripts/evaluate_session.py +201 -0
  415. attune_llm/hooks/scripts/first_time_init.py +285 -0
  416. attune_llm/hooks/scripts/pre_compact.py +207 -0
  417. attune_llm/hooks/scripts/session_end.py +183 -0
  418. attune_llm/hooks/scripts/session_start.py +163 -0
  419. attune_llm/hooks/scripts/suggest_compact.py +225 -0
  420. attune_llm/learning/__init__.py +30 -0
  421. attune_llm/learning/evaluator.py +438 -0
  422. attune_llm/learning/extractor.py +514 -0
  423. attune_llm/learning/storage.py +560 -0
  424. attune_llm/levels.py +227 -0
  425. attune_llm/pattern_confidence.py +414 -0
  426. attune_llm/pattern_resolver.py +272 -0
  427. attune_llm/pattern_summary.py +350 -0
  428. attune_llm/providers.py +967 -0
  429. attune_llm/routing/__init__.py +32 -0
  430. attune_llm/routing/model_router.py +362 -0
  431. attune_llm/security/IMPLEMENTATION_SUMMARY.md +413 -0
  432. attune_llm/security/PHASE2_COMPLETE.md +384 -0
  433. attune_llm/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
  434. attune_llm/security/QUICK_REFERENCE.md +316 -0
  435. attune_llm/security/README.md +262 -0
  436. attune_llm/security/__init__.py +62 -0
  437. attune_llm/security/audit_logger.py +929 -0
  438. attune_llm/security/audit_logger_example.py +152 -0
  439. attune_llm/security/pii_scrubber.py +640 -0
  440. attune_llm/security/secrets_detector.py +678 -0
  441. attune_llm/security/secrets_detector_example.py +304 -0
  442. attune_llm/security/secure_memdocs.py +1192 -0
  443. attune_llm/security/secure_memdocs_example.py +278 -0
  444. attune_llm/session_status.py +745 -0
  445. attune_llm/state.py +246 -0
  446. attune_llm/utils/__init__.py +5 -0
  447. attune_llm/utils/tokens.py +349 -0
  448. attune_software/SOFTWARE_PLUGIN_README.md +57 -0
  449. attune_software/__init__.py +13 -0
  450. attune_software/cli/__init__.py +120 -0
  451. attune_software/cli/inspect.py +362 -0
  452. attune_software/cli.py +574 -0
  453. attune_software/plugin.py +188 -0
  454. workflow_scaffolding/__init__.py +11 -0
  455. workflow_scaffolding/__main__.py +12 -0
  456. workflow_scaffolding/cli.py +206 -0
  457. workflow_scaffolding/generator.py +265 -0
attune_llm/providers.py (new file)
@@ -0,0 +1,967 @@
+ """LLM Provider Adapters
+
+ Unified interface for different LLM providers (OpenAI, Anthropic, local models).
+
+ Copyright 2025 Smart AI Memory, LLC
+ Licensed under Fair Source 0.9
+ """
+
+ import asyncio
+ import logging
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+ from datetime import datetime
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class LLMResponse:
+     """Standardized response from any LLM provider"""
+
+     content: str
+     model: str
+     tokens_used: int
+     finish_reason: str
+     metadata: dict[str, Any]
+
+
+ class BaseLLMProvider(ABC):
+     """Base class for all LLM providers.
+
+     Provides unified interface regardless of backend.
+     """
+
+     def __init__(self, api_key: str | None = None, **kwargs):
+         self.api_key = api_key
+         self.config = kwargs
+
+     @abstractmethod
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response from LLM.
+
+         Args:
+             messages: List of {"role": "user/assistant", "content": "..."}
+             system_prompt: Optional system prompt
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens in response
+             **kwargs: Provider-specific options
+
+         Returns:
+             LLMResponse with standardized format
+
+         """
+
+     @abstractmethod
+     def get_model_info(self) -> dict[str, Any]:
+         """Get information about the model being used"""
+
+     def estimate_tokens(self, text: str) -> int:
+         """Estimate token count for text.
+
+         Rough approximation: ~4 chars per token
+         """
+         return len(text) // 4
+
+
+ class AnthropicProvider(BaseLLMProvider):
+     """Anthropic (Claude) provider with enhanced features.
+
+     Supports Claude 3 family models with advanced capabilities:
+     - Extended context windows (200K tokens)
+     - Prompt caching for faster repeated queries
+     - Thinking mode for complex reasoning
+     - Batch processing for cost optimization
+     """
+
+     def __init__(
+         self,
+         api_key: str | None = None,
+         model: str = "claude-sonnet-4-5-20250929",
+         use_prompt_caching: bool = True,  # CHANGED: Default to True for 20-30% cost savings
+         use_thinking: bool = False,
+         use_batch: bool = False,
+         **kwargs,
+     ):
+         super().__init__(api_key, **kwargs)
+         self.model = model
+         self.use_prompt_caching = use_prompt_caching
+         self.use_thinking = use_thinking
+         self.use_batch = use_batch
+
+         # Validate API key is provided
+         if not api_key or not api_key.strip():
+             raise ValueError(
+                 "API key is required for Anthropic provider. "
+                 "Provide via api_key parameter or ANTHROPIC_API_KEY environment variable",
+             )
+
+         # Lazy import to avoid requiring anthropic if not used
+         # v4.6.3: Use AsyncAnthropic for true async I/O (prevents event loop blocking)
+         try:
+             import anthropic
+
+             self.client = anthropic.AsyncAnthropic(api_key=api_key)
+         except ImportError as e:
+             raise ImportError(
+                 "anthropic package required. Install with: pip install anthropic",
+             ) from e
+
+         # Initialize batch provider if needed
+         if use_batch:
+             self.batch_provider = AnthropicBatchProvider(api_key=api_key)
+         else:
+             self.batch_provider = None
+
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response using Anthropic API with enhanced features.
+
+         Claude-specific enhancements:
+         - Prompt caching for repeated system prompts (90% cost reduction)
+         - Extended context (200K tokens) for large codebase analysis
+         - Thinking mode for complex reasoning tasks
+
+         Prompt caching is enabled by default (use_prompt_caching=True).
+         This marks system prompts with cache_control for Anthropic's cache.
+         Break-even: ~3 requests with same context, 5-minute TTL.
+         """
+         # Build kwargs for Anthropic
+         api_kwargs = {
+             "model": self.model,
+             "max_tokens": max_tokens,
+             "temperature": temperature,
+             "messages": messages,
+         }
+
+         # Enable prompt caching for system prompts (Claude-specific)
+         if system_prompt and self.use_prompt_caching:
+             api_kwargs["system"] = [
+                 {
+                     "type": "text",
+                     "text": system_prompt,
+                     "cache_control": {"type": "ephemeral"},  # Cache for 5 minutes
+                 },
+             ]
+         elif system_prompt:
+             api_kwargs["system"] = system_prompt
+
+         # Enable extended thinking for complex tasks (Claude-specific)
+         if self.use_thinking:
+             api_kwargs["thinking"] = {
+                 "type": "enabled",
+                 "budget_tokens": 2000,  # Allow 2K tokens for reasoning
+             }
+
+         # Add any additional kwargs
+         api_kwargs.update(kwargs)
+
+         # Call Anthropic API (async with AsyncAnthropic)
+         response = await self.client.messages.create(**api_kwargs)  # type: ignore[call-overload]
+
+         # Extract thinking content if present
+         thinking_content = None
+         response_content = ""
+
+         for block in response.content:
+             if hasattr(block, "type"):
+                 if block.type == "thinking":
+                     thinking_content = block.thinking
+                 elif block.type == "text":
+                     response_content = block.text
+             else:
+                 response_content = block.text
+
+         # Convert to standardized format
+         metadata = {
+             "input_tokens": response.usage.input_tokens,
+             "output_tokens": response.usage.output_tokens,
+             "provider": "anthropic",
+             "model_family": "claude-3",
+         }
+
+         # Add cache performance metrics if available
+         if hasattr(response.usage, "cache_creation_input_tokens"):
+             cache_creation = getattr(response.usage, "cache_creation_input_tokens", 0)
+             cache_read = getattr(response.usage, "cache_read_input_tokens", 0)
+
+             # Ensure values are numeric (handle mock objects in tests)
+             if isinstance(cache_creation, int) and isinstance(cache_read, int):
+                 metadata["cache_creation_tokens"] = cache_creation
+                 metadata["cache_read_tokens"] = cache_read
+
+                 # Log cache performance for monitoring with detailed cost savings
+                 # Cache reads cost 90% less than regular input tokens
+                 # Cache writes cost 25% more than regular input tokens
+                 if cache_read > 0:
+                     # Sonnet 4.5 input: $3/M tokens, cache read: $0.30/M tokens (90% discount)
+                     savings_per_token = 0.003 / 1000 * 0.9  # 90% of regular cost
+                     total_savings = cache_read * savings_per_token
+                     logger.info(
+                         f"Cache HIT: {cache_read:,} tokens read from cache "
+                         f"(saved ${total_savings:.4f} vs full price)"
+                     )
+                 if cache_creation > 0:
+                     # Cache write cost: $3.75/M tokens (25% markup)
+                     write_cost = cache_creation * 0.00375 / 1000
+                     logger.debug(
+                         f"Cache WRITE: {cache_creation:,} tokens written to cache "
+                         f"(cost ${write_cost:.4f})"
+                     )
+
+         # Add thinking content if present
+         if thinking_content:
+             metadata["thinking"] = thinking_content
+
+         return LLMResponse(
+             content=response_content,
+             model=response.model,
+             tokens_used=response.usage.input_tokens + response.usage.output_tokens,
+             finish_reason=response.stop_reason,
+             metadata=metadata,
+         )
+
+     async def analyze_large_codebase(
+         self,
+         codebase_files: list[dict[str, str]],
+         analysis_prompt: str,
+         **kwargs,
+     ) -> LLMResponse:
+         """Analyze large codebases using Claude's 200K context window.
+
+         Claude-specific feature: Can process entire repositories in one call.
+
+         Args:
+             codebase_files: List of {"path": "...", "content": "..."} dicts
+             analysis_prompt: What to analyze for
+             **kwargs: Additional generation parameters
+
+         Returns:
+             LLMResponse with analysis results
+
+         """
+         # Build context from all files
+         file_context = "\n\n".join(
+             [f"# File: {file['path']}\n{file['content']}" for file in codebase_files],
+         )
+
+         # Create system prompt with caching for file context
+         system_parts = [
+             {
+                 "type": "text",
+                 "text": "You are a code analysis expert using the Empathy Framework.",
+             },
+             {
+                 "type": "text",
+                 "text": f"Codebase files:\n\n{file_context}",
+                 "cache_control": {"type": "ephemeral"},  # Cache the codebase
+             },
+         ]
+
+         messages = [{"role": "user", "content": analysis_prompt}]
+
+         # Use extended max_tokens for comprehensive analysis
+         return await self.generate(
+             messages=messages,
+             system_prompt=None,  # We'll pass it directly in api_kwargs
+             max_tokens=kwargs.pop("max_tokens", 4096),
+             **{**kwargs, "system": system_parts},
+         )
+
+     def get_model_info(self) -> dict[str, Any]:
+         """Get Claude model information with extended context capabilities"""
+         model_info = {
+             "claude-3-opus-20240229": {
+                 "max_tokens": 200000,
+                 "cost_per_1m_input": 15.00,
+                 "cost_per_1m_output": 75.00,
+                 "supports_prompt_caching": True,
+                 "supports_thinking": True,
+                 "ideal_for": "Complex reasoning, large codebases",
+             },
+             "claude-3-5-sonnet-20241022": {
+                 "max_tokens": 200000,
+                 "cost_per_1m_input": 3.00,
+                 "cost_per_1m_output": 15.00,
+                 "supports_prompt_caching": True,
+                 "supports_thinking": True,
+                 "ideal_for": "General development, balanced cost/performance",
+             },
+             "claude-3-haiku-20240307": {
+                 "max_tokens": 200000,
+                 "cost_per_1m_input": 0.25,
+                 "cost_per_1m_output": 1.25,
+                 "supports_prompt_caching": True,
+                 "supports_thinking": False,
+                 "ideal_for": "Fast responses, simple tasks",
+             },
+         }
+
+         return model_info.get(
+             self.model,
+             {
+                 "max_tokens": 200000,
+                 "cost_per_1m_input": 3.00,
+                 "cost_per_1m_output": 15.00,
+                 "supports_prompt_caching": True,
+                 "supports_thinking": True,
+             },
+         )
+
+     def estimate_tokens(self, text: str) -> int:
+         """Estimate token count using accurate token counter (overrides base class).
+
+         Uses tiktoken for fast local estimation (~98% accurate).
+         Falls back to heuristic if tiktoken unavailable.
+
+         Args:
+             text: Text to count tokens for
+
+         Returns:
+             Estimated token count
+         """
+         try:
+             from .utils.tokens import count_tokens
+
+             return count_tokens(text, model=self.model, use_api=False)
+         except ImportError:
+             # Fallback to base class heuristic if utils not available
+             return super().estimate_tokens(text)
+
+     def calculate_actual_cost(
+         self,
+         input_tokens: int,
+         output_tokens: int,
+         cache_creation_tokens: int = 0,
+         cache_read_tokens: int = 0,
+     ) -> dict[str, Any]:
+         """Calculate actual cost based on precise token counts.
+
+         Includes Anthropic prompt caching cost adjustments:
+         - Cache writes: 25% markup over standard input pricing
+         - Cache reads: 90% discount from standard input pricing
+
+         Args:
+             input_tokens: Regular input tokens (not cached)
+             output_tokens: Output tokens
+             cache_creation_tokens: Tokens written to cache
+             cache_read_tokens: Tokens read from cache
+
+         Returns:
+             Dictionary with cost breakdown:
+             - base_cost: Cost for regular input/output tokens
+             - cache_write_cost: Cost for cache creation (if any)
+             - cache_read_cost: Cost for cache reads (if any)
+             - total_cost: Total cost including all components
+             - savings: Amount saved by cache reads vs. full price
+
+         Example:
+             >>> provider = AnthropicProvider(api_key="...")
+             >>> cost = provider.calculate_actual_cost(
+             ...     input_tokens=1000,
+             ...     output_tokens=500,
+             ...     cache_read_tokens=10000
+             ... )
+             >>> cost["total_cost"]
+             0.0105  # Significantly less than without cache
+         """
+         # Get pricing for this model
+         model_info = self.get_model_info()
+         input_price_per_million = model_info["cost_per_1m_input"]
+         output_price_per_million = model_info["cost_per_1m_output"]
+
+         # Base cost (non-cached tokens)
+         base_cost = (input_tokens / 1_000_000) * input_price_per_million
+         base_cost += (output_tokens / 1_000_000) * output_price_per_million
+
+         # Cache write cost (25% markup)
+         cache_write_price = input_price_per_million * 1.25
+         cache_write_cost = (cache_creation_tokens / 1_000_000) * cache_write_price
+
+         # Cache read cost (90% discount = 10% of input price)
+         cache_read_price = input_price_per_million * 0.1
+         cache_read_cost = (cache_read_tokens / 1_000_000) * cache_read_price
+
+         # Calculate savings from cache reads
+         full_price_for_cached = (cache_read_tokens / 1_000_000) * input_price_per_million
+         savings = full_price_for_cached - cache_read_cost
+
+         return {
+             "base_cost": round(base_cost, 6),
+             "cache_write_cost": round(cache_write_cost, 6),
+             "cache_read_cost": round(cache_read_cost, 6),
+             "total_cost": round(base_cost + cache_write_cost + cache_read_cost, 6),
+             "savings": round(savings, 6),
+             "currency": "USD",
+         }
+
+
414
+ """Provider for Anthropic Batch API (50% cost reduction).
415
+
416
+ The Batch API processes requests asynchronously within 24 hours
417
+ at 50% of the standard API cost. Ideal for non-urgent, bulk tasks.
418
+
419
+ Example:
420
+ >>> provider = AnthropicBatchProvider(api_key="sk-ant-...")
421
+ >>> requests = [
422
+ ... {
423
+ ... "custom_id": "task_1",
424
+ ... "model": "claude-sonnet-4-5",
425
+ ... "messages": [{"role": "user", "content": "Analyze X"}],
426
+ ... "max_tokens": 1024
427
+ ... }
428
+ ... ]
429
+ >>> batch_id = provider.create_batch(requests)
430
+ >>> # Wait for processing (up to 24 hours)
431
+ >>> results = await provider.wait_for_batch(batch_id)
432
+ """
433
+
434
+ def __init__(self, api_key: str | None = None):
435
+ """Initialize batch provider.
436
+
437
+ Args:
438
+ api_key: Anthropic API key (defaults to ANTHROPIC_API_KEY env var)
439
+ """
440
+ if not api_key or not api_key.strip():
441
+ raise ValueError(
442
+ "API key is required for Anthropic Batch API. "
443
+ "Provide via api_key parameter or ANTHROPIC_API_KEY environment variable"
444
+ )
445
+
446
+ try:
447
+ import anthropic
448
+
449
+ self.client = anthropic.Anthropic(api_key=api_key)
450
+ self._batch_jobs: dict[str, Any] = {}
451
+ except ImportError as e:
452
+ raise ImportError(
453
+ "anthropic package required for Batch API. Install with: pip install anthropic"
454
+ ) from e
455
+
456
+ def create_batch(self, requests: list[dict[str, Any]], job_id: str | None = None) -> str:
457
+ """Create a batch job.
458
+
459
+ Args:
460
+ requests: List of request dicts with 'custom_id' and 'params' containing message creation parameters.
461
+ Format: [{"custom_id": "id1", "params": {"model": "...", "messages": [...], "max_tokens": 1024}}]
462
+ job_id: Optional job identifier for tracking (unused, for API compatibility)
463
+
464
+ Returns:
465
+ Batch job ID for polling status
466
+
467
+ Raises:
468
+ ValueError: If requests is empty or invalid
469
+ RuntimeError: If API call fails
470
+
471
+ Example:
472
+ >>> requests = [
473
+ ... {
474
+ ... "custom_id": "task_1",
475
+ ... "params": {
476
+ ... "model": "claude-sonnet-4-5-20250929",
477
+ ... "messages": [{"role": "user", "content": "Test"}],
478
+ ... "max_tokens": 1024
479
+ ... }
480
+ ... }
481
+ ... ]
482
+ >>> batch_id = provider.create_batch(requests)
483
+ >>> print(f"Batch created: {batch_id}")
484
+ Batch created: msgbatch_abc123
485
+ """
486
+ if not requests:
487
+ raise ValueError("requests cannot be empty")
488
+
489
+ # Validate and convert old format to new format if needed
490
+ formatted_requests = []
491
+ for req in requests:
492
+ if "params" not in req:
493
+ # Old format: convert to new format with params wrapper
494
+ formatted_req = {
495
+ "custom_id": req.get("custom_id", f"req_{id(req)}"),
496
+ "params": {
497
+ "model": req.get("model", "claude-sonnet-4-5-20250929"),
498
+ "messages": req.get("messages", []),
499
+ "max_tokens": req.get("max_tokens", 4096),
500
+ },
501
+ }
502
+ # Copy other optional params
503
+ for key in ["temperature", "system", "stop_sequences"]:
504
+ if key in req:
505
+ formatted_req["params"][key] = req[key]
506
+ formatted_requests.append(formatted_req)
507
+ else:
508
+ formatted_requests.append(req)
509
+
510
+ try:
511
+ # Use correct Message Batches API endpoint
512
+ batch = self.client.messages.batches.create(requests=formatted_requests)
513
+ self._batch_jobs[batch.id] = batch
514
+ logger.info(f"Created batch {batch.id} with {len(formatted_requests)} requests")
515
+ return batch.id
516
+ except Exception as e:
517
+ logger.error(f"Failed to create batch: {e}")
518
+ raise RuntimeError(f"Batch creation failed: {e}") from e
519
+
520
+ def get_batch_status(self, batch_id: str) -> Any:
521
+ """Get status of batch job.
522
+
523
+ Args:
524
+ batch_id: Batch job ID
525
+
526
+ Returns:
527
+ MessageBatch object with processing_status field:
528
+ - "in_progress": Batch is being processed
529
+ - "canceling": Cancellation initiated
530
+ - "ended": Batch processing ended (check request_counts for success/errors)
531
+
532
+ Example:
533
+ >>> status = provider.get_batch_status("msgbatch_abc123")
534
+ >>> print(status.processing_status)
535
+ in_progress
536
+ >>> print(f"Succeeded: {status.request_counts.succeeded}")
537
+ """
538
+ try:
539
+ # Use correct Message Batches API endpoint
540
+ batch = self.client.messages.batches.retrieve(batch_id)
541
+ self._batch_jobs[batch_id] = batch
542
+ return batch
543
+ except Exception as e:
544
+ logger.error(f"Failed to get batch status for {batch_id}: {e}")
545
+ raise RuntimeError(f"Failed to get batch status: {e}") from e
546
+
547
+ def get_batch_results(self, batch_id: str) -> list[dict[str, Any]]:
548
+ """Get results from completed batch.
549
+
550
+ Args:
551
+ batch_id: Batch job ID
552
+
553
+ Returns:
554
+ List of result dicts. Each dict contains:
555
+ - custom_id: Request identifier
556
+ - result: Either {"type": "succeeded", "message": {...}} or {"type": "errored", "error": {...}}
557
+
558
+ Raises:
559
+ ValueError: If batch has not ended processing
560
+ RuntimeError: If API call fails
561
+
562
+ Example:
563
+ >>> results = provider.get_batch_results("msgbatch_abc123")
564
+ >>> for result in results:
565
+ ... if result['result']['type'] == 'succeeded':
566
+ ... message = result['result']['message']
567
+ ... print(f"{result['custom_id']}: {message.content[0].text}")
568
+ ... else:
569
+ ... error = result['result']['error']
570
+ ... print(f"{result['custom_id']}: Error {error['type']}")
571
+ """
572
+ status = self.get_batch_status(batch_id)
573
+
574
+ # Check processing_status instead of status
575
+ if status.processing_status != "ended":
576
+ raise ValueError(
577
+ f"Batch {batch_id} has not ended processing (status: {status.processing_status})"
578
+ )
579
+
580
+ try:
581
+ # Use correct Message Batches API endpoint
582
+ # results() returns an iterator, convert to list
583
+ results_iterator = self.client.messages.batches.results(batch_id)
584
+ return list(results_iterator)
585
+ except Exception as e:
586
+ logger.error(f"Failed to get batch results for {batch_id}: {e}")
587
+ raise RuntimeError(f"Failed to get batch results: {e}") from e
588
+
589
+ async def wait_for_batch(
590
+ self,
591
+ batch_id: str,
592
+ poll_interval: int = 60,
593
+ timeout: int = 86400, # 24 hours
594
+ ) -> list[dict[str, Any]]:
595
+ """Wait for batch to complete with polling.
596
+
597
+ Args:
598
+ batch_id: Batch job ID
599
+ poll_interval: Seconds between status checks (default: 60)
600
+ timeout: Maximum wait time in seconds (default: 86400 = 24 hours)
601
+
602
+ Returns:
603
+ Batch results when processing ends
604
+
605
+ Raises:
606
+ TimeoutError: If batch doesn't complete within timeout
607
+ RuntimeError: If batch had errors during processing
608
+
609
+ Example:
610
+ >>> results = await provider.wait_for_batch(
611
+ ... "msgbatch_abc123",
612
+ ... poll_interval=300, # Check every 5 minutes
613
+ ... )
614
+ >>> print(f"Batch completed: {len(results)} results")
615
+ """
616
+
617
+ start_time = datetime.now()
618
+
619
+ while True:
620
+ status = self.get_batch_status(batch_id)
621
+
622
+ # Check if batch processing has ended
623
+ if status.processing_status == "ended":
624
+ # Check request counts to see if there were errors
625
+ counts = status.request_counts
626
+ logger.info(
627
+ f"Batch {batch_id} ended: "
628
+ f"{counts.succeeded} succeeded, {counts.errored} errored, "
629
+ f"{counts.canceled} canceled, {counts.expired} expired"
630
+ )
631
+
632
+ # Return results even if some requests failed
633
+ # The caller can inspect individual results for errors
634
+ return self.get_batch_results(batch_id)
635
+
636
+ # Check timeout
637
+ elapsed = (datetime.now() - start_time).total_seconds()
638
+ if elapsed > timeout:
639
+ raise TimeoutError(f"Batch {batch_id} did not complete within {timeout}s")
640
+
641
+ # Log progress with request counts
642
+ try:
643
+ counts = status.request_counts
644
+ logger.debug(
645
+ f"Batch {batch_id} status: {status.processing_status} "
646
+ f"(processing: {counts.processing}, elapsed: {elapsed:.0f}s)"
647
+ )
648
+ except AttributeError:
649
+ logger.debug(
650
+ f"Batch {batch_id} status: {status.processing_status} (elapsed: {elapsed:.0f}s)"
651
+ )
652
+
653
+ # Wait before next poll
654
+ await asyncio.sleep(poll_interval)
655
+
656
+
657
+ class OpenAIProvider(BaseLLMProvider):
+     """OpenAI provider.
+
+     Supports GPT-4, GPT-3.5, and other OpenAI models.
+     """
+
+     def __init__(self, api_key: str | None = None, model: str = "gpt-4-turbo-preview", **kwargs):
+         super().__init__(api_key, **kwargs)
+         self.model = model
+
+         # Validate API key is provided
+         if not api_key or not api_key.strip():
+             raise ValueError(
+                 "API key is required for OpenAI provider. "
+                 "Provide via api_key parameter or OPENAI_API_KEY environment variable",
+             )
+
+         # Lazy import
+         try:
+             import openai
+
+             self.client = openai.AsyncOpenAI(api_key=api_key)
+         except ImportError as e:
+             raise ImportError("openai package required. Install with: pip install openai") from e
+
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response using OpenAI API"""
+         # Add system prompt if provided
+         if system_prompt:
+             messages = [{"role": "system", "content": system_prompt}] + messages
+
+         # Call OpenAI API
+         response = await self.client.chat.completions.create(
+             model=self.model,
+             messages=messages,  # type: ignore[arg-type]
+             temperature=temperature,
+             max_tokens=max_tokens,
+             **kwargs,
+         )
+
+         # Convert to standardized format
+         content = response.choices[0].message.content or ""
+         usage = response.usage
+         return LLMResponse(
+             content=content,
+             model=response.model,
+             tokens_used=usage.total_tokens if usage else 0,
+             finish_reason=response.choices[0].finish_reason,
+             metadata={
+                 "input_tokens": usage.prompt_tokens if usage else 0,
+                 "output_tokens": usage.completion_tokens if usage else 0,
+                 "provider": "openai",
+             },
+         )
+
+     def get_model_info(self) -> dict[str, Any]:
+         """Get OpenAI model information"""
+         model_info = {
+             "gpt-4-turbo-preview": {
+                 "max_tokens": 128000,
+                 "cost_per_1m_input": 10.00,
+                 "cost_per_1m_output": 30.00,
+             },
+             "gpt-4": {"max_tokens": 8192, "cost_per_1m_input": 30.00, "cost_per_1m_output": 60.00},
+             "gpt-3.5-turbo": {
+                 "max_tokens": 16385,
+                 "cost_per_1m_input": 0.50,
+                 "cost_per_1m_output": 1.50,
+             },
+         }
+
+         return model_info.get(
+             self.model,
+             {"max_tokens": 128000, "cost_per_1m_input": 10.00, "cost_per_1m_output": 30.00},
+         )
+
+
+ class GeminiProvider(BaseLLMProvider):
+     """Google Gemini provider with cost tracking integration.
+
+     Supports Gemini models:
+     - gemini-2.0-flash-exp: Fast, cheap tier (1M context)
+     - gemini-1.5-pro: Balanced, capable tier (2M context)
+     - gemini-2.5-pro: Premium reasoning tier
+     """
+
+     def __init__(
+         self,
+         api_key: str | None = None,
+         model: str = "gemini-1.5-pro",
+         **kwargs,
+     ):
+         super().__init__(api_key, **kwargs)
+         self.model = model
+
+         # Validate API key is provided
+         if not api_key or not api_key.strip():
+             raise ValueError(
+                 "API key is required for Gemini provider. "
+                 "Provide via api_key parameter or GOOGLE_API_KEY environment variable",
+             )
+
+         # Lazy import to avoid requiring google-generativeai if not used
+         try:
+             import google.generativeai as genai
+
+             genai.configure(api_key=api_key)
+             self.genai = genai
+             self.client = genai.GenerativeModel(model)
+         except ImportError as e:
+             raise ImportError(
+                 "google-generativeai package required. Install with: pip install google-generativeai",
+             ) from e
+
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response using Google Gemini API.
+
+         Gemini-specific features:
+         - Large context windows (1M-2M tokens)
+         - Multimodal support
+         - Grounding with Google Search
+         """
+         import asyncio
+
+         # Convert messages to Gemini format
+         gemini_messages = []
+         for msg in messages:
+             role = "user" if msg["role"] == "user" else "model"
+             gemini_messages.append({"role": role, "parts": [msg["content"]]})
+
+         # Build generation config
+         generation_config = self.genai.GenerationConfig(
+             temperature=temperature,
+             max_output_tokens=max_tokens,
+         )
+
+         # Create model with system instruction if provided
+         if system_prompt:
+             model = self.genai.GenerativeModel(
+                 self.model,
+                 system_instruction=system_prompt,
+             )
+         else:
+             model = self.client
+
+         # Call Gemini API (run sync in thread pool for async compatibility)
+         loop = asyncio.get_event_loop()
+         response = await loop.run_in_executor(
+             None,
+             lambda: model.generate_content(
+                 gemini_messages,  # type: ignore[arg-type]
+                 generation_config=generation_config,
+             ),
+         )
+
+         # Extract token counts from usage metadata
+         input_tokens = 0
+         output_tokens = 0
+         if hasattr(response, "usage_metadata"):
+             input_tokens = getattr(response.usage_metadata, "prompt_token_count", 0)
+             output_tokens = getattr(response.usage_metadata, "candidates_token_count", 0)
+
+         # Log to cost tracker
+         try:
+             from attune.cost_tracker import log_request
+
+             tier = self._get_tier()
+             log_request(
+                 model=self.model,
+                 input_tokens=input_tokens,
+                 output_tokens=output_tokens,
+                 task_type=kwargs.get("task_type", "gemini_generate"),
+                 tier=tier,
+             )
+         except ImportError:
+             pass  # Cost tracking not available
+
+         # Convert to standardized format
+         content = ""
+         if response.candidates:
+             content = response.candidates[0].content.parts[0].text
+
+         finish_reason = "stop"
+         if response.candidates and hasattr(response.candidates[0], "finish_reason"):
+             finish_reason = str(response.candidates[0].finish_reason.name).lower()
+
+         return LLMResponse(
+             content=content,
+             model=self.model,
+             tokens_used=input_tokens + output_tokens,
+             finish_reason=finish_reason,
+             metadata={
+                 "input_tokens": input_tokens,
+                 "output_tokens": output_tokens,
+                 "provider": "google",
+                 "model_family": "gemini",
+             },
+         )
+
+     def _get_tier(self) -> str:
+         """Determine tier from model name."""
+         if "flash" in self.model.lower():
+             return "cheap"
+         if "2.5" in self.model or "ultra" in self.model.lower():
+             return "premium"
+         return "capable"
+
+     def get_model_info(self) -> dict[str, Any]:
+         """Get Gemini model information"""
+         model_info = {
+             "gemini-2.0-flash-exp": {
+                 "max_tokens": 1000000,
+                 "cost_per_1m_input": 0.075,
+                 "cost_per_1m_output": 0.30,
+                 "supports_vision": True,
+                 "ideal_for": "Fast responses, simple tasks, large context",
+             },
+             "gemini-1.5-pro": {
+                 "max_tokens": 2000000,
+                 "cost_per_1m_input": 1.25,
+                 "cost_per_1m_output": 5.00,
+                 "supports_vision": True,
+                 "ideal_for": "Complex reasoning, large codebases",
+             },
+             "gemini-2.5-pro": {
+                 "max_tokens": 1000000,
+                 "cost_per_1m_input": 2.50,
+                 "cost_per_1m_output": 10.00,
+                 "supports_vision": True,
+                 "ideal_for": "Advanced reasoning, complex tasks",
+             },
+         }
+
+         return model_info.get(
+             self.model,
+             {
+                 "max_tokens": 1000000,
+                 "cost_per_1m_input": 1.25,
+                 "cost_per_1m_output": 5.00,
+                 "supports_vision": True,
+             },
+         )
+
+
+ class LocalProvider(BaseLLMProvider):
+     """Local model provider (Ollama, LM Studio, etc.).
+
+     For running models locally.
+     """
+
+     def __init__(self, endpoint: str = "http://localhost:11434", model: str = "llama2", **kwargs):
+         super().__init__(api_key=None, **kwargs)
+         self.endpoint = endpoint
+         self.model = model
+
+     async def generate(
+         self,
+         messages: list[dict[str, str]],
+         system_prompt: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate response using local model"""
+         import aiohttp
+
+         # Format for Ollama-style API
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "stream": False,
+             "options": {"temperature": temperature, "num_predict": max_tokens},
+         }
+
+         if system_prompt:
+             payload["system"] = system_prompt
+
+         async with aiohttp.ClientSession() as session:
+             async with session.post(f"{self.endpoint}/api/chat", json=payload) as response:
+                 result = await response.json()
+
+         return LLMResponse(
+             content=result.get("message", {}).get("content", ""),
+             model=self.model,
+             tokens_used=result.get("eval_count", 0) + result.get("prompt_eval_count", 0),
+             finish_reason="stop",
+             metadata={"provider": "local", "endpoint": self.endpoint},
+         )
+
+     def get_model_info(self) -> dict[str, Any]:
+         """Get local model information"""
+         return {
+             "max_tokens": 4096,  # Depends on model
+             "cost_per_1m_input": 0.0,  # Free (local)
+             "cost_per_1m_output": 0.0,
+             "endpoint": self.endpoint,
+         }
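
For orientation, a minimal usage sketch of the provider interface defined above. It assumes the wheel is installed alongside the anthropic package and that ANTHROPIC_API_KEY is set; the import path mirrors the file layout shown in this diff (attune_llm/providers.py) and is an assumption rather than documented API.

# Usage sketch (not part of the package): call AnthropicProvider through the
# BaseLLMProvider interface. Assumes `pip install attune-ai anthropic` and a
# valid ANTHROPIC_API_KEY in the environment.
import asyncio
import os

from attune_llm.providers import AnthropicProvider, LLMResponse


async def main() -> None:
    provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"])

    # System prompts are cached by default (use_prompt_caching=True), so
    # repeated calls with the same prompt take the discounted cache-read path.
    response: LLMResponse = await provider.generate(
        messages=[{"role": "user", "content": "Summarize this release in one sentence."}],
        system_prompt="You are a concise release-notes assistant.",
        max_tokens=256,
    )
    print(response.content)
    print(response.metadata)  # input/output token counts, cache metrics when present


if __name__ == "__main__":
    asyncio.run(main())

The same generate() call shape works for OpenAIProvider, GeminiProvider, and LocalProvider, since all of them return the standardized LLMResponse dataclass.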
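
The cache pricing in calculate_actual_cost can also be checked by hand. A short worked example follows, using the fallback Sonnet pricing from get_model_info ($3.00 per million input tokens, $15.00 per million output tokens, 25% markup on cache writes, 90% discount on cache reads); the token counts are illustrative only.

# Worked example of the cache-pricing arithmetic (illustrative numbers).
input_tokens = 1_000        # regular, uncached input
output_tokens = 500
cache_read_tokens = 10_000  # context served from the prompt cache

base_cost = (input_tokens / 1_000_000) * 3.00 + (output_tokens / 1_000_000) * 15.00
# = 0.003 + 0.0075 = 0.0105

cache_read_cost = (cache_read_tokens / 1_000_000) * (3.00 * 0.1)  # 90% discount
# = 0.003

savings = (cache_read_tokens / 1_000_000) * 3.00 - cache_read_cost
# = 0.030 - 0.003 = 0.027 saved versus sending the same 10K tokens uncached

total_cost = base_cost + cache_read_cost
# = 0.0135 for the request, versus 0.0405 with no cache at all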