flowra 0.0.25.dev35__tar.gz → 0.0.26.dev37__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (280) hide show
  1. flowra-0.0.26.dev37/.claude/commands/update-pricing.md +74 -0
  2. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/CHANGELOG.md +17 -0
  3. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/CLAUDE.md +1 -1
  4. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/PKG-INFO +1 -1
  5. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/context7.json +2 -2
  6. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/lib.md +1 -0
  7. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/llm.md +1 -0
  8. flowra-0.0.26.dev37/docs/research/llm_retry_backoff.md +244 -0
  9. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/pricing_complexity.md +5 -5
  10. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/todo.md +13 -0
  11. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/ext/mlflow.py +4 -1
  12. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/ext/otel.py +4 -0
  13. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/llm_call/agent.py +1 -0
  14. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/llm_config.py +1 -0
  15. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/agent.py +1 -0
  16. flowra-0.0.26.dev37/flowra/llm/pricing/__init__.py +40 -0
  17. flowra-0.0.26.dev37/flowra/llm/pricing/data/custom.json +52 -0
  18. flowra-0.0.26.dev37/flowra/llm/pricing/data/generated.json +791 -0
  19. flowra-0.0.26.dev37/flowra/llm/pricing/registry.py +186 -0
  20. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/providers/anthropic_vertex.py +6 -4
  21. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/providers/google_vertex.py +10 -3
  22. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/providers/openai.py +10 -3
  23. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/request.py +1 -0
  24. flowra-0.0.26.dev37/flowra/version.py +2 -0
  25. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/ext/test_otel.py +8 -1
  26. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_llm_call_agent.py +2 -1
  27. flowra-0.0.26.dev37/tests/llm/pricing/test_registry.py +382 -0
  28. flowra-0.0.26.dev37/tools/sync_pricing.py +218 -0
  29. flowra-0.0.25.dev35/.claude/commands/update-pricing.md +0 -48
  30. flowra-0.0.25.dev35/flowra/llm/pricing/__init__.py +0 -3
  31. flowra-0.0.25.dev35/flowra/llm/pricing/anthropic.py +0 -68
  32. flowra-0.0.25.dev35/flowra/llm/pricing/google.py +0 -56
  33. flowra-0.0.25.dev35/flowra/llm/pricing/openai.py +0 -70
  34. flowra-0.0.25.dev35/flowra/version.py +0 -2
  35. flowra-0.0.25.dev35/tests/llm/pricing/test_anthropic.py +0 -71
  36. flowra-0.0.25.dev35/tests/llm/pricing/test_google.py +0 -43
  37. flowra-0.0.25.dev35/tests/llm/pricing/test_openai.py +0 -50
  38. flowra-0.0.25.dev35/tools/sync_pricing.py +0 -360
  39. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.env.example +0 -0
  40. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.github/workflows/master.yml +0 -0
  41. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.github/workflows/publish.yml +0 -0
  42. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.github/workflows/pull_request.yml +0 -0
  43. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.github/workflows/pull_request_e2e.yml +0 -0
  44. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.gitignore +0 -0
  45. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.python-version +0 -0
  46. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/LICENSE +0 -0
  47. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/Makefile +0 -0
  48. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/README.md +0 -0
  49. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/agents.md +0 -0
  50. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/getting-started.md +0 -0
  51. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/agent.md +0 -0
  52. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/architecture.md +0 -0
  53. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/ext/mlflow.md +0 -0
  54. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/ext/otel.md +0 -0
  55. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/ext/tracing-guide.md +0 -0
  56. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/ext.md +0 -0
  57. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/lib/anthropic.md +0 -0
  58. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/patterns.md +0 -0
  59. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/tools.md +0 -0
  60. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/llm.md +0 -0
  61. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/observability.md +0 -0
  62. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/patterns.md +0 -0
  63. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/flowing_context.md +0 -0
  64. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/hooks_redesign.md +0 -0
  65. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/mlflow_context_migration.md +0 -0
  66. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/model_fallback.md +0 -0
  67. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/otel_integration.md +0 -0
  68. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/provider_extensions.md +0 -0
  69. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/spawn_strategies.md +0 -0
  70. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/strands_comparison.md +0 -0
  71. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/tool_error_signals.md +0 -0
  72. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/tool_search_tool.md +0 -0
  73. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/voice_stt.md +0 -0
  74. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step1_structure.md +0 -0
  75. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step2_code_style.md +0 -0
  76. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step3_documentation.md +0 -0
  77. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step4_doc_readability.md +0 -0
  78. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step5_doc_audit.md +0 -0
  79. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step6_tests.md +0 -0
  80. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/tools.md +0 -0
  81. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/TRACING_COMBINATIONS.md +0 -0
  82. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/__init__.py +0 -0
  83. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/agent_as_tool.py +0 -0
  84. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/app_agent.py +0 -0
  85. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/console_chat.py +0 -0
  86. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/__init__.py +0 -0
  87. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/agents_custom.py +0 -0
  88. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/agents_parallel.py +0 -0
  89. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/getting_started_chat.py +0 -0
  90. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/getting_started_streaming.py +0 -0
  91. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/getting_started_tools.py +0 -0
  92. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/llm_streaming.py +0 -0
  93. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/llm_structured_output.py +0 -0
  94. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/tools_service_injection.py +0 -0
  95. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/escalation.py +0 -0
  96. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/llm_logging.py +0 -0
  97. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/llm_routing.py +0 -0
  98. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/menu_agent.py +0 -0
  99. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/menu_agent_class.py +0 -0
  100. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_demo.py +0 -0
  101. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_dual_export_demo.py +0 -0
  102. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_nested_demo.py +0 -0
  103. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_otel_both_demo.py +0 -0
  104. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_otel_nested_demo.py +0 -0
  105. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_parallel_demo.py +0 -0
  106. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/model_registry.py +0 -0
  107. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/otel_demo.py +0 -0
  108. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/otel_jaeger_demo.py +0 -0
  109. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/otel_nested_demo.py +0 -0
  110. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/otel_visualize.py +0 -0
  111. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/race.py +0 -0
  112. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/span_crash_demo.py +0 -0
  113. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/span_demo.py +0 -0
  114. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/system_prompt.txt +0 -0
  115. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tools/__init__.py +0 -0
  116. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tools/calculator.py +0 -0
  117. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tools/random_numbers.py +0 -0
  118. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tools/switch_model.py +0 -0
  119. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tui_chat.py +0 -0
  120. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/__init__.py +0 -0
  121. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/_sentinel.py +0 -0
  122. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/__init__.py +0 -0
  123. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/__init__.py +0 -0
  124. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/agent.py +0 -0
  125. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/agent_arg.py +0 -0
  126. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/__init__.py +0 -0
  127. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/compiler.py +0 -0
  128. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/contract.py +0 -0
  129. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/init_params.py +0 -0
  130. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/instance.py +0 -0
  131. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/step_params.py +0 -0
  132. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/step_validation.py +0 -0
  133. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/steps.py +0 -0
  134. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/type_helpers.py +0 -0
  135. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/type_registry.py +0 -0
  136. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/model.py +0 -0
  137. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/registry.py +0 -0
  138. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/step.py +0 -0
  139. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/step_arg.py +0 -0
  140. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/step_helpers.py +0 -0
  141. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/__init__.py +0 -0
  142. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/actions.py +0 -0
  143. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/context.py +0 -0
  144. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/flowing_registry.py +0 -0
  145. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/hooks.py +0 -0
  146. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/interrupt.py +0 -0
  147. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/interrupt_helpers.py +0 -0
  148. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/spawn.py +0 -0
  149. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/timeout.py +0 -0
  150. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/__init__.py +0 -0
  151. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/engine.py +0 -0
  152. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/execution.py +0 -0
  153. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/instance_factory.py +0 -0
  154. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/runtime.py +0 -0
  155. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/scope.py +0 -0
  156. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/serialization.py +0 -0
  157. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/spans.py +0 -0
  158. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/spawn_tree.py +0 -0
  159. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/services.py +0 -0
  160. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/state/__init__.py +0 -0
  161. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/state/markers.py +0 -0
  162. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/state/store.py +0 -0
  163. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/state/values.py +0 -0
  164. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/storage/__init__.py +0 -0
  165. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/storage/file.py +0 -0
  166. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/storage/in_memory.py +0 -0
  167. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/storage/session_storage.py +0 -0
  168. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/ext/__init__.py +0 -0
  169. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/__init__.py +0 -0
  170. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/anthropic/__init__.py +0 -0
  171. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/anthropic/cache.py +0 -0
  172. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/anthropic/presets.py +0 -0
  173. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/anthropic/tool_search.py +0 -0
  174. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/__init__.py +0 -0
  175. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/agent.py +0 -0
  176. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/config.py +0 -0
  177. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/hook_events.py +0 -0
  178. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/spec.py +0 -0
  179. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/config_value.py +0 -0
  180. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/llm_call/__init__.py +0 -0
  181. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/llm_call/spec.py +0 -0
  182. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/observability/__init__.py +0 -0
  183. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/observability/llm_hooks.py +0 -0
  184. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/__init__.py +0 -0
  185. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/config.py +0 -0
  186. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/context.py +0 -0
  187. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/hook_events.py +0 -0
  188. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/spec.py +0 -0
  189. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/tool_call/__init__.py +0 -0
  190. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/tool_call/agent.py +0 -0
  191. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/tool_call/agent_tool.py +0 -0
  192. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/tool_call/context.py +0 -0
  193. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/__init__.py +0 -0
  194. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/base.py +0 -0
  195. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/blocks.py +0 -0
  196. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/messages.py +0 -0
  197. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/provider.py +0 -0
  198. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/providers/__init__.py +0 -0
  199. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/response.py +0 -0
  200. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/schema_formatting.py +0 -0
  201. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/schema_validation.py +0 -0
  202. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/stream.py +0 -0
  203. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/tools.py +0 -0
  204. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/py.typed +0 -0
  205. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/__init__.py +0 -0
  206. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/local_tool.py +0 -0
  207. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/mcp_connection.py +0 -0
  208. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/tool_arg.py +0 -0
  209. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/tool_group.py +0 -0
  210. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/tool_registry.py +0 -0
  211. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/types.py +0 -0
  212. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/pyproject.toml +0 -0
  213. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/__init__.py +0 -0
  214. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/__init__.py +0 -0
  215. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/__init__.py +0 -0
  216. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/compile/__init__.py +0 -0
  217. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/compile/test_compile.py +0 -0
  218. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/compile/test_type_helpers.py +0 -0
  219. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/test_agent.py +0 -0
  220. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/test_registry.py +0 -0
  221. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/test_step_helpers.py +0 -0
  222. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/__init__.py +0 -0
  223. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_agent_def.py +0 -0
  224. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_context.py +0 -0
  225. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_flowing_registry.py +0 -0
  226. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_flowing_registry_tasks.py +0 -0
  227. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_flowing_sync.py +0 -0
  228. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_hooks.py +0 -0
  229. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_interrupt.py +0 -0
  230. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_spans.py +0 -0
  231. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_timeout.py +0 -0
  232. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_with_interrupt.py +0 -0
  233. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/__init__.py +0 -0
  234. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_engine.py +0 -0
  235. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_engine_spans.py +0 -0
  236. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_hook_context.py +0 -0
  237. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_persistence.py +0 -0
  238. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_runtime.py +0 -0
  239. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_scope.py +0 -0
  240. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_serialization.py +0 -0
  241. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_spec_in_constructor.py +0 -0
  242. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/state/__init__.py +0 -0
  243. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/state/test_values.py +0 -0
  244. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/storage/__init__.py +0 -0
  245. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/storage/test_file.py +0 -0
  246. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/storage/test_in_memory.py +0 -0
  247. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/test_missing_scenarios.py +0 -0
  248. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/ext/__init__.py +0 -0
  249. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/ext/test_mlflow.py +0 -0
  250. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/__init__.py +0 -0
  251. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/anthropic/__init__.py +0 -0
  252. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/anthropic/test_anthropic.py +0 -0
  253. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_chat_agent.py +0 -0
  254. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_config_value.py +0 -0
  255. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_matches_tool_filter.py +0 -0
  256. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_tool_call_agent.py +0 -0
  257. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_tool_call_agent_call_agent.py +0 -0
  258. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_tool_loop_agent.py +0 -0
  259. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/tool_loop/__init__.py +0 -0
  260. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/__init__.py +0 -0
  261. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/pricing/__init__.py +0 -0
  262. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/__init__.py +0 -0
  263. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_anthropic_e2e.py +0 -0
  264. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_anthropic_vertex.py +0 -0
  265. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_google_vertex.py +0 -0
  266. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_google_vertex_e2e.py +0 -0
  267. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_openai_e2e.py +0 -0
  268. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_openai_provider.py +0 -0
  269. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_cost_breakdown.py +0 -0
  270. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_metadata.py +0 -0
  271. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_response.py +0 -0
  272. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_schema_formatting.py +0 -0
  273. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_schema_validation.py +0 -0
  274. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_stream.py +0 -0
  275. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/__init__.py +0 -0
  276. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/test_local_tool.py +0 -0
  277. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/test_mcp_connection.py +0 -0
  278. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/test_tool_group.py +0 -0
  279. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/test_tool_registry.py +0 -0
  280. {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/uv.lock +0 -0
@@ -0,0 +1,74 @@
1
+ # Update Pricing
2
+
3
+ Update the LLM pricing data in `flowra/llm/pricing/data/` with current pricing from the web.
4
+
5
+ ## Instructions
6
+
7
+ You are updating the pricing data for LLM models used by this project.
8
+
9
+ ### Project structure
10
+
11
+ Pricing is stored in JSON files under `flowra/llm/pricing/data/`:
12
+
13
+ - `generated.json` — auto-generated from litellm via `tools/sync_pricing.py`. **Do not edit manually.**
14
+ - `custom.json` — manual overrides and additions for models/fields missing from litellm. Custom entries are **merged** with generated entries: fields present in `custom.json` override the generated values, and all other fields are preserved from `generated.json`.
15
+
16
+ The JSON format is `provider → model → pricing_fields`:
17
+
18
+ ```json
19
+ {
20
+ "anthropic": {
21
+ "claude-sonnet-4-6": {
22
+ "input": 3.0,
23
+ "output": 15.0,
24
+ "cache_read": 0.3,
25
+ "cache_creation": 3.75,
26
+ "cache_creation_1h": 6.0,
27
+ "input_above_200k": 6.0,
28
+ "output_above_200k": 22.5
29
+ }
30
+ }
31
+ }
32
+ ```
33
+
34
+ Provider keys: `anthropic`, `openai`, `google` (future: `bedrock/us`, `azure/eu`, etc.)
35
+
36
+ All prices are in **$/1M tokens**. Omit any field whose value is zero.
37
+
38
+ Available pricing fields:
39
+ - `input`, `output` — base rates
40
+ - `cache_read` — cache read cost
41
+ - `cache_creation` — cache creation cost (5-minute ephemeral for Anthropic)
42
+ - `cache_creation_1h` — Anthropic 1-hour cache creation cost
43
+ - `reasoning_output` — separate reasoning token rate (if different from output)
44
+ - `input_above_200k`, `output_above_200k`, `cache_read_above_200k`, `cache_creation_above_200k`, `cache_creation_1h_above_200k` — context tier rates for >200k tokens
45
+
46
+ ### Steps to follow
47
+
48
+ 1. **Read current pricing data**: Read `custom.json` and `generated.json` to see existing models and prices.
49
+
50
+ 2. **Determine which models to update**:
51
+ - If the user provided arguments (e.g., `/update-pricing all Gemini models`), search for pricing for those specific models.
52
+ - If no arguments were provided, refresh pricing for models in `custom.json`.
53
+
54
+ 3. **Web search for current pricing**: Use WebSearch to find the most up-to-date pricing:
55
+ - Search official pricing pages: Anthropic (anthropic.com/pricing), OpenAI (openai.com/api/pricing), Google Cloud (cloud.google.com/vertex-ai/generative-ai/pricing)
56
+ - Focus on models/fields that `generated.json` is missing (check litellm gaps)
57
+
58
+ 4. **Update `custom.json`**:
59
+ - Add/update entries for models or fields not covered by `generated.json`
60
+ - For partially missing fields (e.g., litellm has base rates but not `cache_creation_1h`), only include the missing fields — they will be merged with generated data
61
+ - For completely missing models, include all known pricing fields
62
+ - Remove entries from `custom.json` that are now fully covered by `generated.json`
63
+
64
+ 5. **Verify**: Run `make test name=pricing` to ensure nothing is broken.
65
+
66
+ ## Important notes
67
+
68
+ - All prices are in dollars per 1 million tokens
69
+ - `custom.json` entries **merge** with `generated.json` — you only need to specify fields that differ or are missing
70
+ - Anthropic has separate 5-minute and 1-hour cache creation prices: the 5-minute rate (`cache_creation`) is 1.25x the base input rate, and the 1-hour rate (`cache_creation_1h`) is 2x the base input rate
71
+ - OpenAI and Google cache creation is free — no `cache_creation` field needed
72
+ - Models are matched by substring, so keys should be specific enough to avoid false matches
73
+ - Always prioritize official pricing pages from model providers
74
+ - To regenerate `generated.json` from litellm, run: `python tools/sync_pricing.py --apply`
@@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org).
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ### Added
11
+ - **`top_p`** parameter in `LLMRequest` and `LLMConfig` — nucleus sampling, supported by all providers.
12
+
13
+ ### Changed
14
+ - **Universal pricing registry** — replaced three separate per-protocol pricing modules
15
+ (`anthropic.py`, `openai.py`, `google.py`) with a single JSON-backed `PricingRegistry`.
16
+ Pricing data now lives in `flowra/llm/pricing/data/generated.json` (auto-generated from
17
+ litellm) and `custom.json` (manual overrides). Supports context tiers (>200k tokens),
18
+ reasoning tokens, and cache creation TTL variants through a uniform `estimate_cost()` API.
19
+
20
+ ## [0.0.25] - 2026-03-24
21
+
22
+ ### Changed
23
+ - **MLflow tool output** — JSON tool results are now parsed into structured dicts
24
+ for display in MLflow UI, instead of escaped strings.
25
+ - **`SessionStorage`** and **`ChangeSet`** are now exported from `flowra.agent`.
26
+
10
27
  ## [0.0.24] - 2026-03-24
11
28
 
12
29
  ### Added
@@ -51,7 +51,7 @@ Provider-agnostic interface for calling LLMs:
51
51
 
52
52
  Providers live in `flowra/llm/providers/`. Currently: `AnthropicVertexProvider`, `OpenAIProvider`, `GoogleVertexProvider`.
53
53
 
54
- Pricing utilities live in `flowra/llm/pricing/` — per-protocol cost estimation (anthropic, openai, google). Providers use these to populate `Usage.cost_usd` and `Usage.cost` (`CostBreakdown`).
54
+ Pricing lives in `flowra/llm/pricing/` — a universal JSON-backed registry (`PricingRegistry`) with per-provider cost estimation. The data files are `data/generated.json` (auto-generated from litellm via `tools/sync_pricing.py`) and `data/custom.json` (manual overrides, merged on load). It supports context tiers (>200k tokens), reasoning tokens, and cache creation TTL variants. Providers call `estimate_cost(model, provider=..., ...)` → `CostBreakdown`.
55
55
 
56
56
  ### Flowing context (`flowra/agent/flow/flowing_registry.py`)
57
57
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flowra
3
- Version: 0.0.25.dev35
3
+ Version: 0.0.26.dev37
4
4
  Summary: Flowra — flow infrastructure for building stateful LLM agents
5
5
  Project-URL: Repository, https://github.com/anna-money/flowra
6
6
  Project-URL: Changelog, https://github.com/anna-money/flowra/blob/master/CHANGELOG.md
@@ -14,7 +14,7 @@
14
14
  "LLM ABSTRACTION: LLMProvider is the core interface — two methods: async call(LLMRequest) -> LLMResponse and async stream(LLMRequest) -> AsyncIterator[StreamEvent]. Also an async context manager: supports aclose() and 'async with provider:' for resource cleanup",
15
15
  "stream() returns StreamEvent = TextDelta | ThinkingDelta | ContentComplete. TextDelta/ThinkingDelta carry incremental text; ContentComplete is always last and contains the full LLMResponse",
16
16
  "Default stream() implementation calls call() and yields a single ContentComplete — providers override for real-time streaming",
17
- "LLMRequest contains: model, system (list[SystemMessage], default []), messages (list[UserMessage | AssistantMessage], default []), tools, json_schema, temperature, max_tokens, stop_sequences, additional_config, max_schema_retries. System messages are separate from conversation messages",
17
+ "LLMRequest contains: model, system (list[SystemMessage], default []), messages (list[UserMessage | AssistantMessage], default []), tools, json_schema, temperature, top_p, max_tokens, stop_sequences, additional_config, max_schema_retries. System messages are separate from conversation messages",
18
18
  "LLMResponse contains: message (AssistantMessage), stop_reason (StopReason), stop_sequence (str | None), usage (Usage | None), extra (dict[str, Any] — provider-specific data like provider_stop_reason, id)",
19
19
  "Usage contains: input_tokens, output_tokens, cache_read_input_tokens, cache_creation_input_tokens, cost_usd (total), cost (CostBreakdown with input/output/cache_read/cache_creation and total property). Token contract: input_tokens excludes cached tokens",
20
20
  "Messages: SystemMessage, UserMessage, AssistantMessage. System messages go in LLMRequest.system, conversation messages in LLMRequest.messages",
@@ -107,7 +107,7 @@
107
107
  "transient hint: blocks/messages/tools with transient=True are (1) skipped by NonTransient caching bundles (which cache the non-transient prefix — stop at first transient) and (2) auto-filtered from ChatAgent session history",
108
108
  "Anthropic extra passthrough: AnthropicVertexProvider merges block.extra into output dicts (**block.extra), so cache_control and other Anthropic-specific fields pass through directly",
109
109
 
110
- "CONFIG: LLMConfig(model, temperature, max_tokens, stop_sequences, additional_config) configures LLM calls",
110
+ "CONFIG: LLMConfig(model, temperature, top_p, max_tokens, stop_sequences, additional_config) configures LLM calls",
111
111
  "ConfigValue[T] wraps static or dynamic (callable) config values: ConfigValue[str] | ConfigValue[Callable[[], str]]",
112
112
 
113
113
  "QUICK START: Create provider -> create ToolRegistry -> create Config -> create AgentRuntime -> runtime.run()",
@@ -362,6 +362,7 @@ Shared LLM configuration used by all lib agents:
362
362
  LLMConfig(
363
363
  model="claude-sonnet-4-5@20250929",
364
364
  temperature=0.7, # optional
365
+ top_p=0.9, # optional
365
366
  max_tokens=4096, # optional
366
367
  stop_sequences=["END"], # optional
367
368
  additional_config={}, # provider-specific
@@ -321,6 +321,7 @@ LLMRequest(
321
321
  | `tools` | `list[Tool] \| None` | `None` | Available tools |
322
322
  | `json_schema` | `dict[str, Any] \| None` | `None` | JSON Schema for structured output |
323
323
  | `temperature` | `float \| None` | `None` | Generation temperature |
324
+ | `top_p` | `float \| None` | `None` | Nucleus sampling threshold |
324
325
  | `max_tokens` | `int \| None` | `None` | Maximum tokens in response |
325
326
  | `stop_sequences` | `list[str] \| None` | `None` | Stop sequences |
326
327
  | `max_schema_retries` | `int` | `3` | Retries on schema validation failure |
@@ -0,0 +1,244 @@
1
+ # Built-in LLM Retry with Backoff in Tool Loop — Research (March 2026)
2
+
3
+ Research date: 2026-03-24
4
+
5
+ ## Problem
6
+
7
+ When LLM providers return transient errors (429 rate limit, 5xx server errors,
8
+ network timeouts), the tool loop has no retry logic. The exception propagates
9
+ through `ToolLoopAgent.call_llm()` → `Engine.advance()` → `AgentRuntime` and
10
+ kills the agent. The user gets an unrecoverable crash on a transient error.
11
+
12
+ This is a gap in the tool loop as a "batteries included" building block. Users
13
+ should not need to write a wrapper agent or custom provider to handle the most
14
+ common LLM failure mode.
15
+
16
+ Reference: Strands SDK implements this as `ModelRetryStrategy` — a hook-based
17
+ plugin with exponential backoff on `ModelThrottledException`.
18
+
19
+ ## Current state
20
+
21
+ ### What happens on LLM error
22
+
23
+ ```
24
+ ToolLoopAgent.call_llm()
25
+ └─ self.__provider.call(request) ← no try/except
26
+ └─ raises e.g. anthropic.RateLimitError
27
+ └─ propagates to Engine.advance()
28
+ └─ Engine closes all spans with error, re-raises
29
+ └─ AgentRuntime.__run_loop_inner() sees exception, crashes
30
+ ```
31
+
32
+ **No catch, no retry, no backoff.** The agent is dead.
33
+
34
+ ### Provider exceptions
35
+
36
+ | Provider | Throttling exception | Other transient |
37
+ |---|---|---|
38
+ | Anthropic (`anthropic` SDK) | `anthropic.RateLimitError` (subclass of `APIStatusError`, status 429) | `APIConnectionError`, `APITimeoutError`, `InternalServerError` (5xx) |
39
+ | OpenAI (`openai` SDK) | `openai.RateLimitError` (status 429) | `APIConnectionError`, `APITimeoutError`, `InternalServerError` (5xx) |
40
+ | Google (`google.genai`) | `google.api_core.exceptions.ResourceExhausted` (429) | `ServiceUnavailable` (503), `DeadlineExceeded`, transient gRPC errors |
41
+
42
+ Note: both `anthropic` and `openai` SDKs have their own built-in retry logic
43
+ (2 retries by default), so by the time the exception reaches us, the SDK has
44
+ already retried. But SDK retries are short (seconds), while real throttling
45
+ can last minutes. And SDK retries don't help with extended outages.
46
+
47
+ ### What exists
48
+
49
+ - **JSON schema validation retry** — in `AnthropicVertexProvider` only, for
50
+ structured output. Not for API errors.
51
+ - **`max_consecutive_errors`** — in `ToolLoopConfig`, but for tool execution
52
+ errors (wrong tool calls), not LLM API errors.
53
+ - **Crash recovery** — `SessionStorage` can resume after crash, but the user
54
+ has to restart the agent manually. Not the same as automatic retry.
55
+
56
+ ## Where to add retry
57
+
58
+ ### Option A: Inside ToolLoopAgent.call_llm() (recommended)
59
+
60
+ Wrap the LLM call in a retry loop directly in the tool loop step:
61
+
62
+ ```python
63
+ @step("call_llm")
64
+ async def call_llm(self) -> GotoStep | Spawn | ToolLoopResult:
65
+ ...
66
+ retry_config = self.__config.retry # RetryConfig(max_attempts=5, initial_delay=4, max_delay=240)
67
+
68
+ for attempt in range(retry_config.max_attempts):
69
+ try:
70
+ async with self.__emitter.span(LLMCallSpan(request=request)) as llm_span:
71
+ response = await self.__provider.call(request)
72
+ llm_span.response = response
73
+ break
74
+ except RETRYABLE_EXCEPTIONS as exc:
75
+ if attempt == retry_config.max_attempts - 1:
76
+ raise
77
+ delay = min(retry_config.initial_delay * (retry_config.backoff_factor ** attempt), retry_config.max_delay)
78
+ # fire event for observability
79
+ await self.__emitter.emit(LLMRetryEvent(attempt=attempt, delay=delay, error=exc))
80
+ await asyncio.sleep(delay)
81
+ ...
82
+ ```
83
+
84
+ **Pros:**
85
+ - Simple, self-contained, no new abstractions
86
+ - Works for both `call()` and `stream()` (for `stream()`, only failures raised before the first event are retried)
87
+ - Retry is per-LLM-call, not per-agent — exactly the right scope
88
+ - Composable with crash recovery (if all retries fail → crash → resume)
89
+ - The tool loop is THE building block; it should own this
90
+
91
+ **Cons:**
92
+ - Hardcoded in tool loop — not configurable via hooks
93
+ - But: this is infrastructure, not business logic. Like TCP retransmission.
94
+
95
+ ### Option B: LLMProvider wrapper (decorator)
96
+
97
+ ```python
98
+ class RetryProvider(LLMProvider):
99
+ def __init__(self, inner: LLMProvider, config: RetryConfig): ...
100
+ async def call(self, request): ... # retry loop around inner.call()
101
+ ```
102
+
103
+ **Pros:**
104
+ - Decoupled from tool loop — works outside agents too
105
+ - Provider-agnostic
106
+
107
+ **Cons:**
108
+ - User must wrap every provider manually
109
+ - Not "batteries included" — the opposite of what we want
110
+ - Streaming retry is tricky (partially consumed stream)
111
+ - Doesn't integrate with tool loop observability
112
+
113
+ ### Option C: Hook-based (like Strands)
114
+
115
+ A hook subscribes to a new `AfterLLMCallEvent` with an `error` field and sets
116
+ `event.retry = True`.
117
+
118
+ **Pros:**
119
+ - Pluggable, configurable
120
+ - Follows our hook pattern
121
+
122
+ **Cons:**
123
+ - Requires mutable event + retry flag + loop in the tool loop
124
+ - Overengineered for what is essentially "sleep and retry on 429"
125
+ - Hook state management across retries is subtle
126
+ - The Strands approach stores mutable state on the strategy object —
127
+ not great for parallel/concurrent agents sharing the same strategy
128
+
129
+ ### Recommendation: Option A
130
+
131
+ Retry on transient LLM errors is infrastructure. It belongs in the tool loop
132
+ with a simple config, not in a pluggable hook system. The hook system is for
133
+ business logic (model fallback, guardrails, caching). Retrying a 429 is not
134
+ business logic — it's plumbing.
135
+
136
+ ## Design details
137
+
138
+ ### RetryConfig
139
+
140
+ ```python
141
+ @dataclass(frozen=True)
142
+ class RetryConfig:
143
+ max_attempts: int = 5 # total attempts (1 = no retry)
144
+ initial_delay: float = 4.0 # seconds
145
+ max_delay: float = 240.0 # seconds
146
+ backoff_factor: float = 2.0 # exponential multiplier
147
+ retryable: Callable[[BaseException], bool] | None = None # custom predicate
148
+ ```
149
+
150
+ Default in `ToolLoopConfig`:
151
+ ```python
152
+ retry: RetryConfig = RetryConfig()
153
+ ```
154
+
155
+ ### What to retry
156
+
157
+ Default retryable predicate — check for known transient exceptions from all
158
+ three provider SDKs. The user can override with a custom predicate.
159
+
160
+ Key question: should we catch broad `Exception` and check status codes, or
161
+ import provider-specific exceptions? Importing provider SDKs creates unwanted
162
+ dependencies. Better approach: a generic check:
163
+
164
+ ```python
165
+ def is_retryable(exc: BaseException) -> bool:
166
+ # Check for status code attribute (anthropic, openai)
167
+ status = getattr(exc, "status_code", None) or getattr(exc, "status", None)
168
+ if status in (429, 500, 502, 503, 529):
169
+ return True
170
+ # Check for connection/timeout errors by name pattern
171
+ type_name = type(exc).__name__
172
+ if any(s in type_name for s in ("Timeout", "Connection", "Unavailable")):
173
+ return True
174
+ return False
175
+ ```
176
+
177
+ No provider SDK imports needed. Works with any provider.
178
+
179
+ ### Observability
180
+
181
+ Fire an event/span for each retry so the user can see what's happening:
182
+ - `LLMRetryEvent(attempt, delay, error)` — hook event
183
+ - Or simply log + emit through existing span hooks
184
+
185
+ The `LLMCallSpan` should capture the final successful call, not the failed
186
+ attempts. Failed attempts can be logged as sub-events or separate lightweight
187
+ spans.
188
+
189
+ ### Streaming
190
+
191
+ For `stream()`, the retry wraps the entire stream creation. If the stream
192
+ fails mid-way (connection drop), that's harder — the response is partially
193
+ consumed. Options:
194
+
195
+ 1. **Retry only on initial connection errors** — if `stream()` raises before
196
+ yielding any events, retry. If it fails mid-stream, propagate the error.
197
+ This covers 429 (rejected before streaming starts).
198
+
199
+ 2. **Full stream retry** — buffer events and replay on retry. Complex, and
200
+ the user may have already processed some deltas.
201
+
202
+ Start with (1). Mid-stream failures are rare and a different problem.
203
+
204
+ ### Interrupt integration
205
+
206
+ The `asyncio.sleep(delay)` during retry backoff should respect the interrupt
207
+ token. If the agent is interrupted during a retry wait, it should stop
208
+ immediately:
209
+
210
+ ```python
211
+ await with_interrupt(asyncio.sleep(delay), self.__interrupt)
212
+ ```
213
+
214
+ This already exists in the codebase for stream interruption.
215
+
216
+ ## Relation to other features
217
+
218
+ | Feature | Relationship |
219
+ |---|---|
220
+ | **Model fallback** (`model_fallback.md`) | Complementary. Retry handles transient errors (same model). Fallback handles persistent errors (switch model). Could chain: retry N times → fall back to stronger model. |
221
+ | **Crash recovery** | Retry is the first line of defense. If all retries fail, crash recovery kicks in. |
222
+ | **Hooks** | Retry emits events for observability but is not hook-driven. |
223
+ | **`max_consecutive_errors`** | Different scope — tool execution errors, not LLM API errors. |
224
+
225
+ ## Open questions
226
+
227
+ 1. **Should retry config be a `ConfigValue` (callable)?** Probably not — retry
228
+ config rarely needs to change dynamically. Keep it simple.
229
+
230
+ 2. **Jitter?** Exponential backoff with jitter is best practice to avoid
231
+ thundering herd. Add random jitter (±25%) to the delay.
232
+
233
+ 3. **Retry-After header?** Some providers return a `Retry-After` header with
234
+ 429 responses. Should we parse it? The SDKs might already handle this in
235
+ their built-in retries, so by the time it reaches us, there's no header.
236
+ Low priority.
237
+
238
+ 4. **Per-provider retry?** Some providers are more aggressive with throttling
239
+ (Google Vertex with short bursts). Should retry config be per-provider?
240
+ Probably not — keep one config, the user can tune it.
241
+
242
+ 5. **Default: on or off?** Should retry be enabled by default? Yes — the
243
+ default `RetryConfig()` should retry with sensible defaults. Users who want
244
+ fail-fast can set `max_attempts=1`.
@@ -13,11 +13,11 @@ Real LLM pricing is significantly more complex.
13
13
 
14
14
  The same model is priced differently depending on how you access it:
15
15
 
16
- | Model | Direct API | Vertex AI | Bedrock | Azure |
17
- |---|---|---|---|---|
18
- | Claude Sonnet 4.6 | $3/$15 | different | different | N/A |
19
- | GPT-4o | $2.50/$10 | N/A | N/A | different |
20
- | Gemini 2.5 Pro | N/A | $1.25/$10 | N/A | N/A |
16
+ | Model | Direct API | Vertex AI | Bedrock | Azure |
17
+ |-------------------|--------------|------------|-----------|-----------|
18
+ | Claude Sonnet 4.6 | $3/$15 | different | different | N/A |
19
+ | GPT-4o | $2.50/$10 | N/A | N/A | different |
20
+ | Gemini 2.5 Pro | N/A | $1.25/$10 | N/A | N/A |
21
21
 
22
22
  litellm tracks this — each model has separate entries per provider:
23
23
  - `claude-sonnet-4-6` (litellm_provider: "anthropic")
@@ -26,6 +26,19 @@
26
26
 
27
27
 
28
28
 
29
+ ## Built-in LLM retry with backoff
30
+
31
+ - **Automatic retry with exponential backoff for transient LLM errors (429, 5xx, timeouts).**
32
+ The tool loop is a "batteries included" building block — retry on transient provider errors
33
+ should be built in, not require a wrapper agent. Add `RetryConfig` to `ToolLoopConfig`
34
+ (max_attempts, initial_delay, max_delay, backoff_factor, custom retryable predicate).
35
+ Wrap the LLM call in `ToolLoopAgent.call_llm()` with a retry loop. Use a generic
36
+ `is_retryable()` check (status codes + exception name patterns) to avoid importing
37
+ provider SDKs. Respect interrupt tokens during backoff sleep. Emit observability events
38
+ on retry. Start with retry on initial connection errors only (not mid-stream failures).
39
+ See `docs/research/llm_retry_backoff.md`.
40
+
41
+
29
42
  ## Documentation benchmark suite
30
43
 
31
44
  - A series of tasks given to a coding agent that has access only to documentation
@@ -65,8 +65,8 @@ def _resolve_experiment_id(experiment_name: str) -> str:
65
65
  class _MlflowTracing:
66
66
  __slots__ = ()
67
67
 
68
+ @staticmethod
68
69
  def install(
69
- self,
70
70
  runtime: AgentRuntime,
71
71
  *,
72
72
  experiment_name: str | None = None,
@@ -347,8 +347,11 @@ def _format_chat_inputs(req: LLMRequest) -> dict[str, Any]:
347
347
  inputs: dict[str, Any] = {"model": req.model}
348
348
  if req.temperature is not None:
349
349
  inputs["temperature"] = req.temperature
350
+ if req.top_p is not None:
351
+ inputs["top_p"] = req.top_p
350
352
  if req.max_tokens is not None:
351
353
  inputs["max_tokens"] = req.max_tokens
354
+ inputs.update(req.additional_config)
352
355
 
353
356
  messages: list[dict[str, Any]] = []
354
357
  for msg in req.system:
@@ -200,8 +200,12 @@ def _make_llm_handler(
200
200
  }
201
201
  if span.request.temperature is not None:
202
202
  attrs["gen_ai.request.temperature"] = span.request.temperature
203
+ if span.request.top_p is not None:
204
+ attrs["gen_ai.request.top_p"] = span.request.top_p
203
205
  if span.request.max_tokens is not None:
204
206
  attrs["gen_ai.request.max_tokens"] = span.request.max_tokens
207
+ for key, val in span.request.additional_config.items():
208
+ attrs[f"gen_ai.request.{key}"] = val if isinstance(val, str | int | float | bool) else str(val)
205
209
 
206
210
  otel_span = _start_span(otel_parent, tracer, f"chat {model_name}", kind=SpanKind.CLIENT, attributes=attrs)
207
211
  otel_parent.set(otel_span)
@@ -36,6 +36,7 @@ class LLMCallAgent(Agent[LLMCallSpec, LLMCallResult]):
36
36
  system=list(spec.system),
37
37
  messages=list(spec.messages),
38
38
  temperature=llm_config.temperature,
39
+ top_p=llm_config.top_p,
39
40
  max_tokens=llm_config.max_tokens,
40
41
  stop_sequences=llm_config.stop_sequences,
41
42
  additional_config=llm_config.additional_config,
@@ -8,6 +8,7 @@ __all__ = ["LLMConfig"]
8
8
  class LLMConfig:
9
9
  model: str
10
10
  temperature: float | None = None
11
+ top_p: float | None = None
11
12
  max_tokens: int | None = None
12
13
  stop_sequences: list[str] | None = None
13
14
  additional_config: dict[str, Any] = dataclasses.field(default_factory=dict)
@@ -160,6 +160,7 @@ class ToolLoopAgent(Agent[ToolLoopSpec, ToolLoopResult]):
160
160
  tools=tools,
161
161
  json_schema=json_schema,
162
162
  temperature=llm_config.temperature,
163
+ top_p=llm_config.top_p,
163
164
  max_tokens=llm_config.max_tokens,
164
165
  stop_sequences=llm_config.stop_sequences,
165
166
  additional_config=llm_config.additional_config,
@@ -0,0 +1,40 @@
1
+ """Universal LLM pricing — JSON-backed registry with per-provider cost estimation."""
2
+
3
+ from ..response import CostBreakdown
4
+ from .registry import ModelPricing, PricingRegistry
5
+
6
+ __all__ = ["CostBreakdown", "ModelPricing", "PricingRegistry", "estimate_cost", "get_registry"]
7
+
8
+ _default: PricingRegistry | None = None
9
+
10
+
11
+ def get_registry() -> PricingRegistry:
12
+ """Return the default :class:`PricingRegistry` (lazy-loaded singleton)."""
13
+ global _default
14
+ if _default is None:
15
+ _default = PricingRegistry.load_default()
16
+ return _default
17
+
18
+
19
+ def estimate_cost(
20
+ model: str,
21
+ *,
22
+ provider: str,
23
+ input_tokens: int,
24
+ output_tokens: int,
25
+ cache_read_tokens: int = 0,
26
+ cache_creation_tokens: int = 0,
27
+ cache_creation_1h_tokens: int = 0,
28
+ reasoning_tokens: int = 0,
29
+ ) -> CostBreakdown | None:
30
+ """Convenience wrapper around :meth:`PricingRegistry.estimate_cost`."""
31
+ return get_registry().estimate_cost(
32
+ model,
33
+ provider=provider,
34
+ input_tokens=input_tokens,
35
+ output_tokens=output_tokens,
36
+ cache_read_tokens=cache_read_tokens,
37
+ cache_creation_tokens=cache_creation_tokens,
38
+ cache_creation_1h_tokens=cache_creation_1h_tokens,
39
+ reasoning_tokens=reasoning_tokens,
40
+ )
@@ -0,0 +1,52 @@
1
+ {
2
+ "anthropic": {
3
+ "claude-sonnet-4-6": {
4
+ "cache_creation_1h": 6.0
5
+ },
6
+ "claude-sonnet-4-5": {
7
+ "cache_creation_1h": 6.0
8
+ },
9
+ "claude-sonnet-4": {
10
+ "cache_creation_1h": 6.0
11
+ },
12
+ "claude-sonnet-3-7": {
13
+ "input": 3.0,
14
+ "output": 15.0,
15
+ "cache_read": 0.3,
16
+ "cache_creation": 3.75,
17
+ "cache_creation_1h": 6.0
18
+ },
19
+ "claude-haiku-3-5": {
20
+ "input": 0.8,
21
+ "output": 4.0,
22
+ "cache_read": 0.08,
23
+ "cache_creation": 1.0,
24
+ "cache_creation_1h": 1.6
25
+ },
26
+ "claude-haiku-3": {
27
+ "input": 0.25,
28
+ "output": 1.25,
29
+ "cache_read": 0.03,
30
+ "cache_creation": 0.3,
31
+ "cache_creation_1h": 0.5
32
+ },
33
+ "claude-opus-3": {
34
+ "input": 15.0,
35
+ "output": 75.0,
36
+ "cache_read": 1.5,
37
+ "cache_creation": 18.75,
38
+ "cache_creation_1h": 30.0
39
+ }
40
+ },
41
+ "openai": {
42
+ "mercury-coder": {
43
+ "input": 0.25,
44
+ "output": 1.0
45
+ },
46
+ "mercury-2": {
47
+ "input": 0.25,
48
+ "output": 0.75,
49
+ "cache_read": 0.025
50
+ }
51
+ }
52
+ }