flowra 0.0.25.dev35__tar.gz → 0.0.26.dev37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowra-0.0.26.dev37/.claude/commands/update-pricing.md +74 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/CHANGELOG.md +17 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/CLAUDE.md +1 -1
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/PKG-INFO +1 -1
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/context7.json +2 -2
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/lib.md +1 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/llm.md +1 -0
- flowra-0.0.26.dev37/docs/research/llm_retry_backoff.md +244 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/pricing_complexity.md +5 -5
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/todo.md +13 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/ext/mlflow.py +4 -1
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/ext/otel.py +4 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/llm_call/agent.py +1 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/llm_config.py +1 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/agent.py +1 -0
- flowra-0.0.26.dev37/flowra/llm/pricing/__init__.py +40 -0
- flowra-0.0.26.dev37/flowra/llm/pricing/data/custom.json +52 -0
- flowra-0.0.26.dev37/flowra/llm/pricing/data/generated.json +791 -0
- flowra-0.0.26.dev37/flowra/llm/pricing/registry.py +186 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/providers/anthropic_vertex.py +6 -4
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/providers/google_vertex.py +10 -3
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/providers/openai.py +10 -3
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/request.py +1 -0
- flowra-0.0.26.dev37/flowra/version.py +2 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/ext/test_otel.py +8 -1
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_llm_call_agent.py +2 -1
- flowra-0.0.26.dev37/tests/llm/pricing/test_registry.py +382 -0
- flowra-0.0.26.dev37/tools/sync_pricing.py +218 -0
- flowra-0.0.25.dev35/.claude/commands/update-pricing.md +0 -48
- flowra-0.0.25.dev35/flowra/llm/pricing/__init__.py +0 -3
- flowra-0.0.25.dev35/flowra/llm/pricing/anthropic.py +0 -68
- flowra-0.0.25.dev35/flowra/llm/pricing/google.py +0 -56
- flowra-0.0.25.dev35/flowra/llm/pricing/openai.py +0 -70
- flowra-0.0.25.dev35/flowra/version.py +0 -2
- flowra-0.0.25.dev35/tests/llm/pricing/test_anthropic.py +0 -71
- flowra-0.0.25.dev35/tests/llm/pricing/test_google.py +0 -43
- flowra-0.0.25.dev35/tests/llm/pricing/test_openai.py +0 -50
- flowra-0.0.25.dev35/tools/sync_pricing.py +0 -360
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.env.example +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.github/workflows/master.yml +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.github/workflows/publish.yml +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.github/workflows/pull_request.yml +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.github/workflows/pull_request_e2e.yml +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.gitignore +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/.python-version +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/LICENSE +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/Makefile +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/README.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/agents.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/getting-started.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/agent.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/architecture.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/ext/mlflow.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/ext/otel.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/ext/tracing-guide.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/ext.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/lib/anthropic.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/patterns.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/internal/tools.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/llm.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/observability.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/patterns.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/flowing_context.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/hooks_redesign.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/mlflow_context_migration.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/model_fallback.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/otel_integration.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/provider_extensions.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/spawn_strategies.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/strands_comparison.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/tool_error_signals.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/tool_search_tool.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/research/voice_stt.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step1_structure.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step2_code_style.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step3_documentation.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step4_doc_readability.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step5_doc_audit.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/review_prompts/step6_tests.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/docs/tools.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/TRACING_COMBINATIONS.md +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/agent_as_tool.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/app_agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/console_chat.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/agents_custom.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/agents_parallel.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/getting_started_chat.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/getting_started_streaming.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/getting_started_tools.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/llm_streaming.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/llm_structured_output.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/docs/tools_service_injection.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/escalation.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/llm_logging.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/llm_routing.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/menu_agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/menu_agent_class.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_dual_export_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_nested_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_otel_both_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_otel_nested_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/mlflow_parallel_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/model_registry.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/otel_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/otel_jaeger_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/otel_nested_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/otel_visualize.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/race.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/span_crash_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/span_demo.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/system_prompt.txt +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tools/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tools/calculator.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tools/random_numbers.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tools/switch_model.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/examples/tui_chat.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/_sentinel.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/agent_arg.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/compiler.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/contract.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/init_params.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/instance.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/step_params.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/step_validation.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/steps.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/type_helpers.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/compile/type_registry.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/model.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/registry.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/step.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/step_arg.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/definition/step_helpers.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/actions.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/context.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/flowing_registry.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/hooks.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/interrupt.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/interrupt_helpers.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/spawn.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/flow/timeout.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/engine.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/execution.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/instance_factory.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/runtime.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/scope.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/serialization.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/spans.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/runtime/spawn_tree.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/services.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/state/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/state/markers.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/state/store.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/state/values.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/storage/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/storage/file.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/storage/in_memory.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/agent/storage/session_storage.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/ext/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/anthropic/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/anthropic/cache.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/anthropic/presets.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/anthropic/tool_search.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/config.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/hook_events.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/chat/spec.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/config_value.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/llm_call/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/llm_call/spec.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/observability/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/observability/llm_hooks.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/config.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/context.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/hook_events.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/spec.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/tool_call/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/tool_call/agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/tool_call/agent_tool.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/lib/tool_loop/tool_call/context.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/base.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/blocks.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/messages.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/provider.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/providers/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/response.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/schema_formatting.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/schema_validation.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/stream.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/llm/tools.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/py.typed +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/local_tool.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/mcp_connection.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/tool_arg.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/tool_group.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/tool_registry.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/flowra/tools/types.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/pyproject.toml +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/compile/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/compile/test_compile.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/compile/test_type_helpers.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/test_agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/test_registry.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/definition/test_step_helpers.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_agent_def.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_context.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_flowing_registry.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_flowing_registry_tasks.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_flowing_sync.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_hooks.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_interrupt.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_spans.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_timeout.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/flow/test_with_interrupt.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_engine.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_engine_spans.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_hook_context.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_persistence.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_runtime.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_scope.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_serialization.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/runtime/test_spec_in_constructor.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/state/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/state/test_values.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/storage/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/storage/test_file.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/storage/test_in_memory.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/agent/test_missing_scenarios.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/ext/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/ext/test_mlflow.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/anthropic/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/anthropic/test_anthropic.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_chat_agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_config_value.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_matches_tool_filter.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_tool_call_agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_tool_call_agent_call_agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/test_tool_loop_agent.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/lib/tool_loop/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/pricing/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_anthropic_e2e.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_anthropic_vertex.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_google_vertex.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_google_vertex_e2e.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_openai_e2e.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/providers/test_openai_provider.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_cost_breakdown.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_metadata.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_response.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_schema_formatting.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_schema_validation.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/llm/test_stream.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/__init__.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/test_local_tool.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/test_mcp_connection.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/test_tool_group.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/tests/tools/test_tool_registry.py +0 -0
- {flowra-0.0.25.dev35 → flowra-0.0.26.dev37}/uv.lock +0 -0
.claude/commands/update-pricing.md (new file)
@@ -0,0 +1,74 @@
+# Update Pricing
+
+Update the LLM pricing data in `flowra/llm/pricing/data/` with current pricing from the web.
+
+## Instructions
+
+You are updating the pricing data for LLM models used by this project.
+
+### Project structure
+
+Pricing is stored in JSON files under `flowra/llm/pricing/data/`:
+
+- `generated.json` — auto-generated from litellm via `tools/sync_pricing.py`. **Do not edit manually.**
+- `custom.json` — manual overrides and additions for models/fields missing from litellm. Custom entries are **merged** with generated entries (custom fields override, other fields preserved from generated).
+
+The JSON format is `provider → model → pricing_fields`:
+
+```json
+{
+  "anthropic": {
+    "claude-sonnet-4-6": {
+      "input": 3.0,
+      "output": 15.0,
+      "cache_read": 0.3,
+      "cache_creation": 3.75,
+      "cache_creation_1h": 6.0,
+      "input_above_200k": 6.0,
+      "output_above_200k": 22.5
+    }
+  }
+}
+```
+
+Provider keys: `anthropic`, `openai`, `google` (future: `bedrock/us`, `azure/eu`, etc.)
+
+All prices in **$/1M tokens**. Omit fields that are zero.
+
+Available pricing fields:
+- `input`, `output` — base rates
+- `cache_read` — cache read cost
+- `cache_creation` — cache creation cost (5-minute ephemeral for Anthropic)
+- `cache_creation_1h` — Anthropic 1-hour cache creation cost
+- `reasoning_output` — separate reasoning token rate (if different from output)
+- `input_above_200k`, `output_above_200k`, `cache_read_above_200k`, `cache_creation_above_200k`, `cache_creation_1h_above_200k` — context tier rates for >200k tokens
+
+### Steps to follow
+
+1. **Read current pricing data**: Read `custom.json` and `generated.json` to see existing models and prices.
+
+2. **Determine which models to update**:
+   - If the user provided arguments (e.g., `/update-pricing all Gemini models`), search for pricing for those specific models.
+   - If no arguments were provided, refresh pricing for models in `custom.json`.
+
+3. **Web search for current pricing**: Use WebSearch to find the most up-to-date pricing:
+   - Search official pricing pages: Anthropic (anthropic.com/pricing), OpenAI (openai.com/api/pricing), Google Cloud (cloud.google.com/vertex-ai/generative-ai/pricing)
+   - Focus on models/fields that `generated.json` is missing (check litellm gaps)
+
+4. **Update `custom.json`**:
+   - Add/update entries for models or fields not covered by `generated.json`
+   - For partially missing fields (e.g., litellm has base rates but not `cache_creation_1h`), only include the missing fields — they will be merged with generated data
+   - For completely missing models, include all known pricing fields
+   - Remove entries from `custom.json` that are now fully covered by `generated.json`
+
+5. **Verify**: Run `make test name=pricing` to ensure nothing is broken.
+
+## Important notes
+
+- All prices are in dollars per 1 million tokens
+- `custom.json` entries **merge** with `generated.json` — you only need to specify fields that differ or are missing
+- Anthropic has separate 5-minute and 1-hour cache creation prices. 5m = 1.25x base input, 1h = 2x base input
+- OpenAI and Google cache creation is free — no `cache_creation` field needed
+- Model matching uses substring matching, so keys should be specific enough to avoid false matches
+- Always prioritize official pricing pages from model providers
+- To regenerate `generated.json` from litellm, run: `python tools/sync_pricing.py --apply`
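The merge rule the command describes ("custom fields override, other fields preserved from generated") amounts to a per-model dict union. A minimal sketch of those semantics follows — `merge_pricing` is a hypothetical illustration, not the actual loader in `flowra/llm/pricing/registry.py`:

```python
# Hypothetical sketch of the custom.json/generated.json merge semantics.
def merge_pricing(generated: dict, custom: dict) -> dict:
    merged: dict = {}
    for provider in generated.keys() | custom.keys():
        models = {m: dict(f) for m, f in generated.get(provider, {}).items()}
        for model, fields in custom.get(provider, {}).items():
            # Custom fields win; generated fields not mentioned in custom survive.
            models[model] = {**models.get(model, {}), **fields}
        merged[provider] = models
    return merged

generated = {"anthropic": {"claude-sonnet-4-6": {"input": 3.0, "output": 15.0}}}
custom = {"anthropic": {"claude-sonnet-4-6": {"cache_creation_1h": 6.0}}}
assert merge_pricing(generated, custom)["anthropic"]["claude-sonnet-4-6"] == {
    "input": 3.0,
    "output": 15.0,
    "cache_creation_1h": 6.0,
}
```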
CHANGELOG.md
@@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org).
 
 ## [Unreleased]
 
+### Added
+- **`top_p`** parameter in `LLMRequest` and `LLMConfig` — nucleus sampling, supported by all providers.
+
+### Changed
+- **Universal pricing registry** — replaced three separate per-protocol pricing modules
+  (`anthropic.py`, `openai.py`, `google.py`) with a single JSON-backed `PricingRegistry`.
+  Pricing data now lives in `flowra/llm/pricing/data/generated.json` (auto-generated from
+  litellm) and `custom.json` (manual overrides). Supports context tiers (>200k tokens),
+  reasoning tokens, and cache creation TTL variants through a uniform `estimate_cost()` API.
+
+## [0.0.25] - 2026-03-24
+
+### Changed
+- **MLflow tool output** — JSON tool results are now parsed into structured dicts
+  for display in MLflow UI, instead of escaped strings.
+- **`SessionStorage`** and **`ChangeSet`** are now exported from `flowra.agent`.
+
 ## [0.0.24] - 2026-03-24
 
 ### Added
CLAUDE.md
@@ -51,7 +51,7 @@ Provider-agnostic interface for calling LLMs:
 
 Providers live in `flowra/llm/providers/`. Currently: `AnthropicVertexProvider`, `OpenAIProvider`, `GoogleVertexProvider`.
 
-Pricing
+Pricing lives in `flowra/llm/pricing/` — universal JSON-backed registry (`PricingRegistry`) with per-provider cost estimation. Data files in `data/generated.json` (auto-generated from litellm via `tools/sync_pricing.py`) and `data/custom.json` (manual overrides, merged on load). Supports context tiers (>200k tokens), reasoning tokens, cache creation TTL variants. Providers call `estimate_cost(model, provider=..., ...)` → `CostBreakdown`.
 
 ### Flowing context (`flowra/agent/flow/flowing_registry.py`)
 
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flowra
-Version: 0.0.25.dev35
+Version: 0.0.26.dev37
 Summary: Flowra — flow infrastructure for building stateful LLM agents
 Project-URL: Repository, https://github.com/anna-money/flowra
 Project-URL: Changelog, https://github.com/anna-money/flowra/blob/master/CHANGELOG.md
context7.json
@@ -14,7 +14,7 @@
   "LLM ABSTRACTION: LLMProvider is the core interface — two methods: async call(LLMRequest) -> LLMResponse and async stream(LLMRequest) -> AsyncIterator[StreamEvent]. Also an async context manager: supports aclose() and 'async with provider:' for resource cleanup",
   "stream() returns StreamEvent = TextDelta | ThinkingDelta | ContentComplete. TextDelta/ThinkingDelta carry incremental text; ContentComplete is always last and contains the full LLMResponse",
   "Default stream() implementation calls call() and yields a single ContentComplete — providers override for real-time streaming",
-  "LLMRequest contains: model, system (list[SystemMessage], default []), messages (list[UserMessage | AssistantMessage], default []), tools, json_schema, temperature, max_tokens, stop_sequences, additional_config, max_schema_retries. System messages are separate from conversation messages",
+  "LLMRequest contains: model, system (list[SystemMessage], default []), messages (list[UserMessage | AssistantMessage], default []), tools, json_schema, temperature, top_p, max_tokens, stop_sequences, additional_config, max_schema_retries. System messages are separate from conversation messages",
   "LLMResponse contains: message (AssistantMessage), stop_reason (StopReason), stop_sequence (str | None), usage (Usage | None), extra (dict[str, Any] — provider-specific data like provider_stop_reason, id)",
   "Usage contains: input_tokens, output_tokens, cache_read_input_tokens, cache_creation_input_tokens, cost_usd (total), cost (CostBreakdown with input/output/cache_read/cache_creation and total property). Token contract: input_tokens excludes cached tokens",
   "Messages: SystemMessage, UserMessage, AssistantMessage. System messages go in LLMRequest.system, conversation messages in LLMRequest.messages",
context7.json
@@ -107,7 +107,7 @@
   "transient hint: blocks/messages/tools with transient=True are (1) skipped by NonTransient caching bundles (which cache the non-transient prefix — stop at first transient) and (2) auto-filtered from ChatAgent session history",
   "Anthropic extra passthrough: AnthropicVertexProvider merges block.extra into output dicts (**block.extra), so cache_control and other Anthropic-specific fields pass through directly",
 
-  "CONFIG: LLMConfig(model, temperature, max_tokens, stop_sequences, additional_config) configures LLM calls",
+  "CONFIG: LLMConfig(model, temperature, top_p, max_tokens, stop_sequences, additional_config) configures LLM calls",
   "ConfigValue[T] wraps static or dynamic (callable) config values: ConfigValue[str] | ConfigValue[Callable[[], str]]",
 
   "QUICK START: Create provider -> create ToolRegistry -> create Config -> create AgentRuntime -> runtime.run()",
docs/internal/lib.md
@@ -362,6 +362,7 @@ Shared LLM configuration used by all lib agents:
 LLMConfig(
     model="claude-sonnet-4-5@20250929",
     temperature=0.7,          # optional
+    top_p=0.9,                # optional
     max_tokens=4096,          # optional
     stop_sequences=["END"],   # optional
     additional_config={},     # provider-specific
docs/internal/llm.md
@@ -321,6 +321,7 @@ LLMRequest(
 | `tools` | `list[Tool] \| None` | `None` | Available tools |
 | `json_schema` | `dict[str, Any] \| None` | `None` | JSON Schema for structured output |
 | `temperature` | `float \| None` | `None` | Generation temperature |
+| `top_p` | `float \| None` | `None` | Nucleus sampling threshold |
 | `max_tokens` | `int \| None` | `None` | Maximum tokens in response |
 | `stop_sequences` | `list[str] \| None` | `None` | Stop sequences |
 | `max_schema_retries` | `int` | `3` | Retries on schema validation failure |
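The `top_p` rows above document the new nucleus-sampling knob: the provider samples only from the smallest token set whose cumulative probability reaches `top_p`. A minimal usage sketch — the import path is assumed from the file list (`flowra/llm/request.py`), and the model string mirrors the `LLMConfig` example earlier:

```python
from flowra.llm.request import LLMRequest  # path assumed from the file list above

# top_p=0.9 keeps the smallest token set covering 90% of probability mass;
# like temperature, it is forwarded only when not None.
request = LLMRequest(
    model="claude-sonnet-4-5@20250929",
    temperature=0.7,
    top_p=0.9,
)
```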
docs/research/llm_retry_backoff.md (new file)
@@ -0,0 +1,244 @@
+# Built-in LLM Retry with Backoff in Tool Loop — Research (March 2026)
+
+Research date: 2026-03-24
+
+## Problem
+
+When LLM providers return transient errors (429 rate limit, 5xx server errors,
+network timeouts), the tool loop has no retry logic. The exception propagates
+through `ToolLoopAgent.call_llm()` → `Engine.advance()` → `AgentRuntime` and
+kills the agent. The user gets an unrecoverable crash on a transient error.
+
+This is a gap in tool loop as a "batteries included" building block. Users
+should not need to write a wrapper agent or custom provider to handle the most
+common LLM failure mode.
+
+Reference: Strands SDK implements this as `ModelRetryStrategy` — a hook-based
+plugin with exponential backoff on `ModelThrottledException`.
+
+## Current state
+
+### What happens on LLM error
+
+```
+ToolLoopAgent.call_llm()
+└─ self.__provider.call(request)          ← no try/except
+   └─ raises e.g. anthropic.RateLimitError
+      └─ propagates to Engine.advance()
+         └─ Engine closes all spans with error, re-raises
+            └─ AgentRuntime.__run_loop_inner() sees exception, crashes
+```
+
+**No catch, no retry, no backoff.** The agent is dead.
+
+### Provider exceptions
+
+| Provider | Throttling exception | Other transient |
+|---|---|---|
+| Anthropic (`anthropic` SDK) | `anthropic.RateLimitError` (subclass of `APIStatusError`, status 429) | `APIConnectionError`, `APITimeoutError`, `InternalServerError` (5xx) |
+| OpenAI (`openai` SDK) | `openai.RateLimitError` (status 429) | `APIConnectionError`, `APITimeoutError`, `InternalServerError` (5xx) |
+| Google (`google.genai`) | `google.api_core.exceptions.ResourceExhausted` (429) | `ServiceUnavailable` (503), `DeadlineExceeded`, transient gRPC errors |
+
+Note: both `anthropic` and `openai` SDKs have their own built-in retry logic
+(2 retries by default), so by the time the exception reaches us, the SDK has
+already retried. But SDK retries are short (seconds), while real throttling
+can last minutes. And SDK retries don't help with extended outages.
+
+### What exists
+
+- **JSON schema validation retry** — in `AnthropicVertexProvider` only, for
+  structured output. Not for API errors.
+- **`max_consecutive_errors`** — in `ToolLoopConfig`, but for tool execution
+  errors (wrong tool calls), not LLM API errors.
+- **Crash recovery** — `SessionStorage` can resume after crash, but the user
+  has to restart the agent manually. Not the same as automatic retry.
+
+## Where to add retry
+
+### Option A: Inside ToolLoopAgent.call_llm() (recommended)
+
+Wrap the LLM call in a retry loop directly in the tool loop step:
+
+```python
+@step("call_llm")
+async def call_llm(self) -> GotoStep | Spawn | ToolLoopResult:
+    ...
+    retry_config = self.__config.retry  # RetryConfig(max_attempts=5, initial_delay=4, max_delay=240)
+
+    for attempt in range(retry_config.max_attempts):
+        try:
+            async with self.__emitter.span(LLMCallSpan(request=request)) as llm_span:
+                response = await self.__provider.call(request)
+                llm_span.response = response
+            break
+        except RETRYABLE_EXCEPTIONS as exc:
+            if attempt == retry_config.max_attempts - 1:
+                raise
+            delay = min(retry_config.initial_delay * (2 ** attempt), retry_config.max_delay)
+            # fire event for observability
+            await self.__emitter.emit(LLMRetryEvent(attempt=attempt, delay=delay, error=exc))
+            await asyncio.sleep(delay)
+    ...
+```
+
+**Pros:**
+- Simple, self-contained, no new abstractions
+- Works for both `call()` and `stream()`
+- Retry is per-LLM-call, not per-agent — exactly the right scope
+- Composable with crash recovery (if all retries fail → crash → resume)
+- The tool loop is THE building block; it should own this
+
+**Cons:**
+- Hardcoded in tool loop — not configurable via hooks
+- But: this is infrastructure, not business logic. Like TCP retransmission.
+
+### Option B: LLMProvider wrapper (decorator)
+
+```python
+class RetryProvider(LLMProvider):
+    def __init__(self, inner: LLMProvider, config: RetryConfig): ...
+    async def call(self, request): ...  # retry loop around inner.call()
+```
+
+**Pros:**
+- Decoupled from tool loop — works outside agents too
+- Provider-agnostic
+
+**Cons:**
+- User must wrap every provider manually
+- Not "batteries included" — the opposite of what we want
+- Streaming retry is tricky (partially consumed stream)
+- Doesn't integrate with tool loop observability
+
+### Option C: Hook-based (like Strands)
+
+A hook subscribes to a new `AfterLLMCallEvent` with an `error` field and sets
+`event.retry = True`.
+
+**Pros:**
+- Pluggable, configurable
+- Follows our hook pattern
+
+**Cons:**
+- Requires mutable event + retry flag + loop in the tool loop
+- Overengineered for what is essentially "sleep and retry on 429"
+- Hook state management across retries is subtle
+- The Strands approach stores mutable state on the strategy object —
+  not great for parallel/concurrent agents sharing the same strategy
+
+### Recommendation: Option A
+
+Retry on transient LLM errors is infrastructure. It belongs in the tool loop
+with a simple config, not in a pluggable hook system. The hook system is for
+business logic (model fallback, guardrails, caching). Retrying a 429 is not
+business logic — it's plumbing.
+
+## Design details
+
+### RetryConfig
+
+```python
+@dataclass(frozen=True)
+class RetryConfig:
+    max_attempts: int = 5        # total attempts (1 = no retry)
+    initial_delay: float = 4.0   # seconds
+    max_delay: float = 240.0     # seconds
+    backoff_factor: float = 2.0  # exponential multiplier
+    retryable: Callable[[BaseException], bool] | None = None  # custom predicate
+```
+
+Default in `ToolLoopConfig`:
+```python
+retry: RetryConfig = RetryConfig()
+```
+
+### What to retry
+
+Default retryable predicate — check for known transient exceptions from all
+three provider SDKs. The user can override with a custom predicate.
+
+Key question: should we catch broad `Exception` and check status codes, or
+import provider-specific exceptions? Importing provider SDKs creates unwanted
+dependencies. Better approach: a generic check:
+
+```python
+def is_retryable(exc: BaseException) -> bool:
+    # Check for status code attribute (anthropic, openai)
+    status = getattr(exc, "status_code", None) or getattr(exc, "status", None)
+    if status in (429, 500, 502, 503, 529):
+        return True
+    # Check for connection/timeout errors by name pattern
+    type_name = type(exc).__name__
+    if any(s in type_name for s in ("Timeout", "Connection", "Unavailable")):
+        return True
+    return False
+```
+
+No provider SDK imports needed. Works with any provider.
+
+### Observability
+
+Fire an event/span for each retry so the user can see what's happening:
+- `LLMRetryEvent(attempt, delay, error)` — hook event
+- Or simply log + emit through existing span hooks
+
+The `LLMCallSpan` should capture the final successful call, not the failed
+attempts. Failed attempts can be logged as sub-events or separate lightweight
+spans.
+
+### Streaming
+
+For `stream()`, the retry wraps the entire stream creation. If the stream
+fails mid-way (connection drop), that's harder — the response is partially
+consumed. Options:
+
+1. **Retry only on initial connection errors** — if `stream()` raises before
+   yielding any events, retry. If it fails mid-stream, propagate the error.
+   This covers 429 (rejected before streaming starts).
+
+2. **Full stream retry** — buffer events and replay on retry. Complex, and
+   the user may have already processed some deltas.
+
+Start with (1). Mid-stream failures are rare and a different problem.
+
+### Interrupt integration
+
+The `asyncio.sleep(delay)` during retry backoff should respect the interrupt
+token. If the agent is interrupted during a retry wait, it should stop
+immediately:
+
+```python
+await with_interrupt(asyncio.sleep(delay), self.__interrupt)
+```
+
+This already exists in the codebase for stream interruption.
+
+## Relation to other features
+
+| Feature | Relationship |
+|---|---|
+| **Model fallback** (`model_fallback.md`) | Complementary. Retry handles transient errors (same model). Fallback handles persistent errors (switch model). Could chain: retry N times → fall back to stronger model. |
+| **Crash recovery** | Retry is the first line of defense. If all retries fail, crash recovery kicks in. |
+| **Hooks** | Retry emits events for observability but is not hook-driven. |
+| **`max_consecutive_errors`** | Different scope — tool execution errors, not LLM API errors. |
+
+## Open questions
+
+1. **Should retry config be a `ConfigValue` (callable)?** Probably not — retry
+   config rarely needs to change dynamically. Keep it simple.
+
+2. **Jitter?** Exponential backoff with jitter is best practice to avoid
+   thundering herd. Add random jitter (±25%) to the delay.
+
+3. **Retry-After header?** Some providers return a `Retry-After` header with
+   429 responses. Should we parse it? The SDKs might already handle this in
+   their built-in retries, so by the time it reaches us, there's no header.
+   Low priority.
+
+4. **Per-provider retry?** Some providers are more aggressive with throttling
+   (Google Vertex with short bursts). Should retry config be per-provider?
+   Probably not — keep one config, the user can tune it.
+
+5. **Default: on or off?** Should retry be enabled by default? Yes — the
+   default `RetryConfig()` should retry with sensible defaults. Users who want
+   fail-fast can set `max_attempts=1`.
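The research doc computes `delay = min(initial_delay * backoff_factor ** attempt, max_delay)` and leaves ±25% jitter as open question 2. Here is a standalone illustration of that schedule with jitter folded in — a sketch, not code from the package:

```python
import random

def backoff_delay(
    attempt: int,
    *,
    initial_delay: float = 4.0,   # defaults mirror the RetryConfig sketch above
    backoff_factor: float = 2.0,
    max_delay: float = 240.0,
    jitter: float = 0.25,         # open question 2: +/-25% jitter
) -> float:
    """Capped exponential backoff with symmetric jitter."""
    base = min(initial_delay * backoff_factor**attempt, max_delay)
    return base * random.uniform(1.0 - jitter, 1.0 + jitter)

# Attempts 0..4 back off around 4, 8, 16, 32, 64 seconds (before jitter).
print([round(backoff_delay(n), 1) for n in range(5)])
```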
docs/research/pricing_complexity.md
@@ -13,11 +13,11 @@ Real LLM pricing is significantly more complex.
 
 The same model costs differently depending on how you access it:
 
-| Model
-|
-| Claude Sonnet 4.6 | $3/$15
-| GPT-4o
-| Gemini 2.5 Pro
+| Model             | Direct API   | Vertex AI  | Bedrock   | Azure     |
+|-------------------|--------------|------------|-----------|-----------|
+| Claude Sonnet 4.6 | $3/$15       | different  | different | N/A       |
+| GPT-4o            | $2.50/$10    | N/A        | N/A       | different |
+| Gemini 2.5 Pro    | N/A          | $1.25/$10  | N/A       | N/A       |
 
 litellm tracks this — each model has separate entries per provider:
 - `claude-sonnet-4-6` (litellm_provider: "anthropic")
docs/todo.md
@@ -26,6 +26,19 @@
 
 
 
+## Built-in LLM retry with backoff
+
+- **Automatic retry with exponential backoff for transient LLM errors (429, 5xx, timeouts).**
+  The tool loop is a "batteries included" building block — retry on transient provider errors
+  should be built in, not require a wrapper agent. Add `RetryConfig` to `ToolLoopConfig`
+  (max_attempts, initial_delay, max_delay, backoff_factor, custom retryable predicate).
+  Wrap the LLM call in `ToolLoopAgent.call_llm()` with a retry loop. Use a generic
+  `is_retryable()` check (status codes + exception name patterns) to avoid importing
+  provider SDKs. Respect interrupt tokens during backoff sleep. Emit observability events
+  on retry. Start with retry on initial connection errors only (not mid-stream failures).
+  See `docs/research/llm_retry_backoff.md`.
+
+
 ## Documentation benchmark suite
 
 - A series of tasks given to a coding agent that has access only to documentation
flowra/ext/mlflow.py
@@ -65,8 +65,8 @@ def _resolve_experiment_id(experiment_name: str) -> str:
 class _MlflowTracing:
     __slots__ = ()
 
+    @staticmethod
     def install(
-        self,
         runtime: AgentRuntime,
         *,
         experiment_name: str | None = None,
flowra/ext/mlflow.py
@@ -347,8 +347,11 @@ def _format_chat_inputs(req: LLMRequest) -> dict[str, Any]:
     inputs: dict[str, Any] = {"model": req.model}
     if req.temperature is not None:
         inputs["temperature"] = req.temperature
+    if req.top_p is not None:
+        inputs["top_p"] = req.top_p
     if req.max_tokens is not None:
         inputs["max_tokens"] = req.max_tokens
+    inputs.update(req.additional_config)
 
     messages: list[dict[str, Any]] = []
     for msg in req.system:
flowra/ext/otel.py
@@ -200,8 +200,12 @@ def _make_llm_handler(
         }
         if span.request.temperature is not None:
             attrs["gen_ai.request.temperature"] = span.request.temperature
+        if span.request.top_p is not None:
+            attrs["gen_ai.request.top_p"] = span.request.top_p
         if span.request.max_tokens is not None:
             attrs["gen_ai.request.max_tokens"] = span.request.max_tokens
+        for key, val in span.request.additional_config.items():
+            attrs[f"gen_ai.request.{key}"] = val if isinstance(val, str | int | float | bool) else str(val)
 
         otel_span = _start_span(otel_parent, tracer, f"chat {model_name}", kind=SpanKind.CLIENT, attributes=attrs)
         otel_parent.set(otel_span)
flowra/lib/llm_call/agent.py
@@ -36,6 +36,7 @@ class LLMCallAgent(Agent[LLMCallSpec, LLMCallResult]):
             system=list(spec.system),
             messages=list(spec.messages),
             temperature=llm_config.temperature,
+            top_p=llm_config.top_p,
             max_tokens=llm_config.max_tokens,
             stop_sequences=llm_config.stop_sequences,
             additional_config=llm_config.additional_config,
flowra/lib/llm_config.py
@@ -8,6 +8,7 @@ __all__ = ["LLMConfig"]
 class LLMConfig:
     model: str
     temperature: float | None = None
+    top_p: float | None = None
     max_tokens: int | None = None
     stop_sequences: list[str] | None = None
    additional_config: dict[str, Any] = dataclasses.field(default_factory=dict)
flowra/lib/tool_loop/agent.py
@@ -160,6 +160,7 @@ class ToolLoopAgent(Agent[ToolLoopSpec, ToolLoopResult]):
             tools=tools,
             json_schema=json_schema,
             temperature=llm_config.temperature,
+            top_p=llm_config.top_p,
             max_tokens=llm_config.max_tokens,
             stop_sequences=llm_config.stop_sequences,
             additional_config=llm_config.additional_config,
flowra/llm/pricing/__init__.py (new file)
@@ -0,0 +1,40 @@
+"""Universal LLM pricing — JSON-backed registry with per-provider cost estimation."""
+
+from ..response import CostBreakdown
+from .registry import ModelPricing, PricingRegistry
+
+__all__ = ["CostBreakdown", "ModelPricing", "PricingRegistry", "estimate_cost", "get_registry"]
+
+_default: PricingRegistry | None = None
+
+
+def get_registry() -> PricingRegistry:
+    """Return the default :class:`PricingRegistry` (lazy-loaded singleton)."""
+    global _default
+    if _default is None:
+        _default = PricingRegistry.load_default()
+    return _default
+
+
+def estimate_cost(
+    model: str,
+    *,
+    provider: str,
+    input_tokens: int,
+    output_tokens: int,
+    cache_read_tokens: int = 0,
+    cache_creation_tokens: int = 0,
+    cache_creation_1h_tokens: int = 0,
+    reasoning_tokens: int = 0,
+) -> CostBreakdown | None:
+    """Convenience wrapper around :meth:`PricingRegistry.estimate_cost`."""
+    return get_registry().estimate_cost(
+        model,
+        provider=provider,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cache_read_tokens=cache_read_tokens,
+        cache_creation_tokens=cache_creation_tokens,
+        cache_creation_1h_tokens=cache_creation_1h_tokens,
+        reasoning_tokens=reasoning_tokens,
+    )
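A possible call site for the module-level `estimate_cost` wrapper added above. The keyword signature is taken from the hunk itself; the token counts are illustrative, and `cost.total` comes from the `CostBreakdown` description in the context7.json hunk earlier:

```python
from flowra.llm.pricing import estimate_cost

# Prices are $/1M tokens; returns None when the model is not in the registry.
cost = estimate_cost(
    "claude-sonnet-4-6",
    provider="anthropic",
    input_tokens=12_000,
    output_tokens=800,
    cache_read_tokens=50_000,
)
if cost is not None:
    print(f"total: ${cost.total:.4f}")
```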
flowra/llm/pricing/data/custom.json (new file)
@@ -0,0 +1,52 @@
+{
+  "anthropic": {
+    "claude-sonnet-4-6": {
+      "cache_creation_1h": 6.0
+    },
+    "claude-sonnet-4-5": {
+      "cache_creation_1h": 6.0
+    },
+    "claude-sonnet-4": {
+      "cache_creation_1h": 6.0
+    },
+    "claude-sonnet-3-7": {
+      "input": 3.0,
+      "output": 15.0,
+      "cache_read": 0.3,
+      "cache_creation": 3.75,
+      "cache_creation_1h": 6.0
+    },
+    "claude-haiku-3-5": {
+      "input": 0.8,
+      "output": 4.0,
+      "cache_read": 0.08,
+      "cache_creation": 1.0,
+      "cache_creation_1h": 1.6
+    },
+    "claude-haiku-3": {
+      "input": 0.25,
+      "output": 1.25,
+      "cache_read": 0.03,
+      "cache_creation": 0.3,
+      "cache_creation_1h": 0.5
+    },
+    "claude-opus-3": {
+      "input": 15.0,
+      "output": 75.0,
+      "cache_read": 1.5,
+      "cache_creation": 18.75,
+      "cache_creation_1h": 30.0
+    }
+  },
+  "openai": {
+    "mercury-coder": {
+      "input": 0.25,
+      "output": 1.0
+    },
+    "mercury-2": {
+      "input": 0.25,
+      "output": 0.75,
+      "cache_read": 0.025
+    }
+  }
+}
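Given these keys, lookups rely on the substring matching noted in the update-pricing command, so a versioned model id still resolves to its entry. A toy sketch of that rule — the longest-match tie-breaking is an assumption here; the real logic lives in `flowra/llm/pricing/registry.py`:

```python
# Toy substring lookup: "claude-sonnet-4-5@20250929" should hit "claude-sonnet-4-5".
def find_pricing_key(model: str, keys: list[str]) -> str | None:
    matches = [k for k in keys if k in model]
    # Assumed tie-break: prefer the longest key so "claude-sonnet-4-5"
    # beats the shorter "claude-sonnet-4".
    return max(matches, key=len) if matches else None

keys = ["claude-sonnet-4", "claude-sonnet-4-5", "claude-sonnet-4-6"]
assert find_pricing_key("claude-sonnet-4-5@20250929", keys) == "claude-sonnet-4-5"
```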