flowra 0.0.1.dev2__tar.gz → 0.0.2.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.github/workflows/publish.yml +1 -1
- flowra-0.0.2.dev5/CHANGELOG.md +31 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/CLAUDE.md +8 -3
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/PKG-INFO +7 -2
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/README.md +6 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/context7.json +16 -7
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/agent.md +22 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/architecture.md +3 -2
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/lib.md +23 -11
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/llm.md +99 -8
- flowra-0.0.2.dev5/docs/research/strands_comparison.md +46 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/console_chat.py +10 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/llm_routing.py +13 -3
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/menu_agent.py +1 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/menu_agent_class.py +1 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/model_registry.py +5 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tui_chat.py +130 -54
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/__init__.py +2 -0
- flowra-0.0.2.dev5/flowra/agent/interrupt_helpers.py +50 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/__init__.py +8 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/agent.py +32 -3
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/context.py +1 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/hook_executor.py +32 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/hooks.py +47 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/__init__.py +5 -0
- flowra-0.0.2.dev5/flowra/llm/provider.py +19 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/providers/__init__.py +3 -2
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/providers/anthropic_vertex.py +56 -12
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/providers/google_vertex.py +93 -14
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/providers/openai.py +19 -1
- flowra-0.0.2.dev5/flowra/llm/stream.py +28 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/engine.py +2 -2
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/version.py +1 -1
- flowra-0.0.2.dev5/tests/agent/test_with_interrupt.py +170 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/test_tool_loop_agent.py +303 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_anthropic_e2e.py +74 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_anthropic_vertex.py +83 -1
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_google_vertex.py +94 -8
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_google_vertex_e2e.py +22 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_openai_e2e.py +26 -0
- flowra-0.0.2.dev5/tests/llm/test_stream.py +37 -0
- flowra-0.0.1.dev2/CHANGELOG.md +0 -21
- flowra-0.0.1.dev2/flowra/llm/provider.py +0 -13
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.claude/commands/update-pricing.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.env.example +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.github/workflows/master.yml +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.github/workflows/pull_request.yml +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.github/workflows/pull_request_e2e.yml +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.gitignore +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.python-version +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/LICENSE +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/Makefile +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_plan.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step1_structure.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step2_code_style.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step3_documentation.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step4_doc_readability.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step5_doc_audit.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step6_tests.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/runtime.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/todo.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/tools.md +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/app_agent.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/llm_logging.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/system_prompt.txt +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tools/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tools/calculator.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tools/random_numbers.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tools/switch_model.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/agent.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/agent_def.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/agent_registry.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/agent_store.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/compile.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/interrupt_token.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/service_locator.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/step_decorator.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/stored_values.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/agent.py +4 -4
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/config.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/hook_executor.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/hooks.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/spec.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/config_value.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/llm_config.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/_tool_call_agent.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/cache.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/config.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/spec.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/_base.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/blocks.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/messages.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/pricing/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/pricing/anthropic.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/pricing/google.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/pricing/openai.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/request.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/response.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/schema_formatting.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/schema_validation.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/tools.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/py.typed +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/_sealed_scope.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/execution.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/interrupt.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/runtime.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/runtime_scope.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/serialization.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/storage/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/storage/file.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/storage/in_memory.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/storage/session_storage.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/local_tool.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/mcp_connection.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/tool_group.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/tool_registry.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/types.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/pyproject.toml +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_agent.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_agent_def.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_agent_registry.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_compile.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_step_ref.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_values.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/test_chat_agent.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/test_config_value.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/test_tool_call_agent.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/tool_loop/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/tool_loop/test_cache.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/pricing/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/pricing/test_anthropic.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/pricing/test_google.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/pricing/test_openai.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_openai_provider.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/test_metadata.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/test_response.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/test_schema_formatting.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/test_schema_validation.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/storage/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/storage/test_file.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/storage/test_in_memory.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_engine.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_interrupt.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_persistence.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_runtime.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_scope.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_serialization.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/__init__.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/test_local_tool.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/test_mcp_connection.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/test_tool_group.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/test_tool_registry.py +0 -0
- {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/uv.lock +0 -0

CHANGELOG.md (new file)
@@ -0,0 +1,31 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com),
+and this project adheres to [Semantic Versioning](https://semver.org).
+
+## [Unreleased]
+
+### Added
+- **Streaming**: `LLMProvider.stream()` method returns `AsyncIterator[StreamEvent]` with `TextDelta`, `ThinkingDelta`, and `ContentComplete` events. All three built-in providers implement real-time streaming. Default fallback calls `call()` and yields `ContentComplete`.
+- **Anthropic thinking**: `AnthropicVertexAdditionalConfig` with `thinking_budget_tokens` enables extended thinking on Claude models. Thinking blocks are now parsed from Anthropic responses (`ThinkingBlock`).
+- **Streaming hooks**: `on_text_delta` and `on_thinking_delta` hooks in `ToolLoopHooks`. When set, the agent automatically uses `stream()` instead of `call()`. Streaming respects `InterruptToken` — exits immediately even if the LLM is blocked.
+- **`with_interrupt`**: Generic async iterator wrapper that races `__anext__()` against `InterruptToken.wait()` via `asyncio.wait(FIRST_COMPLETED)`. Used internally by `ToolLoopAgent` for streaming; available as `from flowra.agent import with_interrupt`.
+- **Thinking model entry**: `anthropic/sonnet-4-5-think` in example model registry with 4000 token thinking budget.
+- **Console/TUI streaming**: `--stream` flag for `console_chat.py` and `tui_chat.py` examples.
+
+## [0.0.1] - 2026-03-07
+
+Initial release.
+
+### Added
+- State machine agents with `@step` methods, `Goto`, `Spawn`, and stored values (`Scalar`, `AppendOnlyList`)
+- Provider-agnostic LLM abstraction (`LLMProvider`, `LLMRequest`, `LLMResponse`)
+- LLM providers: `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`
+- Tool integration: `@tool` decorator, MCP server support, DI into tool handlers
+- Execution engine with persistence, crash recovery, and cooperative interrupts
+- Pre-built agents: `ChatAgent` (multi-turn chat) and `ToolLoopAgent` (tool loop with hooks)
+- `ChatHooks` with `on_save_turn_messages` for transient message filtering
+- Optional provider dependencies via extras: `flowra[anthropic]`, `flowra[openai]`, `flowra[google]`, `flowra[all]`
+- Python 3.12, 3.13, 3.14 support

CLAUDE.md
@@ -30,12 +30,13 @@ Python 3.12+ library. Package manager: **uv**. All config in `pyproject.toml`.
 
 Provider-agnostic interface for calling LLMs:
 
-- `LLMProvider` (abc) —
-- `LLMRequest` — model, messages, tools, json_schema, temperature, max_tokens, stop_sequences
+- `LLMProvider` (abc) — `async call(LLMRequest) -> LLMResponse` and `async stream(LLMRequest) -> AsyncIterator[StreamEvent]`
+- `LLMRequest` — model, messages, tools, json_schema, temperature, max_tokens, stop_sequences, additional_config, max_schema_retries
 - `LLMResponse` — message (AssistantMessage), stop_reason, usage
+- `StreamEvent` = `TextDelta | ThinkingDelta | ContentComplete` — stream events for real-time token delivery
 - `Usage` — input_tokens, output_tokens, cache_read_input_tokens, cache_creation_input_tokens, cost_usd. Token contract: `input_tokens` excludes cached tokens
 - Messages: `SystemMessage`, `UserMessage`, `AssistantMessage` — system messages must be at the beginning of the messages list
-- Blocks: `TextBlock` (with `cache: bool` for prompt caching), `ImageBlock`, `ToolUseBlock`, `ToolResultBlock`
+- Blocks: `TextBlock` (with `cache: bool` for prompt caching), `ImageBlock`, `ToolUseBlock`, `ToolResultBlock`, `ThinkingBlock`
 - `Tool` — name, description, input_schema, output_schema, cache
 
 Providers live in `flowra/llm/providers/`. Currently: `AnthropicVertexProvider`, `OpenAIProvider`, `GoogleVertexProvider`.

CLAUDE.md
@@ -56,3 +57,7 @@ Review prompts live in `docs/review_prompts/`:
 ### Tests
 
 Test directory structure mirrors `flowra/`. E2E tests use `_e2e` suffix (e.g., `test_anthropic_e2e.py`). Environment variables loaded from `.env` via Makefile.
+
+## Maintenance
+
+- **`context7.json`** — project description for [Context7](https://context7.com). Must be updated when adding new features, changing public APIs, or modifying architecture. Keep rules in sync with actual capabilities.

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flowra
-Version: 0.0.1.dev2
+Version: 0.0.2.dev5
 Summary: Flowra — flow infrastructure for building stateful LLM agents
 Project-URL: Repository, https://github.com/anna-money/flowra
 Project-URL: Changelog, https://github.com/anna-money/flowra/blob/master/CHANGELOG.md

PKG-INFO
@@ -33,6 +33,11 @@ Description-Content-Type: text/markdown
 
 # Flowra
 
+[](https://pypi.org/project/flowra/)
+[](https://pypi.org/project/flowra/)
+[](https://github.com/anna-money/flowra/blob/master/LICENSE)
+[](https://github.com/anna-money/flowra/actions/workflows/master.yml)
+
 **Flow infra** for building stateful, persistent LLM agents with tool use,
 parallel execution, and crash recovery. Requires Python 3.12+.
 

PKG-INFO
@@ -45,7 +50,7 @@ parallel execution, and crash recovery. Requires Python 3.12+.
 - **Tool integration** — `@tool` decorator for local functions, MCP server support,
   DI into tool handlers
 - **LLM abstraction** — provider-agnostic `LLMProvider` interface with immutable
-  message types (ships `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`)
+  message types and real-time streaming (ships `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`)
 - **Cooperative interrupts** — `InterruptToken` for graceful cancellation across
   the entire execution tree
 - **Pre-built agents** — `ChatAgent` (multi-turn chat with session history) and

README.md
@@ -1,5 +1,10 @@
 # Flowra
 
+[](https://pypi.org/project/flowra/)
+[](https://pypi.org/project/flowra/)
+[](https://github.com/anna-money/flowra/blob/master/LICENSE)
+[](https://github.com/anna-money/flowra/actions/workflows/master.yml)
+
 **Flow infra** for building stateful, persistent LLM agents with tool use,
 parallel execution, and crash recovery. Requires Python 3.12+.
 

README.md
@@ -12,7 +17,7 @@ parallel execution, and crash recovery. Requires Python 3.12+.
 - **Tool integration** — `@tool` decorator for local functions, MCP server support,
   DI into tool handlers
 - **LLM abstraction** — provider-agnostic `LLMProvider` interface with immutable
-  message types (ships `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`)
+  message types and real-time streaming (ships `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`)
 - **Cooperative interrupts** — `InterruptToken` for graceful cancellation across
   the entire execution tree
 - **Pre-built agents** — `ChatAgent` (multi-turn chat with session history) and

context7.json
@@ -1,7 +1,7 @@
 {
   "$schema": "https://context7.com/schema/context7.json",
   "projectTitle": "flowra",
-  "description": "Flow infrastructure for building stateful, persistent LLM agents with tool use, parallel execution, and
+  "description": "Flow infrastructure for building stateful, persistent LLM agents with tool use, parallel execution, crash recovery, and real-time streaming. Requires Python 3.12+. Features state machine agents with @step methods, persistent state (Scalar, AppendOnlyList) with dirty tracking, tool integration (@tool decorator + MCP), provider-agnostic LLM abstraction with streaming support, extended thinking (Anthropic, Google), cooperative interrupts, and pre-built ChatAgent/ToolLoopAgent.",
   "folders": ["flowra", "examples"],
   "excludeFolders": ["tests", ".github", "logs", ".chat_sessions"],
   "excludeFiles": [],

context7.json
@@ -11,15 +11,22 @@
     "PACKAGE STRUCTURE: flowra/llm/ (LLM abstraction), flowra/tools/ (tool system), flowra/agent/ (state machine framework), flowra/runtime/ (execution engine), flowra/lib/ (pre-built agents)",
     "Dependency graph: llm (no deps) -> tools (llm) -> agent (no deps) -> runtime (agent, llm) -> lib (agent, llm, tools, runtime). No circular dependencies",
 
-    "LLM ABSTRACTION: LLMProvider is the core interface —
-    "
+    "LLM ABSTRACTION: LLMProvider is the core interface — two methods: async call(LLMRequest) -> LLMResponse and async stream(LLMRequest) -> AsyncIterator[StreamEvent]",
+    "stream() returns StreamEvent = TextDelta | ThinkingDelta | ContentComplete. TextDelta/ThinkingDelta carry incremental text; ContentComplete is always last and contains the full LLMResponse",
+    "Default stream() implementation calls call() and yields a single ContentComplete — providers override for real-time streaming",
+    "LLMRequest contains: model, messages, tools, json_schema, temperature, max_tokens, stop_sequences, additional_config, max_schema_retries",
     "LLMResponse contains: message (AssistantMessage), stop_reason (StopReason), usage (Usage)",
     "Usage contains: input_tokens, output_tokens, cache_read_input_tokens, cache_creation_input_tokens, cost_usd. Token contract: input_tokens excludes cached tokens",
     "Messages: SystemMessage, UserMessage, AssistantMessage. System messages must be at the beginning of the messages list",
     "Blocks: TextBlock (with cache: bool for prompt caching), ImageBlock, ToolUseBlock, ToolResultBlock, ThinkingBlock",
+    "ThinkingBlock holds reasoning/thinking text from models with extended thinking (Anthropic Claude, Google Gemini). Not sent back to the API",
     "Tool definition for LLM: Tool(name, description, input_schema, output_schema, cache)",
-    "Three built-in providers: AnthropicVertexProvider (Claude via Vertex AI), OpenAIProvider (OpenAI-compatible APIs), GoogleVertexProvider (Gemini via Vertex AI)",
+    "Three built-in providers: AnthropicVertexProvider (Claude via Vertex AI), OpenAIProvider (OpenAI-compatible APIs), GoogleVertexProvider (Gemini via Vertex AI). All three implement stream() with real-time deltas",
     "Import providers: from flowra.llm.providers.anthropic_vertex import AnthropicVertexProvider, from flowra.llm.providers.openai import OpenAIProvider, from flowra.llm.providers.google_vertex import GoogleVertexProvider",
+    "Import stream types: from flowra.llm import TextDelta, ThinkingDelta, ContentComplete",
+    "AnthropicVertexAdditionalConfig(thinking_budget_tokens: int) enables extended thinking on Claude. Pass via additional_config={'thinking_budget_tokens': 4000}",
+    "GoogleVertexAdditionalConfig(thinking_level: ThinkingLevel, thinking_budget: int) configures Gemini thinking. Pass via additional_config={'thinking_level': 'medium'} or {'thinking_budget': 4096}",
+    "AnthropicVertexProvider falls back to non-streaming when json_schema is set (retry loop requires full responses)",
 
     "TOOL SYSTEM: @tool decorator turns a Python function into a tool definition",
     "get_local_tool(func) wraps a @tool-decorated function into a LocalTool",

context7.json
@@ -49,6 +56,7 @@
 
     "COOPERATIVE INTERRUPTS: InterruptToken for graceful cancellation across the entire execution tree",
     "InterruptTokenSource creates tokens: source = InterruptTokenSource(); token = source.token; source.interrupt()",
+    "with_interrupt(ait, token) wraps any AsyncIterator so it exits immediately when token fires — races __anext__() vs token.wait() via asyncio.wait. Import: from flowra.agent import with_interrupt",
 
     "PRE-BUILT AGENTS — ChatAgent: multi-turn chat with session history persistence",
     "ChatAgent usage: runtime.run(agent=ChatAgent, step=ChatAgent.process_message, spec=ChatSpec(user_message=text))",

context7.json
@@ -61,19 +69,20 @@
     "PRE-BUILT AGENTS — ToolLoopAgent: single-turn LLM tool loop with hooks and caching",
     "ToolLoopAgent sends messages to LLM, executes tool calls, feeds results back, repeats until done",
     "ToolLoopConfig configures ToolLoopAgent: ToolLoopConfig(llm_config=LLMConfig(model='...'), cache_config=CacheConfig(...))",
-    "ToolLoopHooks provides lifecycle callbacks: on_user_message, on_start_iteration, on_before_llm_call,
+    "ToolLoopHooks provides lifecycle callbacks: on_user_message, on_message_accepted, on_start_iteration, on_before_llm_call, on_text_delta, on_thinking_delta, on_after_llm_call, on_text_reasoning, on_thinking, on_result_message, on_before_tool_call, on_after_tool_call",
+    "When on_text_delta or on_thinking_delta hooks are set, ToolLoopAgent automatically uses provider.stream() instead of provider.call()",
 
     "CACHING: CacheConfig(system_prompt, tools, messages) controls prompt caching strategies",
     "Predefined configs: CACHE_ALL, CACHE_SESSION, CACHE_MANUAL, NO_CACHE",
     "Cache strategies: cache_last_system_prompt, cache_last_tool, cache_last_message, cache_last_session_message, no_cache_*",
 
-    "CONFIG: LLMConfig(model, temperature, max_tokens, stop_sequences) configures LLM calls",
+    "CONFIG: LLMConfig(model, temperature, max_tokens, stop_sequences, additional_config) configures LLM calls",
     "ConfigValue[T] wraps static or dynamic (callable) config values: ConfigValue[str] | ConfigValue[Callable[[], str]]",
 
     "QUICK START: Create provider -> create ToolRegistry -> create Config -> create AgentRuntime -> runtime.run()",
     "Import ChatAgent: from flowra.lib.chat import ChatAgent, ChatConfig, ChatHooks, ChatResult, ChatSpec",
     "Import LLMConfig: from flowra.lib import LLMConfig",
-    "Import LLM types: from flowra.llm import LLMProvider, SystemMessage, TextBlock, Usage",
+    "Import LLM types: from flowra.llm import LLMProvider, SystemMessage, TextBlock, Usage, TextDelta, ThinkingDelta, ContentComplete",
     "Import runtime: from flowra.runtime import AgentRuntime, FileSessionStorage",
     "Import tools: from flowra.tools import ToolRegistry, get_local_tool, tool"
 ],

docs/agent.md
@@ -11,7 +11,8 @@ flowra/agent/
 ├── agent_def.py          # Control flow (Goto, Call, Spawn), type aliases, resolve functions
 ├── agent_store.py        # AgentStore (ABC) — flush interface
 ├── service_locator.py    # ServiceLocator (ABC) — service provision and access
-├── interrupt_token.py
+├── interrupt_token.py    # InterruptToken (ABC) — cooperative interrupt interface
+├── interrupt_helpers.py  # with_interrupt() — race async iterators against InterruptToken
 ├── agent_registry.py     # AgentRegistry — hierarchical agent name/type resolution
 ├── stored_values.py      # Scalar[T], AppendOnlyList[T], slot() — dirty-tracked state containers
 └── compile.py            # compile_agent() — introspection, slot discovery, type registry

docs/agent.md
@@ -543,6 +544,26 @@ class MyAgent(Agent):
     # ... continue processing ...
 ```
 
+### `with_interrupt` — racing async iterators
+
+`with_interrupt` wraps any `AsyncIterator[T]` so it exits immediately when
+the token fires — even if `__anext__()` is blocked on I/O:
+
+```python
+from flowra.agent import InterruptToken, with_interrupt
+
+async def consume(stream: AsyncIterator[str], token: InterruptToken) -> list[str]:
+    items = []
+    async for item in with_interrupt(stream, token):
+        items.append(item)
+    return items  # partial results if interrupted
+```
+
+On each iteration, `__anext__()` and `token.wait()` are raced via
+`asyncio.wait(FIRST_COMPLETED)`. If the token wins, the underlying iterator
+is closed (`aclose()`) and the wrapper ends. This is used internally by
+`ToolLoopAgent` to interrupt LLM streaming immediately.
+
 ## Dependency injection
 
 ### Constructor injection

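The new `flowra/agent/interrupt_helpers.py` (+50 lines) is not itself shown in this diff. As a rough sketch of the mechanism described above (racing `__anext__()` against `token.wait()` with `asyncio.wait(FIRST_COMPLETED)`, then closing the wrapped iterator), an equivalent wrapper could look like the following. This is an illustrative reimplementation under stated assumptions, not the shipped source; `token` is assumed to expose an awaitable `wait()`.

```python
import asyncio
from collections.abc import AsyncIterator


async def with_interrupt_sketch[T](ait: AsyncIterator[T], token) -> AsyncIterator[T]:
    """Illustrative stand-in for flowra's with_interrupt; not the shipped code."""
    wait_task = asyncio.ensure_future(token.wait())  # completes when the token fires
    try:
        while True:
            next_task = asyncio.ensure_future(anext(ait))
            done, _ = await asyncio.wait(
                {next_task, wait_task}, return_when=asyncio.FIRST_COMPLETED
            )
            if wait_task in done:       # the interrupt won the race
                next_task.cancel()      # abandon the (possibly blocked) __anext__()
                return
            try:
                yield next_task.result()
            except StopAsyncIteration:  # underlying iterator finished normally
                return
    finally:
        wait_task.cancel()
        aclose = getattr(ait, "aclose", None)
        if aclose is not None:
            await aclose()              # close the wrapped iterator, as documented
```

The `finally` block runs both on interrupt and on normal completion, so the wrapped iterator is always closed.
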
docs/architecture.md
@@ -20,7 +20,8 @@ No circular dependencies.
 ### `llm` — LLM abstraction
 
 Protocol layer between the SDK and any LLM. The core abstraction is `LLMProvider` —
-
+`call(LLMRequest) → LLMResponse` for full responses, and `stream(LLMRequest) →
+AsyncIterator[StreamEvent]` for real-time text/thinking deltas. Request and response use
 a shared set of message and block types. Ships three providers:
 `AnthropicVertexProvider` (Claude via Vertex AI), `OpenAIProvider` (OpenAI-compatible APIs),
 `GoogleVertexProvider` (Gemini via Vertex AI). → [docs/llm.md](llm.md)

docs/architecture.md
@@ -66,7 +67,7 @@ User message
   │
   ▼
 ChatAgent.process_message
-  │
+  │ Spawns ToolLoopAgent via Call inside Spawn
   ▼
 ToolLoopAgent.start
   │ Saves user message to turn messages

docs/lib.md
@@ -305,6 +305,8 @@ runtime = AgentRuntime(
 | `on_before_llm_call` | `OnBeforeLLMCall \| OnBeforeLLMCallAsync \| None` | `None` |
 | `on_after_llm_call` | `OnAfterLLMCall \| OnAfterLLMCallAsync \| None` | `None` |
 | `on_result_message` | `OnResultMessage \| OnResultMessageAsync \| None` | `None` |
+| `on_text_delta` | `OnTextDelta \| OnTextDeltaAsync \| None` | `None` |
+| `on_thinking_delta` | `OnThinkingDelta \| OnThinkingDeltaAsync \| None` | `None` |
 | `on_text_reasoning` | `OnTextReasoning \| OnTextReasoningAsync \| None` | `None` |
 | `on_thinking` | `OnThinking \| OnThinkingAsync \| None` | `None` |
 | `on_before_tool_call` | `OnBeforeToolCall \| OnBeforeToolCallAsync \| None` | `None` |

docs/lib.md
@@ -328,30 +330,37 @@ Hooks fire in this order during a single tool loop iteration:
 4. **`on_before_llm_call`** — before each LLM request. Receives `LLMRequest` and context.
    Observational only (no return value).
 
-5. **`
+5. **`on_text_delta`** / **`on_thinking_delta`** — when either hook is set, the agent
+   uses `provider.stream()` instead of `provider.call()`. `on_text_delta` fires for
+   each incremental text chunk; `on_thinking_delta` fires for each thinking chunk.
+   These fire **during** the LLM call, before `on_after_llm_call`. The stream is
+   wrapped with `with_interrupt`, so an `InterruptToken` signal exits immediately —
+   even if the LLM is slow to produce the next token.
+
+6. **`on_after_llm_call`** — after each LLM response. Receives `LLMRequest`, `LLMResponse`,
    and context. Observational only.
 
-
-   regardless of stop reason — useful for
+7. **`on_text_reasoning`** — for each `TextBlock` in the assistant response. Fires
+   regardless of stop reason — useful for observing text output even when tool calls
    are also present.
 
-
+8. **`on_thinking`** — for each `ThinkingBlock` in the assistant response. Fires
    for models with thinking/reasoning enabled (e.g. extended thinking).
 
 Then the flow branches based on stop reason:
 
 - **If `TOOL_USE`:**
 
-
+9. **`on_before_tool_call`** — before each tool execution. Return `BeforeToolCallResult`
    with `amended_tool_use` to modify tool parameters.
 
-
-
+10. **`on_after_tool_call`** — after each tool execution. Return `AfterToolCallResult`
+    with `amended_result` and/or `additional_messages`.
 
 - **If `END_TURN`:**
 
-
-
+11. **`on_result_message`** — return `ResultMessageResult` with `continue_messages`
+    to force the loop to continue instead of finishing.
 
 ### Hook result types
 

docs/lib.md
@@ -515,6 +524,8 @@ OnMessageAccepted / OnMessageAcceptedAsync
 OnStartIteration / OnStartIterationAsync
 OnBeforeLLMCall / OnBeforeLLMCallAsync
 OnAfterLLMCall / OnAfterLLMCallAsync
+OnTextDelta / OnTextDeltaAsync
+OnThinkingDelta / OnThinkingDeltaAsync
 OnTextReasoning / OnTextReasoningAsync
 OnThinking / OnThinkingAsync
 OnResultMessage / OnResultMessageAsync

docs/lib.md
@@ -546,8 +557,9 @@ ChatAgent
 1. `start` — accepts user message, runs `on_user_message` hook, appends to
    `turn_messages`, flushes, fires `on_message_accepted`, then gotos `call_llm`.
 2. `call_llm` — checks interrupt/finish/max_iterations, runs `on_start_iteration`,
-   builds `LLMRequest`, runs `on_before_llm_call`, calls LLM
-
+   builds `LLMRequest`, runs `on_before_llm_call`, calls LLM (streaming deltas via
+   `on_text_delta`/`on_thinking_delta` if set), runs `on_after_llm_call`,
+   `on_text_reasoning`, and `on_thinking`, then:
    - `END_TURN` → runs `on_result_message`, returns `ToolLoopResult` (or continues
      if `continue_messages` is non-empty)
    - `TOOL_USE` → runs `on_before_tool_call` for each tool, spawns `ToolCallAgent`

docs/llm.md
@@ -11,7 +11,8 @@ flowra/llm/
 ├── tools.py              # Tool
 ├── request.py            # LLMRequest
 ├── response.py           # LLMResponse, StopReason, Usage
-├── provider.py           # LLMProvider (abc)
+├── provider.py           # LLMProvider (abc) — call() + stream()
+├── stream.py             # StreamEvent, TextDelta, ThinkingDelta, ContentComplete
 ├── schema_formatting.py  # JSON schema formatting for LLM prompts
 ├── schema_validation.py  # JSON schema validation and markdown stripping
 ├── pricing/

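`flowra/llm/stream.py` is likewise a new file (+28 lines) whose body does not appear in this diff. Judging from the event fields documented later in `docs/llm.md`, the types are plausibly small frozen dataclasses along these lines; this is a guess at the shape, not the actual source:

```python
# Plausible shape of flowra/llm/stream.py, inferred from this diff — not the actual source.
import dataclasses

from flowra.llm.response import LLMResponse  # module path per the tree above


@dataclasses.dataclass(frozen=True)
class TextDelta:
    text: str  # incremental visible text


@dataclasses.dataclass(frozen=True)
class ThinkingDelta:
    text: str  # incremental thinking/reasoning text


@dataclasses.dataclass(frozen=True)
class ContentComplete:
    response: LLMResponse  # the full response; always the final event


type StreamEvent = TextDelta | ThinkingDelta | ContentComplete
```
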
docs/llm.md
@@ -137,8 +138,8 @@ ToolResultBlock(tool_use_id="toolu_123", content="Division by zero", is_error=Tr
 ### `ThinkingBlock`
 
 Thinking/reasoning content from the LLM. Produced by models that support extended
-thinking (e.g.
-any provider.
+thinking (e.g. Anthropic Claude with `thinking_budget_tokens`, Google Gemini with
+thinking enabled). Not sent back to the API by any provider.
 
 ```python
 # Usually not created manually — comes from AssistantMessage via LLM

docs/llm.md
@@ -346,12 +347,17 @@ if response.usage is not None:
 
 ### `LLMProvider`
 
-Abstract base class for calling an LLM. Defines
+Abstract base class for calling an LLM. Defines `call()` (abstract) and `stream()`
+(optional, with a default fallback):
 
 ```python
 class LLMProvider(abc.ABC):
     @abc.abstractmethod
     async def call(self, request: LLMRequest) -> LLMResponse: ...
+
+    async def stream(self, request: LLMRequest) -> AsyncIterator[StreamEvent]:
+        response = await self.call(request)
+        yield ContentComplete(response=response)
 ```
 
 The provider is responsible for converting `LLMRequest` into the target API's format,

docs/llm.md
@@ -359,6 +365,39 @@ calling the API, and converting the response back to `LLMResponse`. If `json_sch
 is set, the provider should also handle validation and retries (see
 [Structured output](#structured-output-json-schema)).
 
+#### Streaming
+
+`stream()` returns an `AsyncIterator[StreamEvent]` that yields incremental events
+as the LLM generates its response. The default implementation calls `call()` and
+yields a single `ContentComplete` event — providers override this for real-time streaming.
+
+**Stream events:**
+
+| Event | Fields | Description |
+|-------------------|---------------------|------------------------------------------|
+| `TextDelta` | `text: str` | Incremental text content |
+| `ThinkingDelta` | `text: str` | Incremental thinking/reasoning content |
+| `ContentComplete` | `response: LLMResponse` | Always last — full response |
+
+`ContentComplete` is always the final event and contains the same `LLMResponse` you
+would get from `call()`.
+
+```python
+async for event in provider.stream(request):
+    match event:
+        case TextDelta(text=text):
+            print(text, end="", flush=True)
+        case ThinkingDelta(text=text):
+            print(f"[thinking] {text}", end="")
+        case ContentComplete(response=response):
+            print()  # newline after streaming
+            # response.message, response.usage, etc. are available here
+```
+
+All three built-in providers implement `stream()` with real-time deltas.
+`AnthropicVertexProvider` falls back to non-streaming when `json_schema` is set
+(the retry loop requires full responses).
+
 ### `AnthropicVertexProvider`
 
 Implementation for Claude via Vertex AI.

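For the provider-side view, which this diff only summarizes as changed provider files: a custom provider overriding the default fallback might look roughly like the sketch below. `self._api_stream` and `self._build_response` are hypothetical stand-ins for a real SDK stream and response assembly; they are not flowra APIs.

```python
from collections.abc import AsyncIterator

from flowra.llm import (
    ContentComplete,
    LLMProvider,
    LLMRequest,
    LLMResponse,
    StreamEvent,
    TextDelta,
)


class SketchProvider(LLMProvider):
    async def call(self, request: LLMRequest) -> LLMResponse:
        # Reuse the streaming path and return the final accumulated response.
        async for event in self.stream(request):
            if isinstance(event, ContentComplete):
                return event.response
        raise RuntimeError("stream ended without ContentComplete")

    async def stream(self, request: LLMRequest) -> AsyncIterator[StreamEvent]:
        chunks: list[str] = []
        async for chunk in self._api_stream(request):  # hypothetical SDK stream
            chunks.append(chunk)
            yield TextDelta(text=chunk)                # real-time delta
        # Hypothetical helper that assembles an LLMResponse from the chunks.
        yield ContentComplete(response=self._build_response("".join(chunks)))
```

The invariant to preserve is the one stated above: deltas first, then exactly one `ContentComplete` carrying the same `LLMResponse` a `call()` would have returned.
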
docs/llm.md
@@ -492,6 +531,42 @@ on failure. Used internally by `AnthropicVertexProvider` for structured output r
 Internally, `strip_markdown_code_block(text)` removes surrounding markdown code fences
 (`` ```...``` ``) before parsing. This is an implementation detail, not part of the public API.
 
+#### `AnthropicVertexAdditionalConfig`
+
+Provider-specific configuration passed via `LLMRequest.additional_config`:
+
+| Field | Type | Default | Description |
+|-------------------------|----------------|---------|-------------------------------------------------|
+| `thinking_budget_tokens` | `int \| None` | `None` | Token budget for extended thinking (enables thinking mode) |
+
+When `thinking_budget_tokens` is set, the provider passes `thinking: {"type": "enabled",
+"budget_tokens": N}` to the API and forces `temperature=1` (Anthropic requirement for
+thinking mode). The response will contain `ThinkingBlock` blocks with the model's
+chain-of-thought reasoning.
+
+```python
+from flowra.llm.providers.anthropic_vertex import AnthropicVertexAdditionalConfig
+
+response = await provider.call(
+    LLMRequest(
+        model="claude-sonnet-4-5@20250929",
+        messages=[UserMessage(blocks=[TextBlock(text="Solve this step by step...")])],
+        max_tokens=8192,
+        additional_config={"thinking_budget_tokens": 4000},
+    )
+)
+
+# response.message.blocks may contain ThinkingBlock + TextBlock
+for block in response.message.blocks:
+    if isinstance(block, ThinkingBlock):
+        print(f"[thinking] {block.text}")
+    elif isinstance(block, TextBlock):
+        print(block.text)
+```
+
+**Note:** Streaming + `json_schema` is not supported with Anthropic — `stream()` falls
+back to non-streaming in that case.
+
 ### `OpenAIProvider`
 
 Implementation for OpenAI-compatible APIs (OpenAI, Inception AI, etc.).

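Combining the two additions documented above (extended thinking plus streaming) would look roughly like this sketch; it reuses only types shown elsewhere in this diff, and the `flowra.llm` imports are assumed to resolve as in the examples above:

```python
from flowra.llm import (  # imports assumed available per the context7 rules above
    ContentComplete,
    LLMRequest,
    TextBlock,
    TextDelta,
    ThinkingDelta,
    UserMessage,
)

request = LLMRequest(
    model="claude-sonnet-4-5@20250929",
    messages=[UserMessage(blocks=[TextBlock(text="Solve this step by step...")])],
    max_tokens=8192,
    additional_config={"thinking_budget_tokens": 4000},
)

async for event in provider.stream(request):
    match event:
        case ThinkingDelta(text=text):
            print(f"\033[2m{text}\033[0m", end="", flush=True)  # dim thinking text
        case TextDelta(text=text):
            print(text, end="", flush=True)
        case ContentComplete(response=response):
            print()  # final newline; response.usage is available here
```
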
docs/llm.md
@@ -573,21 +648,37 @@ removes `additionalProperties`, converts type arrays to `anyOf`).
 
 Provider-specific configuration passed via `LLMRequest.additional_config`:
 
-| Field
-
-| `thinking_level`
+| Field | Type | Default | Description |
+|--------------------|-------------------------------------|---------|----------------------------------------------------------------|
+| `thinking_level` | `genai_types.ThinkingLevel \| None` | `None` | Thinking level (MINIMAL, LOW, MEDIUM, HIGH) — for Gemini 3 |
+| `thinking_budget` | `int \| None` | `None` | Token budget for thinking — for Gemini 2.5 (min 128 for Pro) |
+
+Either field (or both) enables thinking mode. `thinking_level` controls reasoning
+depth for Gemini 3 models. `thinking_budget` sets a token budget for Gemini 2.5
+models (setting to 0 disables thinking on Flash; minimum 128 on Pro).
 
 ```python
 from flowra.llm.providers.google_vertex import GoogleVertexAdditionalConfig
 
+# Gemini 3 — thinking level
 response = await provider.call(
     LLMRequest(
-        model="gemini-
+        model="gemini-3-pro-preview",
         messages=[UserMessage(blocks=[TextBlock(text="Solve this step by step...")])],
         max_tokens=4096,
         additional_config={"thinking_level": "medium"},
     )
 )
+
+# Gemini 2.5 — thinking budget
+response = await provider.call(
+    LLMRequest(
+        model="gemini-2.5-pro",
+        messages=[UserMessage(blocks=[TextBlock(text="Solve this step by step...")])],
+        max_tokens=4096,
+        additional_config={"thinking_budget": 4096},
+    )
+)
 ```
 
 ### Adding a new provider

docs/research/strands_comparison.md (new file)
@@ -0,0 +1,46 @@
+# Strands Agents SDK vs Flowra — Comparison (March 2026)
+
+Research date: 2026-03-08
+
+## What Strands has that Flowra doesn't
+
+| Capability | Strands | Flowra | Priority |
+|---|---|---|---|
+| **Streaming** | Full event streaming — each agent step streams to client | `LLMProvider.stream()` with `TextDelta`/`ThinkingDelta`/`ContentComplete` events; `ToolLoopAgent` auto-switches when delta hooks are set | — (implemented) |
+| **Observability (OpenTelemetry)** | Built-in traces, metrics, export to X-Ray/CloudWatch/Jaeger | No — only manual hooks in examples (`on_before_llm_call` etc.) | **High** — necessary for production |
+| **Multi-agent patterns (Swarm, Graph, Workflow)** | Built-in coordinators: orchestrator-worker, peer swarm, DAG graph with auto-parallelization | `Spawn` (parallel children) and `Call` — but no ready-made abstractions like "agent graph" or "swarm" | Medium — our primitives allow building this, but nothing ready-made |
+| **More providers out of the box** | Bedrock, Anthropic, OpenAI, Gemini, Ollama, LiteLLM, llama.cpp | Anthropic Vertex, OpenAI, Google Vertex | Medium — LiteLLM adapter would cover everything |
+| **Guardrails** | Integration with Amazon Bedrock Guardrails — content filtering, topic blocking, PII protection | None | Medium — depends on use case |
+| **A2A (Agent-to-Agent) protocol** | Agents communicate across processes/services via standard protocol | No — agents only within a single runtime | Low for now — relevant for distributed systems |
+| **TypeScript SDK** | Yes (preview) | No | Low — we are a Python library |
+| **Session management with external stores** | DynamoDB, Bedrock AgentCore Memory, custom | InMemory, File, custom (but no ready-made cloud DB adapters) | Medium |
+| **"Agents as tools"** | An agent can be a tool of another agent directly | Requires manual wrapping | Low — `Call`/`Spawn` solve this differently |
+
+## What Flowra has that Strands doesn't (or weaker)
+
+| Capability | Flowra | Strands |
+|---|---|---|
+| **Crash recovery** | Full: persistence after each step, resume after crash | Session persistence exists, but step-level crash recovery — no |
+| **Incremental dirty-tracking** | `Scalar[T]` and `AppendOnlyList[T]` save only changes | Saves state entirely |
+| **State machine with compile-time checks** | `@step`, `Goto`, `Spawn`, `Call` — compiler checks slots and types at class definition time | Model-driven loop — no explicit state machine |
+| **Cooperative interrupts** | `InterruptToken` propagates through entire execution tree | No equivalent |
+| **DI into tool handlers** | `ToolService` marker — services injected into tool functions | Tools receive only tool input |
+
+## Priority action items
+
+1. ~~**Streaming** — most visible gap for user experience.~~ ✅ Implemented: `LLMProvider.stream()`, `on_text_delta`/`on_thinking_delta` hooks, TUI/console examples support streaming.
+2. **Observability** — at minimum OpenTelemetry spans for LLM calls and tool execution. Our hooks are a good foundation.
+3. **More providers** — Ollama/LiteLLM adapter would be useful for local development.
+4. **Ready-made multi-agent patterns** — Graph/Workflow on top of our primitives.
+
+## Sources
+
+- [Introducing Strands Agents (AWS Blog)](https://aws.amazon.com/blogs/opensource/introducing-strands-agents-an-open-source-ai-agents-sdk/)
+- [Strands Agents Documentation](https://strandsagents.com/latest/documentation/docs/)
+- [Technical Deep Dive (AWS Blog)](https://aws.amazon.com/blogs/machine-learning/strands-agents-sdk-a-technical-deep-dive-into-agent-architectures-and-observability/)
+- [Multi-Agent Patterns](https://dev.to/aws-builders/understanding-multi-agent-patterns-in-strands-agent-graph-swarm-and-workflow-4nb8)
+- [Session Management](https://strandsagents.com/latest/documentation/docs/user-guide/concepts/agents/session-management/)
+- [A2A Protocol](https://strandsagents.com/latest/documentation/docs/user-guide/concepts/multi-agent/agent-to-agent/)
+- [Guardrails](https://strandsagents.com/latest/documentation/docs/user-guide/safety-security/guardrails/)
+- [Streaming](https://strandsagents.com/latest/documentation/docs/user-guide/concepts/streaming/)
+- [GitHub - sdk-python](https://github.com/strands-agents/sdk-python)

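On action item 2 (observability): the existing hooks already bracket every LLM call, so an OpenTelemetry span could be hung off them today. A minimal sketch, assuming the hook signatures documented in docs/lib.md above and the `opentelemetry-api` package; this is hypothetical wiring, not part of flowra:

```python
from opentelemetry import trace

from flowra.lib.tool_loop import ToolLoopHooks

tracer = trace.get_tracer("flowra.examples")
_spans: dict[int, trace.Span] = {}  # one in-flight span per request object


def on_before_llm_call(request, context) -> None:  # signature assumed per docs/lib.md
    span = tracer.start_span("llm.call")
    span.set_attribute("llm.model", request.model)
    _spans[id(request)] = span


def on_after_llm_call(request, response, context) -> None:
    span = _spans.pop(id(request), None)
    if span is None:
        return
    if response.usage is not None:
        span.set_attribute("llm.input_tokens", response.usage.input_tokens)
        span.set_attribute("llm.output_tokens", response.usage.output_tokens)
    span.end()


hooks = ToolLoopHooks(
    on_before_llm_call=on_before_llm_call,
    on_after_llm_call=on_after_llm_call,
)
```
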
examples/console_chat.py
@@ -27,7 +27,7 @@ from examples.model_registry import DEFAULT_MODEL, create_router
 from examples.tools import calculate, random_numbers
 from examples.tools.switch_model import create_switch_model_tool
 from flowra.lib.chat import ChatResult, ChatSpec
-from flowra.lib.tool_loop import ToolLoopHooks
+from flowra.lib.tool_loop import ToolLoopAgentContext, ToolLoopHooks
 from flowra.llm import LLMProvider, SystemMessage, TextBlock, Usage
 from flowra.runtime import AgentRuntime, FileSessionStorage
 from flowra.tools import ToolRegistry, get_local_tool

examples/console_chat.py
@@ -128,6 +128,7 @@ async def main() -> None:
     parser.add_argument("--model", default=DEFAULT_MODEL, help="Model key (e.g. anthropic/sonnet)")
     parser.add_argument("--resume", metavar="SESSION_ID", help="Resume a session ('last' for most recent)")
     parser.add_argument("--input", metavar="MESSAGE", help="Send a single message and exit (batch mode)")
+    parser.add_argument("--stream", action="store_true", help="Enable streaming (print text as it arrives)")
     args = parser.parse_args()
 
     session_id: str | None = None

examples/console_chat.py
@@ -160,9 +161,17 @@
         ],
     )
 
+    def on_text_delta(text: str, context: ToolLoopAgentContext) -> None:
+        print(text, end="", flush=True)
+
+    def on_thinking_delta(text: str, context: ToolLoopAgentContext) -> None:
+        print(f"\033[2m{text}\033[0m", end="", flush=True)
+
     hooks = ToolLoopHooks(
         on_before_llm_call=log_before_llm_call,
         on_after_llm_call=log_after_llm_call,
+        on_text_delta=on_text_delta if args.stream else None,
+        on_thinking_delta=on_thinking_delta if args.stream else None,
     )
     storage = FileSessionStorage(base_dir=_SESSION_DIR, session_id=session_id)
     runtime = AgentRuntime(

examples/llm_routing.py
@@ -1,9 +1,10 @@
 """LLM router for chat examples — routes requests to providers by model key."""
 
 import dataclasses
+from collections.abc import AsyncIterator
 from typing import Any
 
-from flowra.llm import LLMProvider, LLMRequest, LLMResponse
+from flowra.llm import LLMProvider, LLMRequest, LLMResponse, StreamEvent
 
 __all__ = ["ChatLLMRouter", "ModelEntry"]
 

examples/llm_routing.py
@@ -25,11 +26,20 @@ class ChatLLMRouter(LLMProvider):
     def available_models(self) -> list[str]:
         return sorted(self.__models)
 
-
+    def __resolve(self, request: LLMRequest) -> tuple[LLMProvider, LLMRequest]:
         entry = self.__models[request.model]
         actual_request = dataclasses.replace(
             request,
             model=entry.model_id,
             additional_config={**entry.additional_config, **request.additional_config},
         )
-        return
+        return entry.provider, actual_request
+
+    async def call(self, request: LLMRequest) -> LLMResponse:
+        provider, actual_request = self.__resolve(request)
+        return await provider.call(actual_request)
+
+    async def stream(self, request: LLMRequest) -> AsyncIterator[StreamEvent]:
+        provider, actual_request = self.__resolve(request)
+        async for event in provider.stream(actual_request):
+            yield event

examples/menu_agent_class.py
@@ -4,7 +4,7 @@ Same menu/calc/echo demo as menu_agent.py, but using the Agent base class
 with type-based references and direct Goto/Call/Spawn constructors.
 
 Usage:
-    uv run python
+    uv run python examples/menu_agent_class.py
 """
 
 import asyncio

examples/model_registry.py
@@ -56,6 +56,11 @@ def create_router() -> ChatLLMRouter:
     anthropic_provider = AnthropicVertexProvider(project=project, location=location, credentials=credentials_b64)
 
     models["anthropic/sonnet-4-5"] = ModelEntry(provider=anthropic_provider, model_id="claude-sonnet-4-5@20250929")
+    models["anthropic/sonnet-4-5-think"] = ModelEntry(
+        provider=anthropic_provider,
+        model_id="claude-sonnet-4-5@20250929",
+        additional_config={"thinking_budget_tokens": 4000},
+    )
     models["anthropic/haiku-4-5"] = ModelEntry(provider=anthropic_provider, model_id="claude-haiku-4-5@20251001")
 
     # --- OpenAI ---
|