flowra 0.0.1.dev2__tar.gz → 0.0.2.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.github/workflows/publish.yml +1 -1
  2. flowra-0.0.2.dev5/CHANGELOG.md +31 -0
  3. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/CLAUDE.md +8 -3
  4. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/PKG-INFO +7 -2
  5. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/README.md +6 -1
  6. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/context7.json +16 -7
  7. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/agent.md +22 -1
  8. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/architecture.md +3 -2
  9. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/lib.md +23 -11
  10. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/llm.md +99 -8
  11. flowra-0.0.2.dev5/docs/research/strands_comparison.md +46 -0
  12. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/console_chat.py +10 -1
  13. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/llm_routing.py +13 -3
  14. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/menu_agent.py +1 -1
  15. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/menu_agent_class.py +1 -1
  16. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/model_registry.py +5 -0
  17. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tui_chat.py +130 -54
  18. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/__init__.py +2 -0
  19. flowra-0.0.2.dev5/flowra/agent/interrupt_helpers.py +50 -0
  20. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/__init__.py +8 -0
  21. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/agent.py +32 -3
  22. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/context.py +1 -1
  23. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/hook_executor.py +32 -0
  24. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/hooks.py +47 -1
  25. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/__init__.py +5 -0
  26. flowra-0.0.2.dev5/flowra/llm/provider.py +19 -0
  27. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/providers/__init__.py +3 -2
  28. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/providers/anthropic_vertex.py +56 -12
  29. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/providers/google_vertex.py +93 -14
  30. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/providers/openai.py +19 -1
  31. flowra-0.0.2.dev5/flowra/llm/stream.py +28 -0
  32. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/engine.py +2 -2
  33. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/version.py +1 -1
  34. flowra-0.0.2.dev5/tests/agent/test_with_interrupt.py +170 -0
  35. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/test_tool_loop_agent.py +303 -1
  36. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_anthropic_e2e.py +74 -0
  37. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_anthropic_vertex.py +83 -1
  38. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_google_vertex.py +94 -8
  39. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_google_vertex_e2e.py +22 -0
  40. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_openai_e2e.py +26 -0
  41. flowra-0.0.2.dev5/tests/llm/test_stream.py +37 -0
  42. flowra-0.0.1.dev2/CHANGELOG.md +0 -21
  43. flowra-0.0.1.dev2/flowra/llm/provider.py +0 -13
  44. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.claude/commands/update-pricing.md +0 -0
  45. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.env.example +0 -0
  46. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.github/workflows/master.yml +0 -0
  47. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.github/workflows/pull_request.yml +0 -0
  48. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.github/workflows/pull_request_e2e.yml +0 -0
  49. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.gitignore +0 -0
  50. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/.python-version +0 -0
  51. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/LICENSE +0 -0
  52. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/Makefile +0 -0
  53. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_plan.md +0 -0
  54. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step1_structure.md +0 -0
  55. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step2_code_style.md +0 -0
  56. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step3_documentation.md +0 -0
  57. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step4_doc_readability.md +0 -0
  58. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step5_doc_audit.md +0 -0
  59. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/review_prompts/step6_tests.md +0 -0
  60. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/runtime.md +0 -0
  61. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/todo.md +0 -0
  62. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/docs/tools.md +0 -0
  63. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/__init__.py +0 -0
  64. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/app_agent.py +0 -0
  65. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/llm_logging.py +0 -0
  66. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/system_prompt.txt +0 -0
  67. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tools/__init__.py +0 -0
  68. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tools/calculator.py +0 -0
  69. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tools/random_numbers.py +0 -0
  70. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/examples/tools/switch_model.py +0 -0
  71. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/__init__.py +0 -0
  72. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/agent.py +0 -0
  73. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/agent_def.py +0 -0
  74. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/agent_registry.py +0 -0
  75. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/agent_store.py +0 -0
  76. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/compile.py +0 -0
  77. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/interrupt_token.py +0 -0
  78. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/service_locator.py +0 -0
  79. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/step_decorator.py +0 -0
  80. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/agent/stored_values.py +0 -0
  81. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/__init__.py +0 -0
  82. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/__init__.py +0 -0
  83. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/agent.py +4 -4
  84. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/config.py +0 -0
  85. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/hook_executor.py +0 -0
  86. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/hooks.py +0 -0
  87. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/chat/spec.py +0 -0
  88. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/config_value.py +0 -0
  89. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/llm_config.py +0 -0
  90. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/_tool_call_agent.py +0 -0
  91. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/cache.py +0 -0
  92. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/config.py +0 -0
  93. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/lib/tool_loop/spec.py +0 -0
  94. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/_base.py +0 -0
  95. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/blocks.py +0 -0
  96. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/messages.py +0 -0
  97. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/pricing/__init__.py +0 -0
  98. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/pricing/anthropic.py +0 -0
  99. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/pricing/google.py +0 -0
  100. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/pricing/openai.py +0 -0
  101. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/request.py +0 -0
  102. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/response.py +0 -0
  103. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/schema_formatting.py +0 -0
  104. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/schema_validation.py +0 -0
  105. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/llm/tools.py +0 -0
  106. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/py.typed +0 -0
  107. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/__init__.py +0 -0
  108. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/_sealed_scope.py +0 -0
  109. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/execution.py +0 -0
  110. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/interrupt.py +0 -0
  111. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/runtime.py +0 -0
  112. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/runtime_scope.py +0 -0
  113. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/serialization.py +0 -0
  114. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/storage/__init__.py +0 -0
  115. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/storage/file.py +0 -0
  116. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/storage/in_memory.py +0 -0
  117. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/runtime/storage/session_storage.py +0 -0
  118. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/__init__.py +0 -0
  119. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/local_tool.py +0 -0
  120. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/mcp_connection.py +0 -0
  121. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/tool_group.py +0 -0
  122. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/tool_registry.py +0 -0
  123. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/flowra/tools/types.py +0 -0
  124. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/pyproject.toml +0 -0
  125. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/__init__.py +0 -0
  126. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/__init__.py +0 -0
  127. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_agent.py +0 -0
  128. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_agent_def.py +0 -0
  129. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_agent_registry.py +0 -0
  130. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_compile.py +0 -0
  131. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_step_ref.py +0 -0
  132. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/agent/test_values.py +0 -0
  133. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/__init__.py +0 -0
  134. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/test_chat_agent.py +0 -0
  135. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/test_config_value.py +0 -0
  136. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/test_tool_call_agent.py +0 -0
  137. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/tool_loop/__init__.py +0 -0
  138. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/lib/tool_loop/test_cache.py +0 -0
  139. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/__init__.py +0 -0
  140. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/pricing/__init__.py +0 -0
  141. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/pricing/test_anthropic.py +0 -0
  142. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/pricing/test_google.py +0 -0
  143. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/pricing/test_openai.py +0 -0
  144. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/__init__.py +0 -0
  145. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/providers/test_openai_provider.py +0 -0
  146. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/test_metadata.py +0 -0
  147. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/test_response.py +0 -0
  148. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/test_schema_formatting.py +0 -0
  149. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/llm/test_schema_validation.py +0 -0
  150. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/__init__.py +0 -0
  151. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/storage/__init__.py +0 -0
  152. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/storage/test_file.py +0 -0
  153. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/storage/test_in_memory.py +0 -0
  154. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_engine.py +0 -0
  155. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_interrupt.py +0 -0
  156. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_persistence.py +0 -0
  157. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_runtime.py +0 -0
  158. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_scope.py +0 -0
  159. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/runtime/test_serialization.py +0 -0
  160. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/__init__.py +0 -0
  161. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/test_local_tool.py +0 -0
  162. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/test_mcp_connection.py +0 -0
  163. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/test_tool_group.py +0 -0
  164. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/tests/tools/test_tool_registry.py +0 -0
  165. {flowra-0.0.1.dev2 → flowra-0.0.2.dev5}/uv.lock +0 -0
@@ -56,7 +56,7 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v6
         with:
-          token: ${{ secrets.GITHUB_TOKEN }}
+          ssh-key: ${{ secrets.DEPLOY_KEY }}
       - name: Set up uv
         uses: astral-sh/setup-uv@v7
       - name: Install deps
@@ -0,0 +1,31 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com),
+and this project adheres to [Semantic Versioning](https://semver.org).
+
+## [Unreleased]
+
+### Added
+- **Streaming**: `LLMProvider.stream()` method returns `AsyncIterator[StreamEvent]` with `TextDelta`, `ThinkingDelta`, and `ContentComplete` events. All three built-in providers implement real-time streaming. Default fallback calls `call()` and yields `ContentComplete`.
+- **Anthropic thinking**: `AnthropicVertexAdditionalConfig` with `thinking_budget_tokens` enables extended thinking on Claude models. Thinking blocks are now parsed from Anthropic responses (`ThinkingBlock`).
+- **Streaming hooks**: `on_text_delta` and `on_thinking_delta` hooks in `ToolLoopHooks`. When set, the agent automatically uses `stream()` instead of `call()`. Streaming respects `InterruptToken` — exits immediately even if the LLM is blocked.
+- **`with_interrupt`**: Generic async iterator wrapper that races `__anext__()` against `InterruptToken.wait()` via `asyncio.wait(FIRST_COMPLETED)`. Used internally by `ToolLoopAgent` for streaming; available as `from flowra.agent import with_interrupt`.
+- **Thinking model entry**: `anthropic/sonnet-4-5-think` in example model registry with 4000 token thinking budget.
+- **Console/TUI streaming**: `--stream` flag for `console_chat.py` and `tui_chat.py` examples.
+
+## [0.0.1] - 2026-03-07
+
+Initial release.
+
+### Added
+- State machine agents with `@step` methods, `Goto`, `Spawn`, and stored values (`Scalar`, `AppendOnlyList`)
+- Provider-agnostic LLM abstraction (`LLMProvider`, `LLMRequest`, `LLMResponse`)
+- LLM providers: `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`
+- Tool integration: `@tool` decorator, MCP server support, DI into tool handlers
+- Execution engine with persistence, crash recovery, and cooperative interrupts
+- Pre-built agents: `ChatAgent` (multi-turn chat) and `ToolLoopAgent` (tool loop with hooks)
+- `ChatHooks` with `on_save_turn_messages` for transient message filtering
+- Optional provider dependencies via extras: `flowra[anthropic]`, `flowra[openai]`, `flowra[google]`, `flowra[all]`
+- Python 3.12, 3.13, 3.14 support
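
Taken together, the streaming and interrupt entries under [Unreleased] compose as follows. A hedged sketch, not code from the package: `provider`, `request`, and `token` are placeholders, and only the `with_interrupt` and stream-event import paths are stated in this changelog.

```python
# Sketch only: stream an LLM response, stopping early if the interrupt token fires.
from flowra.agent import with_interrupt
from flowra.llm import ContentComplete, TextDelta, ThinkingDelta


async def stream_answer(provider, request, token):
    async for event in with_interrupt(provider.stream(request), token):
        match event:
            case TextDelta(text=text):
                print(text, end="", flush=True)
            case ThinkingDelta(text=text):
                print(f"[thinking] {text}", end="", flush=True)
            case ContentComplete(response=response):
                return response  # the same LLMResponse that call() would return
    return None  # the token fired mid-stream; no final response arrived
```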
@@ -30,12 +30,13 @@ Python 3.12+ library. Package manager: **uv**. All config in `pyproject.toml`.

 Provider-agnostic interface for calling LLMs:

-- `LLMProvider` (abc) — single method `async call(LLMRequest) -> LLMResponse`
-- `LLMRequest` — model, messages, tools, json_schema, temperature, max_tokens, stop_sequences
+- `LLMProvider` (abc) — `async call(LLMRequest) -> LLMResponse` and `async stream(LLMRequest) -> AsyncIterator[StreamEvent]`
+- `LLMRequest` — model, messages, tools, json_schema, temperature, max_tokens, stop_sequences, additional_config, max_schema_retries
 - `LLMResponse` — message (AssistantMessage), stop_reason, usage
+- `StreamEvent` = `TextDelta | ThinkingDelta | ContentComplete` — stream events for real-time token delivery
 - `Usage` — input_tokens, output_tokens, cache_read_input_tokens, cache_creation_input_tokens, cost_usd. Token contract: `input_tokens` excludes cached tokens
 - Messages: `SystemMessage`, `UserMessage`, `AssistantMessage` — system messages must be at the beginning of the messages list
-- Blocks: `TextBlock` (with `cache: bool` for prompt caching), `ImageBlock`, `ToolUseBlock`, `ToolResultBlock`
+- Blocks: `TextBlock` (with `cache: bool` for prompt caching), `ImageBlock`, `ToolUseBlock`, `ToolResultBlock`, `ThinkingBlock`
 - `Tool` — name, description, input_schema, output_schema, cache

 Providers live in `flowra/llm/providers/`. Currently: `AnthropicVertexProvider`, `OpenAIProvider`, `GoogleVertexProvider`.
@@ -56,3 +57,7 @@ Review prompts live in `docs/review_prompts/`:
 ### Tests

 Test directory structure mirrors `flowra/`. E2E tests use `_e2e` suffix (e.g., `test_anthropic_e2e.py`). Environment variables loaded from `.env` via Makefile.
+
+## Maintenance
+
+- **`context7.json`** — project description for [Context7](https://context7.com). Must be updated when adding new features, changing public APIs, or modifying architecture. Keep rules in sync with actual capabilities.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flowra
-Version: 0.0.1.dev2
+Version: 0.0.2.dev5
 Summary: Flowra — flow infrastructure for building stateful LLM agents
 Project-URL: Repository, https://github.com/anna-money/flowra
 Project-URL: Changelog, https://github.com/anna-money/flowra/blob/master/CHANGELOG.md
@@ -33,6 +33,11 @@ Description-Content-Type: text/markdown

 # Flowra

+[![PyPI](https://img.shields.io/pypi/v/flowra)](https://pypi.org/project/flowra/)
+[![Python](https://img.shields.io/pypi/pyversions/flowra)](https://pypi.org/project/flowra/)
+[![License](https://img.shields.io/pypi/l/flowra)](https://github.com/anna-money/flowra/blob/master/LICENSE)
+[![CI](https://github.com/anna-money/flowra/actions/workflows/master.yml/badge.svg)](https://github.com/anna-money/flowra/actions/workflows/master.yml)
+
 **Flow infra** for building stateful, persistent LLM agents with tool use,
 parallel execution, and crash recovery. Requires Python 3.12+.
@@ -45,7 +50,7 @@ parallel execution, and crash recovery. Requires Python 3.12+.
 - **Tool integration** — `@tool` decorator for local functions, MCP server support,
   DI into tool handlers
 - **LLM abstraction** — provider-agnostic `LLMProvider` interface with immutable
-  message types (ships `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`)
+  message types and real-time streaming (ships `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`)
 - **Cooperative interrupts** — `InterruptToken` for graceful cancellation across
   the entire execution tree
 - **Pre-built agents** — `ChatAgent` (multi-turn chat with session history) and
@@ -1,5 +1,10 @@
 # Flowra

+[![PyPI](https://img.shields.io/pypi/v/flowra)](https://pypi.org/project/flowra/)
+[![Python](https://img.shields.io/pypi/pyversions/flowra)](https://pypi.org/project/flowra/)
+[![License](https://img.shields.io/pypi/l/flowra)](https://github.com/anna-money/flowra/blob/master/LICENSE)
+[![CI](https://github.com/anna-money/flowra/actions/workflows/master.yml/badge.svg)](https://github.com/anna-money/flowra/actions/workflows/master.yml)
+
 **Flow infra** for building stateful, persistent LLM agents with tool use,
 parallel execution, and crash recovery. Requires Python 3.12+.
@@ -12,7 +17,7 @@ parallel execution, and crash recovery. Requires Python 3.12+.
 - **Tool integration** — `@tool` decorator for local functions, MCP server support,
   DI into tool handlers
 - **LLM abstraction** — provider-agnostic `LLMProvider` interface with immutable
-  message types (ships `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`)
+  message types and real-time streaming (ships `AnthropicVertexProvider`, `GoogleVertexProvider`, `OpenAIProvider`)
 - **Cooperative interrupts** — `InterruptToken` for graceful cancellation across
   the entire execution tree
 - **Pre-built agents** — `ChatAgent` (multi-turn chat with session history) and
@@ -1,7 +1,7 @@
 {
   "$schema": "https://context7.com/schema/context7.json",
   "projectTitle": "flowra",
-  "description": "Flow infrastructure for building stateful, persistent LLM agents with tool use, parallel execution, and crash recovery. Requires Python 3.12+. Features state machine agents with @step methods, persistent state (Scalar, AppendOnlyList) with dirty tracking, tool integration (@tool decorator + MCP), provider-agnostic LLM abstraction, cooperative interrupts, and pre-built ChatAgent/ToolLoopAgent.",
+  "description": "Flow infrastructure for building stateful, persistent LLM agents with tool use, parallel execution, crash recovery, and real-time streaming. Requires Python 3.12+. Features state machine agents with @step methods, persistent state (Scalar, AppendOnlyList) with dirty tracking, tool integration (@tool decorator + MCP), provider-agnostic LLM abstraction with streaming support, extended thinking (Anthropic, Google), cooperative interrupts, and pre-built ChatAgent/ToolLoopAgent.",
   "folders": ["flowra", "examples"],
   "excludeFolders": ["tests", ".github", "logs", ".chat_sessions"],
   "excludeFiles": [],
@@ -11,15 +11,22 @@
     "PACKAGE STRUCTURE: flowra/llm/ (LLM abstraction), flowra/tools/ (tool system), flowra/agent/ (state machine framework), flowra/runtime/ (execution engine), flowra/lib/ (pre-built agents)",
     "Dependency graph: llm (no deps) -> tools (llm) -> agent (no deps) -> runtime (agent, llm) -> lib (agent, llm, tools, runtime). No circular dependencies",

-    "LLM ABSTRACTION: LLMProvider is the core interface — single method: async call(LLMRequest) -> LLMResponse",
-    "LLMRequest contains: model, messages, tools, json_schema, temperature, max_tokens, stop_sequences",
+    "LLM ABSTRACTION: LLMProvider is the core interface — two methods: async call(LLMRequest) -> LLMResponse and async stream(LLMRequest) -> AsyncIterator[StreamEvent]",
+    "stream() returns StreamEvent = TextDelta | ThinkingDelta | ContentComplete. TextDelta/ThinkingDelta carry incremental text; ContentComplete is always last and contains the full LLMResponse",
+    "Default stream() implementation calls call() and yields a single ContentComplete — providers override for real-time streaming",
+    "LLMRequest contains: model, messages, tools, json_schema, temperature, max_tokens, stop_sequences, additional_config, max_schema_retries",
     "LLMResponse contains: message (AssistantMessage), stop_reason (StopReason), usage (Usage)",
     "Usage contains: input_tokens, output_tokens, cache_read_input_tokens, cache_creation_input_tokens, cost_usd. Token contract: input_tokens excludes cached tokens",
     "Messages: SystemMessage, UserMessage, AssistantMessage. System messages must be at the beginning of the messages list",
     "Blocks: TextBlock (with cache: bool for prompt caching), ImageBlock, ToolUseBlock, ToolResultBlock, ThinkingBlock",
+    "ThinkingBlock holds reasoning/thinking text from models with extended thinking (Anthropic Claude, Google Gemini). Not sent back to the API",
     "Tool definition for LLM: Tool(name, description, input_schema, output_schema, cache)",
-    "Three built-in providers: AnthropicVertexProvider (Claude via Vertex AI), OpenAIProvider (OpenAI-compatible APIs), GoogleVertexProvider (Gemini via Vertex AI)",
+    "Three built-in providers: AnthropicVertexProvider (Claude via Vertex AI), OpenAIProvider (OpenAI-compatible APIs), GoogleVertexProvider (Gemini via Vertex AI). All three implement stream() with real-time deltas",
     "Import providers: from flowra.llm.providers.anthropic_vertex import AnthropicVertexProvider, from flowra.llm.providers.openai import OpenAIProvider, from flowra.llm.providers.google_vertex import GoogleVertexProvider",
+    "Import stream types: from flowra.llm import TextDelta, ThinkingDelta, ContentComplete",
+    "AnthropicVertexAdditionalConfig(thinking_budget_tokens: int) enables extended thinking on Claude. Pass via additional_config={'thinking_budget_tokens': 4000}",
+    "GoogleVertexAdditionalConfig(thinking_level: ThinkingLevel, thinking_budget: int) configures Gemini thinking. Pass via additional_config={'thinking_level': 'medium'} or {'thinking_budget': 4096}",
+    "AnthropicVertexProvider falls back to non-streaming when json_schema is set (retry loop requires full responses)",

     "TOOL SYSTEM: @tool decorator turns a Python function into a tool definition",
     "get_local_tool(func) wraps a @tool-decorated function into a LocalTool",
@@ -49,6 +56,7 @@

     "COOPERATIVE INTERRUPTS: InterruptToken for graceful cancellation across the entire execution tree",
     "InterruptTokenSource creates tokens: source = InterruptTokenSource(); token = source.token; source.interrupt()",
+    "with_interrupt(ait, token) wraps any AsyncIterator so it exits immediately when token fires — races __anext__() vs token.wait() via asyncio.wait. Import: from flowra.agent import with_interrupt",

     "PRE-BUILT AGENTS — ChatAgent: multi-turn chat with session history persistence",
     "ChatAgent usage: runtime.run(agent=ChatAgent, step=ChatAgent.process_message, spec=ChatSpec(user_message=text))",
@@ -61,19 +69,20 @@
     "PRE-BUILT AGENTS — ToolLoopAgent: single-turn LLM tool loop with hooks and caching",
     "ToolLoopAgent sends messages to LLM, executes tool calls, feeds results back, repeats until done",
     "ToolLoopConfig configures ToolLoopAgent: ToolLoopConfig(llm_config=LLMConfig(model='...'), cache_config=CacheConfig(...))",
-    "ToolLoopHooks provides lifecycle callbacks: on_user_message, on_start_iteration, on_before_llm_call, on_after_llm_call, on_before_tool_call, on_after_tool_call, on_result_message, on_thinking, on_text_reasoning, on_message_accepted",
+    "ToolLoopHooks provides lifecycle callbacks: on_user_message, on_message_accepted, on_start_iteration, on_before_llm_call, on_text_delta, on_thinking_delta, on_after_llm_call, on_text_reasoning, on_thinking, on_result_message, on_before_tool_call, on_after_tool_call",
+    "When on_text_delta or on_thinking_delta hooks are set, ToolLoopAgent automatically uses provider.stream() instead of provider.call()",

     "CACHING: CacheConfig(system_prompt, tools, messages) controls prompt caching strategies",
     "Predefined configs: CACHE_ALL, CACHE_SESSION, CACHE_MANUAL, NO_CACHE",
     "Cache strategies: cache_last_system_prompt, cache_last_tool, cache_last_message, cache_last_session_message, no_cache_*",

-    "CONFIG: LLMConfig(model, temperature, max_tokens, stop_sequences) configures LLM calls",
+    "CONFIG: LLMConfig(model, temperature, max_tokens, stop_sequences, additional_config) configures LLM calls",
     "ConfigValue[T] wraps static or dynamic (callable) config values: ConfigValue[str] | ConfigValue[Callable[[], str]]",

     "QUICK START: Create provider -> create ToolRegistry -> create Config -> create AgentRuntime -> runtime.run()",
     "Import ChatAgent: from flowra.lib.chat import ChatAgent, ChatConfig, ChatHooks, ChatResult, ChatSpec",
     "Import LLMConfig: from flowra.lib import LLMConfig",
-    "Import LLM types: from flowra.llm import LLMProvider, SystemMessage, TextBlock, Usage",
+    "Import LLM types: from flowra.llm import LLMProvider, SystemMessage, TextBlock, Usage, TextDelta, ThinkingDelta, ContentComplete",
     "Import runtime: from flowra.runtime import AgentRuntime, FileSessionStorage",
     "Import tools: from flowra.tools import ToolRegistry, get_local_tool, tool"
   ],
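
The CACHING and CONFIG rules above name the config shapes without assembling one. A minimal sketch under stated assumptions: `ToolLoopConfig` and `CACHE_ALL` are presumed exported from `flowra.lib.tool_loop` (only `LLMConfig`'s import path appears in the rules).

```python
# Sketch of the config shapes named in the rules; the ToolLoopConfig/CACHE_ALL
# import paths are assumed, not confirmed by this diff.
from flowra.lib import LLMConfig
from flowra.lib.tool_loop import CACHE_ALL, ToolLoopConfig

config = ToolLoopConfig(
    llm_config=LLMConfig(model="anthropic/sonnet-4-5", max_tokens=2048),
    cache_config=CACHE_ALL,  # predefined: cache system prompt, tools, and messages
)
```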
@@ -11,7 +11,8 @@ flowra/agent/
 ├── agent_def.py          # Control flow (Goto, Call, Spawn), type aliases, resolve functions
 ├── agent_store.py        # AgentStore (ABC) — flush interface
 ├── service_locator.py    # ServiceLocator (ABC) — service provision and access
-├── interrupt_token.py    # InterruptToken (ABC) — cooperative interrupt interface
+├── interrupt_token.py    # InterruptToken (ABC) — cooperative interrupt interface
+├── interrupt_helpers.py  # with_interrupt() — race async iterators against InterruptToken
 ├── agent_registry.py     # AgentRegistry — hierarchical agent name/type resolution
 ├── stored_values.py      # Scalar[T], AppendOnlyList[T], slot() — dirty-tracked state containers
 └── compile.py            # compile_agent() — introspection, slot discovery, type registry
@@ -543,6 +544,26 @@ class MyAgent(Agent):
         # ... continue processing ...
 ```

+### `with_interrupt` — racing async iterators
+
+`with_interrupt` wraps any `AsyncIterator[T]` so it exits immediately when
+the token fires — even if `__anext__()` is blocked on I/O:
+
+```python
+from flowra.agent import InterruptToken, with_interrupt
+
+async def consume(stream: AsyncIterator[str], token: InterruptToken) -> list[str]:
+    items = []
+    async for item in with_interrupt(stream, token):
+        items.append(item)
+    return items  # partial results if interrupted
+```
+
+On each iteration, `__anext__()` and `token.wait()` are raced via
+`asyncio.wait(FIRST_COMPLETED)`. If the token wins, the underlying iterator
+is closed (`aclose()`) and the wrapper ends. This is used internally by
+`ToolLoopAgent` to interrupt LLM streaming immediately.
+
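
The new `flowra/agent/interrupt_helpers.py` is not shown in this diff, so here is a minimal sketch of the racing pattern the section above describes. Assumptions: `token.wait()` returns an awaitable, and cancelling a pending `__anext__()` is safe for the wrapped iterator.

```python
# Sketch only, not the flowra implementation.
import asyncio
from collections.abc import AsyncIterator


async def with_interrupt_sketch[T](ait: AsyncIterator[T], token) -> AsyncIterator[T]:
    wait_task = asyncio.ensure_future(token.wait())  # assumed awaitable
    try:
        while True:
            next_task = asyncio.ensure_future(anext(ait))
            done, _ = await asyncio.wait(
                {next_task, wait_task}, return_when=asyncio.FIRST_COMPLETED
            )
            if next_task not in done:  # the token won the race
                next_task.cancel()
                return
            try:
                yield next_task.result()
            except StopAsyncIteration:
                return  # the wrapped iterator finished normally
    finally:
        wait_task.cancel()
        if (aclose := getattr(ait, "aclose", None)) is not None:
            await aclose()  # close the wrapped iterator, as the docs note
```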
 ## Dependency injection

 ### Constructor injection
@@ -20,7 +20,8 @@ No circular dependencies.
 ### `llm` — LLM abstraction

 Protocol layer between the SDK and any LLM. The core abstraction is `LLMProvider` —
-a single-method interface (`call(LLMRequest) → LLMResponse`). Request and response use
+`call(LLMRequest) → LLMResponse` for full responses, and `stream(LLMRequest) →
+AsyncIterator[StreamEvent]` for real-time text/thinking deltas. Request and response use
 a shared set of message and block types. Ships three providers:
 `AnthropicVertexProvider` (Claude via Vertex AI), `OpenAIProvider` (OpenAI-compatible APIs),
 `GoogleVertexProvider` (Gemini via Vertex AI). → [docs/llm.md](llm.md)
@@ -66,7 +67,7 @@ User message


 ChatAgent.process_message
-  Delegates to ToolLoopAgent via Call
+  Spawns ToolLoopAgent via Call inside Spawn

 ToolLoopAgent.start
   │ Saves user message to turn messages
@@ -305,6 +305,8 @@ runtime = AgentRuntime(
 | `on_before_llm_call`  | `OnBeforeLLMCall \| OnBeforeLLMCallAsync \| None`   | `None` |
 | `on_after_llm_call`   | `OnAfterLLMCall \| OnAfterLLMCallAsync \| None`     | `None` |
 | `on_result_message`   | `OnResultMessage \| OnResultMessageAsync \| None`   | `None` |
+| `on_text_delta`       | `OnTextDelta \| OnTextDeltaAsync \| None`           | `None` |
+| `on_thinking_delta`   | `OnThinkingDelta \| OnThinkingDeltaAsync \| None`   | `None` |
 | `on_text_reasoning`   | `OnTextReasoning \| OnTextReasoningAsync \| None`   | `None` |
 | `on_thinking`         | `OnThinking \| OnThinkingAsync \| None`             | `None` |
 | `on_before_tool_call` | `OnBeforeToolCall \| OnBeforeToolCallAsync \| None` | `None` |
@@ -328,30 +330,37 @@ Hooks fire in this order during a single tool loop iteration:
 4. **`on_before_llm_call`** — before each LLM request. Receives `LLMRequest` and context.
    Observational only (no return value).

-5. **`on_after_llm_call`** — after each LLM response. Receives `LLMRequest`, `LLMResponse`,
+5. **`on_text_delta`** / **`on_thinking_delta`** — when either hook is set, the agent
+   uses `provider.stream()` instead of `provider.call()`. `on_text_delta` fires for
+   each incremental text chunk; `on_thinking_delta` fires for each thinking chunk.
+   These fire **during** the LLM call, before `on_after_llm_call`. The stream is
+   wrapped with `with_interrupt`, so an `InterruptToken` signal exits the stream
+   immediately — even if the LLM is slow to produce the next token.
+
+6. **`on_after_llm_call`** — after each LLM response. Receives `LLMRequest`, `LLMResponse`,
    and context. Observational only.

-6. **`on_text_reasoning`** — for each `TextBlock` in the assistant response. Fires
-   regardless of stop reason — useful for streaming text output even when tool calls
+7. **`on_text_reasoning`** — for each `TextBlock` in the assistant response. Fires
+   regardless of stop reason — useful for observing text output even when tool calls
    are also present.

-7. **`on_thinking`** — for each `ThinkingBlock` in the assistant response. Fires
+8. **`on_thinking`** — for each `ThinkingBlock` in the assistant response. Fires
    for models with thinking/reasoning enabled (e.g. extended thinking).

 Then the flow branches based on stop reason:

 - **If `TOOL_USE`:**

-  8. **`on_before_tool_call`** — before each tool execution. Return `BeforeToolCallResult`
+  9. **`on_before_tool_call`** — before each tool execution. Return `BeforeToolCallResult`
      with `amended_tool_use` to modify tool parameters.

-  9. **`on_after_tool_call`** — after each tool execution. Return `AfterToolCallResult`
-     with `amended_result` and/or `additional_messages`.
+  10. **`on_after_tool_call`** — after each tool execution. Return `AfterToolCallResult`
+      with `amended_result` and/or `additional_messages`.

 - **If `END_TURN`:**

-  10. **`on_result_message`** — return `ResultMessageResult` with `continue_messages`
-      to force the loop to continue instead of finishing.
+  11. **`on_result_message`** — return `ResultMessageResult` with `continue_messages`
+      to force the loop to continue instead of finishing.

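An illustrative sketch of step 11. Hypothetical throughout: the hook signature and the `ResultMessageResult` import path are not shown in this diff; only the type name and its `continue_messages` field come from the text above.

```python
# Hypothetical sketch: signature and import path assumed.
from flowra.lib.tool_loop import ResultMessageResult  # assumed export
from flowra.llm import TextBlock, UserMessage


def on_result_message(message, context):
    text = "".join(b.text for b in message.blocks if isinstance(b, TextBlock))
    if "TODO" in text:  # the model stopped while work remains: force another turn
        nudge = UserMessage(blocks=[TextBlock(text="Finish the remaining TODOs.")])
        return ResultMessageResult(continue_messages=[nudge])
    return ResultMessageResult(continue_messages=[])  # accept the result, end the loop
```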
 ### Hook result types

@@ -515,6 +524,8 @@ OnMessageAccepted / OnMessageAcceptedAsync
 OnStartIteration / OnStartIterationAsync
 OnBeforeLLMCall / OnBeforeLLMCallAsync
 OnAfterLLMCall / OnAfterLLMCallAsync
+OnTextDelta / OnTextDeltaAsync
+OnThinkingDelta / OnThinkingDeltaAsync
 OnTextReasoning / OnTextReasoningAsync
 OnThinking / OnThinkingAsync
 OnResultMessage / OnResultMessageAsync
@@ -546,8 +557,9 @@ ChatAgent
 1. `start` — accepts user message, runs `on_user_message` hook, appends to
    `turn_messages`, flushes, fires `on_message_accepted`, then gotos `call_llm`.
 2. `call_llm` — checks interrupt/finish/max_iterations, runs `on_start_iteration`,
-   builds `LLMRequest`, runs `on_before_llm_call`, calls LLM, runs `on_after_llm_call`
-   and `on_text_reasoning`, then:
+   builds `LLMRequest`, runs `on_before_llm_call`, calls LLM (streaming deltas via
+   `on_text_delta`/`on_thinking_delta` if set), runs `on_after_llm_call`,
+   `on_text_reasoning`, and `on_thinking`, then:
    - `END_TURN` → runs `on_result_message`, returns `ToolLoopResult` (or continues
      if `continue_messages` is non-empty)
    - `TOOL_USE` → runs `on_before_tool_call` for each tool, spawns `ToolCallAgent`
@@ -11,7 +11,8 @@ flowra/llm/
 ├── tools.py              # Tool
 ├── request.py            # LLMRequest
 ├── response.py           # LLMResponse, StopReason, Usage
-├── provider.py           # LLMProvider (abc)
+├── provider.py           # LLMProvider (abc) — call() + stream()
+├── stream.py             # StreamEvent, TextDelta, ThinkingDelta, ContentComplete
 ├── schema_formatting.py  # JSON schema formatting for LLM prompts
 ├── schema_validation.py  # JSON schema validation and markdown stripping
 ├── pricing/
@@ -137,8 +138,8 @@ ToolResultBlock(tool_use_id="toolu_123", content="Division by zero", is_error=Tr
 ### `ThinkingBlock`

 Thinking/reasoning content from the LLM. Produced by models that support extended
-thinking (e.g. Google Gemini with thinking enabled). Not sent back to the API by
-any provider.
+thinking (e.g. Anthropic Claude with `thinking_budget_tokens`, Google Gemini with
+thinking enabled). Not sent back to the API by any provider.

 ```python
 # Usually not created manually — comes from AssistantMessage via LLM
@@ -346,12 +347,17 @@ if response.usage is not None:

 ### `LLMProvider`

-Abstract base class for calling an LLM. Defines a single `call()` method:
+Abstract base class for calling an LLM. Defines `call()` (abstract) and `stream()`
+(optional, with a default fallback):

 ```python
 class LLMProvider(abc.ABC):
     @abc.abstractmethod
     async def call(self, request: LLMRequest) -> LLMResponse: ...
+
+    async def stream(self, request: LLMRequest) -> AsyncIterator[StreamEvent]:
+        response = await self.call(request)
+        yield ContentComplete(response=response)
 ```

 The provider is responsible for converting `LLMRequest` into the target API's format,
@@ -359,6 +365,39 @@ calling the API, and converting the response back to `LLMResponse`. If `json_schema`
 is set, the provider should also handle validation and retries (see
 [Structured output](#structured-output-json-schema)).

+#### Streaming
+
+`stream()` returns an `AsyncIterator[StreamEvent]` that yields incremental events
+as the LLM generates its response. The default implementation calls `call()` and
+yields a single `ContentComplete` event — providers override this for real-time streaming.
+
+**Stream events:**
+
+| Event             | Fields                  | Description                            |
+|-------------------|-------------------------|----------------------------------------|
+| `TextDelta`       | `text: str`             | Incremental text content               |
+| `ThinkingDelta`   | `text: str`             | Incremental thinking/reasoning content |
+| `ContentComplete` | `response: LLMResponse` | Always last — full response            |
+
+`ContentComplete` is always the final event and contains the same `LLMResponse` you
+would get from `call()`.
+
+```python
+async for event in provider.stream(request):
+    match event:
+        case TextDelta(text=text):
+            print(text, end="", flush=True)
+        case ThinkingDelta(text=text):
+            print(f"[thinking] {text}", end="")
+        case ContentComplete(response=response):
+            print()  # newline after streaming
+            # response.message, response.usage, etc. are available here
+```
+
+All three built-in providers implement `stream()` with real-time deltas.
+`AnthropicVertexProvider` falls back to non-streaming when `json_schema` is set
+(the retry loop requires full responses).
+
 ### `AnthropicVertexProvider`

 Implementation for Claude via Vertex AI.
@@ -492,6 +531,42 @@ on failure. Used internally by `AnthropicVertexProvider` for structured output retries.
 Internally, `strip_markdown_code_block(text)` removes surrounding markdown code fences
 (`` ```...``` ``) before parsing. This is an implementation detail, not part of the public API.

+#### `AnthropicVertexAdditionalConfig`
+
+Provider-specific configuration passed via `LLMRequest.additional_config`:
+
+| Field                    | Type          | Default | Description                                                 |
+|--------------------------|---------------|---------|-------------------------------------------------------------|
+| `thinking_budget_tokens` | `int \| None` | `None`  | Token budget for extended thinking (enables thinking mode)  |
+
+When `thinking_budget_tokens` is set, the provider passes `thinking: {"type": "enabled",
+"budget_tokens": N}` to the API and forces `temperature=1` (Anthropic requirement for
+thinking mode). The response will contain `ThinkingBlock` blocks with the model's
+chain-of-thought reasoning.
+
+```python
+from flowra.llm.providers.anthropic_vertex import AnthropicVertexAdditionalConfig
+
+response = await provider.call(
+    LLMRequest(
+        model="claude-sonnet-4-5@20250929",
+        messages=[UserMessage(blocks=[TextBlock(text="Solve this step by step...")])],
+        max_tokens=8192,
+        additional_config={"thinking_budget_tokens": 4000},
+    )
+)
+
+# response.message.blocks may contain ThinkingBlock + TextBlock
+for block in response.message.blocks:
+    if isinstance(block, ThinkingBlock):
+        print(f"[thinking] {block.text}")
+    elif isinstance(block, TextBlock):
+        print(block.text)
+```
+
+**Note:** Streaming + `json_schema` is not supported with Anthropic — `stream()` falls
+back to non-streaming in that case.
+
 ### `OpenAIProvider`

 Implementation for OpenAI-compatible APIs (OpenAI, Inception AI, etc.).
497
572
  Implementation for OpenAI-compatible APIs (OpenAI, Inception AI, etc.).
@@ -573,21 +648,37 @@ removes `additionalProperties`, converts type arrays to `anyOf`).
573
648
 
574
649
  Provider-specific configuration passed via `LLMRequest.additional_config`:
575
650
 
576
- | Field | Type | Default | Description |
577
- |------------------|-------------------------------------|---------|--------------------------------------|
578
- | `thinking_level` | `genai_types.ThinkingLevel \| None` | `None` | Thinking level for extended thinking |
651
+ | Field | Type | Default | Description |
652
+ |--------------------|-------------------------------------|---------|----------------------------------------------------------------|
653
+ | `thinking_level` | `genai_types.ThinkingLevel \| None` | `None` | Thinking level (MINIMAL, LOW, MEDIUM, HIGH) — for Gemini 3 |
654
+ | `thinking_budget` | `int \| None` | `None` | Token budget for thinking — for Gemini 2.5 (min 128 for Pro) |
655
+
656
+ Either field (or both) enables thinking mode. `thinking_level` controls reasoning
657
+ depth for Gemini 3 models. `thinking_budget` sets a token budget for Gemini 2.5
658
+ models (setting to 0 disables thinking on Flash; minimum 128 on Pro).
579
659
 
580
660
  ```python
581
661
  from flowra.llm.providers.google_vertex import GoogleVertexAdditionalConfig
582
662
 
663
+ # Gemini 3 — thinking level
583
664
  response = await provider.call(
584
665
  LLMRequest(
585
- model="gemini-2.5-pro",
666
+ model="gemini-3-pro-preview",
586
667
  messages=[UserMessage(blocks=[TextBlock(text="Solve this step by step...")])],
587
668
  max_tokens=4096,
588
669
  additional_config={"thinking_level": "medium"},
589
670
  )
590
671
  )
672
+
673
+ # Gemini 2.5 — thinking budget
674
+ response = await provider.call(
675
+ LLMRequest(
676
+ model="gemini-2.5-pro",
677
+ messages=[UserMessage(blocks=[TextBlock(text="Solve this step by step...")])],
678
+ max_tokens=4096,
679
+ additional_config={"thinking_budget": 4096},
680
+ )
681
+ )
591
682
  ```
592
683
 
593
684
  ### Adding a new provider
@@ -0,0 +1,46 @@
+# Strands Agents SDK vs Flowra — Comparison (March 2026)
+
+Research date: 2026-03-08
+
+## What Strands has that Flowra doesn't
+
+| Capability | Strands | Flowra | Priority |
+|---|---|---|---|
+| **Streaming** | Full event streaming — each agent step streams to the client | `LLMProvider.stream()` with `TextDelta`/`ThinkingDelta`/`ContentComplete` events; `ToolLoopAgent` auto-switches when delta hooks are set | — (implemented) |
+| **Observability (OpenTelemetry)** | Built-in traces, metrics, export to X-Ray/CloudWatch/Jaeger | No — only manual hooks in examples (`on_before_llm_call` etc.) | **High** — necessary for production |
+| **Multi-agent patterns (Swarm, Graph, Workflow)** | Built-in coordinators: orchestrator-worker, peer swarm, DAG graph with auto-parallelization | `Spawn` (parallel children) and `Call` — but no ready-made abstractions like "agent graph" or "swarm" | Medium — our primitives allow building this, but nothing ready-made |
+| **More providers out of the box** | Bedrock, Anthropic, OpenAI, Gemini, Ollama, LiteLLM, llama.cpp | Anthropic Vertex, OpenAI, Google Vertex | Medium — a LiteLLM adapter would cover everything |
+| **Guardrails** | Integration with Amazon Bedrock Guardrails — content filtering, topic blocking, PII protection | None | Medium — depends on use case |
+| **A2A (Agent-to-Agent) protocol** | Agents communicate across processes/services via a standard protocol | No — agents only within a single runtime | Low for now — relevant for distributed systems |
+| **TypeScript SDK** | Yes (preview) | No | Low — we are a Python library |
+| **Session management with external stores** | DynamoDB, Bedrock AgentCore Memory, custom | InMemory, File, custom (but no ready-made cloud DB adapters) | Medium |
+| **"Agents as tools"** | An agent can be a tool of another agent directly | Requires manual wrapping | Low — `Call`/`Spawn` solve this differently |
+
+## What Flowra has that Strands doesn't (or does less well)
+
+| Capability | Flowra | Strands |
+|---|---|---|
+| **Crash recovery** | Full: persistence after each step, resume after a crash | Session persistence exists, but no step-level crash recovery |
+| **Incremental dirty tracking** | `Scalar[T]` and `AppendOnlyList[T]` save only changes | Saves state wholesale |
+| **State machine with compile-time checks** | `@step`, `Goto`, `Spawn`, `Call` — the compiler checks slots and types at class definition time | Model-driven loop — no explicit state machine |
+| **Cooperative interrupts** | `InterruptToken` propagates through the entire execution tree | No equivalent |
+| **DI into tool handlers** | `ToolService` marker — services injected into tool functions | Tools receive only the tool input |
+
+## Priority action items
+
+1. ~~**Streaming** — the most visible gap for user experience.~~ ✅ Implemented: `LLMProvider.stream()`, `on_text_delta`/`on_thinking_delta` hooks, TUI/console examples support streaming.
+2. **Observability** — at minimum, OpenTelemetry spans for LLM calls and tool execution. Our hooks are a good foundation.
+3. **More providers** — an Ollama/LiteLLM adapter would be useful for local development.
+4. **Ready-made multi-agent patterns** — Graph/Workflow on top of our primitives.
+
+## Sources
+
+- [Introducing Strands Agents (AWS Blog)](https://aws.amazon.com/blogs/opensource/introducing-strands-agents-an-open-source-ai-agents-sdk/)
+- [Strands Agents Documentation](https://strandsagents.com/latest/documentation/docs/)
+- [Technical Deep Dive (AWS Blog)](https://aws.amazon.com/blogs/machine-learning/strands-agents-sdk-a-technical-deep-dive-into-agent-architectures-and-observability/)
+- [Multi-Agent Patterns](https://dev.to/aws-builders/understanding-multi-agent-patterns-in-strands-agent-graph-swarm-and-workflow-4nb8)
+- [Session Management](https://strandsagents.com/latest/documentation/docs/user-guide/concepts/agents/session-management/)
+- [A2A Protocol](https://strandsagents.com/latest/documentation/docs/user-guide/concepts/multi-agent/agent-to-agent/)
+- [Guardrails](https://strandsagents.com/latest/documentation/docs/user-guide/safety-security/guardrails/)
+- [Streaming](https://strandsagents.com/latest/documentation/docs/user-guide/concepts/streaming/)
+- [GitHub - sdk-python](https://github.com/strands-agents/sdk-python)
@@ -27,7 +27,7 @@ from examples.model_registry import DEFAULT_MODEL, create_router
 from examples.tools import calculate, random_numbers
 from examples.tools.switch_model import create_switch_model_tool
 from flowra.lib.chat import ChatResult, ChatSpec
-from flowra.lib.tool_loop import ToolLoopHooks
+from flowra.lib.tool_loop import ToolLoopAgentContext, ToolLoopHooks
 from flowra.llm import LLMProvider, SystemMessage, TextBlock, Usage
 from flowra.runtime import AgentRuntime, FileSessionStorage
 from flowra.tools import ToolRegistry, get_local_tool
@@ -128,6 +128,7 @@ async def main() -> None:
     parser.add_argument("--model", default=DEFAULT_MODEL, help="Model key (e.g. anthropic/sonnet)")
     parser.add_argument("--resume", metavar="SESSION_ID", help="Resume a session ('last' for most recent)")
     parser.add_argument("--input", metavar="MESSAGE", help="Send a single message and exit (batch mode)")
+    parser.add_argument("--stream", action="store_true", help="Enable streaming (print text as it arrives)")
     args = parser.parse_args()

     session_id: str | None = None
@@ -160,9 +161,17 @@ async def main() -> None:
         ],
     )

+    def on_text_delta(text: str, context: ToolLoopAgentContext) -> None:
+        print(text, end="", flush=True)
+
+    def on_thinking_delta(text: str, context: ToolLoopAgentContext) -> None:
+        print(f"\033[2m{text}\033[0m", end="", flush=True)
+
     hooks = ToolLoopHooks(
         on_before_llm_call=log_before_llm_call,
         on_after_llm_call=log_after_llm_call,
+        on_text_delta=on_text_delta if args.stream else None,
+        on_thinking_delta=on_thinking_delta if args.stream else None,
     )
     storage = FileSessionStorage(base_dir=_SESSION_DIR, session_id=session_id)
     runtime = AgentRuntime(
@@ -1,9 +1,10 @@
 """LLM router for chat examples — routes requests to providers by model key."""

 import dataclasses
+from collections.abc import AsyncIterator
 from typing import Any

-from flowra.llm import LLMProvider, LLMRequest, LLMResponse
+from flowra.llm import LLMProvider, LLMRequest, LLMResponse, StreamEvent

 __all__ = ["ChatLLMRouter", "ModelEntry"]
@@ -25,11 +26,20 @@ class ChatLLMRouter(LLMProvider):
     def available_models(self) -> list[str]:
         return sorted(self.__models)

-    async def call(self, request: LLMRequest) -> LLMResponse:
+    def __resolve(self, request: LLMRequest) -> tuple[LLMProvider, LLMRequest]:
         entry = self.__models[request.model]
         actual_request = dataclasses.replace(
             request,
             model=entry.model_id,
             additional_config={**entry.additional_config, **request.additional_config},
         )
-        return await entry.provider.call(actual_request)
+        return entry.provider, actual_request
+
+    async def call(self, request: LLMRequest) -> LLMResponse:
+        provider, actual_request = self.__resolve(request)
+        return await provider.call(actual_request)
+
+    async def stream(self, request: LLMRequest) -> AsyncIterator[StreamEvent]:
+        provider, actual_request = self.__resolve(request)
+        async for event in provider.stream(actual_request):
+            yield event
@@ -5,7 +5,7 @@ Agents contain NO I/O — each step returns a prompt or result.
 The single input loop in main() handles all user interaction.

 Usage:
-    uv run python -m examples.menu_agent
+    uv run python examples/menu_agent.py
 """

 import asyncio
@@ -4,7 +4,7 @@ Same menu/calc/echo demo as menu_agent.py, but using the Agent base class
 with type-based references and direct Goto/Call/Spawn constructors.

 Usage:
-    uv run python -m examples.menu_agent_class
+    uv run python examples/menu_agent_class.py
 """

 import asyncio
@@ -56,6 +56,11 @@ def create_router() -> ChatLLMRouter:
     anthropic_provider = AnthropicVertexProvider(project=project, location=location, credentials=credentials_b64)

     models["anthropic/sonnet-4-5"] = ModelEntry(provider=anthropic_provider, model_id="claude-sonnet-4-5@20250929")
+    models["anthropic/sonnet-4-5-think"] = ModelEntry(
+        provider=anthropic_provider,
+        model_id="claude-sonnet-4-5@20250929",
+        additional_config={"thinking_budget_tokens": 4000},
+    )
     models["anthropic/haiku-4-5"] = ModelEntry(provider=anthropic_provider, model_id="claude-haiku-4-5@20251001")

     # --- OpenAI ---
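
To close the loop on the new registry entry, a hedged usage sketch: `create_router()` and the `anthropic/sonnet-4-5-think` key come from this hunk, the request shape follows the docs/llm.md examples earlier in the diff, and the async scaffolding is assumed.

```python
# Sketch only: the router swaps the registry key for the real model id and
# merges the entry's additional_config (the 4000-token thinking budget).
from examples.model_registry import create_router
from flowra.llm import LLMRequest, TextBlock, UserMessage


async def demo() -> None:
    router = create_router()
    response = await router.call(
        LLMRequest(
            model="anthropic/sonnet-4-5-think",
            messages=[UserMessage(blocks=[TextBlock(text="Why is the sky blue?")])],
            max_tokens=8192,
        )
    )
    print(response.message)  # may include ThinkingBlock content before the answer
```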