phronomy 0.5.4 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +21 -0
  3. data/CHANGELOG.md +379 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +262 -48
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/SECURITY.md +80 -0
  8. data/benchmark/baseline.json +9 -0
  9. data/benchmark/bench_agent_invoke.rb +105 -0
  10. data/benchmark/bench_context_assembler.rb +46 -0
  11. data/benchmark/bench_regression.rb +171 -0
  12. data/benchmark/bench_token_estimator.rb +44 -0
  13. data/benchmark/bench_tool_schema.rb +69 -0
  14. data/benchmark/bench_vector_store.rb +39 -0
  15. data/benchmark/bench_workflow.rb +55 -0
  16. data/benchmark/run_all.rb +118 -0
  17. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  18. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  19. data/docs/decisions/003-event-loop-singleton.md +48 -0
  20. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
  21. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  22. data/docs/decisions/006-no-built-in-guardrails.md +48 -0
  23. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  24. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  25. data/docs/decisions/009-state-store-abstraction.md +141 -0
  26. data/lib/phronomy/agent/base.rb +281 -13
  27. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  28. data/lib/phronomy/agent/checkpoint.rb +1 -0
  29. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  30. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  31. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  32. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  33. data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
  34. data/lib/phronomy/agent/fsm.rb +180 -0
  35. data/lib/phronomy/agent/handoff.rb +3 -0
  36. data/lib/phronomy/agent/orchestrator.rb +123 -11
  37. data/lib/phronomy/agent/parallel_tool_chat.rb +92 -0
  38. data/lib/phronomy/agent/react_agent.rb +8 -6
  39. data/lib/phronomy/agent/runner.rb +2 -0
  40. data/lib/phronomy/agent/shared_state.rb +11 -0
  41. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  42. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  43. data/lib/phronomy/cancellation_token.rb +92 -0
  44. data/lib/phronomy/configuration.rb +32 -2
  45. data/lib/phronomy/context/assembler.rb +6 -0
  46. data/lib/phronomy/context/compaction_context.rb +2 -0
  47. data/lib/phronomy/context/context_version_cache.rb +2 -0
  48. data/lib/phronomy/context/token_budget.rb +3 -0
  49. data/lib/phronomy/context/token_estimator.rb +9 -2
  50. data/lib/phronomy/context/trigger_context.rb +1 -0
  51. data/lib/phronomy/context/trim_context.rb +4 -0
  52. data/lib/phronomy/context.rb +0 -1
  53. data/lib/phronomy/embeddings/base.rb +5 -2
  54. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  55. data/lib/phronomy/eval/comparison.rb +2 -0
  56. data/lib/phronomy/eval/dataset.rb +4 -0
  57. data/lib/phronomy/eval/metrics.rb +6 -0
  58. data/lib/phronomy/eval/runner.rb +2 -0
  59. data/lib/phronomy/eval/scorer/base.rb +1 -0
  60. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  61. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  62. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  63. data/lib/phronomy/event.rb +14 -0
  64. data/lib/phronomy/event_loop.rb +254 -0
  65. data/lib/phronomy/fsm_session.rb +201 -0
  66. data/lib/phronomy/generator_verifier.rb +24 -22
  67. data/lib/phronomy/guardrail/base.rb +3 -0
  68. data/lib/phronomy/guardrail.rb +0 -1
  69. data/lib/phronomy/knowledge_source/base.rb +6 -2
  70. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  71. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  72. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  73. data/lib/phronomy/loader/base.rb +1 -0
  74. data/lib/phronomy/loader/csv_loader.rb +2 -0
  75. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  76. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  77. data/lib/phronomy/output_parser/base.rb +1 -0
  78. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  79. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  80. data/lib/phronomy/prompt_template.rb +5 -0
  81. data/lib/phronomy/runnable.rb +20 -3
  82. data/lib/phronomy/splitter/base.rb +2 -0
  83. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  84. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  85. data/lib/phronomy/state_store/base.rb +48 -0
  86. data/lib/phronomy/state_store/in_memory.rb +62 -0
  87. data/lib/phronomy/tool/agent_tool.rb +1 -0
  88. data/lib/phronomy/tool/base.rb +189 -27
  89. data/lib/phronomy/tool/mcp_tool.rb +68 -13
  90. data/lib/phronomy/tracing/base.rb +3 -0
  91. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  92. data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
  93. data/lib/phronomy/vector_store/base.rb +33 -7
  94. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  95. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  96. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  97. data/lib/phronomy/version.rb +1 -1
  98. data/lib/phronomy/workflow.rb +175 -74
  99. data/lib/phronomy/workflow_context.rb +55 -5
  100. data/lib/phronomy/workflow_runner.rb +197 -114
  101. data/lib/phronomy.rb +74 -1
  102. data/scripts/api_snapshot.rb +91 -0
  103. data/scripts/check_api_annotations.rb +68 -0
  104. data/scripts/check_private_enforcement.rb +93 -0
  105. data/scripts/check_readme_runnable.rb +98 -0
  106. data/scripts/run_mutation.sh +46 -0
  107. metadata +50 -6
  108. data/lib/phronomy/context/builder.rb +0 -92
  109. data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +0 -100
  110. data/lib/phronomy/guardrail/builtin/prompt_injection_detector.rb +0 -67
  111. data/lib/phronomy/guardrail/builtin.rb +0 -16
@@ -0,0 +1,42 @@
1
+ # ADR-002: WorkflowContext#merge Returns a New Instance (Immutability Contract)
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ `WorkflowContext` carries the shared state that flows through workflow nodes.
10
+ Two design options existed:
11
+
12
+ 1. **Mutable**: nodes modify state in-place; the same object is passed through
13
+ the entire graph.
14
+ 2. **Immutable**: `merge` always returns a new instance; nodes return the new
15
+ state rather than mutating the old one.
16
+
17
+ Mutable state is simpler to implement but creates hazards: parallel node
18
+ execution (current or future) would require locking to prevent races. It also
19
+ makes debugging harder — it is difficult to track where a field was last changed
20
+ without snapshotting state at each step.
21
+
22
+ ## Decision
23
+
24
+ `WorkflowContext#merge` (and all field writers) return a new `WorkflowContext`
25
+ instance. Nodes receive a state object and must return (or yield) a new state
26
+ object. The framework replaces the current state with the returned value.
27
+
28
+ ## Consequences
29
+
30
+ **Positive:**
31
+ - Future parallel node execution (fork/join) can safely hand each branch a
32
+ separate copy of the state without defensive copying at the call site.
33
+ - Checkpoint/replay for `interrupt_before`/`interrupt_after` is straightforward
34
+ — a checkpoint is just a serialized state value.
35
+ - Accidental mutation by a node does not silently corrupt shared state.
36
+
37
+ **Negative / Tradeoffs:**
38
+ - A small allocation overhead per `merge` call. Acceptable at typical workflow
39
+ depth (tens of nodes, not millions).
40
+ - Node authors must remember to return the new state; forgetting to do so
41
+ discards the update silently. (Future work: frozen state objects or
42
+ a strict return type could catch this at dev time.)
@@ -0,0 +1,48 @@
1
+ # ADR-003: EventLoop Is a Process-Wide Singleton
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ The `EventLoop` routes asynchronous events (tool results, child completions,
10
+ timeouts) between concurrent agent invocations. The design choices were:
11
+
12
+ 1. **Per-agent event loop**: each `Agent::Base#invoke` owns its own event loop
13
+ thread.
14
+ 2. **Per-`Orchestrator` event loop**: one loop per orchestrator instance.
15
+ 3. **Process-wide singleton**: one `EventLoop` serves all concurrent
16
+ invocations in the process.
17
+
18
+ Per-agent loops would require creating and tearing down threads on every
19
+ `invoke` call. In Rails / Puma environments where multiple requests invoke
20
+ agents concurrently, this creates significant thread churn. Per-orchestrator
21
+ loops are better, but `Orchestrator` instances are often short-lived, and
22
+ coordinating parent-child event delivery across loop boundaries is complex.
23
+
24
+ Ruby's GVL means true parallelism is limited to I/O-bound work, so the
25
+ throughput gain from per-agent loops would be minimal. A singleton loop with a
26
+ thread-safe queue is simpler and avoids inter-loop coordination.
27
+
28
+ ## Decision
29
+
30
+ `Phronomy::EventLoop` is implemented as a process-wide singleton that runs on a
31
+ dedicated background thread. Agents post events to a shared `Queue`; the loop
32
+ dispatches them to registered handlers (identified by `target_id`).
33
+
34
+ ## Consequences
35
+
36
+ **Positive:**
37
+ - Single background thread; no thread churn per invocation.
38
+ - Cross-agent event delivery (e.g., child-to-parent completion) is natural
39
+ because all agents share the same loop.
40
+ - Startup/shutdown logic is simple (one thread to manage).
41
+
42
+ **Negative / Tradeoffs:**
43
+ - The singleton is global mutable state; tests must call
44
+ `Phronomy.reset_runtime!` between examples.
45
+ - A bug in the event loop (or a blocking handler) affects all concurrent
46
+ invocations, not just one agent.
47
+ - In multi-process environments (e.g., Sidekiq workers), each process has its
48
+ own singleton — cross-process event delivery is not supported.
@@ -0,0 +1,51 @@
1
+ # ADR-004: invoke_timeout Is a Wait Timeout, Not a Cancellation Signal
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ `Agent::Base` exposes `invoke_timeout N` as a class-level DSL. When an invocation
10
+ exceeds the timeout, `Phronomy::TimeoutError` is raised to the caller.
11
+
12
+ The question is: should the timeout also stop the agent's background work?
13
+
14
+ Ruby's `Timeout.timeout` / `Thread#kill` can interrupt a running thread, but
15
+ doing so is unsafe: it can leave mutexes locked, database connections in a broken
16
+ state, and external API calls mid-flight without cleanup. `Thread#raise` has the
17
+ same hazards because it can interrupt anywhere, even inside a `rescue`/`ensure` block.
18
+
19
+ Cooperative cancellation (checking a shared flag periodically) is safe but
20
+ requires every tool, every LLM call, and every framework-internal loop to
21
+ participate — a significant API surface change.
22
+
23
+ ## Decision
24
+
25
+ `invoke_timeout` is a **wait timeout only**. When the deadline is reached:
26
+
27
+ - `TimeoutError` is raised in the calling thread.
28
+ - The agent's background thread continues running until it either completes
29
+ normally or is terminated when the process exits.
30
+ - No cancellation signal is sent to the agent.
31
+
32
+ This is explicitly documented in the README and in the DSL source.
33
+
34
+ A proper cooperative cancellation mechanism is tracked in Issue #216
35
+ (`CancellationToken`), which is a separate feature requiring agent, tool, and
36
+ transport layer participation.
37
+
38
+ ## Consequences
39
+
40
+ **Positive:**
41
+ - No risk of leaving shared resources (DB connections, mutexes, sockets) in a
42
+ broken state due to forced thread interruption.
43
+ - Implementation is simple: `Timeout.timeout` on the calling side only.
44
+ - The contract is explicit and predictable.
45
+
46
+ **Negative / Tradeoffs:**
47
+ - Background threads may continue consuming resources (LLM API quota, etc.)
48
+ after the caller has given up.
49
+ - Users who expect "cancel" semantics from a timeout will be surprised.
50
+ - Proper cancellation requires the `CancellationToken` feature (#216), which
51
+ is separate from `invoke_timeout` (see `lib/phronomy/cancellation_token.rb`).
@@ -0,0 +1,45 @@
1
+ # ADR-005: static_knowledge Chunks Are Cached at the Class Level
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ `Agent::Base` supports `static_knowledge` — a list of `KnowledgeSource` objects
10
+ whose content is prepended to the system prompt on every invocation. Sources
11
+ declared as `static?` (e.g., `StaticKnowledge`) return content that never
12
+ changes at runtime.
13
+
14
+ Two caching strategies were considered:
15
+
16
+ 1. **Instance-level cache**: each agent instance fetches and caches static chunks
17
+ after first `invoke`.
18
+ 2. **Class-level cache**: static chunks are fetched once per class and shared
19
+ across all instances.
20
+
21
+ If agents are short-lived (created per-request), instance-level caching
22
+ provides no benefit — the cache is thrown away with the instance. A class-level
23
+ cache persists across request boundaries and is shared by all instances of the
24
+ same agent class, so static chunks are fetched once per class rather than once per instance.
25
+
26
+ ## Decision
27
+
28
+ `static_knowledge` chunks from sources that return `static? == true` are
29
+ memoized in a class-level instance variable (`@_static_knowledge_cache`).
30
+ Non-static sources (e.g., `EntityKnowledge`, `RAGKnowledge`) are always
31
+ re-fetched on each invocation because their content depends on runtime state.
32
+
33
+ ## Consequences
34
+
35
+ **Positive:**
36
+ - Eliminates redundant fetches for read-only knowledge bases in
36
+ agent-per-request patterns.
38
+ - Memory is shared: all instances of a class point to the same chunk array.
39
+
40
+ **Negative / Tradeoffs:**
41
+ - If the underlying file or data changes while the process is running, the cache
42
+ serves stale content. Applications that need live refresh must use a
43
+ non-static knowledge source.
44
+ - In tests, the cache must be cleared between examples. `Phronomy.reset_runtime!`
45
+ handles this.
@@ -0,0 +1,48 @@
1
+ # ADR-006: Built-in Guardrail Implementations Are Not Shipped
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ Phronomy provides `Guardrail::InputGuardrail` and `Guardrail::OutputGuardrail`
10
+ as base classes. The question is whether to ship a library of built-in
11
+ implementations (e.g., prompt injection detector, PII scanner, toxic content
12
+ filter, word-count limit).
13
+
14
+ Arguments for built-ins:
15
+ - Lower barrier to entry; users get safety out of the box.
16
+ - Consistent quality baseline across applications.
17
+
18
+ Arguments against:
19
+ - Guardrail correctness is highly domain-specific. A PII pattern for US Social
20
+ Security Numbers is irrelevant to a Japanese-language application.
21
+ - Prompt injection patterns evolve rapidly; a built-in detector would require
22
+ frequent updates and could give false confidence.
23
+ - Shipping third-party detection libraries (NLP, regex banks) as hard
24
+ dependencies increases gem weight and potential supply-chain risk.
25
+ - The guardrail interface is intentionally minimal (`check(input)` / `fail!`).
26
+ Custom implementations are one-class affairs.
27
+
28
+ ## Decision
29
+
30
+ Phronomy ships no built-in guardrail implementations. The framework provides:
31
+
32
+ 1. `Guardrail::InputGuardrail` and `Guardrail::OutputGuardrail` base classes
33
+ with `check` and `fail!` hooks.
34
+ 2. Documentation and examples showing how to implement custom guardrails.
35
+
36
+ Users are responsible for implementing domain-specific guardrail logic.
37
+
38
+ ## Consequences
39
+
40
+ **Positive:**
41
+ - No false sense of security from a generic built-in that does not match the
42
+ application's actual threat model.
43
+ - Gem remains dependency-light.
44
+ - The interface is stable regardless of how the threat landscape evolves.
45
+
46
+ **Negative / Tradeoffs:**
47
+ - Users must implement their own guardrails from scratch. Providing a cookbook
48
+ of example patterns in the README partially mitigates this.
@@ -0,0 +1,51 @@
1
+ # ADR-007: MCP Tool Support Is Classified as Beta Stability
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ Phronomy's `McpTool` integrates with the Model Context Protocol (MCP), allowing
10
+ agents to call tools exposed by external MCP servers over stdio or HTTP. The
11
+ protocol specification is still evolving (as of 2025), and the surface area of
12
+ the integration is large:
13
+
14
+ - Two transports: `StdioTransport` and `HttpTransport`
15
+ - JSON-RPC 2.0 framing, capability negotiation, tool listing, tool invocation
16
+ - Custom authentication headers, environment forwarding, startup timeouts
17
+
18
+ The phronomy README stability table uses three tiers: **Stable**, **Beta**, and
19
+ **Experimental**. The distinction matters because:
20
+
21
+ - **Stable** APIs are covered by the public API compatibility snapshot (#210)
22
+ and breaking changes require a major version bump.
23
+ - **Beta** APIs can change between minor versions with a CHANGELOG entry.
24
+ - **Experimental** APIs can change between patch versions without notice.
25
+
26
+ Classifying MCP as Stable would lock in the current API before the protocol and
27
+ integration have been exercised in production at scale. Classifying it as
28
+ Experimental would be too conservative — the API is intentionally designed and
29
+ documented.
30
+
31
+ ## Decision
32
+
33
+ `McpTool` and its transport classes (`StdioTransport`, `HttpTransport`) are
34
+ classified as **Beta** in the README stability table and in YARD documentation.
35
+
36
+ This signals:
37
+ - The interface is intentional and useful but may change as the MCP specification
38
+ and real-world usage reveal gaps.
39
+ - Users should pin minor versions when using MCP in production.
40
+
41
+ ## Consequences
42
+
43
+ **Positive:**
44
+ - Honest representation of the API maturity.
45
+ - Allows breaking changes (e.g., richer error types, capability negotiation
46
+ changes) between minor versions without a major bump.
47
+
48
+ **Negative / Tradeoffs:**
49
+ - Some users may avoid a Beta-labeled feature in production. Documentation
50
+ should clarify that "Beta" reflects protocol evolution risk, not
51
+ implementation quality.
@@ -0,0 +1,52 @@
1
+ # ADR-008: Orchestrator Uses OS Threads for Parallel Dispatch
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ `Agent::Orchestrator#dispatch_parallel` runs multiple sub-agent invocations
10
+ concurrently. The Ruby concurrency primitives available are:
11
+
12
+ 1. **OS threads** (`Thread`): true OS-level threads, subject to Ruby's GVL for
13
+ CPU-bound work, but I/O-bound work (LLM API calls, tool HTTP requests)
14
+ releases the GVL and runs in parallel.
15
+ 2. **Ractors**: actor-model isolation, no shared mutable state between Ractors.
16
+ True parallelism for CPU-bound work but requires strict object isolation.
17
+ 3. **Fibers / async**: cooperative concurrency via Fiber scheduler (e.g.,
18
+ `async` gem). Non-blocking I/O without multiple threads.
19
+ 4. **`concurrent-ruby` thread pool**: managed pool of OS threads.
20
+
21
+ LLM calls and tool invocations are overwhelmingly I/O-bound (HTTP requests).
22
+ Under the GVL, OS threads are sufficient to achieve meaningful parallelism for
23
+ these workloads. Ractors require that all objects passed between them are
24
+ shareable, which is incompatible with RubyLLM's mutable chat objects and
25
+ `WorkflowContext` instances without significant refactoring.
26
+
27
+ Fibers require an async-compatible HTTP library stack throughout (RubyLLM,
28
+ Faraday, etc.), which is not guaranteed today.
29
+
30
+ ## Decision
31
+
32
+ `dispatch_parallel` spawns one OS thread per task using Ruby's `Thread.new`.
33
+ A `max_concurrency:` cap (default: unlimited) uses a `Mutex`-guarded counter to
34
+ limit the number of simultaneously active threads when specified.
35
+
36
+ ## Consequences
37
+
38
+ **Positive:**
39
+ - Transparent parallelism for I/O-bound LLM/tool calls with no dependency
40
+ changes.
41
+ - Compatible with all Ruby versions in the support matrix (3.2, 3.3, 3.4, head).
42
+ - Simple to reason about: each task is an independent thread; results are
43
+ collected in input order.
44
+
45
+ **Negative / Tradeoffs:**
46
+ - CPU-bound work inside agents does not benefit from true parallelism due to
47
+ the GVL. (In practice, agents are almost always I/O-bound.)
48
+ - Spawning many threads simultaneously (no `max_concurrency:`) can exhaust
49
+ system thread limits under high load. Users should set `max_concurrency:` for
50
+ large fan-outs.
51
+ - Ractor-based isolation (if ever needed for security sandboxing) would require
52
+ significant API changes to `WorkflowContext` and RubyLLM integration.
@@ -0,0 +1,141 @@
1
+ # ADR-009: StateStore Abstraction for Workflow Persistence
2
+
3
+ **Status**: Accepted
4
+ **Date**: 2025-01
5
+ **Issue**: [#250](https://github.com/Raizo-TCS/phronomy/issues/250)
6
+
7
+ ---
8
+
9
+ ## Context
10
+
11
+ `Phronomy::WorkflowContext` stores execution state in a plain Ruby object that lives only
12
+ in the process heap for the duration of a single `invoke` call. This is intentional for
13
+ simple, stateless pipelines but creates three gaps:
14
+
15
+ 1. **Process restart** destroys all in-flight workflow state. Long-running workflows (approval
16
+ pipelines, human-in-the-loop, async batch jobs) cannot survive a deploy or crash.
17
+ 2. **Multi-process fan-out** is impossible when all state is local. Horizontally-scaled
18
+ workers cannot hand off an in-progress workflow to a sibling process.
19
+ 3. **Debugging & auditability** — there is no canonical, queryable record of what a workflow
20
+ produced at each turn.
21
+
22
+ The only persistence mechanism that existed before this ADR was the caller manually
23
+ persisting the `WorkflowContext` object returned by `invoke` and passing it back to
24
+ `send_event`. This is fragile and couples the caller to Phronomy internals.
25
+
26
+ ---
27
+
28
+ ## Decision
29
+
30
+ Introduce a `StateStore` abstraction — a single, narrow interface that `WorkflowRunner`
31
+ uses for all state reads and writes. Callers opt in by passing a `state_store:` argument
32
+ to `Workflow.define` or by setting `Phronomy.configure { |c| c.state_store = … }`.
33
+
34
+ ### Interface
35
+
36
+ ```ruby
37
+ # lib/phronomy/state_store/base.rb
38
+ module Phronomy
39
+ module StateStore
40
+ class Base
41
+ def load(thread_id) = raise NotImplementedError # → Hash | nil
42
+ def save(thread_id, snapshot) = raise NotImplementedError # → void
43
+ def delete(thread_id) = raise NotImplementedError # → void
44
+ end
45
+ end
46
+ end
47
+ ```
48
+
49
+ A **snapshot** is a plain `Hash` with two keys:
50
+
51
+ | Key | Type | Description |
52
+ |-----|------|-------------|
53
+ | `:fields` | `Hash` | Output of `context.to_h` — user-defined field values |
54
+ | `:phase` | `String` | `context.phase.to_s` — last recorded execution phase |
55
+
56
+ ### Built-in backends
57
+
58
+ | Class | Dependency | Use case |
59
+ |-------|-----------|---------|
60
+ | `StateStore::InMemory` | none | Testing, single-process use (opt-in; the default is no store) |
61
+ | `StateStore::SQLite` | `sqlite3` gem | Simple production durability (not in scope) |
62
+ | `StateStore::Redis` | `redis` gem | Multi-process, TTL support (not in scope) |
63
+
64
+ This ADR covers only the interface contract and `InMemory`. SQLite and Redis backends
65
+ are deferred to separate issues.
66
+
67
+ ### Integration with WorkflowRunner
68
+
69
+ `WorkflowRunner` resolves the store in the following priority order:
70
+
71
+ 1. `config[:state_store]` passed per-invocation
72
+ 2. `state_store:` kwarg passed to `WorkflowRunner#initialize`
73
+ 3. `Phronomy.configuration.state_store`
74
+ 4. `nil` → no persistence (current default behaviour)
75
+
76
+ On each `invoke` call that provides an explicit `thread_id`:
77
+
78
+ ```
79
+ invoke(input, config: { thread_id: "t1" })
80
+ → load snapshot for "t1" (if store present)
81
+ → merge stored fields with input (input overrides stored values)
82
+ → run workflow from entry_point
83
+ → save final context snapshot for "t1"
84
+ ```
85
+
86
+ On subsequent calls with the same `thread_id`, the stored fields are loaded as the
87
+ initial context. The `phase` field in the snapshot is recorded but `invoke` always
88
+ restarts from the entry point — the stored phase is informational. To resume a halted
89
+ workflow at a specific named event, callers use `send_event`.
90
+
91
+ ---
92
+
93
+ ## Consequences
94
+
95
+ ### Positive
96
+
97
+ - Pluggable backends: tests use `InMemory`; production can use `SQLite` or `Redis` once those backends ship.
98
+ - Zero-change opt-out: when no store is configured, `WorkflowRunner` behaviour is
99
+ identical to the pre-ADR implementation (no reads, no writes).
100
+ - `InMemory` replaces the implicit in-memory hash that previously lived only for the
101
+ duration of a single call: callers can now persist state across multiple `invoke`
102
+ calls in the same process without glue code.
103
+ - Shared RSpec examples (`"a state store"`) enforce the contract for all backends.
104
+
105
+ ### Negative / Trade-offs
106
+
107
+ - Snapshot serialization is the caller's responsibility for complex field types. Fields
108
+ that contain non-JSON-safe objects (e.g. `Proc`, `Symbol` keys) must serialize cleanly
109
+ via `to_h`. `InMemory` stores Ruby objects directly (no serialisation), so it does
110
+ not expose this issue during tests.
111
+ - `invoke` always re-runs the workflow from the entry point — loading stored state does
112
+ NOT automatically resume a halted wait_state. Callers who want resume-from-halt
113
+ semantics must call `send_event` explicitly (as before).
114
+ - The `:phase` field in the snapshot is currently informational (not used by `invoke`
115
+ to auto-resume). A future ADR may introduce auto-resume semantics.
116
+
117
+ ---
118
+
119
+ ## Alternatives Considered
120
+
121
+ ### A. Make InMemory the global default
122
+
123
+ Set `Phronomy.configure { |c| c.state_store = InMemory.new }` automatically on boot.
124
+ Rejected: this would change the memory lifecycle of all workflows silently; existing
125
+ tests relying on ephemeral state would accumulate snapshots in the default store indefinitely,
126
+ and the store would become a slow memory leak in long-running processes.
127
+
128
+ ### B. Serialize snapshots to JSON at the InMemory layer
129
+
130
+ Round-trip through JSON in `InMemory#save` to catch serialisation issues early.
131
+ Rejected for this ADR: `InMemory` is intended as a zero-friction default for tests and
132
+ local development. JSON round-tripping would penalise symbol keys (a common pattern in
133
+ Ruby apps). The SQLite/Redis backends will enforce JSON round-tripping at their layer.
134
+
135
+ ### C. Auto-resume halted workflows in `invoke`
136
+
137
+ When `invoke` finds a stored snapshot with `phase != "__end__"`, automatically call
138
+ `send_event` with a synthetic `:resume` event.
139
+ Rejected: the correct event to fire depends on the wait_state's declared external events,
140
+ which may require named events (e.g. `:approve`, `:reject`). Inferring the right event
141
+ automatically is ambiguous and would hide programmer errors.