phronomy 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +22 -0
  3. data/CHANGELOG.md +488 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +374 -36
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/Rakefile +33 -0
  8. data/SECURITY.md +80 -0
  9. data/benchmark/baseline.json +9 -0
  10. data/benchmark/bench_agent_invoke.rb +105 -0
  11. data/benchmark/bench_context_assembler.rb +46 -0
  12. data/benchmark/bench_regression.rb +172 -0
  13. data/benchmark/bench_token_estimator.rb +44 -0
  14. data/benchmark/bench_tool_schema.rb +69 -0
  15. data/benchmark/bench_vector_store.rb +39 -0
  16. data/benchmark/bench_workflow.rb +55 -0
  17. data/benchmark/run_all.rb +118 -0
  18. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  19. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  20. data/docs/decisions/003-event-loop-singleton.md +48 -0
  21. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +75 -0
  22. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  23. data/docs/decisions/006-no-built-in-guardrails.md +66 -0
  24. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  25. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  26. data/docs/decisions/009-state-store-abstraction.md +141 -0
  27. data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
  28. data/lib/phronomy/agent/base.rb +416 -49
  29. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  30. data/lib/phronomy/agent/checkpoint.rb +1 -0
  31. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  32. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  33. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  34. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  35. data/lib/phronomy/agent/concerns/suspendable.rb +19 -0
  36. data/lib/phronomy/agent/fsm.rb +44 -52
  37. data/lib/phronomy/agent/handoff.rb +3 -0
  38. data/lib/phronomy/agent/orchestrator.rb +191 -54
  39. data/lib/phronomy/agent/parallel_tool_chat.rb +87 -13
  40. data/lib/phronomy/agent/react_agent.rb +16 -6
  41. data/lib/phronomy/agent/runner.rb +2 -0
  42. data/lib/phronomy/agent/shared_state.rb +11 -0
  43. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  44. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  45. data/lib/phronomy/async_queue.rb +155 -0
  46. data/lib/phronomy/blocking_adapter_pool.rb +435 -0
  47. data/lib/phronomy/cancellation_scope.rb +123 -0
  48. data/lib/phronomy/cancellation_token.rb +133 -0
  49. data/lib/phronomy/concurrency_gate.rb +155 -0
  50. data/lib/phronomy/configuration.rb +168 -2
  51. data/lib/phronomy/context/assembler.rb +6 -0
  52. data/lib/phronomy/context/compaction_context.rb +2 -0
  53. data/lib/phronomy/context/context_version_cache.rb +2 -0
  54. data/lib/phronomy/context/token_budget.rb +3 -0
  55. data/lib/phronomy/context/token_estimator.rb +9 -2
  56. data/lib/phronomy/context/trigger_context.rb +1 -0
  57. data/lib/phronomy/context/trim_context.rb +4 -0
  58. data/lib/phronomy/deadline.rb +63 -0
  59. data/lib/phronomy/diagnostics.rb +62 -0
  60. data/lib/phronomy/embeddings/base.rb +22 -2
  61. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  62. data/lib/phronomy/eval/comparison.rb +2 -0
  63. data/lib/phronomy/eval/dataset.rb +4 -0
  64. data/lib/phronomy/eval/metrics.rb +6 -0
  65. data/lib/phronomy/eval/runner.rb +11 -9
  66. data/lib/phronomy/eval/scorer/base.rb +1 -0
  67. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  68. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  69. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  70. data/lib/phronomy/event_loop.rb +275 -30
  71. data/lib/phronomy/fsm_session.rb +57 -4
  72. data/lib/phronomy/generator_verifier.rb +2 -0
  73. data/lib/phronomy/guardrail/base.rb +3 -0
  74. data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
  75. data/lib/phronomy/invocation_context.rb +152 -0
  76. data/lib/phronomy/knowledge_source/base.rb +24 -2
  77. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  78. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  79. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  80. data/lib/phronomy/llm_adapter/base.rb +104 -0
  81. data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
  82. data/lib/phronomy/llm_adapter.rb +20 -0
  83. data/lib/phronomy/loader/base.rb +1 -0
  84. data/lib/phronomy/loader/csv_loader.rb +2 -0
  85. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  86. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  87. data/lib/phronomy/metrics.rb +38 -0
  88. data/lib/phronomy/output_parser/base.rb +1 -0
  89. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  90. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  91. data/lib/phronomy/prompt_template.rb +5 -0
  92. data/lib/phronomy/runnable.rb +20 -3
  93. data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
  94. data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
  95. data/lib/phronomy/runtime/gate_registry.rb +52 -0
  96. data/lib/phronomy/runtime/pool_registry.rb +57 -0
  97. data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
  98. data/lib/phronomy/runtime/scheduler.rb +98 -0
  99. data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
  100. data/lib/phronomy/runtime/task_registry.rb +48 -0
  101. data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
  102. data/lib/phronomy/runtime/timer_queue.rb +106 -0
  103. data/lib/phronomy/runtime/timer_service.rb +42 -0
  104. data/lib/phronomy/runtime.rb +374 -0
  105. data/lib/phronomy/splitter/base.rb +2 -0
  106. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  107. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  108. data/lib/phronomy/state_store/base.rb +48 -0
  109. data/lib/phronomy/state_store/in_memory.rb +62 -0
  110. data/lib/phronomy/task/backend.rb +80 -0
  111. data/lib/phronomy/task/fiber_backend.rb +157 -0
  112. data/lib/phronomy/task/immediate_backend.rb +89 -0
  113. data/lib/phronomy/task/thread_backend.rb +84 -0
  114. data/lib/phronomy/task.rb +275 -0
  115. data/lib/phronomy/task_group.rb +265 -0
  116. data/lib/phronomy/testing/fake_clock.rb +109 -0
  117. data/lib/phronomy/testing/fake_scheduler.rb +104 -0
  118. data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
  119. data/lib/phronomy/testing.rb +12 -0
  120. data/lib/phronomy/tool/agent_tool.rb +1 -0
  121. data/lib/phronomy/tool/base.rb +298 -28
  122. data/lib/phronomy/tool/mcp_tool.rb +103 -17
  123. data/lib/phronomy/tool/scope_policy.rb +50 -0
  124. data/lib/phronomy/tool_executor.rb +106 -0
  125. data/lib/phronomy/tracing/base.rb +3 -0
  126. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  127. data/lib/phronomy/tracing/open_telemetry_tracer.rb +36 -0
  128. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  129. data/lib/phronomy/vector_store/base.rb +40 -7
  130. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  131. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  132. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  133. data/lib/phronomy/version.rb +1 -1
  134. data/lib/phronomy/workflow.rb +147 -11
  135. data/lib/phronomy/workflow_context.rb +83 -6
  136. data/lib/phronomy/workflow_runner.rb +106 -7
  137. data/lib/phronomy.rb +112 -1
  138. data/scripts/api_snapshot.rb +91 -0
  139. data/scripts/check_api_annotations.rb +68 -0
  140. data/scripts/check_private_enforcement.rb +93 -0
  141. data/scripts/check_readme_runnable.rb +98 -0
  142. data/scripts/run_mutation.sh +46 -0
  143. metadata +83 -2
data/Rakefile CHANGED
@@ -7,4 +7,37 @@ RSpec::Core::RakeTask.new(:spec)
7
7
 
8
8
  require "standard/rake"
9
9
 
# Verify that every public class/module under lib/ carries YARD documentation.
#
# Classes/modules that are private, or tagged `@api private`, are skipped.
# Every remaining class/module whose docstring is blank is reported, and the
# task exits with status 1.
#
# NOTE(review): this task does not consult README.md; if public objects must
# also appear in the README Features table, that check has to be added
# separately.
#
# Usage: bundle exec rake yard_check
desc "Build YARD docs excluding @api private items and check for undocumented public APIs"
task :yard_check do
  require "yard"
  YARD::Registry.clear
  YARD.parse(Dir["lib/**/*.rb"])

  undocumented = []
  YARD::Registry.all(:class, :module).each do |obj|
    next if obj.visibility == :private
    # `@api private` is a plain text tag: its value is exposed through
    # Tag#text (Tag#name is nil for this tag), so compare the text.
    next if obj.tag(:api)&.text&.strip == "private"

    # A blank docstring is precisely the "undocumented" case we want to report.
    undocumented << obj.path if obj.docstring.blank?
  end

  unless undocumented.empty?
    warn "The following public classes/modules have no YARD documentation:\n" \
      " #{undocumented.join("\n ")}\n" \
      "Either add a docstring or mark them @api private."
    exit 1
  end
  puts "yard_check passed — no undocumented public classes/modules found."
end
42
+
10
43
  task default: %i[spec standard]
data/SECURITY.md ADDED
@@ -0,0 +1,80 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ |---------|-----------|
7
+ | Latest release (main branch) | ✅ |
8
+ | Older versions | ❌ — please upgrade |
9
+
10
+ Only the latest released version of `phronomy` receives security patches. If you
11
+ are running an older version, please upgrade before filing a report.
12
+
13
+ ---
14
+
15
+ ## Reporting a Vulnerability
16
+
17
+ **Please do NOT open a public GitHub Issue for security vulnerabilities.**
18
+
19
+ Use [GitHub's private vulnerability reporting](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing/privately-reporting-a-security-vulnerability)
20
+ instead:
21
+
22
+ 1. Navigate to the [Security tab](https://github.com/Raizo-TCS/phronomy/security)
23
+ of this repository.
24
+ 2. Click **"Report a vulnerability"**.
25
+ 3. Fill in the advisory form with as much detail as possible.
26
+
27
+ This creates a private draft advisory visible only to maintainers, keeping the
28
+ details confidential until a fix is prepared and released.
29
+
30
+ ---
31
+
32
+ ## Response Timeline
33
+
34
+ | Milestone | Target |
35
+ |-----------|--------|
36
+ | Acknowledgement of report | Within **7 days** |
37
+ | Triage and initial assessment | Within **14 days** |
38
+ | Patch release (critical / high severity) | Within **30 days** |
39
+ | Patch release (medium / low severity) | Best effort; typically within **60 days** |
40
+
41
+ If you do not receive an acknowledgement within 7 days, please follow up by
42
+ opening a **public** Issue with the subject "Security report follow-up (no
43
+ response)" — do **not** include vulnerability details in the public Issue.
44
+
45
+ ---
46
+
47
+ ## Scope
48
+
49
+ **In scope:**
50
+
51
+ - Vulnerabilities in the `phronomy` gem source code (`lib/`, `spec/`).
52
+ - Dependency vulnerabilities that affect gem consumers when `phronomy` is used as intended.
53
+ - Information disclosure via tracing/logging APIs (e.g. PII still being recorded when `trace_pii: false` is set).
54
+ - Approval gate bypasses (tool execution without the registered approval handler).
55
+
56
+ **Out of scope:**
57
+
58
+ - Security of consumer applications built on top of `phronomy`.
59
+ - Vulnerabilities in the LLM provider (OpenAI, Anthropic, etc.) or in `ruby_llm`.
60
+ - Attacks that require an attacker to already have write access to the host system.
61
+ - Prompt injection via LLM output — the gem forwards LLM output faithfully; prompt
62
+ injection resistance is the responsibility of the LLM provider and the application.
63
+
64
+ ---
65
+
66
+ ## Disclosure Policy
67
+
68
+ - Maintainers will coordinate with you on the release date and credit you in the
69
+ `CHANGELOG.md` entry and GitHub release notes.
70
+ - If you wish to remain anonymous, let us know in the advisory.
71
+ - We follow a **coordinated disclosure** model: the advisory will be made public
72
+ after a patch is released (or after 90 days, whichever comes first).
73
+
74
+ ---
75
+
76
+ ## Credit
77
+
78
+ Security reporters are credited in the `CHANGELOG.md` entry for the patch release,
79
+ in the GitHub Security Advisory, and in the release notes — unless they request
80
+ anonymity.
data/benchmark/baseline.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "workflow_context_merge": 124364.81010472385,
3
+ "workflow_define": 2179.945274115319,
4
+ "tool_params_schema_definition": 19534379.159046534,
5
+ "dispatch_parallel_10": 886.0,
6
+ "cancellation_token_cancelled": 4335060.97443425,
7
+ "cancellation_token_raise_if_cancelled_noop": 3566903.189098373,
8
+ "trim_context_remove_2000": 1761.5700678986254
9
+ }
data/benchmark/bench_agent_invoke.rb ADDED
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ # bench_agent_invoke.rb — Agent#invoke framework overhead benchmark.
4
+ #
5
+ # Measures the per-invoke cost of the Phronomy::Agent::Base framework path
6
+ # (context assembly, guardrail checks, before_completion hooks, response
7
+ # handling) with a fully stubbed LLM. No network calls are made.
8
+ #
9
+ # Scenarios:
10
+ # 1. Minimal agent (no tools, no knowledge) — baseline framework overhead.
11
+ # 2. Tool-aware agent with max_parallel_tools=4 (4 stub tools per turn).
12
+ # 3. Agent#stream setup latency (first-chunk time with stubbed stream) — planned; not measured by this file yet.
13
+
14
+ require "benchmark"
15
+ require_relative "../lib/phronomy"
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Shared stubs
19
+ # ---------------------------------------------------------------------------
20
+
# Token-usage record attached to stub messages. Defined once so that
# BenchAgentMessage.assistant does not create a new Struct class per call.
BenchAgentTokens = Struct.new(:input, :output, :cached, :cache_creation)

# Lightweight stand-in for a chat message, used by the benchmark stubs.
BenchAgentMessage = Struct.new(:role, :content, :tool_calls, :tokens) do
  # Builds an assistant message with no tool calls and fixed token counts
  # (5 input, 5 output, 0 cached, 0 cache-creation).
  #
  # @param content [String] message text
  # @return [BenchAgentMessage]
  def self.assistant(content = "done")
    new(:assistant, content, nil, BenchAgentTokens.new(5, 5, 0, 0))
  end
end
27
+
# A minimal stub chat that hands back one pre-built response immediately,
# so the benchmark never performs a network call.
class BenchStubChat
  # @return [Array] responses recorded by #ask, in call order
  attr_reader :messages

  # @param response [Object] canned reply returned by #ask, #stream and #last_message
  def initialize(response)
    @response = response
    @messages = []
  end

  # Builder-style configuration calls: the argument is ignored and the
  # receiver is returned so the calls can be chained.
  def with_instructions(_) = self
  def with_tool(_) = self
  def with_temperature(_) = self
  def with_cache_instructions(_) = self
  def with_output_schema(_) = self

  # @return [Object] the canned response, whether or not #ask has been called
  def last_message = @response

  # Records the canned response in #messages and returns it; the prompt is ignored.
  def ask(_)
    @messages << @response
    @response
  end

  # Yields the canned response's content once when a block is given, then
  # returns the response. Unlike #ask, nothing is appended to #messages.
  def stream(*)
    yield @response.content if block_given?
    @response
  end
end
54
+
# A stub tool for the benchmark: it does no real work and simply echoes its
# argument back with a "result:" prefix.
class BenchNullTool < Phronomy::Tool::Base
  description "No-op benchmark tool"
  param :x, type: :string, desc: "input"

  # @param x [String] arbitrary input
  # @return [String] "result:" followed by x
  def execute(x:)
    "result:#{x}"
  end
end
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Agent classes
67
+ # ---------------------------------------------------------------------------
68
+
69
+ BENCH_RESP = BenchAgentMessage.assistant("benchmark complete")
70
+ BENCH_RESP_CHAT = BenchStubChat.new(BENCH_RESP)
71
+
72
+ bench_minimal_class = Class.new(Phronomy::Agent::Base) do
73
+ model "stub-model"
74
+
75
+ define_method(:build_chat) { |*| BenchStubChat.new(BENCH_RESP) }
76
+ end
77
+
78
+ bench_tool_class = Class.new(Phronomy::Agent::Base) do
79
+ model "stub-model"
80
+ tools BenchNullTool
81
+ max_parallel_tools 4
82
+
83
+ define_method(:build_chat) { |*| BenchStubChat.new(BENCH_RESP) }
84
+ end
85
+
86
+ BENCH_AGENT_MINIMAL = bench_minimal_class.new
87
+ BENCH_AGENT_TOOLS = bench_tool_class.new
88
+
89
+ AGENT_INVOKE_ITERATIONS = 200
90
+
91
+ puts "=== bench_agent_invoke ==="
92
+ Benchmark.bm(50) do |x|
93
+ x.report("Agent#invoke — minimal (no tools), #{AGENT_INVOKE_ITERATIONS} iters") do
94
+ AGENT_INVOKE_ITERATIONS.times do
95
+ BENCH_AGENT_MINIMAL.invoke("ping", thread_id: "bench-#{rand(1_000_000)}")
96
+ end
97
+ end
98
+
99
+ x.report("Agent#invoke — 4 parallel stub tools, #{AGENT_INVOKE_ITERATIONS} iters") do
100
+ AGENT_INVOKE_ITERATIONS.times do
101
+ BENCH_AGENT_TOOLS.invoke("ping", thread_id: "bench-#{rand(1_000_000)}")
102
+ end
103
+ end
104
+ end
105
+ puts
data/benchmark/bench_context_assembler.rb ADDED
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Benchmark: Context::Assembler#build
4
+ #
5
+ # Tests context assembly performance for varying numbers of messages and
6
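+ # knowledge chunks. This path is exercised on every agent turn. Note that each timed iteration also constructs and populates a fresh assembler, so the figures include setup cost as well as #build.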
7
+
8
+ require "benchmark"
9
+ require_relative "../lib/phronomy"
10
+
11
+ BenchAsmMessage = Struct.new(:content)
12
+
# Builds and populates a Phronomy::Context::Assembler for one benchmark sample.
#
# The assembler receives one instruction, `n_chunks` trusted entity knowledge
# entries and `n_messages` identical conversation messages.
#
# @param n_messages [Integer] number of conversation messages to add
# @param n_chunks [Integer] number of knowledge entries to add
# @param with_budget [Boolean] when true, attach a TokenBudget with a 4096-token
#   context window and 512 max output tokens; otherwise the budget is nil
# @return [Phronomy::Context::Assembler] the populated assembler (not yet built)
def make_assembler(n_messages:, n_chunks:, with_budget: false)
  budget = with_budget ? Phronomy::Context::TokenBudget.new(context_window: 4096, max_output_tokens: 512) : nil

  assembler = Phronomy::Context::Assembler.new(budget: budget)
  assembler.add_instruction("You are a helpful assistant. Answer the user's question.")

  n_chunks.times do |i|
    assembler.add_knowledge("Fact #{i}: The capital of country #{i} is City #{i}.", type: :entity, trusted: true)
  end

  assembler.add_messages(Array.new(n_messages) { BenchAsmMessage.new("This is a conversation message.") })
  assembler
end
26
+
27
+ BENCH_ASM_ITERATIONS = 1_000
28
+
29
+ puts "=== bench_context_assembler ==="
30
+ Benchmark.bm(40) do |x|
31
+ x.report("build(10 msgs, 0 chunks)") do
32
+ BENCH_ASM_ITERATIONS.times { make_assembler(n_messages: 10, n_chunks: 0).build }
33
+ end
34
+
35
+ x.report("build(100 msgs, 5 chunks)") do
36
+ BENCH_ASM_ITERATIONS.times { make_assembler(n_messages: 100, n_chunks: 5).build }
37
+ end
38
+
39
+ x.report("build(1000 msgs, 10 chunks, no budget)") do
40
+ (BENCH_ASM_ITERATIONS / 10).times { make_assembler(n_messages: 1000, n_chunks: 10).build }
41
+ end
42
+
43
+ x.report("build(1000 msgs, 10 chunks, budgeted)") do
44
+ (BENCH_ASM_ITERATIONS / 10).times { make_assembler(n_messages: 1000, n_chunks: 10, with_budget: true).build }
45
+ end
46
+ end
data/benchmark/bench_regression.rb ADDED
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ # bench_regression.rb — Targeted regression benchmarks.
4
+ #
5
+ # Measures the five minimum regression targets defined in Issue #232 (targets 6 and 7, defined further down in this file, are measured in addition to these):
6
+ # 1. WorkflowContext#merge throughput
7
+ # 2. Workflow.define (graph build) time
8
+ # 3. Tool::Base#params_schema generation (10 params)
9
+ # 4. Orchestrator#dispatch_parallel overhead (10 stub agents, no LLM)
10
+ # 5. CancellationToken#cancelled? throughput (shared token, 8 threads)
11
+ #
12
+ # Results are stored in a global REGRESSION_RESULTS hash (keyed by metric name,
13
+ # value = iterations per second) for use by run_all.rb baseline comparison.
14
+
15
+ require "benchmark"
16
+ require_relative "../lib/phronomy"
17
+
18
+ REGRESSION_ITERATIONS = 5_000
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Target 1: WorkflowContext#merge throughput
22
+ # ---------------------------------------------------------------------------
23
+ context_class = Class.new do
24
+ include Phronomy::WorkflowContext
25
+
26
+ field :value, type: :replace, default: -> { 0 }
27
+ field :log, type: :append, default: -> { [] }
28
+ end
29
+
30
+ sample_ctx = context_class.new(value: 42, log: ["a"])
31
+
32
+ t1 = Benchmark.measure("WorkflowContext#merge") do
33
+ REGRESSION_ITERATIONS.times { sample_ctx.merge(value: 99, log: "b") }
34
+ end
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Target 2: Workflow.define graph build time
38
+ # ---------------------------------------------------------------------------
39
+ BUILD_ITERATIONS = 1_000
40
+
41
+ t2 = Benchmark.measure("Workflow.define (5 states)") do
42
+ BUILD_ITERATIONS.times do
43
+ build_ctx = Class.new do
44
+ include Phronomy::WorkflowContext
45
+
46
+ field :x, type: :replace, default: -> { 0 }
47
+ end
48
+ Phronomy::Workflow.define(build_ctx) do
49
+ initial :a
50
+ %i[a b c d].each_with_index do |state, i|
51
+ next_state = %i[a b c d e][i + 1]
52
+ action = ->(s) { s.merge(x: s.x + 1) }
53
+ self.state state, action: action
54
+ transition from: state, to: next_state
55
+ end
56
+ self.state :e, action: ->(s) { s }
57
+ transition from: :e, to: :__finish__
58
+ end
59
+ end
60
+ end
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # Target 3: Tool::Base#params_schema generation (10 params)
64
+ # ---------------------------------------------------------------------------
65
+ tool_class = Class.new(Phronomy::Tool::Base) do
66
+ description "Test tool with 10 params"
67
+ param :p1, type: :string, desc: "param 1"
68
+ param :p2, type: :string, desc: "param 2"
69
+ param :p3, type: :string, desc: "param 3"
70
+ param :p4, type: :string, desc: "param 4"
71
+ param :p5, type: :string, desc: "param 5"
72
+ param :p6, type: :string, desc: "param 6"
73
+ param :p7, type: :string, desc: "param 7"
74
+ param :p8, type: :string, desc: "param 8"
75
+ param :p9, type: :string, desc: "param 9"
76
+ param :p10, type: :string, desc: "param 10"
77
+
78
+ def execute(**_kwargs)
79
+ "ok"
80
+ end
81
+ end
82
+
83
+ t3 = Benchmark.measure("Tool::Base#params_schema_definition (10 params)") do
84
+ REGRESSION_ITERATIONS.times { tool_class.params_schema_definition }
85
+ end
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Target 4: Orchestrator#dispatch_parallel overhead (10 stub agents, no LLM)
89
+ # ---------------------------------------------------------------------------
90
+ stub_agent_class = Class.new(Phronomy::Agent::Base) do
91
+ define_method(:invoke) do |_input, messages: [], thread_id: nil, config: {}|
92
+ {output: "stub", messages: []}
93
+ end
94
+ define_method(:invoke_async) { |input, **_kw| Phronomy::Runtime.instance.spawn(name: "bench-stub") { invoke(input) } }
95
+ end
96
+
97
+ orchestrator_class = Class.new(Phronomy::Agent::Orchestrator)
98
+ orchestrator = orchestrator_class.new
99
+
100
+ PARALLEL_ITERATIONS = 200
101
+
102
+ t4 = Benchmark.measure("Orchestrator#dispatch_parallel (10 agents)") do
103
+ PARALLEL_ITERATIONS.times do
104
+ tasks = Array.new(10) { {agent: stub_agent_class, input: "x"} }
105
+ orchestrator.dispatch_parallel(*tasks)
106
+ end
107
+ end
108
+
109
+ # ---------------------------------------------------------------------------
110
+ # Target 5: CancellationToken#cancelled? throughput (8 threads)
111
+ # ---------------------------------------------------------------------------
112
+ CANCEL_TOKEN = Phronomy::CancellationToken.new
113
+ CANCEL_ITERATIONS = 10_000
114
+
115
+ t5 = Benchmark.measure("CancellationToken#cancelled? (8 threads)") do
116
+ threads = 8.times.map do
117
+ Thread.new { CANCEL_ITERATIONS.times { CANCEL_TOKEN.cancelled? } }
118
+ end
119
+ threads.each(&:join)
120
+ end
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Target 6: CancellationToken#raise_if_cancelled! hot path (no-op, single thread)
124
+ # ---------------------------------------------------------------------------
125
+ RAISE_TOKEN = Phronomy::CancellationToken.new # not cancelled — no-op path
126
+ RAISE_ITERATIONS = 200_000
127
+
128
+ t6 = Benchmark.measure("CancellationToken#raise_if_cancelled! (no-op)") do
129
+ RAISE_ITERATIONS.times { RAISE_TOKEN.raise_if_cancelled! }
130
+ end
131
+
132
+ # ---------------------------------------------------------------------------
133
+ # Target 7: Context::TrimContext#remove on a 2000-element history
134
+ # ---------------------------------------------------------------------------
135
+ BenchMsg = Struct.new(:content) unless defined?(BenchMsg)
136
+
137
+ TRIM_ELEMENTS = Array.new(2_000) { |i| {seq: i, message: BenchMsg.new("msg #{i}"), tokens: 10, role: :user} }
138
+ TRIM_BUDGET = Phronomy::Context::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
139
+ TRIM_ITERATIONS = 500
140
+
141
+ t7 = Benchmark.measure("TrimContext#remove (2000-element history)") do
142
+ TRIM_ITERATIONS.times do
143
+ tc = Phronomy::Context::TrimContext.new(message_elements: TRIM_ELEMENTS, budget: TRIM_BUDGET)
144
+ tc.remove((0...200).to_a) # remove 200 oldest messages
145
+ end
146
+ end
147
+
148
+ # ---------------------------------------------------------------------------
149
+ # Print results and store in REGRESSION_RESULTS
150
+ # ---------------------------------------------------------------------------
151
+ puts "=== bench_regression ==="
152
+ printf("%-46s %8s %12s\n", "Metric", "Real (s)", "Iter/s")
153
+ puts "-" * 70
154
+
155
+ metrics = {
156
+ "workflow_context_merge" => [t1, REGRESSION_ITERATIONS],
157
+ "workflow_define" => [t2, BUILD_ITERATIONS],
158
+ "tool_params_schema_definition" => [t3, REGRESSION_ITERATIONS],
159
+ "dispatch_parallel_10" => [t4, PARALLEL_ITERATIONS],
160
+ "cancellation_token_cancelled" => [t5, 8 * CANCEL_ITERATIONS],
161
+ "cancellation_token_raise_if_cancelled_noop" => [t6, RAISE_ITERATIONS],
162
+ "trim_context_remove_2000" => [t7, TRIM_ITERATIONS]
163
+ }
164
+
165
+ REGRESSION_RESULTS = {} # rubocop:disable Style/MutableConstant
166
+
167
+ metrics.each do |key, (measure, iters)|
168
+ ips = iters / measure.real
169
+ REGRESSION_RESULTS[key] = ips
170
+ printf("%-46s %8.3f %12.0f\n", key, measure.real, ips)
171
+ end
172
+ puts
data/benchmark/bench_token_estimator.rb ADDED
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Benchmark: Context::TokenEstimator.estimate
4
+ #
5
+ # Tests estimation speed for short, medium, and long text inputs, and for
6
+ # Arrays of message-like objects. This method is called on every message in
7
+ # every agent turn, so it must be consistently fast.
8
+
9
+ require "benchmark"
10
+ require_relative "../lib/phronomy"
11
+
12
+ SHORT_TEXT = "Hello, how are you today?"
13
+ MEDIUM_TEXT = "A" * 500
14
+ LONG_TEXT = "A" * 10_000
15
+
16
+ BenchMessage = Struct.new(:content)
17
+
18
+ MESSAGES_100 = Array.new(100) { BenchMessage.new("A" * 100) }
19
+ MESSAGES_1000 = Array.new(1000) { BenchMessage.new("A" * 100) }
20
+
21
+ BENCH_TOKEN_ITERATIONS = 10_000
22
+
23
+ puts "=== bench_token_estimator ==="
24
+ Benchmark.bm(30) do |x|
25
+ x.report("estimate(short text)") do
26
+ BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(SHORT_TEXT) }
27
+ end
28
+
29
+ x.report("estimate(medium text 500c)") do
30
+ BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(MEDIUM_TEXT) }
31
+ end
32
+
33
+ x.report("estimate(long text 10k c)") do
34
+ BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(LONG_TEXT) }
35
+ end
36
+
37
+ x.report("estimate(100 messages)") do
38
+ BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(MESSAGES_100) }
39
+ end
40
+
41
+ x.report("estimate(1000 messages)") do
42
+ (BENCH_TOKEN_ITERATIONS / 10).times { Phronomy::Context::TokenEstimator.estimate(MESSAGES_1000) }
43
+ end
44
+ end
data/benchmark/bench_tool_schema.rb ADDED
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Benchmark: Tool::Base params_schema generation and static_knowledge_chunks cache
4
+ #
5
+ # Tool schema generation happens once per tool class (lazily memoised).
6
+ # static_knowledge_chunks is cached at the class level; cache-hit overhead
7
+ # should be negligible compared to cache-miss (which calls the knowledge source).
8
+
9
+ require "benchmark"
10
+ require_relative "../lib/phronomy"
11
+
12
+ # --- Tool schema ---
13
+
# Benchmark fixture: a tool declaring ten parameters (five required, five
# optional) of mixed types, used to time params_schema_definition.
class BenchTool10Params < Phronomy::Tool::Base
  description "A tool with 10 parameters for benchmarking purposes"
  param :param1, type: :string, desc: "First parameter"
  param :param2, type: :integer, desc: "Second parameter"
  param :param3, type: :number, desc: "Third parameter"
  param :param4, type: :boolean, desc: "Fourth parameter"
  param :param5, type: :string, desc: "Fifth parameter"
  param :param6, type: :string, desc: "Sixth parameter", required: false
  param :param7, type: :integer, desc: "Seventh parameter", required: false
  param :param8, type: :string, desc: "Eighth parameter", required: false
  param :param9, type: :string, desc: "Ninth parameter", required: false
  param :param10, type: :string, desc: "Tenth parameter", required: false

  # Accepts and ignores any keyword arguments.
  #
  # @return [String] always "ok"
  def execute(**_)
    "ok"
  end
end
31
+
32
+ # Warm up memoisation
33
+ BenchTool10Params.params_schema_definition
34
+
35
+ BENCH_TOOL_ITERATIONS = 50_000
36
+
37
+ puts "=== bench_tool_schema ==="
38
+ Benchmark.bm(35) do |x|
39
+ x.report("params_schema_definition (memoised, 10p)") do
40
+ BENCH_TOOL_ITERATIONS.times { BenchTool10Params.params_schema_definition }
41
+ end
42
+ end
43
+
44
+ # --- static_knowledge_chunks cache ---
45
+
# Benchmark fixture: a knowledge source that reports itself as static and
# always returns the same single chunk.
class BenchKnowledgeSource < Phronomy::KnowledgeSource::Base
  # @param query [Object, nil] ignored
  # @return [Array<Hash>] one chunk with :content and :type keys
  def fetch(query: nil)
    [{content: "Cached knowledge fact.", type: :static}]
  end

  # @return [Boolean] always true
  def static?
    true
  end
end
55
+
# Benchmark fixture: an agent class configured with one static knowledge
# source, used to time repeated static_knowledge_chunks calls.
class BenchAgentWithKnowledge < Phronomy::Agent::Base
  model "gpt-4o-mini"
  static_knowledge BenchKnowledgeSource.new
end
60
+
61
+ # Warm up cache
62
+ BenchAgentWithKnowledge.static_knowledge_chunks
63
+
64
+ puts "\n=== bench_static_knowledge_cache ==="
65
+ Benchmark.bm(35) do |x|
66
+ x.report("static_knowledge_chunks (hit)") do
67
+ BENCH_TOOL_ITERATIONS.times { BenchAgentWithKnowledge.static_knowledge_chunks }
68
+ end
69
+ end
data/benchmark/bench_vector_store.rb ADDED
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Benchmark: VectorStore::InMemory#search
4
+ #
5
+ # Tests search performance at different corpus sizes (100, 1000, 10_000 docs).
6
+ # Linear scan is expected; this benchmark establishes the scaling baseline.
7
+
8
+ require "benchmark"
9
+ require_relative "../lib/phronomy"
10
+
11
+ DIM = 64
12
+
13
+ def random_embedding(dim)
14
+ Array.new(dim) { rand(-1.0..1.0) }
15
+ end
16
+
17
+ def populate(store, n)
18
+ n.times do |i|
19
+ store.add(id: "doc#{i}", embedding: random_embedding(DIM), metadata: {text: "Document #{i}"})
20
+ end
21
+ end
22
+
23
+ QUERY = random_embedding(DIM)
24
+
25
+ # Use fewer iterations for larger corpora to keep total run time reasonable.
26
+ BENCH_VS_ITERS = {100 => 100, 1_000 => 20, 10_000 => 5}.freeze
27
+
28
+ puts "=== bench_vector_store_inmemory ==="
29
+ Benchmark.bm(35) do |x|
30
+ [100, 1_000, 10_000].each do |n|
31
+ store = Phronomy::VectorStore::InMemory.new(dimension: DIM)
32
+ populate(store, n)
33
+ iters = BENCH_VS_ITERS[n]
34
+
35
+ x.report("search(k=5, corpus=#{n}, iters=#{iters})") do
36
+ iters.times { store.search(query_embedding: QUERY, k: 5) }
37
+ end
38
+ end
39
+ end
data/benchmark/bench_workflow.rb ADDED
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Benchmark: Workflow transition loop
4
+ #
5
+ # Builds a linear chain of N states and measures how long it takes to run
6
+ # the full workflow to completion. 100 transitions are expected to average <10ms per run; a slower result only prints a warning and does not fail the script.
7
+
8
+ require "benchmark"
9
+ require_relative "../lib/phronomy"
10
+
# Builds a linear workflow: state_0 -> state_1 -> ... -> state_(N-1) -> __finish__.
# Each state's action returns the context merged with `count` incremented by one.
#
# @param n [Integer] number of states in the chain
# @return [Object] whatever Phronomy::Workflow.define returns for the definition
def build_linear_workflow(n)
  context_class = Class.new do
    include Phronomy::WorkflowContext

    field :count, type: :replace, default: -> { 0 }
  end

  Phronomy::Workflow.define(context_class) do
    initial :state_0

    n.times do |i|
      state :"state_#{i}", action: ->(s) { s.merge(count: s.count + 1) }
      # The last state hands over to the terminal :__finish__ marker.
      transition from: :"state_#{i}", to: (i + 1 < n) ? :"state_#{i + 1}" : :__finish__
    end
  end
end
28
+
29
+ BENCH_WF_ITERATIONS = 50
30
+
31
+ puts "=== bench_workflow_transition ==="
32
+ Benchmark.bm(30) do |x|
33
+ [10, 50, 100].each do |n|
34
+ app = build_linear_workflow(n)
35
+ cfg = {recursion_limit: n + 5}
36
+
37
+ x.report("#{n} transitions") do
38
+ BENCH_WF_ITERATIONS.times { app.invoke({}, config: cfg) }
39
+ end
40
+ end
41
+ end
42
+
43
+ # Threshold check (warning only): 100 transitions should complete in <10ms on average
44
+ puts "\nThreshold check: 100 transitions < 10ms average..."
45
+ app100 = build_linear_workflow(100)
46
+ cfg100 = {recursion_limit: 110}
47
+ samples = 20
48
+ elapsed = Benchmark.realtime { samples.times { app100.invoke({}, config: cfg100) } }
49
+ avg_ms = (elapsed / samples) * 1000.0
50
+ puts " Average: #{"%.2f" % avg_ms}ms per run"
51
+ if avg_ms < 10.0
52
+ puts " PASS (< 10ms)"
53
+ else
54
+ warn " WARN: #{avg_ms.round(2)}ms exceeds 10ms threshold (environment may be slow)"
55
+ end