RubyGems - phronomy - Versions diffs - 0.6.0 → 0.7.0 - Mend

phronomy 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

checksums.yaml +4 -4
data/.mutant.yml +21 -0
data/CHANGELOG.md +338 -0
data/CONTRIBUTING.md +102 -0
data/README.md +242 -27
data/RELEASE_CHECKLIST.md +86 -0
data/SECURITY.md +80 -0
data/benchmark/baseline.json +9 -0
data/benchmark/bench_agent_invoke.rb +105 -0
data/benchmark/bench_context_assembler.rb +46 -0
data/benchmark/bench_regression.rb +171 -0
data/benchmark/bench_token_estimator.rb +44 -0
data/benchmark/bench_tool_schema.rb +69 -0
data/benchmark/bench_vector_store.rb +39 -0
data/benchmark/bench_workflow.rb +55 -0
data/benchmark/run_all.rb +118 -0
data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
data/docs/decisions/002-workflow-context-immutability.md +42 -0
data/docs/decisions/003-event-loop-singleton.md +48 -0
data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
data/docs/decisions/006-no-built-in-guardrails.md +48 -0
data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
data/docs/decisions/009-state-store-abstraction.md +141 -0
data/lib/phronomy/agent/base.rb +194 -12
data/lib/phronomy/agent/before_completion_context.rb +1 -0
data/lib/phronomy/agent/checkpoint.rb +1 -0
data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
data/lib/phronomy/agent/concerns/retryable.rb +12 -1
data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
data/lib/phronomy/agent/fsm.rb +15 -0
data/lib/phronomy/agent/handoff.rb +3 -0
data/lib/phronomy/agent/orchestrator.rb +123 -11
data/lib/phronomy/agent/parallel_tool_chat.rb +21 -4
data/lib/phronomy/agent/react_agent.rb +8 -6
data/lib/phronomy/agent/runner.rb +2 -0
data/lib/phronomy/agent/shared_state.rb +11 -0
data/lib/phronomy/agent/suspend_signal.rb +2 -0
data/lib/phronomy/agent/team_coordinator.rb +17 -5
data/lib/phronomy/cancellation_token.rb +92 -0
data/lib/phronomy/configuration.rb +26 -2
data/lib/phronomy/context/assembler.rb +6 -0
data/lib/phronomy/context/compaction_context.rb +2 -0
data/lib/phronomy/context/context_version_cache.rb +2 -0
data/lib/phronomy/context/token_budget.rb +3 -0
data/lib/phronomy/context/token_estimator.rb +9 -2
data/lib/phronomy/context/trigger_context.rb +1 -0
data/lib/phronomy/context/trim_context.rb +4 -0
data/lib/phronomy/embeddings/base.rb +5 -2
data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
data/lib/phronomy/eval/comparison.rb +2 -0
data/lib/phronomy/eval/dataset.rb +4 -0
data/lib/phronomy/eval/metrics.rb +6 -0
data/lib/phronomy/eval/runner.rb +2 -0
data/lib/phronomy/eval/scorer/base.rb +1 -0
data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
data/lib/phronomy/event_loop.rb +114 -7
data/lib/phronomy/fsm_session.rb +8 -1
data/lib/phronomy/generator_verifier.rb +2 -0
data/lib/phronomy/guardrail/base.rb +3 -0
data/lib/phronomy/knowledge_source/base.rb +6 -2
data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
data/lib/phronomy/loader/base.rb +1 -0
data/lib/phronomy/loader/csv_loader.rb +2 -0
data/lib/phronomy/loader/markdown_loader.rb +2 -0
data/lib/phronomy/loader/plain_text_loader.rb +1 -0
data/lib/phronomy/output_parser/base.rb +1 -0
data/lib/phronomy/output_parser/json_parser.rb +22 -3
data/lib/phronomy/output_parser/structured_parser.rb +2 -0
data/lib/phronomy/prompt_template.rb +5 -0
data/lib/phronomy/runnable.rb +20 -3
data/lib/phronomy/splitter/base.rb +2 -0
data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
data/lib/phronomy/state_store/base.rb +48 -0
data/lib/phronomy/state_store/in_memory.rb +62 -0
data/lib/phronomy/tool/agent_tool.rb +1 -0
data/lib/phronomy/tool/base.rb +189 -27
data/lib/phronomy/tool/mcp_tool.rb +68 -13
data/lib/phronomy/tracing/base.rb +3 -0
data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
data/lib/phronomy/vector_store/base.rb +33 -7
data/lib/phronomy/vector_store/in_memory.rb +16 -7
data/lib/phronomy/vector_store/pgvector.rb +40 -9
data/lib/phronomy/vector_store/redis_search.rb +29 -8
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow.rb +96 -7
data/lib/phronomy/workflow_context.rb +54 -4
data/lib/phronomy/workflow_runner.rb +35 -7
data/lib/phronomy.rb +70 -1
data/scripts/api_snapshot.rb +91 -0
data/scripts/check_api_annotations.rb +68 -0
data/scripts/check_private_enforcement.rb +93 -0
data/scripts/check_readme_runnable.rb +98 -0
data/scripts/run_mutation.sh +46 -0
metadata +45 -2

data/lib/phronomy/workflow_runner.rb CHANGED Viewed

@@ -17,8 +17,11 @@ module Phronomy
   # determined by the declared state machine topology, never by Phronomy internals.
   #
   # Entry and exit actions are registered as state_machines +after_transition to:+
-  # and +before_transition from:+ callbacks respectively. The WorkflowContext is
-  # mutable; actions receive it and modify fields in place.
+  # and +before_transition from:+ callbacks respectively. Entry actions may either
+  # mutate the context in place or return a new context (e.g. via +s.merge(...)+).
+  # When an entry action returns a Phronomy::WorkflowContext, that value replaces
+  # the current context; otherwise the return value is ignored.
+  # Exit actions are always mutation-in-place; their return value is ignored.
   #
   # The sole exception is the initial state: state_machines does not fire transition
   # callbacks on initialization, so the entry action for the entry point is invoked
@@ -35,13 +38,14 @@ module Phronomy
   #   2. <event_name>    — external events triggered by human input, originating
   #                        from wait states
   #                        (declared with +transition from: :awaiting, on: :approve, to: :run+)
+  # @api private
   class WorkflowRunner
     include Phronomy::Runnable
     # Sentinel value for the terminal state of a workflow.
     FINISH = :__end__
-    def initialize(state_class:, entry_actions:, declared_states:, auto_transitions:, external_events:, entry_point:, exit_actions: {}, wait_state_names: [])
+    def initialize(state_class:, entry_actions:, declared_states:, auto_transitions:, external_events:, entry_point:, exit_actions: {}, wait_state_names: [], state_store: nil)
       @state_class = state_class
       @entry_actions = entry_actions   # { state_name => [callable, ...] }
       @declared_states = declared_states
@@ -50,13 +54,15 @@ module Phronomy
       @external_events = external_events    # { name => [{from:, to:, guard:}, ...] }
       @entry_point = entry_point
       @wait_state_names = wait_state_names
+      @state_store = state_store
       @phase_machine_class = build_phase_machine_class(auto_transitions, exit_actions)
     end
     # Executes the workflow from the initial state.
     # @param input [Hash] initial context field values
-    # @param config [Hash] { thread_id:, recursion_limit:, user_id:, session_id: }
+    # @param config [Hash] { thread_id:, recursion_limit:, user_id:, session_id:, state_store: }
     # @return [Object] final context (includes Phronomy::WorkflowContext)
+    # @api private
     def invoke(input, config: {})
       caller_meta = {}
       caller_meta[:user_id] = config[:user_id] if config[:user_id]
@@ -65,13 +71,23 @@ module Phronomy
       trace("workflow.invoke", input: input.inspect, **caller_meta) do |_span|
         thread_id = config[:thread_id] || SecureRandom.uuid
         recursion_limit = config.fetch(:recursion_limit, Phronomy.configuration.recursion_limit)
-        state = @state_class.new(**input)
+        store = config.fetch(:state_store, @state_store) || Phronomy.configuration.state_store
+        snapshot = (store && config[:thread_id]) ? store.load(thread_id) : nil
+        initial_fields = if snapshot && snapshot[:fields]
+          snapshot[:fields].transform_keys(&:to_sym).merge(input.transform_keys(&:to_sym))
+        else
+          input
+        end
+        state = @state_class.new(**initial_fields)
         state.set_graph_metadata(thread_id: thread_id)
         result = if Phronomy.configuration.event_loop
           run_via_event_loop(state, recursion_limit: recursion_limit)
         else
           run_workflow(state, recursion_limit: recursion_limit)
         end
+        store&.save(thread_id, {fields: result.to_h, phase: result.phase.to_s}) if config[:thread_id]
         [result, nil]
       end
     end
@@ -80,6 +96,7 @@ module Phronomy
     # @param state [Object] halted context
     # @param input [Hash, nil] optional field updates to merge before resuming
     # @return [Object] final context
+    # @api private
     def resume(state:, input: nil)
       send_event(state: state, event: :resume, input: input)
     end
@@ -93,6 +110,7 @@ module Phronomy
     # @param event [Symbol] named event or +:resume+ for generic resumption
     # @param input [Hash, nil] optional field updates to merge before resuming
     # @return [Object] final context
+    # @api private
     def send_event(state:, event:, input: nil)
       state = state.merge(input) if input
       event = event.to_sym
@@ -128,6 +146,7 @@ module Phronomy
     # @param config [Hash]
     # @yield [Hash]
     # @return [Object] final context
+    # @api private
     def stream(input, config: {}, &block)
       thread_id = config[:thread_id] || SecureRandom.uuid
       recursion_limit = config.fetch(:recursion_limit, Phronomy.configuration.recursion_limit)
@@ -180,6 +199,7 @@ module Phronomy
         tracker = new_phase_machine(current_state)
         tracker.context = ctx
         fire_event!(tracker, resume_event, current_state)
+        ctx = tracker.context
         next_phase = tracker.phase.to_sym
         current_state = (next_phase == current_state) ? FINISH : next_phase
       else
@@ -189,7 +209,11 @@ module Phronomy
         tracker.context = ctx
         # state_machines only fires after_transition callbacks on transitions.
         # The entry point has no prior transition, so we invoke its entry actions directly.
-        @entry_actions[current_state]&.each { |c| c.call(ctx) }
+        @entry_actions[current_state]&.each do |c|
+          result = c.call(ctx)
+          ctx = result if result.is_a?(Phronomy::WorkflowContext)
+        end
+        tracker.context = ctx
       end
       # Event queue: decouple action execution from transition firing.
@@ -211,6 +235,7 @@ module Phronomy
           end
           fire_event!(tracker, event, current_state)
+          ctx = tracker.context
           next_phase = tracker.phase.to_sym
           # When next_phase == current_state no transition matched → terminal state.
           current_state = (next_phase == current_state) ? FINISH : next_phase
@@ -316,10 +341,13 @@ module Phronomy
           # Entry callbacks: fire after_transition into each state.
           #    Each callable is registered as a separate callback; state_machines
           #    accumulates them and fires in declaration order.
+          #    If the callable returns a WorkflowContext (e.g. via s.merge(...)),
+          #    the returned context replaces the current one on the tracker.
           entry_acts.each do |state_name, callables|
             callables.each do |callable|
               after_transition to: state_name do |machine|
-                callable.call(machine.context)
+                result = callable.call(machine.context)
+                machine.context = result if result.is_a?(Phronomy::WorkflowContext)
               end
             end
           end

data/lib/phronomy.rb CHANGED Viewed

@@ -23,11 +23,33 @@ module Phronomy
   class ParseError < Error; end
   class RecursionLimitError < Error; end
   class ToolError < Error; end
+  # Raised when an agent invocation exceeds the timeout set via +invoke_timeout+.
+  class TimeoutError < Error; end
   class ConfigurationError < Error; end
   class HandoffError < Error; end
+  # Raised when a network or transport layer call fails (e.g. LLM API unreachable,
+  # MCP server connection refused). Distinguishable from application-level errors
+  # so callers can apply network-specific retry logic.
+  class TransportError < Error; end
+  # Raised when the LLM API returns a rate-limit response (HTTP 429 or equivalent).
+  # Callers should back off and retry after the indicated delay.
+  class RateLimitError < TransportError; end
+  # Raised when the LLM API rejects the request due to an invalid or revoked API key.
+  # Callers should not retry without fixing the credentials.
+  class AuthenticationError < TransportError; end
+  # Raised when the prompt exceeds the model's context window limit.
+  class ContextLengthError < Error; end
+  # Raised when a workflow or agent execution is explicitly cancelled.
+  # Separate from TimeoutError (deadline exceeded) — this is an intentional stop.
+  class CancellationError < Error; end
   # Raised by {Phronomy::GeneratorVerifier#invoke} when +raise_if_untrusted: true+
   # and the pipeline's combined confidence score falls below the configured threshold.
   #
@@ -63,9 +85,56 @@ module Phronomy
       yield configuration
     end
-    # Resets configuration; primarily used in tests.
+    # Resets the global Phronomy configuration to defaults.
+    #
+    # **Intended for test suites only.** Calling this in a production process
+    # will drop all runtime configuration (tracer, model, tokenizer, etc.)
+    # globally and immediately affect all subsequent agent and workflow calls.
+    #
+    # **Parallel test suites warning:** When examples run concurrently in
+    # threads of one process, +reset_configuration!+ in one thread clears the
+    # configuration shared with every other thread. When using this method,
+    # prefer process-isolation strategies (forked workers, as used by
+    # `parallel_tests` / `parallel_rspec`) over thread-based parallelism.
+    #
+    # Typical usage in a sequential test suite:
+    #   after { Phronomy.reset_configuration! }
     def reset_configuration!
       @configuration = Configuration.new
     end
+    # Yields the current {Configuration} object, then restores a snapshot of
+    # the configuration taken (via +dup+) before the block ran, even if the
+    # block raises.
+    #
+    # Intended for test helpers that need to temporarily override settings
+    # without permanently mutating the global configuration.
+    #
+    # @yield [config] the current {Configuration} instance (mutable)
+    # @example
+    #   Phronomy.with_configuration do |c|
+    #     c.logger = Logger.new($stdout)
+    #   end
+    # @api public
+    def with_configuration
+      original = @configuration&.dup
+      yield configuration
+    ensure
+      @configuration = original
+    end
+    # Resets all Phronomy runtime state: configuration and the EventLoop
+    # singleton (if running).
+    #
+    # **Intended for test suites only.**  Stops any running EventLoop thread,
+    # clears the EventLoop singleton, and resets configuration to defaults.
+    # Call once before/after each example to ensure test isolation.
+    #
+    # @example
+    #   config.around { |ex| Phronomy.reset_runtime! ; ex.run ; Phronomy.reset_runtime! }
+    # @api public
+    def reset_runtime!
+      Phronomy::EventLoop.reset!
+      @configuration = Configuration.new
+    end
   end
 end

data/scripts/api_snapshot.rb ADDED Viewed

@@ -0,0 +1,91 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# scripts/api_snapshot.rb
+#
+# Dumps the public instance methods (and, for classes, the public class methods)
+# of all Stable/Beta API entries to JSON.  Stored in spec/fixtures/api_snapshot.json
+# and used by spec/phronomy/api_compatibility_spec.rb to detect unintended API removals.
+#
+# Usage:
+#   # Regenerate spec/fixtures/api_snapshot.json (run when intentionally adding
+#   # or removing public API methods after updating the stability table):
+#   ruby scripts/api_snapshot.rb --write
+#
+#   # Print snapshot to stdout (useful for manual inspection):
+#   ruby scripts/api_snapshot.rb
+require "json"
+require "fileutils"
+require_relative "../lib/phronomy"
+# Classes and modules whose public API is tracked.
+# Add an entry whenever a new class/module is promoted to Stable or Beta in README.md.
+PUBLIC_API_ENTRIES = [
+  # Stable
+  Phronomy::Agent::Base,
+  Phronomy::Tool::Base,
+  Phronomy::Workflow,
+  Phronomy::WorkflowContext,
+  Phronomy::Runnable,
+  Phronomy::PromptTemplate,
+  # Beta
+  Phronomy::Agent::ReactAgent,
+  Phronomy::Agent::Orchestrator,
+  Phronomy::Agent::TeamCoordinator,
+  Phronomy::Guardrail::InputGuardrail,
+  Phronomy::Guardrail::OutputGuardrail,
+  Phronomy::VectorStore::Base,
+  Phronomy::VectorStore::InMemory,
+  Phronomy::Embeddings::Base,
+  Phronomy::KnowledgeSource::Base,
+  Phronomy::KnowledgeSource::StaticKnowledge,
+  Phronomy::KnowledgeSource::RAGKnowledge,
+  Phronomy::Tracing::Base,
+  Phronomy::Tracing::NullTracer,
+  Phronomy::Eval::Runner
+].freeze
+# Baseline methods common to all Ruby objects — excluded from the snapshot.
+BASELINE_INSTANCE_METHODS = (
+  Object.public_instance_methods |
+  Kernel.public_instance_methods
+).uniq.freeze
+BASELINE_CLASS_METHODS = (
+  Class.public_methods |
+  Module.public_methods
+).uniq.freeze
+def snapshot_entry(klass)
+  if klass.instance_of?(Module)
+    # Module — capture instance methods defined in this module only
+    own_methods = klass.public_instance_methods(false).sort
+    {
+      "name" => klass.name,
+      "type" => "module",
+      "public_instance_methods" => own_methods
+    }
+  else
+    # Class — capture public instance methods minus universal baseline
+    instance_methods = (klass.public_instance_methods - BASELINE_INSTANCE_METHODS).sort
+    class_methods = (klass.public_methods(false) - BASELINE_CLASS_METHODS).sort
+    {
+      "name" => klass.name,
+      "type" => "class",
+      "public_instance_methods" => instance_methods,
+      "public_class_methods" => class_methods
+    }
+  end
+end
+snapshot = PUBLIC_API_ENTRIES.map { |entry| snapshot_entry(entry) }
+if ARGV.include?("--write")
+  path = File.expand_path("../spec/fixtures/api_snapshot.json", __dir__)
+  FileUtils.mkdir_p(File.dirname(path))
+  File.write(path, JSON.pretty_generate(snapshot) + "\n")
+  puts "Wrote #{path}"
+else
+  puts JSON.pretty_generate(snapshot)
+end

data/scripts/check_api_annotations.rb ADDED Viewed

@@ -0,0 +1,68 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# check_api_annotations.rb
+#
+# Verifies that every YARD-documented method in lib/ carries either
+# "@api public" or "@api private".  Ruby-level visibility is not inspected:
+# every line that begins with "def" is considered.
+#
+# A method is considered "YARD-documented" when its preceding comment block
+# contains at least one @param, @return, @raise, @yield, @example, or
+# @overload tag.  Methods with only a plain prose description (no @ tags)
+# are exempt.
+#
+# Usage (run from the phronomy/ repository root):
+#   ruby scripts/check_api_annotations.rb
+#
+# Exit codes:
+#   0 — all documented methods carry @api annotations
+#   1 — one or more documented methods are missing @api annotations
+lib_dir = File.expand_path("../lib", __dir__)
+unless File.directory?(lib_dir)
+  warn "ERROR: lib directory not found at #{lib_dir}"
+  exit 1
+end
+errors = []
+Dir.glob(File.join(lib_dir, "**", "*.rb")).sort.each do |file|
+  lines = File.readlines(file)
+  lines.each_with_index do |line, i|
+    next unless line.match?(/^\s*def\s+\w/)
+    # Collect the contiguous comment block immediately above this def.
+    comment_lines = []
+    j = i - 1
+    while j >= 0 && lines[j].match?(/^\s*#/)
+      comment_lines.unshift(lines[j])
+      j -= 1
+    end
+    next if comment_lines.empty?
+    comment = comment_lines.join
+    # Only lint methods that carry at least one YARD type tag.
+    next unless comment.match?(/#[ \t]+@(param|return|raise|yield|example|overload)/)
+    # Pass if an @api tag is already present.
+    next if comment.match?(/#[ \t]+@api[ \t]+(public|private)/)
+    rel_path = file.sub("#{lib_dir}/../", "")
+    m = line.match(/def\s+(\w+[!?=]?)/)
+    method_name = m ? m[1] : "unknown"
+    errors << "#{rel_path}:#{i + 1}  def #{method_name}  (missing @api public or @api private)"
+  end
+end
+if errors.empty?
+  puts "OK: all YARD-documented methods carry @api annotations"
+  exit 0
+else
+  puts "FAIL: #{errors.size} method(s) missing @api annotation:"
+  errors.each { |e| puts "  #{e}" }
+  exit 1
+end

data/scripts/check_private_enforcement.rb ADDED Viewed

@@ -0,0 +1,93 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# check_private_enforcement.rb
+#
+# Verifies that every instance method annotated @api private in lib/ is
+# actually non-public at the Ruby level (i.e., NOT in Module#public_instance_methods).
+#
+# Class methods (def self.xxx) are excluded from this check because their
+# visibility is managed separately on the singleton class and rarely causes
+# accidental public exposure to consumers.
+#
+# Usage (run from the phronomy/ repository root):
+#   bundle exec ruby scripts/check_private_enforcement.rb
+#
+# Exit codes:
+#   0 — all @api private instance methods are non-public (or have no Ruby def)
+#   1 — one or more @api private instance methods are exposed as public
+require "bundler/setup"
+require_relative "../lib/phronomy"
+lib_dir = File.expand_path("../lib", __dir__)
+unless File.directory?(lib_dir)
+  warn "ERROR: lib directory not found at #{lib_dir}"
+  exit 1
+end
+# Step 1: Collect instance methods annotated @api private via static analysis.
+api_private_entries = []
+Dir.glob(File.join(lib_dir, "**", "*.rb")).sort.each do |file|
+  lines = File.readlines(file)
+  lines.each_with_index do |line, i|
+    next unless line.match?(/^\s*#\s*@api\s+private\s*$/)
+    # Advance past any further comment or blank lines to reach the def.
+    j = i + 1
+    j += 1 while j < lines.size && lines[j].match?(/^\s*(#|$)/)
+    next unless j < lines.size
+    # Skip class-level methods — they live on the singleton class, not as
+    # public instance methods accessible to consumers.
+    next if lines[j].match?(/def\s+self\./)
+    # Match both plain def and "private def".
+    m = lines[j].match(/^\s*(?:private\s+)?def\s+(\w+[!?=]?)/)
+    next unless m
+    rel_path = file.sub("#{lib_dir}/../", "")
+    api_private_entries << {name: m[1].to_sym, file: rel_path, line: j + 1}
+  end
+end
+if api_private_entries.empty?
+  puts "No @api private instance methods found."
+  exit 0
+end
+# Step 2: Build a map of publicly exposed instance methods across all
+# Phronomy-namespaced modules/classes (own methods only, no inheritance).
+all_phronomy_modules = ObjectSpace.each_object(Module).select do |mod|
+  mod.name&.start_with?("Phronomy")
+end
+public_exposure_map = {}
+all_phronomy_modules.each do |mod|
+  mod.public_instance_methods(false).each do |meth|
+    (public_exposure_map[meth] ||= []) << mod.name
+  end
+end
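+# Step 3: Report violations — @api private methods that are still public.
+# Matching is by method name only: an entry is flagged when ANY Phronomy
+# module exposes a public instance method with the same name.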
+errors = []
+api_private_entries.each do |entry|
+  exposing_modules = public_exposure_map[entry[:name]]
+  next unless exposing_modules
+  errors << "#{entry[:file]}:#{entry[:line]}  def #{entry[:name]}" \
+            "  (annotated @api private but public in: #{exposing_modules.join(", ")})"
+end
+if errors.empty?
+  puts "OK: all #{api_private_entries.size} @api private instance methods are non-public."
+  exit 0
+else
+  warn "ERROR: #{errors.size} @api private instance method(s) are exposed as public:"
+  errors.each { |e| warn "  #{e}" }
+  exit 1
+end

data/scripts/check_readme_runnable.rb ADDED Viewed

@@ -0,0 +1,98 @@
+# frozen_string_literal: true
+# scripts/check_readme_runnable.rb
+#
+# Extracts ```ruby runnable blocks from README.md and executes each in an
+# isolated subprocess with a fake LLM stub to catch API drift.
+#
+# Any block whose subprocess exits non-zero (e.g. an uncaught NoMethodError,
+# ArgumentError, or NameError) makes this script exit 1, failing the CI step.
+#
+# Usage (from the phronomy/ root):
+#   bundle exec ruby scripts/check_readme_runnable.rb
+require "tempfile"
+require "open3"
+REPO_ROOT = File.expand_path("..", __dir__)
+README_PATH = File.join(REPO_ROOT, "README.md")
+# Injected before every runnable block.
+# Uses the Gemfile of this project so subprocesses can load phronomy.
+PREAMBLE = <<~RUBY
+  # frozen_string_literal: true
+  # --- CI preamble: stub LLM calls so no real network requests are made ---
+  ENV["BUNDLE_GEMFILE"] ||= "#{File.join(REPO_ROOT, "Gemfile")}"
+  require "bundler/setup"
+  require "phronomy"
+  # Patch invoke methods to return canned responses instead of calling the LLM.
+  module Phronomy
+    module Agent
+      class Base
+        def invoke(input = nil, **)
+          {output: "ci-stub-output", messages: []}
+        end
+      end
+      class Runner
+        def invoke(input = nil, **)
+          {output: "ci-stub-output", agent: nil, messages: []}
+        end
+      end
+    end
+    module Chain
+      class LLMChain
+        def invoke(vars = {})
+          "ci-stub-chain"
+        end
+      end
+    end
+  end
+  # --- end CI preamble ---
+RUBY
+readme = File.read(README_PATH)
+# Match opening fence with 'runnable' annotation: ```ruby runnable
+blocks = readme.scan(/^```ruby runnable\n(.*?)^```/m).map.with_index(1) { |(code), i| [i, code] }
+if blocks.empty?
+  puts "No 'ruby runnable' blocks found in README.md."
+  exit 0
+end
+puts "Checking #{blocks.size} runnable Ruby block(s) in README.md..."
+failures = []
+blocks.each do |index, code|
+  Tempfile.create(["readme_runnable_#{index}", ".rb"]) do |f|
+    f.write(PREAMBLE)
+    f.write(code)
+    f.flush
+    out, err, status = Open3.capture3(RbConfig.ruby, f.path)
+    combined = (out + err).gsub(f.path, "block ##{index}")
+    if status.success?
+      puts "  OK   block ##{index}"
+    else
+      failures << index
+      puts "  FAIL block ##{index}"
+      # Print at most 15 lines of output to keep CI logs readable.
+      puts combined.lines.first(15).join
+    end
+  end
+end
+puts
+if failures.empty?
+  puts "All #{blocks.size} runnable block(s) passed."
+  exit 0
+else
+  puts "#{failures.size} block(s) failed: #{failures.join(", ")}"
+  exit 1
+end

data/scripts/run_mutation.sh ADDED Viewed

@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# scripts/run_mutation.sh — Run mutation tests on core Phronomy domain classes.
+#
+# Usage:
+#   bash scripts/run_mutation.sh [SUBJECT_PATTERN]
+#
+# SUBJECT_PATTERN (optional): restrict to a specific subject, e.g. "Phronomy::WorkflowContext"
+# When omitted, all subjects listed in .mutant.yml are tested.
+#
+# Requires mutant-rspec (in Gemfile development group):
+#   gem "mutant-rspec", "~> 0.15.1"
+#
+# Target: mutation score >= 80% for each listed subject.
+# Baseline scores (as of initial run):
+#   Phronomy::WorkflowContext  84.85%
+#   Phronomy::Tool::Base       55.74%
+#
+# Note: mutation testing is slow (~1-5 min per subject). Run locally or via
+# the nightly-mutation GitHub Actions workflow.
+set -euo pipefail
+cd "$(dirname "$0")/.."
+if ! bundle exec mutant --version &>/dev/null; then
+  echo "ERROR: mutant is not available. Run: bundle install"
+  exit 1
+fi
+SUBJECT="${1:-}"
+echo "=== Phronomy Mutation Test ==="
+echo "Date: $(date -u +"%Y-%m-%dT%H:%M:%SZ")"
+echo "Ruby: $(ruby --version)"
+echo "Mutant: $(bundle exec mutant --version 2>&1 | grep -v warning | head -1)"
+echo ""
+if [[ -n "$SUBJECT" ]]; then
+  echo "Subject: $SUBJECT"
+  echo ""
+  bundle exec mutant run -- "$SUBJECT"
+else
+  echo "Subjects: all (see .mutant.yml)"
+  echo ""
+  bundle exec mutant run
+fi