riffer 0.31.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. checksums.yaml +4 -4
  2. data/.agents/code-style.md +63 -4
  3. data/.agents/rbs-inline.md +1 -6
  4. data/.release-please-manifest.json +1 -1
  5. data/AGENTS.md +1 -2
  6. data/CHANGELOG.md +18 -0
  7. data/docs/08_MESSAGES.md +1 -1
  8. data/docs/14_MCP.md +50 -5
  9. data/docs/providers/02_AMAZON_BEDROCK.md +14 -0
  10. data/lib/riffer/agent/config.rb +42 -47
  11. data/lib/riffer/agent/context.rb +70 -50
  12. data/lib/riffer/agent/response.rb +4 -20
  13. data/lib/riffer/agent/run.rb +28 -67
  14. data/lib/riffer/agent/serializer.rb +22 -81
  15. data/lib/riffer/agent/session/repair.rb +14 -40
  16. data/lib/riffer/agent/session.rb +25 -67
  17. data/lib/riffer/agent/structured_output/result.rb +3 -11
  18. data/lib/riffer/agent/structured_output.rb +5 -13
  19. data/lib/riffer/agent.rb +74 -192
  20. data/lib/riffer/config.rb +34 -101
  21. data/lib/riffer/evals/evaluator.rb +7 -27
  22. data/lib/riffer/evals/evaluator_runner.rb +11 -19
  23. data/lib/riffer/evals/judge.rb +4 -25
  24. data/lib/riffer/evals/result.rb +1 -18
  25. data/lib/riffer/evals/run_result.rb +0 -11
  26. data/lib/riffer/evals/scenario_result.rb +0 -14
  27. data/lib/riffer/evals.rb +0 -6
  28. data/lib/riffer/guardrail.rb +4 -27
  29. data/lib/riffer/guardrails/modification.rb +0 -10
  30. data/lib/riffer/guardrails/result.rb +3 -30
  31. data/lib/riffer/guardrails/runner.rb +5 -22
  32. data/lib/riffer/guardrails/tripwire.rb +1 -19
  33. data/lib/riffer/guardrails.rb +2 -4
  34. data/lib/riffer/helpers/call_or_value.rb +4 -3
  35. data/lib/riffer/helpers/class_name_converter.rb +3 -1
  36. data/lib/riffer/helpers/dependencies.rb +5 -7
  37. data/lib/riffer/helpers.rb +0 -5
  38. data/lib/riffer/mcp/authenticated_tool.rb +9 -9
  39. data/lib/riffer/mcp/client.rb +12 -17
  40. data/lib/riffer/mcp/manifest.rb +13 -10
  41. data/lib/riffer/mcp/registration.rb +2 -11
  42. data/lib/riffer/mcp/registry.rb +44 -52
  43. data/lib/riffer/mcp/search_tool.rb +53 -0
  44. data/lib/riffer/mcp/tool_factory.rb +13 -18
  45. data/lib/riffer/mcp.rb +12 -17
  46. data/lib/riffer/messages/assistant.rb +2 -9
  47. data/lib/riffer/messages/base.rb +46 -16
  48. data/lib/riffer/messages/file_part.rb +32 -24
  49. data/lib/riffer/messages/system.rb +0 -5
  50. data/lib/riffer/messages/tool.rb +0 -10
  51. data/lib/riffer/messages/user.rb +0 -10
  52. data/lib/riffer/messages.rb +0 -7
  53. data/lib/riffer/params/boolean.rb +2 -4
  54. data/lib/riffer/params/param.rb +28 -39
  55. data/lib/riffer/params.rb +9 -21
  56. data/lib/riffer/providers/amazon_bedrock.rb +42 -28
  57. data/lib/riffer/providers/anthropic.rb +4 -9
  58. data/lib/riffer/providers/azure_open_ai.rb +3 -19
  59. data/lib/riffer/providers/base.rb +13 -26
  60. data/lib/riffer/providers/gemini.rb +4 -4
  61. data/lib/riffer/providers/mock.rb +6 -26
  62. data/lib/riffer/providers/open_ai.rb +6 -8
  63. data/lib/riffer/providers/open_router.rb +4 -10
  64. data/lib/riffer/providers/repository.rb +4 -3
  65. data/lib/riffer/providers/token_usage.rb +9 -20
  66. data/lib/riffer/providers.rb +0 -8
  67. data/lib/riffer/runner/fibers.rb +10 -16
  68. data/lib/riffer/runner/sequential.rb +1 -4
  69. data/lib/riffer/runner/threaded.rb +3 -14
  70. data/lib/riffer/runner.rb +2 -15
  71. data/lib/riffer/skills/activate_tool.rb +2 -11
  72. data/lib/riffer/skills/adapter.rb +4 -22
  73. data/lib/riffer/skills/backend.rb +7 -21
  74. data/lib/riffer/skills/config.rb +10 -31
  75. data/lib/riffer/skills/context.rb +5 -20
  76. data/lib/riffer/skills/filesystem_backend.rb +7 -25
  77. data/lib/riffer/skills/frontmatter.rb +10 -28
  78. data/lib/riffer/skills/markdown_adapter.rb +2 -9
  79. data/lib/riffer/skills/xml_adapter.rb +2 -8
  80. data/lib/riffer/stream_events/base.rb +1 -6
  81. data/lib/riffer/stream_events/guardrail_modification.rb +1 -8
  82. data/lib/riffer/stream_events/guardrail_tripwire.rb +1 -8
  83. data/lib/riffer/stream_events/interrupt.rb +4 -7
  84. data/lib/riffer/stream_events/reasoning_delta.rb +2 -4
  85. data/lib/riffer/stream_events/reasoning_done.rb +2 -4
  86. data/lib/riffer/stream_events/skill_activation.rb +2 -4
  87. data/lib/riffer/stream_events/text_delta.rb +0 -2
  88. data/lib/riffer/stream_events/text_done.rb +1 -3
  89. data/lib/riffer/stream_events/token_usage_done.rb +1 -8
  90. data/lib/riffer/stream_events/tool_call_delta.rb +2 -3
  91. data/lib/riffer/stream_events/tool_call_done.rb +1 -3
  92. data/lib/riffer/stream_events/web_search_done.rb +1 -3
  93. data/lib/riffer/stream_events/web_search_status.rb +2 -3
  94. data/lib/riffer/stream_events.rb +0 -10
  95. data/lib/riffer/tool.rb +6 -13
  96. data/lib/riffer/tools/response.rb +8 -4
  97. data/lib/riffer/tools/runtime/fibers.rb +0 -3
  98. data/lib/riffer/tools/runtime/inline.rb +1 -4
  99. data/lib/riffer/tools/runtime/threaded.rb +0 -2
  100. data/lib/riffer/tools/runtime.rb +5 -38
  101. data/lib/riffer/tools/toolable.rb +5 -16
  102. data/lib/riffer/tools.rb +0 -4
  103. data/lib/riffer/version.rb +1 -1
  104. data/lib/riffer.rb +7 -8
  105. data/sig/generated/riffer/agent/config.rbs +29 -46
  106. data/sig/generated/riffer/agent/context.rbs +40 -48
  107. data/sig/generated/riffer/agent/response.rbs +4 -20
  108. data/sig/generated/riffer/agent/run.rbs +12 -61
  109. data/sig/generated/riffer/agent/serializer.rbs +21 -80
  110. data/sig/generated/riffer/agent/session/repair.rbs +12 -40
  111. data/sig/generated/riffer/agent/session.rbs +25 -67
  112. data/sig/generated/riffer/agent/structured_output/result.rbs +2 -10
  113. data/sig/generated/riffer/agent/structured_output.rbs +5 -12
  114. data/sig/generated/riffer/agent.rbs +57 -186
  115. data/sig/generated/riffer/config.rbs +34 -100
  116. data/sig/generated/riffer/evals/evaluator.rbs +7 -27
  117. data/sig/generated/riffer/evals/evaluator_runner.rbs +9 -19
  118. data/sig/generated/riffer/evals/judge.rbs +4 -24
  119. data/sig/generated/riffer/evals/result.rbs +1 -17
  120. data/sig/generated/riffer/evals/run_result.rbs +0 -10
  121. data/sig/generated/riffer/evals/scenario_result.rbs +0 -13
  122. data/sig/generated/riffer/evals.rbs +0 -6
  123. data/sig/generated/riffer/guardrail.rbs +4 -27
  124. data/sig/generated/riffer/guardrails/modification.rbs +0 -10
  125. data/sig/generated/riffer/guardrails/result.rbs +3 -30
  126. data/sig/generated/riffer/guardrails/runner.rbs +5 -22
  127. data/sig/generated/riffer/guardrails/tripwire.rbs +1 -19
  128. data/sig/generated/riffer/guardrails.rbs +2 -4
  129. data/sig/generated/riffer/helpers/call_or_value.rbs +4 -3
  130. data/sig/generated/riffer/helpers/class_name_converter.rbs +1 -1
  131. data/sig/generated/riffer/helpers/dependencies.rbs +3 -7
  132. data/sig/generated/riffer/helpers.rbs +0 -5
  133. data/sig/generated/riffer/mcp/authenticated_tool.rbs +5 -4
  134. data/sig/generated/riffer/mcp/client.rbs +10 -16
  135. data/sig/generated/riffer/mcp/manifest.rbs +9 -9
  136. data/sig/generated/riffer/mcp/registration.rbs +2 -10
  137. data/sig/generated/riffer/mcp/registry.rbs +11 -18
  138. data/sig/generated/riffer/mcp/search_tool.rbs +26 -0
  139. data/sig/generated/riffer/mcp/tool_factory.rbs +10 -15
  140. data/sig/generated/riffer/mcp.rbs +10 -17
  141. data/sig/generated/riffer/messages/assistant.rbs +2 -8
  142. data/sig/generated/riffer/messages/base.rbs +11 -16
  143. data/sig/generated/riffer/messages/file_part.rbs +13 -23
  144. data/sig/generated/riffer/messages/system.rbs +0 -4
  145. data/sig/generated/riffer/messages/tool.rbs +0 -9
  146. data/sig/generated/riffer/messages/user.rbs +0 -9
  147. data/sig/generated/riffer/messages.rbs +0 -7
  148. data/sig/generated/riffer/params/boolean.rbs +2 -4
  149. data/sig/generated/riffer/params/param.rbs +21 -39
  150. data/sig/generated/riffer/params.rbs +9 -21
  151. data/sig/generated/riffer/providers/amazon_bedrock.rbs +21 -25
  152. data/sig/generated/riffer/providers/anthropic.rbs +2 -7
  153. data/sig/generated/riffer/providers/azure_open_ai.rbs +3 -18
  154. data/sig/generated/riffer/providers/base.rbs +9 -25
  155. data/sig/generated/riffer/providers/gemini.rbs +0 -2
  156. data/sig/generated/riffer/providers/mock.rbs +6 -26
  157. data/sig/generated/riffer/providers/open_ai.rbs +1 -5
  158. data/sig/generated/riffer/providers/open_router.rbs +4 -10
  159. data/sig/generated/riffer/providers/repository.rbs +2 -3
  160. data/sig/generated/riffer/providers/token_usage.rbs +6 -16
  161. data/sig/generated/riffer/providers.rbs +0 -8
  162. data/sig/generated/riffer/runner/fibers.rbs +8 -15
  163. data/sig/generated/riffer/runner/sequential.rbs +1 -3
  164. data/sig/generated/riffer/runner/threaded.rbs +3 -13
  165. data/sig/generated/riffer/runner.rbs +2 -14
  166. data/sig/generated/riffer/skills/activate_tool.rbs +2 -11
  167. data/sig/generated/riffer/skills/adapter.rbs +4 -22
  168. data/sig/generated/riffer/skills/backend.rbs +7 -21
  169. data/sig/generated/riffer/skills/config.rbs +10 -31
  170. data/sig/generated/riffer/skills/context.rbs +5 -20
  171. data/sig/generated/riffer/skills/filesystem_backend.rbs +7 -24
  172. data/sig/generated/riffer/skills/frontmatter.rbs +10 -27
  173. data/sig/generated/riffer/skills/markdown_adapter.rbs +2 -9
  174. data/sig/generated/riffer/skills/xml_adapter.rbs +2 -8
  175. data/sig/generated/riffer/stream_events/base.rbs +1 -6
  176. data/sig/generated/riffer/stream_events/guardrail_modification.rbs +1 -8
  177. data/sig/generated/riffer/stream_events/guardrail_tripwire.rbs +1 -8
  178. data/sig/generated/riffer/stream_events/interrupt.rbs +4 -7
  179. data/sig/generated/riffer/stream_events/reasoning_delta.rbs +2 -4
  180. data/sig/generated/riffer/stream_events/reasoning_done.rbs +2 -4
  181. data/sig/generated/riffer/stream_events/skill_activation.rbs +2 -4
  182. data/sig/generated/riffer/stream_events/text_delta.rbs +0 -2
  183. data/sig/generated/riffer/stream_events/text_done.rbs +1 -3
  184. data/sig/generated/riffer/stream_events/token_usage_done.rbs +1 -7
  185. data/sig/generated/riffer/stream_events/tool_call_delta.rbs +2 -3
  186. data/sig/generated/riffer/stream_events/tool_call_done.rbs +1 -3
  187. data/sig/generated/riffer/stream_events/web_search_done.rbs +1 -3
  188. data/sig/generated/riffer/stream_events/web_search_status.rbs +2 -3
  189. data/sig/generated/riffer/stream_events.rbs +0 -10
  190. data/sig/generated/riffer/tool.rbs +5 -12
  191. data/sig/generated/riffer/tools/response.rbs +6 -4
  192. data/sig/generated/riffer/tools/runtime/fibers.rbs +0 -3
  193. data/sig/generated/riffer/tools/runtime/inline.rbs +1 -3
  194. data/sig/generated/riffer/tools/runtime/threaded.rbs +0 -2
  195. data/sig/generated/riffer/tools/runtime.rbs +5 -37
  196. data/sig/generated/riffer/tools/toolable.rbs +4 -14
  197. data/sig/generated/riffer/tools.rbs +0 -4
  198. data/sig/generated/riffer.rbs +5 -4
  199. data/sig/manual/riffer/agent/session/repair.rbs +5 -0
  200. data/sig/manual/riffer/evals/evaluator_runner.rbs +5 -0
  201. data/sig/manual/riffer/helpers/class_name_converter.rbs +5 -0
  202. data/sig/manual/riffer/helpers/dependencies.rbs +5 -0
  203. data/sig/manual/riffer/mcp/authenticated_tool.rbs +5 -0
  204. data/sig/manual/riffer/mcp/registry.rbs +5 -0
  205. data/sig/manual/riffer/mcp/tool_factory.rbs +5 -0
  206. data/sig/manual/riffer/mcp.rbs +5 -0
  207. data/sig/manual/riffer/providers/repository.rbs +5 -0
  208. data/sig/manual/riffer.rbs +5 -0
  209. metadata +17 -9
  210. data/.agents/rdoc.md +0 -69
  211. data/lib/riffer/messages/converter.rb +0 -90
  212. data/sig/generated/riffer/messages/converter.rbs +0 -33
  213. data/sig/manual/riffer/tools/toolable.rbs +0 -6
data/lib/riffer/config.rb CHANGED
@@ -2,20 +2,6 @@
2
2
  # rbs_inline: enabled
3
3
 
4
4
  # Configuration for the Riffer framework.
5
- #
6
- # Provides configuration options for AI providers and other settings.
7
- #
8
- # Riffer.config.openai.api_key = "sk-..."
9
- #
10
- # Riffer.config.amazon_bedrock.region = "us-east-1"
11
- # Riffer.config.amazon_bedrock.api_token = "..."
12
- #
13
- # Riffer.config.anthropic.api_key = "sk-ant-..."
14
- #
15
- # Riffer.config.openrouter.api_key = "sk-or-..."
16
- #
17
- # Riffer.config.evals.judge_model = "anthropic/claude-sonnet-4-20250514"
18
- #
19
5
  class Riffer::Config
20
6
  AmazonBedrock = Struct.new(:api_token, :region, keyword_init: true)
21
7
  Anthropic = Struct.new(:api_key, keyword_init: true)
@@ -27,22 +13,13 @@ class Riffer::Config
27
13
  Mcp = Struct.new(:credentials, :discovery_runner, keyword_init: true)
28
14
 
29
15
  # Skills-related global configuration.
30
- #
31
- # See <tt>Riffer.config.skills.default_activate_tool</tt> and
32
- # <tt>Riffer.config.skills.default_backend</tt>.
33
16
  class Skills
34
- # Default skill activation tool class.
35
- #
36
- # The tool class the LLM calls to activate a skill. Defaults to
37
- # <tt>Riffer::Skills::ActivateTool</tt>. Per-agent override is available
38
- # via <tt>skills do; activate_tool ...; end</tt>.
17
+ # The tool class the LLM calls to activate a skill; defaults to
18
+ # <tt>Riffer::Skills::ActivateTool</tt>.
39
19
  attr_reader :default_activate_tool #: singleton(Riffer::Tool)
40
20
 
41
- # Default skills backend.
42
- #
43
- # Used by agents that declare a +skills+ block without specifying a
44
- # backend. Accepts a Riffer::Skills::Backend instance or a Proc.
45
- # Defaults to +nil+ (no global default).
21
+ # Default skills backend for agents that declare a +skills+ block without
22
+ # one; defaults to +nil+.
46
23
  attr_reader :default_backend #: (Riffer::Skills::Backend | Proc)?
47
24
 
48
25
  #--
@@ -52,10 +29,8 @@ class Riffer::Config
52
29
  @default_backend = nil
53
30
  end
54
31
 
55
- # Sets the default skill activation tool class.
56
- #
57
- # Raises +Riffer::ArgumentError+ if the value is not a Riffer::Tool subclass.
58
- #
32
+ # Sets the default skill activation tool class. Raises Riffer::ArgumentError
33
+ # on an invalid value.
59
34
  #--
60
35
  #: (singleton(Riffer::Tool)) -> void
61
36
  def default_activate_tool=(value)
@@ -63,11 +38,8 @@ class Riffer::Config
63
38
  @default_activate_tool = value
64
39
  end
65
40
 
66
- # Sets the default skills backend.
67
- #
68
- # Raises +Riffer::ArgumentError+ if the value is not a
69
- # Riffer::Skills::Backend instance, a Proc, or +nil+.
70
- #
41
+ # Sets the default skills backend. Raises Riffer::ArgumentError on an
42
+ # invalid value.
71
43
  #--
72
44
  #: ((Riffer::Skills::Backend | Proc)?) -> void
73
45
  def default_backend=(value)
@@ -79,49 +51,38 @@ class Riffer::Config
79
51
 
80
52
  VALID_MESSAGE_ID_STRATEGIES = %i[none uuid uuidv7].freeze
81
53
 
82
- # Amazon Bedrock configuration (Struct with +api_token+ and +region+).
54
+ # Amazon Bedrock configuration.
83
55
  attr_reader :amazon_bedrock #: Riffer::Config::AmazonBedrock
84
56
 
85
- # Anthropic configuration (Struct with +api_key+).
57
+ # Anthropic configuration.
86
58
  attr_reader :anthropic #: Riffer::Config::Anthropic
87
59
 
88
- # Azure OpenAI configuration (Struct with +api_key+ and +endpoint+).
60
+ # Azure OpenAI configuration.
89
61
  attr_reader :azure_openai #: Riffer::Config::AzureOpenAI
90
62
 
91
- # Google Gemini configuration (Struct with +api_key+, +open_timeout+, and +read_timeout+).
63
+ # Google Gemini configuration.
92
64
  attr_reader :gemini #: Riffer::Config::Gemini
93
65
 
94
- # OpenAI configuration (Struct with +api_key+).
66
+ # OpenAI configuration.
95
67
  attr_reader :openai #: Riffer::Config::OpenAI
96
68
 
97
- # OpenRouter configuration (Struct with +api_key+).
69
+ # OpenRouter configuration.
98
70
  attr_reader :openrouter #: Riffer::Config::OpenRouter
99
71
 
100
- # Evals configuration (Struct with +judge_model+).
72
+ # Evals configuration.
101
73
  attr_reader :evals #: Riffer::Config::Evals
102
74
 
103
- # MCP configuration (Struct with +credentials+ and +discovery_runner+).
104
- #
105
- # +credentials+ is an optional Proc for per-run MCP +tools/call+ HTTP headers.
106
- # Signature: +->(manifest:, matched_tags:, context:) { Hash or nil }+.
107
- # +nil+ from the proc at tool-resolution time omits that server's tools; +nil+
108
- # at tool-call time raises Riffer::Mcp::CredentialsDeniedError.
109
- #
110
- # +discovery_runner+ is the Riffer::Runner used to execute tool discovery
111
- # (default +Runner::Sequential+).
75
+ # MCP configuration. +credentials+ is an optional Proc returning per-run
76
+ # +tools/call+ headers (or +nil+ to deny); +discovery_runner+ runs tool
77
+ # discovery.
112
78
  attr_reader :mcp #: Riffer::Config::Mcp
113
79
 
114
- # Global tool runtime configuration (experimental).
115
- #
116
- # Accepts a Riffer::Tools::Runtime subclass, a Riffer::Tools::Runtime instance,
117
- # or a Proc. Defaults to <tt>Riffer::Tools::Runtime::Inline.new</tt>.
80
+ # Global tool runtime configuration (experimental); defaults to
81
+ # <tt>Riffer::Tools::Runtime::Inline.new</tt>.
118
82
  attr_reader :tool_runtime #: (singleton(Riffer::Tools::Runtime) | Riffer::Tools::Runtime | Proc)
119
83
 
120
- # Sets the global tool runtime.
121
- #
122
- # Raises +Riffer::ArgumentError+ if the value is not a valid runtime
123
- # (ToolRuntime subclass, ToolRuntime instance, or Proc).
124
- #
84
+ # Sets the global tool runtime. Raises Riffer::ArgumentError on an invalid
85
+ # value.
125
86
  #--
126
87
  #: ((singleton(Riffer::Tools::Runtime) | Riffer::Tools::Runtime | Proc)) -> void
127
88
  def tool_runtime=(value)
@@ -130,23 +91,16 @@ class Riffer::Config
130
91
  @tool_runtime = value
131
92
  end
132
93
 
133
- # Skills-related global configuration. Returns a Riffer::Config::Skills
134
- # object — see <tt>Riffer.config.skills.default_activate_tool</tt>.
94
+ # Skills-related global configuration.
135
95
  attr_reader :skills #: Riffer::Config::Skills
136
96
 
137
- # Strategy for auto-generating message ids. One of +:none+ (default, no id),
138
- # +:uuid+ (UUIDv4), or +:uuidv7+ (time-ordered UUIDv7).
139
- #
140
- # When set to anything other than +:none+, each +Riffer::Messages::Base+
141
- # instance gets an +id+ populated at construction time, and seeded messages
142
- # passed to +Riffer::Agent#generate+ must carry their own +:id+.
97
+ # Strategy for auto-generating message ids: +:none+ (default), +:uuid+, or
98
+ # +:uuidv7+. When not +:none+, messages get an +id+ at construction, and
99
+ # seeded messages passed to +Riffer::Agent#generate+ must carry their own.
143
100
  attr_reader :message_id_strategy #: Symbol
144
101
 
145
- # Sets the message id strategy.
146
- #
147
- # Raises +Riffer::ArgumentError+ if the value is not one of
148
- # +:none+, +:uuid+, or +:uuidv7+.
149
- #
102
+ # Sets the message id strategy. Raises Riffer::ArgumentError unless the value
103
+ # is +:none+, +:uuid+, or +:uuidv7+.
150
104
  #--
151
105
  #: (Symbol) -> void
152
106
  def message_id_strategy=(value)
@@ -157,35 +111,14 @@ class Riffer::Config
157
111
  @message_id_strategy = value
158
112
  end
159
113
 
160
- # Experimental: when +true+, riffer keeps the +tool_use+ ↔ +tool_result+
161
- # invariant intact on its own.
162
- #
163
- # - On +Riffer::Agent#generate(messages_array)+, orphaned +tool_use+
164
- # exchanges and parentless +Riffer::Messages::Tool+ messages are
165
- # silently stripped from the seed. Pending tool calls on the resume
166
- # boundary (last assistant whose tail is purely Tool results) are
167
- # preserved for +execute_pending_tool_calls+.
168
- # - On any interrupt (caller-issued +interrupt!+ or
169
- # +INTERRUPT_MAX_STEPS+), riffer fills any orphaned +tool_use+ with a
170
- # placeholder +Riffer::Messages::Tool+ carrying
171
- # +error_type: :interrupted+, leaving history valid for the next turn.
172
- # Filled call_ids are exposed on
173
- # +Riffer::Agent::Response#healed_tool_call_ids+ (and the streaming
174
- # +Riffer::StreamEvents::Interrupt+ event).
175
- #
176
- # Defaults to +false+ — the pre-healing behavior. Experimental: the
177
- # surface and default may change without notice.
114
+ # Experimental: when +true+, riffer maintains the +tool_use+ ↔ +tool_result+
115
+ # invariant itself, stripping orphaned exchanges and filling interrupted
116
+ # ones. Defaults to +false+; the surface may change without notice.
178
117
  attr_reader :experimental_history_healing #: bool
179
118
 
180
- # Sets the +experimental_history_healing+ flag.
181
- #
182
- # Coerces common boolean representations so values pulled from
183
- # environment variables don't silently enable healing — the string
184
- # +"false"+ is truthy in Ruby and would otherwise flip the flag on.
185
- # Accepts +true+/+false+, +"true"+/+"false"+, +1+/+0+, +"1"+/+"0"+, and
186
- # +nil+ (treated as +false+, the default). Raises
187
- # +Riffer::ArgumentError+ for any other value.
188
- #
119
+ # Sets the +experimental_history_healing+ flag, coercing boolean-ish values so
120
+ # an env-var +"false"+ (truthy in Ruby) doesn't silently enable healing.
121
+ # Raises Riffer::ArgumentError on an unrecognized value.
189
122
  #--
190
123
  #: (untyped) -> void
191
124
  def experimental_history_healing=(value)
@@ -1,13 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
  # rbs_inline: enabled
3
3
 
4
- # Base class for all evaluators in the Riffer framework.
5
- #
6
- # Provides a DSL for defining evaluator metadata and the evaluate method.
7
- # Simple evaluators only need to set +instructions+ — the base class
8
- # handles calling the judge automatically.
9
- #
10
- # See examples/evaluators/ for reference implementations.
4
+ # Base class for all evaluators. Set +instructions+ and the base class calls
5
+ # the judge automatically; override +#evaluate+ for custom logic. See
6
+ # examples/evaluators/ for reference implementations.
11
7
  #
12
8
  # class MyEvaluator < Riffer::Evals::Evaluator
13
9
  # instructions "Assess medical accuracy of the response..."
@@ -54,18 +50,9 @@ class Riffer::Evals::Evaluator
54
50
  end
55
51
  end
56
52
 
57
- # Evaluates an input/output pair.
58
- #
59
- # The default implementation calls the judge with the class-level +instructions+.
60
- # Override this method for custom evaluation logic (e.g. rule-based evaluators).
61
- #
62
- # [input] the input to evaluate; String or Array of message hashes/Message objects.
63
- # [output] the agent's response to evaluate.
64
- # [ground_truth] optional reference answer for comparison.
65
- # [messages] the full message history from the agent conversation.
66
- #
67
- # Raises NotImplementedError if neither +instructions+ is set nor +evaluate+ is overridden.
68
- #
53
+ # Evaluates an input/output pair. The default calls the judge with the
54
+ # class-level +instructions+; override for custom logic (e.g. rule-based
55
+ # evaluators).
69
56
  #--
70
57
  #: (input: String | Array[Hash[Symbol, untyped] | Riffer::Messages::Base], output: String, ?ground_truth: String?, ?messages: Array[Riffer::Messages::Base]) -> Riffer::Evals::Result
71
58
  def evaluate(input:, output:, ground_truth: nil, messages: [])
@@ -84,12 +71,6 @@ class Riffer::Evals::Evaluator
84
71
 
85
72
  private
86
73
 
87
- # Formats the input for the judge.
88
- #
89
- # String inputs are passed through as-is.
90
- # Array inputs (message hashes or Message objects) are formatted
91
- # as labeled role/content pairs separated by blank lines.
92
- #
93
74
  #--
94
75
  #: (String | Array[Hash[Symbol, untyped] | Riffer::Messages::Base]) -> String
95
76
  def format_input(input)
@@ -122,8 +103,7 @@ class Riffer::Evals::Evaluator
122
103
  end
123
104
  end
124
105
 
125
- # Helper to build a Result object.
126
- #
106
+ # Builds a Result for this evaluator.
127
107
  #--
128
108
  #: (score: Float, ?reason: String?, ?metadata: Hash[Symbol, untyped]) -> Riffer::Evals::Result
129
109
  def result(score:, reason: nil, metadata: {})
@@ -3,10 +3,6 @@
3
3
 
4
4
  # Orchestrates running evaluators against an agent across multiple scenarios.
5
5
  #
6
- # Accepts an agent class, a list of scenarios, and evaluator classes.
7
- # Generates agent output for each scenario and runs all evaluators,
8
- # returning a RunResult with per-scenario details and aggregate scores.
9
- #
10
6
  # result = Riffer::Evals::EvaluatorRunner.run(
11
7
  # agent: MyAgent,
12
8
  # scenarios: [
@@ -18,20 +14,14 @@
18
14
  #
19
15
  # result.scores # => { AnswerRelevancyEvaluator => 0.85 }
20
16
  #
21
- class Riffer::Evals::EvaluatorRunner
22
- # Runs evaluators against an agent for the given scenarios.
23
- #
24
- # [agent] an Agent subclass (not an instance).
25
- # [scenarios] array of hashes with +:input+, optional +:ground_truth+, and optional +:context+.
26
- # [evaluators] array of Evaluator subclasses to run against each scenario.
27
- # [context] optional hash passed to +agent.generate+. Per-scenario +:context+ takes precedence.
28
- #
29
- # Raises Riffer::ArgumentError if agent is not a Riffer::Agent subclass
30
- # or any eval is not a Riffer::Evals::Evaluator subclass.
31
- #
17
+ module Riffer::Evals::EvaluatorRunner
18
+ extend self
19
+
20
+ # Runs evaluators against an agent for the given scenarios. Raises
21
+ # Riffer::ArgumentError on an invalid agent or evaluator.
32
22
  #--
33
23
  #: (agent: singleton(Riffer::Agent), scenarios: Array[Hash[Symbol, untyped]], evaluators: Array[singleton(Riffer::Evals::Evaluator)], ?context: Hash[Symbol, untyped]?) -> Riffer::Evals::RunResult
34
- def self.run(agent:, scenarios:, evaluators:, context: nil)
24
+ def run(agent:, scenarios:, evaluators:, context: nil)
35
25
  validate_agent!(agent)
36
26
  validate_evaluators!(evaluators)
37
27
 
@@ -42,9 +32,11 @@ class Riffer::Evals::EvaluatorRunner
42
32
  Riffer::Evals::RunResult.new(scenario_results: scenario_results)
43
33
  end
44
34
 
35
+ private
36
+
45
37
  #--
46
38
  #: (singleton(Riffer::Agent)) -> void
47
- private_class_method def self.validate_agent!(agent)
39
+ def validate_agent!(agent)
48
40
  return if agent.is_a?(Class) && agent < Riffer::Agent
49
41
 
50
42
  raise Riffer::ArgumentError, "agent must be a subclass of Riffer::Agent, got #{agent.inspect}"
@@ -52,7 +44,7 @@ class Riffer::Evals::EvaluatorRunner
52
44
 
53
45
  #--
54
46
  #: (Array[singleton(Riffer::Evals::Evaluator)]) -> void
55
- private_class_method def self.validate_evaluators!(evaluators)
47
+ def validate_evaluators!(evaluators)
56
48
  evaluators.each do |evaluator_class|
57
49
  next if evaluator_class.is_a?(Class) && evaluator_class < Riffer::Evals::Evaluator
58
50
 
@@ -62,7 +54,7 @@ class Riffer::Evals::EvaluatorRunner
62
54
 
63
55
  #--
64
56
  #: (agent: singleton(Riffer::Agent), scenario: Hash[Symbol, untyped], evaluators: Array[singleton(Riffer::Evals::Evaluator)], ?context: Hash[Symbol, untyped]?) -> Riffer::Evals::ScenarioResult
65
- private_class_method def self.run_scenario(agent:, scenario:, evaluators:, context: nil)
57
+ def run_scenario(agent:, scenario:, evaluators:, context: nil)
66
58
  input = scenario[:input]
67
59
  ground_truth = scenario[:ground_truth]
68
60
  resolved_context = scenario[:context] || context
@@ -3,21 +3,8 @@
3
3
 
4
4
  require "json"
5
5
 
6
- # Executes LLM-as-judge evaluations using the provider infrastructure.
7
- #
8
- # The Judge class handles calling an LLM to evaluate agent outputs
9
- # and parsing the structured response. It uses tool calling internally
10
- # to get guaranteed structured output from the judge model.
11
- #
12
- # judge = Riffer::Evals::Judge.new(model: "anthropic/claude-opus-4-5-20251101")
13
- # result = judge.evaluate(
14
- # instructions: "Assess answer relevancy...",
15
- # input: "What is Ruby?",
16
- # output: "Ruby is a programming language."
17
- # )
18
- # result[:score] # => 0.85
19
- # result[:reason] # => "The response is relevant..."
20
- #
6
+ # Executes LLM-as-judge evaluations, using tool calling internally to get
7
+ # structured output from the judge model.
21
8
  class Riffer::Evals::Judge
22
9
  # @rbs @provider_options: Hash[Symbol, untyped]
23
10
  # @rbs @provider_instance: Riffer::Providers::Base?
@@ -44,8 +31,7 @@ class Riffer::Evals::Judge
44
31
  # The model string (provider/model format).
45
32
  attr_reader :model #: String
46
33
 
47
- # Initializes a new judge.
48
- #
34
+ # Raises Riffer::ArgumentError unless +model+ is in "provider/model" format.
49
35
  #--
50
36
  #: (model: String, ?provider_options: Hash[Symbol, untyped]) -> void
51
37
  def initialize(model:, provider_options: {})
@@ -58,14 +44,7 @@ class Riffer::Evals::Judge
58
44
  @provider_options = provider_options
59
45
  end
60
46
 
61
- # Evaluates using the configured LLM.
62
- #
63
- # Composes system and user messages from the semantic fields:
64
- # [instructions] evaluation criteria and scoring rubric.
65
- # [input] the original input/question.
66
- # [output] the agent's response to evaluate.
67
- # [ground_truth] optional reference answer for comparison.
68
- #
47
+ # Evaluates an input/output pair using the configured LLM.
69
48
  #--
70
49
  #: (instructions: String, input: String, output: String, ?ground_truth: String?) -> Hash[Symbol, untyped]
71
50
  def evaluate(instructions:, input:, output:, ground_truth: nil)
@@ -2,20 +2,6 @@
2
2
  # rbs_inline: enabled
3
3
 
4
4
  # Represents the result of a single evaluation.
5
- #
6
- # Contains the score, reason, and metadata from running an evaluator.
7
- #
8
- # result = Riffer::Evals::Result.new(
9
- # evaluator: AnswerRelevancyEvaluator,
10
- # score: 0.85,
11
- # reason: "The response addresses the question directly.",
12
- # higher_is_better: true
13
- # )
14
- #
15
- # result.score # => 0.85
16
- # result.evaluator # => AnswerRelevancyEvaluator
17
- # result.higher_is_better # => true
18
- #
19
5
  class Riffer::Evals::Result
20
6
  # The evaluator class that produced this result.
21
7
  attr_reader :evaluator #: singleton(Riffer::Evals::Evaluator)
@@ -32,10 +18,7 @@ class Riffer::Evals::Result
32
18
  # Whether higher scores are better for this evaluator.
33
19
  attr_reader :higher_is_better #: bool
34
20
 
35
- # Initializes a new evaluation result.
36
- #
37
- # Raises Riffer::ArgumentError if score is not between 0.0 and 1.0.
38
- #
21
+ # Raises Riffer::ArgumentError if +score+ is not between 0.0 and 1.0.
39
22
  #--
40
23
  #: (evaluator: singleton(Riffer::Evals::Evaluator), score: Float, ?reason: String?, ?metadata: Hash[Symbol, untyped], ?higher_is_better: bool) -> void
41
24
  def initialize(evaluator:, score:, reason: nil, metadata: {}, higher_is_better: true)
@@ -2,21 +2,10 @@
2
2
  # rbs_inline: enabled
3
3
 
4
4
  # Represents the complete result of an evaluation run across multiple scenarios.
5
- #
6
- # Contains per-scenario results and provides aggregate scores.
7
- #
8
- # run_result = Riffer::Evals::RunResult.new(
9
- # scenario_results: [scenario_result1, scenario_result2]
10
- # )
11
- #
12
- # run_result.scores # => { MyEvaluator => 0.85 }
13
- #
14
5
  class Riffer::Evals::RunResult
15
6
  # Per-scenario evaluation results.
16
7
  attr_reader :scenario_results #: Array[Riffer::Evals::ScenarioResult]
17
8
 
18
- # Initializes a new run result.
19
- #
20
9
  #--
21
10
  #: (scenario_results: Array[Riffer::Evals::ScenarioResult]) -> void
22
11
  def initialize(scenario_results:)
@@ -2,18 +2,6 @@
2
2
  # rbs_inline: enabled
3
3
 
4
4
  # Represents the result of evaluating a single scenario.
5
- #
6
- # Contains the input, output, ground truth, and individual evaluator results.
7
- #
8
- # scenario_result = Riffer::Evals::ScenarioResult.new(
9
- # input: "What is Ruby?",
10
- # output: "A programming language.",
11
- # ground_truth: "A programming language",
12
- # results: [result1, result2]
13
- # )
14
- #
15
- # scenario_result.scores # => { MyEvaluator => 0.85 }
16
- #
17
5
  class Riffer::Evals::ScenarioResult
18
6
  # The input that was evaluated.
19
7
  attr_reader :input #: String
@@ -30,8 +18,6 @@ class Riffer::Evals::ScenarioResult
30
18
  # The full message history from the agent conversation.
31
19
  attr_reader :messages #: Array[Riffer::Messages::Base]
32
20
 
33
- # Initializes a new scenario result.
34
- #
35
21
  #--
36
22
  #: (input: String, output: String, ground_truth: String?, results: Array[Riffer::Evals::Result], ?messages: Array[Riffer::Messages::Base]) -> void
37
23
  def initialize(input:, output:, ground_truth:, results:, messages: [])
data/lib/riffer/evals.rb CHANGED
@@ -1,11 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # rbs_inline: enabled
3
3
 
4
- # Riffer::Evals provides LLM-as-judge evaluation capabilities.
5
- #
6
- # Evals allow you to measure the quality of agent outputs using
7
- # configurable evaluators and scenarios.
8
- #
9
- # See Riffer::Evals::Evaluator, Riffer::Evals::EvaluatorRunner, and Riffer::Evals::RunResult.
10
4
  module Riffer::Evals
11
5
  end
@@ -3,8 +3,6 @@
3
3
 
4
4
  # Base class for guardrails that process input and output in the agent pipeline.
5
5
  #
6
- # Subclass this to create custom guardrails:
7
- #
8
6
  # class MyGuardrail < Riffer::Guardrail
9
7
  # def process_input(messages, context:)
10
8
  # # Return pass(messages), transform(modified_messages), or block(reason)
@@ -17,27 +15,16 @@
17
15
  # end
18
16
  # end
19
17
  class Riffer::Guardrail
20
- # Processes input messages before they are sent to the LLM.
21
- #
22
- # Override this method in subclasses to implement input processing.
23
- #
24
- # [messages] the input messages.
25
- # [context] optional context passed to the agent.
26
- #
18
+ # Processes input messages before they're sent to the LLM; override in
19
+ # subclasses.
27
20
  #--
28
21
  #: (Array[Riffer::Messages::Base], context: untyped) -> Riffer::Guardrails::Result
29
22
  def process_input(messages, context:)
30
23
  pass(messages)
31
24
  end
32
25
 
33
- # Processes output response after it is received from the LLM.
34
- #
35
- # Override this method in subclasses to implement output processing.
36
- #
37
- # [response] the LLM response.
38
- # [messages] the conversation messages.
39
- # [context] optional context passed to the agent.
40
- #
26
+ # Processes the output response after it's received from the LLM; override in
27
+ # subclasses.
41
28
  #--
42
29
  #: (Riffer::Messages::Assistant, messages: Array[Riffer::Messages::Base], context: untyped) -> Riffer::Guardrails::Result
43
30
  def process_output(response, messages:, context:)
@@ -47,9 +34,6 @@ class Riffer::Guardrail
47
34
  protected
48
35
 
49
36
  # Creates a pass result that continues with unchanged data.
50
- #
51
- # [data] the original data to pass through.
52
- #
53
37
  #--
54
38
  #: (untyped) -> Riffer::Guardrails::Result
55
39
  def pass(data)
@@ -57,9 +41,6 @@ class Riffer::Guardrail
57
41
  end
58
42
 
59
43
  # Creates a transform result that continues with transformed data.
60
- #
61
- # [data] the transformed data.
62
- #
63
44
  #--
64
45
  #: (untyped) -> Riffer::Guardrails::Result
65
46
  def transform(data)
@@ -67,10 +48,6 @@ class Riffer::Guardrail
67
48
  end
68
49
 
69
50
  # Creates a block result that halts execution.
70
- #
71
- # [reason] the reason for blocking.
72
- # [metadata] optional additional information.
73
- #
74
51
  #--
75
52
  #: (String, ?metadata: Hash[Symbol, untyped]?) -> Riffer::Guardrails::Result
76
53
  def block(reason, metadata: nil)
@@ -2,10 +2,6 @@
2
2
  # rbs_inline: enabled
3
3
 
4
4
  # Records a guardrail transformation event.
5
- #
6
- # When a guardrail transforms data (via +transform+), a Modification is
7
- # created to record which guardrail made the change, in which phase, and
8
- # which message indices were affected.
9
5
  class Riffer::Guardrails::Modification
10
6
  # The guardrail class that transformed data.
11
7
  attr_reader :guardrail #: singleton(Riffer::Guardrail)
@@ -16,12 +12,6 @@ class Riffer::Guardrails::Modification
16
12
  # The indices of messages that were changed.
17
13
  attr_reader :message_indices #: Array[Integer]
18
14
 
19
- # Creates a new modification record.
20
- #
21
- # [guardrail] the guardrail class that transformed.
22
- # [phase] :before or :after.
23
- # [message_indices] indices of changed messages.
24
- #
25
15
  #--
26
16
  #: (guardrail: singleton(Riffer::Guardrail), phase: Symbol, message_indices: Array[Integer]) -> void
27
17
  def initialize(guardrail:, phase:, message_indices:)
@@ -1,18 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  # rbs_inline: enabled
3
3
 
4
- # Represents the result of a guardrail execution.
5
- #
6
- # Results can be one of three types:
7
- # - pass: Continue with the original data unchanged
8
- # - transform: Continue with transformed data
9
- # - block: Halt execution with a reason
10
- #
11
- # Use the factory methods to create results:
12
- #
13
- # Result.pass(data)
14
- # Result.transform(data)
15
- # Result.block(reason, metadata: nil)
4
+ # Represents the result of a guardrail execution: +pass+ (continue unchanged),
5
+ # +transform+ (continue with changed data), or +block+ (halt with a reason).
16
6
  class Riffer::Guardrails::Result
17
7
  TYPES = %i[pass transform block].freeze #: Array[Symbol]
18
8
 
@@ -27,9 +17,6 @@ class Riffer::Guardrails::Result
27
17
 
28
18
  class << self
29
19
  # Creates a pass result that continues with unchanged data.
30
- #
31
- # [data] the original data to pass through.
32
- #
33
20
  #--
34
21
  #: (untyped) -> Riffer::Guardrails::Result
35
22
  def pass(data)
@@ -37,9 +24,6 @@ class Riffer::Guardrails::Result
37
24
  end
38
25
 
39
26
  # Creates a transform result that continues with transformed data.
40
- #
41
- # [data] the transformed data.
42
- #
43
27
  #--
44
28
  #: (untyped) -> Riffer::Guardrails::Result
45
29
  def transform(data)
@@ -47,10 +31,6 @@ class Riffer::Guardrails::Result
47
31
  end
48
32
 
49
33
  # Creates a block result that halts execution.
50
- #
51
- # [reason] the reason for blocking.
52
- # [metadata] optional additional information.
53
- #
54
34
  #--
55
35
  #: (String, ?metadata: Hash[Symbol, untyped]?) -> Riffer::Guardrails::Result
56
36
  def block(reason, metadata: nil)
@@ -58,14 +38,7 @@ class Riffer::Guardrails::Result
58
38
  end
59
39
  end
60
40
 
61
- # Creates a new result.
62
- #
63
- # [type] the result type (:pass, :transform, or :block).
64
- # [data] the data or reason.
65
- # [metadata] optional metadata for block results.
66
- #
67
- # Raises Riffer::ArgumentError if the result type is invalid.
68
- #
41
+ # Raises Riffer::ArgumentError if +type+ is not :pass, :transform, or :block.
69
42
  #--
70
43
  #: (Symbol, untyped, ?metadata: Hash[Symbol, untyped]?) -> void
71
44
  def initialize(type, data, metadata: nil)