riffer 0.32.0 → 0.33.0

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (103)
  1. checksums.yaml +4 -4
  2. data/.release-please-manifest.json +1 -1
  3. data/.ruby-version +1 -1
  4. data/CHANGELOG.md +34 -0
  5. data/README.md +13 -11
  6. data/docs/01_OVERVIEW.md +2 -0
  7. data/docs/04_AGENT_LIFECYCLE.md +15 -13
  8. data/docs/08_MESSAGES.md +39 -5
  9. data/docs/09_STREAM_EVENTS.md +14 -0
  10. data/docs/10_CONFIGURATION.md +73 -4
  11. data/docs/13_SKILLS.md +66 -4
  12. data/docs/14_MCP.md +2 -1
  13. data/docs/16_TRACING.md +250 -0
  14. data/docs/17_METRICS.md +123 -0
  15. data/docs/providers/07_CUSTOM_PROVIDERS.md +44 -0
  16. data/lib/riffer/agent/response.rb +11 -2
  17. data/lib/riffer/agent/run.rb +136 -35
  18. data/lib/riffer/agent.rb +5 -5
  19. data/lib/riffer/config.rb +231 -15
  20. data/lib/riffer/guardrail.rb +8 -0
  21. data/lib/riffer/guardrails/runner.rb +33 -0
  22. data/lib/riffer/helpers/boolean.rb +22 -0
  23. data/lib/riffer/mcp/authenticated_tool.rb +14 -20
  24. data/lib/riffer/mcp/registration.rb +4 -4
  25. data/lib/riffer/mcp/tool.rb +23 -0
  26. data/lib/riffer/mcp/tool_factory.rb +14 -22
  27. data/lib/riffer/messages/assistant.rb +15 -3
  28. data/lib/riffer/messages/base.rb +2 -1
  29. data/lib/riffer/metrics/instruments.rb +25 -0
  30. data/lib/riffer/metrics/null.rb +14 -0
  31. data/lib/riffer/metrics/otel.rb +79 -0
  32. data/lib/riffer/metrics.rb +93 -0
  33. data/lib/riffer/providers/amazon_bedrock.rb +57 -21
  34. data/lib/riffer/providers/anthropic.rb +59 -24
  35. data/lib/riffer/providers/azure_open_ai.rb +7 -0
  36. data/lib/riffer/providers/base.rb +247 -15
  37. data/lib/riffer/providers/finish_reason.rb +27 -0
  38. data/lib/riffer/providers/gemini.rb +59 -11
  39. data/lib/riffer/providers/mock.rb +30 -9
  40. data/lib/riffer/providers/open_ai.rb +78 -24
  41. data/lib/riffer/providers/open_router.rb +56 -16
  42. data/lib/riffer/providers/repository.rb +9 -0
  43. data/lib/riffer/providers/token_usage.rb +27 -11
  44. data/lib/riffer/skills/activate_tool.rb +12 -2
  45. data/lib/riffer/skills/adapter.rb +15 -0
  46. data/lib/riffer/skills/context.rb +78 -11
  47. data/lib/riffer/skills/frontmatter.rb +13 -5
  48. data/lib/riffer/skills/markdown_adapter.rb +1 -1
  49. data/lib/riffer/skills/xml_adapter.rb +1 -1
  50. data/lib/riffer/stream_events/finish_reason_done.rb +34 -0
  51. data/lib/riffer/tools/runtime.rb +99 -3
  52. data/lib/riffer/tracing/capture.rb +92 -0
  53. data/lib/riffer/tracing/null.rb +61 -0
  54. data/lib/riffer/tracing/otel.rb +131 -0
  55. data/lib/riffer/tracing/stream_recorder.rb +51 -0
  56. data/lib/riffer/tracing.rb +78 -0
  57. data/lib/riffer/version.rb +1 -1
  58. data/sig/_private/opentelemetry.rbs +22 -0
  59. data/sig/generated/riffer/agent/response.rbs +9 -2
  60. data/sig/generated/riffer/agent/run.rbs +28 -8
  61. data/sig/generated/riffer/config.rbs +162 -16
  62. data/sig/generated/riffer/guardrail.rbs +6 -0
  63. data/sig/generated/riffer/guardrails/runner.rbs +14 -0
  64. data/sig/generated/riffer/helpers/boolean.rbs +11 -0
  65. data/sig/generated/riffer/mcp/authenticated_tool.rbs +6 -8
  66. data/sig/generated/riffer/mcp/registration.rbs +4 -4
  67. data/sig/generated/riffer/mcp/tool.rbs +19 -0
  68. data/sig/generated/riffer/mcp/tool_factory.rbs +8 -7
  69. data/sig/generated/riffer/messages/assistant.rbs +10 -4
  70. data/sig/generated/riffer/metrics/instruments.rbs +13 -0
  71. data/sig/generated/riffer/metrics/null.rbs +10 -0
  72. data/sig/generated/riffer/metrics/otel.rbs +47 -0
  73. data/sig/generated/riffer/metrics.rbs +71 -0
  74. data/sig/generated/riffer/providers/amazon_bedrock.rbs +35 -14
  75. data/sig/generated/riffer/providers/anthropic.rbs +41 -20
  76. data/sig/generated/riffer/providers/azure_open_ai.rbs +5 -0
  77. data/sig/generated/riffer/providers/base.rbs +78 -2
  78. data/sig/generated/riffer/providers/finish_reason.rbs +19 -0
  79. data/sig/generated/riffer/providers/gemini.rbs +25 -2
  80. data/sig/generated/riffer/providers/mock.rbs +16 -5
  81. data/sig/generated/riffer/providers/open_ai.rbs +44 -22
  82. data/sig/generated/riffer/providers/open_router.rbs +31 -12
  83. data/sig/generated/riffer/providers/repository.rbs +7 -0
  84. data/sig/generated/riffer/providers/token_usage.rbs +20 -10
  85. data/sig/generated/riffer/skills/activate_tool.rbs +7 -1
  86. data/sig/generated/riffer/skills/adapter.rbs +10 -0
  87. data/sig/generated/riffer/skills/context.rbs +52 -4
  88. data/sig/generated/riffer/skills/frontmatter.rbs +10 -3
  89. data/sig/generated/riffer/stream_events/finish_reason_done.rbs +21 -0
  90. data/sig/generated/riffer/tools/runtime.rbs +35 -0
  91. data/sig/generated/riffer/tracing/capture.rbs +46 -0
  92. data/sig/generated/riffer/tracing/null.rbs +46 -0
  93. data/sig/generated/riffer/tracing/otel.rbs +83 -0
  94. data/sig/generated/riffer/tracing/stream_recorder.rbs +31 -0
  95. data/sig/generated/riffer/tracing.rbs +52 -0
  96. data/sig/manual/riffer/helpers/boolean.rbs +5 -0
  97. data/sig/manual/riffer/metrics/null.rbs +5 -0
  98. data/sig/manual/riffer/metrics.rbs +5 -0
  99. data/sig/manual/riffer/providers.rbs +9 -0
  100. data/sig/manual/riffer/tracing/capture.rbs +5 -0
  101. data/sig/manual/riffer/tracing/null.rbs +5 -0
  102. data/sig/manual/riffer/tracing.rbs +5 -0
  103. metadata +40 -4
@@ -6,6 +6,14 @@
6
6
  class Riffer::Providers::Anthropic < Riffer::Providers::Base
7
7
  WEB_SEARCH_TOOL_TYPE = "web_search_20250305" #: String
8
8
 
9
+ FINISH_REASONS = {
10
+ "end_turn" => :stop,
11
+ "stop_sequence" => :stop,
12
+ "max_tokens" => :length,
13
+ "tool_use" => :tool_calls,
14
+ "refusal" => :content_filter
15
+ }.freeze #: Hash[String, Symbol]
16
+
9
17
  # Returns the XML skill adapter for Anthropic/Claude.
10
18
  #
11
19
  #--
@@ -14,6 +22,13 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
14
22
  Riffer::Skills::XmlAdapter
15
23
  end
16
24
 
25
+ # The GenAI semconv well-known provider name.
26
+ #--
27
+ #: () -> String
28
+ def self.semconv_provider_name
29
+ "anthropic"
30
+ end
31
+
17
32
  #--
18
33
  #: (?api_key: String?, **untyped) -> void
19
34
  def initialize(api_key: nil, **options)
@@ -81,14 +96,39 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
81
96
  #: (untyped) -> Riffer::Providers::TokenUsage?
82
97
  def extract_token_usage(response)
83
98
  message = response #: Anthropic::Models::Message
84
- usage = message.usage
99
+ build_token_usage(message.usage)
100
+ end
101
+
102
+ #--
103
+ #: (untyped) -> Riffer::Providers::FinishReason?
104
+ def extract_finish_reason(response)
105
+ message = response #: Anthropic::Models::Message
106
+ build_finish_reason(message.stop_reason)
107
+ end
85
108
 
86
- Riffer::Providers::TokenUsage.new(
87
- input_tokens: usage.input_tokens,
109
+ #--
110
+ #: (untyped) -> Riffer::Providers::FinishReason?
111
+ def build_finish_reason(stop_reason)
112
+ return nil unless stop_reason
113
+
114
+ raw = stop_reason.to_s
115
+ Riffer::Providers::FinishReason.new(reason: FINISH_REASONS.fetch(raw, :other), raw: raw)
116
+ end
117
+
118
+ # Anthropic's +input_tokens+ excludes the cache buckets; TokenUsage's
119
+ # input includes them.
120
+ #--
121
+ #: (untyped) -> Riffer::Providers::TokenUsage
122
+ def build_token_usage(usage)
123
+ cache_write = usage.cache_creation_input_tokens
124
+ cache_read = usage.cache_read_input_tokens
125
+
126
+ apply_pricing(Riffer::Providers::TokenUsage.new(
127
+ input_tokens: usage.input_tokens + (cache_write || 0) + (cache_read || 0),
88
128
  output_tokens: usage.output_tokens,
89
- cache_write_tokens: usage.cache_creation_input_tokens,
90
- cache_read_tokens: usage.cache_read_input_tokens
91
- )
129
+ cache_write_tokens: cache_write,
130
+ cache_read_tokens: cache_read
131
+ ))
92
132
  end
93
133
 
94
134
  #--
@@ -130,7 +170,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
130
170
  end
131
171
 
132
172
  #--
133
- #: (Hash[Symbol, untyped], Enumerator::Yielder) -> void
173
+ #: (Hash[Symbol, untyped], Riffer::Providers::_EventSink) -> void
134
174
  def execute_stream(params, yielder)
135
175
  current_state = {
136
176
  text: nil,
@@ -200,7 +240,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
200
240
  end
201
241
 
202
242
  #--
203
- #: (untyped, state: Hash[Symbol, untyped], yielder: Enumerator::Yielder) -> void
243
+ #: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
204
244
  def handle_text_event(event, state:, yielder:)
205
245
  state[:text] ||= ""
206
246
  state[:text] += event.text
@@ -208,7 +248,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
208
248
  end
209
249
 
210
250
  #--
211
- #: (untyped, state: Hash[Symbol, untyped], yielder: Enumerator::Yielder) -> void
251
+ #: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
212
252
  def handle_thinking_event(event, state:, yielder:)
213
253
  state[:reasoning] ||= ""
214
254
  state[:reasoning] += event.thinking
@@ -216,7 +256,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
216
256
  end
217
257
 
218
258
  #--
219
- #: (untyped, state: Hash[Symbol, untyped], yielder: Enumerator::Yielder) -> void
259
+ #: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
220
260
  def handle_input_json_event(event, state:, yielder:)
221
261
  if state[:tool_call].nil?
222
262
  state[:tool_call] = {id: nil, name: nil, arguments: ""}
@@ -230,7 +270,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
230
270
  end
231
271
 
232
272
  #--
233
- #: (untyped, state: Hash[Symbol, untyped], yielder: Enumerator::Yielder) -> void
273
+ #: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
234
274
  def handle_content_block_stop_tool_use(event, state:, yielder:)
235
275
  content_block = event.content_block
236
276
  arguments = content_block.input.is_a?(String) ? content_block.input : content_block.input.to_json
@@ -244,21 +284,21 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
244
284
  end
245
285
 
246
286
  #--
247
- #: (untyped, state: Hash[Symbol, untyped], yielder: Enumerator::Yielder) -> void
287
+ #: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
248
288
  def handle_content_block_stop_thinking(_event, state:, yielder:)
249
289
  yielder << Riffer::StreamEvents::ReasoningDone.new(state[:reasoning])
250
290
  state[:reasoning] = nil
251
291
  end
252
292
 
253
293
  #--
254
- #: (untyped, state: Hash[Symbol, untyped], yielder: Enumerator::Yielder) -> void
294
+ #: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
255
295
  def handle_content_block_stop_text(_event, state:, yielder:)
256
296
  yielder << Riffer::StreamEvents::TextDone.new(state[:text])
257
297
  state[:text] = nil
258
298
  end
259
299
 
260
300
  #--
261
- #: (untyped, state: Hash[Symbol, untyped], yielder: Enumerator::Yielder) -> void
301
+ #: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
262
302
  def handle_content_block_stop_server_tool_use(_event, state:, yielder:)
263
303
  return unless state[:web_search_json]
264
304
 
@@ -270,7 +310,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
270
310
  end
271
311
 
272
312
  #--
273
- #: (untyped, state: Hash[Symbol, untyped], yielder: Enumerator::Yielder) -> void
313
+ #: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
274
314
  def handle_content_block_stop_web_search_result(event, state:, yielder:)
275
315
  content_block = event.content_block
276
316
  sources = (content_block.content || []).filter_map do |item|
@@ -283,20 +323,15 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
283
323
  end
284
324
 
285
325
  #--
286
- #: (untyped, accumulated_message: untyped, yielder: Enumerator::Yielder) -> void
326
+ #: (untyped, accumulated_message: untyped, yielder: Riffer::Providers::_EventSink) -> void
287
327
  def handle_message_stop(_event, accumulated_message:, yielder:)
288
328
  message = accumulated_message #: Anthropic::Models::Message?
329
+ yield_finish_reason(yielder, build_finish_reason(message&.stop_reason))
330
+
289
331
  usage = message&.usage
290
332
  return unless usage
291
333
 
292
- yielder << Riffer::StreamEvents::TokenUsageDone.new(
293
- token_usage: Riffer::Providers::TokenUsage.new(
294
- input_tokens: usage.input_tokens,
295
- output_tokens: usage.output_tokens,
296
- cache_write_tokens: usage.cache_creation_input_tokens,
297
- cache_read_tokens: usage.cache_read_input_tokens
298
- )
299
- )
334
+ yielder << Riffer::StreamEvents::TokenUsageDone.new(token_usage: build_token_usage(usage))
300
335
  end
301
336
 
302
337
  #--
@@ -5,6 +5,13 @@
5
5
  # gem. Credentials resolve from kwargs, then config, then
6
6
  # +AZURE_OPENAI_API_KEY+ / +AZURE_OPENAI_ENDPOINT+.
7
7
  class Riffer::Providers::AzureOpenAI < Riffer::Providers::OpenAI
8
+ # The GenAI semconv well-known provider name.
9
+ #--
10
+ #: () -> String
11
+ def self.semconv_provider_name
12
+ "azure.ai.openai"
13
+ end
14
+
8
15
  #--
9
16
  #: (**untyped) -> void
10
17
  def initialize(**options)
@@ -9,6 +9,7 @@ require "json"
9
9
  # class orchestrates them.
10
10
  class Riffer::Providers::Base
11
11
  # @rbs @current_tools: Array[singleton(Riffer::Tool)]
12
+ # @rbs @current_model: String?
12
13
 
13
14
  WIRE_SEPARATOR = "__" #: String
14
15
 
@@ -20,6 +21,19 @@ class Riffer::Providers::Base
20
21
  Riffer::Skills::MarkdownAdapter
21
22
  end
22
23
 
24
+ # Returns the provider name stamped as <tt>gen_ai.provider.name</tt> on trace
25
+ # spans, ideally a GenAI semconv well-known value. Defaults to the snake_cased
26
+ # class name rather than raising like the abstract provider methods, so
27
+ # enabling tracing never breaks an otherwise-working custom provider.
28
+ #--
29
+ #: () -> String
30
+ def self.semconv_provider_name
31
+ class_name = name
32
+ return "unknown" unless class_name
33
+
34
+ Riffer::Helpers::ClassNameConverter.convert(class_name.split("::").last.to_s)
35
+ end
36
+
23
37
  # Generates text using the provider.
24
38
  #
25
39
  #--
@@ -27,23 +41,35 @@ class Riffer::Providers::Base
27
41
  def generate_text(prompt: nil, system: nil, messages: nil, model: nil, files: nil, **options)
28
42
  validate_input!(prompt: prompt, system: system, messages: messages)
29
43
  @current_tools = options[:tools] || [] #: Array[singleton(Riffer::Tool)]
44
+ @current_model = model
30
45
  messages = normalize_messages(prompt: prompt, system: system, messages: messages, files: files)
31
46
  validate_normalized_messages!(messages)
32
47
  messages = merge_consecutive_messages(messages)
33
48
  params = build_request_params(messages, model, options)
34
- response = execute_generate(params)
35
-
36
- content = extract_content(response)
37
- tool_calls = extract_tool_calls(response)
38
- token_usage = extract_token_usage(response)
39
- structured_output = parse_structured_output(content) if options[:structured_output] && tool_calls.empty?
40
-
41
- Riffer::Messages::Assistant.new(
42
- content,
43
- tool_calls: tool_calls,
44
- token_usage: token_usage,
45
- structured_output: structured_output
46
- )
49
+
50
+ in_chat_span(model, messages, options) do |span|
51
+ response = execute_generate(params)
52
+
53
+ content = extract_content(response)
54
+ tool_calls = extract_tool_calls(response)
55
+ token_usage = extract_token_usage(response)
56
+ finish_reason = extract_finish_reason(response)
57
+ structured_output = parse_structured_output(content) if options[:structured_output] && tool_calls.empty?
58
+
59
+ Riffer::Tracing.record_usage(span, token_usage)
60
+ record_token_usage_metric(model, token_usage)
61
+ record_cost_metric(model, token_usage)
62
+ record_finish_reason(span, finish_reason&.reason, finish_reason&.raw)
63
+ capture_output(span, content: content, tool_calls: tool_calls, finish_reason: finish_reason&.reason)
64
+
65
+ Riffer::Messages::Assistant.new(
66
+ content,
67
+ tool_calls: tool_calls,
68
+ token_usage: token_usage,
69
+ structured_output: structured_output,
70
+ finish_reason: finish_reason&.reason
71
+ )
72
+ end
47
73
  end
48
74
 
49
75
  # Streams text from the provider.
@@ -53,12 +79,31 @@ class Riffer::Providers::Base
53
79
  def stream_text(prompt: nil, system: nil, messages: nil, model: nil, files: nil, **options)
54
80
  validate_input!(prompt: prompt, system: system, messages: messages)
55
81
  @current_tools = options[:tools] || [] #: Array[singleton(Riffer::Tool)]
82
+ @current_model = model
56
83
  messages = normalize_messages(prompt: prompt, system: system, messages: messages, files: files)
57
84
  validate_normalized_messages!(messages)
58
85
  messages = merge_consecutive_messages(messages)
59
86
  params = build_request_params(messages, model, options)
87
+
88
+ # The enumerator body runs in its own fiber, where the fiber-local OTEL
89
+ # context is empty — capture here so the chat span parents to the
90
+ # caller's trace.
91
+ trace_context = Riffer::Tracing.current_context
60
92
  Enumerator.new do |yielder|
61
- execute_stream(params, yielder)
93
+ Riffer::Tracing.with_context(trace_context) do
94
+ in_chat_span(model, messages, options) do |span|
95
+ # The recorder feeds both the span and the token-usage metric, so build
96
+ # it whenever either is live — metrics fire even with tracing off.
97
+ observe = span.recording? || Riffer::Metrics.recording?
98
+ sink = observe ? Riffer::Tracing::StreamRecorder.new(yielder) : yielder
99
+ execute_stream(params, sink)
100
+ if sink.is_a?(Riffer::Tracing::StreamRecorder)
101
+ record_stream_outcome(span, sink)
102
+ record_token_usage_metric(model, sink.token_usage)
103
+ record_cost_metric(model, sink.token_usage)
104
+ end
105
+ end
106
+ end
62
107
  end
63
108
  end
64
109
 
@@ -95,7 +140,7 @@ class Riffer::Providers::Base
95
140
  end
96
141
 
97
142
  #--
98
- #: (Hash[Symbol, untyped], Enumerator::Yielder) -> void
143
+ #: (Hash[Symbol, untyped], Riffer::Providers::_EventSink) -> void
99
144
  def execute_stream(params, yielder)
100
145
  raise NotImplementedError, "Subclasses must implement #execute_stream"
101
146
  end
@@ -106,6 +151,49 @@ class Riffer::Providers::Base
106
151
  raise NotImplementedError, "Subclasses must implement #extract_token_usage"
107
152
  end
108
153
 
154
+ #: (Riffer::Providers::TokenUsage) -> Riffer::Providers::TokenUsage
155
+ def apply_pricing(usage)
156
+ rates = pricing_rates
157
+ return usage unless rates
158
+
159
+ cost = rates.cost_for(
160
+ input_tokens: usage.input_tokens,
161
+ output_tokens: usage.output_tokens,
162
+ cache_read_tokens: usage.cache_read_tokens,
163
+ cache_write_tokens: usage.cache_write_tokens
164
+ )
165
+ Riffer::Providers::TokenUsage.new(
166
+ input_tokens: usage.input_tokens,
167
+ output_tokens: usage.output_tokens,
168
+ cache_write_tokens: usage.cache_write_tokens,
169
+ cache_read_tokens: usage.cache_read_tokens,
170
+ cost: cost
171
+ )
172
+ end
173
+
174
+ #--
175
+ #: () -> Riffer::Config::Pricing::Rates?
176
+ def pricing_rates
177
+ model = @current_model
178
+ return nil unless model
179
+
180
+ pricing = Riffer.config.pricing
181
+ return nil if pricing.empty?
182
+
183
+ key = Riffer::Providers::Repository.key_for(self.class)
184
+ return nil unless key
185
+
186
+ pricing.rates_for("#{key}/#{model}")
187
+ end
188
+
189
+ # Defaults to nil rather than raising — finish reasons are optional, so
190
+ # providers that don't report one stay valid.
191
+ #--
192
+ #: (untyped) -> Riffer::Providers::FinishReason?
193
+ def extract_finish_reason(response)
194
+ nil
195
+ end
196
+
109
197
  #--
110
198
  #: (untyped) -> String
111
199
  def extract_content(response)
@@ -118,6 +206,150 @@ class Riffer::Providers::Base
118
206
  raise NotImplementedError, "Subclasses must implement #extract_tool_calls"
119
207
  end
120
208
 
209
+ # A deliberate whitelist — caller options outside it stay off spans.
210
+ REQUEST_PARAM_ATTRIBUTES = {
211
+ temperature: "gen_ai.request.temperature",
212
+ max_tokens: "gen_ai.request.max_tokens",
213
+ max_output_tokens: "gen_ai.request.max_tokens",
214
+ top_p: "gen_ai.request.top_p",
215
+ top_k: "gen_ai.request.top_k",
216
+ frequency_penalty: "gen_ai.request.frequency_penalty",
217
+ presence_penalty: "gen_ai.request.presence_penalty",
218
+ seed: "gen_ai.request.seed",
219
+ stop_sequences: "gen_ai.request.stop_sequences"
220
+ }.freeze #: Hash[Symbol, String]
221
+
222
+ #--
223
+ #: [R] (String?, Array[Riffer::Messages::Base], Hash[Symbol, untyped]) { (Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span) -> R } -> R
224
+ def in_chat_span(model, messages, options)
225
+ start = Riffer::Metrics.monotonic_now
226
+ error_type = nil #: String?
227
+ begin
228
+ Riffer::Tracing.in_span(model ? "chat #{model}" : "chat", attributes: chat_span_attributes(model, options), kind: :client) do |span|
229
+ capture_input(span, messages)
230
+ yield span
231
+ rescue => error
232
+ # The backend records the exception and error status on the re-raise;
233
+ # error.type is the one semconv attribute it doesn't set.
234
+ span.set_attribute("error.type", error.class.name)
235
+ raise
236
+ end
237
+ rescue => error
238
+ # The inner rescue tags the span; capture error.type here too, at method
239
+ # scope, where the ensure can read it onto the metric.
240
+ error_type = error.class.name #: String?
241
+ raise
242
+ ensure
243
+ Riffer::Metrics::Instruments::OPERATION_DURATION.record(Riffer::Metrics.monotonic_now - start, attributes: chat_metric_attributes(model, error_type))
244
+ end
245
+ end
246
+
247
+ #--
248
+ #: (String?, Hash[Symbol, untyped]) -> Hash[String, untyped]
249
+ def chat_span_attributes(model, options)
250
+ attributes = {
251
+ "gen_ai.operation.name" => "chat",
252
+ "gen_ai.provider.name" => self.class.semconv_provider_name
253
+ } #: Hash[String, untyped]
254
+ attributes["gen_ai.request.model"] = model if model
255
+
256
+ REQUEST_PARAM_ATTRIBUTES.each do |key, attribute|
257
+ value = options[key]
258
+ attributes[attribute] = value unless value.nil?
259
+ end
260
+
261
+ attributes
262
+ end
263
+
264
+ #--
265
+ #: (String?) -> Hash[String, untyped]
266
+ def chat_metric_base_attributes(model)
267
+ attributes = {
268
+ "gen_ai.operation.name" => "chat",
269
+ "gen_ai.provider.name" => self.class.semconv_provider_name
270
+ } #: Hash[String, untyped]
271
+ attributes["gen_ai.request.model"] = model if model
272
+ attributes
273
+ end
274
+
275
+ #--
276
+ #: (String?, String?) -> Hash[String, untyped]
277
+ def chat_metric_attributes(model, error_type)
278
+ attributes = chat_metric_base_attributes(model)
279
+ attributes["error.type"] = error_type if error_type
280
+ attributes
281
+ end
282
+
283
+ # Per-call only — the run level would double-count an aggregate.
284
+ #--
285
+ #: (String?, Riffer::Providers::TokenUsage?) -> void
286
+ def record_token_usage_metric(model, usage)
287
+ return unless usage
288
+
289
+ base = chat_metric_base_attributes(model)
290
+ Riffer::Metrics::Instruments::TOKEN_USAGE.record(usage.input_tokens, attributes: base.merge("gen_ai.token.type" => "input"))
291
+ Riffer::Metrics::Instruments::TOKEN_USAGE.record(usage.output_tokens, attributes: base.merge("gen_ai.token.type" => "output"))
292
+ end
293
+
294
+ # Per-call only — the run level would double-count an aggregate.
295
+ #--
296
+ #: (String?, Riffer::Providers::TokenUsage?) -> void
297
+ def record_cost_metric(model, usage)
298
+ cost = usage&.cost
299
+ return unless cost
300
+
301
+ Riffer::Metrics::Instruments::COST.record(cost, attributes: chat_metric_base_attributes(model))
302
+ end
303
+
304
+ #--
305
+ #: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span), Symbol?, String?) -> void
306
+ def record_finish_reason(span, reason, raw)
307
+ return unless reason
308
+
309
+ span.set_attribute("gen_ai.response.finish_reasons", [reason.to_s])
310
+ span.set_attribute("riffer.finish_reason.raw", raw) if raw && raw != reason.to_s
311
+ end
312
+
313
+ #--
314
+ #: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span), Riffer::Tracing::StreamRecorder) -> void
315
+ def record_stream_outcome(span, recorder)
316
+ Riffer::Tracing.record_usage(span, recorder.token_usage)
317
+ record_finish_reason(span, recorder.finish_reason, recorder.raw_finish_reason)
318
+ capture_output(span, content: recorder.content, tool_calls: recorder.tool_calls, finish_reason: recorder.finish_reason)
319
+ end
320
+
321
+ #--
322
+ #: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span), Array[Riffer::Messages::Base]) -> void
323
+ def capture_input(span, messages)
324
+ return unless capture_messages?(span)
325
+
326
+ span.set_attribute("gen_ai.input.messages", Riffer::Tracing::Capture.input_messages(messages))
327
+ system_instructions = Riffer::Tracing::Capture.system_instructions(messages)
328
+ span.set_attribute("gen_ai.system_instructions", system_instructions) if system_instructions
329
+ end
330
+
331
+ #--
332
+ #: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span), content: String?, tool_calls: Array[Riffer::Messages::Assistant::ToolCall], finish_reason: Symbol?) -> void
333
+ def capture_output(span, content:, tool_calls:, finish_reason:)
334
+ return unless capture_messages?(span)
335
+
336
+ span.set_attribute("gen_ai.output.messages", Riffer::Tracing::Capture.output_messages(content: content, tool_calls: tool_calls, finish_reason: finish_reason))
337
+ end
338
+
339
+ #--
340
+ #: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span)) -> bool
341
+ def capture_messages?(span)
342
+ Riffer.config.tracing.capture_messages && span.recording?
343
+ end
344
+
345
+ #--
346
+ #: (Riffer::Providers::_EventSink, Riffer::Providers::FinishReason?) -> void
347
+ def yield_finish_reason(yielder, finish_reason)
348
+ return unless finish_reason
349
+
350
+ yielder << Riffer::StreamEvents::FinishReasonDone.new(finish_reason: finish_reason.reason, raw_finish_reason: finish_reason.raw)
351
+ end
352
+
121
353
  #--
122
354
  #: (String) -> Hash[Symbol, untyped]?
123
355
  def parse_structured_output(content)
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ # Normalized reason an LLM call finished, paired with the provider's raw
5
+ # wire value. +reason+ carries the same meaning for every provider.
6
+ class Riffer::Providers::FinishReason
7
+ # The normalized vocabulary every provider maps into.
8
+ VALUES = %i[stop length tool_calls content_filter error other].freeze #: Array[Symbol]
9
+
10
+ # The normalized reason.
11
+ attr_reader :reason #: Symbol
12
+
13
+ # The provider's raw finish-reason value, when one exists on the wire.
14
+ attr_reader :raw #: String?
15
+
16
+ # Raises Riffer::ArgumentError when +reason+ is outside VALUES.
17
+ #--
18
+ #: (reason: Symbol, ?raw: String?) -> void
19
+ def initialize(reason:, raw: nil)
20
+ unless VALUES.include?(reason)
21
+ raise Riffer::ArgumentError, "reason must be one of #{VALUES.inspect}, got #{reason.inspect}"
22
+ end
23
+
24
+ @reason = reason
25
+ @raw = raw
26
+ end
27
+ end
@@ -17,6 +17,25 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
17
17
  DEFAULT_OPEN_TIMEOUT = 10 #: Integer
18
18
  DEFAULT_READ_TIMEOUT = 60 #: Integer
19
19
 
20
+ FINISH_REASONS = {
21
+ "STOP" => :stop,
22
+ "MAX_TOKENS" => :length,
23
+ "SAFETY" => :content_filter,
24
+ "RECITATION" => :content_filter,
25
+ "BLOCKLIST" => :content_filter,
26
+ "PROHIBITED_CONTENT" => :content_filter,
27
+ "SPII" => :content_filter,
28
+ "IMAGE_SAFETY" => :content_filter,
29
+ "MALFORMED_FUNCTION_CALL" => :error
30
+ }.freeze #: Hash[String, Symbol]
31
+
32
+ # The GenAI semconv well-known provider name.
33
+ #--
34
+ #: () -> String
35
+ def self.semconv_provider_name
36
+ "gcp.gemini"
37
+ end
38
+
20
39
  #--
21
40
  #: (?api_key: String?, ?open_timeout: Integer?, ?read_timeout: Integer?, **untyped) -> void
22
41
  def initialize(api_key: nil, open_timeout: nil, read_timeout: nil, **options)
@@ -103,15 +122,44 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
103
122
  usage = response[:usageMetadata]
104
123
  return nil unless usage
105
124
 
106
- Riffer::Providers::TokenUsage.new(
125
+ build_token_usage(usage)
126
+ end
127
+
128
+ #--
129
+ #: (Hash[Symbol, untyped]) -> Riffer::Providers::FinishReason?
130
+ def extract_finish_reason(response)
131
+ parts = response.dig(:candidates, 0, :content, :parts)
132
+ has_function_call = !!parts&.any? { |part| part[:functionCall] }
133
+ build_finish_reason(response.dig(:candidates, 0, :finishReason), tool_calls: has_function_call)
134
+ end
135
+
136
+ # Gemini reports STOP even when the candidate carries functionCall parts,
137
+ # so tool-call presence overrides the raw value.
138
+ #--
139
+ #: (String?, tool_calls: bool) -> Riffer::Providers::FinishReason?
140
+ def build_finish_reason(raw_reason, tool_calls:)
141
+ return nil unless raw_reason
142
+
143
+ raw = raw_reason.to_s
144
+ reason = FINISH_REASONS.fetch(raw, :other)
145
+ reason = :tool_calls if reason == :stop && tool_calls
146
+ Riffer::Providers::FinishReason.new(reason: reason, raw: raw)
147
+ end
148
+
149
+ # Gemini reports thinking tokens outside +candidatesTokenCount+;
150
+ # TokenUsage's output includes them.
151
+ #--
152
+ #: (Hash[Symbol, untyped]) -> Riffer::Providers::TokenUsage
153
+ def build_token_usage(usage)
154
+ apply_pricing(Riffer::Providers::TokenUsage.new(
107
155
  input_tokens: usage[:promptTokenCount] || 0,
108
- output_tokens: usage[:candidatesTokenCount] || 0,
156
+ output_tokens: (usage[:candidatesTokenCount] || 0) + (usage[:thoughtsTokenCount] || 0),
109
157
  cache_read_tokens: usage[:cachedContentTokenCount]
110
- )
158
+ ))
111
159
  end
112
160
 
113
161
  #--
114
- #: (Hash[Symbol, untyped], Enumerator::Yielder) -> void
162
+ #: (Hash[Symbol, untyped], Riffer::Providers::_EventSink) -> void
115
163
  def execute_stream(params, yielder)
116
164
  model = params[:model]
117
165
  body = params.except(:model)
@@ -125,6 +173,8 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
125
173
 
126
174
  full_text = +""
127
175
  buffer = +""
176
+ raw_finish_reason = nil #: String?
177
+ saw_function_call = false
128
178
 
129
179
  process_chunk = lambda do |chunk|
130
180
  buffer << chunk
@@ -146,6 +196,7 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
146
196
  yielder << Riffer::StreamEvents::TextDelta.new(part[:text])
147
197
  elsif part[:functionCall]
148
198
  fc = part[:functionCall]
199
+ saw_function_call = true
149
200
  call_id = "gemini_call_#{SecureRandom.hex(12)}"
150
201
  arguments = encode_tool_arguments(fc[:args])
151
202
  yielder << Riffer::StreamEvents::ToolCallDone.new(
@@ -157,15 +208,11 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
157
208
  end
158
209
  end
159
210
 
211
+ raw_finish_reason = parsed.dig(:candidates, 0, :finishReason) || raw_finish_reason
212
+
160
213
  usage = parsed[:usageMetadata]
161
214
  if usage && usage[:candidatesTokenCount]
162
- yielder << Riffer::StreamEvents::TokenUsageDone.new(
163
- token_usage: Riffer::Providers::TokenUsage.new(
164
- input_tokens: usage[:promptTokenCount] || 0,
165
- output_tokens: usage[:candidatesTokenCount] || 0,
166
- cache_read_tokens: usage[:cachedContentTokenCount]
167
- )
168
- )
215
+ yielder << Riffer::StreamEvents::TokenUsageDone.new(token_usage: build_token_usage(usage))
169
216
  end
170
217
  end
171
218
  end
@@ -183,6 +230,7 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
183
230
  end
184
231
 
185
232
  yielder << Riffer::StreamEvents::TextDone.new(full_text) unless full_text.empty?
233
+ yield_finish_reason(yielder, build_finish_reason(raw_finish_reason, tool_calls: saw_function_call))
186
234
  end
187
235
 
188
236
  #--