riffer 0.32.0 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/.ruby-version +1 -1
- data/CHANGELOG.md +34 -0
- data/README.md +13 -11
- data/docs/01_OVERVIEW.md +2 -0
- data/docs/04_AGENT_LIFECYCLE.md +15 -13
- data/docs/08_MESSAGES.md +39 -5
- data/docs/09_STREAM_EVENTS.md +14 -0
- data/docs/10_CONFIGURATION.md +73 -4
- data/docs/13_SKILLS.md +66 -4
- data/docs/14_MCP.md +2 -1
- data/docs/16_TRACING.md +250 -0
- data/docs/17_METRICS.md +123 -0
- data/docs/providers/07_CUSTOM_PROVIDERS.md +44 -0
- data/lib/riffer/agent/response.rb +11 -2
- data/lib/riffer/agent/run.rb +136 -35
- data/lib/riffer/agent.rb +5 -5
- data/lib/riffer/config.rb +231 -15
- data/lib/riffer/guardrail.rb +8 -0
- data/lib/riffer/guardrails/runner.rb +33 -0
- data/lib/riffer/helpers/boolean.rb +22 -0
- data/lib/riffer/mcp/authenticated_tool.rb +14 -20
- data/lib/riffer/mcp/registration.rb +4 -4
- data/lib/riffer/mcp/tool.rb +23 -0
- data/lib/riffer/mcp/tool_factory.rb +14 -22
- data/lib/riffer/messages/assistant.rb +15 -3
- data/lib/riffer/messages/base.rb +2 -1
- data/lib/riffer/metrics/instruments.rb +25 -0
- data/lib/riffer/metrics/null.rb +14 -0
- data/lib/riffer/metrics/otel.rb +79 -0
- data/lib/riffer/metrics.rb +93 -0
- data/lib/riffer/providers/amazon_bedrock.rb +57 -21
- data/lib/riffer/providers/anthropic.rb +59 -24
- data/lib/riffer/providers/azure_open_ai.rb +7 -0
- data/lib/riffer/providers/base.rb +247 -15
- data/lib/riffer/providers/finish_reason.rb +27 -0
- data/lib/riffer/providers/gemini.rb +59 -11
- data/lib/riffer/providers/mock.rb +30 -9
- data/lib/riffer/providers/open_ai.rb +78 -24
- data/lib/riffer/providers/open_router.rb +56 -16
- data/lib/riffer/providers/repository.rb +9 -0
- data/lib/riffer/providers/token_usage.rb +27 -11
- data/lib/riffer/skills/activate_tool.rb +12 -2
- data/lib/riffer/skills/adapter.rb +15 -0
- data/lib/riffer/skills/context.rb +78 -11
- data/lib/riffer/skills/frontmatter.rb +13 -5
- data/lib/riffer/skills/markdown_adapter.rb +1 -1
- data/lib/riffer/skills/xml_adapter.rb +1 -1
- data/lib/riffer/stream_events/finish_reason_done.rb +34 -0
- data/lib/riffer/tools/runtime.rb +99 -3
- data/lib/riffer/tracing/capture.rb +92 -0
- data/lib/riffer/tracing/null.rb +61 -0
- data/lib/riffer/tracing/otel.rb +131 -0
- data/lib/riffer/tracing/stream_recorder.rb +51 -0
- data/lib/riffer/tracing.rb +78 -0
- data/lib/riffer/version.rb +1 -1
- data/sig/_private/opentelemetry.rbs +22 -0
- data/sig/generated/riffer/agent/response.rbs +9 -2
- data/sig/generated/riffer/agent/run.rbs +28 -8
- data/sig/generated/riffer/config.rbs +162 -16
- data/sig/generated/riffer/guardrail.rbs +6 -0
- data/sig/generated/riffer/guardrails/runner.rbs +14 -0
- data/sig/generated/riffer/helpers/boolean.rbs +11 -0
- data/sig/generated/riffer/mcp/authenticated_tool.rbs +6 -8
- data/sig/generated/riffer/mcp/registration.rbs +4 -4
- data/sig/generated/riffer/mcp/tool.rbs +19 -0
- data/sig/generated/riffer/mcp/tool_factory.rbs +8 -7
- data/sig/generated/riffer/messages/assistant.rbs +10 -4
- data/sig/generated/riffer/metrics/instruments.rbs +13 -0
- data/sig/generated/riffer/metrics/null.rbs +10 -0
- data/sig/generated/riffer/metrics/otel.rbs +47 -0
- data/sig/generated/riffer/metrics.rbs +71 -0
- data/sig/generated/riffer/providers/amazon_bedrock.rbs +35 -14
- data/sig/generated/riffer/providers/anthropic.rbs +41 -20
- data/sig/generated/riffer/providers/azure_open_ai.rbs +5 -0
- data/sig/generated/riffer/providers/base.rbs +78 -2
- data/sig/generated/riffer/providers/finish_reason.rbs +19 -0
- data/sig/generated/riffer/providers/gemini.rbs +25 -2
- data/sig/generated/riffer/providers/mock.rbs +16 -5
- data/sig/generated/riffer/providers/open_ai.rbs +44 -22
- data/sig/generated/riffer/providers/open_router.rbs +31 -12
- data/sig/generated/riffer/providers/repository.rbs +7 -0
- data/sig/generated/riffer/providers/token_usage.rbs +20 -10
- data/sig/generated/riffer/skills/activate_tool.rbs +7 -1
- data/sig/generated/riffer/skills/adapter.rbs +10 -0
- data/sig/generated/riffer/skills/context.rbs +52 -4
- data/sig/generated/riffer/skills/frontmatter.rbs +10 -3
- data/sig/generated/riffer/stream_events/finish_reason_done.rbs +21 -0
- data/sig/generated/riffer/tools/runtime.rbs +35 -0
- data/sig/generated/riffer/tracing/capture.rbs +46 -0
- data/sig/generated/riffer/tracing/null.rbs +46 -0
- data/sig/generated/riffer/tracing/otel.rbs +83 -0
- data/sig/generated/riffer/tracing/stream_recorder.rbs +31 -0
- data/sig/generated/riffer/tracing.rbs +52 -0
- data/sig/manual/riffer/helpers/boolean.rbs +5 -0
- data/sig/manual/riffer/metrics/null.rbs +5 -0
- data/sig/manual/riffer/metrics.rbs +5 -0
- data/sig/manual/riffer/providers.rbs +9 -0
- data/sig/manual/riffer/tracing/capture.rbs +5 -0
- data/sig/manual/riffer/tracing/null.rbs +5 -0
- data/sig/manual/riffer/tracing.rbs +5 -0
- metadata +40 -4
|
@@ -6,6 +6,14 @@
|
|
|
6
6
|
class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
7
7
|
WEB_SEARCH_TOOL_TYPE = "web_search_20250305" #: String
|
|
8
8
|
|
|
9
|
+
FINISH_REASONS = {
|
|
10
|
+
"end_turn" => :stop,
|
|
11
|
+
"stop_sequence" => :stop,
|
|
12
|
+
"max_tokens" => :length,
|
|
13
|
+
"tool_use" => :tool_calls,
|
|
14
|
+
"refusal" => :content_filter
|
|
15
|
+
}.freeze #: Hash[String, Symbol]
|
|
16
|
+
|
|
9
17
|
# Returns the XML skill adapter for Anthropic/Claude.
|
|
10
18
|
#
|
|
11
19
|
#--
|
|
@@ -14,6 +22,13 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
14
22
|
Riffer::Skills::XmlAdapter
|
|
15
23
|
end
|
|
16
24
|
|
|
25
|
+
# The GenAI semconv well-known provider name.
|
|
26
|
+
#--
|
|
27
|
+
#: () -> String
|
|
28
|
+
def self.semconv_provider_name
|
|
29
|
+
"anthropic"
|
|
30
|
+
end
|
|
31
|
+
|
|
17
32
|
#--
|
|
18
33
|
#: (?api_key: String?, **untyped) -> void
|
|
19
34
|
def initialize(api_key: nil, **options)
|
|
@@ -81,14 +96,39 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
81
96
|
#: (untyped) -> Riffer::Providers::TokenUsage?
|
|
82
97
|
def extract_token_usage(response)
|
|
83
98
|
message = response #: Anthropic::Models::Message
|
|
84
|
-
|
|
99
|
+
build_token_usage(message.usage)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
#--
|
|
103
|
+
#: (untyped) -> Riffer::Providers::FinishReason?
|
|
104
|
+
def extract_finish_reason(response)
|
|
105
|
+
message = response #: Anthropic::Models::Message
|
|
106
|
+
build_finish_reason(message.stop_reason)
|
|
107
|
+
end
|
|
85
108
|
|
|
86
|
-
|
|
87
|
-
|
|
109
|
+
#--
|
|
110
|
+
#: (untyped) -> Riffer::Providers::FinishReason?
|
|
111
|
+
def build_finish_reason(stop_reason)
|
|
112
|
+
return nil unless stop_reason
|
|
113
|
+
|
|
114
|
+
raw = stop_reason.to_s
|
|
115
|
+
Riffer::Providers::FinishReason.new(reason: FINISH_REASONS.fetch(raw, :other), raw: raw)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Anthropic's +input_tokens+ excludes the cache buckets; TokenUsage's
|
|
119
|
+
# input includes them.
|
|
120
|
+
#--
|
|
121
|
+
#: (untyped) -> Riffer::Providers::TokenUsage
|
|
122
|
+
def build_token_usage(usage)
|
|
123
|
+
cache_write = usage.cache_creation_input_tokens
|
|
124
|
+
cache_read = usage.cache_read_input_tokens
|
|
125
|
+
|
|
126
|
+
apply_pricing(Riffer::Providers::TokenUsage.new(
|
|
127
|
+
input_tokens: usage.input_tokens + (cache_write || 0) + (cache_read || 0),
|
|
88
128
|
output_tokens: usage.output_tokens,
|
|
89
|
-
cache_write_tokens:
|
|
90
|
-
cache_read_tokens:
|
|
91
|
-
)
|
|
129
|
+
cache_write_tokens: cache_write,
|
|
130
|
+
cache_read_tokens: cache_read
|
|
131
|
+
))
|
|
92
132
|
end
|
|
93
133
|
|
|
94
134
|
#--
|
|
@@ -130,7 +170,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
130
170
|
end
|
|
131
171
|
|
|
132
172
|
#--
|
|
133
|
-
#: (Hash[Symbol, untyped],
|
|
173
|
+
#: (Hash[Symbol, untyped], Riffer::Providers::_EventSink) -> void
|
|
134
174
|
def execute_stream(params, yielder)
|
|
135
175
|
current_state = {
|
|
136
176
|
text: nil,
|
|
@@ -200,7 +240,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
200
240
|
end
|
|
201
241
|
|
|
202
242
|
#--
|
|
203
|
-
#: (untyped, state: Hash[Symbol, untyped], yielder:
|
|
243
|
+
#: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
|
|
204
244
|
def handle_text_event(event, state:, yielder:)
|
|
205
245
|
state[:text] ||= ""
|
|
206
246
|
state[:text] += event.text
|
|
@@ -208,7 +248,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
208
248
|
end
|
|
209
249
|
|
|
210
250
|
#--
|
|
211
|
-
#: (untyped, state: Hash[Symbol, untyped], yielder:
|
|
251
|
+
#: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
|
|
212
252
|
def handle_thinking_event(event, state:, yielder:)
|
|
213
253
|
state[:reasoning] ||= ""
|
|
214
254
|
state[:reasoning] += event.thinking
|
|
@@ -216,7 +256,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
216
256
|
end
|
|
217
257
|
|
|
218
258
|
#--
|
|
219
|
-
#: (untyped, state: Hash[Symbol, untyped], yielder:
|
|
259
|
+
#: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
|
|
220
260
|
def handle_input_json_event(event, state:, yielder:)
|
|
221
261
|
if state[:tool_call].nil?
|
|
222
262
|
state[:tool_call] = {id: nil, name: nil, arguments: ""}
|
|
@@ -230,7 +270,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
230
270
|
end
|
|
231
271
|
|
|
232
272
|
#--
|
|
233
|
-
#: (untyped, state: Hash[Symbol, untyped], yielder:
|
|
273
|
+
#: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
|
|
234
274
|
def handle_content_block_stop_tool_use(event, state:, yielder:)
|
|
235
275
|
content_block = event.content_block
|
|
236
276
|
arguments = content_block.input.is_a?(String) ? content_block.input : content_block.input.to_json
|
|
@@ -244,21 +284,21 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
244
284
|
end
|
|
245
285
|
|
|
246
286
|
#--
|
|
247
|
-
#: (untyped, state: Hash[Symbol, untyped], yielder:
|
|
287
|
+
#: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
|
|
248
288
|
def handle_content_block_stop_thinking(_event, state:, yielder:)
|
|
249
289
|
yielder << Riffer::StreamEvents::ReasoningDone.new(state[:reasoning])
|
|
250
290
|
state[:reasoning] = nil
|
|
251
291
|
end
|
|
252
292
|
|
|
253
293
|
#--
|
|
254
|
-
#: (untyped, state: Hash[Symbol, untyped], yielder:
|
|
294
|
+
#: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
|
|
255
295
|
def handle_content_block_stop_text(_event, state:, yielder:)
|
|
256
296
|
yielder << Riffer::StreamEvents::TextDone.new(state[:text])
|
|
257
297
|
state[:text] = nil
|
|
258
298
|
end
|
|
259
299
|
|
|
260
300
|
#--
|
|
261
|
-
#: (untyped, state: Hash[Symbol, untyped], yielder:
|
|
301
|
+
#: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
|
|
262
302
|
def handle_content_block_stop_server_tool_use(_event, state:, yielder:)
|
|
263
303
|
return unless state[:web_search_json]
|
|
264
304
|
|
|
@@ -270,7 +310,7 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
270
310
|
end
|
|
271
311
|
|
|
272
312
|
#--
|
|
273
|
-
#: (untyped, state: Hash[Symbol, untyped], yielder:
|
|
313
|
+
#: (untyped, state: Hash[Symbol, untyped], yielder: Riffer::Providers::_EventSink) -> void
|
|
274
314
|
def handle_content_block_stop_web_search_result(event, state:, yielder:)
|
|
275
315
|
content_block = event.content_block
|
|
276
316
|
sources = (content_block.content || []).filter_map do |item|
|
|
@@ -283,20 +323,15 @@ class Riffer::Providers::Anthropic < Riffer::Providers::Base
|
|
|
283
323
|
end
|
|
284
324
|
|
|
285
325
|
#--
|
|
286
|
-
#: (untyped, accumulated_message: untyped, yielder:
|
|
326
|
+
#: (untyped, accumulated_message: untyped, yielder: Riffer::Providers::_EventSink) -> void
|
|
287
327
|
def handle_message_stop(_event, accumulated_message:, yielder:)
|
|
288
328
|
message = accumulated_message #: Anthropic::Models::Message?
|
|
329
|
+
yield_finish_reason(yielder, build_finish_reason(message&.stop_reason))
|
|
330
|
+
|
|
289
331
|
usage = message&.usage
|
|
290
332
|
return unless usage
|
|
291
333
|
|
|
292
|
-
yielder << Riffer::StreamEvents::TokenUsageDone.new(
|
|
293
|
-
token_usage: Riffer::Providers::TokenUsage.new(
|
|
294
|
-
input_tokens: usage.input_tokens,
|
|
295
|
-
output_tokens: usage.output_tokens,
|
|
296
|
-
cache_write_tokens: usage.cache_creation_input_tokens,
|
|
297
|
-
cache_read_tokens: usage.cache_read_input_tokens
|
|
298
|
-
)
|
|
299
|
-
)
|
|
334
|
+
yielder << Riffer::StreamEvents::TokenUsageDone.new(token_usage: build_token_usage(usage))
|
|
300
335
|
end
|
|
301
336
|
|
|
302
337
|
#--
|
|
@@ -5,6 +5,13 @@
|
|
|
5
5
|
# gem. Credentials resolve from kwargs, then config, then
|
|
6
6
|
# +AZURE_OPENAI_API_KEY+ / +AZURE_OPENAI_ENDPOINT+.
|
|
7
7
|
class Riffer::Providers::AzureOpenAI < Riffer::Providers::OpenAI
|
|
8
|
+
# The GenAI semconv well-known provider name.
|
|
9
|
+
#--
|
|
10
|
+
#: () -> String
|
|
11
|
+
def self.semconv_provider_name
|
|
12
|
+
"azure.ai.openai"
|
|
13
|
+
end
|
|
14
|
+
|
|
8
15
|
#--
|
|
9
16
|
#: (**untyped) -> void
|
|
10
17
|
def initialize(**options)
|
|
@@ -9,6 +9,7 @@ require "json"
|
|
|
9
9
|
# class orchestrates them.
|
|
10
10
|
class Riffer::Providers::Base
|
|
11
11
|
# @rbs @current_tools: Array[singleton(Riffer::Tool)]
|
|
12
|
+
# @rbs @current_model: String?
|
|
12
13
|
|
|
13
14
|
WIRE_SEPARATOR = "__" #: String
|
|
14
15
|
|
|
@@ -20,6 +21,19 @@ class Riffer::Providers::Base
|
|
|
20
21
|
Riffer::Skills::MarkdownAdapter
|
|
21
22
|
end
|
|
22
23
|
|
|
24
|
+
# Returns the provider name stamped as <tt>gen_ai.provider.name</tt> on trace
|
|
25
|
+
# spans, ideally a GenAI semconv well-known value. Defaults to the snake_cased
|
|
26
|
+
# class name rather than raising like the abstract provider methods, so
|
|
27
|
+
# enabling tracing never breaks an otherwise-working custom provider.
|
|
28
|
+
#--
|
|
29
|
+
#: () -> String
|
|
30
|
+
def self.semconv_provider_name
|
|
31
|
+
class_name = name
|
|
32
|
+
return "unknown" unless class_name
|
|
33
|
+
|
|
34
|
+
Riffer::Helpers::ClassNameConverter.convert(class_name.split("::").last.to_s)
|
|
35
|
+
end
|
|
36
|
+
|
|
23
37
|
# Generates text using the provider.
|
|
24
38
|
#
|
|
25
39
|
#--
|
|
@@ -27,23 +41,35 @@ class Riffer::Providers::Base
|
|
|
27
41
|
def generate_text(prompt: nil, system: nil, messages: nil, model: nil, files: nil, **options)
|
|
28
42
|
validate_input!(prompt: prompt, system: system, messages: messages)
|
|
29
43
|
@current_tools = options[:tools] || [] #: Array[singleton(Riffer::Tool)]
|
|
44
|
+
@current_model = model
|
|
30
45
|
messages = normalize_messages(prompt: prompt, system: system, messages: messages, files: files)
|
|
31
46
|
validate_normalized_messages!(messages)
|
|
32
47
|
messages = merge_consecutive_messages(messages)
|
|
33
48
|
params = build_request_params(messages, model, options)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
content
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
49
|
+
|
|
50
|
+
in_chat_span(model, messages, options) do |span|
|
|
51
|
+
response = execute_generate(params)
|
|
52
|
+
|
|
53
|
+
content = extract_content(response)
|
|
54
|
+
tool_calls = extract_tool_calls(response)
|
|
55
|
+
token_usage = extract_token_usage(response)
|
|
56
|
+
finish_reason = extract_finish_reason(response)
|
|
57
|
+
structured_output = parse_structured_output(content) if options[:structured_output] && tool_calls.empty?
|
|
58
|
+
|
|
59
|
+
Riffer::Tracing.record_usage(span, token_usage)
|
|
60
|
+
record_token_usage_metric(model, token_usage)
|
|
61
|
+
record_cost_metric(model, token_usage)
|
|
62
|
+
record_finish_reason(span, finish_reason&.reason, finish_reason&.raw)
|
|
63
|
+
capture_output(span, content: content, tool_calls: tool_calls, finish_reason: finish_reason&.reason)
|
|
64
|
+
|
|
65
|
+
Riffer::Messages::Assistant.new(
|
|
66
|
+
content,
|
|
67
|
+
tool_calls: tool_calls,
|
|
68
|
+
token_usage: token_usage,
|
|
69
|
+
structured_output: structured_output,
|
|
70
|
+
finish_reason: finish_reason&.reason
|
|
71
|
+
)
|
|
72
|
+
end
|
|
47
73
|
end
|
|
48
74
|
|
|
49
75
|
# Streams text from the provider.
|
|
@@ -53,12 +79,31 @@ class Riffer::Providers::Base
|
|
|
53
79
|
def stream_text(prompt: nil, system: nil, messages: nil, model: nil, files: nil, **options)
|
|
54
80
|
validate_input!(prompt: prompt, system: system, messages: messages)
|
|
55
81
|
@current_tools = options[:tools] || [] #: Array[singleton(Riffer::Tool)]
|
|
82
|
+
@current_model = model
|
|
56
83
|
messages = normalize_messages(prompt: prompt, system: system, messages: messages, files: files)
|
|
57
84
|
validate_normalized_messages!(messages)
|
|
58
85
|
messages = merge_consecutive_messages(messages)
|
|
59
86
|
params = build_request_params(messages, model, options)
|
|
87
|
+
|
|
88
|
+
# The enumerator body runs in its own fiber, where the fiber-local OTEL
|
|
89
|
+
# context is empty — capture here so the chat span parents to the
|
|
90
|
+
# caller's trace.
|
|
91
|
+
trace_context = Riffer::Tracing.current_context
|
|
60
92
|
Enumerator.new do |yielder|
|
|
61
|
-
|
|
93
|
+
Riffer::Tracing.with_context(trace_context) do
|
|
94
|
+
in_chat_span(model, messages, options) do |span|
|
|
95
|
+
# The recorder feeds both the span and the token-usage metric, so build
|
|
96
|
+
# it whenever either is live — metrics fire even with tracing off.
|
|
97
|
+
observe = span.recording? || Riffer::Metrics.recording?
|
|
98
|
+
sink = observe ? Riffer::Tracing::StreamRecorder.new(yielder) : yielder
|
|
99
|
+
execute_stream(params, sink)
|
|
100
|
+
if sink.is_a?(Riffer::Tracing::StreamRecorder)
|
|
101
|
+
record_stream_outcome(span, sink)
|
|
102
|
+
record_token_usage_metric(model, sink.token_usage)
|
|
103
|
+
record_cost_metric(model, sink.token_usage)
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
62
107
|
end
|
|
63
108
|
end
|
|
64
109
|
|
|
@@ -95,7 +140,7 @@ class Riffer::Providers::Base
|
|
|
95
140
|
end
|
|
96
141
|
|
|
97
142
|
#--
|
|
98
|
-
#: (Hash[Symbol, untyped],
|
|
143
|
+
#: (Hash[Symbol, untyped], Riffer::Providers::_EventSink) -> void
|
|
99
144
|
def execute_stream(params, yielder)
|
|
100
145
|
raise NotImplementedError, "Subclasses must implement #execute_stream"
|
|
101
146
|
end
|
|
@@ -106,6 +151,49 @@ class Riffer::Providers::Base
|
|
|
106
151
|
raise NotImplementedError, "Subclasses must implement #extract_token_usage"
|
|
107
152
|
end
|
|
108
153
|
|
|
154
|
+
#: (Riffer::Providers::TokenUsage) -> Riffer::Providers::TokenUsage
|
|
155
|
+
def apply_pricing(usage)
|
|
156
|
+
rates = pricing_rates
|
|
157
|
+
return usage unless rates
|
|
158
|
+
|
|
159
|
+
cost = rates.cost_for(
|
|
160
|
+
input_tokens: usage.input_tokens,
|
|
161
|
+
output_tokens: usage.output_tokens,
|
|
162
|
+
cache_read_tokens: usage.cache_read_tokens,
|
|
163
|
+
cache_write_tokens: usage.cache_write_tokens
|
|
164
|
+
)
|
|
165
|
+
Riffer::Providers::TokenUsage.new(
|
|
166
|
+
input_tokens: usage.input_tokens,
|
|
167
|
+
output_tokens: usage.output_tokens,
|
|
168
|
+
cache_write_tokens: usage.cache_write_tokens,
|
|
169
|
+
cache_read_tokens: usage.cache_read_tokens,
|
|
170
|
+
cost: cost
|
|
171
|
+
)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
#--
|
|
175
|
+
#: () -> Riffer::Config::Pricing::Rates?
|
|
176
|
+
def pricing_rates
|
|
177
|
+
model = @current_model
|
|
178
|
+
return nil unless model
|
|
179
|
+
|
|
180
|
+
pricing = Riffer.config.pricing
|
|
181
|
+
return nil if pricing.empty?
|
|
182
|
+
|
|
183
|
+
key = Riffer::Providers::Repository.key_for(self.class)
|
|
184
|
+
return nil unless key
|
|
185
|
+
|
|
186
|
+
pricing.rates_for("#{key}/#{model}")
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# Defaults to nil rather than raising — finish reasons are optional, so
|
|
190
|
+
# providers that don't report one stay valid.
|
|
191
|
+
#--
|
|
192
|
+
#: (untyped) -> Riffer::Providers::FinishReason?
|
|
193
|
+
def extract_finish_reason(response)
|
|
194
|
+
nil
|
|
195
|
+
end
|
|
196
|
+
|
|
109
197
|
#--
|
|
110
198
|
#: (untyped) -> String
|
|
111
199
|
def extract_content(response)
|
|
@@ -118,6 +206,150 @@ class Riffer::Providers::Base
|
|
|
118
206
|
raise NotImplementedError, "Subclasses must implement #extract_tool_calls"
|
|
119
207
|
end
|
|
120
208
|
|
|
209
|
+
# A deliberate whitelist — caller options outside it stay off spans.
|
|
210
|
+
REQUEST_PARAM_ATTRIBUTES = {
|
|
211
|
+
temperature: "gen_ai.request.temperature",
|
|
212
|
+
max_tokens: "gen_ai.request.max_tokens",
|
|
213
|
+
max_output_tokens: "gen_ai.request.max_tokens",
|
|
214
|
+
top_p: "gen_ai.request.top_p",
|
|
215
|
+
top_k: "gen_ai.request.top_k",
|
|
216
|
+
frequency_penalty: "gen_ai.request.frequency_penalty",
|
|
217
|
+
presence_penalty: "gen_ai.request.presence_penalty",
|
|
218
|
+
seed: "gen_ai.request.seed",
|
|
219
|
+
stop_sequences: "gen_ai.request.stop_sequences"
|
|
220
|
+
}.freeze #: Hash[Symbol, String]
|
|
221
|
+
|
|
222
|
+
#--
|
|
223
|
+
#: [R] (String?, Array[Riffer::Messages::Base], Hash[Symbol, untyped]) { (Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span) -> R } -> R
|
|
224
|
+
def in_chat_span(model, messages, options)
|
|
225
|
+
start = Riffer::Metrics.monotonic_now
|
|
226
|
+
error_type = nil #: String?
|
|
227
|
+
begin
|
|
228
|
+
Riffer::Tracing.in_span(model ? "chat #{model}" : "chat", attributes: chat_span_attributes(model, options), kind: :client) do |span|
|
|
229
|
+
capture_input(span, messages)
|
|
230
|
+
yield span
|
|
231
|
+
rescue => error
|
|
232
|
+
# The backend records the exception and error status on the re-raise;
|
|
233
|
+
# error.type is the one semconv attribute it doesn't set.
|
|
234
|
+
span.set_attribute("error.type", error.class.name)
|
|
235
|
+
raise
|
|
236
|
+
end
|
|
237
|
+
rescue => error
|
|
238
|
+
# The inner rescue tags the span; capture error.type here too, at method
|
|
239
|
+
# scope, where the ensure can read it onto the metric.
|
|
240
|
+
error_type = error.class.name #: String?
|
|
241
|
+
raise
|
|
242
|
+
ensure
|
|
243
|
+
Riffer::Metrics::Instruments::OPERATION_DURATION.record(Riffer::Metrics.monotonic_now - start, attributes: chat_metric_attributes(model, error_type))
|
|
244
|
+
end
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
#--
|
|
248
|
+
#: (String?, Hash[Symbol, untyped]) -> Hash[String, untyped]
|
|
249
|
+
def chat_span_attributes(model, options)
|
|
250
|
+
attributes = {
|
|
251
|
+
"gen_ai.operation.name" => "chat",
|
|
252
|
+
"gen_ai.provider.name" => self.class.semconv_provider_name
|
|
253
|
+
} #: Hash[String, untyped]
|
|
254
|
+
attributes["gen_ai.request.model"] = model if model
|
|
255
|
+
|
|
256
|
+
REQUEST_PARAM_ATTRIBUTES.each do |key, attribute|
|
|
257
|
+
value = options[key]
|
|
258
|
+
attributes[attribute] = value unless value.nil?
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
attributes
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
#--
|
|
265
|
+
#: (String?) -> Hash[String, untyped]
|
|
266
|
+
def chat_metric_base_attributes(model)
|
|
267
|
+
attributes = {
|
|
268
|
+
"gen_ai.operation.name" => "chat",
|
|
269
|
+
"gen_ai.provider.name" => self.class.semconv_provider_name
|
|
270
|
+
} #: Hash[String, untyped]
|
|
271
|
+
attributes["gen_ai.request.model"] = model if model
|
|
272
|
+
attributes
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
#--
|
|
276
|
+
#: (String?, String?) -> Hash[String, untyped]
|
|
277
|
+
def chat_metric_attributes(model, error_type)
|
|
278
|
+
attributes = chat_metric_base_attributes(model)
|
|
279
|
+
attributes["error.type"] = error_type if error_type
|
|
280
|
+
attributes
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
# Per-call only — the run level would double-count an aggregate.
|
|
284
|
+
#--
|
|
285
|
+
#: (String?, Riffer::Providers::TokenUsage?) -> void
|
|
286
|
+
def record_token_usage_metric(model, usage)
|
|
287
|
+
return unless usage
|
|
288
|
+
|
|
289
|
+
base = chat_metric_base_attributes(model)
|
|
290
|
+
Riffer::Metrics::Instruments::TOKEN_USAGE.record(usage.input_tokens, attributes: base.merge("gen_ai.token.type" => "input"))
|
|
291
|
+
Riffer::Metrics::Instruments::TOKEN_USAGE.record(usage.output_tokens, attributes: base.merge("gen_ai.token.type" => "output"))
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
# Per-call only — the run level would double-count an aggregate.
|
|
295
|
+
#--
|
|
296
|
+
#: (String?, Riffer::Providers::TokenUsage?) -> void
|
|
297
|
+
def record_cost_metric(model, usage)
|
|
298
|
+
cost = usage&.cost
|
|
299
|
+
return unless cost
|
|
300
|
+
|
|
301
|
+
Riffer::Metrics::Instruments::COST.record(cost, attributes: chat_metric_base_attributes(model))
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
#--
|
|
305
|
+
#: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span), Symbol?, String?) -> void
|
|
306
|
+
def record_finish_reason(span, reason, raw)
|
|
307
|
+
return unless reason
|
|
308
|
+
|
|
309
|
+
span.set_attribute("gen_ai.response.finish_reasons", [reason.to_s])
|
|
310
|
+
span.set_attribute("riffer.finish_reason.raw", raw) if raw && raw != reason.to_s
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
#--
|
|
314
|
+
#: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span), Riffer::Tracing::StreamRecorder) -> void
|
|
315
|
+
def record_stream_outcome(span, recorder)
|
|
316
|
+
Riffer::Tracing.record_usage(span, recorder.token_usage)
|
|
317
|
+
record_finish_reason(span, recorder.finish_reason, recorder.raw_finish_reason)
|
|
318
|
+
capture_output(span, content: recorder.content, tool_calls: recorder.tool_calls, finish_reason: recorder.finish_reason)
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
#--
|
|
322
|
+
#: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span), Array[Riffer::Messages::Base]) -> void
|
|
323
|
+
def capture_input(span, messages)
|
|
324
|
+
return unless capture_messages?(span)
|
|
325
|
+
|
|
326
|
+
span.set_attribute("gen_ai.input.messages", Riffer::Tracing::Capture.input_messages(messages))
|
|
327
|
+
system_instructions = Riffer::Tracing::Capture.system_instructions(messages)
|
|
328
|
+
span.set_attribute("gen_ai.system_instructions", system_instructions) if system_instructions
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
#--
|
|
332
|
+
#: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span), content: String?, tool_calls: Array[Riffer::Messages::Assistant::ToolCall], finish_reason: Symbol?) -> void
|
|
333
|
+
def capture_output(span, content:, tool_calls:, finish_reason:)
|
|
334
|
+
return unless capture_messages?(span)
|
|
335
|
+
|
|
336
|
+
span.set_attribute("gen_ai.output.messages", Riffer::Tracing::Capture.output_messages(content: content, tool_calls: tool_calls, finish_reason: finish_reason))
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
#--
|
|
340
|
+
#: ((Riffer::Tracing::Otel::Span | Riffer::Tracing::Null::Span)) -> bool
|
|
341
|
+
def capture_messages?(span)
|
|
342
|
+
Riffer.config.tracing.capture_messages && span.recording?
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
#--
|
|
346
|
+
#: (Riffer::Providers::_EventSink, Riffer::Providers::FinishReason?) -> void
|
|
347
|
+
def yield_finish_reason(yielder, finish_reason)
|
|
348
|
+
return unless finish_reason
|
|
349
|
+
|
|
350
|
+
yielder << Riffer::StreamEvents::FinishReasonDone.new(finish_reason: finish_reason.reason, raw_finish_reason: finish_reason.raw)
|
|
351
|
+
end
|
|
352
|
+
|
|
121
353
|
#--
|
|
122
354
|
#: (String) -> Hash[Symbol, untyped]?
|
|
123
355
|
def parse_structured_output(content)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# rbs_inline: enabled
|
|
3
|
+
|
|
4
|
+
# Normalized reason an LLM call finished, paired with the provider's raw
|
|
5
|
+
# wire value. +reason+ carries the same meaning for every provider.
|
|
6
|
+
class Riffer::Providers::FinishReason
|
|
7
|
+
# The normalized vocabulary every provider maps into.
|
|
8
|
+
VALUES = %i[stop length tool_calls content_filter error other].freeze #: Array[Symbol]
|
|
9
|
+
|
|
10
|
+
# The normalized reason.
|
|
11
|
+
attr_reader :reason #: Symbol
|
|
12
|
+
|
|
13
|
+
# The provider's raw finish-reason value, when one exists on the wire.
|
|
14
|
+
attr_reader :raw #: String?
|
|
15
|
+
|
|
16
|
+
# Raises Riffer::ArgumentError when +reason+ is outside VALUES.
|
|
17
|
+
#--
|
|
18
|
+
#: (reason: Symbol, ?raw: String?) -> void
|
|
19
|
+
def initialize(reason:, raw: nil)
|
|
20
|
+
unless VALUES.include?(reason)
|
|
21
|
+
raise Riffer::ArgumentError, "reason must be one of #{VALUES.inspect}, got #{reason.inspect}"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
@reason = reason
|
|
25
|
+
@raw = raw
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -17,6 +17,25 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
|
|
|
17
17
|
DEFAULT_OPEN_TIMEOUT = 10 #: Integer
|
|
18
18
|
DEFAULT_READ_TIMEOUT = 60 #: Integer
|
|
19
19
|
|
|
20
|
+
FINISH_REASONS = {
|
|
21
|
+
"STOP" => :stop,
|
|
22
|
+
"MAX_TOKENS" => :length,
|
|
23
|
+
"SAFETY" => :content_filter,
|
|
24
|
+
"RECITATION" => :content_filter,
|
|
25
|
+
"BLOCKLIST" => :content_filter,
|
|
26
|
+
"PROHIBITED_CONTENT" => :content_filter,
|
|
27
|
+
"SPII" => :content_filter,
|
|
28
|
+
"IMAGE_SAFETY" => :content_filter,
|
|
29
|
+
"MALFORMED_FUNCTION_CALL" => :error
|
|
30
|
+
}.freeze #: Hash[String, Symbol]
|
|
31
|
+
|
|
32
|
+
# The GenAI semconv well-known provider name.
|
|
33
|
+
#--
|
|
34
|
+
#: () -> String
|
|
35
|
+
def self.semconv_provider_name
|
|
36
|
+
"gcp.gemini"
|
|
37
|
+
end
|
|
38
|
+
|
|
20
39
|
#--
|
|
21
40
|
#: (?api_key: String?, ?open_timeout: Integer?, ?read_timeout: Integer?, **untyped) -> void
|
|
22
41
|
def initialize(api_key: nil, open_timeout: nil, read_timeout: nil, **options)
|
|
@@ -103,15 +122,44 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
|
|
|
103
122
|
usage = response[:usageMetadata]
|
|
104
123
|
return nil unless usage
|
|
105
124
|
|
|
106
|
-
|
|
125
|
+
build_token_usage(usage)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
#--
|
|
129
|
+
#: (Hash[Symbol, untyped]) -> Riffer::Providers::FinishReason?
|
|
130
|
+
def extract_finish_reason(response)
|
|
131
|
+
parts = response.dig(:candidates, 0, :content, :parts)
|
|
132
|
+
has_function_call = !!parts&.any? { |part| part[:functionCall] }
|
|
133
|
+
build_finish_reason(response.dig(:candidates, 0, :finishReason), tool_calls: has_function_call)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Gemini reports STOP even when the candidate carries functionCall parts,
|
|
137
|
+
# so tool-call presence overrides the raw value.
|
|
138
|
+
#--
|
|
139
|
+
#: (String?, tool_calls: bool) -> Riffer::Providers::FinishReason?
|
|
140
|
+
def build_finish_reason(raw_reason, tool_calls:)
|
|
141
|
+
return nil unless raw_reason
|
|
142
|
+
|
|
143
|
+
raw = raw_reason.to_s
|
|
144
|
+
reason = FINISH_REASONS.fetch(raw, :other)
|
|
145
|
+
reason = :tool_calls if reason == :stop && tool_calls
|
|
146
|
+
Riffer::Providers::FinishReason.new(reason: reason, raw: raw)
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# Gemini reports thinking tokens outside +candidatesTokenCount+;
|
|
150
|
+
# TokenUsage's output includes them.
|
|
151
|
+
#--
|
|
152
|
+
#: (Hash[Symbol, untyped]) -> Riffer::Providers::TokenUsage
|
|
153
|
+
def build_token_usage(usage)
|
|
154
|
+
apply_pricing(Riffer::Providers::TokenUsage.new(
|
|
107
155
|
input_tokens: usage[:promptTokenCount] || 0,
|
|
108
|
-
output_tokens: usage[:candidatesTokenCount] || 0,
|
|
156
|
+
output_tokens: (usage[:candidatesTokenCount] || 0) + (usage[:thoughtsTokenCount] || 0),
|
|
109
157
|
cache_read_tokens: usage[:cachedContentTokenCount]
|
|
110
|
-
)
|
|
158
|
+
))
|
|
111
159
|
end
|
|
112
160
|
|
|
113
161
|
#--
|
|
114
|
-
#: (Hash[Symbol, untyped],
|
|
162
|
+
#: (Hash[Symbol, untyped], Riffer::Providers::_EventSink) -> void
|
|
115
163
|
def execute_stream(params, yielder)
|
|
116
164
|
model = params[:model]
|
|
117
165
|
body = params.except(:model)
|
|
@@ -125,6 +173,8 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
|
|
|
125
173
|
|
|
126
174
|
full_text = +""
|
|
127
175
|
buffer = +""
|
|
176
|
+
raw_finish_reason = nil #: String?
|
|
177
|
+
saw_function_call = false
|
|
128
178
|
|
|
129
179
|
process_chunk = lambda do |chunk|
|
|
130
180
|
buffer << chunk
|
|
@@ -146,6 +196,7 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
|
|
|
146
196
|
yielder << Riffer::StreamEvents::TextDelta.new(part[:text])
|
|
147
197
|
elsif part[:functionCall]
|
|
148
198
|
fc = part[:functionCall]
|
|
199
|
+
saw_function_call = true
|
|
149
200
|
call_id = "gemini_call_#{SecureRandom.hex(12)}"
|
|
150
201
|
arguments = encode_tool_arguments(fc[:args])
|
|
151
202
|
yielder << Riffer::StreamEvents::ToolCallDone.new(
|
|
@@ -157,15 +208,11 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
|
|
|
157
208
|
end
|
|
158
209
|
end
|
|
159
210
|
|
|
211
|
+
raw_finish_reason = parsed.dig(:candidates, 0, :finishReason) || raw_finish_reason
|
|
212
|
+
|
|
160
213
|
usage = parsed[:usageMetadata]
|
|
161
214
|
if usage && usage[:candidatesTokenCount]
|
|
162
|
-
yielder << Riffer::StreamEvents::TokenUsageDone.new(
|
|
163
|
-
token_usage: Riffer::Providers::TokenUsage.new(
|
|
164
|
-
input_tokens: usage[:promptTokenCount] || 0,
|
|
165
|
-
output_tokens: usage[:candidatesTokenCount] || 0,
|
|
166
|
-
cache_read_tokens: usage[:cachedContentTokenCount]
|
|
167
|
-
)
|
|
168
|
-
)
|
|
215
|
+
yielder << Riffer::StreamEvents::TokenUsageDone.new(token_usage: build_token_usage(usage))
|
|
169
216
|
end
|
|
170
217
|
end
|
|
171
218
|
end
|
|
@@ -183,6 +230,7 @@ class Riffer::Providers::Gemini < Riffer::Providers::Base
|
|
|
183
230
|
end
|
|
184
231
|
|
|
185
232
|
yielder << Riffer::StreamEvents::TextDone.new(full_text) unless full_text.empty?
|
|
233
|
+
yield_finish_reason(yielder, build_finish_reason(raw_finish_reason, tool_calls: saw_function_call))
|
|
186
234
|
end
|
|
187
235
|
|
|
188
236
|
#--
|