riffer 0.32.1 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.release-please-manifest.json +1 -1
  3. data/.ruby-version +1 -1
  4. data/CHANGELOG.md +27 -0
  5. data/README.md +13 -11
  6. data/docs/01_OVERVIEW.md +2 -0
  7. data/docs/04_AGENT_LIFECYCLE.md +15 -13
  8. data/docs/08_MESSAGES.md +39 -5
  9. data/docs/09_STREAM_EVENTS.md +14 -0
  10. data/docs/10_CONFIGURATION.md +73 -4
  11. data/docs/13_SKILLS.md +62 -4
  12. data/docs/14_MCP.md +2 -1
  13. data/docs/16_TRACING.md +250 -0
  14. data/docs/17_METRICS.md +123 -0
  15. data/docs/providers/07_CUSTOM_PROVIDERS.md +44 -0
  16. data/lib/riffer/agent/response.rb +11 -2
  17. data/lib/riffer/agent/run.rb +136 -35
  18. data/lib/riffer/agent.rb +1 -1
  19. data/lib/riffer/config.rb +231 -15
  20. data/lib/riffer/guardrail.rb +8 -0
  21. data/lib/riffer/guardrails/runner.rb +33 -0
  22. data/lib/riffer/helpers/boolean.rb +22 -0
  23. data/lib/riffer/mcp/authenticated_tool.rb +14 -20
  24. data/lib/riffer/mcp/registration.rb +4 -4
  25. data/lib/riffer/mcp/tool.rb +23 -0
  26. data/lib/riffer/mcp/tool_factory.rb +14 -22
  27. data/lib/riffer/messages/assistant.rb +15 -3
  28. data/lib/riffer/messages/base.rb +2 -1
  29. data/lib/riffer/metrics/instruments.rb +25 -0
  30. data/lib/riffer/metrics/null.rb +14 -0
  31. data/lib/riffer/metrics/otel.rb +79 -0
  32. data/lib/riffer/metrics.rb +93 -0
  33. data/lib/riffer/providers/amazon_bedrock.rb +57 -21
  34. data/lib/riffer/providers/anthropic.rb +59 -24
  35. data/lib/riffer/providers/azure_open_ai.rb +7 -0
  36. data/lib/riffer/providers/base.rb +247 -15
  37. data/lib/riffer/providers/finish_reason.rb +27 -0
  38. data/lib/riffer/providers/gemini.rb +59 -11
  39. data/lib/riffer/providers/mock.rb +30 -9
  40. data/lib/riffer/providers/open_ai.rb +78 -24
  41. data/lib/riffer/providers/open_router.rb +56 -16
  42. data/lib/riffer/providers/repository.rb +9 -0
  43. data/lib/riffer/providers/token_usage.rb +27 -11
  44. data/lib/riffer/skills/activate_tool.rb +11 -2
  45. data/lib/riffer/skills/adapter.rb +15 -0
  46. data/lib/riffer/skills/context.rb +63 -11
  47. data/lib/riffer/skills/markdown_adapter.rb +1 -1
  48. data/lib/riffer/skills/xml_adapter.rb +1 -1
  49. data/lib/riffer/stream_events/finish_reason_done.rb +34 -0
  50. data/lib/riffer/tools/runtime.rb +99 -3
  51. data/lib/riffer/tracing/capture.rb +92 -0
  52. data/lib/riffer/tracing/null.rb +61 -0
  53. data/lib/riffer/tracing/otel.rb +131 -0
  54. data/lib/riffer/tracing/stream_recorder.rb +51 -0
  55. data/lib/riffer/tracing.rb +78 -0
  56. data/lib/riffer/version.rb +1 -1
  57. data/sig/_private/opentelemetry.rbs +22 -0
  58. data/sig/generated/riffer/agent/response.rbs +9 -2
  59. data/sig/generated/riffer/agent/run.rbs +28 -8
  60. data/sig/generated/riffer/config.rbs +162 -16
  61. data/sig/generated/riffer/guardrail.rbs +6 -0
  62. data/sig/generated/riffer/guardrails/runner.rbs +14 -0
  63. data/sig/generated/riffer/helpers/boolean.rbs +11 -0
  64. data/sig/generated/riffer/mcp/authenticated_tool.rbs +6 -8
  65. data/sig/generated/riffer/mcp/registration.rbs +4 -4
  66. data/sig/generated/riffer/mcp/tool.rbs +19 -0
  67. data/sig/generated/riffer/mcp/tool_factory.rbs +8 -7
  68. data/sig/generated/riffer/messages/assistant.rbs +10 -4
  69. data/sig/generated/riffer/metrics/instruments.rbs +13 -0
  70. data/sig/generated/riffer/metrics/null.rbs +10 -0
  71. data/sig/generated/riffer/metrics/otel.rbs +47 -0
  72. data/sig/generated/riffer/metrics.rbs +71 -0
  73. data/sig/generated/riffer/providers/amazon_bedrock.rbs +35 -14
  74. data/sig/generated/riffer/providers/anthropic.rbs +41 -20
  75. data/sig/generated/riffer/providers/azure_open_ai.rbs +5 -0
  76. data/sig/generated/riffer/providers/base.rbs +78 -2
  77. data/sig/generated/riffer/providers/finish_reason.rbs +19 -0
  78. data/sig/generated/riffer/providers/gemini.rbs +25 -2
  79. data/sig/generated/riffer/providers/mock.rbs +16 -5
  80. data/sig/generated/riffer/providers/open_ai.rbs +44 -22
  81. data/sig/generated/riffer/providers/open_router.rbs +31 -12
  82. data/sig/generated/riffer/providers/repository.rbs +7 -0
  83. data/sig/generated/riffer/providers/token_usage.rbs +20 -10
  84. data/sig/generated/riffer/skills/activate_tool.rbs +7 -1
  85. data/sig/generated/riffer/skills/adapter.rbs +10 -0
  86. data/sig/generated/riffer/skills/context.rbs +42 -4
  87. data/sig/generated/riffer/stream_events/finish_reason_done.rbs +21 -0
  88. data/sig/generated/riffer/tools/runtime.rbs +35 -0
  89. data/sig/generated/riffer/tracing/capture.rbs +46 -0
  90. data/sig/generated/riffer/tracing/null.rbs +46 -0
  91. data/sig/generated/riffer/tracing/otel.rbs +83 -0
  92. data/sig/generated/riffer/tracing/stream_recorder.rbs +31 -0
  93. data/sig/generated/riffer/tracing.rbs +52 -0
  94. data/sig/manual/riffer/helpers/boolean.rbs +5 -0
  95. data/sig/manual/riffer/metrics/null.rbs +5 -0
  96. data/sig/manual/riffer/metrics.rbs +5 -0
  97. data/sig/manual/riffer/providers.rbs +9 -0
  98. data/sig/manual/riffer/tracing/capture.rbs +5 -0
  99. data/sig/manual/riffer/tracing/null.rbs +5 -0
  100. data/sig/manual/riffer/tracing.rbs +5 -0
  101. metadata +40 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 06d5acfa86b1320573aeb4dd2135a8540dbda454871a1e49cca8ee7b30e43336
4
- data.tar.gz: b216a00806b2f08516b197cf03c4bc23766c756174ab43fc3a5e45fb5a4f83d2
3
+ metadata.gz: e72e36f6d29b3a2387246615224e19becb1d157e4a2327f8229ee481f3704e7e
4
+ data.tar.gz: 89deb58a1ec3a5f0af904d86b38057a2d7bf301490deeadf492bbe11942d0e96
5
5
  SHA512:
6
- metadata.gz: 6408ae651fb7944a4618eced7dcc19658262560748a6fedf96cab6989cd7497acde23beb1962a4117e67e595f23e870e241d511c60329f10dd2aafa4f2cca8ff
7
- data.tar.gz: ce8962451448533b266b8411172e49311616b6b81a3c54a71c4484a249420f16ad474c5bf06e40d7d4e811ec37edf131db150e25b15e001e7970fa56f0ea711c
6
+ metadata.gz: 11fadbbfd249ce864885709684c51801d3ec2aeba83a3f750ee7afdb390750798d9598df69796bf047afa4c4b4e7136cfc7cff0edf851a0a8d945bb9f8549204
7
+ data.tar.gz: 60b6c3b2c8e7201cdfb6822709531640b501f4218e63fc2768349c9509b67ba286299fc00372a0c019b07a01390f4a7e77c5a4382e7ffe21ed7cfd250acb67b9
@@ -1,3 +1,3 @@
1
1
  {
2
- ".": "0.32.1"
2
+ ".": "0.33.0"
3
3
  }
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 4.0.3
1
+ 4.0.5
data/CHANGELOG.md CHANGED
@@ -5,6 +5,33 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.33.0](https://github.com/janeapp/riffer/compare/riffer/v0.32.1...riffer/v0.33.0) (2026-06-18)
9
+
10
+
11
+ ### ⚠ BREAKING CHANGES
12
+
13
+ * reported input_tokens grows by the cache token counts on Anthropic and Bedrock, and output_tokens grows by the thinking token count on Gemini, whenever those features are active.
14
+
15
+ ### Features
16
+
17
+ * add the Riffer::Metrics OpenTelemetry port foundation ([#325](https://github.com/janeapp/riffer/issues/325)) ([92d060c](https://github.com/janeapp/riffer/commit/92d060c9f006f9f6e904aa57e749909f360b8cd3))
18
+ * add tracing foundation with optional OTEL backend ([#307](https://github.com/janeapp/riffer/issues/307)) ([938194c](https://github.com/janeapp/riffer/commit/938194caa7c9bfcdada9039047adf2e9fd599c27))
19
+ * compute per-model cost on token usage ([#322](https://github.com/janeapp/riffer/issues/322)) ([f637f73](https://github.com/janeapp/riffer/commit/f637f73875cb5aa4563b5bae992269b5d7185ae2))
20
+ * emit a chat span per LLM call with normalized finish reasons ([#312](https://github.com/janeapp/riffer/issues/312)) ([42f39dd](https://github.com/janeapp/riffer/commit/42f39dd244295cf8d398fd7daf14c33d2a169c32))
21
+ * emit an execute_guardrail span per guardrail ([#324](https://github.com/janeapp/riffer/issues/324)) ([b280c71](https://github.com/janeapp/riffer/commit/b280c71bb8089ed7e948d8966f841535178f167f))
22
+ * emit an execute_tool span per tool call ([#318](https://github.com/janeapp/riffer/issues/318)) ([e548450](https://github.com/janeapp/riffer/commit/e548450a980e6a03d06bda488ab94704ad33a37a))
23
+ * emit gen_ai.client.operation.duration metric ([#326](https://github.com/janeapp/riffer/issues/326)) ([a9399b8](https://github.com/janeapp/riffer/commit/a9399b8f1b10bc947b1e0716a483e2921ab2a852))
24
+ * emit gen_ai.client.token.usage metric ([#327](https://github.com/janeapp/riffer/issues/327)) ([f77f0f7](https://github.com/janeapp/riffer/commit/f77f0f7cd8db7cbd3a3a36f10e4ab08e70e909a0))
25
+ * emit invoke_agent span per agent run ([#310](https://github.com/janeapp/riffer/issues/310)) ([49c8c79](https://github.com/janeapp/riffer/commit/49c8c79f3ba9c13c83ed6cd0e427e842a0459176))
26
+ * emit riffer.gen_ai.cost metric from TokenUsage cost ([#328](https://github.com/janeapp/riffer/issues/328)) ([1ee4772](https://github.com/janeapp/riffer/commit/1ee47726def4ca2e812e88157efce1bf2a8d2e69))
27
+ * support user-explicit skill activation and dedupe re-activations ([#305](https://github.com/janeapp/riffer/issues/305)) ([f95b908](https://github.com/janeapp/riffer/commit/f95b90897f60fc4b8f930297e0b22ce61a1a330f))
28
+ * surface cost on LLM-call and run spans ([#323](https://github.com/janeapp/riffer/issues/323)) ([a9074b4](https://github.com/janeapp/riffer/commit/a9074b47defb4a0426c8a51dd90b60260098a14b))
29
+
30
+
31
+ ### Code Refactoring
32
+
33
+ * normalize token usage semantics across providers ([#309](https://github.com/janeapp/riffer/issues/309)) ([990f86d](https://github.com/janeapp/riffer/commit/990f86d9ec74cfe85329a7ab583d51b72628c85f))
34
+
8
35
  ## [0.32.1](https://github.com/janeapp/riffer/compare/riffer/v0.32.0...riffer/v0.32.1) (2026-06-10)
9
36
 
10
37
 
data/README.md CHANGED
@@ -62,6 +62,8 @@ For comprehensive documentation, see the [docs](docs/) directory:
62
62
  - [Skills](docs/13_SKILLS.md) - Packaged agent capabilities
63
63
  - [MCP](docs/14_MCP.md) - Integrating third-party MCP servers
64
64
  - [Serialization](docs/15_SERIALIZATION.md) - Persisting and transferring agent definitions
65
+ - [Tracing](docs/16_TRACING.md) - OpenTelemetry span contract and host wiring
66
+ - [Metrics](docs/17_METRICS.md) - OpenTelemetry metric contract and host wiring
65
67
  - [Providers](docs/providers/01_PROVIDERS.md) - LLM provider adapters
66
68
 
67
69
  ### API Reference
@@ -85,17 +87,17 @@ bin/setup
85
87
  Common workflows are wrapped in `bin/`. Each is a thin `exec bundle exec …` script — use them
86
88
  instead of typing `bundle exec` yourself:
87
89
 
88
- | Command | Description |
89
- | --------------- | -------------------------------------------- |
90
- | `bin/rake` | Default task: test + standard + steep:check |
91
- | `bin/test` | Run tests |
92
- | `bin/lint` | Check code style (pass `--fix` to auto-fix) |
93
- | `bin/typecheck` | Run Steep type checker |
94
- | `bin/rbs` | Generate RBS type signatures |
95
- | `bin/rbs-watch` | Watch and regenerate RBS files |
96
- | `bin/docs` | Build RDoc HTML |
97
- | `bin/build` | Build the gem package |
98
- | `bin/console` | Interactive console |
90
+ | Command | Description |
91
+ | --------------- | ------------------------------------------- |
92
+ | `bin/rake` | Default task: test + standard + steep:check |
93
+ | `bin/test` | Run tests |
94
+ | `bin/lint` | Check code style (pass `--fix` to auto-fix) |
95
+ | `bin/typecheck` | Run Steep type checker |
96
+ | `bin/rbs` | Generate RBS type signatures |
97
+ | `bin/rbs-watch` | Watch and regenerate RBS files |
98
+ | `bin/docs` | Build RDoc HTML |
99
+ | `bin/build` | Build the gem package |
100
+ | `bin/console` | Interactive console |
99
101
 
100
102
  `bin/rake <task>` is the escape hatch for any rake task without a named wrapper (e.g.
101
103
  `bin/rake test:slow`, `bin/rake release`).
data/docs/01_OVERVIEW.md CHANGED
@@ -131,6 +131,8 @@ Response
131
131
  - [Agents](03_AGENTS.md) - Agent configuration and usage
132
132
  - [Tools](06_TOOLS.md) - Creating tools
133
133
  - [Configuration](10_CONFIGURATION.md) - Global configuration
134
+ - [Tracing](16_TRACING.md) - OpenTelemetry span contract and host wiring
135
+ - [Metrics](17_METRICS.md) - OpenTelemetry metric contract and host wiring
134
136
  - [Evals](11_EVALS.md) - Evaluating agent quality
135
137
  - [Guardrails](12_GUARDRAILS.md) - Input/output validation
136
138
  - [Skills](13_SKILLS.md) - Packaged agent capabilities
@@ -288,7 +288,7 @@ Mutators do **not** fire `on_message` — that callback is reserved for messages
288
288
  The mutable runtime context. A `Hash` threaded into every Proc-based DSL setting, guardrail, tool runtime, and skills resolution, and shared with every `Riffer::Agent::Run` this agent executes. Carries:
289
289
 
290
290
  - `context[:skills]` — the resolved `Riffer::Skills::Context` when skills are configured.
291
- - `context[:token_usage]` — the cumulative `Riffer::Providers::TokenUsage`, mutated by each Run as the loop progresses.
291
+ - `context[:token_usage]` — the cumulative `Riffer::Providers::TokenUsage`, mutated by each Run as the loop progresses. Per-run totals are on `response.token_usage`.
292
292
  - any caller-provided keys passed via `Agent.new(context: ...)`.
293
293
 
294
294
  ```ruby
@@ -303,18 +303,20 @@ agent.context[:skills] # the Skills::Context, if skills configured
303
303
 
304
304
  `Riffer::Agent::Response` is returned by `generate`:
305
305
 
306
- | Attribute | Type | Description |
307
- | ---------------------- | --------------------------- | ------------------------------------------------------------------------------------ |
308
- | `content` | `String` | The response text |
309
- | `structured_output` | `Hash` / `nil` | Parsed and validated structured output (see below) |
310
- | `blocked?` | `Boolean` | `true` if a guardrail tripwire fired |
311
- | `tripwire` | `Tripwire` / `nil` | The guardrail tripwire that blocked the request |
312
- | `modified?` | `Boolean` | `true` if a guardrail modified the content |
313
- | `modifications` | `Array` | List of guardrail modifications applied |
314
- | `interrupted?` | `Boolean` | `true` if the loop was interrupted |
315
- | `interrupt_reason` | `String` / `Symbol` / `nil` | The reason passed to `throw :riffer_interrupt` |
316
- | `messages` | `Array` | Full message history from the conversation |
317
- | `healed_tool_call_ids` | `Array[String]` | `tool_call` ids filled with placeholder results during interrupt healing (else `[]`) |
306
+ | Attribute | Type | Description |
307
+ | ---------------------- | --------------------------- | ------------------------------------------------------------------------------------------------ |
308
+ | `content` | `String` | The response text |
309
+ | `structured_output` | `Hash` / `nil` | Parsed and validated structured output (see below) |
310
+ | `blocked?` | `Boolean` | `true` if a guardrail tripwire fired |
311
+ | `tripwire` | `Tripwire` / `nil` | The guardrail tripwire that blocked the request |
312
+ | `modified?` | `Boolean` | `true` if a guardrail modified the content |
313
+ | `modifications` | `Array` | List of guardrail modifications applied |
314
+ | `interrupted?` | `Boolean` | `true` if the loop was interrupted |
315
+ | `interrupt_reason` | `String` / `Symbol` / `nil` | The reason passed to `throw :riffer_interrupt` |
316
+ | `messages` | `Array` | Full message history from the conversation |
317
+ | `healed_tool_call_ids` | `Array[String]` | `tool_call` ids filled with placeholder results during interrupt healing (else `[]`) |
318
+ | `token_usage` | `TokenUsage` / `nil` | Aggregate `Riffer::Providers::TokenUsage` across this run's LLM calls (`nil` when none reported) |
319
+ | `steps` | `Integer` | LLM calls made during this run (`0` when a before-guardrail blocks first); not the session's cumulative count |
318
320
 
319
321
  ### response.structured_output
320
322
 
data/docs/08_MESSAGES.md CHANGED
@@ -40,15 +40,16 @@ msg.to_h # => {role: :user, content: "Describe this image", files: [{...}]}
40
40
 
41
41
  ### Assistant
42
42
 
43
- Assistant messages represent LLM responses, potentially including tool calls and token usage data:
43
+ Assistant messages represent LLM responses, potentially including tool calls, token usage data, and the reason the model finished:
44
44
 
45
45
  ```ruby
46
46
  # Text-only response
47
47
  msg = Riffer::Messages::Assistant.new("I'm doing well, thank you!")
48
- msg.role # => :assistant
49
- msg.content # => "I'm doing well, thank you!"
50
- msg.tool_calls # => []
51
- msg.token_usage # => nil or Riffer::Providers::TokenUsage
48
+ msg.role # => :assistant
49
+ msg.content # => "I'm doing well, thank you!"
50
+ msg.tool_calls # => []
51
+ msg.token_usage # => nil or Riffer::Providers::TokenUsage
52
+ msg.finish_reason # => nil or a normalized Symbol (see below)
52
53
 
53
54
  # Response with tool calls
54
55
  msg = Riffer::Messages::Assistant.new("", tool_calls: [
@@ -65,6 +66,39 @@ if msg.token_usage
65
66
  end
66
67
  ```
67
68
 
69
+ #### Token Usage Semantics
70
+
71
+ `TokenUsage` buckets carry the same meaning for every provider, regardless of how the provider reports its raw usage:
72
+
73
+ - `input_tokens` — every token entering the context window, including cache reads and writes.
74
+ - `output_tokens` — every token the model generated, including reasoning/thinking tokens.
75
+ - `cache_read_tokens` — the subset of `input_tokens` read from the provider's prompt cache; `nil` when the provider doesn't report it.
76
+ - `cache_write_tokens` — the subset of `input_tokens` written to the provider's prompt cache; `nil` when the provider doesn't report it.
77
+
78
+ The cache buckets are subsets of `input_tokens`, never additions to it — summing `input_tokens + cache_read_tokens` double-counts. `total_tokens` (input + output) matches the totals providers report on their dashboards.
79
+
80
+ - `cost` — the computed cost of the call, set when pricing is configured for the model in use (see [Configuration → Pricing](10_CONFIGURATION.md#pricing)); `nil` when the model is unpriced. It's for observability, not billing. Run-level usage sums per-call costs through `TokenUsage#+`, so `response.token_usage.cost` is the total spend across the run — but the sum is `nil` if any call in the run used an unpriced model, rather than silently under-reporting.
81
+
82
+ #### Finish Reasons
83
+
84
+ `finish_reason` carries the same meaning for every provider — each adapter maps its raw wire value (Anthropic's `end_turn`, OpenAI's response status, Gemini's `STOP`, …) into a normalized vocabulary:
85
+
86
+ | Value | Meaning |
87
+ | ----------------- | --------------------------------------------------------------- |
88
+ | `:stop` | The model finished its turn naturally (or hit a stop sequence). |
89
+ | `:length` | Output was truncated at the max-token limit. |
90
+ | `:tool_calls` | The model stopped to call tools. |
91
+ | `:content_filter` | A provider safety system blocked or cut the response. |
92
+ | `:error` | The provider reported an error finish. |
93
+ | `:other` | A provider-specific value with no normalized equivalent. |
94
+
95
+ `finish_reason` is `nil` when the provider doesn't report one. Use it to detect truncation without parsing provider responses:
96
+
97
+ ```ruby
98
+ response = agent.generate("Summarize this document")
99
+ retry_with_higher_limit if agent.session.messages.last.finish_reason == :length
100
+ ```
101
+
68
102
  #### Structured Output on Messages
69
103
 
70
104
  When an agent has `structured_output` configured, the final assistant message stores the parsed hash directly. The `structured_output?` predicate checks for a non-nil value:
@@ -258,6 +258,20 @@ event.to_h # => {role: :assistant, token_usage: {input_
258
258
 
259
259
  Use this to track token consumption in real-time during streaming.
260
260
 
261
+ ### FinishReasonDone
262
+
263
+ Emitted once near the end of the stream with the normalized reason the model finished (no ordering guarantee relative to `TokenUsageDone`):
264
+
265
+ ```ruby
266
+ event = Riffer::StreamEvents::FinishReasonDone.new(finish_reason: :length, raw_finish_reason: "max_tokens")
267
+ event.role # => :assistant
268
+ event.finish_reason # => :length (see Messages — Finish Reasons for the vocabulary)
269
+ event.raw_finish_reason # => "max_tokens" (the provider's raw wire value, or nil)
270
+ event.to_h # => {role: :assistant, finish_reason: :length, raw_finish_reason: "max_tokens"}
271
+ ```
272
+
273
+ The agent loop stamps this value onto the accumulated assistant message's `finish_reason`.
274
+
261
275
  ## Streaming with Tools
262
276
 
263
277
  When an agent uses tools during streaming, the flow is:
@@ -65,11 +65,11 @@ Riffer.configure do |config|
65
65
  end
66
66
  ```
67
67
 
68
- | Value | Description |
69
- | ------------------------------ | ------------------------------------------------------------------------------------------------- |
68
+ | Value | Description |
69
+ | --------------------------------- | ------------------------------------------------------------------------------------------------------- |
70
70
  | `Riffer::Tools::Runtime` subclass | Instantiated automatically (e.g., `Riffer::Tools::Runtime::Inline`, `Riffer::Tools::Runtime::Threaded`) |
71
- | `Riffer::Tools::Runtime` instance | Custom runtime with specific options |
72
- | `Proc` | Dynamic resolution |
71
+ | `Riffer::Tools::Runtime` instance | Custom runtime with specific options |
72
+ | `Proc` | Dynamic resolution |
73
73
 
74
74
  Per-agent configuration overrides this global default. See [Advanced Tool Configuration — Tool Runtime](07_TOOL_ADVANCED.md#tool-runtime-experimental) for details.
75
75
 
@@ -101,6 +101,75 @@ end
101
101
 
102
102
  Accepts a `Riffer::Skills::Backend` instance or a `Proc` that receives `context` and returns a backend. Defaults to `nil` — agents that don't set their own backend get no skills, matching pre-existing behavior. Per-agent backends override this default.
103
103
 
104
+ ### Tracing
105
+
106
+ Tracing-related global configuration lives under `config.tracing`. Riffer detects the OpenTelemetry API at runtime — without it (or without a host-configured OTEL SDK) every span is a silent no-op, and riffer carries no OTEL gem dependency either way.
107
+
108
+ ```ruby
109
+ Riffer.configure do |config|
110
+ config.tracing.enabled = ENV.fetch("RIFFER_TRACING_ENABLED", "true")
111
+ end
112
+ ```
113
+
114
+ | Option | Description |
115
+ | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
116
+ | `enabled` | The kill switch, consulted on every span — flipping it at runtime takes effect immediately. Accepts booleans or `'true'`/`'false'`/`'1'`/`'0'`. Defaults to `true`. |
117
+ | `capture_messages` | Opt-in capture of full message content on LLM-call spans (`gen_ai.input.messages`, `gen_ai.output.messages`, `gen_ai.system_instructions`) as GenAI-semconv JSON. Defaults to `false` — message content routinely carries sensitive data. File attachments serialize as metadata-only stubs (media type and name, never bytes), and riffer applies no size limit of its own — cap oversized attributes with the OTEL SDK attribute length limits. |
118
+ | `tracer_provider` | Explicit OTEL tracer provider (e.g. the SDK's in-memory provider in tests). Defaults to `nil`, which resolves the global `OpenTelemetry.tracer_provider` lazily at first span. Raises `Riffer::ArgumentError` if the `opentelemetry-api` gem isn't available at a supported version (>= 1.1, < 2). |
119
+
120
+ Hosts own SDK and exporter wiring — riffer only emits spans through whatever provider the host configures. See [Tracing](16_TRACING.md) for the emitted span contract — names, attributes, hierarchy, and host wiring.
121
+
122
+ ### Metrics
123
+
124
+ Metrics-related global configuration lives under `config.metrics`, **independent** of `config.tracing` — each has its own kill switch, so you can run one signal without the other. Riffer detects the OpenTelemetry metrics API at runtime — without it (or without a host-configured OTEL metrics SDK) every measurement is a silent no-op, and riffer carries no OTEL gem dependency either way. The metrics API and SDK ship as gems separate from the traces API, and both are still experimental (pre-1.0).
125
+
126
+ ```ruby
127
+ Riffer.configure do |config|
128
+ config.metrics.enabled = ENV.fetch("RIFFER_METRICS_ENABLED", "true")
129
+ end
130
+ ```
131
+
132
+ | Option | Description |
133
+ | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
134
+ | `enabled` | The kill switch, consulted on every measurement — flipping it at runtime takes effect immediately. Accepts booleans or `'true'`/`'false'`/`'1'`/`'0'`. Defaults to `true`. |
135
+ | `meter_provider` | Explicit OTEL meter provider (e.g. the SDK's in-memory provider in tests). Defaults to `nil`, which resolves the global `OpenTelemetry.meter_provider` lazily at first record. Raises `Riffer::ArgumentError` if the `opentelemetry-metrics-api` gem isn't available at a supported version (>= 0.2, < 1.0). |
136
+
137
+ Hosts own SDK, reader, exporter, and aggregation wiring — riffer only records instruments through whatever provider the host configures, and histogram bucket boundaries are set host-side via Views. See [Metrics](17_METRICS.md) for the instrument contract — names, units, attributes, and host wiring.
138
+
139
+ ### Pricing
140
+
141
+ Configure per-model token prices and riffer computes the cost of each LLM call onto its [`TokenUsage`](08_MESSAGES.md#token-usage-semantics). Riffer ships **no** price table — so an unconfigured model simply carries no cost (`token_usage.cost` is `nil`).
142
+
143
+ ```ruby
144
+ Riffer.configure do |config|
145
+ # Rates are per million tokens, keyed by the same "provider/model" id you give the agent.
146
+ config.pricing.set("anthropic/claude-sonnet-4-6", input: 3.0, output: 15.0, cache_read: 0.30, cache_write: 3.75)
147
+ config.pricing.set("openai/gpt-4", input: 30.0, output: 60.0)
148
+
149
+ # Pass an array to share one set of rates across a model family:
150
+ config.pricing.set(["openai/gpt-4", "openai/gpt-4-0613"], input: 30.0, output: 60.0)
151
+ end
152
+ ```
153
+
154
+ | Argument | Description |
155
+ | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
156
+ | `models` | A `provider/model` id (e.g. `"openai/gpt-4"`) — the same string you pass to `model` — or an array of ids that share one set of rates. No alias matching; raises on a malformed id. |
157
+ | `input:` | Price per **million** input tokens. Required. Applies to the uncached portion of `input_tokens`. |
158
+ | `output:` | Price per **million** output tokens. Required. |
159
+ | `cache_read:` | Price per million cache-read tokens. Optional — when omitted, cache reads bill at the `input:` rate. |
160
+ | `cache_write:` | Price per million cache-write tokens. Optional — when omitted, cache writes bill at the `input:` rate. |
161
+
162
+ Because the cache buckets are subsets of `input_tokens`, the cost formula subtracts them before applying the input rate:
163
+
164
+ ```text
165
+ cost = (input − cache_read − cache_write) × input_rate
166
+ + cache_read × cache_read_rate
167
+ + cache_write × cache_write_rate
168
+ + output × output_rate
169
+ ```
170
+
171
+ (all rates ÷ 1,000,000; an unset cache rate falls back to `input_rate`.) Cost is for observability, not billing — it's a `Float`, and sub-cent rounding can accumulate over a long run. See [Messages → Token Usage Semantics](08_MESSAGES.md#token-usage-semantics) for how cost surfaces and aggregates.
172
+
104
173
  ### Message ID Strategy
105
174
 
106
175
  Opt in to stable identifiers on every message for logging, persistence, or replay:
data/docs/13_SKILLS.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Skills
2
2
 
3
- Skills are packaged AI agent capabilities per the [Agent Skills spec](https://agentskills.io/). Each skill is a directory containing a `SKILL.md` file with YAML frontmatter and Markdown instructions. The framework discovers skills through a pluggable backend, injects a compact catalog into the system prompt (~50 tokens/skill), and provides a tool for the LLM to activate skills on demand.
3
+ Skills are packaged AI agent capabilities per the [Agent Skills spec](https://agentskills.io/). Each skill is a directory containing a `SKILL.md` file with YAML frontmatter and Markdown instructions. The framework discovers skills through a pluggable backend, injects a compact catalog into the system prompt (~50 tokens/skill), and supports both activation channels the spec describes: the LLM activates skills on demand through a tool, and your application injects skills the user asked for as conversation content (see [User-Triggered Activation](#user-triggered-activation)).
4
4
 
5
5
  ## Creating a Skill
6
6
 
@@ -38,7 +38,7 @@ Review the code for:
38
38
 
39
39
  **Optional frontmatter fields:**
40
40
 
41
- - `disable-model-invocation` — when `true`, the skill is hidden from the catalog and the LLM cannot activate it via the `skill_activate` tool. It stays reachable through the programmatic `activate` config (see [Activated Skills](#activated-skills)), so you can inject it under your own logic instead of the model's. Only the literal value `true` disables invocation; any other value (or its absence) leaves the skill model-invocable.
41
+ - `disable-model-invocation` — when `true`, the skill is hidden from the catalog and the LLM cannot activate it via the `skill_activate` tool. It stays reachable through the programmatic `activate` config (see [Activated Skills](#activated-skills)) and through `activation_prompt` (see [User-Triggered Activation](#user-triggered-activation)), so you can inject it under your own logic instead of the model's. Only the literal value `true` disables invocation; any other value (or its absence) leaves the skill model-invocable.
42
42
 
43
43
  Any other frontmatter keys are passed through as metadata.
44
44
 
@@ -85,7 +85,7 @@ end
85
85
 
86
86
  ### Activated Skills
87
87
 
88
- Load skill instructions into the system prompt at startup (no tool call needed). This is also the only way to surface a skill marked `disable-model-invocation: true`, which the model can never activate on its own:
88
+ Load skill instructions into the system prompt at startup (no tool call needed). Use this for skills that should govern the whole session; for skills the user requests mid-conversation, prefer [User-Triggered Activation](#user-triggered-activation), which keeps the system prompt (and its provider-side cache) stable:
89
89
 
90
90
  ```ruby
91
91
  skills do
@@ -107,8 +107,44 @@ end
107
107
 
108
108
  1. **Discovery** — At the start of `generate`/`stream`, the backend's `list_skills` returns frontmatter for all available skills.
109
109
  2. **Catalog injection** — The adapter formats the catalog and appends it to the system prompt.
110
- 3. **Activation** — When the LLM matches a task to a skill, it calls the `skill_activate` tool with the skill name. The tool returns the full SKILL.md body.
110
+ 3. **Activation** — When the LLM matches a task to a skill, it calls the `skill_activate` tool with the skill name. The tool returns the full SKILL.md body wrapped in `<skill_content name="...">` tags.
111
111
  4. **Execution** — The LLM follows the skill's instructions to complete the task.
112
+ 5. **Deduplication** — Re-activating an already-active skill returns a short pointer ("already active") instead of the body again, so repeated activations don't fill the context with duplicate instructions. This applies whichever channel activated the skill first — tool call, `activation_prompt`, or the `activate` config.
113
+
114
+ Activation state lives in memory on the `Riffer::Skills::Context`, not in the session. When you rebuild an agent from a persisted session, the first re-activation of each skill returns the full body again (the conversation history still carries the earlier copy); deduplication resumes from there. If you prune skill content out of a session yourself, call `deactivate(name)` so the next activation returns the body instead of a pointer to content that no longer exists.
115
+
116
+ ## User-Triggered Activation
117
+
118
+ When a user explicitly invokes a skill (a slash command, a button, a mention), don't wait for the model to discover it — inject the skill body into the conversation as a user message. `activation_prompt` returns the body wrapped for injection and records the activation, so a later model-side `skill_activate` call for the same skill gets the pointer instead of a duplicate body:
119
+
120
+ ```ruby
121
+ agent = MyAgent.new
122
+ skills = agent.context.skills
123
+
124
+ # User typed: /code-review focus on security
125
+ if skills.activated?("code-review")
126
+ agent.generate("The code-review skill was invoked again — its instructions are above. focus on security")
127
+ else
128
+ agent.generate("#{skills.activation_prompt("code-review")}\n\nfocus on security")
129
+ end
130
+ ```
131
+
132
+ `activation_prompt("code-review")` returns:
133
+
134
+ ```
135
+ <skill_content name="code-review">
136
+ You are a code review assistant.
137
+ ...
138
+ </skill_content>
139
+ ```
140
+
141
+ How a repeat invocation behaves is your choice — re-inject the full body (`activation_prompt` always returns it), or send a short reference as above. The check via `activated?` covers both channels, so a skill the model already activated through the tool counts too.
142
+
143
+ For reading a skill body **without** recording an activation — a UI preview, or delegating the skill to a subagent whose context is separate — use `read`:
144
+
145
+ ```ruby
146
+ body = skills.read("code-review") # no activation recorded
147
+ ```
112
148
 
113
149
  ## Custom Backends
114
150
 
@@ -158,6 +194,28 @@ class InstrumentedActivateTool < Riffer::Skills::ActivateTool
158
194
  end
159
195
  end
160
196
 
197
+ # Change what a re-activation returns (default: a short "already active" pointer)
198
+ class CustomPointerActivateTool < Riffer::Skills::ActivateTool
199
+ private
200
+
201
+ def already_active_message(name)
202
+ "'#{name}' is loaded — scroll up for its instructions."
203
+ end
204
+ end
205
+
206
+ # Return the full body on every activation (no deduplication)
207
+ class AlwaysFullBodyActivateTool < Riffer::Skills::ActivateTool
208
+ def call(context:, name:)
209
+ skills_context = context&.skills
210
+ return error("Skills not configured") unless skills_context
211
+ return error("Unknown skill: '#{name}'") unless skills_context.model_invocable?(name)
212
+
213
+ text(skills_context.activation_prompt(name))
214
+ rescue Riffer::ArgumentError => e
215
+ error(e.message)
216
+ end
217
+ end
218
+
161
219
  # Global default
162
220
  Riffer.config.skills.default_activate_tool = InstrumentedActivateTool
163
221
 
data/docs/14_MCP.md CHANGED
@@ -109,6 +109,7 @@ end
109
109
  Only use `progressive: false` when the server has a small, stable set of tools you always want available.
110
110
 
111
111
  **`mcp_search`** — Search for available tools by name or description.
112
+
112
113
  - `query` (required, non-empty) — filter by name or description substring.
113
114
 
114
115
  On a successful search, matching tools are injected into the agent's active tool list. The model calls them natively on the next turn — no proxy or JSON-encoded arguments.
@@ -150,7 +151,7 @@ Riffer::Mcp.registrations
150
151
  # => {"github" => #<Riffer::Mcp::Registration ...>, ...}
151
152
 
152
153
  reg = Riffer::Mcp.registrations["github"]
153
- reg.tools # => [<Class:...>, ...] (Riffer::Tool subclasses)
154
+ reg.tools # => [<Class:...>, ...] (Riffer::Mcp::Tool subclasses; .mcp_server_tool_name returns the server-side name)
154
155
  ```
155
156
 
156
157
  Discovery failures raise from `register` directly, typically `Faraday::Error` for network issues or `Riffer::DependencyError` if the `mcp`/`faraday` gems are missing. Rescue `StandardError` for graceful degradation: