RubyGems - riffer - Versions diffs - 0.32.1 → 0.33.0 - Mend

riffer 0.32.1 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

checksums.yaml +4 -4
data/.release-please-manifest.json +1 -1
data/.ruby-version +1 -1
data/CHANGELOG.md +27 -0
data/README.md +13 -11
data/docs/01_OVERVIEW.md +2 -0
data/docs/04_AGENT_LIFECYCLE.md +15 -13
data/docs/08_MESSAGES.md +39 -5
data/docs/09_STREAM_EVENTS.md +14 -0
data/docs/10_CONFIGURATION.md +73 -4
data/docs/13_SKILLS.md +62 -4
data/docs/14_MCP.md +2 -1
data/docs/16_TRACING.md +250 -0
data/docs/17_METRICS.md +123 -0
data/docs/providers/07_CUSTOM_PROVIDERS.md +44 -0
data/lib/riffer/agent/response.rb +11 -2
data/lib/riffer/agent/run.rb +136 -35
data/lib/riffer/agent.rb +1 -1
data/lib/riffer/config.rb +231 -15
data/lib/riffer/guardrail.rb +8 -0
data/lib/riffer/guardrails/runner.rb +33 -0
data/lib/riffer/helpers/boolean.rb +22 -0
data/lib/riffer/mcp/authenticated_tool.rb +14 -20
data/lib/riffer/mcp/registration.rb +4 -4
data/lib/riffer/mcp/tool.rb +23 -0
data/lib/riffer/mcp/tool_factory.rb +14 -22
data/lib/riffer/messages/assistant.rb +15 -3
data/lib/riffer/messages/base.rb +2 -1
data/lib/riffer/metrics/instruments.rb +25 -0
data/lib/riffer/metrics/null.rb +14 -0
data/lib/riffer/metrics/otel.rb +79 -0
data/lib/riffer/metrics.rb +93 -0
data/lib/riffer/providers/amazon_bedrock.rb +57 -21
data/lib/riffer/providers/anthropic.rb +59 -24
data/lib/riffer/providers/azure_open_ai.rb +7 -0
data/lib/riffer/providers/base.rb +247 -15
data/lib/riffer/providers/finish_reason.rb +27 -0
data/lib/riffer/providers/gemini.rb +59 -11
data/lib/riffer/providers/mock.rb +30 -9
data/lib/riffer/providers/open_ai.rb +78 -24
data/lib/riffer/providers/open_router.rb +56 -16
data/lib/riffer/providers/repository.rb +9 -0
data/lib/riffer/providers/token_usage.rb +27 -11
data/lib/riffer/skills/activate_tool.rb +11 -2
data/lib/riffer/skills/adapter.rb +15 -0
data/lib/riffer/skills/context.rb +63 -11
data/lib/riffer/skills/markdown_adapter.rb +1 -1
data/lib/riffer/skills/xml_adapter.rb +1 -1
data/lib/riffer/stream_events/finish_reason_done.rb +34 -0
data/lib/riffer/tools/runtime.rb +99 -3
data/lib/riffer/tracing/capture.rb +92 -0
data/lib/riffer/tracing/null.rb +61 -0
data/lib/riffer/tracing/otel.rb +131 -0
data/lib/riffer/tracing/stream_recorder.rb +51 -0
data/lib/riffer/tracing.rb +78 -0
data/lib/riffer/version.rb +1 -1
data/sig/_private/opentelemetry.rbs +22 -0
data/sig/generated/riffer/agent/response.rbs +9 -2
data/sig/generated/riffer/agent/run.rbs +28 -8
data/sig/generated/riffer/config.rbs +162 -16
data/sig/generated/riffer/guardrail.rbs +6 -0
data/sig/generated/riffer/guardrails/runner.rbs +14 -0
data/sig/generated/riffer/helpers/boolean.rbs +11 -0
data/sig/generated/riffer/mcp/authenticated_tool.rbs +6 -8
data/sig/generated/riffer/mcp/registration.rbs +4 -4
data/sig/generated/riffer/mcp/tool.rbs +19 -0
data/sig/generated/riffer/mcp/tool_factory.rbs +8 -7
data/sig/generated/riffer/messages/assistant.rbs +10 -4
data/sig/generated/riffer/metrics/instruments.rbs +13 -0
data/sig/generated/riffer/metrics/null.rbs +10 -0
data/sig/generated/riffer/metrics/otel.rbs +47 -0
data/sig/generated/riffer/metrics.rbs +71 -0
data/sig/generated/riffer/providers/amazon_bedrock.rbs +35 -14
data/sig/generated/riffer/providers/anthropic.rbs +41 -20
data/sig/generated/riffer/providers/azure_open_ai.rbs +5 -0
data/sig/generated/riffer/providers/base.rbs +78 -2
data/sig/generated/riffer/providers/finish_reason.rbs +19 -0
data/sig/generated/riffer/providers/gemini.rbs +25 -2
data/sig/generated/riffer/providers/mock.rbs +16 -5
data/sig/generated/riffer/providers/open_ai.rbs +44 -22
data/sig/generated/riffer/providers/open_router.rbs +31 -12
data/sig/generated/riffer/providers/repository.rbs +7 -0
data/sig/generated/riffer/providers/token_usage.rbs +20 -10
data/sig/generated/riffer/skills/activate_tool.rbs +7 -1
data/sig/generated/riffer/skills/adapter.rbs +10 -0
data/sig/generated/riffer/skills/context.rbs +42 -4
data/sig/generated/riffer/stream_events/finish_reason_done.rbs +21 -0
data/sig/generated/riffer/tools/runtime.rbs +35 -0
data/sig/generated/riffer/tracing/capture.rbs +46 -0
data/sig/generated/riffer/tracing/null.rbs +46 -0
data/sig/generated/riffer/tracing/otel.rbs +83 -0
data/sig/generated/riffer/tracing/stream_recorder.rbs +31 -0
data/sig/generated/riffer/tracing.rbs +52 -0
data/sig/manual/riffer/helpers/boolean.rbs +5 -0
data/sig/manual/riffer/metrics/null.rbs +5 -0
data/sig/manual/riffer/metrics.rbs +5 -0
data/sig/manual/riffer/providers.rbs +9 -0
data/sig/manual/riffer/tracing/capture.rbs +5 -0
data/sig/manual/riffer/tracing/null.rbs +5 -0
data/sig/manual/riffer/tracing.rbs +5 -0
metadata +40 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 06d5acfa86b1320573aeb4dd2135a8540dbda454871a1e49cca8ee7b30e43336
-  data.tar.gz: b216a00806b2f08516b197cf03c4bc23766c756174ab43fc3a5e45fb5a4f83d2
+  metadata.gz: e72e36f6d29b3a2387246615224e19becb1d157e4a2327f8229ee481f3704e7e
+  data.tar.gz: 89deb58a1ec3a5f0af904d86b38057a2d7bf301490deeadf492bbe11942d0e96
 SHA512:
-  metadata.gz: 6408ae651fb7944a4618eced7dcc19658262560748a6fedf96cab6989cd7497acde23beb1962a4117e67e595f23e870e241d511c60329f10dd2aafa4f2cca8ff
-  data.tar.gz: ce8962451448533b266b8411172e49311616b6b81a3c54a71c4484a249420f16ad474c5bf06e40d7d4e811ec37edf131db150e25b15e001e7970fa56f0ea711c
+  metadata.gz: 11fadbbfd249ce864885709684c51801d3ec2aeba83a3f750ee7afdb390750798d9598df69796bf047afa4c4b4e7136cfc7cff0edf851a0a8d945bb9f8549204
+  data.tar.gz: 60b6c3b2c8e7201cdfb6822709531640b501f4218e63fc2768349c9509b67ba286299fc00372a0c019b07a01390f4a7e77c5a4382e7ffe21ed7cfd250acb67b9

data/.release-please-manifest.json CHANGED Viewed

@@ -1,3 +1,3 @@
 {
-  ".": "0.32.1"
+  ".": "0.33.0"
 }

data/.ruby-version CHANGED Viewed

@@ -1 +1 @@
-4.0.3
+4.0.5

data/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,33 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.33.0](https://github.com/janeapp/riffer/compare/riffer/v0.32.1...riffer/v0.33.0) (2026-06-18)
+### ⚠ BREAKING CHANGES
+* reported input_tokens grows by the cache token counts on Anthropic and Bedrock, and output_tokens grows by the thinking token count on Gemini, whenever those features are active.
+### Features
+* add the Riffer::Metrics OpenTelemetry port foundation ([#325](https://github.com/janeapp/riffer/issues/325)) ([92d060c](https://github.com/janeapp/riffer/commit/92d060c9f006f9f6e904aa57e749909f360b8cd3))
+* add tracing foundation with optional OTEL backend ([#307](https://github.com/janeapp/riffer/issues/307)) ([938194c](https://github.com/janeapp/riffer/commit/938194caa7c9bfcdada9039047adf2e9fd599c27))
+* compute per-model cost on token usage ([#322](https://github.com/janeapp/riffer/issues/322)) ([f637f73](https://github.com/janeapp/riffer/commit/f637f73875cb5aa4563b5bae992269b5d7185ae2))
+* emit a chat span per LLM call with normalized finish reasons ([#312](https://github.com/janeapp/riffer/issues/312)) ([42f39dd](https://github.com/janeapp/riffer/commit/42f39dd244295cf8d398fd7daf14c33d2a169c32))
+* emit an execute_guardrail span per guardrail ([#324](https://github.com/janeapp/riffer/issues/324)) ([b280c71](https://github.com/janeapp/riffer/commit/b280c71bb8089ed7e948d8966f841535178f167f))
+* emit an execute_tool span per tool call ([#318](https://github.com/janeapp/riffer/issues/318)) ([e548450](https://github.com/janeapp/riffer/commit/e548450a980e6a03d06bda488ab94704ad33a37a))
+* emit gen_ai.client.operation.duration metric ([#326](https://github.com/janeapp/riffer/issues/326)) ([a9399b8](https://github.com/janeapp/riffer/commit/a9399b8f1b10bc947b1e0716a483e2921ab2a852))
+* emit gen_ai.client.token.usage metric ([#327](https://github.com/janeapp/riffer/issues/327)) ([f77f0f7](https://github.com/janeapp/riffer/commit/f77f0f7cd8db7cbd3a3a36f10e4ab08e70e909a0))
+* emit invoke_agent span per agent run ([#310](https://github.com/janeapp/riffer/issues/310)) ([49c8c79](https://github.com/janeapp/riffer/commit/49c8c79f3ba9c13c83ed6cd0e427e842a0459176))
+* emit riffer.gen_ai.cost metric from TokenUsage cost ([#328](https://github.com/janeapp/riffer/issues/328)) ([1ee4772](https://github.com/janeapp/riffer/commit/1ee47726def4ca2e812e88157efce1bf2a8d2e69))
+* support user-explicit skill activation and dedupe re-activations ([#305](https://github.com/janeapp/riffer/issues/305)) ([f95b908](https://github.com/janeapp/riffer/commit/f95b90897f60fc4b8f930297e0b22ce61a1a330f))
+* surface cost on LLM-call and run spans ([#323](https://github.com/janeapp/riffer/issues/323)) ([a9074b4](https://github.com/janeapp/riffer/commit/a9074b47defb4a0426c8a51dd90b60260098a14b))
+### Code Refactoring
+* normalize token usage semantics across providers ([#309](https://github.com/janeapp/riffer/issues/309)) ([990f86d](https://github.com/janeapp/riffer/commit/990f86d9ec74cfe85329a7ab583d51b72628c85f))
 ## [0.32.1](https://github.com/janeapp/riffer/compare/riffer/v0.32.0...riffer/v0.32.1) (2026-06-10)

data/README.md CHANGED Viewed

@@ -62,6 +62,8 @@ For comprehensive documentation, see the [docs](docs/) directory:
 - [Skills](docs/13_SKILLS.md) - Packaged agent capabilities
 - [MCP](docs/14_MCP.md) - Integrating third-party MCP servers
 - [Serialization](docs/15_SERIALIZATION.md) - Persisting and transferring agent definitions
+- [Tracing](docs/16_TRACING.md) - OpenTelemetry span contract and host wiring
+- [Metrics](docs/17_METRICS.md) - OpenTelemetry metric contract and host wiring
 - [Providers](docs/providers/01_PROVIDERS.md) - LLM provider adapters
 ### API Reference
@@ -85,17 +87,17 @@ bin/setup
 Common workflows are wrapped in `bin/`. Each is a thin `exec bundle exec …` script — use them
 instead of typing `bundle exec` yourself:
-| Command         | Description                                  |
-| --------------- | -------------------------------------------- |
-| `bin/rake`      | Default task: test + standard + steep:check  |
-| `bin/test`      | Run tests                                    |
-| `bin/lint`      | Check code style (pass `--fix` to auto-fix)  |
-| `bin/typecheck` | Run Steep type checker                       |
-| `bin/rbs`       | Generate RBS type signatures                 |
-| `bin/rbs-watch` | Watch and regenerate RBS files               |
-| `bin/docs`      | Build RDoc HTML                              |
-| `bin/build`     | Build the gem package                        |
-| `bin/console`   | Interactive console                          |
+| Command         | Description                                 |
+| --------------- | ------------------------------------------- |
+| `bin/rake`      | Default task: test + standard + steep:check |
+| `bin/test`      | Run tests                                   |
+| `bin/lint`      | Check code style (pass `--fix` to auto-fix) |
+| `bin/typecheck` | Run Steep type checker                      |
+| `bin/rbs`       | Generate RBS type signatures                |
+| `bin/rbs-watch` | Watch and regenerate RBS files              |
+| `bin/docs`      | Build RDoc HTML                             |
+| `bin/build`     | Build the gem package                       |
+| `bin/console`   | Interactive console                         |
 `bin/rake <task>` is the escape hatch for any rake task without a named wrapper (e.g.
 `bin/rake test:slow`, `bin/rake release`).

data/docs/01_OVERVIEW.md CHANGED Viewed

@@ -131,6 +131,8 @@ Response
 - [Agents](03_AGENTS.md) - Agent configuration and usage
 - [Tools](06_TOOLS.md) - Creating tools
 - [Configuration](10_CONFIGURATION.md) - Global configuration
+- [Tracing](16_TRACING.md) - OpenTelemetry span contract and host wiring
+- [Metrics](17_METRICS.md) - OpenTelemetry metric contract and host wiring
 - [Evals](11_EVALS.md) - Evaluating agent quality
 - [Guardrails](12_GUARDRAILS.md) - Input/output validation
 - [Skills](13_SKILLS.md) - Packaged agent capabilities

data/docs/04_AGENT_LIFECYCLE.md CHANGED Viewed

@@ -288,7 +288,7 @@ Mutators do **not** fire `on_message` — that callback is reserved for messages
 The mutable runtime context. A `Hash` threaded into every Proc-based DSL setting, guardrail, tool runtime, and skills resolution, and shared with every `Riffer::Agent::Run` this agent executes. Carries:
 - `context[:skills]` — the resolved `Riffer::Skills::Context` when skills are configured.
-- `context[:token_usage]` — the cumulative `Riffer::Providers::TokenUsage`, mutated by each Run as the loop progresses.
+- `context[:token_usage]` — the cumulative `Riffer::Providers::TokenUsage`, mutated by each Run as the loop progresses. Per-run totals are on `response.token_usage`.
 - any caller-provided keys passed via `Agent.new(context: ...)`.
 ```ruby
@@ -303,18 +303,20 @@ agent.context[:skills]        # the Skills::Context, if skills configured
 `Riffer::Agent::Response` is returned by `generate`:
-| Attribute              | Type                        | Description                                                                          |
-| ---------------------- | --------------------------- | ------------------------------------------------------------------------------------ |
-| `content`              | `String`                    | The response text                                                                    |
-| `structured_output`    | `Hash` / `nil`              | Parsed and validated structured output (see below)                                   |
-| `blocked?`             | `Boolean`                   | `true` if a guardrail tripwire fired                                                 |
-| `tripwire`             | `Tripwire` / `nil`          | The guardrail tripwire that blocked the request                                      |
-| `modified?`            | `Boolean`                   | `true` if a guardrail modified the content                                           |
-| `modifications`        | `Array`                     | List of guardrail modifications applied                                              |
-| `interrupted?`         | `Boolean`                   | `true` if the loop was interrupted                                                   |
-| `interrupt_reason`     | `String` / `Symbol` / `nil` | The reason passed to `throw :riffer_interrupt`                                       |
-| `messages`             | `Array`                     | Full message history from the conversation                                           |
-| `healed_tool_call_ids` | `Array[String]`             | `tool_call` ids filled with placeholder results during interrupt healing (else `[]`) |
+| Attribute              | Type                        | Description                                                                                      |
+| ---------------------- | --------------------------- | ------------------------------------------------------------------------------------------------ |
+| `content`              | `String`                    | The response text                                                                                |
+| `structured_output`    | `Hash` / `nil`              | Parsed and validated structured output (see below)                                               |
+| `blocked?`             | `Boolean`                   | `true` if a guardrail tripwire fired                                                             |
+| `tripwire`             | `Tripwire` / `nil`          | The guardrail tripwire that blocked the request                                                  |
+| `modified?`            | `Boolean`                   | `true` if a guardrail modified the content                                                       |
+| `modifications`        | `Array`                     | List of guardrail modifications applied                                                          |
+| `interrupted?`         | `Boolean`                   | `true` if the loop was interrupted                                                               |
+| `interrupt_reason`     | `String` / `Symbol` / `nil` | The reason passed to `throw :riffer_interrupt`                                                   |
+| `messages`             | `Array`                     | Full message history from the conversation                                                       |
+| `healed_tool_call_ids` | `Array[String]`             | `tool_call` ids filled with placeholder results during interrupt healing (else `[]`)             |
+| `token_usage`          | `TokenUsage` / `nil`        | Aggregate `Riffer::Providers::TokenUsage` across this run's LLM calls (`nil` when none reported) |
+| `steps`                | `Integer`                   | LLM calls made during this run (`0` when a before-guardrail blocks first); not the session's cumulative count |
 ### response.structured_output

data/docs/08_MESSAGES.md CHANGED Viewed

@@ -40,15 +40,16 @@ msg.to_h     # => {role: :user, content: "Describe this image", files: [{...}]}
 ### Assistant
-Assistant messages represent LLM responses, potentially including tool calls and token usage data:
+Assistant messages represent LLM responses, potentially including tool calls, token usage data, and the reason the model finished:
 ```ruby
 # Text-only response
 msg = Riffer::Messages::Assistant.new("I'm doing well, thank you!")
-msg.role         # => :assistant
-msg.content      # => "I'm doing well, thank you!"
-msg.tool_calls   # => []
-msg.token_usage  # => nil or Riffer::Providers::TokenUsage
+msg.role           # => :assistant
+msg.content        # => "I'm doing well, thank you!"
+msg.tool_calls     # => []
+msg.token_usage    # => nil or Riffer::Providers::TokenUsage
+msg.finish_reason  # => nil or a normalized Symbol (see below)
 # Response with tool calls
 msg = Riffer::Messages::Assistant.new("", tool_calls: [
@@ -65,6 +66,39 @@ if msg.token_usage
 end
 ```
+#### Token Usage Semantics
+`TokenUsage` buckets carry the same meaning for every provider, regardless of how the provider reports its raw usage:
+- `input_tokens` — every token entering the context window, including cache reads and writes.
+- `output_tokens` — every token the model generated, including reasoning/thinking tokens.
+- `cache_read_tokens` — the subset of `input_tokens` read from the provider's prompt cache; `nil` when the provider doesn't report it.
+- `cache_write_tokens` — the subset of `input_tokens` written to the provider's prompt cache; `nil` when the provider doesn't report it.
+The cache buckets are subsets of `input_tokens`, never additions to it — summing `input_tokens + cache_read_tokens` double-counts. `total_tokens` (input + output) matches the totals providers report on their dashboards.
+- `cost` — the computed cost of the call, set when pricing is configured for the model in use (see [Configuration → Pricing](10_CONFIGURATION.md#pricing)); `nil` when the model is unpriced. It's for observability, not billing. Run-level usage sums per-call costs through `TokenUsage#+`, so `response.token_usage.cost` is the total spend across the run — but the sum is `nil` if any call in the run used an unpriced model, rather than silently under-reporting.
+#### Finish Reasons
+`finish_reason` carries the same meaning for every provider — each adapter maps its raw wire value (Anthropic's `end_turn`, OpenAI's response status, Gemini's `STOP`, …) into a normalized vocabulary:
+| Value             | Meaning                                                         |
+| ----------------- | --------------------------------------------------------------- |
+| `:stop`           | The model finished its turn naturally (or hit a stop sequence). |
+| `:length`         | Output was truncated at the max-token limit.                    |
+| `:tool_calls`     | The model stopped to call tools.                                |
+| `:content_filter` | A provider safety system blocked or cut the response.           |
+| `:error`          | The provider reported an error finish.                          |
+| `:other`          | A provider-specific value with no normalized equivalent.        |
+`finish_reason` is `nil` when the provider doesn't report one. Use it to detect truncation without parsing provider responses:
+```ruby
+response = agent.generate("Summarize this document")
+retry_with_higher_limit if agent.session.messages.last.finish_reason == :length
+```
 #### Structured Output on Messages
 When an agent has `structured_output` configured, the final assistant message stores the parsed hash directly. The `structured_output?` predicate checks for a non-nil value:

data/docs/09_STREAM_EVENTS.md CHANGED Viewed

@@ -258,6 +258,20 @@ event.to_h                          # => {role: :assistant, token_usage: {input_
 Use this to track token consumption in real-time during streaming.
+### FinishReasonDone
+Emitted once near the end of the stream with the normalized reason the model finished (no ordering guarantee relative to `TokenUsageDone`):
+```ruby
+event = Riffer::StreamEvents::FinishReasonDone.new(finish_reason: :length, raw_finish_reason: "max_tokens")
+event.role               # => :assistant
+event.finish_reason      # => :length (see Messages — Finish Reasons for the vocabulary)
+event.raw_finish_reason  # => "max_tokens" (the provider's raw wire value, or nil)
+event.to_h               # => {role: :assistant, finish_reason: :length, raw_finish_reason: "max_tokens"}
+```
+The agent loop stamps this value onto the accumulated assistant message's `finish_reason`.
 ## Streaming with Tools
 When an agent uses tools during streaming, the flow is:

data/docs/10_CONFIGURATION.md CHANGED Viewed

@@ -65,11 +65,11 @@ Riffer.configure do |config|
 end
 ```
-| Value                          | Description                                                                                       |
-| ------------------------------ | ------------------------------------------------------------------------------------------------- |
+| Value                             | Description                                                                                             |
+| --------------------------------- | ------------------------------------------------------------------------------------------------------- |
 | `Riffer::Tools::Runtime` subclass | Instantiated automatically (e.g., `Riffer::Tools::Runtime::Inline`, `Riffer::Tools::Runtime::Threaded`) |
-| `Riffer::Tools::Runtime` instance | Custom runtime with specific options                                                              |
-| `Proc`                         | Dynamic resolution                                                                                |
+| `Riffer::Tools::Runtime` instance | Custom runtime with specific options                                                                    |
+| `Proc`                            | Dynamic resolution                                                                                      |
 Per-agent configuration overrides this global default. See [Advanced Tool Configuration — Tool Runtime](07_TOOL_ADVANCED.md#tool-runtime-experimental) for details.
@@ -101,6 +101,75 @@ end
 Accepts a `Riffer::Skills::Backend` instance or a `Proc` that receives `context` and returns a backend. Defaults to `nil` — agents that don't set their own backend get no skills, matching pre-existing behavior. Per-agent backends override this default.
+### Tracing
+Tracing-related global configuration lives under `config.tracing`. Riffer detects the OpenTelemetry API at runtime — without it (or without a host-configured OTEL SDK) every span is a silent no-op, and riffer carries no OTEL gem dependency either way.
+```ruby
+Riffer.configure do |config|
+  config.tracing.enabled = ENV.fetch("RIFFER_TRACING_ENABLED", "true")
+end
+```
+| Option             | Description                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `enabled`          | The kill switch, consulted on every span — flipping it at runtime takes effect immediately. Accepts booleans or `'true'`/`'false'`/`'1'`/`'0'`. Defaults to `true`.                                                                                                                                                                                                                                                                               |
+| `capture_messages` | Opt-in capture of full message content on LLM-call spans (`gen_ai.input.messages`, `gen_ai.output.messages`, `gen_ai.system_instructions`) as GenAI-semconv JSON. Defaults to `false` — message content routinely carries sensitive data. File attachments serialize as metadata-only stubs (media type and name, never bytes), and riffer applies no size limit of its own — cap oversized attributes with the OTEL SDK attribute length limits. |
+| `tracer_provider`  | Explicit OTEL tracer provider (e.g. the SDK's in-memory provider in tests). Defaults to `nil`, which resolves the global `OpenTelemetry.tracer_provider` lazily at first span. Raises `Riffer::ArgumentError` if the `opentelemetry-api` gem isn't available at a supported version (>= 1.1, < 2).                                                                                                                                                |
+Hosts own SDK and exporter wiring — riffer only emits spans through whatever provider the host configures. See [Tracing](16_TRACING.md) for the emitted span contract — names, attributes, hierarchy, and host wiring.
+### Metrics
+Metrics-related global configuration lives under `config.metrics`, **independent** of `config.tracing` — each has its own kill switch, so you can run one signal without the other. Riffer detects the OpenTelemetry metrics API at runtime — without it (or without a host-configured OTEL metrics SDK) every measurement is a silent no-op, and riffer carries no OTEL gem dependency either way. The metrics API and SDK ship as gems separate from the traces API, and both are still experimental (pre-1.0).
+```ruby
+Riffer.configure do |config|
+  config.metrics.enabled = ENV.fetch("RIFFER_METRICS_ENABLED", "true")
+end
+```
+| Option           | Description                                                                                                                                                                                                                                                                                                  |
+| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `enabled`        | The kill switch, consulted on every measurement — flipping it at runtime takes effect immediately. Accepts booleans or `'true'`/`'false'`/`'1'`/`'0'`. Defaults to `true`.                                                                                                                                   |
+| `meter_provider` | Explicit OTEL meter provider (e.g. the SDK's in-memory provider in tests). Defaults to `nil`, which resolves the global `OpenTelemetry.meter_provider` lazily at first record. Raises `Riffer::ArgumentError` if the `opentelemetry-metrics-api` gem isn't available at a supported version (>= 0.2, < 1.0). |
+Hosts own SDK, reader, exporter, and aggregation wiring — riffer only records instruments through whatever provider the host configures, and histogram bucket boundaries are set host-side via Views. See [Metrics](17_METRICS.md) for the instrument contract — names, units, attributes, and host wiring.
+### Pricing
+Configure per-model token prices and riffer computes the cost of each LLM call onto its [`TokenUsage`](08_MESSAGES.md#token-usage-semantics). Riffer ships **no** price table — so an unconfigured model simply carries no cost (`token_usage.cost` is `nil`).
+```ruby
+Riffer.configure do |config|
+  # Rates are per million tokens, keyed by the same "provider/model" id you give the agent.
+  config.pricing.set("anthropic/claude-sonnet-4-6", input: 3.0, output: 15.0, cache_read: 0.30, cache_write: 3.75)
+  config.pricing.set("openai/gpt-4", input: 30.0, output: 60.0)
+  # Pass an array to share one set of rates across a model family:
+  config.pricing.set(["openai/gpt-4", "openai/gpt-4-0613"], input: 30.0, output: 60.0)
+end
+```
+| Argument       | Description                                                                                                                                                                        |
+| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `models`       | A `provider/model` id (e.g. `"openai/gpt-4"`) — the same string you pass to `model` — or an array of ids that share one set of rates. No alias matching; raises on a malformed id. |
+| `input:`       | Price per **million** input tokens. Required. Applies to the uncached portion of `input_tokens`.                                                                                   |
+| `output:`      | Price per **million** output tokens. Required.                                                                                                                                     |
+| `cache_read:`  | Price per million cache-read tokens. Optional — when omitted, cache reads bill at the `input:` rate.                                                                               |
+| `cache_write:` | Price per million cache-write tokens. Optional — when omitted, cache writes bill at the `input:` rate.                                                                             |
+Because the cache buckets are subsets of `input_tokens`, the cost formula subtracts them before applying the input rate:
+```text
+cost = (input − cache_read − cache_write) × input_rate
+     + cache_read  × cache_read_rate
+     + cache_write × cache_write_rate
+     + output      × output_rate
+```
+(All rates are divided by 1,000,000; an unset cache rate falls back to `input_rate`.) Cost is for observability, not billing — it's a `Float`, and sub-cent rounding can accumulate over a long run. See [Messages → Token Usage Semantics](08_MESSAGES.md#token-usage-semantics) for how cost surfaces and aggregates.
 ### Message ID Strategy
 Opt in to stable identifiers on every message for logging, persistence, or replay:

data/docs/13_SKILLS.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Skills
-Skills are packaged AI agent capabilities per the [Agent Skills spec](https://agentskills.io/). Each skill is a directory containing a `SKILL.md` file with YAML frontmatter and Markdown instructions. The framework discovers skills through a pluggable backend, injects a compact catalog into the system prompt (~50 tokens/skill), and provides a tool for the LLM to activate skills on demand.
+Skills are packaged AI agent capabilities per the [Agent Skills spec](https://agentskills.io/). Each skill is a directory containing a `SKILL.md` file with YAML frontmatter and Markdown instructions. The framework discovers skills through a pluggable backend, injects a compact catalog into the system prompt (~50 tokens/skill), and supports both activation channels the spec describes: the LLM activates skills on demand through a tool, and your application injects skills the user asked for as conversation content (see [User-Triggered Activation](#user-triggered-activation)).
 ## Creating a Skill
@@ -38,7 +38,7 @@ Review the code for:
 **Optional frontmatter fields:**
-- `disable-model-invocation` — when `true`, the skill is hidden from the catalog and the LLM cannot activate it via the `skill_activate` tool. It stays reachable through the programmatic `activate` config (see [Activated Skills](#activated-skills)), so you can inject it under your own logic instead of the model's. Only the literal value `true` disables invocation; any other value (or its absence) leaves the skill model-invocable.
+- `disable-model-invocation` — when `true`, the skill is hidden from the catalog and the LLM cannot activate it via the `skill_activate` tool. It stays reachable through the programmatic `activate` config (see [Activated Skills](#activated-skills)) and through `activation_prompt` (see [User-Triggered Activation](#user-triggered-activation)), so you can inject it under your own logic instead of the model's. Only the literal value `true` disables invocation; any other value (or its absence) leaves the skill model-invocable.
 Any other frontmatter keys are passed through as metadata.
@@ -85,7 +85,7 @@ end
 ### Activated Skills
-Load skill instructions into the system prompt at startup (no tool call needed). This is also the only way to surface a skill marked `disable-model-invocation: true`, which the model can never activate on its own:
+Load skill instructions into the system prompt at startup (no tool call needed). Use this for skills that should govern the whole session — for skills the user requests mid-conversation, prefer [User-Triggered Activation](#user-triggered-activation), which keeps the system prompt (and its provider-side cache) stable:
 ```ruby
 skills do
@@ -107,8 +107,44 @@ end
 1. **Discovery** — At the start of `generate`/`stream`, the backend's `list_skills` returns frontmatter for all available skills.
 2. **Catalog injection** — The adapter formats the catalog and appends it to the system prompt.
-3. **Activation** — When the LLM matches a task to a skill, it calls the `skill_activate` tool with the skill name. The tool returns the full SKILL.md body.
+3. **Activation** — When the LLM matches a task to a skill, it calls the `skill_activate` tool with the skill name. The tool returns the full SKILL.md body wrapped in `<skill_content name="...">` tags.
 4. **Execution** — The LLM follows the skill's instructions to complete the task.
+5. **Deduplication** — Re-activating an already-active skill returns a short pointer ("already active") instead of the body again, so repeated activations don't fill the context with duplicate instructions. This applies whichever channel activated the skill first — tool call, `activation_prompt`, or the `activate` config.
+Activation state lives in memory on the `Riffer::Skills::Context`, not in the session. When you rebuild an agent from a persisted session, the first re-activation of each skill returns the full body again (the conversation history still carries the earlier copy); deduplication resumes from there. If you prune skill content out of a session yourself, call `deactivate(name)` so the next activation returns the body instead of a pointer to content that no longer exists.
+## User-Triggered Activation
+When a user explicitly invokes a skill (a slash command, a button, a mention), don't wait for the model to discover it — inject the skill body into the conversation as a user message. `activation_prompt` returns the body wrapped for injection and records the activation, so a later model-side `skill_activate` call for the same skill gets the pointer instead of a duplicate body:
+```ruby
+agent = MyAgent.new
+skills = agent.context.skills
+# User typed: /code-review focus on security
+if skills.activated?("code-review")
+  agent.generate("The code-review skill was invoked again — its instructions are above. focus on security")
+else
+  agent.generate("#{skills.activation_prompt("code-review")}\n\nfocus on security")
+end
+```
+`activation_prompt("code-review")` returns:
+```
+<skill_content name="code-review">
+You are a code review assistant.
+...
+</skill_content>
+```
+How a repeat invocation behaves is your choice — re-inject the full body (`activation_prompt` always returns it), or send a short reference as above. The `activated?` check covers both activation channels (the model's `skill_activate` tool call and `activation_prompt`), so a skill the model already activated through the tool counts too.
+For reading a skill body **without** recording an activation — a UI preview, or delegating the skill to a subagent whose context is separate — use `read`:
+```ruby
+body = skills.read("code-review") # no activation recorded
+```
 ## Custom Backends
@@ -158,6 +194,28 @@ class InstrumentedActivateTool < Riffer::Skills::ActivateTool
   end
 end
+# Change what a re-activation returns (default: a short "already active" pointer)
+class CustomPointerActivateTool < Riffer::Skills::ActivateTool
+  private
+  def already_active_message(name)
+    "'#{name}' is loaded — scroll up for its instructions."
+  end
+end
+# Return the full body on every activation (no deduplication)
+class AlwaysFullBodyActivateTool < Riffer::Skills::ActivateTool
+  def call(context:, name:)
+    skills_context = context&.skills
+    return error("Skills not configured") unless skills_context
+    return error("Unknown skill: '#{name}'") unless skills_context.model_invocable?(name)
+    text(skills_context.activation_prompt(name))
+  rescue Riffer::ArgumentError => e
+    error(e.message)
+  end
+end
 # Global default
 Riffer.config.skills.default_activate_tool = InstrumentedActivateTool

data/docs/14_MCP.md CHANGED Viewed

@@ -109,6 +109,7 @@ end
 Only use `progressive: false` when the server has a small, stable set of tools you always want available.
 **`mcp_search`** — Search for available tools by name or description.
 - `query` (required, non-empty) — filter by name or description substring.
 On a successful search, matching tools are injected into the agent's active tool list. The model calls them natively on the next turn — no proxy or JSON-encoded arguments.
@@ -150,7 +151,7 @@ Riffer::Mcp.registrations
 # => {"github" => #<Riffer::Mcp::Registration ...>, ...}
 reg = Riffer::Mcp.registrations["github"]
-reg.tools    # => [<Class:...>, ...]  (Riffer::Tool subclasses)
+reg.tools    # => [<Class:...>, ...]  (Riffer::Mcp::Tool subclasses; .mcp_server_tool_name returns the server-side name)
 ```
 Discovery failures raise from `register` directly, typically `Faraday::Error` for network issues or `Riffer::DependencyError` if the `mcp`/`faraday` gems are missing. Rescue `StandardError` for graceful degradation: