riffer 0.32.0 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/.ruby-version +1 -1
- data/CHANGELOG.md +34 -0
- data/README.md +13 -11
- data/docs/01_OVERVIEW.md +2 -0
- data/docs/04_AGENT_LIFECYCLE.md +15 -13
- data/docs/08_MESSAGES.md +39 -5
- data/docs/09_STREAM_EVENTS.md +14 -0
- data/docs/10_CONFIGURATION.md +73 -4
- data/docs/13_SKILLS.md +66 -4
- data/docs/14_MCP.md +2 -1
- data/docs/16_TRACING.md +250 -0
- data/docs/17_METRICS.md +123 -0
- data/docs/providers/07_CUSTOM_PROVIDERS.md +44 -0
- data/lib/riffer/agent/response.rb +11 -2
- data/lib/riffer/agent/run.rb +136 -35
- data/lib/riffer/agent.rb +5 -5
- data/lib/riffer/config.rb +231 -15
- data/lib/riffer/guardrail.rb +8 -0
- data/lib/riffer/guardrails/runner.rb +33 -0
- data/lib/riffer/helpers/boolean.rb +22 -0
- data/lib/riffer/mcp/authenticated_tool.rb +14 -20
- data/lib/riffer/mcp/registration.rb +4 -4
- data/lib/riffer/mcp/tool.rb +23 -0
- data/lib/riffer/mcp/tool_factory.rb +14 -22
- data/lib/riffer/messages/assistant.rb +15 -3
- data/lib/riffer/messages/base.rb +2 -1
- data/lib/riffer/metrics/instruments.rb +25 -0
- data/lib/riffer/metrics/null.rb +14 -0
- data/lib/riffer/metrics/otel.rb +79 -0
- data/lib/riffer/metrics.rb +93 -0
- data/lib/riffer/providers/amazon_bedrock.rb +57 -21
- data/lib/riffer/providers/anthropic.rb +59 -24
- data/lib/riffer/providers/azure_open_ai.rb +7 -0
- data/lib/riffer/providers/base.rb +247 -15
- data/lib/riffer/providers/finish_reason.rb +27 -0
- data/lib/riffer/providers/gemini.rb +59 -11
- data/lib/riffer/providers/mock.rb +30 -9
- data/lib/riffer/providers/open_ai.rb +78 -24
- data/lib/riffer/providers/open_router.rb +56 -16
- data/lib/riffer/providers/repository.rb +9 -0
- data/lib/riffer/providers/token_usage.rb +27 -11
- data/lib/riffer/skills/activate_tool.rb +12 -2
- data/lib/riffer/skills/adapter.rb +15 -0
- data/lib/riffer/skills/context.rb +78 -11
- data/lib/riffer/skills/frontmatter.rb +13 -5
- data/lib/riffer/skills/markdown_adapter.rb +1 -1
- data/lib/riffer/skills/xml_adapter.rb +1 -1
- data/lib/riffer/stream_events/finish_reason_done.rb +34 -0
- data/lib/riffer/tools/runtime.rb +99 -3
- data/lib/riffer/tracing/capture.rb +92 -0
- data/lib/riffer/tracing/null.rb +61 -0
- data/lib/riffer/tracing/otel.rb +131 -0
- data/lib/riffer/tracing/stream_recorder.rb +51 -0
- data/lib/riffer/tracing.rb +78 -0
- data/lib/riffer/version.rb +1 -1
- data/sig/_private/opentelemetry.rbs +22 -0
- data/sig/generated/riffer/agent/response.rbs +9 -2
- data/sig/generated/riffer/agent/run.rbs +28 -8
- data/sig/generated/riffer/config.rbs +162 -16
- data/sig/generated/riffer/guardrail.rbs +6 -0
- data/sig/generated/riffer/guardrails/runner.rbs +14 -0
- data/sig/generated/riffer/helpers/boolean.rbs +11 -0
- data/sig/generated/riffer/mcp/authenticated_tool.rbs +6 -8
- data/sig/generated/riffer/mcp/registration.rbs +4 -4
- data/sig/generated/riffer/mcp/tool.rbs +19 -0
- data/sig/generated/riffer/mcp/tool_factory.rbs +8 -7
- data/sig/generated/riffer/messages/assistant.rbs +10 -4
- data/sig/generated/riffer/metrics/instruments.rbs +13 -0
- data/sig/generated/riffer/metrics/null.rbs +10 -0
- data/sig/generated/riffer/metrics/otel.rbs +47 -0
- data/sig/generated/riffer/metrics.rbs +71 -0
- data/sig/generated/riffer/providers/amazon_bedrock.rbs +35 -14
- data/sig/generated/riffer/providers/anthropic.rbs +41 -20
- data/sig/generated/riffer/providers/azure_open_ai.rbs +5 -0
- data/sig/generated/riffer/providers/base.rbs +78 -2
- data/sig/generated/riffer/providers/finish_reason.rbs +19 -0
- data/sig/generated/riffer/providers/gemini.rbs +25 -2
- data/sig/generated/riffer/providers/mock.rbs +16 -5
- data/sig/generated/riffer/providers/open_ai.rbs +44 -22
- data/sig/generated/riffer/providers/open_router.rbs +31 -12
- data/sig/generated/riffer/providers/repository.rbs +7 -0
- data/sig/generated/riffer/providers/token_usage.rbs +20 -10
- data/sig/generated/riffer/skills/activate_tool.rbs +7 -1
- data/sig/generated/riffer/skills/adapter.rbs +10 -0
- data/sig/generated/riffer/skills/context.rbs +52 -4
- data/sig/generated/riffer/skills/frontmatter.rbs +10 -3
- data/sig/generated/riffer/stream_events/finish_reason_done.rbs +21 -0
- data/sig/generated/riffer/tools/runtime.rbs +35 -0
- data/sig/generated/riffer/tracing/capture.rbs +46 -0
- data/sig/generated/riffer/tracing/null.rbs +46 -0
- data/sig/generated/riffer/tracing/otel.rbs +83 -0
- data/sig/generated/riffer/tracing/stream_recorder.rbs +31 -0
- data/sig/generated/riffer/tracing.rbs +52 -0
- data/sig/manual/riffer/helpers/boolean.rbs +5 -0
- data/sig/manual/riffer/metrics/null.rbs +5 -0
- data/sig/manual/riffer/metrics.rbs +5 -0
- data/sig/manual/riffer/providers.rbs +9 -0
- data/sig/manual/riffer/tracing/capture.rbs +5 -0
- data/sig/manual/riffer/tracing/null.rbs +5 -0
- data/sig/manual/riffer/tracing.rbs +5 -0
- metadata +40 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e72e36f6d29b3a2387246615224e19becb1d157e4a2327f8229ee481f3704e7e
|
|
4
|
+
data.tar.gz: 89deb58a1ec3a5f0af904d86b38057a2d7bf301490deeadf492bbe11942d0e96
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 11fadbbfd249ce864885709684c51801d3ec2aeba83a3f750ee7afdb390750798d9598df69796bf047afa4c4b4e7136cfc7cff0edf851a0a8d945bb9f8549204
|
|
7
|
+
data.tar.gz: 60b6c3b2c8e7201cdfb6822709531640b501f4218e63fc2768349c9509b67ba286299fc00372a0c019b07a01390f4a7e77c5a4382e7ffe21ed7cfd250acb67b9
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
4.0.
|
|
1
|
+
4.0.5
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,40 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.33.0](https://github.com/janeapp/riffer/compare/riffer/v0.32.1...riffer/v0.33.0) (2026-06-18)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### ⚠ BREAKING CHANGES
|
|
12
|
+
|
|
13
|
+
* reported input_tokens grows by the cache token counts on Anthropic and Bedrock, and output_tokens grows by the thinking token count on Gemini, whenever those features are active.
|
|
14
|
+
|
|
15
|
+
### Features
|
|
16
|
+
|
|
17
|
+
* add the Riffer::Metrics OpenTelemetry port foundation ([#325](https://github.com/janeapp/riffer/issues/325)) ([92d060c](https://github.com/janeapp/riffer/commit/92d060c9f006f9f6e904aa57e749909f360b8cd3))
|
|
18
|
+
* add tracing foundation with optional OTEL backend ([#307](https://github.com/janeapp/riffer/issues/307)) ([938194c](https://github.com/janeapp/riffer/commit/938194caa7c9bfcdada9039047adf2e9fd599c27))
|
|
19
|
+
* compute per-model cost on token usage ([#322](https://github.com/janeapp/riffer/issues/322)) ([f637f73](https://github.com/janeapp/riffer/commit/f637f73875cb5aa4563b5bae992269b5d7185ae2))
|
|
20
|
+
* emit a chat span per LLM call with normalized finish reasons ([#312](https://github.com/janeapp/riffer/issues/312)) ([42f39dd](https://github.com/janeapp/riffer/commit/42f39dd244295cf8d398fd7daf14c33d2a169c32))
|
|
21
|
+
* emit an execute_guardrail span per guardrail ([#324](https://github.com/janeapp/riffer/issues/324)) ([b280c71](https://github.com/janeapp/riffer/commit/b280c71bb8089ed7e948d8966f841535178f167f))
|
|
22
|
+
* emit an execute_tool span per tool call ([#318](https://github.com/janeapp/riffer/issues/318)) ([e548450](https://github.com/janeapp/riffer/commit/e548450a980e6a03d06bda488ab94704ad33a37a))
|
|
23
|
+
* emit gen_ai.client.operation.duration metric ([#326](https://github.com/janeapp/riffer/issues/326)) ([a9399b8](https://github.com/janeapp/riffer/commit/a9399b8f1b10bc947b1e0716a483e2921ab2a852))
|
|
24
|
+
* emit gen_ai.client.token.usage metric ([#327](https://github.com/janeapp/riffer/issues/327)) ([f77f0f7](https://github.com/janeapp/riffer/commit/f77f0f7cd8db7cbd3a3a36f10e4ab08e70e909a0))
|
|
25
|
+
* emit invoke_agent span per agent run ([#310](https://github.com/janeapp/riffer/issues/310)) ([49c8c79](https://github.com/janeapp/riffer/commit/49c8c79f3ba9c13c83ed6cd0e427e842a0459176))
|
|
26
|
+
* emit riffer.gen_ai.cost metric from TokenUsage cost ([#328](https://github.com/janeapp/riffer/issues/328)) ([1ee4772](https://github.com/janeapp/riffer/commit/1ee47726def4ca2e812e88157efce1bf2a8d2e69))
|
|
27
|
+
* support user-explicit skill activation and dedupe re-activations ([#305](https://github.com/janeapp/riffer/issues/305)) ([f95b908](https://github.com/janeapp/riffer/commit/f95b90897f60fc4b8f930297e0b22ce61a1a330f))
|
|
28
|
+
* surface cost on LLM-call and run spans ([#323](https://github.com/janeapp/riffer/issues/323)) ([a9074b4](https://github.com/janeapp/riffer/commit/a9074b47defb4a0426c8a51dd90b60260098a14b))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
### Code Refactoring
|
|
32
|
+
|
|
33
|
+
* normalize token usage semantics across providers ([#309](https://github.com/janeapp/riffer/issues/309)) ([990f86d](https://github.com/janeapp/riffer/commit/990f86d9ec74cfe85329a7ab583d51b72628c85f))
|
|
34
|
+
|
|
35
|
+
## [0.32.1](https://github.com/janeapp/riffer/compare/riffer/v0.32.0...riffer/v0.32.1) (2026-06-10)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
### Bug Fixes
|
|
39
|
+
|
|
40
|
+
* respect disable-model-invocation in skills ([#303](https://github.com/janeapp/riffer/issues/303)) ([2cf8719](https://github.com/janeapp/riffer/commit/2cf8719ecc36d748fabf7f03c8427e2b3043d30c))
|
|
41
|
+
|
|
8
42
|
## [0.32.0](https://github.com/janeapp/riffer/compare/riffer/v0.31.0...riffer/v0.32.0) (2026-06-08)
|
|
9
43
|
|
|
10
44
|
|
data/README.md
CHANGED
|
@@ -62,6 +62,8 @@ For comprehensive documentation, see the [docs](docs/) directory:
|
|
|
62
62
|
- [Skills](docs/13_SKILLS.md) - Packaged agent capabilities
|
|
63
63
|
- [MCP](docs/14_MCP.md) - Integrating third-party MCP servers
|
|
64
64
|
- [Serialization](docs/15_SERIALIZATION.md) - Persisting and transferring agent definitions
|
|
65
|
+
- [Tracing](docs/16_TRACING.md) - OpenTelemetry span contract and host wiring
|
|
66
|
+
- [Metrics](docs/17_METRICS.md) - OpenTelemetry metric contract and host wiring
|
|
65
67
|
- [Providers](docs/providers/01_PROVIDERS.md) - LLM provider adapters
|
|
66
68
|
|
|
67
69
|
### API Reference
|
|
@@ -85,17 +87,17 @@ bin/setup
|
|
|
85
87
|
Common workflows are wrapped in `bin/`. Each is a thin `exec bundle exec …` script — use them
|
|
86
88
|
instead of typing `bundle exec` yourself:
|
|
87
89
|
|
|
88
|
-
| Command | Description
|
|
89
|
-
| --------------- |
|
|
90
|
-
| `bin/rake` | Default task: test + standard + steep:check
|
|
91
|
-
| `bin/test` | Run tests
|
|
92
|
-
| `bin/lint` | Check code style (pass `--fix` to auto-fix)
|
|
93
|
-
| `bin/typecheck` | Run Steep type checker
|
|
94
|
-
| `bin/rbs` | Generate RBS type signatures
|
|
95
|
-
| `bin/rbs-watch` | Watch and regenerate RBS files
|
|
96
|
-
| `bin/docs` | Build RDoc HTML
|
|
97
|
-
| `bin/build` | Build the gem package
|
|
98
|
-
| `bin/console` | Interactive console
|
|
90
|
+
| Command | Description |
|
|
91
|
+
| --------------- | ------------------------------------------- |
|
|
92
|
+
| `bin/rake` | Default task: test + standard + steep:check |
|
|
93
|
+
| `bin/test` | Run tests |
|
|
94
|
+
| `bin/lint` | Check code style (pass `--fix` to auto-fix) |
|
|
95
|
+
| `bin/typecheck` | Run Steep type checker |
|
|
96
|
+
| `bin/rbs` | Generate RBS type signatures |
|
|
97
|
+
| `bin/rbs-watch` | Watch and regenerate RBS files |
|
|
98
|
+
| `bin/docs` | Build RDoc HTML |
|
|
99
|
+
| `bin/build` | Build the gem package |
|
|
100
|
+
| `bin/console` | Interactive console |
|
|
99
101
|
|
|
100
102
|
`bin/rake <task>` is the escape hatch for any rake task without a named wrapper (e.g.
|
|
101
103
|
`bin/rake test:slow`, `bin/rake release`).
|
data/docs/01_OVERVIEW.md
CHANGED
|
@@ -131,6 +131,8 @@ Response
|
|
|
131
131
|
- [Agents](03_AGENTS.md) - Agent configuration and usage
|
|
132
132
|
- [Tools](06_TOOLS.md) - Creating tools
|
|
133
133
|
- [Configuration](10_CONFIGURATION.md) - Global configuration
|
|
134
|
+
- [Tracing](16_TRACING.md) - OpenTelemetry span contract and host wiring
|
|
135
|
+
- [Metrics](17_METRICS.md) - OpenTelemetry metric contract and host wiring
|
|
134
136
|
- [Evals](11_EVALS.md) - Evaluating agent quality
|
|
135
137
|
- [Guardrails](12_GUARDRAILS.md) - Input/output validation
|
|
136
138
|
- [Skills](13_SKILLS.md) - Packaged agent capabilities
|
data/docs/04_AGENT_LIFECYCLE.md
CHANGED
|
@@ -288,7 +288,7 @@ Mutators do **not** fire `on_message` — that callback is reserved for messages
|
|
|
288
288
|
The mutable runtime context. A `Hash` threaded into every Proc-based DSL setting, guardrail, tool runtime, and skills resolution, and shared with every `Riffer::Agent::Run` this agent executes. Carries:
|
|
289
289
|
|
|
290
290
|
- `context[:skills]` — the resolved `Riffer::Skills::Context` when skills are configured.
|
|
291
|
-
- `context[:token_usage]` — the cumulative `Riffer::Providers::TokenUsage`, mutated by each Run as the loop progresses.
|
|
291
|
+
- `context[:token_usage]` — the cumulative `Riffer::Providers::TokenUsage`, mutated by each Run as the loop progresses. Per-run totals are on `response.token_usage`.
|
|
292
292
|
- any caller-provided keys passed via `Agent.new(context: ...)`.
|
|
293
293
|
|
|
294
294
|
```ruby
|
|
@@ -303,18 +303,20 @@ agent.context[:skills] # the Skills::Context, if skills configured
|
|
|
303
303
|
|
|
304
304
|
`Riffer::Agent::Response` is returned by `generate`:
|
|
305
305
|
|
|
306
|
-
| Attribute | Type | Description
|
|
307
|
-
| ---------------------- | --------------------------- |
|
|
308
|
-
| `content` | `String` | The response text
|
|
309
|
-
| `structured_output` | `Hash` / `nil` | Parsed and validated structured output (see below)
|
|
310
|
-
| `blocked?` | `Boolean` | `true` if a guardrail tripwire fired
|
|
311
|
-
| `tripwire` | `Tripwire` / `nil` | The guardrail tripwire that blocked the request
|
|
312
|
-
| `modified?` | `Boolean` | `true` if a guardrail modified the content
|
|
313
|
-
| `modifications` | `Array` | List of guardrail modifications applied
|
|
314
|
-
| `interrupted?` | `Boolean` | `true` if the loop was interrupted
|
|
315
|
-
| `interrupt_reason` | `String` / `Symbol` / `nil` | The reason passed to `throw :riffer_interrupt`
|
|
316
|
-
| `messages` | `Array` | Full message history from the conversation
|
|
317
|
-
| `healed_tool_call_ids` | `Array[String]` | `tool_call` ids filled with placeholder results during interrupt healing (else `[]`)
|
|
306
|
+
| Attribute | Type | Description |
|
|
307
|
+
| ---------------------- | --------------------------- | ------------------------------------------------------------------------------------------------ |
|
|
308
|
+
| `content` | `String` | The response text |
|
|
309
|
+
| `structured_output` | `Hash` / `nil` | Parsed and validated structured output (see below) |
|
|
310
|
+
| `blocked?` | `Boolean` | `true` if a guardrail tripwire fired |
|
|
311
|
+
| `tripwire` | `Tripwire` / `nil` | The guardrail tripwire that blocked the request |
|
|
312
|
+
| `modified?` | `Boolean` | `true` if a guardrail modified the content |
|
|
313
|
+
| `modifications` | `Array` | List of guardrail modifications applied |
|
|
314
|
+
| `interrupted?` | `Boolean` | `true` if the loop was interrupted |
|
|
315
|
+
| `interrupt_reason` | `String` / `Symbol` / `nil` | The reason passed to `throw :riffer_interrupt` |
|
|
316
|
+
| `messages` | `Array` | Full message history from the conversation |
|
|
317
|
+
| `healed_tool_call_ids` | `Array[String]` | `tool_call` ids filled with placeholder results during interrupt healing (else `[]`) |
|
|
318
|
+
| `token_usage` | `TokenUsage` / `nil` | Aggregate `Riffer::Providers::TokenUsage` across this run's LLM calls (`nil` when none reported) |
|
|
319
|
+
| `steps` | `Integer` | LLM calls made during this run (`0` when a before-guardrail blocks first); not the session's cumulative count |
|
|
318
320
|
|
|
319
321
|
### response.structured_output
|
|
320
322
|
|
data/docs/08_MESSAGES.md
CHANGED
|
@@ -40,15 +40,16 @@ msg.to_h # => {role: :user, content: "Describe this image", files: [{...}]}
|
|
|
40
40
|
|
|
41
41
|
### Assistant
|
|
42
42
|
|
|
43
|
-
Assistant messages represent LLM responses, potentially including tool calls
|
|
43
|
+
Assistant messages represent LLM responses, potentially including tool calls, token usage data, and the reason the model finished:
|
|
44
44
|
|
|
45
45
|
```ruby
|
|
46
46
|
# Text-only response
|
|
47
47
|
msg = Riffer::Messages::Assistant.new("I'm doing well, thank you!")
|
|
48
|
-
msg.role
|
|
49
|
-
msg.content
|
|
50
|
-
msg.tool_calls
|
|
51
|
-
msg.token_usage
|
|
48
|
+
msg.role # => :assistant
|
|
49
|
+
msg.content # => "I'm doing well, thank you!"
|
|
50
|
+
msg.tool_calls # => []
|
|
51
|
+
msg.token_usage # => nil or Riffer::Providers::TokenUsage
|
|
52
|
+
msg.finish_reason # => nil or a normalized Symbol (see below)
|
|
52
53
|
|
|
53
54
|
# Response with tool calls
|
|
54
55
|
msg = Riffer::Messages::Assistant.new("", tool_calls: [
|
|
@@ -65,6 +66,39 @@ if msg.token_usage
|
|
|
65
66
|
end
|
|
66
67
|
```
|
|
67
68
|
|
|
69
|
+
#### Token Usage Semantics
|
|
70
|
+
|
|
71
|
+
`TokenUsage` buckets carry the same meaning for every provider, regardless of how the provider reports its raw usage:
|
|
72
|
+
|
|
73
|
+
- `input_tokens` — every token entering the context window, including cache reads and writes.
|
|
74
|
+
- `output_tokens` — every token the model generated, including reasoning/thinking tokens.
|
|
75
|
+
- `cache_read_tokens` — the subset of `input_tokens` read from the provider's prompt cache; `nil` when the provider doesn't report it.
|
|
76
|
+
- `cache_write_tokens` — the subset of `input_tokens` written to the provider's prompt cache; `nil` when the provider doesn't report it.
|
|
77
|
+
|
|
78
|
+
The cache buckets are subsets of `input_tokens`, never additions to it — summing `input_tokens + cache_read_tokens` double-counts. `total_tokens` (input + output) matches the totals providers report on their dashboards.
|
|
79
|
+
|
|
80
|
+
- `cost` — the computed cost of the call, set when pricing is configured for the model in use (see [Configuration → Pricing](10_CONFIGURATION.md#pricing)); `nil` when the model is unpriced. It's for observability, not billing. Run-level usage sums per-call costs through `TokenUsage#+`, so `response.token_usage.cost` is the total spend across the run — but the sum is `nil` if any call in the run used an unpriced model, rather than silently under-reporting.
|
|
81
|
+
|
|
82
|
+
#### Finish Reasons
|
|
83
|
+
|
|
84
|
+
`finish_reason` carries the same meaning for every provider — each adapter maps its raw wire value (Anthropic's `end_turn`, OpenAI's response status, Gemini's `STOP`, …) into a normalized vocabulary:
|
|
85
|
+
|
|
86
|
+
| Value | Meaning |
|
|
87
|
+
| ----------------- | --------------------------------------------------------------- |
|
|
88
|
+
| `:stop` | The model finished its turn naturally (or hit a stop sequence). |
|
|
89
|
+
| `:length` | Output was truncated at the max-token limit. |
|
|
90
|
+
| `:tool_calls` | The model stopped to call tools. |
|
|
91
|
+
| `:content_filter` | A provider safety system blocked or cut the response. |
|
|
92
|
+
| `:error` | The provider reported an error finish. |
|
|
93
|
+
| `:other` | A provider-specific value with no normalized equivalent. |
|
|
94
|
+
|
|
95
|
+
`finish_reason` is `nil` when the provider doesn't report one. Use it to detect truncation without parsing provider responses:
|
|
96
|
+
|
|
97
|
+
```ruby
|
|
98
|
+
response = agent.generate("Summarize this document")
|
|
99
|
+
retry_with_higher_limit if agent.session.messages.last.finish_reason == :length
|
|
100
|
+
```
|
|
101
|
+
|
|
68
102
|
#### Structured Output on Messages
|
|
69
103
|
|
|
70
104
|
When an agent has `structured_output` configured, the final assistant message stores the parsed hash directly. The `structured_output?` predicate checks for a non-nil value:
|
data/docs/09_STREAM_EVENTS.md
CHANGED
|
@@ -258,6 +258,20 @@ event.to_h # => {role: :assistant, token_usage: {input_
|
|
|
258
258
|
|
|
259
259
|
Use this to track token consumption in real-time during streaming.
|
|
260
260
|
|
|
261
|
+
### FinishReasonDone
|
|
262
|
+
|
|
263
|
+
Emitted once near the end of the stream with the normalized reason the model finished (no ordering guarantee relative to `TokenUsageDone`):
|
|
264
|
+
|
|
265
|
+
```ruby
|
|
266
|
+
event = Riffer::StreamEvents::FinishReasonDone.new(finish_reason: :length, raw_finish_reason: "max_tokens")
|
|
267
|
+
event.role # => :assistant
|
|
268
|
+
event.finish_reason # => :length (see Messages — Finish Reasons for the vocabulary)
|
|
269
|
+
event.raw_finish_reason # => "max_tokens" (the provider's raw wire value, or nil)
|
|
270
|
+
event.to_h # => {role: :assistant, finish_reason: :length, raw_finish_reason: "max_tokens"}
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
The agent loop stamps this value onto the accumulated assistant message's `finish_reason`.
|
|
274
|
+
|
|
261
275
|
## Streaming with Tools
|
|
262
276
|
|
|
263
277
|
When an agent uses tools during streaming, the flow is:
|
data/docs/10_CONFIGURATION.md
CHANGED
|
@@ -65,11 +65,11 @@ Riffer.configure do |config|
|
|
|
65
65
|
end
|
|
66
66
|
```
|
|
67
67
|
|
|
68
|
-
| Value
|
|
69
|
-
|
|
|
68
|
+
| Value | Description |
|
|
69
|
+
| --------------------------------- | ------------------------------------------------------------------------------------------------------- |
|
|
70
70
|
| `Riffer::Tools::Runtime` subclass | Instantiated automatically (e.g., `Riffer::Tools::Runtime::Inline`, `Riffer::Tools::Runtime::Threaded`) |
|
|
71
|
-
| `Riffer::Tools::Runtime` instance | Custom runtime with specific options
|
|
72
|
-
| `Proc`
|
|
71
|
+
| `Riffer::Tools::Runtime` instance | Custom runtime with specific options |
|
|
72
|
+
| `Proc` | Dynamic resolution |
|
|
73
73
|
|
|
74
74
|
Per-agent configuration overrides this global default. See [Advanced Tool Configuration — Tool Runtime](07_TOOL_ADVANCED.md#tool-runtime-experimental) for details.
|
|
75
75
|
|
|
@@ -101,6 +101,75 @@ end
|
|
|
101
101
|
|
|
102
102
|
Accepts a `Riffer::Skills::Backend` instance or a `Proc` that receives `context` and returns a backend. Defaults to `nil` — agents that don't set their own backend get no skills, matching pre-existing behavior. Per-agent backends override this default.
|
|
103
103
|
|
|
104
|
+
### Tracing
|
|
105
|
+
|
|
106
|
+
Tracing-related global configuration lives under `config.tracing`. Riffer detects the OpenTelemetry API at runtime — without it (or without a host-configured OTEL SDK) every span is a silent no-op, and riffer carries no OTEL gem dependency either way.
|
|
107
|
+
|
|
108
|
+
```ruby
|
|
109
|
+
Riffer.configure do |config|
|
|
110
|
+
config.tracing.enabled = ENV.fetch("RIFFER_TRACING_ENABLED", "true")
|
|
111
|
+
end
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
| Option | Description |
|
|
115
|
+
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
116
|
+
| `enabled` | The kill switch, consulted on every span — flipping it at runtime takes effect immediately. Accepts booleans or `'true'`/`'false'`/`'1'`/`'0'`. Defaults to `true`. |
|
|
117
|
+
| `capture_messages` | Opt-in capture of full message content on LLM-call spans (`gen_ai.input.messages`, `gen_ai.output.messages`, `gen_ai.system_instructions`) as GenAI-semconv JSON. Defaults to `false` — message content routinely carries sensitive data. File attachments serialize as metadata-only stubs (media type and name, never bytes), and riffer applies no size limit of its own — cap oversized attributes with the OTEL SDK attribute length limits. |
|
|
118
|
+
| `tracer_provider` | Explicit OTEL tracer provider (e.g. the SDK's in-memory provider in tests). Defaults to `nil`, which resolves the global `OpenTelemetry.tracer_provider` lazily at first span. Raises `Riffer::ArgumentError` if the `opentelemetry-api` gem isn't available at a supported version (>= 1.1, < 2). |
|
|
119
|
+
|
|
120
|
+
Hosts own SDK and exporter wiring — riffer only emits spans through whatever provider the host configures. See [Tracing](16_TRACING.md) for the emitted span contract — names, attributes, hierarchy, and host wiring.
|
|
121
|
+
|
|
122
|
+
### Metrics
|
|
123
|
+
|
|
124
|
+
Metrics-related global configuration lives under `config.metrics`, **independent** of `config.tracing` — each has its own kill switch, so you can run one signal without the other. Riffer detects the OpenTelemetry metrics API at runtime — without it (or without a host-configured OTEL metrics SDK) every measurement is a silent no-op, and riffer carries no OTEL gem dependency either way. The metrics API and SDK ship as gems separate from the traces API, and both are still experimental (pre-1.0).
|
|
125
|
+
|
|
126
|
+
```ruby
|
|
127
|
+
Riffer.configure do |config|
|
|
128
|
+
config.metrics.enabled = ENV.fetch("RIFFER_METRICS_ENABLED", "true")
|
|
129
|
+
end
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
| Option | Description |
|
|
133
|
+
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
|
134
|
+
| `enabled` | The kill switch, consulted on every measurement — flipping it at runtime takes effect immediately. Accepts booleans or `'true'`/`'false'`/`'1'`/`'0'`. Defaults to `true`. |
|
|
135
|
+
| `meter_provider` | Explicit OTEL meter provider (e.g. the SDK's in-memory provider in tests). Defaults to `nil`, which resolves the global `OpenTelemetry.meter_provider` lazily at first record. Raises `Riffer::ArgumentError` if the `opentelemetry-metrics-api` gem isn't available at a supported version (>= 0.2, < 1.0). |
|
|
136
|
+
|
|
137
|
+
Hosts own SDK, reader, exporter, and aggregation wiring — riffer only records instruments through whatever provider the host configures, and histogram bucket boundaries are set host-side via Views. See [Metrics](17_METRICS.md) for the instrument contract — names, units, attributes, and host wiring.
|
|
138
|
+
|
|
139
|
+
### Pricing
|
|
140
|
+
|
|
141
|
+
Configure per-model token prices and riffer computes the cost of each LLM call onto its [`TokenUsage`](08_MESSAGES.md#token-usage-semantics). Riffer ships **no** price table — so an unconfigured model simply carries no cost (`token_usage.cost` is `nil`).
|
|
142
|
+
|
|
143
|
+
```ruby
|
|
144
|
+
Riffer.configure do |config|
|
|
145
|
+
# Rates are per million tokens, keyed by the same "provider/model" id you give the agent.
|
|
146
|
+
config.pricing.set("anthropic/claude-sonnet-4-6", input: 3.0, output: 15.0, cache_read: 0.30, cache_write: 3.75)
|
|
147
|
+
config.pricing.set("openai/gpt-4", input: 30.0, output: 60.0)
|
|
148
|
+
|
|
149
|
+
# Pass an array to share one set of rates across a model family:
|
|
150
|
+
config.pricing.set(["openai/gpt-4", "openai/gpt-4-0613"], input: 30.0, output: 60.0)
|
|
151
|
+
end
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
| Argument | Description |
|
|
155
|
+
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
156
|
+
| `models` | A `provider/model` id (e.g. `"openai/gpt-4"`) — the same string you pass to `model` — or an array of ids that share one set of rates. No alias matching; raises on a malformed id. |
|
|
157
|
+
| `input:` | Price per **million** input tokens. Required. Applies to the uncached portion of `input_tokens`. |
|
|
158
|
+
| `output:` | Price per **million** output tokens. Required. |
|
|
159
|
+
| `cache_read:` | Price per million cache-read tokens. Optional — when omitted, cache reads bill at the `input:` rate. |
|
|
160
|
+
| `cache_write:` | Price per million cache-write tokens. Optional — when omitted, cache writes bill at the `input:` rate. |
|
|
161
|
+
|
|
162
|
+
Because the cache buckets are subsets of `input_tokens`, the cost formula subtracts them before applying the input rate:
|
|
163
|
+
|
|
164
|
+
```text
|
|
165
|
+
cost = (input − cache_read − cache_write) × input_rate
|
|
166
|
+
+ cache_read × cache_read_rate
|
|
167
|
+
+ cache_write × cache_write_rate
|
|
168
|
+
+ output × output_rate
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
All rates are divided by 1,000,000, and an unset cache rate falls back to `input_rate`. Cost is for observability, not billing — it's a `Float`, and sub-cent rounding can accumulate over a long run. See [Messages → Token Usage Semantics](08_MESSAGES.md#token-usage-semantics) for how cost surfaces and aggregates.
|
|
172
|
+
|
|
104
173
|
### Message ID Strategy
|
|
105
174
|
|
|
106
175
|
Opt in to stable identifiers on every message for logging, persistence, or replay:
|
data/docs/13_SKILLS.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Skills
|
|
2
2
|
|
|
3
|
-
Skills are packaged AI agent capabilities per the [Agent Skills spec](https://agentskills.io/). Each skill is a directory containing a `SKILL.md` file with YAML frontmatter and Markdown instructions. The framework discovers skills through a pluggable backend, injects a compact catalog into the system prompt (~50 tokens/skill), and
|
|
3
|
+
Skills are packaged AI agent capabilities per the [Agent Skills spec](https://agentskills.io/). Each skill is a directory containing a `SKILL.md` file with YAML frontmatter and Markdown instructions. The framework discovers skills through a pluggable backend, injects a compact catalog into the system prompt (~50 tokens/skill), and supports both activation channels the spec describes: the LLM activates skills on demand through a tool, and your application injects skills the user asked for as conversation content (see [User-Triggered Activation](#user-triggered-activation)).
|
|
4
4
|
|
|
5
5
|
## Creating a Skill
|
|
6
6
|
|
|
@@ -36,7 +36,11 @@ Review the code for:
|
|
|
36
36
|
- `name` — lowercase alphanumeric with hyphens, 1-64 chars (must match directory name)
|
|
37
37
|
- `description` — 1-1024 chars, helps the LLM decide when to activate
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
**Optional frontmatter fields:**
|
|
40
|
+
|
|
41
|
+
- `disable-model-invocation` — when `true`, the skill is hidden from the catalog and the LLM cannot activate it via the `skill_activate` tool. It stays reachable through the programmatic `activate` config (see [Activated Skills](#activated-skills)) and through `activation_prompt` (see [User-Triggered Activation](#user-triggered-activation)), so you can inject it under your own logic instead of the model's. Only the literal value `true` disables invocation; any other value (or its absence) leaves the skill model-invocable.
|
|
42
|
+
|
|
43
|
+
Any other frontmatter keys are passed through as metadata.
|
|
40
44
|
|
|
41
45
|
## Configuring an Agent
|
|
42
46
|
|
|
@@ -81,7 +85,7 @@ end
|
|
|
81
85
|
|
|
82
86
|
### Activated Skills
|
|
83
87
|
|
|
84
|
-
Load skill instructions into the system prompt at startup (no tool call needed):
|
|
88
|
+
Load skill instructions into the system prompt at startup (no tool call needed). Use this for skills that should govern the whole session. For skills the user requests mid-conversation, prefer [User-Triggered Activation](#user-triggered-activation), which keeps the system prompt (and its provider-side cache) stable. Startup activation looks like this:
|
|
85
89
|
|
|
86
90
|
```ruby
|
|
87
91
|
skills do
|
|
@@ -103,8 +107,44 @@ end
|
|
|
103
107
|
|
|
104
108
|
1. **Discovery** — At the start of `generate`/`stream`, the backend's `list_skills` returns frontmatter for all available skills.
|
|
105
109
|
2. **Catalog injection** — The adapter formats the catalog and appends it to the system prompt.
|
|
106
|
-
3. **Activation** — When the LLM matches a task to a skill, it calls the `skill_activate` tool with the skill name. The tool returns the full SKILL.md body.
|
|
110
|
+
3. **Activation** — When the LLM matches a task to a skill, it calls the `skill_activate` tool with the skill name. The tool returns the full SKILL.md body wrapped in `<skill_content name="...">` tags.
|
|
107
111
|
4. **Execution** — The LLM follows the skill's instructions to complete the task.
|
|
112
|
+
5. **Deduplication** — Re-activating an already-active skill returns a short pointer ("already active") instead of the body again, so repeated activations don't fill the context with duplicate instructions. This applies regardless of which channel activated the skill first — tool call, `activation_prompt`, or the `activate` config.
|
|
113
|
+
|
|
114
|
+
Activation state lives in memory on the `Riffer::Skills::Context`, not in the session. When you rebuild an agent from a persisted session, the first re-activation of each skill returns the full body again (the conversation history still carries the earlier copy); deduplication resumes from there. If you prune skill content out of a session yourself, call `deactivate(name)` so the next activation returns the body instead of a pointer to content that no longer exists.
|
|
115
|
+
|
|
116
|
+
## User-Triggered Activation
|
|
117
|
+
|
|
118
|
+
When a user explicitly invokes a skill (a slash command, a button, a mention), don't wait for the model to discover it — inject the skill body into the conversation as a user message. `activation_prompt` returns the body wrapped for injection and records the activation, so a later model-side `skill_activate` call for the same skill gets the pointer instead of a duplicate body:
|
|
119
|
+
|
|
120
|
+
```ruby
|
|
121
|
+
agent = MyAgent.new
|
|
122
|
+
skills = agent.context.skills
|
|
123
|
+
|
|
124
|
+
# User typed: /code-review focus on security
|
|
125
|
+
if skills.activated?("code-review")
|
|
126
|
+
agent.generate("The code-review skill was invoked again — its instructions are above. focus on security")
|
|
127
|
+
else
|
|
128
|
+
agent.generate("#{skills.activation_prompt("code-review")}\n\nfocus on security")
|
|
129
|
+
end
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
`activation_prompt("code-review")` returns:
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
<skill_content name="code-review">
|
|
136
|
+
You are a code review assistant.
|
|
137
|
+
...
|
|
138
|
+
</skill_content>
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
How a repeat invocation behaves is your choice — re-inject the full body (`activation_prompt` always returns it), or send a short reference as above. The check via `activated?` covers both channels, so a skill the model already activated through the tool counts too.
|
|
142
|
+
|
|
143
|
+
For reading a skill body **without** recording an activation — a UI preview, or delegating the skill to a subagent whose context is separate — use `read`:
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
body = skills.read("code-review") # no activation recorded
|
|
147
|
+
```
|
|
108
148
|
|
|
109
149
|
## Custom Backends
|
|
110
150
|
|
|
@@ -154,6 +194,28 @@ class InstrumentedActivateTool < Riffer::Skills::ActivateTool
|
|
|
154
194
|
end
|
|
155
195
|
end
|
|
156
196
|
|
|
197
|
+
# Change what a re-activation returns (default: a short "already active" pointer)
|
|
198
|
+
class CustomPointerActivateTool < Riffer::Skills::ActivateTool
|
|
199
|
+
private
|
|
200
|
+
|
|
201
|
+
def already_active_message(name)
|
|
202
|
+
"'#{name}' is loaded — scroll up for its instructions."
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
# Return the full body on every activation (no deduplication)
|
|
207
|
+
class AlwaysFullBodyActivateTool < Riffer::Skills::ActivateTool
|
|
208
|
+
def call(context:, name:)
|
|
209
|
+
skills_context = context&.skills
|
|
210
|
+
return error("Skills not configured") unless skills_context
|
|
211
|
+
return error("Unknown skill: '#{name}'") unless skills_context.model_invocable?(name)
|
|
212
|
+
|
|
213
|
+
text(skills_context.activation_prompt(name))
|
|
214
|
+
rescue Riffer::ArgumentError => e
|
|
215
|
+
error(e.message)
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
|
|
157
219
|
# Global default
|
|
158
220
|
Riffer.config.skills.default_activate_tool = InstrumentedActivateTool
|
|
159
221
|
|
data/docs/14_MCP.md
CHANGED
|
@@ -109,6 +109,7 @@ end
|
|
|
109
109
|
Only use `progressive: false` when the server has a small, stable set of tools you always want available.
|
|
110
110
|
|
|
111
111
|
**`mcp_search`** — Search for available tools by name or description.
|
|
112
|
+
|
|
112
113
|
- `query` (required, non-empty) — filter by name or description substring.
|
|
113
114
|
|
|
114
115
|
On a successful search, matching tools are injected into the agent's active tool list. The model calls them natively on the next turn — no proxy or JSON-encoded arguments.
|
|
@@ -150,7 +151,7 @@ Riffer::Mcp.registrations
|
|
|
150
151
|
# => {"github" => #<Riffer::Mcp::Registration ...>, ...}
|
|
151
152
|
|
|
152
153
|
reg = Riffer::Mcp.registrations["github"]
|
|
153
|
-
reg.tools # => [<Class:...>, ...] (Riffer::Tool subclasses)
|
|
154
|
+
reg.tools # => [<Class:...>, ...] (Riffer::Mcp::Tool subclasses; .mcp_server_tool_name returns the server-side name)
|
|
154
155
|
```
|
|
155
156
|
|
|
156
157
|
Discovery failures raise from `register` directly, typically `Faraday::Error` for network issues or `Riffer::DependencyError` if the `mcp`/`faraday` gems are missing. Rescue `StandardError` for graceful degradation:
|