tokenr-ruby 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +36 -1
- data/lib/tokenr/integrations/anthropic.rb +26 -11
- data/lib/tokenr/integrations/openai.rb +65 -21
- data/lib/tokenr/tracker.rb +20 -16
- data/lib/tokenr/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 011b6bda965d1ebb0baa5949398253f2e010b86bd8301b3166745494c254394e
|
|
4
|
+
data.tar.gz: ecd282c0a1e3ede564bef3b1ff90240b530f36c59222e3c11a56e5e8d2b7a2d1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 78b3feeecae3404792b229e412c66bae6530d4bf489232affc33cfd8616bd22cb02f2c764bac9a0e01cd5ff8b52deeca8c7a3531cf30f6e4c5b7aaf953e35586
|
|
7
|
+
data.tar.gz: eac13684791536a785e1bc18f426b08a8e7914b7371f3e10849864b78c88d4724660f5cbc1bb5dd9db537e5b85f31ab083355f5bb7abf5ef0e364f9ce1e9d000
|
data/README.md
CHANGED
|
@@ -185,12 +185,33 @@ Tokenr.client.get_costs_by_agent(limit: 20)
|
|
|
185
185
|
Tokenr.client.get_timeseries(interval: "day")
|
|
186
186
|
```
|
|
187
187
|
|
|
188
|
+
## Prompt Caching
|
|
189
|
+
|
|
190
|
+
Both OpenAI and Anthropic support prompt caching, and the SDK handles it automatically.
|
|
191
|
+
|
|
192
|
+
**OpenAI** includes cached tokens inside `prompt_tokens`. The SDK reads `prompt_tokens_details["cached_tokens"]`, reports that count as `cache_read_tokens`, and subtracts it from `prompt_tokens` to get `input_tokens`, so Tokenr can price each category at the correct rate.
|
|
193
|
+
|
|
194
|
+
**Anthropic** reports cache tokens as separate fields (`cache_creation_input_tokens` and `cache_read_input_tokens`). The SDK passes these through directly as `cache_write_tokens` and `cache_read_tokens`, respectively.
|
|
195
|
+
|
|
196
|
+
For manual tracking, you can pass cache tokens explicitly:
|
|
197
|
+
|
|
198
|
+
```ruby
|
|
199
|
+
Tokenr.track(
|
|
200
|
+
provider: "anthropic",
|
|
201
|
+
model: "claude-sonnet-4-20250514",
|
|
202
|
+
input_tokens: 500,
|
|
203
|
+
output_tokens: 200,
|
|
204
|
+
cache_read_tokens: 8000,
|
|
205
|
+
cache_write_tokens: 2000,
|
|
206
|
+
)
|
|
207
|
+
```
|
|
208
|
+
|
|
188
209
|
## How It Works
|
|
189
210
|
|
|
190
211
|
1. `Tokenr::Integrations::OpenAI.wrap(client)` returns a thin wrapper around your existing client
|
|
191
212
|
2. After each call the wrapper reads token counts from the response `usage` field
|
|
192
213
|
3. Events are pushed onto an in-process queue and flushed to Tokenr in the background
|
|
193
|
-
4. If tracking fails for any reason, the exception is swallowed
|
|
214
|
+
4. If tracking fails for any reason, the exception is swallowed and your app is unaffected
|
|
194
215
|
5. On process exit, `at_exit` flushes any remaining queued events
|
|
195
216
|
|
|
196
217
|
## Supported Providers
|
|
@@ -212,6 +233,20 @@ Tokenr.client.get_timeseries(interval: "day")
|
|
|
212
233
|
export TOKENR_TOKEN="your-token-here"
|
|
213
234
|
```
|
|
214
235
|
|
|
236
|
+
## Running Tests
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
# Unit and mock tests (no API keys needed)
|
|
240
|
+
bundle exec rspec
|
|
241
|
+
|
|
242
|
+
# Live integration tests (makes real API calls, costs fractions of a cent)
|
|
243
|
+
OPENAI_API_KEY=sk-... ANTHROPIC_API_KEY=sk-ant-... bundle exec rspec spec/live_integration_spec.rb -fd
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
The live tests make a real call to each provider, then verify that the token counts in the Tokenr payload match what the provider actually returned. This includes a test that triggers Anthropic prompt caching and confirms cache tokens are extracted correctly.
|
|
247
|
+
|
|
248
|
+
Note: the live tests require the `ruby-openai` and `anthropic` gems to be installed. They are not in the Gemfile since they are optional runtime dependencies.
|
|
249
|
+
|
|
215
250
|
## Security
|
|
216
251
|
|
|
217
252
|
This SDK is open source so you can audit exactly what data is sent and when. The short version:
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module Tokenr
|
|
4
4
|
module Integrations
|
|
5
|
-
# Wrap an Anthropic client to automatically track costs
|
|
5
|
+
# Wrap an Anthropic client to automatically track costs, including
|
|
6
|
+
# prompt-cache token costs (cache_creation and cache_read).
|
|
6
7
|
#
|
|
7
8
|
# Usage:
|
|
8
9
|
# require "anthropic"
|
|
@@ -42,7 +43,7 @@ module Tokenr
|
|
|
42
43
|
response = client.messages.create(model: model, messages: messages, **params)
|
|
43
44
|
latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
|
|
44
45
|
|
|
45
|
-
track_response(
|
|
46
|
+
track_response(response, latency)
|
|
46
47
|
response
|
|
47
48
|
end
|
|
48
49
|
|
|
@@ -56,23 +57,37 @@ module Tokenr
|
|
|
56
57
|
|
|
57
58
|
private
|
|
58
59
|
|
|
59
|
-
def track_response(
|
|
60
|
+
def track_response(response, latency_ms)
|
|
60
61
|
usage = response.usage
|
|
61
62
|
return unless usage
|
|
62
63
|
|
|
64
|
+
# Anthropic reports all three token categories separately.
|
|
65
|
+
# input_tokens = non-cached input only (excludes cache hits and writes).
|
|
66
|
+
# cache_creation_input_tokens = tokens written to cache this turn.
|
|
67
|
+
# cache_read_input_tokens = tokens served from cache (billed at ~10% of input rate).
|
|
68
|
+
cache_write = safe_int(usage, :cache_creation_input_tokens)
|
|
69
|
+
cache_read = safe_int(usage, :cache_read_input_tokens)
|
|
70
|
+
|
|
63
71
|
Tokenr.track(
|
|
64
|
-
model:
|
|
65
|
-
provider:
|
|
66
|
-
input_tokens:
|
|
67
|
-
output_tokens:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
+
model: response.model,
|
|
73
|
+
provider: "anthropic",
|
|
74
|
+
input_tokens: usage.input_tokens || 0,
|
|
75
|
+
output_tokens: usage.output_tokens || 0,
|
|
76
|
+
cache_write_tokens: cache_write,
|
|
77
|
+
cache_read_tokens: cache_read,
|
|
78
|
+
latency_ms: latency_ms,
|
|
79
|
+
agent_id: agent_id,
|
|
80
|
+
feature_name: feature_name,
|
|
81
|
+
tags: tags
|
|
72
82
|
)
|
|
73
83
|
rescue StandardError
|
|
74
84
|
# Never let tracking errors surface to the caller
|
|
75
85
|
end
|
|
86
|
+
|
|
87
|
+
def safe_int(usage, method_name)
|
|
88
|
+
return 0 unless usage.respond_to?(method_name)
|
|
89
|
+
(usage.public_send(method_name) || 0).to_i
|
|
90
|
+
end
|
|
76
91
|
end
|
|
77
92
|
end
|
|
78
93
|
end
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module Tokenr
|
|
4
4
|
module Integrations
|
|
5
|
-
# Wrap an OpenAI client to automatically
|
|
5
|
+
# Wrap an OpenAI client (or any OpenAI-compatible client) to automatically
|
|
6
|
+
# track costs, including prompt-cache token costs.
|
|
6
7
|
#
|
|
7
8
|
# Usage:
|
|
8
9
|
# require "openai"
|
|
@@ -16,28 +17,51 @@ module Tokenr
|
|
|
16
17
|
# response = tracked.chat(parameters: { model: "gpt-4o", messages: [...] })
|
|
17
18
|
# # Cost is tracked automatically — no other changes needed.
|
|
18
19
|
#
|
|
20
|
+
# # For OpenAI-compatible providers (MiniMax, DeepSeek, etc.) the provider
|
|
21
|
+
# # is auto-detected from the client's URI base. Pass provider: explicitly
|
|
22
|
+
# # to override if needed.
|
|
23
|
+
# minimax_client = OpenAI::Client.new(
|
|
24
|
+
# access_token: ENV["MINIMAX_API_KEY"],
|
|
25
|
+
# uri_base: "https://api.minimax.io/v1/"
|
|
26
|
+
# )
|
|
27
|
+
# tracked = Tokenr::Integrations::OpenAI.wrap(minimax_client, agent_id: "my-bot")
|
|
28
|
+
#
|
|
19
29
|
module OpenAI
|
|
30
|
+
# Maps URI base substrings to Tokenr provider slugs.
|
|
31
|
+
PROVIDER_MAP = {
|
|
32
|
+
"minimax" => "minimax",
|
|
33
|
+
"anthropic" => "anthropic",
|
|
34
|
+
"googleapis" => "google",
|
|
35
|
+
"mistral" => "mistral",
|
|
36
|
+
"cohere" => "cohere",
|
|
37
|
+
"deepseek" => "deepseek",
|
|
38
|
+
"x.ai" => "xai",
|
|
39
|
+
"xai" => "xai",
|
|
40
|
+
"azure" => "azure_openai",
|
|
41
|
+
}.freeze
|
|
42
|
+
|
|
20
43
|
class << self
|
|
21
|
-
def wrap(client, agent_id: nil, feature_name: nil, tags: {})
|
|
22
|
-
Wrapper.new(client, agent_id: agent_id, feature_name: feature_name, tags: tags)
|
|
44
|
+
def wrap(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
|
|
45
|
+
Wrapper.new(client, agent_id: agent_id, feature_name: feature_name, tags: tags, provider: provider)
|
|
23
46
|
end
|
|
24
47
|
end
|
|
25
48
|
|
|
26
49
|
class Wrapper
|
|
27
50
|
attr_reader :client, :agent_id, :feature_name, :tags
|
|
28
51
|
|
|
29
|
-
def initialize(client, agent_id: nil, feature_name: nil, tags: {})
|
|
30
|
-
@client
|
|
31
|
-
@agent_id
|
|
32
|
-
@feature_name
|
|
33
|
-
@tags
|
|
52
|
+
def initialize(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
|
|
53
|
+
@client = client
|
|
54
|
+
@agent_id = agent_id
|
|
55
|
+
@feature_name = feature_name
|
|
56
|
+
@tags = tags
|
|
57
|
+
@explicit_provider = provider
|
|
34
58
|
end
|
|
35
59
|
|
|
36
60
|
def chat(parameters:)
|
|
37
61
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
38
62
|
response = client.chat(parameters: parameters)
|
|
39
63
|
latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
|
|
40
|
-
track_chat_response(
|
|
64
|
+
track_chat_response(response, latency)
|
|
41
65
|
response
|
|
42
66
|
end
|
|
43
67
|
|
|
@@ -45,7 +69,7 @@ module Tokenr
|
|
|
45
69
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
46
70
|
response = client.completions(parameters: parameters)
|
|
47
71
|
latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
|
|
48
|
-
track_chat_response(
|
|
72
|
+
track_chat_response(response, latency)
|
|
49
73
|
response
|
|
50
74
|
end
|
|
51
75
|
|
|
@@ -56,8 +80,8 @@ module Tokenr
|
|
|
56
80
|
usage = response.dig("usage")
|
|
57
81
|
if usage
|
|
58
82
|
Tokenr.track(
|
|
59
|
-
model: parameters[:model],
|
|
60
|
-
provider:
|
|
83
|
+
model: response.dig("model") || parameters[:model],
|
|
84
|
+
provider: provider,
|
|
61
85
|
input_tokens: usage["prompt_tokens"] || 0,
|
|
62
86
|
output_tokens: 0,
|
|
63
87
|
latency_ms: latency,
|
|
@@ -80,19 +104,39 @@ module Tokenr
|
|
|
80
104
|
|
|
81
105
|
private
|
|
82
106
|
|
|
83
|
-
def
|
|
107
|
+
def provider
|
|
108
|
+
return @explicit_provider if @explicit_provider
|
|
109
|
+
|
|
110
|
+
# Try to read the URI base from the wrapped client so we can
|
|
111
|
+
# identify OpenAI-compatible providers automatically.
|
|
112
|
+
uri = [:@uri_base, :@api_base, :@base_url].reduce("") do |acc, ivar|
|
|
113
|
+
acc.empty? ? client.instance_variable_get(ivar).to_s : acc
|
|
114
|
+
end.downcase
|
|
115
|
+
|
|
116
|
+
PROVIDER_MAP.each { |keyword, slug| return slug if uri.include?(keyword) }
|
|
117
|
+
"openai"
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def track_chat_response(response, latency_ms)
|
|
84
121
|
usage = response.dig("usage")
|
|
85
122
|
return unless usage
|
|
86
123
|
|
|
124
|
+
# prompt_tokens_details.cached_tokens = tokens served from cache.
|
|
125
|
+
# These are included in prompt_tokens, billed at a lower rate.
|
|
126
|
+
details = response.dig("usage", "prompt_tokens_details") || {}
|
|
127
|
+
cache_read = (details["cached_tokens"] || 0).to_i
|
|
128
|
+
non_cached_input = [(usage["prompt_tokens"] || 0).to_i - cache_read, 0].max
|
|
129
|
+
|
|
87
130
|
Tokenr.track(
|
|
88
|
-
model:
|
|
89
|
-
provider:
|
|
90
|
-
input_tokens:
|
|
91
|
-
output_tokens:
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
131
|
+
model: response.dig("model"),
|
|
132
|
+
provider: provider,
|
|
133
|
+
input_tokens: non_cached_input,
|
|
134
|
+
output_tokens: (usage["completion_tokens"] || 0).to_i,
|
|
135
|
+
cache_read_tokens: cache_read,
|
|
136
|
+
latency_ms: latency_ms,
|
|
137
|
+
agent_id: agent_id,
|
|
138
|
+
feature_name: feature_name,
|
|
139
|
+
tags: tags
|
|
96
140
|
)
|
|
97
141
|
end
|
|
98
142
|
end
|
data/lib/tokenr/tracker.rb
CHANGED
|
@@ -12,8 +12,8 @@ module Tokenr
|
|
|
12
12
|
start_flusher if client.config.async
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def track(model:, input_tokens:, output_tokens:, **options)
|
|
16
|
-
data = build_request(model, input_tokens, output_tokens, options)
|
|
15
|
+
def track(model:, input_tokens:, output_tokens:, cache_read_tokens: 0, cache_write_tokens: 0, **options)
|
|
16
|
+
data = build_request(model, input_tokens, output_tokens, options.merge(cache_read_tokens: cache_read_tokens, cache_write_tokens: cache_write_tokens))
|
|
17
17
|
|
|
18
18
|
if client.config.async
|
|
19
19
|
enqueue(data)
|
|
@@ -58,21 +58,25 @@ module Tokenr
|
|
|
58
58
|
|
|
59
59
|
def build_request(model, input_tokens, output_tokens, options)
|
|
60
60
|
config = client.config
|
|
61
|
+
cache_read = options[:cache_read_tokens].to_i
|
|
62
|
+
cache_write = options[:cache_write_tokens].to_i
|
|
61
63
|
{
|
|
62
|
-
model:
|
|
63
|
-
input_tokens:
|
|
64
|
-
output_tokens:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
64
|
+
model: model,
|
|
65
|
+
input_tokens: input_tokens,
|
|
66
|
+
output_tokens: output_tokens,
|
|
67
|
+
cache_read_tokens: cache_read > 0 ? cache_read : nil,
|
|
68
|
+
cache_write_tokens: cache_write > 0 ? cache_write : nil,
|
|
69
|
+
agent_id: options[:agent_id] || config.agent_id,
|
|
70
|
+
team_id: options[:team_id] || config.team_id,
|
|
71
|
+
feature_name: options[:feature_name],
|
|
72
|
+
provider: options[:provider],
|
|
73
|
+
latency_ms: options[:latency_ms],
|
|
74
|
+
status: options[:status] || "success",
|
|
75
|
+
external_id: options[:external_id],
|
|
76
|
+
total_cost: options[:total_cost],
|
|
77
|
+
requested_at: options[:requested_at] || Time.now.iso8601,
|
|
78
|
+
tags: config.default_tags.merge(options[:tags] || {}),
|
|
79
|
+
metrics: options[:metrics]
|
|
76
80
|
}.compact
|
|
77
81
|
end
|
|
78
82
|
|
data/lib/tokenr/version.rb
CHANGED