tokenr-ruby 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 15ea6965561601d93515ffdf3448f286976ad610bcb2374f225d30404fa6d008
4
- data.tar.gz: d4e501b932db24625348ed27c82003e702dcf4e05ec32664e7bd1255eb13ac43
3
+ metadata.gz: 011b6bda965d1ebb0baa5949398253f2e010b86bd8301b3166745494c254394e
4
+ data.tar.gz: ecd282c0a1e3ede564bef3b1ff90240b530f36c59222e3c11a56e5e8d2b7a2d1
5
5
  SHA512:
6
- metadata.gz: 255f3b2269c15a41ad3e29c3f302dd79a2498e4098351afa8827db98cc8dde58d75b0acc625e899af24d68ce2bd892a2128e8471f8ac599b6a8aad2fe5555e2f
7
- data.tar.gz: 9f46b51a1e802659dd855474f4fa1c67cbfc8a320560d16130c6ba6e87bb8c8ae84f62f635b8035c5454fbbcbaa2e0d83e8e44d9c4fa8393f9956a0e13588349
6
+ metadata.gz: 78b3feeecae3404792b229e412c66bae6530d4bf489232affc33cfd8616bd22cb02f2c764bac9a0e01cd5ff8b52deeca8c7a3531cf30f6e4c5b7aaf953e35586
7
+ data.tar.gz: eac13684791536a785e1bc18f426b08a8e7914b7371f3e10849864b78c88d4724660f5cbc1bb5dd9db537e5b85f31ab083355f5bb7abf5ef0e364f9ce1e9d000
data/README.md CHANGED
@@ -185,12 +185,33 @@ Tokenr.client.get_costs_by_agent(limit: 20)
185
185
  Tokenr.client.get_timeseries(interval: "day")
186
186
  ```
187
187
 
188
+ ## Prompt Caching
189
+
190
+ Both OpenAI and Anthropic support prompt caching, and the SDK handles it automatically.
191
+
192
+ **OpenAI** includes cached tokens inside `prompt_tokens`. The SDK reads `prompt_tokens_details["cached_tokens"]` and separates them so Tokenr can price each category at the correct rate.
193
+
194
+ **Anthropic** reports cache tokens as separate fields (`cache_creation_input_tokens` and `cache_read_input_tokens`). The SDK passes these through directly.
195
+
196
+ For manual tracking, you can pass cache tokens explicitly:
197
+
198
+ ```ruby
199
+ Tokenr.track(
200
+ provider: "anthropic",
201
+ model: "claude-sonnet-4-20250514",
202
+ input_tokens: 500,
203
+ output_tokens: 200,
204
+ cache_read_tokens: 8000,
205
+ cache_write_tokens: 2000,
206
+ )
207
+ ```
208
+
188
209
  ## How It Works
189
210
 
190
211
  1. `Tokenr::Integrations::OpenAI.wrap(client)` returns a thin wrapper around your existing client
191
212
  2. After each call the wrapper reads token counts from the response `usage` field
192
213
  3. Events are pushed onto an in-process queue and flushed to Tokenr in the background
193
- 4. If tracking fails for any reason, the exception is swallowed your app is unaffected
214
+ 4. If tracking fails for any reason, the exception is swallowed and your app is unaffected
194
215
  5. On process exit, `at_exit` flushes any remaining queued events
195
216
 
196
217
  ## Supported Providers
@@ -212,6 +233,20 @@ Tokenr.client.get_timeseries(interval: "day")
212
233
  export TOKENR_TOKEN="your-token-here"
213
234
  ```
214
235
 
236
+ ## Running Tests
237
+
238
+ ```bash
239
+ # Unit and mock tests (no API keys needed)
240
+ bundle exec rspec
241
+
242
+ # Live integration tests (make real API calls, cost fractions of a cent)
243
+ OPENAI_API_KEY=sk-... ANTHROPIC_API_KEY=sk-ant-... bundle exec rspec spec/live_integration_spec.rb -fd
244
+ ```
245
+
246
+ The live tests make a real call to each provider, then verify that the token counts in the Tokenr payload match what the provider actually returned. This includes a test that triggers Anthropic prompt caching and confirms cache tokens are extracted correctly.
247
+
248
+ Note: the live tests require the `ruby-openai` and `anthropic` gems to be installed. They are not in the Gemfile since they are optional runtime dependencies.
249
+
215
250
  ## Security
216
251
 
217
252
  This SDK is open source so you can audit exactly what data is sent and when. The short version:
@@ -2,7 +2,8 @@
2
2
 
3
3
  module Tokenr
4
4
  module Integrations
5
- # Wrap an Anthropic client to automatically track costs.
5
+ # Wrap an Anthropic client to automatically track costs, including
6
+ # prompt-cache token costs (cache_creation and cache_read).
6
7
  #
7
8
  # Usage:
8
9
  # require "anthropic"
@@ -42,7 +43,7 @@ module Tokenr
42
43
  response = client.messages.create(model: model, messages: messages, **params)
43
44
  latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
44
45
 
45
- track_response(model, response, latency)
46
+ track_response(response, latency)
46
47
  response
47
48
  end
48
49
 
@@ -56,23 +57,37 @@ module Tokenr
56
57
 
57
58
  private
58
59
 
59
- def track_response(model, response, latency_ms)
60
+ def track_response(response, latency_ms)
60
61
  usage = response.usage
61
62
  return unless usage
62
63
 
64
+ # Anthropic reports all three token categories separately.
65
+ # input_tokens = non-cached input only (excludes cache hits and writes).
66
+ # cache_creation_input_tokens = tokens written to cache this turn.
67
+ # cache_read_input_tokens = tokens served from cache (billed at ~10% of input rate).
68
+ cache_write = safe_int(usage, :cache_creation_input_tokens)
69
+ cache_read = safe_int(usage, :cache_read_input_tokens)
70
+
63
71
  Tokenr.track(
64
- model: model,
65
- provider: "anthropic",
66
- input_tokens: usage.input_tokens || 0,
67
- output_tokens: usage.output_tokens || 0,
68
- latency_ms: latency_ms,
69
- agent_id: agent_id,
70
- feature_name: feature_name,
71
- tags: tags
72
+ model: response.model,
73
+ provider: "anthropic",
74
+ input_tokens: usage.input_tokens || 0,
75
+ output_tokens: usage.output_tokens || 0,
76
+ cache_write_tokens: cache_write,
77
+ cache_read_tokens: cache_read,
78
+ latency_ms: latency_ms,
79
+ agent_id: agent_id,
80
+ feature_name: feature_name,
81
+ tags: tags
72
82
  )
73
83
  rescue StandardError
74
84
  # Never let tracking errors surface to the caller
75
85
  end
86
+
87
+ def safe_int(usage, method_name)
88
+ return 0 unless usage.respond_to?(method_name)
89
+ (usage.public_send(method_name) || 0).to_i
90
+ end
76
91
  end
77
92
  end
78
93
  end
@@ -2,7 +2,8 @@
2
2
 
3
3
  module Tokenr
4
4
  module Integrations
5
- # Wrap an OpenAI client to automatically track costs.
5
+ # Wrap an OpenAI client (or any OpenAI-compatible client) to automatically
6
+ # track costs, including prompt-cache token costs.
6
7
  #
7
8
  # Usage:
8
9
  # require "openai"
@@ -16,28 +17,51 @@ module Tokenr
16
17
  # response = tracked.chat(parameters: { model: "gpt-4o", messages: [...] })
17
18
  # # Cost is tracked automatically — no other changes needed.
18
19
  #
20
+ # # For OpenAI-compatible providers (MiniMax, DeepSeek, etc.) the provider
21
+ # # is auto-detected from the client's URI base. Pass provider: explicitly
22
+ # # to override if needed.
23
+ # minimax_client = OpenAI::Client.new(
24
+ # access_token: ENV["MINIMAX_API_KEY"],
25
+ # uri_base: "https://api.minimax.io/v1/"
26
+ # )
27
+ # tracked = Tokenr::Integrations::OpenAI.wrap(minimax_client, agent_id: "my-bot")
28
+ #
19
29
  module OpenAI
30
+ # Maps URI base substrings to Tokenr provider slugs.
31
+ PROVIDER_MAP = {
32
+ "minimax" => "minimax",
33
+ "anthropic" => "anthropic",
34
+ "googleapis" => "google",
35
+ "mistral" => "mistral",
36
+ "cohere" => "cohere",
37
+ "deepseek" => "deepseek",
38
+ "x.ai" => "xai",
39
+ "xai" => "xai",
40
+ "azure" => "azure_openai",
41
+ }.freeze
42
+
20
43
  class << self
21
- def wrap(client, agent_id: nil, feature_name: nil, tags: {})
22
- Wrapper.new(client, agent_id: agent_id, feature_name: feature_name, tags: tags)
44
+ def wrap(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
45
+ Wrapper.new(client, agent_id: agent_id, feature_name: feature_name, tags: tags, provider: provider)
23
46
  end
24
47
  end
25
48
 
26
49
  class Wrapper
27
50
  attr_reader :client, :agent_id, :feature_name, :tags
28
51
 
29
- def initialize(client, agent_id: nil, feature_name: nil, tags: {})
30
- @client = client
31
- @agent_id = agent_id
32
- @feature_name = feature_name
33
- @tags = tags
52
+ def initialize(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
53
+ @client = client
54
+ @agent_id = agent_id
55
+ @feature_name = feature_name
56
+ @tags = tags
57
+ @explicit_provider = provider
34
58
  end
35
59
 
36
60
  def chat(parameters:)
37
61
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
38
62
  response = client.chat(parameters: parameters)
39
63
  latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
40
- track_chat_response(parameters[:model], response, latency)
64
+ track_chat_response(response, latency)
41
65
  response
42
66
  end
43
67
 
@@ -45,7 +69,7 @@ module Tokenr
45
69
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
46
70
  response = client.completions(parameters: parameters)
47
71
  latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
48
- track_chat_response(parameters[:model], response, latency)
72
+ track_chat_response(response, latency)
49
73
  response
50
74
  end
51
75
 
@@ -56,8 +80,8 @@ module Tokenr
56
80
  usage = response.dig("usage")
57
81
  if usage
58
82
  Tokenr.track(
59
- model: parameters[:model],
60
- provider: "openai",
83
+ model: response.dig("model") || parameters[:model],
84
+ provider: provider,
61
85
  input_tokens: usage["prompt_tokens"] || 0,
62
86
  output_tokens: 0,
63
87
  latency_ms: latency,
@@ -80,19 +104,39 @@ module Tokenr
80
104
 
81
105
  private
82
106
 
83
- def track_chat_response(model, response, latency_ms)
107
+ def provider
108
+ return @explicit_provider if @explicit_provider
109
+
110
+ # Try to read the URI base from the wrapped client so we can
111
+ # identify OpenAI-compatible providers automatically.
112
+ uri = [:@uri_base, :@api_base, :@base_url].reduce("") do |acc, ivar|
113
+ acc.empty? ? client.instance_variable_get(ivar).to_s : acc
114
+ end.downcase
115
+
116
+ PROVIDER_MAP.each { |keyword, slug| return slug if uri.include?(keyword) }
117
+ "openai"
118
+ end
119
+
120
+ def track_chat_response(response, latency_ms)
84
121
  usage = response.dig("usage")
85
122
  return unless usage
86
123
 
124
+ # prompt_tokens_details.cached_tokens = tokens served from cache.
125
+ # These are included in prompt_tokens, billed at a lower rate.
126
+ details = response.dig("usage", "prompt_tokens_details") || {}
127
+ cache_read = (details["cached_tokens"] || 0).to_i
128
+ non_cached_input = [(usage["prompt_tokens"] || 0).to_i - cache_read, 0].max
129
+
87
130
  Tokenr.track(
88
- model: model,
89
- provider: "openai",
90
- input_tokens: usage["prompt_tokens"] || 0,
91
- output_tokens: usage["completion_tokens"] || 0,
92
- latency_ms: latency_ms,
93
- agent_id: agent_id,
94
- feature_name: feature_name,
95
- tags: tags
131
+ model: response.dig("model"),
132
+ provider: provider,
133
+ input_tokens: non_cached_input,
134
+ output_tokens: (usage["completion_tokens"] || 0).to_i,
135
+ cache_read_tokens: cache_read,
136
+ latency_ms: latency_ms,
137
+ agent_id: agent_id,
138
+ feature_name: feature_name,
139
+ tags: tags
96
140
  )
97
141
  end
98
142
  end
@@ -12,8 +12,8 @@ module Tokenr
12
12
  start_flusher if client.config.async
13
13
  end
14
14
 
15
- def track(model:, input_tokens:, output_tokens:, **options)
16
- data = build_request(model, input_tokens, output_tokens, options)
15
+ def track(model:, input_tokens:, output_tokens:, cache_read_tokens: 0, cache_write_tokens: 0, **options)
16
+ data = build_request(model, input_tokens, output_tokens, options.merge(cache_read_tokens: cache_read_tokens, cache_write_tokens: cache_write_tokens))
17
17
 
18
18
  if client.config.async
19
19
  enqueue(data)
@@ -58,21 +58,25 @@ module Tokenr
58
58
 
59
59
  def build_request(model, input_tokens, output_tokens, options)
60
60
  config = client.config
61
+ cache_read = options[:cache_read_tokens].to_i
62
+ cache_write = options[:cache_write_tokens].to_i
61
63
  {
62
- model: model,
63
- input_tokens: input_tokens,
64
- output_tokens: output_tokens,
65
- agent_id: options[:agent_id] || config.agent_id,
66
- team_id: options[:team_id] || config.team_id,
67
- feature_name: options[:feature_name],
68
- provider: options[:provider],
69
- latency_ms: options[:latency_ms],
70
- status: options[:status] || "success",
71
- external_id: options[:external_id],
72
- total_cost: options[:total_cost],
73
- requested_at: options[:requested_at] || Time.now.iso8601,
74
- tags: config.default_tags.merge(options[:tags] || {}),
75
- metrics: options[:metrics]
64
+ model: model,
65
+ input_tokens: input_tokens,
66
+ output_tokens: output_tokens,
67
+ cache_read_tokens: cache_read > 0 ? cache_read : nil,
68
+ cache_write_tokens: cache_write > 0 ? cache_write : nil,
69
+ agent_id: options[:agent_id] || config.agent_id,
70
+ team_id: options[:team_id] || config.team_id,
71
+ feature_name: options[:feature_name],
72
+ provider: options[:provider],
73
+ latency_ms: options[:latency_ms],
74
+ status: options[:status] || "success",
75
+ external_id: options[:external_id],
76
+ total_cost: options[:total_cost],
77
+ requested_at: options[:requested_at] || Time.now.iso8601,
78
+ tags: config.default_tags.merge(options[:tags] || {}),
79
+ metrics: options[:metrics]
76
80
  }.compact
77
81
  end
78
82
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tokenr
4
- VERSION = "0.1.3"
4
+ VERSION = "0.1.5"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenr-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tokenr