tokenr-ruby 0.1.3 → 0.1.4

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 15ea6965561601d93515ffdf3448f286976ad610bcb2374f225d30404fa6d008
4
- data.tar.gz: d4e501b932db24625348ed27c82003e702dcf4e05ec32664e7bd1255eb13ac43
3
+ metadata.gz: e749473bced8cc1dd80e3173317ce1308b2a6fd2ff24cacdcf535744474d53bb
4
+ data.tar.gz: b127cc4c1bee8cdba896a8a8cfd9c0b211c041f9a9a5b05dcc2e250902471ad9
5
5
  SHA512:
6
- metadata.gz: 255f3b2269c15a41ad3e29c3f302dd79a2498e4098351afa8827db98cc8dde58d75b0acc625e899af24d68ce2bd892a2128e8471f8ac599b6a8aad2fe5555e2f
7
- data.tar.gz: 9f46b51a1e802659dd855474f4fa1c67cbfc8a320560d16130c6ba6e87bb8c8ae84f62f635b8035c5454fbbcbaa2e0d83e8e44d9c4fa8393f9956a0e13588349
6
+ metadata.gz: e8198874061d6ca0378f49fa7c2f40909e938dd03d72ab03c2b4cef7fc64b9cc17eabfb70fda66ccec3675cd0bb2d745ea095f9d4264d44487d2e75656c34627
7
+ data.tar.gz: b0e5cc6cdbdbb3942477e6637465826451ed09e60ee9cfa4f3ff8a99e9061193fd567158e5edec211cb287fb493e4ea04ffc47a9a573252f53ddeffb7cc4e57f
@@ -2,7 +2,8 @@
2
2
 
3
3
  module Tokenr
4
4
  module Integrations
5
- # Wrap an Anthropic client to automatically track costs.
5
+ # Wrap an Anthropic client to automatically track costs, including
6
+ # prompt-cache token costs (cache_creation and cache_read).
6
7
  #
7
8
  # Usage:
8
9
  # require "anthropic"
@@ -42,7 +43,7 @@ module Tokenr
42
43
  response = client.messages.create(model: model, messages: messages, **params)
43
44
  latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
44
45
 
45
- track_response(model, response, latency)
46
+ track_response(response, latency)
46
47
  response
47
48
  end
48
49
 
@@ -56,23 +57,37 @@ module Tokenr
56
57
 
57
58
  private
58
59
 
59
- def track_response(model, response, latency_ms)
60
+ def track_response(response, latency_ms)
60
61
  usage = response.usage
61
62
  return unless usage
62
63
 
64
+ # Anthropic reports all three token categories separately.
65
+ # input_tokens = non-cached input only (excludes cache hits and writes).
66
+ # cache_creation_input_tokens = tokens written to cache this turn.
67
+ # cache_read_input_tokens = tokens served from cache (billed at ~10% of input rate).
68
+ cache_write = safe_int(usage, :cache_creation_input_tokens)
69
+ cache_read = safe_int(usage, :cache_read_input_tokens)
70
+
63
71
  Tokenr.track(
64
- model: model,
65
- provider: "anthropic",
66
- input_tokens: usage.input_tokens || 0,
67
- output_tokens: usage.output_tokens || 0,
68
- latency_ms: latency_ms,
69
- agent_id: agent_id,
70
- feature_name: feature_name,
71
- tags: tags
72
+ model: response.model,
73
+ provider: "anthropic",
74
+ input_tokens: usage.input_tokens || 0,
75
+ output_tokens: usage.output_tokens || 0,
76
+ cache_write_tokens: cache_write,
77
+ cache_read_tokens: cache_read,
78
+ latency_ms: latency_ms,
79
+ agent_id: agent_id,
80
+ feature_name: feature_name,
81
+ tags: tags
72
82
  )
73
83
  rescue StandardError
74
84
  # Never let tracking errors surface to the caller
75
85
  end
86
+
87
+ def safe_int(usage, method_name)
88
+ return 0 unless usage.respond_to?(method_name)
89
+ (usage.public_send(method_name) || 0).to_i
90
+ end
76
91
  end
77
92
  end
78
93
  end
@@ -2,7 +2,8 @@
2
2
 
3
3
  module Tokenr
4
4
  module Integrations
5
- # Wrap an OpenAI client to automatically track costs.
5
+ # Wrap an OpenAI client (or any OpenAI-compatible client) to automatically
6
+ # track costs, including prompt-cache token costs.
6
7
  #
7
8
  # Usage:
8
9
  # require "openai"
@@ -16,28 +17,51 @@ module Tokenr
16
17
  # response = tracked.chat(parameters: { model: "gpt-4o", messages: [...] })
17
18
  # # Cost is tracked automatically — no other changes needed.
18
19
  #
20
+ # # For OpenAI-compatible providers (MiniMax, DeepSeek, etc.) the provider
21
+ # # is auto-detected from the client's URI base. Pass provider: explicitly
22
+ # # to override if needed.
23
+ # minimax_client = OpenAI::Client.new(
24
+ # access_token: ENV["MINIMAX_API_KEY"],
25
+ # uri_base: "https://api.minimax.io/v1/"
26
+ # )
27
+ # tracked = Tokenr::Integrations::OpenAI.wrap(minimax_client, agent_id: "my-bot")
28
+ #
19
29
  module OpenAI
30
+ # Maps URI base substrings to Tokenr provider slugs.
31
+ PROVIDER_MAP = {
32
+ "minimax" => "minimax",
33
+ "anthropic" => "anthropic",
34
+ "googleapis" => "google",
35
+ "mistral" => "mistral",
36
+ "cohere" => "cohere",
37
+ "deepseek" => "deepseek",
38
+ "x.ai" => "xai",
39
+ "xai" => "xai",
40
+ "azure" => "azure_openai",
41
+ }.freeze
42
+
20
43
  class << self
21
- def wrap(client, agent_id: nil, feature_name: nil, tags: {})
22
- Wrapper.new(client, agent_id: agent_id, feature_name: feature_name, tags: tags)
44
+ def wrap(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
45
+ Wrapper.new(client, agent_id: agent_id, feature_name: feature_name, tags: tags, provider: provider)
23
46
  end
24
47
  end
25
48
 
26
49
  class Wrapper
27
50
  attr_reader :client, :agent_id, :feature_name, :tags
28
51
 
29
- def initialize(client, agent_id: nil, feature_name: nil, tags: {})
30
- @client = client
31
- @agent_id = agent_id
32
- @feature_name = feature_name
33
- @tags = tags
52
+ def initialize(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
53
+ @client = client
54
+ @agent_id = agent_id
55
+ @feature_name = feature_name
56
+ @tags = tags
57
+ @explicit_provider = provider
34
58
  end
35
59
 
36
60
  def chat(parameters:)
37
61
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
38
62
  response = client.chat(parameters: parameters)
39
63
  latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
40
- track_chat_response(parameters[:model], response, latency)
64
+ track_chat_response(response, latency)
41
65
  response
42
66
  end
43
67
 
@@ -45,7 +69,7 @@ module Tokenr
45
69
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
46
70
  response = client.completions(parameters: parameters)
47
71
  latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
48
- track_chat_response(parameters[:model], response, latency)
72
+ track_chat_response(response, latency)
49
73
  response
50
74
  end
51
75
 
@@ -56,8 +80,8 @@ module Tokenr
56
80
  usage = response.dig("usage")
57
81
  if usage
58
82
  Tokenr.track(
59
- model: parameters[:model],
60
- provider: "openai",
83
+ model: response.dig("model") || parameters[:model],
84
+ provider: provider,
61
85
  input_tokens: usage["prompt_tokens"] || 0,
62
86
  output_tokens: 0,
63
87
  latency_ms: latency,
@@ -80,19 +104,39 @@ module Tokenr
80
104
 
81
105
  private
82
106
 
83
- def track_chat_response(model, response, latency_ms)
107
+ def provider
108
+ return @explicit_provider if @explicit_provider
109
+
110
+ # Try to read the URI base from the wrapped client so we can
111
+ # identify OpenAI-compatible providers automatically.
112
+ uri = [:@uri_base, :@api_base, :@base_url].reduce("") do |acc, ivar|
113
+ acc.empty? ? client.instance_variable_get(ivar).to_s : acc
114
+ end.downcase
115
+
116
+ PROVIDER_MAP.each { |keyword, slug| return slug if uri.include?(keyword) }
117
+ "openai"
118
+ end
119
+
120
+ def track_chat_response(response, latency_ms)
84
121
  usage = response.dig("usage")
85
122
  return unless usage
86
123
 
124
+ # prompt_tokens_details.cached_tokens = tokens served from cache.
125
+ # These are included in prompt_tokens, billed at a lower rate.
126
+ details = response.dig("usage", "prompt_tokens_details") || {}
127
+ cache_read = (details["cached_tokens"] || 0).to_i
128
+ non_cached_input = [(usage["prompt_tokens"] || 0).to_i - cache_read, 0].max
129
+
87
130
  Tokenr.track(
88
- model: model,
89
- provider: "openai",
90
- input_tokens: usage["prompt_tokens"] || 0,
91
- output_tokens: usage["completion_tokens"] || 0,
92
- latency_ms: latency_ms,
93
- agent_id: agent_id,
94
- feature_name: feature_name,
95
- tags: tags
131
+ model: response.dig("model"),
132
+ provider: provider,
133
+ input_tokens: non_cached_input,
134
+ output_tokens: (usage["completion_tokens"] || 0).to_i,
135
+ cache_read_tokens: cache_read,
136
+ latency_ms: latency_ms,
137
+ agent_id: agent_id,
138
+ feature_name: feature_name,
139
+ tags: tags
96
140
  )
97
141
  end
98
142
  end
@@ -12,8 +12,8 @@ module Tokenr
12
12
  start_flusher if client.config.async
13
13
  end
14
14
 
15
- def track(model:, input_tokens:, output_tokens:, **options)
16
- data = build_request(model, input_tokens, output_tokens, options)
15
+ def track(model:, input_tokens:, output_tokens:, cache_read_tokens: 0, cache_write_tokens: 0, **options)
16
+ data = build_request(model, input_tokens, output_tokens, options.merge(cache_read_tokens: cache_read_tokens, cache_write_tokens: cache_write_tokens))
17
17
 
18
18
  if client.config.async
19
19
  enqueue(data)
@@ -58,21 +58,25 @@ module Tokenr
58
58
 
59
59
  def build_request(model, input_tokens, output_tokens, options)
60
60
  config = client.config
61
+ cache_read = options[:cache_read_tokens].to_i
62
+ cache_write = options[:cache_write_tokens].to_i
61
63
  {
62
- model: model,
63
- input_tokens: input_tokens,
64
- output_tokens: output_tokens,
65
- agent_id: options[:agent_id] || config.agent_id,
66
- team_id: options[:team_id] || config.team_id,
67
- feature_name: options[:feature_name],
68
- provider: options[:provider],
69
- latency_ms: options[:latency_ms],
70
- status: options[:status] || "success",
71
- external_id: options[:external_id],
72
- total_cost: options[:total_cost],
73
- requested_at: options[:requested_at] || Time.now.iso8601,
74
- tags: config.default_tags.merge(options[:tags] || {}),
75
- metrics: options[:metrics]
64
+ model: model,
65
+ input_tokens: input_tokens,
66
+ output_tokens: output_tokens,
67
+ cache_read_tokens: cache_read > 0 ? cache_read : nil,
68
+ cache_write_tokens: cache_write > 0 ? cache_write : nil,
69
+ agent_id: options[:agent_id] || config.agent_id,
70
+ team_id: options[:team_id] || config.team_id,
71
+ feature_name: options[:feature_name],
72
+ provider: options[:provider],
73
+ latency_ms: options[:latency_ms],
74
+ status: options[:status] || "success",
75
+ external_id: options[:external_id],
76
+ total_cost: options[:total_cost],
77
+ requested_at: options[:requested_at] || Time.now.iso8601,
78
+ tags: config.default_tags.merge(options[:tags] || {}),
79
+ metrics: options[:metrics]
76
80
  }.compact
77
81
  end
78
82
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tokenr
4
- VERSION = "0.1.3"
4
+ VERSION = "0.1.4"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenr-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tokenr