tokenr-ruby 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tokenr/integrations/anthropic.rb +26 -11
- data/lib/tokenr/integrations/openai.rb +65 -21
- data/lib/tokenr/tracker.rb +20 -16
- data/lib/tokenr/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e749473bced8cc1dd80e3173317ce1308b2a6fd2ff24cacdcf535744474d53bb
|
|
4
|
+
data.tar.gz: b127cc4c1bee8cdba896a8a8cfd9c0b211c041f9a9a5b05dcc2e250902471ad9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e8198874061d6ca0378f49fa7c2f40909e938dd03d72ab03c2b4cef7fc64b9cc17eabfb70fda66ccec3675cd0bb2d745ea095f9d4264d44487d2e75656c34627
|
|
7
|
+
data.tar.gz: b0e5cc6cdbdbb3942477e6637465826451ed09e60ee9cfa4f3ff8a99e9061193fd567158e5edec211cb287fb493e4ea04ffc47a9a573252f53ddeffb7cc4e57f
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module Tokenr
|
|
4
4
|
module Integrations
|
|
5
|
-
# Wrap an Anthropic client to automatically track costs
|
|
5
|
+
# Wrap an Anthropic client to automatically track costs, including
|
|
6
|
+
# prompt-cache token costs (cache_creation and cache_read).
|
|
6
7
|
#
|
|
7
8
|
# Usage:
|
|
8
9
|
# require "anthropic"
|
|
@@ -42,7 +43,7 @@ module Tokenr
|
|
|
42
43
|
response = client.messages.create(model: model, messages: messages, **params)
|
|
43
44
|
latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
|
|
44
45
|
|
|
45
|
-
track_response(
|
|
46
|
+
track_response(response, latency)
|
|
46
47
|
response
|
|
47
48
|
end
|
|
48
49
|
|
|
@@ -56,23 +57,37 @@ module Tokenr
|
|
|
56
57
|
|
|
57
58
|
private
|
|
58
59
|
|
|
59
|
-
def track_response(
|
|
60
|
+
# Reports the token usage of an Anthropic response to Tokenr.
#
# Does nothing when the response carries no usage object. Any error raised
# while reading the response or reporting it is swallowed on purpose.
#
# @param response [#usage, #model] the object returned by the wrapped client
# @param latency_ms [Integer] request duration in milliseconds
def track_response(response, latency_ms)
  usage = response.usage
  return unless usage

  # Anthropic reports three input-token categories separately:
  #   input_tokens                - non-cached input only
  #   cache_creation_input_tokens - tokens written to the cache this turn
  #   cache_read_input_tokens     - tokens served from the cache
  #                                 (billed at ~10% of the input rate)
  written_to_cache = safe_int(usage, :cache_creation_input_tokens)
  served_from_cache = safe_int(usage, :cache_read_input_tokens)

  payload = {
    model: response.model,
    provider: "anthropic",
    input_tokens: usage.input_tokens || 0,
    output_tokens: usage.output_tokens || 0,
    cache_write_tokens: written_to_cache,
    cache_read_tokens: served_from_cache,
    latency_ms: latency_ms,
    agent_id: agent_id,
    feature_name: feature_name,
    tags: tags
  }

  Tokenr.track(**payload)
rescue StandardError
  # Never let tracking errors surface to the caller
end
|
|
86
|
+
|
|
87
|
+
# Reads an optional numeric attribute from a usage object.
#
# @param usage [Object] the usage object to query
# @param method_name [Symbol] name of the reader to call
# @return [Integer] the reader's value converted with #to_i, or 0 when the
#   reader is not available or returns nil/false
def safe_int(usage, method_name)
  if usage.respond_to?(method_name)
    raw = usage.public_send(method_name)
    (raw || 0).to_i
  else
    0
  end
end
|
|
76
91
|
end
|
|
77
92
|
end
|
|
78
93
|
end
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module Tokenr
|
|
4
4
|
module Integrations
|
|
5
|
-
# Wrap an OpenAI client to automatically
|
|
5
|
+
# Wrap an OpenAI client (or any OpenAI-compatible client) to automatically
|
|
6
|
+
# track costs, including prompt-cache token costs.
|
|
6
7
|
#
|
|
7
8
|
# Usage:
|
|
8
9
|
# require "openai"
|
|
@@ -16,28 +17,51 @@ module Tokenr
|
|
|
16
17
|
# response = tracked.chat(parameters: { model: "gpt-4o", messages: [...] })
|
|
17
18
|
# # Cost is tracked automatically — no other changes needed.
|
|
18
19
|
#
|
|
20
|
+
# # For OpenAI-compatible providers (MiniMax, DeepSeek, etc.) the provider
|
|
21
|
+
# # is auto-detected from the client's URI base. Pass provider: explicitly
|
|
22
|
+
# # to override if needed.
|
|
23
|
+
# minimax_client = OpenAI::Client.new(
|
|
24
|
+
# access_token: ENV["MINIMAX_API_KEY"],
|
|
25
|
+
# uri_base: "https://api.minimax.io/v1/"
|
|
26
|
+
# )
|
|
27
|
+
# tracked = Tokenr::Integrations::OpenAI.wrap(minimax_client, agent_id: "my-bot")
|
|
28
|
+
#
|
|
19
29
|
module OpenAI
|
|
30
|
+
# Lookup table from a substring of the client's base URI to the Tokenr
# provider slug. Entries are checked in insertion order and the first
# keyword contained in the URI wins, so keep more specific keywords first.
PROVIDER_MAP = {
  "minimax"    => "minimax",
  "anthropic"  => "anthropic",
  "googleapis" => "google",
  "mistral"    => "mistral",
  "cohere"     => "cohere",
  "deepseek"   => "deepseek",
  "x.ai"       => "xai",
  "xai"        => "xai",
  "azure"      => "azure_openai"
}.freeze
|
|
42
|
+
|
|
20
43
|
class << self
  # Builds a tracking Wrapper around +client+.
  #
  # @param client [Object] the client to wrap
  # @param agent_id [String, nil] forwarded to the Wrapper
  # @param feature_name [String, nil] forwarded to the Wrapper
  # @param tags [Hash] forwarded to the Wrapper
  # @param provider [String, nil] explicit provider slug; nil lets the
  #   Wrapper decide
  # @return [Wrapper]
  def wrap(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
    Wrapper.new(
      client,
      agent_id: agent_id,
      feature_name: feature_name,
      tags: tags,
      provider: provider
    )
  end
end
|
|
25
48
|
|
|
26
49
|
class Wrapper
|
|
27
50
|
attr_reader :client, :agent_id, :feature_name, :tags
|
|
28
51
|
|
|
29
|
-
def initialize(client, agent_id: nil, feature_name: nil, tags: {})
|
|
30
|
-
@client
|
|
31
|
-
@agent_id
|
|
32
|
-
@feature_name
|
|
33
|
-
@tags
|
|
52
|
+
# Stores the wrapped client and the attribution metadata attached to every
# tracked call.
#
# @param client [Object] the client being wrapped
# @param agent_id [String, nil]
# @param feature_name [String, nil]
# @param tags [Hash]
# @param provider [String, nil] explicit provider slug; kept separately so
#   that nil can mean "detect from the client"
def initialize(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
  # Wrapped client plus attribution metadata.
  @client = client
  @agent_id = agent_id
  @feature_name = feature_name
  @tags = tags

  # Provider override supplied by the caller, if any.
  @explicit_provider = provider
end
|
|
35
59
|
|
|
36
60
|
# Calls the wrapped client's #chat, reports the call via
# #track_chat_response, and hands the client's response back unchanged.
#
# @param parameters [Hash] passed straight through to the wrapped client
# @return [Object] whatever the wrapped client returned
def chat(parameters:)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  client.chat(parameters: parameters).tap do |response|
    # Monotonic clock difference is in seconds; convert to whole milliseconds.
    elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round
    track_chat_response(response, elapsed_ms)
  end
end
|
|
43
67
|
|
|
@@ -45,7 +69,7 @@ module Tokenr
|
|
|
45
69
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
46
70
|
response = client.completions(parameters: parameters)
|
|
47
71
|
latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
|
|
48
|
-
track_chat_response(
|
|
72
|
+
track_chat_response(response, latency)
|
|
49
73
|
response
|
|
50
74
|
end
|
|
51
75
|
|
|
@@ -56,8 +80,8 @@ module Tokenr
|
|
|
56
80
|
usage = response.dig("usage")
|
|
57
81
|
if usage
|
|
58
82
|
Tokenr.track(
|
|
59
|
-
model: parameters[:model],
|
|
60
|
-
provider:
|
|
83
|
+
model: response.dig("model") || parameters[:model],
|
|
84
|
+
provider: provider,
|
|
61
85
|
input_tokens: usage["prompt_tokens"] || 0,
|
|
62
86
|
output_tokens: 0,
|
|
63
87
|
latency_ms: latency,
|
|
@@ -80,19 +104,39 @@ module Tokenr
|
|
|
80
104
|
|
|
81
105
|
private
|
|
82
106
|
|
|
83
|
-
def
|
|
107
|
+
# Resolves the provider slug reported to Tokenr.
#
# An explicitly supplied provider always wins. Otherwise the wrapped
# client's base URI is read from the first of @uri_base, @api_base or
# @base_url that yields a non-empty string, lower-cased, and matched
# against PROVIDER_MAP; "openai" is the fallback.
#
# @return [String]
def provider
  return @explicit_provider if @explicit_provider

  # Try to read the URI base from the wrapped client so we can
  # identify OpenAI-compatible providers automatically.
  base_uri = ""
  %i[@uri_base @api_base @base_url].each do |ivar|
    base_uri = client.instance_variable_get(ivar).to_s
    break unless base_uri.empty?
  end
  base_uri = base_uri.downcase

  match = PROVIDER_MAP.find { |keyword, _slug| base_uri.include?(keyword) }
  match ? match.last : "openai"
end
|
|
119
|
+
|
|
120
|
+
# Reports the token usage of an OpenAI-style response to Tokenr.
#
# Tracking is best-effort. A response that does not support #dig (for
# example a raw String body), a response without a "usage" entry, or any
# error raised while building or sending the report is ignored, so a
# tracking problem can never break the caller's already-completed API call.
#
# @param response [#dig] parsed response body with String keys
# @param latency_ms [Integer] request duration in milliseconds
# @return [Object, nil] the result of Tokenr.track, or nil when nothing
#   was tracked
def track_chat_response(response, latency_ms)
  return unless response.respond_to?(:dig)

  usage = response.dig("usage")
  return unless usage

  # prompt_tokens_details.cached_tokens = tokens served from cache.
  # These are included in prompt_tokens, billed at a lower rate, so they
  # are subtracted to get the non-cached input count (never below zero).
  details = response.dig("usage", "prompt_tokens_details") || {}
  cache_read = (details["cached_tokens"] || 0).to_i
  non_cached_input = [(usage["prompt_tokens"] || 0).to_i - cache_read, 0].max

  Tokenr.track(
    model: response.dig("model"),
    provider: provider,
    input_tokens: non_cached_input,
    output_tokens: (usage["completion_tokens"] || 0).to_i,
    cache_read_tokens: cache_read,
    latency_ms: latency_ms,
    agent_id: agent_id,
    feature_name: feature_name,
    tags: tags
  )
rescue StandardError
  # Never let tracking errors surface to the caller
  nil
end
|
|
98
142
|
end
|
data/lib/tokenr/tracker.rb
CHANGED
|
@@ -12,8 +12,8 @@ module Tokenr
|
|
|
12
12
|
start_flusher if client.config.async
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def track(model:, input_tokens:, output_tokens:, **options)
|
|
16
|
-
data = build_request(model, input_tokens, output_tokens, options)
|
|
15
|
+
def track(model:, input_tokens:, output_tokens:, cache_read_tokens: 0, cache_write_tokens: 0, **options)
|
|
16
|
+
data = build_request(model, input_tokens, output_tokens, options.merge(cache_read_tokens: cache_read_tokens, cache_write_tokens: cache_write_tokens))
|
|
17
17
|
|
|
18
18
|
if client.config.async
|
|
19
19
|
enqueue(data)
|
|
@@ -58,21 +58,25 @@ module Tokenr
|
|
|
58
58
|
|
|
59
59
|
# Assembles the payload hash for one tracked request.
#
# Values missing from +options+ fall back to the client configuration
# (agent_id, team_id), to "success" (status) or to the current time
# (requested_at). Tags are the configured default tags merged with the
# per-call tags. Cache token counts are only included when greater than
# zero, and every nil-valued entry is dropped from the result.
#
# @param model [String]
# @param input_tokens [Integer]
# @param output_tokens [Integer]
# @param options [Hash] optional per-call values keyed by Symbol
# @return [Hash]
def build_request(model, input_tokens, output_tokens, options)
  config = client.config

  # Converts a raw count to an Integer, mapping zero/negative to nil so
  # that #compact removes the key.
  positive_or_nil = lambda do |raw|
    count = raw.to_i
    count.positive? ? count : nil
  end

  payload = {
    model: model,
    input_tokens: input_tokens,
    output_tokens: output_tokens,
    cache_read_tokens: positive_or_nil.call(options[:cache_read_tokens]),
    cache_write_tokens: positive_or_nil.call(options[:cache_write_tokens]),
    agent_id: options[:agent_id] || config.agent_id,
    team_id: options[:team_id] || config.team_id,
    feature_name: options[:feature_name],
    provider: options[:provider],
    latency_ms: options[:latency_ms],
    status: options[:status] || "success",
    external_id: options[:external_id],
    total_cost: options[:total_cost],
    requested_at: options[:requested_at] || Time.now.iso8601,
    tags: config.default_tags.merge(options[:tags] || {}),
    metrics: options[:metrics]
  }

  payload.compact
end
|
|
78
82
|
|
data/lib/tokenr/version.rb
CHANGED