RubyGems - tokenr-ruby - Versions diffs - 0.1.3 → 0.1.4 - Mend

tokenr-ruby 0.1.3 → 0.1.4

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (6)

checksums.yaml +4 -4
data/lib/tokenr/integrations/anthropic.rb +26 -11
data/lib/tokenr/integrations/openai.rb +65 -21
data/lib/tokenr/tracker.rb +20 -16
data/lib/tokenr/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 15ea6965561601d93515ffdf3448f286976ad610bcb2374f225d30404fa6d008
-  data.tar.gz: d4e501b932db24625348ed27c82003e702dcf4e05ec32664e7bd1255eb13ac43
+  metadata.gz: e749473bced8cc1dd80e3173317ce1308b2a6fd2ff24cacdcf535744474d53bb
+  data.tar.gz: b127cc4c1bee8cdba896a8a8cfd9c0b211c041f9a9a5b05dcc2e250902471ad9
 SHA512:
-  metadata.gz: 255f3b2269c15a41ad3e29c3f302dd79a2498e4098351afa8827db98cc8dde58d75b0acc625e899af24d68ce2bd892a2128e8471f8ac599b6a8aad2fe5555e2f
-  data.tar.gz: 9f46b51a1e802659dd855474f4fa1c67cbfc8a320560d16130c6ba6e87bb8c8ae84f62f635b8035c5454fbbcbaa2e0d83e8e44d9c4fa8393f9956a0e13588349
+  metadata.gz: e8198874061d6ca0378f49fa7c2f40909e938dd03d72ab03c2b4cef7fc64b9cc17eabfb70fda66ccec3675cd0bb2d745ea095f9d4264d44487d2e75656c34627
+  data.tar.gz: b0e5cc6cdbdbb3942477e6637465826451ed09e60ee9cfa4f3ff8a99e9061193fd567158e5edec211cb287fb493e4ea04ffc47a9a573252f53ddeffb7cc4e57f

data/lib/tokenr/integrations/anthropic.rb CHANGED Viewed

@@ -2,7 +2,8 @@
 module Tokenr
   module Integrations
-    # Wrap an Anthropic client to automatically track costs.
+    # Wrap an Anthropic client to automatically track costs, including
+    # prompt-cache token costs (cache_creation and cache_read).
     #
     # Usage:
     #   require "anthropic"
@@ -42,7 +43,7 @@ module Tokenr
           response = client.messages.create(model: model, messages: messages, **params)
           latency  = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
-          track_response(model, response, latency)
+          track_response(response, latency)
           response
         end
@@ -56,23 +57,37 @@ module Tokenr
         private
-        def track_response(model, response, latency_ms)
+        def track_response(response, latency_ms)
           usage = response.usage
           return unless usage
+          # Anthropic reports all three token categories separately.
+          # input_tokens = non-cached input only (excludes cache hits and writes).
+          # cache_creation_input_tokens = tokens written to cache this turn.
+          # cache_read_input_tokens = tokens served from cache (billed at ~10% of input rate).
+          cache_write = safe_int(usage, :cache_creation_input_tokens)
+          cache_read  = safe_int(usage, :cache_read_input_tokens)
           Tokenr.track(
-            model:         model,
-            provider:      "anthropic",
-            input_tokens:  usage.input_tokens  || 0,
-            output_tokens: usage.output_tokens || 0,
-            latency_ms:    latency_ms,
-            agent_id:      agent_id,
-            feature_name:  feature_name,
-            tags:          tags
+            model:              response.model,
+            provider:           "anthropic",
+            input_tokens:       usage.input_tokens  || 0,
+            output_tokens:      usage.output_tokens || 0,
+            cache_write_tokens: cache_write,
+            cache_read_tokens:  cache_read,
+            latency_ms:         latency_ms,
+            agent_id:           agent_id,
+            feature_name:       feature_name,
+            tags:               tags
           )
         rescue StandardError
           # Never let tracking errors surface to the caller
         end
+        def safe_int(usage, method_name)
+          return 0 unless usage.respond_to?(method_name)
+          (usage.public_send(method_name) || 0).to_i
+        end
       end
     end
   end

data/lib/tokenr/integrations/openai.rb CHANGED Viewed

@@ -2,7 +2,8 @@
 module Tokenr
   module Integrations
-    # Wrap an OpenAI client to automatically track costs.
+    # Wrap an OpenAI client (or any OpenAI-compatible client) to automatically
+    # track costs, including prompt-cache token costs.
     #
     # Usage:
     #   require "openai"
@@ -16,28 +17,51 @@ module Tokenr
     #   response = tracked.chat(parameters: { model: "gpt-4o", messages: [...] })
     #   # Cost is tracked automatically — no other changes needed.
     #
+    #   # For OpenAI-compatible providers (MiniMax, DeepSeek, etc.) the provider
+    #   # is auto-detected from the client's URI base. Pass provider: explicitly
+    #   # to override if needed.
+    #   minimax_client = OpenAI::Client.new(
+    #     access_token: ENV["MINIMAX_API_KEY"],
+    #     uri_base: "https://api.minimax.io/v1/"
+    #   )
+    #   tracked = Tokenr::Integrations::OpenAI.wrap(minimax_client, agent_id: "my-bot")
+    #
     module OpenAI
+      # Maps URI base substrings to Tokenr provider slugs.
+      PROVIDER_MAP = {
+        "minimax"    => "minimax",
+        "anthropic"  => "anthropic",
+        "googleapis" => "google",
+        "mistral"    => "mistral",
+        "cohere"     => "cohere",
+        "deepseek"   => "deepseek",
+        "x.ai"       => "xai",
+        "xai"        => "xai",
+        "azure"      => "azure_openai",
+      }.freeze
       class << self
-        def wrap(client, agent_id: nil, feature_name: nil, tags: {})
-          Wrapper.new(client, agent_id: agent_id, feature_name: feature_name, tags: tags)
+        def wrap(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
+          Wrapper.new(client, agent_id: agent_id, feature_name: feature_name, tags: tags, provider: provider)
         end
       end
       class Wrapper
         attr_reader :client, :agent_id, :feature_name, :tags
-        def initialize(client, agent_id: nil, feature_name: nil, tags: {})
-          @client       = client
-          @agent_id     = agent_id
-          @feature_name = feature_name
-          @tags         = tags
+        def initialize(client, agent_id: nil, feature_name: nil, tags: {}, provider: nil)
+          @client           = client
+          @agent_id         = agent_id
+          @feature_name     = feature_name
+          @tags             = tags
+          @explicit_provider = provider
         end
         def chat(parameters:)
           start    = Process.clock_gettime(Process::CLOCK_MONOTONIC)
           response = client.chat(parameters: parameters)
           latency  = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
-          track_chat_response(parameters[:model], response, latency)
+          track_chat_response(response, latency)
           response
         end
@@ -45,7 +69,7 @@ module Tokenr
           start    = Process.clock_gettime(Process::CLOCK_MONOTONIC)
           response = client.completions(parameters: parameters)
           latency  = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
-          track_chat_response(parameters[:model], response, latency)
+          track_chat_response(response, latency)
           response
         end
@@ -56,8 +80,8 @@ module Tokenr
           usage    = response.dig("usage")
           if usage
             Tokenr.track(
-              model:         parameters[:model],
-              provider:      "openai",
+              model:         response.dig("model") || parameters[:model],
+              provider:      provider,
               input_tokens:  usage["prompt_tokens"] || 0,
               output_tokens: 0,
               latency_ms:    latency,
@@ -80,19 +104,39 @@ module Tokenr
         private
-        def track_chat_response(model, response, latency_ms)
+        def provider
+          return @explicit_provider if @explicit_provider
+          # Try to read the URI base from the wrapped client so we can
+          # identify OpenAI-compatible providers automatically.
+          uri = [:@uri_base, :@api_base, :@base_url].reduce("") do |acc, ivar|
+            acc.empty? ? client.instance_variable_get(ivar).to_s : acc
+          end.downcase
+          PROVIDER_MAP.each { |keyword, slug| return slug if uri.include?(keyword) }
+          "openai"
+        end
+        def track_chat_response(response, latency_ms)
           usage = response.dig("usage")
           return unless usage
+          # prompt_tokens_details.cached_tokens = tokens served from cache.
+          # These are included in prompt_tokens, billed at a lower rate.
+          details   = response.dig("usage", "prompt_tokens_details") || {}
+          cache_read = (details["cached_tokens"] || 0).to_i
+          non_cached_input = [(usage["prompt_tokens"] || 0).to_i - cache_read, 0].max
           Tokenr.track(
-            model:         model,
-            provider:      "openai",
-            input_tokens:  usage["prompt_tokens"]     || 0,
-            output_tokens: usage["completion_tokens"] || 0,
-            latency_ms:    latency_ms,
-            agent_id:      agent_id,
-            feature_name:  feature_name,
-            tags:          tags
+            model:             response.dig("model"),
+            provider:          provider,
+            input_tokens:      non_cached_input,
+            output_tokens:     (usage["completion_tokens"] || 0).to_i,
+            cache_read_tokens: cache_read,
+            latency_ms:        latency_ms,
+            agent_id:          agent_id,
+            feature_name:      feature_name,
+            tags:              tags
           )
         end
       end

data/lib/tokenr/tracker.rb CHANGED Viewed

@@ -12,8 +12,8 @@ module Tokenr
       start_flusher if client.config.async
     end
-    def track(model:, input_tokens:, output_tokens:, **options)
-      data = build_request(model, input_tokens, output_tokens, options)
+    def track(model:, input_tokens:, output_tokens:, cache_read_tokens: 0, cache_write_tokens: 0, **options)
+      data = build_request(model, input_tokens, output_tokens, options.merge(cache_read_tokens: cache_read_tokens, cache_write_tokens: cache_write_tokens))
       if client.config.async
         enqueue(data)
@@ -58,21 +58,25 @@ module Tokenr
     def build_request(model, input_tokens, output_tokens, options)
       config = client.config
+      cache_read  = options[:cache_read_tokens].to_i
+      cache_write = options[:cache_write_tokens].to_i
       {
-        model:        model,
-        input_tokens: input_tokens,
-        output_tokens: output_tokens,
-        agent_id:     options[:agent_id]     || config.agent_id,
-        team_id:      options[:team_id]      || config.team_id,
-        feature_name: options[:feature_name],
-        provider:     options[:provider],
-        latency_ms:   options[:latency_ms],
-        status:       options[:status]       || "success",
-        external_id:  options[:external_id],
-        total_cost:   options[:total_cost],
-        requested_at: options[:requested_at] || Time.now.iso8601,
-        tags:         config.default_tags.merge(options[:tags] || {}),
-        metrics:      options[:metrics]
+        model:              model,
+        input_tokens:       input_tokens,
+        output_tokens:      output_tokens,
+        cache_read_tokens:  cache_read  > 0 ? cache_read  : nil,
+        cache_write_tokens: cache_write > 0 ? cache_write : nil,
+        agent_id:           options[:agent_id]     || config.agent_id,
+        team_id:            options[:team_id]      || config.team_id,
+        feature_name:       options[:feature_name],
+        provider:           options[:provider],
+        latency_ms:         options[:latency_ms],
+        status:             options[:status]       || "success",
+        external_id:        options[:external_id],
+        total_cost:         options[:total_cost],
+        requested_at:       options[:requested_at] || Time.now.iso8601,
+        tags:               config.default_tags.merge(options[:tags] || {}),
+        metrics:            options[:metrics]
       }.compact
     end

data/lib/tokenr/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Tokenr
-  VERSION = "0.1.3"
+  VERSION = "0.1.4"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tokenr-ruby
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Tokenr