llm_cost_tracker 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/README.md +7 -4
- data/app/assets/llm_cost_tracker/application.css +8 -7
- data/app/controllers/llm_cost_tracker/calls_controller.rb +5 -5
- data/app/controllers/llm_cost_tracker/dashboard_controller.rb +1 -1
- data/app/controllers/llm_cost_tracker/pricing_controller.rb +1 -1
- data/app/helpers/llm_cost_tracker/application_helper.rb +6 -15
- data/app/helpers/llm_cost_tracker/dashboard_filter_options_helper.rb +1 -11
- data/app/helpers/llm_cost_tracker/sortable_table_helper.rb +4 -4
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +4 -6
- data/app/models/llm_cost_tracker/call.rb +28 -63
- data/app/models/llm_cost_tracker/call_line_item.rb +2 -2
- data/app/models/llm_cost_tracker/call_rollup.rb +38 -0
- data/app/models/llm_cost_tracker/call_tag.rb +0 -2
- data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +2 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +64 -43
- data/app/services/llm_cost_tracker/dashboard/filter.rb +5 -0
- data/app/services/llm_cost_tracker/dashboard/masking.rb +31 -0
- data/app/services/llm_cost_tracker/dashboard/monthly_budget.rb +63 -0
- data/app/services/llm_cost_tracker/dashboard/overview_stats.rb +5 -71
- data/app/services/llm_cost_tracker/dashboard/pagination.rb +2 -5
- data/app/services/llm_cost_tracker/dashboard/pricing_overview.rb +30 -44
- data/app/services/llm_cost_tracker/dashboard/setup_state.rb +4 -60
- data/app/services/llm_cost_tracker/dashboard/tag_breakdown.rb +1 -7
- data/app/services/llm_cost_tracker/dashboard/tag_key_explorer.rb +1 -1
- data/app/views/layouts/llm_cost_tracker/application.html.erb +0 -6
- data/app/views/llm_cost_tracker/calls/index.html.erb +8 -8
- data/app/views/llm_cost_tracker/calls/show.html.erb +31 -23
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +8 -8
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +62 -117
- data/app/views/llm_cost_tracker/models/index.html.erb +5 -5
- data/app/views/llm_cost_tracker/pricing/index.html.erb +2 -2
- data/app/views/llm_cost_tracker/shared/_filter_pill_model.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/_filter_pill_provider.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/_filter_pill_stream.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/index.html.erb +3 -3
- data/app/views/llm_cost_tracker/tags/show.html.erb +10 -10
- data/config/routes.rb +2 -3
- data/lib/llm_cost_tracker/budget.rb +24 -26
- data/lib/llm_cost_tracker/capture/sdk_payload.rb +34 -0
- data/lib/llm_cost_tracker/capture/sse.rb +1 -0
- data/lib/llm_cost_tracker/capture/stream_collector.rb +28 -36
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +17 -28
- data/lib/llm_cost_tracker/capture_verifier.rb +59 -0
- data/lib/llm_cost_tracker/charges/cost.rb +27 -0
- data/lib/llm_cost_tracker/{billing → charges}/cost_status.rb +14 -4
- data/lib/llm_cost_tracker/{billing → charges}/line_item.rb +40 -44
- data/lib/llm_cost_tracker/check.rb +5 -0
- data/lib/llm_cost_tracker/configuration.rb +13 -44
- data/lib/llm_cost_tracker/currency.rb +5 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +15 -49
- data/lib/llm_cost_tracker/doctor/price_check.rb +1 -1
- data/lib/llm_cost_tracker/doctor/probe.rb +3 -4
- data/lib/llm_cost_tracker/doctor/schema_check.rb +3 -6
- data/lib/llm_cost_tracker/doctor.rb +5 -69
- data/lib/llm_cost_tracker/engine.rb +4 -4
- data/lib/llm_cost_tracker/event.rb +12 -20
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +2 -3
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/prices_generator.rb +5 -2
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +4 -5
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +3 -2
- data/lib/llm_cost_tracker/ingestion/batch.rb +39 -8
- data/lib/llm_cost_tracker/ingestion/inbox.rb +7 -8
- data/lib/llm_cost_tracker/ingestion/pool.rb +3 -11
- data/lib/llm_cost_tracker/ingestion/worker.rb +7 -17
- data/lib/llm_cost_tracker/ingestion.rb +24 -36
- data/lib/llm_cost_tracker/integrations/anthropic.rb +92 -106
- data/lib/llm_cost_tracker/integrations/base.rb +39 -57
- data/lib/llm_cost_tracker/integrations/openai/batch_capture.rb +84 -0
- data/lib/llm_cost_tracker/integrations/openai/patches.rb +81 -0
- data/lib/llm_cost_tracker/integrations/openai.rb +70 -276
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +87 -99
- data/lib/llm_cost_tracker/integrations.rb +32 -25
- data/lib/llm_cost_tracker/ledger/period/totals.rb +27 -42
- data/lib/llm_cost_tracker/ledger/period.rb +5 -10
- data/lib/llm_cost_tracker/ledger/rollups.rb +67 -98
- data/lib/llm_cost_tracker/ledger/schema/adapter.rb +12 -13
- data/lib/llm_cost_tracker/ledger/schema/base.rb +51 -0
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +24 -79
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +3 -35
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +4 -41
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +30 -99
- data/lib/llm_cost_tracker/ledger/schema/ingestion/inbox_entries.rb +26 -0
- data/lib/llm_cost_tracker/ledger/schema/ingestion/leases.rb +17 -0
- data/lib/llm_cost_tracker/ledger/schema.rb +26 -0
- data/lib/llm_cost_tracker/ledger/store.rb +18 -42
- data/lib/llm_cost_tracker/ledger/tags/{sql.rb → breakdown.rb} +1 -1
- data/lib/llm_cost_tracker/ledger/tags/encoding.rb +4 -6
- data/lib/llm_cost_tracker/ledger.rb +8 -18
- data/lib/llm_cost_tracker/logging.rb +4 -21
- data/lib/llm_cost_tracker/middleware/faraday.rb +61 -50
- data/lib/llm_cost_tracker/parsers.rb +139 -26
- data/lib/llm_cost_tracker/prices.json +1707 -1
- data/lib/llm_cost_tracker/pricing/backfill.rb +52 -80
- data/lib/llm_cost_tracker/pricing/calculation.rb +260 -0
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +17 -18
- data/lib/llm_cost_tracker/pricing/estimator.rb +2 -2
- data/lib/llm_cost_tracker/pricing/matcher.rb +84 -0
- data/lib/llm_cost_tracker/pricing/mode.rb +40 -52
- data/lib/llm_cost_tracker/pricing/price_key.rb +56 -0
- data/lib/llm_cost_tracker/pricing/rate.rb +18 -0
- data/lib/llm_cost_tracker/pricing/registry.rb +189 -100
- data/lib/llm_cost_tracker/pricing/service_rates.rb +69 -0
- data/lib/llm_cost_tracker/pricing/source.rb +7 -0
- data/lib/llm_cost_tracker/pricing/sync/fetcher.rb +2 -3
- data/lib/llm_cost_tracker/pricing/sync/registry_diff.rb +4 -10
- data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +10 -3
- data/lib/llm_cost_tracker/pricing/sync.rb +9 -11
- data/lib/llm_cost_tracker/pricing/unknown.rb +1 -5
- data/lib/llm_cost_tracker/pricing.rb +10 -278
- data/lib/llm_cost_tracker/providers/anthropic/parser.rb +93 -0
- data/lib/llm_cost_tracker/providers/anthropic/response_parser.rb +30 -0
- data/lib/llm_cost_tracker/providers/anthropic/usage_extractor.rb +76 -0
- data/lib/llm_cost_tracker/providers/azure/hosts.rb +1 -4
- data/lib/llm_cost_tracker/providers/azure/parser.rb +44 -0
- data/lib/llm_cost_tracker/providers/gemini/model_families.rb +1 -4
- data/lib/llm_cost_tracker/providers/gemini/parser.rb +177 -0
- data/lib/llm_cost_tracker/providers/gemini/usage_extractor.rb +76 -0
- data/lib/llm_cost_tracker/providers/openai/hosts.rb +1 -7
- data/lib/llm_cost_tracker/providers/openai/model_families.rb +5 -8
- data/lib/llm_cost_tracker/providers/openai/parser.rb +39 -0
- data/lib/llm_cost_tracker/providers/openai/response_parser.rb +152 -0
- data/lib/llm_cost_tracker/providers/openai/service_charges.rb +63 -39
- data/lib/llm_cost_tracker/providers/openai/usage_extractor.rb +72 -0
- data/lib/llm_cost_tracker/providers/openai_compatible/parser.rb +36 -0
- data/lib/llm_cost_tracker/providers.rb +35 -0
- data/lib/llm_cost_tracker/railtie.rb +0 -3
- data/lib/llm_cost_tracker/report/data.rb +3 -4
- data/lib/llm_cost_tracker/report/formatter.rb +1 -1
- data/lib/llm_cost_tracker/report.rb +1 -1
- data/lib/llm_cost_tracker/retention.rb +6 -19
- data/lib/llm_cost_tracker/tags/context.rb +9 -6
- data/lib/llm_cost_tracker/tags/sanitizer.rb +10 -0
- data/lib/llm_cost_tracker/timing.rb +2 -4
- data/lib/llm_cost_tracker/tracker.rb +24 -36
- data/lib/llm_cost_tracker/usage/catalog.rb +58 -0
- data/lib/llm_cost_tracker/usage/dimension.rb +21 -0
- data/lib/llm_cost_tracker/{billing/components.yml → usage/dimensions.yml} +24 -46
- data/lib/llm_cost_tracker/usage/source.rb +14 -0
- data/lib/llm_cost_tracker/usage/token_usage.rb +100 -0
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +43 -52
- data/lib/tasks/llm_cost_tracker.rake +14 -73
- metadata +81 -55
- data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +0 -100
- data/app/helpers/llm_cost_tracker/dashboard_filter_helper.rb +0 -28
- data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +0 -13
- data/app/models/llm_cost_tracker/provider_invoice.rb +0 -13
- data/app/models/llm_cost_tracker/provider_invoice_import.rb +0 -29
- data/app/views/llm_cost_tracker/reconciliation/index.html.erb +0 -174
- data/lib/llm_cost_tracker/billing/components.rb +0 -95
- data/lib/llm_cost_tracker/capture/stream.rb +0 -9
- data/lib/llm_cost_tracker/doctor/capture_verifier.rb +0 -61
- data/lib/llm_cost_tracker/doctor/check.rb +0 -7
- data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +0 -56
- data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +0 -164
- data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +0 -34
- data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +0 -20
- data/lib/llm_cost_tracker/doctor/pricing_snapshot_drift_check.rb +0 -85
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +0 -34
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +0 -60
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoice_imports_provider.rb.erb +0 -36
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoices_metadata_index.rb.erb +0 -27
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoice_imports_provider_generator.rb +0 -31
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoices_metadata_index_generator.rb +0 -31
- data/lib/llm_cost_tracker/ledger/rollups/upsert_sql.rb +0 -40
- data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +0 -57
- data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +0 -52
- data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +0 -56
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +0 -72
- data/lib/llm_cost_tracker/masking.rb +0 -39
- data/lib/llm_cost_tracker/parsers/anthropic.rb +0 -176
- data/lib/llm_cost_tracker/parsers/azure.rb +0 -46
- data/lib/llm_cost_tracker/parsers/base.rb +0 -131
- data/lib/llm_cost_tracker/parsers/gemini.rb +0 -230
- data/lib/llm_cost_tracker/parsers/openai.rb +0 -41
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +0 -45
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +0 -228
- data/lib/llm_cost_tracker/pricing/explainer.rb +0 -74
- data/lib/llm_cost_tracker/pricing/lookup.rb +0 -236
- data/lib/llm_cost_tracker/pricing/service_charges.rb +0 -206
- data/lib/llm_cost_tracker/providers/anthropic/server_tools.rb +0 -15
- data/lib/llm_cost_tracker/providers/anthropic/tier_classification.rb +0 -22
- data/lib/llm_cost_tracker/reconcile_tasks.rb +0 -131
- data/lib/llm_cost_tracker/reconciliation/diff.rb +0 -409
- data/lib/llm_cost_tracker/reconciliation/diff_result.rb +0 -44
- data/lib/llm_cost_tracker/reconciliation/import_result.rb +0 -19
- data/lib/llm_cost_tracker/reconciliation/importer.rb +0 -249
- data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +0 -148
- data/lib/llm_cost_tracker/reconciliation/sources/coercion.rb +0 -40
- data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +0 -20
- data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +0 -118
- data/lib/llm_cost_tracker/reconciliation.rb +0 -118
- data/lib/llm_cost_tracker/token_usage.rb +0 -93
|
@@ -1,40 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "base"
|
|
4
|
-
require_relative "../providers/anthropic/tier_classification"
|
|
5
4
|
|
|
6
5
|
module LlmCostTracker
|
|
7
6
|
module Integrations
|
|
8
7
|
module RubyLlm
|
|
9
8
|
extend Base
|
|
10
9
|
|
|
11
|
-
|
|
12
|
-
def integration_name
|
|
13
|
-
:ruby_llm
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
def minimum_version
|
|
17
|
-
"1.14.1"
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
def version_constant
|
|
21
|
-
"RubyLLM::VERSION"
|
|
22
|
-
end
|
|
10
|
+
minimum_version "1.15.0"
|
|
23
11
|
|
|
12
|
+
class << self
|
|
24
13
|
def patch_targets
|
|
25
|
-
[
|
|
26
|
-
patch_target(
|
|
27
|
-
"RubyLLM::Provider",
|
|
28
|
-
with: ProviderPatch,
|
|
29
|
-
methods: %i[slug complete embed transcribe paint moderate]
|
|
30
|
-
)
|
|
31
|
-
]
|
|
14
|
+
[patch_target("RubyLLM::Provider", with: ProviderPatch)]
|
|
32
15
|
end
|
|
33
16
|
|
|
34
17
|
def record_completion(provider, response, request:, latency_ms:, has_block:)
|
|
35
18
|
record_usage(
|
|
36
|
-
provider:
|
|
37
|
-
model: response_model_id(response) ||
|
|
19
|
+
provider: provider.slug.to_s,
|
|
20
|
+
model: response_model_id(response) || model_id_from_request(request[:model]),
|
|
38
21
|
response: response,
|
|
39
22
|
latency_ms: latency_ms,
|
|
40
23
|
stream: has_block || request[:stream] == true
|
|
@@ -43,8 +26,8 @@ module LlmCostTracker
|
|
|
43
26
|
|
|
44
27
|
def record_embedding(provider, response, request:, latency_ms:)
|
|
45
28
|
record_usage(
|
|
46
|
-
provider:
|
|
47
|
-
model: response_model_id(response) ||
|
|
29
|
+
provider: provider.slug.to_s,
|
|
30
|
+
model: response_model_id(response) || model_id_from_request(request[:model]),
|
|
48
31
|
response: response,
|
|
49
32
|
latency_ms: latency_ms,
|
|
50
33
|
stream: false,
|
|
@@ -54,8 +37,8 @@ module LlmCostTracker
|
|
|
54
37
|
|
|
55
38
|
def record_transcription(provider, response, request:, latency_ms:)
|
|
56
39
|
record_usage(
|
|
57
|
-
provider:
|
|
58
|
-
model: response_model_id(response) ||
|
|
40
|
+
provider: provider.slug.to_s,
|
|
41
|
+
model: response_model_id(response) || model_id_from_request(request[:model]),
|
|
59
42
|
response: response,
|
|
60
43
|
latency_ms: latency_ms,
|
|
61
44
|
stream: false
|
|
@@ -63,30 +46,27 @@ module LlmCostTracker
|
|
|
63
46
|
end
|
|
64
47
|
|
|
65
48
|
def record_image(provider, response, request:, latency_ms:)
|
|
66
|
-
usage =
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
raw_output = (usage[:output_tokens] || usage["output_tokens"]).to_i
|
|
49
|
+
usage = response.usage.with_indifferent_access
|
|
50
|
+
raw_input = usage[:input_tokens].to_i
|
|
51
|
+
raw_output = usage[:output_tokens].to_i
|
|
70
52
|
image_input = image_token_detail(usage, :input)
|
|
71
53
|
image_output = image_token_detail(usage, :output)
|
|
72
|
-
text_input = [raw_input - image_input, 0].max
|
|
73
|
-
text_output = [raw_output - image_output, 0].max
|
|
74
54
|
record_passthrough(
|
|
75
|
-
provider:
|
|
76
|
-
model: response_model_id(response) ||
|
|
55
|
+
provider: provider.slug.to_s,
|
|
56
|
+
model: response_model_id(response) || model_id_from_request(request[:model]),
|
|
77
57
|
response: response,
|
|
78
58
|
latency_ms: latency_ms,
|
|
79
|
-
input_tokens:
|
|
59
|
+
input_tokens: [raw_input - image_input, 0].max,
|
|
80
60
|
image_input_tokens: image_input,
|
|
81
|
-
output_tokens:
|
|
61
|
+
output_tokens: [raw_output - image_output, 0].max,
|
|
82
62
|
image_output_tokens: image_output
|
|
83
63
|
)
|
|
84
64
|
end
|
|
85
65
|
|
|
86
66
|
def record_moderation(provider, response, request:, latency_ms:)
|
|
87
67
|
record_passthrough(
|
|
88
|
-
provider:
|
|
89
|
-
model: response_model_id(response) ||
|
|
68
|
+
provider: provider.slug.to_s,
|
|
69
|
+
model: response_model_id(response) || model_id_from_request(request[:model]),
|
|
90
70
|
response: response,
|
|
91
71
|
latency_ms: latency_ms,
|
|
92
72
|
input_tokens: 0,
|
|
@@ -96,14 +76,20 @@ module LlmCostTracker
|
|
|
96
76
|
|
|
97
77
|
def image_token_detail(usage, direction)
|
|
98
78
|
container_key = direction == :input ? :input_tokens_details : :output_tokens_details
|
|
99
|
-
details = usage[container_key]
|
|
79
|
+
details = usage[container_key]
|
|
100
80
|
return 0 unless details.is_a?(Hash)
|
|
101
81
|
|
|
102
|
-
|
|
82
|
+
details.with_indifferent_access[:image_tokens].to_i
|
|
103
83
|
end
|
|
104
84
|
|
|
105
|
-
def record_passthrough(provider:,
|
|
106
|
-
|
|
85
|
+
def record_passthrough(provider:,
|
|
86
|
+
model:,
|
|
87
|
+
response:,
|
|
88
|
+
latency_ms:,
|
|
89
|
+
input_tokens:,
|
|
90
|
+
output_tokens:,
|
|
91
|
+
image_input_tokens: 0,
|
|
92
|
+
image_output_tokens: 0)
|
|
107
93
|
return unless active?
|
|
108
94
|
|
|
109
95
|
record_safely do
|
|
@@ -111,14 +97,14 @@ module LlmCostTracker
|
|
|
111
97
|
event: Event.build(
|
|
112
98
|
provider: provider,
|
|
113
99
|
model: model,
|
|
114
|
-
token_usage: TokenUsage.build(
|
|
100
|
+
token_usage: Usage::TokenUsage.build(
|
|
115
101
|
input_tokens: input_tokens,
|
|
116
102
|
output_tokens: output_tokens,
|
|
117
103
|
image_input_tokens: image_input_tokens,
|
|
118
104
|
image_output_tokens: image_output_tokens
|
|
119
105
|
),
|
|
120
|
-
usage_source:
|
|
121
|
-
provider_response_id:
|
|
106
|
+
usage_source: LlmCostTracker::Usage::Source::SDK_RESPONSE,
|
|
107
|
+
provider_response_id: provider_response_id_for(response)
|
|
122
108
|
),
|
|
123
109
|
latency_ms: latency_ms
|
|
124
110
|
)
|
|
@@ -129,104 +115,106 @@ module LlmCostTracker
|
|
|
129
115
|
return unless active?
|
|
130
116
|
|
|
131
117
|
record_safely do
|
|
132
|
-
input_tokens =
|
|
133
|
-
output_tokens =
|
|
118
|
+
input_tokens = response.input_tokens
|
|
119
|
+
output_tokens = response.output_tokens if output_tokens.nil?
|
|
134
120
|
next if input_tokens.nil? && output_tokens.nil?
|
|
135
121
|
|
|
136
|
-
|
|
137
|
-
hidden_output = object_value(response, :thinking_tokens, :reasoning_tokens).to_i
|
|
138
|
-
|
|
122
|
+
cache_write_5m, cache_write_1h = cache_creation_split(provider, response)
|
|
139
123
|
LlmCostTracker::Tracker.record(
|
|
140
124
|
event: Event.build(
|
|
141
125
|
provider: provider,
|
|
142
126
|
model: model,
|
|
143
|
-
pricing_mode:
|
|
144
|
-
token_usage: TokenUsage.build(
|
|
145
|
-
input_tokens:
|
|
127
|
+
pricing_mode: pricing_mode_for(provider: provider, response: response),
|
|
128
|
+
token_usage: Usage::TokenUsage.build(
|
|
129
|
+
input_tokens: input_tokens.to_i,
|
|
146
130
|
output_tokens: output_tokens.to_i,
|
|
147
|
-
cache_read_input_tokens:
|
|
148
|
-
cache_write_input_tokens:
|
|
149
|
-
|
|
131
|
+
cache_read_input_tokens: response.try(:cached_tokens).to_i,
|
|
132
|
+
cache_write_input_tokens: cache_write_5m,
|
|
133
|
+
cache_write_extended_input_tokens: cache_write_1h,
|
|
134
|
+
hidden_output_tokens: response.try(:thinking_tokens).to_i
|
|
150
135
|
),
|
|
151
136
|
stream: stream,
|
|
152
|
-
usage_source:
|
|
153
|
-
provider_response_id:
|
|
137
|
+
usage_source: LlmCostTracker::Usage::Source::SDK_RESPONSE,
|
|
138
|
+
provider_response_id: provider_response_id_for(response)
|
|
154
139
|
),
|
|
155
140
|
latency_ms: latency_ms
|
|
156
141
|
)
|
|
157
142
|
end
|
|
158
143
|
end
|
|
159
144
|
|
|
160
|
-
def
|
|
161
|
-
[
|
|
162
|
-
|
|
145
|
+
def cache_creation_split(provider, response)
|
|
146
|
+
return [response.try(:cache_creation_tokens).to_i, 0] unless provider == "anthropic"
|
|
147
|
+
|
|
148
|
+
cache = raw_body(response).dig("usage", "cache_creation")
|
|
149
|
+
return [response.try(:cache_creation_tokens).to_i, 0] unless cache.is_a?(Hash)
|
|
163
150
|
|
|
164
|
-
|
|
165
|
-
object_value(provider, :slug).to_s
|
|
151
|
+
[cache["ephemeral_5m_input_tokens"].to_i, cache["ephemeral_1h_input_tokens"].to_i]
|
|
166
152
|
end
|
|
167
153
|
|
|
168
|
-
def
|
|
169
|
-
return nil if
|
|
154
|
+
def model_id_from_request(value)
|
|
155
|
+
return nil if value.nil?
|
|
156
|
+
return value.to_s if value.is_a?(String) || value.is_a?(Symbol)
|
|
170
157
|
|
|
171
|
-
value
|
|
172
|
-
value ||= object if object.is_a?(String) || object.is_a?(Symbol)
|
|
173
|
-
value&.to_s
|
|
158
|
+
(value.try(:id) || value.try(:model_id) || value.try(:model))&.to_s
|
|
174
159
|
end
|
|
175
160
|
|
|
176
|
-
def
|
|
177
|
-
|
|
178
|
-
|
|
161
|
+
def provider_response_id_for(response)
|
|
162
|
+
body = raw_body(response)
|
|
163
|
+
body["id"] || body["responseId"]
|
|
179
164
|
end
|
|
180
165
|
|
|
181
|
-
def
|
|
182
|
-
|
|
166
|
+
def raw_body(response)
|
|
167
|
+
body = response.try(:raw)&.body
|
|
168
|
+
body.is_a?(Hash) ? body : {}
|
|
183
169
|
end
|
|
184
170
|
|
|
185
|
-
def
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
171
|
+
def response_model_id(response)
|
|
172
|
+
(response.try(:model_id) || response.try(:model))&.to_s
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def pricing_mode_for(provider:, response:)
|
|
176
|
+
body = raw_body(response)
|
|
177
|
+
case provider
|
|
178
|
+
when "anthropic" then body.dig("usage", "service_tier")
|
|
179
|
+
when "gemini" then body.dig("usageMetadata", "serviceTier")
|
|
180
|
+
else body["service_tier"]
|
|
189
181
|
end
|
|
182
|
+
end
|
|
190
183
|
|
|
191
|
-
|
|
184
|
+
def blocking_seam(resource, record_method, **extras)
|
|
185
|
+
{
|
|
186
|
+
provider: resource.slug.to_s,
|
|
187
|
+
record: lambda do |response, request, latency_ms|
|
|
188
|
+
public_send(record_method, resource, response, request: request, latency_ms: latency_ms, **extras)
|
|
189
|
+
end
|
|
190
|
+
}
|
|
192
191
|
end
|
|
193
192
|
end
|
|
194
193
|
|
|
195
194
|
module ProviderPatch
|
|
196
195
|
def complete(*args, **kwargs, &)
|
|
197
|
-
|
|
196
|
+
seam = LlmCostTracker::Integrations::RubyLlm.blocking_seam(self, :record_completion, has_block: block_given?)
|
|
197
|
+
LlmCostTracker::Integrations::RubyLlm.wrap_blocking(args, kwargs, **seam) { super }
|
|
198
198
|
end
|
|
199
199
|
|
|
200
200
|
def embed(*args, **kwargs)
|
|
201
|
-
|
|
201
|
+
seam = LlmCostTracker::Integrations::RubyLlm.blocking_seam(self, :record_embedding)
|
|
202
|
+
LlmCostTracker::Integrations::RubyLlm.wrap_blocking(args, kwargs, **seam) { super }
|
|
202
203
|
end
|
|
203
204
|
|
|
204
205
|
def transcribe(*args, **kwargs)
|
|
205
|
-
|
|
206
|
+
seam = LlmCostTracker::Integrations::RubyLlm.blocking_seam(self, :record_transcription)
|
|
207
|
+
LlmCostTracker::Integrations::RubyLlm.wrap_blocking(args, kwargs, **seam) { super }
|
|
206
208
|
end
|
|
207
209
|
|
|
208
210
|
def paint(*args, **kwargs)
|
|
209
|
-
|
|
211
|
+
seam = LlmCostTracker::Integrations::RubyLlm.blocking_seam(self, :record_image)
|
|
212
|
+
LlmCostTracker::Integrations::RubyLlm.wrap_blocking(args, kwargs, **seam) { super }
|
|
210
213
|
end
|
|
211
214
|
|
|
212
215
|
def moderate(*args, **kwargs)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
private
|
|
217
|
-
|
|
218
|
-
def measure(args, kwargs, recorder:, **extras)
|
|
219
|
-
request = RubyLlm.request_params(args, kwargs)
|
|
220
|
-
RubyLlm.enforce_budget!(request: request)
|
|
221
|
-
started_at = LlmCostTracker::Timing.now_monotonic
|
|
222
|
-
response = yield
|
|
223
|
-
RubyLlm.public_send(
|
|
224
|
-
recorder, self, response,
|
|
225
|
-
request: request,
|
|
226
|
-
latency_ms: LlmCostTracker::Timing.elapsed_ms(started_at),
|
|
227
|
-
**extras
|
|
228
|
-
)
|
|
229
|
-
response
|
|
216
|
+
seam = LlmCostTracker::Integrations::RubyLlm.blocking_seam(self, :record_moderation)
|
|
217
|
+
LlmCostTracker::Integrations::RubyLlm.wrap_blocking(args, kwargs, **seam) { super }
|
|
230
218
|
end
|
|
231
219
|
end
|
|
232
220
|
end
|
|
@@ -1,41 +1,50 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "active_support/core_ext/string/inflections"
|
|
4
|
+
require_relative "check"
|
|
3
5
|
require_relative "errors"
|
|
4
|
-
require_relative "logging"
|
|
5
6
|
|
|
6
7
|
module LlmCostTracker
|
|
7
8
|
module Integrations
|
|
8
9
|
autoload :Base, "llm_cost_tracker/integrations/base"
|
|
9
|
-
autoload :Openai, "llm_cost_tracker/integrations/openai"
|
|
10
|
-
autoload :Anthropic, "llm_cost_tracker/integrations/anthropic"
|
|
11
|
-
autoload :RubyLlm, "llm_cost_tracker/integrations/ruby_llm"
|
|
12
10
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
11
|
+
Dir.glob(File.join(__dir__, "integrations", "*.rb")).each do |path|
|
|
12
|
+
basename = File.basename(path, ".rb")
|
|
13
|
+
next if basename == "base"
|
|
16
14
|
|
|
17
|
-
|
|
15
|
+
autoload basename.camelize.to_sym, "llm_cost_tracker/integrations/#{basename}"
|
|
16
|
+
end
|
|
18
17
|
|
|
19
|
-
def install!(names = LlmCostTracker.configuration.instrumented_integrations)
|
|
18
|
+
def self.install!(names = LlmCostTracker.configuration.instrumented_integrations)
|
|
20
19
|
normalized = normalize(names)
|
|
21
20
|
warn_double_instrumentation(normalized)
|
|
22
|
-
normalized.each
|
|
21
|
+
normalized.each do |name|
|
|
22
|
+
integration = fetch(name)
|
|
23
|
+
next integration.install if integration
|
|
24
|
+
|
|
25
|
+
Logging.warn("Unknown integration: #{name.inspect}. Known: #{self.names.map(&:inspect).join(', ')}")
|
|
26
|
+
end
|
|
23
27
|
end
|
|
24
28
|
|
|
25
|
-
def checks(names = LlmCostTracker.configuration.instrumented_integrations)
|
|
26
|
-
return [
|
|
29
|
+
def self.checks(names = LlmCostTracker.configuration.instrumented_integrations)
|
|
30
|
+
return [Check.new(:ok, "integrations", "no SDK integrations enabled")] if names.empty?
|
|
27
31
|
|
|
28
|
-
normalize(names).map
|
|
32
|
+
normalize(names).map do |name|
|
|
33
|
+
integration = fetch(name)
|
|
34
|
+
next integration.status if integration
|
|
35
|
+
|
|
36
|
+
Check.new(:warn, name.to_s, "unknown integration; check your config.instrument(...) call")
|
|
37
|
+
end
|
|
29
38
|
end
|
|
30
39
|
|
|
31
|
-
def normalize(names)
|
|
40
|
+
def self.normalize(names)
|
|
32
41
|
Array(names).flatten.uniq
|
|
33
42
|
end
|
|
34
43
|
|
|
35
|
-
def warn_double_instrumentation(names)
|
|
44
|
+
def self.warn_double_instrumentation(names)
|
|
36
45
|
return unless names.include?(:ruby_llm)
|
|
37
46
|
|
|
38
|
-
overlapping = names
|
|
47
|
+
overlapping = names - [:ruby_llm]
|
|
39
48
|
return if overlapping.empty?
|
|
40
49
|
|
|
41
50
|
Logging.warn(
|
|
@@ -45,18 +54,16 @@ module LlmCostTracker
|
|
|
45
54
|
)
|
|
46
55
|
end
|
|
47
56
|
|
|
48
|
-
def fetch(name)
|
|
49
|
-
const_name =
|
|
50
|
-
unless const_name
|
|
51
|
-
|
|
52
|
-
"Unknown integration: #{name.inspect}. Use one of: #{names.join(', ')}"
|
|
53
|
-
end
|
|
57
|
+
def self.fetch(name)
|
|
58
|
+
const_name = name.to_s.camelize
|
|
59
|
+
return nil unless const_name.match?(/\A[A-Z]\w*\z/)
|
|
60
|
+
return nil unless const_defined?(const_name, false)
|
|
54
61
|
|
|
55
|
-
const_get(const_name)
|
|
62
|
+
const_get(const_name, false)
|
|
56
63
|
end
|
|
57
64
|
|
|
58
|
-
def names
|
|
59
|
-
|
|
65
|
+
def self.names
|
|
66
|
+
constants(false).reject { |c| c == :Base }.map { |c| c.to_s.underscore.to_sym }.sort
|
|
60
67
|
end
|
|
61
68
|
end
|
|
62
69
|
end
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "bigdecimal"
|
|
3
|
+
require "bigdecimal/util"
|
|
4
4
|
|
|
5
5
|
require_relative "../period"
|
|
6
6
|
|
|
@@ -20,66 +20,51 @@ module LlmCostTracker
|
|
|
20
20
|
def totals
|
|
21
21
|
return {} if periods.empty?
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
values = periods.to_h { |period| [period, BigDecimal("0")] }
|
|
24
|
+
period_by_name = periods.to_h { |period| [period.to_s, period] }
|
|
25
|
+
LlmCostTracker::Call.find_by_sql(union_sql).each do |row|
|
|
26
|
+
values[period_by_name.fetch(row.period_key)] = row.total_cost.to_d
|
|
27
|
+
end
|
|
28
|
+
values
|
|
24
29
|
end
|
|
25
30
|
|
|
26
31
|
private
|
|
27
32
|
|
|
28
33
|
attr_reader :periods, :time
|
|
29
34
|
|
|
30
|
-
def
|
|
31
|
-
|
|
32
|
-
period_by_name = periods.to_h { |period| [period.name, period] }
|
|
33
|
-
sql = periods.map { |period| snapshot_select(period) }.join(" UNION ALL ")
|
|
34
|
-
LlmCostTracker::Call.find_by_sql(sql).each do |row|
|
|
35
|
-
period = period_by_name.fetch(row.period_key)
|
|
36
|
-
values[period] = BigDecimal(row.total_cost.to_s)
|
|
37
|
-
end
|
|
38
|
-
values
|
|
35
|
+
def union_sql
|
|
36
|
+
periods.map { |period| period_select(period) }.join(" UNION ALL ")
|
|
39
37
|
end
|
|
40
38
|
|
|
41
|
-
def
|
|
39
|
+
def period_select(period)
|
|
42
40
|
start = Period.range_start(period, time)
|
|
43
|
-
components = [
|
|
44
|
-
components <<
|
|
45
|
-
"SELECT #{
|
|
46
|
-
"(#{components.join(') + (')}) AS total_cost"
|
|
41
|
+
components = ["(#{recorded_sql(period, start)})"]
|
|
42
|
+
components << "(#{pending_sql(start)})" if Ingestion.async?
|
|
43
|
+
"SELECT #{quote(period.to_s)} AS period_key, #{components.join(' + ')} AS total_cost"
|
|
47
44
|
end
|
|
48
45
|
|
|
49
|
-
def
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
46
|
+
def recorded_sql(period, start)
|
|
47
|
+
calls = "COALESCE(#{sum_sql(LlmCostTracker::Call.between(start, time))}, 0)"
|
|
48
|
+
return calls unless LlmCostTracker.configuration.cache_rollups
|
|
49
|
+
|
|
50
|
+
rollup = "COALESCE(#{sum_sql(rollup_scope(period))}, 0)"
|
|
51
|
+
"GREATEST(#{rollup}, #{calls})"
|
|
55
52
|
end
|
|
56
53
|
|
|
57
|
-
def
|
|
58
|
-
|
|
59
|
-
"(SELECT SUM(total_cost) FROM #{table} " \
|
|
60
|
-
"WHERE period = #{connection.quote(Period::PERIODS.fetch(period))} " \
|
|
61
|
-
"AND period_start = #{connection.quote(Period.bucket(period, time))})"
|
|
54
|
+
def pending_sql(start)
|
|
55
|
+
"COALESCE(#{sum_sql(Ingestion::InboxEntry.pending.where(tracked_at: start..time))}, 0)"
|
|
62
56
|
end
|
|
63
57
|
|
|
64
|
-
def
|
|
65
|
-
|
|
66
|
-
tracked_at = connection.quote_column_name("tracked_at")
|
|
67
|
-
"(SELECT SUM(total_cost) FROM #{table} " \
|
|
68
|
-
"WHERE #{tracked_at} BETWEEN #{connection.quote(start)} AND #{connection.quote(time)})"
|
|
58
|
+
def rollup_scope(period)
|
|
59
|
+
LlmCostTracker::CallRollup.where(period: period.to_s, period_start: Period.bucket(period, time))
|
|
69
60
|
end
|
|
70
61
|
|
|
71
|
-
def
|
|
72
|
-
|
|
73
|
-
total_cost = connection.quote_column_name("total_cost")
|
|
74
|
-
tracked_at = connection.quote_column_name("tracked_at")
|
|
75
|
-
attempts = connection.quote_column_name("attempts")
|
|
76
|
-
"COALESCE((SELECT SUM(#{total_cost}) FROM #{table} " \
|
|
77
|
-
"WHERE #{attempts} < #{Ingestion::InboxEntry::MAX_ATTEMPTS_BEFORE_QUARANTINE} " \
|
|
78
|
-
"AND #{tracked_at} BETWEEN #{connection.quote(start)} AND #{connection.quote(time)}), 0)"
|
|
62
|
+
def sum_sql(scope)
|
|
63
|
+
"(#{scope.select('SUM(total_cost)').to_sql})"
|
|
79
64
|
end
|
|
80
65
|
|
|
81
|
-
def
|
|
82
|
-
LlmCostTracker::Call.connection
|
|
66
|
+
def quote(value)
|
|
67
|
+
LlmCostTracker::Call.connection.quote(value)
|
|
83
68
|
end
|
|
84
69
|
end
|
|
85
70
|
end
|
|
@@ -3,18 +3,13 @@
|
|
|
3
3
|
module LlmCostTracker
|
|
4
4
|
module Ledger
|
|
5
5
|
module Period
|
|
6
|
-
PERIODS =
|
|
7
|
-
month: "month",
|
|
8
|
-
day: "day"
|
|
9
|
-
}.freeze
|
|
6
|
+
PERIODS = %i[month day].freeze
|
|
10
7
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def valid_keys(periods)
|
|
14
|
-
periods.select { |period| PERIODS.key?(period) }
|
|
8
|
+
def self.valid_keys(periods)
|
|
9
|
+
PERIODS & periods
|
|
15
10
|
end
|
|
16
11
|
|
|
17
|
-
def range_start(period, time)
|
|
12
|
+
def self.range_start(period, time)
|
|
18
13
|
utc_time = time.to_time.utc
|
|
19
14
|
|
|
20
15
|
case period
|
|
@@ -23,7 +18,7 @@ module LlmCostTracker
|
|
|
23
18
|
end
|
|
24
19
|
end
|
|
25
20
|
|
|
26
|
-
def bucket(period, time)
|
|
21
|
+
def self.bucket(period, time)
|
|
27
22
|
range_start(period, time).to_date
|
|
28
23
|
end
|
|
29
24
|
end
|