llm_cost_tracker 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +136 -0
- data/README.md +14 -6
- data/app/assets/llm_cost_tracker/application.css +65 -5
- data/app/controllers/llm_cost_tracker/application_controller.rb +25 -33
- data/app/controllers/llm_cost_tracker/assets_controller.rb +1 -1
- data/app/controllers/llm_cost_tracker/calls_controller.rb +21 -11
- data/app/controllers/llm_cost_tracker/data_quality_controller.rb +4 -0
- data/app/controllers/llm_cost_tracker/reconciliation_controller.rb +106 -0
- data/app/controllers/llm_cost_tracker/tags_controller.rb +15 -1
- data/app/helpers/llm_cost_tracker/application_helper.rb +11 -1
- data/app/helpers/llm_cost_tracker/inline_style_helper.rb +28 -0
- data/app/helpers/llm_cost_tracker/reconciliation_helper.rb +13 -0
- data/app/helpers/llm_cost_tracker/token_usage_helper.rb +5 -1
- data/app/models/llm_cost_tracker/call.rb +0 -3
- data/app/models/llm_cost_tracker/call_line_item.rb +1 -5
- data/app/models/llm_cost_tracker/call_rollup.rb +0 -3
- data/app/models/llm_cost_tracker/call_tag.rb +0 -4
- data/app/models/llm_cost_tracker/ingestion/inbox_entry.rb +0 -4
- data/app/models/llm_cost_tracker/ingestion/lease.rb +0 -3
- data/app/models/llm_cost_tracker/provider_invoice.rb +7 -3
- data/app/models/llm_cost_tracker/provider_invoice_import.rb +29 -0
- data/app/services/llm_cost_tracker/dashboard/data_quality.rb +33 -4
- data/app/services/llm_cost_tracker/dashboard/filter.rb +6 -4
- data/app/services/llm_cost_tracker/dashboard/setup_state.rb +110 -0
- data/app/views/layouts/llm_cost_tracker/application.html.erb +6 -1
- data/app/views/llm_cost_tracker/calls/show.html.erb +26 -41
- data/app/views/llm_cost_tracker/dashboard/index.html.erb +9 -9
- data/app/views/llm_cost_tracker/data_quality/index.html.erb +92 -53
- data/app/views/llm_cost_tracker/reconciliation/index.html.erb +183 -0
- data/app/views/llm_cost_tracker/shared/_bar.html.erb +1 -1
- data/app/views/llm_cost_tracker/shared/_filters.html.erb +3 -0
- data/app/views/llm_cost_tracker/shared/_metric_stack.html.erb +1 -1
- data/app/views/llm_cost_tracker/tags/show.html.erb +60 -0
- data/config/routes.rb +3 -2
- data/lib/llm_cost_tracker/billing/components.rb +45 -3
- data/lib/llm_cost_tracker/billing/components.yml +71 -0
- data/lib/llm_cost_tracker/billing/cost_status.rb +21 -25
- data/lib/llm_cost_tracker/billing/line_item.rb +16 -50
- data/lib/llm_cost_tracker/budget.rb +31 -7
- data/lib/llm_cost_tracker/capture/stream_collector.rb +113 -34
- data/lib/llm_cost_tracker/capture/stream_tracker.rb +40 -5
- data/lib/llm_cost_tracker/configuration.rb +72 -17
- data/lib/llm_cost_tracker/doctor/capture_verifier.rb +1 -1
- data/lib/llm_cost_tracker/doctor/cost_drift_check.rb +2 -0
- data/lib/llm_cost_tracker/doctor/ingestion_check.rb +30 -4
- data/lib/llm_cost_tracker/doctor/invoice_reconciliation_check.rb +164 -0
- data/lib/llm_cost_tracker/doctor/legacy_audit_check.rb +0 -2
- data/lib/llm_cost_tracker/doctor/legacy_billing_status_check.rb +0 -2
- data/lib/llm_cost_tracker/doctor/schema_check.rb +5 -2
- data/lib/llm_cost_tracker/doctor.rb +72 -14
- data/lib/llm_cost_tracker/engine.rb +8 -0
- data/lib/llm_cost_tracker/errors.rb +3 -2
- data/lib/llm_cost_tracker/event.rb +48 -1
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/async_ingestion_generator.rb +43 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/call_rollups_generator.rb +43 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/install_generator.rb +17 -26
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/reconciliation_generator.rb +34 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_async_ingestion.rb.erb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_call_rollups.rb.erb +15 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_calls.rb.erb +5 -58
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/create_llm_cost_tracker_reconciliation.rb.erb +60 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/initializer.rb.erb +35 -25
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_rollups_provider.rb.erb +35 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_call_tags_key_value_index.rb.erb +32 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_image_tokens.rb.erb +18 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoice_imports_provider.rb.erb +32 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/templates/upgrade_provider_invoices_metadata_index.rb.erb +25 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_rollups_provider_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_call_tags_key_value_index_generator.rb +30 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_image_tokens_generator.rb +29 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoice_imports_provider_generator.rb +31 -0
- data/lib/llm_cost_tracker/generators/llm_cost_tracker/upgrade_provider_invoices_metadata_index_generator.rb +31 -0
- data/lib/llm_cost_tracker/ingestion/batch.rb +5 -2
- data/lib/llm_cost_tracker/ingestion/inbox.rb +3 -25
- data/lib/llm_cost_tracker/ingestion/pool.rb +44 -0
- data/lib/llm_cost_tracker/ingestion/worker.rb +28 -34
- data/lib/llm_cost_tracker/ingestion.rb +48 -11
- data/lib/llm_cost_tracker/integrations/anthropic.rb +31 -26
- data/lib/llm_cost_tracker/integrations/base.rb +35 -15
- data/lib/llm_cost_tracker/integrations/openai.rb +345 -84
- data/lib/llm_cost_tracker/integrations/ruby_llm.rb +111 -14
- data/lib/llm_cost_tracker/integrations.rb +33 -14
- data/lib/llm_cost_tracker/ledger/period/totals.rb +25 -7
- data/lib/llm_cost_tracker/ledger/rollups.rb +22 -17
- data/lib/llm_cost_tracker/ledger/schema/call_line_items.rb +41 -1
- data/lib/llm_cost_tracker/ledger/schema/call_rollups.rb +16 -6
- data/lib/llm_cost_tracker/ledger/schema/call_tags.rb +28 -2
- data/lib/llm_cost_tracker/ledger/schema/calls.rb +2 -4
- data/lib/llm_cost_tracker/ledger/schema/ingestion_inbox_entries.rb +57 -0
- data/lib/llm_cost_tracker/ledger/schema/ingestion_leases.rb +52 -0
- data/lib/llm_cost_tracker/ledger/schema/provider_invoice_imports.rb +56 -0
- data/lib/llm_cost_tracker/ledger/schema/provider_invoices.rb +28 -13
- data/lib/llm_cost_tracker/ledger/store.rb +34 -31
- data/lib/llm_cost_tracker/ledger/tags/encoding.rb +37 -0
- data/lib/llm_cost_tracker/ledger/tags/query.rb +2 -2
- data/lib/llm_cost_tracker/ledger.rb +2 -1
- data/lib/llm_cost_tracker/logging.rb +0 -4
- data/lib/llm_cost_tracker/masking.rb +39 -0
- data/lib/llm_cost_tracker/middleware/faraday.rb +120 -33
- data/lib/llm_cost_tracker/parsers/anthropic.rb +36 -28
- data/lib/llm_cost_tracker/parsers/azure.rb +46 -0
- data/lib/llm_cost_tracker/parsers/base.rb +53 -43
- data/lib/llm_cost_tracker/parsers/gemini.rb +24 -22
- data/lib/llm_cost_tracker/parsers/openai.rb +20 -38
- data/lib/llm_cost_tracker/parsers/openai_compatible.rb +26 -39
- data/lib/llm_cost_tracker/parsers/openai_service_charges.rb +81 -13
- data/lib/llm_cost_tracker/parsers/openai_usage.rb +126 -59
- data/lib/llm_cost_tracker/parsers.rb +31 -4
- data/lib/llm_cost_tracker/prices.json +572 -493
- data/lib/llm_cost_tracker/pricing/backfill.rb +140 -0
- data/lib/llm_cost_tracker/pricing/effective_prices.rb +7 -40
- data/lib/llm_cost_tracker/pricing/estimator.rb +33 -0
- data/lib/llm_cost_tracker/pricing/explainer.rb +4 -1
- data/lib/llm_cost_tracker/pricing/lookup.rb +73 -5
- data/lib/llm_cost_tracker/pricing/mode.rb +76 -0
- data/lib/llm_cost_tracker/pricing/registry.rb +3 -8
- data/lib/llm_cost_tracker/pricing/service_charges.rb +14 -12
- data/lib/llm_cost_tracker/pricing/{sync_change_printer.rb → sync/change_printer.rb} +3 -3
- data/lib/llm_cost_tracker/pricing/sync/registry_writer.rb +62 -1
- data/lib/llm_cost_tracker/pricing/sync.rb +4 -10
- data/lib/llm_cost_tracker/pricing/unknown.rb +5 -2
- data/lib/llm_cost_tracker/pricing.rb +117 -44
- data/lib/llm_cost_tracker/providers/anthropic/tier_classification.rb +22 -0
- data/lib/llm_cost_tracker/providers/azure/hosts.rb +17 -0
- data/lib/llm_cost_tracker/providers/gemini/model_families.rb +17 -0
- data/lib/llm_cost_tracker/providers/openai/hosts.rb +35 -0
- data/lib/llm_cost_tracker/providers/openai/model_families.rb +51 -0
- data/lib/llm_cost_tracker/railtie.rb +8 -0
- data/lib/llm_cost_tracker/reconcile_tasks.rb +134 -0
- data/lib/llm_cost_tracker/reconciliation/diff.rb +409 -0
- data/lib/llm_cost_tracker/reconciliation/diff_result.rb +44 -0
- data/lib/llm_cost_tracker/reconciliation/import_result.rb +19 -0
- data/lib/llm_cost_tracker/reconciliation/importer.rb +254 -0
- data/lib/llm_cost_tracker/reconciliation/sources/anthropic_usage.rb +172 -0
- data/lib/llm_cost_tracker/reconciliation/sources/fingerprint.rb +20 -0
- data/lib/llm_cost_tracker/reconciliation/sources/openai_usage.rb +142 -0
- data/lib/llm_cost_tracker/reconciliation.rb +118 -0
- data/lib/llm_cost_tracker/report/data.rb +4 -1
- data/lib/llm_cost_tracker/report.rb +0 -4
- data/lib/llm_cost_tracker/retention.rb +31 -6
- data/lib/llm_cost_tracker/tags/context.rb +3 -4
- data/lib/llm_cost_tracker/tags/sanitizer.rb +73 -21
- data/lib/llm_cost_tracker/token_usage.rb +14 -2
- data/lib/llm_cost_tracker/tracker.rb +41 -55
- data/lib/llm_cost_tracker/version.rb +1 -1
- data/lib/llm_cost_tracker.rb +19 -14
- data/lib/tasks/llm_cost_tracker.rake +41 -4
- metadata +49 -3
- data/lib/llm_cost_tracker/usage_capture.rb +0 -58
|
@@ -68,6 +68,26 @@
|
|
|
68
68
|
token_key: audio_output_tokens
|
|
69
69
|
cost_key: audio_output_cost
|
|
70
70
|
|
|
71
|
+
- key: image_input
|
|
72
|
+
kind: image_token
|
|
73
|
+
direction: input
|
|
74
|
+
modality: image
|
|
75
|
+
cache_state: none
|
|
76
|
+
unit: token
|
|
77
|
+
category: token
|
|
78
|
+
token_key: image_input_tokens
|
|
79
|
+
cost_key: image_input_cost
|
|
80
|
+
|
|
81
|
+
- key: image_output
|
|
82
|
+
kind: image_token
|
|
83
|
+
direction: output
|
|
84
|
+
modality: image
|
|
85
|
+
cache_state: none
|
|
86
|
+
unit: token
|
|
87
|
+
category: token
|
|
88
|
+
token_key: image_output_tokens
|
|
89
|
+
cost_key: image_output_cost
|
|
90
|
+
|
|
71
91
|
- key: web_search_request
|
|
72
92
|
kind: web_search_request
|
|
73
93
|
direction: neither
|
|
@@ -75,6 +95,34 @@
|
|
|
75
95
|
cache_state: none
|
|
76
96
|
unit: request
|
|
77
97
|
category: tool
|
|
98
|
+
rate_basis: per_1k_requests
|
|
99
|
+
|
|
100
|
+
- key: web_search_preview_request_reasoning
|
|
101
|
+
kind: web_search_preview_request_reasoning
|
|
102
|
+
direction: neither
|
|
103
|
+
modality: text
|
|
104
|
+
cache_state: none
|
|
105
|
+
unit: request
|
|
106
|
+
category: tool
|
|
107
|
+
rate_basis: per_1k_requests
|
|
108
|
+
|
|
109
|
+
- key: web_search_preview_request_non_reasoning
|
|
110
|
+
kind: web_search_preview_request_non_reasoning
|
|
111
|
+
direction: neither
|
|
112
|
+
modality: text
|
|
113
|
+
cache_state: none
|
|
114
|
+
unit: request
|
|
115
|
+
category: tool
|
|
116
|
+
rate_basis: per_1k_requests
|
|
117
|
+
|
|
118
|
+
- key: web_fetch_request
|
|
119
|
+
kind: web_fetch_request
|
|
120
|
+
direction: neither
|
|
121
|
+
modality: text
|
|
122
|
+
cache_state: none
|
|
123
|
+
unit: request
|
|
124
|
+
category: tool
|
|
125
|
+
rate_basis: per_1k_requests
|
|
78
126
|
|
|
79
127
|
- key: file_search_call
|
|
80
128
|
kind: file_search_call
|
|
@@ -83,6 +131,7 @@
|
|
|
83
131
|
cache_state: none
|
|
84
132
|
unit: request
|
|
85
133
|
category: tool
|
|
134
|
+
rate_basis: per_1k_requests
|
|
86
135
|
|
|
87
136
|
- key: container_session
|
|
88
137
|
kind: container_session
|
|
@@ -91,6 +140,7 @@
|
|
|
91
140
|
cache_state: none
|
|
92
141
|
unit: session
|
|
93
142
|
category: runtime
|
|
143
|
+
rate_basis: per_session
|
|
94
144
|
|
|
95
145
|
- key: code_execution_request
|
|
96
146
|
kind: code_execution_request
|
|
@@ -99,6 +149,7 @@
|
|
|
99
149
|
cache_state: none
|
|
100
150
|
unit: request
|
|
101
151
|
category: runtime
|
|
152
|
+
rate_basis: per_1k_requests
|
|
102
153
|
|
|
103
154
|
- key: code_execution_hour
|
|
104
155
|
kind: code_execution_hour
|
|
@@ -107,6 +158,7 @@
|
|
|
107
158
|
cache_state: none
|
|
108
159
|
unit: hour
|
|
109
160
|
category: runtime
|
|
161
|
+
rate_basis: per_hour
|
|
110
162
|
|
|
111
163
|
- key: grounding_request
|
|
112
164
|
kind: grounding_request
|
|
@@ -115,3 +167,22 @@
|
|
|
115
167
|
cache_state: none
|
|
116
168
|
unit: request
|
|
117
169
|
category: tool
|
|
170
|
+
rate_basis: per_1k_requests
|
|
171
|
+
|
|
172
|
+
- key: text_to_speech_character
|
|
173
|
+
kind: text_to_speech_character
|
|
174
|
+
direction: output
|
|
175
|
+
modality: audio
|
|
176
|
+
cache_state: none
|
|
177
|
+
unit: character
|
|
178
|
+
category: tool
|
|
179
|
+
rate_basis: per_million_characters
|
|
180
|
+
|
|
181
|
+
- key: mcp_call
|
|
182
|
+
kind: mcp_call
|
|
183
|
+
direction: neither
|
|
184
|
+
modality: text
|
|
185
|
+
cache_state: none
|
|
186
|
+
unit: request
|
|
187
|
+
category: tool
|
|
188
|
+
rate_basis: per_request
|
|
@@ -10,36 +10,32 @@ module LlmCostTracker
|
|
|
10
10
|
PARTIAL = "partial"
|
|
11
11
|
UNKNOWN = "unknown"
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
return UNKNOWN if usage_source == :unknown
|
|
13
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
14
|
+
def self.call(token_usage:, usage_source:, token_cost:, service_line_items:, total_cost:,
|
|
15
|
+
token_pricing_partial: false)
|
|
16
|
+
return UNKNOWN if usage_source == :unknown
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
service_line_items.each do |line_item|
|
|
26
|
-
next unless line_item.billable?
|
|
18
|
+
token_billable = token_usage.priced_quantities.any? { |_key, quantity| quantity.positive? }
|
|
19
|
+
service_billable = false
|
|
20
|
+
service_priced = false
|
|
21
|
+
service_unpriced = false
|
|
22
|
+
service_line_items.each do |line_item|
|
|
23
|
+
next unless line_item.billable?
|
|
27
24
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
25
|
+
service_billable = true
|
|
26
|
+
service_priced ||= line_item.priced?
|
|
27
|
+
service_unpriced ||= line_item.unpriced?
|
|
28
|
+
break if service_priced && service_unpriced
|
|
29
|
+
end
|
|
33
30
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
31
|
+
priced = (token_billable && !token_cost.nil?) || service_priced || (!token_billable && !service_billable)
|
|
32
|
+
unpriced = (token_billable && (token_cost.nil? || token_pricing_partial)) || service_unpriced
|
|
33
|
+
return UNKNOWN if unpriced && !priced
|
|
34
|
+
return PARTIAL if unpriced
|
|
38
35
|
|
|
39
|
-
|
|
40
|
-
end
|
|
41
|
-
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
36
|
+
total_cost.nil? || total_cost.zero? ? FREE : COMPLETE
|
|
42
37
|
end
|
|
38
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
43
39
|
end
|
|
44
40
|
end
|
|
45
41
|
end
|
|
@@ -30,28 +30,11 @@ module LlmCostTracker
|
|
|
30
30
|
|
|
31
31
|
class LineItem
|
|
32
32
|
USD = "USD"
|
|
33
|
-
OPTIONAL_ATTRIBUTES = %i[
|
|
34
|
-
pricing_basis
|
|
35
|
-
price_key
|
|
36
|
-
price_source
|
|
37
|
-
price_source_version
|
|
38
|
-
provider_field
|
|
39
|
-
provider_item_id
|
|
40
|
-
].freeze
|
|
41
|
-
SYMBOL_ATTRIBUTES = %i[
|
|
42
|
-
kind
|
|
43
|
-
direction
|
|
44
|
-
modality
|
|
45
|
-
cache_state
|
|
46
|
-
unit
|
|
47
|
-
pricing_basis
|
|
48
|
-
price_source
|
|
49
|
-
].freeze
|
|
50
33
|
|
|
51
34
|
def self.build(attributes)
|
|
52
35
|
attributes = attributes.to_h
|
|
53
36
|
component = component_for(attributes)
|
|
54
|
-
|
|
37
|
+
new(
|
|
55
38
|
kind: symbol_or_nil(attributes[:kind]) || component&.kind,
|
|
56
39
|
direction: symbol_or_nil(attributes[:direction]) || component&.direction,
|
|
57
40
|
modality: symbol_or_nil(attributes[:modality]) || component&.modality,
|
|
@@ -63,38 +46,30 @@ module LlmCostTracker
|
|
|
63
46
|
cost: decimal_or_nil(attributes[:cost]),
|
|
64
47
|
currency: attributes[:currency] || USD,
|
|
65
48
|
cost_status: cost_status_for(attributes),
|
|
49
|
+
pricing_basis: symbol_or_nil(attributes[:pricing_basis]),
|
|
50
|
+
price_key: attributes[:price_key],
|
|
51
|
+
price_source: symbol_or_nil(attributes[:price_source]),
|
|
52
|
+
price_source_version: attributes[:price_source_version],
|
|
53
|
+
provider_field: attributes[:provider_field],
|
|
54
|
+
provider_item_id: attributes[:provider_item_id],
|
|
66
55
|
details: attributes[:details] || {}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
new(**normalized)
|
|
56
|
+
)
|
|
70
57
|
end
|
|
71
58
|
|
|
72
59
|
def self.from_token_usage(token_usage)
|
|
73
60
|
return [] unless token_usage
|
|
74
61
|
|
|
75
|
-
|
|
76
|
-
quantity = token_usage.public_send(component.token_key)
|
|
62
|
+
token_usage.priced_quantities.filter_map do |key, quantity|
|
|
77
63
|
next unless quantity.positive?
|
|
78
64
|
|
|
79
|
-
|
|
65
|
+
component = Components::BY_KEY.fetch(key)
|
|
66
|
+
build(
|
|
80
67
|
kind: component.kind,
|
|
81
68
|
direction: component.direction,
|
|
82
69
|
modality: component.modality,
|
|
83
70
|
cache_state: component.cache_state,
|
|
84
|
-
quantity:
|
|
85
|
-
unit: component.unit
|
|
86
|
-
rate_amount: nil,
|
|
87
|
-
rate_quantity: BigDecimal("1"),
|
|
88
|
-
cost: nil,
|
|
89
|
-
currency: USD,
|
|
90
|
-
cost_status: CostStatus::UNKNOWN,
|
|
91
|
-
pricing_basis: nil,
|
|
92
|
-
price_key: nil,
|
|
93
|
-
price_source: nil,
|
|
94
|
-
price_source_version: nil,
|
|
95
|
-
provider_field: nil,
|
|
96
|
-
provider_item_id: nil,
|
|
97
|
-
details: {}
|
|
71
|
+
quantity: quantity,
|
|
72
|
+
unit: component.unit
|
|
98
73
|
)
|
|
99
74
|
end
|
|
100
75
|
end
|
|
@@ -119,7 +94,7 @@ module LlmCostTracker
|
|
|
119
94
|
def self.symbol_or_nil(value)
|
|
120
95
|
return nil if value.nil?
|
|
121
96
|
|
|
122
|
-
value.
|
|
97
|
+
value.to_s.to_sym
|
|
123
98
|
end
|
|
124
99
|
|
|
125
100
|
def self.decimal_or_nil(value)
|
|
@@ -132,16 +107,7 @@ module LlmCostTracker
|
|
|
132
107
|
decimal_or_nil(value) || BigDecimal("0")
|
|
133
108
|
end
|
|
134
109
|
|
|
135
|
-
|
|
136
|
-
OPTIONAL_ATTRIBUTES.to_h do |key|
|
|
137
|
-
value = attributes[key]
|
|
138
|
-
value = value.to_sym if value.is_a?(String) && SYMBOL_ATTRIBUTES.include?(key)
|
|
139
|
-
[key, value]
|
|
140
|
-
end
|
|
141
|
-
end
|
|
142
|
-
|
|
143
|
-
private_class_method :cost_status_for, :component_for, :symbol_or_nil, :decimal_or_nil, :decimal_or_zero,
|
|
144
|
-
:optional_attributes_for
|
|
110
|
+
private_class_method :cost_status_for, :component_for, :symbol_or_nil, :decimal_or_nil, :decimal_or_zero
|
|
145
111
|
|
|
146
112
|
def billable?
|
|
147
113
|
quantity.positive?
|
|
@@ -163,7 +129,7 @@ module LlmCostTracker
|
|
|
163
129
|
cost || BigDecimal("0")
|
|
164
130
|
end
|
|
165
131
|
|
|
166
|
-
def
|
|
132
|
+
def with_rate(rate)
|
|
167
133
|
rate_amount = rate.fetch(:amount)
|
|
168
134
|
rate_quantity = rate.fetch(:quantity)
|
|
169
135
|
applied_cost = (quantity / rate_quantity) * rate_amount
|
|
@@ -1,26 +1,35 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bigdecimal"
|
|
4
|
+
|
|
3
5
|
require_relative "logging"
|
|
4
6
|
require_relative "ledger"
|
|
7
|
+
require_relative "pricing/estimator"
|
|
5
8
|
|
|
6
9
|
module LlmCostTracker
|
|
7
10
|
class Budget
|
|
8
11
|
BUDGET_TYPE_TO_PERIOD = { monthly: :month, daily: :day }.freeze
|
|
9
12
|
|
|
10
13
|
class << self
|
|
11
|
-
def enforce!
|
|
14
|
+
def enforce!(provider: nil, model: nil, request: nil)
|
|
12
15
|
config = LlmCostTracker.configuration
|
|
13
16
|
return unless config.budget_exceeded_behavior == :block_requests
|
|
14
17
|
|
|
18
|
+
estimate = estimate_cost(provider: provider, model: model, request: request)
|
|
19
|
+
raise_per_call_pre_send(estimate, config.per_call_budget) if config.per_call_budget && estimate.positive?
|
|
20
|
+
|
|
15
21
|
budgets = { monthly: config.monthly_budget, daily: config.daily_budget }.compact
|
|
16
22
|
return if budgets.empty?
|
|
17
23
|
|
|
18
24
|
totals = totals_for(budgets.keys, time: Time.now.utc)
|
|
19
25
|
|
|
20
26
|
budgets.each do |budget_type, budget|
|
|
21
|
-
total = totals.fetch(budget_type)
|
|
27
|
+
total = totals.fetch(budget_type) + estimate
|
|
28
|
+
next unless total >= budget
|
|
22
29
|
|
|
23
|
-
|
|
30
|
+
raise BudgetExceededError.new(**budget_payload(
|
|
31
|
+
budget_type: budget_type, total: total, budget: budget, last_event: nil, stage: :pre_send
|
|
32
|
+
))
|
|
24
33
|
end
|
|
25
34
|
end
|
|
26
35
|
|
|
@@ -41,6 +50,20 @@ module LlmCostTracker
|
|
|
41
50
|
|
|
42
51
|
private
|
|
43
52
|
|
|
53
|
+
def estimate_cost(provider:, model:, request:)
|
|
54
|
+
return BigDecimal("0") unless provider && model && request
|
|
55
|
+
|
|
56
|
+
Pricing::Estimator.call(provider: provider, model: model, request: request) || BigDecimal("0")
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def raise_per_call_pre_send(estimate, budget)
|
|
60
|
+
return unless estimate >= budget
|
|
61
|
+
|
|
62
|
+
raise BudgetExceededError.new(**budget_payload(
|
|
63
|
+
budget_type: :per_call, total: estimate, budget: budget, last_event: nil, stage: :pre_send
|
|
64
|
+
))
|
|
65
|
+
end
|
|
66
|
+
|
|
44
67
|
def check_per_call_budget(event, config)
|
|
45
68
|
budget = config.per_call_budget
|
|
46
69
|
return unless budget
|
|
@@ -67,7 +90,8 @@ module LlmCostTracker
|
|
|
67
90
|
budget_type: budget_type,
|
|
68
91
|
total: total,
|
|
69
92
|
budget: budget,
|
|
70
|
-
last_event: last_event
|
|
93
|
+
last_event: last_event,
|
|
94
|
+
stage: :post_spend
|
|
71
95
|
)
|
|
72
96
|
|
|
73
97
|
if notify_exceeded?(config, budget_type: budget_type, total: total, budget: budget, last_event: last_event)
|
|
@@ -76,18 +100,18 @@ module LlmCostTracker
|
|
|
76
100
|
raise BudgetExceededError.new(**payload) if %i[raise block_requests].include?(config.budget_exceeded_behavior)
|
|
77
101
|
end
|
|
78
102
|
|
|
79
|
-
def budget_payload(budget_type:, total:, budget:, last_event:)
|
|
103
|
+
def budget_payload(budget_type:, total:, budget:, last_event:, stage:)
|
|
80
104
|
{
|
|
81
105
|
budget_type: budget_type,
|
|
82
106
|
total: total,
|
|
83
107
|
budget: budget,
|
|
84
|
-
last_event: last_event
|
|
108
|
+
last_event: last_event,
|
|
109
|
+
stage: stage
|
|
85
110
|
}
|
|
86
111
|
end
|
|
87
112
|
|
|
88
113
|
def notify_exceeded?(config, budget_type:, total:, budget:, last_event:)
|
|
89
114
|
return false unless config.on_budget_exceeded
|
|
90
|
-
return true unless config.budget_exceeded_behavior == :notify
|
|
91
115
|
return true unless last_event&.total_cost
|
|
92
116
|
return true if budget_type == :per_call
|
|
93
117
|
|
|
@@ -5,6 +5,7 @@ require "active_support/core_ext/object/deep_dup"
|
|
|
5
5
|
require "json"
|
|
6
6
|
|
|
7
7
|
require_relative "stream"
|
|
8
|
+
require_relative "../pricing/mode"
|
|
8
9
|
require_relative "../timing"
|
|
9
10
|
|
|
10
11
|
module LlmCostTracker
|
|
@@ -14,7 +15,7 @@ module LlmCostTracker
|
|
|
14
15
|
|
|
15
16
|
def initialize(provider:, model:, latency_ms: nil, provider_response_id: nil, provider_project_id: nil,
|
|
16
17
|
provider_api_key_id: nil, provider_workspace_id: nil, batch: nil, pricing_mode: nil,
|
|
17
|
-
metadata: {}, context_tags: nil)
|
|
18
|
+
metadata: {}, context_tags: nil, request: nil)
|
|
18
19
|
@provider = provider.to_s
|
|
19
20
|
@model = model
|
|
20
21
|
@latency_ms = latency_ms
|
|
@@ -26,12 +27,14 @@ module LlmCostTracker
|
|
|
26
27
|
@pricing_mode = pricing_mode
|
|
27
28
|
@metadata = (metadata || {}).deep_dup
|
|
28
29
|
@context_tags = (context_tags || LlmCostTracker::Tags::Context.tags).deep_dup
|
|
30
|
+
@request = request
|
|
29
31
|
@events = []
|
|
30
32
|
@captured_bytes = 0
|
|
31
33
|
@overflowed = false
|
|
32
34
|
@explicit_usage = nil
|
|
33
35
|
@started_at = LlmCostTracker::Timing.now_monotonic
|
|
34
36
|
@finished = false
|
|
37
|
+
@recording = false
|
|
35
38
|
@mutex = Mutex.new
|
|
36
39
|
end
|
|
37
40
|
|
|
@@ -66,7 +69,6 @@ module LlmCostTracker
|
|
|
66
69
|
ensure_open!
|
|
67
70
|
capture_event(data, type: type) unless data.nil?
|
|
68
71
|
end
|
|
69
|
-
self
|
|
70
72
|
end
|
|
71
73
|
|
|
72
74
|
def usage(input_tokens:, output_tokens:, **extra)
|
|
@@ -84,14 +86,22 @@ module LlmCostTracker
|
|
|
84
86
|
output_tokens: output_tokens
|
|
85
87
|
)
|
|
86
88
|
end
|
|
87
|
-
self
|
|
88
89
|
end
|
|
89
90
|
|
|
90
91
|
def finish!(errored: false)
|
|
91
|
-
snapshot =
|
|
92
|
-
|
|
92
|
+
snapshot = claim_recording_slot
|
|
93
|
+
return if snapshot.nil?
|
|
93
94
|
|
|
94
|
-
|
|
95
|
+
record_snapshot(snapshot, errored: errored)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
private
|
|
99
|
+
|
|
100
|
+
def claim_recording_slot
|
|
101
|
+
@mutex.synchronize do
|
|
102
|
+
return nil if @finished || @recording
|
|
103
|
+
|
|
104
|
+
@recording = true
|
|
95
105
|
pricing_mode = Pricing.normalize_mode(@pricing_mode)
|
|
96
106
|
{
|
|
97
107
|
events: @events.dup,
|
|
@@ -103,27 +113,50 @@ module LlmCostTracker
|
|
|
103
113
|
capture_dimensions: capture_dimensions(pricing_mode),
|
|
104
114
|
pricing_mode: pricing_mode,
|
|
105
115
|
metadata: @metadata.deep_dup,
|
|
106
|
-
context_tags: @context_tags.deep_dup
|
|
116
|
+
context_tags: @context_tags.deep_dup,
|
|
117
|
+
request: @request
|
|
107
118
|
}
|
|
108
119
|
end
|
|
120
|
+
end
|
|
109
121
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
122
|
+
def record_snapshot(snapshot, errored:)
|
|
123
|
+
save_succeeded = false
|
|
124
|
+
begin
|
|
125
|
+
event = build_event(snapshot)
|
|
126
|
+
provider_response_id = event.provider_response_id || snapshot[:provider_response_id]
|
|
127
|
+
event = event.with(provider_response_id: provider_response_id)
|
|
113
128
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
129
|
+
Tracker.record(
|
|
130
|
+
event: event,
|
|
131
|
+
latency_ms: snapshot[:latency_ms] || LlmCostTracker::Timing.elapsed_ms(@started_at),
|
|
132
|
+
pricing_mode: merge_pricing_modes(event.pricing_mode, snapshot[:pricing_mode]),
|
|
133
|
+
metadata: (errored ? { stream_errored: true } : {}).merge(snapshot[:metadata]),
|
|
134
|
+
context_tags: snapshot[:context_tags]
|
|
135
|
+
) { save_succeeded = true }
|
|
136
|
+
ensure
|
|
137
|
+
@mutex.synchronize do
|
|
138
|
+
@finished = save_succeeded
|
|
139
|
+
@recording = false
|
|
140
|
+
end
|
|
141
|
+
end
|
|
121
142
|
end
|
|
122
143
|
|
|
123
|
-
|
|
144
|
+
HOST_DERIVED_MODE_TOKENS = %i[data_residency].freeze
|
|
145
|
+
private_constant :HOST_DERIVED_MODE_TOKENS
|
|
146
|
+
|
|
147
|
+
def merge_pricing_modes(provider_mode, request_mode)
|
|
148
|
+
return Pricing.normalize_mode(request_mode) if provider_mode.to_s.strip.empty?
|
|
149
|
+
|
|
150
|
+
provider_tokens = Pricing::Mode.tokenize(provider_mode) - Pricing::STANDARD_MODE_VALUES
|
|
151
|
+
request_host_tokens = Pricing::Mode.tokenize(request_mode || "") & HOST_DERIVED_MODE_TOKENS
|
|
152
|
+
combined = provider_tokens | request_host_tokens
|
|
153
|
+
return nil if combined.empty?
|
|
154
|
+
|
|
155
|
+
Pricing.normalize_mode(combined.join("_"))
|
|
156
|
+
end
|
|
124
157
|
|
|
125
158
|
def capture_dimensions(pricing_mode)
|
|
126
|
-
batch = @batch.nil? ?
|
|
159
|
+
batch = @batch.nil? ? Event.batch_from_pricing_mode?(pricing_mode).presence : @batch
|
|
127
160
|
{
|
|
128
161
|
provider_project_id: @provider_project_id.to_s.strip.presence,
|
|
129
162
|
provider_api_key_id: @provider_api_key_id.to_s.strip.presence,
|
|
@@ -138,34 +171,44 @@ module LlmCostTracker
|
|
|
138
171
|
raise FrozenError, "can't modify finished LlmCostTracker::Capture::StreamCollector"
|
|
139
172
|
end
|
|
140
173
|
|
|
141
|
-
def
|
|
174
|
+
def build_event(snapshot)
|
|
142
175
|
return build_from_explicit_usage(snapshot) if snapshot[:explicit_usage]
|
|
176
|
+
return build_unknown_usage(snapshot) if snapshot[:overflowed]
|
|
143
177
|
|
|
144
|
-
|
|
178
|
+
event = Parsers.find_for_provider(@provider)&.parse_stream(
|
|
145
179
|
response_status: 200,
|
|
146
|
-
events: snapshot[:events]
|
|
180
|
+
events: snapshot[:events],
|
|
181
|
+
request_body: request_body_for(snapshot[:request])
|
|
147
182
|
)
|
|
148
|
-
if
|
|
149
|
-
model = present_model(
|
|
150
|
-
return
|
|
183
|
+
if event
|
|
184
|
+
model = present_model(event.model) || present_model(snapshot[:model]) || Event::UNKNOWN_MODEL
|
|
185
|
+
return event.with(provider: @provider, model: model, **snapshot.fetch(:capture_dimensions))
|
|
151
186
|
end
|
|
152
187
|
|
|
153
188
|
build_unknown_usage(snapshot)
|
|
154
189
|
end
|
|
155
190
|
|
|
191
|
+
def request_body_for(request)
|
|
192
|
+
return nil unless request
|
|
193
|
+
|
|
194
|
+
JSON.generate(request)
|
|
195
|
+
rescue StandardError
|
|
196
|
+
nil
|
|
197
|
+
end
|
|
198
|
+
|
|
156
199
|
def present_model(value)
|
|
157
200
|
return nil if value.nil?
|
|
158
201
|
|
|
159
202
|
string = value.to_s.presence
|
|
160
|
-
return nil if string.nil? || string ==
|
|
203
|
+
return nil if string.nil? || string == Event::UNKNOWN_MODEL
|
|
161
204
|
|
|
162
205
|
string
|
|
163
206
|
end
|
|
164
207
|
|
|
165
208
|
def build_from_explicit_usage(snapshot)
|
|
166
|
-
|
|
209
|
+
Event.build(
|
|
167
210
|
provider: @provider,
|
|
168
|
-
model: snapshot[:model] ||
|
|
211
|
+
model: snapshot[:model] || Event::UNKNOWN_MODEL,
|
|
169
212
|
token_usage: snapshot[:explicit_usage],
|
|
170
213
|
stream: true,
|
|
171
214
|
usage_source: :manual,
|
|
@@ -175,9 +218,9 @@ module LlmCostTracker
|
|
|
175
218
|
end
|
|
176
219
|
|
|
177
220
|
def build_unknown_usage(snapshot)
|
|
178
|
-
|
|
221
|
+
Event.build(
|
|
179
222
|
provider: @provider,
|
|
180
|
-
model: snapshot[:model] ||
|
|
223
|
+
model: snapshot[:model] || Event::UNKNOWN_MODEL,
|
|
181
224
|
token_usage: TokenUsage.build(input_tokens: 0, output_tokens: 0, total_tokens: 0),
|
|
182
225
|
stream: true,
|
|
183
226
|
usage_source: :unknown,
|
|
@@ -186,18 +229,54 @@ module LlmCostTracker
|
|
|
186
229
|
)
|
|
187
230
|
end
|
|
188
231
|
|
|
232
|
+
IGNORED_PAYLOAD_KEYS = %w[b64_json partial_image_b64].freeze
|
|
233
|
+
private_constant :IGNORED_PAYLOAD_KEYS
|
|
234
|
+
|
|
235
|
+
HEAVY_STRING_BYTES = 8 * 1024
|
|
236
|
+
private_constant :HEAVY_STRING_BYTES
|
|
237
|
+
|
|
189
238
|
def capture_event(data, type:)
|
|
190
|
-
event = { event: type, data: data }
|
|
191
|
-
size =
|
|
239
|
+
event = { event: type, data: strip_heavy_payload(data) }
|
|
240
|
+
size = approximate_bytesize(event)
|
|
192
241
|
if @captured_bytes + size <= Capture::Stream::LIMIT_BYTES
|
|
193
|
-
@events << event
|
|
242
|
+
@events << event
|
|
194
243
|
@captured_bytes += size
|
|
195
244
|
else
|
|
196
245
|
@overflowed = true
|
|
197
246
|
end
|
|
198
|
-
rescue
|
|
247
|
+
rescue TypeError, SystemStackError
|
|
199
248
|
@overflowed = true
|
|
200
249
|
end
|
|
250
|
+
|
|
251
|
+
def strip_heavy_payload(value)
|
|
252
|
+
case value
|
|
253
|
+
when Hash
|
|
254
|
+
value.each_with_object({}) do |(key, nested), out|
|
|
255
|
+
next if IGNORED_PAYLOAD_KEYS.include?(key.to_s)
|
|
256
|
+
|
|
257
|
+
out[key] = strip_heavy_payload(nested)
|
|
258
|
+
end
|
|
259
|
+
when Array
|
|
260
|
+
value.map { |nested| strip_heavy_payload(nested) }
|
|
261
|
+
when String
|
|
262
|
+
value.bytesize > HEAVY_STRING_BYTES ? "" : value
|
|
263
|
+
else
|
|
264
|
+
value
|
|
265
|
+
end
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
def approximate_bytesize(value)
|
|
269
|
+
case value
|
|
270
|
+
when Hash
|
|
271
|
+
value.sum { |key, nested| approximate_bytesize(key) + approximate_bytesize(nested) + 4 }
|
|
272
|
+
when Array
|
|
273
|
+
value.sum { |nested| approximate_bytesize(nested) + 2 }
|
|
274
|
+
when Numeric, true, false, nil
|
|
275
|
+
8
|
|
276
|
+
else
|
|
277
|
+
value.to_s.bytesize + 2
|
|
278
|
+
end
|
|
279
|
+
end
|
|
201
280
|
end
|
|
202
281
|
end
|
|
203
282
|
end
|