agentbill-sdk 5.0.1 → 7.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +5 -4
- data/CHANGELOG.md +29 -0
- data/examples/ollama_basic.rb +81 -0
- data/examples/perplexity_basic.rb +66 -0
- data/lib/agentbill/agents.rb +226 -0
- data/lib/agentbill/customers.rb +164 -0
- data/lib/agentbill/distributed.rb +109 -0
- data/lib/agentbill/exceptions.rb +84 -0
- data/lib/agentbill/ollama_wrapper.rb +153 -0
- data/lib/agentbill/orders.rb +283 -0
- data/lib/agentbill/perplexity_wrapper.rb +101 -0
- data/lib/agentbill/pricing.rb +52 -0
- data/lib/agentbill/signal_types.rb +179 -0
- data/lib/agentbill/signals.rb +199 -0
- data/lib/agentbill/tracer.rb +68 -11
- data/lib/agentbill/tracing.rb +343 -0
- data/lib/agentbill/version.rb +1 -1
- data/lib/agentbill/wrappers.rb +384 -0
- data/lib/agentbill.rb +252 -45
- metadata +16 -2
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require 'net/http'
|
|
5
|
+
require 'json'
|
|
6
|
+
require 'uri'
|
|
7
|
+
require 'digest'
|
|
8
|
+
require 'time'
|
|
9
|
+
|
|
10
|
+
require_relative 'exceptions'
|
|
11
|
+
require_relative 'distributed'
|
|
12
|
+
require_relative 'validation'
|
|
13
|
+
|
|
14
|
+
module AgentBill
|
|
15
|
+
# Base class for AgentBill wrapper classes.
#
# Provides the backend plumbing shared by every provider wrapper:
# Cost Guard budget validation, OTEL usage tracking, and response caching.
# All backend HTTP calls are best-effort with short timeouts: validation
# fails OPEN, and tracking/caching errors are logged but never raised.
class BaseWrapper
  BASE_URL = 'https://api.agentbill.io/functions/v1'
  # NOTE(review): hard-coded copy of the gem version — keep in sync with
  # lib/agentbill/version.rb on each release (or derive it from there).
  VERSION = '7.16.1'
  # Shared timeout (seconds) applied to connect AND read on every backend call.
  HTTP_TIMEOUT = 5

  attr_reader :provider_name

  # @param api_key [String] AgentBill API key (required, validated up front)
  # @param customer_id [String, nil] optional end-customer id (validated if given)
  # @param daily_budget [Numeric, nil] optional daily spend cap
  # @param monthly_budget [Numeric, nil] optional monthly spend cap
  # @param base_url [String, nil] API base URL override (defaults to BASE_URL)
  # @param debug [Boolean] log diagnostics to stdout when true
  # @param kwargs [Hash] extra options forwarded to the provider client
  def initialize(api_key:, customer_id: nil, daily_budget: nil, monthly_budget: nil,
                 base_url: nil, debug: false, **kwargs)
    Validation.validate_api_key(api_key)
    Validation.validate_customer_id(customer_id) if customer_id

    @api_key = api_key
    @customer_id = customer_id
    @daily_budget = daily_budget
    @monthly_budget = monthly_budget
    @base_url = base_url || BASE_URL
    @debug = debug
    @provider_kwargs = kwargs
    @provider_name = 'unknown'
  end

  protected

  # Print a debug line when debug mode is enabled; no-op otherwise.
  def log(message)
    puts "[AgentBill] #{message}" if @debug
  end

  # Ask the Cost Guard router whether this call may proceed.
  #
  # Fails OPEN: any transport/parse error returns an 'allowed' hash so a
  # backend outage never blocks customer traffic.
  #
  # @param model [String] model identifier to validate against budgets
  # @param messages [Object, nil] optional prompt payload (enables cache lookup)
  # @return [Hash] parsed router verdict ('allowed', 'reason', 'cache_hit',
  #   'cached_response', 'request_id', ...)
  def validate_budget_sync(model, messages = nil)
    payload = {
      api_key: @api_key,
      customer_id: @customer_id,
      model: model,
      daily_budget: @daily_budget,
      monthly_budget: @monthly_budget
    }
    payload[:messages] = messages if messages

    begin
      response = post_json('ai-cost-guard-router', payload)
      result = JSON.parse(response.body)
      log("Cost Guard validation: #{result}")
      result
    rescue StandardError => e
      log("Cost Guard validation failed (allowing): #{e.message}")
      { 'allowed' => true, 'reason' => 'validation_failed_open' }
    end
  end

  # Record one completed AI call as an OTEL span (best-effort, non-blocking).
  # Continues an existing distributed trace when one is active, otherwise
  # starts a new one.
  def track_usage_sync(model:, input_tokens:, output_tokens:, latency_ms:, prompt_hash:,
                       event_name: nil, metadata: nil)
    trace_id, span_id, parent_span_id = resolve_trace_context

    now = Time.now.to_f
    # Reconstruct the start time from the measured latency; OTEL expects
    # nanosecond timestamps serialized as strings.
    start_time_ns = ((now - latency_ms / 1000.0) * 1_000_000_000).to_i.to_s
    end_time_ns = (now * 1_000_000_000).to_i.to_s

    span_attributes = [
      { key: 'gen_ai.request.model', value: { stringValue: model } },
      { key: 'gen_ai.system', value: { stringValue: @provider_name } },
      { key: 'gen_ai.usage.prompt_tokens', value: { intValue: input_tokens } },
      { key: 'gen_ai.usage.completion_tokens', value: { intValue: output_tokens } },
      { key: 'gen_ai.usage.total_tokens', value: { intValue: input_tokens + output_tokens } },
      { key: 'agentbill.latency_ms', value: { doubleValue: latency_ms } },
      { key: 'agentbill.prompt_hash', value: { stringValue: prompt_hash } },
      { key: 'agentbill.event_name', value: { stringValue: event_name || 'ai_call' } }
    ]
    span_attributes << { key: 'agentbill.metadata', value: { stringValue: JSON.generate(metadata) } } if metadata

    resource_attributes = [
      { key: 'service.name', value: { stringValue: 'agentbill-ruby-sdk' } },
      { key: 'service.version', value: { stringValue: VERSION } }
    ]
    resource_attributes << { key: 'customer.id', value: { stringValue: @customer_id } } if @customer_id

    otel_span = {
      traceId: trace_id,
      spanId: span_id,
      name: 'agentbill.trace.signal',
      kind: 1,
      startTimeUnixNano: start_time_ns,
      endTimeUnixNano: end_time_ns,
      attributes: span_attributes,
      status: { code: 0 }
    }
    otel_span[:parentSpanId] = parent_span_id if parent_span_id

    payload = {
      resourceSpans: [{
        resource: { attributes: resource_attributes },
        scopeSpans: [{
          scope: { name: 'agentbill', version: VERSION },
          spans: [otel_span]
        }]
      }]
    }

    begin
      response = post_json('otel-collector', payload, 'X-API-Key' => @api_key)
      log("OTEL span sent: #{response.code}, trace_id=#{trace_id[0..7]}...")
    rescue StandardError => e
      log("Failed to send OTEL span: #{e.message}")
    end
  end

  # Store a prompt/response pair in the semantic cache (best-effort,
  # non-blocking: failures are logged and swallowed).
  def cache_response_sync(model:, prompt_hash:, prompt_content:, response_content:,
                          input_tokens:, output_tokens:, cost: 0.0, request_id: nil,
                          cacheable: true, ttl_hours: 24)
    prompt_str = prompt_content.is_a?(String) ? prompt_content : JSON.generate(prompt_content)

    # v7.15.2: Send prompt_tokens and completion_tokens separately for accurate cost calculation
    payload = {
      api_key: @api_key,
      prompt_hash: prompt_hash,
      response_content: response_content,
      model: model,
      prompt_content: prompt_str,
      tokens_used: input_tokens + output_tokens,
      prompt_tokens: input_tokens,
      completion_tokens: output_tokens,
      cost: cost,
      cacheable: cacheable,
      ttl_hours: ttl_hours
    }
    payload[:customer_id] = @customer_id if @customer_id
    payload[:request_id] = request_id if request_id

    begin
      response = post_json('cache-ai-response', payload)
      result = JSON.parse(response.body)
      log("Cache response: cached=#{result['cached']}, cache_id=#{result['cache_id'] || 'none'}")
    rescue StandardError => e
      log("Failed to cache response (non-blocking): #{e.message}")
    end
  end

  # SHA-256 fingerprint of a prompt. Strings are hashed as-is; any other
  # structure is JSON-serialized first so equivalent prompts hash equally.
  def hash_prompt(messages)
    content = messages.is_a?(String) ? messages : JSON.generate(messages)
    Digest::SHA256.hexdigest(content)
  end

  private

  # POST a JSON payload to "#{@base_url}/#{endpoint}" and return the raw
  # Net::HTTP response. Replaces triplicated boilerplate in the methods above.
  #
  # Fixes over the previous inline code:
  # - SSL is derived from the URL scheme (unconditional `use_ssl = true`
  #   broke plain-http base_url overrides, e.g. local development);
  # - open_timeout is bounded too, so a hung connect cannot stall callers
  #   beyond the intended 5 seconds.
  def post_json(endpoint, payload, extra_headers = {})
    url = URI("#{@base_url}/#{endpoint}")
    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = url.scheme == 'https'
    http.open_timeout = HTTP_TIMEOUT
    http.read_timeout = HTTP_TIMEOUT

    request = Net::HTTP::Post.new(url)
    request['Content-Type'] = 'application/json'
    extra_headers.each { |name, value| request[name] = value }
    request.body = JSON.generate(payload)

    http.request(request)
  end

  # Continue the active distributed trace, or start a fresh one, and record
  # the new span in the Distributed context store.
  # @return [Array(String, String, String | nil)] trace_id, span_id, parent_span_id
  def resolve_trace_context
    ctx = Distributed.get_trace_context
    if ctx
      trace_id = ctx[:trace_id]
      parent_span_id = ctx[:span_id]
      span_id = SecureRandom.hex(8)
      Distributed.set_trace_context(trace_id, span_id, parent_span_id)
      log("Using trace context: trace_id=#{trace_id[0..7]}..., span_id=#{span_id[0..7]}...")
    else
      trace_id = SecureRandom.hex(16)
      span_id = SecureRandom.hex(8)
      parent_span_id = nil
      Distributed.set_trace_context(trace_id, span_id)
      log("Generated new trace context: trace_id=#{trace_id[0..7]}...")
    end
    [trace_id, span_id, parent_span_id]
  end
end
|
|
200
|
+
|
|
201
|
+
# OpenAI wrapper with automatic tracking
#
# Mirrors the OpenAI SDK call shape (`wrapper.chat.completions.create(...)`)
# while adding Cost Guard budget validation, semantic-cache short-circuiting,
# OTEL usage tracking, and response caching around every request.
class AgentBillOpenAI < BaseWrapper
  def initialize(**kwargs)
    super(**kwargs)
    @provider_name = 'openai'

    # Lazy load OpenAI
    require 'openai'
    openai_options = @provider_kwargs.reject { |k, _| %i[api_key customer_id daily_budget monthly_budget base_url debug].include?(k) }
    @client = OpenAI::Client.new(**openai_options)
  end

  # Namespace accessor so calls read like the native SDK.
  def chat
    @chat_namespace ||= ChatNamespace.new(self)
  end

  # Intermediate namespace matching `client.chat.completions`.
  class ChatNamespace
    def initialize(wrapper)
      @wrapper = wrapper
    end

    def completions
      @completions_namespace ||= CompletionsNamespace.new(@wrapper)
    end
  end

  # Terminal namespace providing the tracked `create` call.
  class CompletionsNamespace
    def initialize(wrapper)
      @wrapper = wrapper
    end

    # Validated, tracked, and cached chat completion.
    #
    # @param model [String] OpenAI model name
    # @param messages [Array<Hash>] chat messages in OpenAI format
    # @return [Hash] raw OpenAI response, or the cached response on a cache hit
    # @raise [BudgetExceededError, RateLimitExceededError, PolicyViolationError]
    #   when the Cost Guard router denies the call
    def create(model:, messages:, **kwargs)
      start_time = Time.now

      # Validate budget first (CRITICAL: Always call router)
      validation = @wrapper.send(:validate_budget_sync, model, messages)
      enforce_policy!(validation)

      # Check for cache hit
      if validation['cache_hit'] && validation['cached_response']
        @wrapper.send(:log, '✓ Cache hit - returning cached response')
        return validation['cached_response']
      end

      # Make the actual API call
      response = @wrapper.instance_variable_get(:@client).chat(
        parameters: { model: model, messages: messages, **kwargs }
      )

      latency_ms = (Time.now - start_time) * 1000
      input_tokens = response.dig('usage', 'prompt_tokens') || 0
      output_tokens = response.dig('usage', 'completion_tokens') || 0
      prompt_hash = @wrapper.send(:hash_prompt, messages)

      # Track usage via OTEL
      @wrapper.send(:track_usage_sync,
                    model: model,
                    input_tokens: input_tokens,
                    output_tokens: output_tokens,
                    latency_ms: latency_ms,
                    prompt_hash: prompt_hash,
                    metadata: native_usage_metadata(response))

      # Cache the response
      response_content = response.dig('choices', 0, 'message', 'content') || ''
      @wrapper.send(:cache_response_sync,
                    model: model,
                    prompt_hash: prompt_hash,
                    prompt_content: messages,
                    response_content: response_content,
                    input_tokens: input_tokens,
                    output_tokens: output_tokens,
                    request_id: validation['request_id'])

      response
    end

    private

    # Map a denied Cost Guard verdict onto the matching SDK exception;
    # returns silently when the call is allowed.
    def enforce_policy!(validation)
      return if validation['allowed']

      reason = validation['reason'] || 'Budget exceeded'
      reason_lower = reason.downcase

      if reason_lower.include?('budget')
        raise BudgetExceededError.new(reason, validation)
      elsif reason_lower.include?('rate')
        raise RateLimitExceededError.new(reason, validation)
      else
        raise PolicyViolationError.new(reason, validation)
      end
    end

    # v7.16.0: Track OpenAI native prompt prefix caching (different from
    # AgentBill semantic cache). This is OpenAI's discount on repeated
    # prompt prefixes, NOT our full semantic cache. Returns nil when
    # neither counter is present so no empty metadata attribute is emitted.
    def native_usage_metadata(response)
      cached_input_tokens = response.dig('usage', 'prompt_tokens_details', 'cached_tokens') || 0
      reasoning_output_tokens = response.dig('usage', 'completion_tokens_details', 'reasoning_tokens') || 0

      metadata = {}
      metadata[:openai_cached_input_tokens] = cached_input_tokens if cached_input_tokens.positive?
      metadata[:openai_reasoning_output_tokens] = reasoning_output_tokens if reasoning_output_tokens.positive?
      metadata.empty? ? nil : metadata
    end
  end
end
|
|
300
|
+
|
|
301
|
+
# Anthropic wrapper with automatic tracking
#
# Exposes `wrapper.messages.create(...)` mirroring the Anthropic SDK while
# adding Cost Guard validation, OTEL usage tracking, and response caching.
class AgentBillAnthropic < BaseWrapper
  def initialize(**kwargs)
    super(**kwargs)
    @provider_name = 'anthropic'

    # Lazy load Anthropic
    require 'anthropic'
    reserved = %i[api_key customer_id daily_budget monthly_budget base_url debug]
    client_options = @provider_kwargs.reject { |key, _| reserved.include?(key) }
    @client = Anthropic::Client.new(**client_options)
  end

  # Namespace accessor so calls read like the native SDK.
  def messages
    @messages_namespace ||= MessagesNamespace.new(self)
  end

  # Terminal namespace providing the tracked `create` call.
  class MessagesNamespace
    def initialize(wrapper)
      @wrapper = wrapper
    end

    # Validated, tracked, and cached Anthropic message call.
    # @raise [BudgetExceededError, RateLimitExceededError, PolicyViolationError]
    def create(model:, max_tokens:, messages:, **kwargs)
      started_at = Time.now

      # Validate budget first (CRITICAL: Always call router)
      verdict = @wrapper.send(:validate_budget_sync, model, messages)

      unless verdict['allowed']
        denial = verdict['reason'] || 'Budget exceeded'
        case denial.downcase
        when /budget/ then raise BudgetExceededError.new(denial, verdict)
        when /rate/   then raise RateLimitExceededError.new(denial, verdict)
        else               raise PolicyViolationError.new(denial, verdict)
        end
      end

      # Check for cache hit
      if verdict['cache_hit'] && verdict['cached_response']
        @wrapper.send(:log, '✓ Cache hit - returning cached response')
        return verdict['cached_response']
      end

      # Make the actual API call
      client = @wrapper.instance_variable_get(:@client)
      response = client.messages(
        model: model,
        max_tokens: max_tokens,
        messages: messages,
        **kwargs
      )

      elapsed_ms = (Time.now - started_at) * 1000
      prompt_tokens = response.dig('usage', 'input_tokens') || 0
      completion_tokens = response.dig('usage', 'output_tokens') || 0
      fingerprint = @wrapper.send(:hash_prompt, messages)

      # Track usage via OTEL
      @wrapper.send(:track_usage_sync,
                    model: model,
                    input_tokens: prompt_tokens,
                    output_tokens: completion_tokens,
                    latency_ms: elapsed_ms,
                    prompt_hash: fingerprint)

      # Cache the response (first content block's text, when present)
      first_block = response.dig('content', 0)
      cached_text = first_block.is_a?(Hash) ? (first_block['text'] || '') : ''
      @wrapper.send(:cache_response_sync,
                    model: model,
                    prompt_hash: fingerprint,
                    prompt_content: messages,
                    response_content: cached_text,
                    input_tokens: prompt_tokens,
                    output_tokens: completion_tokens,
                    request_id: verdict['request_id'])

      response
    end
  end
end
|
|
384
|
+
end
|