agentbill-sdk 5.0.1 → 7.17.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in their public registries.
@@ -0,0 +1,384 @@
+# frozen_string_literal: true
+
+require 'securerandom'
+require 'net/http'
+require 'json'
+require 'uri'
+require 'digest'
+require 'time'
+
+require_relative 'exceptions'
+require_relative 'distributed'
+require_relative 'validation'
+
+module AgentBill
+  # Base class for AgentBill wrapper classes.
+  class BaseWrapper
+    BASE_URL = 'https://api.agentbill.io/functions/v1'
+    VERSION = '7.16.1'
+
+    attr_reader :provider_name
+
+    def initialize(api_key:, customer_id: nil, daily_budget: nil, monthly_budget: nil,
+                   base_url: nil, debug: false, **kwargs)
+      Validation.validate_api_key(api_key)
+      Validation.validate_customer_id(customer_id) if customer_id
+
+      @api_key = api_key
+      @customer_id = customer_id
+      @daily_budget = daily_budget
+      @monthly_budget = monthly_budget
+      @base_url = base_url || BASE_URL
+      @debug = debug
+      @provider_kwargs = kwargs
+      @provider_name = 'unknown'
+    end
+
+    protected
+
+    def log(message)
+      puts "[AgentBill] #{message}" if @debug
+    end
+
+    def validate_budget_sync(model, messages = nil)
+      payload = {
+        api_key: @api_key,
+        customer_id: @customer_id,
+        model: model,
+        daily_budget: @daily_budget,
+        monthly_budget: @monthly_budget
+      }
+
+      payload[:messages] = messages if messages
+
+      begin
+        url = URI("#{@base_url}/ai-cost-guard-router")
+        http = Net::HTTP.new(url.host, url.port)
+        http.use_ssl = true
+        http.read_timeout = 5
+
+        request = Net::HTTP::Post.new(url)
+        request['Content-Type'] = 'application/json'
+        request.body = JSON.generate(payload)
+
+        response = http.request(request)
+        result = JSON.parse(response.body)
+        log("Cost Guard validation: #{result}")
+        result
+      rescue StandardError => e
+        log("Cost Guard validation failed (allowing): #{e.message}")
+        { 'allowed' => true, 'reason' => 'validation_failed_open' }
+      end
+    end
+
+    def track_usage_sync(model:, input_tokens:, output_tokens:, latency_ms:, prompt_hash:,
+                         event_name: nil, metadata: nil)
+      ctx = Distributed.get_trace_context
+      if ctx
+        trace_id = ctx[:trace_id]
+        parent_span_id = ctx[:span_id]
+        span_id = SecureRandom.hex(8)
+        Distributed.set_trace_context(trace_id, span_id, parent_span_id)
+        log("Using trace context: trace_id=#{trace_id[0..7]}..., span_id=#{span_id[0..7]}...")
+      else
+        trace_id = SecureRandom.hex(16)
+        span_id = SecureRandom.hex(8)
+        parent_span_id = nil
+        Distributed.set_trace_context(trace_id, span_id)
+        log("Generated new trace context: trace_id=#{trace_id[0..7]}...")
+      end
+
+      now = Time.now.to_f
+      start_time_ns = ((now - latency_ms / 1000.0) * 1_000_000_000).to_i.to_s
+      end_time_ns = (now * 1_000_000_000).to_i.to_s
+
+      span_attributes = [
+        { key: 'gen_ai.request.model', value: { stringValue: model } },
+        { key: 'gen_ai.system', value: { stringValue: @provider_name } },
+        { key: 'gen_ai.usage.prompt_tokens', value: { intValue: input_tokens } },
+        { key: 'gen_ai.usage.completion_tokens', value: { intValue: output_tokens } },
+        { key: 'gen_ai.usage.total_tokens', value: { intValue: input_tokens + output_tokens } },
+        { key: 'agentbill.latency_ms', value: { doubleValue: latency_ms } },
+        { key: 'agentbill.prompt_hash', value: { stringValue: prompt_hash } },
+        { key: 'agentbill.event_name', value: { stringValue: event_name || 'ai_call' } }
+      ]
+
+      span_attributes << { key: 'agentbill.metadata', value: { stringValue: JSON.generate(metadata) } } if metadata
+
+      resource_attributes = [
+        { key: 'service.name', value: { stringValue: 'agentbill-ruby-sdk' } },
+        { key: 'service.version', value: { stringValue: VERSION } }
+      ]
+      resource_attributes << { key: 'customer.id', value: { stringValue: @customer_id } } if @customer_id
+
+      otel_span = {
+        traceId: trace_id,
+        spanId: span_id,
+        name: 'agentbill.trace.signal',
+        kind: 1,
+        startTimeUnixNano: start_time_ns,
+        endTimeUnixNano: end_time_ns,
+        attributes: span_attributes,
+        status: { code: 0 }
+      }
+      otel_span[:parentSpanId] = parent_span_id if parent_span_id
+
+      payload = {
+        resourceSpans: [{
+          resource: { attributes: resource_attributes },
+          scopeSpans: [{
+            scope: { name: 'agentbill', version: VERSION },
+            spans: [otel_span]
+          }]
+        }]
+      }
+
+      begin
+        url = URI("#{@base_url}/otel-collector")
+        http = Net::HTTP.new(url.host, url.port)
+        http.use_ssl = true
+        http.read_timeout = 5
+
+        request = Net::HTTP::Post.new(url)
+        request['Content-Type'] = 'application/json'
+        request['X-API-Key'] = @api_key
+        request.body = JSON.generate(payload)
+
+        response = http.request(request)
+        log("OTEL span sent: #{response.code}, trace_id=#{trace_id[0..7]}...")
+      rescue StandardError => e
+        log("Failed to send OTEL span: #{e.message}")
+      end
+    end
+
+    def cache_response_sync(model:, prompt_hash:, prompt_content:, response_content:,
+                            input_tokens:, output_tokens:, cost: 0.0, request_id: nil,
+                            cacheable: true, ttl_hours: 24)
+      prompt_str = prompt_content.is_a?(String) ? prompt_content : JSON.generate(prompt_content)
+
+      # v7.15.2: Send prompt_tokens and completion_tokens separately for accurate cost calculation
+      payload = {
+        api_key: @api_key,
+        prompt_hash: prompt_hash,
+        response_content: response_content,
+        model: model,
+        prompt_content: prompt_str,
+        tokens_used: input_tokens + output_tokens,
+        prompt_tokens: input_tokens,
+        completion_tokens: output_tokens,
+        cost: cost,
+        cacheable: cacheable,
+        ttl_hours: ttl_hours
+      }
+
+      payload[:customer_id] = @customer_id if @customer_id
+      payload[:request_id] = request_id if request_id
+
+      begin
+        url = URI("#{@base_url}/cache-ai-response")
+        http = Net::HTTP.new(url.host, url.port)
+        http.use_ssl = true
+        http.read_timeout = 5
+
+        request = Net::HTTP::Post.new(url)
+        request['Content-Type'] = 'application/json'
+        request.body = JSON.generate(payload)
+
+        response = http.request(request)
+        result = JSON.parse(response.body)
+        log("Cache response: cached=#{result['cached']}, cache_id=#{result['cache_id'] || 'none'}")
+      rescue StandardError => e
+        log("Failed to cache response (non-blocking): #{e.message}")
+      end
+    end
+
+    def hash_prompt(messages)
+      content = messages.is_a?(String) ? messages : JSON.generate(messages)
+      Digest::SHA256.hexdigest(content)
+    end
+  end
+
+  # OpenAI wrapper with automatic tracking
+  class AgentBillOpenAI < BaseWrapper
+    def initialize(**kwargs)
+      super(**kwargs)
+      @provider_name = 'openai'
+
+      # Lazy load OpenAI
+      require 'openai'
+      openai_options = @provider_kwargs.reject { |k, _| %i[api_key customer_id daily_budget monthly_budget base_url debug].include?(k) }
+      @client = OpenAI::Client.new(**openai_options)
+    end
+
+    def chat
+      @chat_namespace ||= ChatNamespace.new(self)
+    end
+
+    class ChatNamespace
+      def initialize(wrapper)
+        @wrapper = wrapper
+      end
+
+      def completions
+        @completions_namespace ||= CompletionsNamespace.new(@wrapper)
+      end
+    end
+
+    class CompletionsNamespace
+      def initialize(wrapper)
+        @wrapper = wrapper
+      end
+
+      def create(model:, messages:, **kwargs)
+        start_time = Time.now
+
+        # Validate budget first (CRITICAL: Always call router)
+        validation = @wrapper.send(:validate_budget_sync, model, messages)
+
+        unless validation['allowed']
+          reason = validation['reason'] || 'Budget exceeded'
+          reason_lower = reason.downcase
+
+          if reason_lower.include?('budget')
+            raise BudgetExceededError.new(reason, validation)
+          elsif reason_lower.include?('rate')
+            raise RateLimitExceededError.new(reason, validation)
+          else
+            raise PolicyViolationError.new(reason, validation)
+          end
+        end
+
+        # Check for cache hit
+        if validation['cache_hit'] && validation['cached_response']
+          @wrapper.send(:log, '✓ Cache hit - returning cached response')
+          return validation['cached_response']
+        end
+
+        # Make the actual API call
+        response = @wrapper.instance_variable_get(:@client).chat(
+          parameters: { model: model, messages: messages, **kwargs }
+        )
+
+        latency_ms = (Time.now - start_time) * 1000
+        input_tokens = response.dig('usage', 'prompt_tokens') || 0
+        output_tokens = response.dig('usage', 'completion_tokens') || 0
+        prompt_hash = @wrapper.send(:hash_prompt, messages)
+
+        # v7.16.0: Track OpenAI native prompt prefix caching (different from AgentBill semantic cache)
+        # This is OpenAI's ~50% discount on repeated prompt prefixes, NOT our full semantic cache
+        cached_input_tokens = response.dig('usage', 'prompt_tokens_details', 'cached_tokens') || 0
+        reasoning_output_tokens = response.dig('usage', 'completion_tokens_details', 'reasoning_tokens') || 0
+
+        metadata = {}
+        metadata[:openai_cached_input_tokens] = cached_input_tokens if cached_input_tokens > 0
+        metadata[:openai_reasoning_output_tokens] = reasoning_output_tokens if reasoning_output_tokens > 0
+
+        # Track usage via OTEL
+        @wrapper.send(:track_usage_sync,
+                      model: model,
+                      input_tokens: input_tokens,
+                      output_tokens: output_tokens,
+                      latency_ms: latency_ms,
+                      prompt_hash: prompt_hash,
+                      metadata: metadata.empty? ? nil : metadata)
+
+        # Cache the response
+        response_content = response.dig('choices', 0, 'message', 'content') || ''
+        @wrapper.send(:cache_response_sync,
+                      model: model,
+                      prompt_hash: prompt_hash,
+                      prompt_content: messages,
+                      response_content: response_content,
+                      input_tokens: input_tokens,
+                      output_tokens: output_tokens,
+                      request_id: validation['request_id'])
+
+        response
+      end
+    end
+  end
+
+  # Anthropic wrapper with automatic tracking
+  class AgentBillAnthropic < BaseWrapper
+    def initialize(**kwargs)
+      super(**kwargs)
+      @provider_name = 'anthropic'
+
+      # Lazy load Anthropic
+      require 'anthropic'
+      anthropic_options = @provider_kwargs.reject { |k, _| %i[api_key customer_id daily_budget monthly_budget base_url debug].include?(k) }
+      @client = Anthropic::Client.new(**anthropic_options)
+    end
+
+    def messages
+      @messages_namespace ||= MessagesNamespace.new(self)
+    end
+
+    class MessagesNamespace
+      def initialize(wrapper)
+        @wrapper = wrapper
+      end
+
+      def create(model:, max_tokens:, messages:, **kwargs)
+        start_time = Time.now
+
+        # Validate budget first (CRITICAL: Always call router)
+        validation = @wrapper.send(:validate_budget_sync, model, messages)
+
+        unless validation['allowed']
+          reason = validation['reason'] || 'Budget exceeded'
+          reason_lower = reason.downcase
+
+          if reason_lower.include?('budget')
+            raise BudgetExceededError.new(reason, validation)
+          elsif reason_lower.include?('rate')
+            raise RateLimitExceededError.new(reason, validation)
+          else
+            raise PolicyViolationError.new(reason, validation)
+          end
+        end
+
+        # Check for cache hit
+        if validation['cache_hit'] && validation['cached_response']
+          @wrapper.send(:log, '✓ Cache hit - returning cached response')
+          return validation['cached_response']
+        end
+
+        # Make the actual API call
+        response = @wrapper.instance_variable_get(:@client).messages(
+          model: model,
+          max_tokens: max_tokens,
+          messages: messages,
+          **kwargs
+        )
+
+        latency_ms = (Time.now - start_time) * 1000
+        input_tokens = response.dig('usage', 'input_tokens') || 0
+        output_tokens = response.dig('usage', 'output_tokens') || 0
+        prompt_hash = @wrapper.send(:hash_prompt, messages)
+
+        # Track usage via OTEL
+        @wrapper.send(:track_usage_sync,
+                      model: model,
+                      input_tokens: input_tokens,
+                      output_tokens: output_tokens,
+                      latency_ms: latency_ms,
+                      prompt_hash: prompt_hash)
+
+        # Cache the response
+        content_block = response.dig('content', 0)
+        response_content = content_block.is_a?(Hash) ? (content_block['text'] || '') : ''
+        @wrapper.send(:cache_response_sync,
+                      model: model,
+                      prompt_hash: prompt_hash,
+                      prompt_content: messages,
+                      response_content: response_content,
+                      input_tokens: input_tokens,
+                      output_tokens: output_tokens,
+                      request_id: validation['request_id'])
+
+        response
+      end
+    end
+  end
+end
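
Below the diff, for orientation: a minimal usage sketch of the wrappers this version adds. It is not part of the package; the require path, the access_token: keywords forwarded to the underlying provider clients, the model ids, and the AgentBill:: namespacing of the error classes (which come from the exceptions file required at the top) are assumptions.

    require 'agentbill' # require path assumed from the gem name

    client = AgentBill::AgentBillOpenAI.new(
      api_key: 'ab_live_xxx',              # AgentBill API key (illustrative)
      customer_id: 'cus_123',              # optional per-customer attribution
      daily_budget: 5.0,                   # enforced by the Cost Guard router
      debug: true,
      access_token: ENV['OPENAI_API_KEY']  # assumed pass-through to OpenAI::Client
    )

    begin
      response = client.chat.completions.create(
        model: 'gpt-4o-mini',
        messages: [{ role: 'user', content: 'Hello!' }]
      )
      # On a semantic-cache hit, create returns validation['cached_response'] as-is,
      # so guard before digging into the usual response hash.
      content = response.is_a?(Hash) ? response.dig('choices', 0, 'message', 'content') : response
      puts content
    rescue AgentBill::BudgetExceededError, AgentBill::RateLimitExceededError,
           AgentBill::PolicyViolationError => e
      # Raised only when the router explicitly denies the call; if the router is
      # unreachable, validate_budget_sync fails open and the call proceeds.
      warn "Blocked by Cost Guard: #{e.message}"
    end

    # The Anthropic wrapper follows the same pattern; the access_token: keyword
    # is assumed to match whatever the installed anthropic gem's Client accepts.
    anthropic = AgentBill::AgentBillAnthropic.new(
      api_key: 'ab_live_xxx',
      access_token: ENV['ANTHROPIC_API_KEY']
    )
    msg = anthropic.messages.create(
      model: 'claude-3-5-sonnet-latest',   # illustrative model id
      max_tokens: 256,
      messages: [{ role: 'user', content: 'Hello!' }]
    )

Note the fail-open design visible in validate_budget_sync: a network error while contacting ai-cost-guard-router yields { 'allowed' => true, 'reason' => 'validation_failed_open' }, so availability of the provider call is favored over strict budget enforcement.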