paygent-sdk 1.0.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
paygent_sdk/__init__.py CHANGED
@@ -7,7 +7,17 @@ For the Go SDK equivalent, see: https://github.com/paygent/paygent-sdk-go
7
7
  """
8
8
 
9
9
  from .client import Client
10
- from .models import UsageData, UsageDataWithStrings, APIRequest, ModelPricing, MODEL_PRICING
10
+ from .models import (
11
+ UsageData, UsageDataWithStrings, APIRequest, ModelPricing, MODEL_PRICING,
12
+ SttUsageData, TtsUsageData, SttModelPricing, TtsModelPricing
13
+ )
14
+ from .voice_client import send_stt_usage, send_tts_usage # Import to attach methods to Client
15
+
16
+ # Wrappers are lazily imported in the wrappers module to avoid requiring
17
+ # installation of peer dependencies (openai, anthropic, mistral, etc.) that aren't being used.
18
+ # You can still import them normally:
19
+ # from paygent_sdk import PaygentOpenAI, PaygentGemini, etc.
20
+ # But they will only actually load when first accessed.
11
21
  from .constants import (
12
22
  ServiceProvider,
13
23
  OpenAIModels,
@@ -18,6 +28,18 @@ from .constants import (
18
28
  MistralAIModels,
19
29
  CohereModels,
20
30
  DeepSeekModels,
31
+ MoonshotAIModels,
32
+ DeepgramSTTModels,
33
+ MicrosoftAzureSpeechSTTModels,
34
+ GoogleCloudSpeechSTTModels,
35
+ AssemblyAISTTModels,
36
+ ElevenLabsSTTModels,
37
+ SonioxSTTModels,
38
+ AmazonPollyTTSModels,
39
+ MicrosoftAzureSpeechTTSModels,
40
+ GoogleCloudTextToSpeechTTSModels,
41
+ DeepgramTTSModels,
42
+ ElevenLabsTTSModels,
21
43
  is_model_supported
22
44
  )
23
45
 
@@ -31,6 +53,18 @@ __all__ = [
31
53
  "ModelPricing",
32
54
  "MODEL_PRICING",
33
55
 
56
+ # Voice data models
57
+ "SttUsageData",
58
+ "TtsUsageData",
59
+ "SttModelPricing",
60
+ "TtsModelPricing",
61
+
62
+ # Wrappers
63
+ "PaygentOpenAI",
64
+ "PaygentAnthropic",
65
+ "PaygentMistral",
66
+ "PaygentGemini",
67
+
34
68
  # Constants
35
69
  "ServiceProvider",
36
70
  "OpenAIModels",
@@ -41,7 +75,38 @@ __all__ = [
41
75
  "MistralAIModels",
42
76
  "CohereModels",
43
77
  "DeepSeekModels",
78
+ "MoonshotAIModels",
79
+
80
+ # STT/TTS Model constants
81
+ "DeepgramSTTModels",
82
+ "MicrosoftAzureSpeechSTTModels",
83
+ "GoogleCloudSpeechSTTModels",
84
+ "AssemblyAISTTModels",
85
+ "ElevenLabsSTTModels",
86
+ "SonioxSTTModels",
87
+ "AmazonPollyTTSModels",
88
+ "MicrosoftAzureSpeechTTSModels",
89
+ "GoogleCloudTextToSpeechTTSModels",
90
+ "DeepgramTTSModels",
91
+ "ElevenLabsTTSModels",
44
92
 
45
93
  # Utility functions
46
94
  "is_model_supported"
47
95
  ]
96
+
97
+
98
+ def __getattr__(name):
99
+ """
100
+ Lazy import wrapper classes to avoid requiring peer dependencies that aren't being used.
101
+
102
+ This allows importing wrappers like:
103
+ from paygent_sdk import PaygentOpenAI
104
+
105
+ But the actual import only happens when accessed, so if you never use PaygentOpenAI,
106
+ you don't need the openai package installed.
107
+ """
108
+ if name in ["PaygentOpenAI", "PaygentAnthropic", "PaygentMistral", "PaygentGemini", "PaygentLangChainCallback"]:
109
+ from . import wrappers
110
+ return getattr(wrappers, name)
111
+
112
+ raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
paygent_sdk/client.py CHANGED
@@ -23,20 +23,22 @@ from .models import UsageData, UsageDataWithStrings, APIRequest, ModelPricing, M
23
23
  class Client:
24
24
  """Paygent SDK client for tracking usage and costs for AI models."""
25
25
 
26
- def __init__(self, api_key: str, base_url: str = "http://13.201.118.45:8080"):
26
+ def __init__(self, api_key: str):
27
27
  """
28
28
  Initialize the Paygent SDK client.
29
29
 
30
30
  Args:
31
31
  api_key: Your Paygent API key
32
- base_url: Base URL for the Paygent API (default: http://13.201.118.45:8080)
33
32
  """
34
33
  self.api_key = api_key
35
- self.base_url = base_url.rstrip('/')
34
+ # Locked configuration - cannot be changed by users
35
+ self.base_url = "https://cp-api.withpaygent.com"
36
+ # self.base_url = "http://localhost:8082"
37
+ self.timeout = 3000
36
38
 
37
- # Setup logging
39
+ # Setup logging with ERROR level by default (minimal logging)
38
40
  self.logger = logging.getLogger(f"paygent_sdk.{id(self)}")
39
- self.logger.setLevel(logging.INFO)
41
+ self.logger.setLevel(logging.ERROR)
40
42
 
41
43
  # Add console handler if no handlers exist
42
44
  if not self.logger.handlers:
@@ -61,13 +63,13 @@ class Client:
61
63
  self.session.mount("http://", adapter)
62
64
  self.session.mount("https://", adapter)
63
65
 
64
- # Set default timeout
65
- self.session.timeout = 30
66
+ # Set timeout from locked configuration
67
+ self.session.timeout = self.timeout
66
68
 
67
69
  @classmethod
68
70
  def new_client(cls, api_key: str) -> 'Client':
69
71
  """
70
- Create a new Paygent SDK client with the default API URL.
72
+ Create a new Paygent SDK client.
71
73
 
72
74
  Args:
73
75
  api_key: Your Paygent API key
@@ -77,20 +79,6 @@ class Client:
77
79
  """
78
80
  return cls(api_key)
79
81
 
80
- @classmethod
81
- def new_client_with_url(cls, api_key: str, base_url: str) -> 'Client':
82
- """
83
- Create a new Paygent SDK client with a custom base URL.
84
-
85
- Args:
86
- api_key: Your Paygent API key
87
- base_url: Custom base URL for the Paygent API
88
-
89
- Returns:
90
- Client instance
91
- """
92
- return cls(api_key, base_url)
93
-
94
82
  def _calculate_cost(self, model: str, usage_data: UsageData) -> float:
95
83
  """
96
84
  Calculate the cost based on model and usage data.
@@ -113,12 +101,25 @@ class Client:
113
101
 
114
102
  # Calculate cost per 1000 tokens
115
103
  prompt_cost = (usage_data.prompt_tokens / 1000.0) * pricing.prompt_tokens_cost
104
+
105
+ # Handle cached tokens: if model doesn't support caching (cached_tokens_cost is None),
106
+ # bill cached tokens at regular prompt token rate
107
+ cached_cost = 0.0
108
+ if usage_data.cached_tokens and usage_data.cached_tokens > 0:
109
+ if pricing.cached_tokens_cost is not None:
110
+ # Model supports caching - use cached token price
111
+ cached_cost = (usage_data.cached_tokens / 1000.0) * pricing.cached_tokens_cost
112
+ else:
113
+ # Model doesn't support caching - bill at prompt token rate
114
+ cached_cost = (usage_data.cached_tokens / 1000.0) * pricing.prompt_tokens_cost
115
+
116
116
  completion_cost = (usage_data.completion_tokens / 1000.0) * pricing.completion_tokens_cost
117
- total_cost = prompt_cost + completion_cost
117
+ total_cost = prompt_cost + cached_cost + completion_cost
118
118
 
119
119
  self.logger.debug(
120
120
  f"Cost calculation for model '{model}': "
121
121
  f"prompt_tokens={usage_data.prompt_tokens} ({prompt_cost:.6f}), "
122
+ f"cached_tokens={usage_data.cached_tokens or 0} ({cached_cost:.6f}), "
122
123
  f"completion_tokens={usage_data.completion_tokens} ({completion_cost:.6f}), "
123
124
  f"total={total_cost:.6f}"
124
125
  )
@@ -145,19 +146,32 @@ class Client:
145
146
  requests.RequestException: If the HTTP request fails
146
147
  ValueError: If the usage data is invalid
147
148
  """
148
- self.logger.info(
149
- f"Starting sendUsage for agentID={agent_id}, customerID={customer_id}, "
150
- f"indicator={indicator}, model={usage_data.model}"
149
+ # Removed verbose logging - only log errors
150
+
151
+ # 🎯 AUTOMATIC CACHED TOKEN HANDLING
152
+ # Users can pass total prompt tokens - we automatically subtract cached tokens
153
+ # This makes manual tracking easier (no math required!)
154
+ cached_tokens = usage_data.cached_tokens or 0
155
+ regular_prompt_tokens = usage_data.prompt_tokens - cached_tokens
156
+
157
+ # Calculate cost using separated token counts
158
+ adjusted_usage_data = UsageData(
159
+ service_provider=usage_data.service_provider,
160
+ model=usage_data.model,
161
+ prompt_tokens=regular_prompt_tokens,
162
+ completion_tokens=usage_data.completion_tokens,
163
+ total_tokens=usage_data.total_tokens,
164
+ cached_tokens=cached_tokens
151
165
  )
152
166
 
153
167
  # Calculate cost
154
168
  try:
155
- cost = self._calculate_cost(usage_data.model, usage_data)
169
+ cost = self._calculate_cost(usage_data.model, adjusted_usage_data)
156
170
  except Exception as e:
157
171
  self.logger.error(f"Failed to calculate cost: {e}")
158
172
  raise ValueError(f"Failed to calculate cost: {e}") from e
159
173
 
160
- self.logger.info(f"Calculated cost: {cost:.6f} for model {usage_data.model}")
174
+ # Cost calculated (no logging for performance)
161
175
 
162
176
  # Prepare API request
163
177
  api_request = APIRequest(
@@ -173,7 +187,8 @@ class Client:
173
187
  "customerId": api_request.customer_id,
174
188
  "indicator": api_request.indicator,
175
189
  "amount": api_request.amount,
176
- "inputToken": usage_data.prompt_tokens,
190
+ "inputToken": regular_prompt_tokens, # Send non-cached tokens
191
+ "cachedToken": cached_tokens, # Send cached tokens separately
177
192
  "outputToken": usage_data.completion_tokens,
178
193
  "model": usage_data.model,
179
194
  "serviceProvider": usage_data.service_provider
@@ -207,10 +222,7 @@ class Client:
207
222
 
208
223
  # Check response status
209
224
  if 200 <= response.status_code < 300:
210
- self.logger.info(
211
- f"Successfully sent usage data for agentID={agent_id}, "
212
- f"customerID={customer_id}, cost={cost:.6f}"
213
- )
225
+ # Success - no logging to minimize verbosity
214
226
  return
215
227
 
216
228
  # Handle error response
@@ -381,11 +393,7 @@ class Client:
381
393
  requests.RequestException: If the HTTP request fails
382
394
  ValueError: If the usage data is invalid
383
395
  """
384
- self.logger.info(
385
- f"Starting sendUsageWithTokenString for agentID={agent_id}, customerID={customer_id}, "
386
- f"indicator={indicator}, serviceProvider={usage_data.service_provider}, model={usage_data.model}"
387
- )
388
-
396
+ # Removed verbose logging - only log errors
389
397
  # Calculate cost from strings
390
398
  try:
391
399
  cost = self._calculate_cost_from_strings(usage_data.model, usage_data)
@@ -393,7 +401,7 @@ class Client:
393
401
  self.logger.error(f"Failed to calculate cost from strings: {e}")
394
402
  raise ValueError(f"Failed to calculate cost from strings: {e}") from e
395
403
 
396
- self.logger.info(f"Calculated cost: {cost:.6f} for model {usage_data.model} from strings")
404
+ # Cost calculated from strings (no logging for performance)
397
405
 
398
406
  # Calculate token counts for API request
399
407
  prompt_tokens = self._get_token_count(usage_data.model, usage_data.prompt_string)
@@ -447,10 +455,7 @@ class Client:
447
455
 
448
456
  # Check response status
449
457
  if 200 <= response.status_code < 300:
450
- self.logger.info(
451
- f"Successfully sent usage data from strings for agentID={agent_id}, "
452
- f"customerID={customer_id}, cost={cost:.6f}"
453
- )
458
+ # Success - no logging to minimize verbosity
454
459
  return
455
460
 
456
461
  # Handle error response
paygent_sdk/constants.py CHANGED
@@ -18,7 +18,23 @@ class ServiceProvider:
18
18
  MISTRAL_AI = "Mistral AI"
19
19
  COHERE = "Cohere"
20
20
  DEEPSEEK = "DeepSeek"
21
+ MOONSHOT_AI = "Moonshot AI"
21
22
  CUSTOM = "Custom"
23
+
24
+ # STT Service Providers
25
+ DEEPGRAM = "Deepgram"
26
+ MICROSOFT_AZURE_SPEECH = "Microsoft Azure Speech Service"
27
+ GOOGLE_CLOUD_SPEECH = "Google Cloud Speech-to-Text"
28
+ ASSEMBLY_AI = "AssemblyAI"
29
+ ELEVEN_LABS_STT = "Eleven Labs STT"
30
+ SONIOX = "Soniox"
31
+
32
+ # TTS Service Providers
33
+ AMAZON_POLLY = "Amazon Polly"
34
+ MICROSOFT_AZURE_SPEECH_TTS = "Microsoft Azure Speech Service"
35
+ GOOGLE_CLOUD_TEXT_TO_SPEECH = "Google Cloud Text-to-Speech"
36
+ DEEPGRAM_TTS = "Deepgram"
37
+ ELEVEN_LABS_TTS = "Eleven Labs TTS"
22
38
 
23
39
 
24
40
  # OpenAI Models
@@ -198,8 +214,113 @@ class DeepSeekModels:
198
214
  DEEPSEEK_V3_2_EXP = "DeepSeek V3.2-Exp"
199
215
 
200
216
 
217
+ # Moonshot AI Models
218
+ class MoonshotAIModels:
219
+ """Moonshot AI / Kimi model constants."""
220
+ KIMI_K2_INSTRUCT_0905 = "Kimi k2-instruct-0905"
221
+ KIMI_K2_0905_1T_256K = "Kimi k2-0905-1T-256K"
222
+
223
+
224
+
225
+
226
+
227
+
228
+ # Deepgram STT Models
229
+ class DeepgramSTTModels:
230
+ """Deepgram STT model constants."""
231
+ FLUX = "Flux"
232
+ NOVA_3_MONOLINGUAL = "Nova-3 (Monolingual)"
233
+ NOVA_3_MULTILINGUAL = "Nova-3 (Multilingual)"
234
+ NOVA_1 = "Nova-1"
235
+ NOVA_2 = "Nova-2"
236
+ ENHANCED = "Enhanced"
237
+ BASE = "Base"
238
+ REDACTION = "Redaction (Add-on)"
239
+ KEYTERM_PROMPTING = "Keyterm Prompting (Add-on)"
240
+ SPEAKER_DIARIZATION = "Speaker Diarization (Add-on)"
241
+ # Growth tier models
242
+ GROWTH_NOVA_3_MONOLINGUAL = "Growth Nova-3 (Monolingual)"
243
+ GROWTH_NOVA_3_MULTILINGUAL = "Growth Nova-3 (Multilingual)"
244
+ GROWTH_NOVA_1 = "Growth Nova-1"
245
+ GROWTH_NOVA_2 = "Growth Nova-2"
246
+
247
+
248
+ # Microsoft Azure Speech Service STT Models
249
+ class MicrosoftAzureSpeechSTTModels:
250
+ """Microsoft Azure Speech Service STT model constants."""
251
+ STANDARD = "Azure Speech Standard"
252
+ CUSTOM = "Azure Speech Custom"
253
+
254
+
255
+ # Google Cloud Speech-to-Text STT Models
256
+ class GoogleCloudSpeechSTTModels:
257
+ """Google Cloud Speech-to-Text STT model constants."""
258
+ STANDARD = "Google Cloud Speech Standard"
259
+
260
+
261
+ # AssemblyAI STT Models
262
+ class AssemblyAISTTModels:
263
+ """AssemblyAI STT model constants."""
264
+ UNIVERSAL_STREAMING = "Universal-Streaming"
265
+ UNIVERSAL_STREAMING_MULTILANG = "Universal-Streaming Multilingual"
266
+ KEYTERMS_PROMPTING = "Keyterms Prompting"
267
+
268
+
269
+ # Eleven Labs STT Models
270
+ class ElevenLabsSTTModels:
271
+ """Eleven Labs STT model constants."""
272
+ BUSINESS_SCRIBE_V1_V2 = "Eleven Labs Business Scribe V1/V2"
273
+
274
+
275
+ # Soniox STT Models
276
+ class SonioxSTTModels:
277
+ """Soniox STT model constants."""
278
+ REAL_TIME = "Soniox Real Time"
279
+
280
+
281
+ # Amazon Polly TTS Models
282
+ class AmazonPollyTTSModels:
283
+ """Amazon Polly TTS model constants."""
284
+ STANDARD = "Amazon Polly Standard"
285
+ NEURAL = "Amazon Polly Neural"
286
+ LONG_FORM = "Amazon Polly Long-form"
287
+ GENERATIVE = "Amazon Polly Generative"
288
+
289
+
290
+ # Microsoft Azure Speech Service TTS Models
291
+ class MicrosoftAzureSpeechTTSModels:
292
+ """Microsoft Azure Speech Service TTS model constants."""
293
+ STANDARD_NEURAL = "Azure TTS Standard Neural"
294
+ CUSTOM_SYNTHESIS = "Azure TTS Custom Synthesis"
295
+ CUSTOM_SYNTHESIS_NEURAL_HD = "Azure TTS Custom Synthesis Neural HD"
296
+
297
+
298
+ # Google Cloud Text-to-Speech TTS Models
299
+ class GoogleCloudTextToSpeechTTSModels:
300
+ """Google Cloud Text-to-Speech TTS model constants."""
301
+ CHIRP_3_HD = "Google Cloud TTS Chirp 3: HD"
302
+ INSTANT_CUSTOM = "Google Cloud TTS Instant custom"
303
+ WAVENET = "Google Cloud TTS WaveNet"
304
+ STUDIO = "Google Cloud TTS Studio"
305
+ STANDARD = "Google Cloud TTS Standard"
306
+ NEURAL2 = "Google Cloud TTS Neural2"
307
+ POLYGLOT_PREVIEW = "Google Cloud TTS Polyglot (Preview)"
308
+
309
+
310
+ # Deepgram TTS Models
311
+ class DeepgramTTSModels:
312
+ """Deepgram TTS model constants."""
313
+ AURA_2 = "Deepgram Aura-2"
314
+ AURA_1 = "Deepgram Aura-1"
315
+ # Growth tier models
316
+ GROWTH_AURA_2 = "Deepgram Growth Aura-2"
317
+ GROWTH_AURA_1 = "Deepgram Growth Aura-1"
201
318
 
202
319
 
320
+ # Eleven Labs TTS Models
321
+ class ElevenLabsTTSModels:
322
+ """Eleven Labs TTS model constants."""
323
+ BUSINESS_MULTILINGUAL_V2_V3 = "Eleven Labs Business Multilingual V2/V3"
203
324
 
204
325
 
205
326
  def is_model_supported(model: str) -> bool:
paygent_sdk/models.py CHANGED
@@ -15,6 +15,7 @@ from .constants import (
15
15
  MistralAIModels,
16
16
  CohereModels,
17
17
  DeepSeekModels,
18
+ MoonshotAIModels,
18
19
  )
19
20
 
20
21
 
@@ -26,6 +27,7 @@ class UsageData:
26
27
  prompt_tokens: int
27
28
  completion_tokens: int
28
29
  total_tokens: int
30
+ cached_tokens: Optional[int] = None # Optional cached tokens
29
31
 
30
32
 
31
33
  @dataclass
@@ -51,6 +53,35 @@ class ModelPricing:
51
53
  """Represents pricing information for different models."""
52
54
  prompt_tokens_cost: float
53
55
  completion_tokens_cost: float
56
+ cached_tokens_cost: Optional[float] = None # Optional cached token cost (if None, model doesn't support caching)
57
+
58
+
59
+ @dataclass
60
+ class SttUsageData:
61
+ """Represents the STT usage data structure."""
62
+ service_provider: str
63
+ model: str
64
+ audio_duration: int # Duration in seconds
65
+
66
+
67
+ @dataclass
68
+ class TtsUsageData:
69
+ """Represents the TTS usage data structure."""
70
+ service_provider: str
71
+ model: str
72
+ character_count: int # Number of characters
73
+
74
+
75
+ @dataclass
76
+ class SttModelPricing:
77
+ """Represents pricing information for STT models (cost per hour in USD)."""
78
+ cost_per_hour: float # Cost per hour in USD
79
+
80
+
81
+ @dataclass
82
+ class TtsModelPricing:
83
+ """Represents pricing information for TTS models (cost per 1 million characters in USD)."""
84
+ cost_per_million_characters: float # Cost per 1 million characters in USD
54
85
 
55
86
 
56
87
  # Default model pricing (cost per 1000 tokens in USD)
@@ -58,22 +89,27 @@ MODEL_PRICING: Dict[str, ModelPricing] = {
58
89
  # OpenAI Models (pricing per 1000 tokens)
59
90
  OpenAIModels.GPT_5: ModelPricing(
60
91
  prompt_tokens_cost=0.00125, # $0.00125 per 1000 tokens
92
+ cached_tokens_cost=0.000125, # 90% discount for cached tokens
61
93
  completion_tokens_cost=0.01 # $0.01 per 1000 tokens
62
94
  ),
63
95
  OpenAIModels.GPT_5_MINI: ModelPricing(
64
96
  prompt_tokens_cost=0.00025, # $0.00025 per 1000 tokens
97
+ cached_tokens_cost=0.000025, # 90% discount for cached tokens
65
98
  completion_tokens_cost=0.002 # $0.002 per 1000 tokens
66
99
  ),
67
100
  OpenAIModels.GPT_5_NANO: ModelPricing(
68
101
  prompt_tokens_cost=0.00005, # $0.00005 per 1000 tokens
102
+ cached_tokens_cost=0.000005, # 90% discount for cached tokens
69
103
  completion_tokens_cost=0.0004 # $0.0004 per 1000 tokens
70
104
  ),
71
105
  OpenAIModels.GPT_5_CHAT_LATEST: ModelPricing(
72
106
  prompt_tokens_cost=0.00125, # $0.00125 per 1000 tokens
107
+ cached_tokens_cost=0.000125, # 90% discount for cached tokens
73
108
  completion_tokens_cost=0.01 # $0.01 per 1000 tokens
74
109
  ),
75
110
  OpenAIModels.GPT_5_CODEX: ModelPricing(
76
111
  prompt_tokens_cost=0.00125, # $0.00125 per 1000 tokens
112
+ cached_tokens_cost=0.000125, # 90% discount for cached tokens
77
113
  completion_tokens_cost=0.01 # $0.01 per 1000 tokens
78
114
  ),
79
115
  OpenAIModels.GPT_5_PRO: ModelPricing(
@@ -86,26 +122,32 @@ MODEL_PRICING: Dict[str, ModelPricing] = {
86
122
  ),
87
123
  OpenAIModels.GPT_4_1: ModelPricing(
88
124
  prompt_tokens_cost=0.002, # $0.002 per 1000 tokens
125
+ cached_tokens_cost=0.0005, # 75% discount for cached tokens
89
126
  completion_tokens_cost=0.008 # $0.008 per 1000 tokens
90
127
  ),
91
128
  OpenAIModels.GPT_4_1_MINI: ModelPricing(
92
129
  prompt_tokens_cost=0.0004, # $0.0004 per 1000 tokens
130
+ cached_tokens_cost=0.0001, # 75% discount for cached tokens
93
131
  completion_tokens_cost=0.0016 # $0.0016 per 1000 tokens
94
132
  ),
95
133
  OpenAIModels.GPT_4_1_NANO: ModelPricing(
96
134
  prompt_tokens_cost=0.0001, # $0.0001 per 1000 tokens
135
+ cached_tokens_cost=0.000025, # 75% discount for cached tokens
97
136
  completion_tokens_cost=0.0004 # $0.0004 per 1000 tokens
98
137
  ),
99
138
  OpenAIModels.GPT_4O: ModelPricing(
100
139
  prompt_tokens_cost=0.0025, # $0.0025 per 1000 tokens
140
+ cached_tokens_cost=0.00125, # 50% discount for cached tokens
101
141
  completion_tokens_cost=0.01 # $0.01 per 1000 tokens
102
142
  ),
103
143
  OpenAIModels.GPT_4O_2024_05_13: ModelPricing(
104
144
  prompt_tokens_cost=0.005, # $0.005 per 1000 tokens
145
+ cached_tokens_cost=0.0025, # 50% discount for cached tokens
105
146
  completion_tokens_cost=0.015 # $0.015 per 1000 tokens
106
147
  ),
107
148
  OpenAIModels.GPT_4O_MINI: ModelPricing(
108
149
  prompt_tokens_cost=0.00015, # $0.00015 per 1000 tokens
150
+ cached_tokens_cost=0.000075, # 50% discount for cached tokens
109
151
  completion_tokens_cost=0.0006 # $0.0006 per 1000 tokens
110
152
  ),
111
153
  OpenAIModels.GPT_REALTIME: ModelPricing(
@@ -118,10 +160,12 @@ MODEL_PRICING: Dict[str, ModelPricing] = {
118
160
  ),
119
161
  OpenAIModels.GPT_4O_REALTIME_PREVIEW: ModelPricing(
120
162
  prompt_tokens_cost=0.005, # $0.005 per 1000 tokens
163
+ cached_tokens_cost=0.0025, # 50% discount for cached tokens
121
164
  completion_tokens_cost=0.02 # $0.02 per 1000 tokens
122
165
  ),
123
166
  OpenAIModels.GPT_4O_MINI_REALTIME_PREVIEW: ModelPricing(
124
167
  prompt_tokens_cost=0.0006, # $0.0006 per 1000 tokens
168
+ cached_tokens_cost=0.0003, # 50% discount for cached tokens
125
169
  completion_tokens_cost=0.0024 # $0.0024 per 1000 tokens
126
170
  ),
127
171
  OpenAIModels.GPT_AUDIO: ModelPricing(
@@ -482,4 +526,16 @@ MODEL_PRICING: Dict[str, ModelPricing] = {
482
526
  prompt_tokens_cost=0.000028, # $0.000028 per 1000 tokens
483
527
  completion_tokens_cost=0.00042 # $0.00042 per 1000 tokens
484
528
  ),
529
+
530
+ # Moonshot AI / Kimi Models (pricing per 1000 tokens)
531
+ MoonshotAIModels.KIMI_K2_INSTRUCT_0905: ModelPricing(
532
+ prompt_tokens_cost=0.001, # $0.001 per 1000 tokens
533
+ cached_tokens_cost=0.0005, # $0.0005 per 1000 tokens (as specified)
534
+ completion_tokens_cost=0.003 # $0.003 per 1000 tokens
535
+ ),
536
+ MoonshotAIModels.KIMI_K2_0905_1T_256K: ModelPricing(
537
+ prompt_tokens_cost=0.001, # $0.001 per 1000 tokens
538
+ # cached_tokens_cost not specified - model doesn't support cached tokens
539
+ completion_tokens_cost=0.003 # $0.003 per 1000 tokens
540
+ ),
485
541
  }