genai-otel-instrument 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. genai_otel/__init__.py +132 -0
  2. genai_otel/__version__.py +34 -0
  3. genai_otel/auto_instrument.py +602 -0
  4. genai_otel/cli.py +92 -0
  5. genai_otel/config.py +333 -0
  6. genai_otel/cost_calculator.py +467 -0
  7. genai_otel/cost_enriching_exporter.py +207 -0
  8. genai_otel/cost_enrichment_processor.py +174 -0
  9. genai_otel/evaluation/__init__.py +76 -0
  10. genai_otel/evaluation/bias_detector.py +364 -0
  11. genai_otel/evaluation/config.py +261 -0
  12. genai_otel/evaluation/hallucination_detector.py +525 -0
  13. genai_otel/evaluation/pii_detector.py +356 -0
  14. genai_otel/evaluation/prompt_injection_detector.py +262 -0
  15. genai_otel/evaluation/restricted_topics_detector.py +316 -0
  16. genai_otel/evaluation/span_processor.py +962 -0
  17. genai_otel/evaluation/toxicity_detector.py +406 -0
  18. genai_otel/exceptions.py +17 -0
  19. genai_otel/gpu_metrics.py +516 -0
  20. genai_otel/instrumentors/__init__.py +71 -0
  21. genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
  22. genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
  23. genai_otel/instrumentors/autogen_instrumentor.py +394 -0
  24. genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
  25. genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
  26. genai_otel/instrumentors/base.py +919 -0
  27. genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
  28. genai_otel/instrumentors/cohere_instrumentor.py +140 -0
  29. genai_otel/instrumentors/crewai_instrumentor.py +311 -0
  30. genai_otel/instrumentors/dspy_instrumentor.py +661 -0
  31. genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
  32. genai_otel/instrumentors/groq_instrumentor.py +106 -0
  33. genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
  34. genai_otel/instrumentors/haystack_instrumentor.py +503 -0
  35. genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
  36. genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
  37. genai_otel/instrumentors/instructor_instrumentor.py +425 -0
  38. genai_otel/instrumentors/langchain_instrumentor.py +340 -0
  39. genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
  40. genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
  41. genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
  42. genai_otel/instrumentors/ollama_instrumentor.py +197 -0
  43. genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
  44. genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
  45. genai_otel/instrumentors/openai_instrumentor.py +260 -0
  46. genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
  47. genai_otel/instrumentors/replicate_instrumentor.py +87 -0
  48. genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
  49. genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
  50. genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
  51. genai_otel/llm_pricing.json +1676 -0
  52. genai_otel/logging_config.py +45 -0
  53. genai_otel/mcp_instrumentors/__init__.py +14 -0
  54. genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
  55. genai_otel/mcp_instrumentors/base.py +105 -0
  56. genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
  57. genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
  58. genai_otel/mcp_instrumentors/manager.py +139 -0
  59. genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
  60. genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
  61. genai_otel/metrics.py +148 -0
  62. genai_otel/py.typed +2 -0
  63. genai_otel/server_metrics.py +197 -0
  64. genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
  65. genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
  66. genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
  67. genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
  68. genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
  69. genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1676 @@
1
+ {
2
+ "embeddings": {
3
+ "text-embedding-ada-002": 0.0001,
4
+ "text-embedding-3-small": 2e-05,
5
+ "text-embedding-3-large": 0.00013,
6
+ "ada": 0.0001,
7
+ "ada-v2": 0.0001,
8
+ "text-ada-001": 0.0001,
9
+ "azure_text-embedding-ada-002": 0.0001,
10
+ "azure_text-embedding-3-small": 2e-05,
11
+ "azure_text-embedding-3-large": 0.00013,
12
+ "azure_ada": 0.0001,
13
+ "azure_ada-v2": 0.0001,
14
+ "azure_text-ada-001": 0.0001,
15
+ "embed-english-v3.0": 0.0001,
16
+ "embed-multilingual-v3.0": 0.0001,
17
+ "embed-english-light-v3.0": 0.0001,
18
+ "embed-multilingual-light-v3.0": 0.0001,
19
+ "embed-english-v2.0": 0.0001,
20
+ "embed-english-light-v2.0": 0.0001,
21
+ "embed-multilingual-v2.0": 0.0001,
22
+ "mistral-embed": 0.0001,
23
+ "amazon.titan-embed-text-v1": 0.0001,
24
+ "amazon.titan-embed-text-v2": 2e-05,
25
+ "textembedding-gecko": 0.0001,
26
+ "textembedding-gecko@001": 0.0001,
27
+ "textembedding-gecko@002": 0.0001,
28
+ "textembedding-gecko@003": 0.0001,
29
+ "google/embeddinggemma-300m": {
30
+ "promptPrice": 2e-05,
31
+ "completionPrice": 2e-05,
32
+ "note": "Google's embedding model - already noted in earlier analysis"
33
+ },
34
+ "Snowflake/snowflake-arctic-embed-m": {
35
+ "promptPrice": 3e-05,
36
+ "completionPrice": 3e-05,
37
+ "note": "Medium Arctic embedding - 496K downloads"
38
+ },
39
+ "Snowflake/snowflake-arctic-embed-s": {
40
+ "promptPrice": 2e-05,
41
+ "completionPrice": 2e-05,
42
+ "note": "Small Arctic embedding"
43
+ },
44
+ "Snowflake/snowflake-arctic-embed-m-v2.0": {
45
+ "promptPrice": 3e-05,
46
+ "completionPrice": 3e-05,
47
+ "note": "Medium v2.0 Arctic embedding"
48
+ },
49
+ "Snowflake/snowflake-arctic-embed-xs": {
50
+ "promptPrice": 1e-05,
51
+ "completionPrice": 1e-05,
52
+ "note": "Extra small Arctic embedding"
53
+ },
54
+ "nvidia/NV-Embed-v2": {
55
+ "promptPrice": 5e-05,
56
+ "completionPrice": 5e-05,
57
+ "note": "NV-Embed v2 - high-quality text embeddings, 198K downloads"
58
+ },
59
+ "nvidia/llama-embed-nemotron-8b": {
60
+ "promptPrice": 6e-05,
61
+ "completionPrice": 6e-05,
62
+ "note": "Llama-based embedding model - multilingual"
63
+ },
64
+ "nvidia/omni-embed-nemotron-3b": {
65
+ "promptPrice": 4e-05,
66
+ "completionPrice": 4e-05,
67
+ "note": "Multimodal embedding - text, image, video, audio"
68
+ }
69
+ },
70
+ "images": {
71
+ "dall-e-3": {
72
+ "standard": {
73
+ "1024x1024": 0.04,
74
+ "1024x1792": 0.08,
75
+ "1792x1024": 0.08
76
+ },
77
+ "hd": {
78
+ "1024x1024": 0.08,
79
+ "1024x1792": 0.12,
80
+ "1792x1024": 0.12
81
+ }
82
+ },
83
+ "azure_dall-e-3": {
84
+ "standard": {
85
+ "1024x1024": 0.04,
86
+ "1024x1792": 0.08,
87
+ "1792x1024": 0.08
88
+ },
89
+ "hd": {
90
+ "1024x1024": 0.08,
91
+ "1024x1792": 0.12,
92
+ "1792x1024": 0.12
93
+ }
94
+ },
95
+ "dall-e-2": {
96
+ "standard": {
97
+ "1024x1024": 0.02,
98
+ "512x512": 0.018,
99
+ "256x256": 0.016
100
+ }
101
+ },
102
+ "black-forest-labs/FLUX.1-dev": {
103
+ "standard": {
104
+ "1000000": 0.025
105
+ }
106
+ },
107
+ "black-forest-labs/FLUX.1-canny": {
108
+ "standard": {
109
+ "1000000": 0.025
110
+ }
111
+ },
112
+ "black-forest-labs/FLUX.1-depth": {
113
+ "standard": {
114
+ "1000000": 0.025
115
+ }
116
+ },
117
+ "black-forest-labs/FLUX.1-redux": {
118
+ "standard": {
119
+ "1000000": 0.025
120
+ }
121
+ },
122
+ "black-forest-labs/FLUX.1-schnell": {
123
+ "standard": {
124
+ "1000000": 0.0027
125
+ }
126
+ },
127
+ "black-forest-labs/FLUX.1-pro": {
128
+ "standard": {
129
+ "1000000": 0.04
130
+ }
131
+ },
132
+ "black-forest-labs/FLUX.1.1-pro": {
133
+ "standard": {
134
+ "1000000": 0.05
135
+ }
136
+ },
137
+ "stabilityai/stable-diffusion-xl-base-1.0": {
138
+ "standard": {
139
+ "512x512": 0.001,
140
+ "1024x1024": 0.01
141
+ }
142
+ },
143
+ "amazon.titan-image-generator-v1": {
144
+ "standard": {
145
+ "512x512": 0.008,
146
+ "1024x1024": 0.01
147
+ },
148
+ "premium": {
149
+ "512x512": 0.01,
150
+ "1024x1024": 0.012
151
+ }
152
+ },
153
+ "grok-image": {
154
+ "standard": {
155
+ "per_image": 0.07
156
+ }
157
+ },
158
+ "xai-grok-image": {
159
+ "standard": {
160
+ "per_image": 0.07
161
+ }
162
+ }
163
+ },
164
+ "audio": {
165
+ "tts-1": 0.015,
166
+ "tts-1-hd": 0.03,
167
+ "eleven_multilingual_v2": 0.24,
168
+ "eleven_multilingual_v1": 0.24,
169
+ "eleven_monolingual_v1": 0.24,
170
+ "eleven_english_v1": 0.24,
171
+ "eleven_turbo_v2": 0.24,
172
+ "eleven_english_sts_v2": 0.24,
173
+ "eleven_multilingual_sts_v2": 0.24,
174
+ "best": 0.00010277777,
175
+ "nano": 3.333333e-05
176
+ },
177
+ "chat": {
178
+ "gpt-4.1-nano-2025-04-14": {
179
+ "promptPrice": 0.0001,
180
+ "completionPrice": 0.0004
181
+ },
182
+ "gpt-4.1-nano": {
183
+ "promptPrice": 0.0001,
184
+ "completionPrice": 0.0004
185
+ },
186
+ "gpt-4.1-mini-2025-04-14": {
187
+ "promptPrice": 0.0004,
188
+ "completionPrice": 0.0016
189
+ },
190
+ "gpt-4.1-mini": {
191
+ "promptPrice": 0.0004,
192
+ "completionPrice": 0.0016
193
+ },
194
+ "gpt-4.1-2025-04-14": {
195
+ "promptPrice": 0.002,
196
+ "completionPrice": 0.008
197
+ },
198
+ "gpt-4.1": {
199
+ "promptPrice": 0.002,
200
+ "completionPrice": 0.008
201
+ },
202
+ "gpt-5": {
203
+ "promptPrice": 0.00125,
204
+ "completionPrice": 0.01
205
+ },
206
+ "gpt-5-2025-08-07": {
207
+ "promptPrice": 0.00125,
208
+ "completionPrice": 0.01
209
+ },
210
+ "gpt-5-mini": {
211
+ "promptPrice": 0.00025,
212
+ "completionPrice": 0.002
213
+ },
214
+ "gpt-5-nano": {
215
+ "promptPrice": 0.0001,
216
+ "completionPrice": 0.0004
217
+ },
218
+ "gpt-4o": {
219
+ "promptPrice": 0.0005,
220
+ "completionPrice": 0.0015
221
+ },
222
+ "gpt-4o-2024-08-06": {
223
+ "promptPrice": 0.0025,
224
+ "completionPrice": 0.01
225
+ },
226
+ "gpt-4o-2024-05-13": {
227
+ "promptPrice": 0.005,
228
+ "completionPrice": 0.015
229
+ },
230
+ "gpt-4o-mini": {
231
+ "promptPrice": 0.00015,
232
+ "completionPrice": 0.0006
233
+ },
234
+ "gpt-4o-mini-2024-07-18": {
235
+ "promptPrice": 0.00015,
236
+ "completionPrice": 0.0006
237
+ },
238
+ "o1-mini": {
239
+ "promptPrice": 0.003,
240
+ "completionPrice": 0.012
241
+ },
242
+ "o1-mini-2024-09-12": {
243
+ "promptPrice": 0.003,
244
+ "completionPrice": 0.012
245
+ },
246
+ "o1-preview": {
247
+ "promptPrice": 0.015,
248
+ "completionPrice": 0.06
249
+ },
250
+ "o1-preview-2024-09-12": {
251
+ "promptPrice": 0.015,
252
+ "completionPrice": 0.06
253
+ },
254
+ "gpt-3.5-turbo": {
255
+ "promptPrice": 0.0005,
256
+ "completionPrice": 0.0015
257
+ },
258
+ "gpt-3.5-turbo-0125": {
259
+ "promptPrice": 0.0005,
260
+ "completionPrice": 0.0015
261
+ },
262
+ "azure_gpt-35-turbo": {
263
+ "promptPrice": 0.0005,
264
+ "completionPrice": 0.0015
265
+ },
266
+ "azure_gpt-35-turbo-16k": {
267
+ "promptPrice": 0.0005,
268
+ "completionPrice": 0.0015
269
+ },
270
+ "azure_gpt-35-turbo-instruct": {
271
+ "promptPrice": 0.0015,
272
+ "completionPrice": 0.002
273
+ },
274
+ "gpt-4": {
275
+ "promptPrice": 0.03,
276
+ "completionPrice": 0.06
277
+ },
278
+ "gpt-4-turbo": {
279
+ "promptPrice": 0.01,
280
+ "completionPrice": 0.03
281
+ },
282
+ "gpt-4-32k": {
283
+ "promptPrice": 0.06,
284
+ "completionPrice": 0.12
285
+ },
286
+ "gpt-4-1106-preview": {
287
+ "promptPrice": 0.01,
288
+ "completionPrice": 0.03
289
+ },
290
+ "gpt-4-0125-preview": {
291
+ "promptPrice": 0.01,
292
+ "completionPrice": 0.03
293
+ },
294
+ "gpt-4-preview": {
295
+ "promptPrice": 0.01,
296
+ "completionPrice": 0.03
297
+ },
298
+ "gpt-4-1106-vision-preview": {
299
+ "promptPrice": 0.01,
300
+ "completionPrice": 0.03
301
+ },
302
+ "gpt-4-vision-preview": {
303
+ "promptPrice": 0.01,
304
+ "completionPrice": 0.03
305
+ },
306
+ "azure_gpt-4": {
307
+ "promptPrice": 0.03,
308
+ "completionPrice": 0.06
309
+ },
310
+ "azure_gpt-4-32k": {
311
+ "promptPrice": 0.06,
312
+ "completionPrice": 0.12
313
+ },
314
+ "claude-3-opus-20240229": {
315
+ "promptPrice": 0.015,
316
+ "completionPrice": 0.075
317
+ },
318
+ "claude-4-opus": {
319
+ "promptPrice": 0.015,
320
+ "completionPrice": 0.075
321
+ },
322
+ "claude-opus-4": {
323
+ "promptPrice": 0.015,
324
+ "completionPrice": 0.075
325
+ },
326
+ "claude-opus-4-1": {
327
+ "promptPrice": 0.015,
328
+ "completionPrice": 0.075
329
+ },
330
+ "claude-opus-4.1": {
331
+ "promptPrice": 0.015,
332
+ "completionPrice": 0.075
333
+ },
334
+ "claude-3-sonnet-20240229": {
335
+ "promptPrice": 0.003,
336
+ "completionPrice": 0.015
337
+ },
338
+ "claude-3-haiku-20240307": {
339
+ "promptPrice": 0.00025,
340
+ "completionPrice": 0.00125
341
+ },
342
+ "claude-3-5-sonnet-20240620": {
343
+ "promptPrice": 0.003,
344
+ "completionPrice": 0.015
345
+ },
346
+ "claude-3-5-sonnet-20241022": {
347
+ "promptPrice": 0.003,
348
+ "completionPrice": 0.015
349
+ },
350
+ "claude-3-5-haiku-20241022": {
351
+ "promptPrice": 0.0008,
352
+ "completionPrice": 0.004
353
+ },
354
+ "claude-sonnet-4-5": {
355
+ "promptPrice": 0.003,
356
+ "completionPrice": 0.015
357
+ },
358
+ "claude-sonnet-4-5-20250929": {
359
+ "promptPrice": 0.003,
360
+ "completionPrice": 0.015
361
+ },
362
+ "claude-3-7-sonnet": {
363
+ "promptPrice": 0.003,
364
+ "completionPrice": 0.015
365
+ },
366
+ "claude-haiku-4-5": {
367
+ "promptPrice": 0.001,
368
+ "completionPrice": 0.005
369
+ },
370
+ "command": {
371
+ "promptPrice": 0.001,
372
+ "completionPrice": 0.002
373
+ },
374
+ "command-nightly": {
375
+ "promptPrice": 0.001,
376
+ "completionPrice": 0.002
377
+ },
378
+ "command-light": {
379
+ "promptPrice": 0.0003,
380
+ "completionPrice": 0.0006
381
+ },
382
+ "command-light-nightly": {
383
+ "promptPrice": 0.0003,
384
+ "completionPrice": 0.0006
385
+ },
386
+ "command-r-plus": {
387
+ "promptPrice": 0.0025,
388
+ "completionPrice": 0.01
389
+ },
390
+ "command-r+": {
391
+ "promptPrice": 0.0025,
392
+ "completionPrice": 0.01
393
+ },
394
+ "command-r": {
395
+ "promptPrice": 0.00015,
396
+ "completionPrice": 0.0006
397
+ },
398
+ "command-r7b": {
399
+ "promptPrice": 3.75e-05,
400
+ "completionPrice": 0.00015
401
+ },
402
+ "command-a": {
403
+ "promptPrice": 0.0025,
404
+ "completionPrice": 0.01
405
+ },
406
+ "open-mistral-7b": {
407
+ "promptPrice": 0.00025,
408
+ "completionPrice": 0.00025
409
+ },
410
+ "open-mixtral-8x7b": {
411
+ "promptPrice": 0.0007,
412
+ "completionPrice": 0.0007
413
+ },
414
+ "mistral-small-latest": {
415
+ "promptPrice": 0.002,
416
+ "completionPrice": 0.006
417
+ },
418
+ "mistral-medium-latest": {
419
+ "promptPrice": 0.0027,
420
+ "completionPrice": 0.0081
421
+ },
422
+ "mistral-large-latest": {
423
+ "promptPrice": 0.008,
424
+ "completionPrice": 0.024
425
+ },
426
+ "amazon.titan-text-express-v1": {
427
+ "promptPrice": 0.0008,
428
+ "completionPrice": 0.0016
429
+ },
430
+ "amazon.titan-text-lite-v1": {
431
+ "promptPrice": 0.0003,
432
+ "completionPrice": 0.0004
433
+ },
434
+ "mistral.mistral-7b-instruct-v0:2": {
435
+ "promptPrice": 0.00015,
436
+ "completionPrice": 0.0002
437
+ },
438
+ "mistral.mixtral-8x7b-instruct-v0:1": {
439
+ "promptPrice": 0.00045,
440
+ "completionPrice": 0.0007
441
+ },
442
+ "mistral.mistral-large-2402-v1:0": {
443
+ "promptPrice": 0.008,
444
+ "completionPrice": 0.024
445
+ },
446
+ "anthropic.claude-3-haiku-20240307-v1:0": {
447
+ "promptPrice": 0.00025,
448
+ "completionPrice": 0.00125
449
+ },
450
+ "anthropic.claude-v2": {
451
+ "promptPrice": 0.008,
452
+ "completionPrice": 0.024
453
+ },
454
+ "anthropic.claude-3-sonnet-20240229-v1:0": {
455
+ "promptPrice": 0.003,
456
+ "completionPrice": 0.015
457
+ },
458
+ "anthropic.claude-3-opus-20240229-v1:0": {
459
+ "promptPrice": 0.015,
460
+ "completionPrice": 0.075
461
+ },
462
+ "meta.llama3-8b-instruct-v1:0": {
463
+ "promptPrice": 0.0004,
464
+ "completionPrice": 0.0006
465
+ },
466
+ "meta.llama3-70b-instruct-v1:0": {
467
+ "promptPrice": 0.00265,
468
+ "completionPrice": 0.0035
469
+ },
470
+ "meta.llama2-13b-chat-v1": {
471
+ "promptPrice": 0.00075,
472
+ "completionPrice": 0.001
473
+ },
474
+ "meta.llama2-70b-chat-v1": {
475
+ "promptPrice": 0.00195,
476
+ "completionPrice": 0.00256
477
+ },
478
+ "cohere.command-text-v14": {
479
+ "promptPrice": 0.0015,
480
+ "completionPrice": 0.002
481
+ },
482
+ "cohere.command-light-text-v14": {
483
+ "promptPrice": 0.0003,
484
+ "completionPrice": 0.0006
485
+ },
486
+ "ai21.j2-mid-v1": {
487
+ "promptPrice": 0.0125,
488
+ "completionPrice": 0.0125
489
+ },
490
+ "ai21.j2-ultra-v1": {
491
+ "promptPrice": 0.0188,
492
+ "completionPrice": 0.0188
493
+ },
494
+ "gemini-1.0-pro": {
495
+ "promptPrice": 0.0005,
496
+ "completionPrice": 0.0015
497
+ },
498
+ "gemini-1.5-flash": {
499
+ "promptPrice": 7.5e-05,
500
+ "completionPrice": 0.0003
501
+ },
502
+ "gemini-1.5-pro": {
503
+ "promptPrice": 0.00125,
504
+ "completionPrice": 0.005
505
+ },
506
+ "gemini-1.0-pro-002": {
507
+ "promptPrice": 0.0005,
508
+ "completionPrice": 0.0015
509
+ },
510
+ "gemini-1.0-pro-001": {
511
+ "promptPrice": 0.0005,
512
+ "completionPrice": 0.0015
513
+ },
514
+ "gemini-1.5-pro-preview-0409": {
515
+ "promptPrice": 0.005,
516
+ "completionPrice": 0.015
517
+ },
518
+ "gemini-1.5-pro-preview-0514": {
519
+ "promptPrice": 0.005,
520
+ "completionPrice": 0.015
521
+ },
522
+ "gemini-1.5-flash-preview-0514": {
523
+ "promptPrice": 0.0005,
524
+ "completionPrice": 0.0015
525
+ },
526
+ "gemini-2.0-flash": {
527
+ "promptPrice": 0.0001,
528
+ "completionPrice": 0.0004
529
+ },
530
+ "gemini-2.0-flash-lite": {
531
+ "promptPrice": 7.5e-05,
532
+ "completionPrice": 0.0003
533
+ },
534
+ "gemini-2.5-flash": {
535
+ "promptPrice": 0.0003,
536
+ "completionPrice": 0.0025
537
+ },
538
+ "gemini-2.5-flash-preview": {
539
+ "promptPrice": 0.0003,
540
+ "completionPrice": 0.0025
541
+ },
542
+ "gemini-2.5-flash-lite": {
543
+ "promptPrice": 0.0001,
544
+ "completionPrice": 0.0004
545
+ },
546
+ "gemini-2.5-flash-lite-preview": {
547
+ "promptPrice": 0.0001,
548
+ "completionPrice": 0.0004
549
+ },
550
+ "gemini-2.5-pro": {
551
+ "promptPrice": 0.00125,
552
+ "completionPrice": 0.01
553
+ },
554
+ "gemini-2-5-flash-image": {
555
+ "promptPrice": 0.0003,
556
+ "completionPrice": 0.03
557
+ },
558
+ "nano-banana": {
559
+ "promptPrice": 0.0003,
560
+ "completionPrice": 0.03
561
+ },
562
+ "text-bison": {
563
+ "promptPrice": 0.001,
564
+ "completionPrice": 0.002
565
+ },
566
+ "text-bison@002": {
567
+ "promptPrice": 0.001,
568
+ "completionPrice": 0.002
569
+ },
570
+ "text-bison-32k": {
571
+ "promptPrice": 0.001,
572
+ "completionPrice": 0.002
573
+ },
574
+ "text-bison-32k@002": {
575
+ "promptPrice": 0.001,
576
+ "completionPrice": 0.002
577
+ },
578
+ "text-unicorn": {
579
+ "promptPrice": 0.01,
580
+ "completionPrice": 0.03
581
+ },
582
+ "text-unicorn@001": {
583
+ "promptPrice": 0.01,
584
+ "completionPrice": 0.03
585
+ },
586
+ "chat-bison": {
587
+ "promptPrice": 0.001,
588
+ "completionPrice": 0.002
589
+ },
590
+ "chat-bison@002": {
591
+ "promptPrice": 0.001,
592
+ "completionPrice": 0.002
593
+ },
594
+ "chat-bison-32k": {
595
+ "promptPrice": 0.001,
596
+ "completionPrice": 0.002
597
+ },
598
+ "chat-bison-32k@002": {
599
+ "promptPrice": 0.001,
600
+ "completionPrice": 0.002
601
+ },
602
+ "llama3-8b-8192": {
603
+ "promptPrice": 5e-05,
604
+ "completionPrice": 0.0001
605
+ },
606
+ "llama3-70b-8192": {
607
+ "promptPrice": 0.00059,
608
+ "completionPrice": 0.00079
609
+ },
610
+ "mixtral-8x7b-32768": {
611
+ "promptPrice": 0.00024,
612
+ "completionPrice": 0.00024
613
+ },
614
+ "gemma-7b-it": {
615
+ "promptPrice": 0.0001,
616
+ "completionPrice": 0.0001
617
+ },
618
+ "reka-core": {
619
+ "promptPrice": 0.002,
620
+ "completionPrice": 0.002
621
+ },
622
+ "reka-core-20240415": {
623
+ "promptPrice": 0.002,
624
+ "completionPrice": 0.002
625
+ },
626
+ "reka-core-20240501": {
627
+ "promptPrice": 0.002,
628
+ "completionPrice": 0.002
629
+ },
630
+ "reka-flash": {
631
+ "promptPrice": 0.0002,
632
+ "completionPrice": 0.0008
633
+ },
634
+ "reka-flash-20240226": {
635
+ "promptPrice": 0.0002,
636
+ "completionPrice": 0.0008
637
+ },
638
+ "reka-edge": {
639
+ "promptPrice": 0.0001,
640
+ "completionPrice": 0.0001
641
+ },
642
+ "reka-edge-20240208": {
643
+ "promptPrice": 0.0001,
644
+ "completionPrice": 0.0001
645
+ },
646
+ "reka-spark": {
647
+ "promptPrice": 5e-05,
648
+ "completionPrice": 5e-05
649
+ },
650
+ "grok-beta": {
651
+ "promptPrice": 0.0005,
652
+ "completionPrice": 0.0015
653
+ },
654
+ "grok-vision-beta": {
655
+ "promptPrice": 0.0005,
656
+ "completionPrice": 0.0015
657
+ },
658
+ "grok-2-1212": {
659
+ "promptPrice": 0.002,
660
+ "completionPrice": 0.01
661
+ },
662
+ "grok-2-vision-1212": {
663
+ "promptPrice": 0.002,
664
+ "completionPrice": 0.01
665
+ },
666
+ "grok-3": {
667
+ "promptPrice": 0.003,
668
+ "completionPrice": 0.015
669
+ },
670
+ "grok-3-mini": {
671
+ "promptPrice": 0.0003,
672
+ "completionPrice": 0.0005
673
+ },
674
+ "grok-3-fast": {
675
+ "promptPrice": 0.005,
676
+ "completionPrice": 0.025
677
+ },
678
+ "grok-3-mini-fast": {
679
+ "promptPrice": 0.0006,
680
+ "completionPrice": 0.004
681
+ },
682
+ "grok-4": {
683
+ "promptPrice": 0.003,
684
+ "completionPrice": 0.015
685
+ },
686
+ "grok-4-fast": {
687
+ "promptPrice": 0.0002,
688
+ "completionPrice": 0.0005
689
+ },
690
+ "jamba-1.5-mini": {
691
+ "promptPrice": 0.0002,
692
+ "completionPrice": 0.0004
693
+ },
694
+ "jamba-1.5-large": {
695
+ "promptPrice": 0.002,
696
+ "completionPrice": 0.008
697
+ },
698
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo": {
699
+ "promptPrice": 0.00088,
700
+ "completionPrice": 0.00088
701
+ },
702
+ "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-vLLM-json": {
703
+ "promptPrice": 0.00088,
704
+ "completionPrice": 0.00088
705
+ },
706
+ "meta-llama/Meta-Llama-3.1-405B-Instruct-Lite-Pro-lora": {
707
+ "promptPrice": 0.00088,
708
+ "completionPrice": 0.00088
709
+ },
710
+ "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
711
+ "promptPrice": 0.00088,
712
+ "completionPrice": 0.00088
713
+ },
714
+ "Qwen/QwQ-32B-Preview": {
715
+ "promptPrice": 0.0012,
716
+ "completionPrice": 0.0012
717
+ },
718
+ "qwen3-next-80b-a3b-instruct": {
719
+ "promptPrice": 0.000525,
720
+ "completionPrice": 0.0021
721
+ },
722
+ "qwen3-next-80b-a3b-thinking": {
723
+ "promptPrice": 0.000525,
724
+ "completionPrice": 0.0063
725
+ },
726
+ "qwen3-coder-480b-a35b-instruct": {
727
+ "promptPrice": 0.001,
728
+ "completionPrice": 0.005
729
+ },
730
+ "qwen3-max": {
731
+ "promptPrice": 0.0012,
732
+ "completionPrice": 0.006
733
+ },
734
+ "qwen-qwen3-max": {
735
+ "promptPrice": 0.0012,
736
+ "completionPrice": 0.006
737
+ },
738
+ "codellama/CodeLlama-34b-Instruct-hf": {
739
+ "promptPrice": 0.0008,
740
+ "completionPrice": 0.0008
741
+ },
742
+ "databricks/dbrx-instruct": {
743
+ "promptPrice": 0.0012,
744
+ "completionPrice": 0.0012
745
+ },
746
+ "deepseek-ai/deepseek-llm-67b-chat": {
747
+ "promptPrice": 0.0009,
748
+ "completionPrice": 0.0009
749
+ },
750
+ "google/gemma-2b-it": {
751
+ "promptPrice": 0.0001,
752
+ "completionPrice": 0.0001
753
+ },
754
+ "google/gemma-2-27b-it": {
755
+ "promptPrice": 0.0008,
756
+ "completionPrice": 0.0008
757
+ },
758
+ "google/gemma-2-9b-it": {
759
+ "promptPrice": 0.0003,
760
+ "completionPrice": 0.0003
761
+ },
762
+ "Gryphe/MythoMax-L2-13b-Lite": {
763
+ "promptPrice": 0.0003,
764
+ "completionPrice": 0.0003
765
+ },
766
+ "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": {
767
+ "promptPrice": 0.00088,
768
+ "completionPrice": 0.00088
769
+ },
770
+ "deepseek-chat": {
771
+ "promptPrice": 0.00014,
772
+ "completionPrice": 0.00028
773
+ },
774
+ "deepseek-reasoner": {
775
+ "promptPrice": 0.00055,
776
+ "completionPrice": 0.00219
777
+ },
778
+ "llama-3.1-8b-instant": {
779
+ "promptPrice": 5e-05,
780
+ "completionPrice": 8e-05
781
+ },
782
+ "llama-3.3-70b-versatile": {
783
+ "promptPrice": 0.00059,
784
+ "completionPrice": 0.00079
785
+ },
786
+ "llama-4-scout": {
787
+ "promptPrice": 0.00015,
788
+ "completionPrice": 0.0005
789
+ },
790
+ "llama-4-scout-17bx16e-128k": {
791
+ "promptPrice": 0.00015,
792
+ "completionPrice": 0.0005
793
+ },
794
+ "llama-4-maverick": {
795
+ "promptPrice": 0.00022,
796
+ "completionPrice": 0.00085
797
+ },
798
+ "llama-4-maverick-17bx128e-128k": {
799
+ "promptPrice": 0.00022,
800
+ "completionPrice": 0.00085
801
+ },
802
+ "meta-llama/Llama-4-Scout": {
803
+ "promptPrice": 0.00018,
804
+ "completionPrice": 0.00059
805
+ },
806
+ "meta-llama/Llama-4-Maverick": {
807
+ "promptPrice": 0.00027,
808
+ "completionPrice": 0.00085
809
+ },
810
+ "llama-guard-4-12b": {
811
+ "promptPrice": 0.0002,
812
+ "completionPrice": 0.0002
813
+ },
814
+ "qwen3-32b": {
815
+ "promptPrice": 0.00029,
816
+ "completionPrice": 0.00059
817
+ },
818
+ "gpt-oss-20b": {
819
+ "promptPrice": 7.5e-05,
820
+ "completionPrice": 0.0003
821
+ },
822
+ "gpt-oss-120b": {
823
+ "promptPrice": 0.00015,
824
+ "completionPrice": 0.0006
825
+ },
826
+ "kimi-k2-0905": {
827
+ "promptPrice": 0.001,
828
+ "completionPrice": 0.003
829
+ },
830
+ "meta-llama/Llama-3.2-3B-Instruct-Turbo": {
831
+ "promptPrice": 6e-05,
832
+ "completionPrice": 6e-05
833
+ },
834
+ "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
835
+ "promptPrice": 0.00018,
836
+ "completionPrice": 0.00018
837
+ },
838
+ "meta-llama/Llama-3-8B-Instruct-Lite": {
839
+ "promptPrice": 0.0001,
840
+ "completionPrice": 0.0001
841
+ },
842
+ "DeepSeek-R1": {
843
+ "promptPrice": 0.003,
844
+ "completionPrice": 0.007
845
+ },
846
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": {
847
+ "promptPrice": 0.00018,
848
+ "completionPrice": 0.00018
849
+ },
850
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
851
+ "promptPrice": 0.002,
852
+ "completionPrice": 0.002
853
+ },
854
+ "deepseek-ai/DeepSeek-V3": {
855
+ "promptPrice": 0.00125,
856
+ "completionPrice": 0.00125
857
+ },
858
+ "deepseek-ai/DeepSeek-V3-1": {
859
+ "promptPrice": 0.0006,
860
+ "completionPrice": 0.0017
861
+ },
862
+ "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8": {
863
+ "promptPrice": 0.0002,
864
+ "completionPrice": 0.0006
865
+ },
866
+ "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8": {
867
+ "promptPrice": 0.00065,
868
+ "completionPrice": 0.003
869
+ },
870
+ "Qwen/Qwen2.5-72B-Instruct": {
871
+ "promptPrice": 0.0012,
872
+ "completionPrice": 0.0012
873
+ },
874
+ "Qwen/Qwen2.5-Coder-32B-Instruct": {
875
+ "promptPrice": 0.0008,
876
+ "completionPrice": 0.0008
877
+ },
878
+ "Qwen/Qwen2.5-7B-Instruct-Turbo": {
879
+ "promptPrice": 0.0003,
880
+ "completionPrice": 0.0003
881
+ },
882
+ "mistralai/Mistral-7B-Instruct-v0.2": {
883
+ "promptPrice": 0.0002,
884
+ "completionPrice": 0.0002
885
+ },
886
+ "mistralai/Mistral-Small-3": {
887
+ "promptPrice": 0.0008,
888
+ "completionPrice": 0.0008
889
+ },
890
+ "mistralai/Mixtral-8x7B-Instruct-v0.1": {
891
+ "promptPrice": 0.0006,
892
+ "completionPrice": 0.0006
893
+ },
894
+ "THUDM/GLM-4.5-Air": {
895
+ "promptPrice": 0.0002,
896
+ "completionPrice": 0.0011
897
+ },
898
+ "Kimi-K2-Instruct": {
899
+ "promptPrice": 0.001,
900
+ "completionPrice": 0.003
901
+ },
902
+ "smollm2:135m": {
903
+ "promptPrice": 0.0001,
904
+ "completionPrice": 0.0002
905
+ },
906
+ "smollm2:360m": {
907
+ "promptPrice": 0.0001,
908
+ "completionPrice": 0.0002
909
+ },
910
+ "tinyllama": {
911
+ "promptPrice": 0.0003,
912
+ "completionPrice": 0.0006
913
+ },
914
+ "llama3.2:1b": {
915
+ "promptPrice": 0.0003,
916
+ "completionPrice": 0.0006
917
+ },
918
+ "llama3.2:3b": {
919
+ "promptPrice": 0.0003,
920
+ "completionPrice": 0.0006
921
+ },
922
+ "gemma:2b": {
923
+ "promptPrice": 0.0003,
924
+ "completionPrice": 0.0006
925
+ },
926
+ "gemma2:2b": {
927
+ "promptPrice": 0.0003,
928
+ "completionPrice": 0.0006
929
+ },
930
+ "phi3:3.8b": {
931
+ "promptPrice": 0.0003,
932
+ "completionPrice": 0.0006
933
+ },
934
+ "phi4:14b": {
935
+ "promptPrice": 0.0005,
936
+ "completionPrice": 0.001
937
+ },
938
+ "qwen2.5:0.5b": {
939
+ "promptPrice": 0.0001,
940
+ "completionPrice": 0.0002
941
+ },
942
+ "qwen2.5:1.5b": {
943
+ "promptPrice": 0.0003,
944
+ "completionPrice": 0.0006
945
+ },
946
+ "qwen2.5:3b": {
947
+ "promptPrice": 0.0003,
948
+ "completionPrice": 0.0006
949
+ },
950
+ "qwen2.5:7b": {
951
+ "promptPrice": 0.0003,
952
+ "completionPrice": 0.0006
953
+ },
954
+ "qwen3:4b": {
955
+ "promptPrice": 0.0003,
956
+ "completionPrice": 0.0006
957
+ },
958
+ "qwen3:8b": {
959
+ "promptPrice": 0.0003,
960
+ "completionPrice": 0.0006
961
+ },
962
+ "llama2:7b": {
963
+ "promptPrice": 0.0003,
964
+ "completionPrice": 0.0006
965
+ },
966
+ "llama3:8b": {
967
+ "promptPrice": 0.0003,
968
+ "completionPrice": 0.0006
969
+ },
970
+ "llama3.1:8b": {
971
+ "promptPrice": 0.0003,
972
+ "completionPrice": 0.0006
973
+ },
974
+ "mistral:7b": {
975
+ "promptPrice": 0.0003,
976
+ "completionPrice": 0.0006
977
+ },
978
+ "gemma:7b": {
979
+ "promptPrice": 0.0003,
980
+ "completionPrice": 0.0006
981
+ },
982
+ "gemma2:9b": {
983
+ "promptPrice": 0.0003,
984
+ "completionPrice": 0.0006
985
+ },
986
+ "codellama:7b": {
987
+ "promptPrice": 0.0003,
988
+ "completionPrice": 0.0006
989
+ },
990
+ "llama2:13b": {
991
+ "promptPrice": 0.0005,
992
+ "completionPrice": 0.001
993
+ },
994
+ "codellama:13b": {
995
+ "promptPrice": 0.0005,
996
+ "completionPrice": 0.001
997
+ },
998
+ "llama2:70b": {
999
+ "promptPrice": 0.0008,
1000
+ "completionPrice": 0.0008
1001
+ },
1002
+ "llama3:70b": {
1003
+ "promptPrice": 0.0008,
1004
+ "completionPrice": 0.0008
1005
+ },
1006
+ "llama3.1:70b": {
1007
+ "promptPrice": 0.0008,
1008
+ "completionPrice": 0.0008
1009
+ },
1010
+ "qwen2.5:72b": {
1011
+ "promptPrice": 0.0008,
1012
+ "completionPrice": 0.0008
1013
+ },
1014
+ "codellama:34b": {
1015
+ "promptPrice": 0.0008,
1016
+ "completionPrice": 0.0008
1017
+ },
1018
+ "mixtral:8x7b": {
1019
+ "promptPrice": 0.0008,
1020
+ "completionPrice": 0.0008
1021
+ },
1022
+ "deepseek-r1:7b": {
1023
+ "promptPrice": 0.0003,
1024
+ "completionPrice": 0.0006
1025
+ },
1026
+ "deepseek-r1:14b": {
1027
+ "promptPrice": 0.0005,
1028
+ "completionPrice": 0.001
1029
+ },
1030
+ "deepseek-r1:32b": {
1031
+ "promptPrice": 0.0008,
1032
+ "completionPrice": 0.0008
1033
+ },
1034
+ "deepseek-r1:70b": {
1035
+ "promptPrice": 0.0008,
1036
+ "completionPrice": 0.0008
1037
+ },
1038
+ "ibm-granite-3-1-8b-instruct": {
1039
+ "promptPrice": 0.0002,
1040
+ "completionPrice": 0.0002
1041
+ },
1042
+ "ibm-granite-3-8b-instruct": {
1043
+ "promptPrice": 0.0002,
1044
+ "completionPrice": 0.0002
1045
+ },
1046
+ "granite-3-8b-instruct": {
1047
+ "promptPrice": 0.0002,
1048
+ "completionPrice": 0.0002
1049
+ },
1050
+ "granite-embedding-107m-multilingual": {
1051
+ "promptPrice": 0.0001,
1052
+ "completionPrice": 0.0001
1053
+ },
1054
+ "granite-embedding-278m-multilingual": {
1055
+ "promptPrice": 0.0001,
1056
+ "completionPrice": 0.0001
1057
+ },
1058
+ "deepseek-v3.1": {
1059
+ "promptPrice": 0.00056,
1060
+ "completionPrice": 0.00168
1061
+ },
1062
+ "sarvam-m": {
1063
+ "promptPrice": 0,
1064
+ "completionPrice": 0
1065
+ },
1066
+ "sarvamai/sarvam-m": {
1067
+ "promptPrice": 0,
1068
+ "completionPrice": 0
1069
+ },
1070
+ "sarvam-chat": {
1071
+ "promptPrice": 0,
1072
+ "completionPrice": 0
1073
+ },
1074
+ "granite-4-0-h-small": {
1075
+ "promptPrice": 0.0002,
1076
+ "completionPrice": 0.0002
1077
+ },
1078
+ "granite-4-0-h-tiny": {
1079
+ "promptPrice": 0.0002,
1080
+ "completionPrice": 0.0002
1081
+ },
1082
+ "granite-4-0-h-micro": {
1083
+ "promptPrice": 0.0002,
1084
+ "completionPrice": 0.0002
1085
+ },
1086
+ "granite-4-0-micro": {
1087
+ "promptPrice": 0.0002,
1088
+ "completionPrice": 0.0002
1089
+ },
1090
+ "ibm-granite/granite-4.0-h-small": {
1091
+ "promptPrice": 0.0002,
1092
+ "completionPrice": 0.0002
1093
+ },
1094
+ "ibm-granite/granite-4.0-h-tiny": {
1095
+ "promptPrice": 0.0002,
1096
+ "completionPrice": 0.0002
1097
+ },
1098
+ "ibm-granite/granite-4.0-h-micro": {
1099
+ "promptPrice": 0.0002,
1100
+ "completionPrice": 0.0002
1101
+ },
1102
+ "granite:3b": {
1103
+ "promptPrice": 0.0002,
1104
+ "completionPrice": 0.0002
1105
+ },
1106
+ "granite:8b": {
1107
+ "promptPrice": 0.0002,
1108
+ "completionPrice": 0.0002
1109
+ },
1110
+ "mistral-large-24-11": {
1111
+ "promptPrice": 0.008,
1112
+ "completionPrice": 0.024
1113
+ },
1114
+ "mistral-large-2411": {
1115
+ "promptPrice": 0.008,
1116
+ "completionPrice": 0.024
1117
+ },
1118
+ "mistral-small-3-1": {
1119
+ "promptPrice": 0.001,
1120
+ "completionPrice": 0.003
1121
+ },
1122
+ "mistral-small-3.1": {
1123
+ "promptPrice": 0.001,
1124
+ "completionPrice": 0.003
1125
+ },
1126
+ "mistral-medium-3": {
1127
+ "promptPrice": 0.0004,
1128
+ "completionPrice": 0.002
1129
+ },
1130
+ "mistral-medium-2025": {
1131
+ "promptPrice": 0.0004,
1132
+ "completionPrice": 0.002
1133
+ },
1134
+ "magistral-small": {
1135
+ "promptPrice": 0.001,
1136
+ "completionPrice": 0.003
1137
+ },
1138
+ "magistral-medium": {
1139
+ "promptPrice": 0.003,
1140
+ "completionPrice": 0.009
1141
+ },
1142
+ "codestral-25-01": {
1143
+ "promptPrice": 0.001,
1144
+ "completionPrice": 0.003
1145
+ },
1146
+ "codestral-2501": {
1147
+ "promptPrice": 0.001,
1148
+ "completionPrice": 0.003
1149
+ },
1150
+ "lfm-7b": {
1151
+ "promptPrice": 0.0003,
1152
+ "completionPrice": 0.0006
1153
+ },
1154
+ "liquid/lfm-7b": {
1155
+ "promptPrice": 0.0003,
1156
+ "completionPrice": 0.0006
1157
+ },
1158
+ "snowflake-arctic": {
1159
+ "promptPrice": 0.0008,
1160
+ "completionPrice": 0.0024
1161
+ },
1162
+ "snowflake-arctic-instruct": {
1163
+ "promptPrice": 0.0008,
1164
+ "completionPrice": 0.0024
1165
+ },
1166
+ "snowflake/snowflake-arctic-instruct": {
1167
+ "promptPrice": 0.0008,
1168
+ "completionPrice": 0.0024
1169
+ },
1170
+ "snowflake-arctic-embed-l-v2.0": {
1171
+ "promptPrice": 5e-05,
1172
+ "completionPrice": 5e-05
1173
+ },
1174
+ "nvidia-nemotron-4-340b-instruct": {
1175
+ "promptPrice": 0.003,
1176
+ "completionPrice": 0.009
1177
+ },
1178
+ "nvidia/nemotron-4-340b-instruct": {
1179
+ "promptPrice": 0.003,
1180
+ "completionPrice": 0.009
1181
+ },
1182
+ "nvidia-nemotron-mini": {
1183
+ "promptPrice": 0.0002,
1184
+ "completionPrice": 0.0004
1185
+ },
1186
+ "nvidia/llama-3.1-nemotron-70b-instruct": {
1187
+ "promptPrice": 0.0008,
1188
+ "completionPrice": 0.0008
1189
+ },
1190
+ "servicenow-now-assist": {
1191
+ "promptPrice": 0.001,
1192
+ "completionPrice": 0.003
1193
+ },
1194
+ "llama3.1:405b": {
1195
+ "promptPrice": 0.0012,
1196
+ "completionPrice": 0.0012
1197
+ },
1198
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": {
1199
+ "promptPrice": 0.0008,
1200
+ "completionPrice": 0.0024,
1201
+ "note": "R1 distilled into Qwen-32B - VERY POPULAR (1.7M downloads)"
1202
+ },
1203
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
1204
+ "promptPrice": 0.0002,
1205
+ "completionPrice": 0.0004,
1206
+ "note": "Tiny R1 distillation - 1M+ downloads"
1207
+ },
1208
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": {
1209
+ "promptPrice": 0.0005,
1210
+ "completionPrice": 0.001,
1211
+ "note": "R1 distilled into Llama-8B"
1212
+ },
1213
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
1214
+ "promptPrice": 0.0004,
1215
+ "completionPrice": 0.0008,
1216
+ "note": "R1 distilled into Qwen-7B"
1217
+ },
1218
+ "deepseek-ai/DeepSeek-R1-0528": {
1219
+ "promptPrice": 0.0014,
1220
+ "completionPrice": 0.0028,
1221
+ "note": "Latest R1 release from May 2025"
1222
+ },
1223
+ "deepseek-ai/DeepSeek-V3.1": {
1224
+ "promptPrice": 0.0006,
1225
+ "completionPrice": 0.0017,
1226
+ "note": "V3.1 - improved version; priced close to the deepseek-v3.1 entry"
1227
+ },
1228
+ "deepseek-ai/DeepSeek-V3-0324": {
1229
+ "promptPrice": 0.0006,
1230
+ "completionPrice": 0.0017,
1231
+ "note": "V3 March 2025 release"
1232
+ },
1233
+ "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B": {
1234
+ "promptPrice": 0.0005,
1235
+ "completionPrice": 0.001,
1236
+ "note": "R1 based on Qwen3-8B backbone"
1237
+ },
1238
+ "deepseek-ai/DeepSeek-V3.2-Exp": {
1239
+ "promptPrice": 0.0008,
1240
+ "completionPrice": 0.002,
1241
+ "note": "V3.2 experimental release"
1242
+ },
1243
+ "deepseek-ai/DeepSeek-V3.1-Terminus": {
1244
+ "promptPrice": 0.0006,
1245
+ "completionPrice": 0.0017,
1246
+ "note": "V3.1 Terminus variant"
1247
+ },
1248
+ "deepseek-ai/DeepSeek-OCR": {
1249
+ "promptPrice": 0.001,
1250
+ "completionPrice": 0.003,
1251
+ "note": "HIGHEST PRIORITY - OCR specialist, 3.6M downloads"
1252
+ },
1253
+ "deepseek-ai/deepseek-vl2": {
1254
+ "promptPrice": 0.0008,
1255
+ "completionPrice": 0.0024,
1256
+ "note": "Vision-language model v2"
1257
+ },
1258
+ "deepseek-ai/Janus-Pro-7B": {
1259
+ "promptPrice": 0.0008,
1260
+ "completionPrice": 0.0024,
1261
+ "note": "Multimodal any-to-any model including text-to-image"
1262
+ },
1263
+ "LiquidAI/LFM2-1.2B": {
1264
+ "promptPrice": 0.0002,
1265
+ "completionPrice": 0.0004,
1266
+ "note": "MOST POPULAR LiquidAI model - 506K downloads, edge-optimized"
1267
+ },
1268
+ "LiquidAI/LFM2-2.6B": {
1269
+ "promptPrice": 0.0003,
1270
+ "completionPrice": 0.0006,
1271
+ "note": "Larger LFM2 variant"
1272
+ },
1273
+ "LiquidAI/LFM2-350M": {
1274
+ "promptPrice": 0.0001,
1275
+ "completionPrice": 0.0002,
1276
+ "note": "Smallest LFM2 variant, edge-optimized"
1277
+ },
1278
+ "LiquidAI/LFM2-700M": {
1279
+ "promptPrice": 0.00015,
1280
+ "completionPrice": 0.0003,
1281
+ "note": "Mid-size LFM2 variant"
1282
+ },
1283
+ "LiquidAI/LFM2-8B-A1B": {
1284
+ "promptPrice": 0.0003,
1285
+ "completionPrice": 0.0009,
1286
+ "note": "MoE model with 8B total, 1B active parameters"
1287
+ },
1288
+ "LiquidAI/LFM2-VL-450M": {
1289
+ "promptPrice": 0.0002,
1290
+ "completionPrice": 0.0006,
1291
+ "note": "Vision-language 450M model"
1292
+ },
1293
+ "LiquidAI/LFM2-VL-1.6B": {
1294
+ "promptPrice": 0.0003,
1295
+ "completionPrice": 0.0009,
1296
+ "note": "Vision-language 1.6B model"
1297
+ },
1298
+ "LiquidAI/LFM2-VL-3B": {
1299
+ "promptPrice": 0.0004,
1300
+ "completionPrice": 0.0012,
1301
+ "note": "Largest LiquidAI vision model"
1302
+ },
1303
+ "HuggingFaceTB/SmolLM2-135M": {
1304
+ "promptPrice": 5e-05,
1305
+ "completionPrice": 0.0001,
1306
+ "note": "MOST POPULAR SmolLM - 733K downloads, tiny edge model"
1307
+ },
1308
+ "HuggingFaceTB/SmolLM2-360M-Instruct": {
1309
+ "promptPrice": 0.0001,
1310
+ "completionPrice": 0.0002,
1311
+ "note": "360M instruct variant - very popular"
1312
+ },
1313
+ "HuggingFaceTB/SmolLM-135M": {
1314
+ "promptPrice": 5e-05,
1315
+ "completionPrice": 0.0001,
1316
+ "note": "Original SmolLM v1 - 135M"
1317
+ },
1318
+ "HuggingFaceTB/SmolLM2-360M": {
1319
+ "promptPrice": 0.0001,
1320
+ "completionPrice": 0.0002,
1321
+ "note": "360M base model"
1322
+ },
1323
+ "HuggingFaceTB/SmolLM2-135M-Instruct": {
1324
+ "promptPrice": 5e-05,
1325
+ "completionPrice": 0.0001,
1326
+ "note": "135M instruct variant"
1327
+ },
1328
+ "HuggingFaceTB/SmolLM3-3B": {
1329
+ "promptPrice": 0.0003,
1330
+ "completionPrice": 0.0006,
1331
+ "note": "Latest SmolLM3 3B model - LARGEST in series"
1332
+ },
1333
+ "HuggingFaceTB/SmolLM2-1.7B-Instruct": {
1334
+ "promptPrice": 0.0002,
1335
+ "completionPrice": 0.0004,
1336
+ "note": "1.7B instruct variant"
1337
+ },
1338
+ "HuggingFaceTB/SmolLM2-1.7B": {
1339
+ "promptPrice": 0.0002,
1340
+ "completionPrice": 0.0004,
1341
+ "note": "1.7B base model"
1342
+ },
1343
+ "meta-llama/Llama-3.1-8B-Instruct": {
1344
+ "promptPrice": 0.0005,
1345
+ "completionPrice": 0.0015,
1346
+ "note": "HIGHEST PRIORITY - Most popular Llama 3.1 model with 5M downloads"
1347
+ },
1348
+ "meta-llama/Llama-3.2-1B-Instruct": {
1349
+ "promptPrice": 0.0001,
1350
+ "completionPrice": 0.0003,
1351
+ "note": "Small edge-optimized model - 3.7M downloads"
1352
+ },
1353
+ "meta-llama/Llama-3.2-3B-Instruct": {
1354
+ "promptPrice": 0.0003,
1355
+ "completionPrice": 0.0006,
1356
+ "note": "Mid-size edge model - 1.8M downloads"
1357
+ },
1358
+ "meta-llama/Llama-3.2-1B": {
1359
+ "promptPrice": 0.0001,
1360
+ "completionPrice": 0.0003,
1361
+ "note": "Base 1B model - 1.7M downloads"
1362
+ },
1363
+ "meta-llama/Llama-3.1-70B-Instruct": {
1364
+ "promptPrice": 0.002,
1365
+ "completionPrice": 0.006,
1366
+ "note": "Large 70B model - 725K downloads"
1367
+ },
1368
+ "meta-llama/Llama-3.3-70B-Instruct": {
1369
+ "promptPrice": 0.002,
1370
+ "completionPrice": 0.006,
1371
+ "note": "Latest Llama 3.3 70B - 659K downloads"
1372
+ },
1373
+ "meta-llama/Llama-3.2-3B": {
1374
+ "promptPrice": 0.0003,
1375
+ "completionPrice": 0.0006,
1376
+ "note": "Base 3B model"
1377
+ },
1378
+ "meta-llama/Llama-Guard-3-8B": {
1379
+ "promptPrice": 0.0005,
1380
+ "completionPrice": 0.0015,
1381
+ "note": "Safety/moderation model - 183K downloads"
1382
+ },
1383
+ "meta-llama/Llama-Guard-3-1B": {
1384
+ "promptPrice": 0.0001,
1385
+ "completionPrice": 0.0003,
1386
+ "note": "Small safety model"
1387
+ },
1388
+ "meta-llama/Llama-3.2-11B-Vision-Instruct": {
1389
+ "promptPrice": 0.001,
1390
+ "completionPrice": 0.003,
1391
+ "note": "Llama 3.2 vision model - 257K downloads"
1392
+ },
1393
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct": {
1394
+ "promptPrice": 0.0012,
1395
+ "completionPrice": 0.0036,
1396
+ "note": "NEW LLAMA 4 vision model - 199K downloads"
1397
+ },
1398
+ "google/gemma-3-1b-it": {
1399
+ "promptPrice": 0.0001,
1400
+ "completionPrice": 0.0002,
1401
+ "note": "HIGHEST PRIORITY - Most popular Gemma 3 model"
1402
+ },
1403
+ "google/gemma-2-2b-it": {
1404
+ "promptPrice": 0.0002,
1405
+ "completionPrice": 0.0004,
1406
+ "note": "Gemma 2 2B instruct"
1407
+ },
1408
+ "google/gemma-3-12b-it": {
1409
+ "promptPrice": 0.001,
1410
+ "completionPrice": 0.003,
1411
+ "note": "Gemma 3 12B with vision capabilities - 1.5M downloads"
1412
+ },
1413
+ "google/gemma-3-4b-it": {
1414
+ "promptPrice": 0.0005,
1415
+ "completionPrice": 0.0015,
1416
+ "note": "Gemma 3 4B with vision"
1417
+ },
1418
+ "google/gemma-3-27b-it": {
1419
+ "promptPrice": 0.0015,
1420
+ "completionPrice": 0.0045,
1421
+ "note": "Large Gemma 3 27B with vision"
1422
+ },
1423
+ "google/medgemma-4b-it": {
1424
+ "promptPrice": 0.0005,
1425
+ "completionPrice": 0.0015,
1426
+ "note": "Medical-specialized vision model - radiology, clinical reasoning, dermatology"
1427
+ },
1428
+ "ServiceNow-AI/Apriel-5B-Instruct": {
1429
+ "promptPrice": 0.0005,
1430
+ "completionPrice": 0.0015,
1431
+ "note": "ServiceNow 5B instruct model"
1432
+ },
1433
+ "ServiceNow-AI/Apriel-Nemotron-15b-Thinker": {
1434
+ "promptPrice": 0.001,
1435
+ "completionPrice": 0.003,
1436
+ "note": "ServiceNow reasoning model based on Nemotron"
1437
+ },
1438
+ "ServiceNow-AI/Apriel-1.5-15b-Thinker": {
1439
+ "promptPrice": 0.001,
1440
+ "completionPrice": 0.003,
1441
+ "note": "ServiceNow's vision-language reasoning model - 49K downloads"
1442
+ },
1443
+ "nvidia/NVIDIA-Nemotron-Nano-9B-v2": {
1444
+ "promptPrice": 0.0005,
1445
+ "completionPrice": 0.0015,
1446
+ "note": "Nano series 9B v2 - efficient edge model"
1447
+ },
1448
+ "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5": {
1449
+ "promptPrice": 0.0015,
1450
+ "completionPrice": 0.0045,
1451
+ "note": "Nemotron Super 49B - high-performance model"
1452
+ },
1453
+ "nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1": {
1454
+ "promptPrice": 0.0003,
1455
+ "completionPrice": 0.0009,
1456
+ "note": "Nano 4B - small efficient model"
1457
+ },
1458
+ "nvidia/OpenReasoning-Nemotron-7B": {
1459
+ "promptPrice": 0.0004,
1460
+ "completionPrice": 0.0012,
1461
+ "note": "Reasoning-focused 7B model based on Qwen2.5"
1462
+ },
1463
+ "nvidia/NVIDIA-Nemotron-Nano-12B-v2": {
1464
+ "promptPrice": 0.0007,
1465
+ "completionPrice": 0.0021,
1466
+ "note": "Nano series 12B v2"
1467
+ },
1468
+ "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1": {
1469
+ "promptPrice": 0.001,
1470
+ "completionPrice": 0.003,
1471
+ "note": "HIGHEST PRIORITY - Most popular Nvidia vision model, 747K downloads"
1472
+ },
1473
+ "nvidia/Cosmos-Reason1-7B": {
1474
+ "promptPrice": 0.0008,
1475
+ "completionPrice": 0.0024,
1476
+ "note": "Cosmos reasoning vision model - 413K downloads"
1477
+ },
1478
+ "nvidia/NVLM-D-72B": {
1479
+ "promptPrice": 0.002,
1480
+ "completionPrice": 0.006,
1481
+ "note": "Large 72B vision-language model"
1482
+ },
1483
+ "Qwen/Qwen3-0.6B": {
1484
+ "promptPrice": 5e-05,
1485
+ "completionPrice": 0.0001,
1486
+ "note": "600M parameter model, smallest Qwen3 variant"
1487
+ },
1488
+ "Qwen/Qwen3-1.7B": {
1489
+ "promptPrice": 0.0001,
1490
+ "completionPrice": 0.0002,
1491
+ "note": "1.7B parameter model"
1492
+ },
1493
+ "Qwen/Qwen3-4B": {
1494
+ "promptPrice": 0.0003,
1495
+ "completionPrice": 0.0006,
1496
+ "note": "4B parameter base model"
1497
+ },
1498
+ "Qwen/Qwen3-8B": {
1499
+ "promptPrice": 0.0005,
1500
+ "completionPrice": 0.001,
1501
+ "note": "8B parameter base model"
1502
+ },
1503
+ "Qwen/Qwen3-14B": {
1504
+ "promptPrice": 0.0008,
1505
+ "completionPrice": 0.0016,
1506
+ "note": "14B parameter base model"
1507
+ },
1508
+ "Qwen/Qwen3-32B": {
1509
+ "promptPrice": 0.0012,
1510
+ "completionPrice": 0.0024,
1511
+ "note": "32B parameter base model"
1512
+ },
1513
+ "Qwen/Qwen3-4B-Instruct-2507": {
1514
+ "promptPrice": 0.0003,
1515
+ "completionPrice": 0.0006,
1516
+ "note": "4B instruct model, very popular (5M+ downloads)"
1517
+ },
1518
+ "Qwen/Qwen3-4B-Thinking-2507": {
1519
+ "promptPrice": 0.0006,
1520
+ "completionPrice": 0.0018,
1521
+ "note": "4B reasoning model with thinking process"
1522
+ },
1523
+ "Qwen/Qwen3-30B-A3B-Instruct-2507": {
1524
+ "promptPrice": 0.0004,
1525
+ "completionPrice": 0.0012,
1526
+ "note": "MoE model with 30B total, 3B active params"
1527
+ },
1528
+ "Qwen/Qwen3-30B-A3B-Thinking-2507": {
1529
+ "promptPrice": 0.0008,
1530
+ "completionPrice": 0.0024,
1531
+ "note": "MoE reasoning model with 30B total, 3B active"
1532
+ },
1533
+ "Qwen/Qwen3-Next-80B-A3B-Instruct": {
1534
+ "promptPrice": 0.0006,
1535
+ "completionPrice": 0.0018,
1536
+ "note": "Large MoE model with 80B total, 3B active"
1537
+ },
1538
+ "Qwen/Qwen3-235B-A22B": {
1539
+ "promptPrice": 0.0005,
1540
+ "completionPrice": 0.0015,
1541
+ "note": "Largest Qwen3 MoE with 235B total, 22B active - similar to the other Qwen3-235B-A22B entries in this file"
1542
+ },
1543
+ "Qwen/Qwen3-Coder-30B-A3B-Instruct": {
1544
+ "promptPrice": 0.0004,
1545
+ "completionPrice": 0.0012,
1546
+ "note": "Code-specialized MoE model"
1547
+ },
1548
+ "Qwen/Qwen3-Omni-30B-A3B-Instruct": {
1549
+ "promptPrice": 0.0006,
1550
+ "completionPrice": 0.0018,
1551
+ "note": "Multimodal model with text-to-audio, any-to-any capabilities"
1552
+ },
1553
+ "gemma3:1b": {
1554
+ "promptPrice": 0.0001,
1555
+ "completionPrice": 0.0002,
1556
+ "note": "Ollama variant of google/gemma-3-1b-it - 2.6M downloads"
1557
+ },
1558
+ "gemma3:4b": {
1559
+ "promptPrice": 0.0005,
1560
+ "completionPrice": 0.0015,
1561
+ "note": "Ollama variant of google/gemma-3-4b-it - vision capable"
1562
+ },
1563
+ "gemma3:12b": {
1564
+ "promptPrice": 0.001,
1565
+ "completionPrice": 0.003,
1566
+ "note": "Ollama variant of google/gemma-3-12b-it - vision capable"
1567
+ },
1568
+ "deepseek-r1:1.5b": {
1569
+ "promptPrice": 0.0002,
1570
+ "completionPrice": 0.0004,
1571
+ "note": "Ollama variant of deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B - 1.0M downloads"
1572
+ },
1573
+ "deepseek-r1:671b": {
1574
+ "promptPrice": 0.0014,
1575
+ "completionPrice": 0.0028,
1576
+ "note": "Ollama variant of deepseek-ai/DeepSeek-R1"
1577
+ },
1578
+ "llama3.3:70b": {
1579
+ "promptPrice": 0.002,
1580
+ "completionPrice": 0.006,
1581
+ "note": "Ollama variant of meta-llama/Llama-3.3-70B-Instruct - 659K downloads"
1582
+ },
1583
+ "granite3.1:8b": {
1584
+ "promptPrice": 0.0005,
1585
+ "completionPrice": 0.0015,
1586
+ "note": "Ollama variant of ibm-granite/granite-3.1-8b-instruct"
1587
+ },
1588
+ "granite3.1:3b": {
1589
+ "promptPrice": 0.0003,
1590
+ "completionPrice": 0.0009,
1591
+ "note": "Ollama variant of ibm-granite/granite-3.1-3b-instruct"
1592
+ },
1593
+ "granite3.1:1b": {
1594
+ "promptPrice": 0.0001,
1595
+ "completionPrice": 0.0003,
1596
+ "note": "Ollama variant of ibm-granite/granite-3.1-1b-instruct"
1597
+ },
1598
+ "phi4": {
1599
+ "promptPrice": 0.0008,
1600
+ "completionPrice": 0.0024,
1601
+ "note": "Ollama variant of microsoft/Phi-4"
1602
+ },
1603
+ "qwen3:14b": {
1604
+ "promptPrice": 0.0007,
1605
+ "completionPrice": 0.0021,
1606
+ "note": "Ollama variant of Qwen/Qwen3-14B-Instruct"
1607
+ },
1608
+ "qwen3:32b": {
1609
+ "promptPrice": 0.0015,
1610
+ "completionPrice": 0.0045,
1611
+ "note": "Ollama variant of Qwen/Qwen3-32B-Instruct"
1612
+ },
1613
+ "mistral-small": {
1614
+ "promptPrice": 0.001,
1615
+ "completionPrice": 0.003,
1616
+ "note": "Ollama variant of mistralai/Mistral-Small-Instruct-2409"
1617
+ },
1618
+ "mistral-large": {
1619
+ "promptPrice": 0.002,
1620
+ "completionPrice": 0.006,
1621
+ "note": "Ollama variant of mistralai/Mistral-Large-Instruct-2407"
1622
+ }
1623
+ },
1624
+ "speech_to_text": {
1625
+ "nvidia/parakeet-tdt-0.6b-v2": {
1626
+ "promptPrice": 0.00015,
1627
+ "completionPrice": 0.00015,
1628
+ "note": "HIGHEST PRIORITY - Most popular ASR model, 3.7M downloads"
1629
+ },
1630
+ "nvidia/parakeet-rnnt-0.6b": {
1631
+ "promptPrice": 0.00015,
1632
+ "completionPrice": 0.00015,
1633
+ "note": "Parakeet RNNT ASR - 3.1M downloads"
1634
+ },
1635
+ "nvidia/parakeet-tdt-0.6b-v3": {
1636
+ "promptPrice": 0.00015,
1637
+ "completionPrice": 0.00015,
1638
+ "note": "Multilingual ASR v3 - 49 languages"
1639
+ },
1640
+ "nvidia/canary-1b-v2": {
1641
+ "promptPrice": 0.0002,
1642
+ "completionPrice": 0.0002,
1643
+ "note": "ASR + Translation - 30+ languages"
1644
+ },
1645
+ "Llama-4-Maverick-17B-128E-Instruct": {
1646
+ "promptPrice": 0.0001,
1647
+ "completionPrice": 0.0002,
1648
+ "note": "SambaNova - Maverick model (estimated pricing)"
1649
+ },
1650
+ "Qwen/Qwen3-Next-80B-A3B-Thinking": {
1651
+ "promptPrice": 0.0004,
1652
+ "completionPrice": 0.0004,
1653
+ "note": "Hyperbolic - Qwen3 Next 80B"
1654
+ },
1655
+ "Qwen/Qwen3-235B": {
1656
+ "promptPrice": 0.0004,
1657
+ "completionPrice": 0.0004,
1658
+ "note": "Hyperbolic - Qwen3 235B"
1659
+ },
1660
+ "deepseek-ai/DeepSeek-R1": {
1661
+ "promptPrice": 0.002,
1662
+ "completionPrice": 0.002,
1663
+ "note": "Hyperbolic - DeepSeek R1"
1664
+ },
1665
+ "deepseek-ai/DeepSeek-V3": {
1666
+ "promptPrice": 0.00025,
1667
+ "completionPrice": 0.00025,
1668
+ "note": "Hyperbolic - DeepSeek V3"
1669
+ },
1670
+ "openai/gpt-oss-120b": {
1671
+ "promptPrice": 0.0005,
1672
+ "completionPrice": 0.0015,
1673
+ "note": "Nebius - GPT OSS 120B (estimated pricing)"
1674
+ }
1675
+ }
1676
+ }