@revenium/openai 1.0.10 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/.env.example +20 -0
  2. package/CHANGELOG.md +52 -0
  3. package/LICENSE +21 -21
  4. package/README.md +682 -1152
  5. package/dist/cjs/core/config/loader.js +1 -1
  6. package/dist/cjs/core/config/loader.js.map +1 -1
  7. package/dist/cjs/core/tracking/api-client.js +1 -1
  8. package/dist/cjs/core/tracking/api-client.js.map +1 -1
  9. package/dist/cjs/index.js +4 -4
  10. package/dist/cjs/index.js.map +1 -1
  11. package/dist/cjs/types/openai-augmentation.js +1 -1
  12. package/dist/cjs/utils/url-builder.js +32 -7
  13. package/dist/cjs/utils/url-builder.js.map +1 -1
  14. package/dist/esm/core/config/loader.js +1 -1
  15. package/dist/esm/core/config/loader.js.map +1 -1
  16. package/dist/esm/core/tracking/api-client.js +1 -1
  17. package/dist/esm/core/tracking/api-client.js.map +1 -1
  18. package/dist/esm/index.js +4 -4
  19. package/dist/esm/index.js.map +1 -1
  20. package/dist/esm/types/openai-augmentation.js +1 -1
  21. package/dist/esm/utils/url-builder.js +32 -7
  22. package/dist/esm/utils/url-builder.js.map +1 -1
  23. package/dist/types/index.d.ts +4 -4
  24. package/dist/types/types/index.d.ts +2 -2
  25. package/dist/types/types/index.d.ts.map +1 -1
  26. package/dist/types/types/openai-augmentation.d.ts +1 -1
  27. package/dist/types/utils/url-builder.d.ts +11 -3
  28. package/dist/types/utils/url-builder.d.ts.map +1 -1
  29. package/examples/README.md +357 -0
  30. package/examples/azure-basic.ts +206 -0
  31. package/examples/azure-responses-basic.ts +233 -0
  32. package/examples/azure-responses-streaming.ts +255 -0
  33. package/examples/azure-streaming.ts +209 -0
  34. package/examples/getting_started.ts +54 -0
  35. package/examples/openai-basic.ts +147 -0
  36. package/examples/openai-function-calling.ts +259 -0
  37. package/examples/openai-responses-basic.ts +212 -0
  38. package/examples/openai-responses-streaming.ts +232 -0
  39. package/examples/openai-streaming.ts +172 -0
  40. package/examples/openai-vision.ts +289 -0
  41. package/package.json +81 -84
  42. package/src/core/config/azure-config.ts +72 -0
  43. package/src/core/config/index.ts +23 -0
  44. package/src/core/config/loader.ts +66 -0
  45. package/src/core/config/manager.ts +94 -0
  46. package/src/core/config/validator.ts +89 -0
  47. package/src/core/providers/detector.ts +159 -0
  48. package/src/core/providers/index.ts +16 -0
  49. package/src/core/tracking/api-client.ts +78 -0
  50. package/src/core/tracking/index.ts +21 -0
  51. package/src/core/tracking/payload-builder.ts +132 -0
  52. package/src/core/tracking/usage-tracker.ts +189 -0
  53. package/src/core/wrapper/index.ts +9 -0
  54. package/src/core/wrapper/instance-patcher.ts +288 -0
  55. package/src/core/wrapper/request-handler.ts +423 -0
  56. package/src/core/wrapper/stream-wrapper.ts +100 -0
  57. package/src/index.ts +336 -0
  58. package/src/types/function-parameters.ts +251 -0
  59. package/src/types/index.ts +313 -0
  60. package/src/types/openai-augmentation.ts +233 -0
  61. package/src/types/responses-api.ts +308 -0
  62. package/src/utils/azure-model-resolver.ts +220 -0
  63. package/src/utils/constants.ts +21 -0
  64. package/src/utils/error-handler.ts +251 -0
  65. package/src/utils/metadata-builder.ts +219 -0
  66. package/src/utils/provider-detection.ts +257 -0
  67. package/src/utils/request-handler-factory.ts +285 -0
  68. package/src/utils/stop-reason-mapper.ts +74 -0
  69. package/src/utils/type-guards.ts +202 -0
  70. package/src/utils/url-builder.ts +68 -0
@@ -0,0 +1,308 @@
1
+ /**
2
+ * Types for OpenAI Responses API support
3
+ *
4
+ * This module defines types for the new OpenAI Responses API that replaces
5
+ * the traditional Chat Completions API. The Responses API provides a unified
6
+ * interface for building agent-like applications with built-in tools and capabilities.
7
+ *
8
+ * Reference: https://platform.openai.com/docs/guides/migrate-to-responses
9
+ */
10
+
11
+ import { UsageMetadata } from './index.js';
12
+
13
+ /**
14
+ * OpenAI Responses API request parameters
15
+ * Based on the official OpenAI Responses API documentation
16
+ * Reference: https://platform.openai.com/docs/guides/migrate-to-responses
17
+ */
18
+ export interface OpenAIResponsesRequest {
19
+ /** The model to use for the response */
20
+ model: string;
21
+
22
+ /** Input for the response - can be string or message array */
23
+ input:
24
+ | string
25
+ | Array<{
26
+ role: 'user' | 'assistant' | 'system';
27
+ content: string;
28
+ }>;
29
+
30
+ /** Whether to stream the response */
31
+ stream?: boolean;
32
+
33
+ /** Maximum number of output tokens to generate */
34
+ max_output_tokens?: number;
35
+
36
+ /** Temperature for response generation (0.0 to 2.0) */
37
+ temperature?: number;
38
+
39
+ /** Top-p sampling parameter (0.0 to 1.0) */
40
+ top_p?: number;
41
+
42
+ /** Instructions for the model (replaces system messages) */
43
+ instructions?: string;
44
+
45
+ /** Tools available to the model */
46
+ tools?: Array<{
47
+ type: 'function' | 'web_search' | 'file_search' | 'code_interpreter' | 'image_generation';
48
+ // For function tools (internally tagged)
49
+ name?: string;
50
+ description?: string;
51
+ parameters?: {
52
+ type: 'object';
53
+ properties: Record<string, unknown>;
54
+ required?: string[];
55
+ additionalProperties?: boolean;
56
+ };
57
+ // For other tools
58
+ [key: string]: unknown;
59
+ }>;
60
+
61
+ /** Tool choice configuration */
62
+ tool_choice?: 'auto' | 'none' | { type: 'function'; name: string };
63
+
64
+ /** Structured output configuration */
65
+ text?: {
66
+ format?: {
67
+ type: 'json_schema';
68
+ name: string;
69
+ strict?: boolean;
70
+ schema: {
71
+ type: 'object';
72
+ properties: Record<string, unknown>;
73
+ required?: string[];
74
+ additionalProperties?: boolean;
75
+ };
76
+ };
77
+ };
78
+
79
+ /** Metadata for the request */
80
+ metadata?: Record<string, unknown>;
81
+
82
+ /** ID of previous response to continue from */
83
+ previous_response_id?: string;
84
+
85
+ /** Whether to run in background mode */
86
+ background?: boolean;
87
+
88
+ /** Whether to store the response (default: true) */
89
+ store?: boolean;
90
+
91
+ /** Parallel tool calls configuration */
92
+ parallel_tool_calls?: boolean;
93
+
94
+ /** Reasoning effort level */
95
+ reasoning_effort?: 'low' | 'medium' | 'high';
96
+
97
+ /** Include additional data in response */
98
+ include?: Array<'reasoning.encrypted_content'>;
99
+
100
+ /** Custom metadata field for Revenium tracking */
101
+ usageMetadata?: UsageMetadata;
102
+
103
+ /** Additional parameters */
104
+ [key: string]: unknown;
105
+ }
106
+
107
+ /**
108
+ * OpenAI Responses API response structure
109
+ * Based on the official Azure OpenAI Responses API documentation
110
+ */
111
+ export interface OpenAIResponsesResponse {
112
+ /** Unique identifier for the response */
113
+ id: string;
114
+
115
+ /** Timestamp when the response was created */
116
+ created_at: number;
117
+
118
+ /** The model used for the response */
119
+ model: string;
120
+
121
+ /** Response object type */
122
+ object: 'response';
123
+
124
+ /** Response status */
125
+ status: 'queued' | 'in_progress' | 'completed' | 'incomplete' | 'cancelled' | 'failed';
126
+
127
+ /** Response output array */
128
+ output: Array<{
129
+ id: string;
130
+ type: 'message' | 'function_call' | 'function_call_output' | 'image_generation_call';
131
+ role?: 'assistant';
132
+ content?: Array<{
133
+ type: 'output_text' | 'text';
134
+ text?: string;
135
+ annotations?: Array<unknown>;
136
+ }>;
137
+ name?: string;
138
+ call_id?: string;
139
+ output?: string;
140
+ result?: string;
141
+ status?: string | null;
142
+ }>;
143
+
144
+ /** Simplified output text (convenience field) */
145
+ output_text?: string;
146
+
147
+ /** Usage statistics */
148
+ usage?: {
149
+ input_tokens: number;
150
+ output_tokens: number;
151
+ total_tokens: number;
152
+ output_tokens_details?: {
153
+ reasoning_tokens: number;
154
+ };
155
+ };
156
+
157
+ /** Response metadata */
158
+ metadata?: Record<string, unknown>;
159
+
160
+ /** Instructions used */
161
+ instructions?: string | null;
162
+
163
+ /** Tools configuration */
164
+ tools?: Array<unknown>;
165
+
166
+ /** Tool choice configuration */
167
+ tool_choice?: unknown;
168
+
169
+ /** Parallel tool calls setting */
170
+ parallel_tool_calls?: boolean | null;
171
+
172
+ /** Temperature used */
173
+ temperature?: number;
174
+
175
+ /** Top-p used */
176
+ top_p?: number;
177
+
178
+ /** Max output tokens */
179
+ max_output_tokens?: number | null;
180
+
181
+ /** Previous response ID */
182
+ previous_response_id?: string | null;
183
+
184
+ /** Error information */
185
+ error?: unknown | null;
186
+
187
+ /** Incomplete details */
188
+ incomplete_details?: unknown | null;
189
+
190
+ /** Reasoning information */
191
+ reasoning?: unknown | null;
192
+
193
+ /** Text field */
194
+ text?: unknown | null;
195
+
196
+ /** Truncation information */
197
+ truncation?: unknown | null;
198
+
199
+ /** User information */
200
+ user?: unknown | null;
201
+
202
+ /** Reasoning effort */
203
+ reasoning_effort?: unknown | null;
204
+ }
205
+
206
+ /**
207
+ * Streaming chunk for Responses API
208
+ */
209
+ export interface OpenAIResponsesStreamChunk {
210
+ /** Unique identifier for the response */
211
+ id: string;
212
+
213
+ /** The model used */
214
+ model: string;
215
+
216
+ /** Delta content for this chunk */
217
+ delta?: {
218
+ content?: Array<{
219
+ type: 'text' | 'tool_use';
220
+ text?: string;
221
+ [key: string]: unknown;
222
+ }>;
223
+ };
224
+
225
+ /** Usage information (typically in final chunk) */
226
+ usage?: {
227
+ input_tokens?: number;
228
+ output_tokens?: number;
229
+ total_tokens?: number;
230
+ reasoning_tokens?: number;
231
+ cached_tokens?: number;
232
+ };
233
+
234
+ /** Finish reason (in final chunk) */
235
+ finish_reason?: string | null;
236
+
237
+ /** Additional chunk fields */
238
+ [key: string]: unknown;
239
+ }
240
+
241
+ /**
242
+ * Type guard to check if a request is for Responses API
243
+ */
244
+ export function isResponsesRequest(params: unknown): params is OpenAIResponsesRequest {
245
+ return typeof params === 'object' && params !== null && 'input' in params && 'model' in params;
246
+ }
247
+
248
+ /**
249
+ * Type guard to check if a response is from Responses API
250
+ */
251
+ export function isResponsesResponse(response: unknown): response is OpenAIResponsesResponse {
252
+ return (
253
+ typeof response === 'object' &&
254
+ response !== null &&
255
+ 'id' in response &&
256
+ 'model' in response &&
257
+ ('output' in response || 'usage' in response)
258
+ );
259
+ }
260
+
261
+ /**
262
+ * Simplified interface for Responses API create parameters (for examples)
263
+ */
264
+ export interface ResponsesCreateParams {
265
+ model: string;
266
+ input: string | Array<{ role: 'user' | 'assistant' | 'system'; content: string }>;
267
+ stream?: boolean;
268
+ max_output_tokens?: number;
269
+ temperature?: number;
270
+ instructions?: string;
271
+ tools?: Array<{
272
+ type: 'function' | 'web_search' | 'file_search' | 'code_interpreter' | 'image_generation';
273
+ function?: {
274
+ name: string;
275
+ description?: string;
276
+ parameters?: Record<string, unknown>;
277
+ };
278
+ }>;
279
+ usageMetadata?: UsageMetadata;
280
+ }
281
+
282
+ /**
283
+ * Simplified interface for Responses API response (for examples)
284
+ */
285
+ export interface ResponsesResponse {
286
+ id: string;
287
+ model: string;
288
+ object: 'response';
289
+ status: string;
290
+ output: Array<{
291
+ id: string;
292
+ type: string;
293
+ role?: string;
294
+ content?: Array<{
295
+ type: 'output_text' | 'text';
296
+ text?: string;
297
+ }>;
298
+ }>;
299
+ output_text?: string;
300
+ usage?: {
301
+ input_tokens: number;
302
+ output_tokens: number;
303
+ total_tokens: number;
304
+ output_tokens_details?: {
305
+ reasoning_tokens: number;
306
+ };
307
+ };
308
+ }
@@ -0,0 +1,220 @@
1
+ import { getLogger } from '../core/config/index.js';
2
+ import { knownModels } from './constants.js';
3
+
4
+ /**
5
+ * Azure Model Name Resolution Module
6
+ *
7
+ * This module maps Azure deployment names to LiteLLM-compatible model names for accurate pricing.
8
+ * Based on learnings from the Python implementation, it uses heuristic pattern matching
9
+ * with fallback strategies to ensure reliable model name resolution.
10
+ *
11
+ * Key patterns observed in real Azure deployments:
12
+ * - "gpt-4o-2024-11-20" → "gpt-4o"
13
+ * - "text-embedding-3-large" → "text-embedding-3-large" (exact match)
14
+ * - "o4-mini" → "gpt-4o-mini"
15
+ * - "gpt4o-prod" → "gpt-4o"
16
+ * - "gpt-35-turbo-dev" → "gpt-3.5-turbo"
17
+ */
18
+
19
+ /**
20
+ * In-memory cache for resolved model names
21
+ * Using Map for thread-safe operations in Node.js
22
+ */
23
+ const modelNameCache = new Map<string, string>();
24
+
25
+ /**
26
+ * Cache for failed resolution attempts to avoid repeated warnings
27
+ */
28
+ const failedResolutionCache = new Set<string>();
29
+
30
+ // Global logger
31
+ const logger = getLogger();
32
+
33
+ /**
34
+ * Resolve Azure deployment name to LiteLLM-compatible model name
35
+ *
36
+ * @param deploymentName - Azure deployment name
37
+ * @param useCache - Whether to use cached results (default: true)
38
+ * @returns LiteLLM-compatible model name
39
+ */
40
+ export function resolveAzureModelName(deploymentName: string, useCache: boolean = true): string {
41
+ if (!deploymentName) {
42
+ logger.warn('Empty deployment name provided to model resolver');
43
+ return deploymentName;
44
+ }
45
+
46
+ // Check cache first
47
+ if (useCache && modelNameCache.has(deploymentName)) {
48
+ const cachedResult = modelNameCache.get(deploymentName)!;
49
+ logger.debug('Model name resolved from cache', {
50
+ deployment: deploymentName,
51
+ resolved: cachedResult,
52
+ });
53
+ return cachedResult;
54
+ }
55
+
56
+ try {
57
+ const resolved = resolveModelNameHeuristic(deploymentName);
58
+
59
+ // Cache the result
60
+ if (useCache) {
61
+ modelNameCache.set(deploymentName, resolved);
62
+ }
63
+
64
+ // Log successful resolution
65
+ if (resolved !== deploymentName) {
66
+ logger.debug('Model name resolved via heuristics', {
67
+ deployment: deploymentName,
68
+ resolved,
69
+ });
70
+ }
71
+
72
+ return resolved;
73
+ } catch (error) {
74
+ logger.error('Error during model name resolution', {
75
+ deployment: deploymentName,
76
+ error: error instanceof Error ? error.message : String(error),
77
+ });
78
+
79
+ // Fallback to deployment name
80
+ return deploymentName;
81
+ }
82
+ }
83
+
84
+ /**
85
+ * Heuristic pattern matching for Azure deployment names
86
+ * Based on real-world patterns observed in the Python implementation
87
+ *
88
+ * @param deploymentName - Azure deployment name
89
+ * @returns LiteLLM-compatible model name
90
+ */
91
+ function resolveModelNameHeuristic(deploymentName: string): string {
92
+ const nameLower = deploymentName.toLowerCase();
93
+
94
+ // GPT-4o family - handle both "gpt-4o" and "o4" patterns
95
+ if (/gpt-?4o/.test(nameLower) || /o4/.test(nameLower)) {
96
+ if (/mini/.test(nameLower)) return 'gpt-4o-mini';
97
+ return 'gpt-4o';
98
+ }
99
+
100
+ // GPT-4 family (non-omni)
101
+ if (/gpt-?4(?!o)/.test(nameLower)) {
102
+ if (/turbo/.test(nameLower)) return 'gpt-4-turbo';
103
+ if (/vision/.test(nameLower) || /v/.test(nameLower)) return 'gpt-4-vision-preview';
104
+ return 'gpt-4';
105
+ }
106
+
107
+ // GPT-3.5 family
108
+ if (/gpt-?3\.?5/.test(nameLower) || /35-turbo/.test(nameLower) || /gpt-35/.test(nameLower)) {
109
+ if (/instruct/.test(nameLower)) return 'gpt-3.5-turbo-instruct';
110
+ return 'gpt-3.5-turbo';
111
+ }
112
+
113
+ // Embedding models - exact matches work well
114
+ if (/embed/.test(nameLower)) {
115
+ if (/text-embedding-3-large/.test(nameLower)) return 'text-embedding-3-large';
116
+ if (/text-embedding-3-small/.test(nameLower)) return 'text-embedding-3-small';
117
+ if (/text-embedding-ada-002/.test(nameLower) || /ada-002/.test(nameLower))
118
+ return 'text-embedding-ada-002';
119
+ if (/3-large/.test(nameLower)) return 'text-embedding-3-large';
120
+ if (/3-small/.test(nameLower)) return 'text-embedding-3-small';
121
+ }
122
+
123
+ // Ada-002 pattern (can appear without "embed" in deployment name)
124
+ if (/ada-002/.test(nameLower)) return 'text-embedding-ada-002';
125
+
126
+ // DALL-E models
127
+ if (/dall-?e/.test(nameLower)) {
128
+ if (/3/.test(nameLower)) return 'dall-e-3';
129
+ if (/2/.test(nameLower)) return 'dall-e-2';
130
+ }
131
+
132
+ // Whisper models
133
+ if (/whisper/.test(nameLower)) return 'whisper-1';
134
+
135
+ // TTS models
136
+ if (/tts/.test(nameLower)) {
137
+ if (/hd/.test(nameLower)) return 'tts-1-hd';
138
+ return 'tts-1';
139
+ }
140
+
141
+ if (knownModels.includes(nameLower)) return nameLower;
142
+
143
+ // No heuristic match found - log warning and use deployment name
144
+ if (!failedResolutionCache.has(deploymentName)) {
145
+ logger.warn(
146
+ `⚠️ No heuristic match for Azure deployment: ${deploymentName}. Using deployment name for pricing. Consider adding pattern to azure-model-resolver.ts`
147
+ );
148
+ failedResolutionCache.add(deploymentName);
149
+ }
150
+ return deploymentName;
151
+ }
152
+
153
+ /**
154
+ * Clear the model name cache
155
+ * Useful for testing or when deployment configurations change
156
+ */
157
+ export function clearModelNameCache(): void {
158
+ modelNameCache.clear();
159
+ failedResolutionCache.clear();
160
+ getLogger().debug('Model name cache cleared');
161
+ }
162
+
163
+ /**
164
+ * Get cache statistics for monitoring
165
+ */
166
+ export function getModelNameCacheStats(): {
167
+ cacheSize: number;
168
+ failedResolutionCount: number;
169
+ cacheEntries: Array<{ deployment: string; resolved: string }>;
170
+ } {
171
+ return {
172
+ cacheSize: modelNameCache.size,
173
+ failedResolutionCount: failedResolutionCache.size,
174
+ cacheEntries: Array.from(modelNameCache.entries()).map(([deployment, resolved]) => ({
175
+ deployment,
176
+ resolved,
177
+ })),
178
+ };
179
+ }
180
+
181
+ /**
182
+ * Batch resolve multiple deployment names
183
+ * Useful for pre-warming cache or bulk operations
184
+ *
185
+ * @param deploymentNames - Array of deployment names to resolve
186
+ * @returns Map of deployment name to resolved model name
187
+ */
188
+ export function batchResolveModelNames(deploymentNames: string[]): Map<string, string> {
189
+ const results = new Map<string, string>();
190
+ logger.debug('Batch resolving model names', {
191
+ count: deploymentNames.length,
192
+ deployments: deploymentNames,
193
+ });
194
+
195
+ for (const deployment of deploymentNames) {
196
+ try {
197
+ const resolved = resolveAzureModelName(deployment);
198
+ results.set(deployment, resolved);
199
+ } catch (error) {
200
+ logger.error('Error in batch resolution', {
201
+ deployment,
202
+ error: error instanceof Error ? error.message : String(error),
203
+ });
204
+ results.set(deployment, deployment); // Fallback to original name
205
+ }
206
+ }
207
+ return results;
208
+ }
209
+
210
+ /**
211
+ * Check if a deployment name would be resolved to a different model name
212
+ * Useful for validation and testing
213
+ *
214
+ * @param deploymentName - Azure deployment name
215
+ * @returns true if the deployment name would be transformed
216
+ */
217
+ export function wouldTransformDeploymentName(deploymentName: string): boolean {
218
+ const resolved = resolveAzureModelName(deploymentName, false); // Don't use cache for this check
219
+ return resolved !== deploymentName;
220
+ }
@@ -0,0 +1,21 @@
1
+ // Direct match check for known LiteLLM model names
2
+ export const knownModels = [
3
+ 'gpt-4o',
4
+ 'gpt-4o-mini',
5
+ 'gpt-4',
6
+ 'gpt-4-turbo',
7
+ 'gpt-4-vision-preview',
8
+ 'gpt-3.5-turbo',
9
+ 'gpt-3.5-turbo-instruct',
10
+ 'text-embedding-3-large',
11
+ 'text-embedding-3-small',
12
+ 'text-embedding-ada-002',
13
+ 'dall-e-3',
14
+ 'dall-e-2',
15
+ 'whisper-1',
16
+ 'tts-1',
17
+ 'tts-1-hd'
18
+ ];
19
+
20
+ export const MESSAGE_PATTERNS_TYPE_NETWORK = ["network", "timeout", "ECONNRESET"];
21
+ export const ERROR_MESSAGE_PATTERNS_TYPE_CONFIG = ["config", "key", "unauthorized"];