@revenium/openai 1.0.10 → 1.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +20 -0
- package/CHANGELOG.md +52 -0
- package/LICENSE +21 -21
- package/README.md +682 -1152
- package/dist/cjs/core/config/loader.js +1 -1
- package/dist/cjs/core/config/loader.js.map +1 -1
- package/dist/cjs/core/tracking/api-client.js +1 -1
- package/dist/cjs/core/tracking/api-client.js.map +1 -1
- package/dist/cjs/index.js +4 -4
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/types/openai-augmentation.js +1 -1
- package/dist/cjs/utils/url-builder.js +32 -7
- package/dist/cjs/utils/url-builder.js.map +1 -1
- package/dist/esm/core/config/loader.js +1 -1
- package/dist/esm/core/config/loader.js.map +1 -1
- package/dist/esm/core/tracking/api-client.js +1 -1
- package/dist/esm/core/tracking/api-client.js.map +1 -1
- package/dist/esm/index.js +4 -4
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/types/openai-augmentation.js +1 -1
- package/dist/esm/utils/url-builder.js +32 -7
- package/dist/esm/utils/url-builder.js.map +1 -1
- package/dist/types/index.d.ts +4 -4
- package/dist/types/types/index.d.ts +2 -2
- package/dist/types/types/index.d.ts.map +1 -1
- package/dist/types/types/openai-augmentation.d.ts +1 -1
- package/dist/types/utils/url-builder.d.ts +11 -3
- package/dist/types/utils/url-builder.d.ts.map +1 -1
- package/examples/README.md +357 -0
- package/examples/azure-basic.ts +206 -0
- package/examples/azure-responses-basic.ts +233 -0
- package/examples/azure-responses-streaming.ts +255 -0
- package/examples/azure-streaming.ts +209 -0
- package/examples/getting_started.ts +54 -0
- package/examples/openai-basic.ts +147 -0
- package/examples/openai-function-calling.ts +259 -0
- package/examples/openai-responses-basic.ts +212 -0
- package/examples/openai-responses-streaming.ts +232 -0
- package/examples/openai-streaming.ts +172 -0
- package/examples/openai-vision.ts +289 -0
- package/package.json +81 -84
- package/src/core/config/azure-config.ts +72 -0
- package/src/core/config/index.ts +23 -0
- package/src/core/config/loader.ts +66 -0
- package/src/core/config/manager.ts +94 -0
- package/src/core/config/validator.ts +89 -0
- package/src/core/providers/detector.ts +159 -0
- package/src/core/providers/index.ts +16 -0
- package/src/core/tracking/api-client.ts +78 -0
- package/src/core/tracking/index.ts +21 -0
- package/src/core/tracking/payload-builder.ts +132 -0
- package/src/core/tracking/usage-tracker.ts +189 -0
- package/src/core/wrapper/index.ts +9 -0
- package/src/core/wrapper/instance-patcher.ts +288 -0
- package/src/core/wrapper/request-handler.ts +423 -0
- package/src/core/wrapper/stream-wrapper.ts +100 -0
- package/src/index.ts +336 -0
- package/src/types/function-parameters.ts +251 -0
- package/src/types/index.ts +313 -0
- package/src/types/openai-augmentation.ts +233 -0
- package/src/types/responses-api.ts +308 -0
- package/src/utils/azure-model-resolver.ts +220 -0
- package/src/utils/constants.ts +21 -0
- package/src/utils/error-handler.ts +251 -0
- package/src/utils/metadata-builder.ts +219 -0
- package/src/utils/provider-detection.ts +257 -0
- package/src/utils/request-handler-factory.ts +285 -0
- package/src/utils/stop-reason-mapper.ts +74 -0
- package/src/utils/type-guards.ts +202 -0
- package/src/utils/url-builder.ts +68 -0
package/src/types/responses-api.ts
@@ -0,0 +1,308 @@
+/**
+ * Types for OpenAI Responses API support
+ *
+ * This module defines types for the new OpenAI Responses API that replaces
+ * the traditional Chat Completions API. The Responses API provides a unified
+ * interface for building agent-like applications with built-in tools and capabilities.
+ *
+ * Reference: https://platform.openai.com/docs/guides/migrate-to-responses
+ */
+
+import { UsageMetadata } from './index.js';
+
+/**
+ * OpenAI Responses API request parameters
+ * Based on the official OpenAI Responses API documentation
+ * Reference: https://platform.openai.com/docs/guides/migrate-to-responses
+ */
+export interface OpenAIResponsesRequest {
+  /** The model to use for the response */
+  model: string;
+
+  /** Input for the response - can be string or message array */
+  input:
+    | string
+    | Array<{
+        role: 'user' | 'assistant' | 'system';
+        content: string;
+      }>;
+
+  /** Whether to stream the response */
+  stream?: boolean;
+
+  /** Maximum number of output tokens to generate */
+  max_output_tokens?: number;
+
+  /** Temperature for response generation (0.0 to 2.0) */
+  temperature?: number;
+
+  /** Top-p sampling parameter (0.0 to 1.0) */
+  top_p?: number;
+
+  /** Instructions for the model (replaces system messages) */
+  instructions?: string;
+
+  /** Tools available to the model */
+  tools?: Array<{
+    type: 'function' | 'web_search' | 'file_search' | 'code_interpreter' | 'image_generation';
+    // For function tools (internally tagged)
+    name?: string;
+    description?: string;
+    parameters?: {
+      type: 'object';
+      properties: Record<string, unknown>;
+      required?: string[];
+      additionalProperties?: boolean;
+    };
+    // For other tools
+    [key: string]: unknown;
+  }>;
+
+  /** Tool choice configuration */
+  tool_choice?: 'auto' | 'none' | { type: 'function'; name: string };
+
+  /** Structured output configuration */
+  text?: {
+    format?: {
+      type: 'json_schema';
+      name: string;
+      strict?: boolean;
+      schema: {
+        type: 'object';
+        properties: Record<string, unknown>;
+        required?: string[];
+        additionalProperties?: boolean;
+      };
+    };
+  };
+
+  /** Metadata for the request */
+  metadata?: Record<string, unknown>;
+
+  /** ID of previous response to continue from */
+  previous_response_id?: string;
+
+  /** Whether to run in background mode */
+  background?: boolean;
+
+  /** Whether to store the response (default: true) */
+  store?: boolean;
+
+  /** Parallel tool calls configuration */
+  parallel_tool_calls?: boolean;
+
+  /** Reasoning effort level */
+  reasoning_effort?: 'low' | 'medium' | 'high';
+
+  /** Include additional data in response */
+  include?: Array<'reasoning.encrypted_content'>;
+
+  /** Custom metadata field for Revenium tracking */
+  usageMetadata?: UsageMetadata;
+
+  /** Additional parameters */
+  [key: string]: unknown;
+}
+
+/**
+ * OpenAI Responses API response structure
+ * Based on the official Azure OpenAI Responses API documentation
+ */
+export interface OpenAIResponsesResponse {
+  /** Unique identifier for the response */
+  id: string;
+
+  /** Timestamp when the response was created */
+  created_at: number;
+
+  /** The model used for the response */
+  model: string;
+
+  /** Response object type */
+  object: 'response';
+
+  /** Response status */
+  status: 'queued' | 'in_progress' | 'completed' | 'incomplete' | 'cancelled' | 'failed';
+
+  /** Response output array */
+  output: Array<{
+    id: string;
+    type: 'message' | 'function_call' | 'function_call_output' | 'image_generation_call';
+    role?: 'assistant';
+    content?: Array<{
+      type: 'output_text' | 'text';
+      text?: string;
+      annotations?: Array<unknown>;
+    }>;
+    name?: string;
+    call_id?: string;
+    output?: string;
+    result?: string;
+    status?: string | null;
+  }>;
+
+  /** Simplified output text (convenience field) */
+  output_text?: string;
+
+  /** Usage statistics */
+  usage?: {
+    input_tokens: number;
+    output_tokens: number;
+    total_tokens: number;
+    output_tokens_details?: {
+      reasoning_tokens: number;
+    };
+  };
+
+  /** Response metadata */
+  metadata?: Record<string, unknown>;
+
+  /** Instructions used */
+  instructions?: string | null;
+
+  /** Tools configuration */
+  tools?: Array<unknown>;
+
+  /** Tool choice configuration */
+  tool_choice?: unknown;
+
+  /** Parallel tool calls setting */
+  parallel_tool_calls?: boolean | null;
+
+  /** Temperature used */
+  temperature?: number;
+
+  /** Top-p used */
+  top_p?: number;
+
+  /** Max output tokens */
+  max_output_tokens?: number | null;
+
+  /** Previous response ID */
+  previous_response_id?: string | null;
+
+  /** Error information */
+  error?: unknown | null;
+
+  /** Incomplete details */
+  incomplete_details?: unknown | null;
+
+  /** Reasoning information */
+  reasoning?: unknown | null;
+
+  /** Text field */
+  text?: unknown | null;
+
+  /** Truncation information */
+  truncation?: unknown | null;
+
+  /** User information */
+  user?: unknown | null;
+
+  /** Reasoning effort */
+  reasoning_effort?: unknown | null;
+}
+
+/**
+ * Streaming chunk for Responses API
+ */
+export interface OpenAIResponsesStreamChunk {
+  /** Unique identifier for the response */
+  id: string;
+
+  /** The model used */
+  model: string;
+
+  /** Delta content for this chunk */
+  delta?: {
+    content?: Array<{
+      type: 'text' | 'tool_use';
+      text?: string;
+      [key: string]: unknown;
+    }>;
+  };
+
+  /** Usage information (typically in final chunk) */
+  usage?: {
+    input_tokens?: number;
+    output_tokens?: number;
+    total_tokens?: number;
+    reasoning_tokens?: number;
+    cached_tokens?: number;
+  };
+
+  /** Finish reason (in final chunk) */
+  finish_reason?: string | null;
+
+  /** Additional chunk fields */
+  [key: string]: unknown;
+}
+
+/**
+ * Type guard to check if a request is for Responses API
+ */
+export function isResponsesRequest(params: unknown): params is OpenAIResponsesRequest {
+  return typeof params === 'object' && params !== null && 'input' in params && 'model' in params;
+}
+
+/**
+ * Type guard to check if a response is from Responses API
+ */
+export function isResponsesResponse(response: unknown): response is OpenAIResponsesResponse {
+  return (
+    typeof response === 'object' &&
+    response !== null &&
+    'id' in response &&
+    'model' in response &&
+    ('output' in response || 'usage' in response)
+  );
+}
+
+/**
+ * Simplified interface for Responses API create parameters (for examples)
+ */
+export interface ResponsesCreateParams {
+  model: string;
+  input: string | Array<{ role: 'user' | 'assistant' | 'system'; content: string }>;
+  stream?: boolean;
+  max_output_tokens?: number;
+  temperature?: number;
+  instructions?: string;
+  tools?: Array<{
+    type: 'function' | 'web_search' | 'file_search' | 'code_interpreter' | 'image_generation';
+    function?: {
+      name: string;
+      description?: string;
+      parameters?: Record<string, unknown>;
+    };
+  }>;
+  usageMetadata?: UsageMetadata;
+}
+
+/**
+ * Simplified interface for Responses API response (for examples)
+ */
+export interface ResponsesResponse {
+  id: string;
+  model: string;
+  object: 'response';
+  status: string;
+  output: Array<{
+    id: string;
+    type: string;
+    role?: string;
+    content?: Array<{
+      type: 'output_text' | 'text';
+      text?: string;
+    }>;
+  }>;
+  output_text?: string;
+  usage?: {
+    input_tokens: number;
+    output_tokens: number;
+    total_tokens: number;
+    output_tokens_details?: {
+      reasoning_tokens: number;
+    };
+  };
+}
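
For orientation, the sketch below shows one way the new types and type guards from responses-api.ts could be exercised. It is not part of the published diff: the relative import path assumes a file living under package/src/, and the exact shape of UsageMetadata is defined in src/types/index.ts rather than in this hunk.

// Hypothetical usage sketch for the Responses API types above (not from the diff).
import {
  isResponsesRequest,
  isResponsesResponse,
  type OpenAIResponsesRequest,
  type OpenAIResponsesResponse,
} from './types/responses-api.js'; // path assumes this sketch sits in package/src/

const request: OpenAIResponsesRequest = {
  model: 'gpt-4o-mini',
  input: [{ role: 'user', content: 'Summarize the release notes.' }],
  max_output_tokens: 200,
  // usageMetadata?: UsageMetadata could be attached here for Revenium tracking;
  // its fields come from src/types/index.ts and are not shown in this section.
};

if (isResponsesRequest(request)) {
  // The guard narrows an unknown payload, so downstream tracking code can read model/input safely.
  console.log('Responses API request for model', request.model);
}

// Pull the assistant text out of a raw payload: guard first, then prefer the
// output_text convenience field with a fallback to the first output_text block.
function extractText(payload: unknown): string | undefined {
  if (!isResponsesResponse(payload)) return undefined;
  const response: OpenAIResponsesResponse = payload;
  return (
    response.output_text ??
    response.output
      .flatMap(item => item.content ?? [])
      .find(block => block.type === 'output_text')?.text
  );
}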
package/src/utils/azure-model-resolver.ts
@@ -0,0 +1,220 @@
+import { getLogger } from '../core/config/index.js';
+import { knownModels } from './constants.js';
+
+/**
+ * Azure Model Name Resolution Module
+ *
+ * This module maps Azure deployment names to LiteLLM-compatible model names for accurate pricing.
+ * Based on learnings from the Python implementation, it uses heuristic pattern matching
+ * with fallback strategies to ensure reliable model name resolution.
+ *
+ * Key patterns observed in real Azure deployments:
+ * - "gpt-4o-2024-11-20" → "gpt-4o"
+ * - "text-embedding-3-large" → "text-embedding-3-large" (exact match)
+ * - "o4-mini" → "gpt-4o-mini"
+ * - "gpt4o-prod" → "gpt-4o"
+ * - "gpt-35-turbo-dev" → "gpt-3.5-turbo"
+ */
+
+/**
+ * In-memory cache for resolved model names
+ * Using Map for thread-safe operations in Node.js
+ */
+const modelNameCache = new Map<string, string>();
+
+/**
+ * Cache for failed resolution attempts to avoid repeated warnings
+ */
+const failedResolutionCache = new Set<string>();
+
+// Global logger
+const logger = getLogger();
+
+/**
+ * Resolve Azure deployment name to LiteLLM-compatible model name
+ *
+ * @param deploymentName - Azure deployment name
+ * @param useCache - Whether to use cached results (default: true)
+ * @returns LiteLLM-compatible model name
+ */
+export function resolveAzureModelName(deploymentName: string, useCache: boolean = true): string {
+  if (!deploymentName) {
+    logger.warn('Empty deployment name provided to model resolver');
+    return deploymentName;
+  }
+
+  // Check cache first
+  if (useCache && modelNameCache.has(deploymentName)) {
+    const cachedResult = modelNameCache.get(deploymentName)!;
+    logger.debug('Model name resolved from cache', {
+      deployment: deploymentName,
+      resolved: cachedResult,
+    });
+    return cachedResult;
+  }
+
+  try {
+    const resolved = resolveModelNameHeuristic(deploymentName);
+
+    // Cache the result
+    if (useCache) {
+      modelNameCache.set(deploymentName, resolved);
+    }
+
+    // Log successful resolution
+    if (resolved !== deploymentName) {
+      logger.debug('Model name resolved via heuristics', {
+        deployment: deploymentName,
+        resolved,
+      });
+    }
+
+    return resolved;
+  } catch (error) {
+    logger.error('Error during model name resolution', {
+      deployment: deploymentName,
+      error: error instanceof Error ? error.message : String(error),
+    });
+
+    // Fallback to deployment name
+    return deploymentName;
+  }
+}
+
+/**
+ * Heuristic pattern matching for Azure deployment names
+ * Based on real-world patterns observed in the Python implementation
+ *
+ * @param deploymentName - Azure deployment name
+ * @returns LiteLLM-compatible model name
+ */
+function resolveModelNameHeuristic(deploymentName: string): string {
+  const nameLower = deploymentName.toLowerCase();
+
+  // GPT-4o family - handle both "gpt-4o" and "o4" patterns
+  if (/gpt-?4o/.test(nameLower) || /o4/.test(nameLower)) {
+    if (/mini/.test(nameLower)) return 'gpt-4o-mini';
+    return 'gpt-4o';
+  }
+
+  // GPT-4 family (non-omni)
+  if (/gpt-?4(?!o)/.test(nameLower)) {
+    if (/turbo/.test(nameLower)) return 'gpt-4-turbo';
+    if (/vision/.test(nameLower) || /v/.test(nameLower)) return 'gpt-4-vision-preview';
+    return 'gpt-4';
+  }
+
+  // GPT-3.5 family
+  if (/gpt-?3\.?5/.test(nameLower) || /35-turbo/.test(nameLower) || /gpt-35/.test(nameLower)) {
+    if (/instruct/.test(nameLower)) return 'gpt-3.5-turbo-instruct';
+    return 'gpt-3.5-turbo';
+  }
+
+  // Embedding models - exact matches work well
+  if (/embed/.test(nameLower)) {
+    if (/text-embedding-3-large/.test(nameLower)) return 'text-embedding-3-large';
+    if (/text-embedding-3-small/.test(nameLower)) return 'text-embedding-3-small';
+    if (/text-embedding-ada-002/.test(nameLower) || /ada-002/.test(nameLower))
+      return 'text-embedding-ada-002';
+    if (/3-large/.test(nameLower)) return 'text-embedding-3-large';
+    if (/3-small/.test(nameLower)) return 'text-embedding-3-small';
+  }
+
+  // Ada-002 pattern (can appear without "embed" in deployment name)
+  if (/ada-002/.test(nameLower)) return 'text-embedding-ada-002';
+
+  // DALL-E models
+  if (/dall-?e/.test(nameLower)) {
+    if (/3/.test(nameLower)) return 'dall-e-3';
+    if (/2/.test(nameLower)) return 'dall-e-2';
+  }
+
+  // Whisper models
+  if (/whisper/.test(nameLower)) return 'whisper-1';
+
+  // TTS models
+  if (/tts/.test(nameLower)) {
+    if (/hd/.test(nameLower)) return 'tts-1-hd';
+    return 'tts-1';
+  }
+
+  if (knownModels.includes(nameLower)) return nameLower;
+
+  // No heuristic match found - log warning and use deployment name
+  if (!failedResolutionCache.has(deploymentName)) {
+    logger.warn(
+      `⚠️ No heuristic match for Azure deployment: ${deploymentName}. Using deployment name for pricing. Consider adding pattern to azure-model-resolver.ts`
+    );
+    failedResolutionCache.add(deploymentName);
+  }
+  return deploymentName;
+}
+
+/**
+ * Clear the model name cache
+ * Useful for testing or when deployment configurations change
+ */
+export function clearModelNameCache(): void {
+  modelNameCache.clear();
+  failedResolutionCache.clear();
+  getLogger().debug('Model name cache cleared');
+}
+
+/**
+ * Get cache statistics for monitoring
+ */
+export function getModelNameCacheStats(): {
+  cacheSize: number;
+  failedResolutionCount: number;
+  cacheEntries: Array<{ deployment: string; resolved: string }>;
+} {
+  return {
+    cacheSize: modelNameCache.size,
+    failedResolutionCount: failedResolutionCache.size,
+    cacheEntries: Array.from(modelNameCache.entries()).map(([deployment, resolved]) => ({
+      deployment,
+      resolved,
+    })),
+  };
+}
+
+/**
+ * Batch resolve multiple deployment names
+ * Useful for pre-warming cache or bulk operations
+ *
+ * @param deploymentNames - Array of deployment names to resolve
+ * @returns Map of deployment name to resolved model name
+ */
+export function batchResolveModelNames(deploymentNames: string[]): Map<string, string> {
+  const results = new Map<string, string>();
+  logger.debug('Batch resolving model names', {
+    count: deploymentNames.length,
+    deployments: deploymentNames,
+  });
+
+  for (const deployment of deploymentNames) {
+    try {
+      const resolved = resolveAzureModelName(deployment);
+      results.set(deployment, resolved);
+    } catch (error) {
+      logger.error('Error in batch resolution', {
+        deployment,
+        error: error instanceof Error ? error.message : String(error),
+      });
+      results.set(deployment, deployment); // Fallback to original name
+    }
+  }
+  return results;
+}
+
+/**
+ * Check if a deployment name would be resolved to a different model name
+ * Useful for validation and testing
+ *
+ * @param deploymentName - Azure deployment name
+ * @returns true if the deployment name would be transformed
+ */
+export function wouldTransformDeploymentName(deploymentName: string): boolean {
+  const resolved = resolveAzureModelName(deploymentName, false); // Don't use cache for this check
+  return resolved !== deploymentName;
+}
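
A brief, hedged illustration of how the resolver exports above behave for the deployment-name patterns listed in the module comment; the expected values follow the heuristics in resolveModelNameHeuristic, and the import path assumes a caller inside package/src/. This sketch is not part of the diff.

// Illustrative sketch of the resolver's expected behaviour.
import {
  resolveAzureModelName,
  batchResolveModelNames,
  getModelNameCacheStats,
  clearModelNameCache,
} from './utils/azure-model-resolver.js';

// Single lookups follow the heuristic patterns documented above.
resolveAzureModelName('gpt-4o-2024-11-20'); // 'gpt-4o'
resolveAzureModelName('gpt-35-turbo-dev'); // 'gpt-3.5-turbo'
resolveAzureModelName('my-custom-deploy'); // 'my-custom-deploy' (warns once, then remembered in failedResolutionCache)

// Pre-warm the cache for a batch of known deployments.
const resolved = batchResolveModelNames(['o4-mini', 'text-embedding-3-large']);
console.log(resolved.get('o4-mini')); // 'gpt-4o-mini'

// Inspect or reset the cache, e.g. between test cases.
console.log(getModelNameCacheStats().cacheSize); // number of deployments cached so far
clearModelNameCache();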
package/src/utils/constants.ts
@@ -0,0 +1,21 @@
+// Direct match check for known LiteLLM model names
+export const knownModels = [
+  'gpt-4o',
+  'gpt-4o-mini',
+  'gpt-4',
+  'gpt-4-turbo',
+  'gpt-4-vision-preview',
+  'gpt-3.5-turbo',
+  'gpt-3.5-turbo-instruct',
+  'text-embedding-3-large',
+  'text-embedding-3-small',
+  'text-embedding-ada-002',
+  'dall-e-3',
+  'dall-e-2',
+  'whisper-1',
+  'tts-1',
+  'tts-1-hd'
+];
+
+export const MESSAGE_PATTERNS_TYPE_NETWORK = ["network", "timeout", "ECONNRESET"];
+export const ERROR_MESSAGE_PATTERNS_TYPE_CONFIG = ["config", "key", "unauthorized"];