@revenium/openai 1.0.11 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/.env.example +20 -0
  2. package/CHANGELOG.md +21 -47
  3. package/README.md +141 -690
  4. package/dist/cjs/core/config/loader.js +1 -1
  5. package/dist/cjs/core/config/loader.js.map +1 -1
  6. package/dist/cjs/core/tracking/api-client.js +1 -1
  7. package/dist/cjs/core/tracking/api-client.js.map +1 -1
  8. package/dist/cjs/index.js +2 -2
  9. package/dist/cjs/index.js.map +1 -1
  10. package/dist/cjs/utils/url-builder.js +32 -7
  11. package/dist/cjs/utils/url-builder.js.map +1 -1
  12. package/dist/esm/core/config/loader.js +1 -1
  13. package/dist/esm/core/config/loader.js.map +1 -1
  14. package/dist/esm/core/tracking/api-client.js +1 -1
  15. package/dist/esm/core/tracking/api-client.js.map +1 -1
  16. package/dist/esm/index.js +2 -2
  17. package/dist/esm/index.js.map +1 -1
  18. package/dist/esm/utils/url-builder.js +32 -7
  19. package/dist/esm/utils/url-builder.js.map +1 -1
  20. package/dist/types/index.d.ts +2 -2
  21. package/dist/types/types/index.d.ts +2 -2
  22. package/dist/types/types/index.d.ts.map +1 -1
  23. package/dist/types/utils/url-builder.d.ts +11 -3
  24. package/dist/types/utils/url-builder.d.ts.map +1 -1
  25. package/examples/README.md +250 -254
  26. package/examples/azure-basic.ts +25 -13
  27. package/examples/azure-responses-basic.ts +36 -7
  28. package/examples/azure-responses-streaming.ts +36 -7
  29. package/examples/azure-streaming.ts +40 -19
  30. package/examples/getting_started.ts +54 -0
  31. package/examples/openai-basic.ts +39 -17
  32. package/examples/openai-function-calling.ts +259 -0
  33. package/examples/openai-responses-basic.ts +36 -7
  34. package/examples/openai-responses-streaming.ts +36 -7
  35. package/examples/openai-streaming.ts +24 -13
  36. package/examples/openai-vision.ts +289 -0
  37. package/package.json +3 -9
  38. package/src/core/config/azure-config.ts +72 -0
  39. package/src/core/config/index.ts +23 -0
  40. package/src/core/config/loader.ts +66 -0
  41. package/src/core/config/manager.ts +94 -0
  42. package/src/core/config/validator.ts +89 -0
  43. package/src/core/providers/detector.ts +159 -0
  44. package/src/core/providers/index.ts +16 -0
  45. package/src/core/tracking/api-client.ts +78 -0
  46. package/src/core/tracking/index.ts +21 -0
  47. package/src/core/tracking/payload-builder.ts +132 -0
  48. package/src/core/tracking/usage-tracker.ts +189 -0
  49. package/src/core/wrapper/index.ts +9 -0
  50. package/src/core/wrapper/instance-patcher.ts +288 -0
  51. package/src/core/wrapper/request-handler.ts +423 -0
  52. package/src/core/wrapper/stream-wrapper.ts +100 -0
  53. package/src/index.ts +336 -0
  54. package/src/types/function-parameters.ts +251 -0
  55. package/src/types/index.ts +313 -0
  56. package/src/types/openai-augmentation.ts +233 -0
  57. package/src/types/responses-api.ts +308 -0
  58. package/src/utils/azure-model-resolver.ts +220 -0
  59. package/src/utils/constants.ts +21 -0
  60. package/src/utils/error-handler.ts +251 -0
  61. package/src/utils/metadata-builder.ts +219 -0
  62. package/src/utils/provider-detection.ts +257 -0
  63. package/src/utils/request-handler-factory.ts +285 -0
  64. package/src/utils/stop-reason-mapper.ts +74 -0
  65. package/src/utils/type-guards.ts +202 -0
  66. package/src/utils/url-builder.ts +68 -0
package/src/core/wrapper/instance-patcher.ts
@@ -0,0 +1,288 @@
+ /**
+  * Instance Patcher Module
+  *
+  * Handles patching of OpenAI client instances.
+  * Extracted from wrapper.ts for better organization.
+  */
+
+ import OpenAI from 'openai';
+ import type { ChatCompletionCreateParams } from 'openai/resources/chat/completions';
+ import type { EmbeddingCreateParams } from 'openai/resources/embeddings';
+ import { ProviderInfo } from '../../types/index.js';
+ import {
+   OpenAIClientInstance,
+   OpenAIChatRequest,
+   OpenAIEmbeddingRequest,
+   OpenAIRequestOptions,
+   OpenAIResponsesOriginalFunction,
+ } from '../../types/function-parameters.js';
+ import { isOpenAIClientInstance } from '../../utils/type-guards.js';
+ import { extractMetadata, createLoggingContext } from '../../utils/metadata-builder.js';
+ import {
+   requestHandlerFactory,
+   ensureFactoryInitialized,
+ } from '../../utils/request-handler-factory.js';
+ import { getLogger, getConfig } from '../config/index.js';
+ import { detectProvider } from '../providers/index.js';
+ import { OpenAIResponsesRequest } from '../../types/responses-api.js';
+
+ // Import the type augmentations to ensure they're available
+ import '../../types/openai-augmentation.js';
+
+ // Interface for OpenAI instance with Responses API
+ interface OpenAIWithResponses extends OpenAIClientInstance {
+   responses?: {
+     create: (params: OpenAIResponsesRequest, options?: OpenAIRequestOptions) => Promise<unknown>;
+   };
+ }
+
+ /**
+  * Set to track patched instances
+  */
+ const patchedInstances = new WeakSet();
+
+ /**
+  * WeakMap to store provider information for each client instance
+  */
+ const instanceProviders = new WeakMap<OpenAIClientInstance, ProviderInfo>();
+
+ // Global logger
+ const logger = getLogger();
+
+ /**
+  * Get provider information for a client instance
+  */
+ export function getProviderInfo(instance: OpenAIClientInstance): ProviderInfo | undefined {
+   return instanceProviders.get(instance);
+ }
+
+ /**
+  * Simple approach: Only patch instances when users create them
+  * No global patching, no dummy keys - just clean instance patching
+  */
+ export function patchOpenAI(): void {
+   logger.info('Revenium OpenAI middleware loaded and ready');
+   logger.debug('Use patchOpenAIInstance() to patch specific OpenAI instances');
+ }
+
+ /**
+  * Manually patch an existing OpenAI instance
+  * This is the main function users should call
+  */
+ export function patchOpenAIInstance(instance: OpenAI): OpenAI {
+   // Check if middleware is initialized
+   const config = getConfig();
+   if (!config) {
+     logger.warn('Revenium middleware not initialized.');
+     logger.warn(
+       'Auto-initialization may have failed. Try calling initializeReveniumFromEnv() explicitly.'
+     );
+     logger.warn('Check that REVENIUM_METERING_API_KEY environment variable is set.');
+     logger.warn(
+       'OpenAI instance will be patched but tracking may not work without proper configuration.'
+     );
+   } else {
+     logger.debug('Revenium middleware is properly configured');
+   }
+
+   if (patchedInstances.has(instance)) {
+     logger.debug('OpenAI instance already patched, skipping');
+     return instance;
+   }
+
+   patchInstance(instance as unknown as OpenAIClientInstance);
+   logger.debug('OpenAI instance patched successfully');
+
+   return instance;
+ }
+
+ /**
+  * Patch an individual OpenAI instance
+  */
+ function patchInstance(instance: OpenAIClientInstance): void {
+   try {
+     // Validate instance
+     if (!isOpenAIClientInstance(instance)) {
+       logger.error('Invalid OpenAI client instance provided to patchInstance');
+       return;
+     }
+
+     // Detect provider type for this instance
+     const providerInfo = detectProvider(instance);
+     instanceProviders.set(instance, providerInfo);
+
+     logger.debug('Provider detection completed for instance', {
+       provider: providerInfo.provider,
+       isAzure: providerInfo.isAzure,
+       hasAzureConfig: !!providerInfo.azureConfig,
+     });
+
+     // Patch chat completions
+     patchChatCompletions(instance);
+
+     // Patch embeddings
+     patchEmbeddings(instance);
+
+     // Patch responses API (new OpenAI Responses API)
+     patchResponses(instance);
+
+     // Mark as patched
+     patchedInstances.add(instance);
+   } catch (error) {
+     logger.error('Failed to patch OpenAI instance', {
+       error: error instanceof Error ? error.message : String(error),
+     });
+   }
+ }
+
+ /**
+  * Patch chat completions endpoint
+  */
+ function patchChatCompletions(instance: OpenAIClientInstance): void {
+   if (!instance.chat || !instance.chat.completions || !instance.chat.completions.create) {
+     return logger.warn('OpenAI instance missing chat.completions.create, skipping chat patch');
+   }
+
+   // Store the original create method
+   const originalCreate = instance.chat.completions.create.bind(instance.chat.completions);
+
+   // Replace the create method with our wrapped version
+   instance.chat.completions.create = async function (
+     params: ChatCompletionCreateParams,
+     options?: OpenAIRequestOptions
+   ) {
+     // Extract metadata using utility
+     const { metadata, cleanParams } = extractMetadata(params as OpenAIChatRequest);
+     const typedParams = params as OpenAIChatRequest;
+
+     logger.debug('OpenAI chat.completions.create intercepted', {
+       ...createLoggingContext(metadata),
+       model: typedParams.model,
+       stream: !!typedParams.stream,
+     });
+
+     // Record request start time
+     const requestStartTime = Date.now();
+
+     // Ensure factory is initialized and route request
+     try {
+       await ensureFactoryInitialized();
+     } catch (error) {
+       logger.error('Failed to initialize request handler factory', { error });
+       throw new Error('Middleware initialization failed - cannot process request');
+     }
+     return requestHandlerFactory.routeChatRequest(
+       originalCreate,
+       cleanParams,
+       options,
+       metadata,
+       requestStartTime,
+       instance
+     );
+   };
+ }
+
+ /**
+  * Patch embeddings endpoint
+  */
+ function patchEmbeddings(instance: OpenAIClientInstance): void {
+   if (!instance.embeddings || !instance.embeddings.create) {
+     return logger.warn('OpenAI instance missing embeddings.create, skipping embeddings patch');
+   }
+   // Store the original embeddings create method
+   const originalEmbeddingsCreate = instance.embeddings.create.bind(instance.embeddings);
+
+   // Replace the embeddings create method with our wrapped version
+   instance.embeddings.create = async function (
+     params: EmbeddingCreateParams,
+     options?: OpenAIRequestOptions
+   ) {
+     // Extract metadata using utility
+     const { metadata, cleanParams } = extractMetadata(params as OpenAIEmbeddingRequest);
+     const typedParams = params as OpenAIEmbeddingRequest;
+
+     logger.debug('OpenAI embeddings.create intercepted', {
+       ...createLoggingContext(metadata),
+       model: typedParams.model,
+       inputType: typeof typedParams.input,
+     });
+
+     // Record request start time
+     const requestStartTime = Date.now();
+
+     // Ensure factory is initialized and route request
+     try {
+       await ensureFactoryInitialized();
+     } catch (error) {
+       logger.error('Failed to initialize request handler factory', { error });
+       throw new Error('Middleware initialization failed - cannot process request');
+     }
+     return requestHandlerFactory.routeEmbeddingsRequest(
+       originalEmbeddingsCreate,
+       cleanParams,
+       options,
+       metadata,
+       requestStartTime,
+       instance
+     );
+   };
+ }
+
+ /**
+  * Patch responses endpoint (new OpenAI Responses API)
+  */
+ function patchResponses(instance: OpenAIClientInstance): void {
+   // Type assertion for new Responses API (not yet in OpenAI types)
+   const responsesAPI = instance as OpenAIWithResponses;
+
+   // Check if the instance has the responses API (it's a newer feature)
+   if (!responsesAPI.responses || !responsesAPI.responses.create) {
+     logger.debug(
+       'OpenAI instance missing responses.create, skipping responses patch (this is normal for older SDK versions)'
+     );
+     return;
+   }
+
+   // Store the original responses create method
+   const originalResponsesCreate = responsesAPI.responses.create.bind(responsesAPI.responses);
+
+   // Replace the responses create method with our wrapped version
+   responsesAPI.responses.create = async function (
+     params: OpenAIResponsesRequest,
+     options?: OpenAIRequestOptions
+   ) {
+     // Extract metadata using utility (similar to chat completions)
+     const { metadata, cleanParams } = extractMetadata(params);
+
+     logger.debug('OpenAI responses.create intercepted', {
+       ...createLoggingContext(metadata),
+       model: params.model,
+       stream: !!params.stream,
+       inputType: typeof params.input,
+     });
+
+     // Record request start time
+     const requestStartTime = Date.now();
+
+     // Ensure factory is initialized and route request
+     try {
+       await ensureFactoryInitialized();
+     } catch (error) {
+       logger.error('Failed to initialize request handler factory', { error });
+       throw new Error('Middleware initialization failed - cannot process request');
+     }
+     return requestHandlerFactory.routeResponsesRequest(
+       originalResponsesCreate as OpenAIResponsesOriginalFunction,
+       cleanParams,
+       options,
+       metadata,
+       requestStartTime,
+       instance
+     );
+   };
+ }
+
+ /**
+  * Export instance providers for request handlers
+  */
+ export { instanceProviders };
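The instance-patcher module above is the piece consumers interact with: patching is explicit and per-instance, with no global monkey-patching. A minimal wiring sketch, not copied from the package's bundled examples; it assumes patchOpenAIInstance is re-exported from the @revenium/openai package root, and the model name is illustrative:

import OpenAI from 'openai';
import { patchOpenAIInstance } from '@revenium/openai'; // assumed root re-export

// REVENIUM_METERING_API_KEY should be set in the environment; otherwise the
// instance is still patched, but the warnings above fire and tracking is skipped.
const openai = patchOpenAIInstance(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }));

// Requests flow through the wrapped create methods; usage is tracked asynchronously.
const completion = await openai.chat.completions.create({
  model: 'gpt-4o-mini', // illustrative model
  messages: [{ role: 'user', content: 'Hello' }],
});
console.log(completion.choices[0]?.message?.content);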
package/src/core/wrapper/request-handler.ts
@@ -0,0 +1,423 @@
+ /**
+  * Request Handler Module
+  *
+  * Handles different types of OpenAI requests (streaming, non-streaming, embeddings).
+  * Extracted from wrapper.ts for better organization.
+  */
+
+ import { randomUUID } from 'crypto';
+ import { UsageMetadata, OpenAIResponsesRequest } from '../../types/index.js';
+ import {
+   OpenAIChatResponse,
+   OpenAIEmbeddingResponse,
+   OpenAIChatRequest,
+   OpenAIEmbeddingRequest,
+   OpenAIClientInstance,
+   OpenAIRequestOptions,
+   OpenAIOriginalFunction,
+   OpenAIResponsesOriginalFunction,
+ } from '../../types/function-parameters.js';
+ import {
+   isOpenAIChatResponse,
+   isOpenAIEmbeddingResponse,
+   hasValidUsage,
+ } from '../../utils/type-guards.js';
+ import { safeAsyncOperation, NetworkError, classifyError } from '../../utils/error-handler.js';
+ import { createLoggingContext } from '../../utils/metadata-builder.js';
+ import { trackUsageAsync, trackEmbeddingsUsageAsync } from '../tracking/index.js';
+ import { getLogger } from '../config/index.js';
+ import { instanceProviders } from './instance-patcher.js';
+ import { createTrackingStreamWrapper } from './stream-wrapper.js';
+
+ // Global logger
+ const logger = getLogger();
+
+ /**
+  * Handle non-streaming OpenAI requests
+  */
+ export async function handleNonStreamingRequest(
+   originalCreate: OpenAIOriginalFunction,
+   params: Omit<OpenAIChatRequest, 'usageMetadata'> | Omit<OpenAIEmbeddingRequest, 'usageMetadata'>,
+   options: OpenAIRequestOptions | undefined,
+   usageMetadata: UsageMetadata | undefined,
+   requestStartTime: number,
+   instance: OpenAIClientInstance
+ ): Promise<OpenAIChatResponse | OpenAIEmbeddingResponse> {
+   const loggingContext = createLoggingContext(usageMetadata);
+
+   const result = await safeAsyncOperation(
+     async () => {
+       // Call the original OpenAI method (cast params back to original type since usageMetadata is removed)
+       const response = await originalCreate(params as any, options);
+
+       // Validate response structure
+       if (!hasValidUsage(response)) {
+         logger.warn('Invalid response structure from OpenAI API', {
+           ...loggingContext,
+           response,
+         });
+         return response;
+       }
+
+       // Calculate duration
+       const duration = Date.now() - requestStartTime;
+
+       // Get provider info for this instance
+       const providerInfo = instanceProviders.get(instance);
+
+       // Track usage for chat completions
+       if (isOpenAIChatResponse(response)) {
+         trackUsageAsync({
+           requestId: response.id,
+           model: response.model,
+           promptTokens: response.usage.prompt_tokens,
+           completionTokens: response.usage.completion_tokens || 0,
+           totalTokens: response.usage.total_tokens,
+           reasoningTokens: response.usage.reasoning_tokens,
+           cachedTokens: response.usage.cached_tokens,
+           duration,
+           finishReason: response.choices?.[0]?.finish_reason || null,
+           usageMetadata,
+           isStreamed: false,
+           providerInfo,
+         });
+       }
+
+       logger.debug('Chat completion request completed', {
+         ...loggingContext,
+         model: response.model,
+         duration,
+         totalTokens: response.usage.total_tokens,
+       });
+
+       return response;
+     },
+     'Non-streaming OpenAI request',
+     {
+       logError: true,
+       rethrow: true,
+       messagePrefix: 'Chat completion request failed: ',
+       transformError: error => {
+         const classified = classifyError(error);
+         if (classified.type === 'network') {
+           return new NetworkError(classified.message, {
+             ...loggingContext,
+             duration: Date.now() - requestStartTime,
+           });
+         }
+         return error instanceof Error ? error : new Error(String(error));
+       },
+     },
+     logger
+   );
+
+   if (!result) throw new Error('OpenAI request failed without specific error');
+   return result;
+ }
+
+ /**
+  * Handle streaming OpenAI requests
+  */
+ export async function handleStreamingRequest(
+   originalCreate: OpenAIOriginalFunction,
+   params: Omit<OpenAIChatRequest, 'usageMetadata'>,
+   options: OpenAIRequestOptions | undefined,
+   usageMetadata: UsageMetadata | undefined,
+   requestStartTime: number,
+   instance: OpenAIClientInstance
+ ): Promise<AsyncIterable<unknown>> {
+   try {
+     // Ensure stream_options includes usage data for token tracking
+     const enhancedParams = {
+       ...params,
+       stream_options: {
+         include_usage: true,
+         ...(params.stream_options || {}),
+       },
+     };
+
+     logger.debug('Enhanced streaming params with usage tracking', {
+       originalStreamOptions: params.stream_options,
+       enhancedStreamOptions: enhancedParams.stream_options,
+     });
+
+     // Call the original OpenAI method to get the stream (cast params back to original type since usageMetadata is removed)
+     const originalStream = await originalCreate(enhancedParams as any, options);
+
+     logger.debug('Chat completion streaming request initiated', {
+       model: params.model,
+     });
+
+     // Return a wrapped stream that tracks usage when complete
+     return createTrackingStreamWrapper(
+       originalStream as unknown as AsyncIterable<unknown>,
+       usageMetadata,
+       requestStartTime,
+       instance
+     );
+   } catch (error) {
+     const duration = Date.now() - requestStartTime;
+     logger.error('Chat completion streaming request failed', {
+       error: error instanceof Error ? error.message : String(error),
+       duration,
+     });
+
+     // Re-throw the error to maintain original behavior
+     throw error;
+   }
+ }
+
+ /**
+  * Handle embeddings requests
+  */
+ export async function handleEmbeddingsRequest(
+   originalCreate: OpenAIOriginalFunction,
+   params: Omit<OpenAIEmbeddingRequest, 'usageMetadata'>,
+   options: OpenAIRequestOptions | undefined,
+   usageMetadata: UsageMetadata | undefined,
+   requestStartTime: number,
+   instance: OpenAIClientInstance
+ ): Promise<OpenAIEmbeddingResponse> {
+   try {
+     // Call the original OpenAI method (cast params back to original type since usageMetadata is removed)
+     const response = await originalCreate(params as any, options);
+
+     // Validate response structure
+     if (!isOpenAIEmbeddingResponse(response)) {
+       logger.warn('Invalid embeddings response structure from OpenAI API', { response });
+       return response as unknown as OpenAIEmbeddingResponse;
+     }
+
+     // Calculate duration
+     const duration = Date.now() - requestStartTime;
+
+     // Get provider info for this instance
+     const providerInfo = instanceProviders.get(instance);
+
+     // Track embeddings usage
+     trackEmbeddingsUsageAsync({
+       transactionId: `embed-${randomUUID()}`,
+       model: response.model,
+       promptTokens: response.usage.prompt_tokens,
+       totalTokens: response.usage.total_tokens,
+       duration,
+       usageMetadata,
+       requestStartTime,
+       providerInfo,
+     });
+
+     logger.debug('Embeddings request completed', {
+       model: response.model,
+       duration,
+       totalTokens: response.usage.total_tokens,
+     });
+
+     return response;
+   } catch (error) {
+     const duration = Date.now() - requestStartTime;
+     logger.error('Embeddings request failed', {
+       error: error instanceof Error ? error.message : String(error),
+       duration,
+     });
+
+     // Re-throw the error to maintain original behavior
+     throw error;
+   }
+ }
+
+ /**
+  * Handle non-streaming OpenAI Responses API requests
+  */
+ export async function handleResponsesNonStreamingRequest(
+   originalCreate: OpenAIResponsesOriginalFunction,
+   params: Omit<OpenAIResponsesRequest, 'usageMetadata'>,
+   options: OpenAIRequestOptions | undefined,
+   usageMetadata: UsageMetadata | undefined,
+   requestStartTime: number,
+   instance: OpenAIClientInstance
+ ): Promise<unknown> {
+   const loggingContext = createLoggingContext(usageMetadata);
+
+   const result = await safeAsyncOperation(
+     async () => {
+       // Call the original OpenAI method (cast params back to original type since usageMetadata is removed)
+       const response = await originalCreate(params as any, options);
+
+       // Validate response structure
+       if (!response || typeof response !== 'object') {
+         throw new Error('Invalid response from OpenAI Responses API');
+       }
+
+       const duration = Date.now() - requestStartTime;
+
+       // Extract usage information (Responses API may have different structure)
+       const usage = (response as any).usage;
+       if (usage) {
+         // Track usage asynchronously using similar pattern to chat completions
+         trackUsageAsync({
+           requestId: (response as any).id || randomUUID(),
+           model: (response as any).model || params.model,
+           promptTokens: usage.input_tokens || 0,
+           completionTokens: usage.output_tokens || 0,
+           totalTokens: usage.total_tokens || 0,
+           reasoningTokens: usage.reasoning_tokens,
+           cachedTokens: usage.cached_tokens,
+           duration,
+           finishReason: (response as any).finish_reason || 'completed',
+           usageMetadata,
+           isStreamed: false,
+           providerInfo: instanceProviders.get(instance),
+         });
+       }
+
+       logger.debug('Responses API request completed', {
+         ...loggingContext,
+         model: (response as any).model,
+         duration,
+         totalTokens: usage?.total_tokens,
+       });
+
+       return response;
+     },
+     'Non-streaming OpenAI Responses API request',
+     {
+       logError: true,
+       rethrow: true,
+       messagePrefix: 'Responses API request failed: ',
+       transformError: error => {
+         const classified = classifyError(error);
+         if (classified.type === 'network') {
+           return new NetworkError(classified.message, {
+             ...loggingContext,
+             duration: Date.now() - requestStartTime,
+           });
+         }
+         return error instanceof Error ? error : new Error(String(error));
+       },
+     },
+     logger
+   );
+
+   if (!result) throw new Error('OpenAI Responses API request failed without specific error');
+   return result;
+ }
+
+ /**
+  * Handle streaming OpenAI Responses API requests
+  */
+ export async function handleResponsesStreamingRequest(
+   originalCreate: OpenAIResponsesOriginalFunction,
+   params: Omit<OpenAIResponsesRequest, 'usageMetadata'>,
+   options: OpenAIRequestOptions | undefined,
+   usageMetadata: UsageMetadata | undefined,
+   requestStartTime: number,
+   instance: OpenAIClientInstance
+ ): Promise<AsyncIterable<unknown>> {
+   try {
+     // Call the original OpenAI method to get the stream (cast params back to original type since usageMetadata is removed)
+     const originalStream = await originalCreate(params as any, options);
+
+     logger.debug('Responses API streaming request initiated', {
+       model: params.model,
+     });
+
+     // Return a wrapped stream that tracks usage when complete
+     // We'll use a similar pattern to chat completions but adapted for Responses API
+     return createResponsesTrackingStreamWrapper(
+       originalStream as unknown as AsyncIterable<unknown>,
+       usageMetadata,
+       requestStartTime,
+       instance
+     );
+   } catch (error) {
+     const duration = Date.now() - requestStartTime;
+     logger.error('Responses API streaming request failed', {
+       error: error instanceof Error ? error.message : String(error),
+       duration,
+     });
+
+     // Re-throw the error to maintain original behavior
+     throw error;
+   }
+ }
+
+ /**
+  * Create a tracking stream wrapper for Responses API
+  * Similar to createTrackingStreamWrapper but adapted for Responses API structure
+  */
+ async function* createResponsesTrackingStreamWrapper(
+   originalStream: AsyncIterable<unknown>,
+   usageMetadata: UsageMetadata | undefined,
+   requestStartTime: number,
+   instance: OpenAIClientInstance
+ ): AsyncIterable<unknown> {
+   let firstChunkTime: number | undefined;
+   let finalUsage: any = null;
+   let model = '';
+   let requestId = '';
+   let finishReason: string | null = null;
+
+   try {
+     for await (const chunk of originalStream) {
+       // Record time to first token
+       if (!firstChunkTime) {
+         firstChunkTime = Date.now();
+       }
+
+       // Extract information from chunk (Responses API structure may differ)
+       if (chunk && typeof chunk === 'object') {
+         const chunkObj = chunk as any;
+
+         // Extract model and ID from chunk
+         if (chunkObj.model) model = chunkObj.model;
+         if (chunkObj.id) requestId = chunkObj.id;
+
+         // Check for final usage information
+         if (chunkObj.usage) {
+           finalUsage = chunkObj.usage;
+         }
+
+         // Check for finish reason
+         if (chunkObj.finish_reason) {
+           finishReason = chunkObj.finish_reason;
+         }
+       }
+
+       yield chunk;
+     }
+
+     // Track usage after stream completes
+     if (finalUsage) {
+       const duration = Date.now() - requestStartTime;
+       const timeToFirstToken = firstChunkTime ? firstChunkTime - requestStartTime : undefined;
+
+       trackUsageAsync({
+         requestId: requestId || randomUUID(),
+         model: model,
+         promptTokens: finalUsage.input_tokens || 0,
+         completionTokens: finalUsage.output_tokens || 0,
+         totalTokens: finalUsage.total_tokens || 0,
+         reasoningTokens: finalUsage.reasoning_tokens,
+         cachedTokens: finalUsage.cached_tokens,
+         duration,
+         finishReason: finishReason || 'completed',
+         usageMetadata,
+         isStreamed: true,
+         timeToFirstToken,
+         providerInfo: instanceProviders.get(instance),
+       });
+
+       logger.debug('Responses API streaming completed', {
+         model,
+         duration,
+         timeToFirstToken,
+         totalTokens: finalUsage.total_tokens,
+       });
+     }
+   } catch (error) {
+     logger.error('Error in Responses API stream wrapper', {
+       error: error instanceof Error ? error.message : String(error),
+     });
+     throw error;
+   }
+ }
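All of these handlers share one contract: the patched create() accepts an extra usageMetadata property, extractMetadata() splits it from the provider parameters, only cleanParams are forwarded to the real SDK method, and the metadata rides along into the asynchronous tracking call. A hedged call-site sketch, continuing from the patched openai client in the earlier example; the usageMetadata key shown is a hypothetical placeholder (the real UsageMetadata fields live in package/src/types/index.ts, which is not part of this hunk), and the extra property on the request type relies on the openai-augmentation module imported by the patcher:

const stream = await openai.chat.completions.create({
  model: 'gpt-4o-mini', // illustrative model
  messages: [{ role: 'user', content: 'Summarize this ticket.' }],
  stream: true,
  // Stripped by extractMetadata() before the params reach the OpenAI SDK;
  // 'traceId' is a hypothetical placeholder, not a documented field.
  usageMetadata: { traceId: 'example-trace' },
});

for await (const chunk of stream) {
  // handleStreamingRequest injects stream_options.include_usage, so the final
  // chunk carries usage and createTrackingStreamWrapper can record token counts.
  void chunk;
}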