@observyze/sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INSTRUMENTATION_SUMMARY.md +184 -0
- package/README.md +198 -0
- package/examples/auto-instrumentation.ts +210 -0
- package/package.json +43 -0
- package/src/client.ts +578 -0
- package/src/index.ts +21 -0
- package/src/instrumentation/README.md +227 -0
- package/src/instrumentation/anthropic.ts +233 -0
- package/src/instrumentation/index.ts +43 -0
- package/src/instrumentation/openai.ts +193 -0
- package/src/trace.ts +242 -0
- package/src/types.ts +102 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# Auto-Instrumentation
|
|
2
|
+
|
|
3
|
+
The Observyze SDK provides automatic instrumentation for popular LLM providers, allowing you to capture traces with minimal code changes.
|
|
4
|
+
|
|
5
|
+
## Supported Providers
|
|
6
|
+
|
|
7
|
+
- **OpenAI** - `chat.completions.create` (streaming and non-streaming)
|
|
8
|
+
- **Anthropic** - `messages.create` (streaming and non-streaming)
|
|
9
|
+
|
|
10
|
+
## Usage
|
|
11
|
+
|
|
12
|
+
### Basic Usage with `nw.wrap()`
|
|
13
|
+
|
|
14
|
+
The simplest way to enable auto-instrumentation is using the `wrap()` method:
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
import OpenAI from 'openai'
|
|
18
|
+
import { ObservyzeClient } from '@observyze/sdk'
|
|
19
|
+
|
|
20
|
+
// Initialize Observyze
|
|
21
|
+
const nw = new ObservyzeClient({
|
|
22
|
+
apiKey: process.env.Observyze_API_KEY!,
|
|
23
|
+
organizationId: 'your-org-id',
|
|
24
|
+
projectId: 'your-project-id'
|
|
25
|
+
})
|
|
26
|
+
|
|
27
|
+
// Initialize OpenAI client
|
|
28
|
+
const openai = new OpenAI({
|
|
29
|
+
apiKey: process.env.OPENAI_API_KEY!
|
|
30
|
+
})
|
|
31
|
+
|
|
32
|
+
// Wrap the client to enable auto-instrumentation
|
|
33
|
+
nw.wrap(openai)
|
|
34
|
+
|
|
35
|
+
// All calls are now automatically traced!
|
|
36
|
+
const response = await openai.chat.completions.create({
|
|
37
|
+
model: 'gpt-4',
|
|
38
|
+
messages: [
|
|
39
|
+
{ role: 'user', content: 'What is the capital of France?' }
|
|
40
|
+
]
|
|
41
|
+
})
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Anthropic Example
|
|
45
|
+
|
|
46
|
+
```typescript
|
|
47
|
+
import Anthropic from '@anthropic-ai/sdk'
|
|
48
|
+
import { ObservyzeClient } from '@observyze/sdk'
|
|
49
|
+
|
|
50
|
+
const nw = new ObservyzeClient({
|
|
51
|
+
apiKey: process.env.Observyze_API_KEY!
|
|
52
|
+
})
|
|
53
|
+
|
|
54
|
+
const anthropic = new Anthropic({
|
|
55
|
+
apiKey: process.env.ANTHROPIC_API_KEY!
|
|
56
|
+
})
|
|
57
|
+
|
|
58
|
+
// Wrap the client
|
|
59
|
+
nw.wrap(anthropic)
|
|
60
|
+
|
|
61
|
+
// All calls are automatically traced
|
|
62
|
+
const message = await anthropic.messages.create({
|
|
63
|
+
model: 'claude-3-opus-20240229',
|
|
64
|
+
max_tokens: 1024,
|
|
65
|
+
messages: [
|
|
66
|
+
{ role: 'user', content: 'Hello, Claude!' }
|
|
67
|
+
]
|
|
68
|
+
})
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Streaming Support
|
|
72
|
+
|
|
73
|
+
Auto-instrumentation fully supports streaming responses with zero added latency:
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
// OpenAI streaming
|
|
77
|
+
const stream = await openai.chat.completions.create({
|
|
78
|
+
model: 'gpt-4',
|
|
79
|
+
messages: [{ role: 'user', content: 'Tell me a story' }],
|
|
80
|
+
stream: true
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
for await (const chunk of stream) {
|
|
84
|
+
process.stdout.write(chunk.choices[0]?.delta?.content || '')
|
|
85
|
+
}
|
|
86
|
+
// Trace is automatically captured when stream completes
|
|
87
|
+
|
|
88
|
+
// Anthropic streaming
|
|
89
|
+
const anthropicStream = await anthropic.messages.create({
|
|
90
|
+
model: 'claude-3-opus-20240229',
|
|
91
|
+
max_tokens: 1024,
|
|
92
|
+
messages: [{ role: 'user', content: 'Tell me a story' }],
|
|
93
|
+
stream: true
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
for await (const event of anthropicStream) {
|
|
97
|
+
if (event.type === 'content_block_delta') {
|
|
98
|
+
process.stdout.write(event.delta.text || '')
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
// Trace is automatically captured when stream completes
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## What Gets Captured
|
|
105
|
+
|
|
106
|
+
For each LLM call, the SDK automatically captures:
|
|
107
|
+
|
|
108
|
+
### Inputs
|
|
109
|
+
- Model name
|
|
110
|
+
- Messages/prompts
|
|
111
|
+
- Parameters (temperature, max_tokens, etc.)
|
|
112
|
+
- System prompts (for Anthropic)
|
|
113
|
+
|
|
114
|
+
### Outputs
|
|
115
|
+
- Complete response content
|
|
116
|
+
- Response ID
|
|
117
|
+
- Finish reason/stop reason
|
|
118
|
+
|
|
119
|
+
### Metadata
|
|
120
|
+
- Provider (openai, anthropic)
|
|
121
|
+
- Model name
|
|
122
|
+
- Temperature
|
|
123
|
+
- Max tokens
|
|
124
|
+
- Latency (milliseconds)
|
|
125
|
+
- Streaming flag (for streaming responses)
|
|
126
|
+
|
|
127
|
+
### Token Usage
|
|
128
|
+
- Input tokens (prompt tokens)
|
|
129
|
+
- Output tokens (completion tokens)
|
|
130
|
+
- Total tokens
|
|
131
|
+
|
|
132
|
+
### Errors
|
|
133
|
+
- Error message
|
|
134
|
+
- Stack trace
|
|
135
|
+
- Error code (if available)
|
|
136
|
+
|
|
137
|
+
## Advanced Usage
|
|
138
|
+
|
|
139
|
+
### Provider-Specific Wrappers
|
|
140
|
+
|
|
141
|
+
If you need more control, you can use provider-specific wrappers:
|
|
142
|
+
|
|
143
|
+
```typescript
|
|
144
|
+
import { wrapOpenAI, wrapAnthropic } from '@observyze/sdk'
|
|
145
|
+
|
|
146
|
+
// Wrap OpenAI specifically
|
|
147
|
+
const wrappedOpenAI = wrapOpenAI(openai, nw)
|
|
148
|
+
|
|
149
|
+
// Wrap Anthropic specifically
|
|
150
|
+
const wrappedAnthropic = wrapAnthropic(anthropic, nw)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Standalone Usage
|
|
154
|
+
|
|
155
|
+
You can also import the `wrap` function directly. It patches the client in place and returns that same client instance:
|
|
156
|
+
|
|
157
|
+
```typescript
|
|
158
|
+
import { wrap } from '@observyze/sdk'
|
|
159
|
+
|
|
160
|
+
const wrappedClient = wrap(openai, nw)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## How It Works
|
|
164
|
+
|
|
165
|
+
The auto-instrumentation works by:
|
|
166
|
+
|
|
167
|
+
1. **Monkey-patching** - The SDK wraps the client's API methods
|
|
168
|
+
2. **Transparent proxying** - All calls pass through to the original client
|
|
169
|
+
3. **Automatic tracing** - Traces are created and captured automatically
|
|
170
|
+
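4. **Stream buffering** - For streaming responses, each chunk is passed through to your code as soon as it arrives while its text is also appended to an in-memory buffer; the assembled output is reported when the stream completes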
|
|
171
|
+
5. **Low overhead** - Instrumentation adds < 2ms overhead on the critical path
|
|
172
|
+
|
|
173
|
+
## Error Handling
|
|
174
|
+
|
|
175
|
+
The SDK is designed to never break your application:
|
|
176
|
+
|
|
177
|
+
- If tracing fails, the original LLM call still succeeds
|
|
178
|
+
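- Tracing errors are logged but never thrown; errors raised by the LLM provider itself are recorded on the span and re-thrown to your code unchanged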
|
|
179
|
+
- Network failures are handled with exponential backoff retry
|
|
180
|
+
- Traces are queued in-memory if the ingestion service is unreachable
|
|
181
|
+
|
|
182
|
+
## Performance
|
|
183
|
+
|
|
184
|
+
- **Non-streaming calls**: < 2ms overhead
|
|
185
|
+
- **Streaming calls**: No added wait between chunks (each chunk is yielded immediately; the SDK only appends its text to an in-memory buffer)
|
|
186
|
+
- **Memory usage**: Minimal - traces are batched and flushed automatically
|
|
187
|
+
- **Network**: Batched sends (up to 100 traces per request)
|
|
188
|
+
|
|
189
|
+
## Configuration
|
|
190
|
+
|
|
191
|
+
Auto-instrumentation respects all SDK configuration options:
|
|
192
|
+
|
|
193
|
+
```typescript
|
|
194
|
+
const nw = new ObservyzeClient({
|
|
195
|
+
apiKey: 'your-key',
|
|
196
|
+
batchSize: 100, // Batch up to 100 traces
|
|
197
|
+
flushInterval: 5000, // Flush every 5 seconds
|
|
198
|
+
dryRun: true, // Test mode - don't send traces
|
|
199
|
+
debug: true // Enable debug logging
|
|
200
|
+
})
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Testing
|
|
204
|
+
|
|
205
|
+
In test environments, set `dryRun: true` to prevent traces from being sent:
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
const nw = new ObservyzeClient({
|
|
209
|
+
apiKey: 'test-key',
|
|
210
|
+
dryRun: process.env.NODE_ENV === 'test'
|
|
211
|
+
})
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Limitations
|
|
215
|
+
|
|
216
|
+
- Only supports the listed providers (OpenAI, Anthropic)
|
|
217
|
+
- Requires the client to follow the standard API structure
|
|
218
|
+
- Custom client implementations may not be supported
|
|
219
|
+
|
|
220
|
+
## Future Support
|
|
221
|
+
|
|
222
|
+
Coming soon:
|
|
223
|
+
- Vercel AI SDK
|
|
224
|
+
- LangChain
|
|
225
|
+
- LlamaIndex
|
|
226
|
+
- Google Gemini
|
|
227
|
+
- Cohere
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Auto-Instrumentation
|
|
3
|
+
* Monkey-patches Anthropic client to automatically capture traces
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { Span } from '../trace'
|
|
7
|
+
import { SpanType } from '../types'
|
|
8
|
+
import type { ObservyzeClient } from '../client'
|
|
9
|
+
|
|
10
|
+
/**
 * Minimal structural view of an Anthropic client: only the method that the
 * instrumentation patches.
 */
interface AnthropicClient {
  messages: {
    create: Function
  }
}

/**
 * A content block in a request or response. Only `type` is always present;
 * `text` exists on text blocks, and other block kinds carry their own fields.
 */
interface AnthropicContentBlock {
  type: string
  text?: string
  [key: string]: any
}

/** One conversation turn sent to `messages.create`. */
interface AnthropicMessage {
  role: string
  content: string | AnthropicContentBlock[]
}

/** Request parameters read by the instrumentation; unknown keys are passed through. */
interface AnthropicMessageParams {
  model: string
  messages: AnthropicMessage[]
  max_tokens: number
  stream?: boolean
  temperature?: number
  // The system prompt may be a plain string or a list of content blocks.
  system?: string | AnthropicContentBlock[]
  [key: string]: any
}

/** Non-streaming response of `messages.create`. */
interface AnthropicMessageResponse {
  id: string
  type: 'message'
  role: 'assistant'
  // Not restricted to text blocks: a response may also contain e.g. tool-use blocks.
  content: AnthropicContentBlock[]
  model: string
  stop_reason: string | null
  usage: {
    input_tokens: number
    output_tokens: number
  }
}

/**
 * One server-sent event of a streaming response. Which optional fields are
 * present depends on `type` (`message_start`, `content_block_delta`,
 * `message_delta`, ...).
 */
interface AnthropicStreamEvent {
  type: string
  message?: AnthropicMessageResponse
  content_block?: AnthropicContentBlock
  delta?: {
    // `content_block_delta` events carry `type` and (for text) `text`;
    // `message_delta` events carry `stop_reason` instead.
    type?: string
    text?: string
    stop_reason?: string | null
    [key: string]: any
  }
  usage?: {
    input_tokens?: number
    output_tokens?: number
  }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Wrap an Anthropic client instance to enable auto-instrumentation.
 *
 * Replaces `client.messages.create` in place with a function that opens a
 * trace and an LLM span around every call, records the request parameters,
 * the response (or the thrown error), token usage and latency, and returns the
 * provider's result. Calls with a truthy `params.stream` are handed to
 * `wrapAnthropicStream`, which is then responsible for ending the span.
 *
 * Tracing is best-effort: an exception thrown by a tracing call is reported
 * with `console.warn` and never prevents or fails the Anthropic request.
 * Errors thrown by the Anthropic call itself are recorded on the span and
 * re-thrown unchanged.
 *
 * Wrapping a client that is already instrumented by the same `nwClient` is a
 * no-op, so repeated `wrap()` calls do not produce duplicate traces.
 *
 * @param client - Anthropic client; it is mutated in place.
 * @param nwClient - Observyze client used to start traces.
 * @returns The same `client` instance that was passed in.
 */
export function wrapAnthropic(client: AnthropicClient, nwClient: ObservyzeClient): AnthropicClient {
  // The patched function carries a marker naming the Observyze client that
  // installed it, so a second wrap with the same client can be detected.
  const instrumentedBy = Symbol.for('observyze.instrumentedBy')
  if ((client.messages.create as any)[instrumentedBy] === nwClient) {
    return client
  }

  const originalCreate = client.messages.create.bind(client.messages)

  const reportTracingFailure = (tracingError: unknown): void => {
    console.warn('Observyze SDK: tracing failed for anthropic.messages.create', tracingError)
  }

  const instrumentedCreate = async function (params: AnthropicMessageParams, options?: any) {
    let trace: ReturnType<ObservyzeClient['startTrace']> | undefined
    let span: Span | undefined

    // Set up tracing in its own try block: a failure here must not stop the
    // provider call below from being made.
    try {
      const startedTrace = nwClient.startTrace(`anthropic.messages.create`, {
        provider: 'anthropic',
        model: params.model
      })
      trace = startedTrace

      const llmSpan: Span = startedTrace.startSpan('messages.create', SpanType.LLM)
      // Publish the span before decorating it so it still gets ended if one
      // of the setters below throws.
      span = llmSpan

      llmSpan.setMetadata('model', params.model)
      llmSpan.setMetadata('provider', 'anthropic')
      if (params.temperature !== undefined) {
        llmSpan.setMetadata('temperature', params.temperature)
      }
      if (params.max_tokens !== undefined) {
        llmSpan.setMetadata('max_tokens', params.max_tokens)
      }
      if (params.system !== undefined) {
        llmSpan.setMetadata('system', params.system)
      }

      llmSpan.setInput({
        model: params.model,
        messages: params.messages,
        max_tokens: params.max_tokens,
        temperature: params.temperature,
        system: params.system
      })
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }

    // Ends the span and the trace independently so one failing does not leave
    // the other open.
    const closeTracing = (): void => {
      try {
        span?.end()
      } catch (tracingError) {
        reportTracingFailure(tracingError)
      }
      try {
        trace?.end()
      } catch (tracingError) {
        reportTracingFailure(tracingError)
      }
    }

    const startTime = Date.now()

    let response: any
    try {
      response = await originalCreate(params, options)
    } catch (error) {
      // Only provider failures reach this branch; record and re-throw as-is.
      try {
        span?.setMetadata('latency_ms', Date.now() - startTime)
        span?.setError(error as Error)
      } catch (tracingError) {
        reportTracingFailure(tracingError)
      }
      closeTracing()
      throw error
    }

    // Tracing could not be set up: return the provider result untouched.
    if (!span) {
      closeTracing()
      return response
    }

    // Streaming: the span is completed by the stream wrapper, not here.
    if (params.stream) {
      return wrapAnthropicStream(response, span, trace, startTime)
    }

    const latency = Date.now() - startTime
    try {
      const messageResponse = response as AnthropicMessageResponse

      span.setOutput({
        id: messageResponse.id,
        model: messageResponse.model,
        role: messageResponse.role,
        content: messageResponse.content,
        stop_reason: messageResponse.stop_reason
      })

      if (messageResponse.usage) {
        span.setTokens({
          input: messageResponse.usage.input_tokens,
          output: messageResponse.usage.output_tokens,
          total: messageResponse.usage.input_tokens + messageResponse.usage.output_tokens
        })
      }

      span.setMetadata('latency_ms', latency)
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }
    closeTracing()

    return response
  }

  ;(instrumentedCreate as any)[instrumentedBy] = nwClient
  client.messages.create = instrumentedCreate

  return client
}
|
|
150
|
+
|
|
151
|
+
/**
 * Wrap an Anthropic streaming response to capture the complete output.
 *
 * Every event is yielded to the caller as soon as it is read from `stream`.
 * While passing events through, the wrapper collects the message id and model
 * (`message_start`), the text deltas (`content_block_delta`), and the stop
 * reason and output token count (`message_delta`).
 *
 * The span and trace are finished exactly once, when iteration ends for any
 * reason:
 * - the stream is exhausted: the joined text, token usage and latency are recorded;
 * - the consumer stops early (e.g. `break`): the text received so far is recorded;
 * - the stream throws: the error is recorded on the span and re-thrown.
 *
 * Exceptions thrown by the tracing calls themselves are reported with
 * `console.warn` and never reach the consumer.
 *
 * The returned object forwards every member other than `Symbol.asyncIterator`
 * to the original stream, so extra members of the provider's stream object
 * stay usable. Only iteration through the returned object is instrumented.
 *
 * @param stream - The provider's stream of events.
 * @param span - Span to record output, tokens, latency and errors on.
 * @param trace - Owning trace; `end()` is called on it after the span ends.
 * @param startTime - `Date.now()` value taken just before the request was sent.
 * @returns An async iterable yielding the same events as `stream`.
 */
function wrapAnthropicStream(
  stream: AsyncIterable<AnthropicStreamEvent>,
  span: Span,
  trace: any,
  startTime: number
): AsyncIterable<AnthropicStreamEvent> {
  const bufferedChunks: string[] = []
  let messageId = ''
  let messageModel = ''
  let stopReason: string | null = null
  let inputTokens = 0
  let outputTokens = 0
  let finished = false

  const reportTracingFailure = (tracingError: unknown): void => {
    console.warn('Observyze SDK: tracing failed for anthropic stream', tracingError)
  }

  // Records the final state on the span and closes span + trace. Guarded by
  // `finished` so it runs at most once even if the stream is iterated again.
  const finish = (failure?: { error: unknown }): void => {
    if (finished) return
    finished = true

    const latency = Date.now() - startTime
    try {
      if (failure) {
        span.setMetadata('latency_ms', latency)
        span.setError(failure.error as Error)
      } else {
        span.setOutput({
          id: messageId,
          model: messageModel,
          content: bufferedChunks.join(''),
          stop_reason: stopReason
        })

        if (inputTokens > 0 || outputTokens > 0) {
          span.setTokens({
            input: inputTokens,
            output: outputTokens,
            total: inputTokens + outputTokens
          })
        }

        span.setMetadata('latency_ms', latency)
        span.setMetadata('streaming', true)
      }
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }

    try {
      span.end()
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }
    try {
      trace.end()
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }
  }

  // Extracts the fields of interest from one event without altering it.
  const observe = (event: AnthropicStreamEvent): void => {
    if (event.type === 'message_start' && event.message) {
      messageId = event.message.id
      messageModel = event.message.model
      if (event.message.usage) {
        inputTokens = event.message.usage.input_tokens
      }
    }

    if (event.type === 'content_block_delta' && event.delta?.text) {
      bufferedChunks.push(event.delta.text)
    }

    if (event.type === 'message_delta' && event.delta) {
      const reason = (event.delta as { stop_reason?: string | null }).stop_reason
      if (reason) {
        stopReason = reason
      }
      if (event.usage?.output_tokens) {
        outputTokens = event.usage.output_tokens
      }
    }
  }

  const iterate = async function* (): AsyncGenerator<AnthropicStreamEvent> {
    let failure: { error: unknown } | undefined
    try {
      for await (const event of stream) {
        observe(event)
        // Yield straight away: nothing is held back from the caller.
        yield event
      }
    } catch (error) {
      failure = { error }
      throw error
    } finally {
      // Also reached when the consumer stops early, which resumes the
      // generator with a return instead of running the code after the loop.
      finish(failure)
    }
  }

  // A Proxy needs an object target; fall back to a bare iterable otherwise.
  if ((typeof stream !== 'object' && typeof stream !== 'function') || stream === null) {
    return { [Symbol.asyncIterator]: iterate }
  }

  return new Proxy(stream, {
    get(target, property) {
      if (property === Symbol.asyncIterator) {
        return iterate
      }
      const value = Reflect.get(target, property, target)
      // Bind methods to the real stream so they see their own `this`.
      return typeof value === 'function' ? value.bind(target) : value
    }
  })
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-Instrumentation API
|
|
3
|
+
* Provides nw.wrap() API for instrumenting LLM clients
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { wrapOpenAI } from './openai'
|
|
7
|
+
import { wrapAnthropic } from './anthropic'
|
|
8
|
+
import type { ObservyzeClient } from '../client'
|
|
9
|
+
|
|
10
|
+
/** Structural shape used to recognise an OpenAI client. */
type OpenAIShapedClient = {
  chat: {
    completions: {
      create: Function
    }
  }
}

/** Structural shape used to recognise an Anthropic client. */
type AnthropicShapedClient = {
  messages: {
    create: Function
  }
}

/**
 * Client types that auto-instrumentation accepts: anything shaped like an
 * OpenAI client or like an Anthropic client.
 */
export type SupportedClient = OpenAIShapedClient | AnthropicShapedClient
|
|
16
|
+
|
|
17
|
+
/**
 * Detect the type of LLM client and apply the matching instrumentation.
 *
 * Detection is structural: a client with a callable
 * `chat.completions.create` is treated as OpenAI, otherwise a client with a
 * callable `messages.create` is treated as Anthropic. The OpenAI check runs
 * first, so a client matching both shapes is instrumented as OpenAI only.
 *
 * @param client - The LLM client to instrument.
 * @param nwClient - Observyze client that receives the traces.
 * @returns Whatever the provider-specific wrapper returns, typed as `T`.
 * @throws Error if `client` matches neither shape. This includes `null`,
 *   primitives and clients whose `chat` or `messages` member is not an object,
 *   which would otherwise surface as a raw `TypeError` from the `in` operator.
 */
export function wrap<T extends SupportedClient>(
  client: T,
  nwClient: ObservyzeClient
): T {
  // Property lookups below are only safe on non-null objects and functions.
  const isObjectLike = (value: unknown): value is Record<string, unknown> =>
    value !== null && (typeof value === 'object' || typeof value === 'function')

  const candidate: unknown = client

  if (isObjectLike(candidate)) {
    // Detect OpenAI client
    const chat = candidate.chat
    if (
      isObjectLike(chat) &&
      isObjectLike(chat.completions) &&
      typeof chat.completions.create === 'function'
    ) {
      return wrapOpenAI(client as any, nwClient) as T
    }

    // Detect Anthropic client
    const messages = candidate.messages
    if (isObjectLike(messages) && typeof messages.create === 'function') {
      return wrapAnthropic(client as any, nwClient) as T
    }
  }

  // Unknown client type
  throw new Error(
    'Observyze SDK: Unsupported client type. ' +
    'Supported clients: OpenAI, Anthropic'
  )
}
|
|
40
|
+
|
|
41
|
+
// Export individual wrappers for advanced use cases
|
|
42
|
+
export { wrapOpenAI } from './openai'
|
|
43
|
+
export { wrapAnthropic } from './anthropic'
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Auto-Instrumentation
|
|
3
|
+
* Monkey-patches OpenAI client to automatically capture traces
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { Span } from '../trace'
|
|
7
|
+
import { SpanType } from '../types'
|
|
8
|
+
import type { ObservyzeClient } from '../client'
|
|
9
|
+
|
|
10
|
+
/**
 * Minimal structural view of an OpenAI client: only the method that the
 * instrumentation patches.
 */
interface OpenAIClient {
  chat: {
    completions: {
      create: Function
    }
  }
}

/** One chat message, either sent in a request or returned in a choice. */
interface OpenAIMessage {
  role: string
  // Besides plain text, content may be `null` (e.g. an assistant message that
  // only carries tool calls) or a list of typed content parts.
  content: string | null | Array<{ type: string; [key: string]: any }>
}

/** Request parameters read by the instrumentation; unknown keys are passed through. */
interface OpenAICompletionParams {
  model: string
  messages: OpenAIMessage[]
  stream?: boolean
  temperature?: number
  max_tokens?: number
  [key: string]: any
}

/** Token accounting as reported by the API. */
interface OpenAIUsage {
  prompt_tokens: number
  completion_tokens: number
  total_tokens: number
}

/** Non-streaming response of `chat.completions.create`. */
interface OpenAICompletionResponse {
  id: string
  model: string
  choices: Array<{
    message: OpenAIMessage
    finish_reason: string
  }>
  usage?: OpenAIUsage
}

/** One chunk of a streaming response. */
interface OpenAIStreamChunk {
  id: string
  model: string
  choices: Array<{
    delta: {
      role?: string
      content?: string | null
    }
    finish_reason: string | null
  }>
  // Only present when the request asked for usage to be included in the stream.
  usage?: OpenAIUsage | null
}
|
|
57
|
+
|
|
58
|
+
/**
 * Wrap an OpenAI client instance to enable auto-instrumentation.
 *
 * Replaces `client.chat.completions.create` in place with a function that
 * opens a trace and an LLM span around every call, records the request
 * parameters, the response (or the thrown error), token usage and latency, and
 * returns the provider's result. Calls with a truthy `params.stream` are
 * handed to `wrapOpenAIStream`, which is then responsible for ending the span.
 *
 * Tracing is best-effort: an exception thrown by a tracing call is reported
 * with `console.warn` and never prevents or fails the OpenAI request. Errors
 * thrown by the OpenAI call itself are recorded on the span and re-thrown
 * unchanged.
 *
 * Wrapping a client that is already instrumented by the same `nwClient` is a
 * no-op, so repeated `wrap()` calls do not produce duplicate traces.
 *
 * @param client - OpenAI client; it is mutated in place.
 * @param nwClient - Observyze client used to start traces.
 * @returns The same `client` instance that was passed in.
 */
export function wrapOpenAI(client: OpenAIClient, nwClient: ObservyzeClient): OpenAIClient {
  // The patched function carries a marker naming the Observyze client that
  // installed it, so a second wrap with the same client can be detected.
  const instrumentedBy = Symbol.for('observyze.instrumentedBy')
  if ((client.chat.completions.create as any)[instrumentedBy] === nwClient) {
    return client
  }

  const originalCreate = client.chat.completions.create.bind(client.chat.completions)

  const reportTracingFailure = (tracingError: unknown): void => {
    console.warn('Observyze SDK: tracing failed for openai.chat.completions.create', tracingError)
  }

  const instrumentedCreate = async function (params: OpenAICompletionParams, options?: any) {
    let trace: ReturnType<ObservyzeClient['startTrace']> | undefined
    let span: Span | undefined

    // Set up tracing in its own try block: a failure here must not stop the
    // provider call below from being made.
    try {
      const startedTrace = nwClient.startTrace(`openai.chat.completions.create`, {
        provider: 'openai',
        model: params.model
      })
      trace = startedTrace

      const llmSpan: Span = startedTrace.startSpan('chat.completions.create', SpanType.LLM)
      // Publish the span before decorating it so it still gets ended if one
      // of the setters below throws.
      span = llmSpan

      llmSpan.setMetadata('model', params.model)
      llmSpan.setMetadata('provider', 'openai')
      if (params.temperature !== undefined) {
        llmSpan.setMetadata('temperature', params.temperature)
      }
      if (params.max_tokens !== undefined) {
        llmSpan.setMetadata('max_tokens', params.max_tokens)
      }

      llmSpan.setInput({
        model: params.model,
        messages: params.messages,
        temperature: params.temperature,
        max_tokens: params.max_tokens
      })
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }

    // Ends the span and the trace independently so one failing does not leave
    // the other open.
    const closeTracing = (): void => {
      try {
        span?.end()
      } catch (tracingError) {
        reportTracingFailure(tracingError)
      }
      try {
        trace?.end()
      } catch (tracingError) {
        reportTracingFailure(tracingError)
      }
    }

    const startTime = Date.now()

    let response: any
    try {
      response = await originalCreate(params, options)
    } catch (error) {
      // Only provider failures reach this branch; record and re-throw as-is.
      try {
        span?.setMetadata('latency_ms', Date.now() - startTime)
        span?.setError(error as Error)
      } catch (tracingError) {
        reportTracingFailure(tracingError)
      }
      closeTracing()
      throw error
    }

    // Tracing could not be set up: return the provider result untouched.
    if (!span) {
      closeTracing()
      return response
    }

    // Streaming: the span is completed by the stream wrapper, not here.
    if (params.stream) {
      return wrapOpenAIStream(response, span, trace, startTime)
    }

    const latency = Date.now() - startTime
    try {
      const completionResponse = response as OpenAICompletionResponse

      span.setOutput({
        id: completionResponse.id,
        model: completionResponse.model,
        choices: completionResponse.choices
      })

      if (completionResponse.usage) {
        span.setTokens({
          input: completionResponse.usage.prompt_tokens,
          output: completionResponse.usage.completion_tokens,
          total: completionResponse.usage.total_tokens
        })
      }

      span.setMetadata('latency_ms', latency)
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }
    closeTracing()

    return response
  }

  ;(instrumentedCreate as any)[instrumentedBy] = nwClient
  client.chat.completions.create = instrumentedCreate

  return client
}
|
|
138
|
+
|
|
139
|
+
/**
 * Wrap an OpenAI streaming response to capture the complete output.
 *
 * Every chunk is yielded to the caller as soon as it is read from `stream`.
 * While passing chunks through, the wrapper collects the stream id and model,
 * the text deltas of the first choice, that choice's finish reason, and the
 * token usage if a chunk carries a `usage` object.
 *
 * The span and trace are finished exactly once, when iteration ends for any
 * reason:
 * - the stream is exhausted: the joined text, finish reason, token usage (if
 *   reported) and latency are recorded;
 * - the consumer stops early (e.g. `break`): the text received so far is recorded;
 * - the stream throws: the error is recorded on the span and re-thrown.
 *
 * Exceptions thrown by the tracing calls themselves are reported with
 * `console.warn` and never reach the consumer.
 *
 * The returned object forwards every member other than `Symbol.asyncIterator`
 * to the original stream, so extra members of the provider's stream object
 * stay usable. Only iteration through the returned object is instrumented.
 *
 * @param stream - The provider's stream of chunks.
 * @param span - Span to record output, tokens, latency and errors on.
 * @param trace - Owning trace; `end()` is called on it after the span ends.
 * @param startTime - `Date.now()` value taken just before the request was sent.
 * @returns An async iterable yielding the same chunks as `stream`.
 */
function wrapOpenAIStream(
  stream: AsyncIterable<OpenAIStreamChunk>,
  span: Span,
  trace: any,
  startTime: number
): AsyncIterable<OpenAIStreamChunk> {
  type ReportedUsage = { prompt_tokens: number; completion_tokens: number; total_tokens: number }

  const bufferedChunks: string[] = []
  let streamId = ''
  let streamModel = ''
  let finishReason: string | null = null
  let reportedUsage: ReportedUsage | undefined
  let finished = false

  const reportTracingFailure = (tracingError: unknown): void => {
    console.warn('Observyze SDK: tracing failed for openai stream', tracingError)
  }

  // Records the final state on the span and closes span + trace. Guarded by
  // `finished` so it runs at most once even if the stream is iterated again.
  const finish = (failure?: { error: unknown }): void => {
    if (finished) return
    finished = true

    const latency = Date.now() - startTime
    try {
      if (failure) {
        span.setMetadata('latency_ms', latency)
        span.setError(failure.error as Error)
      } else {
        span.setOutput({
          id: streamId,
          model: streamModel,
          content: bufferedChunks.join(''),
          finish_reason: finishReason
        })

        if (reportedUsage) {
          span.setTokens({
            input: reportedUsage.prompt_tokens,
            output: reportedUsage.completion_tokens,
            total: reportedUsage.total_tokens
          })
        }

        span.setMetadata('latency_ms', latency)
        span.setMetadata('streaming', true)
      }
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }

    try {
      span.end()
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }
    try {
      trace.end()
    } catch (tracingError) {
      reportTracingFailure(tracingError)
    }
  }

  // Extracts the fields of interest from one chunk without altering it.
  const observe = (chunk: OpenAIStreamChunk): void => {
    if (chunk.id) streamId = chunk.id
    if (chunk.model) streamModel = chunk.model

    // A chunk may have no choices at all (e.g. a usage-only chunk).
    const firstChoice = chunk.choices?.[0]
    if (firstChoice?.delta?.content) {
      bufferedChunks.push(firstChoice.delta.content)
    }
    if (firstChoice?.finish_reason) {
      finishReason = firstChoice.finish_reason
    }

    const usage = (chunk as { usage?: ReportedUsage | null }).usage
    if (usage) {
      reportedUsage = usage
    }
  }

  const iterate = async function* (): AsyncGenerator<OpenAIStreamChunk> {
    let failure: { error: unknown } | undefined
    try {
      for await (const chunk of stream) {
        observe(chunk)
        // Yield straight away: nothing is held back from the caller.
        yield chunk
      }
    } catch (error) {
      failure = { error }
      throw error
    } finally {
      // Also reached when the consumer stops early, which resumes the
      // generator with a return instead of running the code after the loop.
      finish(failure)
    }
  }

  // A Proxy needs an object target; fall back to a bare iterable otherwise.
  if ((typeof stream !== 'object' && typeof stream !== 'function') || stream === null) {
    return { [Symbol.asyncIterator]: iterate }
  }

  return new Proxy(stream, {
    get(target, property) {
      if (property === Symbol.asyncIterator) {
        return iterate
      }
      const value = Reflect.get(target, property, target)
      // Bind methods to the real stream so they see their own `this`.
      return typeof value === 'function' ? value.bind(target) : value
    }
  })
}
|