@observyze/sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
1
+ # Auto-Instrumentation
2
+
3
+ The Observyze SDK provides automatic instrumentation for popular LLM providers, allowing you to capture traces with minimal code changes.
4
+
5
+ ## Supported Providers
6
+
7
+ - **OpenAI** - `chat.completions.create` (streaming and non-streaming)
8
+ - **Anthropic** - `messages.create` (streaming and non-streaming)
9
+
10
+ ## Usage
11
+
12
+ ### Basic Usage with `nw.wrap()`
13
+
14
+ The simplest way to enable auto-instrumentation is using the `wrap()` method:
15
+
16
+ ```typescript
17
+ import OpenAI from 'openai'
18
+ import { ObservyzeClient } from '@observyze/sdk'
19
+
20
+ // Initialize Observyze
21
+ const nw = new ObservyzeClient({
22
+ apiKey: process.env.Observyze_API_KEY!,
23
+ organizationId: 'your-org-id',
24
+ projectId: 'your-project-id'
25
+ })
26
+
27
+ // Initialize OpenAI client
28
+ const openai = new OpenAI({
29
+ apiKey: process.env.OPENAI_API_KEY!
30
+ })
31
+
32
+ // Wrap the client to enable auto-instrumentation
33
+ nw.wrap(openai)
34
+
35
+ // All calls are now automatically traced!
36
+ const response = await openai.chat.completions.create({
37
+ model: 'gpt-4',
38
+ messages: [
39
+ { role: 'user', content: 'What is the capital of France?' }
40
+ ]
41
+ })
42
+ ```
43
+
44
+ ### Anthropic Example
45
+
46
+ ```typescript
47
+ import Anthropic from '@anthropic-ai/sdk'
48
+ import { ObservyzeClient } from '@observyze/sdk'
49
+
50
+ const nw = new ObservyzeClient({
51
+ apiKey: process.env.Observyze_API_KEY!
52
+ })
53
+
54
+ const anthropic = new Anthropic({
55
+ apiKey: process.env.ANTHROPIC_API_KEY!
56
+ })
57
+
58
+ // Wrap the client
59
+ nw.wrap(anthropic)
60
+
61
+ // All calls are automatically traced
62
+ const message = await anthropic.messages.create({
63
+ model: 'claude-3-opus-20240229',
64
+ max_tokens: 1024,
65
+ messages: [
66
+ { role: 'user', content: 'Hello, Claude!' }
67
+ ]
68
+ })
69
+ ```
70
+
71
+ ### Streaming Support
72
+
73
+ Auto-instrumentation fully supports streaming responses with zero added latency:
74
+
75
+ ```typescript
76
+ // OpenAI streaming
77
+ const stream = await openai.chat.completions.create({
78
+ model: 'gpt-4',
79
+ messages: [{ role: 'user', content: 'Tell me a story' }],
80
+ stream: true
81
+ })
82
+
83
+ for await (const chunk of stream) {
84
+ process.stdout.write(chunk.choices[0]?.delta?.content || '')
85
+ }
86
+ // Trace is automatically captured when stream completes
87
+
88
+ // Anthropic streaming
89
+ const anthropicStream = await anthropic.messages.create({
90
+ model: 'claude-3-opus-20240229',
91
+ max_tokens: 1024,
92
+ messages: [{ role: 'user', content: 'Tell me a story' }],
93
+ stream: true
94
+ })
95
+
96
+ for await (const event of anthropicStream) {
97
+ if (event.type === 'content_block_delta') {
98
+ process.stdout.write(event.delta.text || '')
99
+ }
100
+ }
101
+ // Trace is automatically captured when stream completes
102
+ ```
103
+
104
+ ## What Gets Captured
105
+
106
+ For each LLM call, the SDK automatically captures:
107
+
108
+ ### Inputs
109
+ - Model name
110
+ - Messages/prompts
111
+ - Parameters (temperature, max_tokens, etc.)
112
+ - System prompts (for Anthropic)
113
+
114
+ ### Outputs
115
+ - Complete response content
116
+ - Response ID
117
+ - Finish reason/stop reason
118
+
119
+ ### Metadata
120
+ - Provider (openai, anthropic)
121
+ - Model name
122
+ - Temperature
123
+ - Max tokens
124
+ - Latency (milliseconds)
125
+ - Streaming flag (for streaming responses)
126
+
127
+ ### Token Usage
128
+ - Input tokens (prompt tokens)
129
+ - Output tokens (completion tokens)
130
+ - Total tokens
131
+
132
+ ### Errors
133
+ - Error message
134
+ - Stack trace
135
+ - Error code (if available)
136
+
137
+ ## Advanced Usage
138
+
139
+ ### Provider-Specific Wrappers
140
+
141
+ If you need more control, you can use provider-specific wrappers:
142
+
143
+ ```typescript
144
+ import { wrapOpenAI, wrapAnthropic } from '@observyze/sdk'
145
+
146
+ // Wrap OpenAI specifically
147
+ const wrappedOpenAI = wrapOpenAI(openai, nw)
148
+
149
+ // Wrap Anthropic specifically
150
+ const wrappedAnthropic = wrapAnthropic(anthropic, nw)
151
+ ```
152
+
153
+ ### Standalone Usage
154
+
155
+ You can also import the `wrap` function directly:
156
+
157
+ ```typescript
158
+ import { wrap } from '@observyze/sdk'
159
+
160
+ const wrappedClient = wrap(openai, nw)
161
+ ```
162
+
163
+ ## How It Works
164
+
165
+ The auto-instrumentation works by:
166
+
167
+ 1. **Monkey-patching** - The SDK wraps the client's API methods
168
+ 2. **Transparent proxying** - All calls pass through to the original client
169
+ 3. **Automatic tracing** - Traces are created and captured automatically
170
+ 4. **Stream buffering** - For streaming responses, chunks are buffered and reported when the stream completes
171
+ 5. **Minimal overhead** - Instrumentation adds < 2ms overhead on the critical path
172
+
173
+ ## Error Handling
174
+
175
+ The SDK is designed to never break your application:
176
+
177
+ - If tracing fails, the original LLM call still succeeds
178
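+ - Errors raised by the instrumentation itself are logged but never thrown; errors from the provider call are recorded on the trace and re-thrown unchanged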
179
+ - Network failures are handled with exponential backoff retry
180
+ - Traces are queued in-memory if the ingestion service is unreachable
181
+
182
+ ## Performance
183
+
184
+ - **Non-streaming calls**: < 2ms overhead
185
+ - **Streaming calls**: Zero added latency (buffering happens in parallel)
186
+ - **Memory usage**: Minimal - traces are batched and flushed automatically
187
+ - **Network**: Batched sends (up to 100 traces per request)
188
+
189
+ ## Configuration
190
+
191
+ Auto-instrumentation respects all SDK configuration options:
192
+
193
+ ```typescript
194
+ const nw = new ObservyzeClient({
195
+ apiKey: 'your-key',
196
+ batchSize: 100, // Batch up to 100 traces
197
+ flushInterval: 5000, // Flush every 5 seconds
198
+ dryRun: true, // Test mode - don't send traces
199
+ debug: true // Enable debug logging
200
+ })
201
+ ```
202
+
203
+ ## Testing
204
+
205
+ In test environments, set `dryRun: true` to prevent traces from being sent:
206
+
207
+ ```typescript
208
+ const nw = new ObservyzeClient({
209
+ apiKey: 'test-key',
210
+ dryRun: process.env.NODE_ENV === 'test'
211
+ })
212
+ ```
213
+
214
+ ## Limitations
215
+
216
+ - Only supports the listed providers (OpenAI, Anthropic)
217
+ - Requires the client to follow the standard API structure
218
+ - Custom client implementations may not be supported
219
+
220
+ ## Future Support
221
+
222
+ Coming soon:
223
+ - Vercel AI SDK
224
+ - LangChain
225
+ - LlamaIndex
226
+ - Google Gemini
227
+ - Cohere
@@ -0,0 +1,233 @@
1
+ /**
2
+ * Anthropic Auto-Instrumentation
3
+ * Monkey-patches Anthropic client to automatically capture traces
4
+ */
5
+
6
+ import { Span } from '../trace'
7
+ import { SpanType } from '../types'
8
+ import type { ObservyzeClient } from '../client'
9
+
10
/**
 * Minimal structural view of an Anthropic client: only the method that the
 * instrumentation patches.
 */
interface AnthropicClient {
  messages: {
    // A callable signature rather than the bare `Function` type, so the
    // patched method stays invocable and bindable without losing all checking.
    create: (...args: any[]) => any
  }
}

/** One entry of the `messages` array passed to `messages.create`. */
interface AnthropicMessage {
  role: string
  content: string | Array<{ type: string; text?: string; [key: string]: any }>
}

/**
 * Request parameters the instrumentation reads. Any other key is accepted
 * through the index signature and forwarded untouched.
 */
interface AnthropicMessageParams {
  model: string
  messages: AnthropicMessage[]
  max_tokens: number
  stream?: boolean
  temperature?: number
  system?: string
  [key: string]: any
}

/** Fields of a non-streaming response that are recorded on the span. */
interface AnthropicMessageResponse {
  id: string
  type: 'message'
  role: 'assistant'
  // Same loose block shape as the request side: responses are not guaranteed
  // to contain only text blocks (NOTE(review): e.g. tool-use blocks - confirm
  // against the Anthropic API reference).
  content: Array<{ type: string; text?: string; [key: string]: any }>
  model: string
  stop_reason: string | null
  usage: {
    input_tokens: number
    output_tokens: number
  }
}

/**
 * A single server-sent event of a streaming response. Which optional members
 * are present depends on `type`.
 */
interface AnthropicStreamEvent {
  type: string
  message?: AnthropicMessageResponse
  content_block?: {
    type: string
    text?: string
  }
  delta?: {
    type?: string
    text?: string
    // Read by the stream wrapper on `message_delta` events.
    stop_reason?: string | null
  }
  usage?: {
    input_tokens?: number
    output_tokens?: number
  }
}
63
+
64
/**
 * Wrap an Anthropic client instance to enable auto-instrumentation.
 *
 * Replaces `client.messages.create` in place with a version that opens a
 * trace and an LLM span around every call, records the request, the response
 * (or the error) and the latency, then ends both. Streaming responses are
 * handed to `wrapAnthropicStream`, which ends the span when the stream ends.
 *
 * Tracing is best-effort: an exception raised by a tracing call is logged
 * with `console.warn` and swallowed, so only errors from the Anthropic call
 * itself reach the caller.
 *
 * @param client - Anthropic client to patch (mutated in place).
 * @param nwClient - Observyze client used to start traces.
 * @returns The same `client` instance, now instrumented.
 */
export function wrapAnthropic(client: AnthropicClient, nwClient: ObservyzeClient): AnthropicClient {
  const originalCreate = client.messages.create.bind(client.messages)

  // Runs one instrumentation step. A failure is logged and turned into
  // `undefined` so that tracing can never break the caller's LLM call.
  const guarded = <T>(step: () => T): T | undefined => {
    try {
      return step()
    } catch (tracingError) {
      console.warn('Observyze SDK: Anthropic instrumentation failed', tracingError)
      return undefined
    }
  }

  client.messages.create = async function (params: AnthropicMessageParams, options?: any) {
    // Start the trace and span and capture the request
    const started = guarded(() => {
      const trace = nwClient.startTrace(`anthropic.messages.create`, {
        provider: 'anthropic',
        model: params.model
      })

      const span = trace.startSpan('messages.create', SpanType.LLM)
      span.setMetadata('model', params.model)
      span.setMetadata('provider', 'anthropic')

      if (params.temperature !== undefined) {
        span.setMetadata('temperature', params.temperature)
      }
      if (params.max_tokens !== undefined) {
        span.setMetadata('max_tokens', params.max_tokens)
      }
      if (params.system !== undefined) {
        span.setMetadata('system', params.system)
      }

      span.setInput({
        model: params.model,
        messages: params.messages,
        max_tokens: params.max_tokens,
        temperature: params.temperature,
        system: params.system
      })

      return { trace, span }
    })

    // Tracing could not be set up: perform the call untraced instead of failing
    if (!started) {
      return originalCreate(params, options)
    }

    const { trace, span } = started
    const startTime = Date.now()

    // `guarded` never throws, so this catch only ever sees provider errors
    try {
      const response = await originalCreate(params, options)

      // Streaming: the span is ended by the stream wrapper. If wrapping
      // fails, hand back the raw stream rather than breaking the call.
      if (params.stream) {
        return guarded(() => wrapAnthropicStream(response, span, trace, startTime)) ?? response
      }

      // Non-streaming: record output, token usage and latency
      const latency = Date.now() - startTime
      guarded(() => {
        const messageResponse = response as AnthropicMessageResponse

        span.setOutput({
          id: messageResponse.id,
          model: messageResponse.model,
          role: messageResponse.role,
          content: messageResponse.content,
          stop_reason: messageResponse.stop_reason
        })

        if (messageResponse.usage) {
          span.setTokens({
            input: messageResponse.usage.input_tokens,
            output: messageResponse.usage.output_tokens,
            total: messageResponse.usage.input_tokens + messageResponse.usage.output_tokens
          })
        }

        span.setMetadata('latency_ms', latency)
      })

      // Ended in a separate step so a failed recording still closes the span
      guarded(() => {
        span.end()
        trace.end()
      })

      return response
    } catch (error) {
      const latency = Date.now() - startTime
      guarded(() => {
        span.setMetadata('latency_ms', latency)
        span.setError(error as Error)
      })
      guarded(() => {
        span.end()
        trace.end()
      })
      // The provider error always reaches the caller unchanged
      throw error
    }
  }

  return client
}
150
+
151
/**
 * Wrap an Anthropic streaming response to capture complete output.
 *
 * Events are yielded to the caller unchanged as they arrive. While they pass
 * through, the wrapper collects the message id and model, the text deltas,
 * the stop reason and the token counts. The span and trace are ended exactly
 * once:
 * - when the stream is exhausted (output, tokens, latency, `streaming` flag),
 * - when the consumer stops early, e.g. `break` (same, with the partial text
 *   received so far),
 * - when iteration throws (latency and error; the error is re-thrown).
 *
 * Every other member of the original stream object is still reachable on the
 * returned value. Only iteration through the returned value is traced.
 * Failures of the tracing calls are logged and swallowed.
 *
 * @param stream - The stream returned by `messages.create({ stream: true })`.
 * @param span - Span to record the output on and to end.
 * @param trace - Trace to end together with the span.
 * @param startTime - `Date.now()` timestamp taken before the request was sent.
 * @returns An async iterable yielding the same events as `stream`.
 */
function wrapAnthropicStream(
  stream: AsyncIterable<AnthropicStreamEvent>,
  span: Span,
  trace: any,
  startTime: number
): AsyncIterable<AnthropicStreamEvent> {
  const bufferedChunks: string[] = []
  let messageId = ''
  let messageModel = ''
  let stopReason: string | null = null
  let inputTokens = 0
  let outputTokens = 0
  let finalized = false

  // Ends the span and trace once; later calls are no-ops. Tracing failures
  // are logged instead of being thrown into the consumer's loop.
  const finalize = (failed: boolean, error?: unknown): void => {
    if (finalized) return
    finalized = true

    const latency = Date.now() - startTime
    try {
      if (failed) {
        span.setMetadata('latency_ms', latency)
        span.setError(error as Error)
      } else {
        span.setOutput({
          id: messageId,
          model: messageModel,
          content: bufferedChunks.join(''),
          stop_reason: stopReason
        })

        // Set token usage if available
        if (inputTokens > 0 || outputTokens > 0) {
          span.setTokens({
            input: inputTokens,
            output: outputTokens,
            total: inputTokens + outputTokens
          })
        }

        span.setMetadata('latency_ms', latency)
        span.setMetadata('streaming', true)
      }
      span.end()
      trace.end()
    } catch (tracingError) {
      console.warn('Observyze SDK: failed to record Anthropic stream trace', tracingError)
    }
  }

  const iterate = async function* (): AsyncGenerator<AnthropicStreamEvent> {
    try {
      for await (const event of stream) {
        // Track message metadata
        if (event.type === 'message_start' && event.message) {
          messageId = event.message.id
          messageModel = event.message.model
          if (event.message.usage) {
            inputTokens = event.message.usage.input_tokens
          }
        }

        // Buffer content deltas
        if (event.type === 'content_block_delta' && event.delta?.text) {
          bufferedChunks.push(event.delta.text)
        }

        // Track message completion
        if (event.type === 'message_delta' && event.delta) {
          if ((event.delta as any).stop_reason) {
            stopReason = (event.delta as any).stop_reason
          }
          if (event.usage?.output_tokens) {
            outputTokens = event.usage.output_tokens
          }
        }

        // Yield the event to the caller (no added latency)
        yield event
      }
    } catch (error) {
      finalize(true, error)
      throw error
    } finally {
      // Runs on normal completion AND when the consumer stops early (the
      // generator is then closed via return(), which skips the code after
      // the loop). No-op if the error path already finalized.
      finalize(false)
    }
  }

  // Nothing to proxy: fall back to a bare async iterable
  if (typeof stream !== 'object' || stream === null) {
    return { [Symbol.asyncIterator]: iterate }
  }

  // Only iteration is intercepted; everything else is forwarded to the
  // original stream. Methods are bound to the original so they keep working
  // when invoked through the proxy.
  return new Proxy(stream, {
    get(target, prop) {
      if (prop === Symbol.asyncIterator) {
        return iterate
      }
      const value = Reflect.get(target, prop, target)
      return typeof value === 'function' ? value.bind(target) : value
    }
  })
}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Auto-Instrumentation API
3
+ * Provides nw.wrap() API for instrumenting LLM clients
4
+ */
5
+
6
+ import { wrapOpenAI } from './openai'
7
+ import { wrapAnthropic } from './anthropic'
8
+ import type { ObservyzeClient } from '../client'
9
+
10
/**
 * Supported client types for auto-instrumentation.
 *
 * Purely structural: a client qualifies by exposing the method that gets
 * patched. The methods are typed as generic callables rather than the bare
 * `Function` type.
 */
export type SupportedClient =
  | { chat: { completions: { create: (...args: any[]) => any } } } // OpenAI
  | { messages: { create: (...args: any[]) => any } } // Anthropic
16
+
17
/**
 * Detect the type of LLM client and apply appropriate instrumentation.
 *
 * Detection is structural and checked in this order:
 * 1. OpenAI - `client.chat.completions.create` is a function
 * 2. Anthropic - `client.messages.create` is a function
 *
 * The probes never throw on their own, so `null`, primitives, or clients
 * missing the `create` method all end in the same descriptive error.
 *
 * @param client - The LLM client to instrument (patched in place by the provider wrapper).
 * @param nwClient - Observyze client that receives the traces.
 * @returns The value returned by the provider-specific wrapper.
 * @throws Error if `client` matches none of the supported shapes.
 */
export function wrap<T extends SupportedClient>(
  client: T,
  nwClient: ObservyzeClient
): T {
  // Loosely typed view for runtime probing; optional chaining makes the
  // checks safe for null and for non-object members.
  const candidate = client as any

  // Detect OpenAI client
  if (typeof candidate?.chat?.completions?.create === 'function') {
    return wrapOpenAI(candidate, nwClient) as T
  }

  // Detect Anthropic client
  if (typeof candidate?.messages?.create === 'function') {
    return wrapAnthropic(candidate, nwClient) as T
  }

  // Unknown client type
  throw new Error(
    'Observyze SDK: Unsupported client type. ' +
    'Supported clients: OpenAI, Anthropic'
  )
}
40
+
41
+ // Export individual wrappers for advanced use cases
42
+ export { wrapOpenAI } from './openai'
43
+ export { wrapAnthropic } from './anthropic'
@@ -0,0 +1,193 @@
1
+ /**
2
+ * OpenAI Auto-Instrumentation
3
+ * Monkey-patches OpenAI client to automatically capture traces
4
+ */
5
+
6
+ import { Span } from '../trace'
7
+ import { SpanType } from '../types'
8
+ import type { ObservyzeClient } from '../client'
9
+
10
/**
 * Minimal structural view of an OpenAI client: only the method that the
 * instrumentation patches.
 */
interface OpenAIClient {
  chat: {
    completions: {
      // A callable signature rather than the bare `Function` type, so the
      // patched method stays invocable and bindable without losing all checking.
      create: (...args: any[]) => any
    }
  }
}

/** A chat message as sent in a request or returned in a choice. */
interface OpenAIMessage {
  role: string
  // NOTE(review): nullable because assistant messages that only carry tool
  // calls are documented to have `content: null` - confirm against the API reference.
  content: string | null
}

/**
 * Request parameters the instrumentation reads. Any other key is accepted
 * through the index signature and forwarded untouched.
 */
interface OpenAICompletionParams {
  model: string
  messages: OpenAIMessage[]
  stream?: boolean
  temperature?: number
  max_tokens?: number
  [key: string]: any
}

/** Token counts as reported by the API. */
interface OpenAIUsage {
  prompt_tokens: number
  completion_tokens: number
  total_tokens: number
}

/** Fields of a non-streaming completion that are recorded on the span. */
interface OpenAICompletionResponse {
  id: string
  model: string
  choices: Array<{
    message: OpenAIMessage
    finish_reason: string
  }>
  usage?: OpenAIUsage
}

/** One chunk of a streaming completion. */
interface OpenAIStreamChunk {
  id: string
  model: string
  choices: Array<{
    delta: {
      role?: string
      content?: string | null
    }
    finish_reason: string | null
  }>
  // NOTE(review): only sent when the request opts in via
  // `stream_options.include_usage` - confirm against the API reference.
  usage?: OpenAIUsage | null
}
57
+
58
/**
 * Wrap an OpenAI client instance to enable auto-instrumentation.
 *
 * Replaces `client.chat.completions.create` in place with a version that
 * opens a trace and an LLM span around every call, records the request, the
 * response (or the error) and the latency, then ends both. Streaming
 * responses are handed to `wrapOpenAIStream`, which ends the span when the
 * stream ends.
 *
 * Tracing is best-effort: an exception raised by a tracing call is logged
 * with `console.warn` and swallowed, so only errors from the OpenAI call
 * itself reach the caller.
 *
 * @param client - OpenAI client to patch (mutated in place).
 * @param nwClient - Observyze client used to start traces.
 * @returns The same `client` instance, now instrumented.
 */
export function wrapOpenAI(client: OpenAIClient, nwClient: ObservyzeClient): OpenAIClient {
  const originalCreate = client.chat.completions.create.bind(client.chat.completions)

  // Runs one instrumentation step. A failure is logged and turned into
  // `undefined` so that tracing can never break the caller's LLM call.
  const guarded = <T>(step: () => T): T | undefined => {
    try {
      return step()
    } catch (tracingError) {
      console.warn('Observyze SDK: OpenAI instrumentation failed', tracingError)
      return undefined
    }
  }

  client.chat.completions.create = async function (params: OpenAICompletionParams, options?: any) {
    // Start the trace and span and capture the request
    const started = guarded(() => {
      const trace = nwClient.startTrace(`openai.chat.completions.create`, {
        provider: 'openai',
        model: params.model
      })

      const span = trace.startSpan('chat.completions.create', SpanType.LLM)
      span.setMetadata('model', params.model)
      span.setMetadata('provider', 'openai')

      if (params.temperature !== undefined) {
        span.setMetadata('temperature', params.temperature)
      }
      if (params.max_tokens !== undefined) {
        span.setMetadata('max_tokens', params.max_tokens)
      }

      span.setInput({
        model: params.model,
        messages: params.messages,
        temperature: params.temperature,
        max_tokens: params.max_tokens
      })

      return { trace, span }
    })

    // Tracing could not be set up: perform the call untraced instead of failing
    if (!started) {
      return originalCreate(params, options)
    }

    const { trace, span } = started
    const startTime = Date.now()

    // `guarded` never throws, so this catch only ever sees provider errors
    try {
      const response = await originalCreate(params, options)

      // Streaming: the span is ended by the stream wrapper. If wrapping
      // fails, hand back the raw stream rather than breaking the call.
      if (params.stream) {
        return guarded(() => wrapOpenAIStream(response, span, trace, startTime)) ?? response
      }

      // Non-streaming: record output, token usage and latency
      const latency = Date.now() - startTime
      guarded(() => {
        const completionResponse = response as OpenAICompletionResponse

        span.setOutput({
          id: completionResponse.id,
          model: completionResponse.model,
          choices: completionResponse.choices
        })

        if (completionResponse.usage) {
          span.setTokens({
            input: completionResponse.usage.prompt_tokens,
            output: completionResponse.usage.completion_tokens,
            total: completionResponse.usage.total_tokens
          })
        }

        span.setMetadata('latency_ms', latency)
      })

      // Ended in a separate step so a failed recording still closes the span
      guarded(() => {
        span.end()
        trace.end()
      })

      return response
    } catch (error) {
      const latency = Date.now() - startTime
      guarded(() => {
        span.setMetadata('latency_ms', latency)
        span.setError(error as Error)
      })
      guarded(() => {
        span.end()
        trace.end()
      })
      // The provider error always reaches the caller unchanged
      throw error
    }
  }

  return client
}
138
+
139
/**
 * Wrap an OpenAI streaming response to capture complete output.
 *
 * Chunks are yielded to the caller unchanged as they arrive. While they pass
 * through, the wrapper collects the completion id and model, the content
 * deltas of the first choice, its finish reason, and the token usage if a
 * chunk carries one. The span and trace are ended exactly once:
 * - when the stream is exhausted (output, tokens, latency, `streaming` flag),
 * - when the consumer stops early, e.g. `break` (same, with the partial text
 *   received so far),
 * - when iteration throws (latency and error; the error is re-thrown).
 *
 * Every other member of the original stream object is still reachable on the
 * returned value. Only iteration through the returned value is traced.
 * Failures of the tracing calls are logged and swallowed.
 *
 * @param stream - The stream returned by `chat.completions.create({ stream: true })`.
 * @param span - Span to record the output on and to end.
 * @param trace - Trace to end together with the span.
 * @param startTime - `Date.now()` timestamp taken before the request was sent.
 * @returns An async iterable yielding the same chunks as `stream`.
 */
function wrapOpenAIStream(
  stream: AsyncIterable<OpenAIStreamChunk>,
  span: Span,
  trace: any,
  startTime: number
): AsyncIterable<OpenAIStreamChunk> {
  type StreamUsage = { prompt_tokens: number; completion_tokens: number; total_tokens: number }

  const bufferedChunks: string[] = []
  let streamId = ''
  let streamModel = ''
  let finishReason: string | null = null
  let usage: StreamUsage | undefined
  let finalized = false

  // Ends the span and trace once; later calls are no-ops. Tracing failures
  // are logged instead of being thrown into the consumer's loop.
  const finalize = (failed: boolean, error?: unknown): void => {
    if (finalized) return
    finalized = true

    const latency = Date.now() - startTime
    try {
      if (failed) {
        span.setMetadata('latency_ms', latency)
        span.setError(error as Error)
      } else {
        span.setOutput({
          id: streamId,
          model: streamModel,
          content: bufferedChunks.join(''),
          finish_reason: finishReason
        })

        // Set token usage if the stream reported it
        if (usage) {
          span.setTokens({
            input: usage.prompt_tokens,
            output: usage.completion_tokens,
            total: usage.total_tokens
          })
        }

        span.setMetadata('latency_ms', latency)
        span.setMetadata('streaming', true)
      }
      span.end()
      trace.end()
    } catch (tracingError) {
      console.warn('Observyze SDK: failed to record OpenAI stream trace', tracingError)
    }
  }

  const iterate = async function* (): AsyncGenerator<OpenAIStreamChunk> {
    try {
      for await (const chunk of stream) {
        if (chunk.id) streamId = chunk.id
        if (chunk.model) streamModel = chunk.model

        // `choices` may be empty (or missing on non-conforming backends);
        // never let bookkeeping throw into the consumer's loop
        const choice = chunk.choices?.[0]
        if (choice?.delta?.content) {
          bufferedChunks.push(choice.delta.content)
        }
        if (choice?.finish_reason) {
          finishReason = choice.finish_reason
        }

        // Usage is read through a widened view so this works whether or not
        // the chunk type declares the field
        const reported = (chunk as OpenAIStreamChunk & { usage?: StreamUsage | null }).usage
        if (reported) {
          usage = reported
        }

        // Yield the chunk to the caller (no added latency)
        yield chunk
      }
    } catch (error) {
      finalize(true, error)
      throw error
    } finally {
      // Runs on normal completion AND when the consumer stops early (the
      // generator is then closed via return(), which skips the code after
      // the loop). No-op if the error path already finalized.
      finalize(false)
    }
  }

  // Nothing to proxy: fall back to a bare async iterable
  if (typeof stream !== 'object' || stream === null) {
    return { [Symbol.asyncIterator]: iterate }
  }

  // Only iteration is intercepted; everything else is forwarded to the
  // original stream. Methods are bound to the original so they keep working
  // when invoked through the proxy.
  return new Proxy(stream, {
    get(target, prop) {
      if (prop === Symbol.asyncIterator) {
        return iterate
      }
      const value = Reflect.get(target, prop, target)
      return typeof value === 'function' ? value.bind(target) : value
    }
  })
}