@struktur/telemetry 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/factory.ts ADDED
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Factory function for creating telemetry adapters
3
+ *
4
+ * This module provides a factory function to create telemetry adapters
5
+ * based on the specified provider. It supports lazy loading of adapter
6
+ * implementations to avoid loading unused dependencies.
7
+ */
8
+
9
+ import type {
10
+ TelemetryAdapter,
11
+ TelemetryOptions,
12
+ PhoenixConfig,
13
+ LangfuseConfig
14
+ } from "./types.js";
15
+ import { NoopTelemetryAdapter } from "./types.js";
16
+
17
/**
 * Install commands for the optional dependencies of each supported provider.
 * Used to build a provider-specific hint when an adapter fails to load.
 */
const PROVIDER_INSTALL_HINTS: ReadonlyMap<string, string> = new Map([
  ["phoenix", "bun add @arizeai/phoenix-otel @arizeai/openinference-core"],
  ["langfuse", "bun add @langfuse/otel"],
]);

/**
 * Report whether `error` looks like a module-resolution failure.
 *
 * Node reports missing modules with the code `MODULE_NOT_FOUND` (CommonJS) or
 * `ERR_MODULE_NOT_FOUND` (ES modules); the ESM message for a missing package
 * reads "Cannot find package ...", so matching only "Cannot find module"
 * misses it. The message checks are kept as a fallback for runtimes that do
 * not set a code.
 */
function isModuleNotFoundError(error: unknown): error is Error {
  if (!(error instanceof Error)) {
    return false;
  }
  const code = (error as Error & { code?: unknown }).code;
  return (
    code === "ERR_MODULE_NOT_FOUND" ||
    code === "MODULE_NOT_FOUND" ||
    error.message.includes("Cannot find module") ||
    error.message.includes("Cannot find package")
  );
}

/**
 * Create a telemetry adapter based on the specified provider.
 *
 * The adapter implementation is loaded lazily with a dynamic `import()`, so
 * only the requested provider's module (and its optional dependencies) is
 * loaded.
 *
 * Only `enabled`, `provider` and `config` are read here; `sampleRate`,
 * `redactPii`, `maxInputLength` and `maxOutputLength` are not forwarded to the
 * adapter by this function.
 *
 * @param options - Telemetry configuration options
 * @returns A telemetry adapter instance, or null if `options.enabled` is `false`
 * @throws Error if the provider is unknown, or if the adapter module (or one of
 *   its optional dependencies) cannot be resolved. In the latter case the
 *   original error is attached as `cause`.
 *
 * @example
 * ```typescript
 * // Phoenix
 * const telemetry = await createTelemetry({
 *   provider: "phoenix",
 *   config: {
 *     projectName: "my-app",
 *     url: "http://localhost:6006",
 *   } satisfies PhoenixConfig
 * });
 *
 * // Langfuse
 * const telemetry = await createTelemetry({
 *   provider: "langfuse",
 *   config: {
 *     publicKey: "pk-lf-xxx",
 *     secretKey: "sk-lf-xxx",
 *   } satisfies LangfuseConfig
 * });
 * ```
 */
export async function createTelemetry(options: TelemetryOptions): Promise<TelemetryAdapter | null> {
  if (options.enabled === false) {
    return null;
  }

  const { provider, config } = options;

  try {
    switch (provider) {
      case "phoenix": {
        const { PhoenixAdapter } = await import("./adapters/phoenix/index.js");
        // `config` is an untyped record at this boundary; the adapter receives it as-is.
        return new PhoenixAdapter(config as unknown as PhoenixConfig);
      }

      case "langfuse": {
        const { LangfuseAdapter } = await import("./adapters/langfuse/index.js");
        // `config` is an untyped record at this boundary; the adapter receives it as-is.
        return new LangfuseAdapter(config as unknown as LangfuseConfig);
      }

      default:
        throw new Error(
          `Unknown telemetry provider: ${provider}. ` +
            `Supported providers: phoenix, langfuse`
        );
    }
  } catch (error) {
    if (isModuleNotFoundError(error)) {
      // Only suggest the packages that belong to the requested provider.
      const installHint =
        PROVIDER_INSTALL_HINTS.get(provider) ??
        "bun add @arizeai/phoenix-otel @arizeai/openinference-core or bun add @langfuse/otel";
      // Keep the underlying resolution failure reachable for debugging.
      throw Object.assign(
        new Error(
          `Provider '${provider}' requires optional dependencies. ` +
            `Install them with: ${installHint}`
        ),
        { cause: error }
      );
    }
    throw error;
  }
}
81
+
82
/**
 * Build a telemetry adapter backed by Phoenix.
 *
 * The Phoenix adapter module is imported on demand, when this function is
 * called, rather than at module load time.
 *
 * @param config - Phoenix configuration, passed unchanged to the adapter constructor
 * @returns A promise resolving to the newly constructed Phoenix adapter
 *
 * @example
 * ```typescript
 * import { createPhoenixTelemetry } from "@struktur/telemetry";
 *
 * const telemetry = await createPhoenixTelemetry({
 *   projectName: "production-extractions",
 *   url: "https://app.phoenix.arize.com/s/my-space",
 *   apiKey: process.env.PHOENIX_API_KEY,
 * });
 * ```
 */
export async function createPhoenixTelemetry(config: PhoenixConfig): Promise<TelemetryAdapter> {
  const phoenixModule = await import("./adapters/phoenix/index.js");
  return new phoenixModule.PhoenixAdapter(config);
}
103
+
104
/**
 * Build a telemetry adapter backed by Langfuse.
 *
 * The Langfuse adapter module is imported on demand, when this function is
 * called, rather than at module load time.
 *
 * @param config - Langfuse configuration, passed unchanged to the adapter constructor
 * @returns A promise resolving to the newly constructed Langfuse adapter
 *
 * @example
 * ```typescript
 * import { createLangfuseTelemetry } from "@struktur/telemetry";
 *
 * const telemetry = await createLangfuseTelemetry({
 *   publicKey: process.env.LANGFUSE_PUBLIC_KEY,
 *   secretKey: process.env.LANGFUSE_SECRET_KEY,
 *   baseUrl: "https://cloud.langfuse.com",
 * });
 * ```
 */
export async function createLangfuseTelemetry(config: LangfuseConfig): Promise<TelemetryAdapter> {
  const langfuseModule = await import("./adapters/langfuse/index.js");
  return new langfuseModule.LangfuseAdapter(config);
}
125
+
126
/**
 * Build an adapter that discards all telemetry.
 *
 * Intended for tests, or for callers that want an adapter object even though
 * telemetry is switched off.
 *
 * @returns A fresh `NoopTelemetryAdapter` instance
 */
export function createNoopTelemetry(): TelemetryAdapter {
  const adapter: TelemetryAdapter = new NoopTelemetryAdapter();
  return adapter;
}
package/src/index.ts ADDED
@@ -0,0 +1,55 @@
1
+ /**
2
+ * @struktur/telemetry
3
+ *
4
+ * Telemetry and observability for Struktur structured data extraction.
5
+ * Supports multiple providers including Phoenix (Arize) and Langfuse.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { createPhoenixTelemetry } from "@struktur/telemetry";
10
+ *
11
+ * const telemetry = await createPhoenixTelemetry({
12
+ * projectName: "my-app",
13
+ * url: "http://localhost:6006",
14
+ * });
15
+ *
16
+ * await extract({ artifacts, schema, telemetry });
17
+ * ```
18
+ */
19
+
20
+ // Core types
21
+ export type {
22
+ TelemetryAdapter,
23
+ SpanContext,
24
+ Span,
25
+ SpanKind,
26
+ SpanResult,
27
+ TelemetryContext,
28
+ TelemetryEvent,
29
+ LLMCallEvent,
30
+ ValidationEvent,
31
+ ChunkEvent,
32
+ ToolCallEvent,
33
+ MergeEvent,
34
+ ParseEvent,
35
+ TokenUsage,
36
+ TelemetryOptions,
37
+ PhoenixConfig,
38
+ LangfuseConfig,
39
+ } from "./types.js";
40
+
41
+ export { NoopTelemetryAdapter } from "./types.js";
42
+
43
+ // Factory functions
44
+ export {
45
+ createTelemetry,
46
+ createPhoenixTelemetry,
47
+ createLangfuseTelemetry,
48
+ createNoopTelemetry,
49
+ } from "./factory.js";
50
+
51
+ // Phoenix adapter
52
+ export { PhoenixAdapter, createPhoenixAdapter } from "./adapters/phoenix/index.js";
53
+
54
+ // Langfuse adapter
55
+ export { LangfuseAdapter, createLangfuseAdapter } from "./adapters/langfuse/index.js";
package/src/types.ts ADDED
@@ -0,0 +1,453 @@
1
+ /**
2
+ * Core telemetry types and interfaces for Struktur
3
+ *
4
+ * This module defines the TelemetryAdapter interface that all telemetry providers
5
+ * must implement. The SDK uses this interface to emit telemetry events without
6
+ * knowing about specific provider implementations.
7
+ */
8
+
9
/**
 * Category of a span created during extraction.
 *
 * - `CHAIN`: extraction pipeline and strategies
 * - `LLM`: LLM calls (generateText, generateObject)
 * - `TOOL`: agent tool calls (bash, read, etc.)
 * - `AGENT`: agent strategy execution
 * - `RETRIEVER`: document parsing and chunking
 * - `EMBEDDING`: vector embeddings (future)
 * - `RERANKER`: reranking (future)
 */
export type SpanKind =
  | "CHAIN"
  | "LLM"
  | "TOOL"
  | "AGENT"
  | "RETRIEVER"
  | "EMBEDDING"
  | "RERANKER";
20
+
21
/**
 * Input describing a span that is about to be started.
 */
export interface SpanContext {
  /** Display name of the span. */
  name: string;

  /** Category of the span. */
  kind: SpanKind;

  /** Span to nest under, used to build hierarchical traces. */
  parentSpan?: Span;

  /** Attributes to attach to the span when it is created. */
  attributes?: Record<string, unknown>;

  /** Explicit start time; when omitted the current time is meant to be used. */
  startTime?: number;
}
40
+
41
/**
 * Handle for a span that is currently active in the telemetry system.
 */
export interface Span {
  /** Identifier unique to this span. */
  id: string;

  /** Identifier of the trace that groups this span with related spans. */
  traceId: string;

  /** Display name of the span. */
  name: string;

  /** Category of the span. */
  kind: SpanKind;

  /** Unix timestamp at which the span started. */
  startTime: number;

  /** Identifier of the parent span, when there is one. */
  parentId?: string;
}
63
+
64
/**
 * Outcome reported when a span is ended.
 */
export interface SpanResult {
  /** `"ok"` when the operation succeeded, `"error"` when it failed. */
  status: "ok" | "error";

  /** Failure details, relevant when `status` is `"error"`. */
  error?: Error;

  /** Data produced by the operation. */
  output?: unknown;

  /** Duration of the operation in milliseconds. */
  latencyMs?: number;
}
80
+
81
/**
 * Trace-wide context shared by every span in a trace.
 */
export interface TelemetryContext {
  /** Identifier of the session, used to group related extractions. */
  sessionId?: string;

  /** Identifier of the user. */
  userId?: string;

  /** Free-form extra metadata. */
  metadata?: Record<string, unknown>;

  /** Labels used for categorization. */
  tags?: string[];
}
97
+
98
/**
 * Token counts reported for an LLM call.
 */
export interface TokenUsage {
  /** Tokens consumed by the input (prompt). */
  input: number;

  /** Tokens produced as output (completion). */
  output: number;

  /** Sum of input and output tokens. */
  total: number;
}
111
+
112
/**
 * Telemetry event describing a single LLM call.
 */
export interface LLMCallEvent {
  /** Discriminant identifying this event variant. */
  type: "llm_call";

  /** Identifier of the model, e.g. "gpt-4o" or "claude-3-opus". */
  model: string;

  /** Name of the provider, e.g. "openai" or "anthropic". */
  provider: string;

  /** Parameters the call was made with. */
  input: {
    /** Messages that were sent to the LLM. */
    messages: { role: string; content: string }[];

    /** Temperature, when one was set. */
    temperature?: number;

    /** Max-tokens limit, when one was set. */
    maxTokens?: number;

    /** JSON schema for structured output, when one was set. */
    schema?: unknown;
  };

  /** What the LLM returned, present when the call succeeded. */
  output?: {
    /** Raw content returned by the LLM. */
    content: string;

    /** Whether the output was structured (JSON). */
    structured?: boolean;

    /** Token counts for the call. */
    usage?: TokenUsage;
  };

  /** Duration of the call in milliseconds. */
  latencyMs: number;

  /** Failure details, present when the call failed. */
  error?: Error;
}
157
+
158
/**
 * Telemetry event describing one schema-validation attempt.
 */
export interface ValidationEvent {
  /** Discriminant identifying this event variant. */
  type: "validation";

  /** Number of the current attempt. */
  attempt: number;

  /** Upper bound on the number of attempts. */
  maxAttempts: number;

  /** Schema the input is validated against. */
  schema: unknown;

  /** Data that is being validated. */
  input: unknown;

  /** `true` when validation passed. */
  success: boolean;

  /** Validation problems, present when validation failed. */
  errors?: { path: string; message: string }[];

  /** Duration of the validation in milliseconds. */
  latencyMs?: number;
}
185
+
186
/**
 * Telemetry event describing one chunk produced while chunking documents.
 */
export interface ChunkEvent {
  /** Discriminant identifying this event variant. */
  type: "chunk";

  /** Zero-based position of this chunk. */
  chunkIndex: number;

  /** How many chunks there are in total. */
  totalChunks: number;

  /** Token count of this chunk. */
  tokens: number;

  /** Image count of this chunk. */
  images: number;

  /** Optional preview of the chunk's content. */
  content?: string;
}
207
+
208
/**
 * Telemetry event describing a call to an agent tool.
 */
export interface ToolCallEvent {
  /** Discriminant identifying this event variant. */
  type: "tool_call";

  /** Name of the tool that was called. */
  toolName: string;

  /** Arguments the tool was called with. */
  args: Record<string, unknown>;

  /** Value returned by the tool, present when it succeeded. */
  result?: unknown;

  /** Failure details, present when the tool failed. */
  error?: Error;

  /** Duration of the call in milliseconds. */
  latencyMs?: number;
}
229
+
230
/**
 * Telemetry event describing the merge of results from multiple chunks.
 */
export interface MergeEvent {
  /** Discriminant identifying this event variant. */
  type: "merge";

  /** Name of the merge strategy that was used. */
  strategy: string;

  /** How many items went into the merge. */
  inputCount: number;

  /** How many items remained after the merge. */
  outputCount: number;

  /** How many items deduplication removed, where applicable. */
  deduped?: number;
}
248
+
249
/**
 * Telemetry event describing the parsing of an input file.
 */
export interface ParseEvent {
  /** Discriminant identifying this event variant. */
  type: "parse";

  /** MIME type of the parsed file. */
  mimeType: string;

  /** Name of the parser that was used, e.g. "pdf-parse" or "text". */
  parser: string;

  /** Size of the input file in bytes. */
  inputSize: number;

  /** Token count of the parsed output. */
  outputTokens: number;

  /** How many images were extracted. */
  outputImages: number;

  /** Duration of the parse in milliseconds. */
  latencyMs: number;
}
273
+
274
/**
 * Union of every telemetry event variant; each member carries its own
 * literal `type` tag.
 */
export type TelemetryEvent =
  | LLMCallEvent
  | ValidationEvent
  | ChunkEvent
  | ToolCallEvent
  | MergeEvent
  | ParseEvent;
284
+
285
/**
 * Contract every telemetry adapter has to fulfil.
 *
 * The SDK emits telemetry through this interface so that it does not need to
 * know about any specific provider.
 */
export interface TelemetryAdapter {
  /** Name of the provider. */
  readonly name: string;

  /** Version of the adapter. */
  readonly version: string;

  /**
   * Prepare the adapter for use.
   * Has to be called before any other operation.
   */
  initialize(): Promise<void>;

  /**
   * Shut the adapter down, flushing any telemetry data that is still pending.
   */
  shutdown(): Promise<void>;

  /**
   * Begin a new span.
   * @param context - Description of the span to create
   * @returns The span that was created
   */
  startSpan(context: SpanContext): Span;

  /**
   * Finish a span and record how it turned out.
   * @param span - The span to finish
   * @param result - Outcome of the operation, if available
   */
  endSpan(span: Span, result?: SpanResult): void;

  /**
   * Attach an event to a span.
   * @param span - Active span the event belongs to
   * @param event - The event to record
   */
  recordEvent(span: Span, event: TelemetryEvent): void;

  /**
   * Attach attributes to a span.
   * @param span - Active span to annotate
   * @param attributes - The attributes to set
   */
  setAttributes(span: Span, attributes: Record<string, unknown>): void;

  /**
   * Provide context that applies to every span in this trace.
   * @param context - The context to set
   */
  setContext(context: TelemetryContext): void;
}
342
+
343
/**
 * Options accepted when creating a telemetry adapter.
 */
export interface TelemetryOptions {
  /** Name of the provider. */
  provider: string;

  /** Configuration specific to the chosen provider. */
  config: Record<string, unknown>;

  /** Turns telemetry on or off; treated as `true` when omitted. */
  enabled?: boolean;

  /** Fraction of traces to sample, from 0.0 to 1.0 (1.0 = all traces). */
  sampleRate?: number;

  /** Whether PII should be redacted from traces. */
  redactPii?: boolean;

  /** Length limit for input content; longer content is truncated. */
  maxInputLength?: number;

  /** Length limit for output content; longer content is truncated. */
  maxOutputLength?: number;
}
368
+
369
/**
 * Settings for the Phoenix telemetry provider.
 */
export interface PhoenixConfig {
  /** Name of the project in Phoenix. */
  projectName: string;

  /** URL of the Phoenix collector endpoint (defaults to http://localhost:6006). */
  url?: string;

  /** API key, for Phoenix Cloud. */
  apiKey?: string;

  /** Enables batch processing (defaults to true for production). */
  batch?: boolean;

  /** Extra headers to send with OTLP requests. */
  headers?: Record<string, string>;
}
388
+
389
/**
 * Settings for the Langfuse telemetry provider.
 */
export interface LangfuseConfig {
  /** Public key issued by Langfuse. */
  publicKey: string;

  /** Secret key issued by Langfuse. */
  secretKey: string;

  /** Base URL of the Langfuse instance (defaults to https://cloud.langfuse.com). */
  baseUrl?: string;

  /** Optional project name. */
  projectName?: string;
}
405
+
406
/**
 * No-op adapter for when telemetry is disabled.
 *
 * Spans returned by {@link NoopTelemetryAdapter.startSpan} are plain objects
 * with locally generated identifiers; nothing is exported or retained, so
 * spans that are never ended do not accumulate in memory.
 */
export class NoopTelemetryAdapter implements TelemetryAdapter {
  readonly name = "noop";
  readonly version = "1.0.0";

  /** Counter used to mint span and trace identifiers for this instance. */
  private currentId = 0;

  /** Does nothing; present to satisfy the adapter contract. */
  async initialize(): Promise<void> {
    // No-op
  }

  /** Does nothing; there is no pending data to flush. */
  async shutdown(): Promise<void> {
    // No-op
  }

  /**
   * Create a span object without recording it anywhere.
   *
   * @param context - Span creation context
   * @returns A span with id `noop-<n>`. A child span shares its parent's
   *   `traceId`; a root span gets `trace-<n>`. `startTime` honours
   *   `context.startTime` and falls back to `Date.now()`.
   */
  startSpan(context: SpanContext): Span {
    const sequence = ++this.currentId;
    return {
      id: `noop-${sequence}`,
      // Spans of one trace must share a trace id, so inherit it from the parent.
      traceId: context.parentSpan?.traceId ?? `trace-${sequence}`,
      name: context.name,
      kind: context.kind,
      // Respect a caller-supplied start time instead of always using "now".
      startTime: context.startTime ?? Date.now(),
      parentId: context.parentSpan?.id,
    };
  }

  /** Does nothing; spans are not tracked, so there is nothing to release. */
  endSpan(_span: Span, _result?: SpanResult): void {
    // No-op
  }

  /** Does nothing; events are discarded. */
  recordEvent(_span: Span, _event: TelemetryEvent): void {
    // No-op
  }

  /** Does nothing; attributes are discarded. */
  setAttributes(_span: Span, _attributes: Record<string, unknown>): void {
    // No-op
  }

  /** Does nothing; context is discarded. */
  setContext(_context: TelemetryContext): void {
    // No-op
  }
}