@struktur/sdk 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/auth/config.ts +57 -0
- package/src/extract.ts +55 -19
- package/src/index.ts +13 -0
- package/src/llm/LLMClient.ts +88 -7
- package/src/llm/RetryingRunner.ts +83 -1
- package/src/strategies/DoublePassAutoMergeStrategy.ts +140 -0
- package/src/strategies/DoublePassStrategy.ts +87 -0
- package/src/strategies/ParallelAutoMergeStrategy.ts +104 -0
- package/src/strategies/ParallelStrategy.ts +51 -0
- package/src/strategies/SequentialAutoMergeStrategy.ts +103 -0
- package/src/strategies/SequentialStrategy.ts +23 -0
- package/src/strategies/SimpleStrategy.ts +20 -0
- package/src/strategies/utils.ts +42 -3
- package/src/types.ts +66 -9
package/src/strategies/utils.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Artifact, ExtractionEvents, Usage } from "../types";
|
|
1
|
+
import type { Artifact, ExtractionEvents, Usage, TelemetryAdapter } from "../types";
|
|
2
2
|
import type { DebugLogger } from "../debug/logger";
|
|
3
3
|
import { batchArtifacts, type BatchOptions } from "../chunking/ArtifactBatcher";
|
|
4
4
|
import { buildUserContent } from "../llm/message";
|
|
@@ -22,9 +22,44 @@ export const mergeUsage = (usages: Usage[]) => {
|
|
|
22
22
|
export const getBatches = (
|
|
23
23
|
artifacts: Artifact[],
|
|
24
24
|
options: BatchOptions,
|
|
25
|
-
debug?: DebugLogger
|
|
25
|
+
debug?: DebugLogger,
|
|
26
|
+
telemetry?: TelemetryAdapter,
|
|
27
|
+
parentSpan?: { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string }
|
|
26
28
|
) => {
|
|
27
|
-
|
|
29
|
+
// Create chunking span if telemetry is enabled
|
|
30
|
+
const chunkingSpan = telemetry?.startSpan({
|
|
31
|
+
name: "struktur.chunking",
|
|
32
|
+
kind: "RETRIEVER",
|
|
33
|
+
parentSpan,
|
|
34
|
+
attributes: {
|
|
35
|
+
"chunking.artifact_count": artifacts.length,
|
|
36
|
+
"chunking.max_tokens": options.maxTokens,
|
|
37
|
+
"chunking.max_images": options.maxImages,
|
|
38
|
+
},
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
const batches = batchArtifacts(artifacts, { ...options, debug });
|
|
42
|
+
|
|
43
|
+
// Record chunking results
|
|
44
|
+
if (chunkingSpan && telemetry) {
|
|
45
|
+
batches.forEach((batch, index) => {
|
|
46
|
+
telemetry.recordEvent(chunkingSpan, {
|
|
47
|
+
type: "chunk",
|
|
48
|
+
chunkIndex: index,
|
|
49
|
+
totalChunks: batches.length,
|
|
50
|
+
tokens: batch.reduce((sum, a) => sum + (a.tokens || 0), 0),
|
|
51
|
+
images: batch.reduce((sum, a) =>
|
|
52
|
+
sum + (a.contents?.flatMap((c) => c.media || []).length || 0), 0),
|
|
53
|
+
});
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
telemetry.endSpan(chunkingSpan, {
|
|
57
|
+
status: "ok",
|
|
58
|
+
output: { batchCount: batches.length },
|
|
59
|
+
});
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return batches;
|
|
28
63
|
};
|
|
29
64
|
|
|
30
65
|
export const extractWithPrompt = async <T>(options: {
|
|
@@ -38,6 +73,8 @@ export const extractWithPrompt = async <T>(options: {
|
|
|
38
73
|
strict?: boolean;
|
|
39
74
|
debug?: DebugLogger;
|
|
40
75
|
callId?: string;
|
|
76
|
+
telemetry?: TelemetryAdapter;
|
|
77
|
+
parentSpan?: { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string };
|
|
41
78
|
}) => {
|
|
42
79
|
const userContent = buildUserContent(options.user, options.artifacts);
|
|
43
80
|
const result = await runWithRetries<T>({
|
|
@@ -50,6 +87,8 @@ export const extractWithPrompt = async <T>(options: {
|
|
|
50
87
|
strict: options.strict,
|
|
51
88
|
debug: options.debug,
|
|
52
89
|
callId: options.callId,
|
|
90
|
+
telemetry: options.telemetry,
|
|
91
|
+
parentSpan: options.parentSpan,
|
|
53
92
|
});
|
|
54
93
|
|
|
55
94
|
return result;
|
package/src/types.ts
CHANGED
|
@@ -45,6 +45,29 @@ export type ExtractionResult<T> = {
|
|
|
45
45
|
error?: Error;
|
|
46
46
|
};
|
|
47
47
|
|
|
48
|
+
/**
|
|
49
|
+
* Telemetry adapter interface for tracing extraction operations.
|
|
50
|
+
* This is a minimal interface that matches the full TelemetryAdapter from @struktur/telemetry.
|
|
51
|
+
* SDK users should import adapters from @struktur/telemetry package.
|
|
52
|
+
*/
|
|
53
|
+
export interface TelemetryAdapter {
|
|
54
|
+
readonly name: string;
|
|
55
|
+
readonly version: string;
|
|
56
|
+
initialize(): Promise<void>;
|
|
57
|
+
shutdown(): Promise<void>;
|
|
58
|
+
startSpan(context: {
|
|
59
|
+
name: string;
|
|
60
|
+
kind: "CHAIN" | "LLM" | "TOOL" | "AGENT" | "RETRIEVER" | "EMBEDDING" | "RERANKER";
|
|
61
|
+
parentSpan?: { id: string; traceId: string };
|
|
62
|
+
attributes?: Record<string, unknown>;
|
|
63
|
+
startTime?: number;
|
|
64
|
+
}): { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string };
|
|
65
|
+
endSpan(span: { id: string }, result?: { status: "ok" | "error"; error?: Error; output?: unknown; latencyMs?: number }): void;
|
|
66
|
+
recordEvent(span: { id: string }, event: unknown): void;
|
|
67
|
+
setAttributes(span: { id: string }, attributes: Record<string, unknown>): void;
|
|
68
|
+
setContext(context: { sessionId?: string; userId?: string; metadata?: Record<string, unknown>; tags?: string[] }): void;
|
|
69
|
+
}
|
|
70
|
+
|
|
48
71
|
export type StepInfo = {
|
|
49
72
|
step: number;
|
|
50
73
|
total?: number;
|
|
@@ -68,19 +91,47 @@ export type TokenUsageInfo = Usage & {
|
|
|
68
91
|
};
|
|
69
92
|
|
|
70
93
|
export type RetryInfo = {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
94
|
+
attempt: number;
|
|
95
|
+
maxAttempts: number;
|
|
96
|
+
reason?: string;
|
|
74
97
|
};
|
|
75
98
|
|
|
76
|
-
export type
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
99
|
+
export type AgentToolStartInfo = {
|
|
100
|
+
toolName: string;
|
|
101
|
+
toolCallId: string;
|
|
102
|
+
args: Record<string, unknown>;
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
export type AgentToolEndInfo = {
|
|
106
|
+
toolCallId: string;
|
|
107
|
+
result?: Record<string, unknown>;
|
|
108
|
+
error?: string;
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
export type AgentMessageInfo = {
|
|
112
|
+
content: string;
|
|
113
|
+
role?: "assistant" | "user";
|
|
82
114
|
};
|
|
83
115
|
|
|
116
|
+
export type AgentReasoningInfo = {
|
|
117
|
+
thought: string;
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
export type AgentEvents = {
|
|
121
|
+
onAgentToolStart?: (info: AgentToolStartInfo) => void | Promise<void>;
|
|
122
|
+
onAgentToolEnd?: (info: AgentToolEndInfo) => void | Promise<void>;
|
|
123
|
+
onAgentMessage?: (info: AgentMessageInfo) => void | Promise<void>;
|
|
124
|
+
onAgentReasoning?: (info: AgentReasoningInfo) => void | Promise<void>;
|
|
125
|
+
};
|
|
126
|
+
|
|
127
|
+
export type ExtractionEvents = {
|
|
128
|
+
onStep?: (info: StepInfo) => void | Promise<void>;
|
|
129
|
+
onMessage?: (info: MessageInfo) => void | Promise<void>;
|
|
130
|
+
onProgress?: (info: ProgressInfo) => void | Promise<void>;
|
|
131
|
+
onTokenUsage?: (info: TokenUsageInfo) => void | Promise<void>;
|
|
132
|
+
onRetry?: (info: RetryInfo) => void | Promise<void>;
|
|
133
|
+
} & AgentEvents;
|
|
134
|
+
|
|
84
135
|
export type AnyJSONSchema = Record<string, unknown>;
|
|
85
136
|
export type TypedJSONSchema<T> = JSONSchemaType<T>;
|
|
86
137
|
|
|
@@ -108,6 +159,12 @@ export type ExtractionOptions<T> = {
|
|
|
108
159
|
events?: ExtractionEvents;
|
|
109
160
|
debug?: DebugLogger;
|
|
110
161
|
strict?: boolean;
|
|
162
|
+
/**
|
|
163
|
+
* Telemetry adapter for tracing extraction operations.
|
|
164
|
+
* Supports Phoenix (Arize), Langfuse, and other OpenTelemetry-compatible providers.
|
|
165
|
+
* Import from `@struktur/telemetry` package and pass the adapter here.
|
|
166
|
+
*/
|
|
167
|
+
telemetry?: TelemetryAdapter | null;
|
|
111
168
|
}
|
|
112
169
|
|
|
113
170
|
export interface ExtractionStrategy<T> {
|