agentic-dataset-builder 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,338 @@
1
+ import { z } from 'zod';
2
/**
 * Declared type of the text content-block schema: an object schema whose
 * `type` is the literal `"text"` and whose `text` is a string.
 */
+ export declare const Qwen35TextBlockSchema: z.ZodObject<{
3
+ type: z.ZodLiteral<"text">;
4
+ text: z.ZodString;
5
+ }, z.core.$strip>;
6
/**
 * Declared type of the image content-block schema. `type` is the literal
 * `"image"`; every other field (`image_url`, `placeholder`,
 * `placeholder_token`, `source_kind`, `metadata`) is optional.
 */
+ export declare const Qwen35ImageBlockSchema: z.ZodObject<{
7
+ type: z.ZodLiteral<"image">;
8
+ image_url: z.ZodOptional<z.ZodString>;
9
+ placeholder: z.ZodOptional<z.ZodBoolean>;
10
+ placeholder_token: z.ZodOptional<z.ZodString>;
11
+ source_kind: z.ZodOptional<z.ZodString>;
12
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
13
+ }, z.core.$strip>;
14
/**
 * Declared type of the video content-block schema. `type` is the literal
 * `"video"`; every other field (`video_url`, `placeholder`,
 * `placeholder_token`, `source_kind`, `metadata`) is optional.
 */
+ export declare const Qwen35VideoBlockSchema: z.ZodObject<{
15
+ type: z.ZodLiteral<"video">;
16
+ video_url: z.ZodOptional<z.ZodString>;
17
+ placeholder: z.ZodOptional<z.ZodBoolean>;
18
+ placeholder_token: z.ZodOptional<z.ZodString>;
19
+ source_kind: z.ZodOptional<z.ZodString>;
20
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
21
+ }, z.core.$strip>;
22
/**
 * Declared type of the message-content schema: either a plain string or an
 * array whose items are text, image, or video blocks.
 *
 * The three block types are taken from the block schema declarations in this
 * file via `typeof`, which resolves to exactly the object-schema types that
 * would otherwise be written out inline.
 */
export declare const Qwen35ContentSchema: z.ZodUnion<readonly [
    z.ZodString,
    z.ZodArray<z.ZodUnion<readonly [
        typeof Qwen35TextBlockSchema,
        typeof Qwen35ImageBlockSchema,
        typeof Qwen35VideoBlockSchema
    ]>>
]>;
40
/**
 * Declared type of the tool-call schema. `type` is the literal `"function"`
 * wrapped in a default, `id` is optional, and `function` carries a required
 * `name` plus an `arguments` record (string keys, unknown values) that also
 * has a default.
 */
+ export declare const Qwen35ToolCallSchema: z.ZodObject<{
41
+ type: z.ZodDefault<z.ZodLiteral<"function">>;
42
+ id: z.ZodOptional<z.ZodString>;
43
+ function: z.ZodObject<{
44
+ name: z.ZodString;
45
+ arguments: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
46
+ }, z.core.$strip>;
47
+ }, z.core.$strip>;
48
/**
 * Declared type of the tool-specification schema: a required `name`, an
 * optional `description`, and an optional `parameters` record with string
 * keys and unknown values.
 */
+ export declare const Qwen35ToolSpecSchema: z.ZodObject<{
49
+ name: z.ZodString;
50
+ description: z.ZodOptional<z.ZodString>;
51
+ parameters: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
52
+ }, z.core.$strip>;
53
/**
 * Declared type of the chat-message schema: a union discriminated on `role`
 * with four variants (`system`, `user`, `assistant`, `tool`).
 *
 * Every variant carries `content`. The assistant variant additionally allows
 * `reasoning_content` and `tool_calls`; the tool variant additionally allows
 * `tool_call_id` and `name`.
 *
 * Content and tool-call types are referenced with `typeof` on the
 * declarations in this file, which resolves to the same types as the fully
 * expanded inline form.
 */
export declare const Qwen35MessageSchema: z.ZodDiscriminatedUnion<[
    z.ZodObject<{
        role: z.ZodLiteral<"system">;
        content: typeof Qwen35ContentSchema;
    }, z.core.$strip>,
    z.ZodObject<{
        role: z.ZodLiteral<"user">;
        content: typeof Qwen35ContentSchema;
    }, z.core.$strip>,
    z.ZodObject<{
        role: z.ZodLiteral<"assistant">;
        content: typeof Qwen35ContentSchema;
        reasoning_content: z.ZodOptional<z.ZodString>;
        tool_calls: z.ZodOptional<z.ZodArray<typeof Qwen35ToolCallSchema>>;
    }, z.core.$strip>,
    z.ZodObject<{
        role: z.ZodLiteral<"tool">;
        content: typeof Qwen35ContentSchema;
        tool_call_id: z.ZodOptional<z.ZodString>;
        name: z.ZodOptional<z.ZodString>;
    }, z.core.$strip>
], "role">;
145
/**
 * Declared type of the per-record metadata schema.
 *
 * `endpoint`, `status` and `ts` are the only fields without an optional or
 * default wrapper. The `request_*` / `response_*` counters and flags, the
 * `*_truncated` flags, `lossy_source` and `lossy_reasons` are wrapped in
 * defaults; the remaining fields are optional, some also nullable.
 */
+ export declare const Qwen35MetaSchema: z.ZodObject<{
146
+ endpoint: z.ZodString;
147
+ status: z.ZodNumber;
148
+ ts: z.ZodString;
149
+ key: z.ZodOptional<z.ZodString>;
150
+ source: z.ZodOptional<z.ZodString>;
151
+ requested_model: z.ZodOptional<z.ZodNullable<z.ZodString>>;
152
+ actual_model: z.ZodOptional<z.ZodNullable<z.ZodString>>;
153
+ stream: z.ZodOptional<z.ZodBoolean>;
154
+ thinking_level: z.ZodOptional<z.ZodNullable<z.ZodString>>;
155
+ reasoning_summary_mode: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodUnknown>, z.ZodRecord<z.ZodString, z.ZodUnknown>]>>;
156
+ thinking_type: z.ZodOptional<z.ZodNullable<z.ZodString>>;
157
+ thinking_budget_tokens: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
158
+ max_output_tokens: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
159
+ tool_spec_count: z.ZodOptional<z.ZodNumber>;
160
+ tool_choice: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodUnknown>, z.ZodRecord<z.ZodString, z.ZodUnknown>]>>;
161
+ request_contains_non_text_content: z.ZodDefault<z.ZodBoolean>;
162
+ request_image_block_count: z.ZodDefault<z.ZodNumber>;
163
+ request_video_block_count: z.ZodDefault<z.ZodNumber>;
164
+ request_tool_call_block_count: z.ZodDefault<z.ZodNumber>;
165
+ request_tool_result_block_count: z.ZodDefault<z.ZodNumber>;
166
+ request_thinking_block_count: z.ZodDefault<z.ZodNumber>;
167
+ response_contains_non_text_content: z.ZodDefault<z.ZodBoolean>;
168
+ response_image_block_count: z.ZodDefault<z.ZodNumber>;
169
+ response_video_block_count: z.ZodDefault<z.ZodNumber>;
170
+ response_tool_call_block_count: z.ZodDefault<z.ZodNumber>;
171
+ response_tool_result_block_count: z.ZodDefault<z.ZodNumber>;
172
+ response_thinking_block_count: z.ZodDefault<z.ZodNumber>;
173
+ request_truncated: z.ZodDefault<z.ZodBoolean>;
174
+ response_truncated: z.ZodDefault<z.ZodBoolean>;
175
+ lossy_source: z.ZodDefault<z.ZodBoolean>;
176
+ lossy_reasons: z.ZodDefault<z.ZodArray<z.ZodString>>;
177
+ dataset_label: z.ZodOptional<z.ZodString>;
178
+ dataset_source_system: z.ZodOptional<z.ZodString>;
179
+ dataset_source_bucket: z.ZodOptional<z.ZodString>;
180
+ dataset_source_file: z.ZodOptional<z.ZodString>;
181
+ dataset_has_reasoning: z.ZodOptional<z.ZodBoolean>;
182
+ dataset_reasoning_chars: z.ZodOptional<z.ZodNumber>;
183
+ }, z.core.$strip>;
184
/**
 * Declared type of the top-level dataset record schema.
 *
 * A record has a required `id`, an array of `messages`, a defaulted `tools`
 * array and a `meta` object. `request_id`, the `label` / `source_*` strings
 * and the `agentic_label` summary object are optional.
 *
 * The message, tool-spec and meta types are referenced with `typeof` on the
 * declarations in this file, which resolves to the same types as the fully
 * expanded inline form.
 */
export declare const Qwen35RecordSchema: z.ZodObject<{
    id: z.ZodString;
    request_id: z.ZodOptional<z.ZodString>;
    messages: z.ZodArray<typeof Qwen35MessageSchema>;
    tools: z.ZodDefault<z.ZodArray<typeof Qwen35ToolSpecSchema>>;
    meta: typeof Qwen35MetaSchema;
    label: z.ZodOptional<z.ZodString>;
    source_system: z.ZodOptional<z.ZodString>;
    source_bucket: z.ZodOptional<z.ZodString>;
    source_file: z.ZodOptional<z.ZodString>;
    agentic_label: z.ZodOptional<z.ZodObject<{
        label: z.ZodString;
        tool_call_count: z.ZodOptional<z.ZodNumber>;
        tool_message_count: z.ZodOptional<z.ZodNumber>;
        dialogue_rounds_est: z.ZodOptional<z.ZodNumber>;
        reasoning_chars: z.ZodOptional<z.ZodNumber>;
        has_reasoning: z.ZodOptional<z.ZodBoolean>;
        lossy_source: z.ZodOptional<z.ZodBoolean>;
        lossy_reasons: z.ZodOptional<z.ZodArray<z.ZodString>>;
    }, z.core.$strip>>;
}, z.core.$strip>;
338
/** Record type inferred from `Qwen35RecordSchema` via `z.infer`. */
+ export type Qwen35Record = z.infer<typeof Qwen35RecordSchema>;
@@ -0,0 +1,139 @@
1
+ import { z } from 'zod';
2
/**
 * Plain-text content block: the literal tag `type: 'text'` together with a
 * required string `text`.
 */
export const Qwen35TextBlockSchema = z.object({ type: z.literal('text'), text: z.string() });
6
/**
 * Image content block. Only the `type: 'image'` tag is required; the URL,
 * placeholder flag/token, source kind and free-form metadata are all optional.
 */
export const Qwen35ImageBlockSchema = z.object({
    type: z.literal('image'),
    image_url: z.optional(z.string()),
    placeholder: z.optional(z.boolean()),
    placeholder_token: z.optional(z.string()),
    source_kind: z.optional(z.string()),
    metadata: z.optional(z.record(z.string(), z.unknown())),
});
14
// Field definitions for a video block, kept in declaration order.
const videoBlockShape = {
    type: z.literal('video'),
    video_url: z.string().optional(),
    placeholder: z.boolean().optional(),
    placeholder_token: z.string().optional(),
    source_kind: z.string().optional(),
    metadata: z.record(z.string(), z.unknown()).optional(),
};

/**
 * Video content block. Only the `type: 'video'` tag is required; the URL,
 * placeholder flag/token, source kind and free-form metadata are all optional.
 */
export const Qwen35VideoBlockSchema = z.object(videoBlockShape);
22
// One structured content item: a text, image, or video block.
const contentBlockSchema = z.union([
    Qwen35TextBlockSchema,
    Qwen35ImageBlockSchema,
    Qwen35VideoBlockSchema,
]);

/**
 * Message content: either a bare string or an array of structured blocks
 * (text, image, or video).
 */
export const Qwen35ContentSchema = z.union([z.string(), z.array(contentBlockSchema)]);
26
// The callee of a tool call: a required name plus an arguments record that
// defaults to an empty object when omitted.
const toolCallFunctionSchema = z.object({
    name: z.string(),
    arguments: z.record(z.string(), z.unknown()).default({}),
});

/**
 * A single tool call emitted by the assistant. `type` must be `'function'`
 * and defaults to it when absent; `id` is optional.
 */
export const Qwen35ToolCallSchema = z.object({
    type: z.literal('function').default('function'),
    id: z.string().optional(),
    function: toolCallFunctionSchema,
});
34
/**
 * Specification of a tool: a required `name`, with an optional
 * `description` and an optional free-form `parameters` record.
 */
export const Qwen35ToolSpecSchema = z.object({
    name: z.string(),
    description: z.optional(z.string()),
    parameters: z.optional(z.record(z.string(), z.unknown())),
});
39
// Per-role message variants; each is selected by its literal `role`.
const systemMessageSchema = z.object({
    role: z.literal('system'),
    content: Qwen35ContentSchema,
});
const userMessageSchema = z.object({
    role: z.literal('user'),
    content: Qwen35ContentSchema,
});
// Assistant turns may also carry reasoning text and tool calls.
const assistantMessageSchema = z.object({
    role: z.literal('assistant'),
    content: Qwen35ContentSchema,
    reasoning_content: z.string().optional(),
    tool_calls: z.array(Qwen35ToolCallSchema).optional(),
});
// Tool results may name the call they answer and the tool that produced them.
const toolMessageSchema = z.object({
    role: z.literal('tool'),
    content: Qwen35ContentSchema,
    tool_call_id: z.string().optional(),
    name: z.string().optional(),
});

/**
 * A chat message, discriminated on `role`: `system`, `user`, `assistant`,
 * or `tool`.
 */
export const Qwen35MessageSchema = z.discriminatedUnion('role', [
    systemMessageSchema,
    userMessageSchema,
    assistantMessageSchema,
    toolMessageSchema,
]);
55
// Factories for the field shapes that repeat throughout the meta schema.
// Each call builds a fresh schema, so no instance is shared between fields.
const metaCount = () => z.number().int().nonnegative();
const metaFlag = () => z.boolean().default(false);
const metaNullableText = () => z.string().nullable().optional();
const metaLooseValue = () => z.union([z.string(), z.array(z.unknown()), z.record(z.string(), z.unknown())]).optional();

/**
 * Per-record metadata describing the captured request/response.
 *
 * `endpoint`, `status` (an integer from 100 to 599) and `ts` are required.
 * Block counters default to 0 and boolean flags default to `false`;
 * `lossy_reasons` defaults to an empty array. All other fields are optional.
 */
export const Qwen35MetaSchema = z.object({
    endpoint: z.string(),
    status: z.number().int().min(100).max(599),
    ts: z.string(),
    key: z.string().optional(),
    source: z.string().optional(),
    requested_model: metaNullableText(),
    actual_model: metaNullableText(),
    stream: z.boolean().optional(),
    thinking_level: metaNullableText(),
    reasoning_summary_mode: metaLooseValue(),
    thinking_type: metaNullableText(),
    thinking_budget_tokens: metaCount().nullable().optional(),
    max_output_tokens: metaCount().nullable().optional(),
    tool_spec_count: metaCount().optional(),
    tool_choice: metaLooseValue(),
    // Request-side content statistics.
    request_contains_non_text_content: metaFlag(),
    request_image_block_count: metaCount().default(0),
    request_video_block_count: metaCount().default(0),
    request_tool_call_block_count: metaCount().default(0),
    request_tool_result_block_count: metaCount().default(0),
    request_thinking_block_count: metaCount().default(0),
    // Response-side content statistics.
    response_contains_non_text_content: metaFlag(),
    response_image_block_count: metaCount().default(0),
    response_video_block_count: metaCount().default(0),
    response_tool_call_block_count: metaCount().default(0),
    response_tool_result_block_count: metaCount().default(0),
    response_thinking_block_count: metaCount().default(0),
    request_truncated: metaFlag(),
    response_truncated: metaFlag(),
    lossy_source: metaFlag(),
    lossy_reasons: z.array(z.string()).default([]),
    dataset_label: z.string().optional(),
    dataset_source_system: z.string().optional(),
    dataset_source_bucket: z.string().optional(),
    dataset_source_file: z.string().optional(),
    dataset_has_reasoning: z.boolean().optional(),
    dataset_reasoning_chars: metaCount().optional(),
});
94
/**
 * Top-level dataset record: an `id`, at least one message, the tool specs
 * (default `[]`), the capture `meta`, and optional labelling fields.
 *
 * Beyond the per-field shape, the following cross-field rules are enforced:
 *  - at least one message has role `user`;
 *  - `system` messages appear only before the first non-system message
 *    (reported once, at the first offending message);
 *  - no assistant `reasoning_content` contains a `<think>` or `</think>` tag
 *    (every offending message is reported, including those that follow a
 *    misplaced system message);
 *  - `meta.lossy_source === true` requires a non-empty `meta.lossy_reasons`.
 *
 * Each custom issue carries a `path` pointing at the offending message or
 * field so callers can locate the problem.
 */
export const Qwen35RecordSchema = z.object({
    id: z.string(),
    request_id: z.string().optional(),
    messages: z.array(Qwen35MessageSchema).min(1),
    tools: z.array(Qwen35ToolSpecSchema).default([]),
    meta: Qwen35MetaSchema,
    label: z.string().optional(),
    source_system: z.string().optional(),
    source_bucket: z.string().optional(),
    source_file: z.string().optional(),
    agentic_label: z
        .object({
            label: z.string(),
            tool_call_count: z.number().int().nonnegative().optional(),
            tool_message_count: z.number().int().nonnegative().optional(),
            dialogue_rounds_est: z.number().int().nonnegative().optional(),
            reasoning_chars: z.number().int().nonnegative().optional(),
            has_reasoning: z.boolean().optional(),
            lossy_source: z.boolean().optional(),
            lossy_reasons: z.array(z.string()).optional(),
        })
        .optional(),
}).superRefine((record, ctx) => {
    const seenUser = record.messages.some((message) => message.role === 'user');
    if (!seenUser) {
        ctx.addIssue({
            code: 'custom',
            message: 'at least one user message is required',
            path: ['messages'],
        });
    }

    let seenNonSystem = false;
    // The ordering violation is reported once only; the loop keeps running so
    // the reasoning_content check still covers every later assistant message.
    let orderingReported = false;
    for (const [index, message] of record.messages.entries()) {
        if (message.role !== 'system') {
            seenNonSystem = true;
        }
        else if (seenNonSystem && !orderingReported) {
            orderingReported = true;
            ctx.addIssue({
                code: 'custom',
                message: 'system messages must appear only at the beginning',
                path: ['messages', index],
            });
        }
        if (message.role === 'assistant' && typeof message.reasoning_content === 'string') {
            const reasoning = message.reasoning_content;
            if (reasoning.includes('<think>') || reasoning.includes('</think>')) {
                ctx.addIssue({
                    code: 'custom',
                    message: 'reasoning_content must not include <think> wrappers',
                    path: ['messages', index, 'reasoning_content'],
                });
            }
        }
    }

    if (record.meta.lossy_source && record.meta.lossy_reasons.length === 0) {
        ctx.addIssue({
            code: 'custom',
            message: 'lossy_source requires lossy_reasons',
            path: ['meta', 'lossy_reasons'],
        });
    }
});
@@ -0,0 +1,2 @@
1
+ import { type Qwen35Record } from '../schemas/qwen35.js';
2
/**
 * Collects prompt-only records from the `.jsonl` files found under `root`.
 *
 * @param root Directory that is searched recursively for `.jsonl` files.
 * @returns A promise resolving to the collected records.
 */
+ export declare function collectClaudePromptOnlyRecords(root: string): Promise<Qwen35Record[]>;
@@ -0,0 +1,64 @@
1
+ import fg from 'fast-glob';
2
+ import { z } from 'zod';
3
+ import { Qwen35RecordSchema } from '../schemas/qwen35.js';
4
+ import { readJsonl } from '../utils/jsonl.js';
5
// A raw JSONL row is only required to be an object with string keys; the
// individual fields are narrowed by hand below.
const EntrySchema = z.record(z.string(), z.unknown());

/**
 * Builds one single-message record for every user prompt found in the
 * `.jsonl` files under `root`.
 *
 * Files are visited in sorted path order. An entry is kept only when its
 * `type` is `'user'` and `message.content` is a non-empty string; entries
 * whose content is not a string are skipped. Every record is marked
 * `lossy_source` because only the prompt is available, not the assistant
 * side of the exchange.
 *
 * @param root Directory searched recursively for `.jsonl` files.
 * @returns The validated records, in file order then entry order.
 * @throws If a row is not an object, or a built record fails
 *   `Qwen35RecordSchema` validation (both use `.parse`).
 */
export async function collectClaudePromptOnlyRecords(root) {
    const files = await fg('**/*.jsonl', { cwd: root, absolute: true, onlyFiles: true });
    const records = [];
    for (const file of files.sort()) {
        const entries = (await readJsonl(file)).map((row) => EntrySchema.parse(row));
        for (const entry of entries) {
            if (entry.type !== 'user')
                continue;
            const message = isRecord(entry.message) ? entry.message : {};
            const content = asString(message.content);
            if (!content)
                continue;
            const sessionId = asString(entry.sessionId);
            // Prefer the prompt id; fall back to the entry uuid when absent.
            const promptId = asString(entry.promptId) ?? asString(entry.uuid);
            // Missing values become empty strings (as `ts` does below) so the
            // literal text "undefined" never ends up in stored data.
            const cwd = asString(entry.cwd) ?? '';
            const entrypoint = asString(entry.entrypoint) ?? '';
            records.push(Qwen35RecordSchema.parse({
                id: `${sessionId ?? file}:${promptId ?? 'prompt'}`,
                request_id: promptId,
                messages: [{ role: 'user', content }],
                tools: [],
                meta: {
                    endpoint: 'claude/prompt_history',
                    status: 200,
                    ts: asString(entry.timestamp) ?? '',
                    key: sessionId,
                    source: `claude:session=${sessionId ?? ''}:cwd=${cwd}:entrypoint=${entrypoint}`,
                    requested_model: undefined,
                    actual_model: undefined,
                    stream: false,
                    thinking_level: undefined,
                    reasoning_summary_mode: 'claude_prompt_only',
                    thinking_type: 'prompt_history_only',
                    tool_spec_count: 0,
                    tool_choice: { mode: 'prompt_only' },
                    request_contains_non_text_content: false,
                    request_image_block_count: 0,
                    request_video_block_count: 0,
                    request_tool_call_block_count: 0,
                    request_tool_result_block_count: 0,
                    request_thinking_block_count: 0,
                    response_contains_non_text_content: false,
                    response_image_block_count: 0,
                    response_video_block_count: 0,
                    response_tool_call_block_count: 0,
                    response_tool_result_block_count: 0,
                    response_thinking_block_count: 0,
                    request_truncated: false,
                    response_truncated: false,
                    lossy_source: true,
                    lossy_reasons: ['prompt_history_only', 'assistant_trace_unavailable'],
                },
            }));
        }
    }
    return records;
}
59
/**
 * Narrows an arbitrary value to a string.
 *
 * @param {unknown} value Value to inspect.
 * @returns {string | undefined} `value` itself when it is a string
 *   (including the empty string), otherwise `undefined`.
 */
function asString(value) {
    if (typeof value === 'string') {
        return value;
    }
    return undefined;
}
62
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param {unknown} value Value to inspect.
 * @returns {boolean} `true` for truthy values whose `typeof` is `'object'`
 *   and that are not arrays; `false` for everything else.
 */
function isRecord(value) {
    // Falsy values (null, undefined, 0, '', false) can never be records.
    if (!value) {
        return false;
    }
    if (typeof value !== 'object') {
        return false;
    }
    return !Array.isArray(value);
}
@@ -0,0 +1,2 @@
1
+ import { type Qwen35Record } from '../schemas/qwen35.js';
2
/**
 * Declared to take a `root` path and resolve to an array of `Qwen35Record`.
 * NOTE(review): the implementation is not visible here; presumably it mirrors
 * the other collectors and scans `root` for source files — confirm.
 */
+ export declare function collectCodexRecords(root: string): Promise<Qwen35Record[]>;