@ekairos/dataset 1.22.34-beta.development.0 → 1.22.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/README.md +347 -0
  2. package/dist/agents.d.ts +8 -0
  3. package/dist/agents.js +8 -0
  4. package/dist/builder/agentMaterializers.d.ts +9 -0
  5. package/dist/builder/agentMaterializers.js +10 -0
  6. package/dist/builder/context.d.ts +15 -0
  7. package/dist/builder/context.js +251 -0
  8. package/dist/builder/instructions.d.ts +5 -0
  9. package/dist/builder/instructions.js +40 -0
  10. package/dist/builder/materialize.d.ts +83 -0
  11. package/dist/builder/materialize.js +548 -0
  12. package/dist/builder/materializeQuery.d.ts +12 -0
  13. package/dist/builder/materializeQuery.js +31 -0
  14. package/dist/builder/persistence.d.ts +22 -0
  15. package/dist/builder/persistence.js +153 -0
  16. package/dist/builder/rows.d.ts +7 -0
  17. package/dist/builder/rows.js +56 -0
  18. package/dist/builder/schemaInference.d.ts +3 -0
  19. package/dist/builder/schemaInference.js +61 -0
  20. package/dist/builder/types.d.ts +140 -0
  21. package/dist/builder/types.js +1 -0
  22. package/dist/clearDataset.tool.d.ts +2 -3
  23. package/dist/clearDataset.tool.js +13 -17
  24. package/dist/completeDataset.steps.d.ts +117 -0
  25. package/dist/completeDataset.steps.js +487 -0
  26. package/dist/completeDataset.tool.d.ts +132 -7
  27. package/dist/completeDataset.tool.js +46 -192
  28. package/dist/contextResources.d.ts +31 -0
  29. package/dist/contextResources.js +151 -0
  30. package/dist/contextWorkspace.d.ts +79 -0
  31. package/dist/contextWorkspace.js +234 -0
  32. package/dist/dataset/steps.d.ts +39 -15
  33. package/dist/dataset/steps.js +96 -39
  34. package/dist/dataset.d.ts +3 -67
  35. package/dist/dataset.js +129 -520
  36. package/dist/datasetFiles.d.ts +5 -1
  37. package/dist/datasetFiles.js +29 -27
  38. package/dist/domain.d.ts +1 -2
  39. package/dist/domain.js +1 -6
  40. package/dist/executeCommand.tool.d.ts +2 -30
  41. package/dist/executeCommand.tool.js +165 -39
  42. package/dist/file/file-dataset.agent.d.ts +19 -56
  43. package/dist/file/file-dataset.agent.js +176 -132
  44. package/dist/file/file-dataset.steps.d.ts +27 -0
  45. package/dist/file/file-dataset.steps.js +47 -0
  46. package/dist/file/file-dataset.types.d.ts +64 -0
  47. package/dist/file/file-dataset.types.js +1 -0
  48. package/dist/file/filepreview.d.ts +5 -35
  49. package/dist/file/filepreview.js +60 -107
  50. package/dist/file/filepreview.types.d.ts +31 -0
  51. package/dist/file/filepreview.types.js +1 -0
  52. package/dist/file/generateSchema.tool.d.ts +2 -3
  53. package/dist/file/generateSchema.tool.js +11 -15
  54. package/dist/file/index.d.ts +1 -2
  55. package/dist/file/index.js +1 -18
  56. package/dist/file/prompts.d.ts +2 -3
  57. package/dist/file/prompts.js +134 -27
  58. package/dist/file/scripts.generated.d.ts +1 -0
  59. package/dist/file/scripts.generated.js +11 -0
  60. package/dist/file/steps.d.ts +1 -2
  61. package/dist/file/steps.js +9 -7
  62. package/dist/id.d.ts +1 -0
  63. package/dist/id.js +10 -0
  64. package/dist/index.d.ts +8 -7
  65. package/dist/index.js +8 -23
  66. package/dist/materializeDataset.tool.d.ts +52 -32
  67. package/dist/materializeDataset.tool.js +81 -65
  68. package/dist/query/index.d.ts +1 -2
  69. package/dist/query/index.js +1 -18
  70. package/dist/query/queryDomain.d.ts +3 -4
  71. package/dist/query/queryDomain.js +3 -40
  72. package/dist/query/queryDomain.step.d.ts +1 -1
  73. package/dist/query/queryDomain.step.js +13 -13
  74. package/dist/sandbox/steps.d.ts +23 -15
  75. package/dist/sandbox/steps.js +73 -76
  76. package/dist/sandbox.steps.d.ts +1 -2
  77. package/dist/sandbox.steps.js +1 -18
  78. package/dist/schema.d.ts +13 -13
  79. package/dist/schema.js +25 -37
  80. package/dist/service.d.ts +8 -5
  81. package/dist/service.js +70 -15
  82. package/dist/skill.d.ts +0 -1
  83. package/dist/skill.js +12 -17
  84. package/dist/transform/filepreview.d.ts +2 -3
  85. package/dist/transform/filepreview.js +9 -26
  86. package/dist/transform/index.d.ts +2 -3
  87. package/dist/transform/index.js +2 -8
  88. package/dist/transform/prompts.d.ts +1 -34
  89. package/dist/transform/prompts.js +58 -43
  90. package/dist/transform/transform-dataset.agent.d.ts +20 -45
  91. package/dist/transform/transform-dataset.agent.js +146 -89
  92. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  93. package/dist/transform/transform-dataset.steps.js +61 -0
  94. package/dist/transform/transform-dataset.types.d.ts +95 -0
  95. package/dist/transform/transform-dataset.types.js +1 -0
  96. package/dist/transform/transformDataset.d.ts +3 -3
  97. package/dist/transform/transformDataset.js +15 -18
  98. package/dist/writeDatasetRows.tool.d.ts +188 -0
  99. package/dist/writeDatasetRows.tool.js +258 -0
  100. package/package.json +36 -11
  101. package/dist/clearDataset.tool.d.ts.map +0 -1
  102. package/dist/clearDataset.tool.js.map +0 -1
  103. package/dist/completeDataset.tool.d.ts.map +0 -1
  104. package/dist/completeDataset.tool.js.map +0 -1
  105. package/dist/dataset/steps.d.ts.map +0 -1
  106. package/dist/dataset/steps.js.map +0 -1
  107. package/dist/dataset.d.ts.map +0 -1
  108. package/dist/dataset.js.map +0 -1
  109. package/dist/datasetFiles.d.ts.map +0 -1
  110. package/dist/datasetFiles.js.map +0 -1
  111. package/dist/domain.d.ts.map +0 -1
  112. package/dist/domain.js.map +0 -1
  113. package/dist/executeCommand.tool.d.ts.map +0 -1
  114. package/dist/executeCommand.tool.js.map +0 -1
  115. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  116. package/dist/file/file-dataset.agent.js.map +0 -1
  117. package/dist/file/filepreview.d.ts.map +0 -1
  118. package/dist/file/filepreview.js.map +0 -1
  119. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  120. package/dist/file/generateSchema.tool.js.map +0 -1
  121. package/dist/file/index.d.ts.map +0 -1
  122. package/dist/file/index.js.map +0 -1
  123. package/dist/file/prompts.d.ts.map +0 -1
  124. package/dist/file/prompts.js.map +0 -1
  125. package/dist/file/steps.d.ts.map +0 -1
  126. package/dist/file/steps.js.map +0 -1
  127. package/dist/index.d.ts.map +0 -1
  128. package/dist/index.js.map +0 -1
  129. package/dist/materializeDataset.tool.d.ts.map +0 -1
  130. package/dist/materializeDataset.tool.js.map +0 -1
  131. package/dist/query/index.d.ts.map +0 -1
  132. package/dist/query/index.js.map +0 -1
  133. package/dist/query/queryDomain.d.ts.map +0 -1
  134. package/dist/query/queryDomain.js.map +0 -1
  135. package/dist/query/queryDomain.step.d.ts.map +0 -1
  136. package/dist/query/queryDomain.step.js.map +0 -1
  137. package/dist/sandbox/steps.d.ts.map +0 -1
  138. package/dist/sandbox/steps.js.map +0 -1
  139. package/dist/sandbox.steps.d.ts.map +0 -1
  140. package/dist/sandbox.steps.js.map +0 -1
  141. package/dist/schema.d.ts.map +0 -1
  142. package/dist/schema.js.map +0 -1
  143. package/dist/service.d.ts.map +0 -1
  144. package/dist/service.js.map +0 -1
  145. package/dist/skill.d.ts.map +0 -1
  146. package/dist/skill.js.map +0 -1
  147. package/dist/transform/filepreview.d.ts.map +0 -1
  148. package/dist/transform/filepreview.js.map +0 -1
  149. package/dist/transform/index.d.ts.map +0 -1
  150. package/dist/transform/index.js.map +0 -1
  151. package/dist/transform/prompts.d.ts.map +0 -1
  152. package/dist/transform/prompts.js.map +0 -1
  153. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  154. package/dist/transform/transform-dataset.agent.js.map +0 -1
  155. package/dist/transform/transformDataset.d.ts.map +0 -1
  156. package/dist/transform/transformDataset.js.map +0 -1
package/dist/dataset.js CHANGED
@@ -1,506 +1,63 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.dataset = dataset;
7
- const admin_1 = require("@instantdb/admin");
8
- const runtime_1 = require("@ekairos/domain/runtime");
9
- const runtime_2 = require("@ekairos/events/runtime");
10
- const ajv_1 = __importDefault(require("ajv"));
11
- const file_dataset_agent_1 = require("./file/file-dataset.agent");
12
- const transform_dataset_agent_1 = require("./transform/transform-dataset.agent");
13
- const service_1 = require("./service");
14
- const ajv = new ajv_1.default({ allErrors: true, strict: false });
15
- function defaultTextSourceName(source) {
16
- if (source.name?.trim())
17
- return source.name.trim();
18
- const mimeType = String(source.mimeType ?? "").toLowerCase();
19
- if (mimeType.includes("csv"))
20
- return "source.csv";
21
- if (mimeType.includes("json"))
22
- return "source.json";
23
- if (mimeType.includes("yaml") || mimeType.includes("yml"))
24
- return "source.yaml";
25
- return "source.txt";
26
- }
27
- function inferJsonSchemaType(value) {
28
- if (value === null)
29
- return { type: "null" };
30
- if (Array.isArray(value))
31
- return { type: "array" };
32
- switch (typeof value) {
33
- case "number":
34
- return { type: "number" };
35
- case "boolean":
36
- return { type: "boolean" };
37
- case "object":
38
- return { type: "object", additionalProperties: true };
39
- default:
40
- return { type: "string" };
41
- }
42
- }
43
- function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
44
- const properties = {};
45
- const required = [];
46
- const keys = new Set();
47
- for (const row of rows) {
48
- if (!row || typeof row !== "object")
49
- continue;
50
- for (const key of Object.keys(row)) {
51
- keys.add(key);
52
- }
53
- }
54
- for (const key of keys) {
55
- const values = rows.map((row) => (row && typeof row === "object" ? row[key] : undefined));
56
- const firstDefined = values.find((value) => value !== undefined);
57
- properties[key] = {
58
- ...inferJsonSchemaType(firstDefined),
59
- description: `${key} value`,
60
- };
61
- if (values.every((value) => value !== undefined)) {
62
- required.push(key);
63
- }
64
- }
65
- return {
66
- title,
67
- description,
68
- schema: {
69
- type: "object",
70
- additionalProperties: false,
71
- properties,
72
- required,
73
- },
74
- };
75
- }
76
- function validateRows(rows, schema) {
77
- const validator = ajv.compile(schema.schema);
78
- for (const row of rows) {
79
- const valid = validator(row);
80
- if (!valid) {
81
- const error = validator.errors?.map((entry) => entry.message || "validation_error").join("; ");
82
- throw new Error(error || "dataset_schema_validation_failed");
83
- }
84
- }
85
- }
86
- function rowsToJsonl(rows) {
87
- return rows
88
- .map((row) => JSON.stringify({
89
- type: "row",
90
- data: row,
91
- }))
92
- .join("\n")
93
- .concat(rows.length > 0 ? "\n" : "");
94
- }
95
- function normalizeQueryRows(result) {
96
- if (!result || typeof result !== "object")
97
- return [];
98
- const entries = Object.entries(result);
99
- if (entries.length === 0)
100
- return [];
101
- if (entries.length === 1) {
102
- const [key, value] = entries[0];
103
- if (Array.isArray(value)) {
104
- return value.map((row) => (row && typeof row === "object" ? row : { value: row }));
105
- }
106
- if (value && typeof value === "object") {
107
- return [value];
108
- }
109
- return [{ [key]: value }];
110
- }
111
- const rows = [];
112
- for (const [key, value] of entries) {
113
- if (Array.isArray(value)) {
114
- for (const row of value) {
115
- if (row && typeof row === "object") {
116
- rows.push({ __entity: key, ...row });
117
- }
118
- else {
119
- rows.push({ __entity: key, value: row });
120
- }
121
- }
122
- continue;
123
- }
124
- if (value && typeof value === "object") {
125
- rows.push({ __entity: key, ...value });
126
- continue;
127
- }
128
- rows.push({ __entity: key, value });
129
- }
130
- return rows;
131
- }
132
- function getDomainDescriptor(domain) {
133
- const meta = domain?.meta ?? {};
134
- const context = typeof domain?.context === "function" ? domain.context() : {};
135
- const name = String(meta?.name ?? context?.name ?? "domain");
136
- const packageName = String(meta?.packageName ?? "");
137
- return {
138
- domainName: name,
139
- ...(packageName ? { domainPackageName: packageName } : {}),
140
- };
141
- }
142
- function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
143
- return `${targetDatasetId}__${sourceKind}_${index}`;
144
- }
145
- function buildFileDefaultInstructions(schema) {
146
- if (schema) {
147
- return "Create a dataset from the source file and ensure each output row matches the provided dataset schema exactly.";
148
- }
149
- return "Create a dataset representing the source content as structured rows.";
150
- }
151
- function buildRawSourceInstructions(sourceKind) {
152
- if (sourceKind === "text") {
153
- return "Create a dataset representing the raw text content as structured rows without applying business transformations.";
154
- }
155
- return "Create a dataset representing the raw file content as structured rows without applying business transformations.";
156
- }
157
- function buildTransformInstructions(sourceCount, userInstructions, schema) {
158
- const explicit = String(userInstructions ?? "").trim();
159
- if (explicit)
160
- return explicit;
161
- if (sourceCount > 1) {
162
- if (schema) {
163
- return "Combine the source datasets into a new dataset that matches the provided output schema exactly.";
164
- }
165
- return "Combine the source datasets into one coherent dataset.";
166
- }
167
- if (schema) {
168
- return "Transform the source dataset into a new dataset that matches the provided output schema exactly.";
169
- }
170
- return "Transform the source dataset into a new useful dataset.";
171
- }
172
- async function getDatasetDb(env) {
173
- const runtime = (await (0, runtime_2.getContextRuntime)(env));
174
- return runtime.db;
175
- }
176
- async function createOrUpdateDatasetMetadata(env, params) {
177
- const db = await getDatasetDb(env);
178
- const service = new service_1.DatasetService(db);
179
- const result = await service.createDataset({
180
- id: params.datasetId,
181
- title: params.title ?? params.datasetId,
182
- instructions: params.instructions ?? "",
183
- sources: params.sources,
184
- sourceKinds: params.sourceKinds,
185
- analysis: params.analysis,
186
- schema: params.schema,
187
- status: params.status ?? "building",
188
- organizationId: env.orgId,
189
- });
190
- if (!result.ok) {
191
- throw new Error(result.error);
192
- }
193
- }
194
- async function materializeRowsToDataset(env, params) {
195
- if (params.first && params.rows.length > 1) {
196
- throw new Error("dataset_first_expected_zero_or_one_row");
197
- }
198
- const resolvedSchema = params.schema ??
199
- inferDatasetSchema(params.rows, params.title ? `${params.title}Row` : "DatasetRow", params.title ? `One row for ${params.title}` : "One dataset row");
200
- validateRows(params.rows, resolvedSchema);
201
- await createOrUpdateDatasetMetadata(env, {
202
- datasetId: params.datasetId,
203
- sandboxId: params.sandboxId,
204
- title: params.title,
205
- instructions: params.instructions,
206
- sources: params.sources,
207
- sourceKinds: params.sourceKinds,
208
- analysis: params.analysis,
209
- schema: resolvedSchema,
210
- status: "building",
211
- });
212
- const db = await getDatasetDb(env);
213
- const service = new service_1.DatasetService(db);
214
- const uploadResult = await service.uploadDatasetOutputFile({
215
- datasetId: params.datasetId,
216
- fileBuffer: Buffer.from(rowsToJsonl(params.rows), "utf-8"),
217
- });
218
- if (!uploadResult.ok) {
219
- throw new Error(uploadResult.error);
220
- }
221
- const statusResult = await service.updateDatasetStatus({
222
- datasetId: params.datasetId,
223
- status: "completed",
224
- calculatedTotalRows: params.rows.length,
225
- actualGeneratedRowCount: params.rows.length,
226
- });
227
- if (!statusResult.ok) {
228
- throw new Error(statusResult.error);
229
- }
230
- return params.datasetId;
231
- }
232
- async function uploadInlineTextSource(env, datasetId, source) {
233
- const db = await getDatasetDb(env);
234
- const fileName = defaultTextSourceName(source);
235
- const storagePath = `/dataset/source/${datasetId}/${Date.now()}-${fileName}`;
236
- const uploadResult = await db.storage.uploadFile(storagePath, Buffer.from(source.text, "utf-8"), {
237
- contentType: source.mimeType ?? "text/plain",
238
- contentDisposition: fileName,
239
- });
240
- const fileId = uploadResult?.data?.id;
241
- if (!fileId) {
242
- throw new Error("dataset_text_source_upload_failed");
243
- }
244
- return fileId;
245
- }
246
- async function finalizeBuildResult(env, datasetId, withFirst) {
247
- const db = await getDatasetDb(env);
248
- const service = new service_1.DatasetService(db);
249
- const datasetResult = await service.getDatasetById(datasetId);
250
- if (!datasetResult.ok) {
251
- throw new Error(datasetResult.error);
252
- }
253
- const previewResult = await service.previewRows(datasetId, 20);
254
- if (!previewResult.ok) {
255
- throw new Error(previewResult.error);
256
- }
257
- const reader = {
258
- async read(cursorOrParams, limit) {
259
- const params = typeof cursorOrParams === "object" && cursorOrParams !== null
260
- ? cursorOrParams
261
- : { cursor: cursorOrParams, limit };
262
- const rowsResult = await service.readRows({
263
- datasetId,
264
- cursor: params.cursor,
265
- limit: params.limit,
266
- });
267
- if (!rowsResult.ok) {
268
- throw new Error(rowsResult.error);
269
- }
270
- return rowsResult.data;
271
- },
272
- };
273
- if (!withFirst) {
274
- return {
275
- datasetId,
276
- dataset: datasetResult.data,
277
- previewRows: previewResult.data,
278
- reader,
279
- };
280
- }
281
- const firstResult = await service.readOne(datasetId);
282
- if (!firstResult.ok) {
283
- throw new Error(firstResult.error);
284
- }
285
- return {
286
- datasetId,
287
- dataset: datasetResult.data,
288
- previewRows: previewResult.data,
289
- reader,
290
- firstRow: firstResult.data,
291
- };
292
- }
293
- async function materializeQuerySource(env, source, params) {
294
- const runtime = await (0, runtime_1.resolveRuntime)(source.domain, env);
295
- const result = await runtime.db.query(source.query);
296
- const rows = normalizeQueryRows(result);
297
- const domainDescriptor = getDomainDescriptor(source.domain);
298
- return await materializeRowsToDataset(env, {
299
- datasetId: params.datasetId,
300
- sandboxId: params.sandboxId,
301
- title: params.title ?? source.title,
302
- instructions: params.instructions,
303
- sources: [
304
- {
305
- kind: "query",
306
- query: source.query,
307
- title: source.title,
308
- explanation: source.explanation,
309
- ...domainDescriptor,
310
- },
311
- ],
312
- sourceKinds: ["query"],
313
- analysis: {
314
- query: source.query,
315
- explanation: source.explanation,
316
- ...domainDescriptor,
317
- },
318
- rows,
319
- schema: params.schema,
320
- inferSchema: !params.schema,
321
- first: params.first,
322
- });
323
- }
324
- async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
325
- if (!state.reactor) {
326
- throw new Error("dataset_reactor_required");
327
- }
328
- if (!state.sandboxId) {
329
- throw new Error("dataset_sandbox_required");
330
- }
331
- const fileId = source.kind === "file"
332
- ? source.fileId
333
- : await uploadInlineTextSource(state.env, targetDatasetId, source);
334
- await createOrUpdateDatasetMetadata(state.env, {
335
- datasetId: targetDatasetId,
336
- sandboxId: state.sandboxId,
337
- title: state.title ?? targetDatasetId,
338
- instructions: state.instructions,
339
- sources: [
340
- source.kind === "file"
341
- ? { kind: "file", fileId: source.fileId, description: source.description }
342
- : {
343
- kind: "text",
344
- mimeType: source.mimeType,
345
- name: source.name,
346
- description: source.description,
347
- },
348
- ],
349
- sourceKinds: [source.kind],
350
- schema: state.outputSchema,
351
- status: "building",
352
- });
353
- const parseStory = (0, file_dataset_agent_1.createFileParseStory)(fileId, {
354
- datasetId: targetDatasetId,
355
- instructions: state.instructions ?? buildFileDefaultInstructions(state.outputSchema),
356
- reactor: state.reactor,
357
- sandboxId: state.sandboxId,
358
- });
359
- await parseStory.parse(state.env);
360
- if (!state.outputSchema) {
361
- const db = await getDatasetDb(state.env);
362
- const service = new service_1.DatasetService(db);
363
- const readResult = await service.readRows({ datasetId: targetDatasetId, cursor: 0, limit: 1000 });
364
- if (!readResult.ok) {
365
- throw new Error(readResult.error);
366
- }
367
- const inferred = inferDatasetSchema(readResult.data.rows, `${targetDatasetId}Row`, "One dataset row");
368
- const updateResult = await service.updateDatasetSchema({
369
- datasetId: targetDatasetId,
370
- schema: inferred,
371
- status: "completed",
372
- });
373
- if (!updateResult.ok) {
374
- throw new Error(updateResult.error);
375
- }
376
- }
377
- if (state.first) {
378
- const db = await getDatasetDb(state.env);
379
- const service = new service_1.DatasetService(db);
380
- const firstResult = await service.readOne(targetDatasetId);
381
- if (!firstResult.ok) {
382
- throw new Error(firstResult.error);
383
- }
384
- }
385
- return targetDatasetId;
386
- }
387
- async function normalizeSourceToDatasetId(state, source, targetDatasetId, sourceIndex) {
388
- if (source.kind === "dataset") {
389
- return source.datasetId;
390
- }
391
- const intermediateDatasetId = makeIntermediateDatasetId(targetDatasetId, source.kind, sourceIndex);
392
- if (source.kind === "query") {
393
- await materializeQuerySource(state.env, source, {
394
- datasetId: intermediateDatasetId,
395
- sandboxId: state.sandboxId,
396
- title: source.title,
397
- first: false,
398
- });
399
- return intermediateDatasetId;
400
- }
401
- await materializeSingleFileLikeSource({
402
- ...state,
403
- outputSchema: undefined,
404
- first: false,
405
- instructions: buildRawSourceInstructions(source.kind),
406
- }, source, intermediateDatasetId);
407
- return intermediateDatasetId;
408
- }
409
- async function materializeDerivedDataset(state, targetDatasetId) {
410
- if (!state.reactor) {
411
- throw new Error("dataset_reactor_required");
412
- }
413
- if (!state.sandboxId) {
414
- throw new Error("dataset_sandbox_required");
415
- }
416
- const normalizedSources = [];
417
- for (let index = 0; index < state.sources.length; index++) {
418
- normalizedSources.push(await normalizeSourceToDatasetId(state, state.sources[index], targetDatasetId, index));
419
- }
420
- const transformSchema = state.outputSchema ??
421
- {
422
- title: "DatasetRow",
423
- description: "One dataset row",
424
- schema: {
425
- type: "object",
426
- additionalProperties: true,
427
- properties: {},
428
- },
429
- };
430
- await createOrUpdateDatasetMetadata(state.env, {
431
- datasetId: targetDatasetId,
432
- sandboxId: state.sandboxId,
433
- title: state.title ?? targetDatasetId,
434
- instructions: state.instructions,
435
- sources: state.sources.map((source) => source.kind === "query"
436
- ? {
437
- kind: "query",
438
- query: source.query,
439
- title: source.title,
440
- explanation: source.explanation,
441
- ...getDomainDescriptor(source.domain),
442
- }
443
- : source),
444
- sourceKinds: state.sources.map((source) => source.kind),
445
- schema: transformSchema,
446
- status: "building",
447
- });
448
- const transformStory = (0, transform_dataset_agent_1.createTransformDatasetStory)({
449
- sourceDatasetIds: normalizedSources,
450
- outputSchema: transformSchema,
451
- instructions: buildTransformInstructions(normalizedSources.length, state.instructions, state.outputSchema),
452
- datasetId: targetDatasetId,
453
- reactor: state.reactor,
454
- sandboxId: state.sandboxId,
455
- });
456
- await transformStory.transform(state.env);
457
- const db = await getDatasetDb(state.env);
458
- const service = new service_1.DatasetService(db);
459
- if (!state.outputSchema) {
460
- const readResult = await service.readRows({ datasetId: targetDatasetId, cursor: 0, limit: 1000 });
461
- if (!readResult.ok) {
462
- throw new Error(readResult.error);
463
- }
464
- const inferred = inferDatasetSchema(readResult.data.rows, `${targetDatasetId}Row`, "One dataset row");
465
- const updateResult = await service.updateDatasetSchema({
466
- datasetId: targetDatasetId,
467
- schema: inferred,
468
- status: "completed",
469
- });
470
- if (!updateResult.ok) {
471
- throw new Error(updateResult.error);
472
- }
473
- }
474
- if (state.first) {
475
- const firstResult = await service.readOne(targetDatasetId);
476
- if (!firstResult.ok) {
477
- throw new Error(firstResult.error);
478
- }
479
- }
480
- return targetDatasetId;
481
- }
482
- function dataset(env) {
1
+ import { buildObjectOutputInstructions } from "./builder/instructions.js";
2
+ import { resolveDatasetResourceContext } from "./builder/context.js";
3
+ import { createDatasetId } from "./id.js";
4
+ import { completeDatasetStep, materializeDerivedDataset, materializeSingleFileLikeResource, } from "./builder/materialize.js";
5
+ import { materializeQueryResource } from "./builder/materializeQuery.js";
6
+ import { createDatasetBuildResult, finalizeBuildResult, } from "./builder/persistence.js";
7
+ export function dataset(runtime, options = {}) {
8
+ const datasetId = normalizeDatasetId(options.datasetId);
9
+ const typedRuntime = runtime;
483
10
  const state = {
484
- env,
485
- sources: [],
11
+ runtime: typedRuntime,
12
+ env: typedRuntime.env,
13
+ resources: [],
14
+ output: "rows",
486
15
  inferSchema: false,
16
+ durable: options.durable,
487
17
  first: false,
488
18
  };
489
19
  const api = {
490
- fromFile(source) {
491
- state.sources.push({ kind: "file", ...source });
20
+ datasetId,
21
+ fromFile(resource) {
22
+ state.resources.push({ kind: "file", ...resource });
492
23
  return api;
493
24
  },
494
- fromText(source) {
495
- state.sources.push({ kind: "text", ...source });
25
+ fromText(resource) {
26
+ state.resources.push({ kind: "text", ...resource });
496
27
  return api;
497
28
  },
498
- fromDataset(source) {
499
- state.sources.push({ kind: "dataset", ...source });
29
+ fromDataset(resource) {
30
+ state.resources.push({ kind: "dataset", ...resource });
500
31
  return api;
501
32
  },
502
- fromQuery(domain, source) {
503
- state.sources.push({ kind: "query", domain, ...source });
33
+ fromContext(context) {
34
+ state.resources.push({ kind: "context", ...context });
35
+ return api;
36
+ },
37
+ from(...resources) {
38
+ for (const resource of resources) {
39
+ if ("kind" in resource) {
40
+ state.resources.push(resource);
41
+ continue;
42
+ }
43
+ if ("fileId" in resource) {
44
+ state.resources.push({ kind: "file", ...resource });
45
+ continue;
46
+ }
47
+ if ("datasetId" in resource) {
48
+ state.resources.push({ kind: "dataset", ...resource });
49
+ continue;
50
+ }
51
+ if ("id" in resource || "key" in resource) {
52
+ state.resources.push({ kind: "context", ...resource });
53
+ continue;
54
+ }
55
+ state.resources.push({ kind: "text", ...resource });
56
+ }
57
+ return api;
58
+ },
59
+ fromQuery(domain, resource) {
60
+ state.resources.push({ kind: "query", domain, ...resource });
504
61
  return api;
505
62
  },
506
63
  title(title) {
@@ -521,6 +78,20 @@ function dataset(env) {
521
78
  state.inferSchema = true;
522
79
  return api;
523
80
  },
81
+ auto() {
82
+ state.outputSchema = undefined;
83
+ state.inferSchema = true;
84
+ return api;
85
+ },
86
+ asRows() {
87
+ state.output = "rows";
88
+ return api;
89
+ },
90
+ asObject() {
91
+ state.output = "object";
92
+ state.first = true;
93
+ return api;
94
+ },
524
95
  instructions(instructions) {
525
96
  state.instructions = instructions;
526
97
  return api;
@@ -534,44 +105,82 @@ function dataset(env) {
534
105
  return api;
535
106
  },
536
107
  async build(options) {
537
- if (state.sources.length === 0) {
538
- throw new Error("dataset_sources_required");
108
+ if (state.resources.length === 0) {
109
+ throw new Error("dataset_resources_required");
539
110
  }
540
- const targetDatasetId = String(options?.datasetId ?? (0, admin_1.id)());
541
- const onlySource = state.sources[0];
542
- const isSingleSource = state.sources.length === 1;
543
- const hasInstructions = Boolean(String(state.instructions ?? "").trim());
544
- if (isSingleSource && onlySource.kind === "query" && !hasInstructions) {
545
- await materializeQuerySource(state.env, onlySource, {
111
+ const targetDatasetId = options?.datasetId
112
+ ? normalizeDatasetId(options.datasetId)
113
+ : datasetId;
114
+ const stateWithBuildOptions = {
115
+ ...state,
116
+ durable: options?.durable ?? state.durable,
117
+ };
118
+ const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
119
+ stateWithBuildOptions.resources = context.resources;
120
+ stateWithBuildOptions.contextId = context.contextId;
121
+ stateWithBuildOptions.contextResources = context.contextResources;
122
+ const effectiveState = stateWithBuildOptions.output === "object"
123
+ ? {
124
+ ...stateWithBuildOptions,
125
+ first: true,
126
+ instructions: buildObjectOutputInstructions(stateWithBuildOptions.instructions),
127
+ }
128
+ : stateWithBuildOptions;
129
+ const onlyResource = effectiveState.resources[0];
130
+ const isSingleResource = effectiveState.resources.length === 1;
131
+ const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
132
+ if (isSingleResource && onlyResource.kind === "query" && !hasInstructions) {
133
+ await materializeQueryResource(effectiveState.runtime, onlyResource, {
546
134
  datasetId: targetDatasetId,
547
- sandboxId: state.sandboxId,
548
- schema: state.outputSchema,
549
- title: state.title ?? onlySource.title,
550
- instructions: state.instructions,
551
- first: state.first,
135
+ sandboxId: effectiveState.sandboxId,
136
+ schema: effectiveState.outputSchema,
137
+ title: effectiveState.title ?? onlyResource.title,
138
+ instructions: effectiveState.instructions,
139
+ first: effectiveState.first,
140
+ contextId: effectiveState.contextId ?? "",
552
141
  });
553
- return await finalizeBuildResult(state.env, targetDatasetId, state.first);
142
+ return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
554
143
  }
555
- if (isSingleSource && (onlySource.kind === "file" || onlySource.kind === "text")) {
556
- if (!state.sandboxId) {
557
- throw new Error("dataset_sandbox_required");
558
- }
559
- if (!state.reactor) {
144
+ if (isSingleResource && (onlyResource.kind === "file" || onlyResource.kind === "text")) {
145
+ if (!effectiveState.reactor) {
560
146
  throw new Error("dataset_reactor_required");
561
147
  }
562
- await materializeSingleFileLikeSource(state, onlySource, targetDatasetId);
563
- return await finalizeBuildResult(state.env, targetDatasetId, state.first);
564
- }
565
- if (!state.sandboxId) {
566
- throw new Error("dataset_sandbox_required");
148
+ await materializeSingleFileLikeResource(effectiveState, onlyResource, targetDatasetId);
149
+ const completed = await completeDatasetStep({
150
+ runtime: effectiveState.runtime,
151
+ datasetId: targetDatasetId,
152
+ schema: effectiveState.outputSchema,
153
+ first: effectiveState.first,
154
+ });
155
+ return finalizeOutputResult(createDatasetBuildResult(effectiveState.runtime, completed), effectiveState.output);
567
156
  }
568
- if (!state.reactor) {
157
+ if (!effectiveState.reactor) {
569
158
  throw new Error("dataset_reactor_required");
570
159
  }
571
- await materializeDerivedDataset(state, targetDatasetId);
572
- return await finalizeBuildResult(state.env, targetDatasetId, state.first);
160
+ await materializeDerivedDataset(effectiveState, targetDatasetId);
161
+ const completed = await completeDatasetStep({
162
+ runtime: effectiveState.runtime,
163
+ datasetId: targetDatasetId,
164
+ schema: effectiveState.outputSchema,
165
+ first: effectiveState.first,
166
+ });
167
+ return finalizeOutputResult(createDatasetBuildResult(effectiveState.runtime, completed), effectiveState.output);
573
168
  },
574
169
  };
575
170
  return api;
576
171
  }
577
- //# sourceMappingURL=dataset.js.map
172
+ function normalizeDatasetId(datasetId) {
173
+ const normalized = String(datasetId ?? createDatasetId()).trim();
174
+ if (!normalized) {
175
+ throw new Error("dataset_id_required");
176
+ }
177
+ return normalized;
178
+ }
179
+ function finalizeOutputResult(result, output) {
180
+ if (output !== "object")
181
+ return result;
182
+ return {
183
+ ...result,
184
+ object: result.firstRow ?? null,
185
+ };
186
+ }