@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (164)
  1. package/README.md +347 -0
  2. package/dist/agents.d.ts +8 -0
  3. package/dist/agents.js +8 -0
  4. package/dist/builder/agentMaterializers.d.ts +9 -0
  5. package/dist/builder/agentMaterializers.js +10 -0
  6. package/dist/builder/context.d.ts +15 -0
  7. package/dist/builder/context.js +251 -0
  8. package/dist/builder/instructions.d.ts +5 -0
  9. package/dist/builder/instructions.js +40 -0
  10. package/dist/builder/materialize.d.ts +83 -0
  11. package/dist/builder/materialize.js +548 -0
  12. package/dist/builder/materializeQuery.d.ts +12 -0
  13. package/dist/builder/materializeQuery.js +31 -0
  14. package/dist/builder/persistence.d.ts +22 -0
  15. package/dist/builder/persistence.js +192 -0
  16. package/dist/builder/rows.d.ts +7 -0
  17. package/dist/builder/rows.js +56 -0
  18. package/dist/builder/schemaInference.d.ts +3 -0
  19. package/dist/builder/schemaInference.js +61 -0
  20. package/dist/builder/types.d.ts +144 -0
  21. package/dist/builder/types.js +1 -0
  22. package/dist/clearDataset.tool.d.ts +2 -3
  23. package/dist/clearDataset.tool.js +13 -17
  24. package/dist/completeDataset.steps.d.ts +117 -0
  25. package/dist/completeDataset.steps.js +537 -0
  26. package/dist/completeDataset.tool.d.ts +132 -7
  27. package/dist/completeDataset.tool.js +46 -192
  28. package/dist/contextResources.d.ts +31 -0
  29. package/dist/contextResources.js +151 -0
  30. package/dist/contextWorkspace.d.ts +79 -0
  31. package/dist/contextWorkspace.js +234 -0
  32. package/dist/dataset/steps.d.ts +39 -15
  33. package/dist/dataset/steps.js +96 -39
  34. package/dist/dataset.d.ts +3 -67
  35. package/dist/dataset.js +129 -521
  36. package/dist/datasetFiles.d.ts +5 -1
  37. package/dist/datasetFiles.js +29 -27
  38. package/dist/defineNotation.tool.d.ts +49 -0
  39. package/dist/defineNotation.tool.js +154 -0
  40. package/dist/domain.d.ts +1 -2
  41. package/dist/domain.js +1 -6
  42. package/dist/executeCommand.tool.d.ts +2 -30
  43. package/dist/executeCommand.tool.js +165 -39
  44. package/dist/file/file-dataset.agent.d.ts +19 -56
  45. package/dist/file/file-dataset.agent.js +181 -134
  46. package/dist/file/file-dataset.steps.d.ts +27 -0
  47. package/dist/file/file-dataset.steps.js +47 -0
  48. package/dist/file/file-dataset.types.d.ts +64 -0
  49. package/dist/file/file-dataset.types.js +1 -0
  50. package/dist/file/filepreview.d.ts +5 -35
  51. package/dist/file/filepreview.js +60 -107
  52. package/dist/file/filepreview.types.d.ts +31 -0
  53. package/dist/file/filepreview.types.js +1 -0
  54. package/dist/file/generateSchema.tool.d.ts +2 -3
  55. package/dist/file/generateSchema.tool.js +11 -15
  56. package/dist/file/index.d.ts +1 -2
  57. package/dist/file/index.js +1 -18
  58. package/dist/file/prompts.d.ts +2 -3
  59. package/dist/file/prompts.js +152 -32
  60. package/dist/file/scripts.generated.d.ts +1 -0
  61. package/dist/file/scripts.generated.js +11 -0
  62. package/dist/file/steps.d.ts +1 -2
  63. package/dist/file/steps.js +9 -7
  64. package/dist/id.d.ts +1 -0
  65. package/dist/id.js +10 -0
  66. package/dist/index.d.ts +9 -7
  67. package/dist/index.js +9 -23
  68. package/dist/materializeDataset.tool.d.ts +51 -31
  69. package/dist/materializeDataset.tool.js +81 -65
  70. package/dist/notation.d.ts +205 -0
  71. package/dist/notation.js +424 -0
  72. package/dist/query/index.d.ts +1 -2
  73. package/dist/query/index.js +1 -18
  74. package/dist/query/queryDomain.d.ts +3 -4
  75. package/dist/query/queryDomain.js +3 -40
  76. package/dist/query/queryDomain.step.d.ts +1 -1
  77. package/dist/query/queryDomain.step.js +24 -13
  78. package/dist/sandbox/steps.d.ts +23 -15
  79. package/dist/sandbox/steps.js +73 -76
  80. package/dist/sandbox.steps.d.ts +1 -2
  81. package/dist/sandbox.steps.js +1 -18
  82. package/dist/schema.d.ts +15 -13
  83. package/dist/schema.js +27 -37
  84. package/dist/service.d.ts +12 -5
  85. package/dist/service.js +88 -15
  86. package/dist/skill.d.ts +0 -1
  87. package/dist/skill.js +12 -17
  88. package/dist/transform/filepreview.d.ts +2 -3
  89. package/dist/transform/filepreview.js +9 -26
  90. package/dist/transform/index.d.ts +2 -3
  91. package/dist/transform/index.js +2 -8
  92. package/dist/transform/prompts.d.ts +1 -34
  93. package/dist/transform/prompts.js +66 -46
  94. package/dist/transform/transform-dataset.agent.d.ts +20 -45
  95. package/dist/transform/transform-dataset.agent.js +151 -91
  96. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  97. package/dist/transform/transform-dataset.steps.js +61 -0
  98. package/dist/transform/transform-dataset.types.d.ts +95 -0
  99. package/dist/transform/transform-dataset.types.js +1 -0
  100. package/dist/transform/transformDataset.d.ts +3 -3
  101. package/dist/transform/transformDataset.js +15 -18
  102. package/dist/writeDatasetRows.tool.d.ts +188 -0
  103. package/dist/writeDatasetRows.tool.js +258 -0
  104. package/package.json +33 -8
  105. package/dist/clearDataset.tool.d.ts.map +0 -1
  106. package/dist/clearDataset.tool.js.map +0 -1
  107. package/dist/completeDataset.tool.d.ts.map +0 -1
  108. package/dist/completeDataset.tool.js.map +0 -1
  109. package/dist/dataset/steps.d.ts.map +0 -1
  110. package/dist/dataset/steps.js.map +0 -1
  111. package/dist/dataset.d.ts.map +0 -1
  112. package/dist/dataset.js.map +0 -1
  113. package/dist/datasetFiles.d.ts.map +0 -1
  114. package/dist/datasetFiles.js.map +0 -1
  115. package/dist/domain.d.ts.map +0 -1
  116. package/dist/domain.js.map +0 -1
  117. package/dist/eventsReactRuntime.d.ts +0 -22
  118. package/dist/eventsReactRuntime.d.ts.map +0 -1
  119. package/dist/eventsReactRuntime.js +0 -29
  120. package/dist/eventsReactRuntime.js.map +0 -1
  121. package/dist/executeCommand.tool.d.ts.map +0 -1
  122. package/dist/executeCommand.tool.js.map +0 -1
  123. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  124. package/dist/file/file-dataset.agent.js.map +0 -1
  125. package/dist/file/filepreview.d.ts.map +0 -1
  126. package/dist/file/filepreview.js.map +0 -1
  127. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  128. package/dist/file/generateSchema.tool.js.map +0 -1
  129. package/dist/file/index.d.ts.map +0 -1
  130. package/dist/file/index.js.map +0 -1
  131. package/dist/file/prompts.d.ts.map +0 -1
  132. package/dist/file/prompts.js.map +0 -1
  133. package/dist/file/steps.d.ts.map +0 -1
  134. package/dist/file/steps.js.map +0 -1
  135. package/dist/index.d.ts.map +0 -1
  136. package/dist/index.js.map +0 -1
  137. package/dist/materializeDataset.tool.d.ts.map +0 -1
  138. package/dist/materializeDataset.tool.js.map +0 -1
  139. package/dist/query/index.d.ts.map +0 -1
  140. package/dist/query/index.js.map +0 -1
  141. package/dist/query/queryDomain.d.ts.map +0 -1
  142. package/dist/query/queryDomain.js.map +0 -1
  143. package/dist/query/queryDomain.step.d.ts.map +0 -1
  144. package/dist/query/queryDomain.step.js.map +0 -1
  145. package/dist/sandbox/steps.d.ts.map +0 -1
  146. package/dist/sandbox/steps.js.map +0 -1
  147. package/dist/sandbox.steps.d.ts.map +0 -1
  148. package/dist/sandbox.steps.js.map +0 -1
  149. package/dist/schema.d.ts.map +0 -1
  150. package/dist/schema.js.map +0 -1
  151. package/dist/service.d.ts.map +0 -1
  152. package/dist/service.js.map +0 -1
  153. package/dist/skill.d.ts.map +0 -1
  154. package/dist/skill.js.map +0 -1
  155. package/dist/transform/filepreview.d.ts.map +0 -1
  156. package/dist/transform/filepreview.js.map +0 -1
  157. package/dist/transform/index.d.ts.map +0 -1
  158. package/dist/transform/index.js.map +0 -1
  159. package/dist/transform/prompts.d.ts.map +0 -1
  160. package/dist/transform/prompts.js.map +0 -1
  161. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  162. package/dist/transform/transform-dataset.agent.js.map +0 -1
  163. package/dist/transform/transformDataset.d.ts.map +0 -1
  164. package/dist/transform/transformDataset.js.map +0 -1
package/dist/dataset.js CHANGED
@@ -1,507 +1,63 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.dataset = dataset;
7
- const admin_1 = require("@instantdb/admin");
8
- const runtime_1 = require("@ekairos/domain/runtime");
9
- const runtime_2 = require("@ekairos/events/runtime");
10
- const ajv_1 = __importDefault(require("ajv"));
11
- const file_dataset_agent_1 = require("./file/file-dataset.agent");
12
- const transform_dataset_agent_1 = require("./transform/transform-dataset.agent");
13
- const service_1 = require("./service");
14
- const ajv = new ajv_1.default({ allErrors: true, strict: false });
15
- function defaultTextSourceName(source) {
16
- if (source.name?.trim())
17
- return source.name.trim();
18
- const mimeType = String(source.mimeType ?? "").toLowerCase();
19
- if (mimeType.includes("csv"))
20
- return "source.csv";
21
- if (mimeType.includes("json"))
22
- return "source.json";
23
- if (mimeType.includes("yaml") || mimeType.includes("yml"))
24
- return "source.yaml";
25
- return "source.txt";
26
- }
27
- function inferJsonSchemaType(value) {
28
- if (value === null)
29
- return { type: "null" };
30
- if (Array.isArray(value))
31
- return { type: "array" };
32
- switch (typeof value) {
33
- case "number":
34
- return { type: "number" };
35
- case "boolean":
36
- return { type: "boolean" };
37
- case "object":
38
- return { type: "object", additionalProperties: true };
39
- default:
40
- return { type: "string" };
41
- }
42
- }
43
- function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
44
- const properties = {};
45
- const required = [];
46
- const keys = new Set();
47
- for (const row of rows) {
48
- if (!row || typeof row !== "object")
49
- continue;
50
- for (const key of Object.keys(row)) {
51
- keys.add(key);
52
- }
53
- }
54
- for (const key of keys) {
55
- const values = rows.map((row) => (row && typeof row === "object" ? row[key] : undefined));
56
- const firstDefined = values.find((value) => value !== undefined);
57
- properties[key] = {
58
- ...inferJsonSchemaType(firstDefined),
59
- description: `${key} value`,
60
- };
61
- if (values.every((value) => value !== undefined)) {
62
- required.push(key);
63
- }
64
- }
65
- return {
66
- title,
67
- description,
68
- schema: {
69
- type: "object",
70
- additionalProperties: false,
71
- properties,
72
- required,
73
- },
74
- };
75
- }
76
- function validateRows(rows, schema) {
77
- const validator = ajv.compile(schema.schema);
78
- for (const row of rows) {
79
- const valid = validator(row);
80
- if (!valid) {
81
- const error = validator.errors?.map((entry) => entry.message || "validation_error").join("; ");
82
- throw new Error(error || "dataset_schema_validation_failed");
83
- }
84
- }
85
- }
86
- function rowsToJsonl(rows) {
87
- return rows
88
- .map((row) => JSON.stringify({
89
- type: "row",
90
- data: row,
91
- }))
92
- .join("\n")
93
- .concat(rows.length > 0 ? "\n" : "");
94
- }
95
- function normalizeQueryRows(result) {
96
- if (!result || typeof result !== "object")
97
- return [];
98
- const entries = Object.entries(result);
99
- if (entries.length === 0)
100
- return [];
101
- if (entries.length === 1) {
102
- const [key, value] = entries[0];
103
- if (Array.isArray(value)) {
104
- return value.map((row) => (row && typeof row === "object" ? row : { value: row }));
105
- }
106
- if (value && typeof value === "object") {
107
- return [value];
108
- }
109
- return [{ [key]: value }];
110
- }
111
- const rows = [];
112
- for (const [key, value] of entries) {
113
- if (Array.isArray(value)) {
114
- for (const row of value) {
115
- if (row && typeof row === "object") {
116
- rows.push({ __entity: key, ...row });
117
- }
118
- else {
119
- rows.push({ __entity: key, value: row });
120
- }
121
- }
122
- continue;
123
- }
124
- if (value && typeof value === "object") {
125
- rows.push({ __entity: key, ...value });
126
- continue;
127
- }
128
- rows.push({ __entity: key, value });
129
- }
130
- return rows;
131
- }
132
- function getDomainDescriptor(domain) {
133
- const meta = domain?.meta ?? {};
134
- const context = typeof domain?.context === "function" ? domain.context() : {};
135
- const name = String(meta?.name ?? context?.name ?? "domain");
136
- const packageName = String(meta?.packageName ?? "");
137
- return {
138
- domainName: name,
139
- ...(packageName ? { domainPackageName: packageName } : {}),
140
- };
141
- }
142
- function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
143
- return `${targetDatasetId}__${sourceKind}_${index}`;
144
- }
145
- function buildFileDefaultInstructions(schema) {
146
- if (schema) {
147
- return "Create a dataset from the source file and ensure each output row matches the provided dataset schema exactly.";
148
- }
149
- return "Create a dataset representing the source content as structured rows.";
150
- }
151
- function buildRawSourceInstructions(sourceKind) {
152
- if (sourceKind === "text") {
153
- return "Create a dataset representing the raw text content as structured rows without applying business transformations.";
154
- }
155
- return "Create a dataset representing the raw file content as structured rows without applying business transformations.";
156
- }
157
- function buildTransformInstructions(sourceCount, userInstructions, schema) {
158
- const explicit = String(userInstructions ?? "").trim();
159
- if (explicit)
160
- return explicit;
161
- if (sourceCount > 1) {
162
- if (schema) {
163
- return "Combine the source datasets into a new dataset that matches the provided output schema exactly.";
164
- }
165
- return "Combine the source datasets into one coherent dataset.";
166
- }
167
- if (schema) {
168
- return "Transform the source dataset into a new dataset that matches the provided output schema exactly.";
169
- }
170
- return "Transform the source dataset into a new useful dataset.";
171
- }
172
- async function getDatasetDb(env) {
173
- const runtime = (await (0, runtime_2.getContextRuntime)(env));
174
- return runtime.db;
175
- }
176
- async function createOrUpdateDatasetMetadata(env, params) {
177
- const db = await getDatasetDb(env);
178
- const service = new service_1.DatasetService(db);
179
- const result = await service.createDataset({
180
- id: params.datasetId,
181
- sandboxId: params.sandboxId,
182
- title: params.title ?? params.datasetId,
183
- instructions: params.instructions ?? "",
184
- sources: params.sources,
185
- sourceKinds: params.sourceKinds,
186
- analysis: params.analysis,
187
- schema: params.schema,
188
- status: params.status ?? "building",
189
- organizationId: env.orgId,
190
- });
191
- if (!result.ok) {
192
- throw new Error(result.error);
193
- }
194
- }
195
- async function materializeRowsToDataset(env, params) {
196
- if (params.first && params.rows.length > 1) {
197
- throw new Error("dataset_first_expected_zero_or_one_row");
198
- }
199
- const resolvedSchema = params.schema ??
200
- inferDatasetSchema(params.rows, params.title ? `${params.title}Row` : "DatasetRow", params.title ? `One row for ${params.title}` : "One dataset row");
201
- validateRows(params.rows, resolvedSchema);
202
- await createOrUpdateDatasetMetadata(env, {
203
- datasetId: params.datasetId,
204
- sandboxId: params.sandboxId,
205
- title: params.title,
206
- instructions: params.instructions,
207
- sources: params.sources,
208
- sourceKinds: params.sourceKinds,
209
- analysis: params.analysis,
210
- schema: resolvedSchema,
211
- status: "building",
212
- });
213
- const db = await getDatasetDb(env);
214
- const service = new service_1.DatasetService(db);
215
- const uploadResult = await service.uploadDatasetOutputFile({
216
- datasetId: params.datasetId,
217
- fileBuffer: Buffer.from(rowsToJsonl(params.rows), "utf-8"),
218
- });
219
- if (!uploadResult.ok) {
220
- throw new Error(uploadResult.error);
221
- }
222
- const statusResult = await service.updateDatasetStatus({
223
- datasetId: params.datasetId,
224
- status: "completed",
225
- calculatedTotalRows: params.rows.length,
226
- actualGeneratedRowCount: params.rows.length,
227
- });
228
- if (!statusResult.ok) {
229
- throw new Error(statusResult.error);
230
- }
231
- return params.datasetId;
232
- }
233
- async function uploadInlineTextSource(env, datasetId, source) {
234
- const db = await getDatasetDb(env);
235
- const fileName = defaultTextSourceName(source);
236
- const storagePath = `/dataset/source/${datasetId}/${Date.now()}-${fileName}`;
237
- const uploadResult = await db.storage.uploadFile(storagePath, Buffer.from(source.text, "utf-8"), {
238
- contentType: source.mimeType ?? "text/plain",
239
- contentDisposition: fileName,
240
- });
241
- const fileId = uploadResult?.data?.id;
242
- if (!fileId) {
243
- throw new Error("dataset_text_source_upload_failed");
244
- }
245
- return fileId;
246
- }
247
- async function finalizeBuildResult(env, datasetId, withFirst) {
248
- const db = await getDatasetDb(env);
249
- const service = new service_1.DatasetService(db);
250
- const datasetResult = await service.getDatasetById(datasetId);
251
- if (!datasetResult.ok) {
252
- throw new Error(datasetResult.error);
253
- }
254
- const previewResult = await service.previewRows(datasetId, 20);
255
- if (!previewResult.ok) {
256
- throw new Error(previewResult.error);
257
- }
258
- const reader = {
259
- async read(cursorOrParams, limit) {
260
- const params = typeof cursorOrParams === "object" && cursorOrParams !== null
261
- ? cursorOrParams
262
- : { cursor: cursorOrParams, limit };
263
- const rowsResult = await service.readRows({
264
- datasetId,
265
- cursor: params.cursor,
266
- limit: params.limit,
267
- });
268
- if (!rowsResult.ok) {
269
- throw new Error(rowsResult.error);
270
- }
271
- return rowsResult.data;
272
- },
273
- };
274
- if (!withFirst) {
275
- return {
276
- datasetId,
277
- dataset: datasetResult.data,
278
- previewRows: previewResult.data,
279
- reader,
280
- };
281
- }
282
- const firstResult = await service.readOne(datasetId);
283
- if (!firstResult.ok) {
284
- throw new Error(firstResult.error);
285
- }
286
- return {
287
- datasetId,
288
- dataset: datasetResult.data,
289
- previewRows: previewResult.data,
290
- reader,
291
- firstRow: firstResult.data,
292
- };
293
- }
294
- async function materializeQuerySource(env, source, params) {
295
- const runtime = await (0, runtime_1.resolveRuntime)(source.domain, env);
296
- const result = await runtime.db.query(source.query);
297
- const rows = normalizeQueryRows(result);
298
- const domainDescriptor = getDomainDescriptor(source.domain);
299
- return await materializeRowsToDataset(env, {
300
- datasetId: params.datasetId,
301
- sandboxId: params.sandboxId,
302
- title: params.title ?? source.title,
303
- instructions: params.instructions,
304
- sources: [
305
- {
306
- kind: "query",
307
- query: source.query,
308
- title: source.title,
309
- explanation: source.explanation,
310
- ...domainDescriptor,
311
- },
312
- ],
313
- sourceKinds: ["query"],
314
- analysis: {
315
- query: source.query,
316
- explanation: source.explanation,
317
- ...domainDescriptor,
318
- },
319
- rows,
320
- schema: params.schema,
321
- inferSchema: !params.schema,
322
- first: params.first,
323
- });
324
- }
325
- async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
326
- if (!state.reactor) {
327
- throw new Error("dataset_reactor_required");
328
- }
329
- if (!state.sandboxId) {
330
- throw new Error("dataset_sandbox_required");
331
- }
332
- const fileId = source.kind === "file"
333
- ? source.fileId
334
- : await uploadInlineTextSource(state.env, targetDatasetId, source);
335
- await createOrUpdateDatasetMetadata(state.env, {
336
- datasetId: targetDatasetId,
337
- sandboxId: state.sandboxId,
338
- title: state.title ?? targetDatasetId,
339
- instructions: state.instructions,
340
- sources: [
341
- source.kind === "file"
342
- ? { kind: "file", fileId: source.fileId, description: source.description }
343
- : {
344
- kind: "text",
345
- mimeType: source.mimeType,
346
- name: source.name,
347
- description: source.description,
348
- },
349
- ],
350
- sourceKinds: [source.kind],
351
- schema: state.outputSchema,
352
- status: "building",
353
- });
354
- const parseStory = (0, file_dataset_agent_1.createFileParseStory)(fileId, {
355
- datasetId: targetDatasetId,
356
- instructions: state.instructions ?? buildFileDefaultInstructions(state.outputSchema),
357
- reactor: state.reactor,
358
- sandboxId: state.sandboxId,
359
- });
360
- await parseStory.parse(state.env);
361
- if (!state.outputSchema) {
362
- const db = await getDatasetDb(state.env);
363
- const service = new service_1.DatasetService(db);
364
- const readResult = await service.readRows({ datasetId: targetDatasetId, cursor: 0, limit: 1000 });
365
- if (!readResult.ok) {
366
- throw new Error(readResult.error);
367
- }
368
- const inferred = inferDatasetSchema(readResult.data.rows, `${targetDatasetId}Row`, "One dataset row");
369
- const updateResult = await service.updateDatasetSchema({
370
- datasetId: targetDatasetId,
371
- schema: inferred,
372
- status: "completed",
373
- });
374
- if (!updateResult.ok) {
375
- throw new Error(updateResult.error);
376
- }
377
- }
378
- if (state.first) {
379
- const db = await getDatasetDb(state.env);
380
- const service = new service_1.DatasetService(db);
381
- const firstResult = await service.readOne(targetDatasetId);
382
- if (!firstResult.ok) {
383
- throw new Error(firstResult.error);
384
- }
385
- }
386
- return targetDatasetId;
387
- }
388
- async function normalizeSourceToDatasetId(state, source, targetDatasetId, sourceIndex) {
389
- if (source.kind === "dataset") {
390
- return source.datasetId;
391
- }
392
- const intermediateDatasetId = makeIntermediateDatasetId(targetDatasetId, source.kind, sourceIndex);
393
- if (source.kind === "query") {
394
- await materializeQuerySource(state.env, source, {
395
- datasetId: intermediateDatasetId,
396
- sandboxId: state.sandboxId,
397
- title: source.title,
398
- first: false,
399
- });
400
- return intermediateDatasetId;
401
- }
402
- await materializeSingleFileLikeSource({
403
- ...state,
404
- outputSchema: undefined,
405
- first: false,
406
- instructions: buildRawSourceInstructions(source.kind),
407
- }, source, intermediateDatasetId);
408
- return intermediateDatasetId;
409
- }
410
- async function materializeDerivedDataset(state, targetDatasetId) {
411
- if (!state.reactor) {
412
- throw new Error("dataset_reactor_required");
413
- }
414
- if (!state.sandboxId) {
415
- throw new Error("dataset_sandbox_required");
416
- }
417
- const normalizedSources = [];
418
- for (let index = 0; index < state.sources.length; index++) {
419
- normalizedSources.push(await normalizeSourceToDatasetId(state, state.sources[index], targetDatasetId, index));
420
- }
421
- const transformSchema = state.outputSchema ??
422
- {
423
- title: "DatasetRow",
424
- description: "One dataset row",
425
- schema: {
426
- type: "object",
427
- additionalProperties: true,
428
- properties: {},
429
- },
430
- };
431
- await createOrUpdateDatasetMetadata(state.env, {
432
- datasetId: targetDatasetId,
433
- sandboxId: state.sandboxId,
434
- title: state.title ?? targetDatasetId,
435
- instructions: state.instructions,
436
- sources: state.sources.map((source) => source.kind === "query"
437
- ? {
438
- kind: "query",
439
- query: source.query,
440
- title: source.title,
441
- explanation: source.explanation,
442
- ...getDomainDescriptor(source.domain),
443
- }
444
- : source),
445
- sourceKinds: state.sources.map((source) => source.kind),
446
- schema: transformSchema,
447
- status: "building",
448
- });
449
- const transformStory = (0, transform_dataset_agent_1.createTransformDatasetStory)({
450
- sourceDatasetIds: normalizedSources,
451
- outputSchema: transformSchema,
452
- instructions: buildTransformInstructions(normalizedSources.length, state.instructions, state.outputSchema),
453
- datasetId: targetDatasetId,
454
- reactor: state.reactor,
455
- sandboxId: state.sandboxId,
456
- });
457
- await transformStory.transform(state.env);
458
- const db = await getDatasetDb(state.env);
459
- const service = new service_1.DatasetService(db);
460
- if (!state.outputSchema) {
461
- const readResult = await service.readRows({ datasetId: targetDatasetId, cursor: 0, limit: 1000 });
462
- if (!readResult.ok) {
463
- throw new Error(readResult.error);
464
- }
465
- const inferred = inferDatasetSchema(readResult.data.rows, `${targetDatasetId}Row`, "One dataset row");
466
- const updateResult = await service.updateDatasetSchema({
467
- datasetId: targetDatasetId,
468
- schema: inferred,
469
- status: "completed",
470
- });
471
- if (!updateResult.ok) {
472
- throw new Error(updateResult.error);
473
- }
474
- }
475
- if (state.first) {
476
- const firstResult = await service.readOne(targetDatasetId);
477
- if (!firstResult.ok) {
478
- throw new Error(firstResult.error);
479
- }
480
- }
481
- return targetDatasetId;
482
- }
483
- function dataset(env) {
1
+ import { buildObjectOutputInstructions } from "./builder/instructions.js";
2
+ import { resolveDatasetResourceContext } from "./builder/context.js";
3
+ import { createDatasetId } from "./id.js";
4
+ import { completeDatasetStep, materializeDerivedDataset, materializeSingleFileLikeResource, } from "./builder/materialize.js";
5
+ import { materializeQueryResource } from "./builder/materializeQuery.js";
6
+ import { createDatasetBuildResult, finalizeBuildResult, } from "./builder/persistence.js";
7
+ export function dataset(runtime, options = {}) {
8
+ const datasetId = normalizeDatasetId(options.datasetId);
9
+ const typedRuntime = runtime;
484
10
  const state = {
485
- env,
486
- sources: [],
11
+ runtime: typedRuntime,
12
+ env: typedRuntime.env,
13
+ resources: [],
14
+ output: "rows",
487
15
  inferSchema: false,
16
+ durable: options.durable,
488
17
  first: false,
489
18
  };
490
19
  const api = {
491
- fromFile(source) {
492
- state.sources.push({ kind: "file", ...source });
20
+ datasetId,
21
+ fromFile(resource) {
22
+ state.resources.push({ kind: "file", ...resource });
493
23
  return api;
494
24
  },
495
- fromText(source) {
496
- state.sources.push({ kind: "text", ...source });
25
+ fromText(resource) {
26
+ state.resources.push({ kind: "text", ...resource });
497
27
  return api;
498
28
  },
499
- fromDataset(source) {
500
- state.sources.push({ kind: "dataset", ...source });
29
+ fromDataset(resource) {
30
+ state.resources.push({ kind: "dataset", ...resource });
501
31
  return api;
502
32
  },
503
- fromQuery(domain, source) {
504
- state.sources.push({ kind: "query", domain, ...source });
33
+ fromContext(context) {
34
+ state.resources.push({ kind: "context", ...context });
35
+ return api;
36
+ },
37
+ from(...resources) {
38
+ for (const resource of resources) {
39
+ if ("kind" in resource) {
40
+ state.resources.push(resource);
41
+ continue;
42
+ }
43
+ if ("fileId" in resource) {
44
+ state.resources.push({ kind: "file", ...resource });
45
+ continue;
46
+ }
47
+ if ("datasetId" in resource) {
48
+ state.resources.push({ kind: "dataset", ...resource });
49
+ continue;
50
+ }
51
+ if ("id" in resource || "key" in resource) {
52
+ state.resources.push({ kind: "context", ...resource });
53
+ continue;
54
+ }
55
+ state.resources.push({ kind: "text", ...resource });
56
+ }
57
+ return api;
58
+ },
59
+ fromQuery(domain, resource) {
60
+ state.resources.push({ kind: "query", domain, ...resource });
505
61
  return api;
506
62
  },
507
63
  title(title) {
@@ -522,6 +78,20 @@ function dataset(env) {
522
78
  state.inferSchema = true;
523
79
  return api;
524
80
  },
81
+ auto() {
82
+ state.outputSchema = undefined;
83
+ state.inferSchema = true;
84
+ return api;
85
+ },
86
+ asRows() {
87
+ state.output = "rows";
88
+ return api;
89
+ },
90
+ asObject() {
91
+ state.output = "object";
92
+ state.first = true;
93
+ return api;
94
+ },
525
95
  instructions(instructions) {
526
96
  state.instructions = instructions;
527
97
  return api;
@@ -535,44 +105,82 @@ function dataset(env) {
535
105
  return api;
536
106
  },
537
107
  async build(options) {
538
- if (state.sources.length === 0) {
539
- throw new Error("dataset_sources_required");
108
+ if (state.resources.length === 0) {
109
+ throw new Error("dataset_resources_required");
540
110
  }
541
- const targetDatasetId = String(options?.datasetId ?? (0, admin_1.id)());
542
- const onlySource = state.sources[0];
543
- const isSingleSource = state.sources.length === 1;
544
- const hasInstructions = Boolean(String(state.instructions ?? "").trim());
545
- if (isSingleSource && onlySource.kind === "query" && !hasInstructions) {
546
- await materializeQuerySource(state.env, onlySource, {
111
+ const targetDatasetId = options?.datasetId
112
+ ? normalizeDatasetId(options.datasetId)
113
+ : datasetId;
114
+ const stateWithBuildOptions = {
115
+ ...state,
116
+ durable: options?.durable ?? state.durable,
117
+ };
118
+ const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
119
+ stateWithBuildOptions.resources = context.resources;
120
+ stateWithBuildOptions.contextId = context.contextId;
121
+ stateWithBuildOptions.contextResources = context.contextResources;
122
+ const effectiveState = stateWithBuildOptions.output === "object"
123
+ ? {
124
+ ...stateWithBuildOptions,
125
+ first: true,
126
+ instructions: buildObjectOutputInstructions(stateWithBuildOptions.instructions),
127
+ }
128
+ : stateWithBuildOptions;
129
+ const onlyResource = effectiveState.resources[0];
130
+ const isSingleResource = effectiveState.resources.length === 1;
131
+ const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
132
+ if (isSingleResource && onlyResource.kind === "query" && !hasInstructions) {
133
+ await materializeQueryResource(effectiveState.runtime, onlyResource, {
547
134
  datasetId: targetDatasetId,
548
- sandboxId: state.sandboxId,
549
- schema: state.outputSchema,
550
- title: state.title ?? onlySource.title,
551
- instructions: state.instructions,
552
- first: state.first,
135
+ sandboxId: effectiveState.sandboxId,
136
+ schema: effectiveState.outputSchema,
137
+ title: effectiveState.title ?? onlyResource.title,
138
+ instructions: effectiveState.instructions,
139
+ first: effectiveState.first,
140
+ contextId: effectiveState.contextId ?? "",
553
141
  });
554
- return await finalizeBuildResult(state.env, targetDatasetId, state.first);
142
+ return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
555
143
  }
556
- if (isSingleSource && (onlySource.kind === "file" || onlySource.kind === "text")) {
557
- if (!state.sandboxId) {
558
- throw new Error("dataset_sandbox_required");
559
- }
560
- if (!state.reactor) {
144
+ if (isSingleResource && (onlyResource.kind === "file" || onlyResource.kind === "text")) {
145
+ if (!effectiveState.reactor) {
561
146
  throw new Error("dataset_reactor_required");
562
147
  }
563
- await materializeSingleFileLikeSource(state, onlySource, targetDatasetId);
564
- return await finalizeBuildResult(state.env, targetDatasetId, state.first);
565
- }
566
- if (!state.sandboxId) {
567
- throw new Error("dataset_sandbox_required");
148
+ await materializeSingleFileLikeResource(effectiveState, onlyResource, targetDatasetId);
149
+ const completed = await completeDatasetStep({
150
+ runtime: effectiveState.runtime,
151
+ datasetId: targetDatasetId,
152
+ schema: effectiveState.outputSchema,
153
+ first: effectiveState.first,
154
+ });
155
+ return finalizeOutputResult(createDatasetBuildResult(effectiveState.runtime, completed), effectiveState.output);
568
156
  }
569
- if (!state.reactor) {
157
+ if (!effectiveState.reactor) {
570
158
  throw new Error("dataset_reactor_required");
571
159
  }
572
- await materializeDerivedDataset(state, targetDatasetId);
573
- return await finalizeBuildResult(state.env, targetDatasetId, state.first);
160
+ await materializeDerivedDataset(effectiveState, targetDatasetId);
161
+ const completed = await completeDatasetStep({
162
+ runtime: effectiveState.runtime,
163
+ datasetId: targetDatasetId,
164
+ schema: effectiveState.outputSchema,
165
+ first: effectiveState.first,
166
+ });
167
+ return finalizeOutputResult(createDatasetBuildResult(effectiveState.runtime, completed), effectiveState.output);
574
168
  },
575
169
  };
576
170
  return api;
577
171
  }
578
- //# sourceMappingURL=dataset.js.map
172
+ function normalizeDatasetId(datasetId) {
173
+ const normalized = String(datasetId ?? createDatasetId()).trim();
174
+ if (!normalized) {
175
+ throw new Error("dataset_id_required");
176
+ }
177
+ return normalized;
178
+ }
179
+ function finalizeOutputResult(result, output) {
180
+ if (output !== "object")
181
+ return result;
182
+ return {
183
+ ...result,
184
+ object: result.firstRow ?? null,
185
+ };
186
+ }