@ekairos/dataset 1.22.40-beta.development.0 → 1.22.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/context.d.ts +15 -0
  6. package/dist/builder/context.js +251 -0
  7. package/dist/builder/instructions.d.ts +4 -5
  8. package/dist/builder/instructions.js +15 -21
  9. package/dist/builder/materialize.d.ts +77 -10
  10. package/dist/builder/materialize.js +495 -152
  11. package/dist/builder/materializeQuery.d.ts +12 -0
  12. package/dist/builder/materializeQuery.js +31 -0
  13. package/dist/builder/persistence.d.ts +10 -6
  14. package/dist/builder/persistence.js +107 -62
  15. package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -1
  16. package/dist/builder/{sourceRows.js → rows.js} +3 -9
  17. package/dist/builder/schemaInference.d.ts +1 -2
  18. package/dist/builder/schemaInference.js +4 -12
  19. package/dist/builder/types.d.ts +41 -26
  20. package/dist/builder/types.js +1 -3
  21. package/dist/clearDataset.tool.d.ts +2 -3
  22. package/dist/clearDataset.tool.js +13 -17
  23. package/dist/completeDataset.steps.d.ts +117 -0
  24. package/dist/completeDataset.steps.js +537 -0
  25. package/dist/completeDataset.tool.d.ts +132 -7
  26. package/dist/completeDataset.tool.js +46 -192
  27. package/dist/contextResources.d.ts +31 -0
  28. package/dist/contextResources.js +151 -0
  29. package/dist/contextWorkspace.d.ts +79 -0
  30. package/dist/contextWorkspace.js +234 -0
  31. package/dist/dataset/steps.d.ts +39 -15
  32. package/dist/dataset/steps.js +96 -39
  33. package/dist/dataset.d.ts +2 -3
  34. package/dist/dataset.js +73 -51
  35. package/dist/datasetFiles.d.ts +5 -1
  36. package/dist/datasetFiles.js +29 -27
  37. package/dist/defineNotation.tool.d.ts +49 -0
  38. package/dist/defineNotation.tool.js +154 -0
  39. package/dist/domain.d.ts +1 -2
  40. package/dist/domain.js +1 -6
  41. package/dist/executeCommand.tool.d.ts +2 -30
  42. package/dist/executeCommand.tool.js +165 -39
  43. package/dist/file/file-dataset.agent.d.ts +19 -56
  44. package/dist/file/file-dataset.agent.js +182 -136
  45. package/dist/file/file-dataset.steps.d.ts +27 -0
  46. package/dist/file/file-dataset.steps.js +47 -0
  47. package/dist/file/file-dataset.types.d.ts +64 -0
  48. package/dist/file/file-dataset.types.js +1 -0
  49. package/dist/file/filepreview.d.ts +5 -35
  50. package/dist/file/filepreview.js +60 -107
  51. package/dist/file/filepreview.types.d.ts +31 -0
  52. package/dist/file/filepreview.types.js +1 -0
  53. package/dist/file/generateSchema.tool.d.ts +2 -3
  54. package/dist/file/generateSchema.tool.js +11 -15
  55. package/dist/file/index.d.ts +1 -2
  56. package/dist/file/index.js +1 -18
  57. package/dist/file/prompts.d.ts +2 -3
  58. package/dist/file/prompts.js +152 -32
  59. package/dist/file/scripts.generated.d.ts +1 -0
  60. package/dist/file/scripts.generated.js +11 -0
  61. package/dist/file/steps.d.ts +1 -2
  62. package/dist/file/steps.js +9 -7
  63. package/dist/id.d.ts +1 -0
  64. package/dist/id.js +10 -0
  65. package/dist/index.d.ts +9 -7
  66. package/dist/index.js +9 -23
  67. package/dist/materializeDataset.tool.d.ts +35 -28
  68. package/dist/materializeDataset.tool.js +74 -68
  69. package/dist/notation.d.ts +205 -0
  70. package/dist/notation.js +424 -0
  71. package/dist/query/index.d.ts +1 -2
  72. package/dist/query/index.js +1 -18
  73. package/dist/query/queryDomain.d.ts +3 -4
  74. package/dist/query/queryDomain.js +3 -40
  75. package/dist/query/queryDomain.step.d.ts +1 -1
  76. package/dist/query/queryDomain.step.js +24 -13
  77. package/dist/sandbox/steps.d.ts +23 -15
  78. package/dist/sandbox/steps.js +73 -76
  79. package/dist/sandbox.steps.d.ts +1 -2
  80. package/dist/sandbox.steps.js +1 -18
  81. package/dist/schema.d.ts +14 -3
  82. package/dist/schema.js +27 -26
  83. package/dist/service.d.ts +12 -5
  84. package/dist/service.js +88 -15
  85. package/dist/skill.d.ts +0 -1
  86. package/dist/skill.js +12 -17
  87. package/dist/transform/filepreview.d.ts +2 -3
  88. package/dist/transform/filepreview.js +9 -26
  89. package/dist/transform/index.d.ts +2 -3
  90. package/dist/transform/index.js +2 -8
  91. package/dist/transform/prompts.d.ts +1 -34
  92. package/dist/transform/prompts.js +66 -46
  93. package/dist/transform/transform-dataset.agent.d.ts +21 -46
  94. package/dist/transform/transform-dataset.agent.js +152 -93
  95. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  96. package/dist/transform/transform-dataset.steps.js +61 -0
  97. package/dist/transform/transform-dataset.types.d.ts +96 -0
  98. package/dist/transform/transform-dataset.types.js +1 -0
  99. package/dist/transform/transformDataset.d.ts +3 -3
  100. package/dist/transform/transformDataset.js +15 -18
  101. package/dist/writeDatasetRows.tool.d.ts +188 -0
  102. package/dist/writeDatasetRows.tool.js +258 -0
  103. package/package.json +33 -8
  104. package/dist/builder/instructions.d.ts.map +0 -1
  105. package/dist/builder/instructions.js.map +0 -1
  106. package/dist/builder/materialize.d.ts.map +0 -1
  107. package/dist/builder/materialize.js.map +0 -1
  108. package/dist/builder/persistence.d.ts.map +0 -1
  109. package/dist/builder/persistence.js.map +0 -1
  110. package/dist/builder/schemaInference.d.ts.map +0 -1
  111. package/dist/builder/schemaInference.js.map +0 -1
  112. package/dist/builder/sourceRows.d.ts.map +0 -1
  113. package/dist/builder/sourceRows.js.map +0 -1
  114. package/dist/builder/types.d.ts.map +0 -1
  115. package/dist/builder/types.js.map +0 -1
  116. package/dist/clearDataset.tool.d.ts.map +0 -1
  117. package/dist/clearDataset.tool.js.map +0 -1
  118. package/dist/completeDataset.tool.d.ts.map +0 -1
  119. package/dist/completeDataset.tool.js.map +0 -1
  120. package/dist/dataset/steps.d.ts.map +0 -1
  121. package/dist/dataset/steps.js.map +0 -1
  122. package/dist/dataset.d.ts.map +0 -1
  123. package/dist/dataset.js.map +0 -1
  124. package/dist/datasetFiles.d.ts.map +0 -1
  125. package/dist/datasetFiles.js.map +0 -1
  126. package/dist/domain.d.ts.map +0 -1
  127. package/dist/domain.js.map +0 -1
  128. package/dist/eventsReactRuntime.d.ts +0 -22
  129. package/dist/eventsReactRuntime.d.ts.map +0 -1
  130. package/dist/eventsReactRuntime.js +0 -29
  131. package/dist/eventsReactRuntime.js.map +0 -1
  132. package/dist/executeCommand.tool.d.ts.map +0 -1
  133. package/dist/executeCommand.tool.js.map +0 -1
  134. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  135. package/dist/file/file-dataset.agent.js.map +0 -1
  136. package/dist/file/filepreview.d.ts.map +0 -1
  137. package/dist/file/filepreview.js.map +0 -1
  138. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  139. package/dist/file/generateSchema.tool.js.map +0 -1
  140. package/dist/file/index.d.ts.map +0 -1
  141. package/dist/file/index.js.map +0 -1
  142. package/dist/file/prompts.d.ts.map +0 -1
  143. package/dist/file/prompts.js.map +0 -1
  144. package/dist/file/steps.d.ts.map +0 -1
  145. package/dist/file/steps.js.map +0 -1
  146. package/dist/index.d.ts.map +0 -1
  147. package/dist/index.js.map +0 -1
  148. package/dist/materializeDataset.tool.d.ts.map +0 -1
  149. package/dist/materializeDataset.tool.js.map +0 -1
  150. package/dist/query/index.d.ts.map +0 -1
  151. package/dist/query/index.js.map +0 -1
  152. package/dist/query/queryDomain.d.ts.map +0 -1
  153. package/dist/query/queryDomain.js.map +0 -1
  154. package/dist/query/queryDomain.step.d.ts.map +0 -1
  155. package/dist/query/queryDomain.step.js.map +0 -1
  156. package/dist/sandbox/steps.d.ts.map +0 -1
  157. package/dist/sandbox/steps.js.map +0 -1
  158. package/dist/sandbox.steps.d.ts.map +0 -1
  159. package/dist/sandbox.steps.js.map +0 -1
  160. package/dist/schema.d.ts.map +0 -1
  161. package/dist/schema.js.map +0 -1
  162. package/dist/service.d.ts.map +0 -1
  163. package/dist/service.js.map +0 -1
  164. package/dist/skill.d.ts.map +0 -1
  165. package/dist/skill.js.map +0 -1
  166. package/dist/transform/filepreview.d.ts.map +0 -1
  167. package/dist/transform/filepreview.js.map +0 -1
  168. package/dist/transform/index.d.ts.map +0 -1
  169. package/dist/transform/index.js.map +0 -1
  170. package/dist/transform/prompts.d.ts.map +0 -1
  171. package/dist/transform/prompts.js.map +0 -1
  172. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  173. package/dist/transform/transform-dataset.agent.js.map +0 -1
  174. package/dist/transform/transformDataset.d.ts.map +0 -1
  175. package/dist/transform/transformDataset.js.map +0 -1
@@ -0,0 +1,8 @@
1
+ import "./builder/materialize.js";
2
+ export * from "./dataset.js";
3
+ export * from "./domain.js";
4
+ export * from "./materializeDataset.tool.js";
5
+ export * from "./schema.js";
6
+ export * from "./service.js";
7
+ export * from "./file/file-dataset.agent.js";
8
+ export * from "./transform/index.js";
package/dist/agents.js ADDED
@@ -0,0 +1,8 @@
1
+ import "./builder/materialize.js";
2
+ export * from "./dataset.js";
3
+ export * from "./domain.js";
4
+ export * from "./materializeDataset.tool.js";
5
+ export * from "./schema.js";
6
+ export * from "./service.js";
7
+ export * from "./file/file-dataset.agent.js";
8
+ export * from "./transform/index.js";
@@ -0,0 +1,9 @@
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, InternalDatasetResource } from "./types.js";
2
+ export type DatasetAgentMaterializers = {
3
+ materializeSingleFileLikeResource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, resource: Extract<InternalDatasetResource, {
4
+ kind: "file" | "text";
5
+ }>, targetDatasetId: string): Promise<string>;
6
+ materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;
7
+ };
8
+ export declare function registerDatasetAgentMaterializers(materializers: DatasetAgentMaterializers): void;
9
+ export declare function getDatasetAgentMaterializers(): DatasetAgentMaterializers;
@@ -0,0 +1,10 @@
1
+ let agentMaterializers = null;
2
+ export function registerDatasetAgentMaterializers(materializers) {
3
+ agentMaterializers = materializers;
4
+ }
5
+ export function getDatasetAgentMaterializers() {
6
+ if (!agentMaterializers) {
7
+ throw new Error("dataset_agent_materializers_not_registered");
8
+ }
9
+ return agentMaterializers;
10
+ }
@@ -0,0 +1,15 @@
1
+ import type { AnyDatasetRuntime, InternalDatasetResource } from "./types.js";
2
+ type DatasetContextResolution = {
3
+ contextId: string;
4
+ resources: InternalDatasetResource[];
5
+ contextResources: DatasetContextResourceRecord[];
6
+ };
7
+ type DatasetContextResourceRecord = {
8
+ key: string;
9
+ type: string;
10
+ name: string;
11
+ description: string;
12
+ [key: string]: unknown;
13
+ };
14
+ export declare function resolveDatasetResourceContext<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, resources: InternalDatasetResource[]): Promise<DatasetContextResolution>;
15
+ export {};
@@ -0,0 +1,251 @@
1
+ import { eventsDomain } from "@ekairos/events";
2
+ import { createDatasetId } from "../id.js";
3
+ import { datasetDomain } from "../schema.js";
4
+ import { DatasetService } from "../service.js";
5
+ import { getDomainDescriptor } from "./rows.js";
6
+ function getContextWhere(context) {
7
+ return "id" in context ? { id: context.id } : { key: context.key };
8
+ }
9
+ async function getEventsDb(runtime) {
10
+ const scoped = await runtime.use(eventsDomain);
11
+ return scoped.db;
12
+ }
13
+ async function getDatasetDb(runtime) {
14
+ const scoped = await runtime.use(datasetDomain);
15
+ return scoped.db;
16
+ }
17
+ function resourceKey(index, resource) {
18
+ if (resource.kind === "file")
19
+ return `file:${index}:${resource.fileId}`;
20
+ if (resource.kind === "text")
21
+ return `text:${index}:${resource.name ?? "inline"}`;
22
+ if (resource.kind === "dataset")
23
+ return `dataset:${index}:${resource.datasetId}`;
24
+ if (resource.kind === "query")
25
+ return `query:${index}:${resource.title ?? "query"}`;
26
+ return `resource:${index}`;
27
+ }
28
+ function resourceName(index, resource) {
29
+ if (resource.kind === "file")
30
+ return resource.filename ?? `File ${index + 1}`;
31
+ if (resource.kind === "text")
32
+ return resource.name ?? `Text ${index + 1}`;
33
+ if (resource.kind === "dataset")
34
+ return resource.datasetId;
35
+ if (resource.kind === "query")
36
+ return resource.title ?? `Query ${index + 1}`;
37
+ return `Resource ${index + 1}`;
38
+ }
39
+ function resourceDescription(resource) {
40
+ if ("description" in resource && typeof resource.description === "string" && resource.description.trim()) {
41
+ return resource.description.trim();
42
+ }
43
+ if (resource.kind === "query" && typeof resource.explanation === "string" && resource.explanation.trim()) {
44
+ return resource.explanation.trim();
45
+ }
46
+ return `Dataset ${resource.kind} resource.`;
47
+ }
48
+ function resourceToContextResource(index, resource) {
49
+ const base = {
50
+ key: resourceKey(index, resource),
51
+ type: resource.kind,
52
+ name: resourceName(index, resource),
53
+ description: resourceDescription(resource),
54
+ };
55
+ if (resource.kind === "file") {
56
+ return {
57
+ ...base,
58
+ fileId: resource.fileId,
59
+ filename: resource.filename,
60
+ mediaType: resource.mediaType,
61
+ };
62
+ }
63
+ if (resource.kind === "text") {
64
+ return {
65
+ ...base,
66
+ text: resource.text,
67
+ mimeType: resource.mimeType,
68
+ };
69
+ }
70
+ if (resource.kind === "dataset") {
71
+ return {
72
+ ...base,
73
+ datasetId: resource.datasetId,
74
+ };
75
+ }
76
+ if (resource.kind === "query") {
77
+ return {
78
+ ...base,
79
+ query: resource.query,
80
+ title: resource.title,
81
+ explanation: resource.explanation,
82
+ ...getDomainDescriptor(resource.domain),
83
+ };
84
+ }
85
+ return base;
86
+ }
87
+ async function createDatasetResourceContextStep(params) {
88
+ "use step";
89
+ const db = await getEventsDb(params.runtime);
90
+ const contextKey = `dataset:${params.datasetId}`;
91
+ const existing = await db.query({
92
+ event_contexts: {
93
+ $: { where: { key: contextKey }, limit: 1 },
94
+ },
95
+ });
96
+ const contextId = existing.event_contexts?.[0]?.id ?? createDatasetId();
97
+ const now = new Date();
98
+ const resources = await enrichDatasetContextResources(params.runtime, params.resources);
99
+ await db.transact([
100
+ db.tx.event_contexts[contextId].update({
101
+ key: contextKey,
102
+ createdAt: now,
103
+ updatedAt: now,
104
+ name: `Dataset ${params.datasetId}`,
105
+ status: "open_idle",
106
+ content: {
107
+ datasetId: params.datasetId,
108
+ resourceCount: resources.length,
109
+ },
110
+ resources,
111
+ description: `Dataset execution context for ${params.datasetId}.`,
112
+ goal: "Produce the dataset output from the resources declared in this context.",
113
+ }),
114
+ ]);
115
+ return {
116
+ contextId,
117
+ };
118
+ }
119
+ async function enrichDatasetContextResources(runtime, resources) {
120
+ const datasetResources = resources.filter((resource) => resource.type === "dataset" && typeof resource.datasetId === "string");
121
+ if (datasetResources.length === 0)
122
+ return resources;
123
+ const db = await getDatasetDb(runtime);
124
+ const service = new DatasetService(db);
125
+ const enriched = [];
126
+ for (const resource of resources) {
127
+ if (resource.type !== "dataset" || typeof resource.datasetId !== "string") {
128
+ enriched.push(resource);
129
+ continue;
130
+ }
131
+ const preview = await service.previewRows(resource.datasetId, 20);
132
+ if (!preview.ok) {
133
+ enriched.push({
134
+ ...resource,
135
+ previewError: preview.error,
136
+ });
137
+ continue;
138
+ }
139
+ enriched.push({
140
+ ...resource,
141
+ previewRows: preview.data,
142
+ previewLimit: 20,
143
+ });
144
+ }
145
+ return enriched;
146
+ }
147
+ function contextResourceToDatasetResource(resource) {
148
+ if (resource.type === "file" && typeof resource.fileId === "string" && resource.fileId.trim()) {
149
+ return {
150
+ kind: "file",
151
+ fileId: resource.fileId.trim(),
152
+ description: resource.description,
153
+ filename: typeof resource.filename === "string" ? resource.filename : undefined,
154
+ mediaType: typeof resource.mediaType === "string" ? resource.mediaType : undefined,
155
+ };
156
+ }
157
+ if (resource.type === "dataset" &&
158
+ typeof resource.datasetId === "string" &&
159
+ resource.datasetId.trim()) {
160
+ return {
161
+ kind: "dataset",
162
+ datasetId: resource.datasetId.trim(),
163
+ description: resource.description,
164
+ };
165
+ }
166
+ if (resource.type === "text" && typeof resource.text === "string") {
167
+ return {
168
+ kind: "text",
169
+ text: String(resource.text),
170
+ mimeType: typeof resource.mimeType === "string"
171
+ ? String(resource.mimeType)
172
+ : "text/plain",
173
+ name: resource.name,
174
+ description: resource.description,
175
+ };
176
+ }
177
+ if (resource.type === "query") {
178
+ throw new Error("dataset_context_query_resource_requires_builder_shortcut");
179
+ }
180
+ return {
181
+ kind: "text",
182
+ text: JSON.stringify({ resource }, null, 2),
183
+ mimeType: "application/vnd.ekairos.context-resource+json",
184
+ name: `${resource.key}.context-resource.json`,
185
+ description: resource.description,
186
+ };
187
+ }
188
+ async function readExistingContext(params) {
189
+ "use step";
190
+ const db = await getEventsDb(params.runtime);
191
+ const res = await db.query({
192
+ event_contexts: {
193
+ $: {
194
+ where: getContextWhere(params.context),
195
+ limit: 1,
196
+ },
197
+ },
198
+ });
199
+ const row = res?.event_contexts?.[0];
200
+ if (!row?.id)
201
+ throw new Error("dataset_context_not_found");
202
+ const resources = Array.isArray(row.resources)
203
+ ? row.resources
204
+ : [];
205
+ if (resources.length === 0) {
206
+ throw new Error("dataset_context_resources_required");
207
+ }
208
+ const sourceContextId = String(row.id);
209
+ const copiedResources = resources.map((resource) => ({
210
+ ...resource,
211
+ sourceContextId: resource.sourceContextId ?? sourceContextId,
212
+ sourceResourceKey: resource.sourceResourceKey ?? resource.key,
213
+ }));
214
+ return {
215
+ contextId: sourceContextId,
216
+ resources: resources.map((resource) => contextResourceToDatasetResource(resource)),
217
+ contextResources: copiedResources,
218
+ };
219
+ }
220
+ export async function resolveDatasetResourceContext(runtime, datasetId, resources) {
221
+ const contextRefs = resources.filter((resource) => resource.kind === "context");
222
+ if (contextRefs.length > 1) {
223
+ throw new Error("dataset_context_resource_must_be_unique");
224
+ }
225
+ if (contextRefs.length === 1) {
226
+ if (resources.length > 1) {
227
+ throw new Error("dataset_context_resource_is_exclusive");
228
+ }
229
+ const source = await readExistingContext({ runtime, context: contextRefs[0] });
230
+ const created = await createDatasetResourceContextStep({
231
+ runtime,
232
+ datasetId,
233
+ resources: source.contextResources,
234
+ });
235
+ return {
236
+ ...source,
237
+ contextId: created.contextId,
238
+ };
239
+ }
240
+ const contextResourceRecords = resources.map((resource, index) => resourceToContextResource(index, resource));
241
+ const created = await createDatasetResourceContextStep({
242
+ runtime,
243
+ datasetId,
244
+ resources: contextResourceRecords,
245
+ });
246
+ return {
247
+ contextId: created.contextId,
248
+ resources,
249
+ contextResources: contextResourceRecords,
250
+ };
251
+ }
@@ -1,6 +1,5 @@
1
- import type { DatasetSchemaInput } from "./types";
2
- export declare function buildFileDefaultInstructions(schema?: DatasetSchemaInput): "Create a dataset from the source file and ensure each output row matches the provided dataset schema exactly." | "Create a dataset representing the source content as structured rows.";
3
- export declare function buildRawSourceInstructions(sourceKind: "file" | "text"): "Create a dataset representing the raw text content as structured rows without applying business transformations." | "Create a dataset representing the raw file content as structured rows without applying business transformations.";
4
- export declare function buildTransformInstructions(sourceCount: number, userInstructions?: string, schema?: DatasetSchemaInput): string;
1
+ import type { DatasetSchemaInput } from "./types.js";
2
+ export declare function buildFileDefaultInstructions(schema?: DatasetSchemaInput): "Create a dataset from the resource file and ensure each output row matches the provided dataset schema exactly." | "Create a dataset representing the resource content as structured rows.";
3
+ export declare function buildRawResourceInstructions(resourceKind: "file" | "text"): "Create a dataset representing the raw text content as structured rows without applying business transformations." | "Create a dataset representing the raw file content as structured rows without applying business transformations.";
4
+ export declare function buildTransformInstructions(resourceCount: number, userInstructions?: string, schema?: DatasetSchemaInput): string;
5
5
  export declare function buildObjectOutputInstructions(userInstructions?: string): string;
6
- //# sourceMappingURL=instructions.d.ts.map
@@ -1,46 +1,40 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.buildFileDefaultInstructions = buildFileDefaultInstructions;
4
- exports.buildRawSourceInstructions = buildRawSourceInstructions;
5
- exports.buildTransformInstructions = buildTransformInstructions;
6
- exports.buildObjectOutputInstructions = buildObjectOutputInstructions;
7
- function buildFileDefaultInstructions(schema) {
1
+ export function buildFileDefaultInstructions(schema) {
8
2
  if (schema) {
9
- return "Create a dataset from the source file and ensure each output row matches the provided dataset schema exactly.";
3
+ return "Create a dataset from the resource file and ensure each output row matches the provided dataset schema exactly.";
10
4
  }
11
- return "Create a dataset representing the source content as structured rows.";
5
+ return "Create a dataset representing the resource content as structured rows.";
12
6
  }
13
- function buildRawSourceInstructions(sourceKind) {
14
- if (sourceKind === "text") {
7
+ export function buildRawResourceInstructions(resourceKind) {
8
+ if (resourceKind === "text") {
15
9
  return "Create a dataset representing the raw text content as structured rows without applying business transformations.";
16
10
  }
17
11
  return "Create a dataset representing the raw file content as structured rows without applying business transformations.";
18
12
  }
19
- function buildTransformInstructions(sourceCount, userInstructions, schema) {
13
+ export function buildTransformInstructions(resourceCount, userInstructions, schema) {
20
14
  const explicit = String(userInstructions ?? "").trim();
21
15
  if (explicit)
22
16
  return explicit;
23
- if (sourceCount > 1) {
17
+ if (resourceCount > 1) {
24
18
  if (schema) {
25
- return "Combine the source datasets into a new dataset that matches the provided output schema exactly.";
19
+ return "Combine the input datasets into a new dataset that matches the provided output schema exactly.";
26
20
  }
27
- return "Combine the source datasets into one coherent dataset.";
21
+ return "Combine the input datasets into one coherent dataset.";
28
22
  }
29
23
  if (schema) {
30
- return "Transform the source dataset into a new dataset that matches the provided output schema exactly.";
24
+ return "Transform the input dataset into a new dataset that matches the provided output schema exactly.";
31
25
  }
32
- return "Transform the source dataset into a new useful dataset.";
26
+ return "Transform the input dataset into a new useful dataset.";
33
27
  }
34
- function buildObjectOutputInstructions(userInstructions) {
28
+ export function buildObjectOutputInstructions(userInstructions) {
35
29
  const base = String(userInstructions ?? "").trim();
36
30
  const objectContract = [
37
31
  "Output mode is object.",
38
- "Produce exactly one JSONL row in output.jsonl.",
39
- "That row must be {\"type\":\"row\",\"data\":<the final object>}.",
32
+ "Produce exactly one final object.",
33
+ "completeObject({ data: <the final object>, summary }) is available to complete the dataset directly.",
34
+ "If you use output.jsonl instead, produce exactly one row: {\"type\":\"row\",\"data\":<the final object>}.",
40
35
  "Do not emit multiple rows, headers, summaries, or metadata rows.",
41
36
  ].join("\n");
42
37
  if (!base)
43
38
  return objectContract;
44
39
  return [base, "", objectContract].join("\n");
45
40
  }
46
- //# sourceMappingURL=instructions.js.map
@@ -1,16 +1,83 @@
1
- import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalSource } from "./types";
2
- export declare function materializeQuerySource<Runtime extends AnyDatasetRuntime>(runtime: DatasetBuilderState<Runtime>["runtime"], source: Extract<InternalSource, {
3
- kind: "query";
4
- }>, params: {
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalDatasetResource } from "./types.js";
2
+ import type { SandboxState } from "../file/file-dataset.types.js";
3
+ import type { FilePreviewContext } from "../file/filepreview.types.js";
4
+ import type { TransformSandboxState, TransformInputPreviewContext } from "../transform/transform-dataset.types.js";
5
+ export declare function resolveDatasetAgentDurable(requestedDurable?: boolean): Promise<boolean>;
6
+ type PreparedFileDatasetContext = {
7
+ kind: "file";
5
8
  datasetId: string;
6
- sandboxId?: string;
7
- schema?: DatasetSchemaInput;
9
+ sandboxId: string;
10
+ fileId: string;
11
+ sandboxState: SandboxState;
12
+ filePreview?: FilePreviewContext;
13
+ schema?: DatasetSchemaInput | null;
14
+ filename?: string;
15
+ mediaType?: string;
16
+ };
17
+ type PreparedTransformDatasetContext = {
18
+ kind: "transform";
19
+ datasetId: string;
20
+ sandboxId: string;
21
+ inputDatasetIds: string[];
22
+ outputSchema: DatasetSchemaInput;
23
+ sandboxState: TransformSandboxState;
24
+ inputPreviews?: Array<{
25
+ datasetId: string;
26
+ preview: TransformInputPreviewContext;
27
+ }>;
28
+ };
29
+ type PreparedDatasetContext = PreparedFileDatasetContext | PreparedTransformDatasetContext;
30
+ type DatasetContextInitialization = PreparedDatasetContext & {
31
+ prompt: string;
32
+ instructions?: string;
33
+ };
34
+ export declare function initializeDatasetStep<Runtime extends AnyDatasetRuntime>(params: {
35
+ runtime: Runtime;
36
+ datasetId: string;
37
+ sandboxId: string;
8
38
  title?: string;
9
39
  instructions?: string;
10
- first?: boolean;
11
- }): Promise<string>;
12
- export declare function materializeSingleFileLikeSource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, source: Extract<InternalSource, {
40
+ contextId: string;
41
+ schema?: DatasetSchemaInput;
42
+ }): Promise<{
43
+ datasetId: string;
44
+ sandboxId: string;
45
+ }>;
46
+ export declare function prepareDatasetResourcesStep<Runtime extends AnyDatasetRuntime>(params: {
47
+ kind: "file";
48
+ runtime: Runtime;
49
+ datasetId: string;
50
+ sandboxId: string;
51
+ resource: Extract<InternalDatasetResource, {
52
+ kind: "file" | "text";
53
+ }>;
54
+ schema?: DatasetSchemaInput;
55
+ } | {
56
+ kind: "transform";
57
+ runtime: Runtime;
58
+ datasetId: string;
59
+ sandboxId: string;
60
+ inputDatasetIds: string[];
61
+ outputSchema: DatasetSchemaInput;
62
+ }): Promise<PreparedDatasetContext>;
63
+ export declare function initializeDatasetContextStep(params: {
64
+ prepared: PreparedDatasetContext;
65
+ instructions?: string;
66
+ outputSchema?: DatasetSchemaInput;
67
+ }): Promise<DatasetContextInitialization>;
68
+ export declare function completeDatasetStep<Runtime extends AnyDatasetRuntime>(params: {
69
+ runtime: Runtime;
70
+ datasetId: string;
71
+ schema?: DatasetSchemaInput;
72
+ first: boolean;
73
+ }): Promise<{
74
+ datasetId: string;
75
+ dataset: any;
76
+ previewRows: any[];
77
+ firstRow: any;
78
+ }>;
79
+ export declare function materializeSingleFileLikeResource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, resource: Extract<InternalDatasetResource, {
13
80
  kind: "file" | "text";
14
81
  }>, targetDatasetId: string): Promise<string>;
15
82
  export declare function materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;
16
- //# sourceMappingURL=materialize.d.ts.map
83
+ export {};