@ekairos/dataset 1.22.79-beta.development.0 → 1.22.81-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,10 +26,14 @@ export type FileParseContextParams = {
26
26
  datasetId?: string;
27
27
  model?: string;
28
28
  reactor?: ContextReactor<any, any>;
29
+ sandboxState?: SandboxState;
30
+ filePreview?: FilePreviewContext;
31
+ schema?: any | null;
29
32
  };
30
33
  export type FileParseRunOptions = {
31
34
  prompt?: string;
32
35
  durable?: boolean;
36
+ initialContent?: Record<string, any>;
33
37
  };
34
38
  export type FileParseContextBuilder<Env extends {
35
39
  orgId: string;
@@ -5,7 +5,6 @@ interface PreviewOptions {
5
5
  tailLines?: number;
6
6
  midLines?: number;
7
7
  }
8
- export declare function resolveFilePreviewScriptPath(scriptName: string): string;
9
8
  export declare function getEmbeddedFilePreviewScriptBase64(scriptName: string): string;
10
- export declare function ensurePreviewScriptsAvailable(runtime: any, sandboxId: string): Promise<void>;
9
+ export declare function ensurePreviewScriptsAvailable(_runtime: any, _sandboxId: string): Promise<void>;
11
10
  export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
@@ -1,40 +1,8 @@
1
- import { existsSync, readFileSync } from "node:fs";
2
- import { dirname, join } from "node:path";
3
- import { fileURLToPath } from "node:url";
4
- import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
1
+ import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
5
2
  import { PYTHON_SCRIPT_BASE64_BY_NAME } from "./scripts.generated.js";
6
3
  const DEFAULT_HEAD_LINES = 50;
7
4
  const DEFAULT_TAIL_LINES = 20;
8
5
  const DEFAULT_MID_LINES = 20;
9
- const SANDBOX_SCRIPT_DIRECTORY = "/tmp/ekairos/dataset/file/scripts";
10
- const PYTHON_SCRIPT_FILES = [
11
- "file_metadata.py",
12
- "preview_head_csv.py",
13
- "preview_head_excel.py",
14
- "preview_mid_csv.py",
15
- "preview_mid_excel.py",
16
- "preview_tail_csv.py",
17
- "preview_tail_excel.py",
18
- ];
19
- export function resolveFilePreviewScriptPath(scriptName) {
20
- const currentDir = dirname(fileURLToPath(import.meta.url));
21
- const taskRoot = String(process.env.LAMBDA_TASK_ROOT ?? "").trim();
22
- const candidates = [
23
- join(currentDir, "scripts", scriptName),
24
- join(process.cwd(), "node_modules", "@ekairos", "dataset", "dist", "file", "scripts", scriptName),
25
- taskRoot
26
- ? join(taskRoot, "node_modules", "@ekairos", "dataset", "dist", "file", "scripts", scriptName)
27
- : "",
28
- join(process.cwd(), "packages", "dataset", "dist", "file", "scripts", scriptName),
29
- join(process.cwd(), "packages", "dataset", "src", "file", "scripts", scriptName),
30
- ].filter(Boolean);
31
- for (const candidate of candidates) {
32
- if (existsSync(candidate)) {
33
- return candidate;
34
- }
35
- }
36
- throw new Error(`dataset_preview_script_not_found:${scriptName}; searched=${candidates.join(",")}`);
37
- }
38
6
  export function getEmbeddedFilePreviewScriptBase64(scriptName) {
39
7
  const embedded = PYTHON_SCRIPT_BASE64_BY_NAME[scriptName];
40
8
  if (!embedded) {
@@ -42,31 +10,9 @@ export function getEmbeddedFilePreviewScriptBase64(scriptName) {
42
10
  }
43
11
  return embedded;
44
12
  }
45
- function readFilePreviewScriptBase64(scriptName) {
46
- try {
47
- const scriptPath = resolveFilePreviewScriptPath(scriptName);
48
- return Buffer.from(readFileSync(scriptPath)).toString("base64");
49
- }
50
- catch (error) {
51
- try {
52
- return getEmbeddedFilePreviewScriptBase64(scriptName);
53
- }
54
- catch {
55
- throw error;
56
- }
57
- }
58
- }
59
13
  function readFilePreviewScriptText(scriptName) {
60
- try {
61
- const scriptPath = resolveFilePreviewScriptPath(scriptName);
62
- return readFileSync(scriptPath, "utf-8");
63
- }
64
- catch {
65
- return Buffer.from(getEmbeddedFilePreviewScriptBase64(scriptName), "base64").toString("utf-8");
66
- }
14
+ return Buffer.from(getEmbeddedFilePreviewScriptBase64(scriptName), "base64").toString("utf-8");
67
15
  }
68
- const preparedSandboxIds = new Set();
69
- const sandboxSetupPromises = new Map();
70
16
  function sanitizePreviewText(value) {
71
17
  return String(value ?? "")
72
18
  .replace(/\u0000/g, "")
@@ -99,65 +45,16 @@ function validateScriptResult(result, context) {
99
45
  throw new Error(`${context} failed: ${stderr.substring(0, 500)}`);
100
46
  }
101
47
  }
102
- export async function ensurePreviewScriptsAvailable(runtime, sandboxId) {
103
- if (preparedSandboxIds.has(sandboxId)) {
104
- return;
105
- }
106
- const inFlight = sandboxSetupPromises.get(sandboxId);
107
- if (inFlight) {
108
- await inFlight;
109
- return;
110
- }
111
- const setupPromise = (async () => {
112
- try {
113
- await runDatasetSandboxCommandStep({
114
- runtime,
115
- sandboxId,
116
- cmd: "mkdir",
117
- args: ["-p", SANDBOX_SCRIPT_DIRECTORY],
118
- });
119
- }
120
- catch (error) {
121
- console.warn("[Dataset Scripts] Failed to create sandbox scripts directory", error);
122
- }
123
- const filesToWrite = [];
124
- for (const scriptName of PYTHON_SCRIPT_FILES) {
125
- try {
126
- filesToWrite.push({
127
- path: `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`,
128
- contentBase64: readFilePreviewScriptBase64(scriptName),
129
- });
130
- }
131
- catch (error) {
132
- console.error(`[Dataset Scripts] Failed to read script ${scriptName}`, error);
133
- throw error;
134
- }
135
- }
136
- if (filesToWrite.length > 0) {
137
- await writeDatasetSandboxFilesStep({
138
- runtime,
139
- sandboxId,
140
- files: filesToWrite,
141
- });
142
- }
143
- })();
144
- sandboxSetupPromises.set(sandboxId, setupPromise);
145
- try {
146
- await setupPromise;
147
- preparedSandboxIds.add(sandboxId);
148
- }
149
- catch (error) {
150
- sandboxSetupPromises.delete(sandboxId);
151
- throw error;
152
- }
48
+ export async function ensurePreviewScriptsAvailable(_runtime, _sandboxId) {
49
+ return;
153
50
  }
154
51
  export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId, options = {}) {
155
52
  const context = {
156
53
  totalRows: 0,
157
54
  };
158
55
  try {
159
- await ensurePreviewScriptsAvailable(runtime, sandboxId);
160
56
  const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
57
+ validateScriptResult(metadataResult, `preview_metadata for ${datasetId}`);
161
58
  context.metadata = metadataResult;
162
59
  let previewKind = null;
163
60
  if (metadataResult.stdout) {
@@ -219,25 +116,19 @@ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, d
219
116
  }
220
117
  catch (error) {
221
118
  console.error(`[Dataset ${datasetId}] Error generating file preview:`, error);
119
+ throw error;
222
120
  }
223
121
  return context;
224
122
  }
225
123
  async function runScript(runtime, sandboxId, scriptName, args, description) {
226
- const scriptPath = `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`;
227
- const command = `python ${scriptPath} ${args.join(" ")}`;
228
- let scriptContent = "";
229
- try {
230
- scriptContent = readFilePreviewScriptText(scriptName);
231
- }
232
- catch (error) {
233
- console.warn(`Failed to read script ${scriptName}:`, error);
234
- }
124
+ const scriptContent = readFilePreviewScriptText(scriptName);
125
+ const command = `python -c <${scriptName}> ${args.join(" ")}`;
235
126
  try {
236
127
  const result = await runDatasetSandboxCommandStep({
237
128
  runtime,
238
129
  sandboxId,
239
130
  cmd: "python",
240
- args: [scriptPath, ...args],
131
+ args: ["-c", scriptContent, ...args],
241
132
  });
242
133
  return {
243
134
  description,
@@ -89,7 +89,8 @@ function buildErrorsSection(errors) {
89
89
  return null;
90
90
  }
91
91
  let xml = create()
92
- .ele("PreviousErrors");
92
+ .ele("PreviousErrors")
93
+ .ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch source column names into schema keys piecemeal.").up();
93
94
  for (const error of errors) {
94
95
  xml = xml.ele("Error").txt(error).up();
95
96
  }
@@ -114,16 +115,110 @@ function buildContextSection(context) {
114
115
  xml = xml.up();
115
116
  return xml.end({ prettyPrint: true, headless: true });
116
117
  }
118
+ function asRecord(value) {
119
+ return value && typeof value === "object" && !Array.isArray(value)
120
+ ? value
121
+ : null;
122
+ }
123
+ function getSchemaObject(context) {
124
+ return asRecord(context.schema?.schema);
125
+ }
126
+ function joinSchemaPath(basePath, key) {
127
+ return basePath === "$" ? `$.${key}` : `${basePath}.${key}`;
128
+ }
129
+ function collectSchemaContract(schema, path = "$", contract = {
130
+ requiredPaths: [],
131
+ propertyPaths: [],
132
+ enumConstraints: [],
133
+ closedObjectPaths: [],
134
+ }) {
135
+ const record = asRecord(schema);
136
+ if (!record) {
137
+ return contract;
138
+ }
139
+ if (Array.isArray(record.enum)) {
140
+ contract.enumConstraints.push({
141
+ path,
142
+ values: record.enum.map((value) => JSON.stringify(value)),
143
+ });
144
+ }
145
+ const properties = asRecord(record.properties);
146
+ if (properties) {
147
+ if (record.additionalProperties === false) {
148
+ contract.closedObjectPaths.push(path);
149
+ }
150
+ const required = Array.isArray(record.required)
151
+ ? record.required.filter((value) => typeof value === "string")
152
+ : [];
153
+ for (const key of required) {
154
+ contract.requiredPaths.push(joinSchemaPath(path, key));
155
+ }
156
+ for (const [key, childSchema] of Object.entries(properties)) {
157
+ const childPath = joinSchemaPath(path, key);
158
+ contract.propertyPaths.push(childPath);
159
+ collectSchemaContract(childSchema, childPath, contract);
160
+ }
161
+ }
162
+ if (record.items) {
163
+ collectSchemaContract(record.items, `${path}[]`, contract);
164
+ }
165
+ for (const keyword of ["oneOf", "anyOf", "allOf"]) {
166
+ if (Array.isArray(record[keyword])) {
167
+ for (const childSchema of record[keyword]) {
168
+ collectSchemaContract(childSchema, path, contract);
169
+ }
170
+ }
171
+ }
172
+ return contract;
173
+ }
174
+ function appendLimitedList(xml, elementName, itemName, values, maxItems) {
175
+ let node = xml.ele(elementName);
176
+ for (const value of values.slice(0, maxItems)) {
177
+ node = node.ele(itemName).txt(value).up();
178
+ }
179
+ if (values.length > maxItems) {
180
+ node = node.ele("Truncated").txt(String(values.length - maxItems)).up();
181
+ }
182
+ return node.up();
183
+ }
117
184
  function buildSchemaSection(context) {
118
- if (!context.schema) {
185
+ const schema = getSchemaObject(context);
186
+ if (!context.schema || !schema) {
119
187
  return "";
120
188
  }
189
+ const contract = collectSchemaContract(schema);
121
190
  let xml = create()
122
191
  .com("Schema section: This defines the structure of ONE RECORD (row). Each line in the JSONL output must conform to this schema.")
123
192
  .ele("Schema")
124
193
  .ele("Title").txt(context.schema.title || "").up()
125
- .ele("Description").txt(context.schema.description || "").up()
126
- .ele("JsonSchema").txt(JSON.stringify(context.schema.schema, null, 2)).up()
194
+ .ele("Description").txt(context.schema.description || "").up();
195
+ xml = xml
196
+ .ele("SchemaContract")
197
+ .ele("Purpose").txt("Compact output contract derived from JSON Schema. Use this before writing output.jsonl.").up()
198
+ .ele("Rule").txt("Use only schema property keys in data objects. Source headers are input labels, not output keys.").up()
199
+ .ele("Rule").txt("Required paths are required everywhere, including nested objects and array items.").up()
200
+ .ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize source labels to the closest valid enum literal; never emit a value outside the enum.").up();
201
+ xml = appendLimitedList(xml, "RequiredPaths", "Path", contract.requiredPaths, 120);
202
+ xml = appendLimitedList(xml, "PropertyPaths", "Path", contract.propertyPaths, 160);
203
+ let enumsXml = xml.ele("EnumConstraints");
204
+ for (const constraint of contract.enumConstraints.slice(0, 80)) {
205
+ let enumXml = enumsXml.ele("Enum", { path: constraint.path });
206
+ for (const value of constraint.values.slice(0, 80)) {
207
+ enumXml = enumXml.ele("Value").txt(value).up();
208
+ }
209
+ if (constraint.values.length > 80) {
210
+ enumXml = enumXml.ele("Truncated").txt(String(constraint.values.length - 80)).up();
211
+ }
212
+ enumsXml = enumXml.up();
213
+ }
214
+ if (contract.enumConstraints.length > 80) {
215
+ enumsXml = enumsXml.ele("Truncated").txt(String(contract.enumConstraints.length - 80)).up();
216
+ }
217
+ xml = enumsXml.up();
218
+ xml = appendLimitedList(xml, "ClosedObjectPaths", "Path", contract.closedObjectPaths, 80);
219
+ xml = xml
220
+ .up()
221
+ .ele("JsonSchema").txt(JSON.stringify(schema, null, 2)).up()
127
222
  .up();
128
223
  return xml.end({ prettyPrint: true, headless: true });
129
224
  }
@@ -148,6 +243,9 @@ function buildInstructions(context) {
148
243
  .ele("Requirements")
149
244
  .ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
150
245
  .ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
246
+ .ele("Requirement").txt("Build a schema-first mapping from source columns to schema fields before writing output.jsonl. Do not use raw source headers as JSON keys unless they are exactly schema keys").up()
247
+ .ele("Requirement").txt("For nested required fields, populate the required child keys inside each nested object or array item; top-level validity is not enough").up()
248
+ .ele("Requirement").txt("For enum fields, emit exactly one allowed enum literal from SchemaContract; normalize labels or abbreviations into allowed literals").up()
151
249
  .ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the source language").up()
152
250
  .ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
153
251
  .up()
@@ -173,6 +271,7 @@ function buildInstructions(context) {
173
271
  .ele("Requirement").txt("Parse ALL data rows/records from the file (exclude header sections and metadata)").up()
174
272
  .ele("Requirement").txt("Output JSONL format: each line is {\"type\": \"row\", \"data\": {...record...}}").up()
175
273
  .ele("Requirement").txt("When a schema is provided, each data object must contain the exact required schema keys and must not use translated or synonymous keys").up()
274
+ .ele("Requirement").txt("When validation returns zero valid rows, treat the previous output as structurally wrong and rewrite output.jsonl from the SchemaContract, not by applying small patches").up()
176
275
  .ele("Requirement").txt("Extract ONLY data records; skip any header lines, summary sections, or file metadata").up()
177
276
  .ele("Requirement").txt(`Save output to: ${outputPath}`).up()
178
277
  .ele("Requirement").txt("Use descriptive scriptName in snake_case (e.g., 'parse_csv_to_jsonl')").up()
@@ -207,6 +306,11 @@ export function buildFileDatasetPrompt(context) {
207
306
  sections.push("");
208
307
  sections.push(buildContextSection(context));
209
308
  sections.push("");
309
+ const schemaSection = buildSchemaSection(context);
310
+ if (schemaSection) {
311
+ sections.push(schemaSection);
312
+ sections.push("");
313
+ }
210
314
  sections.push(buildInstructions(context));
211
315
  return sections.join("\n");
212
316
  }
@@ -1,31 +1,18 @@
1
- import { getDatasetWorkstation } from "../datasetFiles.js";
2
- import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
1
+ import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
3
2
  const DEFAULT_HEAD_LINES = 50;
4
3
  async function runPythonSnippet(runtime, sandboxId, datasetId, scriptName, code, args, description) {
5
- const workstation = getDatasetWorkstation(datasetId);
6
- const scriptPath = `${workstation}/${scriptName}.py`;
7
- await writeDatasetSandboxFilesStep({
8
- runtime,
9
- sandboxId,
10
- files: [
11
- {
12
- path: scriptPath,
13
- contentBase64: Buffer.from(code, "utf-8").toString("base64"),
14
- },
15
- ],
16
- });
17
4
  const result = await runDatasetSandboxCommandStep({
18
5
  runtime,
19
6
  sandboxId,
20
7
  cmd: "python",
21
- args: [scriptPath, ...args],
8
+ args: ["-c", code, ...args],
22
9
  });
23
10
  const stdout = result.stdout || "";
24
11
  const stderr = result.stderr || "";
25
12
  return {
26
13
  description,
27
14
  script: code,
28
- command: `python ${scriptPath} ${args.join(" ")}`,
15
+ command: `python -c <${scriptName}.py> ${args.join(" ")}`,
29
16
  stdout,
30
17
  stderr,
31
18
  };
@@ -1,5 +1,5 @@
1
1
  import { type ContextReactor } from "@ekairos/events";
2
- import type { TransformDatasetRunOptions } from "./transform-dataset.types.js";
2
+ import type { TransformDatasetRunOptions, TransformSandboxState, TransformSourcePreviewContext } from "./transform-dataset.types.js";
3
3
  export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
4
4
  export declare function createTransformDatasetContext<Env extends {
5
5
  orgId: string;
@@ -11,6 +11,11 @@ export declare function createTransformDatasetContext<Env extends {
11
11
  model?: string;
12
12
  sandboxId?: string;
13
13
  reactor?: ContextReactor<any, any>;
14
+ sandboxState?: TransformSandboxState;
15
+ sourcePreviews?: Array<{
16
+ datasetId: string;
17
+ preview: TransformSourcePreviewContext;
18
+ }>;
14
19
  }): {
15
20
  datasetId: string;
16
21
  transform(runtime: {
@@ -2,6 +2,7 @@ import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/even
2
2
  import { createClearDatasetTool } from "../clearDataset.tool.js";
3
3
  import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
4
4
  import { datasetUpdateSchemaStep } from "../dataset/steps.js";
5
+ import { getDatasetOutputPath } from "../datasetFiles.js";
5
6
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
6
7
  import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
7
8
  import { createDatasetId } from "../id.js";
@@ -20,7 +21,8 @@ function createTransformDatasetContextDefinition(params) {
20
21
  let contextBuilder = createContext("dataset.transform")
21
22
  .context(async (stored, _env, runtime) => {
22
23
  const previous = stored?.content ?? {};
23
- const sandboxState = previous?.sandboxState ?? { initialized: false, sourcePaths: [] };
24
+ const sandboxState = previous?.sandboxState ??
25
+ params.sandboxState ?? { initialized: false, sourcePaths: [] };
24
26
  const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
25
27
  const sourceDatasetIds = Array.isArray(previous?.sourceDatasetIds)
26
28
  ? previous.sourceDatasetIds
@@ -42,19 +44,28 @@ function createTransformDatasetContextDefinition(params) {
42
44
  if (!sandboxId) {
43
45
  throw new Error("dataset_sandbox_required");
44
46
  }
45
- const initialized = await ensureTransformSourcesInSandboxStep({
46
- runtime,
47
- sandboxId,
48
- datasetId,
49
- sourceDatasetIds,
50
- state: sandboxState,
51
- });
52
- const sourcePreviews = await generateTransformSourcePreviewsStep({
53
- runtime,
54
- sandboxId,
55
- datasetId,
56
- sourcePaths: initialized.sourcePaths,
57
- });
47
+ const initialized = sandboxState.initialized && Array.isArray(sandboxState.sourcePaths)
48
+ ? {
49
+ sourcePaths: sandboxState.sourcePaths,
50
+ outputPath: previous?.sandboxConfig?.outputPath ?? getDatasetOutputPath(datasetId),
51
+ state: sandboxState,
52
+ }
53
+ : await ensureTransformSourcesInSandboxStep({
54
+ runtime,
55
+ sandboxId,
56
+ datasetId,
57
+ sourceDatasetIds,
58
+ state: sandboxState,
59
+ });
60
+ let sourcePreviews = previous?.sourcePreviews ?? params.sourcePreviews ?? undefined;
61
+ if (!sourcePreviews) {
62
+ sourcePreviews = await generateTransformSourcePreviewsStep({
63
+ runtime,
64
+ sandboxId,
65
+ datasetId,
66
+ sourcePaths: initialized.sourcePaths,
67
+ });
68
+ }
58
69
  await datasetUpdateSchemaStep({
59
70
  runtime,
60
71
  datasetId,
@@ -155,6 +166,8 @@ export function createTransformDatasetContext(params) {
155
166
  model: params.model,
156
167
  sandboxId: params.sandboxId,
157
168
  reactor: params.reactor,
169
+ sandboxState: params.sandboxState,
170
+ sourcePreviews: params.sourcePreviews,
158
171
  });
159
172
  return {
160
173
  datasetId,
@@ -189,12 +202,14 @@ export function createTransformDatasetContext(params) {
189
202
  maxModelSteps: 5,
190
203
  },
191
204
  __initialContent: {
205
+ ...(options.initialContent ?? {}),
192
206
  datasetId,
193
207
  sourceDatasetIds: params.sourceDatasetIds,
194
208
  outputSchema: params.outputSchema,
195
209
  instructions: params.instructions,
196
210
  sandboxId: params.sandboxId ?? "",
197
- sandboxState: { initialized: false, sourcePaths: [] },
211
+ sandboxState: params.sandboxState ?? { initialized: false, sourcePaths: [] },
212
+ sourcePreviews: params.sourcePreviews,
198
213
  },
199
214
  });
200
215
  await awaitContextRun(shell.run);
@@ -1,4 +1,4 @@
1
- import { getDatasetOutputPath, getDatasetWorkstation } from "../datasetFiles.js";
1
+ import { getDatasetOutputPath, getDatasetSourcesDir, getDatasetStandardDirs, } from "../datasetFiles.js";
2
2
  import { datasetReadOutputJsonlStep } from "../dataset/steps.js";
3
3
  import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
4
4
  import { generateSourcePreview } from "./filepreview.js";
@@ -12,16 +12,15 @@ export async function ensureTransformSourcesInSandboxStep(params) {
12
12
  state: params.state,
13
13
  };
14
14
  }
15
- const workstation = getDatasetWorkstation(params.datasetId);
16
15
  await runDatasetSandboxCommandStep({
17
16
  runtime: params.runtime,
18
17
  sandboxId: params.sandboxId,
19
18
  cmd: "mkdir",
20
- args: ["-p", workstation],
19
+ args: ["-p", ...getDatasetStandardDirs(params.datasetId)],
21
20
  });
22
21
  const sourcePaths = [];
23
22
  for (const sourceDatasetId of params.sourceDatasetIds) {
24
- const sourcePath = `${workstation}/source_${sourceDatasetId}.jsonl`;
23
+ const sourcePath = `${getDatasetSourcesDir(params.datasetId)}/source_${sourceDatasetId}.jsonl`;
25
24
  const source = await datasetReadOutputJsonlStep({
26
25
  runtime: params.runtime,
27
26
  datasetId: sourceDatasetId,
@@ -35,10 +35,16 @@ export type TransformDatasetAgentParams = {
35
35
  model?: string;
36
36
  sandboxId?: string;
37
37
  reactor?: ContextReactor<any, any>;
38
+ sandboxState?: TransformSandboxState;
39
+ sourcePreviews?: Array<{
40
+ datasetId: string;
41
+ preview: TransformSourcePreviewContext;
42
+ }>;
38
43
  };
39
44
  export type TransformDatasetRunOptions = {
40
45
  prompt?: string;
41
46
  durable?: boolean;
47
+ initialContent?: Record<string, any>;
42
48
  };
43
49
  export type TransformDatasetResult = {
44
50
  id: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.79-beta.development.0",
3
+ "version": "1.22.81-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.79-beta.development.0",
69
- "@ekairos/events": "^1.22.79-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.79-beta.development.0",
68
+ "@ekairos/domain": "^1.22.81-beta.development.0",
69
+ "@ekairos/events": "^1.22.81-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.81-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",