@ekairos/dataset 1.22.80-beta.development.0 → 1.22.82-beta.development.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,22 +1,10 @@
1
1
  export const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
2
- const DEFAULT_VERCEL_WORKDIR_BASE = "/vercel/sandbox/datasets";
3
- const DEFAULT_DAYTONA_WORKDIR_BASE = "/home/daytona/.ekairos/datasets";
4
- const DEFAULT_SPRITES_WORKDIR_BASE = "/workspace/.ekairos/datasets";
2
+ const DATASET_WORKDIR_BASE = "/tmp/ekairos/dataset";
5
3
  function trimTrailingSlash(value) {
6
4
  return value.endsWith("/") ? value.slice(0, -1) : value;
7
5
  }
8
6
  export function getDatasetWorkdirBase() {
9
- const explicit = String(process.env.DATASET_SANDBOX_WORKDIR_BASE ?? "").trim();
10
- if (explicit)
11
- return trimTrailingSlash(explicit);
12
- const provider = String(process.env.SANDBOX_PROVIDER ?? "").trim().toLowerCase();
13
- if (provider === "daytona")
14
- return DEFAULT_DAYTONA_WORKDIR_BASE;
15
- if (provider === "vercel")
16
- return DEFAULT_VERCEL_WORKDIR_BASE;
17
- if (provider === "sprites")
18
- return DEFAULT_SPRITES_WORKDIR_BASE;
19
- return DEFAULT_VERCEL_WORKDIR_BASE;
7
+ return trimTrailingSlash(DATASET_WORKDIR_BASE);
20
8
  }
21
9
  export function getDatasetWorkstation(datasetId) {
22
10
  return `${getDatasetWorkdirBase()}/${datasetId}`;
@@ -1,26 +1,14 @@
1
1
  import { getDatasetSourcesDir, getDatasetStandardDirs, getDatasetWorkstation, } from "../datasetFiles.js";
2
2
  import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
3
3
  import { buildFileDatasetPrompt } from "./prompts.js";
4
- import { generateFilePreview, ensurePreviewScriptsAvailable } from "./filepreview.js";
4
+ import { generateFilePreview } from "./filepreview.js";
5
5
  import { readInstantFileStep } from "./steps.js";
6
6
  export async function initializeFileParseSandboxStep(params) {
7
7
  "use step";
8
8
  if (params.state.initialized) {
9
9
  return { filePath: params.state.filePath, state: params.state };
10
10
  }
11
- console.log(`[FileParseContext ${params.datasetId}] Initializing sandbox...`);
12
- await ensurePreviewScriptsAvailable(params.runtime, params.sandboxId);
13
- console.log(`[FileParseContext ${params.datasetId}] Installing Python dependencies...`);
14
- const pipInstall = await runDatasetSandboxCommandStep({
15
- runtime: params.runtime,
16
- sandboxId: params.sandboxId,
17
- cmd: "python",
18
- args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
19
- });
20
- const installStderr = pipInstall.stderr;
21
- if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
22
- throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
23
- }
11
+ console.log(`[FileParseContext ${params.datasetId}] Preparing source file in sandbox...`);
24
12
  console.log(`[FileParseContext ${params.datasetId}] Fetching file from InstantDB...`);
25
13
  const file = await readInstantFileStep({ runtime: params.runtime, fileId: params.fileId });
26
14
  console.log(`[FileParseContext ${params.datasetId}] Creating dataset workstation...`);
@@ -5,7 +5,6 @@ interface PreviewOptions {
5
5
  tailLines?: number;
6
6
  midLines?: number;
7
7
  }
8
- export declare function resolveFilePreviewScriptPath(scriptName: string): string;
9
8
  export declare function getEmbeddedFilePreviewScriptBase64(scriptName: string): string;
10
- export declare function ensurePreviewScriptsAvailable(runtime: any, sandboxId: string): Promise<void>;
9
+ export declare function ensurePreviewScriptsAvailable(_runtime: any, _sandboxId: string): Promise<void>;
11
10
  export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
@@ -1,40 +1,8 @@
1
- import { existsSync, readFileSync } from "node:fs";
2
- import { dirname, join } from "node:path";
3
- import { fileURLToPath } from "node:url";
4
- import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
1
+ import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
5
2
  import { PYTHON_SCRIPT_BASE64_BY_NAME } from "./scripts.generated.js";
6
3
  const DEFAULT_HEAD_LINES = 50;
7
4
  const DEFAULT_TAIL_LINES = 20;
8
5
  const DEFAULT_MID_LINES = 20;
9
- const SANDBOX_SCRIPT_DIRECTORY = "/tmp/ekairos/dataset/file/scripts";
10
- const PYTHON_SCRIPT_FILES = [
11
- "file_metadata.py",
12
- "preview_head_csv.py",
13
- "preview_head_excel.py",
14
- "preview_mid_csv.py",
15
- "preview_mid_excel.py",
16
- "preview_tail_csv.py",
17
- "preview_tail_excel.py",
18
- ];
19
- export function resolveFilePreviewScriptPath(scriptName) {
20
- const currentDir = dirname(fileURLToPath(import.meta.url));
21
- const taskRoot = String(process.env.LAMBDA_TASK_ROOT ?? "").trim();
22
- const candidates = [
23
- join(currentDir, "scripts", scriptName),
24
- join(process.cwd(), "node_modules", "@ekairos", "dataset", "dist", "file", "scripts", scriptName),
25
- taskRoot
26
- ? join(taskRoot, "node_modules", "@ekairos", "dataset", "dist", "file", "scripts", scriptName)
27
- : "",
28
- join(process.cwd(), "packages", "dataset", "dist", "file", "scripts", scriptName),
29
- join(process.cwd(), "packages", "dataset", "src", "file", "scripts", scriptName),
30
- ].filter(Boolean);
31
- for (const candidate of candidates) {
32
- if (existsSync(candidate)) {
33
- return candidate;
34
- }
35
- }
36
- throw new Error(`dataset_preview_script_not_found:${scriptName}; searched=${candidates.join(",")}`);
37
- }
38
6
  export function getEmbeddedFilePreviewScriptBase64(scriptName) {
39
7
  const embedded = PYTHON_SCRIPT_BASE64_BY_NAME[scriptName];
40
8
  if (!embedded) {
@@ -42,31 +10,9 @@ export function getEmbeddedFilePreviewScriptBase64(scriptName) {
42
10
  }
43
11
  return embedded;
44
12
  }
45
- function readFilePreviewScriptBase64(scriptName) {
46
- try {
47
- const scriptPath = resolveFilePreviewScriptPath(scriptName);
48
- return Buffer.from(readFileSync(scriptPath)).toString("base64");
49
- }
50
- catch (error) {
51
- try {
52
- return getEmbeddedFilePreviewScriptBase64(scriptName);
53
- }
54
- catch {
55
- throw error;
56
- }
57
- }
58
- }
59
13
  function readFilePreviewScriptText(scriptName) {
60
- try {
61
- const scriptPath = resolveFilePreviewScriptPath(scriptName);
62
- return readFileSync(scriptPath, "utf-8");
63
- }
64
- catch {
65
- return Buffer.from(getEmbeddedFilePreviewScriptBase64(scriptName), "base64").toString("utf-8");
66
- }
14
+ return Buffer.from(getEmbeddedFilePreviewScriptBase64(scriptName), "base64").toString("utf-8");
67
15
  }
68
- const preparedSandboxIds = new Set();
69
- const sandboxSetupPromises = new Map();
70
16
  function sanitizePreviewText(value) {
71
17
  return String(value ?? "")
72
18
  .replace(/\u0000/g, "")
@@ -99,65 +45,16 @@ function validateScriptResult(result, context) {
99
45
  throw new Error(`${context} failed: ${stderr.substring(0, 500)}`);
100
46
  }
101
47
  }
102
- export async function ensurePreviewScriptsAvailable(runtime, sandboxId) {
103
- if (preparedSandboxIds.has(sandboxId)) {
104
- return;
105
- }
106
- const inFlight = sandboxSetupPromises.get(sandboxId);
107
- if (inFlight) {
108
- await inFlight;
109
- return;
110
- }
111
- const setupPromise = (async () => {
112
- try {
113
- await runDatasetSandboxCommandStep({
114
- runtime,
115
- sandboxId,
116
- cmd: "mkdir",
117
- args: ["-p", SANDBOX_SCRIPT_DIRECTORY],
118
- });
119
- }
120
- catch (error) {
121
- console.warn("[Dataset Scripts] Failed to create sandbox scripts directory", error);
122
- }
123
- const filesToWrite = [];
124
- for (const scriptName of PYTHON_SCRIPT_FILES) {
125
- try {
126
- filesToWrite.push({
127
- path: `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`,
128
- contentBase64: readFilePreviewScriptBase64(scriptName),
129
- });
130
- }
131
- catch (error) {
132
- console.error(`[Dataset Scripts] Failed to read script ${scriptName}`, error);
133
- throw error;
134
- }
135
- }
136
- if (filesToWrite.length > 0) {
137
- await writeDatasetSandboxFilesStep({
138
- runtime,
139
- sandboxId,
140
- files: filesToWrite,
141
- });
142
- }
143
- })();
144
- sandboxSetupPromises.set(sandboxId, setupPromise);
145
- try {
146
- await setupPromise;
147
- preparedSandboxIds.add(sandboxId);
148
- }
149
- catch (error) {
150
- sandboxSetupPromises.delete(sandboxId);
151
- throw error;
152
- }
48
+ export async function ensurePreviewScriptsAvailable(_runtime, _sandboxId) {
49
+ return;
153
50
  }
154
51
  export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId, options = {}) {
155
52
  const context = {
156
53
  totalRows: 0,
157
54
  };
158
55
  try {
159
- await ensurePreviewScriptsAvailable(runtime, sandboxId);
160
56
  const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
57
+ validateScriptResult(metadataResult, `preview_metadata for ${datasetId}`);
161
58
  context.metadata = metadataResult;
162
59
  let previewKind = null;
163
60
  if (metadataResult.stdout) {
@@ -219,25 +116,19 @@ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, d
219
116
  }
220
117
  catch (error) {
221
118
  console.error(`[Dataset ${datasetId}] Error generating file preview:`, error);
119
+ throw error;
222
120
  }
223
121
  return context;
224
122
  }
225
123
  async function runScript(runtime, sandboxId, scriptName, args, description) {
226
- const scriptPath = `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`;
227
- const command = `python ${scriptPath} ${args.join(" ")}`;
228
- let scriptContent = "";
229
- try {
230
- scriptContent = readFilePreviewScriptText(scriptName);
231
- }
232
- catch (error) {
233
- console.warn(`Failed to read script ${scriptName}:`, error);
234
- }
124
+ const scriptContent = readFilePreviewScriptText(scriptName);
125
+ const command = `python -c <${scriptName}> ${args.join(" ")}`;
235
126
  try {
236
127
  const result = await runDatasetSandboxCommandStep({
237
128
  runtime,
238
129
  sandboxId,
239
130
  cmd: "python",
240
- args: [scriptPath, ...args],
131
+ args: ["-c", scriptContent, ...args],
241
132
  });
242
133
  return {
243
134
  description,
@@ -1,30 +1,18 @@
1
- import { getDatasetScriptsDir } from "../datasetFiles.js";
2
- import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
1
+ import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
3
2
  const DEFAULT_HEAD_LINES = 50;
4
3
  async function runPythonSnippet(runtime, sandboxId, datasetId, scriptName, code, args, description) {
5
- const scriptPath = `${getDatasetScriptsDir(datasetId)}/${scriptName}.py`;
6
- await writeDatasetSandboxFilesStep({
7
- runtime,
8
- sandboxId,
9
- files: [
10
- {
11
- path: scriptPath,
12
- contentBase64: Buffer.from(code, "utf-8").toString("base64"),
13
- },
14
- ],
15
- });
16
4
  const result = await runDatasetSandboxCommandStep({
17
5
  runtime,
18
6
  sandboxId,
19
7
  cmd: "python",
20
- args: [scriptPath, ...args],
8
+ args: ["-c", code, ...args],
21
9
  });
22
10
  const stdout = result.stdout || "";
23
11
  const stderr = result.stderr || "";
24
12
  return {
25
13
  description,
26
14
  script: code,
27
- command: `python ${scriptPath} ${args.join(" ")}`,
15
+ command: `python -c <${scriptName}.py> ${args.join(" ")}`,
28
16
  stdout,
29
17
  stderr,
30
18
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.80-beta.development.0",
3
+ "version": "1.22.82-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.80-beta.development.0",
69
- "@ekairos/events": "^1.22.80-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.80-beta.development.0",
68
+ "@ekairos/domain": "^1.22.82-beta.development.0",
69
+ "@ekairos/events": "^1.22.82-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.82-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",
@@ -80,10 +80,10 @@
80
80
  "@ekairos/openai-reactor": "workspace:*",
81
81
  "@ekairos/tsconfig": "workspace:*",
82
82
  "@types/node": "^24.5.0",
83
- "@workflow/serde": "5.0.0-beta.0",
84
- "@workflow/vitest": "5.0.0-beta.1",
83
+ "@workflow/serde": "5.0.0-beta.1",
84
+ "@workflow/vitest": "5.0.0-beta.5",
85
85
  "dotenv": "^17.2.3",
86
86
  "typescript": "^5.9.2",
87
- "workflow": "5.0.0-beta.1"
87
+ "workflow": "5.0.0-beta.5"
88
88
  }
89
89
  }