@ekairos/dataset 1.22.59-beta.development.0 → 1.22.61-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,4 @@
1
1
  import { readFileSync } from "node:fs";
2
- import { createRequire } from "node:module";
3
2
  import { dirname, join } from "node:path";
4
3
  import { fileURLToPath } from "node:url";
5
4
  import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
@@ -16,19 +15,33 @@ const PYTHON_SCRIPT_FILES = [
16
15
  "preview_tail_csv.py",
17
16
  "preview_tail_excel.py",
18
17
  ];
19
- const require = createRequire(import.meta.url);
20
18
  function resolveScriptPath(scriptName) {
21
- try {
22
- return require.resolve(`@ekairos/dataset/file/scripts/${scriptName}`);
23
- }
24
- catch {
25
- // Prefer local scripts in src/ (tests/dev), and after build the scripts are copied to dist/
26
- // at the same relative path, so this works in both environments.
27
- return join(dirname(fileURLToPath(import.meta.url)), "scripts", scriptName);
28
- }
19
+ // In src and dist the scripts live beside this module. Avoid package-resolution here:
20
+ // Turbopack treats package-resolved Python script paths as module edges.
21
+ return join(dirname(fileURLToPath(import.meta.url)), "scripts", scriptName);
29
22
  }
30
23
  const preparedSandboxIds = new Set();
31
24
  const sandboxSetupPromises = new Map();
25
+ function sanitizePreviewText(value) {
26
+ return String(value ?? "")
27
+ .replace(/\u0000/g, "")
28
+ .replace(/[\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "");
29
+ }
30
+ function getPreviewKind(extension) {
31
+ const normalized = extension.toLowerCase();
32
+ if (normalized === ".xlsx" || normalized === ".xls")
33
+ return "excel";
34
+ if (normalized === ".csv" ||
35
+ normalized === ".tsv" ||
36
+ normalized === ".txt" ||
37
+ normalized === ".log" ||
38
+ normalized === ".json" ||
39
+ normalized === ".jsonl" ||
40
+ normalized === ".md") {
41
+ return "text";
42
+ }
43
+ return null;
44
+ }
32
45
  function validateScriptResult(result, context) {
33
46
  if (!result.stderr) {
34
47
  return;
@@ -103,13 +116,13 @@ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, d
103
116
  await ensurePreviewScriptsAvailable(runtime, sandboxId);
104
117
  const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
105
118
  context.metadata = metadataResult;
106
- let isExcel = false;
119
+ let previewKind = null;
107
120
  if (metadataResult.stdout) {
108
121
  try {
109
122
  const metadataJson = JSON.parse(metadataResult.stdout);
110
123
  context.totalRows = metadataJson.row_count_estimate || 0;
111
124
  const extension = metadataJson.extension || "";
112
- isExcel = extension === ".xlsx" || extension === ".xls";
125
+ previewKind = getPreviewKind(extension);
113
126
  }
114
127
  catch {
115
128
  console.warn(`[Dataset ${datasetId}] Failed to parse metadata JSON`);
@@ -122,9 +135,13 @@ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, d
122
135
  console.log(`[Dataset ${datasetId}] No rows detected, skipping preview`);
123
136
  return context;
124
137
  }
125
- const headScript = isExcel ? "preview_head_excel.py" : "preview_head_csv.py";
126
- const tailScript = isExcel ? "preview_tail_excel.py" : "preview_tail_csv.py";
127
- const midScript = isExcel ? "preview_mid_excel.py" : "preview_mid_csv.py";
138
+ if (!previewKind) {
139
+ console.log(`[Dataset ${datasetId}] Binary or unsupported preview format, keeping metadata only`);
140
+ return context;
141
+ }
142
+ const headScript = previewKind === "excel" ? "preview_head_excel.py" : "preview_head_csv.py";
143
+ const tailScript = previewKind === "excel" ? "preview_tail_excel.py" : "preview_tail_csv.py";
144
+ const midScript = previewKind === "excel" ? "preview_mid_excel.py" : "preview_mid_csv.py";
128
145
  if (totalRows <= headLines) {
129
146
  console.log(`[Dataset ${datasetId}] File has ${totalRows} rows, reading all with head only`);
130
147
  const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
@@ -184,8 +201,8 @@ async function runScript(runtime, sandboxId, scriptName, args, description) {
184
201
  description,
185
202
  script: scriptContent,
186
203
  command,
187
- stdout: result.stdout || "",
188
- stderr: result.stderr || "",
204
+ stdout: sanitizePreviewText(result.stdout),
205
+ stderr: sanitizePreviewText(result.stderr),
189
206
  };
190
207
  }
191
208
  catch (error) {
@@ -194,7 +211,7 @@ async function runScript(runtime, sandboxId, scriptName, args, description) {
194
211
  script: scriptContent,
195
212
  command,
196
213
  stdout: "",
197
- stderr: error instanceof Error ? error.message : String(error),
214
+ stderr: sanitizePreviewText(error instanceof Error ? error.message : String(error)),
198
215
  };
199
216
  }
200
217
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.59-beta.development.0",
3
+ "version": "1.22.61-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.59-beta.development.0",
69
- "@ekairos/events": "^1.22.59-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.59-beta.development.0",
68
+ "@ekairos/domain": "^1.22.61-beta.development.0",
69
+ "@ekairos/events": "^1.22.61-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.61-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",