@ekairos/dataset 1.22.59-beta.development.0 → 1.22.60-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,26 @@ function resolveScriptPath(scriptName) {
29
29
  }
30
30
  const preparedSandboxIds = new Set();
31
31
  const sandboxSetupPromises = new Map();
32
/**
 * Coerces a value to a string and removes control characters from it.
 *
 * Removed: NUL (U+0000), the C0 controls U+0001–U+0008, U+000B, U+000C,
 * U+000E–U+001F, and DEL (U+007F). Tab (U+0009), line feed (U+000A) and
 * carriage return (U+000D) are preserved so the text keeps its layout.
 *
 * @param {unknown} value - Raw text; `null` and `undefined` become "".
 * @returns {string} The stringified value without the control characters above.
 */
function sanitizePreviewText(value) {
    // One character class covers NUL as well, so the text is scanned once.
    return String(value ?? "").replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "");
}
37
/**
 * Maps a file extension to the kind of preview that can be generated for it.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. The
 * extension is expected to include its leading dot (for example ".csv").
 *
 * @param {unknown} extension - File extension to classify.
 * @returns {"excel" | "text" | null} "excel" for .xlsx/.xls, "text" for the
 *   listed plain-text formats, or `null` when the extension is unsupported
 *   or is not a string.
 */
function getPreviewKind(extension) {
    // A non-string extension is simply unsupported; return null rather than
    // throwing a TypeError from toLowerCase().
    if (typeof extension !== "string") {
        return null;
    }
    switch (extension.trim().toLowerCase()) {
        case ".xlsx":
        case ".xls":
            return "excel";
        case ".csv":
        case ".tsv":
        case ".txt":
        case ".log":
        case ".json":
        case ".jsonl":
        case ".md":
            return "text";
        default:
            return null;
    }
}
32
52
  function validateScriptResult(result, context) {
33
53
  if (!result.stderr) {
34
54
  return;
@@ -103,13 +123,13 @@ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, d
103
123
  await ensurePreviewScriptsAvailable(runtime, sandboxId);
104
124
  const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
105
125
  context.metadata = metadataResult;
106
- let isExcel = false;
126
+ let previewKind = null;
107
127
  if (metadataResult.stdout) {
108
128
  try {
109
129
  const metadataJson = JSON.parse(metadataResult.stdout);
110
130
  context.totalRows = metadataJson.row_count_estimate || 0;
111
131
  const extension = metadataJson.extension || "";
112
- isExcel = extension === ".xlsx" || extension === ".xls";
132
+ previewKind = getPreviewKind(extension);
113
133
  }
114
134
  catch {
115
135
  console.warn(`[Dataset ${datasetId}] Failed to parse metadata JSON`);
@@ -122,9 +142,13 @@ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, d
122
142
  console.log(`[Dataset ${datasetId}] No rows detected, skipping preview`);
123
143
  return context;
124
144
  }
125
- const headScript = isExcel ? "preview_head_excel.py" : "preview_head_csv.py";
126
- const tailScript = isExcel ? "preview_tail_excel.py" : "preview_tail_csv.py";
127
- const midScript = isExcel ? "preview_mid_excel.py" : "preview_mid_csv.py";
145
+ if (!previewKind) {
146
+ console.log(`[Dataset ${datasetId}] Binary or unsupported preview format, keeping metadata only`);
147
+ return context;
148
+ }
149
+ const headScript = previewKind === "excel" ? "preview_head_excel.py" : "preview_head_csv.py";
150
+ const tailScript = previewKind === "excel" ? "preview_tail_excel.py" : "preview_tail_csv.py";
151
+ const midScript = previewKind === "excel" ? "preview_mid_excel.py" : "preview_mid_csv.py";
128
152
  if (totalRows <= headLines) {
129
153
  console.log(`[Dataset ${datasetId}] File has ${totalRows} rows, reading all with head only`);
130
154
  const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
@@ -184,8 +208,8 @@ async function runScript(runtime, sandboxId, scriptName, args, description) {
184
208
  description,
185
209
  script: scriptContent,
186
210
  command,
187
- stdout: result.stdout || "",
188
- stderr: result.stderr || "",
211
+ stdout: sanitizePreviewText(result.stdout),
212
+ stderr: sanitizePreviewText(result.stderr),
189
213
  };
190
214
  }
191
215
  catch (error) {
@@ -194,7 +218,7 @@ async function runScript(runtime, sandboxId, scriptName, args, description) {
194
218
  script: scriptContent,
195
219
  command,
196
220
  stdout: "",
197
- stderr: error instanceof Error ? error.message : String(error),
221
+ stderr: sanitizePreviewText(error instanceof Error ? error.message : String(error)),
198
222
  };
199
223
  }
200
224
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.59-beta.development.0",
3
+ "version": "1.22.60-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.59-beta.development.0",
69
- "@ekairos/events": "^1.22.59-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.59-beta.development.0",
68
+ "@ekairos/domain": "^1.22.60-beta.development.0",
69
+ "@ekairos/events": "^1.22.60-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.60-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",