@ekairos/dataset 1.22.59-beta.development.0 → 1.22.61-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/file/filepreview.js +35 -18
- package/package.json +4 -4
package/dist/file/filepreview.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { readFileSync } from "node:fs";
|
|
2
|
-
import { createRequire } from "node:module";
|
|
3
2
|
import { dirname, join } from "node:path";
|
|
4
3
|
import { fileURLToPath } from "node:url";
|
|
5
4
|
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
@@ -16,19 +15,33 @@ const PYTHON_SCRIPT_FILES = [
|
|
|
16
15
|
"preview_tail_csv.py",
|
|
17
16
|
"preview_tail_excel.py",
|
|
18
17
|
];
|
|
19
|
-
const require = createRequire(import.meta.url);
|
|
20
18
|
function resolveScriptPath(scriptName) {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
catch {
|
|
25
|
-
// Prefer local scripts in src/ (tests/dev), and after build the scripts are copied to dist/
|
|
26
|
-
// at the same relative path, so this works in both environments.
|
|
27
|
-
return join(dirname(fileURLToPath(import.meta.url)), "scripts", scriptName);
|
|
28
|
-
}
|
|
19
|
+
// In src and dist the scripts live beside this module. Avoid package-resolution here:
|
|
20
|
+
// Turbopack treats package-resolved Python script paths as module edges.
|
|
21
|
+
return join(dirname(fileURLToPath(import.meta.url)), "scripts", scriptName);
|
|
29
22
|
}
|
|
30
23
|
const preparedSandboxIds = new Set();
|
|
31
24
|
const sandboxSetupPromises = new Map();
|
|
25
|
+
/**
 * Converts a script output value to text with unsafe control characters removed.
 *
 * The value is coerced with `String()`; `null` and `undefined` become the empty
 * string. NUL, the C0 control characters other than tab (\u0009), line feed
 * (\u000A) and carriage return (\u000D), and DEL (\u007F) are stripped.
 * All other characters are kept unchanged.
 *
 * @param {unknown} value - Raw stdout/stderr text or error message.
 * @returns {string} The sanitized text.
 */
function sanitizePreviewText(value) {
    // Single pass: the class starts at \u0000, so a separate NUL-only replace is not needed.
    return String(value ?? "").replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "");
}
|
|
30
|
+
/**
 * Classifies a file extension into the kind of preview that can be produced.
 *
 * Matching is case-insensitive and expects the leading dot (e.g. ".csv").
 *
 * @param {unknown} extension - File extension as reported by the metadata step.
 * @returns {"excel" | "text" | null} "excel" for spreadsheet workbooks, "text"
 *   for delimited/plain-text formats, or null when the extension is not a
 *   string or is not a supported preview format.
 */
function getPreviewKind(extension) {
    // Metadata comes from parsed JSON, so the field may not be a string;
    // treat anything else as unsupported instead of throwing on toLowerCase().
    if (typeof extension !== "string") {
        return null;
    }
    switch (extension.toLowerCase()) {
        case ".xlsx":
        case ".xls":
            return "excel";
        case ".csv":
        case ".tsv":
        case ".txt":
        case ".log":
        case ".json":
        case ".jsonl":
        case ".md":
            return "text";
        default:
            return null;
    }
}
|
|
32
45
|
function validateScriptResult(result, context) {
|
|
33
46
|
if (!result.stderr) {
|
|
34
47
|
return;
|
|
@@ -103,13 +116,13 @@ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, d
|
|
|
103
116
|
await ensurePreviewScriptsAvailable(runtime, sandboxId);
|
|
104
117
|
const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
|
|
105
118
|
context.metadata = metadataResult;
|
|
106
|
-
let
|
|
119
|
+
let previewKind = null;
|
|
107
120
|
if (metadataResult.stdout) {
|
|
108
121
|
try {
|
|
109
122
|
const metadataJson = JSON.parse(metadataResult.stdout);
|
|
110
123
|
context.totalRows = metadataJson.row_count_estimate || 0;
|
|
111
124
|
const extension = metadataJson.extension || "";
|
|
112
|
-
|
|
125
|
+
previewKind = getPreviewKind(extension);
|
|
113
126
|
}
|
|
114
127
|
catch {
|
|
115
128
|
console.warn(`[Dataset ${datasetId}] Failed to parse metadata JSON`);
|
|
@@ -122,9 +135,13 @@ export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, d
|
|
|
122
135
|
console.log(`[Dataset ${datasetId}] No rows detected, skipping preview`);
|
|
123
136
|
return context;
|
|
124
137
|
}
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
138
|
+
if (!previewKind) {
|
|
139
|
+
console.log(`[Dataset ${datasetId}] Binary or unsupported preview format, keeping metadata only`);
|
|
140
|
+
return context;
|
|
141
|
+
}
|
|
142
|
+
const headScript = previewKind === "excel" ? "preview_head_excel.py" : "preview_head_csv.py";
|
|
143
|
+
const tailScript = previewKind === "excel" ? "preview_tail_excel.py" : "preview_tail_csv.py";
|
|
144
|
+
const midScript = previewKind === "excel" ? "preview_mid_excel.py" : "preview_mid_csv.py";
|
|
128
145
|
if (totalRows <= headLines) {
|
|
129
146
|
console.log(`[Dataset ${datasetId}] File has ${totalRows} rows, reading all with head only`);
|
|
130
147
|
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
|
|
@@ -184,8 +201,8 @@ async function runScript(runtime, sandboxId, scriptName, args, description) {
|
|
|
184
201
|
description,
|
|
185
202
|
script: scriptContent,
|
|
186
203
|
command,
|
|
187
|
-
stdout: result.stdout
|
|
188
|
-
stderr: result.stderr
|
|
204
|
+
stdout: sanitizePreviewText(result.stdout),
|
|
205
|
+
stderr: sanitizePreviewText(result.stderr),
|
|
189
206
|
};
|
|
190
207
|
}
|
|
191
208
|
catch (error) {
|
|
@@ -194,7 +211,7 @@ async function runScript(runtime, sandboxId, scriptName, args, description) {
|
|
|
194
211
|
script: scriptContent,
|
|
195
212
|
command,
|
|
196
213
|
stdout: "",
|
|
197
|
-
stderr: error instanceof Error ? error.message : String(error),
|
|
214
|
+
stderr: sanitizePreviewText(error instanceof Error ? error.message : String(error)),
|
|
198
215
|
};
|
|
199
216
|
}
|
|
200
217
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.59-beta.development.0",
|
|
3
|
+
"version": "1.22.61-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.59-beta.development.0",
|
|
69
|
-
"@ekairos/events": "^1.22.59-beta.development.0",
|
|
70
|
-
"@ekairos/sandbox": "^1.22.59-beta.development.0",
|
|
68
|
+
"@ekairos/domain": "^1.22.61-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.61-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.61-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|