@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +347 -0
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +5 -0
- package/dist/builder/instructions.js +40 -0
- package/dist/builder/materialize.d.ts +83 -0
- package/dist/builder/materialize.js +548 -0
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +22 -0
- package/dist/builder/persistence.js +192 -0
- package/dist/builder/rows.d.ts +7 -0
- package/dist/builder/rows.js +56 -0
- package/dist/builder/schemaInference.d.ts +3 -0
- package/dist/builder/schemaInference.js +61 -0
- package/dist/builder/types.d.ts +144 -0
- package/dist/builder/types.js +1 -0
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +537 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +3 -67
- package/dist/dataset.js +129 -521
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/defineNotation.tool.d.ts +49 -0
- package/dist/defineNotation.tool.js +154 -0
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +181 -134
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +152 -32
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +9 -7
- package/dist/index.js +9 -23
- package/dist/materializeDataset.tool.d.ts +51 -31
- package/dist/materializeDataset.tool.js +81 -65
- package/dist/notation.d.ts +205 -0
- package/dist/notation.js +424 -0
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +24 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +15 -13
- package/dist/schema.js +27 -37
- package/dist/service.d.ts +12 -5
- package/dist/service.js +88 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +66 -46
- package/dist/transform/transform-dataset.agent.d.ts +20 -45
- package/dist/transform/transform-dataset.agent.js +151 -91
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +95 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +33 -8
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts +0 -22
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js +0 -29
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
package/dist/file/filepreview.js
CHANGED
|
@@ -1,30 +1,38 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
exports.ensurePreviewScriptsAvailable = ensurePreviewScriptsAvailable;
|
|
4
|
-
exports.generateFilePreview = generateFilePreview;
|
|
5
|
-
const fs_1 = require("fs");
|
|
6
|
-
const path_1 = require("path");
|
|
7
|
-
const steps_1 = require("../sandbox/steps");
|
|
1
|
+
import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
|
|
2
|
+
import { PYTHON_SCRIPT_BASE64_BY_NAME } from "./scripts.generated.js";
|
|
8
3
|
const DEFAULT_HEAD_LINES = 50;
|
|
9
4
|
const DEFAULT_TAIL_LINES = 20;
|
|
10
5
|
const DEFAULT_MID_LINES = 20;
|
|
11
|
-
|
|
12
|
-
const
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
"
|
|
20
|
-
|
|
21
|
-
function
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
6
|
+
export function getEmbeddedFilePreviewScriptBase64(scriptName) {
|
|
7
|
+
const embedded = PYTHON_SCRIPT_BASE64_BY_NAME[scriptName];
|
|
8
|
+
if (!embedded) {
|
|
9
|
+
throw new Error(`dataset_preview_script_not_embedded:${scriptName}`);
|
|
10
|
+
}
|
|
11
|
+
return embedded;
|
|
12
|
+
}
|
|
13
|
+
function readFilePreviewScriptText(scriptName) {
|
|
14
|
+
return Buffer.from(getEmbeddedFilePreviewScriptBase64(scriptName), "base64").toString("utf-8");
|
|
15
|
+
}
|
|
16
|
+
function sanitizePreviewText(value) {
|
|
17
|
+
return String(value ?? "")
|
|
18
|
+
.replace(/\u0000/g, "")
|
|
19
|
+
.replace(/[\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "");
|
|
20
|
+
}
|
|
21
|
+
function getPreviewKind(extension) {
|
|
22
|
+
const normalized = extension.toLowerCase();
|
|
23
|
+
if (normalized === ".xlsx" || normalized === ".xls")
|
|
24
|
+
return "excel";
|
|
25
|
+
if (normalized === ".csv" ||
|
|
26
|
+
normalized === ".tsv" ||
|
|
27
|
+
normalized === ".txt" ||
|
|
28
|
+
normalized === ".log" ||
|
|
29
|
+
normalized === ".json" ||
|
|
30
|
+
normalized === ".jsonl" ||
|
|
31
|
+
normalized === ".md") {
|
|
32
|
+
return "text";
|
|
33
|
+
}
|
|
34
|
+
return null;
|
|
25
35
|
}
|
|
26
|
-
const preparedSandboxIds = new Set();
|
|
27
|
-
const sandboxSetupPromises = new Map();
|
|
28
36
|
function validateScriptResult(result, context) {
|
|
29
37
|
if (!result.stderr) {
|
|
30
38
|
return;
|
|
@@ -37,75 +45,24 @@ function validateScriptResult(result, context) {
|
|
|
37
45
|
throw new Error(`${context} failed: ${stderr.substring(0, 500)}`);
|
|
38
46
|
}
|
|
39
47
|
}
|
|
40
|
-
async function ensurePreviewScriptsAvailable(
|
|
41
|
-
|
|
42
|
-
return;
|
|
43
|
-
}
|
|
44
|
-
const inFlight = sandboxSetupPromises.get(sandboxId);
|
|
45
|
-
if (inFlight) {
|
|
46
|
-
await inFlight;
|
|
47
|
-
return;
|
|
48
|
-
}
|
|
49
|
-
const setupPromise = (async () => {
|
|
50
|
-
try {
|
|
51
|
-
await (0, steps_1.runDatasetSandboxCommandStep)({
|
|
52
|
-
env,
|
|
53
|
-
sandboxId,
|
|
54
|
-
cmd: "mkdir",
|
|
55
|
-
args: ["-p", SANDBOX_SCRIPT_DIRECTORY],
|
|
56
|
-
});
|
|
57
|
-
}
|
|
58
|
-
catch (error) {
|
|
59
|
-
console.warn("[Dataset Scripts] Failed to create sandbox scripts directory", error);
|
|
60
|
-
}
|
|
61
|
-
const filesToWrite = [];
|
|
62
|
-
for (const scriptName of PYTHON_SCRIPT_FILES) {
|
|
63
|
-
try {
|
|
64
|
-
const scriptPath = resolveScriptPath(scriptName);
|
|
65
|
-
const fileBuffer = (0, fs_1.readFileSync)(scriptPath);
|
|
66
|
-
filesToWrite.push({
|
|
67
|
-
path: `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`,
|
|
68
|
-
contentBase64: Buffer.from(fileBuffer).toString("base64"),
|
|
69
|
-
});
|
|
70
|
-
}
|
|
71
|
-
catch (error) {
|
|
72
|
-
console.error(`[Dataset Scripts] Failed to read script ${scriptName}`, error);
|
|
73
|
-
throw error;
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
if (filesToWrite.length > 0) {
|
|
77
|
-
await (0, steps_1.writeDatasetSandboxFilesStep)({
|
|
78
|
-
env,
|
|
79
|
-
sandboxId,
|
|
80
|
-
files: filesToWrite,
|
|
81
|
-
});
|
|
82
|
-
}
|
|
83
|
-
})();
|
|
84
|
-
sandboxSetupPromises.set(sandboxId, setupPromise);
|
|
85
|
-
try {
|
|
86
|
-
await setupPromise;
|
|
87
|
-
preparedSandboxIds.add(sandboxId);
|
|
88
|
-
}
|
|
89
|
-
catch (error) {
|
|
90
|
-
sandboxSetupPromises.delete(sandboxId);
|
|
91
|
-
throw error;
|
|
92
|
-
}
|
|
48
|
+
export async function ensurePreviewScriptsAvailable(_runtime, _sandboxId) {
|
|
49
|
+
return;
|
|
93
50
|
}
|
|
94
|
-
async function generateFilePreview(
|
|
51
|
+
export async function generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId, options = {}) {
|
|
95
52
|
const context = {
|
|
96
53
|
totalRows: 0,
|
|
97
54
|
};
|
|
98
55
|
try {
|
|
99
|
-
await
|
|
100
|
-
|
|
56
|
+
const metadataResult = await runScript(runtime, sandboxId, "file_metadata.py", [sandboxFilePath], "Extracts file metadata: name, extension, size, row count estimate, column count, and header preview");
|
|
57
|
+
validateScriptResult(metadataResult, `preview_metadata for ${datasetId}`);
|
|
101
58
|
context.metadata = metadataResult;
|
|
102
|
-
let
|
|
59
|
+
let previewKind = null;
|
|
103
60
|
if (metadataResult.stdout) {
|
|
104
61
|
try {
|
|
105
62
|
const metadataJson = JSON.parse(metadataResult.stdout);
|
|
106
63
|
context.totalRows = metadataJson.row_count_estimate || 0;
|
|
107
64
|
const extension = metadataJson.extension || "";
|
|
108
|
-
|
|
65
|
+
previewKind = getPreviewKind(extension);
|
|
109
66
|
}
|
|
110
67
|
catch {
|
|
111
68
|
console.warn(`[Dataset ${datasetId}] Failed to parse metadata JSON`);
|
|
@@ -118,28 +75,32 @@ async function generateFilePreview(env, sandboxId, sandboxFilePath, datasetId, o
|
|
|
118
75
|
console.log(`[Dataset ${datasetId}] No rows detected, skipping preview`);
|
|
119
76
|
return context;
|
|
120
77
|
}
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
78
|
+
if (!previewKind) {
|
|
79
|
+
console.log(`[Dataset ${datasetId}] Binary or unsupported preview format, keeping metadata only`);
|
|
80
|
+
return context;
|
|
81
|
+
}
|
|
82
|
+
const headScript = previewKind === "excel" ? "preview_head_excel.py" : "preview_head_csv.py";
|
|
83
|
+
const tailScript = previewKind === "excel" ? "preview_tail_excel.py" : "preview_tail_csv.py";
|
|
84
|
+
const midScript = previewKind === "excel" ? "preview_mid_excel.py" : "preview_mid_csv.py";
|
|
124
85
|
if (totalRows <= headLines) {
|
|
125
86
|
console.log(`[Dataset ${datasetId}] File has ${totalRows} rows, reading all with head only`);
|
|
126
|
-
const headResult = await runScript(
|
|
87
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
|
|
127
88
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
128
89
|
context.head = headResult;
|
|
129
90
|
return context;
|
|
130
91
|
}
|
|
131
92
|
if (headLines + tailLines >= totalRows) {
|
|
132
93
|
console.log(`[Dataset ${datasetId}] Head + tail would cover entire file (${totalRows} rows), reading all with head only`);
|
|
133
|
-
const headResult = await runScript(
|
|
94
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(totalRows)], `Reads the first ${totalRows} rows (entire file)`);
|
|
134
95
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
135
96
|
context.head = headResult;
|
|
136
97
|
return context;
|
|
137
98
|
}
|
|
138
99
|
console.log(`[Dataset ${datasetId}] Reading head (${headLines} rows) and tail (${tailLines} rows) from ${totalRows} total rows`);
|
|
139
|
-
const headResult = await runScript(
|
|
100
|
+
const headResult = await runScript(runtime, sandboxId, headScript, [sandboxFilePath, String(headLines)], `Reads the first ${headLines} rows of the file`);
|
|
140
101
|
validateScriptResult(headResult, `preview_head for ${datasetId}`);
|
|
141
102
|
context.head = headResult;
|
|
142
|
-
const tailResult = await runScript(
|
|
103
|
+
const tailResult = await runScript(runtime, sandboxId, tailScript, [sandboxFilePath, String(tailLines)], `Reads the last ${tailLines} rows of the file`);
|
|
143
104
|
validateScriptResult(tailResult, `preview_tail for ${datasetId}`);
|
|
144
105
|
context.tail = tailResult;
|
|
145
106
|
const midLines = options.midLines || DEFAULT_MID_LINES;
|
|
@@ -148,40 +109,33 @@ async function generateFilePreview(env, sandboxId, sandboxFilePath, datasetId, o
|
|
|
148
109
|
const midStart = headLines;
|
|
149
110
|
const midEnd = totalRows - tailLines;
|
|
150
111
|
console.log(`[Dataset ${datasetId}] Large gap (${gapSize} rows), adding mid sample (${midLines} rows)`);
|
|
151
|
-
const midResult = await runScript(
|
|
112
|
+
const midResult = await runScript(runtime, sandboxId, midScript, [sandboxFilePath, String(midStart), String(midEnd), String(midLines)], `Samples ${midLines} rows from the middle section (rows ${midStart + 1} to ${midEnd})`);
|
|
152
113
|
validateScriptResult(midResult, `preview_mid for ${datasetId}`);
|
|
153
114
|
context.mid = midResult;
|
|
154
115
|
}
|
|
155
116
|
}
|
|
156
117
|
catch (error) {
|
|
157
118
|
console.error(`[Dataset ${datasetId}] Error generating file preview:`, error);
|
|
119
|
+
throw error;
|
|
158
120
|
}
|
|
159
121
|
return context;
|
|
160
122
|
}
|
|
161
|
-
async function runScript(
|
|
162
|
-
const
|
|
163
|
-
const command = `python
|
|
164
|
-
let scriptContent = "";
|
|
165
|
-
try {
|
|
166
|
-
const localScriptPath = resolveScriptPath(scriptName);
|
|
167
|
-
scriptContent = (0, fs_1.readFileSync)(localScriptPath, 'utf-8');
|
|
168
|
-
}
|
|
169
|
-
catch (error) {
|
|
170
|
-
console.warn(`Failed to read script ${scriptName}:`, error);
|
|
171
|
-
}
|
|
123
|
+
async function runScript(runtime, sandboxId, scriptName, args, description) {
|
|
124
|
+
const scriptContent = readFilePreviewScriptText(scriptName);
|
|
125
|
+
const command = `python -c <${scriptName}> ${args.join(" ")}`;
|
|
172
126
|
try {
|
|
173
|
-
const result = await
|
|
174
|
-
|
|
127
|
+
const result = await runDatasetSandboxCommandStep({
|
|
128
|
+
runtime,
|
|
175
129
|
sandboxId,
|
|
176
130
|
cmd: "python",
|
|
177
|
-
args: [
|
|
131
|
+
args: ["-c", scriptContent, ...args],
|
|
178
132
|
});
|
|
179
133
|
return {
|
|
180
134
|
description,
|
|
181
135
|
script: scriptContent,
|
|
182
136
|
command,
|
|
183
|
-
stdout: result.stdout
|
|
184
|
-
stderr: result.stderr
|
|
137
|
+
stdout: sanitizePreviewText(result.stdout),
|
|
138
|
+
stderr: sanitizePreviewText(result.stderr),
|
|
185
139
|
};
|
|
186
140
|
}
|
|
187
141
|
catch (error) {
|
|
@@ -190,8 +144,7 @@ async function runScript(env, sandboxId, scriptName, args, description) {
|
|
|
190
144
|
script: scriptContent,
|
|
191
145
|
command,
|
|
192
146
|
stdout: "",
|
|
193
|
-
stderr: error instanceof Error ? error.message : String(error),
|
|
147
|
+
stderr: sanitizePreviewText(error instanceof Error ? error.message : String(error)),
|
|
194
148
|
};
|
|
195
149
|
}
|
|
196
150
|
}
|
|
197
|
-
//# sourceMappingURL=filepreview.js.map
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
export type FilePreviewContext = {
|
|
2
|
+
totalRows: number;
|
|
3
|
+
metadata?: {
|
|
4
|
+
description: string;
|
|
5
|
+
script: string;
|
|
6
|
+
command: string;
|
|
7
|
+
stdout: string;
|
|
8
|
+
stderr: string;
|
|
9
|
+
};
|
|
10
|
+
head?: {
|
|
11
|
+
description: string;
|
|
12
|
+
script: string;
|
|
13
|
+
command: string;
|
|
14
|
+
stdout: string;
|
|
15
|
+
stderr: string;
|
|
16
|
+
};
|
|
17
|
+
tail?: {
|
|
18
|
+
description: string;
|
|
19
|
+
script: string;
|
|
20
|
+
command: string;
|
|
21
|
+
stdout: string;
|
|
22
|
+
stderr: string;
|
|
23
|
+
};
|
|
24
|
+
mid?: {
|
|
25
|
+
description: string;
|
|
26
|
+
script: string;
|
|
27
|
+
command: string;
|
|
28
|
+
stdout: string;
|
|
29
|
+
stderr: string;
|
|
30
|
+
};
|
|
31
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -2,9 +2,9 @@ interface GenerateSchemaToolParams {
|
|
|
2
2
|
datasetId: string;
|
|
3
3
|
isNested?: boolean;
|
|
4
4
|
fileId?: string;
|
|
5
|
-
|
|
5
|
+
runtime: any;
|
|
6
6
|
}
|
|
7
|
-
export declare function createGenerateSchemaTool({ datasetId, isNested, fileId,
|
|
7
|
+
export declare function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }: GenerateSchemaToolParams): import("ai").Tool<{
|
|
8
8
|
schemaTitle: string;
|
|
9
9
|
schemaDescription: string;
|
|
10
10
|
schemaJson: string;
|
|
@@ -25,4 +25,3 @@ export declare function createGenerateSchemaTool({ datasetId, isNested, fileId,
|
|
|
25
25
|
error?: undefined;
|
|
26
26
|
}>;
|
|
27
27
|
export {};
|
|
28
|
-
//# sourceMappingURL=generateSchema.tool.d.ts.map
|
|
@@ -1,20 +1,17 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
const steps_1 = require("../dataset/steps");
|
|
7
|
-
function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
|
|
8
|
-
return (0, ai_1.tool)({
|
|
1
|
+
import { tool } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { datasetUpdateSchemaStep } from "../dataset/steps.js";
|
|
4
|
+
export function createGenerateSchemaTool({ datasetId, isNested, fileId, runtime }) {
|
|
5
|
+
return tool({
|
|
9
6
|
description: `Generate a formal JSON schema for a SINGLE RECORD (row) from the file. This schema describes the structure of ONE record, not the entire dataset or array of records. Requirements:
|
|
10
7
|
1. Schema describes ONE RECORD structure only (no array wrappers)
|
|
11
8
|
2. All property names MUST use lowercaseCamelCase convention (e.g., 'productName', 'unitPrice')
|
|
12
9
|
3. Each property MUST have a description field
|
|
13
10
|
4. The schema description must explain what one record represents and field mappings from original file`,
|
|
14
|
-
inputSchema:
|
|
15
|
-
schemaTitle:
|
|
16
|
-
schemaDescription:
|
|
17
|
-
schemaJson:
|
|
11
|
+
inputSchema: z.object({
|
|
12
|
+
schemaTitle: z.string().describe("Title for the RECORD schema in PascalCase (e.g., 'ProductRecord', 'TransactionRecord')"),
|
|
13
|
+
schemaDescription: z.string().describe("Comprehensive description that includes: 1) what ONE record represents, 2) its purpose, 3) complete field mapping from original file fields to schema fields with explanations (e.g., 'ARTÍCULO' -> 'articleCode': normalized to camelCase)"),
|
|
14
|
+
schemaJson: z.string().describe("Complete JSON schema as string describing ONE RECORD. Must be type 'object' with properties. All properties must be in lowercaseCamelCase and have descriptions. Do NOT use type 'array' at root level."),
|
|
18
15
|
}),
|
|
19
16
|
execute: async ({ schemaTitle, schemaDescription, schemaJson, }) => {
|
|
20
17
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
@@ -74,8 +71,8 @@ function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
|
|
|
74
71
|
console.log(`[Dataset ${datasetId}] Description: ${schemaDescription}`);
|
|
75
72
|
console.log(`[Dataset ${datasetId}] Schema JSON:`);
|
|
76
73
|
console.log(JSON.stringify(parsedSchema, null, 2));
|
|
77
|
-
const updateResult = await
|
|
78
|
-
|
|
74
|
+
const updateResult = await datasetUpdateSchemaStep({
|
|
75
|
+
runtime,
|
|
79
76
|
datasetId,
|
|
80
77
|
schema: schemaData,
|
|
81
78
|
status: "schema_complete",
|
|
@@ -107,4 +104,3 @@ function createGenerateSchemaTool({ datasetId, isNested, fileId, env }) {
|
|
|
107
104
|
},
|
|
108
105
|
});
|
|
109
106
|
}
|
|
110
|
-
//# sourceMappingURL=generateSchema.tool.js.map
|
package/dist/file/index.d.ts
CHANGED
|
@@ -1,2 +1 @@
|
|
|
1
|
-
export * from "./file-dataset.agent";
|
|
2
|
-
//# sourceMappingURL=index.d.ts.map
|
|
1
|
+
export * from "./file-dataset.agent.js";
|
package/dist/file/index.js
CHANGED
|
@@ -1,18 +1 @@
|
|
|
1
|
-
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
-
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
-
};
|
|
16
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./file-dataset.agent"), exports);
|
|
18
|
-
//# sourceMappingURL=index.js.map
|
|
1
|
+
export * from "./file-dataset.agent.js";
|
package/dist/file/prompts.d.ts
CHANGED
|
@@ -1,3 +1,2 @@
|
|
|
1
|
-
import {
|
|
2
|
-
export declare function buildFileDatasetPrompt(context:
|
|
3
|
-
//# sourceMappingURL=prompts.d.ts.map
|
|
1
|
+
import type { FileParseContext } from "./file-dataset.types.js";
|
|
2
|
+
export declare function buildFileDatasetPrompt(context: FileParseContext): string;
|