@ekairos/dataset 1.22.54-beta.development.0 → 1.22.56-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/file/file-dataset.agent.d.ts +9 -56
- package/dist/file/file-dataset.agent.js +83 -97
- package/dist/file/file-dataset.steps.d.ts +21 -0
- package/dist/file/file-dataset.steps.js +62 -0
- package/dist/file/file-dataset.types.d.ts +50 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +2 -32
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/prompts.d.ts +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/transform/index.d.ts +1 -1
- package/dist/transform/index.js +1 -1
- package/dist/transform/prompts.d.ts +1 -33
- package/dist/transform/transform-dataset.agent.d.ts +8 -44
- package/dist/transform/transform-dataset.agent.js +87 -55
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +62 -0
- package/dist/transform/transform-dataset.types.d.ts +86 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/package.json +4 -4
|
@@ -1,59 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
export type FileParseContext
|
|
4
|
-
datasetId: string;
|
|
5
|
-
fileId: string;
|
|
6
|
-
instructions: string;
|
|
7
|
-
sandboxConfig: {
|
|
8
|
-
filePath: string;
|
|
9
|
-
};
|
|
10
|
-
analysis: any[];
|
|
11
|
-
schema: any | null;
|
|
12
|
-
plan: any | null;
|
|
13
|
-
executionResult: any | null;
|
|
14
|
-
errors: string[];
|
|
15
|
-
iterationCount: number;
|
|
16
|
-
filePreview?: FilePreviewContext;
|
|
17
|
-
};
|
|
18
|
-
export type FileParseContextParams = {
|
|
19
|
-
fileId: string;
|
|
20
|
-
instructions?: string;
|
|
21
|
-
sandboxId?: string;
|
|
22
|
-
datasetId?: string;
|
|
23
|
-
model?: string;
|
|
24
|
-
reactor?: ContextReactor<any, any>;
|
|
25
|
-
};
|
|
26
|
-
export type FileParseRunOptions = {
|
|
27
|
-
prompt?: string;
|
|
28
|
-
durable?: boolean;
|
|
29
|
-
};
|
|
30
|
-
export type FileParseContextBuilder<Env extends {
|
|
31
|
-
orgId: string;
|
|
32
|
-
}> = {
|
|
33
|
-
datasetId: string;
|
|
34
|
-
context: ReturnType<ReturnType<typeof createContext<Env>>["context"]> extends any ? any : any;
|
|
35
|
-
};
|
|
36
|
-
export type DatasetResult = {
|
|
37
|
-
id: string;
|
|
38
|
-
status?: string;
|
|
39
|
-
title?: string;
|
|
40
|
-
schema?: any;
|
|
41
|
-
analysis?: any;
|
|
42
|
-
calculatedTotalRows?: number;
|
|
43
|
-
actualGeneratedRowCount?: number;
|
|
44
|
-
createdAt?: number;
|
|
45
|
-
updatedAt?: number;
|
|
46
|
-
};
|
|
47
|
-
/**
|
|
48
|
-
* Factory (DX-first):
|
|
49
|
-
*
|
|
50
|
-
* Usage:
|
|
51
|
-
* const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
|
|
52
|
-
*
|
|
53
|
-
* - Uses the caller runtime; no secondary runtime is created.
|
|
54
|
-
* - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
|
|
55
|
-
* - `parse()` is the entrypoint; it calls `context.react(...)` internally.
|
|
56
|
-
*/
|
|
1
|
+
import { type ContextReactor } from "@ekairos/events";
|
|
2
|
+
import type { FileParseRunOptions } from "./file-dataset.types.js";
|
|
3
|
+
export type { DatasetResult, FileParseContext, FileParseContextBuilder, FileParseContextParams, FileParseRunOptions, SandboxState, } from "./file-dataset.types.js";
|
|
57
4
|
export declare function createFileParseContext<Env extends {
|
|
58
5
|
orgId: string;
|
|
59
6
|
}>(fileId: string, opts?: {
|
|
@@ -71,3 +18,9 @@ export declare function createFileParseContext<Env extends {
|
|
|
71
18
|
}>;
|
|
72
19
|
context: any;
|
|
73
20
|
};
|
|
21
|
+
export declare function registerFileParseContext<Env extends {
|
|
22
|
+
orgId: string;
|
|
23
|
+
}>(opts?: {
|
|
24
|
+
model?: string;
|
|
25
|
+
reactor?: ContextReactor<any, any>;
|
|
26
|
+
}): void;
|
|
@@ -1,15 +1,11 @@
|
|
|
1
|
-
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
|
|
2
|
-
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
3
|
-
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
4
|
-
import { createCompleteDatasetTool, didCompleteDatasetSucceed } from "../completeDataset.tool.js";
|
|
5
|
-
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
6
|
-
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
7
|
-
import { buildFileDatasetPrompt } from "./prompts.js";
|
|
8
|
-
import { generateFilePreview, ensurePreviewScriptsAvailable } from "./filepreview.js";
|
|
1
|
+
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
9
2
|
import { id } from "@instantdb/admin";
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
3
|
+
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
4
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed, } from "../completeDataset.tool.js";
|
|
12
5
|
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
6
|
+
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
7
|
+
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
8
|
+
import { buildFileDatasetPromptStep, generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
|
|
13
9
|
async function awaitContextRun(run) {
|
|
14
10
|
if (!run)
|
|
15
11
|
return;
|
|
@@ -19,89 +15,55 @@ async function awaitContextRun(run) {
|
|
|
19
15
|
}
|
|
20
16
|
await run;
|
|
21
17
|
}
|
|
22
|
-
async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
|
|
23
|
-
if (state.initialized) {
|
|
24
|
-
return state.filePath;
|
|
25
|
-
}
|
|
26
|
-
console.log(`[FileParseContext ${datasetId}] Initializing sandbox...`);
|
|
27
|
-
await ensurePreviewScriptsAvailable(runtime, sandboxId);
|
|
28
|
-
console.log(`[FileParseContext ${datasetId}] Installing Python dependencies...`);
|
|
29
|
-
const pipInstall = await runDatasetSandboxCommandStep({
|
|
30
|
-
runtime,
|
|
31
|
-
sandboxId,
|
|
32
|
-
cmd: "python",
|
|
33
|
-
args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
|
|
34
|
-
});
|
|
35
|
-
const installStderr = pipInstall.stderr;
|
|
36
|
-
if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
|
|
37
|
-
throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
|
|
38
|
-
}
|
|
39
|
-
console.log(`[FileParseContext ${datasetId}] Fetching file from InstantDB...`);
|
|
40
|
-
const file = await readInstantFileStep({ runtime, fileId });
|
|
41
|
-
console.log(`[FileParseContext ${datasetId}] Creating dataset workstation...`);
|
|
42
|
-
const workstation = getDatasetWorkstation(datasetId);
|
|
43
|
-
await runDatasetSandboxCommandStep({
|
|
44
|
-
runtime,
|
|
45
|
-
sandboxId,
|
|
46
|
-
cmd: "mkdir",
|
|
47
|
-
args: ["-p", workstation],
|
|
48
|
-
});
|
|
49
|
-
const fileName = file.contentDisposition ?? "";
|
|
50
|
-
const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
|
|
51
|
-
const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
|
|
52
|
-
await writeDatasetSandboxFilesStep({
|
|
53
|
-
runtime,
|
|
54
|
-
sandboxId,
|
|
55
|
-
files: [
|
|
56
|
-
{
|
|
57
|
-
path: sandboxFilePath,
|
|
58
|
-
contentBase64: file.contentBase64,
|
|
59
|
-
},
|
|
60
|
-
],
|
|
61
|
-
});
|
|
62
|
-
console.log(`[FileParseContext ${datasetId}] ✅ Workstation created: ${workstation}`);
|
|
63
|
-
console.log(`[FileParseContext ${datasetId}] ✅ File saved: ${sandboxFilePath}`);
|
|
64
|
-
state.filePath = sandboxFilePath;
|
|
65
|
-
state.initialized = true;
|
|
66
|
-
return sandboxFilePath;
|
|
67
|
-
}
|
|
68
|
-
/**
|
|
69
|
-
* FileParseContext
|
|
70
|
-
*
|
|
71
|
-
* Uso:
|
|
72
|
-
* - Crear una instancia con `fileId`, `instructions` y un `sandbox`
|
|
73
|
-
* - Llamar `getDataset()` para crear un dataset nuevo (crea un datasetId interno)
|
|
74
|
-
* - Llamar `followUp(datasetId, feedback)` para iterar el mismo dataset con feedback
|
|
75
|
-
*
|
|
76
|
-
* Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
|
|
77
|
-
*/
|
|
78
18
|
function createFileParseContextDefinition(params) {
|
|
79
|
-
const
|
|
19
|
+
const fallbackDatasetId = params.datasetId;
|
|
80
20
|
const model = params.model ?? "openai/gpt-5";
|
|
81
21
|
let contextBuilder = createContext("file.parse")
|
|
82
22
|
.context(async (stored, _env, runtime) => {
|
|
83
23
|
const previous = stored?.content ?? {};
|
|
84
24
|
const sandboxState = previous?.sandboxState ?? { initialized: false, filePath: "" };
|
|
25
|
+
const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
|
|
26
|
+
const fileId = previous?.fileId ?? params.fileId ?? "";
|
|
27
|
+
const instructions = previous?.instructions ?? params.instructions ?? "";
|
|
85
28
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
29
|
+
if (!datasetId) {
|
|
30
|
+
throw new Error("dataset_id_required");
|
|
31
|
+
}
|
|
32
|
+
if (!fileId) {
|
|
33
|
+
throw new Error("dataset_file_id_required");
|
|
34
|
+
}
|
|
86
35
|
if (!sandboxId) {
|
|
87
36
|
throw new Error("dataset_sandbox_required");
|
|
88
37
|
}
|
|
89
|
-
const
|
|
38
|
+
const initialized = await initializeFileParseSandboxStep({
|
|
39
|
+
runtime,
|
|
40
|
+
sandboxId,
|
|
41
|
+
datasetId,
|
|
42
|
+
fileId,
|
|
43
|
+
state: sandboxState,
|
|
44
|
+
});
|
|
45
|
+
const sandboxFilePath = initialized.filePath;
|
|
90
46
|
let filePreview = undefined;
|
|
91
47
|
try {
|
|
92
|
-
filePreview = await
|
|
48
|
+
filePreview = await generateFileParsePreviewStep({
|
|
49
|
+
runtime,
|
|
50
|
+
sandboxId,
|
|
51
|
+
sandboxFilePath,
|
|
52
|
+
datasetId,
|
|
53
|
+
});
|
|
93
54
|
}
|
|
94
55
|
catch {
|
|
95
|
-
// optional
|
|
56
|
+
// Preview is optional; parsing can still proceed from the file path.
|
|
96
57
|
}
|
|
97
58
|
let schema = null;
|
|
98
59
|
const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
|
|
99
|
-
if (datasetResult.ok && datasetResult.data.schema)
|
|
60
|
+
if (datasetResult.ok && datasetResult.data.schema) {
|
|
100
61
|
schema = datasetResult.data.schema;
|
|
62
|
+
}
|
|
101
63
|
const ctx = {
|
|
102
64
|
datasetId,
|
|
103
|
-
fileId
|
|
104
|
-
instructions
|
|
65
|
+
fileId,
|
|
66
|
+
instructions,
|
|
105
67
|
sandboxConfig: { filePath: sandboxFilePath },
|
|
106
68
|
analysis: [],
|
|
107
69
|
schema,
|
|
@@ -114,16 +76,16 @@ function createFileParseContextDefinition(params) {
|
|
|
114
76
|
return {
|
|
115
77
|
...previous,
|
|
116
78
|
datasetId,
|
|
117
|
-
fileId
|
|
118
|
-
instructions
|
|
79
|
+
fileId,
|
|
80
|
+
instructions,
|
|
119
81
|
sandboxId,
|
|
120
|
-
sandboxState,
|
|
82
|
+
sandboxState: initialized.state,
|
|
121
83
|
ctx,
|
|
122
84
|
};
|
|
123
85
|
})
|
|
124
86
|
.narrative(async (stored) => {
|
|
125
87
|
const ctx = stored?.content?.ctx;
|
|
126
|
-
const base =
|
|
88
|
+
const base = await buildFileDatasetPromptStep({ context: ctx });
|
|
127
89
|
const userInstructions = String(ctx?.instructions ?? "").trim();
|
|
128
90
|
if (!userInstructions)
|
|
129
91
|
return base;
|
|
@@ -138,27 +100,36 @@ function createFileParseContextDefinition(params) {
|
|
|
138
100
|
})
|
|
139
101
|
.actions(async (_stored, _env, runtime) => {
|
|
140
102
|
const existingSchema = _stored?.content?.ctx?.schema?.schema;
|
|
103
|
+
const datasetId = _stored?.content?.datasetId ?? fallbackDatasetId ?? "";
|
|
104
|
+
const fileId = _stored?.content?.fileId ?? params.fileId ?? "";
|
|
105
|
+
const sandboxId = _stored?.content?.sandboxId ?? params.sandboxId ?? "";
|
|
106
|
+
if (!datasetId)
|
|
107
|
+
throw new Error("dataset_id_required");
|
|
108
|
+
if (!fileId)
|
|
109
|
+
throw new Error("dataset_file_id_required");
|
|
110
|
+
if (!sandboxId)
|
|
111
|
+
throw new Error("dataset_sandbox_required");
|
|
141
112
|
const actions = {
|
|
142
113
|
executeCommand: createExecuteCommandTool({
|
|
143
114
|
datasetId,
|
|
144
|
-
sandboxId
|
|
115
|
+
sandboxId,
|
|
145
116
|
runtime,
|
|
146
117
|
}),
|
|
147
118
|
completeDataset: createCompleteDatasetTool({
|
|
148
119
|
datasetId,
|
|
149
|
-
sandboxId
|
|
120
|
+
sandboxId,
|
|
150
121
|
runtime,
|
|
151
122
|
}),
|
|
152
123
|
clearDataset: createClearDatasetTool({
|
|
153
124
|
datasetId,
|
|
154
|
-
sandboxId
|
|
125
|
+
sandboxId,
|
|
155
126
|
runtime,
|
|
156
127
|
}),
|
|
157
128
|
};
|
|
158
129
|
if (!existingSchema) {
|
|
159
130
|
actions.generateSchema = createGenerateSchemaTool({
|
|
160
131
|
datasetId,
|
|
161
|
-
fileId
|
|
132
|
+
fileId,
|
|
162
133
|
runtime,
|
|
163
134
|
});
|
|
164
135
|
}
|
|
@@ -174,28 +145,19 @@ function createFileParseContextDefinition(params) {
|
|
|
174
145
|
contextBuilder = contextBuilder.model(model);
|
|
175
146
|
}
|
|
176
147
|
const context = contextBuilder.build();
|
|
177
|
-
return { datasetId, context };
|
|
148
|
+
return { datasetId: fallbackDatasetId ?? "", context };
|
|
178
149
|
}
|
|
179
|
-
/**
|
|
180
|
-
* Factory (DX-first):
|
|
181
|
-
*
|
|
182
|
-
* Usage:
|
|
183
|
-
* const { datasetId } = await createFileParseContext(fileId, { instructions }).parse(runtime)
|
|
184
|
-
*
|
|
185
|
-
* - Uses the caller runtime; no secondary runtime is created.
|
|
186
|
-
* - All I/O happens in `"use step"` functions via the provided Ekairos runtime.
|
|
187
|
-
* - `parse()` is the entrypoint; it calls `context.react(...)` internally.
|
|
188
|
-
*/
|
|
189
150
|
export function createFileParseContext(fileId, opts) {
|
|
151
|
+
const datasetId = opts?.datasetId ?? id();
|
|
190
152
|
const params = {
|
|
191
153
|
fileId,
|
|
192
154
|
instructions: opts?.instructions,
|
|
193
155
|
sandboxId: opts?.sandboxId,
|
|
194
|
-
datasetId
|
|
156
|
+
datasetId,
|
|
195
157
|
model: opts?.model,
|
|
196
158
|
reactor: opts?.reactor,
|
|
197
159
|
};
|
|
198
|
-
const {
|
|
160
|
+
const { context } = createFileParseContextDefinition(params);
|
|
199
161
|
return {
|
|
200
162
|
datasetId,
|
|
201
163
|
async parse(runtime, options = {}) {
|
|
@@ -205,19 +167,43 @@ export function createFileParseContext(fileId, opts) {
|
|
|
205
167
|
channel: WEB_CHANNEL,
|
|
206
168
|
createdAt: new Date().toISOString(),
|
|
207
169
|
content: {
|
|
208
|
-
parts: [
|
|
170
|
+
parts: [
|
|
171
|
+
{
|
|
172
|
+
type: "text",
|
|
173
|
+
text: options.prompt ?? "generate a dataset for this file",
|
|
174
|
+
},
|
|
175
|
+
],
|
|
209
176
|
},
|
|
210
177
|
};
|
|
211
178
|
const shell = await context.react(triggerEvent, {
|
|
212
179
|
runtime: runtime,
|
|
213
180
|
context: { key: `dataset:${datasetId}` },
|
|
214
181
|
durable: options.durable ?? false,
|
|
215
|
-
options: {
|
|
182
|
+
options: {
|
|
183
|
+
silent: true,
|
|
184
|
+
preventClose: true,
|
|
185
|
+
sendFinish: false,
|
|
186
|
+
maxIterations: 20,
|
|
187
|
+
maxModelSteps: 5,
|
|
188
|
+
},
|
|
189
|
+
__initialContent: {
|
|
190
|
+
datasetId,
|
|
191
|
+
fileId,
|
|
192
|
+
instructions: opts?.instructions ?? "",
|
|
193
|
+
sandboxId: opts?.sandboxId ?? "",
|
|
194
|
+
sandboxState: { initialized: false, filePath: "" },
|
|
195
|
+
},
|
|
216
196
|
});
|
|
217
197
|
await awaitContextRun(shell.run);
|
|
218
198
|
return { datasetId };
|
|
219
199
|
},
|
|
220
|
-
// Optional: expose the built context for advanced callers (not required for parse DX)
|
|
221
200
|
context,
|
|
222
201
|
};
|
|
223
202
|
}
|
|
203
|
+
export function registerFileParseContext(opts) {
|
|
204
|
+
createFileParseContextDefinition({
|
|
205
|
+
model: opts?.model,
|
|
206
|
+
reactor: opts?.reactor,
|
|
207
|
+
}).context;
|
|
208
|
+
}
|
|
209
|
+
registerFileParseContext();
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { FileParseContext, SandboxState } from "./file-dataset.types.js";
|
|
2
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
3
|
+
export declare function initializeFileParseSandboxStep(params: {
|
|
4
|
+
runtime: any;
|
|
5
|
+
sandboxId: string;
|
|
6
|
+
datasetId: string;
|
|
7
|
+
fileId: string;
|
|
8
|
+
state: SandboxState;
|
|
9
|
+
}): Promise<{
|
|
10
|
+
filePath: string;
|
|
11
|
+
state: SandboxState;
|
|
12
|
+
}>;
|
|
13
|
+
export declare function generateFileParsePreviewStep(params: {
|
|
14
|
+
runtime: any;
|
|
15
|
+
sandboxId: string;
|
|
16
|
+
sandboxFilePath: string;
|
|
17
|
+
datasetId: string;
|
|
18
|
+
}): Promise<FilePreviewContext>;
|
|
19
|
+
export declare function buildFileDatasetPromptStep(params: {
|
|
20
|
+
context: FileParseContext;
|
|
21
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { getDatasetWorkstation } from "../datasetFiles.js";
|
|
2
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
3
|
+
import { buildFileDatasetPrompt } from "./prompts.js";
|
|
4
|
+
import { generateFilePreview, ensurePreviewScriptsAvailable } from "./filepreview.js";
|
|
5
|
+
import { readInstantFileStep } from "./steps.js";
|
|
6
|
+
export async function initializeFileParseSandboxStep(params) {
|
|
7
|
+
"use step";
|
|
8
|
+
if (params.state.initialized) {
|
|
9
|
+
return { filePath: params.state.filePath, state: params.state };
|
|
10
|
+
}
|
|
11
|
+
console.log(`[FileParseContext ${params.datasetId}] Initializing sandbox...`);
|
|
12
|
+
await ensurePreviewScriptsAvailable(params.runtime, params.sandboxId);
|
|
13
|
+
console.log(`[FileParseContext ${params.datasetId}] Installing Python dependencies...`);
|
|
14
|
+
const pipInstall = await runDatasetSandboxCommandStep({
|
|
15
|
+
runtime: params.runtime,
|
|
16
|
+
sandboxId: params.sandboxId,
|
|
17
|
+
cmd: "python",
|
|
18
|
+
args: ["-m", "pip", "install", "pandas", "openpyxl", "--quiet", "--upgrade"],
|
|
19
|
+
});
|
|
20
|
+
const installStderr = pipInstall.stderr;
|
|
21
|
+
if (installStderr && (installStderr.includes("ERROR") || installStderr.includes("FAILED"))) {
|
|
22
|
+
throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
|
|
23
|
+
}
|
|
24
|
+
console.log(`[FileParseContext ${params.datasetId}] Fetching file from InstantDB...`);
|
|
25
|
+
const file = await readInstantFileStep({ runtime: params.runtime, fileId: params.fileId });
|
|
26
|
+
console.log(`[FileParseContext ${params.datasetId}] Creating dataset workstation...`);
|
|
27
|
+
const workstation = getDatasetWorkstation(params.datasetId);
|
|
28
|
+
await runDatasetSandboxCommandStep({
|
|
29
|
+
runtime: params.runtime,
|
|
30
|
+
sandboxId: params.sandboxId,
|
|
31
|
+
cmd: "mkdir",
|
|
32
|
+
args: ["-p", workstation],
|
|
33
|
+
});
|
|
34
|
+
const fileName = file.contentDisposition ?? "";
|
|
35
|
+
const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
|
|
36
|
+
const sandboxFilePath = `${workstation}/${params.fileId}${fileExtension}`;
|
|
37
|
+
await writeDatasetSandboxFilesStep({
|
|
38
|
+
runtime: params.runtime,
|
|
39
|
+
sandboxId: params.sandboxId,
|
|
40
|
+
files: [
|
|
41
|
+
{
|
|
42
|
+
path: sandboxFilePath,
|
|
43
|
+
contentBase64: file.contentBase64,
|
|
44
|
+
},
|
|
45
|
+
],
|
|
46
|
+
});
|
|
47
|
+
console.log(`[FileParseContext ${params.datasetId}] Workstation created: ${workstation}`);
|
|
48
|
+
console.log(`[FileParseContext ${params.datasetId}] File saved: ${sandboxFilePath}`);
|
|
49
|
+
const state = {
|
|
50
|
+
initialized: true,
|
|
51
|
+
filePath: sandboxFilePath,
|
|
52
|
+
};
|
|
53
|
+
return { filePath: sandboxFilePath, state };
|
|
54
|
+
}
|
|
55
|
+
export async function generateFileParsePreviewStep(params) {
|
|
56
|
+
"use step";
|
|
57
|
+
return await generateFilePreview(params.runtime, params.sandboxId, params.sandboxFilePath, params.datasetId);
|
|
58
|
+
}
|
|
59
|
+
export async function buildFileDatasetPromptStep(params) {
|
|
60
|
+
"use step";
|
|
61
|
+
return buildFileDatasetPrompt(params.context);
|
|
62
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import type { ContextReactor } from "@ekairos/events";
|
|
2
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
3
|
+
export type SandboxState = {
|
|
4
|
+
initialized: boolean;
|
|
5
|
+
filePath: string;
|
|
6
|
+
};
|
|
7
|
+
export type FileParseContext = {
|
|
8
|
+
datasetId: string;
|
|
9
|
+
fileId: string;
|
|
10
|
+
instructions: string;
|
|
11
|
+
sandboxConfig: {
|
|
12
|
+
filePath: string;
|
|
13
|
+
};
|
|
14
|
+
analysis: any[];
|
|
15
|
+
schema: any | null;
|
|
16
|
+
plan: any | null;
|
|
17
|
+
executionResult: any | null;
|
|
18
|
+
errors: string[];
|
|
19
|
+
iterationCount: number;
|
|
20
|
+
filePreview?: FilePreviewContext;
|
|
21
|
+
};
|
|
22
|
+
export type FileParseContextParams = {
|
|
23
|
+
fileId?: string;
|
|
24
|
+
instructions?: string;
|
|
25
|
+
sandboxId?: string;
|
|
26
|
+
datasetId?: string;
|
|
27
|
+
model?: string;
|
|
28
|
+
reactor?: ContextReactor<any, any>;
|
|
29
|
+
};
|
|
30
|
+
export type FileParseRunOptions = {
|
|
31
|
+
prompt?: string;
|
|
32
|
+
durable?: boolean;
|
|
33
|
+
};
|
|
34
|
+
export type FileParseContextBuilder<Env extends {
|
|
35
|
+
orgId: string;
|
|
36
|
+
}> = {
|
|
37
|
+
datasetId: string;
|
|
38
|
+
context: any;
|
|
39
|
+
};
|
|
40
|
+
export type DatasetResult = {
|
|
41
|
+
id: string;
|
|
42
|
+
status?: string;
|
|
43
|
+
title?: string;
|
|
44
|
+
schema?: any;
|
|
45
|
+
analysis?: any;
|
|
46
|
+
calculatedTotalRows?: number;
|
|
47
|
+
actualGeneratedRowCount?: number;
|
|
48
|
+
createdAt?: number;
|
|
49
|
+
updatedAt?: number;
|
|
50
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,34 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
metadata?: {
|
|
4
|
-
description: string;
|
|
5
|
-
script: string;
|
|
6
|
-
command: string;
|
|
7
|
-
stdout: string;
|
|
8
|
-
stderr: string;
|
|
9
|
-
};
|
|
10
|
-
head?: {
|
|
11
|
-
description: string;
|
|
12
|
-
script: string;
|
|
13
|
-
command: string;
|
|
14
|
-
stdout: string;
|
|
15
|
-
stderr: string;
|
|
16
|
-
};
|
|
17
|
-
tail?: {
|
|
18
|
-
description: string;
|
|
19
|
-
script: string;
|
|
20
|
-
command: string;
|
|
21
|
-
stdout: string;
|
|
22
|
-
stderr: string;
|
|
23
|
-
};
|
|
24
|
-
mid?: {
|
|
25
|
-
description: string;
|
|
26
|
-
script: string;
|
|
27
|
-
command: string;
|
|
28
|
-
stdout: string;
|
|
29
|
-
stderr: string;
|
|
30
|
-
};
|
|
31
|
-
};
|
|
1
|
+
import type { FilePreviewContext } from "./filepreview.types.js";
|
|
2
|
+
export type { FilePreviewContext } from "./filepreview.types.js";
|
|
32
3
|
interface PreviewOptions {
|
|
33
4
|
headLines?: number;
|
|
34
5
|
tailLines?: number;
|
|
@@ -36,4 +7,3 @@ interface PreviewOptions {
|
|
|
36
7
|
}
|
|
37
8
|
export declare function ensurePreviewScriptsAvailable(runtime: any, sandboxId: string): Promise<void>;
|
|
38
9
|
export declare function generateFilePreview(runtime: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
|
|
39
|
-
export {};
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
export type FilePreviewContext = {
|
|
2
|
+
totalRows: number;
|
|
3
|
+
metadata?: {
|
|
4
|
+
description: string;
|
|
5
|
+
script: string;
|
|
6
|
+
command: string;
|
|
7
|
+
stdout: string;
|
|
8
|
+
stderr: string;
|
|
9
|
+
};
|
|
10
|
+
head?: {
|
|
11
|
+
description: string;
|
|
12
|
+
script: string;
|
|
13
|
+
command: string;
|
|
14
|
+
stdout: string;
|
|
15
|
+
stderr: string;
|
|
16
|
+
};
|
|
17
|
+
tail?: {
|
|
18
|
+
description: string;
|
|
19
|
+
script: string;
|
|
20
|
+
command: string;
|
|
21
|
+
stdout: string;
|
|
22
|
+
stderr: string;
|
|
23
|
+
};
|
|
24
|
+
mid?: {
|
|
25
|
+
description: string;
|
|
26
|
+
script: string;
|
|
27
|
+
command: string;
|
|
28
|
+
stdout: string;
|
|
29
|
+
stderr: string;
|
|
30
|
+
};
|
|
31
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/file/prompts.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { FileParseContext } from "./file-dataset.
|
|
1
|
+
import type { FileParseContext } from "./file-dataset.types.js";
|
|
2
2
|
export declare function buildFileDatasetPrompt(context: FileParseContext): string;
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { createTransformDatasetContext, type TransformDatasetAgentParams, type TransformDatasetContext, } from "./transform-dataset.agent.js";
|
|
1
|
+
export { createTransformDatasetContext, registerTransformDatasetContext, type TransformDatasetAgentParams, type TransformDatasetContext, type TransformDatasetRunOptions, } from "./transform-dataset.agent.js";
|
|
2
2
|
export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset.js";
|
package/dist/transform/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { createTransformDatasetContext, } from "./transform-dataset.agent.js";
|
|
1
|
+
export { createTransformDatasetContext, registerTransformDatasetContext, } from "./transform-dataset.agent.js";
|
|
2
2
|
export { transformDataset, } from "./transformDataset.js";
|
|
@@ -1,34 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
datasetId: string;
|
|
3
|
-
sourceDatasetIds: string[];
|
|
4
|
-
outputSchema: any;
|
|
5
|
-
sandboxConfig: {
|
|
6
|
-
sourcePaths: Array<{
|
|
7
|
-
datasetId: string;
|
|
8
|
-
path: string;
|
|
9
|
-
}>;
|
|
10
|
-
outputPath: string;
|
|
11
|
-
};
|
|
12
|
-
sourcePreviews?: Array<{
|
|
13
|
-
datasetId: string;
|
|
14
|
-
preview: {
|
|
15
|
-
totalRows: number;
|
|
16
|
-
metadata?: {
|
|
17
|
-
description: string;
|
|
18
|
-
script: string;
|
|
19
|
-
command: string;
|
|
20
|
-
stdout: string;
|
|
21
|
-
stderr: string;
|
|
22
|
-
};
|
|
23
|
-
head?: {
|
|
24
|
-
description: string;
|
|
25
|
-
script: string;
|
|
26
|
-
command: string;
|
|
27
|
-
stdout: string;
|
|
28
|
-
stderr: string;
|
|
29
|
-
};
|
|
30
|
-
};
|
|
31
|
-
}>;
|
|
32
|
-
errors: string[];
|
|
33
|
-
};
|
|
1
|
+
import type { TransformPromptContext } from "./transform-dataset.types.js";
|
|
34
2
|
export declare function buildTransformDatasetPrompt(context: TransformPromptContext): string;
|
|
@@ -1,48 +1,6 @@
|
|
|
1
1
|
import { type ContextReactor } from "@ekairos/events";
|
|
2
|
-
import {
|
|
3
|
-
export type TransformDatasetContext
|
|
4
|
-
datasetId: string;
|
|
5
|
-
sourceDatasetIds: string[];
|
|
6
|
-
outputSchema: any;
|
|
7
|
-
sandboxConfig: {
|
|
8
|
-
sourcePaths: Array<{
|
|
9
|
-
datasetId: string;
|
|
10
|
-
path: string;
|
|
11
|
-
}>;
|
|
12
|
-
outputPath: string;
|
|
13
|
-
};
|
|
14
|
-
sourcePreviews?: Array<{
|
|
15
|
-
datasetId: string;
|
|
16
|
-
preview: TransformSourcePreviewContext;
|
|
17
|
-
}>;
|
|
18
|
-
errors: string[];
|
|
19
|
-
iterationCount: number;
|
|
20
|
-
instructions?: string;
|
|
21
|
-
};
|
|
22
|
-
export type TransformDatasetAgentParams = {
|
|
23
|
-
sourceDatasetIds: string[];
|
|
24
|
-
outputSchema: any;
|
|
25
|
-
instructions?: string;
|
|
26
|
-
datasetId?: string;
|
|
27
|
-
model?: string;
|
|
28
|
-
sandboxId?: string;
|
|
29
|
-
reactor?: ContextReactor<any, any>;
|
|
30
|
-
};
|
|
31
|
-
export type TransformDatasetRunOptions = {
|
|
32
|
-
prompt?: string;
|
|
33
|
-
durable?: boolean;
|
|
34
|
-
};
|
|
35
|
-
export type TransformDatasetResult = {
|
|
36
|
-
id: string;
|
|
37
|
-
status?: string;
|
|
38
|
-
title?: string;
|
|
39
|
-
schema?: any;
|
|
40
|
-
analysis?: any;
|
|
41
|
-
calculatedTotalRows?: number;
|
|
42
|
-
actualGeneratedRowCount?: number;
|
|
43
|
-
createdAt?: number;
|
|
44
|
-
updatedAt?: number;
|
|
45
|
-
};
|
|
2
|
+
import type { TransformDatasetRunOptions } from "./transform-dataset.types.js";
|
|
3
|
+
export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
|
|
46
4
|
export declare function createTransformDatasetContext<Env extends {
|
|
47
5
|
orgId: string;
|
|
48
6
|
}>(params: {
|
|
@@ -62,3 +20,9 @@ export declare function createTransformDatasetContext<Env extends {
|
|
|
62
20
|
}>;
|
|
63
21
|
context: any;
|
|
64
22
|
};
|
|
23
|
+
export declare function registerTransformDatasetContext<Env extends {
|
|
24
|
+
orgId: string;
|
|
25
|
+
}>(opts?: {
|
|
26
|
+
model?: string;
|
|
27
|
+
reactor?: ContextReactor<any, any>;
|
|
28
|
+
}): void;
|
|
@@ -1,13 +1,10 @@
|
|
|
1
|
-
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
|
|
2
|
-
import { createCompleteDatasetTool, didCompleteDatasetSucceed } from "../completeDataset.tool.js";
|
|
3
|
-
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
4
|
-
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
5
|
-
import { buildTransformDatasetPrompt } from "./prompts.js";
|
|
6
|
-
import { getDatasetWorkstation, getDatasetOutputPath } from "../datasetFiles.js";
|
|
1
|
+
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
7
2
|
import { id } from "@instantdb/admin";
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
3
|
+
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
4
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed, } from "../completeDataset.tool.js";
|
|
5
|
+
import { datasetUpdateSchemaStep } from "../dataset/steps.js";
|
|
6
|
+
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
7
|
+
import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
|
|
11
8
|
async function awaitContextRun(run) {
|
|
12
9
|
if (!run)
|
|
13
10
|
return;
|
|
@@ -17,66 +14,68 @@ async function awaitContextRun(run) {
|
|
|
17
14
|
}
|
|
18
15
|
await run;
|
|
19
16
|
}
|
|
20
|
-
async function ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatasetIds, state) {
|
|
21
|
-
if (state.initialized) {
|
|
22
|
-
return { sourcePaths: state.sourcePaths, outputPath: getDatasetOutputPath(datasetId) };
|
|
23
|
-
}
|
|
24
|
-
const workstation = getDatasetWorkstation(datasetId);
|
|
25
|
-
await runDatasetSandboxCommandStep({ runtime, sandboxId, cmd: "mkdir", args: ["-p", workstation] });
|
|
26
|
-
const sourcePaths = [];
|
|
27
|
-
for (const sourceDatasetId of sourceDatasetIds) {
|
|
28
|
-
const sourcePath = `${workstation}/source_${sourceDatasetId}.jsonl`;
|
|
29
|
-
const source = await datasetReadOutputJsonlStep({ runtime, datasetId: sourceDatasetId });
|
|
30
|
-
await writeDatasetSandboxFilesStep({
|
|
31
|
-
runtime,
|
|
32
|
-
sandboxId,
|
|
33
|
-
files: [{ path: sourcePath, contentBase64: source.contentBase64 }],
|
|
34
|
-
});
|
|
35
|
-
sourcePaths.push({ datasetId: sourceDatasetId, path: sourcePath });
|
|
36
|
-
}
|
|
37
|
-
state.sourcePaths = sourcePaths;
|
|
38
|
-
state.initialized = true;
|
|
39
|
-
return { sourcePaths, outputPath: getDatasetOutputPath(datasetId) };
|
|
40
|
-
}
|
|
41
17
|
function createTransformDatasetContextDefinition(params) {
|
|
42
|
-
const
|
|
18
|
+
const fallbackDatasetId = params.datasetId;
|
|
43
19
|
const model = params.model ?? "openai/gpt-5";
|
|
44
20
|
let contextBuilder = createContext("dataset.transform")
|
|
45
21
|
.context(async (stored, _env, runtime) => {
|
|
46
22
|
const previous = stored?.content ?? {};
|
|
47
23
|
const sandboxState = previous?.sandboxState ?? { initialized: false, sourcePaths: [] };
|
|
24
|
+
const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
|
|
25
|
+
const sourceDatasetIds = Array.isArray(previous?.sourceDatasetIds)
|
|
26
|
+
? previous.sourceDatasetIds
|
|
27
|
+
: Array.isArray(params.sourceDatasetIds)
|
|
28
|
+
? params.sourceDatasetIds
|
|
29
|
+
: [];
|
|
30
|
+
const outputSchema = previous?.outputSchema ?? params.outputSchema;
|
|
31
|
+
const instructions = previous?.instructions ?? params.instructions;
|
|
48
32
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
33
|
+
if (!datasetId) {
|
|
34
|
+
throw new Error("dataset_id_required");
|
|
35
|
+
}
|
|
36
|
+
if (sourceDatasetIds.length === 0) {
|
|
37
|
+
throw new Error("dataset_transform_sources_required");
|
|
38
|
+
}
|
|
39
|
+
if (!outputSchema) {
|
|
40
|
+
throw new Error("dataset_transform_schema_required");
|
|
41
|
+
}
|
|
49
42
|
if (!sandboxId) {
|
|
50
43
|
throw new Error("dataset_sandbox_required");
|
|
51
44
|
}
|
|
52
|
-
const
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
45
|
+
const initialized = await ensureTransformSourcesInSandboxStep({
|
|
46
|
+
runtime,
|
|
47
|
+
sandboxId,
|
|
48
|
+
datasetId,
|
|
49
|
+
sourceDatasetIds,
|
|
50
|
+
state: sandboxState,
|
|
51
|
+
});
|
|
52
|
+
const sourcePreviews = await generateTransformSourcePreviewsStep({
|
|
53
|
+
runtime,
|
|
54
|
+
sandboxId,
|
|
55
|
+
datasetId,
|
|
56
|
+
sourcePaths: initialized.sourcePaths,
|
|
57
|
+
});
|
|
64
58
|
await datasetUpdateSchemaStep({
|
|
65
59
|
runtime,
|
|
66
60
|
datasetId,
|
|
67
|
-
schema:
|
|
61
|
+
schema: outputSchema,
|
|
68
62
|
status: "schema_complete",
|
|
69
63
|
});
|
|
70
64
|
const promptContext = {
|
|
71
65
|
datasetId,
|
|
72
|
-
sourceDatasetIds
|
|
73
|
-
outputSchema
|
|
74
|
-
sandboxConfig: {
|
|
66
|
+
sourceDatasetIds,
|
|
67
|
+
outputSchema,
|
|
68
|
+
sandboxConfig: {
|
|
69
|
+
sourcePaths: initialized.sourcePaths,
|
|
70
|
+
outputPath: initialized.outputPath,
|
|
71
|
+
},
|
|
75
72
|
sourcePreviews: sourcePreviews.length > 0 ? sourcePreviews : undefined,
|
|
76
73
|
errors: [],
|
|
77
74
|
};
|
|
78
|
-
const basePrompt =
|
|
79
|
-
|
|
75
|
+
const basePrompt = await buildTransformDatasetPromptStep({
|
|
76
|
+
context: promptContext,
|
|
77
|
+
});
|
|
78
|
+
const userInstructions = String(instructions ?? "").trim();
|
|
80
79
|
const system = userInstructions
|
|
81
80
|
? [
|
|
82
81
|
"## USER INSTRUCTIONS",
|
|
@@ -90,17 +89,28 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
90
89
|
return {
|
|
91
90
|
...previous,
|
|
92
91
|
datasetId,
|
|
92
|
+
sourceDatasetIds,
|
|
93
|
+
outputSchema,
|
|
94
|
+
instructions,
|
|
93
95
|
sandboxId,
|
|
94
|
-
sandboxState,
|
|
96
|
+
sandboxState: initialized.state,
|
|
95
97
|
system,
|
|
96
|
-
sandboxConfig: {
|
|
98
|
+
sandboxConfig: {
|
|
99
|
+
sourcePaths: initialized.sourcePaths,
|
|
100
|
+
outputPath: initialized.outputPath,
|
|
101
|
+
},
|
|
97
102
|
};
|
|
98
103
|
})
|
|
99
104
|
.narrative(async (stored) => {
|
|
100
105
|
return String(stored?.content?.system ?? "");
|
|
101
106
|
})
|
|
102
107
|
.actions(async (stored, _env, runtime) => {
|
|
108
|
+
const datasetId = stored?.content?.datasetId ?? fallbackDatasetId ?? "";
|
|
103
109
|
const sandboxId = stored?.content?.sandboxId ?? params.sandboxId ?? "";
|
|
110
|
+
if (!datasetId)
|
|
111
|
+
throw new Error("dataset_id_required");
|
|
112
|
+
if (!sandboxId)
|
|
113
|
+
throw new Error("dataset_sandbox_required");
|
|
104
114
|
return {
|
|
105
115
|
executeCommand: createExecuteCommandTool({
|
|
106
116
|
datasetId,
|
|
@@ -129,14 +139,15 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
129
139
|
contextBuilder = contextBuilder.model(model);
|
|
130
140
|
}
|
|
131
141
|
const context = contextBuilder.build();
|
|
132
|
-
return { datasetId, context };
|
|
142
|
+
return { datasetId: fallbackDatasetId ?? "", context };
|
|
133
143
|
}
|
|
134
144
|
export function createTransformDatasetContext(params) {
|
|
135
|
-
const
|
|
145
|
+
const datasetId = params.datasetId ?? id();
|
|
146
|
+
const { context } = createTransformDatasetContextDefinition({
|
|
136
147
|
sourceDatasetIds: params.sourceDatasetIds,
|
|
137
148
|
outputSchema: params.outputSchema,
|
|
138
149
|
instructions: params.instructions,
|
|
139
|
-
datasetId
|
|
150
|
+
datasetId,
|
|
140
151
|
model: params.model,
|
|
141
152
|
sandboxId: params.sandboxId,
|
|
142
153
|
reactor: params.reactor,
|
|
@@ -166,7 +177,21 @@ export function createTransformDatasetContext(params) {
|
|
|
166
177
|
runtime: runtime,
|
|
167
178
|
context: { key: `dataset:${datasetId}` },
|
|
168
179
|
durable: options.durable ?? false,
|
|
169
|
-
options: {
|
|
180
|
+
options: {
|
|
181
|
+
silent: true,
|
|
182
|
+
preventClose: true,
|
|
183
|
+
sendFinish: false,
|
|
184
|
+
maxIterations: 20,
|
|
185
|
+
maxModelSteps: 5,
|
|
186
|
+
},
|
|
187
|
+
__initialContent: {
|
|
188
|
+
datasetId,
|
|
189
|
+
sourceDatasetIds: params.sourceDatasetIds,
|
|
190
|
+
outputSchema: params.outputSchema,
|
|
191
|
+
instructions: params.instructions,
|
|
192
|
+
sandboxId: params.sandboxId ?? "",
|
|
193
|
+
sandboxState: { initialized: false, sourcePaths: [] },
|
|
194
|
+
},
|
|
170
195
|
});
|
|
171
196
|
await awaitContextRun(shell.run);
|
|
172
197
|
return { datasetId };
|
|
@@ -174,3 +199,10 @@ export function createTransformDatasetContext(params) {
|
|
|
174
199
|
context,
|
|
175
200
|
};
|
|
176
201
|
}
|
|
202
|
+
export function registerTransformDatasetContext(opts) {
|
|
203
|
+
createTransformDatasetContextDefinition({
|
|
204
|
+
model: opts?.model,
|
|
205
|
+
reactor: opts?.reactor,
|
|
206
|
+
}).context;
|
|
207
|
+
}
|
|
208
|
+
registerTransformDatasetContext();
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { TransformPromptContext, TransformSandboxState, TransformSourcePreviewContext } from "./transform-dataset.types.js";
|
|
2
|
+
export declare function ensureTransformSourcesInSandboxStep(params: {
|
|
3
|
+
runtime: any;
|
|
4
|
+
sandboxId: string;
|
|
5
|
+
datasetId: string;
|
|
6
|
+
sourceDatasetIds: string[];
|
|
7
|
+
state: TransformSandboxState;
|
|
8
|
+
}): Promise<{
|
|
9
|
+
sourcePaths: Array<{
|
|
10
|
+
datasetId: string;
|
|
11
|
+
path: string;
|
|
12
|
+
}>;
|
|
13
|
+
outputPath: string;
|
|
14
|
+
state: TransformSandboxState;
|
|
15
|
+
}>;
|
|
16
|
+
export declare function generateTransformSourcePreviewsStep(params: {
|
|
17
|
+
runtime: any;
|
|
18
|
+
sandboxId: string;
|
|
19
|
+
datasetId: string;
|
|
20
|
+
sourcePaths: Array<{
|
|
21
|
+
datasetId: string;
|
|
22
|
+
path: string;
|
|
23
|
+
}>;
|
|
24
|
+
}): Promise<Array<{
|
|
25
|
+
datasetId: string;
|
|
26
|
+
preview: TransformSourcePreviewContext;
|
|
27
|
+
}>>;
|
|
28
|
+
export declare function buildTransformDatasetPromptStep(params: {
|
|
29
|
+
context: TransformPromptContext;
|
|
30
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { getDatasetOutputPath, getDatasetWorkstation } from "../datasetFiles.js";
|
|
2
|
+
import { datasetReadOutputJsonlStep } from "../dataset/steps.js";
|
|
3
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
4
|
+
import { generateSourcePreview } from "./filepreview.js";
|
|
5
|
+
import { buildTransformDatasetPrompt } from "./prompts.js";
|
|
6
|
+
export async function ensureTransformSourcesInSandboxStep(params) {
|
|
7
|
+
"use step";
|
|
8
|
+
if (params.state.initialized) {
|
|
9
|
+
return {
|
|
10
|
+
sourcePaths: params.state.sourcePaths,
|
|
11
|
+
outputPath: getDatasetOutputPath(params.datasetId),
|
|
12
|
+
state: params.state,
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
const workstation = getDatasetWorkstation(params.datasetId);
|
|
16
|
+
await runDatasetSandboxCommandStep({
|
|
17
|
+
runtime: params.runtime,
|
|
18
|
+
sandboxId: params.sandboxId,
|
|
19
|
+
cmd: "mkdir",
|
|
20
|
+
args: ["-p", workstation],
|
|
21
|
+
});
|
|
22
|
+
const sourcePaths = [];
|
|
23
|
+
for (const sourceDatasetId of params.sourceDatasetIds) {
|
|
24
|
+
const sourcePath = `${workstation}/source_${sourceDatasetId}.jsonl`;
|
|
25
|
+
const source = await datasetReadOutputJsonlStep({
|
|
26
|
+
runtime: params.runtime,
|
|
27
|
+
datasetId: sourceDatasetId,
|
|
28
|
+
});
|
|
29
|
+
await writeDatasetSandboxFilesStep({
|
|
30
|
+
runtime: params.runtime,
|
|
31
|
+
sandboxId: params.sandboxId,
|
|
32
|
+
files: [{ path: sourcePath, contentBase64: source.contentBase64 }],
|
|
33
|
+
});
|
|
34
|
+
sourcePaths.push({ datasetId: sourceDatasetId, path: sourcePath });
|
|
35
|
+
}
|
|
36
|
+
return {
|
|
37
|
+
sourcePaths,
|
|
38
|
+
outputPath: getDatasetOutputPath(params.datasetId),
|
|
39
|
+
state: {
|
|
40
|
+
initialized: true,
|
|
41
|
+
sourcePaths,
|
|
42
|
+
},
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
export async function generateTransformSourcePreviewsStep(params) {
|
|
46
|
+
"use step";
|
|
47
|
+
const sourcePreviews = [];
|
|
48
|
+
for (const sourcePath of params.sourcePaths) {
|
|
49
|
+
try {
|
|
50
|
+
const preview = await generateSourcePreview(params.runtime, params.sandboxId, sourcePath.path, params.datasetId);
|
|
51
|
+
sourcePreviews.push({ datasetId: sourcePath.datasetId, preview });
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
// Source preview is optional; transformation can still read the JSONL files.
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
return sourcePreviews;
|
|
58
|
+
}
|
|
59
|
+
export async function buildTransformDatasetPromptStep(params) {
|
|
60
|
+
"use step";
|
|
61
|
+
return buildTransformDatasetPrompt(params.context);
|
|
62
|
+
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import type { ContextReactor } from "@ekairos/events";
|
|
2
|
+
import type { TransformSourcePreviewContext } from "./filepreview.js";
|
|
3
|
+
export type { TransformSourcePreviewContext } from "./filepreview.js";
|
|
4
|
+
export type TransformSandboxState = {
|
|
5
|
+
initialized: boolean;
|
|
6
|
+
sourcePaths: Array<{
|
|
7
|
+
datasetId: string;
|
|
8
|
+
path: string;
|
|
9
|
+
}>;
|
|
10
|
+
};
|
|
11
|
+
export type TransformDatasetContext = {
|
|
12
|
+
datasetId: string;
|
|
13
|
+
sourceDatasetIds: string[];
|
|
14
|
+
outputSchema: any;
|
|
15
|
+
sandboxConfig: {
|
|
16
|
+
sourcePaths: Array<{
|
|
17
|
+
datasetId: string;
|
|
18
|
+
path: string;
|
|
19
|
+
}>;
|
|
20
|
+
outputPath: string;
|
|
21
|
+
};
|
|
22
|
+
sourcePreviews?: Array<{
|
|
23
|
+
datasetId: string;
|
|
24
|
+
preview: TransformSourcePreviewContext;
|
|
25
|
+
}>;
|
|
26
|
+
errors: string[];
|
|
27
|
+
iterationCount: number;
|
|
28
|
+
instructions?: string;
|
|
29
|
+
};
|
|
30
|
+
export type TransformDatasetAgentParams = {
|
|
31
|
+
sourceDatasetIds?: string[];
|
|
32
|
+
outputSchema?: any;
|
|
33
|
+
instructions?: string;
|
|
34
|
+
datasetId?: string;
|
|
35
|
+
model?: string;
|
|
36
|
+
sandboxId?: string;
|
|
37
|
+
reactor?: ContextReactor<any, any>;
|
|
38
|
+
};
|
|
39
|
+
export type TransformDatasetRunOptions = {
|
|
40
|
+
prompt?: string;
|
|
41
|
+
durable?: boolean;
|
|
42
|
+
};
|
|
43
|
+
export type TransformDatasetResult = {
|
|
44
|
+
id: string;
|
|
45
|
+
status?: string;
|
|
46
|
+
title?: string;
|
|
47
|
+
schema?: any;
|
|
48
|
+
analysis?: any;
|
|
49
|
+
calculatedTotalRows?: number;
|
|
50
|
+
actualGeneratedRowCount?: number;
|
|
51
|
+
createdAt?: number;
|
|
52
|
+
updatedAt?: number;
|
|
53
|
+
};
|
|
54
|
+
export type TransformPromptContext = {
|
|
55
|
+
datasetId: string;
|
|
56
|
+
sourceDatasetIds: string[];
|
|
57
|
+
outputSchema: any;
|
|
58
|
+
sandboxConfig: {
|
|
59
|
+
sourcePaths: Array<{
|
|
60
|
+
datasetId: string;
|
|
61
|
+
path: string;
|
|
62
|
+
}>;
|
|
63
|
+
outputPath: string;
|
|
64
|
+
};
|
|
65
|
+
sourcePreviews?: Array<{
|
|
66
|
+
datasetId: string;
|
|
67
|
+
preview: {
|
|
68
|
+
totalRows: number;
|
|
69
|
+
metadata?: {
|
|
70
|
+
description: string;
|
|
71
|
+
script: string;
|
|
72
|
+
command: string;
|
|
73
|
+
stdout: string;
|
|
74
|
+
stderr: string;
|
|
75
|
+
};
|
|
76
|
+
head?: {
|
|
77
|
+
description: string;
|
|
78
|
+
script: string;
|
|
79
|
+
command: string;
|
|
80
|
+
stdout: string;
|
|
81
|
+
stderr: string;
|
|
82
|
+
};
|
|
83
|
+
};
|
|
84
|
+
}>;
|
|
85
|
+
errors: string[];
|
|
86
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.
|
|
3
|
+
"version": "1.22.56-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.56-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.56-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.56-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|