@ekairos/dataset 1.22.40-beta.development.0 → 1.22.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +4 -5
- package/dist/builder/instructions.js +15 -21
- package/dist/builder/materialize.d.ts +77 -10
- package/dist/builder/materialize.js +495 -152
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +10 -6
- package/dist/builder/persistence.js +107 -62
- package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -1
- package/dist/builder/{sourceRows.js → rows.js} +3 -9
- package/dist/builder/schemaInference.d.ts +1 -2
- package/dist/builder/schemaInference.js +4 -12
- package/dist/builder/types.d.ts +41 -26
- package/dist/builder/types.js +1 -3
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +537 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +2 -3
- package/dist/dataset.js +73 -51
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/defineNotation.tool.d.ts +49 -0
- package/dist/defineNotation.tool.js +154 -0
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +182 -136
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +152 -32
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +9 -7
- package/dist/index.js +9 -23
- package/dist/materializeDataset.tool.d.ts +35 -28
- package/dist/materializeDataset.tool.js +74 -68
- package/dist/notation.d.ts +205 -0
- package/dist/notation.js +424 -0
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +24 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +14 -3
- package/dist/schema.js +27 -26
- package/dist/service.d.ts +12 -5
- package/dist/service.js +88 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +66 -46
- package/dist/transform/transform-dataset.agent.d.ts +21 -46
- package/dist/transform/transform-dataset.agent.js +152 -93
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +96 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +33 -8
- package/dist/builder/instructions.d.ts.map +0 -1
- package/dist/builder/instructions.js.map +0 -1
- package/dist/builder/materialize.d.ts.map +0 -1
- package/dist/builder/materialize.js.map +0 -1
- package/dist/builder/persistence.d.ts.map +0 -1
- package/dist/builder/persistence.js.map +0 -1
- package/dist/builder/schemaInference.d.ts.map +0 -1
- package/dist/builder/schemaInference.js.map +0 -1
- package/dist/builder/sourceRows.d.ts.map +0 -1
- package/dist/builder/sourceRows.js.map +0 -1
- package/dist/builder/types.d.ts.map +0 -1
- package/dist/builder/types.js.map +0 -1
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts +0 -22
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js +0 -29
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
|
@@ -1,77 +1,78 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
return
|
|
1
|
+
import { INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
2
|
+
import { createContext } from "@ekairos/reactor/context";
|
|
3
|
+
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
4
|
+
import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
|
|
5
|
+
import { datasetUpdateSchemaStep } from "../dataset/steps.js";
|
|
6
|
+
import { getDatasetOutputPath } from "../datasetFiles.js";
|
|
7
|
+
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
8
|
+
import { createDefineNotationTool } from "../defineNotation.tool.js";
|
|
9
|
+
import { createCompleteObjectTool, createReplaceRowsTool, } from "../writeDatasetRows.tool.js";
|
|
10
|
+
import { buildTransformDatasetPromptStep, } from "./transform-dataset.steps.js";
|
|
11
|
+
import { createDatasetId } from "../id.js";
|
|
12
|
+
async function awaitContextRun(run) {
|
|
13
|
+
if (!run)
|
|
14
|
+
return;
|
|
15
|
+
if (run.returnValue) {
|
|
16
|
+
await run.returnValue;
|
|
17
|
+
return;
|
|
18
18
|
}
|
|
19
|
-
|
|
20
|
-
await (0, steps_2.runDatasetSandboxCommandStep)({ env, sandboxId, cmd: "mkdir", args: ["-p", workstation] });
|
|
21
|
-
const sourcePaths = [];
|
|
22
|
-
for (const sourceDatasetId of sourceDatasetIds) {
|
|
23
|
-
const sourcePath = `${workstation}/source_${sourceDatasetId}.jsonl`;
|
|
24
|
-
const source = await (0, steps_1.datasetReadOutputJsonlStep)({ env, datasetId: sourceDatasetId });
|
|
25
|
-
await (0, steps_2.writeDatasetSandboxFilesStep)({
|
|
26
|
-
env,
|
|
27
|
-
sandboxId,
|
|
28
|
-
files: [{ path: sourcePath, contentBase64: source.contentBase64 }],
|
|
29
|
-
});
|
|
30
|
-
sourcePaths.push({ datasetId: sourceDatasetId, path: sourcePath });
|
|
31
|
-
}
|
|
32
|
-
state.sourcePaths = sourcePaths;
|
|
33
|
-
state.initialized = true;
|
|
34
|
-
return { sourcePaths, outputPath: (0, datasetFiles_1.getDatasetOutputPath)(datasetId) };
|
|
19
|
+
await run;
|
|
35
20
|
}
|
|
36
|
-
function
|
|
37
|
-
const
|
|
21
|
+
function createTransformDatasetContextDefinition(params) {
|
|
22
|
+
const fallbackDatasetId = params.datasetId;
|
|
38
23
|
const model = params.model ?? "openai/gpt-5";
|
|
39
|
-
let
|
|
40
|
-
.context(async (stored,
|
|
24
|
+
let contextBuilder = createContext("dataset.transform")
|
|
25
|
+
.context(async (stored, _env, runtime) => {
|
|
41
26
|
const previous = stored?.content ?? {};
|
|
42
|
-
const sandboxState = previous?.sandboxState ??
|
|
27
|
+
const sandboxState = previous?.sandboxState ??
|
|
28
|
+
params.sandboxState ?? { initialized: false, inputPaths: [] };
|
|
29
|
+
const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
|
|
30
|
+
const inputDatasetIds = Array.isArray(previous?.inputDatasetIds)
|
|
31
|
+
? previous.inputDatasetIds
|
|
32
|
+
: Array.isArray(params.inputDatasetIds)
|
|
33
|
+
? params.inputDatasetIds
|
|
34
|
+
: [];
|
|
35
|
+
const outputSchema = previous?.outputSchema ?? params.outputSchema;
|
|
36
|
+
const instructions = previous?.instructions ?? params.instructions;
|
|
43
37
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
38
|
+
if (!datasetId) {
|
|
39
|
+
throw new Error("dataset_id_required");
|
|
40
|
+
}
|
|
41
|
+
if (inputDatasetIds.length === 0) {
|
|
42
|
+
throw new Error("dataset_transform_inputs_required");
|
|
43
|
+
}
|
|
44
|
+
if (!outputSchema) {
|
|
45
|
+
throw new Error("dataset_transform_schema_required");
|
|
46
|
+
}
|
|
44
47
|
if (!sandboxId) {
|
|
45
48
|
throw new Error("dataset_sandbox_required");
|
|
46
49
|
}
|
|
47
|
-
const
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
}
|
|
54
|
-
catch {
|
|
55
|
-
// optional
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
// Persist output schema on the dataset record (so completeDataset validates against it)
|
|
59
|
-
await (0, steps_1.datasetUpdateSchemaStep)({
|
|
60
|
-
env,
|
|
50
|
+
const initialized = sandboxState.initialized && Array.isArray(sandboxState.inputPaths)
|
|
51
|
+
? sandboxState
|
|
52
|
+
: { initialized: false, inputPaths: [] };
|
|
53
|
+
const inputPreviews = previous?.inputPreviews ?? params.inputPreviews ?? [];
|
|
54
|
+
await datasetUpdateSchemaStep({
|
|
55
|
+
runtime,
|
|
61
56
|
datasetId,
|
|
62
|
-
schema:
|
|
57
|
+
schema: outputSchema,
|
|
63
58
|
status: "schema_complete",
|
|
64
59
|
});
|
|
65
60
|
const promptContext = {
|
|
66
61
|
datasetId,
|
|
67
|
-
|
|
68
|
-
outputSchema
|
|
69
|
-
sandboxConfig: {
|
|
70
|
-
|
|
62
|
+
inputDatasetIds,
|
|
63
|
+
outputSchema,
|
|
64
|
+
sandboxConfig: {
|
|
65
|
+
inputPaths: initialized.inputPaths,
|
|
66
|
+
outputPath: previous?.sandboxConfig?.outputPath ?? getDatasetOutputPath(datasetId),
|
|
67
|
+
},
|
|
68
|
+
inputPreviews: inputPreviews.length > 0 ? inputPreviews : undefined,
|
|
69
|
+
contextResources: previous?.contextResources ?? params.contextResources ?? [],
|
|
71
70
|
errors: [],
|
|
72
71
|
};
|
|
73
|
-
const basePrompt =
|
|
74
|
-
|
|
72
|
+
const basePrompt = await buildTransformDatasetPromptStep({
|
|
73
|
+
context: promptContext,
|
|
74
|
+
});
|
|
75
|
+
const userInstructions = String(instructions ?? "").trim();
|
|
75
76
|
const system = userInstructions
|
|
76
77
|
? [
|
|
77
78
|
"## USER INSTRUCTIONS",
|
|
@@ -85,89 +86,147 @@ function createTransformDatasetStoryDefinition(params) {
|
|
|
85
86
|
return {
|
|
86
87
|
...previous,
|
|
87
88
|
datasetId,
|
|
89
|
+
inputDatasetIds,
|
|
90
|
+
outputSchema,
|
|
91
|
+
instructions,
|
|
88
92
|
sandboxId,
|
|
89
|
-
sandboxState,
|
|
93
|
+
sandboxState: initialized,
|
|
94
|
+
contextResources: previous?.contextResources ?? params.contextResources ?? [],
|
|
90
95
|
system,
|
|
91
|
-
sandboxConfig: {
|
|
96
|
+
sandboxConfig: {
|
|
97
|
+
inputPaths: initialized.inputPaths,
|
|
98
|
+
outputPath: previous?.sandboxConfig?.outputPath ?? getDatasetOutputPath(datasetId),
|
|
99
|
+
},
|
|
92
100
|
};
|
|
93
101
|
})
|
|
102
|
+
.resources(({ content }) => Array.isArray(content?.contextResources) ? content.contextResources : [])
|
|
94
103
|
.narrative(async (stored) => {
|
|
95
104
|
return String(stored?.content?.system ?? "");
|
|
96
105
|
})
|
|
97
|
-
.actions(async (stored,
|
|
106
|
+
.actions(async (stored, _env, runtime) => {
|
|
107
|
+
const datasetId = stored?.content?.datasetId ?? fallbackDatasetId ?? "";
|
|
98
108
|
const sandboxId = stored?.content?.sandboxId ?? params.sandboxId ?? "";
|
|
109
|
+
if (!datasetId)
|
|
110
|
+
throw new Error("dataset_id_required");
|
|
111
|
+
if (!sandboxId)
|
|
112
|
+
throw new Error("dataset_sandbox_required");
|
|
99
113
|
return {
|
|
100
|
-
|
|
114
|
+
completeObject: createCompleteObjectTool({
|
|
101
115
|
datasetId,
|
|
102
116
|
sandboxId,
|
|
103
|
-
|
|
117
|
+
runtime,
|
|
118
|
+
schema: stored?.content?.outputSchema,
|
|
104
119
|
}),
|
|
105
|
-
|
|
120
|
+
replaceRows: createReplaceRowsTool({
|
|
106
121
|
datasetId,
|
|
107
122
|
sandboxId,
|
|
108
|
-
|
|
123
|
+
runtime,
|
|
124
|
+
schema: stored?.content?.outputSchema,
|
|
109
125
|
}),
|
|
110
|
-
|
|
126
|
+
executeCommand: createExecuteCommandTool({
|
|
111
127
|
datasetId,
|
|
112
128
|
sandboxId,
|
|
113
|
-
|
|
129
|
+
runtime,
|
|
130
|
+
}),
|
|
131
|
+
completeDataset: createCompleteDatasetTool({
|
|
132
|
+
datasetId,
|
|
133
|
+
sandboxId,
|
|
134
|
+
runtime,
|
|
135
|
+
}),
|
|
136
|
+
clearDataset: createClearDatasetTool({
|
|
137
|
+
datasetId,
|
|
138
|
+
sandboxId,
|
|
139
|
+
runtime,
|
|
140
|
+
}),
|
|
141
|
+
defineNotation: createDefineNotationTool({
|
|
142
|
+
datasetId,
|
|
143
|
+
runtime,
|
|
114
144
|
}),
|
|
115
145
|
};
|
|
116
146
|
})
|
|
117
147
|
.shouldContinue(({ reactionEvent }) => {
|
|
118
|
-
|
|
148
|
+
const fatalFailure = getDatasetFatalFailure(reactionEvent);
|
|
149
|
+
if (fatalFailure) {
|
|
150
|
+
throw new Error(fatalFailure);
|
|
151
|
+
}
|
|
152
|
+
return !didCompleteDatasetSucceed(reactionEvent);
|
|
119
153
|
});
|
|
120
154
|
if (params.reactor) {
|
|
121
|
-
|
|
155
|
+
contextBuilder = contextBuilder.reactor(params.reactor);
|
|
122
156
|
}
|
|
123
157
|
else {
|
|
124
|
-
|
|
158
|
+
contextBuilder = contextBuilder.model(model);
|
|
125
159
|
}
|
|
126
|
-
const
|
|
127
|
-
return { datasetId,
|
|
160
|
+
const context = contextBuilder.build();
|
|
161
|
+
return { datasetId: fallbackDatasetId ?? "", context };
|
|
128
162
|
}
|
|
129
|
-
function
|
|
130
|
-
const
|
|
131
|
-
|
|
163
|
+
export function createTransformDatasetContext(params) {
|
|
164
|
+
const datasetId = params.datasetId ?? createDatasetId();
|
|
165
|
+
const { context } = createTransformDatasetContextDefinition({
|
|
166
|
+
inputDatasetIds: params.inputDatasetIds,
|
|
132
167
|
outputSchema: params.outputSchema,
|
|
133
168
|
instructions: params.instructions,
|
|
134
|
-
datasetId
|
|
169
|
+
datasetId,
|
|
135
170
|
model: params.model,
|
|
136
171
|
sandboxId: params.sandboxId,
|
|
137
172
|
reactor: params.reactor,
|
|
173
|
+
sandboxState: params.sandboxState,
|
|
174
|
+
inputPreviews: params.inputPreviews,
|
|
175
|
+
contextResources: params.contextResources,
|
|
138
176
|
});
|
|
139
177
|
return {
|
|
140
178
|
datasetId,
|
|
141
|
-
async transform(
|
|
142
|
-
const datasetCountText = params.
|
|
143
|
-
? "the
|
|
144
|
-
: `${params.
|
|
179
|
+
async transform(runtime, options = {}) {
|
|
180
|
+
const datasetCountText = params.inputDatasetIds.length === 1
|
|
181
|
+
? "the input dataset"
|
|
182
|
+
: `${params.inputDatasetIds.length} input datasets`;
|
|
145
183
|
const triggerEvent = {
|
|
146
|
-
id: (
|
|
147
|
-
type:
|
|
148
|
-
channel:
|
|
184
|
+
id: createDatasetId(),
|
|
185
|
+
type: INPUT_TEXT_ITEM_TYPE,
|
|
186
|
+
channel: WEB_CHANNEL,
|
|
149
187
|
createdAt: new Date().toISOString(),
|
|
150
188
|
content: {
|
|
151
189
|
parts: [
|
|
152
190
|
{
|
|
153
191
|
type: "text",
|
|
154
|
-
text: prompt ??
|
|
192
|
+
text: options.prompt ??
|
|
155
193
|
`Transform ${datasetCountText} into a new dataset matching the provided output schema`,
|
|
156
194
|
},
|
|
157
195
|
],
|
|
158
196
|
},
|
|
159
197
|
};
|
|
160
|
-
const
|
|
161
|
-
|
|
162
|
-
runtime,
|
|
198
|
+
const shell = await context.react(triggerEvent, {
|
|
199
|
+
runtime: runtime,
|
|
163
200
|
context: { key: `dataset:${datasetId}` },
|
|
164
|
-
durable: false,
|
|
165
|
-
options: {
|
|
201
|
+
durable: options.durable ?? false,
|
|
202
|
+
options: {
|
|
203
|
+
preventClose: true,
|
|
204
|
+
sendFinish: false,
|
|
205
|
+
maxIterations: 20,
|
|
206
|
+
maxModelSteps: 5,
|
|
207
|
+
},
|
|
208
|
+
__initialContent: {
|
|
209
|
+
...(options.initialContent ?? {}),
|
|
210
|
+
datasetId,
|
|
211
|
+
inputDatasetIds: params.inputDatasetIds,
|
|
212
|
+
outputSchema: params.outputSchema,
|
|
213
|
+
instructions: params.instructions,
|
|
214
|
+
sandboxId: params.sandboxId ?? "",
|
|
215
|
+
sandboxState: params.sandboxState ?? { initialized: false, inputPaths: [] },
|
|
216
|
+
inputPreviews: params.inputPreviews,
|
|
217
|
+
contextResources: params.contextResources ?? [],
|
|
218
|
+
},
|
|
166
219
|
});
|
|
167
|
-
await shell.run;
|
|
220
|
+
await awaitContextRun(shell.run);
|
|
168
221
|
return { datasetId };
|
|
169
222
|
},
|
|
170
|
-
|
|
223
|
+
context,
|
|
171
224
|
};
|
|
172
225
|
}
|
|
173
|
-
|
|
226
|
+
export function registerTransformDatasetContext(opts) {
|
|
227
|
+
createTransformDatasetContextDefinition({
|
|
228
|
+
model: opts?.model,
|
|
229
|
+
reactor: opts?.reactor,
|
|
230
|
+
}).context;
|
|
231
|
+
}
|
|
232
|
+
registerTransformDatasetContext();
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { TransformPromptContext, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
|
|
2
|
+
export declare function ensureTransformInputsInSandboxStep(params: {
|
|
3
|
+
runtime: any;
|
|
4
|
+
sandboxId: string;
|
|
5
|
+
datasetId: string;
|
|
6
|
+
inputDatasetIds: string[];
|
|
7
|
+
state: TransformSandboxState;
|
|
8
|
+
}): Promise<{
|
|
9
|
+
inputPaths: Array<{
|
|
10
|
+
datasetId: string;
|
|
11
|
+
path: string;
|
|
12
|
+
}>;
|
|
13
|
+
outputPath: string;
|
|
14
|
+
state: TransformSandboxState;
|
|
15
|
+
}>;
|
|
16
|
+
export declare function generateTransformInputPreviewsStep(params: {
|
|
17
|
+
runtime: any;
|
|
18
|
+
sandboxId: string;
|
|
19
|
+
datasetId: string;
|
|
20
|
+
inputPaths: Array<{
|
|
21
|
+
datasetId: string;
|
|
22
|
+
path: string;
|
|
23
|
+
}>;
|
|
24
|
+
}): Promise<Array<{
|
|
25
|
+
datasetId: string;
|
|
26
|
+
preview: TransformInputPreviewContext;
|
|
27
|
+
}>>;
|
|
28
|
+
export declare function buildTransformDatasetPromptStep(params: {
|
|
29
|
+
context: TransformPromptContext;
|
|
30
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { getDatasetOutputPath, getDatasetResourcesDir, getDatasetStandardDirs, } from "../datasetFiles.js";
|
|
2
|
+
import { datasetReadOutputJsonlStep } from "../dataset/steps.js";
|
|
3
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
4
|
+
import { generateInputPreview } from "./filepreview.js";
|
|
5
|
+
import { buildTransformDatasetPrompt } from "./prompts.js";
|
|
6
|
+
export async function ensureTransformInputsInSandboxStep(params) {
|
|
7
|
+
"use step";
|
|
8
|
+
if (params.state.initialized) {
|
|
9
|
+
return {
|
|
10
|
+
inputPaths: params.state.inputPaths,
|
|
11
|
+
outputPath: getDatasetOutputPath(params.datasetId),
|
|
12
|
+
state: params.state,
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
await runDatasetSandboxCommandStep({
|
|
16
|
+
runtime: params.runtime,
|
|
17
|
+
sandboxId: params.sandboxId,
|
|
18
|
+
cmd: "mkdir",
|
|
19
|
+
args: ["-p", ...getDatasetStandardDirs(params.datasetId)],
|
|
20
|
+
});
|
|
21
|
+
const inputPaths = [];
|
|
22
|
+
for (const inputDatasetId of params.inputDatasetIds) {
|
|
23
|
+
const inputPath = `${getDatasetResourcesDir(params.datasetId)}/resource_${inputDatasetId}.jsonl`;
|
|
24
|
+
const input = await datasetReadOutputJsonlStep({
|
|
25
|
+
runtime: params.runtime,
|
|
26
|
+
datasetId: inputDatasetId,
|
|
27
|
+
});
|
|
28
|
+
await writeDatasetSandboxFilesStep({
|
|
29
|
+
runtime: params.runtime,
|
|
30
|
+
sandboxId: params.sandboxId,
|
|
31
|
+
files: [{ path: inputPath, contentBase64: input.contentBase64 }],
|
|
32
|
+
});
|
|
33
|
+
inputPaths.push({ datasetId: inputDatasetId, path: inputPath });
|
|
34
|
+
}
|
|
35
|
+
return {
|
|
36
|
+
inputPaths,
|
|
37
|
+
outputPath: getDatasetOutputPath(params.datasetId),
|
|
38
|
+
state: {
|
|
39
|
+
initialized: true,
|
|
40
|
+
inputPaths,
|
|
41
|
+
},
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
export async function generateTransformInputPreviewsStep(params) {
|
|
45
|
+
"use step";
|
|
46
|
+
const inputPreviews = [];
|
|
47
|
+
for (const inputPath of params.inputPaths) {
|
|
48
|
+
try {
|
|
49
|
+
const preview = await generateInputPreview(params.runtime, params.sandboxId, inputPath.path, params.datasetId);
|
|
50
|
+
inputPreviews.push({ datasetId: inputPath.datasetId, preview });
|
|
51
|
+
}
|
|
52
|
+
catch {
|
|
53
|
+
// Input preview is optional; transformation can still read the JSONL files.
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
return inputPreviews;
|
|
57
|
+
}
|
|
58
|
+
export async function buildTransformDatasetPromptStep(params) {
|
|
59
|
+
"use step";
|
|
60
|
+
return buildTransformDatasetPrompt(params.context);
|
|
61
|
+
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import type { StoredContextResource } from "@ekairos/events";
|
|
2
|
+
import type { ContextReactor } from "@ekairos/reactor/context";
|
|
3
|
+
import type { TransformInputPreviewContext } from "./filepreview.js";
|
|
4
|
+
export type { TransformInputPreviewContext } from "./filepreview.js";
|
|
5
|
+
export type TransformSandboxState = {
|
|
6
|
+
initialized: boolean;
|
|
7
|
+
inputPaths: Array<{
|
|
8
|
+
datasetId: string;
|
|
9
|
+
path: string;
|
|
10
|
+
}>;
|
|
11
|
+
};
|
|
12
|
+
export type TransformDatasetContext = {
|
|
13
|
+
datasetId: string;
|
|
14
|
+
inputDatasetIds: string[];
|
|
15
|
+
outputSchema: any;
|
|
16
|
+
sandboxConfig: {
|
|
17
|
+
inputPaths: Array<{
|
|
18
|
+
datasetId: string;
|
|
19
|
+
path: string;
|
|
20
|
+
}>;
|
|
21
|
+
outputPath: string;
|
|
22
|
+
};
|
|
23
|
+
inputPreviews?: Array<{
|
|
24
|
+
datasetId: string;
|
|
25
|
+
preview: TransformInputPreviewContext;
|
|
26
|
+
}>;
|
|
27
|
+
contextResources?: StoredContextResource[];
|
|
28
|
+
errors: string[];
|
|
29
|
+
iterationCount: number;
|
|
30
|
+
instructions?: string;
|
|
31
|
+
};
|
|
32
|
+
export type TransformDatasetAgentParams = {
|
|
33
|
+
inputDatasetIds?: string[];
|
|
34
|
+
outputSchema?: any;
|
|
35
|
+
instructions?: string;
|
|
36
|
+
datasetId?: string;
|
|
37
|
+
model?: string;
|
|
38
|
+
sandboxId?: string;
|
|
39
|
+
reactor?: ContextReactor<any, any>;
|
|
40
|
+
sandboxState?: TransformSandboxState;
|
|
41
|
+
inputPreviews?: Array<{
|
|
42
|
+
datasetId: string;
|
|
43
|
+
preview: TransformInputPreviewContext;
|
|
44
|
+
}>;
|
|
45
|
+
contextResources?: StoredContextResource[];
|
|
46
|
+
};
|
|
47
|
+
export type TransformDatasetRunOptions = {
|
|
48
|
+
prompt?: string;
|
|
49
|
+
durable?: boolean;
|
|
50
|
+
initialContent?: Record<string, any>;
|
|
51
|
+
};
|
|
52
|
+
export type TransformDatasetResult = {
|
|
53
|
+
id: string;
|
|
54
|
+
status?: string;
|
|
55
|
+
title?: string;
|
|
56
|
+
schema?: any;
|
|
57
|
+
analysis?: any;
|
|
58
|
+
calculatedTotalRows?: number;
|
|
59
|
+
actualGeneratedRowCount?: number;
|
|
60
|
+
createdAt?: number;
|
|
61
|
+
updatedAt?: number;
|
|
62
|
+
};
|
|
63
|
+
export type TransformPromptContext = {
|
|
64
|
+
datasetId: string;
|
|
65
|
+
inputDatasetIds: string[];
|
|
66
|
+
outputSchema: any;
|
|
67
|
+
sandboxConfig: {
|
|
68
|
+
inputPaths: Array<{
|
|
69
|
+
datasetId: string;
|
|
70
|
+
path: string;
|
|
71
|
+
}>;
|
|
72
|
+
outputPath: string;
|
|
73
|
+
};
|
|
74
|
+
inputPreviews?: Array<{
|
|
75
|
+
datasetId: string;
|
|
76
|
+
preview: {
|
|
77
|
+
totalRows: number;
|
|
78
|
+
metadata?: {
|
|
79
|
+
description: string;
|
|
80
|
+
script: string;
|
|
81
|
+
command: string;
|
|
82
|
+
stdout: string;
|
|
83
|
+
stderr: string;
|
|
84
|
+
};
|
|
85
|
+
head?: {
|
|
86
|
+
description: string;
|
|
87
|
+
script: string;
|
|
88
|
+
command: string;
|
|
89
|
+
stdout: string;
|
|
90
|
+
stderr: string;
|
|
91
|
+
};
|
|
92
|
+
};
|
|
93
|
+
}>;
|
|
94
|
+
contextResources?: StoredContextResource[];
|
|
95
|
+
errors: string[];
|
|
96
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { AnyDatasetRuntime } from "../builder/types.js";
|
|
1
2
|
export type TransformDatasetInput = {
|
|
2
3
|
datasets: Array<{
|
|
3
4
|
id: string;
|
|
@@ -14,7 +15,6 @@ export type TransformDatasetResult = {
|
|
|
14
15
|
};
|
|
15
16
|
/**
|
|
16
17
|
* Workflow-compatible dataset transform.
|
|
17
|
-
* Executes the transform
|
|
18
|
+
* Executes the transform context and returns datasetId + preview rows.
|
|
18
19
|
*/
|
|
19
|
-
export declare function transformDataset(input: TransformDatasetInput): Promise<TransformDatasetResult>;
|
|
20
|
-
//# sourceMappingURL=transformDataset.d.ts.map
|
|
20
|
+
export declare function transformDataset(runtime: AnyDatasetRuntime, input: TransformDatasetInput): Promise<TransformDatasetResult>;
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
exports.transformDataset = transformDataset;
|
|
4
|
-
const runtime_1 = require("@ekairos/events/runtime");
|
|
5
|
-
const steps_1 = require("../dataset/steps");
|
|
6
|
-
const transform_dataset_agent_1 = require("./transform-dataset.agent");
|
|
1
|
+
import { datasetPreviewRowsStep } from "../dataset/steps.js";
|
|
2
|
+
import { createTransformDatasetContext } from "./transform-dataset.agent.js";
|
|
7
3
|
function buildInstructions(input) {
|
|
8
|
-
const
|
|
4
|
+
const inputs = input.datasets
|
|
9
5
|
.map((d, idx) => {
|
|
10
6
|
const name = d.description ? ` - ${d.description}` : "";
|
|
11
7
|
return `${idx + 1}. ${d.id}${name}`;
|
|
@@ -16,8 +12,8 @@ function buildInstructions(input) {
|
|
|
16
12
|
"Use pandas when helpful. Output must be JSONL with {type:'row', data:{...}} lines.",
|
|
17
13
|
"Respect the provided output schema exactly.",
|
|
18
14
|
"",
|
|
19
|
-
"##
|
|
20
|
-
|
|
15
|
+
"## Input Datasets",
|
|
16
|
+
inputs || "- (none)",
|
|
21
17
|
"",
|
|
22
18
|
"## Transformation Description (LaTeX + sets)",
|
|
23
19
|
String(input.description ?? "").trim(),
|
|
@@ -25,19 +21,20 @@ function buildInstructions(input) {
|
|
|
25
21
|
}
|
|
26
22
|
/**
|
|
27
23
|
* Workflow-compatible dataset transform.
|
|
28
|
-
* Executes the transform
|
|
24
|
+
* Executes the transform context and returns datasetId + preview rows.
|
|
29
25
|
*/
|
|
30
|
-
async function transformDataset(input) {
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
sourceDatasetIds: input.datasets.map((d) => d.id),
|
|
26
|
+
export async function transformDataset(runtime, input) {
|
|
27
|
+
const transformContext = createTransformDatasetContext({
|
|
28
|
+
inputDatasetIds: input.datasets.map((d) => d.id),
|
|
34
29
|
outputSchema: input.outputSchema,
|
|
35
30
|
instructions: buildInstructions(input),
|
|
36
31
|
datasetId: input.datasetId,
|
|
37
32
|
model: input.model,
|
|
38
33
|
});
|
|
39
|
-
await
|
|
40
|
-
const preview = await
|
|
41
|
-
|
|
34
|
+
await transformContext.transform(runtime);
|
|
35
|
+
const preview = await datasetPreviewRowsStep({
|
|
36
|
+
runtime,
|
|
37
|
+
datasetId: transformContext.datasetId,
|
|
38
|
+
});
|
|
39
|
+
return { datasetId: transformContext.datasetId, previewRows: preview.rows };
|
|
42
40
|
}
|
|
43
|
-
//# sourceMappingURL=transformDataset.js.map
|