@ekairos/dataset 1.22.54-beta.development.0 → 1.22.55-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -16,7 +16,7 @@ export type FileParseContext = {
|
|
|
16
16
|
filePreview?: FilePreviewContext;
|
|
17
17
|
};
|
|
18
18
|
export type FileParseContextParams = {
|
|
19
|
-
fileId: string;
|
|
19
|
+
fileId?: string;
|
|
20
20
|
instructions?: string;
|
|
21
21
|
sandboxId?: string;
|
|
22
22
|
datasetId?: string;
|
|
@@ -71,3 +71,9 @@ export declare function createFileParseContext<Env extends {
|
|
|
71
71
|
}>;
|
|
72
72
|
context: any;
|
|
73
73
|
};
|
|
74
|
+
export declare function registerFileParseContext<Env extends {
|
|
75
|
+
orgId: string;
|
|
76
|
+
}>(opts?: {
|
|
77
|
+
model?: string;
|
|
78
|
+
reactor?: ContextReactor<any, any>;
|
|
79
|
+
}): void;
|
|
@@ -20,6 +20,7 @@ async function awaitContextRun(run) {
|
|
|
20
20
|
await run;
|
|
21
21
|
}
|
|
22
22
|
async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
|
|
23
|
+
"use step";
|
|
23
24
|
if (state.initialized) {
|
|
24
25
|
return state.filePath;
|
|
25
26
|
}
|
|
@@ -76,17 +77,26 @@ async function initializeSandbox(runtime, sandboxId, datasetId, fileId, state) {
|
|
|
76
77
|
* Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
|
|
77
78
|
*/
|
|
78
79
|
function createFileParseContextDefinition(params) {
|
|
79
|
-
const
|
|
80
|
+
const fallbackDatasetId = params.datasetId;
|
|
80
81
|
const model = params.model ?? "openai/gpt-5";
|
|
81
82
|
let contextBuilder = createContext("file.parse")
|
|
82
83
|
.context(async (stored, _env, runtime) => {
|
|
83
84
|
const previous = stored?.content ?? {};
|
|
84
85
|
const sandboxState = previous?.sandboxState ?? { initialized: false, filePath: "" };
|
|
86
|
+
const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
|
|
87
|
+
const fileId = previous?.fileId ?? params.fileId ?? "";
|
|
88
|
+
const instructions = previous?.instructions ?? params.instructions ?? "";
|
|
85
89
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
90
|
+
if (!datasetId) {
|
|
91
|
+
throw new Error("dataset_id_required");
|
|
92
|
+
}
|
|
93
|
+
if (!fileId) {
|
|
94
|
+
throw new Error("dataset_file_id_required");
|
|
95
|
+
}
|
|
86
96
|
if (!sandboxId) {
|
|
87
97
|
throw new Error("dataset_sandbox_required");
|
|
88
98
|
}
|
|
89
|
-
const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId,
|
|
99
|
+
const sandboxFilePath = await initializeSandbox(runtime, sandboxId, datasetId, fileId, sandboxState);
|
|
90
100
|
let filePreview = undefined;
|
|
91
101
|
try {
|
|
92
102
|
filePreview = await generateFilePreview(runtime, sandboxId, sandboxFilePath, datasetId);
|
|
@@ -100,8 +110,8 @@ function createFileParseContextDefinition(params) {
|
|
|
100
110
|
schema = datasetResult.data.schema;
|
|
101
111
|
const ctx = {
|
|
102
112
|
datasetId,
|
|
103
|
-
fileId
|
|
104
|
-
instructions
|
|
113
|
+
fileId,
|
|
114
|
+
instructions,
|
|
105
115
|
sandboxConfig: { filePath: sandboxFilePath },
|
|
106
116
|
analysis: [],
|
|
107
117
|
schema,
|
|
@@ -114,8 +124,8 @@ function createFileParseContextDefinition(params) {
|
|
|
114
124
|
return {
|
|
115
125
|
...previous,
|
|
116
126
|
datasetId,
|
|
117
|
-
fileId
|
|
118
|
-
instructions
|
|
127
|
+
fileId,
|
|
128
|
+
instructions,
|
|
119
129
|
sandboxId,
|
|
120
130
|
sandboxState,
|
|
121
131
|
ctx,
|
|
@@ -138,27 +148,36 @@ function createFileParseContextDefinition(params) {
|
|
|
138
148
|
})
|
|
139
149
|
.actions(async (_stored, _env, runtime) => {
|
|
140
150
|
const existingSchema = _stored?.content?.ctx?.schema?.schema;
|
|
151
|
+
const datasetId = _stored?.content?.datasetId ?? fallbackDatasetId ?? "";
|
|
152
|
+
const fileId = _stored?.content?.fileId ?? params.fileId ?? "";
|
|
153
|
+
const sandboxId = _stored?.content?.sandboxId ?? params.sandboxId ?? "";
|
|
154
|
+
if (!datasetId)
|
|
155
|
+
throw new Error("dataset_id_required");
|
|
156
|
+
if (!fileId)
|
|
157
|
+
throw new Error("dataset_file_id_required");
|
|
158
|
+
if (!sandboxId)
|
|
159
|
+
throw new Error("dataset_sandbox_required");
|
|
141
160
|
const actions = {
|
|
142
161
|
executeCommand: createExecuteCommandTool({
|
|
143
162
|
datasetId,
|
|
144
|
-
sandboxId
|
|
163
|
+
sandboxId,
|
|
145
164
|
runtime,
|
|
146
165
|
}),
|
|
147
166
|
completeDataset: createCompleteDatasetTool({
|
|
148
167
|
datasetId,
|
|
149
|
-
sandboxId
|
|
168
|
+
sandboxId,
|
|
150
169
|
runtime,
|
|
151
170
|
}),
|
|
152
171
|
clearDataset: createClearDatasetTool({
|
|
153
172
|
datasetId,
|
|
154
|
-
sandboxId
|
|
173
|
+
sandboxId,
|
|
155
174
|
runtime,
|
|
156
175
|
}),
|
|
157
176
|
};
|
|
158
177
|
if (!existingSchema) {
|
|
159
178
|
actions.generateSchema = createGenerateSchemaTool({
|
|
160
179
|
datasetId,
|
|
161
|
-
fileId
|
|
180
|
+
fileId,
|
|
162
181
|
runtime,
|
|
163
182
|
});
|
|
164
183
|
}
|
|
@@ -174,7 +193,7 @@ function createFileParseContextDefinition(params) {
|
|
|
174
193
|
contextBuilder = contextBuilder.model(model);
|
|
175
194
|
}
|
|
176
195
|
const context = contextBuilder.build();
|
|
177
|
-
return { datasetId, context };
|
|
196
|
+
return { datasetId: fallbackDatasetId ?? "", context };
|
|
178
197
|
}
|
|
179
198
|
/**
|
|
180
199
|
* Factory (DX-first):
|
|
@@ -187,15 +206,16 @@ function createFileParseContextDefinition(params) {
|
|
|
187
206
|
* - `parse()` is the entrypoint; it calls `context.react(...)` internally.
|
|
188
207
|
*/
|
|
189
208
|
export function createFileParseContext(fileId, opts) {
|
|
209
|
+
const datasetId = opts?.datasetId ?? id();
|
|
190
210
|
const params = {
|
|
191
211
|
fileId,
|
|
192
212
|
instructions: opts?.instructions,
|
|
193
213
|
sandboxId: opts?.sandboxId,
|
|
194
|
-
datasetId
|
|
214
|
+
datasetId,
|
|
195
215
|
model: opts?.model,
|
|
196
216
|
reactor: opts?.reactor,
|
|
197
217
|
};
|
|
198
|
-
const {
|
|
218
|
+
const { context } = createFileParseContextDefinition(params);
|
|
199
219
|
return {
|
|
200
220
|
datasetId,
|
|
201
221
|
async parse(runtime, options = {}) {
|
|
@@ -213,6 +233,13 @@ export function createFileParseContext(fileId, opts) {
|
|
|
213
233
|
context: { key: `dataset:${datasetId}` },
|
|
214
234
|
durable: options.durable ?? false,
|
|
215
235
|
options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
|
|
236
|
+
__initialContent: {
|
|
237
|
+
datasetId,
|
|
238
|
+
fileId,
|
|
239
|
+
instructions: opts?.instructions ?? "",
|
|
240
|
+
sandboxId: opts?.sandboxId ?? "",
|
|
241
|
+
sandboxState: { initialized: false, filePath: "" },
|
|
242
|
+
},
|
|
216
243
|
});
|
|
217
244
|
await awaitContextRun(shell.run);
|
|
218
245
|
return { datasetId };
|
|
@@ -221,3 +248,10 @@ export function createFileParseContext(fileId, opts) {
|
|
|
221
248
|
context,
|
|
222
249
|
};
|
|
223
250
|
}
|
|
251
|
+
export function registerFileParseContext(opts) {
|
|
252
|
+
createFileParseContextDefinition({
|
|
253
|
+
model: opts?.model,
|
|
254
|
+
reactor: opts?.reactor,
|
|
255
|
+
}).context;
|
|
256
|
+
}
|
|
257
|
+
registerFileParseContext();
|
|
@@ -20,8 +20,8 @@ export type TransformDatasetContext = {
|
|
|
20
20
|
instructions?: string;
|
|
21
21
|
};
|
|
22
22
|
export type TransformDatasetAgentParams = {
|
|
23
|
-
sourceDatasetIds: string[];
|
|
24
|
-
outputSchema: any;
|
|
23
|
+
sourceDatasetIds?: string[];
|
|
24
|
+
outputSchema?: any;
|
|
25
25
|
instructions?: string;
|
|
26
26
|
datasetId?: string;
|
|
27
27
|
model?: string;
|
|
@@ -62,3 +62,9 @@ export declare function createTransformDatasetContext<Env extends {
|
|
|
62
62
|
}>;
|
|
63
63
|
context: any;
|
|
64
64
|
};
|
|
65
|
+
export declare function registerTransformDatasetContext<Env extends {
|
|
66
|
+
orgId: string;
|
|
67
|
+
}>(opts?: {
|
|
68
|
+
model?: string;
|
|
69
|
+
reactor?: ContextReactor<any, any>;
|
|
70
|
+
}): void;
|
|
@@ -18,6 +18,7 @@ async function awaitContextRun(run) {
|
|
|
18
18
|
await run;
|
|
19
19
|
}
|
|
20
20
|
async function ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatasetIds, state) {
|
|
21
|
+
"use step";
|
|
21
22
|
if (state.initialized) {
|
|
22
23
|
return { sourcePaths: state.sourcePaths, outputPath: getDatasetOutputPath(datasetId) };
|
|
23
24
|
}
|
|
@@ -39,17 +40,34 @@ async function ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatas
|
|
|
39
40
|
return { sourcePaths, outputPath: getDatasetOutputPath(datasetId) };
|
|
40
41
|
}
|
|
41
42
|
function createTransformDatasetContextDefinition(params) {
|
|
42
|
-
const
|
|
43
|
+
const fallbackDatasetId = params.datasetId;
|
|
43
44
|
const model = params.model ?? "openai/gpt-5";
|
|
44
45
|
let contextBuilder = createContext("dataset.transform")
|
|
45
46
|
.context(async (stored, _env, runtime) => {
|
|
46
47
|
const previous = stored?.content ?? {};
|
|
47
48
|
const sandboxState = previous?.sandboxState ?? { initialized: false, sourcePaths: [] };
|
|
49
|
+
const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
|
|
50
|
+
const sourceDatasetIds = Array.isArray(previous?.sourceDatasetIds)
|
|
51
|
+
? previous.sourceDatasetIds
|
|
52
|
+
: Array.isArray(params.sourceDatasetIds)
|
|
53
|
+
? params.sourceDatasetIds
|
|
54
|
+
: [];
|
|
55
|
+
const outputSchema = previous?.outputSchema ?? params.outputSchema;
|
|
56
|
+
const instructions = previous?.instructions ?? params.instructions;
|
|
48
57
|
const sandboxId = previous?.sandboxId ?? params.sandboxId ?? "";
|
|
58
|
+
if (!datasetId) {
|
|
59
|
+
throw new Error("dataset_id_required");
|
|
60
|
+
}
|
|
61
|
+
if (sourceDatasetIds.length === 0) {
|
|
62
|
+
throw new Error("dataset_transform_sources_required");
|
|
63
|
+
}
|
|
64
|
+
if (!outputSchema) {
|
|
65
|
+
throw new Error("dataset_transform_schema_required");
|
|
66
|
+
}
|
|
49
67
|
if (!sandboxId) {
|
|
50
68
|
throw new Error("dataset_sandbox_required");
|
|
51
69
|
}
|
|
52
|
-
const { sourcePaths, outputPath } = await ensureSourcesInSandbox(runtime, sandboxId, datasetId,
|
|
70
|
+
const { sourcePaths, outputPath } = await ensureSourcesInSandbox(runtime, sandboxId, datasetId, sourceDatasetIds, sandboxState);
|
|
53
71
|
const sourcePreviews = [];
|
|
54
72
|
for (const sp of sourcePaths) {
|
|
55
73
|
try {
|
|
@@ -64,19 +82,19 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
64
82
|
await datasetUpdateSchemaStep({
|
|
65
83
|
runtime,
|
|
66
84
|
datasetId,
|
|
67
|
-
schema:
|
|
85
|
+
schema: outputSchema,
|
|
68
86
|
status: "schema_complete",
|
|
69
87
|
});
|
|
70
88
|
const promptContext = {
|
|
71
89
|
datasetId,
|
|
72
|
-
sourceDatasetIds
|
|
73
|
-
outputSchema
|
|
90
|
+
sourceDatasetIds,
|
|
91
|
+
outputSchema,
|
|
74
92
|
sandboxConfig: { sourcePaths, outputPath },
|
|
75
93
|
sourcePreviews: sourcePreviews.length > 0 ? sourcePreviews : undefined,
|
|
76
94
|
errors: [],
|
|
77
95
|
};
|
|
78
96
|
const basePrompt = buildTransformDatasetPrompt(promptContext);
|
|
79
|
-
const userInstructions = String(
|
|
97
|
+
const userInstructions = String(instructions ?? "").trim();
|
|
80
98
|
const system = userInstructions
|
|
81
99
|
? [
|
|
82
100
|
"## USER INSTRUCTIONS",
|
|
@@ -90,6 +108,9 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
90
108
|
return {
|
|
91
109
|
...previous,
|
|
92
110
|
datasetId,
|
|
111
|
+
sourceDatasetIds,
|
|
112
|
+
outputSchema,
|
|
113
|
+
instructions,
|
|
93
114
|
sandboxId,
|
|
94
115
|
sandboxState,
|
|
95
116
|
system,
|
|
@@ -100,7 +121,12 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
100
121
|
return String(stored?.content?.system ?? "");
|
|
101
122
|
})
|
|
102
123
|
.actions(async (stored, _env, runtime) => {
|
|
124
|
+
const datasetId = stored?.content?.datasetId ?? fallbackDatasetId ?? "";
|
|
103
125
|
const sandboxId = stored?.content?.sandboxId ?? params.sandboxId ?? "";
|
|
126
|
+
if (!datasetId)
|
|
127
|
+
throw new Error("dataset_id_required");
|
|
128
|
+
if (!sandboxId)
|
|
129
|
+
throw new Error("dataset_sandbox_required");
|
|
104
130
|
return {
|
|
105
131
|
executeCommand: createExecuteCommandTool({
|
|
106
132
|
datasetId,
|
|
@@ -129,14 +155,15 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
129
155
|
contextBuilder = contextBuilder.model(model);
|
|
130
156
|
}
|
|
131
157
|
const context = contextBuilder.build();
|
|
132
|
-
return { datasetId, context };
|
|
158
|
+
return { datasetId: fallbackDatasetId ?? "", context };
|
|
133
159
|
}
|
|
134
160
|
export function createTransformDatasetContext(params) {
|
|
135
|
-
const
|
|
161
|
+
const datasetId = params.datasetId ?? id();
|
|
162
|
+
const { context } = createTransformDatasetContextDefinition({
|
|
136
163
|
sourceDatasetIds: params.sourceDatasetIds,
|
|
137
164
|
outputSchema: params.outputSchema,
|
|
138
165
|
instructions: params.instructions,
|
|
139
|
-
datasetId
|
|
166
|
+
datasetId,
|
|
140
167
|
model: params.model,
|
|
141
168
|
sandboxId: params.sandboxId,
|
|
142
169
|
reactor: params.reactor,
|
|
@@ -167,6 +194,14 @@ export function createTransformDatasetContext(params) {
|
|
|
167
194
|
context: { key: `dataset:${datasetId}` },
|
|
168
195
|
durable: options.durable ?? false,
|
|
169
196
|
options: { silent: true, preventClose: true, sendFinish: false, maxIterations: 20, maxModelSteps: 5 },
|
|
197
|
+
__initialContent: {
|
|
198
|
+
datasetId,
|
|
199
|
+
sourceDatasetIds: params.sourceDatasetIds,
|
|
200
|
+
outputSchema: params.outputSchema,
|
|
201
|
+
instructions: params.instructions,
|
|
202
|
+
sandboxId: params.sandboxId ?? "",
|
|
203
|
+
sandboxState: { initialized: false, sourcePaths: [] },
|
|
204
|
+
},
|
|
170
205
|
});
|
|
171
206
|
await awaitContextRun(shell.run);
|
|
172
207
|
return { datasetId };
|
|
@@ -174,3 +209,10 @@ export function createTransformDatasetContext(params) {
|
|
|
174
209
|
context,
|
|
175
210
|
};
|
|
176
211
|
}
|
|
212
|
+
export function registerTransformDatasetContext(opts) {
|
|
213
|
+
createTransformDatasetContextDefinition({
|
|
214
|
+
model: opts?.model,
|
|
215
|
+
reactor: opts?.reactor,
|
|
216
|
+
}).context;
|
|
217
|
+
}
|
|
218
|
+
registerTransformDatasetContext();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.54-beta.development.0",
|
|
3
|
+
"version": "1.22.55-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.54-beta.development.0",
|
|
69
|
-
"@ekairos/events": "^1.22.54-beta.development.0",
|
|
70
|
-
"@ekairos/sandbox": "^1.22.54-beta.development.0",
|
|
68
|
+
"@ekairos/domain": "^1.22.55-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.55-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.55-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|