@ekairos/dataset 1.22.51-beta.development.0 → 1.22.53-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/materialize.js +42 -25
- package/dist/dataset.js +0 -6
- package/dist/sandbox/steps.d.ts +2 -8
- package/dist/sandbox/steps.js +4 -1
- package/package.json +4 -4
|
@@ -6,22 +6,40 @@ import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransfor
|
|
|
6
6
|
import { createOrUpdateDatasetMetadata, uploadInlineTextSource, } from "./persistence.js";
|
|
7
7
|
import { getDomainDescriptor } from "./sourceRows.js";
|
|
8
8
|
import { materializeQuerySource } from "./materializeQuery.js";
|
|
9
|
+
import { createDatasetSandboxStep } from "../sandbox/steps.js";
|
|
9
10
|
function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
|
|
10
11
|
return `${targetDatasetId}__${sourceKind}_${index}`;
|
|
11
12
|
}
|
|
13
|
+
async function resolveDatasetSandboxId(state, targetDatasetId) {
|
|
14
|
+
const sandboxId = String(state.sandboxId ?? "").trim();
|
|
15
|
+
if (sandboxId)
|
|
16
|
+
return sandboxId;
|
|
17
|
+
const created = await createDatasetSandboxStep({
|
|
18
|
+
runtime: state.runtime,
|
|
19
|
+
provider: "vercel",
|
|
20
|
+
sandboxRuntime: "python3.13",
|
|
21
|
+
timeoutMs: 20 * 60 * 1000,
|
|
22
|
+
resources: { vcpus: 2 },
|
|
23
|
+
purpose: "dataset.materialize",
|
|
24
|
+
params: { datasetId: targetDatasetId },
|
|
25
|
+
vercel: {
|
|
26
|
+
profile: "ephemeral",
|
|
27
|
+
deleteOnStop: true,
|
|
28
|
+
},
|
|
29
|
+
});
|
|
30
|
+
return created.sandboxId;
|
|
31
|
+
}
|
|
12
32
|
export async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
|
|
13
33
|
if (!state.reactor) {
|
|
14
34
|
throw new Error("dataset_reactor_required");
|
|
15
35
|
}
|
|
16
|
-
|
|
17
|
-
throw new Error("dataset_sandbox_required");
|
|
18
|
-
}
|
|
36
|
+
const sandboxId = await resolveDatasetSandboxId(state, targetDatasetId);
|
|
19
37
|
const fileId = source.kind === "file"
|
|
20
38
|
? source.fileId
|
|
21
39
|
: await uploadInlineTextSource(state.runtime, targetDatasetId, source);
|
|
22
40
|
await createOrUpdateDatasetMetadata(state.runtime, {
|
|
23
41
|
datasetId: targetDatasetId,
|
|
24
|
-
sandboxId
|
|
42
|
+
sandboxId,
|
|
25
43
|
title: state.title ?? targetDatasetId,
|
|
26
44
|
instructions: state.instructions,
|
|
27
45
|
sources: [
|
|
@@ -42,7 +60,7 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
|
|
|
42
60
|
datasetId: targetDatasetId,
|
|
43
61
|
instructions: state.instructions ?? buildFileDefaultInstructions(state.outputSchema),
|
|
44
62
|
reactor: state.reactor,
|
|
45
|
-
sandboxId
|
|
63
|
+
sandboxId,
|
|
46
64
|
});
|
|
47
65
|
await parseContext.parse(state.runtime, { durable: state.durable });
|
|
48
66
|
if (!state.outputSchema) {
|
|
@@ -84,14 +102,13 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
84
102
|
if (!state.reactor) {
|
|
85
103
|
throw new Error("dataset_reactor_required");
|
|
86
104
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
}
|
|
105
|
+
const sandboxId = await resolveDatasetSandboxId(state, targetDatasetId);
|
|
106
|
+
const stateWithSandbox = { ...state, sandboxId };
|
|
90
107
|
const normalizedSources = [];
|
|
91
|
-
for (let index = 0; index <
|
|
92
|
-
normalizedSources.push(await normalizeSourceToDatasetId(
|
|
108
|
+
for (let index = 0; index < stateWithSandbox.sources.length; index++) {
|
|
109
|
+
normalizedSources.push(await normalizeSourceToDatasetId(stateWithSandbox, stateWithSandbox.sources[index], targetDatasetId, index));
|
|
93
110
|
}
|
|
94
|
-
const transformSchema =
|
|
111
|
+
const transformSchema = stateWithSandbox.outputSchema ??
|
|
95
112
|
{
|
|
96
113
|
title: "DatasetRow",
|
|
97
114
|
description: "One dataset row",
|
|
@@ -101,12 +118,12 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
101
118
|
properties: {},
|
|
102
119
|
},
|
|
103
120
|
};
|
|
104
|
-
await createOrUpdateDatasetMetadata(
|
|
121
|
+
await createOrUpdateDatasetMetadata(stateWithSandbox.runtime, {
|
|
105
122
|
datasetId: targetDatasetId,
|
|
106
|
-
sandboxId
|
|
107
|
-
title:
|
|
108
|
-
instructions:
|
|
109
|
-
sources:
|
|
123
|
+
sandboxId,
|
|
124
|
+
title: stateWithSandbox.title ?? targetDatasetId,
|
|
125
|
+
instructions: stateWithSandbox.instructions,
|
|
126
|
+
sources: stateWithSandbox.sources.map((source) => source.kind === "query"
|
|
110
127
|
? {
|
|
111
128
|
kind: "query",
|
|
112
129
|
query: source.query,
|
|
@@ -115,29 +132,29 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
115
132
|
...getDomainDescriptor(source.domain),
|
|
116
133
|
}
|
|
117
134
|
: source),
|
|
118
|
-
sourceKinds:
|
|
135
|
+
sourceKinds: stateWithSandbox.sources.map((source) => source.kind),
|
|
119
136
|
schema: transformSchema,
|
|
120
137
|
status: "building",
|
|
121
138
|
});
|
|
122
139
|
const transformContext = createTransformDatasetContext({
|
|
123
140
|
sourceDatasetIds: normalizedSources,
|
|
124
141
|
outputSchema: transformSchema,
|
|
125
|
-
instructions: buildTransformInstructions(normalizedSources.length,
|
|
142
|
+
instructions: buildTransformInstructions(normalizedSources.length, stateWithSandbox.instructions, stateWithSandbox.outputSchema),
|
|
126
143
|
datasetId: targetDatasetId,
|
|
127
|
-
reactor:
|
|
128
|
-
sandboxId
|
|
144
|
+
reactor: stateWithSandbox.reactor,
|
|
145
|
+
sandboxId,
|
|
129
146
|
});
|
|
130
|
-
await transformContext.transform(
|
|
131
|
-
if (!
|
|
147
|
+
await transformContext.transform(stateWithSandbox.runtime, { durable: stateWithSandbox.durable });
|
|
148
|
+
if (!stateWithSandbox.outputSchema) {
|
|
132
149
|
await datasetInferAndUpdateSchemaStep({
|
|
133
|
-
runtime:
|
|
150
|
+
runtime: stateWithSandbox.runtime,
|
|
134
151
|
datasetId: targetDatasetId,
|
|
135
152
|
title: `${targetDatasetId}Row`,
|
|
136
153
|
description: "One dataset row",
|
|
137
154
|
});
|
|
138
155
|
}
|
|
139
|
-
if (
|
|
140
|
-
await datasetReadOneStep({ runtime:
|
|
156
|
+
if (stateWithSandbox.first) {
|
|
157
|
+
await datasetReadOneStep({ runtime: stateWithSandbox.runtime, datasetId: targetDatasetId });
|
|
141
158
|
}
|
|
142
159
|
return targetDatasetId;
|
|
143
160
|
}
|
package/dist/dataset.js
CHANGED
|
@@ -128,18 +128,12 @@ export function dataset(runtime, options = {}) {
|
|
|
128
128
|
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
129
129
|
}
|
|
130
130
|
if (isSingleSource && (onlySource.kind === "file" || onlySource.kind === "text")) {
|
|
131
|
-
if (!effectiveState.sandboxId) {
|
|
132
|
-
throw new Error("dataset_sandbox_required");
|
|
133
|
-
}
|
|
134
131
|
if (!effectiveState.reactor) {
|
|
135
132
|
throw new Error("dataset_reactor_required");
|
|
136
133
|
}
|
|
137
134
|
await getDatasetAgentMaterializers().materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
|
|
138
135
|
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
139
136
|
}
|
|
140
|
-
if (!effectiveState.sandboxId) {
|
|
141
|
-
throw new Error("dataset_sandbox_required");
|
|
142
|
-
}
|
|
143
137
|
if (!effectiveState.reactor) {
|
|
144
138
|
throw new Error("dataset_reactor_required");
|
|
145
139
|
}
|
package/dist/sandbox/steps.d.ts
CHANGED
|
@@ -1,13 +1,7 @@
|
|
|
1
|
+
import { type SandboxConfig } from "@ekairos/sandbox";
|
|
1
2
|
export type DatasetSandboxId = string;
|
|
2
|
-
export type CreateDatasetSandboxParams = {
|
|
3
|
+
export type CreateDatasetSandboxParams = Pick<SandboxConfig, "provider" | "timeoutMs" | "ports" | "resources" | "purpose" | "params" | "env" | "domain" | "dataset" | "vercel"> & {
|
|
3
4
|
sandboxRuntime?: string;
|
|
4
|
-
timeoutMs?: number;
|
|
5
|
-
ports?: number[];
|
|
6
|
-
resources?: {
|
|
7
|
-
vcpus?: number;
|
|
8
|
-
};
|
|
9
|
-
purpose?: string;
|
|
10
|
-
params?: Record<string, any>;
|
|
11
5
|
};
|
|
12
6
|
export type DatasetSandboxRunCommandResult = {
|
|
13
7
|
exitCode: number;
|
package/dist/sandbox/steps.js
CHANGED
|
@@ -72,7 +72,10 @@ export async function createDatasetSandboxStep(params) {
|
|
|
72
72
|
}
|
|
73
73
|
const db = await getRuntimeDb(params.runtime);
|
|
74
74
|
const service = new SandboxService(db);
|
|
75
|
-
const sandboxParams = {
|
|
75
|
+
const sandboxParams = {
|
|
76
|
+
...params,
|
|
77
|
+
...(params.sandboxRuntime ? { runtime: params.sandboxRuntime } : {}),
|
|
78
|
+
};
|
|
76
79
|
delete sandboxParams.sandboxRuntime;
|
|
77
80
|
const created = await service.createSandbox(sandboxParams);
|
|
78
81
|
if (!created.ok)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.51-beta.development.0",
|
|
3
|
+
"version": "1.22.53-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.53-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.53-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.53-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|