@ekairos/dataset 1.22.51-beta.development.0 → 1.22.52-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,22 +6,40 @@ import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransfor
6
6
  import { createOrUpdateDatasetMetadata, uploadInlineTextSource, } from "./persistence.js";
7
7
  import { getDomainDescriptor } from "./sourceRows.js";
8
8
  import { materializeQuerySource } from "./materializeQuery.js";
9
+ import { createDatasetSandboxStep } from "../sandbox/steps.js";
9
10
  function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
10
11
  return `${targetDatasetId}__${sourceKind}_${index}`;
11
12
  }
13
+ async function resolveDatasetSandboxId(state, targetDatasetId) {
14
+ const sandboxId = String(state.sandboxId ?? "").trim();
15
+ if (sandboxId)
16
+ return sandboxId;
17
+ const created = await createDatasetSandboxStep({
18
+ runtime: state.runtime,
19
+ provider: "vercel",
20
+ sandboxRuntime: "python3.13",
21
+ timeoutMs: 20 * 60 * 1000,
22
+ resources: { vcpus: 2 },
23
+ purpose: "dataset.materialize",
24
+ params: { datasetId: targetDatasetId },
25
+ vercel: {
26
+ profile: "ephemeral",
27
+ deleteOnStop: true,
28
+ },
29
+ });
30
+ return created.sandboxId;
31
+ }
12
32
  export async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
13
33
  if (!state.reactor) {
14
34
  throw new Error("dataset_reactor_required");
15
35
  }
16
- if (!state.sandboxId) {
17
- throw new Error("dataset_sandbox_required");
18
- }
36
+ const sandboxId = await resolveDatasetSandboxId(state, targetDatasetId);
19
37
  const fileId = source.kind === "file"
20
38
  ? source.fileId
21
39
  : await uploadInlineTextSource(state.runtime, targetDatasetId, source);
22
40
  await createOrUpdateDatasetMetadata(state.runtime, {
23
41
  datasetId: targetDatasetId,
24
- sandboxId: state.sandboxId,
42
+ sandboxId,
25
43
  title: state.title ?? targetDatasetId,
26
44
  instructions: state.instructions,
27
45
  sources: [
@@ -42,7 +60,7 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
42
60
  datasetId: targetDatasetId,
43
61
  instructions: state.instructions ?? buildFileDefaultInstructions(state.outputSchema),
44
62
  reactor: state.reactor,
45
- sandboxId: state.sandboxId,
63
+ sandboxId,
46
64
  });
47
65
  await parseContext.parse(state.runtime, { durable: state.durable });
48
66
  if (!state.outputSchema) {
@@ -84,14 +102,13 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
84
102
  if (!state.reactor) {
85
103
  throw new Error("dataset_reactor_required");
86
104
  }
87
- if (!state.sandboxId) {
88
- throw new Error("dataset_sandbox_required");
89
- }
105
+ const sandboxId = await resolveDatasetSandboxId(state, targetDatasetId);
106
+ const stateWithSandbox = { ...state, sandboxId };
90
107
  const normalizedSources = [];
91
- for (let index = 0; index < state.sources.length; index++) {
92
- normalizedSources.push(await normalizeSourceToDatasetId(state, state.sources[index], targetDatasetId, index));
108
+ for (let index = 0; index < stateWithSandbox.sources.length; index++) {
109
+ normalizedSources.push(await normalizeSourceToDatasetId(stateWithSandbox, stateWithSandbox.sources[index], targetDatasetId, index));
93
110
  }
94
- const transformSchema = state.outputSchema ??
111
+ const transformSchema = stateWithSandbox.outputSchema ??
95
112
  {
96
113
  title: "DatasetRow",
97
114
  description: "One dataset row",
@@ -101,12 +118,12 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
101
118
  properties: {},
102
119
  },
103
120
  };
104
- await createOrUpdateDatasetMetadata(state.runtime, {
121
+ await createOrUpdateDatasetMetadata(stateWithSandbox.runtime, {
105
122
  datasetId: targetDatasetId,
106
- sandboxId: state.sandboxId,
107
- title: state.title ?? targetDatasetId,
108
- instructions: state.instructions,
109
- sources: state.sources.map((source) => source.kind === "query"
123
+ sandboxId,
124
+ title: stateWithSandbox.title ?? targetDatasetId,
125
+ instructions: stateWithSandbox.instructions,
126
+ sources: stateWithSandbox.sources.map((source) => source.kind === "query"
110
127
  ? {
111
128
  kind: "query",
112
129
  query: source.query,
@@ -115,29 +132,29 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
115
132
  ...getDomainDescriptor(source.domain),
116
133
  }
117
134
  : source),
118
- sourceKinds: state.sources.map((source) => source.kind),
135
+ sourceKinds: stateWithSandbox.sources.map((source) => source.kind),
119
136
  schema: transformSchema,
120
137
  status: "building",
121
138
  });
122
139
  const transformContext = createTransformDatasetContext({
123
140
  sourceDatasetIds: normalizedSources,
124
141
  outputSchema: transformSchema,
125
- instructions: buildTransformInstructions(normalizedSources.length, state.instructions, state.outputSchema),
142
+ instructions: buildTransformInstructions(normalizedSources.length, stateWithSandbox.instructions, stateWithSandbox.outputSchema),
126
143
  datasetId: targetDatasetId,
127
- reactor: state.reactor,
128
- sandboxId: state.sandboxId,
144
+ reactor: stateWithSandbox.reactor,
145
+ sandboxId,
129
146
  });
130
- await transformContext.transform(state.runtime, { durable: state.durable });
131
- if (!state.outputSchema) {
147
+ await transformContext.transform(stateWithSandbox.runtime, { durable: stateWithSandbox.durable });
148
+ if (!stateWithSandbox.outputSchema) {
132
149
  await datasetInferAndUpdateSchemaStep({
133
- runtime: state.runtime,
150
+ runtime: stateWithSandbox.runtime,
134
151
  datasetId: targetDatasetId,
135
152
  title: `${targetDatasetId}Row`,
136
153
  description: "One dataset row",
137
154
  });
138
155
  }
139
- if (state.first) {
140
- await datasetReadOneStep({ runtime: state.runtime, datasetId: targetDatasetId });
156
+ if (stateWithSandbox.first) {
157
+ await datasetReadOneStep({ runtime: stateWithSandbox.runtime, datasetId: targetDatasetId });
141
158
  }
142
159
  return targetDatasetId;
143
160
  }
package/dist/dataset.js CHANGED
@@ -128,18 +128,12 @@ export function dataset(runtime, options = {}) {
128
128
  return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
129
129
  }
130
130
  if (isSingleSource && (onlySource.kind === "file" || onlySource.kind === "text")) {
131
- if (!effectiveState.sandboxId) {
132
- throw new Error("dataset_sandbox_required");
133
- }
134
131
  if (!effectiveState.reactor) {
135
132
  throw new Error("dataset_reactor_required");
136
133
  }
137
134
  await getDatasetAgentMaterializers().materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
138
135
  return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
139
136
  }
140
- if (!effectiveState.sandboxId) {
141
- throw new Error("dataset_sandbox_required");
142
- }
143
137
  if (!effectiveState.reactor) {
144
138
  throw new Error("dataset_reactor_required");
145
139
  }
@@ -1,13 +1,7 @@
1
+ import { type SandboxConfig } from "@ekairos/sandbox";
1
2
  export type DatasetSandboxId = string;
2
- export type CreateDatasetSandboxParams = {
3
+ export type CreateDatasetSandboxParams = Pick<SandboxConfig, "provider" | "timeoutMs" | "ports" | "resources" | "purpose" | "params" | "env" | "domain" | "dataset" | "vercel"> & {
3
4
  sandboxRuntime?: string;
4
- timeoutMs?: number;
5
- ports?: number[];
6
- resources?: {
7
- vcpus?: number;
8
- };
9
- purpose?: string;
10
- params?: Record<string, any>;
11
5
  };
12
6
  export type DatasetSandboxRunCommandResult = {
13
7
  exitCode: number;
@@ -72,7 +72,10 @@ export async function createDatasetSandboxStep(params) {
72
72
  }
73
73
  const db = await getRuntimeDb(params.runtime);
74
74
  const service = new SandboxService(db);
75
- const sandboxParams = { ...params, runtime: params.sandboxRuntime };
75
+ const sandboxParams = {
76
+ ...params,
77
+ ...(params.sandboxRuntime ? { runtime: params.sandboxRuntime } : {}),
78
+ };
76
79
  delete sandboxParams.sandboxRuntime;
77
80
  const created = await service.createSandbox(sandboxParams);
78
81
  if (!created.ok)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.51-beta.development.0",
3
+ "version": "1.22.52-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.51-beta.development.0",
69
- "@ekairos/events": "^1.22.51-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.51-beta.development.0",
68
+ "@ekairos/domain": "^1.22.52-beta.development.0",
69
+ "@ekairos/events": "^1.22.52-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.52-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",