@ekairos/dataset 1.22.45-beta.development.0 → 1.22.47-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/instructions.d.ts +1 -2
- package/dist/builder/instructions.js +4 -11
- package/dist/builder/materialize.d.ts +1 -2
- package/dist/builder/materialize.js +30 -36
- package/dist/builder/persistence.d.ts +1 -2
- package/dist/builder/persistence.js +17 -26
- package/dist/builder/schemaInference.d.ts +1 -2
- package/dist/builder/schemaInference.js +4 -12
- package/dist/builder/sourceRows.d.ts +0 -1
- package/dist/builder/sourceRows.js +3 -9
- package/dist/builder/types.d.ts +1 -2
- package/dist/builder/types.js +1 -3
- package/dist/clearDataset.tool.d.ts +0 -1
- package/dist/clearDataset.tool.js +12 -16
- package/dist/completeDataset.tool.d.ts +0 -1
- package/dist/completeDataset.tool.js +18 -25
- package/dist/dataset/steps.d.ts +5 -6
- package/dist/dataset/steps.js +25 -36
- package/dist/dataset.d.ts +2 -3
- package/dist/dataset.js +13 -17
- package/dist/datasetFiles.d.ts +0 -1
- package/dist/datasetFiles.js +5 -12
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/eventsReactRuntime.d.ts +0 -1
- package/dist/eventsReactRuntime.js +9 -13
- package/dist/executeCommand.tool.d.ts +0 -1
- package/dist/executeCommand.tool.js +12 -16
- package/dist/file/file-dataset.agent.d.ts +1 -2
- package/dist/file/file-dataset.agent.js +34 -38
- package/dist/file/filepreview.d.ts +0 -1
- package/dist/file/filepreview.js +12 -16
- package/dist/file/generateSchema.tool.d.ts +0 -1
- package/dist/file/generateSchema.tool.js +10 -14
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +1 -2
- package/dist/file/prompts.js +13 -17
- package/dist/file/steps.d.ts +0 -1
- package/dist/file/steps.js +3 -7
- package/dist/index.d.ts +6 -7
- package/dist/index.js +6 -23
- package/dist/materializeDataset.tool.d.ts +1 -2
- package/dist/materializeDataset.tool.js +40 -45
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +1 -2
- package/dist/query/queryDomain.js +2 -39
- package/dist/query/queryDomain.step.d.ts +0 -1
- package/dist/query/queryDomain.step.js +8 -12
- package/dist/sandbox/steps.d.ts +0 -1
- package/dist/sandbox/steps.js +30 -74
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +0 -1
- package/dist/schema.js +22 -26
- package/dist/service.d.ts +1 -2
- package/dist/service.js +5 -10
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +0 -1
- package/dist/transform/filepreview.js +6 -10
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +0 -1
- package/dist/transform/prompts.js +12 -16
- package/dist/transform/transform-dataset.agent.d.ts +1 -2
- package/dist/transform/transform-dataset.agent.js +31 -35
- package/dist/transform/transformDataset.d.ts +0 -1
- package/dist/transform/transformDataset.js +7 -11
- package/package.json +12 -7
- package/dist/builder/instructions.d.ts.map +0 -1
- package/dist/builder/instructions.js.map +0 -1
- package/dist/builder/materialize.d.ts.map +0 -1
- package/dist/builder/materialize.js.map +0 -1
- package/dist/builder/persistence.d.ts.map +0 -1
- package/dist/builder/persistence.js.map +0 -1
- package/dist/builder/schemaInference.d.ts.map +0 -1
- package/dist/builder/schemaInference.js.map +0 -1
- package/dist/builder/sourceRows.d.ts.map +0 -1
- package/dist/builder/sourceRows.js.map +0 -1
- package/dist/builder/types.d.ts.map +0 -1
- package/dist/builder/types.js.map +0 -1
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
package/dist/dataset/steps.js
CHANGED
|
@@ -1,32 +1,22 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
exports.getDatasetServiceDb = getDatasetServiceDb;
|
|
4
|
-
exports.datasetGetByIdStep = datasetGetByIdStep;
|
|
5
|
-
exports.datasetReadOutputJsonlStep = datasetReadOutputJsonlStep;
|
|
6
|
-
exports.datasetUpdateSchemaStep = datasetUpdateSchemaStep;
|
|
7
|
-
exports.datasetUploadOutputFileStep = datasetUploadOutputFileStep;
|
|
8
|
-
exports.datasetUpdateStatusStep = datasetUpdateStatusStep;
|
|
9
|
-
exports.datasetClearStep = datasetClearStep;
|
|
10
|
-
exports.datasetPreviewRowsStep = datasetPreviewRowsStep;
|
|
11
|
-
const runtime_1 = require("@ekairos/events/runtime");
|
|
12
|
-
const service_1 = require("../service");
|
|
1
|
+
import { getContextRuntime, getContextEnv } from "@ekairos/events/runtime";
|
|
2
|
+
import { DatasetService } from "../service.js";
|
|
13
3
|
async function resolveEnv(env) {
|
|
14
|
-
return env ?? (await
|
|
4
|
+
return env ?? (await getContextEnv());
|
|
15
5
|
}
|
|
16
|
-
async function getDatasetServiceDb(env) {
|
|
6
|
+
export async function getDatasetServiceDb(env) {
|
|
17
7
|
"use step";
|
|
18
|
-
const runtime = (await
|
|
8
|
+
const runtime = (await getContextRuntime(await resolveEnv(env)));
|
|
19
9
|
return runtime.db;
|
|
20
10
|
}
|
|
21
|
-
async function datasetGetByIdStep(params) {
|
|
11
|
+
export async function datasetGetByIdStep(params) {
|
|
22
12
|
"use step";
|
|
23
|
-
const db = (await
|
|
24
|
-
const service = new
|
|
13
|
+
const db = (await getContextRuntime(await resolveEnv(params.env))).db;
|
|
14
|
+
const service = new DatasetService(db);
|
|
25
15
|
return await service.getDatasetById(params.datasetId);
|
|
26
16
|
}
|
|
27
|
-
async function datasetReadOutputJsonlStep(params) {
|
|
17
|
+
export async function datasetReadOutputJsonlStep(params) {
|
|
28
18
|
"use step";
|
|
29
|
-
const db = (await
|
|
19
|
+
const db = (await getContextRuntime(await resolveEnv(params.env))).db;
|
|
30
20
|
for (let attempt = 1; attempt <= 20; attempt++) {
|
|
31
21
|
const query = await db.query({
|
|
32
22
|
dataset_datasets: {
|
|
@@ -45,29 +35,29 @@ async function datasetReadOutputJsonlStep(params) {
|
|
|
45
35
|
}
|
|
46
36
|
throw new Error("Dataset output file not found");
|
|
47
37
|
}
|
|
48
|
-
async function datasetUpdateSchemaStep(params) {
|
|
38
|
+
export async function datasetUpdateSchemaStep(params) {
|
|
49
39
|
"use step";
|
|
50
|
-
const db = (await
|
|
51
|
-
const service = new
|
|
40
|
+
const db = (await getContextRuntime(await resolveEnv(params.env))).db;
|
|
41
|
+
const service = new DatasetService(db);
|
|
52
42
|
return await service.updateDatasetSchema({
|
|
53
43
|
datasetId: params.datasetId,
|
|
54
44
|
schema: params.schema,
|
|
55
45
|
status: params.status,
|
|
56
46
|
});
|
|
57
47
|
}
|
|
58
|
-
async function datasetUploadOutputFileStep(params) {
|
|
48
|
+
export async function datasetUploadOutputFileStep(params) {
|
|
59
49
|
"use step";
|
|
60
|
-
const db = (await
|
|
61
|
-
const service = new
|
|
50
|
+
const db = (await getContextRuntime(await resolveEnv(params.env))).db;
|
|
51
|
+
const service = new DatasetService(db);
|
|
62
52
|
return await service.uploadDatasetOutputFile({
|
|
63
53
|
datasetId: params.datasetId,
|
|
64
54
|
fileBuffer: params.fileBuffer,
|
|
65
55
|
});
|
|
66
56
|
}
|
|
67
|
-
async function datasetUpdateStatusStep(params) {
|
|
57
|
+
export async function datasetUpdateStatusStep(params) {
|
|
68
58
|
"use step";
|
|
69
|
-
const db = (await
|
|
70
|
-
const service = new
|
|
59
|
+
const db = (await getContextRuntime(await resolveEnv(params.env))).db;
|
|
60
|
+
const service = new DatasetService(db);
|
|
71
61
|
return await service.updateDatasetStatus({
|
|
72
62
|
datasetId: params.datasetId,
|
|
73
63
|
status: params.status,
|
|
@@ -75,20 +65,19 @@ async function datasetUpdateStatusStep(params) {
|
|
|
75
65
|
actualGeneratedRowCount: params.actualGeneratedRowCount,
|
|
76
66
|
});
|
|
77
67
|
}
|
|
78
|
-
async function datasetClearStep(params) {
|
|
68
|
+
export async function datasetClearStep(params) {
|
|
79
69
|
"use step";
|
|
80
|
-
const db = (await
|
|
81
|
-
const service = new
|
|
70
|
+
const db = (await getContextRuntime(await resolveEnv(params.env))).db;
|
|
71
|
+
const service = new DatasetService(db);
|
|
82
72
|
return await service.clearDataset(params.datasetId);
|
|
83
73
|
}
|
|
84
|
-
async function datasetPreviewRowsStep(params) {
|
|
74
|
+
export async function datasetPreviewRowsStep(params) {
|
|
85
75
|
"use step";
|
|
86
|
-
const db = (await
|
|
87
|
-
const service = new
|
|
76
|
+
const db = (await getContextRuntime(await resolveEnv(params.env))).db;
|
|
77
|
+
const service = new DatasetService(db);
|
|
88
78
|
const rowsResult = await service.previewRows(params.datasetId, params.limit ?? 20);
|
|
89
79
|
if (!rowsResult.ok) {
|
|
90
80
|
throw new Error(rowsResult.error);
|
|
91
81
|
}
|
|
92
82
|
return { rows: rowsResult.data };
|
|
93
83
|
}
|
|
94
|
-
//# sourceMappingURL=steps.js.map
|
package/dist/dataset.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { AnyDatasetRuntime, DatasetBuilder, DatasetBuilderOptions, DatasetRuntimeHandle } from "./builder/types";
|
|
2
|
-
export type { AnyDatasetRuntime, CompatibleSourceDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingSource, DatasetExistingSourceInput, DatasetFileSource, DatasetFileSourceInput, DatasetMode, DatasetOutput, DatasetQuerySourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextSource, DatasetSourceInput, DatasetTextSourceInput, } from "./builder/types";
|
|
1
|
+
import type { AnyDatasetRuntime, DatasetBuilder, DatasetBuilderOptions, DatasetRuntimeHandle } from "./builder/types.js";
|
|
2
|
+
export type { AnyDatasetRuntime, CompatibleSourceDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingSource, DatasetExistingSourceInput, DatasetFileSource, DatasetFileSourceInput, DatasetMode, DatasetOutput, DatasetQuerySourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextSource, DatasetSourceInput, DatasetTextSourceInput, } from "./builder/types.js";
|
|
3
3
|
export declare function dataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime & DatasetRuntimeHandle<Runtime>, options?: DatasetBuilderOptions): DatasetBuilder<Runtime>;
|
|
4
|
-
//# sourceMappingURL=dataset.d.ts.map
|
package/dist/dataset.js
CHANGED
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
const materialize_1 = require("./builder/materialize");
|
|
7
|
-
const persistence_1 = require("./builder/persistence");
|
|
8
|
-
function dataset(runtime, options = {}) {
|
|
1
|
+
import { id as newId } from "@instantdb/admin";
|
|
2
|
+
import { buildObjectOutputInstructions } from "./builder/instructions.js";
|
|
3
|
+
import { materializeDerivedDataset, materializeQuerySource, materializeSingleFileLikeSource, } from "./builder/materialize.js";
|
|
4
|
+
import { finalizeBuildResult } from "./builder/persistence.js";
|
|
5
|
+
export function dataset(runtime, options = {}) {
|
|
9
6
|
const datasetId = normalizeDatasetId(options.datasetId);
|
|
10
7
|
const typedRuntime = runtime;
|
|
11
8
|
const state = {
|
|
@@ -107,14 +104,14 @@ function dataset(runtime, options = {}) {
|
|
|
107
104
|
? {
|
|
108
105
|
...state,
|
|
109
106
|
first: true,
|
|
110
|
-
instructions:
|
|
107
|
+
instructions: buildObjectOutputInstructions(state.instructions),
|
|
111
108
|
}
|
|
112
109
|
: state;
|
|
113
110
|
const onlySource = effectiveState.sources[0];
|
|
114
111
|
const isSingleSource = effectiveState.sources.length === 1;
|
|
115
112
|
const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
|
|
116
113
|
if (isSingleSource && onlySource.kind === "query" && !hasInstructions) {
|
|
117
|
-
await
|
|
114
|
+
await materializeQuerySource(effectiveState.runtime, onlySource, {
|
|
118
115
|
datasetId: targetDatasetId,
|
|
119
116
|
sandboxId: effectiveState.sandboxId,
|
|
120
117
|
schema: effectiveState.outputSchema,
|
|
@@ -122,7 +119,7 @@ function dataset(runtime, options = {}) {
|
|
|
122
119
|
instructions: effectiveState.instructions,
|
|
123
120
|
first: effectiveState.first,
|
|
124
121
|
});
|
|
125
|
-
return finalizeOutputResult(await
|
|
122
|
+
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
126
123
|
}
|
|
127
124
|
if (isSingleSource && (onlySource.kind === "file" || onlySource.kind === "text")) {
|
|
128
125
|
if (!effectiveState.sandboxId) {
|
|
@@ -131,8 +128,8 @@ function dataset(runtime, options = {}) {
|
|
|
131
128
|
if (!effectiveState.reactor) {
|
|
132
129
|
throw new Error("dataset_reactor_required");
|
|
133
130
|
}
|
|
134
|
-
await
|
|
135
|
-
return finalizeOutputResult(await
|
|
131
|
+
await materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
|
|
132
|
+
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
136
133
|
}
|
|
137
134
|
if (!effectiveState.sandboxId) {
|
|
138
135
|
throw new Error("dataset_sandbox_required");
|
|
@@ -140,14 +137,14 @@ function dataset(runtime, options = {}) {
|
|
|
140
137
|
if (!effectiveState.reactor) {
|
|
141
138
|
throw new Error("dataset_reactor_required");
|
|
142
139
|
}
|
|
143
|
-
await
|
|
144
|
-
return finalizeOutputResult(await
|
|
140
|
+
await materializeDerivedDataset(effectiveState, targetDatasetId);
|
|
141
|
+
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
145
142
|
},
|
|
146
143
|
};
|
|
147
144
|
return api;
|
|
148
145
|
}
|
|
149
146
|
function normalizeDatasetId(datasetId) {
|
|
150
|
-
const normalized = String(datasetId ?? (
|
|
147
|
+
const normalized = String(datasetId ?? newId()).trim();
|
|
151
148
|
if (!normalized) {
|
|
152
149
|
throw new Error("dataset_id_required");
|
|
153
150
|
}
|
|
@@ -161,4 +158,3 @@ function finalizeOutputResult(result, output) {
|
|
|
161
158
|
object: result.firstRow ?? null,
|
|
162
159
|
};
|
|
163
160
|
}
|
|
164
|
-
//# sourceMappingURL=dataset.js.map
|
package/dist/datasetFiles.d.ts
CHANGED
|
@@ -2,4 +2,3 @@ export declare const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
|
|
|
2
2
|
export declare function getDatasetWorkdirBase(): string;
|
|
3
3
|
export declare function getDatasetWorkstation(datasetId: string): string;
|
|
4
4
|
export declare function getDatasetOutputPath(datasetId: string): string;
|
|
5
|
-
//# sourceMappingURL=datasetFiles.d.ts.map
|
package/dist/datasetFiles.js
CHANGED
|
@@ -1,17 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.DATASET_OUTPUT_FILE_NAME = void 0;
|
|
4
|
-
exports.getDatasetWorkdirBase = getDatasetWorkdirBase;
|
|
5
|
-
exports.getDatasetWorkstation = getDatasetWorkstation;
|
|
6
|
-
exports.getDatasetOutputPath = getDatasetOutputPath;
|
|
7
|
-
exports.DATASET_OUTPUT_FILE_NAME = "output.jsonl";
|
|
1
|
+
export const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
|
|
8
2
|
const DEFAULT_VERCEL_WORKDIR_BASE = "/vercel/sandbox/datasets";
|
|
9
3
|
const DEFAULT_DAYTONA_WORKDIR_BASE = "/home/daytona/.ekairos/datasets";
|
|
10
4
|
const DEFAULT_SPRITES_WORKDIR_BASE = "/workspace/.ekairos/datasets";
|
|
11
5
|
function trimTrailingSlash(value) {
|
|
12
6
|
return value.endsWith("/") ? value.slice(0, -1) : value;
|
|
13
7
|
}
|
|
14
|
-
function getDatasetWorkdirBase() {
|
|
8
|
+
export function getDatasetWorkdirBase() {
|
|
15
9
|
const explicit = String(process.env.DATASET_SANDBOX_WORKDIR_BASE ?? "").trim();
|
|
16
10
|
if (explicit)
|
|
17
11
|
return trimTrailingSlash(explicit);
|
|
@@ -24,10 +18,9 @@ function getDatasetWorkdirBase() {
|
|
|
24
18
|
return DEFAULT_SPRITES_WORKDIR_BASE;
|
|
25
19
|
return DEFAULT_VERCEL_WORKDIR_BASE;
|
|
26
20
|
}
|
|
27
|
-
function getDatasetWorkstation(datasetId) {
|
|
21
|
+
export function getDatasetWorkstation(datasetId) {
|
|
28
22
|
return `${getDatasetWorkdirBase()}/${datasetId}`;
|
|
29
23
|
}
|
|
30
|
-
function getDatasetOutputPath(datasetId) {
|
|
31
|
-
return `${getDatasetWorkstation(datasetId)}/${
|
|
24
|
+
export function getDatasetOutputPath(datasetId) {
|
|
25
|
+
return `${getDatasetWorkstation(datasetId)}/${DATASET_OUTPUT_FILE_NAME}`;
|
|
32
26
|
}
|
|
33
|
-
//# sourceMappingURL=datasetFiles.js.map
|
package/dist/domain.d.ts
CHANGED
|
@@ -1,2 +1 @@
|
|
|
1
|
-
export { datasetDomain } from "./schema";
|
|
2
|
-
//# sourceMappingURL=domain.d.ts.map
|
|
1
|
+
export { datasetDomain } from "./schema.js";
|
package/dist/domain.js
CHANGED
|
@@ -1,6 +1 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.datasetDomain = void 0;
|
|
4
|
-
var schema_1 = require("./schema");
|
|
5
|
-
Object.defineProperty(exports, "datasetDomain", { enumerable: true, get: function () { return schema_1.datasetDomain; } });
|
|
6
|
-
//# sourceMappingURL=domain.js.map
|
|
1
|
+
export { datasetDomain } from "./schema.js";
|
|
@@ -1,21 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
const events_1 = require("@ekairos/events");
|
|
5
|
-
const runtime_1 = require("@ekairos/events/runtime");
|
|
6
|
-
function createEventsReactRuntime(env) {
|
|
1
|
+
import { eventsDomain } from "@ekairos/events";
|
|
2
|
+
import { getContextRuntime } from "@ekairos/events/runtime";
|
|
3
|
+
export function createEventsReactRuntime(env) {
|
|
7
4
|
const meta = () => ({
|
|
8
|
-
domain:
|
|
9
|
-
schema:
|
|
10
|
-
context: typeof
|
|
11
|
-
contextString: typeof
|
|
12
|
-
?
|
|
5
|
+
domain: eventsDomain,
|
|
6
|
+
schema: eventsDomain.toInstantSchema(),
|
|
7
|
+
context: typeof eventsDomain.context === "function" ? eventsDomain.context() : undefined,
|
|
8
|
+
contextString: typeof eventsDomain.contextString === "function"
|
|
9
|
+
? eventsDomain.contextString()
|
|
13
10
|
: undefined,
|
|
14
11
|
});
|
|
15
12
|
const runtime = {
|
|
16
13
|
env,
|
|
17
14
|
async db() {
|
|
18
|
-
const resolved = (await
|
|
15
|
+
const resolved = (await getContextRuntime(env));
|
|
19
16
|
return resolved.db;
|
|
20
17
|
},
|
|
21
18
|
async resolve() {
|
|
@@ -26,4 +23,3 @@ function createEventsReactRuntime(env) {
|
|
|
26
23
|
};
|
|
27
24
|
return runtime;
|
|
28
25
|
}
|
|
29
|
-
//# sourceMappingURL=eventsReactRuntime.js.map
|
|
@@ -1,24 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const zod_1 = require("zod");
|
|
6
|
-
const steps_1 = require("./sandbox/steps");
|
|
7
|
-
const datasetFiles_1 = require("./datasetFiles");
|
|
1
|
+
import { tool } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "./sandbox/steps.js";
|
|
4
|
+
import { getDatasetWorkstation } from "./datasetFiles.js";
|
|
8
5
|
// To keep responses predictable for big data scenarios, we cap stdout/stderr.
|
|
9
6
|
// The tool's return payload exposes stdout (capped) plus the on-disk script path.
|
|
10
7
|
const MAX_STDOUT_CHARS = 20000;
|
|
11
8
|
const MAX_STDERR_CHARS = 5000;
|
|
12
|
-
function createExecuteCommandTool({ datasetId, sandboxId, env }) {
|
|
13
|
-
return
|
|
9
|
+
export function createExecuteCommandTool({ datasetId, sandboxId, env }) {
|
|
10
|
+
return tool({
|
|
14
11
|
description: "Execute Python scripts in the sandbox. Always saves script to a file before executing. The tool's output is EXACTLY the script's stdout and includes the script file path for traceability. CRITICAL: Print concise, human-readable summaries only; do NOT print raw large data. For big results, write artifacts to files in the workstation and print their file paths. Always include progress/result prints (e.g., 'Processing file X...', 'Found Y records', 'Generated output.csv').",
|
|
15
|
-
inputSchema:
|
|
16
|
-
pythonCode:
|
|
17
|
-
scriptName:
|
|
12
|
+
inputSchema: z.object({
|
|
13
|
+
pythonCode: z.string().describe("Python code to execute. Saved to a file before running. MANDATORY: Use print() to report progress and final results. Keep prints concise; avoid dumping rows/JSON. For large outputs, write to files in the workstation directory and print only file paths and brief summaries."),
|
|
14
|
+
scriptName: z.string().describe("Name for the script file in snake_case (e.g., 'inspect_file', 'parse_csv', 'generate_dataset'). A UUID will be appended automatically."),
|
|
18
15
|
}),
|
|
19
16
|
execute: async ({ pythonCode, scriptName }) => {
|
|
20
17
|
const uuid = `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
|
|
21
|
-
const workstation =
|
|
18
|
+
const workstation = getDatasetWorkstation(datasetId);
|
|
22
19
|
const scriptFile = `${workstation}/${scriptName}-${uuid}.py`;
|
|
23
20
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
24
21
|
console.log(`[Dataset ${datasetId}] Tool: executeCommand`);
|
|
@@ -27,7 +24,7 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
|
|
|
27
24
|
console.log(`[Dataset ${datasetId}] Code length: ${pythonCode.length} chars`);
|
|
28
25
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
29
26
|
try {
|
|
30
|
-
await
|
|
27
|
+
await writeDatasetSandboxFilesStep({
|
|
31
28
|
env,
|
|
32
29
|
sandboxId,
|
|
33
30
|
files: [
|
|
@@ -39,7 +36,7 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
|
|
|
39
36
|
});
|
|
40
37
|
console.log(`[Dataset ${datasetId}] Script written to: ${scriptFile}`);
|
|
41
38
|
console.log(`[Dataset ${datasetId}] Executing: python ${scriptFile}`);
|
|
42
|
-
const result = await
|
|
39
|
+
const result = await runDatasetSandboxCommandStep({
|
|
43
40
|
env,
|
|
44
41
|
sandboxId,
|
|
45
42
|
cmd: "python",
|
|
@@ -127,4 +124,3 @@ function createExecuteCommandTool({ datasetId, sandboxId, env }) {
|
|
|
127
124
|
},
|
|
128
125
|
});
|
|
129
126
|
}
|
|
130
|
-
//# sourceMappingURL=executeCommand.tool.js.map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createContext, type ContextReactor } from "@ekairos/events";
|
|
2
|
-
import { FilePreviewContext } from "./filepreview";
|
|
2
|
+
import { FilePreviewContext } from "./filepreview.js";
|
|
3
3
|
export type FileParseStoryContext = {
|
|
4
4
|
datasetId: string;
|
|
5
5
|
fileId: string;
|
|
@@ -65,4 +65,3 @@ export declare function createFileParseStory<Env extends {
|
|
|
65
65
|
}>;
|
|
66
66
|
story: any;
|
|
67
67
|
};
|
|
68
|
-
//# sourceMappingURL=file-dataset.agent.d.ts.map
|
|
@@ -1,27 +1,24 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
const steps_2 = require("./steps");
|
|
15
|
-
const steps_3 = require("../dataset/steps");
|
|
16
|
-
const eventsReactRuntime_1 = require("../eventsReactRuntime");
|
|
1
|
+
import { createContext, didToolExecute, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL } from "@ekairos/events";
|
|
2
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
3
|
+
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
4
|
+
import { createCompleteDatasetTool } from "../completeDataset.tool.js";
|
|
5
|
+
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
6
|
+
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
7
|
+
import { buildFileDatasetPrompt } from "./prompts.js";
|
|
8
|
+
import { generateFilePreview, ensurePreviewScriptsAvailable } from "./filepreview.js";
|
|
9
|
+
import { id } from "@instantdb/admin";
|
|
10
|
+
import { getDatasetWorkstation } from "../datasetFiles.js";
|
|
11
|
+
import { readInstantFileStep } from "./steps.js";
|
|
12
|
+
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
13
|
+
import { createEventsReactRuntime } from "../eventsReactRuntime.js";
|
|
17
14
|
async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
18
15
|
if (state.initialized) {
|
|
19
16
|
return state.filePath;
|
|
20
17
|
}
|
|
21
18
|
console.log(`[FileParseStory ${datasetId}] Initializing sandbox...`);
|
|
22
|
-
await
|
|
19
|
+
await ensurePreviewScriptsAvailable(env, sandboxId);
|
|
23
20
|
console.log(`[FileParseStory ${datasetId}] Installing Python dependencies...`);
|
|
24
|
-
const pipInstall = await
|
|
21
|
+
const pipInstall = await runDatasetSandboxCommandStep({
|
|
25
22
|
env,
|
|
26
23
|
sandboxId,
|
|
27
24
|
cmd: "python",
|
|
@@ -32,10 +29,10 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
32
29
|
throw new Error(`pip install failed: ${installStderr.substring(0, 300)}`);
|
|
33
30
|
}
|
|
34
31
|
console.log(`[FileParseStory ${datasetId}] Fetching file from InstantDB...`);
|
|
35
|
-
const file = await
|
|
32
|
+
const file = await readInstantFileStep({ env, fileId });
|
|
36
33
|
console.log(`[FileParseStory ${datasetId}] Creating dataset workstation...`);
|
|
37
|
-
const workstation =
|
|
38
|
-
await
|
|
34
|
+
const workstation = getDatasetWorkstation(datasetId);
|
|
35
|
+
await runDatasetSandboxCommandStep({
|
|
39
36
|
env,
|
|
40
37
|
sandboxId,
|
|
41
38
|
cmd: "mkdir",
|
|
@@ -44,7 +41,7 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
44
41
|
const fileName = file.contentDisposition ?? "";
|
|
45
42
|
const fileExtension = fileName.includes(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
|
|
46
43
|
const sandboxFilePath = `${workstation}/${fileId}${fileExtension}`;
|
|
47
|
-
await
|
|
44
|
+
await writeDatasetSandboxFilesStep({
|
|
48
45
|
env,
|
|
49
46
|
sandboxId,
|
|
50
47
|
files: [
|
|
@@ -71,9 +68,9 @@ async function initializeSandbox(env, sandboxId, datasetId, fileId, state) {
|
|
|
71
68
|
* Internamente corre un Context (`createContext("file.parse")`) que itera hasta que se ejecuta el tool `completeDataset`.
|
|
72
69
|
*/
|
|
73
70
|
function createFileParseStoryDefinition(params) {
|
|
74
|
-
const datasetId = params.datasetId ??
|
|
71
|
+
const datasetId = params.datasetId ?? id();
|
|
75
72
|
const model = params.model ?? "openai/gpt-5";
|
|
76
|
-
let storyBuilder =
|
|
73
|
+
let storyBuilder = createContext("file.parse")
|
|
77
74
|
.context(async (stored, env) => {
|
|
78
75
|
const previous = stored?.content ?? {};
|
|
79
76
|
const sandboxState = previous?.sandboxState ?? { initialized: false, filePath: "" };
|
|
@@ -84,13 +81,13 @@ function createFileParseStoryDefinition(params) {
|
|
|
84
81
|
const sandboxFilePath = await initializeSandbox(env, sandboxId, datasetId, params.fileId, sandboxState);
|
|
85
82
|
let filePreview = undefined;
|
|
86
83
|
try {
|
|
87
|
-
filePreview = await
|
|
84
|
+
filePreview = await generateFilePreview(env, sandboxId, sandboxFilePath, datasetId);
|
|
88
85
|
}
|
|
89
86
|
catch {
|
|
90
87
|
// optional
|
|
91
88
|
}
|
|
92
89
|
let schema = null;
|
|
93
|
-
const datasetResult = await
|
|
90
|
+
const datasetResult = await datasetGetByIdStep({ env, datasetId });
|
|
94
91
|
if (datasetResult.ok && datasetResult.data.schema)
|
|
95
92
|
schema = datasetResult.data.schema;
|
|
96
93
|
const ctx = {
|
|
@@ -118,7 +115,7 @@ function createFileParseStoryDefinition(params) {
|
|
|
118
115
|
})
|
|
119
116
|
.narrative(async (stored) => {
|
|
120
117
|
const ctx = stored?.content?.ctx;
|
|
121
|
-
const base =
|
|
118
|
+
const base = buildFileDatasetPrompt(ctx);
|
|
122
119
|
const userInstructions = String(ctx?.instructions ?? "").trim();
|
|
123
120
|
if (!userInstructions)
|
|
124
121
|
return base;
|
|
@@ -134,24 +131,24 @@ function createFileParseStoryDefinition(params) {
|
|
|
134
131
|
.actions(async (_stored, env) => {
|
|
135
132
|
const existingSchema = _stored?.content?.ctx?.schema?.schema;
|
|
136
133
|
const actions = {
|
|
137
|
-
executeCommand:
|
|
134
|
+
executeCommand: createExecuteCommandTool({
|
|
138
135
|
datasetId,
|
|
139
136
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
140
137
|
env,
|
|
141
138
|
}),
|
|
142
|
-
completeDataset:
|
|
139
|
+
completeDataset: createCompleteDatasetTool({
|
|
143
140
|
datasetId,
|
|
144
141
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
145
142
|
env,
|
|
146
143
|
}),
|
|
147
|
-
clearDataset:
|
|
144
|
+
clearDataset: createClearDatasetTool({
|
|
148
145
|
datasetId,
|
|
149
146
|
sandboxId: _stored?.content?.sandboxId ?? params.sandboxId ?? "",
|
|
150
147
|
env,
|
|
151
148
|
}),
|
|
152
149
|
};
|
|
153
150
|
if (!existingSchema) {
|
|
154
|
-
actions.generateSchema =
|
|
151
|
+
actions.generateSchema = createGenerateSchemaTool({
|
|
155
152
|
datasetId,
|
|
156
153
|
fileId: params.fileId,
|
|
157
154
|
env,
|
|
@@ -160,7 +157,7 @@ function createFileParseStoryDefinition(params) {
|
|
|
160
157
|
return actions;
|
|
161
158
|
})
|
|
162
159
|
.shouldContinue(({ reactionEvent }) => {
|
|
163
|
-
return !
|
|
160
|
+
return !didToolExecute(reactionEvent, "completeDataset");
|
|
164
161
|
});
|
|
165
162
|
if (params.reactor) {
|
|
166
163
|
storyBuilder = storyBuilder.reactor(params.reactor);
|
|
@@ -181,7 +178,7 @@ function createFileParseStoryDefinition(params) {
|
|
|
181
178
|
* - All I/O happens in `"use step"` functions via Ekairos runtime (`getContextRuntime(env).db`).
|
|
182
179
|
* - `parse()` is the entrypoint; it calls `story.react(...)` internally.
|
|
183
180
|
*/
|
|
184
|
-
function createFileParseStory(fileId, opts) {
|
|
181
|
+
export function createFileParseStory(fileId, opts) {
|
|
185
182
|
const params = {
|
|
186
183
|
fileId,
|
|
187
184
|
instructions: opts?.instructions,
|
|
@@ -195,15 +192,15 @@ function createFileParseStory(fileId, opts) {
|
|
|
195
192
|
datasetId,
|
|
196
193
|
async parse(env, prompt) {
|
|
197
194
|
const triggerEvent = {
|
|
198
|
-
id:
|
|
199
|
-
type:
|
|
200
|
-
channel:
|
|
195
|
+
id: id(),
|
|
196
|
+
type: INPUT_TEXT_ITEM_TYPE,
|
|
197
|
+
channel: WEB_CHANNEL,
|
|
201
198
|
createdAt: new Date().toISOString(),
|
|
202
199
|
content: {
|
|
203
200
|
parts: [{ type: "text", text: prompt ?? "generate a dataset for this file" }],
|
|
204
201
|
},
|
|
205
202
|
};
|
|
206
|
-
const runtime =
|
|
203
|
+
const runtime = createEventsReactRuntime((env ?? {}));
|
|
207
204
|
const shell = await story.react(triggerEvent, {
|
|
208
205
|
runtime,
|
|
209
206
|
context: { key: `dataset:${datasetId}` },
|
|
@@ -217,4 +214,3 @@ function createFileParseStory(fileId, opts) {
|
|
|
217
214
|
story,
|
|
218
215
|
};
|
|
219
216
|
}
|
|
220
|
-
//# sourceMappingURL=file-dataset.agent.js.map
|
|
@@ -37,4 +37,3 @@ interface PreviewOptions {
|
|
|
37
37
|
export declare function ensurePreviewScriptsAvailable(env: any, sandboxId: string): Promise<void>;
|
|
38
38
|
export declare function generateFilePreview(env: any, sandboxId: string, sandboxFilePath: string, datasetId: string, options?: PreviewOptions): Promise<FilePreviewContext>;
|
|
39
39
|
export {};
|
|
40
|
-
//# sourceMappingURL=filepreview.d.ts.map
|
package/dist/file/filepreview.js
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const fs_1 = require("fs");
|
|
6
|
-
const path_1 = require("path");
|
|
7
|
-
const steps_1 = require("../sandbox/steps");
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
8
5
|
const DEFAULT_HEAD_LINES = 50;
|
|
9
6
|
const DEFAULT_TAIL_LINES = 20;
|
|
10
7
|
const DEFAULT_MID_LINES = 20;
|
|
@@ -21,7 +18,7 @@ const PYTHON_SCRIPT_FILES = [
|
|
|
21
18
|
function resolveScriptPath(scriptName) {
|
|
22
19
|
// Prefer local scripts in src/ (tests/dev), and after build the scripts are copied to dist/
|
|
23
20
|
// at the same relative path, so this works in both environments.
|
|
24
|
-
return (
|
|
21
|
+
return join(dirname(fileURLToPath(import.meta.url)), "scripts", scriptName);
|
|
25
22
|
}
|
|
26
23
|
const preparedSandboxIds = new Set();
|
|
27
24
|
const sandboxSetupPromises = new Map();
|
|
@@ -37,7 +34,7 @@ function validateScriptResult(result, context) {
|
|
|
37
34
|
throw new Error(`${context} failed: ${stderr.substring(0, 500)}`);
|
|
38
35
|
}
|
|
39
36
|
}
|
|
40
|
-
async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
37
|
+
export async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
41
38
|
if (preparedSandboxIds.has(sandboxId)) {
|
|
42
39
|
return;
|
|
43
40
|
}
|
|
@@ -48,7 +45,7 @@ async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
48
45
|
}
|
|
49
46
|
const setupPromise = (async () => {
|
|
50
47
|
try {
|
|
51
|
-
await
|
|
48
|
+
await runDatasetSandboxCommandStep({
|
|
52
49
|
env,
|
|
53
50
|
sandboxId,
|
|
54
51
|
cmd: "mkdir",
|
|
@@ -62,7 +59,7 @@ async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
62
59
|
for (const scriptName of PYTHON_SCRIPT_FILES) {
|
|
63
60
|
try {
|
|
64
61
|
const scriptPath = resolveScriptPath(scriptName);
|
|
65
|
-
const fileBuffer =
|
|
62
|
+
const fileBuffer = readFileSync(scriptPath);
|
|
66
63
|
filesToWrite.push({
|
|
67
64
|
path: `${SANDBOX_SCRIPT_DIRECTORY}/${scriptName}`,
|
|
68
65
|
contentBase64: Buffer.from(fileBuffer).toString("base64"),
|
|
@@ -74,7 +71,7 @@ async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
74
71
|
}
|
|
75
72
|
}
|
|
76
73
|
if (filesToWrite.length > 0) {
|
|
77
|
-
await
|
|
74
|
+
await writeDatasetSandboxFilesStep({
|
|
78
75
|
env,
|
|
79
76
|
sandboxId,
|
|
80
77
|
files: filesToWrite,
|
|
@@ -91,7 +88,7 @@ async function ensurePreviewScriptsAvailable(env, sandboxId) {
|
|
|
91
88
|
throw error;
|
|
92
89
|
}
|
|
93
90
|
}
|
|
94
|
-
async function generateFilePreview(env, sandboxId, sandboxFilePath, datasetId, options = {}) {
|
|
91
|
+
export async function generateFilePreview(env, sandboxId, sandboxFilePath, datasetId, options = {}) {
|
|
95
92
|
const context = {
|
|
96
93
|
totalRows: 0,
|
|
97
94
|
};
|
|
@@ -164,13 +161,13 @@ async function runScript(env, sandboxId, scriptName, args, description) {
|
|
|
164
161
|
let scriptContent = "";
|
|
165
162
|
try {
|
|
166
163
|
const localScriptPath = resolveScriptPath(scriptName);
|
|
167
|
-
scriptContent =
|
|
164
|
+
scriptContent = readFileSync(localScriptPath, 'utf-8');
|
|
168
165
|
}
|
|
169
166
|
catch (error) {
|
|
170
167
|
console.warn(`Failed to read script ${scriptName}:`, error);
|
|
171
168
|
}
|
|
172
169
|
try {
|
|
173
|
-
const result = await
|
|
170
|
+
const result = await runDatasetSandboxCommandStep({
|
|
174
171
|
env,
|
|
175
172
|
sandboxId,
|
|
176
173
|
cmd: "python",
|
|
@@ -194,4 +191,3 @@ async function runScript(env, sandboxId, scriptName, args, description) {
|
|
|
194
191
|
};
|
|
195
192
|
}
|
|
196
193
|
}
|
|
197
|
-
//# sourceMappingURL=filepreview.js.map
|