@ekairos/dataset 1.22.82-beta.development.0 → 1.22.84-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/agentMaterializers.d.ts +2 -2
- package/dist/builder/context.d.ts +7 -0
- package/dist/builder/context.js +192 -0
- package/dist/builder/instructions.d.ts +3 -3
- package/dist/builder/instructions.js +10 -10
- package/dist/builder/materialize.d.ts +12 -11
- package/dist/builder/materialize.js +122 -121
- package/dist/builder/materializeQuery.d.ts +3 -2
- package/dist/builder/materializeQuery.js +10 -19
- package/dist/builder/persistence.d.ts +4 -5
- package/dist/builder/persistence.js +20 -19
- package/dist/builder/types.d.ts +31 -24
- package/dist/completeDataset.steps.d.ts +9 -8
- package/dist/completeDataset.steps.js +18 -11
- package/dist/completeDataset.tool.d.ts +9 -8
- package/dist/completeDataset.tool.js +2 -1
- package/dist/contextWorkspace.d.ts +72 -0
- package/dist/contextWorkspace.js +218 -0
- package/dist/dataset.d.ts +1 -1
- package/dist/dataset.js +42 -29
- package/dist/datasetFiles.d.ts +1 -1
- package/dist/datasetFiles.js +3 -3
- package/dist/executeCommand.tool.d.ts +1 -43
- package/dist/executeCommand.tool.js +10 -3
- package/dist/file/file-dataset.agent.d.ts +2 -0
- package/dist/file/file-dataset.agent.js +51 -16
- package/dist/file/file-dataset.steps.d.ts +6 -0
- package/dist/file/file-dataset.steps.js +18 -21
- package/dist/file/file-dataset.types.d.ts +10 -0
- package/dist/file/prompts.js +16 -14
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/materializeDataset.tool.d.ts +34 -26
- package/dist/materializeDataset.tool.js +40 -29
- package/dist/schema.d.ts +12 -2
- package/dist/schema.js +6 -3
- package/dist/service.d.ts +2 -2
- package/dist/service.js +6 -3
- package/dist/transform/filepreview.d.ts +2 -2
- package/dist/transform/filepreview.js +3 -3
- package/dist/transform/prompts.js +25 -25
- package/dist/transform/transform-dataset.agent.d.ts +4 -4
- package/dist/transform/transform-dataset.agent.js +29 -30
- package/dist/transform/transform-dataset.steps.d.ts +7 -7
- package/dist/transform/transform-dataset.steps.js +20 -20
- package/dist/transform/transform-dataset.types.d.ts +13 -13
- package/dist/transform/transformDataset.js +4 -4
- package/package.json +4 -4
- /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
- /package/dist/builder/{sourceRows.js → rows.js} +0 -0
|
@@ -1,29 +1,29 @@
|
|
|
1
|
-
import type { TransformPromptContext, TransformSandboxState,
|
|
2
|
-
export declare function ensureTransformSourcesInSandboxStep(params: {
|
|
1
|
+
import type { TransformPromptContext, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
|
|
2
|
+
export declare function ensureTransformInputsInSandboxStep(params: {
|
|
3
3
|
runtime: any;
|
|
4
4
|
sandboxId: string;
|
|
5
5
|
datasetId: string;
|
|
6
|
-
|
|
6
|
+
inputDatasetIds: string[];
|
|
7
7
|
state: TransformSandboxState;
|
|
8
8
|
}): Promise<{
|
|
9
|
-
|
|
9
|
+
inputPaths: Array<{
|
|
10
10
|
datasetId: string;
|
|
11
11
|
path: string;
|
|
12
12
|
}>;
|
|
13
13
|
outputPath: string;
|
|
14
14
|
state: TransformSandboxState;
|
|
15
15
|
}>;
|
|
16
|
-
export declare function
|
|
16
|
+
export declare function generateTransformInputPreviewsStep(params: {
|
|
17
17
|
runtime: any;
|
|
18
18
|
sandboxId: string;
|
|
19
19
|
datasetId: string;
|
|
20
|
-
|
|
20
|
+
inputPaths: Array<{
|
|
21
21
|
datasetId: string;
|
|
22
22
|
path: string;
|
|
23
23
|
}>;
|
|
24
24
|
}): Promise<Array<{
|
|
25
25
|
datasetId: string;
|
|
26
|
-
preview:
|
|
26
|
+
preview: TransformInputPreviewContext;
|
|
27
27
|
}>>;
|
|
28
28
|
export declare function buildTransformDatasetPromptStep(params: {
|
|
29
29
|
context: TransformPromptContext;
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import { getDatasetOutputPath,
|
|
1
|
+
import { getDatasetOutputPath, getDatasetResourcesDir, getDatasetStandardDirs, } from "../datasetFiles.js";
|
|
2
2
|
import { datasetReadOutputJsonlStep } from "../dataset/steps.js";
|
|
3
3
|
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
|
|
4
|
-
import {
|
|
4
|
+
import { generateInputPreview } from "./filepreview.js";
|
|
5
5
|
import { buildTransformDatasetPrompt } from "./prompts.js";
|
|
6
|
-
export async function ensureTransformSourcesInSandboxStep(params) {
|
|
6
|
+
export async function ensureTransformInputsInSandboxStep(params) {
|
|
7
7
|
"use step";
|
|
8
8
|
if (params.state.initialized) {
|
|
9
9
|
return {
|
|
10
|
-
|
|
10
|
+
inputPaths: params.state.inputPaths,
|
|
11
11
|
outputPath: getDatasetOutputPath(params.datasetId),
|
|
12
12
|
state: params.state,
|
|
13
13
|
};
|
|
@@ -18,42 +18,42 @@ export async function ensureTransformSourcesInSandboxStep(params) {
|
|
|
18
18
|
cmd: "mkdir",
|
|
19
19
|
args: ["-p", ...getDatasetStandardDirs(params.datasetId)],
|
|
20
20
|
});
|
|
21
|
-
const
|
|
22
|
-
for (const
|
|
23
|
-
const
|
|
24
|
-
const
|
|
21
|
+
const inputPaths = [];
|
|
22
|
+
for (const inputDatasetId of params.inputDatasetIds) {
|
|
23
|
+
const inputPath = `${getDatasetResourcesDir(params.datasetId)}/resource_${inputDatasetId}.jsonl`;
|
|
24
|
+
const input = await datasetReadOutputJsonlStep({
|
|
25
25
|
runtime: params.runtime,
|
|
26
|
-
datasetId:
|
|
26
|
+
datasetId: inputDatasetId,
|
|
27
27
|
});
|
|
28
28
|
await writeDatasetSandboxFilesStep({
|
|
29
29
|
runtime: params.runtime,
|
|
30
30
|
sandboxId: params.sandboxId,
|
|
31
|
-
files: [{ path:
|
|
31
|
+
files: [{ path: inputPath, contentBase64: input.contentBase64 }],
|
|
32
32
|
});
|
|
33
|
-
|
|
33
|
+
inputPaths.push({ datasetId: inputDatasetId, path: inputPath });
|
|
34
34
|
}
|
|
35
35
|
return {
|
|
36
|
-
|
|
36
|
+
inputPaths,
|
|
37
37
|
outputPath: getDatasetOutputPath(params.datasetId),
|
|
38
38
|
state: {
|
|
39
39
|
initialized: true,
|
|
40
|
-
|
|
40
|
+
inputPaths,
|
|
41
41
|
},
|
|
42
42
|
};
|
|
43
43
|
}
|
|
44
|
-
export async function
|
|
44
|
+
export async function generateTransformInputPreviewsStep(params) {
|
|
45
45
|
"use step";
|
|
46
|
-
const
|
|
47
|
-
for (const
|
|
46
|
+
const inputPreviews = [];
|
|
47
|
+
for (const inputPath of params.inputPaths) {
|
|
48
48
|
try {
|
|
49
|
-
const preview = await
|
|
50
|
-
|
|
49
|
+
const preview = await generateInputPreview(params.runtime, params.sandboxId, inputPath.path, params.datasetId);
|
|
50
|
+
inputPreviews.push({ datasetId: inputPath.datasetId, preview });
|
|
51
51
|
}
|
|
52
52
|
catch {
|
|
53
|
-
//
|
|
53
|
+
// Input preview is optional; transformation can still read the JSONL files.
|
|
54
54
|
}
|
|
55
55
|
}
|
|
56
|
-
return
|
|
56
|
+
return inputPreviews;
|
|
57
57
|
}
|
|
58
58
|
export async function buildTransformDatasetPromptStep(params) {
|
|
59
59
|
"use step";
|
|
@@ -1,34 +1,34 @@
|
|
|
1
1
|
import type { ContextReactor } from "@ekairos/events";
|
|
2
|
-
import type {
|
|
3
|
-
export type {
|
|
2
|
+
import type { TransformInputPreviewContext } from "./filepreview.js";
|
|
3
|
+
export type { TransformInputPreviewContext } from "./filepreview.js";
|
|
4
4
|
export type TransformSandboxState = {
|
|
5
5
|
initialized: boolean;
|
|
6
|
-
|
|
6
|
+
inputPaths: Array<{
|
|
7
7
|
datasetId: string;
|
|
8
8
|
path: string;
|
|
9
9
|
}>;
|
|
10
10
|
};
|
|
11
11
|
export type TransformDatasetContext = {
|
|
12
12
|
datasetId: string;
|
|
13
|
-
|
|
13
|
+
inputDatasetIds: string[];
|
|
14
14
|
outputSchema: any;
|
|
15
15
|
sandboxConfig: {
|
|
16
|
-
|
|
16
|
+
inputPaths: Array<{
|
|
17
17
|
datasetId: string;
|
|
18
18
|
path: string;
|
|
19
19
|
}>;
|
|
20
20
|
outputPath: string;
|
|
21
21
|
};
|
|
22
|
-
|
|
22
|
+
inputPreviews?: Array<{
|
|
23
23
|
datasetId: string;
|
|
24
|
-
preview:
|
|
24
|
+
preview: TransformInputPreviewContext;
|
|
25
25
|
}>;
|
|
26
26
|
errors: string[];
|
|
27
27
|
iterationCount: number;
|
|
28
28
|
instructions?: string;
|
|
29
29
|
};
|
|
30
30
|
export type TransformDatasetAgentParams = {
|
|
31
|
-
|
|
31
|
+
inputDatasetIds?: string[];
|
|
32
32
|
outputSchema?: any;
|
|
33
33
|
instructions?: string;
|
|
34
34
|
datasetId?: string;
|
|
@@ -36,9 +36,9 @@ export type TransformDatasetAgentParams = {
|
|
|
36
36
|
sandboxId?: string;
|
|
37
37
|
reactor?: ContextReactor<any, any>;
|
|
38
38
|
sandboxState?: TransformSandboxState;
|
|
39
|
-
|
|
39
|
+
inputPreviews?: Array<{
|
|
40
40
|
datasetId: string;
|
|
41
|
-
preview:
|
|
41
|
+
preview: TransformInputPreviewContext;
|
|
42
42
|
}>;
|
|
43
43
|
};
|
|
44
44
|
export type TransformDatasetRunOptions = {
|
|
@@ -59,16 +59,16 @@ export type TransformDatasetResult = {
|
|
|
59
59
|
};
|
|
60
60
|
export type TransformPromptContext = {
|
|
61
61
|
datasetId: string;
|
|
62
|
-
|
|
62
|
+
inputDatasetIds: string[];
|
|
63
63
|
outputSchema: any;
|
|
64
64
|
sandboxConfig: {
|
|
65
|
-
|
|
65
|
+
inputPaths: Array<{
|
|
66
66
|
datasetId: string;
|
|
67
67
|
path: string;
|
|
68
68
|
}>;
|
|
69
69
|
outputPath: string;
|
|
70
70
|
};
|
|
71
|
-
|
|
71
|
+
inputPreviews?: Array<{
|
|
72
72
|
datasetId: string;
|
|
73
73
|
preview: {
|
|
74
74
|
totalRows: number;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { datasetPreviewRowsStep } from "../dataset/steps.js";
|
|
2
2
|
import { createTransformDatasetContext } from "./transform-dataset.agent.js";
|
|
3
3
|
function buildInstructions(input) {
|
|
4
|
-
const
|
|
4
|
+
const inputs = input.datasets
|
|
5
5
|
.map((d, idx) => {
|
|
6
6
|
const name = d.description ? ` - ${d.description}` : "";
|
|
7
7
|
return `${idx + 1}. ${d.id}${name}`;
|
|
@@ -12,8 +12,8 @@ function buildInstructions(input) {
|
|
|
12
12
|
"Use pandas when helpful. Output must be JSONL with {type:'row', data:{...}} lines.",
|
|
13
13
|
"Respect the provided output schema exactly.",
|
|
14
14
|
"",
|
|
15
|
-
"##
|
|
16
|
-
|
|
15
|
+
"## Input Datasets",
|
|
16
|
+
inputs || "- (none)",
|
|
17
17
|
"",
|
|
18
18
|
"## Transformation Description (LaTeX + sets)",
|
|
19
19
|
String(input.description ?? "").trim(),
|
|
@@ -25,7 +25,7 @@ function buildInstructions(input) {
|
|
|
25
25
|
*/
|
|
26
26
|
export async function transformDataset(runtime, input) {
|
|
27
27
|
const transformContext = createTransformDatasetContext({
|
|
28
|
-
|
|
28
|
+
inputDatasetIds: input.datasets.map((d) => d.id),
|
|
29
29
|
outputSchema: input.outputSchema,
|
|
30
30
|
instructions: buildInstructions(input),
|
|
31
31
|
datasetId: input.datasetId,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.82-beta.development.0",
|
|
3
|
+
"version": "1.22.84-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.84-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.84-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.84-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|
|
File without changes
|
|
File without changes
|