@ekairos/dataset 1.22.37-beta.development.0 → 1.22.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +347 -0
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +5 -0
- package/dist/builder/instructions.js +40 -0
- package/dist/builder/materialize.d.ts +83 -0
- package/dist/builder/materialize.js +548 -0
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +22 -0
- package/dist/builder/persistence.js +153 -0
- package/dist/builder/rows.d.ts +7 -0
- package/dist/builder/rows.js +56 -0
- package/dist/builder/schemaInference.d.ts +3 -0
- package/dist/builder/schemaInference.js +61 -0
- package/dist/builder/types.d.ts +140 -0
- package/dist/builder/types.js +1 -0
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +487 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +3 -67
- package/dist/dataset.js +129 -521
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +176 -134
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +134 -27
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +8 -7
- package/dist/index.js +8 -23
- package/dist/materializeDataset.tool.d.ts +52 -32
- package/dist/materializeDataset.tool.js +81 -65
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +13 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +13 -13
- package/dist/schema.js +25 -37
- package/dist/service.d.ts +8 -5
- package/dist/service.js +70 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +58 -43
- package/dist/transform/transform-dataset.agent.d.ts +20 -45
- package/dist/transform/transform-dataset.agent.js +146 -91
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +95 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +35 -10
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts +0 -22
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js +0 -29
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
package/dist/skill.js
CHANGED
|
@@ -1,48 +1,44 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.buildDatasetSkillPackage = buildDatasetSkillPackage;
|
|
7
|
-
const node_fs_1 = require("node:fs");
|
|
8
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
1
|
+
import { readdirSync, readFileSync, statSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
9
4
|
function walkFiles(rootDir, currentDir = rootDir) {
|
|
10
|
-
const entries =
|
|
5
|
+
const entries = readdirSync(currentDir, { withFileTypes: true });
|
|
11
6
|
const files = [];
|
|
12
7
|
for (const entry of entries) {
|
|
13
|
-
const absPath =
|
|
8
|
+
const absPath = path.join(currentDir, entry.name);
|
|
14
9
|
if (entry.isDirectory()) {
|
|
15
10
|
files.push(...walkFiles(rootDir, absPath));
|
|
16
11
|
continue;
|
|
17
12
|
}
|
|
18
13
|
if (entry.isFile()) {
|
|
19
|
-
files.push(
|
|
14
|
+
files.push(path.relative(rootDir, absPath).replace(/\\/g, "/"));
|
|
20
15
|
}
|
|
21
16
|
}
|
|
22
17
|
return files;
|
|
23
18
|
}
|
|
24
19
|
function resolveDatasetSkillRoot() {
|
|
25
|
-
const
|
|
20
|
+
const currentDir = path.dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
const fromDist = path.resolve(currentDir, "..", "skill");
|
|
26
22
|
if (statExists(fromDist))
|
|
27
23
|
return fromDist;
|
|
28
|
-
const fromSrc =
|
|
24
|
+
const fromSrc = path.resolve(currentDir, "..", "..", "skill");
|
|
29
25
|
if (statExists(fromSrc))
|
|
30
26
|
return fromSrc;
|
|
31
27
|
throw new Error("dataset_skill_root_not_found");
|
|
32
28
|
}
|
|
33
29
|
function statExists(candidate) {
|
|
34
30
|
try {
|
|
35
|
-
return
|
|
31
|
+
return statSync(candidate).isDirectory();
|
|
36
32
|
}
|
|
37
33
|
catch {
|
|
38
34
|
return false;
|
|
39
35
|
}
|
|
40
36
|
}
|
|
41
|
-
function buildDatasetSkillPackage() {
|
|
37
|
+
export function buildDatasetSkillPackage() {
|
|
42
38
|
const skillRoot = resolveDatasetSkillRoot();
|
|
43
39
|
const files = walkFiles(skillRoot).map((relativePath) => ({
|
|
44
40
|
path: relativePath,
|
|
45
|
-
contentBase64:
|
|
41
|
+
contentBase64: readFileSync(path.join(skillRoot, relativePath)).toString("base64"),
|
|
46
42
|
}));
|
|
47
43
|
return {
|
|
48
44
|
name: "dataset",
|
|
@@ -50,4 +46,3 @@ function buildDatasetSkillPackage() {
|
|
|
50
46
|
files,
|
|
51
47
|
};
|
|
52
48
|
}
|
|
53
|
-
//# sourceMappingURL=skill.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export type
|
|
1
|
+
export type TransformInputPreviewContext = {
|
|
2
2
|
totalRows: number;
|
|
3
3
|
metadata?: {
|
|
4
4
|
description: string;
|
|
@@ -18,6 +18,5 @@ export type TransformSourcePreviewContext = {
|
|
|
18
18
|
interface PreviewOptions {
|
|
19
19
|
headLines?: number;
|
|
20
20
|
}
|
|
21
|
-
export declare function
|
|
21
|
+
export declare function generateInputPreview(runtime: any, sandboxId: string, inputPath: string, datasetId: string, options?: PreviewOptions): Promise<TransformInputPreviewContext>;
|
|
22
22
|
export {};
|
|
23
|
-
//# sourceMappingURL=filepreview.d.ts.map
|
|
@@ -1,39 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.generateSourcePreview = generateSourcePreview;
|
|
4
|
-
const datasetFiles_1 = require("../datasetFiles");
|
|
5
|
-
const steps_1 = require("../sandbox/steps");
|
|
1
|
+
import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
|
|
6
2
|
const DEFAULT_HEAD_LINES = 50;
|
|
7
|
-
async function runPythonSnippet(
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
await (0, steps_1.writeDatasetSandboxFilesStep)({
|
|
11
|
-
env,
|
|
12
|
-
sandboxId,
|
|
13
|
-
files: [
|
|
14
|
-
{
|
|
15
|
-
path: scriptPath,
|
|
16
|
-
contentBase64: Buffer.from(code, "utf-8").toString("base64"),
|
|
17
|
-
},
|
|
18
|
-
],
|
|
19
|
-
});
|
|
20
|
-
const result = await (0, steps_1.runDatasetSandboxCommandStep)({
|
|
21
|
-
env,
|
|
3
|
+
async function runPythonSnippet(runtime, sandboxId, datasetId, scriptName, code, args, description) {
|
|
4
|
+
const result = await runDatasetSandboxCommandStep({
|
|
5
|
+
runtime,
|
|
22
6
|
sandboxId,
|
|
23
7
|
cmd: "python",
|
|
24
|
-
args: [
|
|
8
|
+
args: ["-c", code, ...args],
|
|
25
9
|
});
|
|
26
10
|
const stdout = result.stdout || "";
|
|
27
11
|
const stderr = result.stderr || "";
|
|
28
12
|
return {
|
|
29
13
|
description,
|
|
30
14
|
script: code,
|
|
31
|
-
command: `python
|
|
15
|
+
command: `python -c <${scriptName}.py> ${args.join(" ")}`,
|
|
32
16
|
stdout,
|
|
33
17
|
stderr,
|
|
34
18
|
};
|
|
35
19
|
}
|
|
36
|
-
async function
|
|
20
|
+
export async function generateInputPreview(runtime, sandboxId, inputPath, datasetId, options = {}) {
|
|
37
21
|
const context = {
|
|
38
22
|
totalRows: 0,
|
|
39
23
|
};
|
|
@@ -57,7 +41,7 @@ try:
|
|
|
57
41
|
except Exception as e:
|
|
58
42
|
print(str(e))
|
|
59
43
|
`;
|
|
60
|
-
const meta = await runPythonSnippet(
|
|
44
|
+
const meta = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_count", countScript, [inputPath], "Counts number of JSONL records with type='row'");
|
|
61
45
|
context.metadata = meta;
|
|
62
46
|
try {
|
|
63
47
|
if (meta.stdout) {
|
|
@@ -92,8 +76,7 @@ try:
|
|
|
92
76
|
except Exception as e:
|
|
93
77
|
print(str(e))
|
|
94
78
|
`;
|
|
95
|
-
const head = await runPythonSnippet(
|
|
79
|
+
const head = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_head", headScript, [inputPath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
|
|
96
80
|
context.head = head;
|
|
97
81
|
return context;
|
|
98
82
|
}
|
|
99
|
-
//# sourceMappingURL=filepreview.js.map
|
|
@@ -1,3 +1,2 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset";
|
|
3
|
-
//# sourceMappingURL=index.d.ts.map
|
|
1
|
+
export { createTransformDatasetContext, registerTransformDatasetContext, type TransformDatasetAgentParams, type TransformDatasetContext, type TransformDatasetRunOptions, } from "./transform-dataset.agent.js";
|
|
2
|
+
export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset.js";
|
package/dist/transform/index.js
CHANGED
|
@@ -1,8 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
exports.transformDataset = exports.createTransformDatasetStory = void 0;
|
|
4
|
-
var transform_dataset_agent_1 = require("./transform-dataset.agent");
|
|
5
|
-
Object.defineProperty(exports, "createTransformDatasetStory", { enumerable: true, get: function () { return transform_dataset_agent_1.createTransformDatasetStory; } });
|
|
6
|
-
var transformDataset_1 = require("./transformDataset");
|
|
7
|
-
Object.defineProperty(exports, "transformDataset", { enumerable: true, get: function () { return transformDataset_1.transformDataset; } });
|
|
8
|
-
//# sourceMappingURL=index.js.map
|
|
1
|
+
export { createTransformDatasetContext, registerTransformDatasetContext, } from "./transform-dataset.agent.js";
|
|
2
|
+
export { transformDataset, } from "./transformDataset.js";
|
|
@@ -1,35 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
datasetId: string;
|
|
3
|
-
sourceDatasetIds: string[];
|
|
4
|
-
outputSchema: any;
|
|
5
|
-
sandboxConfig: {
|
|
6
|
-
sourcePaths: Array<{
|
|
7
|
-
datasetId: string;
|
|
8
|
-
path: string;
|
|
9
|
-
}>;
|
|
10
|
-
outputPath: string;
|
|
11
|
-
};
|
|
12
|
-
sourcePreviews?: Array<{
|
|
13
|
-
datasetId: string;
|
|
14
|
-
preview: {
|
|
15
|
-
totalRows: number;
|
|
16
|
-
metadata?: {
|
|
17
|
-
description: string;
|
|
18
|
-
script: string;
|
|
19
|
-
command: string;
|
|
20
|
-
stdout: string;
|
|
21
|
-
stderr: string;
|
|
22
|
-
};
|
|
23
|
-
head?: {
|
|
24
|
-
description: string;
|
|
25
|
-
script: string;
|
|
26
|
-
command: string;
|
|
27
|
-
stdout: string;
|
|
28
|
-
stderr: string;
|
|
29
|
-
};
|
|
30
|
-
};
|
|
31
|
-
}>;
|
|
32
|
-
errors: string[];
|
|
33
|
-
};
|
|
1
|
+
import type { TransformPromptContext } from "./transform-dataset.types.js";
|
|
34
2
|
export declare function buildTransformDatasetPrompt(context: TransformPromptContext): string;
|
|
35
|
-
//# sourceMappingURL=prompts.d.ts.map
|
|
@@ -1,45 +1,48 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.buildTransformDatasetPrompt = buildTransformDatasetPrompt;
|
|
4
|
-
const xmlbuilder2_1 = require("xmlbuilder2");
|
|
1
|
+
import { create } from "xmlbuilder2";
|
|
5
2
|
function buildRole() {
|
|
6
|
-
let xml =
|
|
3
|
+
let xml = create()
|
|
7
4
|
.ele("Role")
|
|
8
|
-
.txt("You are a dataset transformer. Your goal is to read one or more
|
|
5
|
+
.txt("You are a dataset transformer. Your goal is to read one or more input datasets/resources and produce a NEW dataset whose records conform exactly to the provided output schema.")
|
|
9
6
|
.up();
|
|
10
7
|
return xml.end({ prettyPrint: true, headless: true });
|
|
11
8
|
}
|
|
12
9
|
function buildGoal() {
|
|
13
|
-
let xml =
|
|
10
|
+
let xml = create()
|
|
14
11
|
.ele("Goal")
|
|
15
|
-
.txt("Transform the
|
|
12
|
+
.txt("Transform the input dataset(s) into a new dataset strictly matching the output schema. Use the lowest-cost direct completion tool that can produce the correct output. Use sandbox command execution only when commands are necessary to inspect, parse, aggregate, join, or compute over files/resources that cannot be handled directly from the visible context and previews.")
|
|
16
13
|
.up();
|
|
17
14
|
return xml.end({ prettyPrint: true, headless: true });
|
|
18
15
|
}
|
|
19
16
|
function buildContextSection(context) {
|
|
20
|
-
let xml =
|
|
17
|
+
let xml = create()
|
|
21
18
|
.ele("Context")
|
|
22
19
|
.ele("DatasetId").txt(context.datasetId).up();
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
20
|
+
if (context.contextResources && context.contextResources.length > 0) {
|
|
21
|
+
let resourcesXml = create().ele("ContextResources");
|
|
22
|
+
for (const resource of context.contextResources) {
|
|
23
|
+
resourcesXml = resourcesXml
|
|
24
|
+
.ele("Resource")
|
|
25
|
+
.ele("Key").txt(String(resource.key)).up()
|
|
26
|
+
.ele("Type").txt(String(resource.type)).up()
|
|
27
|
+
.ele("Name").txt(String(resource.name)).up()
|
|
28
|
+
.ele("Description").txt(String(resource.description)).up()
|
|
29
|
+
.ele("DescriptorJson").txt(JSON.stringify(resource, null, 2)).up()
|
|
30
|
+
.up();
|
|
31
|
+
}
|
|
32
|
+
xml = xml.import(resourcesXml.first());
|
|
34
33
|
}
|
|
34
|
+
let sandboxXml = create().ele("Sandbox");
|
|
35
|
+
sandboxXml = sandboxXml.ele("ContextResourcesPath").txt("/tmp/ekairos/contexts/{contextId}/resources").up();
|
|
36
|
+
sandboxXml = sandboxXml.ele("ResourcesManifest").txt("/tmp/ekairos/contexts/{contextId}/resources/manifest.json").up();
|
|
35
37
|
sandboxXml = sandboxXml.ele("OutputPath").txt(context.sandboxConfig.outputPath).up();
|
|
38
|
+
sandboxXml = sandboxXml.ele("Note").txt("Context resources are materialized lazily only when executeCommand is called. Do not assume resource files exist unless you are using executeCommand. If executeCommand is used, read the manifest path from os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] inside Python.").up();
|
|
36
39
|
xml = xml.import(sandboxXml.first());
|
|
37
|
-
if (context.
|
|
38
|
-
let previewsXml =
|
|
39
|
-
for (const
|
|
40
|
-
const sp =
|
|
41
|
-
let px =
|
|
42
|
-
.ele("DatasetId").txt(
|
|
40
|
+
if (context.inputPreviews && context.inputPreviews.length > 0) {
|
|
41
|
+
let previewsXml = create().ele("InputPreviews");
|
|
42
|
+
for (const inputPreviewInfo of context.inputPreviews) {
|
|
43
|
+
const sp = inputPreviewInfo.preview;
|
|
44
|
+
let px = create().ele("InputPreview")
|
|
45
|
+
.ele("DatasetId").txt(inputPreviewInfo.datasetId).up()
|
|
43
46
|
.ele("TotalRows").txt(String(sp.totalRows)).up();
|
|
44
47
|
if (sp.metadata) {
|
|
45
48
|
const m = sp.metadata;
|
|
@@ -71,7 +74,7 @@ function buildContextSection(context) {
|
|
|
71
74
|
xml = xml.import(previewsXml.first());
|
|
72
75
|
}
|
|
73
76
|
if (Array.isArray(context.errors) && context.errors.length > 0) {
|
|
74
|
-
let ex =
|
|
77
|
+
let ex = create().ele("PreviousErrors");
|
|
75
78
|
for (const e of context.errors) {
|
|
76
79
|
ex = ex.ele("Error").txt(e).up();
|
|
77
80
|
}
|
|
@@ -81,7 +84,7 @@ function buildContextSection(context) {
|
|
|
81
84
|
return xml.end({ prettyPrint: true, headless: true });
|
|
82
85
|
}
|
|
83
86
|
function buildOutputSchemaSection(context) {
|
|
84
|
-
let xml =
|
|
87
|
+
let xml = create()
|
|
85
88
|
.ele("OutputSchema")
|
|
86
89
|
.ele("JsonSchema").txt(JSON.stringify(context.outputSchema?.schema ?? context.outputSchema ?? {}, null, 2)).up()
|
|
87
90
|
.up();
|
|
@@ -89,39 +92,52 @@ function buildOutputSchemaSection(context) {
|
|
|
89
92
|
}
|
|
90
93
|
function buildInstructions(context) {
|
|
91
94
|
const outputPath = context.sandboxConfig.outputPath;
|
|
92
|
-
const
|
|
93
|
-
? "You have multiple
|
|
95
|
+
const multipleInputsNote = (context.contextResources?.length ?? context.inputDatasetIds.length) > 1
|
|
96
|
+
? "You have multiple context resources available. You may need to read, join, filter, or combine data from them to produce the output."
|
|
94
97
|
: "";
|
|
95
|
-
let xml =
|
|
98
|
+
let xml = create()
|
|
96
99
|
.ele("Instructions")
|
|
97
100
|
.ele("Workflow")
|
|
98
|
-
.ele("Step", { number: "1", name: "Inspect
|
|
99
|
-
.ele("Action").txt(`Review
|
|
101
|
+
.ele("Step", { number: "1", name: "Inspect Inputs" })
|
|
102
|
+
.ele("Action").txt(`Review ContextResources and any InputPreviews to understand current record structures, evidence, fields, shapes and edge cases. ${multipleInputsNote}`).up()
|
|
103
|
+
.ele("Note").txt("ContextResources DescriptorJson may include inline text, metadata, previewRows, or other visible evidence. Treat that visible content as already available context. Do not use executeCommand only to reread it.").up()
|
|
100
104
|
.up()
|
|
101
105
|
.ele("Step", { number: "2", name: "Plan Mapping" })
|
|
102
|
-
.ele("Action").txt("Plan a deterministic mapping from
|
|
103
|
-
.ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple
|
|
106
|
+
.ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
|
|
107
|
+
.ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple inputs, decide how to combine or relate them. Output field names must remain exactly as declared by the output schema.").up()
|
|
104
108
|
.up()
|
|
105
109
|
.ele("Step", { number: "3", name: "Transform" })
|
|
106
|
-
.ele("Action").txt("
|
|
107
|
-
.ele("Requirement").txt(
|
|
110
|
+
.ele("Action").txt("For single-object output, use completeObject with the final object. For row output, use replaceRows with the final rows. Use executeCommand only when command execution is necessary, not merely convenient.").up()
|
|
111
|
+
.ele("Requirement").txt("Do not call completeObject until you have constructed the complete data object. completeObject requires data; a summary-only call is invalid and wastes a model iteration.").up()
|
|
112
|
+
.ele("Requirement").txt("Command execution is necessary only when the final output cannot be produced directly from the provided context, resource descriptors, or previews, and requires running code to inspect, parse, aggregate, join, or compute over files/resources.").up()
|
|
113
|
+
.ele("Requirement").txt("If the final output can be written directly from context already visible to you, do not use executeCommand. Do not use executeCommand just to format JSON, build an object, write output.jsonl, or make completion easier.").up()
|
|
114
|
+
.ele("Requirement").txt("Before using executeCommand, verify that direct completion is insufficient: you need file/resource contents not already visible in DescriptorJson or previews, deterministic computation over many rows, parsing/aggregation that is unreliable to do directly, or output too large/repetitive for direct completion. If none apply, command execution is not needed.").up()
|
|
115
|
+
.ele("Requirement").txt("When using executeCommand, provide commandDescription before the script runs. It must describe the inputs/resources used, operation performed, expected output, and why a command is the right tool.").up()
|
|
116
|
+
.ele("Requirement").txt("When executeCommand is used, context resources are materialized before the script runs at /tmp/ekairos/contexts/{contextId}/resources. The Python process receives EKAIROS_CONTEXT_RESOURCES_DIR and EKAIROS_CONTEXT_RESOURCES_MANIFEST environment variables. Read os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] inside the script to discover exact files and metadata. Manifest entries expose files as resource['files'][index]['path'].").up()
|
|
117
|
+
.ele("Requirement").txt("If only some resources are needed for a command, pass resourceKeys with the specific ContextResources keys. Omit resourceKeys only when the script truly needs all resources.").up()
|
|
118
|
+
.ele("Requirement").txt(`If executeCommand is used, write file to: ${outputPath}`).up()
|
|
119
|
+
.ele("Requirement").txt("Every data object MUST use the exact property names from OutputSchema required/properties keys. Do not translate, localize, rename, or infer alternative field names.").up()
|
|
108
120
|
.ele("Requirement").txt("Do not print large data to stdout; only progress and summaries.").up()
|
|
121
|
+
.ele("Requirement").txt("Do not install packages, download dependencies, or access the network from executeCommand. Use only the available runtime and standard library unless a dependency is already present.").up()
|
|
109
122
|
.up()
|
|
110
123
|
.ele("Step", { number: "4", name: "Validate and Complete" })
|
|
111
|
-
.ele("Action").txt("
|
|
124
|
+
.ele("Action").txt("When using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
|
|
125
|
+
.ele("Behavior").txt("If any completion tool returns success:false, inspect validation details, repair the output, and call the appropriate completion tool again. Do not stop until a completion tool returns success:true.").up()
|
|
112
126
|
.up()
|
|
113
127
|
.up()
|
|
114
128
|
.ele("Rules")
|
|
115
129
|
.ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
|
|
116
|
-
.ele("Rule").txt("
|
|
130
|
+
.ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
|
|
131
|
+
.ele("Rule").txt("Use the cheapest correct tool. completeObject and replaceRows are low-cost completion tools. executeCommand is a high-cost computation tool and requires an explicit commandDescription.").up()
|
|
132
|
+
.ele("Rule").txt("If using output.jsonl, each line must be a standalone JSON object with {type:'row', data:{...}}.").up()
|
|
117
133
|
.ele("Rule").txt("Do not include headers, summaries, or metadata as records.").up()
|
|
118
|
-
.ele("Rule").txt("Be robust to malformed lines in
|
|
134
|
+
.ele("Rule").txt("Be robust to malformed lines in input: skip or sanitize, but do not crash.").up()
|
|
119
135
|
.up()
|
|
120
|
-
.ele("CurrentTask").txt("Transform
|
|
136
|
+
.ele("CurrentTask").txt("Transform input dataset(s) to match OutputSchema and complete the dataset with the appropriate available tool.").up()
|
|
121
137
|
.up();
|
|
122
138
|
return xml.end({ prettyPrint: true, headless: true });
|
|
123
139
|
}
|
|
124
|
-
function buildTransformDatasetPrompt(context) {
|
|
140
|
+
export function buildTransformDatasetPrompt(context) {
|
|
125
141
|
const sections = [];
|
|
126
142
|
sections.push(buildRole());
|
|
127
143
|
sections.push("");
|
|
@@ -134,4 +150,3 @@ function buildTransformDatasetPrompt(context) {
|
|
|
134
150
|
sections.push(buildInstructions(context));
|
|
135
151
|
return sections.join("\n");
|
|
136
152
|
}
|
|
137
|
-
//# sourceMappingURL=prompts.js.map
|
|
@@ -1,59 +1,34 @@
|
|
|
1
1
|
import { type ContextReactor } from "@ekairos/events";
|
|
2
|
-
import {
|
|
3
|
-
export type TransformDatasetContext
|
|
4
|
-
|
|
5
|
-
sourceDatasetIds: string[];
|
|
6
|
-
outputSchema: any;
|
|
7
|
-
sandboxConfig: {
|
|
8
|
-
sourcePaths: Array<{
|
|
9
|
-
datasetId: string;
|
|
10
|
-
path: string;
|
|
11
|
-
}>;
|
|
12
|
-
outputPath: string;
|
|
13
|
-
};
|
|
14
|
-
sourcePreviews?: Array<{
|
|
15
|
-
datasetId: string;
|
|
16
|
-
preview: TransformSourcePreviewContext;
|
|
17
|
-
}>;
|
|
18
|
-
errors: string[];
|
|
19
|
-
iterationCount: number;
|
|
20
|
-
instructions?: string;
|
|
21
|
-
};
|
|
22
|
-
export type TransformDatasetAgentParams = {
|
|
23
|
-
sourceDatasetIds: string[];
|
|
24
|
-
outputSchema: any;
|
|
25
|
-
instructions?: string;
|
|
26
|
-
datasetId?: string;
|
|
27
|
-
model?: string;
|
|
28
|
-
sandboxId?: string;
|
|
29
|
-
reactor?: ContextReactor<any, any>;
|
|
30
|
-
};
|
|
31
|
-
export type TransformDatasetResult = {
|
|
32
|
-
id: string;
|
|
33
|
-
status?: string;
|
|
34
|
-
title?: string;
|
|
35
|
-
schema?: any;
|
|
36
|
-
analysis?: any;
|
|
37
|
-
calculatedTotalRows?: number;
|
|
38
|
-
actualGeneratedRowCount?: number;
|
|
39
|
-
createdAt?: number;
|
|
40
|
-
updatedAt?: number;
|
|
41
|
-
};
|
|
42
|
-
export declare function createTransformDatasetStory<Env extends {
|
|
2
|
+
import type { TransformDatasetRunOptions, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
|
|
3
|
+
export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
|
|
4
|
+
export declare function createTransformDatasetContext<Env extends {
|
|
43
5
|
orgId: string;
|
|
44
6
|
}>(params: {
|
|
45
|
-
|
|
7
|
+
inputDatasetIds: string[];
|
|
46
8
|
outputSchema: any;
|
|
47
9
|
instructions?: string;
|
|
48
10
|
datasetId?: string;
|
|
49
11
|
model?: string;
|
|
50
12
|
sandboxId?: string;
|
|
51
13
|
reactor?: ContextReactor<any, any>;
|
|
14
|
+
sandboxState?: TransformSandboxState;
|
|
15
|
+
inputPreviews?: Array<{
|
|
16
|
+
datasetId: string;
|
|
17
|
+
preview: TransformInputPreviewContext;
|
|
18
|
+
}>;
|
|
19
|
+
contextResources?: any[];
|
|
52
20
|
}): {
|
|
53
21
|
datasetId: string;
|
|
54
|
-
transform(
|
|
22
|
+
transform(runtime: {
|
|
23
|
+
env: Env;
|
|
24
|
+
}, options?: TransformDatasetRunOptions): Promise<{
|
|
55
25
|
datasetId: string;
|
|
56
26
|
}>;
|
|
57
|
-
|
|
27
|
+
context: any;
|
|
58
28
|
};
|
|
59
|
-
|
|
29
|
+
export declare function registerTransformDatasetContext<Env extends {
|
|
30
|
+
orgId: string;
|
|
31
|
+
}>(opts?: {
|
|
32
|
+
model?: string;
|
|
33
|
+
reactor?: ContextReactor<any, any>;
|
|
34
|
+
}): void;
|