@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +347 -0
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +5 -0
- package/dist/builder/instructions.js +40 -0
- package/dist/builder/materialize.d.ts +83 -0
- package/dist/builder/materialize.js +548 -0
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +22 -0
- package/dist/builder/persistence.js +192 -0
- package/dist/builder/rows.d.ts +7 -0
- package/dist/builder/rows.js +56 -0
- package/dist/builder/schemaInference.d.ts +3 -0
- package/dist/builder/schemaInference.js +61 -0
- package/dist/builder/types.d.ts +144 -0
- package/dist/builder/types.js +1 -0
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +537 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +3 -67
- package/dist/dataset.js +129 -521
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/defineNotation.tool.d.ts +49 -0
- package/dist/defineNotation.tool.js +154 -0
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +181 -134
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +152 -32
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +9 -7
- package/dist/index.js +9 -23
- package/dist/materializeDataset.tool.d.ts +51 -31
- package/dist/materializeDataset.tool.js +81 -65
- package/dist/notation.d.ts +205 -0
- package/dist/notation.js +424 -0
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +24 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +15 -13
- package/dist/schema.js +27 -37
- package/dist/service.d.ts +12 -5
- package/dist/service.js +88 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +66 -46
- package/dist/transform/transform-dataset.agent.d.ts +20 -45
- package/dist/transform/transform-dataset.agent.js +151 -91
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +95 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +33 -8
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts +0 -22
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js +0 -29
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
package/dist/skill.js
CHANGED
|
@@ -1,48 +1,44 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.buildDatasetSkillPackage = buildDatasetSkillPackage;
|
|
7
|
-
const node_fs_1 = require("node:fs");
|
|
8
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
1
|
+
import { readdirSync, readFileSync, statSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
9
4
|
function walkFiles(rootDir, currentDir = rootDir) {
|
|
10
|
-
const entries =
|
|
5
|
+
const entries = readdirSync(currentDir, { withFileTypes: true });
|
|
11
6
|
const files = [];
|
|
12
7
|
for (const entry of entries) {
|
|
13
|
-
const absPath =
|
|
8
|
+
const absPath = path.join(currentDir, entry.name);
|
|
14
9
|
if (entry.isDirectory()) {
|
|
15
10
|
files.push(...walkFiles(rootDir, absPath));
|
|
16
11
|
continue;
|
|
17
12
|
}
|
|
18
13
|
if (entry.isFile()) {
|
|
19
|
-
files.push(
|
|
14
|
+
files.push(path.relative(rootDir, absPath).replace(/\\/g, "/"));
|
|
20
15
|
}
|
|
21
16
|
}
|
|
22
17
|
return files;
|
|
23
18
|
}
|
|
24
19
|
function resolveDatasetSkillRoot() {
|
|
25
|
-
const
|
|
20
|
+
const currentDir = path.dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
const fromDist = path.resolve(currentDir, "..", "skill");
|
|
26
22
|
if (statExists(fromDist))
|
|
27
23
|
return fromDist;
|
|
28
|
-
const fromSrc =
|
|
24
|
+
const fromSrc = path.resolve(currentDir, "..", "..", "skill");
|
|
29
25
|
if (statExists(fromSrc))
|
|
30
26
|
return fromSrc;
|
|
31
27
|
throw new Error("dataset_skill_root_not_found");
|
|
32
28
|
}
|
|
33
29
|
function statExists(candidate) {
|
|
34
30
|
try {
|
|
35
|
-
return
|
|
31
|
+
return statSync(candidate).isDirectory();
|
|
36
32
|
}
|
|
37
33
|
catch {
|
|
38
34
|
return false;
|
|
39
35
|
}
|
|
40
36
|
}
|
|
41
|
-
function buildDatasetSkillPackage() {
|
|
37
|
+
export function buildDatasetSkillPackage() {
|
|
42
38
|
const skillRoot = resolveDatasetSkillRoot();
|
|
43
39
|
const files = walkFiles(skillRoot).map((relativePath) => ({
|
|
44
40
|
path: relativePath,
|
|
45
|
-
contentBase64:
|
|
41
|
+
contentBase64: readFileSync(path.join(skillRoot, relativePath)).toString("base64"),
|
|
46
42
|
}));
|
|
47
43
|
return {
|
|
48
44
|
name: "dataset",
|
|
@@ -50,4 +46,3 @@ function buildDatasetSkillPackage() {
|
|
|
50
46
|
files,
|
|
51
47
|
};
|
|
52
48
|
}
|
|
53
|
-
//# sourceMappingURL=skill.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export type
|
|
1
|
+
export type TransformInputPreviewContext = {
|
|
2
2
|
totalRows: number;
|
|
3
3
|
metadata?: {
|
|
4
4
|
description: string;
|
|
@@ -18,6 +18,5 @@ export type TransformSourcePreviewContext = {
|
|
|
18
18
|
interface PreviewOptions {
|
|
19
19
|
headLines?: number;
|
|
20
20
|
}
|
|
21
|
-
export declare function
|
|
21
|
+
export declare function generateInputPreview(runtime: any, sandboxId: string, inputPath: string, datasetId: string, options?: PreviewOptions): Promise<TransformInputPreviewContext>;
|
|
22
22
|
export {};
|
|
23
|
-
//# sourceMappingURL=filepreview.d.ts.map
|
|
@@ -1,39 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.generateSourcePreview = generateSourcePreview;
|
|
4
|
-
const datasetFiles_1 = require("../datasetFiles");
|
|
5
|
-
const steps_1 = require("../sandbox/steps");
|
|
1
|
+
import { runDatasetSandboxCommandStep } from "../sandbox/steps.js";
|
|
6
2
|
const DEFAULT_HEAD_LINES = 50;
|
|
7
|
-
async function runPythonSnippet(
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
await (0, steps_1.writeDatasetSandboxFilesStep)({
|
|
11
|
-
env,
|
|
12
|
-
sandboxId,
|
|
13
|
-
files: [
|
|
14
|
-
{
|
|
15
|
-
path: scriptPath,
|
|
16
|
-
contentBase64: Buffer.from(code, "utf-8").toString("base64"),
|
|
17
|
-
},
|
|
18
|
-
],
|
|
19
|
-
});
|
|
20
|
-
const result = await (0, steps_1.runDatasetSandboxCommandStep)({
|
|
21
|
-
env,
|
|
3
|
+
async function runPythonSnippet(runtime, sandboxId, datasetId, scriptName, code, args, description) {
|
|
4
|
+
const result = await runDatasetSandboxCommandStep({
|
|
5
|
+
runtime,
|
|
22
6
|
sandboxId,
|
|
23
7
|
cmd: "python",
|
|
24
|
-
args: [
|
|
8
|
+
args: ["-c", code, ...args],
|
|
25
9
|
});
|
|
26
10
|
const stdout = result.stdout || "";
|
|
27
11
|
const stderr = result.stderr || "";
|
|
28
12
|
return {
|
|
29
13
|
description,
|
|
30
14
|
script: code,
|
|
31
|
-
command: `python
|
|
15
|
+
command: `python -c <${scriptName}.py> ${args.join(" ")}`,
|
|
32
16
|
stdout,
|
|
33
17
|
stderr,
|
|
34
18
|
};
|
|
35
19
|
}
|
|
36
|
-
async function
|
|
20
|
+
export async function generateInputPreview(runtime, sandboxId, inputPath, datasetId, options = {}) {
|
|
37
21
|
const context = {
|
|
38
22
|
totalRows: 0,
|
|
39
23
|
};
|
|
@@ -57,7 +41,7 @@ try:
|
|
|
57
41
|
except Exception as e:
|
|
58
42
|
print(str(e))
|
|
59
43
|
`;
|
|
60
|
-
const meta = await runPythonSnippet(
|
|
44
|
+
const meta = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_count", countScript, [inputPath], "Counts number of JSONL records with type='row'");
|
|
61
45
|
context.metadata = meta;
|
|
62
46
|
try {
|
|
63
47
|
if (meta.stdout) {
|
|
@@ -92,8 +76,7 @@ try:
|
|
|
92
76
|
except Exception as e:
|
|
93
77
|
print(str(e))
|
|
94
78
|
`;
|
|
95
|
-
const head = await runPythonSnippet(
|
|
79
|
+
const head = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_head", headScript, [inputPath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
|
|
96
80
|
context.head = head;
|
|
97
81
|
return context;
|
|
98
82
|
}
|
|
99
|
-
//# sourceMappingURL=filepreview.js.map
|
|
@@ -1,3 +1,2 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset";
|
|
3
|
-
//# sourceMappingURL=index.d.ts.map
|
|
1
|
+
export { createTransformDatasetContext, registerTransformDatasetContext, type TransformDatasetAgentParams, type TransformDatasetContext, type TransformDatasetRunOptions, } from "./transform-dataset.agent.js";
|
|
2
|
+
export { transformDataset, type TransformDatasetInput, type TransformDatasetResult, } from "./transformDataset.js";
|
package/dist/transform/index.js
CHANGED
|
@@ -1,8 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
exports.transformDataset = exports.createTransformDatasetStory = void 0;
|
|
4
|
-
var transform_dataset_agent_1 = require("./transform-dataset.agent");
|
|
5
|
-
Object.defineProperty(exports, "createTransformDatasetStory", { enumerable: true, get: function () { return transform_dataset_agent_1.createTransformDatasetStory; } });
|
|
6
|
-
var transformDataset_1 = require("./transformDataset");
|
|
7
|
-
Object.defineProperty(exports, "transformDataset", { enumerable: true, get: function () { return transformDataset_1.transformDataset; } });
|
|
8
|
-
//# sourceMappingURL=index.js.map
|
|
1
|
+
export { createTransformDatasetContext, registerTransformDatasetContext, } from "./transform-dataset.agent.js";
|
|
2
|
+
export { transformDataset, } from "./transformDataset.js";
|
|
@@ -1,35 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
datasetId: string;
|
|
3
|
-
sourceDatasetIds: string[];
|
|
4
|
-
outputSchema: any;
|
|
5
|
-
sandboxConfig: {
|
|
6
|
-
sourcePaths: Array<{
|
|
7
|
-
datasetId: string;
|
|
8
|
-
path: string;
|
|
9
|
-
}>;
|
|
10
|
-
outputPath: string;
|
|
11
|
-
};
|
|
12
|
-
sourcePreviews?: Array<{
|
|
13
|
-
datasetId: string;
|
|
14
|
-
preview: {
|
|
15
|
-
totalRows: number;
|
|
16
|
-
metadata?: {
|
|
17
|
-
description: string;
|
|
18
|
-
script: string;
|
|
19
|
-
command: string;
|
|
20
|
-
stdout: string;
|
|
21
|
-
stderr: string;
|
|
22
|
-
};
|
|
23
|
-
head?: {
|
|
24
|
-
description: string;
|
|
25
|
-
script: string;
|
|
26
|
-
command: string;
|
|
27
|
-
stdout: string;
|
|
28
|
-
stderr: string;
|
|
29
|
-
};
|
|
30
|
-
};
|
|
31
|
-
}>;
|
|
32
|
-
errors: string[];
|
|
33
|
-
};
|
|
1
|
+
import type { TransformPromptContext } from "./transform-dataset.types.js";
|
|
34
2
|
export declare function buildTransformDatasetPrompt(context: TransformPromptContext): string;
|
|
35
|
-
//# sourceMappingURL=prompts.d.ts.map
|
|
@@ -1,45 +1,48 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.buildTransformDatasetPrompt = buildTransformDatasetPrompt;
|
|
4
|
-
const xmlbuilder2_1 = require("xmlbuilder2");
|
|
1
|
+
import { create } from "xmlbuilder2";
|
|
5
2
|
function buildRole() {
|
|
6
|
-
let xml =
|
|
3
|
+
let xml = create()
|
|
7
4
|
.ele("Role")
|
|
8
|
-
.txt("You are a dataset transformer. Your goal is to read one or more
|
|
5
|
+
.txt("You are a dataset transformer. Your goal is to read one or more input datasets/resources and produce a NEW dataset whose records conform exactly to the provided output schema.")
|
|
9
6
|
.up();
|
|
10
7
|
return xml.end({ prettyPrint: true, headless: true });
|
|
11
8
|
}
|
|
12
9
|
function buildGoal() {
|
|
13
|
-
let xml =
|
|
10
|
+
let xml = create()
|
|
14
11
|
.ele("Goal")
|
|
15
|
-
.txt("Transform the
|
|
12
|
+
.txt("Transform the input dataset(s) into a new dataset strictly matching the output schema. Use the lowest-cost direct completion tool that can produce the correct output. Use sandbox command execution only when commands are necessary to inspect, parse, aggregate, join, or compute over files/resources that cannot be handled directly from the visible context and previews.")
|
|
16
13
|
.up();
|
|
17
14
|
return xml.end({ prettyPrint: true, headless: true });
|
|
18
15
|
}
|
|
19
16
|
function buildContextSection(context) {
|
|
20
|
-
let xml =
|
|
17
|
+
let xml = create()
|
|
21
18
|
.ele("Context")
|
|
22
19
|
.ele("DatasetId").txt(context.datasetId).up();
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
20
|
+
if (context.contextResources && context.contextResources.length > 0) {
|
|
21
|
+
let resourcesXml = create().ele("ContextResources");
|
|
22
|
+
for (const resource of context.contextResources) {
|
|
23
|
+
resourcesXml = resourcesXml
|
|
24
|
+
.ele("Resource")
|
|
25
|
+
.ele("Key").txt(String(resource.key)).up()
|
|
26
|
+
.ele("Type").txt(String(resource.type)).up()
|
|
27
|
+
.ele("Name").txt(String(resource.name)).up()
|
|
28
|
+
.ele("Description").txt(String(resource.description)).up()
|
|
29
|
+
.ele("DescriptorJson").txt(JSON.stringify(resource, null, 2)).up()
|
|
30
|
+
.up();
|
|
31
|
+
}
|
|
32
|
+
xml = xml.import(resourcesXml.first());
|
|
34
33
|
}
|
|
34
|
+
let sandboxXml = create().ele("Sandbox");
|
|
35
|
+
sandboxXml = sandboxXml.ele("ContextResourcesPath").txt("/tmp/ekairos/contexts/{contextId}/resources").up();
|
|
36
|
+
sandboxXml = sandboxXml.ele("ResourcesManifest").txt("/tmp/ekairos/contexts/{contextId}/resources/manifest.json").up();
|
|
35
37
|
sandboxXml = sandboxXml.ele("OutputPath").txt(context.sandboxConfig.outputPath).up();
|
|
38
|
+
sandboxXml = sandboxXml.ele("Note").txt("Context resources are materialized lazily only when executeCommand is called. Do not assume resource files exist unless you are using executeCommand. If executeCommand is used, read the manifest path from os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] inside Python.").up();
|
|
36
39
|
xml = xml.import(sandboxXml.first());
|
|
37
|
-
if (context.
|
|
38
|
-
let previewsXml =
|
|
39
|
-
for (const
|
|
40
|
-
const sp =
|
|
41
|
-
let px =
|
|
42
|
-
.ele("DatasetId").txt(
|
|
40
|
+
if (context.inputPreviews && context.inputPreviews.length > 0) {
|
|
41
|
+
let previewsXml = create().ele("InputPreviews");
|
|
42
|
+
for (const inputPreviewInfo of context.inputPreviews) {
|
|
43
|
+
const sp = inputPreviewInfo.preview;
|
|
44
|
+
let px = create().ele("InputPreview")
|
|
45
|
+
.ele("DatasetId").txt(inputPreviewInfo.datasetId).up()
|
|
43
46
|
.ele("TotalRows").txt(String(sp.totalRows)).up();
|
|
44
47
|
if (sp.metadata) {
|
|
45
48
|
const m = sp.metadata;
|
|
@@ -71,7 +74,7 @@ function buildContextSection(context) {
|
|
|
71
74
|
xml = xml.import(previewsXml.first());
|
|
72
75
|
}
|
|
73
76
|
if (Array.isArray(context.errors) && context.errors.length > 0) {
|
|
74
|
-
let ex =
|
|
77
|
+
let ex = create().ele("PreviousErrors");
|
|
75
78
|
for (const e of context.errors) {
|
|
76
79
|
ex = ex.ele("Error").txt(e).up();
|
|
77
80
|
}
|
|
@@ -81,7 +84,7 @@ function buildContextSection(context) {
|
|
|
81
84
|
return xml.end({ prettyPrint: true, headless: true });
|
|
82
85
|
}
|
|
83
86
|
function buildOutputSchemaSection(context) {
|
|
84
|
-
let xml =
|
|
87
|
+
let xml = create()
|
|
85
88
|
.ele("OutputSchema")
|
|
86
89
|
.ele("JsonSchema").txt(JSON.stringify(context.outputSchema?.schema ?? context.outputSchema ?? {}, null, 2)).up()
|
|
87
90
|
.up();
|
|
@@ -89,39 +92,57 @@ function buildOutputSchemaSection(context) {
|
|
|
89
92
|
}
|
|
90
93
|
function buildInstructions(context) {
|
|
91
94
|
const outputPath = context.sandboxConfig.outputPath;
|
|
92
|
-
const
|
|
93
|
-
? "You have multiple
|
|
95
|
+
const multipleInputsNote = (context.contextResources?.length ?? context.inputDatasetIds.length) > 1
|
|
96
|
+
? "You have multiple context resources available. You may need to read, join, filter, or combine data from them to produce the output."
|
|
94
97
|
: "";
|
|
95
|
-
let xml =
|
|
98
|
+
let xml = create()
|
|
96
99
|
.ele("Instructions")
|
|
97
100
|
.ele("Workflow")
|
|
98
|
-
.ele("Step", { number: "1", name: "Inspect
|
|
99
|
-
.ele("Action").txt(`Review
|
|
101
|
+
.ele("Step", { number: "1", name: "Inspect Inputs" })
|
|
102
|
+
.ele("Action").txt(`Review ContextResources and any InputPreviews to understand current record structures, evidence, fields, shapes and edge cases. ${multipleInputsNote}`).up()
|
|
103
|
+
.ele("Note").txt("ContextResources DescriptorJson may include inline text, metadata, previewRows, or other visible evidence. Treat that visible content as already available context. Do not use executeCommand only to reread it.").up()
|
|
104
|
+
.up()
|
|
105
|
+
.ele("Step", { number: "2", name: "Define the Output Dataset (PLAN FIRST)" })
|
|
106
|
+
.ele("Action").txt("Call defineNotation with the formal definition of the OUTPUT dataset as a set derived from the input sets: e.g. D = \\pi_{fields}(\\sigma_{condition}(A \\bowtie B)) or set-builder with quantifiers, in LaTeX. Declare the input sets, bound variables and the predicates the output set satisfies.").up()
|
|
107
|
+
.ele("Note").txt("The definition and the materialization (the transform code + output rows) are TWO CO-EQUAL FACES of the dataset; author the definition FIRST as the PLAN: it states which sets you draw from, how they combine (join, filter, project, aggregate) and which invariants the output keeps (e.g. totals preserved). The definition is a formal proposition we trust — predicates may be semantic. Only for purely arithmetic invariants you MAY add a checkJson for optional advisory evidence. REFINE the definition whenever inspection of the inputs reveals new sets, variables or corrections, and call defineNotation with final=true just before completing — as the RESULT it describes the produced output; any arithmetic predicates then get advisory evidence (never a verdict).").up()
|
|
100
108
|
.up()
|
|
101
|
-
.ele("Step", { number: "
|
|
102
|
-
.ele("Action").txt("Plan a deterministic mapping from
|
|
103
|
-
.ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple
|
|
109
|
+
.ele("Step", { number: "3", name: "Plan Mapping" })
|
|
110
|
+
.ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
|
|
111
|
+
.ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple inputs, decide how to combine or relate them. Output field names must remain exactly as declared by the output schema.").up()
|
|
104
112
|
.up()
|
|
105
|
-
.ele("Step", { number: "
|
|
106
|
-
.ele("Action").txt("
|
|
107
|
-
.ele("Requirement").txt(
|
|
113
|
+
.ele("Step", { number: "4", name: "Transform" })
|
|
114
|
+
.ele("Action").txt("For single-object output, use completeObject with the final object. For row output, use replaceRows with the final rows. Use executeCommand only when command execution is necessary, not merely convenient.").up()
|
|
115
|
+
.ele("Requirement").txt("Do not call completeObject until you have constructed the complete data object. completeObject requires data; a summary-only call is invalid and wastes a model iteration.").up()
|
|
116
|
+
.ele("Requirement").txt("Command execution is necessary only when the final output cannot be produced directly from the provided context, resource descriptors, or previews, and requires running code to inspect, parse, aggregate, join, or compute over files/resources.").up()
|
|
117
|
+
.ele("Requirement").txt("If the final output can be written directly from context already visible to you, do not use executeCommand. Do not use executeCommand just to format JSON, build an object, write output.jsonl, or make completion easier.").up()
|
|
118
|
+
.ele("Requirement").txt("Before using executeCommand, verify that direct completion is insufficient: you need file/resource contents not already visible in DescriptorJson or previews, deterministic computation over many rows, parsing/aggregation that is unreliable to do directly, or output too large/repetitive for direct completion. If none apply, command execution is not needed.").up()
|
|
119
|
+
.ele("Requirement").txt("When using executeCommand, provide commandDescription before the script runs. It must describe the inputs/resources used, operation performed, expected output, and why a command is the right tool.").up()
|
|
120
|
+
.ele("Requirement").txt("When executeCommand is used, context resources are materialized before the script runs at /tmp/ekairos/contexts/{contextId}/resources. The Python process receives EKAIROS_CONTEXT_RESOURCES_DIR and EKAIROS_CONTEXT_RESOURCES_MANIFEST environment variables. Read os.environ['EKAIROS_CONTEXT_RESOURCES_MANIFEST'] inside the script to discover exact files and metadata. Manifest entries expose files as resource['files'][index]['path'].").up()
|
|
121
|
+
.ele("Requirement").txt("If only some resources are needed for a command, pass resourceKeys with the specific ContextResources keys. Omit resourceKeys only when the script truly needs all resources.").up()
|
|
122
|
+
.ele("Requirement").txt(`If executeCommand is used, write file to: ${outputPath}`).up()
|
|
123
|
+
.ele("Requirement").txt("Every data object MUST use the exact property names from OutputSchema required/properties keys. Do not translate, localize, rename, or infer alternative field names.").up()
|
|
108
124
|
.ele("Requirement").txt("Do not print large data to stdout; only progress and summaries.").up()
|
|
125
|
+
.ele("Requirement").txt("Do not install packages, download dependencies, or access the network from executeCommand. Use only the available runtime and standard library unless a dependency is already present.").up()
|
|
109
126
|
.up()
|
|
110
|
-
.ele("Step", { number: "
|
|
111
|
-
.ele("Action").txt("Call completeDataset to validate against the output schema and mark as completed.").up()
|
|
127
|
+
.ele("Step", { number: "5", name: "Validate and Complete" })
|
|
128
|
+
.ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced output), then: when using completeObject or replaceRows, no separate completeDataset call is needed. When using executeCommand, call completeDataset to validate against the output schema and mark as completed.").up()
|
|
129
|
+
.ele("Behavior").txt("If any completion tool returns success:false, inspect validation details, repair the output, and call the appropriate completion tool again. Do not stop until a completion tool returns success:true.").up()
|
|
112
130
|
.up()
|
|
113
131
|
.up()
|
|
114
132
|
.ele("Rules")
|
|
133
|
+
.ele("Rule").txt("The formal definition (defineNotation) and the materialization (transform code + output rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completing.").up()
|
|
115
134
|
.ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
|
|
116
|
-
.ele("Rule").txt("
|
|
135
|
+
.ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
|
|
136
|
+
.ele("Rule").txt("Use the cheapest correct tool. completeObject and replaceRows are low-cost completion tools. executeCommand is a high-cost computation tool and requires an explicit commandDescription.").up()
|
|
137
|
+
.ele("Rule").txt("If using output.jsonl, each line must be a standalone JSON object with {type:'row', data:{...}}.").up()
|
|
117
138
|
.ele("Rule").txt("Do not include headers, summaries, or metadata as records.").up()
|
|
118
|
-
.ele("Rule").txt("Be robust to malformed lines in
|
|
139
|
+
.ele("Rule").txt("Be robust to malformed lines in input: skip or sanitize, but do not crash.").up()
|
|
119
140
|
.up()
|
|
120
|
-
.ele("CurrentTask").txt("Transform
|
|
141
|
+
.ele("CurrentTask").txt("Transform input dataset(s) to match OutputSchema and complete the dataset with the appropriate available tool.").up()
|
|
121
142
|
.up();
|
|
122
143
|
return xml.end({ prettyPrint: true, headless: true });
|
|
123
144
|
}
|
|
124
|
-
function buildTransformDatasetPrompt(context) {
|
|
145
|
+
export function buildTransformDatasetPrompt(context) {
|
|
125
146
|
const sections = [];
|
|
126
147
|
sections.push(buildRole());
|
|
127
148
|
sections.push("");
|
|
@@ -134,4 +155,3 @@ function buildTransformDatasetPrompt(context) {
|
|
|
134
155
|
sections.push(buildInstructions(context));
|
|
135
156
|
return sections.join("\n");
|
|
136
157
|
}
|
|
137
|
-
//# sourceMappingURL=prompts.js.map
|
|
@@ -1,59 +1,34 @@
|
|
|
1
1
|
import { type ContextReactor } from "@ekairos/events";
|
|
2
|
-
import {
|
|
3
|
-
export type TransformDatasetContext
|
|
4
|
-
|
|
5
|
-
sourceDatasetIds: string[];
|
|
6
|
-
outputSchema: any;
|
|
7
|
-
sandboxConfig: {
|
|
8
|
-
sourcePaths: Array<{
|
|
9
|
-
datasetId: string;
|
|
10
|
-
path: string;
|
|
11
|
-
}>;
|
|
12
|
-
outputPath: string;
|
|
13
|
-
};
|
|
14
|
-
sourcePreviews?: Array<{
|
|
15
|
-
datasetId: string;
|
|
16
|
-
preview: TransformSourcePreviewContext;
|
|
17
|
-
}>;
|
|
18
|
-
errors: string[];
|
|
19
|
-
iterationCount: number;
|
|
20
|
-
instructions?: string;
|
|
21
|
-
};
|
|
22
|
-
export type TransformDatasetAgentParams = {
|
|
23
|
-
sourceDatasetIds: string[];
|
|
24
|
-
outputSchema: any;
|
|
25
|
-
instructions?: string;
|
|
26
|
-
datasetId?: string;
|
|
27
|
-
model?: string;
|
|
28
|
-
sandboxId?: string;
|
|
29
|
-
reactor?: ContextReactor<any, any>;
|
|
30
|
-
};
|
|
31
|
-
export type TransformDatasetResult = {
|
|
32
|
-
id: string;
|
|
33
|
-
status?: string;
|
|
34
|
-
title?: string;
|
|
35
|
-
schema?: any;
|
|
36
|
-
analysis?: any;
|
|
37
|
-
calculatedTotalRows?: number;
|
|
38
|
-
actualGeneratedRowCount?: number;
|
|
39
|
-
createdAt?: number;
|
|
40
|
-
updatedAt?: number;
|
|
41
|
-
};
|
|
42
|
-
export declare function createTransformDatasetStory<Env extends {
|
|
2
|
+
import type { TransformDatasetRunOptions, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
|
|
3
|
+
export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
|
|
4
|
+
export declare function createTransformDatasetContext<Env extends {
|
|
43
5
|
orgId: string;
|
|
44
6
|
}>(params: {
|
|
45
|
-
|
|
7
|
+
inputDatasetIds: string[];
|
|
46
8
|
outputSchema: any;
|
|
47
9
|
instructions?: string;
|
|
48
10
|
datasetId?: string;
|
|
49
11
|
model?: string;
|
|
50
12
|
sandboxId?: string;
|
|
51
13
|
reactor?: ContextReactor<any, any>;
|
|
14
|
+
sandboxState?: TransformSandboxState;
|
|
15
|
+
inputPreviews?: Array<{
|
|
16
|
+
datasetId: string;
|
|
17
|
+
preview: TransformInputPreviewContext;
|
|
18
|
+
}>;
|
|
19
|
+
contextResources?: any[];
|
|
52
20
|
}): {
|
|
53
21
|
datasetId: string;
|
|
54
|
-
transform(
|
|
22
|
+
transform(runtime: {
|
|
23
|
+
env: Env;
|
|
24
|
+
}, options?: TransformDatasetRunOptions): Promise<{
|
|
55
25
|
datasetId: string;
|
|
56
26
|
}>;
|
|
57
|
-
|
|
27
|
+
context: any;
|
|
58
28
|
};
|
|
59
|
-
|
|
29
|
+
export declare function registerTransformDatasetContext<Env extends {
|
|
30
|
+
orgId: string;
|
|
31
|
+
}>(opts?: {
|
|
32
|
+
model?: string;
|
|
33
|
+
reactor?: ContextReactor<any, any>;
|
|
34
|
+
}): void;
|