@ekairos/dataset 1.22.40-beta.development.0 → 1.22.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +4 -5
- package/dist/builder/instructions.js +15 -21
- package/dist/builder/materialize.d.ts +77 -10
- package/dist/builder/materialize.js +495 -152
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +10 -6
- package/dist/builder/persistence.js +107 -62
- package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -1
- package/dist/builder/{sourceRows.js → rows.js} +3 -9
- package/dist/builder/schemaInference.d.ts +1 -2
- package/dist/builder/schemaInference.js +4 -12
- package/dist/builder/types.d.ts +41 -26
- package/dist/builder/types.js +1 -3
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +537 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +2 -3
- package/dist/dataset.js +73 -51
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/defineNotation.tool.d.ts +49 -0
- package/dist/defineNotation.tool.js +154 -0
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +182 -136
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +152 -32
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +9 -7
- package/dist/index.js +9 -23
- package/dist/materializeDataset.tool.d.ts +35 -28
- package/dist/materializeDataset.tool.js +74 -68
- package/dist/notation.d.ts +205 -0
- package/dist/notation.js +424 -0
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +24 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +14 -3
- package/dist/schema.js +27 -26
- package/dist/service.d.ts +12 -5
- package/dist/service.js +88 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +66 -46
- package/dist/transform/transform-dataset.agent.d.ts +21 -46
- package/dist/transform/transform-dataset.agent.js +152 -93
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +96 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +33 -8
- package/dist/builder/instructions.d.ts.map +0 -1
- package/dist/builder/instructions.js.map +0 -1
- package/dist/builder/materialize.d.ts.map +0 -1
- package/dist/builder/materialize.js.map +0 -1
- package/dist/builder/persistence.d.ts.map +0 -1
- package/dist/builder/persistence.js.map +0 -1
- package/dist/builder/schemaInference.d.ts.map +0 -1
- package/dist/builder/schemaInference.js.map +0 -1
- package/dist/builder/sourceRows.d.ts.map +0 -1
- package/dist/builder/sourceRows.js.map +0 -1
- package/dist/builder/types.d.ts.map +0 -1
- package/dist/builder/types.js.map +0 -1
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts +0 -22
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js +0 -29
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
|
@@ -1,25 +1,150 @@
|
|
|
1
1
|
interface CompleteDatasetToolParams {
|
|
2
2
|
datasetId: string;
|
|
3
3
|
sandboxId: string;
|
|
4
|
-
|
|
4
|
+
runtime: any;
|
|
5
|
+
outputPath?: string;
|
|
5
6
|
}
|
|
6
|
-
export declare function createCompleteDatasetTool({ datasetId, sandboxId,
|
|
7
|
+
export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }: CompleteDatasetToolParams): import("ai").Tool<{
|
|
7
8
|
summary: string;
|
|
8
9
|
}, {
|
|
10
|
+
rowSource: string;
|
|
11
|
+
outputPath: string;
|
|
12
|
+
storagePath: string;
|
|
9
13
|
success: boolean;
|
|
10
|
-
validation?:
|
|
14
|
+
validation?: {
|
|
11
15
|
index: number;
|
|
12
16
|
valid: boolean;
|
|
13
17
|
errors?: string[];
|
|
14
|
-
|
|
18
|
+
errorDetails?: Array<{
|
|
19
|
+
path: string;
|
|
20
|
+
keyword: string;
|
|
21
|
+
message: string;
|
|
22
|
+
params?: Record<string, unknown>;
|
|
23
|
+
schemaPath?: string;
|
|
24
|
+
}>;
|
|
25
|
+
dataKeys?: string[];
|
|
26
|
+
}[];
|
|
27
|
+
validationTruncated?: number;
|
|
28
|
+
failureSummary?: {
|
|
29
|
+
rowRecordCount: number;
|
|
30
|
+
validRowCount: number;
|
|
31
|
+
invalidRowCount: number;
|
|
32
|
+
expectedTopLevelKeys: string[];
|
|
33
|
+
requiredTopLevelKeys: string[];
|
|
34
|
+
requiredPaths: string[];
|
|
35
|
+
enumConstraints: Array<{
|
|
36
|
+
path: string;
|
|
37
|
+
values: unknown[];
|
|
38
|
+
}>;
|
|
39
|
+
topErrors: Array<{
|
|
40
|
+
message: string;
|
|
41
|
+
count: number;
|
|
42
|
+
}>;
|
|
43
|
+
missingRequiredProperties: Array<{
|
|
44
|
+
property: string;
|
|
45
|
+
count: number;
|
|
46
|
+
}>;
|
|
47
|
+
additionalProperties: Array<{
|
|
48
|
+
property: string;
|
|
49
|
+
count: number;
|
|
50
|
+
}>;
|
|
51
|
+
enumFailures: Array<{
|
|
52
|
+
path: string;
|
|
53
|
+
allowedValues: unknown[];
|
|
54
|
+
count: number;
|
|
55
|
+
}>;
|
|
56
|
+
observedTopLevelKeys: string[];
|
|
57
|
+
sampleInvalidRows: Array<{
|
|
58
|
+
index: number;
|
|
59
|
+
dataKeys?: string[];
|
|
60
|
+
errors?: string[];
|
|
61
|
+
}>;
|
|
62
|
+
};
|
|
63
|
+
repairInstructions?: string[];
|
|
15
64
|
validRowCount?: number;
|
|
65
|
+
rowRecordCount?: number;
|
|
16
66
|
error?: string;
|
|
67
|
+
status?: string;
|
|
68
|
+
message?: string;
|
|
69
|
+
validRows?: undefined;
|
|
70
|
+
dataFileId?: undefined;
|
|
71
|
+
records?: undefined;
|
|
72
|
+
summary?: undefined;
|
|
17
73
|
} | {
|
|
18
74
|
success: boolean;
|
|
19
|
-
|
|
20
|
-
|
|
75
|
+
status: string;
|
|
76
|
+
rowSource: string;
|
|
77
|
+
validRows: number;
|
|
78
|
+
rowRecordCount: number;
|
|
79
|
+
validation: {
|
|
80
|
+
index: number;
|
|
81
|
+
valid: boolean;
|
|
82
|
+
errors?: string[];
|
|
83
|
+
errorDetails?: Array<{
|
|
84
|
+
path: string;
|
|
85
|
+
keyword: string;
|
|
86
|
+
message: string;
|
|
87
|
+
params?: Record<string, unknown>;
|
|
88
|
+
schemaPath?: string;
|
|
89
|
+
}>;
|
|
90
|
+
dataKeys?: string[];
|
|
91
|
+
}[] | undefined;
|
|
92
|
+
error: string;
|
|
93
|
+
message: string;
|
|
94
|
+
outputPath: string;
|
|
21
95
|
storagePath: string;
|
|
96
|
+
dataFileId?: undefined;
|
|
97
|
+
records?: undefined;
|
|
98
|
+
summary?: undefined;
|
|
99
|
+
} | {
|
|
100
|
+
success: boolean;
|
|
101
|
+
status: string;
|
|
102
|
+
rowSource: string;
|
|
103
|
+
validRows: number;
|
|
104
|
+
rowRecordCount: number;
|
|
105
|
+
validation: {
|
|
106
|
+
index: number;
|
|
107
|
+
valid: boolean;
|
|
108
|
+
errors?: string[];
|
|
109
|
+
errorDetails?: Array<{
|
|
110
|
+
path: string;
|
|
111
|
+
keyword: string;
|
|
112
|
+
message: string;
|
|
113
|
+
params?: Record<string, unknown>;
|
|
114
|
+
schemaPath?: string;
|
|
115
|
+
}>;
|
|
116
|
+
dataKeys?: string[];
|
|
117
|
+
}[] | undefined;
|
|
118
|
+
error: string;
|
|
22
119
|
message: string;
|
|
120
|
+
outputPath: string;
|
|
121
|
+
storagePath: string;
|
|
122
|
+
dataFileId: string;
|
|
123
|
+
records?: undefined;
|
|
124
|
+
summary?: undefined;
|
|
125
|
+
} | {
|
|
126
|
+
success: boolean;
|
|
127
|
+
status: string;
|
|
128
|
+
rowSource: string;
|
|
129
|
+
records: number;
|
|
130
|
+
summary: string;
|
|
131
|
+
outputPath: string;
|
|
132
|
+
storagePath: string;
|
|
133
|
+
dataFileId: string;
|
|
134
|
+
validRows?: undefined;
|
|
135
|
+
rowRecordCount?: undefined;
|
|
136
|
+
validation?: undefined;
|
|
137
|
+
error?: undefined;
|
|
138
|
+
message?: undefined;
|
|
23
139
|
}>;
|
|
140
|
+
export declare function didCompleteDatasetSucceed(event: {
|
|
141
|
+
content?: {
|
|
142
|
+
parts?: any[];
|
|
143
|
+
};
|
|
144
|
+
}): boolean;
|
|
145
|
+
export declare function getDatasetFatalFailure(event: {
|
|
146
|
+
content?: {
|
|
147
|
+
parts?: any[];
|
|
148
|
+
};
|
|
149
|
+
}): string | null;
|
|
24
150
|
export {};
|
|
25
|
-
//# sourceMappingURL=completeDataset.tool.d.ts.map
|
|
@@ -1,212 +1,66 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
}
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
const steps_1 = require("./sandbox/steps");
|
|
10
|
-
const ajv_1 = __importDefault(require("ajv"));
|
|
11
|
-
const datasetFiles_1 = require("./datasetFiles");
|
|
12
|
-
const steps_2 = require("./dataset/steps");
|
|
13
|
-
let ajvInstance = null;
|
|
14
|
-
function getAjv() {
|
|
15
|
-
if (!ajvInstance) {
|
|
16
|
-
ajvInstance = new ajv_1.default({
|
|
17
|
-
allErrors: true,
|
|
18
|
-
strict: false,
|
|
19
|
-
});
|
|
20
|
-
}
|
|
21
|
-
return ajvInstance;
|
|
22
|
-
}
|
|
23
|
-
function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
24
|
-
return (0, ai_1.tool)({
|
|
25
|
-
description: "Mark the dataset as completed. Use only when output.jsonl has been successfully generated and is ready for validation.",
|
|
26
|
-
inputSchema: zod_1.z.object({
|
|
27
|
-
summary: zod_1.z.string().describe("Summary of the completed dataset including record count and structure"),
|
|
1
|
+
import { tool } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { persistDatasetStep } from "./completeDataset.steps.js";
|
|
4
|
+
export function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }) {
|
|
5
|
+
return tool({
|
|
6
|
+
description: "Validate and complete the dataset from output.jsonl. The result includes the JSONL outputPath and storagePath used for completion.",
|
|
7
|
+
inputSchema: z.object({
|
|
8
|
+
summary: z.string().describe("Summary of the completed dataset including record count and structure"),
|
|
28
9
|
}),
|
|
29
10
|
execute: async ({ summary }) => {
|
|
30
11
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
31
12
|
console.log(`[Dataset ${datasetId}] Tool: completeDataset`);
|
|
32
13
|
console.log(`[Dataset ${datasetId}] Summary: ${summary}`);
|
|
33
14
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
catch (error) {
|
|
39
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
40
|
-
console.error(`[Dataset ${datasetId}] Missing output file:`, message);
|
|
41
|
-
return {
|
|
42
|
-
success: false,
|
|
43
|
-
error: message,
|
|
44
|
-
};
|
|
45
|
-
}
|
|
46
|
-
console.log(`[Dataset ${datasetId}] Validating dataset rows against schema`);
|
|
47
|
-
const datasetResult = await (0, steps_2.datasetGetByIdStep)({ env, datasetId });
|
|
48
|
-
if (!datasetResult.ok) {
|
|
49
|
-
console.error(`[Dataset ${datasetId}] ${datasetResult.error}`);
|
|
50
|
-
return {
|
|
51
|
-
success: false,
|
|
52
|
-
error: datasetResult.error,
|
|
53
|
-
};
|
|
54
|
-
}
|
|
55
|
-
const datasetRecord = datasetResult.data;
|
|
56
|
-
if (!datasetRecord.schema) {
|
|
57
|
-
console.error(`[Dataset ${datasetId}] Schema not found in database`);
|
|
58
|
-
return {
|
|
59
|
-
success: false,
|
|
60
|
-
error: "Schema not found in database. Please generate schema first.",
|
|
61
|
-
};
|
|
62
|
-
}
|
|
63
|
-
const schemaJson = datasetRecord.schema.schema;
|
|
64
|
-
let validator;
|
|
65
|
-
try {
|
|
66
|
-
validator = getAjv().compile(schemaJson);
|
|
67
|
-
}
|
|
68
|
-
catch (error) {
|
|
69
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
70
|
-
console.error(`[Dataset ${datasetId}] Failed to compile schema:`, message);
|
|
71
|
-
return {
|
|
72
|
-
success: false,
|
|
73
|
-
error: `Failed to compile schema: ${message}`,
|
|
74
|
-
};
|
|
75
|
-
}
|
|
76
|
-
const validationResult = await validateJsonlRows({
|
|
77
|
-
env,
|
|
15
|
+
return await persistDatasetStep({
|
|
16
|
+
runtime,
|
|
17
|
+
datasetId,
|
|
78
18
|
sandboxId,
|
|
19
|
+
summary,
|
|
79
20
|
outputPath,
|
|
80
|
-
validator,
|
|
81
|
-
datasetId,
|
|
82
|
-
});
|
|
83
|
-
if (!validationResult.success) {
|
|
84
|
-
return validationResult;
|
|
85
|
-
}
|
|
86
|
-
const totalValidRows = validationResult.validRowCount;
|
|
87
|
-
console.log(`[Dataset ${datasetId}] Reading file content for upload`);
|
|
88
|
-
const fileRead = await (0, steps_1.readDatasetSandboxFileStep)({ env, sandboxId, path: outputPath });
|
|
89
|
-
if (!fileRead.contentBase64) {
|
|
90
|
-
console.error(`[Dataset ${datasetId}] Empty file content`);
|
|
91
|
-
return {
|
|
92
|
-
success: false,
|
|
93
|
-
error: "Empty file content",
|
|
94
|
-
};
|
|
95
|
-
}
|
|
96
|
-
const fileBuffer = Buffer.from(fileRead.contentBase64, "base64");
|
|
97
|
-
console.log(`[Dataset ${datasetId}] Uploading file to InstantDB storage`);
|
|
98
|
-
const uploadResult = await (0, steps_2.datasetUploadOutputFileStep)({ env, datasetId, fileBuffer });
|
|
99
|
-
if (!uploadResult.ok) {
|
|
100
|
-
console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
|
|
101
|
-
return {
|
|
102
|
-
success: false,
|
|
103
|
-
error: uploadResult.error,
|
|
104
|
-
};
|
|
105
|
-
}
|
|
106
|
-
console.log(`[Dataset ${datasetId}] File uploaded successfully: ${uploadResult.data.fileId}`);
|
|
107
|
-
const statusResult = await (0, steps_2.datasetUpdateStatusStep)({
|
|
108
|
-
env,
|
|
109
|
-
datasetId,
|
|
110
|
-
status: "completed",
|
|
111
|
-
calculatedTotalRows: totalValidRows,
|
|
112
|
-
actualGeneratedRowCount: totalValidRows,
|
|
113
21
|
});
|
|
114
|
-
if (!statusResult.ok) {
|
|
115
|
-
console.error(`[Dataset ${datasetId}] Failed to update status: ${statusResult.error}`);
|
|
116
|
-
return {
|
|
117
|
-
success: false,
|
|
118
|
-
error: statusResult.error,
|
|
119
|
-
};
|
|
120
|
-
}
|
|
121
|
-
console.log(`[Dataset ${datasetId}] Dataset marked as COMPLETED (${totalValidRows} valid rows)`);
|
|
122
|
-
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
123
|
-
return {
|
|
124
|
-
success: true,
|
|
125
|
-
validRows: totalValidRows,
|
|
126
|
-
fileId: uploadResult.data.fileId,
|
|
127
|
-
storagePath: uploadResult.data.storagePath,
|
|
128
|
-
message: "Dataset creation completed and uploaded to storage",
|
|
129
|
-
};
|
|
130
22
|
},
|
|
131
23
|
});
|
|
132
24
|
}
|
|
133
|
-
|
|
134
|
-
const
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
if (result.exitCode !== 0) {
|
|
141
|
-
throw new Error(`Required file not found: ${path}`);
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
async function validateJsonlRows({ env, sandboxId, outputPath, validator, datasetId }) {
|
|
145
|
-
const validation = [];
|
|
146
|
-
let validRowCount = 0;
|
|
147
|
-
console.log(`[Dataset ${datasetId}] Reading and validating JSONL file from sandbox`);
|
|
148
|
-
const fileRead = await (0, steps_1.readDatasetSandboxFileStep)({ env, sandboxId, path: outputPath });
|
|
149
|
-
if (!fileRead.contentBase64) {
|
|
150
|
-
console.log(`[Dataset ${datasetId}] Empty output file`);
|
|
151
|
-
return { success: true, validation, validRowCount: 0 };
|
|
152
|
-
}
|
|
153
|
-
const fileContent = Buffer.from(fileRead.contentBase64, "base64").toString();
|
|
154
|
-
const lines = fileContent.split("\n");
|
|
155
|
-
console.log(`[Dataset ${datasetId}] Validating ${lines.length} lines`);
|
|
156
|
-
for (let index = 0; index < lines.length; index++) {
|
|
157
|
-
const line = lines[index];
|
|
158
|
-
const trimmed = line.trim();
|
|
159
|
-
if (trimmed.length === 0) {
|
|
160
|
-
continue;
|
|
25
|
+
export function didCompleteDatasetSucceed(event) {
|
|
26
|
+
const parts = Array.isArray(event?.content?.parts) ? event.content.parts : [];
|
|
27
|
+
return parts.some((part) => {
|
|
28
|
+
if (part?.type === "action" &&
|
|
29
|
+
["completeDataset", "completeObject", "replaceRows"].includes(part?.content?.actionName)) {
|
|
30
|
+
const output = part.content.output;
|
|
31
|
+
return part.content.status === "completed" && output?.success === true && output?.status === "completed";
|
|
161
32
|
}
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
33
|
+
if (part?.type === "tool-completeDataset" ||
|
|
34
|
+
part?.type === "tool-completeObject" ||
|
|
35
|
+
part?.type === "tool-replaceRows") {
|
|
36
|
+
const output = part.output ?? part.result;
|
|
37
|
+
return part.state === "output-available" && output?.success === true && output?.status === "completed";
|
|
165
38
|
}
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
39
|
+
return false;
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
export function getDatasetFatalFailure(event) {
|
|
43
|
+
const parts = Array.isArray(event?.content?.parts) ? event.content.parts : [];
|
|
44
|
+
for (const part of parts) {
|
|
45
|
+
let actionName;
|
|
46
|
+
let output;
|
|
47
|
+
if (part?.type === "action") {
|
|
48
|
+
actionName = part.content?.actionName;
|
|
49
|
+
output = part.content?.output;
|
|
177
50
|
}
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
index,
|
|
182
|
-
valid: false,
|
|
183
|
-
errors: ["Missing 'data' field"],
|
|
184
|
-
});
|
|
185
|
-
continue;
|
|
51
|
+
else if (typeof part?.type === "string" && part.type.startsWith("tool-")) {
|
|
52
|
+
actionName = part.type.slice("tool-".length);
|
|
53
|
+
output = part.output ?? part.result;
|
|
186
54
|
}
|
|
187
|
-
|
|
188
|
-
if (!valid) {
|
|
189
|
-
const errors = Array.isArray(validator.errors)
|
|
190
|
-
? validator.errors.map((err) => err.message || "Unknown validation error")
|
|
191
|
-
: ["Unknown validation error"];
|
|
192
|
-
validation.push({
|
|
193
|
-
index,
|
|
194
|
-
valid: false,
|
|
195
|
-
errors,
|
|
196
|
-
});
|
|
55
|
+
if (!output || output.fatal !== true) {
|
|
197
56
|
continue;
|
|
198
57
|
}
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
58
|
+
const message = typeof output.error === "string" && output.error.trim()
|
|
59
|
+
? output.error.trim()
|
|
60
|
+
: typeof output.message === "string" && output.message.trim()
|
|
61
|
+
? output.message.trim()
|
|
62
|
+
: "Dataset action failed fatally";
|
|
63
|
+
return actionName ? `${actionName}: ${message}` : message;
|
|
204
64
|
}
|
|
205
|
-
|
|
206
|
-
return {
|
|
207
|
-
success: true,
|
|
208
|
-
validation,
|
|
209
|
-
validRowCount,
|
|
210
|
-
};
|
|
65
|
+
return null;
|
|
211
66
|
}
|
|
212
|
-
//# sourceMappingURL=completeDataset.tool.js.map
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { StoredContextResource } from "@ekairos/events";
|
|
2
|
+
type MaterializedContextResource = {
|
|
3
|
+
key: string;
|
|
4
|
+
type: string;
|
|
5
|
+
name: string;
|
|
6
|
+
description: string;
|
|
7
|
+
dir: string;
|
|
8
|
+
metadataPath: string;
|
|
9
|
+
files: Array<{
|
|
10
|
+
path: string;
|
|
11
|
+
role: string;
|
|
12
|
+
mediaType?: string;
|
|
13
|
+
}>;
|
|
14
|
+
status: "materialized" | "metadata_only" | "skipped";
|
|
15
|
+
reason?: string;
|
|
16
|
+
};
|
|
17
|
+
export type ContextResourcesMaterialization = {
|
|
18
|
+
contextId: string;
|
|
19
|
+
sandboxId: string;
|
|
20
|
+
resourcesDir: string;
|
|
21
|
+
manifestPath: string;
|
|
22
|
+
resources: MaterializedContextResource[];
|
|
23
|
+
};
|
|
24
|
+
export declare function materializeContextResourcesStep(params: {
|
|
25
|
+
runtime: any;
|
|
26
|
+
sandboxId: string;
|
|
27
|
+
contextId: string;
|
|
28
|
+
resources: StoredContextResource[];
|
|
29
|
+
resourceKeys?: string[] | null;
|
|
30
|
+
}): Promise<ContextResourcesMaterialization>;
|
|
31
|
+
export {};
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { datasetReadOutputJsonlStep } from "./dataset/steps.js";
|
|
2
|
+
import { readInstantFileStep } from "./file/steps.js";
|
|
3
|
+
import { getContextResourcesDir, sanitizeContextWorkspacePathSegment, } from "./contextWorkspace.js";
|
|
4
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
|
|
5
|
+
function asRecord(value) {
|
|
6
|
+
return value && typeof value === "object" && !Array.isArray(value)
|
|
7
|
+
? value
|
|
8
|
+
: null;
|
|
9
|
+
}
|
|
10
|
+
function asString(value) {
|
|
11
|
+
return typeof value === "string" ? value.trim() : "";
|
|
12
|
+
}
|
|
13
|
+
function contentFileName(resource) {
|
|
14
|
+
const filename = asString(resource.filename);
|
|
15
|
+
if (filename) {
|
|
16
|
+
return sanitizeContextWorkspacePathSegment(filename, "resource");
|
|
17
|
+
}
|
|
18
|
+
if (resource.type === "dataset")
|
|
19
|
+
return "resource.jsonl";
|
|
20
|
+
if (resource.type === "text")
|
|
21
|
+
return "resource.txt";
|
|
22
|
+
return "resource";
|
|
23
|
+
}
|
|
24
|
+
function selectResources(resources, resourceKeys) {
|
|
25
|
+
const requested = new Set(Array.isArray(resourceKeys)
|
|
26
|
+
? resourceKeys.map((key) => String(key).trim()).filter(Boolean)
|
|
27
|
+
: []);
|
|
28
|
+
if (requested.size === 0)
|
|
29
|
+
return resources;
|
|
30
|
+
return resources.filter((resource) => requested.has(resource.key));
|
|
31
|
+
}
|
|
32
|
+
export async function materializeContextResourcesStep(params) {
|
|
33
|
+
"use step";
|
|
34
|
+
const resourcesDir = getContextResourcesDir({ contextId: params.contextId });
|
|
35
|
+
const manifestPath = `${resourcesDir}/manifest.json`;
|
|
36
|
+
const selectedResources = selectResources(params.resources, params.resourceKeys);
|
|
37
|
+
const resourceDirs = selectedResources.map((resource, index) => {
|
|
38
|
+
const segment = sanitizeContextWorkspacePathSegment(resource.key, `resource_${index + 1}`);
|
|
39
|
+
return `${resourcesDir}/${segment}`;
|
|
40
|
+
});
|
|
41
|
+
await runDatasetSandboxCommandStep({
|
|
42
|
+
runtime: params.runtime,
|
|
43
|
+
sandboxId: params.sandboxId,
|
|
44
|
+
cmd: "mkdir",
|
|
45
|
+
args: ["-p", resourcesDir, ...resourceDirs],
|
|
46
|
+
});
|
|
47
|
+
const materialized = [];
|
|
48
|
+
for (let index = 0; index < selectedResources.length; index++) {
|
|
49
|
+
const resource = selectedResources[index];
|
|
50
|
+
const resourceDir = resourceDirs[index];
|
|
51
|
+
const metadataPath = `${resourceDir}/metadata.json`;
|
|
52
|
+
const files = [];
|
|
53
|
+
let status = "metadata_only";
|
|
54
|
+
let reason;
|
|
55
|
+
if (resource.type === "file" && asString(resource.fileId)) {
|
|
56
|
+
const file = await readInstantFileStep({
|
|
57
|
+
runtime: params.runtime,
|
|
58
|
+
fileId: asString(resource.fileId),
|
|
59
|
+
});
|
|
60
|
+
const path = `${resourceDir}/${contentFileName(resource)}`;
|
|
61
|
+
await writeDatasetSandboxFilesStep({
|
|
62
|
+
runtime: params.runtime,
|
|
63
|
+
sandboxId: params.sandboxId,
|
|
64
|
+
files: [{ path, contentBase64: file.contentBase64 }],
|
|
65
|
+
});
|
|
66
|
+
files.push({ path, role: "content", mediaType: asString(resource.mediaType) || undefined });
|
|
67
|
+
status = "materialized";
|
|
68
|
+
}
|
|
69
|
+
else if (resource.type === "dataset" && asString(resource.datasetId)) {
|
|
70
|
+
const datasetId = asString(resource.datasetId);
|
|
71
|
+
const content = await datasetReadOutputJsonlStep({
|
|
72
|
+
runtime: params.runtime,
|
|
73
|
+
datasetId,
|
|
74
|
+
});
|
|
75
|
+
const path = `${resourceDir}/resource.jsonl`;
|
|
76
|
+
await writeDatasetSandboxFilesStep({
|
|
77
|
+
runtime: params.runtime,
|
|
78
|
+
sandboxId: params.sandboxId,
|
|
79
|
+
files: [{ path, contentBase64: content.contentBase64 }],
|
|
80
|
+
});
|
|
81
|
+
files.push({ path, role: "content", mediaType: "application/x-ndjson" });
|
|
82
|
+
status = "materialized";
|
|
83
|
+
}
|
|
84
|
+
else if (resource.type === "text" && typeof resource.text === "string") {
|
|
85
|
+
const path = `${resourceDir}/${contentFileName(resource)}`;
|
|
86
|
+
await writeDatasetSandboxTextFilesStep({
|
|
87
|
+
runtime: params.runtime,
|
|
88
|
+
sandboxId: params.sandboxId,
|
|
89
|
+
files: [{ path, content: String(resource.text) }],
|
|
90
|
+
});
|
|
91
|
+
files.push({
|
|
92
|
+
path,
|
|
93
|
+
role: "content",
|
|
94
|
+
mediaType: asString(resource.mimeType) || "text/plain",
|
|
95
|
+
});
|
|
96
|
+
status = "materialized";
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
reason =
|
|
100
|
+
resource.type === "file"
|
|
101
|
+
? "file resource has no fileId"
|
|
102
|
+
: resource.type === "dataset"
|
|
103
|
+
? "dataset resource has no datasetId"
|
|
104
|
+
: resource.type === "link" || resource.type === "repository" || resource.type === "external"
|
|
105
|
+
? `${resource.type} resources are metadata-only until an approved adapter materializes them`
|
|
106
|
+
: "resource type is metadata-only";
|
|
107
|
+
}
|
|
108
|
+
const metadata = {
|
|
109
|
+
...(asRecord(resource) ?? {}),
|
|
110
|
+
key: resource.key,
|
|
111
|
+
type: resource.type,
|
|
112
|
+
name: resource.name,
|
|
113
|
+
description: resource.description,
|
|
114
|
+
materialized: {
|
|
115
|
+
status,
|
|
116
|
+
reason,
|
|
117
|
+
dir: resourceDir,
|
|
118
|
+
files,
|
|
119
|
+
},
|
|
120
|
+
};
|
|
121
|
+
await writeDatasetSandboxTextFilesStep({
|
|
122
|
+
runtime: params.runtime,
|
|
123
|
+
sandboxId: params.sandboxId,
|
|
124
|
+
files: [{ path: metadataPath, content: JSON.stringify(metadata, null, 2) }],
|
|
125
|
+
});
|
|
126
|
+
materialized.push({
|
|
127
|
+
key: resource.key,
|
|
128
|
+
type: resource.type,
|
|
129
|
+
name: resource.name,
|
|
130
|
+
description: resource.description,
|
|
131
|
+
dir: resourceDir,
|
|
132
|
+
metadataPath,
|
|
133
|
+
files,
|
|
134
|
+
status,
|
|
135
|
+
reason,
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
const manifest = {
|
|
139
|
+
contextId: params.contextId,
|
|
140
|
+
sandboxId: params.sandboxId,
|
|
141
|
+
resourcesDir,
|
|
142
|
+
manifestPath,
|
|
143
|
+
resources: materialized,
|
|
144
|
+
};
|
|
145
|
+
await writeDatasetSandboxTextFilesStep({
|
|
146
|
+
runtime: params.runtime,
|
|
147
|
+
sandboxId: params.sandboxId,
|
|
148
|
+
files: [{ path: manifestPath, content: JSON.stringify(manifest, null, 2) }],
|
|
149
|
+
});
|
|
150
|
+
return manifest;
|
|
151
|
+
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
export type ContextWorkspaceFileRole = "input" | "output" | "artifact";
|
|
2
|
+
export type ContextWorkspaceFileInput = {
|
|
3
|
+
fileId: string;
|
|
4
|
+
filename?: string;
|
|
5
|
+
mediaType?: string;
|
|
6
|
+
role?: ContextWorkspaceFileRole;
|
|
7
|
+
sourceEventId?: string;
|
|
8
|
+
sourcePartIndex?: number;
|
|
9
|
+
};
|
|
10
|
+
export type PreparedContextWorkspaceFile = {
|
|
11
|
+
fileId: string;
|
|
12
|
+
filename: string;
|
|
13
|
+
mediaType?: string;
|
|
14
|
+
role: ContextWorkspaceFileRole;
|
|
15
|
+
path: string;
|
|
16
|
+
sourceEventId?: string;
|
|
17
|
+
sourcePartIndex?: number;
|
|
18
|
+
};
|
|
19
|
+
export type PreparedContextExecutionWorkspace = {
|
|
20
|
+
contextId: string;
|
|
21
|
+
executionId: string;
|
|
22
|
+
sandboxId: string;
|
|
23
|
+
root: string;
|
|
24
|
+
contextRoot: string;
|
|
25
|
+
eventsDir: string;
|
|
26
|
+
resourcesDir: string;
|
|
27
|
+
outputDir: string;
|
|
28
|
+
scriptsDir: string;
|
|
29
|
+
tmpDir: string;
|
|
30
|
+
manifestPath: string;
|
|
31
|
+
files: PreparedContextWorkspaceFile[];
|
|
32
|
+
};
|
|
33
|
+
export declare function sanitizeContextWorkspacePathSegment(value: string, fallback: string): string;
|
|
34
|
+
export declare function getContextWorkspaceBase(): string;
|
|
35
|
+
export declare function getContextExecutionWorkspaceRoot(params: {
|
|
36
|
+
contextId: string;
|
|
37
|
+
executionId: string;
|
|
38
|
+
root?: string;
|
|
39
|
+
}): string;
|
|
40
|
+
export declare function getContextWorkspaceRoot(params: {
|
|
41
|
+
contextId: string;
|
|
42
|
+
root?: string;
|
|
43
|
+
}): string;
|
|
44
|
+
export declare function getContextEventsDir(params: {
|
|
45
|
+
contextId: string;
|
|
46
|
+
root?: string;
|
|
47
|
+
}): string;
|
|
48
|
+
export declare function getContextResourcesDir(params: {
|
|
49
|
+
contextId: string;
|
|
50
|
+
root?: string;
|
|
51
|
+
}): string;
|
|
52
|
+
export declare function getContextExecutionWorkspaceDirs(params: {
|
|
53
|
+
contextId: string;
|
|
54
|
+
executionId: string;
|
|
55
|
+
root?: string;
|
|
56
|
+
}): {
|
|
57
|
+
root: string;
|
|
58
|
+
contextRoot: string;
|
|
59
|
+
eventsDir: string;
|
|
60
|
+
resourcesDir: string;
|
|
61
|
+
outputDir: string;
|
|
62
|
+
scriptsDir: string;
|
|
63
|
+
tmpDir: string;
|
|
64
|
+
manifestPath: string;
|
|
65
|
+
};
|
|
66
|
+
export declare function getContextExecutionWorkspaceStandardDirs(params: {
|
|
67
|
+
contextId: string;
|
|
68
|
+
executionId: string;
|
|
69
|
+
root?: string;
|
|
70
|
+
}): string[];
|
|
71
|
+
export declare function extractContextWorkspaceFilesFromEventItems(eventItems: unknown[]): ContextWorkspaceFileInput[];
|
|
72
|
+
export declare function prepareContextExecutionWorkspaceStep(params: {
|
|
73
|
+
runtime: any;
|
|
74
|
+
sandboxId: string;
|
|
75
|
+
contextId: string;
|
|
76
|
+
executionId: string;
|
|
77
|
+
files: ContextWorkspaceFileInput[];
|
|
78
|
+
root?: string;
|
|
79
|
+
}): Promise<PreparedContextExecutionWorkspace>;
|