@ekairos/dataset 1.22.85-beta.development.0 → 1.22.87-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/context.d.ts +8 -0
- package/dist/builder/context.js +68 -9
- package/dist/builder/instructions.js +3 -2
- package/dist/builder/materialize.js +11 -25
- package/dist/builder/types.d.ts +2 -1
- package/dist/completeDataset.steps.d.ts +29 -0
- package/dist/completeDataset.steps.js +32 -1
- package/dist/completeDataset.tool.d.ts +41 -0
- package/dist/completeDataset.tool.js +6 -3
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +7 -0
- package/dist/contextWorkspace.js +17 -1
- package/dist/dataset/steps.js +12 -0
- package/dist/dataset.js +1 -0
- package/dist/executeCommand.tool.d.ts +1 -4
- package/dist/executeCommand.tool.js +113 -31
- package/dist/sandbox/steps.js +4 -2
- package/dist/service.d.ts +4 -0
- package/dist/service.js +59 -2
- package/dist/transform/prompts.js +37 -21
- package/dist/transform/transform-dataset.agent.d.ts +1 -0
- package/dist/transform/transform-dataset.agent.js +25 -25
- package/dist/transform/transform-dataset.types.d.ts +4 -1
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +4 -4
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import { tool } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { validateRows } from "./builder/schemaInference.js";
|
|
4
|
+
import { getDatasetRuntimeDb } from "./dataset/steps.js";
|
|
5
|
+
import { DatasetService } from "./service.js";
|
|
6
|
+
// Fallback row schema: an object with string keys whose values may be anything.
// createToolRowSchema returns this when no usable JSON Schema is supplied.
const rowSchema = z.record(z.string(), z.any());
|
|
7
|
+
/**
 * Extracts the JSON Schema object from a raw schema value.
 *
 * The input may be the JSON Schema itself or a wrapper of the form
 * `{ schema: <JSON Schema> }`; in the wrapper case the inner schema is returned.
 *
 * @param {unknown} schemaInput - Raw schema value.
 * @returns {object | null} The JSON Schema object, or `null` when the input is
 *   not a non-array object.
 */
function normalizeJsonSchema(schemaInput) {
    // Arrays report `typeof === "object"` but are never a JSON Schema object,
    // so they are rejected explicitly instead of being passed downstream.
    const isSchemaObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isSchemaObject(schemaInput)) {
        return null;
    }
    return isSchemaObject(schemaInput.schema) ? schemaInput.schema : schemaInput;
}
|
|
14
|
+
/**
 * Copies a JSON Schema `description` onto a schema object via `.describe()`.
 *
 * @param {{ describe: (text: string) => unknown }} schema - Schema to annotate.
 * @param {unknown} jsonSchema - JSON Schema fragment that may carry a `description`.
 * @returns {unknown} `schema.describe(trimmedDescription)` when the description
 *   is a string that is non-empty after trimming; otherwise `schema` unchanged.
 */
function applyDescription(schema, jsonSchema) {
    const raw = jsonSchema?.description;
    if (typeof raw !== "string") {
        return schema;
    }
    const text = raw.trim();
    if (text) {
        return schema.describe(text);
    }
    return schema;
}
|
|
20
|
+
/**
 * Combines a list of schemas into one.
 *
 * @param {Array<unknown>} schemas - Candidate schemas.
 * @returns {unknown} `z.any()` for an empty list, the sole element for a
 *   single-item list, and `z.union(schemas)` otherwise.
 */
function unionSchemas(schemas) {
    switch (schemas.length) {
        case 0:
            return z.any();
        case 1:
            return schemas[0];
        default:
            return z.union(schemas);
    }
}
|
|
27
|
+
/**
 * Builds a schema for a single enum value.
 *
 * @param {unknown} value - Enum member taken from a JSON Schema.
 * @returns {unknown} `z.literal(value)` for strings, numbers, booleans and
 *   `null`; `z.any()` for every other kind of value.
 */
function literalSchema(value) {
    const isLiteralCapable = value === null || ["string", "number", "boolean"].includes(typeof value);
    return isLiteralCapable ? z.literal(value) : z.any();
}
|
|
36
|
+
/**
 * Recursively converts a JSON Schema fragment into a Zod schema.
 *
 * Keywords are checked in this order, and the first match wins:
 * `enum`, `anyOf`, `oneOf`, an array-valued `type`, then a single `type`.
 * Anything unrecognised becomes `z.any()`. The fragment's `description`, when
 * present, is attached to the result through `applyDescription`.
 *
 * @param {unknown} jsonSchema - JSON Schema fragment.
 * @returns {unknown} The corresponding Zod schema.
 */
function zodFromJsonSchema(jsonSchema) {
    if (!jsonSchema || typeof jsonSchema !== "object") {
        return z.any();
    }
    if (Array.isArray(jsonSchema.enum) && jsonSchema.enum.length > 0) {
        return applyDescription(unionSchemas(jsonSchema.enum.map((value) => literalSchema(value))), jsonSchema);
    }
    // `anyOf` takes precedence over `oneOf`; both are translated to a union.
    for (const keyword of ["anyOf", "oneOf"]) {
        const alternatives = jsonSchema[keyword];
        if (Array.isArray(alternatives) && alternatives.length > 0) {
            return applyDescription(unionSchemas(alternatives.map((entry) => zodFromJsonSchema(entry))), jsonSchema);
        }
    }
    if (Array.isArray(jsonSchema.type)) {
        // Each non-null type is converted with the rest of the fragment intact;
        // "null" is added once as a separate union member.
        const schemas = jsonSchema.type
            .filter((type) => type !== "null")
            .map((type) => zodFromJsonSchema({ ...jsonSchema, type }));
        if (jsonSchema.type.includes("null")) {
            schemas.push(z.null());
        }
        return applyDescription(unionSchemas(schemas), jsonSchema);
    }
    switch (jsonSchema.type) {
        case "object": {
            const properties = jsonSchema.properties && typeof jsonSchema.properties === "object"
                ? jsonSchema.properties
                : {};
            const required = new Set(Array.isArray(jsonSchema.required)
                ? jsonSchema.required.map((key) => String(key))
                : []);
            // Object.fromEntries defines own data properties, so a property
            // literally named "__proto__" becomes a real field instead of
            // silently replacing the shape object's prototype.
            const shape = Object.fromEntries(Object.entries(properties).map(([key, propertySchema]) => {
                const property = zodFromJsonSchema(propertySchema);
                return [key, required.has(key) ? property : property.optional()];
            }));
            const objectSchema = z.object(shape);
            return applyDescription(jsonSchema.additionalProperties === false
                ? objectSchema.strict()
                : objectSchema.passthrough(), jsonSchema);
        }
        case "array":
            return applyDescription(z.array(zodFromJsonSchema(jsonSchema.items)), jsonSchema);
        case "integer":
            return applyDescription(z.number().int(), jsonSchema);
        case "number":
            return applyDescription(z.number(), jsonSchema);
        case "boolean":
            return applyDescription(z.boolean(), jsonSchema);
        case "null":
            return applyDescription(z.null(), jsonSchema);
        case "string":
            return applyDescription(z.string(), jsonSchema);
        default:
            return applyDescription(z.any(), jsonSchema);
    }
}
|
|
90
|
+
/**
 * Builds the schema used for one dataset row in a tool's input.
 *
 * @param {unknown} schemaInput - Raw dataset schema value (bare or wrapped).
 * @returns {unknown} The schema converted by `zodFromJsonSchema`, or the
 *   generic `rowSchema` when `normalizeJsonSchema` yields nothing.
 */
function createToolRowSchema(schemaInput) {
    const normalized = normalizeJsonSchema(schemaInput);
    return normalized ? zodFromJsonSchema(normalized) : rowSchema;
}
|
|
96
|
+
/**
 * Validates rows against the dataset's stored schema and replaces the
 * dataset's records with them.
 *
 * Sequence: load the dataset, require a schema, run `validateRows`, clear the
 * existing records, insert the new rows (each tagged with its index as
 * `order`), then set the dataset status to "completed". The first step that
 * fails ends the sequence and is reported in the returned object; result-style
 * failures are never thrown from here.
 *
 * @param {{ runtime: unknown, datasetId: unknown, rows: Array<object>, summary?: string }} params
 * @returns {Promise<object>} Result with `success`, `status`, `rowSource`
 *   ("direct"), `outputPath` and `storagePath` (both `null`), plus `error` /
 *   `message` and row counts on failure, or `records`, `savedRecords` and
 *   `summary` on success.
 */
export async function persistDatasetRowsStep(params) {
    "use step"; // directive must stay the first statement of the body
    const { datasetId, rows, summary } = params;
    // Shared shape of every failure result; `details` sits between the fixed
    // header fields and the error text, matching the established key order.
    const failure = (status, reason, details = {}) => ({
        success: false,
        status,
        rowSource: "direct",
        outputPath: null,
        storagePath: null,
        ...details,
        error: reason,
        message: reason,
    });

    const db = await getDatasetRuntimeDb(params.runtime);
    const service = new DatasetService(db);

    const lookup = await service.getDatasetById(datasetId);
    if (!lookup.ok) {
        return failure("dataset_not_found", lookup.error);
    }

    const schema = lookup.data?.schema;
    if (!schema) {
        return failure("schema_missing", "Schema not found in database. Please generate schema first.");
    }

    try {
        validateRows(rows, schema);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return {
            ...failure("validation_failed", reason, {
                validRows: 0,
                rowRecordCount: rows.length,
            }),
            repairInstructions: [
                "Return rows using exactly the dataset output schema property names.",
                "Populate all required fields and use only allowed enum values.",
            ],
        };
    }

    const cleared = await service.clearDataset(datasetId);
    if (!cleared.ok) {
        return failure("clear_failed", cleared.error);
    }

    const saved = await service.addDatasetRecords({
        datasetId,
        records: rows.map((rowContent, order) => ({ rowContent, order })),
    });
    if (!saved.ok) {
        return failure("record_save_failed", saved.error);
    }

    const statusUpdate = await service.updateDatasetStatus({
        datasetId,
        status: "completed",
        calculatedTotalRows: rows.length,
        actualGeneratedRowCount: rows.length,
    });
    if (!statusUpdate.ok) {
        return failure("status_update_failed", statusUpdate.error, {
            validRows: rows.length,
            rowRecordCount: rows.length,
            savedRecords: saved.data.savedCount,
        });
    }

    return {
        success: true,
        status: "completed",
        rowSource: "direct",
        outputPath: null,
        storagePath: null,
        records: rows.length,
        savedRecords: saved.data.savedCount,
        summary,
    };
}
|
|
203
|
+
/**
 * Creates the tool that replaces a dataset's output with rows passed directly
 * as JSON objects.
 *
 * The tool input is `{ rows, summary? }`: `rows` is a non-empty array whose
 * element schema comes from `createToolRowSchema(params.schema)`. On execution
 * it logs a short banner and delegates to `persistDatasetRowsStep`, forwarding
 * every entry of `params` together with the rows and the summary.
 *
 * @param {{ datasetId: unknown, schema?: unknown }} params - Tool context; all
 *   of its properties are forwarded to `persistDatasetRowsStep`.
 * @returns {unknown} The value returned by `tool(...)`.
 */
export function createReplaceRowsTool(params) {
    const rowsField = z
        .array(createToolRowSchema(params.schema))
        .min(1)
        .describe("Output dataset rows. Each row must match the dataset output schema exactly.");
    const summaryField = z
        .string()
        .optional()
        .describe("Short summary of the completed dataset including record count and structure.");
    const inputSchema = z.object({ rows: rowsField, summary: summaryField });

    const execute = async ({ rows, summary }) => {
        // A missing or empty summary falls back to a generated one.
        const finalSummary = summary || `Completed dataset with ${rows.length} rows.`;
        const banner = [
            "========================================",
            "Tool: replaceRows",
            `Rows: ${rows.length}`,
            `Summary: ${finalSummary}`,
            "========================================",
        ];
        for (const text of banner) {
            console.log(`[Dataset ${params.datasetId}] ${text}`);
        }
        const outcome = await persistDatasetRowsStep({
            ...params,
            rows,
            summary: finalSummary,
        });
        return outcome;
    };

    return tool({
        description: "Replace the dataset output with rows provided directly as JSON objects, then validate and complete the dataset.",
        inputSchema,
        execute,
    });
}
|
|
233
|
+
/**
 * Creates the tool that completes a dataset from a single final object.
 *
 * The tool input is `{ data, summary? }`, where `data` uses the schema from
 * `createToolRowSchema(params.schema)`. On execution it logs a short banner
 * and delegates to `persistDatasetRowsStep` with `data` wrapped as the only
 * row, forwarding every entry of `params` as well.
 *
 * @param {{ datasetId: unknown, schema?: unknown }} params - Tool context; all
 *   of its properties are forwarded to `persistDatasetRowsStep`.
 * @returns {unknown} The value returned by `tool(...)`.
 */
export function createCompleteObjectTool(params) {
    const dataField = createToolRowSchema(params.schema)
        .describe("Required final object. It must match the dataset output schema exactly. This field is mandatory; do not omit it.");
    const summaryField = z
        .string()
        .optional()
        .describe("Short summary of the completed object and why it satisfies the output schema.");
    const inputSchema = z.object({ data: dataField, summary: summaryField });

    const execute = async ({ data, summary }) => {
        // A missing or empty summary falls back to a fixed default.
        const finalSummary = summary || "Completed object dataset.";
        const banner = [
            "========================================",
            "Tool: completeObject",
            `Summary: ${finalSummary}`,
            "========================================",
        ];
        for (const text of banner) {
            console.log(`[Dataset ${params.datasetId}] ${text}`);
        }
        const outcome = await persistDatasetRowsStep({
            ...params,
            rows: [data],
            summary: finalSummary,
        });
        return outcome;
    };

    return tool({
        description: "Complete an object-mode dataset by providing the final object directly. This writes one dataset row, validates it against the output schema, and completes the dataset. Do not call this tool until the final data object is fully constructed. Never call completeObject with only a summary.",
        inputSchema,
        execute,
    });
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.85-beta.development.0",
|
|
3
|
+
"version": "1.22.87-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.87-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.87-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.87-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|