@ekairos/dataset 1.22.46-beta.development.0 → 1.22.47-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/instructions.d.ts +1 -2
- package/dist/builder/instructions.js +4 -11
- package/dist/builder/materialize.d.ts +1 -2
- package/dist/builder/materialize.js +30 -36
- package/dist/builder/persistence.d.ts +1 -2
- package/dist/builder/persistence.js +17 -26
- package/dist/builder/schemaInference.d.ts +1 -2
- package/dist/builder/schemaInference.js +4 -12
- package/dist/builder/sourceRows.d.ts +0 -1
- package/dist/builder/sourceRows.js +3 -9
- package/dist/builder/types.d.ts +1 -2
- package/dist/builder/types.js +1 -3
- package/dist/clearDataset.tool.d.ts +0 -1
- package/dist/clearDataset.tool.js +12 -16
- package/dist/completeDataset.tool.d.ts +0 -1
- package/dist/completeDataset.tool.js +18 -25
- package/dist/dataset/steps.d.ts +5 -6
- package/dist/dataset/steps.js +25 -36
- package/dist/dataset.d.ts +2 -3
- package/dist/dataset.js +13 -17
- package/dist/datasetFiles.d.ts +0 -1
- package/dist/datasetFiles.js +5 -12
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/eventsReactRuntime.d.ts +0 -1
- package/dist/eventsReactRuntime.js +9 -13
- package/dist/executeCommand.tool.d.ts +0 -1
- package/dist/executeCommand.tool.js +12 -16
- package/dist/file/file-dataset.agent.d.ts +1 -2
- package/dist/file/file-dataset.agent.js +34 -38
- package/dist/file/filepreview.d.ts +0 -1
- package/dist/file/filepreview.js +12 -16
- package/dist/file/generateSchema.tool.d.ts +0 -1
- package/dist/file/generateSchema.tool.js +10 -14
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +1 -2
- package/dist/file/prompts.js +13 -17
- package/dist/file/steps.d.ts +0 -1
- package/dist/file/steps.js +3 -7
- package/dist/index.d.ts +6 -7
- package/dist/index.js +6 -23
- package/dist/materializeDataset.tool.d.ts +1 -2
- package/dist/materializeDataset.tool.js +40 -45
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +1 -2
- package/dist/query/queryDomain.js +2 -39
- package/dist/query/queryDomain.step.d.ts +0 -1
- package/dist/query/queryDomain.step.js +8 -12
- package/dist/sandbox/steps.d.ts +0 -1
- package/dist/sandbox/steps.js +30 -74
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +0 -1
- package/dist/schema.js +22 -26
- package/dist/service.d.ts +1 -2
- package/dist/service.js +5 -10
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +0 -1
- package/dist/transform/filepreview.js +6 -10
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +0 -1
- package/dist/transform/prompts.js +12 -16
- package/dist/transform/transform-dataset.agent.d.ts +1 -2
- package/dist/transform/transform-dataset.agent.js +31 -35
- package/dist/transform/transformDataset.d.ts +0 -1
- package/dist/transform/transformDataset.js +7 -11
- package/package.json +12 -7
- package/dist/builder/instructions.d.ts.map +0 -1
- package/dist/builder/instructions.js.map +0 -1
- package/dist/builder/materialize.d.ts.map +0 -1
- package/dist/builder/materialize.js.map +0 -1
- package/dist/builder/persistence.d.ts.map +0 -1
- package/dist/builder/persistence.js.map +0 -1
- package/dist/builder/schemaInference.d.ts.map +0 -1
- package/dist/builder/schemaInference.js.map +0 -1
- package/dist/builder/sourceRows.d.ts.map +0 -1
- package/dist/builder/sourceRows.js.map +0 -1
- package/dist/builder/types.d.ts.map +0 -1
- package/dist/builder/types.js.map +0 -1
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import type { DatasetSchemaInput } from "./types";
|
|
1
|
+
import type { DatasetSchemaInput } from "./types.js";
|
|
2
2
|
export declare function buildFileDefaultInstructions(schema?: DatasetSchemaInput): "Create a dataset from the source file and ensure each output row matches the provided dataset schema exactly." | "Create a dataset representing the source content as structured rows.";
|
|
3
3
|
export declare function buildRawSourceInstructions(sourceKind: "file" | "text"): "Create a dataset representing the raw text content as structured rows without applying business transformations." | "Create a dataset representing the raw file content as structured rows without applying business transformations.";
|
|
4
4
|
export declare function buildTransformInstructions(sourceCount: number, userInstructions?: string, schema?: DatasetSchemaInput): string;
|
|
5
5
|
export declare function buildObjectOutputInstructions(userInstructions?: string): string;
|
|
6
|
-
//# sourceMappingURL=instructions.d.ts.map
|
|
@@ -1,22 +1,16 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.buildFileDefaultInstructions = buildFileDefaultInstructions;
|
|
4
|
-
exports.buildRawSourceInstructions = buildRawSourceInstructions;
|
|
5
|
-
exports.buildTransformInstructions = buildTransformInstructions;
|
|
6
|
-
exports.buildObjectOutputInstructions = buildObjectOutputInstructions;
|
|
7
|
-
function buildFileDefaultInstructions(schema) {
|
|
1
|
+
export function buildFileDefaultInstructions(schema) {
|
|
8
2
|
if (schema) {
|
|
9
3
|
return "Create a dataset from the source file and ensure each output row matches the provided dataset schema exactly.";
|
|
10
4
|
}
|
|
11
5
|
return "Create a dataset representing the source content as structured rows.";
|
|
12
6
|
}
|
|
13
|
-
function buildRawSourceInstructions(sourceKind) {
|
|
7
|
+
export function buildRawSourceInstructions(sourceKind) {
|
|
14
8
|
if (sourceKind === "text") {
|
|
15
9
|
return "Create a dataset representing the raw text content as structured rows without applying business transformations.";
|
|
16
10
|
}
|
|
17
11
|
return "Create a dataset representing the raw file content as structured rows without applying business transformations.";
|
|
18
12
|
}
|
|
19
|
-
function buildTransformInstructions(sourceCount, userInstructions, schema) {
|
|
13
|
+
export function buildTransformInstructions(sourceCount, userInstructions, schema) {
|
|
20
14
|
const explicit = String(userInstructions ?? "").trim();
|
|
21
15
|
if (explicit)
|
|
22
16
|
return explicit;
|
|
@@ -31,7 +25,7 @@ function buildTransformInstructions(sourceCount, userInstructions, schema) {
|
|
|
31
25
|
}
|
|
32
26
|
return "Transform the source dataset into a new useful dataset.";
|
|
33
27
|
}
|
|
34
|
-
function buildObjectOutputInstructions(userInstructions) {
|
|
28
|
+
export function buildObjectOutputInstructions(userInstructions) {
|
|
35
29
|
const base = String(userInstructions ?? "").trim();
|
|
36
30
|
const objectContract = [
|
|
37
31
|
"Output mode is object.",
|
|
@@ -43,4 +37,3 @@ function buildObjectOutputInstructions(userInstructions) {
|
|
|
43
37
|
return objectContract;
|
|
44
38
|
return [base, "", objectContract].join("\n");
|
|
45
39
|
}
|
|
46
|
-
//# sourceMappingURL=instructions.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalSource } from "./types";
|
|
1
|
+
import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalSource } from "./types.js";
|
|
2
2
|
export declare function materializeQuerySource<Runtime extends AnyDatasetRuntime>(runtime: DatasetBuilderState<Runtime>["runtime"], source: Extract<InternalSource, {
|
|
3
3
|
kind: "query";
|
|
4
4
|
}>, params: {
|
|
@@ -13,4 +13,3 @@ export declare function materializeSingleFileLikeSource<Runtime extends AnyDatas
|
|
|
13
13
|
kind: "file" | "text";
|
|
14
14
|
}>, targetDatasetId: string): Promise<string>;
|
|
15
15
|
export declare function materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;
|
|
16
|
-
//# sourceMappingURL=materialize.d.ts.map
|
|
@@ -1,24 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const transform_dataset_agent_1 = require("../transform/transform-dataset.agent");
|
|
9
|
-
const instructions_1 = require("./instructions");
|
|
10
|
-
const persistence_1 = require("./persistence");
|
|
11
|
-
const schemaInference_1 = require("./schemaInference");
|
|
12
|
-
const sourceRows_1 = require("./sourceRows");
|
|
1
|
+
import { createFileParseStory } from "../file/file-dataset.agent.js";
|
|
2
|
+
import { DatasetService } from "../service.js";
|
|
3
|
+
import { createTransformDatasetStory } from "../transform/transform-dataset.agent.js";
|
|
4
|
+
import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransformInstructions, } from "./instructions.js";
|
|
5
|
+
import { createOrUpdateDatasetMetadata, getDatasetDb, materializeRowsToDataset, uploadInlineTextSource, } from "./persistence.js";
|
|
6
|
+
import { inferDatasetSchema } from "./schemaInference.js";
|
|
7
|
+
import { getDomainDescriptor, normalizeQueryRows } from "./sourceRows.js";
|
|
13
8
|
function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
|
|
14
9
|
return `${targetDatasetId}__${sourceKind}_${index}`;
|
|
15
10
|
}
|
|
16
|
-
async function materializeQuerySource(runtime, source, params) {
|
|
11
|
+
export async function materializeQuerySource(runtime, source, params) {
|
|
17
12
|
const scoped = await runtime.use(source.domain);
|
|
18
13
|
const result = await scoped.db.query(source.query);
|
|
19
|
-
const rows =
|
|
20
|
-
const domainDescriptor =
|
|
21
|
-
return await
|
|
14
|
+
const rows = normalizeQueryRows(result);
|
|
15
|
+
const domainDescriptor = getDomainDescriptor(source.domain);
|
|
16
|
+
return await materializeRowsToDataset(runtime, {
|
|
22
17
|
datasetId: params.datasetId,
|
|
23
18
|
sandboxId: params.sandboxId,
|
|
24
19
|
title: params.title ?? source.title,
|
|
@@ -44,7 +39,7 @@ async function materializeQuerySource(runtime, source, params) {
|
|
|
44
39
|
first: params.first,
|
|
45
40
|
});
|
|
46
41
|
}
|
|
47
|
-
async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
|
|
42
|
+
export async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
|
|
48
43
|
if (!state.reactor) {
|
|
49
44
|
throw new Error("dataset_reactor_required");
|
|
50
45
|
}
|
|
@@ -53,8 +48,8 @@ async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
|
|
|
53
48
|
}
|
|
54
49
|
const fileId = source.kind === "file"
|
|
55
50
|
? source.fileId
|
|
56
|
-
: await
|
|
57
|
-
await
|
|
51
|
+
: await uploadInlineTextSource(state.runtime, targetDatasetId, source);
|
|
52
|
+
await createOrUpdateDatasetMetadata(state.runtime, {
|
|
58
53
|
datasetId: targetDatasetId,
|
|
59
54
|
sandboxId: state.sandboxId,
|
|
60
55
|
title: state.title ?? targetDatasetId,
|
|
@@ -73,21 +68,21 @@ async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
|
|
|
73
68
|
schema: state.outputSchema,
|
|
74
69
|
status: "building",
|
|
75
70
|
});
|
|
76
|
-
const parseStory =
|
|
71
|
+
const parseStory = createFileParseStory(fileId, {
|
|
77
72
|
datasetId: targetDatasetId,
|
|
78
|
-
instructions: state.instructions ??
|
|
73
|
+
instructions: state.instructions ?? buildFileDefaultInstructions(state.outputSchema),
|
|
79
74
|
reactor: state.reactor,
|
|
80
75
|
sandboxId: state.sandboxId,
|
|
81
76
|
});
|
|
82
77
|
await parseStory.parse(state.env);
|
|
83
78
|
if (!state.outputSchema) {
|
|
84
|
-
const db = await
|
|
85
|
-
const service = new
|
|
79
|
+
const db = await getDatasetDb(state.runtime);
|
|
80
|
+
const service = new DatasetService(db);
|
|
86
81
|
const readResult = await service.readRows({ datasetId: targetDatasetId, cursor: 0, limit: 1000 });
|
|
87
82
|
if (!readResult.ok) {
|
|
88
83
|
throw new Error(readResult.error);
|
|
89
84
|
}
|
|
90
|
-
const inferred =
|
|
85
|
+
const inferred = inferDatasetSchema(readResult.data.rows, `${targetDatasetId}Row`, "One dataset row");
|
|
91
86
|
const updateResult = await service.updateDatasetSchema({
|
|
92
87
|
datasetId: targetDatasetId,
|
|
93
88
|
schema: inferred,
|
|
@@ -98,8 +93,8 @@ async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
|
|
|
98
93
|
}
|
|
99
94
|
}
|
|
100
95
|
if (state.first) {
|
|
101
|
-
const db = await
|
|
102
|
-
const service = new
|
|
96
|
+
const db = await getDatasetDb(state.runtime);
|
|
97
|
+
const service = new DatasetService(db);
|
|
103
98
|
const firstResult = await service.readOne(targetDatasetId);
|
|
104
99
|
if (!firstResult.ok) {
|
|
105
100
|
throw new Error(firstResult.error);
|
|
@@ -125,11 +120,11 @@ async function normalizeSourceToDatasetId(state, source, targetDatasetId, source
|
|
|
125
120
|
...state,
|
|
126
121
|
outputSchema: undefined,
|
|
127
122
|
first: false,
|
|
128
|
-
instructions:
|
|
123
|
+
instructions: buildRawSourceInstructions(source.kind),
|
|
129
124
|
}, source, intermediateDatasetId);
|
|
130
125
|
return intermediateDatasetId;
|
|
131
126
|
}
|
|
132
|
-
async function materializeDerivedDataset(state, targetDatasetId) {
|
|
127
|
+
export async function materializeDerivedDataset(state, targetDatasetId) {
|
|
133
128
|
if (!state.reactor) {
|
|
134
129
|
throw new Error("dataset_reactor_required");
|
|
135
130
|
}
|
|
@@ -150,7 +145,7 @@ async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
150
145
|
properties: {},
|
|
151
146
|
},
|
|
152
147
|
};
|
|
153
|
-
await
|
|
148
|
+
await createOrUpdateDatasetMetadata(state.runtime, {
|
|
154
149
|
datasetId: targetDatasetId,
|
|
155
150
|
sandboxId: state.sandboxId,
|
|
156
151
|
title: state.title ?? targetDatasetId,
|
|
@@ -161,30 +156,30 @@ async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
161
156
|
query: source.query,
|
|
162
157
|
title: source.title,
|
|
163
158
|
explanation: source.explanation,
|
|
164
|
-
...
|
|
159
|
+
...getDomainDescriptor(source.domain),
|
|
165
160
|
}
|
|
166
161
|
: source),
|
|
167
162
|
sourceKinds: state.sources.map((source) => source.kind),
|
|
168
163
|
schema: transformSchema,
|
|
169
164
|
status: "building",
|
|
170
165
|
});
|
|
171
|
-
const transformStory =
|
|
166
|
+
const transformStory = createTransformDatasetStory({
|
|
172
167
|
sourceDatasetIds: normalizedSources,
|
|
173
168
|
outputSchema: transformSchema,
|
|
174
|
-
instructions:
|
|
169
|
+
instructions: buildTransformInstructions(normalizedSources.length, state.instructions, state.outputSchema),
|
|
175
170
|
datasetId: targetDatasetId,
|
|
176
171
|
reactor: state.reactor,
|
|
177
172
|
sandboxId: state.sandboxId,
|
|
178
173
|
});
|
|
179
174
|
await transformStory.transform(state.env);
|
|
180
|
-
const db = await
|
|
181
|
-
const service = new
|
|
175
|
+
const db = await getDatasetDb(state.runtime);
|
|
176
|
+
const service = new DatasetService(db);
|
|
182
177
|
if (!state.outputSchema) {
|
|
183
178
|
const readResult = await service.readRows({ datasetId: targetDatasetId, cursor: 0, limit: 1000 });
|
|
184
179
|
if (!readResult.ok) {
|
|
185
180
|
throw new Error(readResult.error);
|
|
186
181
|
}
|
|
187
|
-
const inferred =
|
|
182
|
+
const inferred = inferDatasetSchema(readResult.data.rows, `${targetDatasetId}Row`, "One dataset row");
|
|
188
183
|
const updateResult = await service.updateDatasetSchema({
|
|
189
184
|
datasetId: targetDatasetId,
|
|
190
185
|
schema: inferred,
|
|
@@ -202,4 +197,3 @@ async function materializeDerivedDataset(state, targetDatasetId) {
|
|
|
202
197
|
}
|
|
203
198
|
return targetDatasetId;
|
|
204
199
|
}
|
|
205
|
-
//# sourceMappingURL=materialize.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { AnyDatasetRuntime, DatasetBuildResult, DatasetTextSourceInput, MaterializeRowsParams } from "./types";
|
|
1
|
+
import type { AnyDatasetRuntime, DatasetBuildResult, DatasetTextSourceInput, MaterializeRowsParams } from "./types.js";
|
|
2
2
|
export declare function defaultTextSourceName(source: DatasetTextSourceInput): string;
|
|
3
3
|
export declare function getDatasetDb<Runtime extends AnyDatasetRuntime>(runtime: Runtime): Promise<any>;
|
|
4
4
|
export declare function createOrUpdateDatasetMetadata<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: {
|
|
@@ -15,4 +15,3 @@ export declare function createOrUpdateDatasetMetadata<Runtime extends AnyDataset
|
|
|
15
15
|
export declare function materializeRowsToDataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: MaterializeRowsParams): Promise<string>;
|
|
16
16
|
export declare function uploadInlineTextSource<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, source: DatasetTextSourceInput): Promise<string>;
|
|
17
17
|
export declare function finalizeBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, withFirst: boolean): Promise<DatasetBuildResult>;
|
|
18
|
-
//# sourceMappingURL=persistence.d.ts.map
|
|
@@ -1,16 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
exports.materializeRowsToDataset = materializeRowsToDataset;
|
|
7
|
-
exports.uploadInlineTextSource = uploadInlineTextSource;
|
|
8
|
-
exports.finalizeBuildResult = finalizeBuildResult;
|
|
9
|
-
const service_1 = require("../service");
|
|
10
|
-
const schema_1 = require("../schema");
|
|
11
|
-
const schemaInference_1 = require("./schemaInference");
|
|
12
|
-
const sourceRows_1 = require("./sourceRows");
|
|
13
|
-
function defaultTextSourceName(source) {
|
|
1
|
+
import { DatasetService } from "../service.js";
|
|
2
|
+
import { datasetDomain } from "../schema.js";
|
|
3
|
+
import { inferDatasetSchema, validateRows } from "./schemaInference.js";
|
|
4
|
+
import { rowsToJsonl } from "./sourceRows.js";
|
|
5
|
+
export function defaultTextSourceName(source) {
|
|
14
6
|
if (source.name?.trim())
|
|
15
7
|
return source.name.trim();
|
|
16
8
|
const mimeType = String(source.mimeType ?? "").toLowerCase();
|
|
@@ -22,13 +14,13 @@ function defaultTextSourceName(source) {
|
|
|
22
14
|
return "source.yaml";
|
|
23
15
|
return "source.txt";
|
|
24
16
|
}
|
|
25
|
-
async function getDatasetDb(runtime) {
|
|
26
|
-
const scoped = await runtime.use(
|
|
17
|
+
export async function getDatasetDb(runtime) {
|
|
18
|
+
const scoped = await runtime.use(datasetDomain);
|
|
27
19
|
return scoped.db;
|
|
28
20
|
}
|
|
29
|
-
async function createOrUpdateDatasetMetadata(runtime, params) {
|
|
21
|
+
export async function createOrUpdateDatasetMetadata(runtime, params) {
|
|
30
22
|
const db = await getDatasetDb(runtime);
|
|
31
|
-
const service = new
|
|
23
|
+
const service = new DatasetService(db);
|
|
32
24
|
const result = await service.createDataset({
|
|
33
25
|
id: params.datasetId,
|
|
34
26
|
sandboxId: params.sandboxId,
|
|
@@ -45,13 +37,13 @@ async function createOrUpdateDatasetMetadata(runtime, params) {
|
|
|
45
37
|
throw new Error(result.error);
|
|
46
38
|
}
|
|
47
39
|
}
|
|
48
|
-
async function materializeRowsToDataset(runtime, params) {
|
|
40
|
+
export async function materializeRowsToDataset(runtime, params) {
|
|
49
41
|
if (params.first && params.rows.length > 1) {
|
|
50
42
|
throw new Error("dataset_first_expected_zero_or_one_row");
|
|
51
43
|
}
|
|
52
44
|
const resolvedSchema = params.schema ??
|
|
53
|
-
|
|
54
|
-
|
|
45
|
+
inferDatasetSchema(params.rows, params.title ? `${params.title}Row` : "DatasetRow", params.title ? `One row for ${params.title}` : "One dataset row");
|
|
46
|
+
validateRows(params.rows, resolvedSchema);
|
|
55
47
|
await createOrUpdateDatasetMetadata(runtime, {
|
|
56
48
|
datasetId: params.datasetId,
|
|
57
49
|
sandboxId: params.sandboxId,
|
|
@@ -64,10 +56,10 @@ async function materializeRowsToDataset(runtime, params) {
|
|
|
64
56
|
status: "building",
|
|
65
57
|
});
|
|
66
58
|
const db = await getDatasetDb(runtime);
|
|
67
|
-
const service = new
|
|
59
|
+
const service = new DatasetService(db);
|
|
68
60
|
const uploadResult = await service.uploadDatasetOutputFile({
|
|
69
61
|
datasetId: params.datasetId,
|
|
70
|
-
fileBuffer: Buffer.from(
|
|
62
|
+
fileBuffer: Buffer.from(rowsToJsonl(params.rows), "utf-8"),
|
|
71
63
|
});
|
|
72
64
|
if (!uploadResult.ok) {
|
|
73
65
|
throw new Error(uploadResult.error);
|
|
@@ -83,7 +75,7 @@ async function materializeRowsToDataset(runtime, params) {
|
|
|
83
75
|
}
|
|
84
76
|
return params.datasetId;
|
|
85
77
|
}
|
|
86
|
-
async function uploadInlineTextSource(runtime, datasetId, source) {
|
|
78
|
+
export async function uploadInlineTextSource(runtime, datasetId, source) {
|
|
87
79
|
const db = await getDatasetDb(runtime);
|
|
88
80
|
const fileName = defaultTextSourceName(source);
|
|
89
81
|
const storagePath = `/dataset/source/${datasetId}/${Date.now()}-${fileName}`;
|
|
@@ -97,9 +89,9 @@ async function uploadInlineTextSource(runtime, datasetId, source) {
|
|
|
97
89
|
}
|
|
98
90
|
return fileId;
|
|
99
91
|
}
|
|
100
|
-
async function finalizeBuildResult(runtime, datasetId, withFirst) {
|
|
92
|
+
export async function finalizeBuildResult(runtime, datasetId, withFirst) {
|
|
101
93
|
const db = await getDatasetDb(runtime);
|
|
102
|
-
const service = new
|
|
94
|
+
const service = new DatasetService(db);
|
|
103
95
|
const datasetResult = await service.getDatasetById(datasetId);
|
|
104
96
|
if (!datasetResult.ok) {
|
|
105
97
|
throw new Error(datasetResult.error);
|
|
@@ -144,4 +136,3 @@ async function finalizeBuildResult(runtime, datasetId, withFirst) {
|
|
|
144
136
|
firstRow: firstResult.data,
|
|
145
137
|
};
|
|
146
138
|
}
|
|
147
|
-
//# sourceMappingURL=persistence.js.map
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { DatasetSchemaInput } from "./types";
|
|
1
|
+
import type { DatasetSchemaInput } from "./types.js";
|
|
2
2
|
export declare function inferDatasetSchema(rows: any[], title?: string, description?: string): DatasetSchemaInput;
|
|
3
3
|
export declare function validateRows(rows: any[], schema: DatasetSchemaInput): void;
|
|
4
|
-
//# sourceMappingURL=schemaInference.d.ts.map
|
|
@@ -1,12 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.inferDatasetSchema = inferDatasetSchema;
|
|
7
|
-
exports.validateRows = validateRows;
|
|
8
|
-
const ajv_1 = __importDefault(require("ajv"));
|
|
9
|
-
const ajv = new ajv_1.default({ allErrors: true, strict: false });
|
|
1
|
+
import Ajv from "ajv";
|
|
2
|
+
const ajv = new Ajv({ allErrors: true, strict: false });
|
|
10
3
|
function inferJsonSchemaType(value) {
|
|
11
4
|
if (value === null)
|
|
12
5
|
return { type: "null" };
|
|
@@ -23,7 +16,7 @@ function inferJsonSchemaType(value) {
|
|
|
23
16
|
return { type: "string" };
|
|
24
17
|
}
|
|
25
18
|
}
|
|
26
|
-
function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
|
|
19
|
+
export function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
|
|
27
20
|
const properties = {};
|
|
28
21
|
const required = [];
|
|
29
22
|
const keys = new Set();
|
|
@@ -56,7 +49,7 @@ function inferDatasetSchema(rows, title = "DatasetRow", description = "One datas
|
|
|
56
49
|
},
|
|
57
50
|
};
|
|
58
51
|
}
|
|
59
|
-
function validateRows(rows, schema) {
|
|
52
|
+
export function validateRows(rows, schema) {
|
|
60
53
|
const validator = ajv.compile(schema.schema);
|
|
61
54
|
for (const row of rows) {
|
|
62
55
|
const valid = validator(row);
|
|
@@ -66,4 +59,3 @@ function validateRows(rows, schema) {
|
|
|
66
59
|
}
|
|
67
60
|
}
|
|
68
61
|
}
|
|
69
|
-
//# sourceMappingURL=schemaInference.js.map
|
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.rowsToJsonl = rowsToJsonl;
|
|
4
|
-
exports.normalizeQueryRows = normalizeQueryRows;
|
|
5
|
-
exports.getDomainDescriptor = getDomainDescriptor;
|
|
6
|
-
function rowsToJsonl(rows) {
|
|
1
|
+
export function rowsToJsonl(rows) {
|
|
7
2
|
return rows
|
|
8
3
|
.map((row) => JSON.stringify({
|
|
9
4
|
type: "row",
|
|
@@ -12,7 +7,7 @@ function rowsToJsonl(rows) {
|
|
|
12
7
|
.join("\n")
|
|
13
8
|
.concat(rows.length > 0 ? "\n" : "");
|
|
14
9
|
}
|
|
15
|
-
function normalizeQueryRows(result) {
|
|
10
|
+
export function normalizeQueryRows(result) {
|
|
16
11
|
if (!result || typeof result !== "object")
|
|
17
12
|
return [];
|
|
18
13
|
const entries = Object.entries(result);
|
|
@@ -49,7 +44,7 @@ function normalizeQueryRows(result) {
|
|
|
49
44
|
}
|
|
50
45
|
return rows;
|
|
51
46
|
}
|
|
52
|
-
function getDomainDescriptor(domain) {
|
|
47
|
+
export function getDomainDescriptor(domain) {
|
|
53
48
|
const meta = domain?.meta ?? {};
|
|
54
49
|
const context = typeof domain?.context === "function" ? domain.context() : {};
|
|
55
50
|
const name = String(meta?.name ?? context?.name ?? "domain");
|
|
@@ -59,4 +54,3 @@ function getDomainDescriptor(domain) {
|
|
|
59
54
|
...(packageName ? { domainPackageName: packageName } : {}),
|
|
60
55
|
};
|
|
61
56
|
}
|
|
62
|
-
//# sourceMappingURL=sourceRows.js.map
|
package/dist/builder/types.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { InstaQLParams, ValidQuery } from "@instantdb/core";
|
|
|
2
2
|
import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
|
|
3
3
|
import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
|
|
4
4
|
import type { ContextReactor } from "@ekairos/events";
|
|
5
|
-
import { datasetDomain } from "../schema";
|
|
5
|
+
import { datasetDomain } from "../schema.js";
|
|
6
6
|
export type DatasetQuerySourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
|
|
7
7
|
query: InstaQLParams<DomainInstantSchema<D>>;
|
|
8
8
|
title?: string;
|
|
@@ -127,4 +127,3 @@ export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
|
|
|
127
127
|
first(): DatasetBuilder<Runtime>;
|
|
128
128
|
build(options?: DatasetBuildOptions): Promise<DatasetBuildResult>;
|
|
129
129
|
};
|
|
130
|
-
//# sourceMappingURL=types.d.ts.map
|
package/dist/builder/types.js
CHANGED
|
@@ -1,26 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const steps_2 = require("./dataset/steps");
|
|
9
|
-
function createClearDatasetTool({ datasetId, sandboxId, env }) {
|
|
10
|
-
return (0, ai_1.tool)({
|
|
1
|
+
import { tool } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { runDatasetSandboxCommandStep } from "./sandbox/steps.js";
|
|
4
|
+
import { getDatasetOutputPath } from "./datasetFiles.js";
|
|
5
|
+
import { datasetClearStep } from "./dataset/steps.js";
|
|
6
|
+
export function createClearDatasetTool({ datasetId, sandboxId, env }) {
|
|
7
|
+
return tool({
|
|
11
8
|
description: "Clear all dataset records and output files. This will delete all generated data and reset the dataset to its initial state.",
|
|
12
|
-
inputSchema:
|
|
13
|
-
reason:
|
|
9
|
+
inputSchema: z.object({
|
|
10
|
+
reason: z.string().describe("The reason for clearing the dataset"),
|
|
14
11
|
}),
|
|
15
12
|
execute: async ({ reason }) => {
|
|
16
13
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
17
14
|
console.log(`[Dataset ${datasetId}] Tool: clearDataset`);
|
|
18
15
|
console.log(`[Dataset ${datasetId}] Reason: ${reason}`);
|
|
19
16
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
20
|
-
const outputPath =
|
|
17
|
+
const outputPath = getDatasetOutputPath(datasetId);
|
|
21
18
|
console.log(`[Dataset ${datasetId}] Step 1: Deleting output file`);
|
|
22
19
|
try {
|
|
23
|
-
const result = await
|
|
20
|
+
const result = await runDatasetSandboxCommandStep({
|
|
24
21
|
env,
|
|
25
22
|
sandboxId,
|
|
26
23
|
cmd: "rm",
|
|
@@ -38,7 +35,7 @@ function createClearDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
38
35
|
console.warn(`[Dataset ${datasetId}] Error deleting output file: ${message}`);
|
|
39
36
|
}
|
|
40
37
|
console.log(`[Dataset ${datasetId}] Step 2: Clearing dataset records`);
|
|
41
|
-
const clearResult = await
|
|
38
|
+
const clearResult = await datasetClearStep({ env, datasetId });
|
|
42
39
|
if (!clearResult.ok) {
|
|
43
40
|
console.error(`[Dataset ${datasetId}] Failed to clear dataset: ${clearResult.error}`);
|
|
44
41
|
return {
|
|
@@ -58,4 +55,3 @@ function createClearDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
58
55
|
},
|
|
59
56
|
});
|
|
60
57
|
}
|
|
61
|
-
//# sourceMappingURL=clearDataset.tool.js.map
|
|
@@ -1,37 +1,31 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
const ai_1 = require("ai");
|
|
8
|
-
const zod_1 = require("zod");
|
|
9
|
-
const steps_1 = require("./sandbox/steps");
|
|
10
|
-
const ajv_1 = __importDefault(require("ajv"));
|
|
11
|
-
const datasetFiles_1 = require("./datasetFiles");
|
|
12
|
-
const steps_2 = require("./dataset/steps");
|
|
1
|
+
import { tool } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { readDatasetSandboxFileStep, runDatasetSandboxCommandStep } from "./sandbox/steps.js";
|
|
4
|
+
import Ajv from "ajv";
|
|
5
|
+
import { getDatasetOutputPath, } from "./datasetFiles.js";
|
|
6
|
+
import { datasetGetByIdStep, datasetUpdateStatusStep, datasetUploadOutputFileStep } from "./dataset/steps.js";
|
|
13
7
|
let ajvInstance = null;
|
|
14
8
|
function getAjv() {
|
|
15
9
|
if (!ajvInstance) {
|
|
16
|
-
ajvInstance = new
|
|
10
|
+
ajvInstance = new Ajv({
|
|
17
11
|
allErrors: true,
|
|
18
12
|
strict: false,
|
|
19
13
|
});
|
|
20
14
|
}
|
|
21
15
|
return ajvInstance;
|
|
22
16
|
}
|
|
23
|
-
function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
24
|
-
return
|
|
17
|
+
export function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
18
|
+
return tool({
|
|
25
19
|
description: "Mark the dataset as completed. Use only when output.jsonl has been successfully generated and is ready for validation.",
|
|
26
|
-
inputSchema:
|
|
27
|
-
summary:
|
|
20
|
+
inputSchema: z.object({
|
|
21
|
+
summary: z.string().describe("Summary of the completed dataset including record count and structure"),
|
|
28
22
|
}),
|
|
29
23
|
execute: async ({ summary }) => {
|
|
30
24
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
31
25
|
console.log(`[Dataset ${datasetId}] Tool: completeDataset`);
|
|
32
26
|
console.log(`[Dataset ${datasetId}] Summary: ${summary}`);
|
|
33
27
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
34
|
-
const outputPath =
|
|
28
|
+
const outputPath = getDatasetOutputPath(datasetId);
|
|
35
29
|
try {
|
|
36
30
|
await ensureFileExists(env, sandboxId, outputPath);
|
|
37
31
|
}
|
|
@@ -44,7 +38,7 @@ function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
44
38
|
};
|
|
45
39
|
}
|
|
46
40
|
console.log(`[Dataset ${datasetId}] Validating dataset rows against schema`);
|
|
47
|
-
const datasetResult = await
|
|
41
|
+
const datasetResult = await datasetGetByIdStep({ env, datasetId });
|
|
48
42
|
if (!datasetResult.ok) {
|
|
49
43
|
console.error(`[Dataset ${datasetId}] ${datasetResult.error}`);
|
|
50
44
|
return {
|
|
@@ -85,7 +79,7 @@ function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
85
79
|
}
|
|
86
80
|
const totalValidRows = validationResult.validRowCount;
|
|
87
81
|
console.log(`[Dataset ${datasetId}] Reading file content for upload`);
|
|
88
|
-
const fileRead = await
|
|
82
|
+
const fileRead = await readDatasetSandboxFileStep({ env, sandboxId, path: outputPath });
|
|
89
83
|
if (!fileRead.contentBase64) {
|
|
90
84
|
console.error(`[Dataset ${datasetId}] Empty file content`);
|
|
91
85
|
return {
|
|
@@ -95,7 +89,7 @@ function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
95
89
|
}
|
|
96
90
|
const fileBuffer = Buffer.from(fileRead.contentBase64, "base64");
|
|
97
91
|
console.log(`[Dataset ${datasetId}] Uploading file to InstantDB storage`);
|
|
98
|
-
const uploadResult = await
|
|
92
|
+
const uploadResult = await datasetUploadOutputFileStep({ env, datasetId, fileBuffer });
|
|
99
93
|
if (!uploadResult.ok) {
|
|
100
94
|
console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
|
|
101
95
|
return {
|
|
@@ -104,7 +98,7 @@ function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
104
98
|
};
|
|
105
99
|
}
|
|
106
100
|
console.log(`[Dataset ${datasetId}] File uploaded successfully: ${uploadResult.data.fileId}`);
|
|
107
|
-
const statusResult = await
|
|
101
|
+
const statusResult = await datasetUpdateStatusStep({
|
|
108
102
|
env,
|
|
109
103
|
datasetId,
|
|
110
104
|
status: "completed",
|
|
@@ -131,7 +125,7 @@ function createCompleteDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
131
125
|
});
|
|
132
126
|
}
|
|
133
127
|
async function ensureFileExists(env, sandboxId, path) {
|
|
134
|
-
const result = await
|
|
128
|
+
const result = await runDatasetSandboxCommandStep({
|
|
135
129
|
env,
|
|
136
130
|
sandboxId,
|
|
137
131
|
cmd: "test",
|
|
@@ -145,7 +139,7 @@ async function validateJsonlRows({ env, sandboxId, outputPath, validator, datase
|
|
|
145
139
|
const validation = [];
|
|
146
140
|
let validRowCount = 0;
|
|
147
141
|
console.log(`[Dataset ${datasetId}] Reading and validating JSONL file from sandbox`);
|
|
148
|
-
const fileRead = await
|
|
142
|
+
const fileRead = await readDatasetSandboxFileStep({ env, sandboxId, path: outputPath });
|
|
149
143
|
if (!fileRead.contentBase64) {
|
|
150
144
|
console.log(`[Dataset ${datasetId}] Empty output file`);
|
|
151
145
|
return { success: true, validation, validRowCount: 0 };
|
|
@@ -209,4 +203,3 @@ async function validateJsonlRows({ env, sandboxId, outputPath, validator, datase
|
|
|
209
203
|
validRowCount,
|
|
210
204
|
};
|
|
211
205
|
}
|
|
212
|
-
//# sourceMappingURL=completeDataset.tool.js.map
|
package/dist/dataset/steps.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ export declare function getDatasetServiceDb(env?: any): Promise<any>;
|
|
|
2
2
|
export declare function datasetGetByIdStep(params: {
|
|
3
3
|
env?: any;
|
|
4
4
|
datasetId: string;
|
|
5
|
-
}): Promise<import("../service").ServiceResult<any>>;
|
|
5
|
+
}): Promise<import("../service.js").ServiceResult<any>>;
|
|
6
6
|
export declare function datasetReadOutputJsonlStep(params: {
|
|
7
7
|
env?: any;
|
|
8
8
|
datasetId: string;
|
|
@@ -14,12 +14,12 @@ export declare function datasetUpdateSchemaStep(params: {
|
|
|
14
14
|
datasetId: string;
|
|
15
15
|
schema: any;
|
|
16
16
|
status?: string;
|
|
17
|
-
}): Promise<import("../service").ServiceResult<void>>;
|
|
17
|
+
}): Promise<import("../service.js").ServiceResult<void>>;
|
|
18
18
|
export declare function datasetUploadOutputFileStep(params: {
|
|
19
19
|
env?: any;
|
|
20
20
|
datasetId: string;
|
|
21
21
|
fileBuffer: Buffer;
|
|
22
|
-
}): Promise<import("../service").ServiceResult<{
|
|
22
|
+
}): Promise<import("../service.js").ServiceResult<{
|
|
23
23
|
fileId: string;
|
|
24
24
|
storagePath: string;
|
|
25
25
|
}>>;
|
|
@@ -29,11 +29,11 @@ export declare function datasetUpdateStatusStep(params: {
|
|
|
29
29
|
status: string;
|
|
30
30
|
calculatedTotalRows?: number;
|
|
31
31
|
actualGeneratedRowCount?: number;
|
|
32
|
-
}): Promise<import("../service").ServiceResult<void>>;
|
|
32
|
+
}): Promise<import("../service.js").ServiceResult<void>>;
|
|
33
33
|
export declare function datasetClearStep(params: {
|
|
34
34
|
env?: any;
|
|
35
35
|
datasetId: string;
|
|
36
|
-
}): Promise<import("../service").ServiceResult<{
|
|
36
|
+
}): Promise<import("../service.js").ServiceResult<{
|
|
37
37
|
deletedCount: number;
|
|
38
38
|
}>>;
|
|
39
39
|
export declare function datasetPreviewRowsStep(params: {
|
|
@@ -43,4 +43,3 @@ export declare function datasetPreviewRowsStep(params: {
|
|
|
43
43
|
}): Promise<{
|
|
44
44
|
rows: any[];
|
|
45
45
|
}>;
|
|
46
|
-
//# sourceMappingURL=steps.d.ts.map
|