@ekairos/dataset 1.22.40-beta.development.0 → 1.22.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +4 -5
- package/dist/builder/instructions.js +15 -21
- package/dist/builder/materialize.d.ts +77 -10
- package/dist/builder/materialize.js +495 -152
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +10 -6
- package/dist/builder/persistence.js +107 -62
- package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -1
- package/dist/builder/{sourceRows.js → rows.js} +3 -9
- package/dist/builder/schemaInference.d.ts +1 -2
- package/dist/builder/schemaInference.js +4 -12
- package/dist/builder/types.d.ts +41 -26
- package/dist/builder/types.js +1 -3
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +537 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +2 -3
- package/dist/dataset.js +73 -51
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/defineNotation.tool.d.ts +49 -0
- package/dist/defineNotation.tool.js +154 -0
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +182 -136
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +152 -32
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +9 -7
- package/dist/index.js +9 -23
- package/dist/materializeDataset.tool.d.ts +35 -28
- package/dist/materializeDataset.tool.js +74 -68
- package/dist/notation.d.ts +205 -0
- package/dist/notation.js +424 -0
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +24 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +14 -3
- package/dist/schema.js +27 -26
- package/dist/service.d.ts +12 -5
- package/dist/service.js +88 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +66 -46
- package/dist/transform/transform-dataset.agent.d.ts +21 -46
- package/dist/transform/transform-dataset.agent.js +152 -93
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +96 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +33 -8
- package/dist/builder/instructions.d.ts.map +0 -1
- package/dist/builder/instructions.js.map +0 -1
- package/dist/builder/materialize.d.ts.map +0 -1
- package/dist/builder/materialize.js.map +0 -1
- package/dist/builder/persistence.d.ts.map +0 -1
- package/dist/builder/persistence.js.map +0 -1
- package/dist/builder/schemaInference.d.ts.map +0 -1
- package/dist/builder/schemaInference.js.map +0 -1
- package/dist/builder/sourceRows.d.ts.map +0 -1
- package/dist/builder/sourceRows.js.map +0 -1
- package/dist/builder/types.d.ts.map +0 -1
- package/dist/builder/types.js.map +0 -1
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts +0 -22
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js +0 -29
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalDatasetResource } from "./types.js";
|
|
2
|
+
export declare function materializeQueryResource<Runtime extends AnyDatasetRuntime>(runtime: DatasetBuilderState<Runtime>["runtime"], resource: Extract<InternalDatasetResource, {
|
|
3
|
+
kind: "query";
|
|
4
|
+
}>, params: {
|
|
5
|
+
datasetId: string;
|
|
6
|
+
sandboxId?: string;
|
|
7
|
+
schema?: DatasetSchemaInput;
|
|
8
|
+
title?: string;
|
|
9
|
+
instructions?: string;
|
|
10
|
+
first?: boolean;
|
|
11
|
+
contextId: string;
|
|
12
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { materializeRowsToDataset } from "./persistence.js";
|
|
2
|
+
import { getDomainDescriptor, normalizeQueryRows } from "./rows.js";
|
|
3
|
+
async function readQueryResourceRowsStep(params) {
|
|
4
|
+
"use step";
|
|
5
|
+
const db = await params.runtime.db();
|
|
6
|
+
const result = await db.query(params.query);
|
|
7
|
+
return { rows: normalizeQueryRows(result) };
|
|
8
|
+
}
|
|
9
|
+
export async function materializeQueryResource(runtime, resource, params) {
|
|
10
|
+
const { rows } = await readQueryResourceRowsStep({
|
|
11
|
+
runtime,
|
|
12
|
+
query: resource.query,
|
|
13
|
+
});
|
|
14
|
+
const domainDescriptor = getDomainDescriptor(resource.domain);
|
|
15
|
+
return await materializeRowsToDataset(runtime, {
|
|
16
|
+
datasetId: params.datasetId,
|
|
17
|
+
sandboxId: params.sandboxId,
|
|
18
|
+
title: params.title ?? resource.title,
|
|
19
|
+
instructions: params.instructions,
|
|
20
|
+
contextId: params.contextId,
|
|
21
|
+
analysis: {
|
|
22
|
+
query: resource.query,
|
|
23
|
+
explanation: resource.explanation,
|
|
24
|
+
...domainDescriptor,
|
|
25
|
+
},
|
|
26
|
+
rows,
|
|
27
|
+
schema: params.schema,
|
|
28
|
+
inferSchema: !params.schema,
|
|
29
|
+
first: params.first,
|
|
30
|
+
});
|
|
31
|
+
}
|
|
@@ -1,18 +1,22 @@
|
|
|
1
|
-
import type { AnyDatasetRuntime, DatasetBuildResult,
|
|
2
|
-
export declare function
|
|
1
|
+
import type { AnyDatasetRuntime, DatasetBuildResult, DatasetTextResourceInput, MaterializeRowsParams } from "./types.js";
|
|
2
|
+
export declare function defaultTextResourceName(resource: DatasetTextResourceInput): string;
|
|
3
3
|
export declare function getDatasetDb<Runtime extends AnyDatasetRuntime>(runtime: Runtime): Promise<any>;
|
|
4
4
|
export declare function createOrUpdateDatasetMetadata<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: {
|
|
5
5
|
datasetId: string;
|
|
6
6
|
sandboxId?: string;
|
|
7
7
|
title?: string;
|
|
8
8
|
instructions?: string;
|
|
9
|
-
|
|
10
|
-
sourceKinds: string[];
|
|
9
|
+
contextId: string;
|
|
11
10
|
analysis?: any;
|
|
12
11
|
schema?: any;
|
|
13
12
|
status?: string;
|
|
14
13
|
}): Promise<void>;
|
|
15
14
|
export declare function materializeRowsToDataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: MaterializeRowsParams): Promise<string>;
|
|
16
|
-
export declare function
|
|
15
|
+
export declare function uploadInlineTextResource<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, resource: DatasetTextResourceInput): Promise<string>;
|
|
17
16
|
export declare function finalizeBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, withFirst: boolean): Promise<DatasetBuildResult>;
|
|
18
|
-
|
|
17
|
+
export declare function createDatasetBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: {
|
|
18
|
+
datasetId: string;
|
|
19
|
+
dataset: any;
|
|
20
|
+
previewRows: any[];
|
|
21
|
+
firstRow?: any | null;
|
|
22
|
+
}): DatasetBuildResult;
|
|
@@ -1,41 +1,38 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
const
|
|
11
|
-
const schemaInference_1 = require("./schemaInference");
|
|
12
|
-
const sourceRows_1 = require("./sourceRows");
|
|
13
|
-
function defaultTextSourceName(source) {
|
|
14
|
-
if (source.name?.trim())
|
|
15
|
-
return source.name.trim();
|
|
16
|
-
const mimeType = String(source.mimeType ?? "").toLowerCase();
|
|
1
|
+
import { DatasetService } from "../service.js";
|
|
2
|
+
import { datasetDomain } from "../schema.js";
|
|
3
|
+
import { annotateNotationEvidence, inferQueryNotation, } from "../notation.js";
|
|
4
|
+
import { datasetGetByIdStep, datasetPreviewRowsStep, datasetReadOneStep, datasetReadRowsStep, } from "../dataset/steps.js";
|
|
5
|
+
import { inferDatasetSchema, validateRows } from "./schemaInference.js";
|
|
6
|
+
import { rowsToJsonl } from "./rows.js";
|
|
7
|
+
export function defaultTextResourceName(resource) {
|
|
8
|
+
if (resource.name?.trim())
|
|
9
|
+
return resource.name.trim();
|
|
10
|
+
const mimeType = String(resource.mimeType ?? "").toLowerCase();
|
|
17
11
|
if (mimeType.includes("csv"))
|
|
18
|
-
return "
|
|
12
|
+
return "resource.csv";
|
|
19
13
|
if (mimeType.includes("json"))
|
|
20
|
-
return "
|
|
14
|
+
return "resource.json";
|
|
21
15
|
if (mimeType.includes("yaml") || mimeType.includes("yml"))
|
|
22
|
-
return "
|
|
23
|
-
return "
|
|
16
|
+
return "resource.yaml";
|
|
17
|
+
return "resource.txt";
|
|
24
18
|
}
|
|
25
|
-
async function getDatasetDb(runtime) {
|
|
26
|
-
const scoped = await runtime.use(
|
|
19
|
+
export async function getDatasetDb(runtime) {
|
|
20
|
+
const scoped = await runtime.use(datasetDomain);
|
|
27
21
|
return scoped.db;
|
|
28
22
|
}
|
|
29
|
-
async function createOrUpdateDatasetMetadata(runtime, params) {
|
|
23
|
+
export async function createOrUpdateDatasetMetadata(runtime, params) {
|
|
24
|
+
"use step";
|
|
25
|
+
if (!params.contextId.trim()) {
|
|
26
|
+
throw new Error("dataset_context_required");
|
|
27
|
+
}
|
|
30
28
|
const db = await getDatasetDb(runtime);
|
|
31
|
-
const service = new
|
|
29
|
+
const service = new DatasetService(db);
|
|
32
30
|
const result = await service.createDataset({
|
|
33
31
|
id: params.datasetId,
|
|
34
32
|
sandboxId: params.sandboxId,
|
|
35
33
|
title: params.title ?? params.datasetId,
|
|
36
34
|
instructions: params.instructions ?? "",
|
|
37
|
-
|
|
38
|
-
sourceKinds: params.sourceKinds,
|
|
35
|
+
contextId: params.contextId,
|
|
39
36
|
analysis: params.analysis,
|
|
40
37
|
schema: params.schema,
|
|
41
38
|
status: params.status ?? "building",
|
|
@@ -45,29 +42,29 @@ async function createOrUpdateDatasetMetadata(runtime, params) {
|
|
|
45
42
|
throw new Error(result.error);
|
|
46
43
|
}
|
|
47
44
|
}
|
|
48
|
-
async function materializeRowsToDataset(runtime, params) {
|
|
45
|
+
export async function materializeRowsToDataset(runtime, params) {
|
|
46
|
+
"use step";
|
|
49
47
|
if (params.first && params.rows.length > 1) {
|
|
50
48
|
throw new Error("dataset_first_expected_zero_or_one_row");
|
|
51
49
|
}
|
|
52
50
|
const resolvedSchema = params.schema ??
|
|
53
|
-
|
|
54
|
-
|
|
51
|
+
inferDatasetSchema(params.rows, params.title ? `${params.title}Row` : "DatasetRow", params.title ? `One row for ${params.title}` : "One dataset row");
|
|
52
|
+
validateRows(params.rows, resolvedSchema);
|
|
55
53
|
await createOrUpdateDatasetMetadata(runtime, {
|
|
56
54
|
datasetId: params.datasetId,
|
|
57
55
|
sandboxId: params.sandboxId,
|
|
58
56
|
title: params.title,
|
|
59
57
|
instructions: params.instructions,
|
|
60
|
-
|
|
61
|
-
sourceKinds: params.sourceKinds,
|
|
58
|
+
contextId: params.contextId,
|
|
62
59
|
analysis: params.analysis,
|
|
63
60
|
schema: resolvedSchema,
|
|
64
61
|
status: "building",
|
|
65
62
|
});
|
|
66
63
|
const db = await getDatasetDb(runtime);
|
|
67
|
-
const service = new
|
|
64
|
+
const service = new DatasetService(db);
|
|
68
65
|
const uploadResult = await service.uploadDatasetOutputFile({
|
|
69
66
|
datasetId: params.datasetId,
|
|
70
|
-
fileBuffer: Buffer.from(
|
|
67
|
+
fileBuffer: Buffer.from(rowsToJsonl(params.rows), "utf-8"),
|
|
71
68
|
});
|
|
72
69
|
if (!uploadResult.ok) {
|
|
73
70
|
throw new Error(uploadResult.error);
|
|
@@ -81,67 +78,115 @@ async function materializeRowsToDataset(runtime, params) {
|
|
|
81
78
|
if (!statusResult.ok) {
|
|
82
79
|
throw new Error(statusResult.error);
|
|
83
80
|
}
|
|
81
|
+
// Formal notation, informative only (never blocks the build): a notation
|
|
82
|
+
// proposed during the build (agent iterations) gets advisory evidence
|
|
83
|
+
// against the materialized rows; query-backed builds with no proposed
|
|
84
|
+
// notation get the deterministic one derived from query + schema + rows.
|
|
85
|
+
try {
|
|
86
|
+
const existing = await service.getDatasetById(params.datasetId);
|
|
87
|
+
const previous = (existing.ok ? existing.data?.notation : null);
|
|
88
|
+
const analysis = (params.analysis ?? {});
|
|
89
|
+
const queryNotation = analysis.query && typeof analysis.query === "object"
|
|
90
|
+
? inferQueryNotation({
|
|
91
|
+
entityNames: Object.keys(analysis.query),
|
|
92
|
+
rowCount: params.rows.length,
|
|
93
|
+
schema: resolvedSchema,
|
|
94
|
+
explanation: typeof analysis.explanation === "string" ? analysis.explanation : undefined,
|
|
95
|
+
})
|
|
96
|
+
: null;
|
|
97
|
+
// Query-backed builds are deterministic, so a freshly inferred notation
|
|
98
|
+
// always wins (a prior run's notation would be stale). Only agent-built
|
|
99
|
+
// datasets (no query) keep the notation the agent proposed during the
|
|
100
|
+
// build, which by now is the latest `previous`.
|
|
101
|
+
const candidate = queryNotation ??
|
|
102
|
+
(previous && Array.isArray(previous.predicates) && previous.predicates.length > 0
|
|
103
|
+
? previous
|
|
104
|
+
: null);
|
|
105
|
+
if (candidate) {
|
|
106
|
+
await service.updateDatasetNotation({
|
|
107
|
+
datasetId: params.datasetId,
|
|
108
|
+
notation: annotateNotationEvidence(candidate, params.rows),
|
|
109
|
+
});
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
catch {
|
|
113
|
+
// notation must never affect the build result
|
|
114
|
+
}
|
|
84
115
|
return params.datasetId;
|
|
85
116
|
}
|
|
86
|
-
async function
|
|
117
|
+
export async function uploadInlineTextResource(runtime, datasetId, resource) {
|
|
118
|
+
"use step";
|
|
87
119
|
const db = await getDatasetDb(runtime);
|
|
88
|
-
const fileName =
|
|
89
|
-
const storagePath = `/dataset/
|
|
90
|
-
const uploadResult = await db.storage.uploadFile(storagePath, Buffer.from(
|
|
91
|
-
contentType:
|
|
120
|
+
const fileName = defaultTextResourceName(resource);
|
|
121
|
+
const storagePath = `/dataset/resource/${datasetId}/${Date.now()}-${fileName}`;
|
|
122
|
+
const uploadResult = await db.storage.uploadFile(storagePath, Buffer.from(resource.text, "utf-8"), {
|
|
123
|
+
contentType: resource.mimeType ?? "text/plain",
|
|
92
124
|
contentDisposition: fileName,
|
|
93
125
|
});
|
|
94
126
|
const fileId = uploadResult?.data?.id;
|
|
95
127
|
if (!fileId) {
|
|
96
|
-
throw new Error("
|
|
128
|
+
throw new Error("dataset_text_resource_upload_failed");
|
|
97
129
|
}
|
|
98
130
|
return fileId;
|
|
99
131
|
}
|
|
100
|
-
async function finalizeBuildResult(runtime, datasetId, withFirst) {
|
|
101
|
-
const
|
|
102
|
-
|
|
103
|
-
const datasetResult = await service.getDatasetById(datasetId);
|
|
104
|
-
if (!datasetResult.ok) {
|
|
132
|
+
export async function finalizeBuildResult(runtime, datasetId, withFirst) {
|
|
133
|
+
const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
|
|
134
|
+
if (!datasetResult.ok)
|
|
105
135
|
throw new Error(datasetResult.error);
|
|
106
|
-
}
|
|
107
|
-
const previewResult = await service.previewRows(datasetId, 20);
|
|
108
|
-
if (!previewResult.ok) {
|
|
109
|
-
throw new Error(previewResult.error);
|
|
110
|
-
}
|
|
136
|
+
const previewResult = await datasetPreviewRowsStep({ runtime, datasetId, limit: 20 });
|
|
111
137
|
const reader = {
|
|
112
138
|
async read(cursorOrParams, limit) {
|
|
113
139
|
const params = typeof cursorOrParams === "object" && cursorOrParams !== null
|
|
114
140
|
? cursorOrParams
|
|
115
141
|
: { cursor: cursorOrParams, limit };
|
|
116
|
-
|
|
142
|
+
return await datasetReadRowsStep({
|
|
143
|
+
runtime,
|
|
117
144
|
datasetId,
|
|
118
145
|
cursor: params.cursor,
|
|
119
146
|
limit: params.limit,
|
|
120
147
|
});
|
|
121
|
-
if (!rowsResult.ok) {
|
|
122
|
-
throw new Error(rowsResult.error);
|
|
123
|
-
}
|
|
124
|
-
return rowsResult.data;
|
|
125
148
|
},
|
|
126
149
|
};
|
|
150
|
+
const notation = (datasetResult.data?.notation ?? null);
|
|
127
151
|
if (!withFirst) {
|
|
128
152
|
return {
|
|
129
153
|
datasetId,
|
|
130
154
|
dataset: datasetResult.data,
|
|
131
|
-
|
|
155
|
+
notation,
|
|
156
|
+
previewRows: previewResult.rows,
|
|
132
157
|
reader,
|
|
133
158
|
};
|
|
134
159
|
}
|
|
135
|
-
const firstResult = await
|
|
136
|
-
if (!firstResult.ok) {
|
|
137
|
-
throw new Error(firstResult.error);
|
|
138
|
-
}
|
|
160
|
+
const firstResult = await datasetReadOneStep({ runtime, datasetId });
|
|
139
161
|
return {
|
|
140
162
|
datasetId,
|
|
141
163
|
dataset: datasetResult.data,
|
|
142
|
-
|
|
164
|
+
notation,
|
|
165
|
+
previewRows: previewResult.rows,
|
|
166
|
+
reader,
|
|
167
|
+
firstRow: firstResult.row,
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
export function createDatasetBuildResult(runtime, params) {
|
|
171
|
+
const reader = {
|
|
172
|
+
async read(cursorOrParams, limit) {
|
|
173
|
+
const readParams = typeof cursorOrParams === "object" && cursorOrParams !== null
|
|
174
|
+
? cursorOrParams
|
|
175
|
+
: { cursor: cursorOrParams, limit };
|
|
176
|
+
return await datasetReadRowsStep({
|
|
177
|
+
runtime,
|
|
178
|
+
datasetId: params.datasetId,
|
|
179
|
+
cursor: readParams.cursor,
|
|
180
|
+
limit: readParams.limit,
|
|
181
|
+
});
|
|
182
|
+
},
|
|
183
|
+
};
|
|
184
|
+
return {
|
|
185
|
+
datasetId: params.datasetId,
|
|
186
|
+
dataset: params.dataset,
|
|
187
|
+
notation: (params.dataset?.notation ?? null),
|
|
188
|
+
previewRows: params.previewRows,
|
|
143
189
|
reader,
|
|
144
|
-
firstRow:
|
|
190
|
+
...(params.firstRow !== undefined ? { firstRow: params.firstRow } : {}),
|
|
145
191
|
};
|
|
146
192
|
}
|
|
147
|
-
//# sourceMappingURL=persistence.js.map
|
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.rowsToJsonl = rowsToJsonl;
|
|
4
|
-
exports.normalizeQueryRows = normalizeQueryRows;
|
|
5
|
-
exports.getDomainDescriptor = getDomainDescriptor;
|
|
6
|
-
function rowsToJsonl(rows) {
|
|
1
|
+
export function rowsToJsonl(rows) {
|
|
7
2
|
return rows
|
|
8
3
|
.map((row) => JSON.stringify({
|
|
9
4
|
type: "row",
|
|
@@ -12,7 +7,7 @@ function rowsToJsonl(rows) {
|
|
|
12
7
|
.join("\n")
|
|
13
8
|
.concat(rows.length > 0 ? "\n" : "");
|
|
14
9
|
}
|
|
15
|
-
function normalizeQueryRows(result) {
|
|
10
|
+
export function normalizeQueryRows(result) {
|
|
16
11
|
if (!result || typeof result !== "object")
|
|
17
12
|
return [];
|
|
18
13
|
const entries = Object.entries(result);
|
|
@@ -49,7 +44,7 @@ function normalizeQueryRows(result) {
|
|
|
49
44
|
}
|
|
50
45
|
return rows;
|
|
51
46
|
}
|
|
52
|
-
function getDomainDescriptor(domain) {
|
|
47
|
+
export function getDomainDescriptor(domain) {
|
|
53
48
|
const meta = domain?.meta ?? {};
|
|
54
49
|
const context = typeof domain?.context === "function" ? domain.context() : {};
|
|
55
50
|
const name = String(meta?.name ?? context?.name ?? "domain");
|
|
@@ -59,4 +54,3 @@ function getDomainDescriptor(domain) {
|
|
|
59
54
|
...(packageName ? { domainPackageName: packageName } : {}),
|
|
60
55
|
};
|
|
61
56
|
}
|
|
62
|
-
//# sourceMappingURL=sourceRows.js.map
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { DatasetSchemaInput } from "./types";
|
|
1
|
+
import type { DatasetSchemaInput } from "./types.js";
|
|
2
2
|
export declare function inferDatasetSchema(rows: any[], title?: string, description?: string): DatasetSchemaInput;
|
|
3
3
|
export declare function validateRows(rows: any[], schema: DatasetSchemaInput): void;
|
|
4
|
-
//# sourceMappingURL=schemaInference.d.ts.map
|
|
@@ -1,12 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.inferDatasetSchema = inferDatasetSchema;
|
|
7
|
-
exports.validateRows = validateRows;
|
|
8
|
-
const ajv_1 = __importDefault(require("ajv"));
|
|
9
|
-
const ajv = new ajv_1.default({ allErrors: true, strict: false });
|
|
1
|
+
import Ajv from "ajv";
|
|
2
|
+
const ajv = new Ajv({ allErrors: true, strict: false });
|
|
10
3
|
function inferJsonSchemaType(value) {
|
|
11
4
|
if (value === null)
|
|
12
5
|
return { type: "null" };
|
|
@@ -23,7 +16,7 @@ function inferJsonSchemaType(value) {
|
|
|
23
16
|
return { type: "string" };
|
|
24
17
|
}
|
|
25
18
|
}
|
|
26
|
-
function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
|
|
19
|
+
export function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
|
|
27
20
|
const properties = {};
|
|
28
21
|
const required = [];
|
|
29
22
|
const keys = new Set();
|
|
@@ -56,7 +49,7 @@ function inferDatasetSchema(rows, title = "DatasetRow", description = "One datas
|
|
|
56
49
|
},
|
|
57
50
|
};
|
|
58
51
|
}
|
|
59
|
-
function validateRows(rows, schema) {
|
|
52
|
+
export function validateRows(rows, schema) {
|
|
60
53
|
const validator = ajv.compile(schema.schema);
|
|
61
54
|
for (const row of rows) {
|
|
62
55
|
const valid = validator(row);
|
|
@@ -66,4 +59,3 @@ function validateRows(rows, schema) {
|
|
|
66
59
|
}
|
|
67
60
|
}
|
|
68
61
|
}
|
|
69
|
-
//# sourceMappingURL=schemaInference.js.map
|
package/dist/builder/types.d.ts
CHANGED
|
@@ -1,38 +1,46 @@
|
|
|
1
1
|
import type { InstaQLParams, ValidQuery } from "@instantdb/core";
|
|
2
2
|
import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
|
|
3
3
|
import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
|
|
4
|
-
import type {
|
|
5
|
-
import {
|
|
6
|
-
|
|
4
|
+
import type { ContextIdentifier, StoredContextResource } from "@ekairos/events";
|
|
5
|
+
import type { ContextReactor } from "@ekairos/reactor/context";
|
|
6
|
+
import { datasetDomain } from "../schema.js";
|
|
7
|
+
import type { DatasetNotation } from "../notation.js";
|
|
8
|
+
export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
|
|
7
9
|
query: InstaQLParams<DomainInstantSchema<D>>;
|
|
8
10
|
title?: string;
|
|
9
11
|
explanation?: string;
|
|
10
12
|
domain: D;
|
|
11
13
|
};
|
|
12
|
-
export type
|
|
14
|
+
export type DatasetFileResourceInput = {
|
|
13
15
|
fileId: string;
|
|
14
16
|
description?: string;
|
|
17
|
+
filename?: string;
|
|
18
|
+
mediaType?: string;
|
|
15
19
|
};
|
|
16
|
-
export type
|
|
20
|
+
export type DatasetTextResourceInput = {
|
|
17
21
|
text: string;
|
|
18
22
|
mimeType?: string;
|
|
19
23
|
name?: string;
|
|
20
24
|
description?: string;
|
|
21
25
|
};
|
|
22
|
-
export type
|
|
26
|
+
export type DatasetExistingResourceInput = {
|
|
23
27
|
datasetId: string;
|
|
24
28
|
description?: string;
|
|
25
29
|
};
|
|
26
|
-
export type
|
|
30
|
+
export type DatasetContextResourceInput = ContextIdentifier;
|
|
31
|
+
export type DatasetFileResource = {
|
|
27
32
|
kind: "file";
|
|
28
|
-
} &
|
|
29
|
-
export type
|
|
33
|
+
} & DatasetFileResourceInput;
|
|
34
|
+
export type DatasetTextResource = {
|
|
30
35
|
kind: "text";
|
|
31
|
-
} &
|
|
32
|
-
export type
|
|
36
|
+
} & DatasetTextResourceInput;
|
|
37
|
+
export type DatasetExistingResource = {
|
|
33
38
|
kind: "dataset";
|
|
34
|
-
} &
|
|
35
|
-
export type
|
|
39
|
+
} & DatasetExistingResourceInput;
|
|
40
|
+
export type DatasetContextResource = {
|
|
41
|
+
kind: "context";
|
|
42
|
+
} & DatasetContextResourceInput;
|
|
43
|
+
export type DatasetResourceInput = DatasetFileResourceInput | DatasetTextResourceInput | DatasetExistingResourceInput | DatasetContextResourceInput | DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource;
|
|
36
44
|
export type DatasetSchemaInput = {
|
|
37
45
|
title?: string;
|
|
38
46
|
description?: string;
|
|
@@ -42,13 +50,15 @@ export type DatasetOutput = "rows" | "object";
|
|
|
42
50
|
export type DatasetMode = "auto" | "schema";
|
|
43
51
|
export type DatasetBuilderOptions = {
|
|
44
52
|
datasetId?: string;
|
|
53
|
+
durable?: boolean;
|
|
45
54
|
};
|
|
46
55
|
export type DatasetBuildOptions = {
|
|
47
56
|
datasetId?: string;
|
|
57
|
+
durable?: boolean;
|
|
48
58
|
};
|
|
49
|
-
export type
|
|
59
|
+
export type InternalDatasetResource = DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource | ({
|
|
50
60
|
kind: "query";
|
|
51
|
-
} &
|
|
61
|
+
} & DatasetQueryResourceInput);
|
|
52
62
|
export type DatasetReaderResult = {
|
|
53
63
|
rows: any[];
|
|
54
64
|
cursor: number;
|
|
@@ -64,6 +74,9 @@ export type DatasetReader = {
|
|
|
64
74
|
export type DatasetBuildResult = {
|
|
65
75
|
datasetId: string;
|
|
66
76
|
dataset: any;
|
|
77
|
+
/** the formal definition (intensional face), co-equal with the rows */
|
|
78
|
+
notation: DatasetNotation | null;
|
|
79
|
+
/** preview of the materialization (extensional face) */
|
|
67
80
|
previewRows: any[];
|
|
68
81
|
reader: DatasetReader;
|
|
69
82
|
object?: any | null;
|
|
@@ -74,8 +87,8 @@ export type DatasetRuntimeEnv = {
|
|
|
74
87
|
};
|
|
75
88
|
export type AnyDatasetRuntime = EkairosRuntime<any, any, any>;
|
|
76
89
|
export type DatasetRuntimeHandle<Runtime extends AnyDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
|
|
77
|
-
export type
|
|
78
|
-
export type
|
|
90
|
+
export type CompatibleQueryDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
|
|
91
|
+
export type DatasetQueryResourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
|
|
79
92
|
query: Q;
|
|
80
93
|
title?: string;
|
|
81
94
|
explanation?: string;
|
|
@@ -83,14 +96,17 @@ export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends Va
|
|
|
83
96
|
export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
|
|
84
97
|
runtime: Runtime;
|
|
85
98
|
env: Runtime["env"] & DatasetRuntimeEnv;
|
|
86
|
-
|
|
99
|
+
resources: InternalDatasetResource[];
|
|
100
|
+
contextResources?: StoredContextResource[];
|
|
87
101
|
title?: string;
|
|
88
102
|
sandboxId?: string;
|
|
103
|
+
contextId?: string;
|
|
89
104
|
outputSchema?: DatasetSchemaInput;
|
|
90
105
|
output: DatasetOutput;
|
|
91
106
|
inferSchema: boolean;
|
|
92
107
|
instructions?: string;
|
|
93
108
|
reactor?: ContextReactor<any, any>;
|
|
109
|
+
durable?: boolean;
|
|
94
110
|
first: boolean;
|
|
95
111
|
};
|
|
96
112
|
export type MaterializeRowsParams = {
|
|
@@ -98,8 +114,7 @@ export type MaterializeRowsParams = {
|
|
|
98
114
|
sandboxId?: string;
|
|
99
115
|
title?: string;
|
|
100
116
|
instructions?: string;
|
|
101
|
-
|
|
102
|
-
sourceKinds: string[];
|
|
117
|
+
contextId: string;
|
|
103
118
|
analysis?: any;
|
|
104
119
|
rows: any[];
|
|
105
120
|
schema?: DatasetSchemaInput;
|
|
@@ -108,11 +123,12 @@ export type MaterializeRowsParams = {
|
|
|
108
123
|
};
|
|
109
124
|
export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
|
|
110
125
|
readonly datasetId: string;
|
|
111
|
-
fromFile(
|
|
112
|
-
fromText(
|
|
113
|
-
fromDataset(
|
|
114
|
-
|
|
115
|
-
|
|
126
|
+
fromFile(resource: DatasetFileResourceInput): DatasetBuilder<Runtime>;
|
|
127
|
+
fromText(resource: DatasetTextResourceInput): DatasetBuilder<Runtime>;
|
|
128
|
+
fromDataset(resource: DatasetExistingResourceInput): DatasetBuilder<Runtime>;
|
|
129
|
+
fromContext(context: DatasetContextResourceInput): DatasetBuilder<Runtime>;
|
|
130
|
+
from(...resources: DatasetResourceInput[]): DatasetBuilder<Runtime>;
|
|
131
|
+
fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleQueryDomain<Runtime, D>, resource: DatasetQueryResourceOptions<D, Q>): DatasetBuilder<Runtime>;
|
|
116
132
|
title(title: string): DatasetBuilder<Runtime>;
|
|
117
133
|
sandbox(input: {
|
|
118
134
|
sandboxId: string;
|
|
@@ -127,4 +143,3 @@ export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
|
|
|
127
143
|
first(): DatasetBuilder<Runtime>;
|
|
128
144
|
build(options?: DatasetBuildOptions): Promise<DatasetBuildResult>;
|
|
129
145
|
};
|
|
130
|
-
//# sourceMappingURL=types.d.ts.map
|
package/dist/builder/types.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
interface ClearDatasetToolParams {
|
|
2
2
|
datasetId: string;
|
|
3
3
|
sandboxId: string;
|
|
4
|
-
|
|
4
|
+
runtime: any;
|
|
5
5
|
}
|
|
6
|
-
export declare function createClearDatasetTool({ datasetId, sandboxId,
|
|
6
|
+
export declare function createClearDatasetTool({ datasetId, sandboxId, runtime }: ClearDatasetToolParams): import("ai").Tool<{
|
|
7
7
|
reason: string;
|
|
8
8
|
}, {
|
|
9
9
|
success: boolean;
|
|
@@ -17,4 +17,3 @@ export declare function createClearDatasetTool({ datasetId, sandboxId, env }: Cl
|
|
|
17
17
|
error?: undefined;
|
|
18
18
|
}>;
|
|
19
19
|
export {};
|
|
20
|
-
//# sourceMappingURL=clearDataset.tool.d.ts.map
|
|
@@ -1,27 +1,24 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const steps_2 = require("./dataset/steps");
|
|
9
|
-
function createClearDatasetTool({ datasetId, sandboxId, env }) {
|
|
10
|
-
return (0, ai_1.tool)({
|
|
1
|
+
import { tool } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { runDatasetSandboxCommandStep } from "./sandbox/steps.js";
|
|
4
|
+
import { getDatasetOutputPath } from "./datasetFiles.js";
|
|
5
|
+
import { datasetClearStep } from "./dataset/steps.js";
|
|
6
|
+
export function createClearDatasetTool({ datasetId, sandboxId, runtime }) {
|
|
7
|
+
return tool({
|
|
11
8
|
description: "Clear all dataset records and output files. This will delete all generated data and reset the dataset to its initial state.",
|
|
12
|
-
inputSchema:
|
|
13
|
-
reason:
|
|
9
|
+
inputSchema: z.object({
|
|
10
|
+
reason: z.string().describe("The reason for clearing the dataset"),
|
|
14
11
|
}),
|
|
15
12
|
execute: async ({ reason }) => {
|
|
16
13
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
17
14
|
console.log(`[Dataset ${datasetId}] Tool: clearDataset`);
|
|
18
15
|
console.log(`[Dataset ${datasetId}] Reason: ${reason}`);
|
|
19
16
|
console.log(`[Dataset ${datasetId}] ========================================`);
|
|
20
|
-
const outputPath =
|
|
17
|
+
const outputPath = getDatasetOutputPath(datasetId);
|
|
21
18
|
console.log(`[Dataset ${datasetId}] Step 1: Deleting output file`);
|
|
22
19
|
try {
|
|
23
|
-
const result = await
|
|
24
|
-
|
|
20
|
+
const result = await runDatasetSandboxCommandStep({
|
|
21
|
+
runtime,
|
|
25
22
|
sandboxId,
|
|
26
23
|
cmd: "rm",
|
|
27
24
|
args: ["-f", outputPath],
|
|
@@ -38,7 +35,7 @@ function createClearDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
38
35
|
console.warn(`[Dataset ${datasetId}] Error deleting output file: ${message}`);
|
|
39
36
|
}
|
|
40
37
|
console.log(`[Dataset ${datasetId}] Step 2: Clearing dataset records`);
|
|
41
|
-
const clearResult = await
|
|
38
|
+
const clearResult = await datasetClearStep({ runtime, datasetId });
|
|
42
39
|
if (!clearResult.ok) {
|
|
43
40
|
console.error(`[Dataset ${datasetId}] Failed to clear dataset: ${clearResult.error}`);
|
|
44
41
|
return {
|
|
@@ -58,4 +55,3 @@ function createClearDatasetTool({ datasetId, sandboxId, env }) {
|
|
|
58
55
|
},
|
|
59
56
|
});
|
|
60
57
|
}
|
|
61
|
-
//# sourceMappingURL=clearDataset.tool.js.map
|