@ekairos/dataset 1.22.82-beta.development.0 → 1.22.84-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/agentMaterializers.d.ts +2 -2
- package/dist/builder/context.d.ts +7 -0
- package/dist/builder/context.js +192 -0
- package/dist/builder/instructions.d.ts +3 -3
- package/dist/builder/instructions.js +10 -10
- package/dist/builder/materialize.d.ts +12 -11
- package/dist/builder/materialize.js +122 -121
- package/dist/builder/materializeQuery.d.ts +3 -2
- package/dist/builder/materializeQuery.js +10 -19
- package/dist/builder/persistence.d.ts +4 -5
- package/dist/builder/persistence.js +20 -19
- package/dist/builder/types.d.ts +31 -24
- package/dist/completeDataset.steps.d.ts +9 -8
- package/dist/completeDataset.steps.js +18 -11
- package/dist/completeDataset.tool.d.ts +9 -8
- package/dist/completeDataset.tool.js +2 -1
- package/dist/contextWorkspace.d.ts +72 -0
- package/dist/contextWorkspace.js +218 -0
- package/dist/dataset.d.ts +1 -1
- package/dist/dataset.js +42 -29
- package/dist/datasetFiles.d.ts +1 -1
- package/dist/datasetFiles.js +3 -3
- package/dist/executeCommand.tool.d.ts +1 -43
- package/dist/executeCommand.tool.js +10 -3
- package/dist/file/file-dataset.agent.d.ts +2 -0
- package/dist/file/file-dataset.agent.js +51 -16
- package/dist/file/file-dataset.steps.d.ts +6 -0
- package/dist/file/file-dataset.steps.js +18 -21
- package/dist/file/file-dataset.types.d.ts +10 -0
- package/dist/file/prompts.js +16 -14
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/materializeDataset.tool.d.ts +34 -26
- package/dist/materializeDataset.tool.js +40 -29
- package/dist/schema.d.ts +12 -2
- package/dist/schema.js +6 -3
- package/dist/service.d.ts +2 -2
- package/dist/service.js +6 -3
- package/dist/transform/filepreview.d.ts +2 -2
- package/dist/transform/filepreview.js +3 -3
- package/dist/transform/prompts.js +25 -25
- package/dist/transform/transform-dataset.agent.d.ts +4 -4
- package/dist/transform/transform-dataset.agent.js +29 -30
- package/dist/transform/transform-dataset.steps.d.ts +7 -7
- package/dist/transform/transform-dataset.steps.js +20 -20
- package/dist/transform/transform-dataset.types.d.ts +13 -13
- package/dist/transform/transformDataset.js +4 -4
- package/package.json +4 -4
- /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
- /package/dist/builder/{sourceRows.js → rows.js} +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { AnyDatasetRuntime, DatasetBuilderState,
|
|
1
|
+
import type { AnyDatasetRuntime, DatasetBuilderState, InternalDatasetResource } from "./types.js";
|
|
2
2
|
export type DatasetAgentMaterializers = {
|
|
3
|
-
|
|
3
|
+
materializeSingleFileLikeResource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, resource: Extract<InternalDatasetResource, {
|
|
4
4
|
kind: "file" | "text";
|
|
5
5
|
}>, targetDatasetId: string): Promise<string>;
|
|
6
6
|
materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { AnyDatasetRuntime, InternalDatasetResource } from "./types.js";
|
|
2
|
+
type DatasetContextResolution = {
|
|
3
|
+
contextId: string;
|
|
4
|
+
resources: InternalDatasetResource[];
|
|
5
|
+
};
|
|
6
|
+
export declare function resolveDatasetResourceContext<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, resources: InternalDatasetResource[]): Promise<DatasetContextResolution>;
|
|
7
|
+
export {};
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { eventsDomain } from "@ekairos/events";
|
|
2
|
+
import { createDatasetId } from "../id.js";
|
|
3
|
+
import { getDomainDescriptor } from "./rows.js";
|
|
4
|
+
function getContextWhere(context) {
|
|
5
|
+
return "id" in context ? { id: context.id } : { key: context.key };
|
|
6
|
+
}
|
|
7
|
+
async function getEventsDb(runtime) {
|
|
8
|
+
const scoped = await runtime.use(eventsDomain);
|
|
9
|
+
return scoped.db;
|
|
10
|
+
}
|
|
11
|
+
function resourceKey(index, resource) {
|
|
12
|
+
if (resource.kind === "file")
|
|
13
|
+
return `file:${index}:${resource.fileId}`;
|
|
14
|
+
if (resource.kind === "text")
|
|
15
|
+
return `text:${index}:${resource.name ?? "inline"}`;
|
|
16
|
+
if (resource.kind === "dataset")
|
|
17
|
+
return `dataset:${index}:${resource.datasetId}`;
|
|
18
|
+
if (resource.kind === "query")
|
|
19
|
+
return `query:${index}:${resource.title ?? "query"}`;
|
|
20
|
+
return `resource:${index}`;
|
|
21
|
+
}
|
|
22
|
+
function resourceName(index, resource) {
|
|
23
|
+
if (resource.kind === "file")
|
|
24
|
+
return resource.filename ?? `File ${index + 1}`;
|
|
25
|
+
if (resource.kind === "text")
|
|
26
|
+
return resource.name ?? `Text ${index + 1}`;
|
|
27
|
+
if (resource.kind === "dataset")
|
|
28
|
+
return resource.datasetId;
|
|
29
|
+
if (resource.kind === "query")
|
|
30
|
+
return resource.title ?? `Query ${index + 1}`;
|
|
31
|
+
return `Resource ${index + 1}`;
|
|
32
|
+
}
|
|
33
|
+
function resourceDescription(resource) {
|
|
34
|
+
if ("description" in resource && typeof resource.description === "string" && resource.description.trim()) {
|
|
35
|
+
return resource.description.trim();
|
|
36
|
+
}
|
|
37
|
+
if (resource.kind === "query" && typeof resource.explanation === "string" && resource.explanation.trim()) {
|
|
38
|
+
return resource.explanation.trim();
|
|
39
|
+
}
|
|
40
|
+
return `Dataset ${resource.kind} resource.`;
|
|
41
|
+
}
|
|
42
|
+
function resourceToContextResource(index, resource) {
|
|
43
|
+
const base = {
|
|
44
|
+
key: resourceKey(index, resource),
|
|
45
|
+
type: resource.kind,
|
|
46
|
+
name: resourceName(index, resource),
|
|
47
|
+
description: resourceDescription(resource),
|
|
48
|
+
};
|
|
49
|
+
if (resource.kind === "file") {
|
|
50
|
+
return {
|
|
51
|
+
...base,
|
|
52
|
+
fileId: resource.fileId,
|
|
53
|
+
filename: resource.filename,
|
|
54
|
+
mediaType: resource.mediaType,
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
if (resource.kind === "text") {
|
|
58
|
+
return {
|
|
59
|
+
...base,
|
|
60
|
+
text: resource.text,
|
|
61
|
+
mimeType: resource.mimeType,
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
if (resource.kind === "dataset") {
|
|
65
|
+
return {
|
|
66
|
+
...base,
|
|
67
|
+
datasetId: resource.datasetId,
|
|
68
|
+
};
|
|
69
|
+
}
|
|
70
|
+
if (resource.kind === "query") {
|
|
71
|
+
return {
|
|
72
|
+
...base,
|
|
73
|
+
query: resource.query,
|
|
74
|
+
title: resource.title,
|
|
75
|
+
explanation: resource.explanation,
|
|
76
|
+
...getDomainDescriptor(resource.domain),
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
return base;
|
|
80
|
+
}
|
|
81
|
+
async function createDatasetResourceContextStep(params) {
|
|
82
|
+
"use step";
|
|
83
|
+
const db = await getEventsDb(params.runtime);
|
|
84
|
+
const contextId = createDatasetId();
|
|
85
|
+
const now = new Date();
|
|
86
|
+
await db.transact([
|
|
87
|
+
db.tx.event_contexts[contextId].create({
|
|
88
|
+
createdAt: now,
|
|
89
|
+
updatedAt: now,
|
|
90
|
+
name: `Dataset ${params.datasetId} resource context`,
|
|
91
|
+
status: "open_idle",
|
|
92
|
+
content: {
|
|
93
|
+
datasetId: params.datasetId,
|
|
94
|
+
resourceCount: params.resources.length,
|
|
95
|
+
},
|
|
96
|
+
resources: params.resources,
|
|
97
|
+
description: `Dataset materialization context for ${params.datasetId}.`,
|
|
98
|
+
goal: "Materialize the dataset from the resources declared in this context.",
|
|
99
|
+
}),
|
|
100
|
+
]);
|
|
101
|
+
return {
|
|
102
|
+
contextId,
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
function contextResourceToDatasetResource(resource) {
|
|
106
|
+
if (resource.type === "file" && typeof resource.fileId === "string" && resource.fileId.trim()) {
|
|
107
|
+
return {
|
|
108
|
+
kind: "file",
|
|
109
|
+
fileId: resource.fileId.trim(),
|
|
110
|
+
description: resource.description,
|
|
111
|
+
filename: typeof resource.filename === "string" ? resource.filename : undefined,
|
|
112
|
+
mediaType: typeof resource.mediaType === "string" ? resource.mediaType : undefined,
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
if (resource.type === "dataset" &&
|
|
116
|
+
typeof resource.datasetId === "string" &&
|
|
117
|
+
resource.datasetId.trim()) {
|
|
118
|
+
return {
|
|
119
|
+
kind: "dataset",
|
|
120
|
+
datasetId: resource.datasetId.trim(),
|
|
121
|
+
description: resource.description,
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
if (resource.type === "text" && typeof resource.text === "string") {
|
|
125
|
+
return {
|
|
126
|
+
kind: "text",
|
|
127
|
+
text: String(resource.text),
|
|
128
|
+
mimeType: typeof resource.mimeType === "string"
|
|
129
|
+
? String(resource.mimeType)
|
|
130
|
+
: "text/plain",
|
|
131
|
+
name: resource.name,
|
|
132
|
+
description: resource.description,
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
if (resource.type === "query") {
|
|
136
|
+
throw new Error("dataset_context_query_resource_requires_builder_shortcut");
|
|
137
|
+
}
|
|
138
|
+
return {
|
|
139
|
+
kind: "text",
|
|
140
|
+
text: JSON.stringify({ resource }, null, 2),
|
|
141
|
+
mimeType: "application/vnd.ekairos.context-resource+json",
|
|
142
|
+
name: `${resource.key}.context-resource.json`,
|
|
143
|
+
description: resource.description,
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
async function readExistingContext(params) {
|
|
147
|
+
"use step";
|
|
148
|
+
const db = await getEventsDb(params.runtime);
|
|
149
|
+
const res = await db.query({
|
|
150
|
+
event_contexts: {
|
|
151
|
+
$: {
|
|
152
|
+
where: getContextWhere(params.context),
|
|
153
|
+
limit: 1,
|
|
154
|
+
},
|
|
155
|
+
},
|
|
156
|
+
});
|
|
157
|
+
const row = res?.event_contexts?.[0];
|
|
158
|
+
if (!row?.id)
|
|
159
|
+
throw new Error("dataset_context_not_found");
|
|
160
|
+
const resources = Array.isArray(row.resources)
|
|
161
|
+
? row.resources
|
|
162
|
+
: [];
|
|
163
|
+
if (resources.length === 0) {
|
|
164
|
+
throw new Error("dataset_context_resources_required");
|
|
165
|
+
}
|
|
166
|
+
return {
|
|
167
|
+
contextId: String(row.id),
|
|
168
|
+
resources: resources.map((resource) => contextResourceToDatasetResource(resource)),
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
export async function resolveDatasetResourceContext(runtime, datasetId, resources) {
|
|
172
|
+
const contextRefs = resources.filter((resource) => resource.kind === "context");
|
|
173
|
+
if (contextRefs.length > 1) {
|
|
174
|
+
throw new Error("dataset_context_resource_must_be_unique");
|
|
175
|
+
}
|
|
176
|
+
if (contextRefs.length === 1) {
|
|
177
|
+
if (resources.length > 1) {
|
|
178
|
+
throw new Error("dataset_context_resource_is_exclusive");
|
|
179
|
+
}
|
|
180
|
+
return await readExistingContext({ runtime, context: contextRefs[0] });
|
|
181
|
+
}
|
|
182
|
+
const contextResourceRecords = resources.map((resource, index) => resourceToContextResource(index, resource));
|
|
183
|
+
const created = await createDatasetResourceContextStep({
|
|
184
|
+
runtime,
|
|
185
|
+
datasetId,
|
|
186
|
+
resources: contextResourceRecords,
|
|
187
|
+
});
|
|
188
|
+
return {
|
|
189
|
+
contextId: created.contextId,
|
|
190
|
+
resources,
|
|
191
|
+
};
|
|
192
|
+
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { DatasetSchemaInput } from "./types.js";
|
|
2
|
-
export declare function buildFileDefaultInstructions(schema?: DatasetSchemaInput): "Create a dataset from the
|
|
3
|
-
export declare function
|
|
4
|
-
export declare function buildTransformInstructions(
|
|
2
|
+
export declare function buildFileDefaultInstructions(schema?: DatasetSchemaInput): "Create a dataset from the resource file and ensure each output row matches the provided dataset schema exactly." | "Create a dataset representing the resource content as structured rows.";
|
|
3
|
+
export declare function buildRawResourceInstructions(resourceKind: "file" | "text"): "Create a dataset representing the raw text content as structured rows without applying business transformations." | "Create a dataset representing the raw file content as structured rows without applying business transformations.";
|
|
4
|
+
export declare function buildTransformInstructions(resourceCount: number, userInstructions?: string, schema?: DatasetSchemaInput): string;
|
|
5
5
|
export declare function buildObjectOutputInstructions(userInstructions?: string): string;
|
|
@@ -1,29 +1,29 @@
|
|
|
1
1
|
export function buildFileDefaultInstructions(schema) {
|
|
2
2
|
if (schema) {
|
|
3
|
-
return "Create a dataset from the
|
|
3
|
+
return "Create a dataset from the resource file and ensure each output row matches the provided dataset schema exactly.";
|
|
4
4
|
}
|
|
5
|
-
return "Create a dataset representing the
|
|
5
|
+
return "Create a dataset representing the resource content as structured rows.";
|
|
6
6
|
}
|
|
7
|
-
export function
|
|
8
|
-
if (
|
|
7
|
+
export function buildRawResourceInstructions(resourceKind) {
|
|
8
|
+
if (resourceKind === "text") {
|
|
9
9
|
return "Create a dataset representing the raw text content as structured rows without applying business transformations.";
|
|
10
10
|
}
|
|
11
11
|
return "Create a dataset representing the raw file content as structured rows without applying business transformations.";
|
|
12
12
|
}
|
|
13
|
-
export function buildTransformInstructions(
|
|
13
|
+
export function buildTransformInstructions(resourceCount, userInstructions, schema) {
|
|
14
14
|
const explicit = String(userInstructions ?? "").trim();
|
|
15
15
|
if (explicit)
|
|
16
16
|
return explicit;
|
|
17
|
-
if (
|
|
17
|
+
if (resourceCount > 1) {
|
|
18
18
|
if (schema) {
|
|
19
|
-
return "Combine the
|
|
19
|
+
return "Combine the input datasets into a new dataset that matches the provided output schema exactly.";
|
|
20
20
|
}
|
|
21
|
-
return "Combine the
|
|
21
|
+
return "Combine the input datasets into one coherent dataset.";
|
|
22
22
|
}
|
|
23
23
|
if (schema) {
|
|
24
|
-
return "Transform the
|
|
24
|
+
return "Transform the input dataset into a new dataset that matches the provided output schema exactly.";
|
|
25
25
|
}
|
|
26
|
-
return "Transform the
|
|
26
|
+
return "Transform the input dataset into a new useful dataset.";
|
|
27
27
|
}
|
|
28
28
|
export function buildObjectOutputInstructions(userInstructions) {
|
|
29
29
|
const base = String(userInstructions ?? "").trim();
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput,
|
|
1
|
+
import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalDatasetResource } from "./types.js";
|
|
2
2
|
import type { SandboxState } from "../file/file-dataset.types.js";
|
|
3
3
|
import type { FilePreviewContext } from "../file/filepreview.types.js";
|
|
4
|
-
import type { TransformSandboxState,
|
|
4
|
+
import type { TransformSandboxState, TransformInputPreviewContext } from "../transform/transform-dataset.types.js";
|
|
5
5
|
export declare function resolveDatasetAgentDurable(requestedDurable?: boolean): Promise<boolean>;
|
|
6
6
|
type PreparedFileDatasetContext = {
|
|
7
7
|
kind: "file";
|
|
@@ -11,17 +11,19 @@ type PreparedFileDatasetContext = {
|
|
|
11
11
|
sandboxState: SandboxState;
|
|
12
12
|
filePreview?: FilePreviewContext;
|
|
13
13
|
schema?: DatasetSchemaInput | null;
|
|
14
|
+
filename?: string;
|
|
15
|
+
mediaType?: string;
|
|
14
16
|
};
|
|
15
17
|
type PreparedTransformDatasetContext = {
|
|
16
18
|
kind: "transform";
|
|
17
19
|
datasetId: string;
|
|
18
20
|
sandboxId: string;
|
|
19
|
-
|
|
21
|
+
inputDatasetIds: string[];
|
|
20
22
|
outputSchema: DatasetSchemaInput;
|
|
21
23
|
sandboxState: TransformSandboxState;
|
|
22
|
-
|
|
24
|
+
inputPreviews?: Array<{
|
|
23
25
|
datasetId: string;
|
|
24
|
-
preview:
|
|
26
|
+
preview: TransformInputPreviewContext;
|
|
25
27
|
}>;
|
|
26
28
|
};
|
|
27
29
|
type PreparedDatasetContext = PreparedFileDatasetContext | PreparedTransformDatasetContext;
|
|
@@ -35,19 +37,18 @@ export declare function initializeDatasetStep<Runtime extends AnyDatasetRuntime>
|
|
|
35
37
|
sandboxId: string;
|
|
36
38
|
title?: string;
|
|
37
39
|
instructions?: string;
|
|
38
|
-
|
|
39
|
-
sourceKinds: string[];
|
|
40
|
+
contextId: string;
|
|
40
41
|
schema?: DatasetSchemaInput;
|
|
41
42
|
}): Promise<{
|
|
42
43
|
datasetId: string;
|
|
43
44
|
sandboxId: string;
|
|
44
45
|
}>;
|
|
45
|
-
export declare function
|
|
46
|
+
export declare function prepareDatasetResourcesStep<Runtime extends AnyDatasetRuntime>(params: {
|
|
46
47
|
kind: "file";
|
|
47
48
|
runtime: Runtime;
|
|
48
49
|
datasetId: string;
|
|
49
50
|
sandboxId: string;
|
|
50
|
-
|
|
51
|
+
resource: Extract<InternalDatasetResource, {
|
|
51
52
|
kind: "file" | "text";
|
|
52
53
|
}>;
|
|
53
54
|
schema?: DatasetSchemaInput;
|
|
@@ -56,7 +57,7 @@ export declare function prepareDatasetSourcesStep<Runtime extends AnyDatasetRunt
|
|
|
56
57
|
runtime: Runtime;
|
|
57
58
|
datasetId: string;
|
|
58
59
|
sandboxId: string;
|
|
59
|
-
|
|
60
|
+
inputDatasetIds: string[];
|
|
60
61
|
outputSchema: DatasetSchemaInput;
|
|
61
62
|
}): Promise<PreparedDatasetContext>;
|
|
62
63
|
export declare function initializeDatasetContextStep(params: {
|
|
@@ -75,7 +76,7 @@ export declare function completeDatasetStep<Runtime extends AnyDatasetRuntime>(p
|
|
|
75
76
|
previewRows: any[];
|
|
76
77
|
firstRow: any;
|
|
77
78
|
}>;
|
|
78
|
-
export declare function
|
|
79
|
+
export declare function materializeSingleFileLikeResource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, resource: Extract<InternalDatasetResource, {
|
|
79
80
|
kind: "file" | "text";
|
|
80
81
|
}>, targetDatasetId: string): Promise<string>;
|
|
81
82
|
export declare function materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;
|