@ekairos/dataset 1.22.83-beta.development.0 → 1.22.84-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/builder/agentMaterializers.d.ts +2 -2
  2. package/dist/builder/context.d.ts +7 -0
  3. package/dist/builder/context.js +192 -0
  4. package/dist/builder/instructions.d.ts +3 -3
  5. package/dist/builder/instructions.js +10 -10
  6. package/dist/builder/materialize.d.ts +10 -11
  7. package/dist/builder/materialize.js +116 -113
  8. package/dist/builder/materializeQuery.d.ts +3 -2
  9. package/dist/builder/materializeQuery.js +10 -19
  10. package/dist/builder/persistence.d.ts +4 -5
  11. package/dist/builder/persistence.js +20 -19
  12. package/dist/builder/types.d.ts +29 -24
  13. package/dist/completeDataset.steps.js +1 -1
  14. package/dist/dataset.d.ts +1 -1
  15. package/dist/dataset.js +42 -29
  16. package/dist/datasetFiles.d.ts +1 -1
  17. package/dist/datasetFiles.js +3 -3
  18. package/dist/file/file-dataset.agent.js +3 -4
  19. package/dist/file/prompts.js +12 -12
  20. package/dist/materializeDataset.tool.d.ts +34 -26
  21. package/dist/materializeDataset.tool.js +40 -29
  22. package/dist/schema.d.ts +12 -2
  23. package/dist/schema.js +6 -3
  24. package/dist/service.d.ts +1 -2
  25. package/dist/service.js +5 -2
  26. package/dist/transform/filepreview.d.ts +2 -2
  27. package/dist/transform/filepreview.js +3 -3
  28. package/dist/transform/prompts.js +25 -25
  29. package/dist/transform/transform-dataset.agent.d.ts +4 -4
  30. package/dist/transform/transform-dataset.agent.js +29 -30
  31. package/dist/transform/transform-dataset.steps.d.ts +7 -7
  32. package/dist/transform/transform-dataset.steps.js +20 -20
  33. package/dist/transform/transform-dataset.types.d.ts +13 -13
  34. package/dist/transform/transformDataset.js +4 -4
  35. package/package.json +4 -4
  36. /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
  37. /package/dist/builder/{sourceRows.js → rows.js} +0 -0
@@ -1,6 +1,6 @@
1
- import type { AnyDatasetRuntime, DatasetBuilderState, InternalSource } from "./types.js";
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, InternalDatasetResource } from "./types.js";
2
2
  export type DatasetAgentMaterializers = {
3
- materializeSingleFileLikeSource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, source: Extract<InternalSource, {
3
+ materializeSingleFileLikeResource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, resource: Extract<InternalDatasetResource, {
4
4
  kind: "file" | "text";
5
5
  }>, targetDatasetId: string): Promise<string>;
6
6
  materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;
@@ -0,0 +1,7 @@
1
+ import type { AnyDatasetRuntime, InternalDatasetResource } from "./types.js";
2
+ type DatasetContextResolution = {
3
+ contextId: string;
4
+ resources: InternalDatasetResource[];
5
+ };
6
+ export declare function resolveDatasetResourceContext<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, resources: InternalDatasetResource[]): Promise<DatasetContextResolution>;
7
+ export {};
@@ -0,0 +1,192 @@
1
+ import { eventsDomain } from "@ekairos/events";
2
+ import { createDatasetId } from "../id.js";
3
+ import { getDomainDescriptor } from "./rows.js";
4
+ function getContextWhere(context) {
5
+ return "id" in context ? { id: context.id } : { key: context.key };
6
+ }
7
+ async function getEventsDb(runtime) {
8
+ const scoped = await runtime.use(eventsDomain);
9
+ return scoped.db;
10
+ }
11
+ function resourceKey(index, resource) {
12
+ if (resource.kind === "file")
13
+ return `file:${index}:${resource.fileId}`;
14
+ if (resource.kind === "text")
15
+ return `text:${index}:${resource.name ?? "inline"}`;
16
+ if (resource.kind === "dataset")
17
+ return `dataset:${index}:${resource.datasetId}`;
18
+ if (resource.kind === "query")
19
+ return `query:${index}:${resource.title ?? "query"}`;
20
+ return `resource:${index}`;
21
+ }
22
+ function resourceName(index, resource) {
23
+ if (resource.kind === "file")
24
+ return resource.filename ?? `File ${index + 1}`;
25
+ if (resource.kind === "text")
26
+ return resource.name ?? `Text ${index + 1}`;
27
+ if (resource.kind === "dataset")
28
+ return resource.datasetId;
29
+ if (resource.kind === "query")
30
+ return resource.title ?? `Query ${index + 1}`;
31
+ return `Resource ${index + 1}`;
32
+ }
33
+ function resourceDescription(resource) {
34
+ if ("description" in resource && typeof resource.description === "string" && resource.description.trim()) {
35
+ return resource.description.trim();
36
+ }
37
+ if (resource.kind === "query" && typeof resource.explanation === "string" && resource.explanation.trim()) {
38
+ return resource.explanation.trim();
39
+ }
40
+ return `Dataset ${resource.kind} resource.`;
41
+ }
42
+ function resourceToContextResource(index, resource) {
43
+ const base = {
44
+ key: resourceKey(index, resource),
45
+ type: resource.kind,
46
+ name: resourceName(index, resource),
47
+ description: resourceDescription(resource),
48
+ };
49
+ if (resource.kind === "file") {
50
+ return {
51
+ ...base,
52
+ fileId: resource.fileId,
53
+ filename: resource.filename,
54
+ mediaType: resource.mediaType,
55
+ };
56
+ }
57
+ if (resource.kind === "text") {
58
+ return {
59
+ ...base,
60
+ text: resource.text,
61
+ mimeType: resource.mimeType,
62
+ };
63
+ }
64
+ if (resource.kind === "dataset") {
65
+ return {
66
+ ...base,
67
+ datasetId: resource.datasetId,
68
+ };
69
+ }
70
+ if (resource.kind === "query") {
71
+ return {
72
+ ...base,
73
+ query: resource.query,
74
+ title: resource.title,
75
+ explanation: resource.explanation,
76
+ ...getDomainDescriptor(resource.domain),
77
+ };
78
+ }
79
+ return base;
80
+ }
81
+ async function createDatasetResourceContextStep(params) {
82
+ "use step";
83
+ const db = await getEventsDb(params.runtime);
84
+ const contextId = createDatasetId();
85
+ const now = new Date();
86
+ await db.transact([
87
+ db.tx.event_contexts[contextId].create({
88
+ createdAt: now,
89
+ updatedAt: now,
90
+ name: `Dataset ${params.datasetId} resource context`,
91
+ status: "open_idle",
92
+ content: {
93
+ datasetId: params.datasetId,
94
+ resourceCount: params.resources.length,
95
+ },
96
+ resources: params.resources,
97
+ description: `Dataset materialization context for ${params.datasetId}.`,
98
+ goal: "Materialize the dataset from the resources declared in this context.",
99
+ }),
100
+ ]);
101
+ return {
102
+ contextId,
103
+ };
104
+ }
105
+ function contextResourceToDatasetResource(resource) {
106
+ if (resource.type === "file" && typeof resource.fileId === "string" && resource.fileId.trim()) {
107
+ return {
108
+ kind: "file",
109
+ fileId: resource.fileId.trim(),
110
+ description: resource.description,
111
+ filename: typeof resource.filename === "string" ? resource.filename : undefined,
112
+ mediaType: typeof resource.mediaType === "string" ? resource.mediaType : undefined,
113
+ };
114
+ }
115
+ if (resource.type === "dataset" &&
116
+ typeof resource.datasetId === "string" &&
117
+ resource.datasetId.trim()) {
118
+ return {
119
+ kind: "dataset",
120
+ datasetId: resource.datasetId.trim(),
121
+ description: resource.description,
122
+ };
123
+ }
124
+ if (resource.type === "text" && typeof resource.text === "string") {
125
+ return {
126
+ kind: "text",
127
+ text: String(resource.text),
128
+ mimeType: typeof resource.mimeType === "string"
129
+ ? String(resource.mimeType)
130
+ : "text/plain",
131
+ name: resource.name,
132
+ description: resource.description,
133
+ };
134
+ }
135
+ if (resource.type === "query") {
136
+ throw new Error("dataset_context_query_resource_requires_builder_shortcut");
137
+ }
138
+ return {
139
+ kind: "text",
140
+ text: JSON.stringify({ resource }, null, 2),
141
+ mimeType: "application/vnd.ekairos.context-resource+json",
142
+ name: `${resource.key}.context-resource.json`,
143
+ description: resource.description,
144
+ };
145
+ }
146
+ async function readExistingContext(params) {
147
+ "use step";
148
+ const db = await getEventsDb(params.runtime);
149
+ const res = await db.query({
150
+ event_contexts: {
151
+ $: {
152
+ where: getContextWhere(params.context),
153
+ limit: 1,
154
+ },
155
+ },
156
+ });
157
+ const row = res?.event_contexts?.[0];
158
+ if (!row?.id)
159
+ throw new Error("dataset_context_not_found");
160
+ const resources = Array.isArray(row.resources)
161
+ ? row.resources
162
+ : [];
163
+ if (resources.length === 0) {
164
+ throw new Error("dataset_context_resources_required");
165
+ }
166
+ return {
167
+ contextId: String(row.id),
168
+ resources: resources.map((resource) => contextResourceToDatasetResource(resource)),
169
+ };
170
+ }
171
+ export async function resolveDatasetResourceContext(runtime, datasetId, resources) {
172
+ const contextRefs = resources.filter((resource) => resource.kind === "context");
173
+ if (contextRefs.length > 1) {
174
+ throw new Error("dataset_context_resource_must_be_unique");
175
+ }
176
+ if (contextRefs.length === 1) {
177
+ if (resources.length > 1) {
178
+ throw new Error("dataset_context_resource_is_exclusive");
179
+ }
180
+ return await readExistingContext({ runtime, context: contextRefs[0] });
181
+ }
182
+ const contextResourceRecords = resources.map((resource, index) => resourceToContextResource(index, resource));
183
+ const created = await createDatasetResourceContextStep({
184
+ runtime,
185
+ datasetId,
186
+ resources: contextResourceRecords,
187
+ });
188
+ return {
189
+ contextId: created.contextId,
190
+ resources,
191
+ };
192
+ }
@@ -1,5 +1,5 @@
1
1
  import type { DatasetSchemaInput } from "./types.js";
2
- export declare function buildFileDefaultInstructions(schema?: DatasetSchemaInput): "Create a dataset from the source file and ensure each output row matches the provided dataset schema exactly." | "Create a dataset representing the source content as structured rows.";
3
- export declare function buildRawSourceInstructions(sourceKind: "file" | "text"): "Create a dataset representing the raw text content as structured rows without applying business transformations." | "Create a dataset representing the raw file content as structured rows without applying business transformations.";
4
- export declare function buildTransformInstructions(sourceCount: number, userInstructions?: string, schema?: DatasetSchemaInput): string;
2
+ export declare function buildFileDefaultInstructions(schema?: DatasetSchemaInput): "Create a dataset from the resource file and ensure each output row matches the provided dataset schema exactly." | "Create a dataset representing the resource content as structured rows.";
3
+ export declare function buildRawResourceInstructions(resourceKind: "file" | "text"): "Create a dataset representing the raw text content as structured rows without applying business transformations." | "Create a dataset representing the raw file content as structured rows without applying business transformations.";
4
+ export declare function buildTransformInstructions(resourceCount: number, userInstructions?: string, schema?: DatasetSchemaInput): string;
5
5
  export declare function buildObjectOutputInstructions(userInstructions?: string): string;
@@ -1,29 +1,29 @@
1
1
  export function buildFileDefaultInstructions(schema) {
2
2
  if (schema) {
3
- return "Create a dataset from the source file and ensure each output row matches the provided dataset schema exactly.";
3
+ return "Create a dataset from the resource file and ensure each output row matches the provided dataset schema exactly.";
4
4
  }
5
- return "Create a dataset representing the source content as structured rows.";
5
+ return "Create a dataset representing the resource content as structured rows.";
6
6
  }
7
- export function buildRawSourceInstructions(sourceKind) {
8
- if (sourceKind === "text") {
7
+ export function buildRawResourceInstructions(resourceKind) {
8
+ if (resourceKind === "text") {
9
9
  return "Create a dataset representing the raw text content as structured rows without applying business transformations.";
10
10
  }
11
11
  return "Create a dataset representing the raw file content as structured rows without applying business transformations.";
12
12
  }
13
- export function buildTransformInstructions(sourceCount, userInstructions, schema) {
13
+ export function buildTransformInstructions(resourceCount, userInstructions, schema) {
14
14
  const explicit = String(userInstructions ?? "").trim();
15
15
  if (explicit)
16
16
  return explicit;
17
- if (sourceCount > 1) {
17
+ if (resourceCount > 1) {
18
18
  if (schema) {
19
- return "Combine the source datasets into a new dataset that matches the provided output schema exactly.";
19
+ return "Combine the input datasets into a new dataset that matches the provided output schema exactly.";
20
20
  }
21
- return "Combine the source datasets into one coherent dataset.";
21
+ return "Combine the input datasets into one coherent dataset.";
22
22
  }
23
23
  if (schema) {
24
- return "Transform the source dataset into a new dataset that matches the provided output schema exactly.";
24
+ return "Transform the input dataset into a new dataset that matches the provided output schema exactly.";
25
25
  }
26
- return "Transform the source dataset into a new useful dataset.";
26
+ return "Transform the input dataset into a new useful dataset.";
27
27
  }
28
28
  export function buildObjectOutputInstructions(userInstructions) {
29
29
  const base = String(userInstructions ?? "").trim();
@@ -1,7 +1,7 @@
1
- import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalSource } from "./types.js";
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalDatasetResource } from "./types.js";
2
2
  import type { SandboxState } from "../file/file-dataset.types.js";
3
3
  import type { FilePreviewContext } from "../file/filepreview.types.js";
4
- import type { TransformSandboxState, TransformSourcePreviewContext } from "../transform/transform-dataset.types.js";
4
+ import type { TransformSandboxState, TransformInputPreviewContext } from "../transform/transform-dataset.types.js";
5
5
  export declare function resolveDatasetAgentDurable(requestedDurable?: boolean): Promise<boolean>;
6
6
  type PreparedFileDatasetContext = {
7
7
  kind: "file";
@@ -18,12 +18,12 @@ type PreparedTransformDatasetContext = {
18
18
  kind: "transform";
19
19
  datasetId: string;
20
20
  sandboxId: string;
21
- sourceDatasetIds: string[];
21
+ inputDatasetIds: string[];
22
22
  outputSchema: DatasetSchemaInput;
23
23
  sandboxState: TransformSandboxState;
24
- sourcePreviews?: Array<{
24
+ inputPreviews?: Array<{
25
25
  datasetId: string;
26
- preview: TransformSourcePreviewContext;
26
+ preview: TransformInputPreviewContext;
27
27
  }>;
28
28
  };
29
29
  type PreparedDatasetContext = PreparedFileDatasetContext | PreparedTransformDatasetContext;
@@ -37,19 +37,18 @@ export declare function initializeDatasetStep<Runtime extends AnyDatasetRuntime>
37
37
  sandboxId: string;
38
38
  title?: string;
39
39
  instructions?: string;
40
- sources: any[];
41
- sourceKinds: string[];
40
+ contextId: string;
42
41
  schema?: DatasetSchemaInput;
43
42
  }): Promise<{
44
43
  datasetId: string;
45
44
  sandboxId: string;
46
45
  }>;
47
- export declare function prepareDatasetSourcesStep<Runtime extends AnyDatasetRuntime>(params: {
46
+ export declare function prepareDatasetResourcesStep<Runtime extends AnyDatasetRuntime>(params: {
48
47
  kind: "file";
49
48
  runtime: Runtime;
50
49
  datasetId: string;
51
50
  sandboxId: string;
52
- source: Extract<InternalSource, {
51
+ resource: Extract<InternalDatasetResource, {
53
52
  kind: "file" | "text";
54
53
  }>;
55
54
  schema?: DatasetSchemaInput;
@@ -58,7 +57,7 @@ export declare function prepareDatasetSourcesStep<Runtime extends AnyDatasetRunt
58
57
  runtime: Runtime;
59
58
  datasetId: string;
60
59
  sandboxId: string;
61
- sourceDatasetIds: string[];
60
+ inputDatasetIds: string[];
62
61
  outputSchema: DatasetSchemaInput;
63
62
  }): Promise<PreparedDatasetContext>;
64
63
  export declare function initializeDatasetContextStep(params: {
@@ -77,7 +76,7 @@ export declare function completeDatasetStep<Runtime extends AnyDatasetRuntime>(p
77
76
  previewRows: any[];
78
77
  firstRow: any;
79
78
  }>;
80
- export declare function materializeSingleFileLikeSource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, source: Extract<InternalSource, {
79
+ export declare function materializeSingleFileLikeResource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, resource: Extract<InternalDatasetResource, {
81
80
  kind: "file" | "text";
82
81
  }>, targetDatasetId: string): Promise<string>;
83
82
  export declare function materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;