@ekairos/dataset 1.22.34-beta.development.0 → 1.22.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/README.md +347 -0
  2. package/dist/agents.d.ts +8 -0
  3. package/dist/agents.js +8 -0
  4. package/dist/builder/agentMaterializers.d.ts +9 -0
  5. package/dist/builder/agentMaterializers.js +10 -0
  6. package/dist/builder/context.d.ts +15 -0
  7. package/dist/builder/context.js +251 -0
  8. package/dist/builder/instructions.d.ts +5 -0
  9. package/dist/builder/instructions.js +40 -0
  10. package/dist/builder/materialize.d.ts +83 -0
  11. package/dist/builder/materialize.js +548 -0
  12. package/dist/builder/materializeQuery.d.ts +12 -0
  13. package/dist/builder/materializeQuery.js +31 -0
  14. package/dist/builder/persistence.d.ts +22 -0
  15. package/dist/builder/persistence.js +153 -0
  16. package/dist/builder/rows.d.ts +7 -0
  17. package/dist/builder/rows.js +56 -0
  18. package/dist/builder/schemaInference.d.ts +3 -0
  19. package/dist/builder/schemaInference.js +61 -0
  20. package/dist/builder/types.d.ts +140 -0
  21. package/dist/builder/types.js +1 -0
  22. package/dist/clearDataset.tool.d.ts +2 -3
  23. package/dist/clearDataset.tool.js +13 -17
  24. package/dist/completeDataset.steps.d.ts +117 -0
  25. package/dist/completeDataset.steps.js +487 -0
  26. package/dist/completeDataset.tool.d.ts +132 -7
  27. package/dist/completeDataset.tool.js +46 -192
  28. package/dist/contextResources.d.ts +31 -0
  29. package/dist/contextResources.js +151 -0
  30. package/dist/contextWorkspace.d.ts +79 -0
  31. package/dist/contextWorkspace.js +234 -0
  32. package/dist/dataset/steps.d.ts +39 -15
  33. package/dist/dataset/steps.js +96 -39
  34. package/dist/dataset.d.ts +3 -67
  35. package/dist/dataset.js +129 -520
  36. package/dist/datasetFiles.d.ts +5 -1
  37. package/dist/datasetFiles.js +29 -27
  38. package/dist/domain.d.ts +1 -2
  39. package/dist/domain.js +1 -6
  40. package/dist/executeCommand.tool.d.ts +2 -30
  41. package/dist/executeCommand.tool.js +165 -39
  42. package/dist/file/file-dataset.agent.d.ts +19 -56
  43. package/dist/file/file-dataset.agent.js +176 -132
  44. package/dist/file/file-dataset.steps.d.ts +27 -0
  45. package/dist/file/file-dataset.steps.js +47 -0
  46. package/dist/file/file-dataset.types.d.ts +64 -0
  47. package/dist/file/file-dataset.types.js +1 -0
  48. package/dist/file/filepreview.d.ts +5 -35
  49. package/dist/file/filepreview.js +60 -107
  50. package/dist/file/filepreview.types.d.ts +31 -0
  51. package/dist/file/filepreview.types.js +1 -0
  52. package/dist/file/generateSchema.tool.d.ts +2 -3
  53. package/dist/file/generateSchema.tool.js +11 -15
  54. package/dist/file/index.d.ts +1 -2
  55. package/dist/file/index.js +1 -18
  56. package/dist/file/prompts.d.ts +2 -3
  57. package/dist/file/prompts.js +134 -27
  58. package/dist/file/scripts.generated.d.ts +1 -0
  59. package/dist/file/scripts.generated.js +11 -0
  60. package/dist/file/steps.d.ts +1 -2
  61. package/dist/file/steps.js +9 -7
  62. package/dist/id.d.ts +1 -0
  63. package/dist/id.js +10 -0
  64. package/dist/index.d.ts +8 -7
  65. package/dist/index.js +8 -23
  66. package/dist/materializeDataset.tool.d.ts +52 -32
  67. package/dist/materializeDataset.tool.js +81 -65
  68. package/dist/query/index.d.ts +1 -2
  69. package/dist/query/index.js +1 -18
  70. package/dist/query/queryDomain.d.ts +3 -4
  71. package/dist/query/queryDomain.js +3 -40
  72. package/dist/query/queryDomain.step.d.ts +1 -1
  73. package/dist/query/queryDomain.step.js +13 -13
  74. package/dist/sandbox/steps.d.ts +23 -15
  75. package/dist/sandbox/steps.js +73 -76
  76. package/dist/sandbox.steps.d.ts +1 -2
  77. package/dist/sandbox.steps.js +1 -18
  78. package/dist/schema.d.ts +13 -13
  79. package/dist/schema.js +25 -37
  80. package/dist/service.d.ts +8 -5
  81. package/dist/service.js +70 -15
  82. package/dist/skill.d.ts +0 -1
  83. package/dist/skill.js +12 -17
  84. package/dist/transform/filepreview.d.ts +2 -3
  85. package/dist/transform/filepreview.js +9 -26
  86. package/dist/transform/index.d.ts +2 -3
  87. package/dist/transform/index.js +2 -8
  88. package/dist/transform/prompts.d.ts +1 -34
  89. package/dist/transform/prompts.js +58 -43
  90. package/dist/transform/transform-dataset.agent.d.ts +20 -45
  91. package/dist/transform/transform-dataset.agent.js +146 -89
  92. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  93. package/dist/transform/transform-dataset.steps.js +61 -0
  94. package/dist/transform/transform-dataset.types.d.ts +95 -0
  95. package/dist/transform/transform-dataset.types.js +1 -0
  96. package/dist/transform/transformDataset.d.ts +3 -3
  97. package/dist/transform/transformDataset.js +15 -18
  98. package/dist/writeDatasetRows.tool.d.ts +188 -0
  99. package/dist/writeDatasetRows.tool.js +258 -0
  100. package/package.json +36 -11
  101. package/dist/clearDataset.tool.d.ts.map +0 -1
  102. package/dist/clearDataset.tool.js.map +0 -1
  103. package/dist/completeDataset.tool.d.ts.map +0 -1
  104. package/dist/completeDataset.tool.js.map +0 -1
  105. package/dist/dataset/steps.d.ts.map +0 -1
  106. package/dist/dataset/steps.js.map +0 -1
  107. package/dist/dataset.d.ts.map +0 -1
  108. package/dist/dataset.js.map +0 -1
  109. package/dist/datasetFiles.d.ts.map +0 -1
  110. package/dist/datasetFiles.js.map +0 -1
  111. package/dist/domain.d.ts.map +0 -1
  112. package/dist/domain.js.map +0 -1
  113. package/dist/executeCommand.tool.d.ts.map +0 -1
  114. package/dist/executeCommand.tool.js.map +0 -1
  115. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  116. package/dist/file/file-dataset.agent.js.map +0 -1
  117. package/dist/file/filepreview.d.ts.map +0 -1
  118. package/dist/file/filepreview.js.map +0 -1
  119. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  120. package/dist/file/generateSchema.tool.js.map +0 -1
  121. package/dist/file/index.d.ts.map +0 -1
  122. package/dist/file/index.js.map +0 -1
  123. package/dist/file/prompts.d.ts.map +0 -1
  124. package/dist/file/prompts.js.map +0 -1
  125. package/dist/file/steps.d.ts.map +0 -1
  126. package/dist/file/steps.js.map +0 -1
  127. package/dist/index.d.ts.map +0 -1
  128. package/dist/index.js.map +0 -1
  129. package/dist/materializeDataset.tool.d.ts.map +0 -1
  130. package/dist/materializeDataset.tool.js.map +0 -1
  131. package/dist/query/index.d.ts.map +0 -1
  132. package/dist/query/index.js.map +0 -1
  133. package/dist/query/queryDomain.d.ts.map +0 -1
  134. package/dist/query/queryDomain.js.map +0 -1
  135. package/dist/query/queryDomain.step.d.ts.map +0 -1
  136. package/dist/query/queryDomain.step.js.map +0 -1
  137. package/dist/sandbox/steps.d.ts.map +0 -1
  138. package/dist/sandbox/steps.js.map +0 -1
  139. package/dist/sandbox.steps.d.ts.map +0 -1
  140. package/dist/sandbox.steps.js.map +0 -1
  141. package/dist/schema.d.ts.map +0 -1
  142. package/dist/schema.js.map +0 -1
  143. package/dist/service.d.ts.map +0 -1
  144. package/dist/service.js.map +0 -1
  145. package/dist/skill.d.ts.map +0 -1
  146. package/dist/skill.js.map +0 -1
  147. package/dist/transform/filepreview.d.ts.map +0 -1
  148. package/dist/transform/filepreview.js.map +0 -1
  149. package/dist/transform/index.d.ts.map +0 -1
  150. package/dist/transform/index.js.map +0 -1
  151. package/dist/transform/prompts.d.ts.map +0 -1
  152. package/dist/transform/prompts.js.map +0 -1
  153. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  154. package/dist/transform/transform-dataset.agent.js.map +0 -1
  155. package/dist/transform/transformDataset.d.ts.map +0 -1
  156. package/dist/transform/transformDataset.js.map +0 -1
@@ -0,0 +1,31 @@
1
+ import { materializeRowsToDataset } from "./persistence.js";
2
+ import { getDomainDescriptor, normalizeQueryRows } from "./rows.js";
3
/**
 * Runs the resource query against the runtime database and flattens the result.
 *
 * The `"use step"` directive is kept as the first statement of the body.
 *
 * @param params - Object carrying `runtime` (its `db()` is awaited to obtain the
 *   database handle) and `query` (handed to `db.query` unchanged).
 * @returns An object whose `rows` property is the query result after
 *   `normalizeQueryRows`.
 */
async function readQueryResourceRowsStep(params) {
    "use step";
    const database = await params.runtime.db();
    const queryResult = await database.query(params.query);
    const rows = normalizeQueryRows(queryResult);
    return { rows };
}
9
/**
 * Executes a query resource and stores the resulting rows as a dataset.
 *
 * @param runtime - Runtime forwarded to the query step and to
 *   `materializeRowsToDataset`.
 * @param resource - Query resource; `query`, `domain`, `title` and `explanation`
 *   are read from it.
 * @param params - Dataset parameters (`datasetId`, `sandboxId`, `title`,
 *   `instructions`, `contextId`, `schema`, `first`). `params.title` wins over
 *   `resource.title` unless it is null/undefined.
 * @returns Whatever `materializeRowsToDataset` resolves to.
 */
export async function materializeQueryResource(runtime, resource, params) {
    const { rows: queryRows } = await readQueryResourceRowsStep({ runtime, query: resource.query });
    // The analysis record keeps the query and its explanation next to the domain identity.
    const analysis = {
        query: resource.query,
        explanation: resource.explanation,
        ...getDomainDescriptor(resource.domain),
    };
    const explicitSchema = params.schema;
    return await materializeRowsToDataset(runtime, {
        datasetId: params.datasetId,
        sandboxId: params.sandboxId,
        title: params.title ?? resource.title,
        instructions: params.instructions,
        contextId: params.contextId,
        analysis,
        rows: queryRows,
        schema: explicitSchema,
        // Inference is requested only when the caller supplied no schema.
        inferSchema: !explicitSchema,
        first: params.first,
    });
}
@@ -0,0 +1,22 @@
1
+ import type { AnyDatasetRuntime, DatasetBuildResult, DatasetTextResourceInput, MaterializeRowsParams } from "./types.js";
2
+ export declare function defaultTextResourceName(resource: DatasetTextResourceInput): string;
3
+ export declare function getDatasetDb<Runtime extends AnyDatasetRuntime>(runtime: Runtime): Promise<any>;
4
+ export declare function createOrUpdateDatasetMetadata<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: {
5
+ datasetId: string;
6
+ sandboxId?: string;
7
+ title?: string;
8
+ instructions?: string;
9
+ contextId: string;
10
+ analysis?: any;
11
+ schema?: any;
12
+ status?: string;
13
+ }): Promise<void>;
14
+ export declare function materializeRowsToDataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: MaterializeRowsParams): Promise<string>;
15
+ export declare function uploadInlineTextResource<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, resource: DatasetTextResourceInput): Promise<string>;
16
+ export declare function finalizeBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, withFirst: boolean): Promise<DatasetBuildResult>;
17
+ export declare function createDatasetBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: {
18
+ datasetId: string;
19
+ dataset: any;
20
+ previewRows: any[];
21
+ firstRow?: any | null;
22
+ }): DatasetBuildResult;
@@ -0,0 +1,153 @@
1
+ import { DatasetService } from "../service.js";
2
+ import { datasetDomain } from "../schema.js";
3
+ import { datasetGetByIdStep, datasetPreviewRowsStep, datasetReadOneStep, datasetReadRowsStep, } from "../dataset/steps.js";
4
+ import { inferDatasetSchema, validateRows } from "./schemaInference.js";
5
+ import { rowsToJsonl } from "./rows.js";
6
/**
 * Picks a file name for an inline text resource.
 *
 * A non-blank `resource.name` is returned trimmed. Otherwise the name is derived
 * from the lower-cased `resource.mimeType` by substring match, checked in the
 * order csv, json, yaml/yml, falling back to `resource.txt`.
 *
 * @param resource - Text resource; only `name` and `mimeType` are read.
 * @returns The file name to use for the resource.
 */
export function defaultTextResourceName(resource) {
    const explicitName = resource.name?.trim();
    if (explicitName) {
        return explicitName;
    }
    const normalizedType = String(resource.mimeType ?? "").toLowerCase();
    // Ordered table: the first marker contained in the MIME type decides the name.
    const namesByMarker = [
        ["csv", "resource.csv"],
        ["json", "resource.json"],
        ["yaml", "resource.yaml"],
        ["yml", "resource.yaml"],
    ];
    const match = namesByMarker.find(([marker]) => normalizedType.includes(marker));
    return match ? match[1] : "resource.txt";
}
18
/**
 * Resolves the database handle scoped to the dataset domain.
 *
 * @param runtime - Runtime whose `use(datasetDomain)` result exposes a `db` property.
 * @returns The `db` property of the scoped runtime.
 */
export async function getDatasetDb(runtime) {
    const { db } = await runtime.use(datasetDomain);
    return db;
}
22
/**
 * Writes the dataset's metadata record through `DatasetService.createDataset`.
 *
 * Defaults applied here: `title` falls back to the dataset id, `instructions`
 * to an empty string, and `status` to `"building"`. The organization id is
 * taken from `runtime.env.orgId`.
 *
 * @param runtime - Runtime used to resolve the dataset database and the org id.
 * @param params - Metadata fields (`datasetId`, `sandboxId`, `title`,
 *   `instructions`, `contextId`, `analysis`, `schema`, `status`).
 * @throws Error `dataset_context_required` when `contextId` is missing, not a
 *   string, or blank.
 * @throws Error carrying the service's `error` value when the service reports
 *   `ok: false`.
 */
export async function createOrUpdateDatasetMetadata(runtime, params) {
    "use step";
    // Check the type before trimming: an absent contextId would otherwise
    // surface as a TypeError from `.trim()` instead of the intended error code.
    const contextId = params.contextId;
    if (typeof contextId !== "string" || !contextId.trim()) {
        throw new Error("dataset_context_required");
    }
    const db = await getDatasetDb(runtime);
    const service = new DatasetService(db);
    const result = await service.createDataset({
        id: params.datasetId,
        sandboxId: params.sandboxId,
        title: params.title ?? params.datasetId,
        instructions: params.instructions ?? "",
        contextId,
        analysis: params.analysis,
        schema: params.schema,
        status: params.status ?? "building",
        organizationId: runtime.env.orgId,
    });
    if (!result.ok) {
        throw new Error(result.error);
    }
}
44
/**
 * Validates in-memory rows and stores them as a completed dataset.
 *
 * Sequence: enforce the `first` row-count rule, resolve the schema (the
 * caller's, or one inferred from the rows), validate every row, write the
 * metadata with status `"building"`, upload the rows as JSONL, then set the
 * status to `"completed"` with the row counts.
 *
 * NOTE(review): `params.inferSchema` is not read here; inference happens
 * whenever `params.schema` is null/undefined.
 *
 * @param runtime - Runtime used for metadata and storage access.
 * @param params - Rows plus dataset metadata.
 * @returns The dataset id that was passed in.
 * @throws Error `dataset_first_expected_zero_or_one_row` when `first` is set and
 *   more than one row was supplied.
 * @throws Error carrying the service's `error` value when the upload or the
 *   status update reports `ok: false`.
 */
export async function materializeRowsToDataset(runtime, params) {
    "use step";
    const { datasetId, title } = params;
    if (params.first && params.rows.length > 1) {
        throw new Error("dataset_first_expected_zero_or_one_row");
    }
    let resolvedSchema = params.schema;
    if (resolvedSchema == null) {
        // No schema supplied: derive one, naming it after the dataset title when present.
        const rowTitle = title ? `${title}Row` : "DatasetRow";
        const rowDescription = title ? `One row for ${title}` : "One dataset row";
        resolvedSchema = inferDatasetSchema(params.rows, rowTitle, rowDescription);
    }
    validateRows(params.rows, resolvedSchema);
    await createOrUpdateDatasetMetadata(runtime, {
        datasetId,
        sandboxId: params.sandboxId,
        title,
        instructions: params.instructions,
        contextId: params.contextId,
        analysis: params.analysis,
        schema: resolvedSchema,
        status: "building",
    });
    const service = new DatasetService(await getDatasetDb(runtime));
    const uploadResult = await service.uploadDatasetOutputFile({
        datasetId,
        fileBuffer: Buffer.from(rowsToJsonl(params.rows), "utf-8"),
    });
    if (!uploadResult.ok) {
        throw new Error(uploadResult.error);
    }
    const rowCount = params.rows.length;
    const statusResult = await service.updateDatasetStatus({
        datasetId,
        status: "completed",
        calculatedTotalRows: rowCount,
        actualGeneratedRowCount: rowCount,
    });
    if (!statusResult.ok) {
        throw new Error(statusResult.error);
    }
    return datasetId;
}
82
/**
 * Uploads an inline text resource to dataset storage and returns its file id.
 *
 * The storage path is `/dataset/resource/<datasetId>/<Date.now()>-<fileName>`,
 * where the file name comes from `defaultTextResourceName`.
 *
 * NOTE(review): the bare file name is passed as `contentDisposition`; confirm
 * the storage API expects a name here rather than a full header value.
 *
 * @param runtime - Runtime used to resolve the dataset database.
 * @param datasetId - Dataset the resource belongs to; becomes part of the path.
 * @param resource - Text resource; `text` is encoded as UTF-8 and `mimeType`
 *   defaults to `text/plain`.
 * @returns The id found at `data.id` of the upload result.
 * @throws Error `dataset_text_resource_upload_failed` when the upload result
 *   carries no file id.
 */
export async function uploadInlineTextResource(runtime, datasetId, resource) {
    "use step";
    const db = await getDatasetDb(runtime);
    const fileName = defaultTextResourceName(resource);
    const storagePath = `/dataset/resource/${datasetId}/${Date.now()}-${fileName}`;
    const uploadOptions = {
        contentType: resource.mimeType ?? "text/plain",
        contentDisposition: fileName,
    };
    const uploadResult = await db.storage.uploadFile(storagePath, Buffer.from(resource.text, "utf-8"), uploadOptions);
    const fileId = uploadResult?.data?.id;
    if (fileId) {
        return fileId;
    }
    throw new Error("dataset_text_resource_upload_failed");
}
97
/**
 * Loads a stored dataset and assembles the build result handed to callers.
 *
 * @param runtime - Runtime forwarded to every dataset step.
 * @param datasetId - Dataset to load.
 * @param withFirst - When truthy, the first row is read as well and returned as
 *   `firstRow`.
 * @returns `{ datasetId, dataset, previewRows, reader }` plus `firstRow` when
 *   `withFirst` is truthy. `previewRows` comes from a preview request with
 *   `limit: 20`; `reader.read` pages through rows via `datasetReadRowsStep`.
 * @throws Error carrying the lookup's `error` value when the dataset lookup
 *   reports `ok: false`.
 */
export async function finalizeBuildResult(runtime, datasetId, withFirst) {
    const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
    if (!datasetResult.ok) {
        throw new Error(datasetResult.error);
    }
    const previewResult = await datasetPreviewRowsStep({ runtime, datasetId, limit: 20 });
    const reader = {
        // Accepts either `read(cursor, limit)` or `read({ cursor, limit })`.
        async read(cursorOrParams, limit) {
            const isParamsObject = cursorOrParams !== null && typeof cursorOrParams === "object";
            const page = isParamsObject ? cursorOrParams : { cursor: cursorOrParams, limit };
            return await datasetReadRowsStep({
                runtime,
                datasetId,
                cursor: page.cursor,
                limit: page.limit,
            });
        },
    };
    let firstRowEntry = {};
    if (withFirst) {
        const firstResult = await datasetReadOneStep({ runtime, datasetId });
        firstRowEntry = { firstRow: firstResult.row };
    }
    return {
        datasetId,
        dataset: datasetResult.data,
        previewRows: previewResult.rows,
        reader,
        ...firstRowEntry,
    };
}
132
/**
 * Builds a dataset build result from data the caller already holds.
 *
 * @param runtime - Runtime captured by the reader for later row reads.
 * @param params - `datasetId`, `dataset`, `previewRows` and optionally `firstRow`.
 * @returns `{ datasetId, dataset, previewRows, reader }`; `firstRow` is included
 *   only when `params.firstRow` is not `undefined` (so `null` is kept).
 */
export function createDatasetBuildResult(runtime, params) {
    const reader = {
        // Accepts either `read(cursor, limit)` or `read({ cursor, limit })`.
        async read(cursorOrParams, limit) {
            let cursor = cursorOrParams;
            let pageSize = limit;
            if (cursorOrParams !== null && typeof cursorOrParams === "object") {
                cursor = cursorOrParams.cursor;
                pageSize = cursorOrParams.limit;
            }
            return await datasetReadRowsStep({
                runtime,
                datasetId: params.datasetId,
                cursor,
                limit: pageSize,
            });
        },
    };
    const base = {
        datasetId: params.datasetId,
        dataset: params.dataset,
        previewRows: params.previewRows,
        reader,
    };
    const firstRowEntry = params.firstRow === undefined ? {} : { firstRow: params.firstRow };
    return { ...base, ...firstRowEntry };
}
@@ -0,0 +1,7 @@
1
+ import type { DomainSchemaResult } from "@ekairos/domain";
2
+ export declare function rowsToJsonl(rows: any[]): string;
3
+ export declare function normalizeQueryRows(result: any): any[];
4
+ export declare function getDomainDescriptor(domain: DomainSchemaResult): {
5
+ domainPackageName?: string | undefined;
6
+ domainName: string;
7
+ };
@@ -0,0 +1,56 @@
1
/**
 * Serializes rows as JSON Lines, one `{"type":"row","data":<row>}` record per line.
 *
 * @param rows - Rows to serialize.
 * @returns The JSONL text, ending with a newline; an empty string for no rows.
 */
export function rowsToJsonl(rows) {
    const lines = rows.map((row) => JSON.stringify({ type: "row", data: row }));
    if (lines.length === 0) {
        return "";
    }
    return `${lines.join("\n")}\n`;
}
10
/**
 * Flattens a query result object into a plain list of rows.
 *
 * - Anything that is not a non-null object, or an object without entries, gives `[]`.
 * - A single entry: an array becomes its elements (non-object elements wrapped
 *   as `{ value }`), an object becomes a one-row list, and any other value
 *   becomes `[{ [key]: value }]`.
 * - Several entries: every row is tagged with `__entity: <key>`; object rows are
 *   spread after the tag and other values are stored under `value`.
 *
 * @param result - Query result keyed by entity name.
 * @returns The flattened rows.
 */
export function normalizeQueryRows(result) {
    const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
    if (!isObjectLike(result)) {
        return [];
    }
    const entries = Object.entries(result);
    switch (entries.length) {
        case 0:
            return [];
        case 1: {
            const [[entityName, payload]] = entries;
            if (Array.isArray(payload)) {
                return payload.map((item) => (isObjectLike(item) ? item : { value: item }));
            }
            return isObjectLike(payload) ? [payload] : [{ [entityName]: payload }];
        }
        default: {
            // With several entities in one result, each row records where it came from.
            const tagRow = (entityName, item) => isObjectLike(item)
                ? { __entity: entityName, ...item }
                : { __entity: entityName, value: item };
            return entries.flatMap(([entityName, payload]) => Array.isArray(payload)
                ? Array.from(payload, (item) => tagRow(entityName, item))
                : [tagRow(entityName, payload)]);
        }
    }
}
47
/**
 * Extracts the identifying names of a domain.
 *
 * The name is taken from `domain.meta.name`, then from the `name` returned by
 * `domain.context()` (called whenever `context` is a function), then the literal
 * `"domain"`. The package name comes from `domain.meta.packageName` and is left
 * out when it is empty.
 *
 * @param domain - Domain object; may be null/undefined.
 * @returns `{ domainName }`, plus `domainPackageName` when a package name exists.
 */
export function getDomainDescriptor(domain) {
    const meta = domain?.meta ?? {};
    const context = typeof domain?.context === "function" ? domain.context() : {};
    const domainName = String(meta?.name ?? context?.name ?? "domain");
    const packageName = String(meta?.packageName ?? "");
    if (packageName.length === 0) {
        return { domainName };
    }
    return { domainName, domainPackageName: packageName };
}
@@ -0,0 +1,3 @@
1
+ import type { DatasetSchemaInput } from "./types.js";
2
+ export declare function inferDatasetSchema(rows: any[], title?: string, description?: string): DatasetSchemaInput;
3
+ export declare function validateRows(rows: any[], schema: DatasetSchemaInput): void;
@@ -0,0 +1,61 @@
1
+ import Ajv from "ajv";
2
+ const ajv = new Ajv({ allErrors: true, strict: false });
3
/**
 * Maps one sample value to a minimal JSON Schema fragment.
 *
 * `null` maps to `"null"`, arrays to `"array"`, numbers to `"number"`, booleans
 * to `"boolean"`, other objects to an open `"object"`, and everything else
 * (including `undefined`) to `"string"`.
 *
 * @param value - Sample value.
 * @returns A schema fragment with a `type` and, for objects, `additionalProperties: true`.
 */
function inferJsonSchemaType(value) {
    if (value === null)
        return { type: "null" };
    if (Array.isArray(value))
        return { type: "array" };
    switch (typeof value) {
        case "number":
            return { type: "number" };
        case "boolean":
            return { type: "boolean" };
        case "object":
            return { type: "object", additionalProperties: true };
        default:
            return { type: "string" };
    }
}
/**
 * Infers a closed object schema that describes the given rows.
 *
 * Every key seen on any object row becomes a property. A property's type covers
 * every defined value observed for it: one JSON type gives `type: "<t>"`, and
 * several (for example a nullable column) give `type: ["null", "string"]` in
 * order of first appearance. Inferring from the first value alone would reject
 * later rows of the same data set. A key is `required` only when every row
 * defines it.
 *
 * @param rows - Sample rows; rows that are not objects add no keys.
 * @param title - Schema title, `"DatasetRow"` by default.
 * @param description - Schema description, `"One dataset row"` by default.
 * @returns `{ title, description, schema }` where `schema` is an object schema
 *   with `additionalProperties: false`.
 */
export function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
    const properties = {};
    const required = [];
    const keys = new Set();
    for (const row of rows) {
        if (!row || typeof row !== "object")
            continue;
        for (const key of Object.keys(row)) {
            keys.add(key);
        }
    }
    for (const key of keys) {
        const values = rows.map((row) => (row && typeof row === "object" ? row[key] : undefined));
        const definedValues = values.filter((value) => value !== undefined);
        // A key whose values are all undefined keeps the "string" fallback.
        const fragments = definedValues.length > 0
            ? definedValues.map((value) => inferJsonSchemaType(value))
            : [inferJsonSchemaType(undefined)];
        const observedTypes = [...new Set(fragments.map((fragment) => fragment.type))];
        properties[key] = {
            // Merging keeps `additionalProperties: true` when any value is an object.
            ...Object.assign({}, ...fragments),
            type: observedTypes.length === 1 ? observedTypes[0] : observedTypes,
            description: `${key} value`,
        };
        if (values.every((value) => value !== undefined)) {
            required.push(key);
        }
    }
    return {
        title,
        description,
        schema: {
            type: "object",
            additionalProperties: false,
            properties,
            required,
        },
    };
}
52
/**
 * Validates every row against the dataset's JSON Schema and stops at the first
 * invalid row.
 *
 * The error names the zero-based row index and, for each problem, the path of
 * the offending value, so a message such as "must be string" can be traced to
 * a specific field.
 *
 * @param rows - Rows to validate.
 * @param schema - Dataset schema input; its `schema` property is compiled with Ajv.
 * @throws Error with the joined validation messages for the first invalid row,
 *   or `dataset_schema_validation_failed` when the validator reported no details.
 */
export function validateRows(rows, schema) {
    const validator = ajv.compile(schema.schema);
    let index = 0;
    for (const row of rows) {
        if (!validator(row)) {
            const details = (validator.errors ?? [])
                .map((entry) => {
                    // `instancePath` is the Ajv v8 field name; `dataPath` covers older releases.
                    const path = entry.instancePath ?? entry.dataPath ?? "";
                    const message = entry.message || "validation_error";
                    return path ? `${path} ${message}` : message;
                })
                .join("; ");
            throw new Error(details ? `row ${index}: ${details}` : "dataset_schema_validation_failed");
        }
        index += 1;
    }
}
@@ -0,0 +1,140 @@
1
+ import type { InstaQLParams, ValidQuery } from "@instantdb/core";
2
+ import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
3
+ import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
4
+ import type { ContextIdentifier, ContextReactor, StoredContextResource } from "@ekairos/events";
5
+ import { datasetDomain } from "../schema.js";
6
+ export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
7
+ query: InstaQLParams<DomainInstantSchema<D>>;
8
+ title?: string;
9
+ explanation?: string;
10
+ domain: D;
11
+ };
12
+ export type DatasetFileResourceInput = {
13
+ fileId: string;
14
+ description?: string;
15
+ filename?: string;
16
+ mediaType?: string;
17
+ };
18
+ export type DatasetTextResourceInput = {
19
+ text: string;
20
+ mimeType?: string;
21
+ name?: string;
22
+ description?: string;
23
+ };
24
+ export type DatasetExistingResourceInput = {
25
+ datasetId: string;
26
+ description?: string;
27
+ };
28
+ export type DatasetContextResourceInput = ContextIdentifier;
29
+ export type DatasetFileResource = {
30
+ kind: "file";
31
+ } & DatasetFileResourceInput;
32
+ export type DatasetTextResource = {
33
+ kind: "text";
34
+ } & DatasetTextResourceInput;
35
+ export type DatasetExistingResource = {
36
+ kind: "dataset";
37
+ } & DatasetExistingResourceInput;
38
+ export type DatasetContextResource = {
39
+ kind: "context";
40
+ } & DatasetContextResourceInput;
41
+ export type DatasetResourceInput = DatasetFileResourceInput | DatasetTextResourceInput | DatasetExistingResourceInput | DatasetContextResourceInput | DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource;
42
+ export type DatasetSchemaInput = {
43
+ title?: string;
44
+ description?: string;
45
+ schema: any;
46
+ };
47
+ export type DatasetOutput = "rows" | "object";
48
+ export type DatasetMode = "auto" | "schema";
49
+ export type DatasetBuilderOptions = {
50
+ datasetId?: string;
51
+ durable?: boolean;
52
+ };
53
+ export type DatasetBuildOptions = {
54
+ datasetId?: string;
55
+ durable?: boolean;
56
+ };
57
+ export type InternalDatasetResource = DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource | ({
58
+ kind: "query";
59
+ } & DatasetQueryResourceInput);
60
+ export type DatasetReaderResult = {
61
+ rows: any[];
62
+ cursor: number;
63
+ done: boolean;
64
+ };
65
+ export type DatasetReader = {
66
+ read(cursor?: number, limit?: number): Promise<DatasetReaderResult>;
67
+ read(params?: {
68
+ cursor?: number;
69
+ limit?: number;
70
+ }): Promise<DatasetReaderResult>;
71
+ };
72
+ export type DatasetBuildResult = {
73
+ datasetId: string;
74
+ dataset: any;
75
+ previewRows: any[];
76
+ reader: DatasetReader;
77
+ object?: any | null;
78
+ firstRow?: any | null;
79
+ };
80
+ export type DatasetRuntimeEnv = {
81
+ orgId: string;
82
+ };
83
+ export type AnyDatasetRuntime = EkairosRuntime<any, any, any>;
84
+ export type DatasetRuntimeHandle<Runtime extends AnyDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
85
+ export type CompatibleQueryDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
86
+ export type DatasetQueryResourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
87
+ query: Q;
88
+ title?: string;
89
+ explanation?: string;
90
+ };
91
+ export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
92
+ runtime: Runtime;
93
+ env: Runtime["env"] & DatasetRuntimeEnv;
94
+ resources: InternalDatasetResource[];
95
+ contextResources?: StoredContextResource[];
96
+ title?: string;
97
+ sandboxId?: string;
98
+ contextId?: string;
99
+ outputSchema?: DatasetSchemaInput;
100
+ output: DatasetOutput;
101
+ inferSchema: boolean;
102
+ instructions?: string;
103
+ reactor?: ContextReactor<any, any>;
104
+ durable?: boolean;
105
+ first: boolean;
106
+ };
107
+ export type MaterializeRowsParams = {
108
+ datasetId: string;
109
+ sandboxId?: string;
110
+ title?: string;
111
+ instructions?: string;
112
+ contextId: string;
113
+ analysis?: any;
114
+ rows: any[];
115
+ schema?: DatasetSchemaInput;
116
+ inferSchema?: boolean;
117
+ first?: boolean;
118
+ };
119
+ export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
120
+ readonly datasetId: string;
121
+ fromFile(resource: DatasetFileResourceInput): DatasetBuilder<Runtime>;
122
+ fromText(resource: DatasetTextResourceInput): DatasetBuilder<Runtime>;
123
+ fromDataset(resource: DatasetExistingResourceInput): DatasetBuilder<Runtime>;
124
+ fromContext(context: DatasetContextResourceInput): DatasetBuilder<Runtime>;
125
+ from(...resources: DatasetResourceInput[]): DatasetBuilder<Runtime>;
126
+ fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleQueryDomain<Runtime, D>, resource: DatasetQueryResourceOptions<D, Q>): DatasetBuilder<Runtime>;
127
+ title(title: string): DatasetBuilder<Runtime>;
128
+ sandbox(input: {
129
+ sandboxId: string;
130
+ }): DatasetBuilder<Runtime>;
131
+ schema(schema: DatasetSchemaInput): DatasetBuilder<Runtime>;
132
+ inferSchema(): DatasetBuilder<Runtime>;
133
+ auto(): DatasetBuilder<Runtime>;
134
+ asRows(): DatasetBuilder<Runtime>;
135
+ asObject(): DatasetBuilder<Runtime>;
136
+ instructions(instructions: string): DatasetBuilder<Runtime>;
137
+ reactor(reactor: ContextReactor<any, any>): DatasetBuilder<Runtime>;
138
+ first(): DatasetBuilder<Runtime>;
139
+ build(options?: DatasetBuildOptions): Promise<DatasetBuildResult>;
140
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -1,9 +1,9 @@
1
1
  interface ClearDatasetToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
- env?: any;
4
+ runtime: any;
5
5
  }
6
- export declare function createClearDatasetTool({ datasetId, sandboxId, env }: ClearDatasetToolParams): import("ai").Tool<{
6
+ export declare function createClearDatasetTool({ datasetId, sandboxId, runtime }: ClearDatasetToolParams): import("ai").Tool<{
7
7
  reason: string;
8
8
  }, {
9
9
  success: boolean;
@@ -17,4 +17,3 @@ export declare function createClearDatasetTool({ datasetId, sandboxId, env }: Cl
17
17
  error?: undefined;
18
18
  }>;
19
19
  export {};
20
- //# sourceMappingURL=clearDataset.tool.d.ts.map
@@ -1,27 +1,24 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createClearDatasetTool = createClearDatasetTool;
4
- const ai_1 = require("ai");
5
- const zod_1 = require("zod");
6
- const steps_1 = require("./sandbox/steps");
7
- const datasetFiles_1 = require("./datasetFiles");
8
- const steps_2 = require("./dataset/steps");
9
- function createClearDatasetTool({ datasetId, sandboxId, env }) {
10
- return (0, ai_1.tool)({
1
+ import { tool } from "ai";
2
+ import { z } from "zod";
3
+ import { runDatasetSandboxCommandStep } from "./sandbox/steps.js";
4
+ import { getDatasetOutputPath } from "./datasetFiles.js";
5
+ import { datasetClearStep } from "./dataset/steps.js";
6
+ export function createClearDatasetTool({ datasetId, sandboxId, runtime }) {
7
+ return tool({
11
8
  description: "Clear all dataset records and output files. This will delete all generated data and reset the dataset to its initial state.",
12
- inputSchema: zod_1.z.object({
13
- reason: zod_1.z.string().describe("The reason for clearing the dataset"),
9
+ inputSchema: z.object({
10
+ reason: z.string().describe("The reason for clearing the dataset"),
14
11
  }),
15
12
  execute: async ({ reason }) => {
16
13
  console.log(`[Dataset ${datasetId}] ========================================`);
17
14
  console.log(`[Dataset ${datasetId}] Tool: clearDataset`);
18
15
  console.log(`[Dataset ${datasetId}] Reason: ${reason}`);
19
16
  console.log(`[Dataset ${datasetId}] ========================================`);
20
- const outputPath = (0, datasetFiles_1.getDatasetOutputPath)(datasetId);
17
+ const outputPath = getDatasetOutputPath(datasetId);
21
18
  console.log(`[Dataset ${datasetId}] Step 1: Deleting output file`);
22
19
  try {
23
- const result = await (0, steps_1.runDatasetSandboxCommandStep)({
24
- env,
20
+ const result = await runDatasetSandboxCommandStep({
21
+ runtime,
25
22
  sandboxId,
26
23
  cmd: "rm",
27
24
  args: ["-f", outputPath],
@@ -38,7 +35,7 @@ function createClearDatasetTool({ datasetId, sandboxId, env }) {
38
35
  console.warn(`[Dataset ${datasetId}] Error deleting output file: ${message}`);
39
36
  }
40
37
  console.log(`[Dataset ${datasetId}] Step 2: Clearing dataset records`);
41
- const clearResult = await (0, steps_2.datasetClearStep)({ env, datasetId });
38
+ const clearResult = await datasetClearStep({ runtime, datasetId });
42
39
  if (!clearResult.ok) {
43
40
  console.error(`[Dataset ${datasetId}] Failed to clear dataset: ${clearResult.error}`);
44
41
  return {
@@ -58,4 +55,3 @@ function createClearDatasetTool({ datasetId, sandboxId, env }) {
58
55
  },
59
56
  });
60
57
  }
61
- //# sourceMappingURL=clearDataset.tool.js.map
@@ -0,0 +1,117 @@
1
+ export interface PersistDatasetStepParams {
2
+ datasetId: string;
3
+ sandboxId: string;
4
+ runtime: any;
5
+ summary?: string;
6
+ outputPath?: string;
7
+ }
8
+ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }: PersistDatasetStepParams): Promise<{
9
+ rowSource: string;
10
+ outputPath: string;
11
+ storagePath: string;
12
+ success: boolean;
13
+ validation?: RowValidationEntry[];
14
+ validationTruncated?: number;
15
+ failureSummary?: ValidationFailureSummary;
16
+ repairInstructions?: string[];
17
+ validRowCount?: number;
18
+ rowRecordCount?: number;
19
+ error?: string;
20
+ status?: string;
21
+ message?: string;
22
+ validRows?: undefined;
23
+ dataFileId?: undefined;
24
+ records?: undefined;
25
+ summary?: undefined;
26
+ } | {
27
+ success: boolean;
28
+ status: string;
29
+ rowSource: string;
30
+ validRows: number;
31
+ rowRecordCount: number;
32
+ validation: RowValidationEntry[] | undefined;
33
+ error: string;
34
+ message: string;
35
+ outputPath: string;
36
+ storagePath: string;
37
+ dataFileId?: undefined;
38
+ records?: undefined;
39
+ summary?: undefined;
40
+ } | {
41
+ success: boolean;
42
+ status: string;
43
+ rowSource: string;
44
+ validRows: number;
45
+ rowRecordCount: number;
46
+ validation: RowValidationEntry[] | undefined;
47
+ error: string;
48
+ message: string;
49
+ outputPath: string;
50
+ storagePath: string;
51
+ dataFileId: string;
52
+ records?: undefined;
53
+ summary?: undefined;
54
+ } | {
55
+ success: boolean;
56
+ status: string;
57
+ rowSource: string;
58
+ records: number;
59
+ summary: string;
60
+ outputPath: string;
61
+ storagePath: string;
62
+ dataFileId: string;
63
+ validRows?: undefined;
64
+ rowRecordCount?: undefined;
65
+ validation?: undefined;
66
+ error?: undefined;
67
+ message?: undefined;
68
+ }>;
69
+ type RowValidationEntry = {
70
+ index: number;
71
+ valid: boolean;
72
+ errors?: string[];
73
+ errorDetails?: Array<{
74
+ path: string;
75
+ keyword: string;
76
+ message: string;
77
+ params?: Record<string, unknown>;
78
+ schemaPath?: string;
79
+ }>;
80
+ dataKeys?: string[];
81
+ };
82
+ type ValidationFailureSummary = {
83
+ rowRecordCount: number;
84
+ validRowCount: number;
85
+ invalidRowCount: number;
86
+ expectedTopLevelKeys: string[];
87
+ requiredTopLevelKeys: string[];
88
+ requiredPaths: string[];
89
+ enumConstraints: Array<{
90
+ path: string;
91
+ values: unknown[];
92
+ }>;
93
+ topErrors: Array<{
94
+ message: string;
95
+ count: number;
96
+ }>;
97
+ missingRequiredProperties: Array<{
98
+ property: string;
99
+ count: number;
100
+ }>;
101
+ additionalProperties: Array<{
102
+ property: string;
103
+ count: number;
104
+ }>;
105
+ enumFailures: Array<{
106
+ path: string;
107
+ allowedValues: unknown[];
108
+ count: number;
109
+ }>;
110
+ observedTopLevelKeys: string[];
111
+ sampleInvalidRows: Array<{
112
+ index: number;
113
+ dataKeys?: string[];
114
+ errors?: string[];
115
+ }>;
116
+ };
117
+ export {};