@ekairos/dataset 1.22.40-beta.development.0 → 1.22.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/context.d.ts +15 -0
  6. package/dist/builder/context.js +251 -0
  7. package/dist/builder/instructions.d.ts +4 -5
  8. package/dist/builder/instructions.js +15 -21
  9. package/dist/builder/materialize.d.ts +77 -10
  10. package/dist/builder/materialize.js +495 -152
  11. package/dist/builder/materializeQuery.d.ts +12 -0
  12. package/dist/builder/materializeQuery.js +31 -0
  13. package/dist/builder/persistence.d.ts +10 -6
  14. package/dist/builder/persistence.js +107 -62
  15. package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -1
  16. package/dist/builder/{sourceRows.js → rows.js} +3 -9
  17. package/dist/builder/schemaInference.d.ts +1 -2
  18. package/dist/builder/schemaInference.js +4 -12
  19. package/dist/builder/types.d.ts +41 -26
  20. package/dist/builder/types.js +1 -3
  21. package/dist/clearDataset.tool.d.ts +2 -3
  22. package/dist/clearDataset.tool.js +13 -17
  23. package/dist/completeDataset.steps.d.ts +117 -0
  24. package/dist/completeDataset.steps.js +537 -0
  25. package/dist/completeDataset.tool.d.ts +132 -7
  26. package/dist/completeDataset.tool.js +46 -192
  27. package/dist/contextResources.d.ts +31 -0
  28. package/dist/contextResources.js +151 -0
  29. package/dist/contextWorkspace.d.ts +79 -0
  30. package/dist/contextWorkspace.js +234 -0
  31. package/dist/dataset/steps.d.ts +39 -15
  32. package/dist/dataset/steps.js +96 -39
  33. package/dist/dataset.d.ts +2 -3
  34. package/dist/dataset.js +73 -51
  35. package/dist/datasetFiles.d.ts +5 -1
  36. package/dist/datasetFiles.js +29 -27
  37. package/dist/defineNotation.tool.d.ts +49 -0
  38. package/dist/defineNotation.tool.js +154 -0
  39. package/dist/domain.d.ts +1 -2
  40. package/dist/domain.js +1 -6
  41. package/dist/executeCommand.tool.d.ts +2 -30
  42. package/dist/executeCommand.tool.js +165 -39
  43. package/dist/file/file-dataset.agent.d.ts +19 -56
  44. package/dist/file/file-dataset.agent.js +182 -136
  45. package/dist/file/file-dataset.steps.d.ts +27 -0
  46. package/dist/file/file-dataset.steps.js +47 -0
  47. package/dist/file/file-dataset.types.d.ts +64 -0
  48. package/dist/file/file-dataset.types.js +1 -0
  49. package/dist/file/filepreview.d.ts +5 -35
  50. package/dist/file/filepreview.js +60 -107
  51. package/dist/file/filepreview.types.d.ts +31 -0
  52. package/dist/file/filepreview.types.js +1 -0
  53. package/dist/file/generateSchema.tool.d.ts +2 -3
  54. package/dist/file/generateSchema.tool.js +11 -15
  55. package/dist/file/index.d.ts +1 -2
  56. package/dist/file/index.js +1 -18
  57. package/dist/file/prompts.d.ts +2 -3
  58. package/dist/file/prompts.js +152 -32
  59. package/dist/file/scripts.generated.d.ts +1 -0
  60. package/dist/file/scripts.generated.js +11 -0
  61. package/dist/file/steps.d.ts +1 -2
  62. package/dist/file/steps.js +9 -7
  63. package/dist/id.d.ts +1 -0
  64. package/dist/id.js +10 -0
  65. package/dist/index.d.ts +9 -7
  66. package/dist/index.js +9 -23
  67. package/dist/materializeDataset.tool.d.ts +35 -28
  68. package/dist/materializeDataset.tool.js +74 -68
  69. package/dist/notation.d.ts +205 -0
  70. package/dist/notation.js +424 -0
  71. package/dist/query/index.d.ts +1 -2
  72. package/dist/query/index.js +1 -18
  73. package/dist/query/queryDomain.d.ts +3 -4
  74. package/dist/query/queryDomain.js +3 -40
  75. package/dist/query/queryDomain.step.d.ts +1 -1
  76. package/dist/query/queryDomain.step.js +24 -13
  77. package/dist/sandbox/steps.d.ts +23 -15
  78. package/dist/sandbox/steps.js +73 -76
  79. package/dist/sandbox.steps.d.ts +1 -2
  80. package/dist/sandbox.steps.js +1 -18
  81. package/dist/schema.d.ts +14 -3
  82. package/dist/schema.js +27 -26
  83. package/dist/service.d.ts +12 -5
  84. package/dist/service.js +88 -15
  85. package/dist/skill.d.ts +0 -1
  86. package/dist/skill.js +12 -17
  87. package/dist/transform/filepreview.d.ts +2 -3
  88. package/dist/transform/filepreview.js +9 -26
  89. package/dist/transform/index.d.ts +2 -3
  90. package/dist/transform/index.js +2 -8
  91. package/dist/transform/prompts.d.ts +1 -34
  92. package/dist/transform/prompts.js +66 -46
  93. package/dist/transform/transform-dataset.agent.d.ts +21 -46
  94. package/dist/transform/transform-dataset.agent.js +152 -93
  95. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  96. package/dist/transform/transform-dataset.steps.js +61 -0
  97. package/dist/transform/transform-dataset.types.d.ts +96 -0
  98. package/dist/transform/transform-dataset.types.js +1 -0
  99. package/dist/transform/transformDataset.d.ts +3 -3
  100. package/dist/transform/transformDataset.js +15 -18
  101. package/dist/writeDatasetRows.tool.d.ts +188 -0
  102. package/dist/writeDatasetRows.tool.js +258 -0
  103. package/package.json +33 -8
  104. package/dist/builder/instructions.d.ts.map +0 -1
  105. package/dist/builder/instructions.js.map +0 -1
  106. package/dist/builder/materialize.d.ts.map +0 -1
  107. package/dist/builder/materialize.js.map +0 -1
  108. package/dist/builder/persistence.d.ts.map +0 -1
  109. package/dist/builder/persistence.js.map +0 -1
  110. package/dist/builder/schemaInference.d.ts.map +0 -1
  111. package/dist/builder/schemaInference.js.map +0 -1
  112. package/dist/builder/sourceRows.d.ts.map +0 -1
  113. package/dist/builder/sourceRows.js.map +0 -1
  114. package/dist/builder/types.d.ts.map +0 -1
  115. package/dist/builder/types.js.map +0 -1
  116. package/dist/clearDataset.tool.d.ts.map +0 -1
  117. package/dist/clearDataset.tool.js.map +0 -1
  118. package/dist/completeDataset.tool.d.ts.map +0 -1
  119. package/dist/completeDataset.tool.js.map +0 -1
  120. package/dist/dataset/steps.d.ts.map +0 -1
  121. package/dist/dataset/steps.js.map +0 -1
  122. package/dist/dataset.d.ts.map +0 -1
  123. package/dist/dataset.js.map +0 -1
  124. package/dist/datasetFiles.d.ts.map +0 -1
  125. package/dist/datasetFiles.js.map +0 -1
  126. package/dist/domain.d.ts.map +0 -1
  127. package/dist/domain.js.map +0 -1
  128. package/dist/eventsReactRuntime.d.ts +0 -22
  129. package/dist/eventsReactRuntime.d.ts.map +0 -1
  130. package/dist/eventsReactRuntime.js +0 -29
  131. package/dist/eventsReactRuntime.js.map +0 -1
  132. package/dist/executeCommand.tool.d.ts.map +0 -1
  133. package/dist/executeCommand.tool.js.map +0 -1
  134. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  135. package/dist/file/file-dataset.agent.js.map +0 -1
  136. package/dist/file/filepreview.d.ts.map +0 -1
  137. package/dist/file/filepreview.js.map +0 -1
  138. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  139. package/dist/file/generateSchema.tool.js.map +0 -1
  140. package/dist/file/index.d.ts.map +0 -1
  141. package/dist/file/index.js.map +0 -1
  142. package/dist/file/prompts.d.ts.map +0 -1
  143. package/dist/file/prompts.js.map +0 -1
  144. package/dist/file/steps.d.ts.map +0 -1
  145. package/dist/file/steps.js.map +0 -1
  146. package/dist/index.d.ts.map +0 -1
  147. package/dist/index.js.map +0 -1
  148. package/dist/materializeDataset.tool.d.ts.map +0 -1
  149. package/dist/materializeDataset.tool.js.map +0 -1
  150. package/dist/query/index.d.ts.map +0 -1
  151. package/dist/query/index.js.map +0 -1
  152. package/dist/query/queryDomain.d.ts.map +0 -1
  153. package/dist/query/queryDomain.js.map +0 -1
  154. package/dist/query/queryDomain.step.d.ts.map +0 -1
  155. package/dist/query/queryDomain.step.js.map +0 -1
  156. package/dist/sandbox/steps.d.ts.map +0 -1
  157. package/dist/sandbox/steps.js.map +0 -1
  158. package/dist/sandbox.steps.d.ts.map +0 -1
  159. package/dist/sandbox.steps.js.map +0 -1
  160. package/dist/schema.d.ts.map +0 -1
  161. package/dist/schema.js.map +0 -1
  162. package/dist/service.d.ts.map +0 -1
  163. package/dist/service.js.map +0 -1
  164. package/dist/skill.d.ts.map +0 -1
  165. package/dist/skill.js.map +0 -1
  166. package/dist/transform/filepreview.d.ts.map +0 -1
  167. package/dist/transform/filepreview.js.map +0 -1
  168. package/dist/transform/index.d.ts.map +0 -1
  169. package/dist/transform/index.js.map +0 -1
  170. package/dist/transform/prompts.d.ts.map +0 -1
  171. package/dist/transform/prompts.js.map +0 -1
  172. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  173. package/dist/transform/transform-dataset.agent.js.map +0 -1
  174. package/dist/transform/transformDataset.d.ts.map +0 -1
  175. package/dist/transform/transformDataset.js.map +0 -1
@@ -0,0 +1,12 @@
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalDatasetResource } from "./types.js";
2
+ export declare function materializeQueryResource<Runtime extends AnyDatasetRuntime>(runtime: DatasetBuilderState<Runtime>["runtime"], resource: Extract<InternalDatasetResource, {
3
+ kind: "query";
4
+ }>, params: {
5
+ datasetId: string;
6
+ sandboxId?: string;
7
+ schema?: DatasetSchemaInput;
8
+ title?: string;
9
+ instructions?: string;
10
+ first?: boolean;
11
+ contextId: string;
12
+ }): Promise<string>;
@@ -0,0 +1,31 @@
1
+ import { materializeRowsToDataset } from "./persistence.js";
2
+ import { getDomainDescriptor, normalizeQueryRows } from "./rows.js";
3
+ async function readQueryResourceRowsStep(params) {
4
+ "use step";
5
+ const db = await params.runtime.db();
6
+ const result = await db.query(params.query);
7
+ return { rows: normalizeQueryRows(result) };
8
+ }
9
+ export async function materializeQueryResource(runtime, resource, params) {
10
+ const { rows } = await readQueryResourceRowsStep({
11
+ runtime,
12
+ query: resource.query,
13
+ });
14
+ const domainDescriptor = getDomainDescriptor(resource.domain);
15
+ return await materializeRowsToDataset(runtime, {
16
+ datasetId: params.datasetId,
17
+ sandboxId: params.sandboxId,
18
+ title: params.title ?? resource.title,
19
+ instructions: params.instructions,
20
+ contextId: params.contextId,
21
+ analysis: {
22
+ query: resource.query,
23
+ explanation: resource.explanation,
24
+ ...domainDescriptor,
25
+ },
26
+ rows,
27
+ schema: params.schema,
28
+ inferSchema: !params.schema,
29
+ first: params.first,
30
+ });
31
+ }
@@ -1,18 +1,22 @@
1
- import type { AnyDatasetRuntime, DatasetBuildResult, DatasetTextSourceInput, MaterializeRowsParams } from "./types";
2
- export declare function defaultTextSourceName(source: DatasetTextSourceInput): string;
1
+ import type { AnyDatasetRuntime, DatasetBuildResult, DatasetTextResourceInput, MaterializeRowsParams } from "./types.js";
2
+ export declare function defaultTextResourceName(resource: DatasetTextResourceInput): string;
3
3
  export declare function getDatasetDb<Runtime extends AnyDatasetRuntime>(runtime: Runtime): Promise<any>;
4
4
  export declare function createOrUpdateDatasetMetadata<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: {
5
5
  datasetId: string;
6
6
  sandboxId?: string;
7
7
  title?: string;
8
8
  instructions?: string;
9
- sources: any[];
10
- sourceKinds: string[];
9
+ contextId: string;
11
10
  analysis?: any;
12
11
  schema?: any;
13
12
  status?: string;
14
13
  }): Promise<void>;
15
14
  export declare function materializeRowsToDataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: MaterializeRowsParams): Promise<string>;
16
- export declare function uploadInlineTextSource<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, source: DatasetTextSourceInput): Promise<string>;
15
+ export declare function uploadInlineTextResource<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, resource: DatasetTextResourceInput): Promise<string>;
17
16
  export declare function finalizeBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, datasetId: string, withFirst: boolean): Promise<DatasetBuildResult>;
18
- //# sourceMappingURL=persistence.d.ts.map
17
+ export declare function createDatasetBuildResult<Runtime extends AnyDatasetRuntime>(runtime: Runtime, params: {
18
+ datasetId: string;
19
+ dataset: any;
20
+ previewRows: any[];
21
+ firstRow?: any | null;
22
+ }): DatasetBuildResult;
@@ -1,41 +1,38 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.defaultTextSourceName = defaultTextSourceName;
4
- exports.getDatasetDb = getDatasetDb;
5
- exports.createOrUpdateDatasetMetadata = createOrUpdateDatasetMetadata;
6
- exports.materializeRowsToDataset = materializeRowsToDataset;
7
- exports.uploadInlineTextSource = uploadInlineTextSource;
8
- exports.finalizeBuildResult = finalizeBuildResult;
9
- const service_1 = require("../service");
10
- const schema_1 = require("../schema");
11
- const schemaInference_1 = require("./schemaInference");
12
- const sourceRows_1 = require("./sourceRows");
13
- function defaultTextSourceName(source) {
14
- if (source.name?.trim())
15
- return source.name.trim();
16
- const mimeType = String(source.mimeType ?? "").toLowerCase();
1
+ import { DatasetService } from "../service.js";
2
+ import { datasetDomain } from "../schema.js";
3
+ import { annotateNotationEvidence, inferQueryNotation, } from "../notation.js";
4
+ import { datasetGetByIdStep, datasetPreviewRowsStep, datasetReadOneStep, datasetReadRowsStep, } from "../dataset/steps.js";
5
+ import { inferDatasetSchema, validateRows } from "./schemaInference.js";
6
+ import { rowsToJsonl } from "./rows.js";
7
+ export function defaultTextResourceName(resource) {
8
+ if (resource.name?.trim())
9
+ return resource.name.trim();
10
+ const mimeType = String(resource.mimeType ?? "").toLowerCase();
17
11
  if (mimeType.includes("csv"))
18
- return "source.csv";
12
+ return "resource.csv";
19
13
  if (mimeType.includes("json"))
20
- return "source.json";
14
+ return "resource.json";
21
15
  if (mimeType.includes("yaml") || mimeType.includes("yml"))
22
- return "source.yaml";
23
- return "source.txt";
16
+ return "resource.yaml";
17
+ return "resource.txt";
24
18
  }
25
- async function getDatasetDb(runtime) {
26
- const scoped = await runtime.use(schema_1.datasetDomain);
19
+ export async function getDatasetDb(runtime) {
20
+ const scoped = await runtime.use(datasetDomain);
27
21
  return scoped.db;
28
22
  }
29
- async function createOrUpdateDatasetMetadata(runtime, params) {
23
+ export async function createOrUpdateDatasetMetadata(runtime, params) {
24
+ "use step";
25
+ if (!params.contextId.trim()) {
26
+ throw new Error("dataset_context_required");
27
+ }
30
28
  const db = await getDatasetDb(runtime);
31
- const service = new service_1.DatasetService(db);
29
+ const service = new DatasetService(db);
32
30
  const result = await service.createDataset({
33
31
  id: params.datasetId,
34
32
  sandboxId: params.sandboxId,
35
33
  title: params.title ?? params.datasetId,
36
34
  instructions: params.instructions ?? "",
37
- sources: params.sources,
38
- sourceKinds: params.sourceKinds,
35
+ contextId: params.contextId,
39
36
  analysis: params.analysis,
40
37
  schema: params.schema,
41
38
  status: params.status ?? "building",
@@ -45,29 +42,29 @@ async function createOrUpdateDatasetMetadata(runtime, params) {
45
42
  throw new Error(result.error);
46
43
  }
47
44
  }
48
- async function materializeRowsToDataset(runtime, params) {
45
+ export async function materializeRowsToDataset(runtime, params) {
46
+ "use step";
49
47
  if (params.first && params.rows.length > 1) {
50
48
  throw new Error("dataset_first_expected_zero_or_one_row");
51
49
  }
52
50
  const resolvedSchema = params.schema ??
53
- (0, schemaInference_1.inferDatasetSchema)(params.rows, params.title ? `${params.title}Row` : "DatasetRow", params.title ? `One row for ${params.title}` : "One dataset row");
54
- (0, schemaInference_1.validateRows)(params.rows, resolvedSchema);
51
+ inferDatasetSchema(params.rows, params.title ? `${params.title}Row` : "DatasetRow", params.title ? `One row for ${params.title}` : "One dataset row");
52
+ validateRows(params.rows, resolvedSchema);
55
53
  await createOrUpdateDatasetMetadata(runtime, {
56
54
  datasetId: params.datasetId,
57
55
  sandboxId: params.sandboxId,
58
56
  title: params.title,
59
57
  instructions: params.instructions,
60
- sources: params.sources,
61
- sourceKinds: params.sourceKinds,
58
+ contextId: params.contextId,
62
59
  analysis: params.analysis,
63
60
  schema: resolvedSchema,
64
61
  status: "building",
65
62
  });
66
63
  const db = await getDatasetDb(runtime);
67
- const service = new service_1.DatasetService(db);
64
+ const service = new DatasetService(db);
68
65
  const uploadResult = await service.uploadDatasetOutputFile({
69
66
  datasetId: params.datasetId,
70
- fileBuffer: Buffer.from((0, sourceRows_1.rowsToJsonl)(params.rows), "utf-8"),
67
+ fileBuffer: Buffer.from(rowsToJsonl(params.rows), "utf-8"),
71
68
  });
72
69
  if (!uploadResult.ok) {
73
70
  throw new Error(uploadResult.error);
@@ -81,67 +78,115 @@ async function materializeRowsToDataset(runtime, params) {
81
78
  if (!statusResult.ok) {
82
79
  throw new Error(statusResult.error);
83
80
  }
81
+ // Formal notation, informative only (never blocks the build): a notation
82
+ // proposed during the build (agent iterations) gets advisory evidence
83
+ // against the materialized rows; query-backed builds with no proposed
84
+ // notation get the deterministic one derived from query + schema + rows.
85
+ try {
86
+ const existing = await service.getDatasetById(params.datasetId);
87
+ const previous = (existing.ok ? existing.data?.notation : null);
88
+ const analysis = (params.analysis ?? {});
89
+ const queryNotation = analysis.query && typeof analysis.query === "object"
90
+ ? inferQueryNotation({
91
+ entityNames: Object.keys(analysis.query),
92
+ rowCount: params.rows.length,
93
+ schema: resolvedSchema,
94
+ explanation: typeof analysis.explanation === "string" ? analysis.explanation : undefined,
95
+ })
96
+ : null;
97
+ // Query-backed builds are deterministic, so a freshly inferred notation
98
+ // always wins (a prior run's notation would be stale). Only agent-built
99
+ // datasets (no query) keep the notation the agent proposed during the
100
+ // build, which by now is the latest `previous`.
101
+ const candidate = queryNotation ??
102
+ (previous && Array.isArray(previous.predicates) && previous.predicates.length > 0
103
+ ? previous
104
+ : null);
105
+ if (candidate) {
106
+ await service.updateDatasetNotation({
107
+ datasetId: params.datasetId,
108
+ notation: annotateNotationEvidence(candidate, params.rows),
109
+ });
110
+ }
111
+ }
112
+ catch {
113
+ // notation must never affect the build result
114
+ }
84
115
  return params.datasetId;
85
116
  }
86
- async function uploadInlineTextSource(runtime, datasetId, source) {
117
+ export async function uploadInlineTextResource(runtime, datasetId, resource) {
118
+ "use step";
87
119
  const db = await getDatasetDb(runtime);
88
- const fileName = defaultTextSourceName(source);
89
- const storagePath = `/dataset/source/${datasetId}/${Date.now()}-${fileName}`;
90
- const uploadResult = await db.storage.uploadFile(storagePath, Buffer.from(source.text, "utf-8"), {
91
- contentType: source.mimeType ?? "text/plain",
120
+ const fileName = defaultTextResourceName(resource);
121
+ const storagePath = `/dataset/resource/${datasetId}/${Date.now()}-${fileName}`;
122
+ const uploadResult = await db.storage.uploadFile(storagePath, Buffer.from(resource.text, "utf-8"), {
123
+ contentType: resource.mimeType ?? "text/plain",
92
124
  contentDisposition: fileName,
93
125
  });
94
126
  const fileId = uploadResult?.data?.id;
95
127
  if (!fileId) {
96
- throw new Error("dataset_text_source_upload_failed");
128
+ throw new Error("dataset_text_resource_upload_failed");
97
129
  }
98
130
  return fileId;
99
131
  }
100
- async function finalizeBuildResult(runtime, datasetId, withFirst) {
101
- const db = await getDatasetDb(runtime);
102
- const service = new service_1.DatasetService(db);
103
- const datasetResult = await service.getDatasetById(datasetId);
104
- if (!datasetResult.ok) {
132
+ export async function finalizeBuildResult(runtime, datasetId, withFirst) {
133
+ const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
134
+ if (!datasetResult.ok)
105
135
  throw new Error(datasetResult.error);
106
- }
107
- const previewResult = await service.previewRows(datasetId, 20);
108
- if (!previewResult.ok) {
109
- throw new Error(previewResult.error);
110
- }
136
+ const previewResult = await datasetPreviewRowsStep({ runtime, datasetId, limit: 20 });
111
137
  const reader = {
112
138
  async read(cursorOrParams, limit) {
113
139
  const params = typeof cursorOrParams === "object" && cursorOrParams !== null
114
140
  ? cursorOrParams
115
141
  : { cursor: cursorOrParams, limit };
116
- const rowsResult = await service.readRows({
142
+ return await datasetReadRowsStep({
143
+ runtime,
117
144
  datasetId,
118
145
  cursor: params.cursor,
119
146
  limit: params.limit,
120
147
  });
121
- if (!rowsResult.ok) {
122
- throw new Error(rowsResult.error);
123
- }
124
- return rowsResult.data;
125
148
  },
126
149
  };
150
+ const notation = (datasetResult.data?.notation ?? null);
127
151
  if (!withFirst) {
128
152
  return {
129
153
  datasetId,
130
154
  dataset: datasetResult.data,
131
- previewRows: previewResult.data,
155
+ notation,
156
+ previewRows: previewResult.rows,
132
157
  reader,
133
158
  };
134
159
  }
135
- const firstResult = await service.readOne(datasetId);
136
- if (!firstResult.ok) {
137
- throw new Error(firstResult.error);
138
- }
160
+ const firstResult = await datasetReadOneStep({ runtime, datasetId });
139
161
  return {
140
162
  datasetId,
141
163
  dataset: datasetResult.data,
142
- previewRows: previewResult.data,
164
+ notation,
165
+ previewRows: previewResult.rows,
166
+ reader,
167
+ firstRow: firstResult.row,
168
+ };
169
+ }
170
+ export function createDatasetBuildResult(runtime, params) {
171
+ const reader = {
172
+ async read(cursorOrParams, limit) {
173
+ const readParams = typeof cursorOrParams === "object" && cursorOrParams !== null
174
+ ? cursorOrParams
175
+ : { cursor: cursorOrParams, limit };
176
+ return await datasetReadRowsStep({
177
+ runtime,
178
+ datasetId: params.datasetId,
179
+ cursor: readParams.cursor,
180
+ limit: readParams.limit,
181
+ });
182
+ },
183
+ };
184
+ return {
185
+ datasetId: params.datasetId,
186
+ dataset: params.dataset,
187
+ notation: (params.dataset?.notation ?? null),
188
+ previewRows: params.previewRows,
143
189
  reader,
144
- firstRow: firstResult.data,
190
+ ...(params.firstRow !== undefined ? { firstRow: params.firstRow } : {}),
145
191
  };
146
192
  }
147
- //# sourceMappingURL=persistence.js.map
@@ -5,4 +5,3 @@ export declare function getDomainDescriptor(domain: DomainSchemaResult): {
5
5
  domainPackageName?: string | undefined;
6
6
  domainName: string;
7
7
  };
8
- //# sourceMappingURL=sourceRows.d.ts.map
@@ -1,9 +1,4 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.rowsToJsonl = rowsToJsonl;
4
- exports.normalizeQueryRows = normalizeQueryRows;
5
- exports.getDomainDescriptor = getDomainDescriptor;
6
- function rowsToJsonl(rows) {
1
+ export function rowsToJsonl(rows) {
7
2
  return rows
8
3
  .map((row) => JSON.stringify({
9
4
  type: "row",
@@ -12,7 +7,7 @@ function rowsToJsonl(rows) {
12
7
  .join("\n")
13
8
  .concat(rows.length > 0 ? "\n" : "");
14
9
  }
15
- function normalizeQueryRows(result) {
10
+ export function normalizeQueryRows(result) {
16
11
  if (!result || typeof result !== "object")
17
12
  return [];
18
13
  const entries = Object.entries(result);
@@ -49,7 +44,7 @@ function normalizeQueryRows(result) {
49
44
  }
50
45
  return rows;
51
46
  }
52
- function getDomainDescriptor(domain) {
47
+ export function getDomainDescriptor(domain) {
53
48
  const meta = domain?.meta ?? {};
54
49
  const context = typeof domain?.context === "function" ? domain.context() : {};
55
50
  const name = String(meta?.name ?? context?.name ?? "domain");
@@ -59,4 +54,3 @@ function getDomainDescriptor(domain) {
59
54
  ...(packageName ? { domainPackageName: packageName } : {}),
60
55
  };
61
56
  }
62
- //# sourceMappingURL=sourceRows.js.map
@@ -1,4 +1,3 @@
1
- import type { DatasetSchemaInput } from "./types";
1
+ import type { DatasetSchemaInput } from "./types.js";
2
2
  export declare function inferDatasetSchema(rows: any[], title?: string, description?: string): DatasetSchemaInput;
3
3
  export declare function validateRows(rows: any[], schema: DatasetSchemaInput): void;
4
- //# sourceMappingURL=schemaInference.d.ts.map
@@ -1,12 +1,5 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.inferDatasetSchema = inferDatasetSchema;
7
- exports.validateRows = validateRows;
8
- const ajv_1 = __importDefault(require("ajv"));
9
- const ajv = new ajv_1.default({ allErrors: true, strict: false });
1
+ import Ajv from "ajv";
2
+ const ajv = new Ajv({ allErrors: true, strict: false });
10
3
  function inferJsonSchemaType(value) {
11
4
  if (value === null)
12
5
  return { type: "null" };
@@ -23,7 +16,7 @@ function inferJsonSchemaType(value) {
23
16
  return { type: "string" };
24
17
  }
25
18
  }
26
- function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
19
+ export function inferDatasetSchema(rows, title = "DatasetRow", description = "One dataset row") {
27
20
  const properties = {};
28
21
  const required = [];
29
22
  const keys = new Set();
@@ -56,7 +49,7 @@ function inferDatasetSchema(rows, title = "DatasetRow", description = "One datas
56
49
  },
57
50
  };
58
51
  }
59
- function validateRows(rows, schema) {
52
+ export function validateRows(rows, schema) {
60
53
  const validator = ajv.compile(schema.schema);
61
54
  for (const row of rows) {
62
55
  const valid = validator(row);
@@ -66,4 +59,3 @@ function validateRows(rows, schema) {
66
59
  }
67
60
  }
68
61
  }
69
- //# sourceMappingURL=schemaInference.js.map
@@ -1,38 +1,46 @@
1
1
  import type { InstaQLParams, ValidQuery } from "@instantdb/core";
2
2
  import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
3
3
  import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
4
- import type { ContextReactor } from "@ekairos/events";
5
- import { datasetDomain } from "../schema";
6
- export type DatasetQuerySourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
4
+ import type { ContextIdentifier, StoredContextResource } from "@ekairos/events";
5
+ import type { ContextReactor } from "@ekairos/reactor/context";
6
+ import { datasetDomain } from "../schema.js";
7
+ import type { DatasetNotation } from "../notation.js";
8
+ export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
7
9
  query: InstaQLParams<DomainInstantSchema<D>>;
8
10
  title?: string;
9
11
  explanation?: string;
10
12
  domain: D;
11
13
  };
12
- export type DatasetFileSourceInput = {
14
+ export type DatasetFileResourceInput = {
13
15
  fileId: string;
14
16
  description?: string;
17
+ filename?: string;
18
+ mediaType?: string;
15
19
  };
16
- export type DatasetTextSourceInput = {
20
+ export type DatasetTextResourceInput = {
17
21
  text: string;
18
22
  mimeType?: string;
19
23
  name?: string;
20
24
  description?: string;
21
25
  };
22
- export type DatasetExistingSourceInput = {
26
+ export type DatasetExistingResourceInput = {
23
27
  datasetId: string;
24
28
  description?: string;
25
29
  };
26
- export type DatasetFileSource = {
30
+ export type DatasetContextResourceInput = ContextIdentifier;
31
+ export type DatasetFileResource = {
27
32
  kind: "file";
28
- } & DatasetFileSourceInput;
29
- export type DatasetTextSource = {
33
+ } & DatasetFileResourceInput;
34
+ export type DatasetTextResource = {
30
35
  kind: "text";
31
- } & DatasetTextSourceInput;
32
- export type DatasetExistingSource = {
36
+ } & DatasetTextResourceInput;
37
+ export type DatasetExistingResource = {
33
38
  kind: "dataset";
34
- } & DatasetExistingSourceInput;
35
- export type DatasetSourceInput = DatasetFileSourceInput | DatasetTextSourceInput | DatasetExistingSourceInput | DatasetFileSource | DatasetTextSource | DatasetExistingSource;
39
+ } & DatasetExistingResourceInput;
40
+ export type DatasetContextResource = {
41
+ kind: "context";
42
+ } & DatasetContextResourceInput;
43
+ export type DatasetResourceInput = DatasetFileResourceInput | DatasetTextResourceInput | DatasetExistingResourceInput | DatasetContextResourceInput | DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource;
36
44
  export type DatasetSchemaInput = {
37
45
  title?: string;
38
46
  description?: string;
@@ -42,13 +50,15 @@ export type DatasetOutput = "rows" | "object";
42
50
  export type DatasetMode = "auto" | "schema";
43
51
  export type DatasetBuilderOptions = {
44
52
  datasetId?: string;
53
+ durable?: boolean;
45
54
  };
46
55
  export type DatasetBuildOptions = {
47
56
  datasetId?: string;
57
+ durable?: boolean;
48
58
  };
49
- export type InternalSource = DatasetFileSource | DatasetTextSource | DatasetExistingSource | ({
59
+ export type InternalDatasetResource = DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource | ({
50
60
  kind: "query";
51
- } & DatasetQuerySourceInput);
61
+ } & DatasetQueryResourceInput);
52
62
  export type DatasetReaderResult = {
53
63
  rows: any[];
54
64
  cursor: number;
@@ -64,6 +74,9 @@ export type DatasetReader = {
64
74
  export type DatasetBuildResult = {
65
75
  datasetId: string;
66
76
  dataset: any;
77
+ /** the formal definition (intensional face), co-equal with the rows */
78
+ notation: DatasetNotation | null;
79
+ /** preview of the materialization (extensional face) */
67
80
  previewRows: any[];
68
81
  reader: DatasetReader;
69
82
  object?: any | null;
@@ -74,8 +87,8 @@ export type DatasetRuntimeEnv = {
74
87
  };
75
88
  export type AnyDatasetRuntime = EkairosRuntime<any, any, any>;
76
89
  export type DatasetRuntimeHandle<Runtime extends AnyDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
77
- export type CompatibleSourceDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
78
- export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
90
+ export type CompatibleQueryDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
91
+ export type DatasetQueryResourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
79
92
  query: Q;
80
93
  title?: string;
81
94
  explanation?: string;
@@ -83,14 +96,17 @@ export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends Va
83
96
  export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
84
97
  runtime: Runtime;
85
98
  env: Runtime["env"] & DatasetRuntimeEnv;
86
- sources: InternalSource[];
99
+ resources: InternalDatasetResource[];
100
+ contextResources?: StoredContextResource[];
87
101
  title?: string;
88
102
  sandboxId?: string;
103
+ contextId?: string;
89
104
  outputSchema?: DatasetSchemaInput;
90
105
  output: DatasetOutput;
91
106
  inferSchema: boolean;
92
107
  instructions?: string;
93
108
  reactor?: ContextReactor<any, any>;
109
+ durable?: boolean;
94
110
  first: boolean;
95
111
  };
96
112
  export type MaterializeRowsParams = {
@@ -98,8 +114,7 @@ export type MaterializeRowsParams = {
98
114
  sandboxId?: string;
99
115
  title?: string;
100
116
  instructions?: string;
101
- sources: any[];
102
- sourceKinds: string[];
117
+ contextId: string;
103
118
  analysis?: any;
104
119
  rows: any[];
105
120
  schema?: DatasetSchemaInput;
@@ -108,11 +123,12 @@ export type MaterializeRowsParams = {
108
123
  };
109
124
  export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
110
125
  readonly datasetId: string;
111
- fromFile(source: DatasetFileSourceInput): DatasetBuilder<Runtime>;
112
- fromText(source: DatasetTextSourceInput): DatasetBuilder<Runtime>;
113
- fromDataset(source: DatasetExistingSourceInput): DatasetBuilder<Runtime>;
114
- from(...sources: DatasetSourceInput[]): DatasetBuilder<Runtime>;
115
- fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleSourceDomain<Runtime, D>, source: DatasetQuerySourceOptions<D, Q>): DatasetBuilder<Runtime>;
126
+ fromFile(resource: DatasetFileResourceInput): DatasetBuilder<Runtime>;
127
+ fromText(resource: DatasetTextResourceInput): DatasetBuilder<Runtime>;
128
+ fromDataset(resource: DatasetExistingResourceInput): DatasetBuilder<Runtime>;
129
+ fromContext(context: DatasetContextResourceInput): DatasetBuilder<Runtime>;
130
+ from(...resources: DatasetResourceInput[]): DatasetBuilder<Runtime>;
131
+ fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleQueryDomain<Runtime, D>, resource: DatasetQueryResourceOptions<D, Q>): DatasetBuilder<Runtime>;
116
132
  title(title: string): DatasetBuilder<Runtime>;
117
133
  sandbox(input: {
118
134
  sandboxId: string;
@@ -127,4 +143,3 @@ export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
127
143
  first(): DatasetBuilder<Runtime>;
128
144
  build(options?: DatasetBuildOptions): Promise<DatasetBuildResult>;
129
145
  };
130
- //# sourceMappingURL=types.d.ts.map
@@ -1,3 +1 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- //# sourceMappingURL=types.js.map
1
+ export {};
@@ -1,9 +1,9 @@
1
1
  interface ClearDatasetToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
- env?: any;
4
+ runtime: any;
5
5
  }
6
- export declare function createClearDatasetTool({ datasetId, sandboxId, env }: ClearDatasetToolParams): import("ai").Tool<{
6
+ export declare function createClearDatasetTool({ datasetId, sandboxId, runtime }: ClearDatasetToolParams): import("ai").Tool<{
7
7
  reason: string;
8
8
  }, {
9
9
  success: boolean;
@@ -17,4 +17,3 @@ export declare function createClearDatasetTool({ datasetId, sandboxId, env }: Cl
17
17
  error?: undefined;
18
18
  }>;
19
19
  export {};
20
- //# sourceMappingURL=clearDataset.tool.d.ts.map
@@ -1,27 +1,24 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createClearDatasetTool = createClearDatasetTool;
4
- const ai_1 = require("ai");
5
- const zod_1 = require("zod");
6
- const steps_1 = require("./sandbox/steps");
7
- const datasetFiles_1 = require("./datasetFiles");
8
- const steps_2 = require("./dataset/steps");
9
- function createClearDatasetTool({ datasetId, sandboxId, env }) {
10
- return (0, ai_1.tool)({
1
+ import { tool } from "ai";
2
+ import { z } from "zod";
3
+ import { runDatasetSandboxCommandStep } from "./sandbox/steps.js";
4
+ import { getDatasetOutputPath } from "./datasetFiles.js";
5
+ import { datasetClearStep } from "./dataset/steps.js";
6
+ export function createClearDatasetTool({ datasetId, sandboxId, runtime }) {
7
+ return tool({
11
8
  description: "Clear all dataset records and output files. This will delete all generated data and reset the dataset to its initial state.",
12
- inputSchema: zod_1.z.object({
13
- reason: zod_1.z.string().describe("The reason for clearing the dataset"),
9
+ inputSchema: z.object({
10
+ reason: z.string().describe("The reason for clearing the dataset"),
14
11
  }),
15
12
  execute: async ({ reason }) => {
16
13
  console.log(`[Dataset ${datasetId}] ========================================`);
17
14
  console.log(`[Dataset ${datasetId}] Tool: clearDataset`);
18
15
  console.log(`[Dataset ${datasetId}] Reason: ${reason}`);
19
16
  console.log(`[Dataset ${datasetId}] ========================================`);
20
- const outputPath = (0, datasetFiles_1.getDatasetOutputPath)(datasetId);
17
+ const outputPath = getDatasetOutputPath(datasetId);
21
18
  console.log(`[Dataset ${datasetId}] Step 1: Deleting output file`);
22
19
  try {
23
- const result = await (0, steps_1.runDatasetSandboxCommandStep)({
24
- env,
20
+ const result = await runDatasetSandboxCommandStep({
21
+ runtime,
25
22
  sandboxId,
26
23
  cmd: "rm",
27
24
  args: ["-f", outputPath],
@@ -38,7 +35,7 @@ function createClearDatasetTool({ datasetId, sandboxId, env }) {
38
35
  console.warn(`[Dataset ${datasetId}] Error deleting output file: ${message}`);
39
36
  }
40
37
  console.log(`[Dataset ${datasetId}] Step 2: Clearing dataset records`);
41
- const clearResult = await (0, steps_2.datasetClearStep)({ env, datasetId });
38
+ const clearResult = await datasetClearStep({ runtime, datasetId });
42
39
  if (!clearResult.ok) {
43
40
  console.error(`[Dataset ${datasetId}] Failed to clear dataset: ${clearResult.error}`);
44
41
  return {
@@ -58,4 +55,3 @@ function createClearDatasetTool({ datasetId, sandboxId, env }) {
58
55
  },
59
56
  });
60
57
  }
61
- //# sourceMappingURL=clearDataset.tool.js.map