@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. package/README.md +347 -0
  2. package/dist/agents.d.ts +8 -0
  3. package/dist/agents.js +8 -0
  4. package/dist/builder/agentMaterializers.d.ts +9 -0
  5. package/dist/builder/agentMaterializers.js +10 -0
  6. package/dist/builder/context.d.ts +15 -0
  7. package/dist/builder/context.js +251 -0
  8. package/dist/builder/instructions.d.ts +5 -0
  9. package/dist/builder/instructions.js +40 -0
  10. package/dist/builder/materialize.d.ts +83 -0
  11. package/dist/builder/materialize.js +548 -0
  12. package/dist/builder/materializeQuery.d.ts +12 -0
  13. package/dist/builder/materializeQuery.js +31 -0
  14. package/dist/builder/persistence.d.ts +22 -0
  15. package/dist/builder/persistence.js +192 -0
  16. package/dist/builder/rows.d.ts +7 -0
  17. package/dist/builder/rows.js +56 -0
  18. package/dist/builder/schemaInference.d.ts +3 -0
  19. package/dist/builder/schemaInference.js +61 -0
  20. package/dist/builder/types.d.ts +144 -0
  21. package/dist/builder/types.js +1 -0
  22. package/dist/clearDataset.tool.d.ts +2 -3
  23. package/dist/clearDataset.tool.js +13 -17
  24. package/dist/completeDataset.steps.d.ts +117 -0
  25. package/dist/completeDataset.steps.js +537 -0
  26. package/dist/completeDataset.tool.d.ts +132 -7
  27. package/dist/completeDataset.tool.js +46 -192
  28. package/dist/contextResources.d.ts +31 -0
  29. package/dist/contextResources.js +151 -0
  30. package/dist/contextWorkspace.d.ts +79 -0
  31. package/dist/contextWorkspace.js +234 -0
  32. package/dist/dataset/steps.d.ts +39 -15
  33. package/dist/dataset/steps.js +96 -39
  34. package/dist/dataset.d.ts +3 -67
  35. package/dist/dataset.js +129 -521
  36. package/dist/datasetFiles.d.ts +5 -1
  37. package/dist/datasetFiles.js +29 -27
  38. package/dist/defineNotation.tool.d.ts +49 -0
  39. package/dist/defineNotation.tool.js +154 -0
  40. package/dist/domain.d.ts +1 -2
  41. package/dist/domain.js +1 -6
  42. package/dist/executeCommand.tool.d.ts +2 -30
  43. package/dist/executeCommand.tool.js +165 -39
  44. package/dist/file/file-dataset.agent.d.ts +19 -56
  45. package/dist/file/file-dataset.agent.js +181 -134
  46. package/dist/file/file-dataset.steps.d.ts +27 -0
  47. package/dist/file/file-dataset.steps.js +47 -0
  48. package/dist/file/file-dataset.types.d.ts +64 -0
  49. package/dist/file/file-dataset.types.js +1 -0
  50. package/dist/file/filepreview.d.ts +5 -35
  51. package/dist/file/filepreview.js +60 -107
  52. package/dist/file/filepreview.types.d.ts +31 -0
  53. package/dist/file/filepreview.types.js +1 -0
  54. package/dist/file/generateSchema.tool.d.ts +2 -3
  55. package/dist/file/generateSchema.tool.js +11 -15
  56. package/dist/file/index.d.ts +1 -2
  57. package/dist/file/index.js +1 -18
  58. package/dist/file/prompts.d.ts +2 -3
  59. package/dist/file/prompts.js +152 -32
  60. package/dist/file/scripts.generated.d.ts +1 -0
  61. package/dist/file/scripts.generated.js +11 -0
  62. package/dist/file/steps.d.ts +1 -2
  63. package/dist/file/steps.js +9 -7
  64. package/dist/id.d.ts +1 -0
  65. package/dist/id.js +10 -0
  66. package/dist/index.d.ts +9 -7
  67. package/dist/index.js +9 -23
  68. package/dist/materializeDataset.tool.d.ts +51 -31
  69. package/dist/materializeDataset.tool.js +81 -65
  70. package/dist/notation.d.ts +205 -0
  71. package/dist/notation.js +424 -0
  72. package/dist/query/index.d.ts +1 -2
  73. package/dist/query/index.js +1 -18
  74. package/dist/query/queryDomain.d.ts +3 -4
  75. package/dist/query/queryDomain.js +3 -40
  76. package/dist/query/queryDomain.step.d.ts +1 -1
  77. package/dist/query/queryDomain.step.js +24 -13
  78. package/dist/sandbox/steps.d.ts +23 -15
  79. package/dist/sandbox/steps.js +73 -76
  80. package/dist/sandbox.steps.d.ts +1 -2
  81. package/dist/sandbox.steps.js +1 -18
  82. package/dist/schema.d.ts +15 -13
  83. package/dist/schema.js +27 -37
  84. package/dist/service.d.ts +12 -5
  85. package/dist/service.js +88 -15
  86. package/dist/skill.d.ts +0 -1
  87. package/dist/skill.js +12 -17
  88. package/dist/transform/filepreview.d.ts +2 -3
  89. package/dist/transform/filepreview.js +9 -26
  90. package/dist/transform/index.d.ts +2 -3
  91. package/dist/transform/index.js +2 -8
  92. package/dist/transform/prompts.d.ts +1 -34
  93. package/dist/transform/prompts.js +66 -46
  94. package/dist/transform/transform-dataset.agent.d.ts +20 -45
  95. package/dist/transform/transform-dataset.agent.js +151 -91
  96. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  97. package/dist/transform/transform-dataset.steps.js +61 -0
  98. package/dist/transform/transform-dataset.types.d.ts +95 -0
  99. package/dist/transform/transform-dataset.types.js +1 -0
  100. package/dist/transform/transformDataset.d.ts +3 -3
  101. package/dist/transform/transformDataset.js +15 -18
  102. package/dist/writeDatasetRows.tool.d.ts +188 -0
  103. package/dist/writeDatasetRows.tool.js +258 -0
  104. package/package.json +33 -8
  105. package/dist/clearDataset.tool.d.ts.map +0 -1
  106. package/dist/clearDataset.tool.js.map +0 -1
  107. package/dist/completeDataset.tool.d.ts.map +0 -1
  108. package/dist/completeDataset.tool.js.map +0 -1
  109. package/dist/dataset/steps.d.ts.map +0 -1
  110. package/dist/dataset/steps.js.map +0 -1
  111. package/dist/dataset.d.ts.map +0 -1
  112. package/dist/dataset.js.map +0 -1
  113. package/dist/datasetFiles.d.ts.map +0 -1
  114. package/dist/datasetFiles.js.map +0 -1
  115. package/dist/domain.d.ts.map +0 -1
  116. package/dist/domain.js.map +0 -1
  117. package/dist/eventsReactRuntime.d.ts +0 -22
  118. package/dist/eventsReactRuntime.d.ts.map +0 -1
  119. package/dist/eventsReactRuntime.js +0 -29
  120. package/dist/eventsReactRuntime.js.map +0 -1
  121. package/dist/executeCommand.tool.d.ts.map +0 -1
  122. package/dist/executeCommand.tool.js.map +0 -1
  123. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  124. package/dist/file/file-dataset.agent.js.map +0 -1
  125. package/dist/file/filepreview.d.ts.map +0 -1
  126. package/dist/file/filepreview.js.map +0 -1
  127. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  128. package/dist/file/generateSchema.tool.js.map +0 -1
  129. package/dist/file/index.d.ts.map +0 -1
  130. package/dist/file/index.js.map +0 -1
  131. package/dist/file/prompts.d.ts.map +0 -1
  132. package/dist/file/prompts.js.map +0 -1
  133. package/dist/file/steps.d.ts.map +0 -1
  134. package/dist/file/steps.js.map +0 -1
  135. package/dist/index.d.ts.map +0 -1
  136. package/dist/index.js.map +0 -1
  137. package/dist/materializeDataset.tool.d.ts.map +0 -1
  138. package/dist/materializeDataset.tool.js.map +0 -1
  139. package/dist/query/index.d.ts.map +0 -1
  140. package/dist/query/index.js.map +0 -1
  141. package/dist/query/queryDomain.d.ts.map +0 -1
  142. package/dist/query/queryDomain.js.map +0 -1
  143. package/dist/query/queryDomain.step.d.ts.map +0 -1
  144. package/dist/query/queryDomain.step.js.map +0 -1
  145. package/dist/sandbox/steps.d.ts.map +0 -1
  146. package/dist/sandbox/steps.js.map +0 -1
  147. package/dist/sandbox.steps.d.ts.map +0 -1
  148. package/dist/sandbox.steps.js.map +0 -1
  149. package/dist/schema.d.ts.map +0 -1
  150. package/dist/schema.js.map +0 -1
  151. package/dist/service.d.ts.map +0 -1
  152. package/dist/service.js.map +0 -1
  153. package/dist/skill.d.ts.map +0 -1
  154. package/dist/skill.js.map +0 -1
  155. package/dist/transform/filepreview.d.ts.map +0 -1
  156. package/dist/transform/filepreview.js.map +0 -1
  157. package/dist/transform/index.d.ts.map +0 -1
  158. package/dist/transform/index.js.map +0 -1
  159. package/dist/transform/prompts.d.ts.map +0 -1
  160. package/dist/transform/prompts.js.map +0 -1
  161. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  162. package/dist/transform/transform-dataset.agent.js.map +0 -1
  163. package/dist/transform/transformDataset.d.ts.map +0 -1
  164. package/dist/transform/transformDataset.js.map +0 -1
@@ -0,0 +1,234 @@
1
+ import { readInstantFileStep } from "./file/steps.js";
2
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
3
+ const CONTEXT_WORKSPACE_BASE = "/tmp/ekairos/contexts";
4
+ const WORKSPACE_MANIFEST_FILE_NAME = "manifest.json";
5
+ function trimTrailingSlash(value) {
6
+ return value.endsWith("/") ? value.slice(0, -1) : value;
7
+ }
8
+ function sanitizePathSegment(value, fallback) {
9
+ const parts = String(value ?? "")
10
+ .trim()
11
+ .replace(/\\/g, "/")
12
+ .split("/")
13
+ .filter(Boolean);
14
+ const normalized = parts[parts.length - 1]
15
+ ?.replace(/[^a-zA-Z0-9_.-]/g, "_")
16
+ .replace(/_+/g, "_")
17
+ .slice(0, 160);
18
+ return normalized || fallback;
19
+ }
20
+ export function sanitizeContextWorkspacePathSegment(value, fallback) {
21
+ return sanitizePathSegment(value, fallback);
22
+ }
23
+ function filenameFromContentDisposition(value, fallback) {
24
+ const raw = String(value ?? "").trim();
25
+ if (!raw)
26
+ return fallback;
27
+ const filenameStar = raw.match(/filename\*=UTF-8''([^;]+)/i)?.[1];
28
+ if (filenameStar) {
29
+ return sanitizePathSegment(decodeURIComponent(filenameStar), fallback);
30
+ }
31
+ const filename = raw.match(/filename="?([^";]+)"?/i)?.[1];
32
+ return sanitizePathSegment(filename ?? raw, fallback);
33
+ }
34
+ function resolveContextEventPartDir(params) {
35
+ const sourceEventId = sanitizePathSegment(params.sourceEventId, "event");
36
+ const sourcePartIndex = Number.isFinite(params.sourcePartIndex)
37
+ ? Math.max(0, Math.floor(params.sourcePartIndex))
38
+ : 0;
39
+ return `${params.eventsDir}/${sourceEventId}/parts/${sourcePartIndex}`;
40
+ }
41
+ function resolveWorkspaceFilePath(params) {
42
+ return `${resolveContextEventPartDir(params)}/file`;
43
+ }
44
+ export function getContextWorkspaceBase() {
45
+ return trimTrailingSlash(CONTEXT_WORKSPACE_BASE);
46
+ }
47
+ export function getContextExecutionWorkspaceRoot(params) {
48
+ if (params.root)
49
+ return trimTrailingSlash(params.root);
50
+ const contextId = sanitizePathSegment(params.contextId, "context");
51
+ const executionId = sanitizePathSegment(params.executionId, "execution");
52
+ return `${getContextWorkspaceBase()}/${contextId}/executions/${executionId}`;
53
+ }
54
+ export function getContextWorkspaceRoot(params) {
55
+ if (params.root)
56
+ return trimTrailingSlash(params.root);
57
+ const contextId = sanitizePathSegment(params.contextId, "context");
58
+ return `${getContextWorkspaceBase()}/${contextId}`;
59
+ }
60
+ export function getContextEventsDir(params) {
61
+ return `${getContextWorkspaceRoot(params)}/events`;
62
+ }
63
+ export function getContextResourcesDir(params) {
64
+ return `${getContextWorkspaceRoot(params)}/resources`;
65
+ }
66
+ export function getContextExecutionWorkspaceDirs(params) {
67
+ const root = getContextExecutionWorkspaceRoot(params);
68
+ const contextRoot = getContextWorkspaceRoot(params);
69
+ const eventsDir = getContextEventsDir(params);
70
+ const resourcesDir = getContextResourcesDir(params);
71
+ return {
72
+ root,
73
+ contextRoot,
74
+ eventsDir,
75
+ resourcesDir,
76
+ outputDir: `${root}/output`,
77
+ scriptsDir: `${root}/scripts`,
78
+ tmpDir: `${root}/tmp`,
79
+ manifestPath: `${root}/${WORKSPACE_MANIFEST_FILE_NAME}`,
80
+ };
81
+ }
82
+ export function getContextExecutionWorkspaceStandardDirs(params) {
83
+ const dirs = getContextExecutionWorkspaceDirs(params);
84
+ return [
85
+ dirs.contextRoot,
86
+ dirs.eventsDir,
87
+ dirs.resourcesDir,
88
+ dirs.root,
89
+ dirs.outputDir,
90
+ dirs.scriptsDir,
91
+ dirs.tmpDir,
92
+ ];
93
+ }
94
+ export function extractContextWorkspaceFilesFromEventItems(eventItems) {
95
+ const files = [];
96
+ for (const item of eventItems) {
97
+ const itemRecord = asRecord(item);
98
+ const parts = Array.isArray(asRecord(itemRecord?.content)?.parts)
99
+ ? asRecord(itemRecord?.content)?.parts
100
+ : [];
101
+ parts.forEach((part, partIndex) => {
102
+ collectPartFiles(part, {
103
+ files,
104
+ sourceEventId: asText(itemRecord?.id),
105
+ sourcePartIndex: partIndex,
106
+ });
107
+ });
108
+ }
109
+ return files;
110
+ }
111
+ export async function prepareContextExecutionWorkspaceStep(params) {
112
+ "use step";
113
+ const dirs = getContextExecutionWorkspaceDirs(params);
114
+ const filePartDirs = Array.from(new Set(params.files.map((fileInput) => resolveContextEventPartDir({
115
+ eventsDir: dirs.eventsDir,
116
+ sourceEventId: fileInput.sourceEventId ?? fileInput.fileId,
117
+ sourcePartIndex: fileInput.sourcePartIndex ?? 0,
118
+ }))));
119
+ await runDatasetSandboxCommandStep({
120
+ runtime: params.runtime,
121
+ sandboxId: params.sandboxId,
122
+ cmd: "mkdir",
123
+ args: ["-p", ...getContextExecutionWorkspaceStandardDirs(params), ...filePartDirs],
124
+ });
125
+ const preparedFiles = [];
126
+ for (const fileInput of params.files) {
127
+ const fileId = String(fileInput.fileId ?? "").trim();
128
+ if (!fileId)
129
+ continue;
130
+ const file = await readInstantFileStep({ runtime: params.runtime, fileId });
131
+ const filename = sanitizePathSegment(fileInput.filename ??
132
+ filenameFromContentDisposition(file.contentDisposition, `${fileId}.bin`), `${fileId}.bin`);
133
+ const path = resolveWorkspaceFilePath({
134
+ eventsDir: dirs.eventsDir,
135
+ sourceEventId: fileInput.sourceEventId ?? fileId,
136
+ sourcePartIndex: fileInput.sourcePartIndex ?? 0,
137
+ });
138
+ const metadataPath = `${resolveContextEventPartDir({
139
+ eventsDir: dirs.eventsDir,
140
+ sourceEventId: fileInput.sourceEventId ?? fileId,
141
+ sourcePartIndex: fileInput.sourcePartIndex ?? 0,
142
+ })}/metadata.json`;
143
+ await writeDatasetSandboxFilesStep({
144
+ runtime: params.runtime,
145
+ sandboxId: params.sandboxId,
146
+ files: [{ path, contentBase64: file.contentBase64 }],
147
+ });
148
+ await writeDatasetSandboxTextFilesStep({
149
+ runtime: params.runtime,
150
+ sandboxId: params.sandboxId,
151
+ files: [
152
+ {
153
+ path: metadataPath,
154
+ content: JSON.stringify({
155
+ fileId,
156
+ filename,
157
+ mediaType: fileInput.mediaType,
158
+ role: fileInput.role ?? "input",
159
+ sourceEventId: fileInput.sourceEventId,
160
+ sourcePartIndex: fileInput.sourcePartIndex,
161
+ }, null, 2),
162
+ },
163
+ ],
164
+ });
165
+ preparedFiles.push({
166
+ fileId,
167
+ filename,
168
+ mediaType: fileInput.mediaType,
169
+ role: fileInput.role ?? "input",
170
+ path,
171
+ sourceEventId: fileInput.sourceEventId,
172
+ sourcePartIndex: fileInput.sourcePartIndex,
173
+ });
174
+ }
175
+ const manifest = {
176
+ contextId: params.contextId,
177
+ executionId: params.executionId,
178
+ sandboxId: params.sandboxId,
179
+ ...dirs,
180
+ files: preparedFiles,
181
+ };
182
+ await writeDatasetSandboxTextFilesStep({
183
+ runtime: params.runtime,
184
+ sandboxId: params.sandboxId,
185
+ files: [
186
+ {
187
+ path: dirs.manifestPath,
188
+ content: JSON.stringify(manifest, null, 2),
189
+ },
190
+ ],
191
+ });
192
+ return manifest;
193
+ }
194
+ function collectPartFiles(value, params) {
195
+ const record = asRecord(value);
196
+ if (!record)
197
+ return;
198
+ if (record.type === "file") {
199
+ pushFileRecord(record, params);
200
+ return;
201
+ }
202
+ const content = asRecord(record.content);
203
+ if (!content)
204
+ return;
205
+ if (Array.isArray(content.blocks)) {
206
+ for (const block of content.blocks) {
207
+ const blockRecord = asRecord(block);
208
+ if (blockRecord?.type === "file") {
209
+ pushFileRecord(blockRecord, params);
210
+ }
211
+ }
212
+ }
213
+ }
214
+ function pushFileRecord(record, params) {
215
+ const fileId = asText(record.fileId);
216
+ if (!fileId)
217
+ return;
218
+ params.files.push({
219
+ fileId,
220
+ filename: asText(record.filename),
221
+ mediaType: asText(record.mediaType),
222
+ role: "input",
223
+ sourceEventId: params.sourceEventId,
224
+ sourcePartIndex: params.sourcePartIndex,
225
+ });
226
+ }
227
+ function asRecord(value) {
228
+ return value && typeof value === "object" && !Array.isArray(value)
229
+ ? value
230
+ : null;
231
+ }
232
+ function asText(value) {
233
+ return typeof value === "string" && value.trim() ? value.trim() : undefined;
234
+ }
@@ -1,46 +1,70 @@
1
- export declare function getDatasetServiceDb(env?: any): Promise<any>;
1
+ export declare function getDatasetRuntimeDb(runtime: any): Promise<any>;
2
+ export declare function getDatasetServiceDb(runtime: any): Promise<any>;
2
3
  export declare function datasetGetByIdStep(params: {
3
- env?: any;
4
+ runtime: any;
4
5
  datasetId: string;
5
- }): Promise<import("../service").ServiceResult<any>>;
6
+ }): Promise<import("../service.js").ServiceResult<any>>;
6
7
  export declare function datasetReadOutputJsonlStep(params: {
7
- env?: any;
8
+ runtime: any;
8
9
  datasetId: string;
9
10
  }): Promise<{
10
11
  contentBase64: string;
11
12
  }>;
12
13
  export declare function datasetUpdateSchemaStep(params: {
13
- env?: any;
14
+ runtime: any;
14
15
  datasetId: string;
15
16
  schema: any;
16
17
  status?: string;
17
- }): Promise<import("../service").ServiceResult<void>>;
18
+ }): Promise<import("../service.js").ServiceResult<void>>;
18
19
  export declare function datasetUploadOutputFileStep(params: {
19
- env?: any;
20
+ runtime: any;
20
21
  datasetId: string;
21
- fileBuffer: Buffer;
22
- }): Promise<import("../service").ServiceResult<{
22
+ contentBase64: string;
23
+ }): Promise<import("../service.js").ServiceResult<{
23
24
  fileId: string;
24
25
  storagePath: string;
25
26
  }>>;
26
27
  export declare function datasetUpdateStatusStep(params: {
27
- env?: any;
28
+ runtime: any;
28
29
  datasetId: string;
29
30
  status: string;
30
31
  calculatedTotalRows?: number;
31
32
  actualGeneratedRowCount?: number;
32
- }): Promise<import("../service").ServiceResult<void>>;
33
+ }): Promise<import("../service.js").ServiceResult<void>>;
33
34
  export declare function datasetClearStep(params: {
34
- env?: any;
35
+ runtime: any;
35
36
  datasetId: string;
36
- }): Promise<import("../service").ServiceResult<{
37
+ }): Promise<import("../service.js").ServiceResult<{
37
38
  deletedCount: number;
38
39
  }>>;
39
40
  export declare function datasetPreviewRowsStep(params: {
40
- env?: any;
41
+ runtime: any;
42
+ datasetId: string;
43
+ limit?: number;
44
+ }): Promise<{
45
+ rows: any[];
46
+ }>;
47
+ export declare function datasetReadRowsStep(params: {
48
+ runtime: any;
41
49
  datasetId: string;
50
+ cursor?: number;
42
51
  limit?: number;
43
52
  }): Promise<{
44
53
  rows: any[];
54
+ cursor: number;
55
+ done: boolean;
56
+ }>;
57
+ export declare function datasetReadOneStep(params: {
58
+ runtime: any;
59
+ datasetId: string;
60
+ }): Promise<{
61
+ row: any | null;
62
+ }>;
63
+ export declare function datasetInferAndUpdateSchemaStep(params: {
64
+ runtime: any;
65
+ datasetId: string;
66
+ title?: string;
67
+ description?: string;
68
+ }): Promise<{
69
+ schema: import("../dataset.js").DatasetSchemaInput;
45
70
  }>;
46
- //# sourceMappingURL=steps.d.ts.map
@@ -1,32 +1,33 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.getDatasetServiceDb = getDatasetServiceDb;
4
- exports.datasetGetByIdStep = datasetGetByIdStep;
5
- exports.datasetReadOutputJsonlStep = datasetReadOutputJsonlStep;
6
- exports.datasetUpdateSchemaStep = datasetUpdateSchemaStep;
7
- exports.datasetUploadOutputFileStep = datasetUploadOutputFileStep;
8
- exports.datasetUpdateStatusStep = datasetUpdateStatusStep;
9
- exports.datasetClearStep = datasetClearStep;
10
- exports.datasetPreviewRowsStep = datasetPreviewRowsStep;
11
- const runtime_1 = require("@ekairos/events/runtime");
12
- const service_1 = require("../service");
13
- async function resolveEnv(env) {
14
- return env ?? (await (0, runtime_1.getContextEnv)());
1
+ import { DatasetService } from "../service.js";
2
+ import { datasetDomain } from "../schema.js";
3
+ import { inferDatasetSchema } from "../builder/schemaInference.js";
4
+ import { rowsToJsonl } from "../builder/rows.js";
5
+ export async function getDatasetRuntimeDb(runtime) {
6
+ if (!runtime) {
7
+ throw new Error("Dataset step requires runtime.");
8
+ }
9
+ if (typeof runtime.use === "function") {
10
+ const scoped = await runtime.use(datasetDomain);
11
+ const scopedDb = scoped.db;
12
+ return typeof scopedDb === "function" ? await scopedDb.call(scoped) : scopedDb;
13
+ }
14
+ const db = runtime.db;
15
+ return typeof db === "function" ? await db.call(runtime) : db;
15
16
  }
16
- async function getDatasetServiceDb(env) {
17
+ export async function getDatasetServiceDb(runtime) {
17
18
  "use step";
18
- const runtime = (await (0, runtime_1.getContextRuntime)(await resolveEnv(env)));
19
- return runtime.db;
19
+ return await getDatasetRuntimeDb(runtime);
20
20
  }
21
- async function datasetGetByIdStep(params) {
21
+ export async function datasetGetByIdStep(params) {
22
22
  "use step";
23
- const db = (await (0, runtime_1.getContextRuntime)(await resolveEnv(params.env))).db;
24
- const service = new service_1.DatasetService(db);
23
+ const db = await getDatasetRuntimeDb(params.runtime);
24
+ const service = new DatasetService(db);
25
25
  return await service.getDatasetById(params.datasetId);
26
26
  }
27
- async function datasetReadOutputJsonlStep(params) {
27
+ export async function datasetReadOutputJsonlStep(params) {
28
28
  "use step";
29
- const db = (await (0, runtime_1.getContextRuntime)(await resolveEnv(params.env))).db;
29
+ const db = await getDatasetRuntimeDb(params.runtime);
30
+ const service = new DatasetService(db);
30
31
  for (let attempt = 1; attempt <= 20; attempt++) {
31
32
  const query = await db.query({
32
33
  dataset_datasets: {
@@ -41,33 +42,43 @@ async function datasetReadOutputJsonlStep(params) {
41
42
  const fileBuffer = await fetch(url).then((r) => r.arrayBuffer());
42
43
  return { contentBase64: Buffer.from(fileBuffer).toString("base64") };
43
44
  }
45
+ const directRows = await service.readRows({
46
+ datasetId: params.datasetId,
47
+ cursor: 0,
48
+ limit: 100000,
49
+ });
50
+ if (directRows.ok && directRows.data.rows.length > 0) {
51
+ return {
52
+ contentBase64: Buffer.from(rowsToJsonl(directRows.data.rows), "utf-8").toString("base64"),
53
+ };
54
+ }
44
55
  await new Promise((resolve) => setTimeout(resolve, 250 * attempt));
45
56
  }
46
57
  throw new Error("Dataset output file not found");
47
58
  }
48
- async function datasetUpdateSchemaStep(params) {
59
+ export async function datasetUpdateSchemaStep(params) {
49
60
  "use step";
50
- const db = (await (0, runtime_1.getContextRuntime)(await resolveEnv(params.env))).db;
51
- const service = new service_1.DatasetService(db);
61
+ const db = await getDatasetRuntimeDb(params.runtime);
62
+ const service = new DatasetService(db);
52
63
  return await service.updateDatasetSchema({
53
64
  datasetId: params.datasetId,
54
65
  schema: params.schema,
55
66
  status: params.status,
56
67
  });
57
68
  }
58
- async function datasetUploadOutputFileStep(params) {
69
+ export async function datasetUploadOutputFileStep(params) {
59
70
  "use step";
60
- const db = (await (0, runtime_1.getContextRuntime)(await resolveEnv(params.env))).db;
61
- const service = new service_1.DatasetService(db);
71
+ const db = await getDatasetRuntimeDb(params.runtime);
72
+ const service = new DatasetService(db);
62
73
  return await service.uploadDatasetOutputFile({
63
74
  datasetId: params.datasetId,
64
- fileBuffer: params.fileBuffer,
75
+ fileBuffer: Buffer.from(params.contentBase64, "base64"),
65
76
  });
66
77
  }
67
- async function datasetUpdateStatusStep(params) {
78
+ export async function datasetUpdateStatusStep(params) {
68
79
  "use step";
69
- const db = (await (0, runtime_1.getContextRuntime)(await resolveEnv(params.env))).db;
70
- const service = new service_1.DatasetService(db);
80
+ const db = await getDatasetRuntimeDb(params.runtime);
81
+ const service = new DatasetService(db);
71
82
  return await service.updateDatasetStatus({
72
83
  datasetId: params.datasetId,
73
84
  status: params.status,
@@ -75,20 +86,66 @@ async function datasetUpdateStatusStep(params) {
75
86
  actualGeneratedRowCount: params.actualGeneratedRowCount,
76
87
  });
77
88
  }
78
- async function datasetClearStep(params) {
89
+ export async function datasetClearStep(params) {
79
90
  "use step";
80
- const db = (await (0, runtime_1.getContextRuntime)(await resolveEnv(params.env))).db;
81
- const service = new service_1.DatasetService(db);
91
+ const db = await getDatasetRuntimeDb(params.runtime);
92
+ const service = new DatasetService(db);
82
93
  return await service.clearDataset(params.datasetId);
83
94
  }
84
- async function datasetPreviewRowsStep(params) {
95
+ export async function datasetPreviewRowsStep(params) {
85
96
  "use step";
86
- const db = (await (0, runtime_1.getContextRuntime)(await resolveEnv(params.env))).db;
87
- const service = new service_1.DatasetService(db);
97
+ const db = await getDatasetRuntimeDb(params.runtime);
98
+ const service = new DatasetService(db);
88
99
  const rowsResult = await service.previewRows(params.datasetId, params.limit ?? 20);
89
100
  if (!rowsResult.ok) {
90
101
  throw new Error(rowsResult.error);
91
102
  }
92
103
  return { rows: rowsResult.data };
93
104
  }
94
- //# sourceMappingURL=steps.js.map
105
+ export async function datasetReadRowsStep(params) {
106
+ "use step";
107
+ const db = await getDatasetRuntimeDb(params.runtime);
108
+ const service = new DatasetService(db);
109
+ const rowsResult = await service.readRows({
110
+ datasetId: params.datasetId,
111
+ cursor: params.cursor,
112
+ limit: params.limit,
113
+ });
114
+ if (!rowsResult.ok) {
115
+ throw new Error(rowsResult.error);
116
+ }
117
+ return rowsResult.data;
118
+ }
119
+ export async function datasetReadOneStep(params) {
120
+ "use step";
121
+ const db = await getDatasetRuntimeDb(params.runtime);
122
+ const service = new DatasetService(db);
123
+ const firstResult = await service.readOne(params.datasetId);
124
+ if (!firstResult.ok) {
125
+ throw new Error(firstResult.error);
126
+ }
127
+ return { row: firstResult.data };
128
+ }
129
+ export async function datasetInferAndUpdateSchemaStep(params) {
130
+ "use step";
131
+ const db = await getDatasetRuntimeDb(params.runtime);
132
+ const service = new DatasetService(db);
133
+ const readResult = await service.readRows({
134
+ datasetId: params.datasetId,
135
+ cursor: 0,
136
+ limit: 1000,
137
+ });
138
+ if (!readResult.ok) {
139
+ throw new Error(readResult.error);
140
+ }
141
+ const inferred = inferDatasetSchema(readResult.data.rows, params.title ?? `${params.datasetId}Row`, params.description ?? "One dataset row");
142
+ const updateResult = await service.updateDatasetSchema({
143
+ datasetId: params.datasetId,
144
+ schema: inferred,
145
+ status: "completed",
146
+ });
147
+ if (!updateResult.ok) {
148
+ throw new Error(updateResult.error);
149
+ }
150
+ return { schema: inferred };
151
+ }
package/dist/dataset.d.ts CHANGED
@@ -1,67 +1,3 @@
1
- import type { DomainSchemaResult } from "@ekairos/domain";
2
- import type { ContextReactor } from "@ekairos/events";
3
- export type DatasetQuerySourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
4
- query: Record<string, any>;
5
- title?: string;
6
- explanation?: string;
7
- domain: D;
8
- };
9
- export type DatasetFileSourceInput = {
10
- fileId: string;
11
- description?: string;
12
- };
13
- export type DatasetTextSourceInput = {
14
- text: string;
15
- mimeType?: string;
16
- name?: string;
17
- description?: string;
18
- };
19
- export type DatasetExistingSourceInput = {
20
- datasetId: string;
21
- description?: string;
22
- };
23
- export type DatasetSchemaInput = {
24
- title?: string;
25
- description?: string;
26
- schema: any;
27
- };
28
- export type DatasetReaderResult = {
29
- rows: any[];
30
- cursor: number;
31
- done: boolean;
32
- };
33
- export type DatasetReader = {
34
- read(cursor?: number, limit?: number): Promise<DatasetReaderResult>;
35
- read(params?: {
36
- cursor?: number;
37
- limit?: number;
38
- }): Promise<DatasetReaderResult>;
39
- };
40
- export type DatasetBuildResult = {
41
- datasetId: string;
42
- dataset: any;
43
- previewRows: any[];
44
- reader: DatasetReader;
45
- firstRow?: any | null;
46
- };
47
- export declare function dataset<Env extends {
48
- orgId: string;
49
- }>(env: Env): {
50
- fromFile(source: DatasetFileSourceInput): /*elided*/ any;
51
- fromText(source: DatasetTextSourceInput): /*elided*/ any;
52
- fromDataset(source: DatasetExistingSourceInput): /*elided*/ any;
53
- fromQuery<D extends DomainSchemaResult>(domain: D, source: Omit<DatasetQuerySourceInput<D>, "domain">): /*elided*/ any;
54
- title(title: string): /*elided*/ any;
55
- sandbox(input: {
56
- sandboxId: string;
57
- }): /*elided*/ any;
58
- schema(schema: DatasetSchemaInput): /*elided*/ any;
59
- inferSchema(): /*elided*/ any;
60
- instructions(instructions: string): /*elided*/ any;
61
- reactor(reactor: ContextReactor<any, any>): /*elided*/ any;
62
- first(): /*elided*/ any;
63
- build(options?: {
64
- datasetId?: string;
65
- }): Promise<DatasetBuildResult>;
66
- };
67
- //# sourceMappingURL=dataset.d.ts.map
1
+ import type { AnyDatasetRuntime, DatasetBuilder, DatasetBuilderOptions, DatasetRuntimeHandle } from "./builder/types.js";
2
+ export type { AnyDatasetRuntime, CompatibleQueryDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingResource, DatasetExistingResourceInput, DatasetFileResource, DatasetFileResourceInput, DatasetMode, DatasetOutput, DatasetQueryResourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextResource, DatasetResourceInput, DatasetTextResourceInput, } from "./builder/types.js";
3
+ export declare function dataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime & DatasetRuntimeHandle<Runtime>, options?: DatasetBuilderOptions): DatasetBuilder<Runtime>;