@ekairos/dataset 1.22.48-beta.development.0 → 1.22.50-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/dist/agents.d.ts +8 -0
  2. package/dist/agents.js +8 -0
  3. package/dist/builder/agentMaterializers.d.ts +9 -0
  4. package/dist/builder/agentMaterializers.js +10 -0
  5. package/dist/builder/materialize.d.ts +1 -11
  6. package/dist/builder/materialize.js +25 -77
  7. package/dist/builder/materializeQuery.d.ts +11 -0
  8. package/dist/builder/materializeQuery.js +40 -0
  9. package/dist/builder/persistence.js +13 -21
  10. package/dist/builder/types.d.ts +3 -0
  11. package/dist/clearDataset.tool.d.ts +2 -2
  12. package/dist/clearDataset.tool.js +3 -3
  13. package/dist/completeDataset.tool.d.ts +31 -3
  14. package/dist/completeDataset.tool.js +101 -13
  15. package/dist/dataset/steps.d.ts +32 -8
  16. package/dist/dataset/steps.js +69 -13
  17. package/dist/dataset.js +13 -7
  18. package/dist/executeCommand.tool.d.ts +2 -2
  19. package/dist/executeCommand.tool.js +3 -3
  20. package/dist/file/file-dataset.agent.d.ts +17 -11
  21. package/dist/file/file-dataset.agent.js +54 -47
  22. package/dist/file/filepreview.d.ts +2 -2
  23. package/dist/file/filepreview.js +24 -17
  24. package/dist/file/generateSchema.tool.d.ts +2 -2
  25. package/dist/file/generateSchema.tool.js +2 -2
  26. package/dist/file/prompts.d.ts +2 -2
  27. package/dist/file/prompts.js +6 -1
  28. package/dist/file/steps.d.ts +1 -1
  29. package/dist/file/steps.js +8 -2
  30. package/dist/index.d.ts +0 -1
  31. package/dist/index.js +0 -1
  32. package/dist/query/queryDomain.d.ts +3 -3
  33. package/dist/query/queryDomain.js +3 -3
  34. package/dist/query/queryDomain.step.d.ts +1 -0
  35. package/dist/query/queryDomain.step.js +8 -4
  36. package/dist/sandbox/steps.d.ts +6 -6
  37. package/dist/sandbox/steps.js +16 -12
  38. package/dist/transform/filepreview.d.ts +1 -1
  39. package/dist/transform/filepreview.js +6 -6
  40. package/dist/transform/index.d.ts +1 -1
  41. package/dist/transform/index.js +1 -1
  42. package/dist/transform/prompts.js +4 -1
  43. package/dist/transform/transform-dataset.agent.d.ts +9 -3
  44. package/dist/transform/transform-dataset.agent.js +39 -32
  45. package/dist/transform/transformDataset.d.ts +3 -2
  46. package/dist/transform/transformDataset.js +10 -9
  47. package/package.json +19 -5
  48. package/dist/eventsReactRuntime.d.ts +0 -21
  49. package/dist/eventsReactRuntime.js +0 -25
@@ -0,0 +1,8 @@
1
+ import "./builder/materialize.js";
2
+ export * from "./dataset.js";
3
+ export * from "./domain.js";
4
+ export * from "./materializeDataset.tool.js";
5
+ export * from "./schema.js";
6
+ export * from "./service.js";
7
+ export * from "./file/file-dataset.agent.js";
8
+ export * from "./transform/index.js";
package/dist/agents.js ADDED
@@ -0,0 +1,8 @@
1
+ import "./builder/materialize.js";
2
+ export * from "./dataset.js";
3
+ export * from "./domain.js";
4
+ export * from "./materializeDataset.tool.js";
5
+ export * from "./schema.js";
6
+ export * from "./service.js";
7
+ export * from "./file/file-dataset.agent.js";
8
+ export * from "./transform/index.js";
@@ -0,0 +1,9 @@
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, InternalSource } from "./types.js";
2
+ export type DatasetAgentMaterializers = {
3
+ materializeSingleFileLikeSource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, source: Extract<InternalSource, {
4
+ kind: "file" | "text";
5
+ }>, targetDatasetId: string): Promise<string>;
6
+ materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, targetDatasetId: string): Promise<string>;
7
+ };
8
+ export declare function registerDatasetAgentMaterializers(materializers: DatasetAgentMaterializers): void;
9
+ export declare function getDatasetAgentMaterializers(): DatasetAgentMaterializers;
@@ -0,0 +1,10 @@
1
+ let agentMaterializers = null;
2
+ export function registerDatasetAgentMaterializers(materializers) {
3
+ agentMaterializers = materializers;
4
+ }
5
+ export function getDatasetAgentMaterializers() {
6
+ if (!agentMaterializers) {
7
+ throw new Error("dataset_agent_materializers_not_registered");
8
+ }
9
+ return agentMaterializers;
10
+ }
@@ -1,14 +1,4 @@
1
- import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalSource } from "./types.js";
2
- export declare function materializeQuerySource<Runtime extends AnyDatasetRuntime>(runtime: DatasetBuilderState<Runtime>["runtime"], source: Extract<InternalSource, {
3
- kind: "query";
4
- }>, params: {
5
- datasetId: string;
6
- sandboxId?: string;
7
- schema?: DatasetSchemaInput;
8
- title?: string;
9
- instructions?: string;
10
- first?: boolean;
11
- }): Promise<string>;
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, InternalSource } from "./types.js";
12
2
  export declare function materializeSingleFileLikeSource<Runtime extends AnyDatasetRuntime>(state: DatasetBuilderState<Runtime>, source: Extract<InternalSource, {
13
3
  kind: "file" | "text";
14
4
  }>, targetDatasetId: string): Promise<string>;
@@ -1,44 +1,14 @@
1
- import { createFileParseStory } from "../file/file-dataset.agent.js";
2
- import { DatasetService } from "../service.js";
3
- import { createTransformDatasetStory } from "../transform/transform-dataset.agent.js";
1
+ import { createFileParseContext } from "../file/file-dataset.agent.js";
2
+ import { createTransformDatasetContext } from "../transform/transform-dataset.agent.js";
3
+ import { datasetInferAndUpdateSchemaStep, datasetReadOneStep, } from "../dataset/steps.js";
4
+ import { registerDatasetAgentMaterializers } from "./agentMaterializers.js";
4
5
  import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransformInstructions, } from "./instructions.js";
5
- import { createOrUpdateDatasetMetadata, getDatasetDb, materializeRowsToDataset, uploadInlineTextSource, } from "./persistence.js";
6
- import { inferDatasetSchema } from "./schemaInference.js";
7
- import { getDomainDescriptor, normalizeQueryRows } from "./sourceRows.js";
6
+ import { createOrUpdateDatasetMetadata, uploadInlineTextSource, } from "./persistence.js";
7
+ import { getDomainDescriptor } from "./sourceRows.js";
8
+ import { materializeQuerySource } from "./materializeQuery.js";
8
9
  function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
9
10
  return `${targetDatasetId}__${sourceKind}_${index}`;
10
11
  }
11
- export async function materializeQuerySource(runtime, source, params) {
12
- const scoped = await runtime.use(source.domain);
13
- const result = await scoped.db.query(source.query);
14
- const rows = normalizeQueryRows(result);
15
- const domainDescriptor = getDomainDescriptor(source.domain);
16
- return await materializeRowsToDataset(runtime, {
17
- datasetId: params.datasetId,
18
- sandboxId: params.sandboxId,
19
- title: params.title ?? source.title,
20
- instructions: params.instructions,
21
- sources: [
22
- {
23
- kind: "query",
24
- query: source.query,
25
- title: source.title,
26
- explanation: source.explanation,
27
- ...domainDescriptor,
28
- },
29
- ],
30
- sourceKinds: ["query"],
31
- analysis: {
32
- query: source.query,
33
- explanation: source.explanation,
34
- ...domainDescriptor,
35
- },
36
- rows,
37
- schema: params.schema,
38
- inferSchema: !params.schema,
39
- first: params.first,
40
- });
41
- }
42
12
  export async function materializeSingleFileLikeSource(state, source, targetDatasetId) {
43
13
  if (!state.reactor) {
44
14
  throw new Error("dataset_reactor_required");
@@ -68,37 +38,23 @@ export async function materializeSingleFileLikeSource(state, source, targetDatas
68
38
  schema: state.outputSchema,
69
39
  status: "building",
70
40
  });
71
- const parseStory = createFileParseStory(fileId, {
41
+ const parseContext = createFileParseContext(fileId, {
72
42
  datasetId: targetDatasetId,
73
43
  instructions: state.instructions ?? buildFileDefaultInstructions(state.outputSchema),
74
44
  reactor: state.reactor,
75
45
  sandboxId: state.sandboxId,
76
46
  });
77
- await parseStory.parse(state.env);
47
+ await parseContext.parse(state.runtime, { durable: state.durable });
78
48
  if (!state.outputSchema) {
79
- const db = await getDatasetDb(state.runtime);
80
- const service = new DatasetService(db);
81
- const readResult = await service.readRows({ datasetId: targetDatasetId, cursor: 0, limit: 1000 });
82
- if (!readResult.ok) {
83
- throw new Error(readResult.error);
84
- }
85
- const inferred = inferDatasetSchema(readResult.data.rows, `${targetDatasetId}Row`, "One dataset row");
86
- const updateResult = await service.updateDatasetSchema({
49
+ await datasetInferAndUpdateSchemaStep({
50
+ runtime: state.runtime,
87
51
  datasetId: targetDatasetId,
88
- schema: inferred,
89
- status: "completed",
52
+ title: `${targetDatasetId}Row`,
53
+ description: "One dataset row",
90
54
  });
91
- if (!updateResult.ok) {
92
- throw new Error(updateResult.error);
93
- }
94
55
  }
95
56
  if (state.first) {
96
- const db = await getDatasetDb(state.runtime);
97
- const service = new DatasetService(db);
98
- const firstResult = await service.readOne(targetDatasetId);
99
- if (!firstResult.ok) {
100
- throw new Error(firstResult.error);
101
- }
57
+ await datasetReadOneStep({ runtime: state.runtime, datasetId: targetDatasetId });
102
58
  }
103
59
  return targetDatasetId;
104
60
  }
@@ -163,7 +119,7 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
163
119
  schema: transformSchema,
164
120
  status: "building",
165
121
  });
166
- const transformStory = createTransformDatasetStory({
122
+ const transformContext = createTransformDatasetContext({
167
123
  sourceDatasetIds: normalizedSources,
168
124
  outputSchema: transformSchema,
169
125
  instructions: buildTransformInstructions(normalizedSources.length, state.instructions, state.outputSchema),
@@ -171,29 +127,21 @@ export async function materializeDerivedDataset(state, targetDatasetId) {
171
127
  reactor: state.reactor,
172
128
  sandboxId: state.sandboxId,
173
129
  });
174
- await transformStory.transform(state.env);
175
- const db = await getDatasetDb(state.runtime);
176
- const service = new DatasetService(db);
130
+ await transformContext.transform(state.runtime, { durable: state.durable });
177
131
  if (!state.outputSchema) {
178
- const readResult = await service.readRows({ datasetId: targetDatasetId, cursor: 0, limit: 1000 });
179
- if (!readResult.ok) {
180
- throw new Error(readResult.error);
181
- }
182
- const inferred = inferDatasetSchema(readResult.data.rows, `${targetDatasetId}Row`, "One dataset row");
183
- const updateResult = await service.updateDatasetSchema({
132
+ await datasetInferAndUpdateSchemaStep({
133
+ runtime: state.runtime,
184
134
  datasetId: targetDatasetId,
185
- schema: inferred,
186
- status: "completed",
135
+ title: `${targetDatasetId}Row`,
136
+ description: "One dataset row",
187
137
  });
188
- if (!updateResult.ok) {
189
- throw new Error(updateResult.error);
190
- }
191
138
  }
192
139
  if (state.first) {
193
- const firstResult = await service.readOne(targetDatasetId);
194
- if (!firstResult.ok) {
195
- throw new Error(firstResult.error);
196
- }
140
+ await datasetReadOneStep({ runtime: state.runtime, datasetId: targetDatasetId });
197
141
  }
198
142
  return targetDatasetId;
199
143
  }
144
+ registerDatasetAgentMaterializers({
145
+ materializeSingleFileLikeSource,
146
+ materializeDerivedDataset,
147
+ });
@@ -0,0 +1,11 @@
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, DatasetSchemaInput, InternalSource } from "./types.js";
2
+ export declare function materializeQuerySource<Runtime extends AnyDatasetRuntime>(runtime: DatasetBuilderState<Runtime>["runtime"], source: Extract<InternalSource, {
3
+ kind: "query";
4
+ }>, params: {
5
+ datasetId: string;
6
+ sandboxId?: string;
7
+ schema?: DatasetSchemaInput;
8
+ title?: string;
9
+ instructions?: string;
10
+ first?: boolean;
11
+ }): Promise<string>;
@@ -0,0 +1,40 @@
1
+ import { materializeRowsToDataset } from "./persistence.js";
2
+ import { getDomainDescriptor, normalizeQueryRows } from "./sourceRows.js";
3
+ async function readQuerySourceRowsStep(params) {
4
+ "use step";
5
+ const db = await params.runtime.db();
6
+ const result = await db.query(params.query);
7
+ return { rows: normalizeQueryRows(result) };
8
+ }
9
+ export async function materializeQuerySource(runtime, source, params) {
10
+ const { rows } = await readQuerySourceRowsStep({
11
+ runtime,
12
+ query: source.query,
13
+ });
14
+ const domainDescriptor = getDomainDescriptor(source.domain);
15
+ return await materializeRowsToDataset(runtime, {
16
+ datasetId: params.datasetId,
17
+ sandboxId: params.sandboxId,
18
+ title: params.title ?? source.title,
19
+ instructions: params.instructions,
20
+ sources: [
21
+ {
22
+ kind: "query",
23
+ query: source.query,
24
+ title: source.title,
25
+ explanation: source.explanation,
26
+ ...domainDescriptor,
27
+ },
28
+ ],
29
+ sourceKinds: ["query"],
30
+ analysis: {
31
+ query: source.query,
32
+ explanation: source.explanation,
33
+ ...domainDescriptor,
34
+ },
35
+ rows,
36
+ schema: params.schema,
37
+ inferSchema: !params.schema,
38
+ first: params.first,
39
+ });
40
+ }
@@ -1,5 +1,6 @@
1
1
  import { DatasetService } from "../service.js";
2
2
  import { datasetDomain } from "../schema.js";
3
+ import { datasetGetByIdStep, datasetPreviewRowsStep, datasetReadOneStep, datasetReadRowsStep, } from "../dataset/steps.js";
3
4
  import { inferDatasetSchema, validateRows } from "./schemaInference.js";
4
5
  import { rowsToJsonl } from "./sourceRows.js";
5
6
  export function defaultTextSourceName(source) {
@@ -19,6 +20,7 @@ export async function getDatasetDb(runtime) {
19
20
  return scoped.db;
20
21
  }
21
22
  export async function createOrUpdateDatasetMetadata(runtime, params) {
23
+ "use step";
22
24
  const db = await getDatasetDb(runtime);
23
25
  const service = new DatasetService(db);
24
26
  const result = await service.createDataset({
@@ -38,6 +40,7 @@ export async function createOrUpdateDatasetMetadata(runtime, params) {
38
40
  }
39
41
  }
40
42
  export async function materializeRowsToDataset(runtime, params) {
43
+ "use step";
41
44
  if (params.first && params.rows.length > 1) {
42
45
  throw new Error("dataset_first_expected_zero_or_one_row");
43
46
  }
@@ -76,6 +79,7 @@ export async function materializeRowsToDataset(runtime, params) {
76
79
  return params.datasetId;
77
80
  }
78
81
  export async function uploadInlineTextSource(runtime, datasetId, source) {
82
+ "use step";
79
83
  const db = await getDatasetDb(runtime);
80
84
  const fileName = defaultTextSourceName(source);
81
85
  const storagePath = `/dataset/source/${datasetId}/${Date.now()}-${fileName}`;
@@ -90,49 +94,37 @@ export async function uploadInlineTextSource(runtime, datasetId, source) {
90
94
  return fileId;
91
95
  }
92
96
  export async function finalizeBuildResult(runtime, datasetId, withFirst) {
93
- const db = await getDatasetDb(runtime);
94
- const service = new DatasetService(db);
95
- const datasetResult = await service.getDatasetById(datasetId);
96
- if (!datasetResult.ok) {
97
+ const datasetResult = await datasetGetByIdStep({ runtime, datasetId });
98
+ if (!datasetResult.ok)
97
99
  throw new Error(datasetResult.error);
98
- }
99
- const previewResult = await service.previewRows(datasetId, 20);
100
- if (!previewResult.ok) {
101
- throw new Error(previewResult.error);
102
- }
100
+ const previewResult = await datasetPreviewRowsStep({ runtime, datasetId, limit: 20 });
103
101
  const reader = {
104
102
  async read(cursorOrParams, limit) {
105
103
  const params = typeof cursorOrParams === "object" && cursorOrParams !== null
106
104
  ? cursorOrParams
107
105
  : { cursor: cursorOrParams, limit };
108
- const rowsResult = await service.readRows({
106
+ return await datasetReadRowsStep({
107
+ runtime,
109
108
  datasetId,
110
109
  cursor: params.cursor,
111
110
  limit: params.limit,
112
111
  });
113
- if (!rowsResult.ok) {
114
- throw new Error(rowsResult.error);
115
- }
116
- return rowsResult.data;
117
112
  },
118
113
  };
119
114
  if (!withFirst) {
120
115
  return {
121
116
  datasetId,
122
117
  dataset: datasetResult.data,
123
- previewRows: previewResult.data,
118
+ previewRows: previewResult.rows,
124
119
  reader,
125
120
  };
126
121
  }
127
- const firstResult = await service.readOne(datasetId);
128
- if (!firstResult.ok) {
129
- throw new Error(firstResult.error);
130
- }
122
+ const firstResult = await datasetReadOneStep({ runtime, datasetId });
131
123
  return {
132
124
  datasetId,
133
125
  dataset: datasetResult.data,
134
- previewRows: previewResult.data,
126
+ previewRows: previewResult.rows,
135
127
  reader,
136
- firstRow: firstResult.data,
128
+ firstRow: firstResult.row,
137
129
  };
138
130
  }
@@ -42,9 +42,11 @@ export type DatasetOutput = "rows" | "object";
42
42
  export type DatasetMode = "auto" | "schema";
43
43
  export type DatasetBuilderOptions = {
44
44
  datasetId?: string;
45
+ durable?: boolean;
45
46
  };
46
47
  export type DatasetBuildOptions = {
47
48
  datasetId?: string;
49
+ durable?: boolean;
48
50
  };
49
51
  export type InternalSource = DatasetFileSource | DatasetTextSource | DatasetExistingSource | ({
50
52
  kind: "query";
@@ -91,6 +93,7 @@ export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
91
93
  inferSchema: boolean;
92
94
  instructions?: string;
93
95
  reactor?: ContextReactor<any, any>;
96
+ durable?: boolean;
94
97
  first: boolean;
95
98
  };
96
99
  export type MaterializeRowsParams = {
@@ -1,9 +1,9 @@
1
1
  interface ClearDatasetToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
- env?: any;
4
+ runtime: any;
5
5
  }
6
- export declare function createClearDatasetTool({ datasetId, sandboxId, env }: ClearDatasetToolParams): import("ai").Tool<{
6
+ export declare function createClearDatasetTool({ datasetId, sandboxId, runtime }: ClearDatasetToolParams): import("ai").Tool<{
7
7
  reason: string;
8
8
  }, {
9
9
  success: boolean;
@@ -3,7 +3,7 @@ import { z } from "zod";
3
3
  import { runDatasetSandboxCommandStep } from "./sandbox/steps.js";
4
4
  import { getDatasetOutputPath } from "./datasetFiles.js";
5
5
  import { datasetClearStep } from "./dataset/steps.js";
6
- export function createClearDatasetTool({ datasetId, sandboxId, env }) {
6
+ export function createClearDatasetTool({ datasetId, sandboxId, runtime }) {
7
7
  return tool({
8
8
  description: "Clear all dataset records and output files. This will delete all generated data and reset the dataset to its initial state.",
9
9
  inputSchema: z.object({
@@ -18,7 +18,7 @@ export function createClearDatasetTool({ datasetId, sandboxId, env }) {
18
18
  console.log(`[Dataset ${datasetId}] Step 1: Deleting output file`);
19
19
  try {
20
20
  const result = await runDatasetSandboxCommandStep({
21
- env,
21
+ runtime,
22
22
  sandboxId,
23
23
  cmd: "rm",
24
24
  args: ["-f", outputPath],
@@ -35,7 +35,7 @@ export function createClearDatasetTool({ datasetId, sandboxId, env }) {
35
35
  console.warn(`[Dataset ${datasetId}] Error deleting output file: ${message}`);
36
36
  }
37
37
  console.log(`[Dataset ${datasetId}] Step 2: Clearing dataset records`);
38
- const clearResult = await datasetClearStep({ env, datasetId });
38
+ const clearResult = await datasetClearStep({ runtime, datasetId });
39
39
  if (!clearResult.ok) {
40
40
  console.error(`[Dataset ${datasetId}] Failed to clear dataset: ${clearResult.error}`);
41
41
  return {
@@ -1,9 +1,9 @@
1
1
  interface CompleteDatasetToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
- env?: any;
4
+ runtime: any;
5
5
  }
6
- export declare function createCompleteDatasetTool({ datasetId, sandboxId, env }: CompleteDatasetToolParams): import("ai").Tool<{
6
+ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime }: CompleteDatasetToolParams): import("ai").Tool<{
7
7
  summary: string;
8
8
  }, {
9
9
  success: boolean;
@@ -11,14 +11,42 @@ export declare function createCompleteDatasetTool({ datasetId, sandboxId, env }:
11
11
  index: number;
12
12
  valid: boolean;
13
13
  errors?: string[];
14
+ dataKeys?: string[];
14
15
  }>;
15
16
  validRowCount?: number;
17
+ rowRecordCount?: number;
16
18
  error?: string;
19
+ status?: string;
20
+ message?: string;
17
21
  } | {
18
22
  success: boolean;
19
- validRows: number | undefined;
23
+ status: string;
24
+ validRows: number;
25
+ rowRecordCount: number;
26
+ validation: {
27
+ index: number;
28
+ valid: boolean;
29
+ errors?: string[];
30
+ dataKeys?: string[];
31
+ }[] | undefined;
32
+ error: string;
33
+ message: string;
34
+ fileId?: undefined;
35
+ storagePath?: undefined;
36
+ } | {
37
+ success: boolean;
38
+ status: string;
39
+ validRows: number;
40
+ rowRecordCount: number;
20
41
  fileId: string;
21
42
  storagePath: string;
22
43
  message: string;
44
+ validation?: undefined;
45
+ error?: undefined;
23
46
  }>;
47
+ export declare function didCompleteDatasetSucceed(event: {
48
+ content?: {
49
+ parts?: any[];
50
+ };
51
+ }): boolean;
24
52
  export {};