@ekairos/dataset 1.22.82-beta.development.0 → 1.22.84-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/dist/builder/agentMaterializers.d.ts +2 -2
  2. package/dist/builder/context.d.ts +7 -0
  3. package/dist/builder/context.js +192 -0
  4. package/dist/builder/instructions.d.ts +3 -3
  5. package/dist/builder/instructions.js +10 -10
  6. package/dist/builder/materialize.d.ts +12 -11
  7. package/dist/builder/materialize.js +122 -121
  8. package/dist/builder/materializeQuery.d.ts +3 -2
  9. package/dist/builder/materializeQuery.js +10 -19
  10. package/dist/builder/persistence.d.ts +4 -5
  11. package/dist/builder/persistence.js +20 -19
  12. package/dist/builder/types.d.ts +31 -24
  13. package/dist/completeDataset.steps.d.ts +9 -8
  14. package/dist/completeDataset.steps.js +18 -11
  15. package/dist/completeDataset.tool.d.ts +9 -8
  16. package/dist/completeDataset.tool.js +2 -1
  17. package/dist/contextWorkspace.d.ts +72 -0
  18. package/dist/contextWorkspace.js +218 -0
  19. package/dist/dataset.d.ts +1 -1
  20. package/dist/dataset.js +42 -29
  21. package/dist/datasetFiles.d.ts +1 -1
  22. package/dist/datasetFiles.js +3 -3
  23. package/dist/executeCommand.tool.d.ts +1 -43
  24. package/dist/executeCommand.tool.js +10 -3
  25. package/dist/file/file-dataset.agent.d.ts +2 -0
  26. package/dist/file/file-dataset.agent.js +51 -16
  27. package/dist/file/file-dataset.steps.d.ts +6 -0
  28. package/dist/file/file-dataset.steps.js +18 -21
  29. package/dist/file/file-dataset.types.d.ts +10 -0
  30. package/dist/file/prompts.js +16 -14
  31. package/dist/index.d.ts +1 -0
  32. package/dist/index.js +1 -0
  33. package/dist/materializeDataset.tool.d.ts +34 -26
  34. package/dist/materializeDataset.tool.js +40 -29
  35. package/dist/schema.d.ts +12 -2
  36. package/dist/schema.js +6 -3
  37. package/dist/service.d.ts +2 -2
  38. package/dist/service.js +6 -3
  39. package/dist/transform/filepreview.d.ts +2 -2
  40. package/dist/transform/filepreview.js +3 -3
  41. package/dist/transform/prompts.js +25 -25
  42. package/dist/transform/transform-dataset.agent.d.ts +4 -4
  43. package/dist/transform/transform-dataset.agent.js +29 -30
  44. package/dist/transform/transform-dataset.steps.d.ts +7 -7
  45. package/dist/transform/transform-dataset.steps.js +20 -20
  46. package/dist/transform/transform-dataset.types.d.ts +13 -13
  47. package/dist/transform/transformDataset.js +4 -4
  48. package/package.json +4 -4
  49. /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
  50. /package/dist/builder/{sourceRows.js → rows.js} +0 -0
@@ -7,26 +7,30 @@ declare const materializeDatasetToolInputSchema: z.ZodObject<{
7
7
  datasetId: z.ZodOptional<z.ZodString>;
8
8
  sandboxId: z.ZodOptional<z.ZodString>;
9
9
  title: z.ZodOptional<z.ZodString>;
10
- sources: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
11
- kind: z.ZodLiteral<"file">;
12
- fileId: z.ZodString;
13
- description: z.ZodOptional<z.ZodString>;
10
+ context: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
11
+ id: z.ZodString;
14
12
  }, z.core.$strip>, z.ZodObject<{
15
- kind: z.ZodLiteral<"text">;
16
- text: z.ZodString;
17
- mimeType: z.ZodOptional<z.ZodString>;
13
+ key: z.ZodString;
14
+ }, z.core.$strip>]>>;
15
+ files: z.ZodOptional<z.ZodArray<z.ZodObject<{
16
+ description: z.ZodOptional<z.ZodString>;
17
+ fileId: z.ZodString;
18
+ }, z.core.$strip>>>;
19
+ texts: z.ZodOptional<z.ZodArray<z.ZodObject<{
18
20
  name: z.ZodOptional<z.ZodString>;
21
+ text: z.ZodString;
19
22
  description: z.ZodOptional<z.ZodString>;
20
- }, z.core.$strip>, z.ZodObject<{
21
- kind: z.ZodLiteral<"dataset">;
23
+ mimeType: z.ZodOptional<z.ZodString>;
24
+ }, z.core.$strip>>>;
25
+ datasets: z.ZodOptional<z.ZodArray<z.ZodObject<{
22
26
  datasetId: z.ZodString;
23
27
  description: z.ZodOptional<z.ZodString>;
24
- }, z.core.$strip>, z.ZodObject<{
25
- kind: z.ZodLiteral<"query">;
26
- query: z.ZodRecord<z.ZodString, z.ZodAny>;
28
+ }, z.core.$strip>>>;
29
+ queries: z.ZodOptional<z.ZodArray<z.ZodObject<{
27
30
  title: z.ZodOptional<z.ZodString>;
31
+ query: z.ZodRecord<z.ZodString, z.ZodAny>;
28
32
  explanation: z.ZodOptional<z.ZodString>;
29
- }, z.core.$strip>], "kind">>;
33
+ }, z.core.$strip>>>;
30
34
  instructions: z.ZodOptional<z.ZodString>;
31
35
  mode: z.ZodOptional<z.ZodEnum<{
32
36
  schema: "schema";
@@ -52,29 +56,33 @@ export declare function createMaterializeDatasetTool<Runtime extends AnyMaterial
52
56
  queryDomain: QueryDomain & CompatibleToolQueryDomain<Runtime, QueryDomain>;
53
57
  toolName?: string;
54
58
  }): import("ai").Tool<{
55
- sources: ({
56
- kind: "file";
59
+ datasetId?: string | undefined;
60
+ sandboxId?: string | undefined;
61
+ title?: string | undefined;
62
+ context?: {
63
+ id: string;
64
+ } | {
65
+ key: string;
66
+ } | undefined;
67
+ files?: {
57
68
  fileId: string;
58
69
  description?: string | undefined;
59
- } | {
60
- kind: "text";
70
+ }[] | undefined;
71
+ texts?: {
61
72
  text: string;
62
- mimeType?: string | undefined;
63
73
  name?: string | undefined;
64
74
  description?: string | undefined;
65
- } | {
66
- kind: "dataset";
75
+ mimeType?: string | undefined;
76
+ }[] | undefined;
77
+ datasets?: {
67
78
  datasetId: string;
68
79
  description?: string | undefined;
69
- } | {
70
- kind: "query";
80
+ }[] | undefined;
81
+ queries?: {
71
82
  query: Record<string, any>;
72
83
  title?: string | undefined;
73
84
  explanation?: string | undefined;
74
- })[];
75
- datasetId?: string | undefined;
76
- sandboxId?: string | undefined;
77
- title?: string | undefined;
85
+ }[] | undefined;
78
86
  instructions?: string | undefined;
79
87
  mode?: "schema" | "auto" | undefined;
80
88
  output?: "object" | "rows" | undefined;
@@ -1,29 +1,33 @@
1
1
  import { tool } from "ai";
2
2
  import { z } from "zod";
3
3
  import { dataset } from "./dataset.js";
4
- const fileSourceSchema = z.object({
4
+ const fileResourceSchema = z.object({
5
5
  kind: z.literal("file"),
6
6
  fileId: z.string(),
7
7
  description: z.string().optional(),
8
8
  });
9
- const textSourceSchema = z.object({
9
+ const textResourceSchema = z.object({
10
10
  kind: z.literal("text"),
11
11
  text: z.string(),
12
12
  mimeType: z.string().optional(),
13
13
  name: z.string().optional(),
14
14
  description: z.string().optional(),
15
15
  });
16
- const datasetSourceSchema = z.object({
16
+ const datasetResourceSchema = z.object({
17
17
  kind: z.literal("dataset"),
18
18
  datasetId: z.string(),
19
19
  description: z.string().optional(),
20
20
  });
21
- const querySourceSchema = z.object({
21
+ const queryResourceSchema = z.object({
22
22
  kind: z.literal("query"),
23
23
  query: z.record(z.string(), z.any()),
24
24
  title: z.string().optional(),
25
25
  explanation: z.string().optional(),
26
26
  });
27
+ const contextInputSchema = z.union([
28
+ z.object({ id: z.string() }),
29
+ z.object({ key: z.string() }),
30
+ ]);
27
31
  const datasetSchemaSchema = z.object({
28
32
  title: z.string().optional(),
29
33
  description: z.string().optional(),
@@ -33,14 +37,11 @@ const materializeDatasetToolInputSchema = z.object({
33
37
  datasetId: z.string().optional(),
34
38
  sandboxId: z.string().optional(),
35
39
  title: z.string().optional(),
36
- sources: z
37
- .array(z.discriminatedUnion("kind", [
38
- fileSourceSchema,
39
- textSourceSchema,
40
- datasetSourceSchema,
41
- querySourceSchema,
42
- ]))
43
- .min(1),
40
+ context: contextInputSchema.optional(),
41
+ files: z.array(fileResourceSchema.omit({ kind: true })).optional(),
42
+ texts: z.array(textResourceSchema.omit({ kind: true })).optional(),
43
+ datasets: z.array(datasetResourceSchema.omit({ kind: true })).optional(),
44
+ queries: z.array(queryResourceSchema.omit({ kind: true })).optional(),
44
45
  instructions: z.string().optional(),
45
46
  mode: z.enum(["auto", "schema"]).optional(),
46
47
  output: z.enum(["rows", "object"]).optional(),
@@ -49,7 +50,7 @@ const materializeDatasetToolInputSchema = z.object({
49
50
  });
50
51
  export function createMaterializeDatasetTool(params) {
51
52
  return tool({
52
- description: "Materialize a dataset from declarative sources. Returns only the target datasetId. Query sources use the preconfigured runtime domain.",
53
+ description: "Materialize a dataset from declarative resources. Returns only the target datasetId. Query resources use the preconfigured runtime domain.",
53
54
  inputSchema: materializeDatasetToolInputSchema,
54
55
  execute: async (input) => {
55
56
  let builder = dataset(params.runtime);
@@ -59,23 +60,33 @@ export function createMaterializeDatasetTool(params) {
59
60
  if (input.sandboxId?.trim()) {
60
61
  builder = builder.sandbox({ sandboxId: input.sandboxId });
61
62
  }
62
- for (const source of input.sources) {
63
- if (source.kind === "file") {
64
- builder = builder.fromFile(source);
65
- continue;
66
- }
67
- if (source.kind === "text") {
68
- builder = builder.fromText(source);
69
- continue;
70
- }
71
- if (source.kind === "dataset") {
72
- builder = builder.fromDataset(source);
73
- continue;
74
- }
63
+ const materialCount = (input.files?.length ?? 0) +
64
+ (input.texts?.length ?? 0) +
65
+ (input.datasets?.length ?? 0) +
66
+ (input.queries?.length ?? 0);
67
+ if (input.context && materialCount > 0) {
68
+ throw new Error("dataset_context_resource_is_exclusive");
69
+ }
70
+ if (!input.context && materialCount === 0) {
71
+ throw new Error("dataset_context_or_material_required");
72
+ }
73
+ if (input.context) {
74
+ builder = builder.fromContext(input.context);
75
+ }
76
+ for (const resource of input.files ?? []) {
77
+ builder = builder.fromFile(resource);
78
+ }
79
+ for (const resource of input.texts ?? []) {
80
+ builder = builder.fromText(resource);
81
+ }
82
+ for (const resource of input.datasets ?? []) {
83
+ builder = builder.fromDataset(resource);
84
+ }
85
+ for (const resource of input.queries ?? []) {
75
86
  builder = builder.fromQuery(params.queryDomain, {
76
- query: source.query,
77
- title: source.title,
78
- explanation: source.explanation,
87
+ query: resource.query,
88
+ title: resource.title,
89
+ explanation: resource.explanation,
79
90
  });
80
91
  }
81
92
  if (input.output === "object") {
package/dist/schema.d.ts CHANGED
@@ -8,8 +8,6 @@ declare const entities: {
8
8
  updatedAt: import("@instantdb/core").DataAttrDef<number, false, false, false>;
9
9
  organizationId: import("@instantdb/core").DataAttrDef<string, false, true, false>;
10
10
  title: import("@instantdb/core").DataAttrDef<string, false, false, false>;
11
- sources: import("@instantdb/core").DataAttrDef<any, false, false, false>;
12
- sourceKinds: import("@instantdb/core").DataAttrDef<any, false, false, false>;
13
11
  instructions: import("@instantdb/core").DataAttrDef<string, false, false, false>;
14
12
  analysis: import("@instantdb/core").DataAttrDef<any, false, false, false>;
15
13
  schema: import("@instantdb/core").DataAttrDef<any, false, false, false>;
@@ -47,6 +45,18 @@ declare const links: {
47
45
  readonly label: "datasets";
48
46
  };
49
47
  };
48
+ readonly dataset_datasetsContext: {
49
+ readonly forward: {
50
+ readonly on: "dataset_datasets";
51
+ readonly has: "one";
52
+ readonly label: "context";
53
+ };
54
+ readonly reverse: {
55
+ readonly on: "event_contexts";
56
+ readonly has: "many";
57
+ readonly label: "datasets";
58
+ };
59
+ };
50
60
  };
51
61
  declare const rooms: {};
52
62
  export declare const datasetDomain: DomainSchemaResult<typeof entities, typeof links, typeof rooms, {}, "dataset", "dataset">;
package/dist/schema.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { i } from "@instantdb/core";
2
2
  import { domain } from "@ekairos/domain";
3
+ import { eventsDomain } from "@ekairos/events";
3
4
  const entities = {
4
5
  dataset_datasets: i.entity({
5
6
  datasetId: i.string().unique().indexed(),
@@ -9,8 +10,6 @@ const entities = {
9
10
  updatedAt: i.number().optional(),
10
11
  organizationId: i.string().optional().indexed(),
11
12
  title: i.string().optional(),
12
- sources: i.json().optional(),
13
- sourceKinds: i.json().optional(),
14
13
  instructions: i.string().optional(),
15
14
  analysis: i.json().optional(),
16
15
  schema: i.json().optional(),
@@ -32,9 +31,13 @@ const links = {
32
31
  forward: { on: "dataset_datasets", has: "one", label: "dataFile" },
33
32
  reverse: { on: "$files", has: "many", label: "datasets" },
34
33
  },
34
+ dataset_datasetsContext: {
35
+ forward: { on: "dataset_datasets", has: "one", label: "context" },
36
+ reverse: { on: "event_contexts", has: "many", label: "datasets" },
37
+ },
35
38
  };
36
39
  const rooms = {};
37
- export const datasetDomain = domain("dataset").withSchema({
40
+ export const datasetDomain = domain("dataset").includes(eventsDomain).withSchema({
38
41
  entities,
39
42
  links,
40
43
  rooms,
package/dist/service.d.ts CHANGED
@@ -15,8 +15,7 @@ export declare class DatasetService {
15
15
  private resolveDatasetEntityId;
16
16
  createDataset(params: {
17
17
  id?: string;
18
- sources?: any;
19
- sourceKinds?: any;
18
+ contextId?: string;
20
19
  instructions?: string;
21
20
  status?: string;
22
21
  organizationId?: string;
@@ -64,6 +63,7 @@ export declare class DatasetService {
64
63
  uploadDatasetOutputFile(params: {
65
64
  datasetId: string;
66
65
  fileBuffer: Buffer;
66
+ storagePath?: string;
67
67
  }): Promise<ServiceResult<{
68
68
  fileId: string;
69
69
  storagePath: string;
package/dist/service.js CHANGED
@@ -28,18 +28,21 @@ export class DatasetService {
28
28
  async createDataset(params) {
29
29
  try {
30
30
  const datasetId = params.id ?? createDatasetId();
31
+ const { id: _id, contextId, ...attrs } = params;
31
32
  const existing = await this.resolveDatasetEntityId(datasetId);
32
33
  const entityId = existing.ok ? existing.data : createDatasetId();
33
34
  const mutations = [];
34
35
  mutations.push(this.db.tx.dataset_datasets[entityId].update({
35
36
  datasetId,
36
- sources: params.sources ?? "",
37
37
  instructions: params.instructions ?? "",
38
38
  status: params.status ?? "created",
39
39
  createdAt: Date.now(),
40
40
  updatedAt: Date.now(),
41
- ...params,
41
+ ...attrs,
42
42
  }));
43
+ if (contextId) {
44
+ mutations.push(this.db.tx.dataset_datasets[entityId].link({ context: contextId }));
45
+ }
43
46
  await this.db.transact(mutations);
44
47
  return { ok: true, data: { datasetId } };
45
48
  }
@@ -308,7 +311,7 @@ export class DatasetService {
308
311
  }
309
312
  async uploadDatasetOutputFile(params) {
310
313
  try {
311
- const storagePath = `/dataset/${params.datasetId}/output.jsonl`;
314
+ const storagePath = params.storagePath ?? `/dataset/${params.datasetId}/output.jsonl`;
312
315
  const uploadResult = await this.db.storage.uploadFile(storagePath, params.fileBuffer, {
313
316
  contentType: "application/x-ndjson",
314
317
  contentDisposition: "output.jsonl",
@@ -1,4 +1,4 @@
1
- export type TransformSourcePreviewContext = {
1
+ export type TransformInputPreviewContext = {
2
2
  totalRows: number;
3
3
  metadata?: {
4
4
  description: string;
@@ -18,5 +18,5 @@ export type TransformSourcePreviewContext = {
18
18
  interface PreviewOptions {
19
19
  headLines?: number;
20
20
  }
21
- export declare function generateSourcePreview(runtime: any, sandboxId: string, sourcePath: string, datasetId: string, options?: PreviewOptions): Promise<TransformSourcePreviewContext>;
21
+ export declare function generateInputPreview(runtime: any, sandboxId: string, inputPath: string, datasetId: string, options?: PreviewOptions): Promise<TransformInputPreviewContext>;
22
22
  export {};
@@ -17,7 +17,7 @@ async function runPythonSnippet(runtime, sandboxId, datasetId, scriptName, code,
17
17
  stderr,
18
18
  };
19
19
  }
20
- export async function generateSourcePreview(runtime, sandboxId, sourcePath, datasetId, options = {}) {
20
+ export async function generateInputPreview(runtime, sandboxId, inputPath, datasetId, options = {}) {
21
21
  const context = {
22
22
  totalRows: 0,
23
23
  };
@@ -41,7 +41,7 @@ try:
41
41
  except Exception as e:
42
42
  print(str(e))
43
43
  `;
44
- const meta = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_count", countScript, [sourcePath], "Counts number of JSONL records with type='row'");
44
+ const meta = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_count", countScript, [inputPath], "Counts number of JSONL records with type='row'");
45
45
  context.metadata = meta;
46
46
  try {
47
47
  if (meta.stdout) {
@@ -76,7 +76,7 @@ try:
76
76
  except Exception as e:
77
77
  print(str(e))
78
78
  `;
79
- const head = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_head", headScript, [sourcePath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
79
+ const head = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_head", headScript, [inputPath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
80
80
  context.head = head;
81
81
  return context;
82
82
  }
@@ -9,7 +9,7 @@ function buildRole() {
9
9
  function buildGoal() {
10
10
  let xml = create()
11
11
  .ele("Goal")
12
- .txt("Transform the source dataset(s) (JSONL with {type:'row', data:{...}} per line) into a new dataset strictly matching the output schema. Save to output.jsonl in the dataset workstation. Each line must remain a single JSON object representing one record. You may need to combine, filter, or reshape data from multiple source datasets.")
12
+ .txt("Transform the input dataset(s) (JSONL with {type:'row', data:{...}} per line) into a new dataset strictly matching the output schema. Save to output.jsonl in the dataset workstation. Each line must remain a single JSON object representing one record. You may need to combine, filter, or reshape data from multiple input datasets.")
13
13
  .up();
14
14
  return xml.end({ prettyPrint: true, headless: true });
15
15
  }
@@ -17,26 +17,26 @@ function buildContextSection(context) {
17
17
  let xml = create()
18
18
  .ele("Context")
19
19
  .ele("DatasetId").txt(context.datasetId).up();
20
- let sourcesXml = create().ele("SourceDatasets");
21
- for (const sourceId of context.sourceDatasetIds) {
22
- sourcesXml = sourcesXml.ele("SourceDatasetId").txt(sourceId).up();
20
+ let inputsXml = create().ele("InputDatasets");
21
+ for (const sourceId of context.inputDatasetIds) {
22
+ inputsXml = inputsXml.ele("InputDatasetId").txt(sourceId).up();
23
23
  }
24
- xml = xml.import(sourcesXml.first());
24
+ xml = xml.import(inputsXml.first());
25
25
  let sandboxXml = create().ele("Sandbox");
26
- for (const sourcePathInfo of context.sandboxConfig.sourcePaths) {
27
- sandboxXml = sandboxXml.ele("SourceFile")
28
- .ele("DatasetId").txt(sourcePathInfo.datasetId).up()
29
- .ele("Path").txt(sourcePathInfo.path).up()
26
+ for (const inputPathInfo of context.sandboxConfig.inputPaths) {
27
+ sandboxXml = sandboxXml.ele("InputFile")
28
+ .ele("DatasetId").txt(inputPathInfo.datasetId).up()
29
+ .ele("Path").txt(inputPathInfo.path).up()
30
30
  .up();
31
31
  }
32
32
  sandboxXml = sandboxXml.ele("OutputPath").txt(context.sandboxConfig.outputPath).up();
33
33
  xml = xml.import(sandboxXml.first());
34
- if (context.sourcePreviews && context.sourcePreviews.length > 0) {
35
- let previewsXml = create().ele("SourcePreviews");
36
- for (const sourcePreviewInfo of context.sourcePreviews) {
37
- const sp = sourcePreviewInfo.preview;
38
- let px = create().ele("SourcePreview")
39
- .ele("DatasetId").txt(sourcePreviewInfo.datasetId).up()
34
+ if (context.inputPreviews && context.inputPreviews.length > 0) {
35
+ let previewsXml = create().ele("InputPreviews");
36
+ for (const inputPreviewInfo of context.inputPreviews) {
37
+ const sp = inputPreviewInfo.preview;
38
+ let px = create().ele("InputPreview")
39
+ .ele("DatasetId").txt(inputPreviewInfo.datasetId).up()
40
40
  .ele("TotalRows").txt(String(sp.totalRows)).up();
41
41
  if (sp.metadata) {
42
42
  const m = sp.metadata;
@@ -86,21 +86,21 @@ function buildOutputSchemaSection(context) {
86
86
  }
87
87
  function buildInstructions(context) {
88
88
  const outputPath = context.sandboxConfig.outputPath;
89
- const multipleSourcesNote = context.sourceDatasetIds.length > 1
90
- ? "You have multiple source datasets available. You may need to read, join, filter, or combine data from them to produce the output."
89
+ const multipleInputsNote = context.inputDatasetIds.length > 1
90
+ ? "You have multiple input datasets available. You may need to read, join, filter, or combine data from them to produce the output."
91
91
  : "";
92
92
  let xml = create()
93
93
  .ele("Instructions")
94
94
  .ele("Workflow")
95
- .ele("Step", { number: "1", name: "Inspect Source" })
96
- .ele("Action").txt(`Review SourcePreviews to understand current record structures (data fields, shapes, edge cases). ${multipleSourcesNote}`).up()
95
+ .ele("Step", { number: "1", name: "Inspect Inputs" })
96
+ .ele("Action").txt(`Review InputPreviews to understand current record structures (data fields, shapes, edge cases). ${multipleInputsNote}`).up()
97
97
  .up()
98
98
  .ele("Step", { number: "2", name: "Plan Mapping" })
99
- .ele("Action").txt("Plan a deterministic mapping from source data fields to the output schema fields (normalize names, types, and formats).").up()
100
- .ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple sources, decide how to combine or relate them. Output field names must remain exactly as declared by the output schema.").up()
99
+ .ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
100
+ .ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple inputs, decide how to combine or relate them. Output field names must remain exactly as declared by the output schema.").up()
101
101
  .up()
102
102
  .ele("Step", { number: "3", name: "Transform" })
103
- .ele("Action").txt("Use executeCommand to run a Python script that reads source JSONL file(s) and writes transformed records to output.jsonl. Keep line-per-record JSON objects with { 'type': 'row', 'data': { ... } }.").up()
103
+ .ele("Action").txt("Use executeCommand to run a Python script that reads input JSONL file(s) and writes transformed records to output.jsonl. Keep line-per-record JSON objects with { 'type': 'row', 'data': { ... } }.").up()
104
104
  .ele("Requirement").txt(`Write file to: ${outputPath}`).up()
105
105
  .ele("Requirement").txt("Every data object MUST use the exact property names from OutputSchema required/properties keys. Do not translate, localize, rename, or infer alternative field names.").up()
106
106
  .ele("Requirement").txt("Do not print large data to stdout; only progress and summaries.").up()
@@ -112,12 +112,12 @@ function buildInstructions(context) {
112
112
  .up()
113
113
  .ele("Rules")
114
114
  .ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
115
- .ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve source language.").up()
115
+ .ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
116
116
  .ele("Rule").txt("Each line in output.jsonl must be a standalone JSON object with {type:'row', data:{...}}.").up()
117
117
  .ele("Rule").txt("Do not include headers, summaries, or metadata as records.").up()
118
- .ele("Rule").txt("Be robust to malformed lines in source: skip or sanitize, but do not crash.").up()
118
+ .ele("Rule").txt("Be robust to malformed lines in input: skip or sanitize, but do not crash.").up()
119
119
  .up()
120
- .ele("CurrentTask").txt("Transform source dataset(s) to match OutputSchema and write output.jsonl, then complete.").up()
120
+ .ele("CurrentTask").txt("Transform input dataset(s) to match OutputSchema and write output.jsonl, then complete.").up()
121
121
  .up();
122
122
  return xml.end({ prettyPrint: true, headless: true });
123
123
  }
@@ -1,10 +1,10 @@
1
1
  import { type ContextReactor } from "@ekairos/events";
2
- import type { TransformDatasetRunOptions, TransformSandboxState, TransformSourcePreviewContext } from "./transform-dataset.types.js";
2
+ import type { TransformDatasetRunOptions, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
3
3
  export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
4
4
  export declare function createTransformDatasetContext<Env extends {
5
5
  orgId: string;
6
6
  }>(params: {
7
- sourceDatasetIds: string[];
7
+ inputDatasetIds: string[];
8
8
  outputSchema: any;
9
9
  instructions?: string;
10
10
  datasetId?: string;
@@ -12,9 +12,9 @@ export declare function createTransformDatasetContext<Env extends {
12
12
  sandboxId?: string;
13
13
  reactor?: ContextReactor<any, any>;
14
14
  sandboxState?: TransformSandboxState;
15
- sourcePreviews?: Array<{
15
+ inputPreviews?: Array<{
16
16
  datasetId: string;
17
- preview: TransformSourcePreviewContext;
17
+ preview: TransformInputPreviewContext;
18
18
  }>;
19
19
  }): {
20
20
  datasetId: string;
@@ -4,7 +4,7 @@ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFa
4
4
  import { datasetUpdateSchemaStep } from "../dataset/steps.js";
5
5
  import { getDatasetOutputPath } from "../datasetFiles.js";
6
6
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
- import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
7
+ import { buildTransformDatasetPromptStep, ensureTransformInputsInSandboxStep, generateTransformInputPreviewsStep, } from "./transform-dataset.steps.js";
8
8
  import { createDatasetId } from "../id.js";
9
9
  async function awaitContextRun(run) {
10
10
  if (!run)
@@ -22,12 +22,12 @@ function createTransformDatasetContextDefinition(params) {
22
22
  .context(async (stored, _env, runtime) => {
23
23
  const previous = stored?.content ?? {};
24
24
  const sandboxState = previous?.sandboxState ??
25
- params.sandboxState ?? { initialized: false, sourcePaths: [] };
25
+ params.sandboxState ?? { initialized: false, inputPaths: [] };
26
26
  const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
27
- const sourceDatasetIds = Array.isArray(previous?.sourceDatasetIds)
28
- ? previous.sourceDatasetIds
29
- : Array.isArray(params.sourceDatasetIds)
30
- ? params.sourceDatasetIds
27
+ const inputDatasetIds = Array.isArray(previous?.inputDatasetIds)
28
+ ? previous.inputDatasetIds
29
+ : Array.isArray(params.inputDatasetIds)
30
+ ? params.inputDatasetIds
31
31
  : [];
32
32
  const outputSchema = previous?.outputSchema ?? params.outputSchema;
33
33
  const instructions = previous?.instructions ?? params.instructions;
@@ -35,8 +35,8 @@ function createTransformDatasetContextDefinition(params) {
35
35
  if (!datasetId) {
36
36
  throw new Error("dataset_id_required");
37
37
  }
38
- if (sourceDatasetIds.length === 0) {
39
- throw new Error("dataset_transform_sources_required");
38
+ if (inputDatasetIds.length === 0) {
39
+ throw new Error("dataset_transform_inputs_required");
40
40
  }
41
41
  if (!outputSchema) {
42
42
  throw new Error("dataset_transform_schema_required");
@@ -44,26 +44,26 @@ function createTransformDatasetContextDefinition(params) {
44
44
  if (!sandboxId) {
45
45
  throw new Error("dataset_sandbox_required");
46
46
  }
47
- const initialized = sandboxState.initialized && Array.isArray(sandboxState.sourcePaths)
47
+ const initialized = sandboxState.initialized && Array.isArray(sandboxState.inputPaths)
48
48
  ? {
49
- sourcePaths: sandboxState.sourcePaths,
49
+ inputPaths: sandboxState.inputPaths,
50
50
  outputPath: previous?.sandboxConfig?.outputPath ?? getDatasetOutputPath(datasetId),
51
51
  state: sandboxState,
52
52
  }
53
- : await ensureTransformSourcesInSandboxStep({
53
+ : await ensureTransformInputsInSandboxStep({
54
54
  runtime,
55
55
  sandboxId,
56
56
  datasetId,
57
- sourceDatasetIds,
57
+ inputDatasetIds,
58
58
  state: sandboxState,
59
59
  });
60
- let sourcePreviews = previous?.sourcePreviews ?? params.sourcePreviews ?? undefined;
61
- if (!sourcePreviews) {
62
- sourcePreviews = await generateTransformSourcePreviewsStep({
60
+ let inputPreviews = previous?.inputPreviews ?? params.inputPreviews ?? undefined;
61
+ if (!inputPreviews) {
62
+ inputPreviews = await generateTransformInputPreviewsStep({
63
63
  runtime,
64
64
  sandboxId,
65
65
  datasetId,
66
- sourcePaths: initialized.sourcePaths,
66
+ inputPaths: initialized.inputPaths,
67
67
  });
68
68
  }
69
69
  await datasetUpdateSchemaStep({
@@ -74,13 +74,13 @@ function createTransformDatasetContextDefinition(params) {
74
74
  });
75
75
  const promptContext = {
76
76
  datasetId,
77
- sourceDatasetIds,
77
+ inputDatasetIds,
78
78
  outputSchema,
79
79
  sandboxConfig: {
80
- sourcePaths: initialized.sourcePaths,
80
+ inputPaths: initialized.inputPaths,
81
81
  outputPath: initialized.outputPath,
82
82
  },
83
- sourcePreviews: sourcePreviews.length > 0 ? sourcePreviews : undefined,
83
+ inputPreviews: inputPreviews.length > 0 ? inputPreviews : undefined,
84
84
  errors: [],
85
85
  };
86
86
  const basePrompt = await buildTransformDatasetPromptStep({
@@ -100,14 +100,14 @@ function createTransformDatasetContextDefinition(params) {
100
100
  return {
101
101
  ...previous,
102
102
  datasetId,
103
- sourceDatasetIds,
103
+ inputDatasetIds,
104
104
  outputSchema,
105
105
  instructions,
106
106
  sandboxId,
107
107
  sandboxState: initialized.state,
108
108
  system,
109
109
  sandboxConfig: {
110
- sourcePaths: initialized.sourcePaths,
110
+ inputPaths: initialized.inputPaths,
111
111
  outputPath: initialized.outputPath,
112
112
  },
113
113
  };
@@ -159,7 +159,7 @@ function createTransformDatasetContextDefinition(params) {
159
159
  export function createTransformDatasetContext(params) {
160
160
  const datasetId = params.datasetId ?? createDatasetId();
161
161
  const { context } = createTransformDatasetContextDefinition({
162
- sourceDatasetIds: params.sourceDatasetIds,
162
+ inputDatasetIds: params.inputDatasetIds,
163
163
  outputSchema: params.outputSchema,
164
164
  instructions: params.instructions,
165
165
  datasetId,
@@ -167,14 +167,14 @@ export function createTransformDatasetContext(params) {
167
167
  sandboxId: params.sandboxId,
168
168
  reactor: params.reactor,
169
169
  sandboxState: params.sandboxState,
170
- sourcePreviews: params.sourcePreviews,
170
+ inputPreviews: params.inputPreviews,
171
171
  });
172
172
  return {
173
173
  datasetId,
174
174
  async transform(runtime, options = {}) {
175
- const datasetCountText = params.sourceDatasetIds.length === 1
176
- ? "the source dataset"
177
- : `${params.sourceDatasetIds.length} source datasets`;
175
+ const datasetCountText = params.inputDatasetIds.length === 1
176
+ ? "the input dataset"
177
+ : `${params.inputDatasetIds.length} input datasets`;
178
178
  const triggerEvent = {
179
179
  id: createDatasetId(),
180
180
  type: INPUT_TEXT_ITEM_TYPE,
@@ -195,7 +195,6 @@ export function createTransformDatasetContext(params) {
195
195
  context: { key: `dataset:${datasetId}` },
196
196
  durable: options.durable ?? false,
197
197
  options: {
198
- silent: true,
199
198
  preventClose: true,
200
199
  sendFinish: false,
201
200
  maxIterations: 20,
@@ -204,12 +203,12 @@ export function createTransformDatasetContext(params) {
204
203
  __initialContent: {
205
204
  ...(options.initialContent ?? {}),
206
205
  datasetId,
207
- sourceDatasetIds: params.sourceDatasetIds,
206
+ inputDatasetIds: params.inputDatasetIds,
208
207
  outputSchema: params.outputSchema,
209
208
  instructions: params.instructions,
210
209
  sandboxId: params.sandboxId ?? "",
211
- sandboxState: params.sandboxState ?? { initialized: false, sourcePaths: [] },
212
- sourcePreviews: params.sourcePreviews,
210
+ sandboxState: params.sandboxState ?? { initialized: false, inputPaths: [] },
211
+ inputPreviews: params.inputPreviews,
213
212
  },
214
213
  });
215
214
  await awaitContextRun(shell.run);