@ekairos/dataset 1.22.83-beta.development.0 → 1.22.84-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/builder/agentMaterializers.d.ts +2 -2
  2. package/dist/builder/context.d.ts +7 -0
  3. package/dist/builder/context.js +192 -0
  4. package/dist/builder/instructions.d.ts +3 -3
  5. package/dist/builder/instructions.js +10 -10
  6. package/dist/builder/materialize.d.ts +10 -11
  7. package/dist/builder/materialize.js +116 -113
  8. package/dist/builder/materializeQuery.d.ts +3 -2
  9. package/dist/builder/materializeQuery.js +10 -19
  10. package/dist/builder/persistence.d.ts +4 -5
  11. package/dist/builder/persistence.js +20 -19
  12. package/dist/builder/types.d.ts +29 -24
  13. package/dist/completeDataset.steps.js +1 -1
  14. package/dist/dataset.d.ts +1 -1
  15. package/dist/dataset.js +42 -29
  16. package/dist/datasetFiles.d.ts +1 -1
  17. package/dist/datasetFiles.js +3 -3
  18. package/dist/file/file-dataset.agent.js +3 -4
  19. package/dist/file/prompts.js +12 -12
  20. package/dist/materializeDataset.tool.d.ts +34 -26
  21. package/dist/materializeDataset.tool.js +40 -29
  22. package/dist/schema.d.ts +12 -2
  23. package/dist/schema.js +6 -3
  24. package/dist/service.d.ts +1 -2
  25. package/dist/service.js +5 -2
  26. package/dist/transform/filepreview.d.ts +2 -2
  27. package/dist/transform/filepreview.js +3 -3
  28. package/dist/transform/prompts.js +25 -25
  29. package/dist/transform/transform-dataset.agent.d.ts +4 -4
  30. package/dist/transform/transform-dataset.agent.js +29 -30
  31. package/dist/transform/transform-dataset.steps.d.ts +7 -7
  32. package/dist/transform/transform-dataset.steps.js +20 -20
  33. package/dist/transform/transform-dataset.types.d.ts +13 -13
  34. package/dist/transform/transformDataset.js +4 -4
  35. package/package.json +4 -4
  36. /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
  37. /package/dist/builder/{sourceRows.js → rows.js} +0 -0
package/dist/schema.d.ts CHANGED
@@ -8,8 +8,6 @@ declare const entities: {
8
8
  updatedAt: import("@instantdb/core").DataAttrDef<number, false, false, false>;
9
9
  organizationId: import("@instantdb/core").DataAttrDef<string, false, true, false>;
10
10
  title: import("@instantdb/core").DataAttrDef<string, false, false, false>;
11
- sources: import("@instantdb/core").DataAttrDef<any, false, false, false>;
12
- sourceKinds: import("@instantdb/core").DataAttrDef<any, false, false, false>;
13
11
  instructions: import("@instantdb/core").DataAttrDef<string, false, false, false>;
14
12
  analysis: import("@instantdb/core").DataAttrDef<any, false, false, false>;
15
13
  schema: import("@instantdb/core").DataAttrDef<any, false, false, false>;
@@ -47,6 +45,18 @@ declare const links: {
47
45
  readonly label: "datasets";
48
46
  };
49
47
  };
48
+ readonly dataset_datasetsContext: {
49
+ readonly forward: {
50
+ readonly on: "dataset_datasets";
51
+ readonly has: "one";
52
+ readonly label: "context";
53
+ };
54
+ readonly reverse: {
55
+ readonly on: "event_contexts";
56
+ readonly has: "many";
57
+ readonly label: "datasets";
58
+ };
59
+ };
50
60
  };
51
61
  declare const rooms: {};
52
62
  export declare const datasetDomain: DomainSchemaResult<typeof entities, typeof links, typeof rooms, {}, "dataset", "dataset">;
package/dist/schema.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { i } from "@instantdb/core";
2
2
  import { domain } from "@ekairos/domain";
3
+ import { eventsDomain } from "@ekairos/events";
3
4
  const entities = {
4
5
  dataset_datasets: i.entity({
5
6
  datasetId: i.string().unique().indexed(),
@@ -9,8 +10,6 @@ const entities = {
9
10
  updatedAt: i.number().optional(),
10
11
  organizationId: i.string().optional().indexed(),
11
12
  title: i.string().optional(),
12
- sources: i.json().optional(),
13
- sourceKinds: i.json().optional(),
14
13
  instructions: i.string().optional(),
15
14
  analysis: i.json().optional(),
16
15
  schema: i.json().optional(),
@@ -32,9 +31,13 @@ const links = {
32
31
  forward: { on: "dataset_datasets", has: "one", label: "dataFile" },
33
32
  reverse: { on: "$files", has: "many", label: "datasets" },
34
33
  },
34
+ dataset_datasetsContext: {
35
+ forward: { on: "dataset_datasets", has: "one", label: "context" },
36
+ reverse: { on: "event_contexts", has: "many", label: "datasets" },
37
+ },
35
38
  };
36
39
  const rooms = {};
37
- export const datasetDomain = domain("dataset").withSchema({
40
+ export const datasetDomain = domain("dataset").includes(eventsDomain).withSchema({
38
41
  entities,
39
42
  links,
40
43
  rooms,
package/dist/service.d.ts CHANGED
@@ -15,8 +15,7 @@ export declare class DatasetService {
15
15
  private resolveDatasetEntityId;
16
16
  createDataset(params: {
17
17
  id?: string;
18
- sources?: any;
19
- sourceKinds?: any;
18
+ contextId?: string;
20
19
  instructions?: string;
21
20
  status?: string;
22
21
  organizationId?: string;
package/dist/service.js CHANGED
@@ -28,18 +28,21 @@ export class DatasetService {
28
28
  async createDataset(params) {
29
29
  try {
30
30
  const datasetId = params.id ?? createDatasetId();
31
+ const { id: _id, contextId, ...attrs } = params;
31
32
  const existing = await this.resolveDatasetEntityId(datasetId);
32
33
  const entityId = existing.ok ? existing.data : createDatasetId();
33
34
  const mutations = [];
34
35
  mutations.push(this.db.tx.dataset_datasets[entityId].update({
35
36
  datasetId,
36
- sources: params.sources ?? "",
37
37
  instructions: params.instructions ?? "",
38
38
  status: params.status ?? "created",
39
39
  createdAt: Date.now(),
40
40
  updatedAt: Date.now(),
41
- ...params,
41
+ ...attrs,
42
42
  }));
43
+ if (contextId) {
44
+ mutations.push(this.db.tx.dataset_datasets[entityId].link({ context: contextId }));
45
+ }
43
46
  await this.db.transact(mutations);
44
47
  return { ok: true, data: { datasetId } };
45
48
  }
@@ -1,4 +1,4 @@
1
- export type TransformSourcePreviewContext = {
1
+ export type TransformInputPreviewContext = {
2
2
  totalRows: number;
3
3
  metadata?: {
4
4
  description: string;
@@ -18,5 +18,5 @@ export type TransformSourcePreviewContext = {
18
18
  interface PreviewOptions {
19
19
  headLines?: number;
20
20
  }
21
- export declare function generateSourcePreview(runtime: any, sandboxId: string, sourcePath: string, datasetId: string, options?: PreviewOptions): Promise<TransformSourcePreviewContext>;
21
+ export declare function generateInputPreview(runtime: any, sandboxId: string, inputPath: string, datasetId: string, options?: PreviewOptions): Promise<TransformInputPreviewContext>;
22
22
  export {};
@@ -17,7 +17,7 @@ async function runPythonSnippet(runtime, sandboxId, datasetId, scriptName, code,
17
17
  stderr,
18
18
  };
19
19
  }
20
- export async function generateSourcePreview(runtime, sandboxId, sourcePath, datasetId, options = {}) {
20
+ export async function generateInputPreview(runtime, sandboxId, inputPath, datasetId, options = {}) {
21
21
  const context = {
22
22
  totalRows: 0,
23
23
  };
@@ -41,7 +41,7 @@ try:
41
41
  except Exception as e:
42
42
  print(str(e))
43
43
  `;
44
- const meta = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_count", countScript, [sourcePath], "Counts number of JSONL records with type='row'");
44
+ const meta = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_count", countScript, [inputPath], "Counts number of JSONL records with type='row'");
45
45
  context.metadata = meta;
46
46
  try {
47
47
  if (meta.stdout) {
@@ -76,7 +76,7 @@ try:
76
76
  except Exception as e:
77
77
  print(str(e))
78
78
  `;
79
- const head = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_head", headScript, [sourcePath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
79
+ const head = await runPythonSnippet(runtime, sandboxId, datasetId, "jsonl_head", headScript, [inputPath, String(headLines)], `Reads the first ${headLines} JSONL row records`);
80
80
  context.head = head;
81
81
  return context;
82
82
  }
@@ -9,7 +9,7 @@ function buildRole() {
9
9
  function buildGoal() {
10
10
  let xml = create()
11
11
  .ele("Goal")
12
- .txt("Transform the source dataset(s) (JSONL with {type:'row', data:{...}} per line) into a new dataset strictly matching the output schema. Save to output.jsonl in the dataset workstation. Each line must remain a single JSON object representing one record. You may need to combine, filter, or reshape data from multiple source datasets.")
12
+ .txt("Transform the input dataset(s) (JSONL with {type:'row', data:{...}} per line) into a new dataset strictly matching the output schema. Save to output.jsonl in the dataset workstation. Each line must remain a single JSON object representing one record. You may need to combine, filter, or reshape data from multiple input datasets.")
13
13
  .up();
14
14
  return xml.end({ prettyPrint: true, headless: true });
15
15
  }
@@ -17,26 +17,26 @@ function buildContextSection(context) {
17
17
  let xml = create()
18
18
  .ele("Context")
19
19
  .ele("DatasetId").txt(context.datasetId).up();
20
- let sourcesXml = create().ele("SourceDatasets");
21
- for (const sourceId of context.sourceDatasetIds) {
22
- sourcesXml = sourcesXml.ele("SourceDatasetId").txt(sourceId).up();
20
+ let inputsXml = create().ele("InputDatasets");
21
+ for (const sourceId of context.inputDatasetIds) {
22
+ inputsXml = inputsXml.ele("InputDatasetId").txt(sourceId).up();
23
23
  }
24
- xml = xml.import(sourcesXml.first());
24
+ xml = xml.import(inputsXml.first());
25
25
  let sandboxXml = create().ele("Sandbox");
26
- for (const sourcePathInfo of context.sandboxConfig.sourcePaths) {
27
- sandboxXml = sandboxXml.ele("SourceFile")
28
- .ele("DatasetId").txt(sourcePathInfo.datasetId).up()
29
- .ele("Path").txt(sourcePathInfo.path).up()
26
+ for (const inputPathInfo of context.sandboxConfig.inputPaths) {
27
+ sandboxXml = sandboxXml.ele("InputFile")
28
+ .ele("DatasetId").txt(inputPathInfo.datasetId).up()
29
+ .ele("Path").txt(inputPathInfo.path).up()
30
30
  .up();
31
31
  }
32
32
  sandboxXml = sandboxXml.ele("OutputPath").txt(context.sandboxConfig.outputPath).up();
33
33
  xml = xml.import(sandboxXml.first());
34
- if (context.sourcePreviews && context.sourcePreviews.length > 0) {
35
- let previewsXml = create().ele("SourcePreviews");
36
- for (const sourcePreviewInfo of context.sourcePreviews) {
37
- const sp = sourcePreviewInfo.preview;
38
- let px = create().ele("SourcePreview")
39
- .ele("DatasetId").txt(sourcePreviewInfo.datasetId).up()
34
+ if (context.inputPreviews && context.inputPreviews.length > 0) {
35
+ let previewsXml = create().ele("InputPreviews");
36
+ for (const inputPreviewInfo of context.inputPreviews) {
37
+ const sp = inputPreviewInfo.preview;
38
+ let px = create().ele("InputPreview")
39
+ .ele("DatasetId").txt(inputPreviewInfo.datasetId).up()
40
40
  .ele("TotalRows").txt(String(sp.totalRows)).up();
41
41
  if (sp.metadata) {
42
42
  const m = sp.metadata;
@@ -86,21 +86,21 @@ function buildOutputSchemaSection(context) {
86
86
  }
87
87
  function buildInstructions(context) {
88
88
  const outputPath = context.sandboxConfig.outputPath;
89
- const multipleSourcesNote = context.sourceDatasetIds.length > 1
90
- ? "You have multiple source datasets available. You may need to read, join, filter, or combine data from them to produce the output."
89
+ const multipleInputsNote = context.inputDatasetIds.length > 1
90
+ ? "You have multiple input datasets available. You may need to read, join, filter, or combine data from them to produce the output."
91
91
  : "";
92
92
  let xml = create()
93
93
  .ele("Instructions")
94
94
  .ele("Workflow")
95
- .ele("Step", { number: "1", name: "Inspect Source" })
96
- .ele("Action").txt(`Review SourcePreviews to understand current record structures (data fields, shapes, edge cases). ${multipleSourcesNote}`).up()
95
+ .ele("Step", { number: "1", name: "Inspect Inputs" })
96
+ .ele("Action").txt(`Review InputPreviews to understand current record structures (data fields, shapes, edge cases). ${multipleInputsNote}`).up()
97
97
  .up()
98
98
  .ele("Step", { number: "2", name: "Plan Mapping" })
99
- .ele("Action").txt("Plan a deterministic mapping from source data fields to the output schema fields (normalize names, types, and formats).").up()
100
- .ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple sources, decide how to combine or relate them. Output field names must remain exactly as declared by the output schema.").up()
99
+ .ele("Action").txt("Plan a deterministic mapping from input data fields to the output schema fields (normalize names, types, and formats).").up()
100
+ .ele("Note").txt("If fields are missing, set defaults; if types differ, coerce consistently. When working with multiple inputs, decide how to combine or relate them. Output field names must remain exactly as declared by the output schema.").up()
101
101
  .up()
102
102
  .ele("Step", { number: "3", name: "Transform" })
103
- .ele("Action").txt("Use executeCommand to run a Python script that reads source JSONL file(s) and writes transformed records to output.jsonl. Keep line-per-record JSON objects with { 'type': 'row', 'data': { ... } }.").up()
103
+ .ele("Action").txt("Use executeCommand to run a Python script that reads input JSONL file(s) and writes transformed records to output.jsonl. Keep line-per-record JSON objects with { 'type': 'row', 'data': { ... } }.").up()
104
104
  .ele("Requirement").txt(`Write file to: ${outputPath}`).up()
105
105
  .ele("Requirement").txt("Every data object MUST use the exact property names from OutputSchema required/properties keys. Do not translate, localize, rename, or infer alternative field names.").up()
106
106
  .ele("Requirement").txt("Do not print large data to stdout; only progress and summaries.").up()
@@ -112,12 +112,12 @@ function buildInstructions(context) {
112
112
  .up()
113
113
  .ele("Rules")
114
114
  .ele("Rule").txt("Output must strictly match the output schema for each record in data.").up()
115
- .ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve source language.").up()
115
+ .ele("Rule").txt("OutputSchema property names are authoritative. Field names are a technical contract; only field values may preserve input language.").up()
116
116
  .ele("Rule").txt("Each line in output.jsonl must be a standalone JSON object with {type:'row', data:{...}}.").up()
117
117
  .ele("Rule").txt("Do not include headers, summaries, or metadata as records.").up()
118
- .ele("Rule").txt("Be robust to malformed lines in source: skip or sanitize, but do not crash.").up()
118
+ .ele("Rule").txt("Be robust to malformed lines in input: skip or sanitize, but do not crash.").up()
119
119
  .up()
120
- .ele("CurrentTask").txt("Transform source dataset(s) to match OutputSchema and write output.jsonl, then complete.").up()
120
+ .ele("CurrentTask").txt("Transform input dataset(s) to match OutputSchema and write output.jsonl, then complete.").up()
121
121
  .up();
122
122
  return xml.end({ prettyPrint: true, headless: true });
123
123
  }
@@ -1,10 +1,10 @@
1
1
  import { type ContextReactor } from "@ekairos/events";
2
- import type { TransformDatasetRunOptions, TransformSandboxState, TransformSourcePreviewContext } from "./transform-dataset.types.js";
2
+ import type { TransformDatasetRunOptions, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
3
3
  export type { TransformDatasetAgentParams, TransformDatasetContext, TransformDatasetResult, TransformDatasetRunOptions, TransformPromptContext, TransformSandboxState, } from "./transform-dataset.types.js";
4
4
  export declare function createTransformDatasetContext<Env extends {
5
5
  orgId: string;
6
6
  }>(params: {
7
- sourceDatasetIds: string[];
7
+ inputDatasetIds: string[];
8
8
  outputSchema: any;
9
9
  instructions?: string;
10
10
  datasetId?: string;
@@ -12,9 +12,9 @@ export declare function createTransformDatasetContext<Env extends {
12
12
  sandboxId?: string;
13
13
  reactor?: ContextReactor<any, any>;
14
14
  sandboxState?: TransformSandboxState;
15
- sourcePreviews?: Array<{
15
+ inputPreviews?: Array<{
16
16
  datasetId: string;
17
- preview: TransformSourcePreviewContext;
17
+ preview: TransformInputPreviewContext;
18
18
  }>;
19
19
  }): {
20
20
  datasetId: string;
@@ -4,7 +4,7 @@ import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFa
4
4
  import { datasetUpdateSchemaStep } from "../dataset/steps.js";
5
5
  import { getDatasetOutputPath } from "../datasetFiles.js";
6
6
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
- import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
7
+ import { buildTransformDatasetPromptStep, ensureTransformInputsInSandboxStep, generateTransformInputPreviewsStep, } from "./transform-dataset.steps.js";
8
8
  import { createDatasetId } from "../id.js";
9
9
  async function awaitContextRun(run) {
10
10
  if (!run)
@@ -22,12 +22,12 @@ function createTransformDatasetContextDefinition(params) {
22
22
  .context(async (stored, _env, runtime) => {
23
23
  const previous = stored?.content ?? {};
24
24
  const sandboxState = previous?.sandboxState ??
25
- params.sandboxState ?? { initialized: false, sourcePaths: [] };
25
+ params.sandboxState ?? { initialized: false, inputPaths: [] };
26
26
  const datasetId = previous?.datasetId ?? fallbackDatasetId ?? "";
27
- const sourceDatasetIds = Array.isArray(previous?.sourceDatasetIds)
28
- ? previous.sourceDatasetIds
29
- : Array.isArray(params.sourceDatasetIds)
30
- ? params.sourceDatasetIds
27
+ const inputDatasetIds = Array.isArray(previous?.inputDatasetIds)
28
+ ? previous.inputDatasetIds
29
+ : Array.isArray(params.inputDatasetIds)
30
+ ? params.inputDatasetIds
31
31
  : [];
32
32
  const outputSchema = previous?.outputSchema ?? params.outputSchema;
33
33
  const instructions = previous?.instructions ?? params.instructions;
@@ -35,8 +35,8 @@ function createTransformDatasetContextDefinition(params) {
35
35
  if (!datasetId) {
36
36
  throw new Error("dataset_id_required");
37
37
  }
38
- if (sourceDatasetIds.length === 0) {
39
- throw new Error("dataset_transform_sources_required");
38
+ if (inputDatasetIds.length === 0) {
39
+ throw new Error("dataset_transform_inputs_required");
40
40
  }
41
41
  if (!outputSchema) {
42
42
  throw new Error("dataset_transform_schema_required");
@@ -44,26 +44,26 @@ function createTransformDatasetContextDefinition(params) {
44
44
  if (!sandboxId) {
45
45
  throw new Error("dataset_sandbox_required");
46
46
  }
47
- const initialized = sandboxState.initialized && Array.isArray(sandboxState.sourcePaths)
47
+ const initialized = sandboxState.initialized && Array.isArray(sandboxState.inputPaths)
48
48
  ? {
49
- sourcePaths: sandboxState.sourcePaths,
49
+ inputPaths: sandboxState.inputPaths,
50
50
  outputPath: previous?.sandboxConfig?.outputPath ?? getDatasetOutputPath(datasetId),
51
51
  state: sandboxState,
52
52
  }
53
- : await ensureTransformSourcesInSandboxStep({
53
+ : await ensureTransformInputsInSandboxStep({
54
54
  runtime,
55
55
  sandboxId,
56
56
  datasetId,
57
- sourceDatasetIds,
57
+ inputDatasetIds,
58
58
  state: sandboxState,
59
59
  });
60
- let sourcePreviews = previous?.sourcePreviews ?? params.sourcePreviews ?? undefined;
61
- if (!sourcePreviews) {
62
- sourcePreviews = await generateTransformSourcePreviewsStep({
60
+ let inputPreviews = previous?.inputPreviews ?? params.inputPreviews ?? undefined;
61
+ if (!inputPreviews) {
62
+ inputPreviews = await generateTransformInputPreviewsStep({
63
63
  runtime,
64
64
  sandboxId,
65
65
  datasetId,
66
- sourcePaths: initialized.sourcePaths,
66
+ inputPaths: initialized.inputPaths,
67
67
  });
68
68
  }
69
69
  await datasetUpdateSchemaStep({
@@ -74,13 +74,13 @@ function createTransformDatasetContextDefinition(params) {
74
74
  });
75
75
  const promptContext = {
76
76
  datasetId,
77
- sourceDatasetIds,
77
+ inputDatasetIds,
78
78
  outputSchema,
79
79
  sandboxConfig: {
80
- sourcePaths: initialized.sourcePaths,
80
+ inputPaths: initialized.inputPaths,
81
81
  outputPath: initialized.outputPath,
82
82
  },
83
- sourcePreviews: sourcePreviews.length > 0 ? sourcePreviews : undefined,
83
+ inputPreviews: inputPreviews.length > 0 ? inputPreviews : undefined,
84
84
  errors: [],
85
85
  };
86
86
  const basePrompt = await buildTransformDatasetPromptStep({
@@ -100,14 +100,14 @@ function createTransformDatasetContextDefinition(params) {
100
100
  return {
101
101
  ...previous,
102
102
  datasetId,
103
- sourceDatasetIds,
103
+ inputDatasetIds,
104
104
  outputSchema,
105
105
  instructions,
106
106
  sandboxId,
107
107
  sandboxState: initialized.state,
108
108
  system,
109
109
  sandboxConfig: {
110
- sourcePaths: initialized.sourcePaths,
110
+ inputPaths: initialized.inputPaths,
111
111
  outputPath: initialized.outputPath,
112
112
  },
113
113
  };
@@ -159,7 +159,7 @@ function createTransformDatasetContextDefinition(params) {
159
159
  export function createTransformDatasetContext(params) {
160
160
  const datasetId = params.datasetId ?? createDatasetId();
161
161
  const { context } = createTransformDatasetContextDefinition({
162
- sourceDatasetIds: params.sourceDatasetIds,
162
+ inputDatasetIds: params.inputDatasetIds,
163
163
  outputSchema: params.outputSchema,
164
164
  instructions: params.instructions,
165
165
  datasetId,
@@ -167,14 +167,14 @@ export function createTransformDatasetContext(params) {
167
167
  sandboxId: params.sandboxId,
168
168
  reactor: params.reactor,
169
169
  sandboxState: params.sandboxState,
170
- sourcePreviews: params.sourcePreviews,
170
+ inputPreviews: params.inputPreviews,
171
171
  });
172
172
  return {
173
173
  datasetId,
174
174
  async transform(runtime, options = {}) {
175
- const datasetCountText = params.sourceDatasetIds.length === 1
176
- ? "the source dataset"
177
- : `${params.sourceDatasetIds.length} source datasets`;
175
+ const datasetCountText = params.inputDatasetIds.length === 1
176
+ ? "the input dataset"
177
+ : `${params.inputDatasetIds.length} input datasets`;
178
178
  const triggerEvent = {
179
179
  id: createDatasetId(),
180
180
  type: INPUT_TEXT_ITEM_TYPE,
@@ -195,7 +195,6 @@ export function createTransformDatasetContext(params) {
195
195
  context: { key: `dataset:${datasetId}` },
196
196
  durable: options.durable ?? false,
197
197
  options: {
198
- silent: true,
199
198
  preventClose: true,
200
199
  sendFinish: false,
201
200
  maxIterations: 20,
@@ -204,12 +203,12 @@ export function createTransformDatasetContext(params) {
204
203
  __initialContent: {
205
204
  ...(options.initialContent ?? {}),
206
205
  datasetId,
207
- sourceDatasetIds: params.sourceDatasetIds,
206
+ inputDatasetIds: params.inputDatasetIds,
208
207
  outputSchema: params.outputSchema,
209
208
  instructions: params.instructions,
210
209
  sandboxId: params.sandboxId ?? "",
211
- sandboxState: params.sandboxState ?? { initialized: false, sourcePaths: [] },
212
- sourcePreviews: params.sourcePreviews,
210
+ sandboxState: params.sandboxState ?? { initialized: false, inputPaths: [] },
211
+ inputPreviews: params.inputPreviews,
213
212
  },
214
213
  });
215
214
  await awaitContextRun(shell.run);
@@ -1,29 +1,29 @@
1
- import type { TransformPromptContext, TransformSandboxState, TransformSourcePreviewContext } from "./transform-dataset.types.js";
2
- export declare function ensureTransformSourcesInSandboxStep(params: {
1
+ import type { TransformPromptContext, TransformSandboxState, TransformInputPreviewContext } from "./transform-dataset.types.js";
2
+ export declare function ensureTransformInputsInSandboxStep(params: {
3
3
  runtime: any;
4
4
  sandboxId: string;
5
5
  datasetId: string;
6
- sourceDatasetIds: string[];
6
+ inputDatasetIds: string[];
7
7
  state: TransformSandboxState;
8
8
  }): Promise<{
9
- sourcePaths: Array<{
9
+ inputPaths: Array<{
10
10
  datasetId: string;
11
11
  path: string;
12
12
  }>;
13
13
  outputPath: string;
14
14
  state: TransformSandboxState;
15
15
  }>;
16
- export declare function generateTransformSourcePreviewsStep(params: {
16
+ export declare function generateTransformInputPreviewsStep(params: {
17
17
  runtime: any;
18
18
  sandboxId: string;
19
19
  datasetId: string;
20
- sourcePaths: Array<{
20
+ inputPaths: Array<{
21
21
  datasetId: string;
22
22
  path: string;
23
23
  }>;
24
24
  }): Promise<Array<{
25
25
  datasetId: string;
26
- preview: TransformSourcePreviewContext;
26
+ preview: TransformInputPreviewContext;
27
27
  }>>;
28
28
  export declare function buildTransformDatasetPromptStep(params: {
29
29
  context: TransformPromptContext;
@@ -1,13 +1,13 @@
1
- import { getDatasetOutputPath, getDatasetSourcesDir, getDatasetStandardDirs, } from "../datasetFiles.js";
1
+ import { getDatasetOutputPath, getDatasetResourcesDir, getDatasetStandardDirs, } from "../datasetFiles.js";
2
2
  import { datasetReadOutputJsonlStep } from "../dataset/steps.js";
3
3
  import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep } from "../sandbox/steps.js";
4
- import { generateSourcePreview } from "./filepreview.js";
4
+ import { generateInputPreview } from "./filepreview.js";
5
5
  import { buildTransformDatasetPrompt } from "./prompts.js";
6
- export async function ensureTransformSourcesInSandboxStep(params) {
6
+ export async function ensureTransformInputsInSandboxStep(params) {
7
7
  "use step";
8
8
  if (params.state.initialized) {
9
9
  return {
10
- sourcePaths: params.state.sourcePaths,
10
+ inputPaths: params.state.inputPaths,
11
11
  outputPath: getDatasetOutputPath(params.datasetId),
12
12
  state: params.state,
13
13
  };
@@ -18,42 +18,42 @@ export async function ensureTransformSourcesInSandboxStep(params) {
18
18
  cmd: "mkdir",
19
19
  args: ["-p", ...getDatasetStandardDirs(params.datasetId)],
20
20
  });
21
- const sourcePaths = [];
22
- for (const sourceDatasetId of params.sourceDatasetIds) {
23
- const sourcePath = `${getDatasetSourcesDir(params.datasetId)}/source_${sourceDatasetId}.jsonl`;
24
- const source = await datasetReadOutputJsonlStep({
21
+ const inputPaths = [];
22
+ for (const inputDatasetId of params.inputDatasetIds) {
23
+ const inputPath = `${getDatasetResourcesDir(params.datasetId)}/resource_${inputDatasetId}.jsonl`;
24
+ const input = await datasetReadOutputJsonlStep({
25
25
  runtime: params.runtime,
26
- datasetId: sourceDatasetId,
26
+ datasetId: inputDatasetId,
27
27
  });
28
28
  await writeDatasetSandboxFilesStep({
29
29
  runtime: params.runtime,
30
30
  sandboxId: params.sandboxId,
31
- files: [{ path: sourcePath, contentBase64: source.contentBase64 }],
31
+ files: [{ path: inputPath, contentBase64: input.contentBase64 }],
32
32
  });
33
- sourcePaths.push({ datasetId: sourceDatasetId, path: sourcePath });
33
+ inputPaths.push({ datasetId: inputDatasetId, path: inputPath });
34
34
  }
35
35
  return {
36
- sourcePaths,
36
+ inputPaths,
37
37
  outputPath: getDatasetOutputPath(params.datasetId),
38
38
  state: {
39
39
  initialized: true,
40
- sourcePaths,
40
+ inputPaths,
41
41
  },
42
42
  };
43
43
  }
44
- export async function generateTransformSourcePreviewsStep(params) {
44
+ export async function generateTransformInputPreviewsStep(params) {
45
45
  "use step";
46
- const sourcePreviews = [];
47
- for (const sourcePath of params.sourcePaths) {
46
+ const inputPreviews = [];
47
+ for (const inputPath of params.inputPaths) {
48
48
  try {
49
- const preview = await generateSourcePreview(params.runtime, params.sandboxId, sourcePath.path, params.datasetId);
50
- sourcePreviews.push({ datasetId: sourcePath.datasetId, preview });
49
+ const preview = await generateInputPreview(params.runtime, params.sandboxId, inputPath.path, params.datasetId);
50
+ inputPreviews.push({ datasetId: inputPath.datasetId, preview });
51
51
  }
52
52
  catch {
53
- // Source preview is optional; transformation can still read the JSONL files.
53
+ // Input preview is optional; transformation can still read the JSONL files.
54
54
  }
55
55
  }
56
- return sourcePreviews;
56
+ return inputPreviews;
57
57
  }
58
58
  export async function buildTransformDatasetPromptStep(params) {
59
59
  "use step";
@@ -1,34 +1,34 @@
1
1
  import type { ContextReactor } from "@ekairos/events";
2
- import type { TransformSourcePreviewContext } from "./filepreview.js";
3
- export type { TransformSourcePreviewContext } from "./filepreview.js";
2
+ import type { TransformInputPreviewContext } from "./filepreview.js";
3
+ export type { TransformInputPreviewContext } from "./filepreview.js";
4
4
  export type TransformSandboxState = {
5
5
  initialized: boolean;
6
- sourcePaths: Array<{
6
+ inputPaths: Array<{
7
7
  datasetId: string;
8
8
  path: string;
9
9
  }>;
10
10
  };
11
11
  export type TransformDatasetContext = {
12
12
  datasetId: string;
13
- sourceDatasetIds: string[];
13
+ inputDatasetIds: string[];
14
14
  outputSchema: any;
15
15
  sandboxConfig: {
16
- sourcePaths: Array<{
16
+ inputPaths: Array<{
17
17
  datasetId: string;
18
18
  path: string;
19
19
  }>;
20
20
  outputPath: string;
21
21
  };
22
- sourcePreviews?: Array<{
22
+ inputPreviews?: Array<{
23
23
  datasetId: string;
24
- preview: TransformSourcePreviewContext;
24
+ preview: TransformInputPreviewContext;
25
25
  }>;
26
26
  errors: string[];
27
27
  iterationCount: number;
28
28
  instructions?: string;
29
29
  };
30
30
  export type TransformDatasetAgentParams = {
31
- sourceDatasetIds?: string[];
31
+ inputDatasetIds?: string[];
32
32
  outputSchema?: any;
33
33
  instructions?: string;
34
34
  datasetId?: string;
@@ -36,9 +36,9 @@ export type TransformDatasetAgentParams = {
36
36
  sandboxId?: string;
37
37
  reactor?: ContextReactor<any, any>;
38
38
  sandboxState?: TransformSandboxState;
39
- sourcePreviews?: Array<{
39
+ inputPreviews?: Array<{
40
40
  datasetId: string;
41
- preview: TransformSourcePreviewContext;
41
+ preview: TransformInputPreviewContext;
42
42
  }>;
43
43
  };
44
44
  export type TransformDatasetRunOptions = {
@@ -59,16 +59,16 @@ export type TransformDatasetResult = {
59
59
  };
60
60
  export type TransformPromptContext = {
61
61
  datasetId: string;
62
- sourceDatasetIds: string[];
62
+ inputDatasetIds: string[];
63
63
  outputSchema: any;
64
64
  sandboxConfig: {
65
- sourcePaths: Array<{
65
+ inputPaths: Array<{
66
66
  datasetId: string;
67
67
  path: string;
68
68
  }>;
69
69
  outputPath: string;
70
70
  };
71
- sourcePreviews?: Array<{
71
+ inputPreviews?: Array<{
72
72
  datasetId: string;
73
73
  preview: {
74
74
  totalRows: number;
@@ -1,7 +1,7 @@
1
1
  import { datasetPreviewRowsStep } from "../dataset/steps.js";
2
2
  import { createTransformDatasetContext } from "./transform-dataset.agent.js";
3
3
  function buildInstructions(input) {
4
- const sources = input.datasets
4
+ const inputs = input.datasets
5
5
  .map((d, idx) => {
6
6
  const name = d.description ? ` - ${d.description}` : "";
7
7
  return `${idx + 1}. ${d.id}${name}`;
@@ -12,8 +12,8 @@ function buildInstructions(input) {
12
12
  "Use pandas when helpful. Output must be JSONL with {type:'row', data:{...}} lines.",
13
13
  "Respect the provided output schema exactly.",
14
14
  "",
15
- "## Source Datasets",
16
- sources || "- (none)",
15
+ "## Input Datasets",
16
+ inputs || "- (none)",
17
17
  "",
18
18
  "## Transformation Description (LaTeX + sets)",
19
19
  String(input.description ?? "").trim(),
@@ -25,7 +25,7 @@ function buildInstructions(input) {
25
25
  */
26
26
  export async function transformDataset(runtime, input) {
27
27
  const transformContext = createTransformDatasetContext({
28
- sourceDatasetIds: input.datasets.map((d) => d.id),
28
+ inputDatasetIds: input.datasets.map((d) => d.id),
29
29
  outputSchema: input.outputSchema,
30
30
  instructions: buildInstructions(input),
31
31
  datasetId: input.datasetId,