@ekairos/dataset 1.22.83-beta.development.0 → 1.22.85-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/builder/agentMaterializers.d.ts +2 -2
  2. package/dist/builder/context.d.ts +7 -0
  3. package/dist/builder/context.js +192 -0
  4. package/dist/builder/instructions.d.ts +3 -3
  5. package/dist/builder/instructions.js +10 -10
  6. package/dist/builder/materialize.d.ts +10 -11
  7. package/dist/builder/materialize.js +116 -113
  8. package/dist/builder/materializeQuery.d.ts +3 -2
  9. package/dist/builder/materializeQuery.js +10 -19
  10. package/dist/builder/persistence.d.ts +4 -5
  11. package/dist/builder/persistence.js +20 -19
  12. package/dist/builder/types.d.ts +29 -24
  13. package/dist/completeDataset.steps.js +1 -1
  14. package/dist/dataset.d.ts +1 -1
  15. package/dist/dataset.js +42 -29
  16. package/dist/datasetFiles.d.ts +1 -1
  17. package/dist/datasetFiles.js +3 -3
  18. package/dist/file/file-dataset.agent.js +3 -4
  19. package/dist/file/prompts.js +12 -12
  20. package/dist/materializeDataset.tool.d.ts +34 -26
  21. package/dist/materializeDataset.tool.js +40 -29
  22. package/dist/schema.d.ts +12 -2
  23. package/dist/schema.js +6 -3
  24. package/dist/service.d.ts +1 -2
  25. package/dist/service.js +5 -2
  26. package/dist/transform/filepreview.d.ts +2 -2
  27. package/dist/transform/filepreview.js +3 -3
  28. package/dist/transform/prompts.js +25 -25
  29. package/dist/transform/transform-dataset.agent.d.ts +4 -4
  30. package/dist/transform/transform-dataset.agent.js +29 -30
  31. package/dist/transform/transform-dataset.steps.d.ts +7 -7
  32. package/dist/transform/transform-dataset.steps.js +20 -20
  33. package/dist/transform/transform-dataset.types.d.ts +13 -13
  34. package/dist/transform/transformDataset.js +4 -4
  35. package/package.json +4 -4
  36. /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
  37. /package/dist/builder/{sourceRows.js → rows.js} +0 -0
@@ -1,40 +1,44 @@
1
1
  import type { InstaQLParams, ValidQuery } from "@instantdb/core";
2
2
  import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
3
3
  import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
4
- import type { ContextReactor } from "@ekairos/events";
4
+ import type { ContextIdentifier, ContextReactor } from "@ekairos/events";
5
5
  import { datasetDomain } from "../schema.js";
6
- export type DatasetQuerySourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
6
+ export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
7
7
  query: InstaQLParams<DomainInstantSchema<D>>;
8
8
  title?: string;
9
9
  explanation?: string;
10
10
  domain: D;
11
11
  };
12
- export type DatasetFileSourceInput = {
12
+ export type DatasetFileResourceInput = {
13
13
  fileId: string;
14
14
  description?: string;
15
15
  filename?: string;
16
16
  mediaType?: string;
17
17
  };
18
- export type DatasetTextSourceInput = {
18
+ export type DatasetTextResourceInput = {
19
19
  text: string;
20
20
  mimeType?: string;
21
21
  name?: string;
22
22
  description?: string;
23
23
  };
24
- export type DatasetExistingSourceInput = {
24
+ export type DatasetExistingResourceInput = {
25
25
  datasetId: string;
26
26
  description?: string;
27
27
  };
28
- export type DatasetFileSource = {
28
+ export type DatasetContextResourceInput = ContextIdentifier;
29
+ export type DatasetFileResource = {
29
30
  kind: "file";
30
- } & DatasetFileSourceInput;
31
- export type DatasetTextSource = {
31
+ } & DatasetFileResourceInput;
32
+ export type DatasetTextResource = {
32
33
  kind: "text";
33
- } & DatasetTextSourceInput;
34
- export type DatasetExistingSource = {
34
+ } & DatasetTextResourceInput;
35
+ export type DatasetExistingResource = {
35
36
  kind: "dataset";
36
- } & DatasetExistingSourceInput;
37
- export type DatasetSourceInput = DatasetFileSourceInput | DatasetTextSourceInput | DatasetExistingSourceInput | DatasetFileSource | DatasetTextSource | DatasetExistingSource;
37
+ } & DatasetExistingResourceInput;
38
+ export type DatasetContextResource = {
39
+ kind: "context";
40
+ } & DatasetContextResourceInput;
41
+ export type DatasetResourceInput = DatasetFileResourceInput | DatasetTextResourceInput | DatasetExistingResourceInput | DatasetContextResourceInput | DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource;
38
42
  export type DatasetSchemaInput = {
39
43
  title?: string;
40
44
  description?: string;
@@ -50,9 +54,9 @@ export type DatasetBuildOptions = {
50
54
  datasetId?: string;
51
55
  durable?: boolean;
52
56
  };
53
- export type InternalSource = DatasetFileSource | DatasetTextSource | DatasetExistingSource | ({
57
+ export type InternalDatasetResource = DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource | ({
54
58
  kind: "query";
55
- } & DatasetQuerySourceInput);
59
+ } & DatasetQueryResourceInput);
56
60
  export type DatasetReaderResult = {
57
61
  rows: any[];
58
62
  cursor: number;
@@ -78,8 +82,8 @@ export type DatasetRuntimeEnv = {
78
82
  };
79
83
  export type AnyDatasetRuntime = EkairosRuntime<any, any, any>;
80
84
  export type DatasetRuntimeHandle<Runtime extends AnyDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
81
- export type CompatibleSourceDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
82
- export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
85
+ export type CompatibleQueryDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
86
+ export type DatasetQueryResourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
83
87
  query: Q;
84
88
  title?: string;
85
89
  explanation?: string;
@@ -87,9 +91,10 @@ export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends Va
87
91
  export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
88
92
  runtime: Runtime;
89
93
  env: Runtime["env"] & DatasetRuntimeEnv;
90
- sources: InternalSource[];
94
+ resources: InternalDatasetResource[];
91
95
  title?: string;
92
96
  sandboxId?: string;
97
+ contextId?: string;
93
98
  outputSchema?: DatasetSchemaInput;
94
99
  output: DatasetOutput;
95
100
  inferSchema: boolean;
@@ -103,8 +108,7 @@ export type MaterializeRowsParams = {
103
108
  sandboxId?: string;
104
109
  title?: string;
105
110
  instructions?: string;
106
- sources: any[];
107
- sourceKinds: string[];
111
+ contextId: string;
108
112
  analysis?: any;
109
113
  rows: any[];
110
114
  schema?: DatasetSchemaInput;
@@ -113,11 +117,12 @@ export type MaterializeRowsParams = {
113
117
  };
114
118
  export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
115
119
  readonly datasetId: string;
116
- fromFile(source: DatasetFileSourceInput): DatasetBuilder<Runtime>;
117
- fromText(source: DatasetTextSourceInput): DatasetBuilder<Runtime>;
118
- fromDataset(source: DatasetExistingSourceInput): DatasetBuilder<Runtime>;
119
- from(...sources: DatasetSourceInput[]): DatasetBuilder<Runtime>;
120
- fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleSourceDomain<Runtime, D>, source: DatasetQuerySourceOptions<D, Q>): DatasetBuilder<Runtime>;
120
+ fromFile(resource: DatasetFileResourceInput): DatasetBuilder<Runtime>;
121
+ fromText(resource: DatasetTextResourceInput): DatasetBuilder<Runtime>;
122
+ fromDataset(resource: DatasetExistingResourceInput): DatasetBuilder<Runtime>;
123
+ fromContext(context: DatasetContextResourceInput): DatasetBuilder<Runtime>;
124
+ from(...resources: DatasetResourceInput[]): DatasetBuilder<Runtime>;
125
+ fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleQueryDomain<Runtime, D>, resource: DatasetQueryResourceOptions<D, Q>): DatasetBuilder<Runtime>;
121
126
  title(title: string): DatasetBuilder<Runtime>;
122
127
  sandbox(input: {
123
128
  sandboxId: string;
@@ -313,7 +313,7 @@ function buildValidationFailureSummary(params) {
313
313
  }
314
314
  function buildRepairInstructions(summary) {
315
315
  const instructions = [
316
- "Rewrite output.jsonl using the schema as the source of truth. Do not use source file headers as JSON keys unless they exactly match schema property names.",
316
+ "Rewrite output.jsonl using the schema as the authority. Do not use input file headers as JSON keys unless they exactly match schema property names.",
317
317
  "Each non-empty line must be a JSON object shaped as {\"type\":\"row\",\"data\":{...}}.",
318
318
  "Populate every required top-level and nested required path from failureSummary.requiredPaths.",
319
319
  "For enum fields, emit exactly one allowed literal from failureSummary.enumConstraints or failureSummary.enumFailures.",
package/dist/dataset.d.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  import type { AnyDatasetRuntime, DatasetBuilder, DatasetBuilderOptions, DatasetRuntimeHandle } from "./builder/types.js";
2
- export type { AnyDatasetRuntime, CompatibleSourceDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingSource, DatasetExistingSourceInput, DatasetFileSource, DatasetFileSourceInput, DatasetMode, DatasetOutput, DatasetQuerySourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextSource, DatasetSourceInput, DatasetTextSourceInput, } from "./builder/types.js";
2
+ export type { AnyDatasetRuntime, CompatibleQueryDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingResource, DatasetExistingResourceInput, DatasetFileResource, DatasetFileResourceInput, DatasetMode, DatasetOutput, DatasetQueryResourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextResource, DatasetResourceInput, DatasetTextResourceInput, } from "./builder/types.js";
3
3
  export declare function dataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime & DatasetRuntimeHandle<Runtime>, options?: DatasetBuilderOptions): DatasetBuilder<Runtime>;
package/dist/dataset.js CHANGED
@@ -1,7 +1,8 @@
1
1
  import { buildObjectOutputInstructions } from "./builder/instructions.js";
2
+ import { resolveDatasetResourceContext } from "./builder/context.js";
2
3
  import { createDatasetId } from "./id.js";
3
- import { completeDatasetStep, materializeDerivedDataset, materializeSingleFileLikeSource, } from "./builder/materialize.js";
4
- import { materializeQuerySource } from "./builder/materializeQuery.js";
4
+ import { completeDatasetStep, materializeDerivedDataset, materializeSingleFileLikeResource, } from "./builder/materialize.js";
5
+ import { materializeQueryResource } from "./builder/materializeQuery.js";
5
6
  import { createDatasetBuildResult, finalizeBuildResult, } from "./builder/persistence.js";
6
7
  export function dataset(runtime, options = {}) {
7
8
  const datasetId = normalizeDatasetId(options.datasetId);
@@ -9,7 +10,7 @@ export function dataset(runtime, options = {}) {
9
10
  const state = {
10
11
  runtime: typedRuntime,
11
12
  env: typedRuntime.env,
12
- sources: [],
13
+ resources: [],
13
14
  output: "rows",
14
15
  inferSchema: false,
15
16
  durable: options.durable,
@@ -17,38 +18,46 @@ export function dataset(runtime, options = {}) {
17
18
  };
18
19
  const api = {
19
20
  datasetId,
20
- fromFile(source) {
21
- state.sources.push({ kind: "file", ...source });
21
+ fromFile(resource) {
22
+ state.resources.push({ kind: "file", ...resource });
22
23
  return api;
23
24
  },
24
- fromText(source) {
25
- state.sources.push({ kind: "text", ...source });
25
+ fromText(resource) {
26
+ state.resources.push({ kind: "text", ...resource });
26
27
  return api;
27
28
  },
28
- fromDataset(source) {
29
- state.sources.push({ kind: "dataset", ...source });
29
+ fromDataset(resource) {
30
+ state.resources.push({ kind: "dataset", ...resource });
30
31
  return api;
31
32
  },
32
- from(...sources) {
33
- for (const source of sources) {
34
- if ("kind" in source) {
35
- state.sources.push(source);
33
+ fromContext(context) {
34
+ state.resources.push({ kind: "context", ...context });
35
+ return api;
36
+ },
37
+ from(...resources) {
38
+ for (const resource of resources) {
39
+ if ("kind" in resource) {
40
+ state.resources.push(resource);
41
+ continue;
42
+ }
43
+ if ("fileId" in resource) {
44
+ state.resources.push({ kind: "file", ...resource });
36
45
  continue;
37
46
  }
38
- if ("fileId" in source) {
39
- state.sources.push({ kind: "file", ...source });
47
+ if ("datasetId" in resource) {
48
+ state.resources.push({ kind: "dataset", ...resource });
40
49
  continue;
41
50
  }
42
- if ("datasetId" in source) {
43
- state.sources.push({ kind: "dataset", ...source });
51
+ if ("id" in resource || "key" in resource) {
52
+ state.resources.push({ kind: "context", ...resource });
44
53
  continue;
45
54
  }
46
- state.sources.push({ kind: "text", ...source });
55
+ state.resources.push({ kind: "text", ...resource });
47
56
  }
48
57
  return api;
49
58
  },
50
- fromQuery(domain, source) {
51
- state.sources.push({ kind: "query", domain, ...source });
59
+ fromQuery(domain, resource) {
60
+ state.resources.push({ kind: "query", domain, ...resource });
52
61
  return api;
53
62
  },
54
63
  title(title) {
@@ -96,8 +105,8 @@ export function dataset(runtime, options = {}) {
96
105
  return api;
97
106
  },
98
107
  async build(options) {
99
- if (state.sources.length === 0) {
100
- throw new Error("dataset_sources_required");
108
+ if (state.resources.length === 0) {
109
+ throw new Error("dataset_resources_required");
101
110
  }
102
111
  const targetDatasetId = options?.datasetId
103
112
  ? normalizeDatasetId(options.datasetId)
@@ -106,6 +115,9 @@ export function dataset(runtime, options = {}) {
106
115
  ...state,
107
116
  durable: options?.durable ?? state.durable,
108
117
  };
118
+ const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
119
+ stateWithBuildOptions.resources = context.resources;
120
+ stateWithBuildOptions.contextId = context.contextId;
109
121
  const effectiveState = stateWithBuildOptions.output === "object"
110
122
  ? {
111
123
  ...stateWithBuildOptions,
@@ -113,25 +125,26 @@ export function dataset(runtime, options = {}) {
113
125
  instructions: buildObjectOutputInstructions(stateWithBuildOptions.instructions),
114
126
  }
115
127
  : stateWithBuildOptions;
116
- const onlySource = effectiveState.sources[0];
117
- const isSingleSource = effectiveState.sources.length === 1;
128
+ const onlyResource = effectiveState.resources[0];
129
+ const isSingleResource = effectiveState.resources.length === 1;
118
130
  const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
119
- if (isSingleSource && onlySource.kind === "query" && !hasInstructions) {
120
- await materializeQuerySource(effectiveState.runtime, onlySource, {
131
+ if (isSingleResource && onlyResource.kind === "query" && !hasInstructions) {
132
+ await materializeQueryResource(effectiveState.runtime, onlyResource, {
121
133
  datasetId: targetDatasetId,
122
134
  sandboxId: effectiveState.sandboxId,
123
135
  schema: effectiveState.outputSchema,
124
- title: effectiveState.title ?? onlySource.title,
136
+ title: effectiveState.title ?? onlyResource.title,
125
137
  instructions: effectiveState.instructions,
126
138
  first: effectiveState.first,
139
+ contextId: effectiveState.contextId ?? "",
127
140
  });
128
141
  return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
129
142
  }
130
- if (isSingleSource && (onlySource.kind === "file" || onlySource.kind === "text")) {
143
+ if (isSingleResource && (onlyResource.kind === "file" || onlyResource.kind === "text")) {
131
144
  if (!effectiveState.reactor) {
132
145
  throw new Error("dataset_reactor_required");
133
146
  }
134
- await materializeSingleFileLikeSource(effectiveState, onlySource, targetDatasetId);
147
+ await materializeSingleFileLikeResource(effectiveState, onlyResource, targetDatasetId);
135
148
  const completed = await completeDatasetStep({
136
149
  runtime: effectiveState.runtime,
137
150
  datasetId: targetDatasetId,
@@ -1,7 +1,7 @@
1
1
  export declare const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
2
2
  export declare function getDatasetWorkdirBase(): string;
3
3
  export declare function getDatasetWorkstation(datasetId: string): string;
4
- export declare function getDatasetSourcesDir(datasetId: string): string;
4
+ export declare function getDatasetResourcesDir(datasetId: string): string;
5
5
  export declare function getDatasetScriptsDir(datasetId: string): string;
6
6
  export declare function getDatasetArtifactsDir(datasetId: string): string;
7
7
  export declare function getDatasetLogsDir(datasetId: string): string;
@@ -9,8 +9,8 @@ export function getDatasetWorkdirBase() {
9
9
  export function getDatasetWorkstation(datasetId) {
10
10
  return `${getDatasetWorkdirBase()}/${datasetId}`;
11
11
  }
12
- export function getDatasetSourcesDir(datasetId) {
13
- return `${getDatasetWorkstation(datasetId)}/sources`;
12
+ export function getDatasetResourcesDir(datasetId) {
13
+ return `${getDatasetWorkstation(datasetId)}/resources`;
14
14
  }
15
15
  export function getDatasetScriptsDir(datasetId) {
16
16
  return `${getDatasetWorkstation(datasetId)}/scripts`;
@@ -24,7 +24,7 @@ export function getDatasetLogsDir(datasetId) {
24
24
  export function getDatasetStandardDirs(datasetId) {
25
25
  return [
26
26
  getDatasetWorkstation(datasetId),
27
- getDatasetSourcesDir(datasetId),
27
+ getDatasetResourcesDir(datasetId),
28
28
  getDatasetScriptsDir(datasetId),
29
29
  getDatasetArtifactsDir(datasetId),
30
30
  getDatasetLogsDir(datasetId),
@@ -210,7 +210,7 @@ export function createFileParseContext(fileId, opts) {
210
210
  {
211
211
  type: "file",
212
212
  fileId,
213
- filename: opts?.filename ?? "source-file",
213
+ filename: opts?.filename ?? "resource-file",
214
214
  mediaType: opts?.mediaType ?? "application/octet-stream",
215
215
  },
216
216
  ],
@@ -218,14 +218,13 @@ export function createFileParseContext(fileId, opts) {
218
218
  };
219
219
  params.sourceEventId = triggerEvent.id;
220
220
  params.sourcePartIndex = 1;
221
- params.filename = opts?.filename ?? "source-file";
221
+ params.filename = opts?.filename ?? "resource-file";
222
222
  params.mediaType = opts?.mediaType ?? "application/octet-stream";
223
223
  const shell = await context.react(triggerEvent, {
224
224
  runtime: runtime,
225
225
  context: { key: `dataset:${datasetId}` },
226
226
  durable: options.durable ?? false,
227
227
  options: {
228
- silent: true,
229
228
  preventClose: true,
230
229
  sendFinish: false,
231
230
  maxIterations: 20,
@@ -237,7 +236,7 @@ export function createFileParseContext(fileId, opts) {
237
236
  fileId,
238
237
  sourceEventId: triggerEvent.id,
239
238
  sourcePartIndex: 1,
240
- filename: opts?.filename ?? "source-file",
239
+ filename: opts?.filename ?? "resource-file",
241
240
  mediaType: opts?.mediaType ?? "application/octet-stream",
242
241
  instructions: opts?.instructions ?? "",
243
242
  sandboxId: opts?.sandboxId ?? "",
@@ -11,13 +11,13 @@ function buildRole() {
11
11
  function buildGoal() {
12
12
  let xml = create()
13
13
  .ele("Goal")
14
- .txt("Convert the source file into a validated JSONL dataset (output.jsonl) where each line is a JSON object conforming to a generated schema. The schema describes ONE data record structure. Extract ONLY data records; exclude any header sections, metadata, or summary information from the file.")
14
+ .txt("Convert the input file into a validated JSONL dataset (output.jsonl) where each line is a JSON object conforming to a generated schema. The schema describes ONE data record structure. Extract ONLY data records; exclude any header sections, metadata, or summary information from the file.")
15
15
  .up();
16
16
  return xml.end({ prettyPrint: true, headless: true });
17
17
  }
18
- function buildSourceInfo(context) {
18
+ function buildResourceInfo(context) {
19
19
  let xml = create()
20
- .ele("Source")
20
+ .ele("FileResource")
21
21
  .ele("Type").txt("file").up()
22
22
  .ele("FileId").txt(context.fileId).up()
23
23
  .ele("DatasetId").txt(context.datasetId).up()
@@ -90,7 +90,7 @@ function buildErrorsSection(errors) {
90
90
  }
91
91
  let xml = create()
92
92
  .ele("PreviousErrors")
93
- .ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch source column names into schema keys piecemeal.").up();
93
+ .ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch input column names into schema keys piecemeal.").up();
94
94
  for (const error of errors) {
95
95
  xml = xml.ele("Error").txt(error).up();
96
96
  }
@@ -100,8 +100,8 @@ function buildErrorsSection(errors) {
100
100
  function buildContextSection(context) {
101
101
  let xml = create()
102
102
  .ele("Context");
103
- const sourceXml = buildSourceInfo(context);
104
- xml = xml.import(sourceXml.first());
103
+ const resourceXml = buildResourceInfo(context);
104
+ xml = xml.import(resourceXml.first());
105
105
  if (context.filePreview) {
106
106
  const previewXml = buildFilePreviewSection(context.filePreview);
107
107
  xml = xml.import(previewXml.first());
@@ -195,9 +195,9 @@ function buildSchemaSection(context) {
195
195
  xml = xml
196
196
  .ele("SchemaContract")
197
197
  .ele("Purpose").txt("Compact output contract derived from JSON Schema. Use this before writing output.jsonl.").up()
198
- .ele("Rule").txt("Use only schema property keys in data objects. Source headers are input labels, not output keys.").up()
198
+ .ele("Rule").txt("Use only schema property keys in data objects. Input headers are input labels, not output keys.").up()
199
199
  .ele("Rule").txt("Required paths are required everywhere, including nested objects and array items.").up()
200
- .ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize source labels to the closest valid enum literal; never emit a value outside the enum.").up();
200
+ .ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize input labels to the closest valid enum literal; never emit a value outside the enum.").up();
201
201
  xml = appendLimitedList(xml, "RequiredPaths", "Path", contract.requiredPaths, 120);
202
202
  xml = appendLimitedList(xml, "PropertyPaths", "Path", contract.propertyPaths, 160);
203
203
  let enumsXml = xml.ele("EnumConstraints");
@@ -245,10 +245,10 @@ function buildInstructions(context) {
245
245
  .ele("Requirements")
246
246
  .ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
247
247
  .ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
248
- .ele("Requirement").txt("Build a schema-first mapping from source columns to schema fields before writing output.jsonl. Do not use raw source headers as JSON keys unless they are exactly schema keys").up()
248
+ .ele("Requirement").txt("Build a schema-first mapping from input columns to schema fields before writing output.jsonl. Do not use raw input headers as JSON keys unless they are exactly schema keys").up()
249
249
  .ele("Requirement").txt("For nested required fields, populate the required child keys inside each nested object or array item; top-level validity is not enough").up()
250
250
  .ele("Requirement").txt("For enum fields, emit exactly one allowed enum literal from SchemaContract; normalize labels or abbreviations into allowed literals").up()
251
- .ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the source language").up()
251
+ .ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the input language").up()
252
252
  .ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
253
253
  .up()
254
254
  .up();
@@ -286,8 +286,8 @@ function buildInstructions(context) {
286
286
  .up()
287
287
  .ele("Rules")
288
288
  .ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
289
- .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the source language").up()
290
- .ele("Rule").txt("Original/source language applies to extracted values only, not to JSON object keys").up()
289
+ .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the input language").up()
290
+ .ele("Rule").txt("Original/input language applies to extracted values only, not to JSON object keys").up()
291
291
  .ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
292
292
  .ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
293
293
  .ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()
@@ -7,26 +7,30 @@ declare const materializeDatasetToolInputSchema: z.ZodObject<{
7
7
  datasetId: z.ZodOptional<z.ZodString>;
8
8
  sandboxId: z.ZodOptional<z.ZodString>;
9
9
  title: z.ZodOptional<z.ZodString>;
10
- sources: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
11
- kind: z.ZodLiteral<"file">;
12
- fileId: z.ZodString;
13
- description: z.ZodOptional<z.ZodString>;
10
+ context: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
11
+ id: z.ZodString;
14
12
  }, z.core.$strip>, z.ZodObject<{
15
- kind: z.ZodLiteral<"text">;
16
- text: z.ZodString;
17
- mimeType: z.ZodOptional<z.ZodString>;
13
+ key: z.ZodString;
14
+ }, z.core.$strip>]>>;
15
+ files: z.ZodOptional<z.ZodArray<z.ZodObject<{
16
+ description: z.ZodOptional<z.ZodString>;
17
+ fileId: z.ZodString;
18
+ }, z.core.$strip>>>;
19
+ texts: z.ZodOptional<z.ZodArray<z.ZodObject<{
18
20
  name: z.ZodOptional<z.ZodString>;
21
+ text: z.ZodString;
19
22
  description: z.ZodOptional<z.ZodString>;
20
- }, z.core.$strip>, z.ZodObject<{
21
- kind: z.ZodLiteral<"dataset">;
23
+ mimeType: z.ZodOptional<z.ZodString>;
24
+ }, z.core.$strip>>>;
25
+ datasets: z.ZodOptional<z.ZodArray<z.ZodObject<{
22
26
  datasetId: z.ZodString;
23
27
  description: z.ZodOptional<z.ZodString>;
24
- }, z.core.$strip>, z.ZodObject<{
25
- kind: z.ZodLiteral<"query">;
26
- query: z.ZodRecord<z.ZodString, z.ZodAny>;
28
+ }, z.core.$strip>>>;
29
+ queries: z.ZodOptional<z.ZodArray<z.ZodObject<{
27
30
  title: z.ZodOptional<z.ZodString>;
31
+ query: z.ZodRecord<z.ZodString, z.ZodAny>;
28
32
  explanation: z.ZodOptional<z.ZodString>;
29
- }, z.core.$strip>], "kind">>;
33
+ }, z.core.$strip>>>;
30
34
  instructions: z.ZodOptional<z.ZodString>;
31
35
  mode: z.ZodOptional<z.ZodEnum<{
32
36
  schema: "schema";
@@ -52,29 +56,33 @@ export declare function createMaterializeDatasetTool<Runtime extends AnyMaterial
52
56
  queryDomain: QueryDomain & CompatibleToolQueryDomain<Runtime, QueryDomain>;
53
57
  toolName?: string;
54
58
  }): import("ai").Tool<{
55
- sources: ({
56
- kind: "file";
59
+ datasetId?: string | undefined;
60
+ sandboxId?: string | undefined;
61
+ title?: string | undefined;
62
+ context?: {
63
+ id: string;
64
+ } | {
65
+ key: string;
66
+ } | undefined;
67
+ files?: {
57
68
  fileId: string;
58
69
  description?: string | undefined;
59
- } | {
60
- kind: "text";
70
+ }[] | undefined;
71
+ texts?: {
61
72
  text: string;
62
- mimeType?: string | undefined;
63
73
  name?: string | undefined;
64
74
  description?: string | undefined;
65
- } | {
66
- kind: "dataset";
75
+ mimeType?: string | undefined;
76
+ }[] | undefined;
77
+ datasets?: {
67
78
  datasetId: string;
68
79
  description?: string | undefined;
69
- } | {
70
- kind: "query";
80
+ }[] | undefined;
81
+ queries?: {
71
82
  query: Record<string, any>;
72
83
  title?: string | undefined;
73
84
  explanation?: string | undefined;
74
- })[];
75
- datasetId?: string | undefined;
76
- sandboxId?: string | undefined;
77
- title?: string | undefined;
85
+ }[] | undefined;
78
86
  instructions?: string | undefined;
79
87
  mode?: "schema" | "auto" | undefined;
80
88
  output?: "object" | "rows" | undefined;
@@ -1,29 +1,33 @@
1
1
  import { tool } from "ai";
2
2
  import { z } from "zod";
3
3
  import { dataset } from "./dataset.js";
4
- const fileSourceSchema = z.object({
4
+ const fileResourceSchema = z.object({
5
5
  kind: z.literal("file"),
6
6
  fileId: z.string(),
7
7
  description: z.string().optional(),
8
8
  });
9
- const textSourceSchema = z.object({
9
+ const textResourceSchema = z.object({
10
10
  kind: z.literal("text"),
11
11
  text: z.string(),
12
12
  mimeType: z.string().optional(),
13
13
  name: z.string().optional(),
14
14
  description: z.string().optional(),
15
15
  });
16
- const datasetSourceSchema = z.object({
16
+ const datasetResourceSchema = z.object({
17
17
  kind: z.literal("dataset"),
18
18
  datasetId: z.string(),
19
19
  description: z.string().optional(),
20
20
  });
21
- const querySourceSchema = z.object({
21
+ const queryResourceSchema = z.object({
22
22
  kind: z.literal("query"),
23
23
  query: z.record(z.string(), z.any()),
24
24
  title: z.string().optional(),
25
25
  explanation: z.string().optional(),
26
26
  });
27
+ const contextInputSchema = z.union([
28
+ z.object({ id: z.string() }),
29
+ z.object({ key: z.string() }),
30
+ ]);
27
31
  const datasetSchemaSchema = z.object({
28
32
  title: z.string().optional(),
29
33
  description: z.string().optional(),
@@ -33,14 +37,11 @@ const materializeDatasetToolInputSchema = z.object({
33
37
  datasetId: z.string().optional(),
34
38
  sandboxId: z.string().optional(),
35
39
  title: z.string().optional(),
36
- sources: z
37
- .array(z.discriminatedUnion("kind", [
38
- fileSourceSchema,
39
- textSourceSchema,
40
- datasetSourceSchema,
41
- querySourceSchema,
42
- ]))
43
- .min(1),
40
+ context: contextInputSchema.optional(),
41
+ files: z.array(fileResourceSchema.omit({ kind: true })).optional(),
42
+ texts: z.array(textResourceSchema.omit({ kind: true })).optional(),
43
+ datasets: z.array(datasetResourceSchema.omit({ kind: true })).optional(),
44
+ queries: z.array(queryResourceSchema.omit({ kind: true })).optional(),
44
45
  instructions: z.string().optional(),
45
46
  mode: z.enum(["auto", "schema"]).optional(),
46
47
  output: z.enum(["rows", "object"]).optional(),
@@ -49,7 +50,7 @@ const materializeDatasetToolInputSchema = z.object({
49
50
  });
50
51
  export function createMaterializeDatasetTool(params) {
51
52
  return tool({
52
- description: "Materialize a dataset from declarative sources. Returns only the target datasetId. Query sources use the preconfigured runtime domain.",
53
+ description: "Materialize a dataset from declarative resources. Returns only the target datasetId. Query resources use the preconfigured runtime domain.",
53
54
  inputSchema: materializeDatasetToolInputSchema,
54
55
  execute: async (input) => {
55
56
  let builder = dataset(params.runtime);
@@ -59,23 +60,33 @@ export function createMaterializeDatasetTool(params) {
59
60
  if (input.sandboxId?.trim()) {
60
61
  builder = builder.sandbox({ sandboxId: input.sandboxId });
61
62
  }
62
- for (const source of input.sources) {
63
- if (source.kind === "file") {
64
- builder = builder.fromFile(source);
65
- continue;
66
- }
67
- if (source.kind === "text") {
68
- builder = builder.fromText(source);
69
- continue;
70
- }
71
- if (source.kind === "dataset") {
72
- builder = builder.fromDataset(source);
73
- continue;
74
- }
63
+ const materialCount = (input.files?.length ?? 0) +
64
+ (input.texts?.length ?? 0) +
65
+ (input.datasets?.length ?? 0) +
66
+ (input.queries?.length ?? 0);
67
+ if (input.context && materialCount > 0) {
68
+ throw new Error("dataset_context_resource_is_exclusive");
69
+ }
70
+ if (!input.context && materialCount === 0) {
71
+ throw new Error("dataset_context_or_material_required");
72
+ }
73
+ if (input.context) {
74
+ builder = builder.fromContext(input.context);
75
+ }
76
+ for (const resource of input.files ?? []) {
77
+ builder = builder.fromFile(resource);
78
+ }
79
+ for (const resource of input.texts ?? []) {
80
+ builder = builder.fromText(resource);
81
+ }
82
+ for (const resource of input.datasets ?? []) {
83
+ builder = builder.fromDataset(resource);
84
+ }
85
+ for (const resource of input.queries ?? []) {
75
86
  builder = builder.fromQuery(params.queryDomain, {
76
- query: source.query,
77
- title: source.title,
78
- explanation: source.explanation,
87
+ query: resource.query,
88
+ title: resource.title,
89
+ explanation: resource.explanation,
79
90
  });
80
91
  }
81
92
  if (input.output === "object") {