@ekairos/dataset 1.22.83-beta.development.0 → 1.22.85-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/agentMaterializers.d.ts +2 -2
- package/dist/builder/context.d.ts +7 -0
- package/dist/builder/context.js +192 -0
- package/dist/builder/instructions.d.ts +3 -3
- package/dist/builder/instructions.js +10 -10
- package/dist/builder/materialize.d.ts +10 -11
- package/dist/builder/materialize.js +116 -113
- package/dist/builder/materializeQuery.d.ts +3 -2
- package/dist/builder/materializeQuery.js +10 -19
- package/dist/builder/persistence.d.ts +4 -5
- package/dist/builder/persistence.js +20 -19
- package/dist/builder/types.d.ts +29 -24
- package/dist/completeDataset.steps.js +1 -1
- package/dist/dataset.d.ts +1 -1
- package/dist/dataset.js +42 -29
- package/dist/datasetFiles.d.ts +1 -1
- package/dist/datasetFiles.js +3 -3
- package/dist/file/file-dataset.agent.js +3 -4
- package/dist/file/prompts.js +12 -12
- package/dist/materializeDataset.tool.d.ts +34 -26
- package/dist/materializeDataset.tool.js +40 -29
- package/dist/schema.d.ts +12 -2
- package/dist/schema.js +6 -3
- package/dist/service.d.ts +1 -2
- package/dist/service.js +5 -2
- package/dist/transform/filepreview.d.ts +2 -2
- package/dist/transform/filepreview.js +3 -3
- package/dist/transform/prompts.js +25 -25
- package/dist/transform/transform-dataset.agent.d.ts +4 -4
- package/dist/transform/transform-dataset.agent.js +29 -30
- package/dist/transform/transform-dataset.steps.d.ts +7 -7
- package/dist/transform/transform-dataset.steps.js +20 -20
- package/dist/transform/transform-dataset.types.d.ts +13 -13
- package/dist/transform/transformDataset.js +4 -4
- package/package.json +4 -4
- /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
- /package/dist/builder/{sourceRows.js → rows.js} +0 -0
package/dist/builder/types.d.ts
CHANGED
|
@@ -1,40 +1,44 @@
|
|
|
1
1
|
import type { InstaQLParams, ValidQuery } from "@instantdb/core";
|
|
2
2
|
import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
|
|
3
3
|
import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
|
|
4
|
-
import type { ContextReactor } from "@ekairos/events";
|
|
4
|
+
import type { ContextIdentifier, ContextReactor } from "@ekairos/events";
|
|
5
5
|
import { datasetDomain } from "../schema.js";
|
|
6
|
-
export type
|
|
6
|
+
export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
|
|
7
7
|
query: InstaQLParams<DomainInstantSchema<D>>;
|
|
8
8
|
title?: string;
|
|
9
9
|
explanation?: string;
|
|
10
10
|
domain: D;
|
|
11
11
|
};
|
|
12
|
-
export type
|
|
12
|
+
export type DatasetFileResourceInput = {
|
|
13
13
|
fileId: string;
|
|
14
14
|
description?: string;
|
|
15
15
|
filename?: string;
|
|
16
16
|
mediaType?: string;
|
|
17
17
|
};
|
|
18
|
-
export type
|
|
18
|
+
export type DatasetTextResourceInput = {
|
|
19
19
|
text: string;
|
|
20
20
|
mimeType?: string;
|
|
21
21
|
name?: string;
|
|
22
22
|
description?: string;
|
|
23
23
|
};
|
|
24
|
-
export type
|
|
24
|
+
export type DatasetExistingResourceInput = {
|
|
25
25
|
datasetId: string;
|
|
26
26
|
description?: string;
|
|
27
27
|
};
|
|
28
|
-
export type
|
|
28
|
+
export type DatasetContextResourceInput = ContextIdentifier;
|
|
29
|
+
export type DatasetFileResource = {
|
|
29
30
|
kind: "file";
|
|
30
|
-
} &
|
|
31
|
-
export type
|
|
31
|
+
} & DatasetFileResourceInput;
|
|
32
|
+
export type DatasetTextResource = {
|
|
32
33
|
kind: "text";
|
|
33
|
-
} &
|
|
34
|
-
export type
|
|
34
|
+
} & DatasetTextResourceInput;
|
|
35
|
+
export type DatasetExistingResource = {
|
|
35
36
|
kind: "dataset";
|
|
36
|
-
} &
|
|
37
|
-
export type
|
|
37
|
+
} & DatasetExistingResourceInput;
|
|
38
|
+
export type DatasetContextResource = {
|
|
39
|
+
kind: "context";
|
|
40
|
+
} & DatasetContextResourceInput;
|
|
41
|
+
export type DatasetResourceInput = DatasetFileResourceInput | DatasetTextResourceInput | DatasetExistingResourceInput | DatasetContextResourceInput | DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource;
|
|
38
42
|
export type DatasetSchemaInput = {
|
|
39
43
|
title?: string;
|
|
40
44
|
description?: string;
|
|
@@ -50,9 +54,9 @@ export type DatasetBuildOptions = {
|
|
|
50
54
|
datasetId?: string;
|
|
51
55
|
durable?: boolean;
|
|
52
56
|
};
|
|
53
|
-
export type
|
|
57
|
+
export type InternalDatasetResource = DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource | ({
|
|
54
58
|
kind: "query";
|
|
55
|
-
} &
|
|
59
|
+
} & DatasetQueryResourceInput);
|
|
56
60
|
export type DatasetReaderResult = {
|
|
57
61
|
rows: any[];
|
|
58
62
|
cursor: number;
|
|
@@ -78,8 +82,8 @@ export type DatasetRuntimeEnv = {
|
|
|
78
82
|
};
|
|
79
83
|
export type AnyDatasetRuntime = EkairosRuntime<any, any, any>;
|
|
80
84
|
export type DatasetRuntimeHandle<Runtime extends AnyDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
|
|
81
|
-
export type
|
|
82
|
-
export type
|
|
85
|
+
export type CompatibleQueryDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
|
|
86
|
+
export type DatasetQueryResourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
|
|
83
87
|
query: Q;
|
|
84
88
|
title?: string;
|
|
85
89
|
explanation?: string;
|
|
@@ -87,9 +91,10 @@ export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends Va
|
|
|
87
91
|
export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
|
|
88
92
|
runtime: Runtime;
|
|
89
93
|
env: Runtime["env"] & DatasetRuntimeEnv;
|
|
90
|
-
|
|
94
|
+
resources: InternalDatasetResource[];
|
|
91
95
|
title?: string;
|
|
92
96
|
sandboxId?: string;
|
|
97
|
+
contextId?: string;
|
|
93
98
|
outputSchema?: DatasetSchemaInput;
|
|
94
99
|
output: DatasetOutput;
|
|
95
100
|
inferSchema: boolean;
|
|
@@ -103,8 +108,7 @@ export type MaterializeRowsParams = {
|
|
|
103
108
|
sandboxId?: string;
|
|
104
109
|
title?: string;
|
|
105
110
|
instructions?: string;
|
|
106
|
-
|
|
107
|
-
sourceKinds: string[];
|
|
111
|
+
contextId: string;
|
|
108
112
|
analysis?: any;
|
|
109
113
|
rows: any[];
|
|
110
114
|
schema?: DatasetSchemaInput;
|
|
@@ -113,11 +117,12 @@ export type MaterializeRowsParams = {
|
|
|
113
117
|
};
|
|
114
118
|
export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
|
|
115
119
|
readonly datasetId: string;
|
|
116
|
-
fromFile(
|
|
117
|
-
fromText(
|
|
118
|
-
fromDataset(
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
fromFile(resource: DatasetFileResourceInput): DatasetBuilder<Runtime>;
|
|
121
|
+
fromText(resource: DatasetTextResourceInput): DatasetBuilder<Runtime>;
|
|
122
|
+
fromDataset(resource: DatasetExistingResourceInput): DatasetBuilder<Runtime>;
|
|
123
|
+
fromContext(context: DatasetContextResourceInput): DatasetBuilder<Runtime>;
|
|
124
|
+
from(...resources: DatasetResourceInput[]): DatasetBuilder<Runtime>;
|
|
125
|
+
fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleQueryDomain<Runtime, D>, resource: DatasetQueryResourceOptions<D, Q>): DatasetBuilder<Runtime>;
|
|
121
126
|
title(title: string): DatasetBuilder<Runtime>;
|
|
122
127
|
sandbox(input: {
|
|
123
128
|
sandboxId: string;
|
|
@@ -313,7 +313,7 @@ function buildValidationFailureSummary(params) {
|
|
|
313
313
|
}
|
|
314
314
|
function buildRepairInstructions(summary) {
|
|
315
315
|
const instructions = [
|
|
316
|
-
"Rewrite output.jsonl using the schema as the
|
|
316
|
+
"Rewrite output.jsonl using the schema as the authority. Do not use input file headers as JSON keys unless they exactly match schema property names.",
|
|
317
317
|
"Each non-empty line must be a JSON object shaped as {\"type\":\"row\",\"data\":{...}}.",
|
|
318
318
|
"Populate every required top-level and nested required path from failureSummary.requiredPaths.",
|
|
319
319
|
"For enum fields, emit exactly one allowed literal from failureSummary.enumConstraints or failureSummary.enumFailures.",
|
package/dist/dataset.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import type { AnyDatasetRuntime, DatasetBuilder, DatasetBuilderOptions, DatasetRuntimeHandle } from "./builder/types.js";
|
|
2
|
-
export type { AnyDatasetRuntime,
|
|
2
|
+
export type { AnyDatasetRuntime, CompatibleQueryDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingResource, DatasetExistingResourceInput, DatasetFileResource, DatasetFileResourceInput, DatasetMode, DatasetOutput, DatasetQueryResourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextResource, DatasetResourceInput, DatasetTextResourceInput, } from "./builder/types.js";
|
|
3
3
|
export declare function dataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime & DatasetRuntimeHandle<Runtime>, options?: DatasetBuilderOptions): DatasetBuilder<Runtime>;
|
package/dist/dataset.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { buildObjectOutputInstructions } from "./builder/instructions.js";
|
|
2
|
+
import { resolveDatasetResourceContext } from "./builder/context.js";
|
|
2
3
|
import { createDatasetId } from "./id.js";
|
|
3
|
-
import { completeDatasetStep, materializeDerivedDataset,
|
|
4
|
-
import {
|
|
4
|
+
import { completeDatasetStep, materializeDerivedDataset, materializeSingleFileLikeResource, } from "./builder/materialize.js";
|
|
5
|
+
import { materializeQueryResource } from "./builder/materializeQuery.js";
|
|
5
6
|
import { createDatasetBuildResult, finalizeBuildResult, } from "./builder/persistence.js";
|
|
6
7
|
export function dataset(runtime, options = {}) {
|
|
7
8
|
const datasetId = normalizeDatasetId(options.datasetId);
|
|
@@ -9,7 +10,7 @@ export function dataset(runtime, options = {}) {
|
|
|
9
10
|
const state = {
|
|
10
11
|
runtime: typedRuntime,
|
|
11
12
|
env: typedRuntime.env,
|
|
12
|
-
|
|
13
|
+
resources: [],
|
|
13
14
|
output: "rows",
|
|
14
15
|
inferSchema: false,
|
|
15
16
|
durable: options.durable,
|
|
@@ -17,38 +18,46 @@ export function dataset(runtime, options = {}) {
|
|
|
17
18
|
};
|
|
18
19
|
const api = {
|
|
19
20
|
datasetId,
|
|
20
|
-
fromFile(
|
|
21
|
-
state.
|
|
21
|
+
fromFile(resource) {
|
|
22
|
+
state.resources.push({ kind: "file", ...resource });
|
|
22
23
|
return api;
|
|
23
24
|
},
|
|
24
|
-
fromText(
|
|
25
|
-
state.
|
|
25
|
+
fromText(resource) {
|
|
26
|
+
state.resources.push({ kind: "text", ...resource });
|
|
26
27
|
return api;
|
|
27
28
|
},
|
|
28
|
-
fromDataset(
|
|
29
|
-
state.
|
|
29
|
+
fromDataset(resource) {
|
|
30
|
+
state.resources.push({ kind: "dataset", ...resource });
|
|
30
31
|
return api;
|
|
31
32
|
},
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
33
|
+
fromContext(context) {
|
|
34
|
+
state.resources.push({ kind: "context", ...context });
|
|
35
|
+
return api;
|
|
36
|
+
},
|
|
37
|
+
from(...resources) {
|
|
38
|
+
for (const resource of resources) {
|
|
39
|
+
if ("kind" in resource) {
|
|
40
|
+
state.resources.push(resource);
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
43
|
+
if ("fileId" in resource) {
|
|
44
|
+
state.resources.push({ kind: "file", ...resource });
|
|
36
45
|
continue;
|
|
37
46
|
}
|
|
38
|
-
if ("
|
|
39
|
-
state.
|
|
47
|
+
if ("datasetId" in resource) {
|
|
48
|
+
state.resources.push({ kind: "dataset", ...resource });
|
|
40
49
|
continue;
|
|
41
50
|
}
|
|
42
|
-
if ("
|
|
43
|
-
state.
|
|
51
|
+
if ("id" in resource || "key" in resource) {
|
|
52
|
+
state.resources.push({ kind: "context", ...resource });
|
|
44
53
|
continue;
|
|
45
54
|
}
|
|
46
|
-
state.
|
|
55
|
+
state.resources.push({ kind: "text", ...resource });
|
|
47
56
|
}
|
|
48
57
|
return api;
|
|
49
58
|
},
|
|
50
|
-
fromQuery(domain,
|
|
51
|
-
state.
|
|
59
|
+
fromQuery(domain, resource) {
|
|
60
|
+
state.resources.push({ kind: "query", domain, ...resource });
|
|
52
61
|
return api;
|
|
53
62
|
},
|
|
54
63
|
title(title) {
|
|
@@ -96,8 +105,8 @@ export function dataset(runtime, options = {}) {
|
|
|
96
105
|
return api;
|
|
97
106
|
},
|
|
98
107
|
async build(options) {
|
|
99
|
-
if (state.
|
|
100
|
-
throw new Error("
|
|
108
|
+
if (state.resources.length === 0) {
|
|
109
|
+
throw new Error("dataset_resources_required");
|
|
101
110
|
}
|
|
102
111
|
const targetDatasetId = options?.datasetId
|
|
103
112
|
? normalizeDatasetId(options.datasetId)
|
|
@@ -106,6 +115,9 @@ export function dataset(runtime, options = {}) {
|
|
|
106
115
|
...state,
|
|
107
116
|
durable: options?.durable ?? state.durable,
|
|
108
117
|
};
|
|
118
|
+
const context = await resolveDatasetResourceContext(typedRuntime, targetDatasetId, stateWithBuildOptions.resources);
|
|
119
|
+
stateWithBuildOptions.resources = context.resources;
|
|
120
|
+
stateWithBuildOptions.contextId = context.contextId;
|
|
109
121
|
const effectiveState = stateWithBuildOptions.output === "object"
|
|
110
122
|
? {
|
|
111
123
|
...stateWithBuildOptions,
|
|
@@ -113,25 +125,26 @@ export function dataset(runtime, options = {}) {
|
|
|
113
125
|
instructions: buildObjectOutputInstructions(stateWithBuildOptions.instructions),
|
|
114
126
|
}
|
|
115
127
|
: stateWithBuildOptions;
|
|
116
|
-
const
|
|
117
|
-
const
|
|
128
|
+
const onlyResource = effectiveState.resources[0];
|
|
129
|
+
const isSingleResource = effectiveState.resources.length === 1;
|
|
118
130
|
const hasInstructions = Boolean(String(effectiveState.instructions ?? "").trim());
|
|
119
|
-
if (
|
|
120
|
-
await
|
|
131
|
+
if (isSingleResource && onlyResource.kind === "query" && !hasInstructions) {
|
|
132
|
+
await materializeQueryResource(effectiveState.runtime, onlyResource, {
|
|
121
133
|
datasetId: targetDatasetId,
|
|
122
134
|
sandboxId: effectiveState.sandboxId,
|
|
123
135
|
schema: effectiveState.outputSchema,
|
|
124
|
-
title: effectiveState.title ??
|
|
136
|
+
title: effectiveState.title ?? onlyResource.title,
|
|
125
137
|
instructions: effectiveState.instructions,
|
|
126
138
|
first: effectiveState.first,
|
|
139
|
+
contextId: effectiveState.contextId ?? "",
|
|
127
140
|
});
|
|
128
141
|
return finalizeOutputResult(await finalizeBuildResult(effectiveState.runtime, targetDatasetId, effectiveState.first), effectiveState.output);
|
|
129
142
|
}
|
|
130
|
-
if (
|
|
143
|
+
if (isSingleResource && (onlyResource.kind === "file" || onlyResource.kind === "text")) {
|
|
131
144
|
if (!effectiveState.reactor) {
|
|
132
145
|
throw new Error("dataset_reactor_required");
|
|
133
146
|
}
|
|
134
|
-
await
|
|
147
|
+
await materializeSingleFileLikeResource(effectiveState, onlyResource, targetDatasetId);
|
|
135
148
|
const completed = await completeDatasetStep({
|
|
136
149
|
runtime: effectiveState.runtime,
|
|
137
150
|
datasetId: targetDatasetId,
|
package/dist/datasetFiles.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export declare const DATASET_OUTPUT_FILE_NAME = "output.jsonl";
|
|
2
2
|
export declare function getDatasetWorkdirBase(): string;
|
|
3
3
|
export declare function getDatasetWorkstation(datasetId: string): string;
|
|
4
|
-
export declare function
|
|
4
|
+
export declare function getDatasetResourcesDir(datasetId: string): string;
|
|
5
5
|
export declare function getDatasetScriptsDir(datasetId: string): string;
|
|
6
6
|
export declare function getDatasetArtifactsDir(datasetId: string): string;
|
|
7
7
|
export declare function getDatasetLogsDir(datasetId: string): string;
|
package/dist/datasetFiles.js
CHANGED
|
@@ -9,8 +9,8 @@ export function getDatasetWorkdirBase() {
|
|
|
9
9
|
export function getDatasetWorkstation(datasetId) {
|
|
10
10
|
return `${getDatasetWorkdirBase()}/${datasetId}`;
|
|
11
11
|
}
|
|
12
|
-
export function
|
|
13
|
-
return `${getDatasetWorkstation(datasetId)}/
|
|
12
|
+
export function getDatasetResourcesDir(datasetId) {
|
|
13
|
+
return `${getDatasetWorkstation(datasetId)}/resources`;
|
|
14
14
|
}
|
|
15
15
|
export function getDatasetScriptsDir(datasetId) {
|
|
16
16
|
return `${getDatasetWorkstation(datasetId)}/scripts`;
|
|
@@ -24,7 +24,7 @@ export function getDatasetLogsDir(datasetId) {
|
|
|
24
24
|
export function getDatasetStandardDirs(datasetId) {
|
|
25
25
|
return [
|
|
26
26
|
getDatasetWorkstation(datasetId),
|
|
27
|
-
|
|
27
|
+
getDatasetResourcesDir(datasetId),
|
|
28
28
|
getDatasetScriptsDir(datasetId),
|
|
29
29
|
getDatasetArtifactsDir(datasetId),
|
|
30
30
|
getDatasetLogsDir(datasetId),
|
|
@@ -210,7 +210,7 @@ export function createFileParseContext(fileId, opts) {
|
|
|
210
210
|
{
|
|
211
211
|
type: "file",
|
|
212
212
|
fileId,
|
|
213
|
-
filename: opts?.filename ?? "
|
|
213
|
+
filename: opts?.filename ?? "resource-file",
|
|
214
214
|
mediaType: opts?.mediaType ?? "application/octet-stream",
|
|
215
215
|
},
|
|
216
216
|
],
|
|
@@ -218,14 +218,13 @@ export function createFileParseContext(fileId, opts) {
|
|
|
218
218
|
};
|
|
219
219
|
params.sourceEventId = triggerEvent.id;
|
|
220
220
|
params.sourcePartIndex = 1;
|
|
221
|
-
params.filename = opts?.filename ?? "
|
|
221
|
+
params.filename = opts?.filename ?? "resource-file";
|
|
222
222
|
params.mediaType = opts?.mediaType ?? "application/octet-stream";
|
|
223
223
|
const shell = await context.react(triggerEvent, {
|
|
224
224
|
runtime: runtime,
|
|
225
225
|
context: { key: `dataset:${datasetId}` },
|
|
226
226
|
durable: options.durable ?? false,
|
|
227
227
|
options: {
|
|
228
|
-
silent: true,
|
|
229
228
|
preventClose: true,
|
|
230
229
|
sendFinish: false,
|
|
231
230
|
maxIterations: 20,
|
|
@@ -237,7 +236,7 @@ export function createFileParseContext(fileId, opts) {
|
|
|
237
236
|
fileId,
|
|
238
237
|
sourceEventId: triggerEvent.id,
|
|
239
238
|
sourcePartIndex: 1,
|
|
240
|
-
filename: opts?.filename ?? "
|
|
239
|
+
filename: opts?.filename ?? "resource-file",
|
|
241
240
|
mediaType: opts?.mediaType ?? "application/octet-stream",
|
|
242
241
|
instructions: opts?.instructions ?? "",
|
|
243
242
|
sandboxId: opts?.sandboxId ?? "",
|
package/dist/file/prompts.js
CHANGED
|
@@ -11,13 +11,13 @@ function buildRole() {
|
|
|
11
11
|
function buildGoal() {
|
|
12
12
|
let xml = create()
|
|
13
13
|
.ele("Goal")
|
|
14
|
-
.txt("Convert the
|
|
14
|
+
.txt("Convert the input file into a validated JSONL dataset (output.jsonl) where each line is a JSON object conforming to a generated schema. The schema describes ONE data record structure. Extract ONLY data records; exclude any header sections, metadata, or summary information from the file.")
|
|
15
15
|
.up();
|
|
16
16
|
return xml.end({ prettyPrint: true, headless: true });
|
|
17
17
|
}
|
|
18
|
-
function
|
|
18
|
+
function buildResourceInfo(context) {
|
|
19
19
|
let xml = create()
|
|
20
|
-
.ele("
|
|
20
|
+
.ele("FileResource")
|
|
21
21
|
.ele("Type").txt("file").up()
|
|
22
22
|
.ele("FileId").txt(context.fileId).up()
|
|
23
23
|
.ele("DatasetId").txt(context.datasetId).up()
|
|
@@ -90,7 +90,7 @@ function buildErrorsSection(errors) {
|
|
|
90
90
|
}
|
|
91
91
|
let xml = create()
|
|
92
92
|
.ele("PreviousErrors")
|
|
93
|
-
.ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch
|
|
93
|
+
.ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch input column names into schema keys piecemeal.").up();
|
|
94
94
|
for (const error of errors) {
|
|
95
95
|
xml = xml.ele("Error").txt(error).up();
|
|
96
96
|
}
|
|
@@ -100,8 +100,8 @@ function buildErrorsSection(errors) {
|
|
|
100
100
|
function buildContextSection(context) {
|
|
101
101
|
let xml = create()
|
|
102
102
|
.ele("Context");
|
|
103
|
-
const
|
|
104
|
-
xml = xml.import(
|
|
103
|
+
const resourceXml = buildResourceInfo(context);
|
|
104
|
+
xml = xml.import(resourceXml.first());
|
|
105
105
|
if (context.filePreview) {
|
|
106
106
|
const previewXml = buildFilePreviewSection(context.filePreview);
|
|
107
107
|
xml = xml.import(previewXml.first());
|
|
@@ -195,9 +195,9 @@ function buildSchemaSection(context) {
|
|
|
195
195
|
xml = xml
|
|
196
196
|
.ele("SchemaContract")
|
|
197
197
|
.ele("Purpose").txt("Compact output contract derived from JSON Schema. Use this before writing output.jsonl.").up()
|
|
198
|
-
.ele("Rule").txt("Use only schema property keys in data objects.
|
|
198
|
+
.ele("Rule").txt("Use only schema property keys in data objects. Input headers are input labels, not output keys.").up()
|
|
199
199
|
.ele("Rule").txt("Required paths are required everywhere, including nested objects and array items.").up()
|
|
200
|
-
.ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize
|
|
200
|
+
.ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize input labels to the closest valid enum literal; never emit a value outside the enum.").up();
|
|
201
201
|
xml = appendLimitedList(xml, "RequiredPaths", "Path", contract.requiredPaths, 120);
|
|
202
202
|
xml = appendLimitedList(xml, "PropertyPaths", "Path", contract.propertyPaths, 160);
|
|
203
203
|
let enumsXml = xml.ele("EnumConstraints");
|
|
@@ -245,10 +245,10 @@ function buildInstructions(context) {
|
|
|
245
245
|
.ele("Requirements")
|
|
246
246
|
.ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
|
|
247
247
|
.ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
|
|
248
|
-
.ele("Requirement").txt("Build a schema-first mapping from
|
|
248
|
+
.ele("Requirement").txt("Build a schema-first mapping from input columns to schema fields before writing output.jsonl. Do not use raw input headers as JSON keys unless they are exactly schema keys").up()
|
|
249
249
|
.ele("Requirement").txt("For nested required fields, populate the required child keys inside each nested object or array item; top-level validity is not enough").up()
|
|
250
250
|
.ele("Requirement").txt("For enum fields, emit exactly one allowed enum literal from SchemaContract; normalize labels or abbreviations into allowed literals").up()
|
|
251
|
-
.ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the
|
|
251
|
+
.ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the input language").up()
|
|
252
252
|
.ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
|
|
253
253
|
.up()
|
|
254
254
|
.up();
|
|
@@ -286,8 +286,8 @@ function buildInstructions(context) {
|
|
|
286
286
|
.up()
|
|
287
287
|
.ele("Rules")
|
|
288
288
|
.ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
|
|
289
|
-
.ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the
|
|
290
|
-
.ele("Rule").txt("Original/
|
|
289
|
+
.ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the input language").up()
|
|
290
|
+
.ele("Rule").txt("Original/input language applies to extracted values only, not to JSON object keys").up()
|
|
291
291
|
.ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
|
|
292
292
|
.ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
|
|
293
293
|
.ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()
|
|
@@ -7,26 +7,30 @@ declare const materializeDatasetToolInputSchema: z.ZodObject<{
|
|
|
7
7
|
datasetId: z.ZodOptional<z.ZodString>;
|
|
8
8
|
sandboxId: z.ZodOptional<z.ZodString>;
|
|
9
9
|
title: z.ZodOptional<z.ZodString>;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
fileId: z.ZodString;
|
|
13
|
-
description: z.ZodOptional<z.ZodString>;
|
|
10
|
+
context: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
|
|
11
|
+
id: z.ZodString;
|
|
14
12
|
}, z.core.$strip>, z.ZodObject<{
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
13
|
+
key: z.ZodString;
|
|
14
|
+
}, z.core.$strip>]>>;
|
|
15
|
+
files: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
16
|
+
description: z.ZodOptional<z.ZodString>;
|
|
17
|
+
fileId: z.ZodString;
|
|
18
|
+
}, z.core.$strip>>>;
|
|
19
|
+
texts: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
18
20
|
name: z.ZodOptional<z.ZodString>;
|
|
21
|
+
text: z.ZodString;
|
|
19
22
|
description: z.ZodOptional<z.ZodString>;
|
|
20
|
-
|
|
21
|
-
|
|
23
|
+
mimeType: z.ZodOptional<z.ZodString>;
|
|
24
|
+
}, z.core.$strip>>>;
|
|
25
|
+
datasets: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
22
26
|
datasetId: z.ZodString;
|
|
23
27
|
description: z.ZodOptional<z.ZodString>;
|
|
24
|
-
}, z.core.$strip
|
|
25
|
-
|
|
26
|
-
query: z.ZodRecord<z.ZodString, z.ZodAny>;
|
|
28
|
+
}, z.core.$strip>>>;
|
|
29
|
+
queries: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
27
30
|
title: z.ZodOptional<z.ZodString>;
|
|
31
|
+
query: z.ZodRecord<z.ZodString, z.ZodAny>;
|
|
28
32
|
explanation: z.ZodOptional<z.ZodString>;
|
|
29
|
-
}, z.core.$strip
|
|
33
|
+
}, z.core.$strip>>>;
|
|
30
34
|
instructions: z.ZodOptional<z.ZodString>;
|
|
31
35
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
32
36
|
schema: "schema";
|
|
@@ -52,29 +56,33 @@ export declare function createMaterializeDatasetTool<Runtime extends AnyMaterial
|
|
|
52
56
|
queryDomain: QueryDomain & CompatibleToolQueryDomain<Runtime, QueryDomain>;
|
|
53
57
|
toolName?: string;
|
|
54
58
|
}): import("ai").Tool<{
|
|
55
|
-
|
|
56
|
-
|
|
59
|
+
datasetId?: string | undefined;
|
|
60
|
+
sandboxId?: string | undefined;
|
|
61
|
+
title?: string | undefined;
|
|
62
|
+
context?: {
|
|
63
|
+
id: string;
|
|
64
|
+
} | {
|
|
65
|
+
key: string;
|
|
66
|
+
} | undefined;
|
|
67
|
+
files?: {
|
|
57
68
|
fileId: string;
|
|
58
69
|
description?: string | undefined;
|
|
59
|
-
} |
|
|
60
|
-
|
|
70
|
+
}[] | undefined;
|
|
71
|
+
texts?: {
|
|
61
72
|
text: string;
|
|
62
|
-
mimeType?: string | undefined;
|
|
63
73
|
name?: string | undefined;
|
|
64
74
|
description?: string | undefined;
|
|
65
|
-
|
|
66
|
-
|
|
75
|
+
mimeType?: string | undefined;
|
|
76
|
+
}[] | undefined;
|
|
77
|
+
datasets?: {
|
|
67
78
|
datasetId: string;
|
|
68
79
|
description?: string | undefined;
|
|
69
|
-
} |
|
|
70
|
-
|
|
80
|
+
}[] | undefined;
|
|
81
|
+
queries?: {
|
|
71
82
|
query: Record<string, any>;
|
|
72
83
|
title?: string | undefined;
|
|
73
84
|
explanation?: string | undefined;
|
|
74
|
-
}
|
|
75
|
-
datasetId?: string | undefined;
|
|
76
|
-
sandboxId?: string | undefined;
|
|
77
|
-
title?: string | undefined;
|
|
85
|
+
}[] | undefined;
|
|
78
86
|
instructions?: string | undefined;
|
|
79
87
|
mode?: "schema" | "auto" | undefined;
|
|
80
88
|
output?: "object" | "rows" | undefined;
|
|
@@ -1,29 +1,33 @@
|
|
|
1
1
|
import { tool } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { dataset } from "./dataset.js";
|
|
4
|
-
const
|
|
4
|
+
const fileResourceSchema = z.object({
|
|
5
5
|
kind: z.literal("file"),
|
|
6
6
|
fileId: z.string(),
|
|
7
7
|
description: z.string().optional(),
|
|
8
8
|
});
|
|
9
|
-
const
|
|
9
|
+
const textResourceSchema = z.object({
|
|
10
10
|
kind: z.literal("text"),
|
|
11
11
|
text: z.string(),
|
|
12
12
|
mimeType: z.string().optional(),
|
|
13
13
|
name: z.string().optional(),
|
|
14
14
|
description: z.string().optional(),
|
|
15
15
|
});
|
|
16
|
-
const
|
|
16
|
+
const datasetResourceSchema = z.object({
|
|
17
17
|
kind: z.literal("dataset"),
|
|
18
18
|
datasetId: z.string(),
|
|
19
19
|
description: z.string().optional(),
|
|
20
20
|
});
|
|
21
|
-
const
|
|
21
|
+
const queryResourceSchema = z.object({
|
|
22
22
|
kind: z.literal("query"),
|
|
23
23
|
query: z.record(z.string(), z.any()),
|
|
24
24
|
title: z.string().optional(),
|
|
25
25
|
explanation: z.string().optional(),
|
|
26
26
|
});
|
|
27
|
+
const contextInputSchema = z.union([
|
|
28
|
+
z.object({ id: z.string() }),
|
|
29
|
+
z.object({ key: z.string() }),
|
|
30
|
+
]);
|
|
27
31
|
const datasetSchemaSchema = z.object({
|
|
28
32
|
title: z.string().optional(),
|
|
29
33
|
description: z.string().optional(),
|
|
@@ -33,14 +37,11 @@ const materializeDatasetToolInputSchema = z.object({
|
|
|
33
37
|
datasetId: z.string().optional(),
|
|
34
38
|
sandboxId: z.string().optional(),
|
|
35
39
|
title: z.string().optional(),
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
querySourceSchema,
|
|
42
|
-
]))
|
|
43
|
-
.min(1),
|
|
40
|
+
context: contextInputSchema.optional(),
|
|
41
|
+
files: z.array(fileResourceSchema.omit({ kind: true })).optional(),
|
|
42
|
+
texts: z.array(textResourceSchema.omit({ kind: true })).optional(),
|
|
43
|
+
datasets: z.array(datasetResourceSchema.omit({ kind: true })).optional(),
|
|
44
|
+
queries: z.array(queryResourceSchema.omit({ kind: true })).optional(),
|
|
44
45
|
instructions: z.string().optional(),
|
|
45
46
|
mode: z.enum(["auto", "schema"]).optional(),
|
|
46
47
|
output: z.enum(["rows", "object"]).optional(),
|
|
@@ -49,7 +50,7 @@ const materializeDatasetToolInputSchema = z.object({
|
|
|
49
50
|
});
|
|
50
51
|
export function createMaterializeDatasetTool(params) {
|
|
51
52
|
return tool({
|
|
52
|
-
description: "Materialize a dataset from declarative
|
|
53
|
+
description: "Materialize a dataset from declarative resources. Returns only the target datasetId. Query resources use the preconfigured runtime domain.",
|
|
53
54
|
inputSchema: materializeDatasetToolInputSchema,
|
|
54
55
|
execute: async (input) => {
|
|
55
56
|
let builder = dataset(params.runtime);
|
|
@@ -59,23 +60,33 @@ export function createMaterializeDatasetTool(params) {
|
|
|
59
60
|
if (input.sandboxId?.trim()) {
|
|
60
61
|
builder = builder.sandbox({ sandboxId: input.sandboxId });
|
|
61
62
|
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
63
|
+
const materialCount = (input.files?.length ?? 0) +
|
|
64
|
+
(input.texts?.length ?? 0) +
|
|
65
|
+
(input.datasets?.length ?? 0) +
|
|
66
|
+
(input.queries?.length ?? 0);
|
|
67
|
+
if (input.context && materialCount > 0) {
|
|
68
|
+
throw new Error("dataset_context_resource_is_exclusive");
|
|
69
|
+
}
|
|
70
|
+
if (!input.context && materialCount === 0) {
|
|
71
|
+
throw new Error("dataset_context_or_material_required");
|
|
72
|
+
}
|
|
73
|
+
if (input.context) {
|
|
74
|
+
builder = builder.fromContext(input.context);
|
|
75
|
+
}
|
|
76
|
+
for (const resource of input.files ?? []) {
|
|
77
|
+
builder = builder.fromFile(resource);
|
|
78
|
+
}
|
|
79
|
+
for (const resource of input.texts ?? []) {
|
|
80
|
+
builder = builder.fromText(resource);
|
|
81
|
+
}
|
|
82
|
+
for (const resource of input.datasets ?? []) {
|
|
83
|
+
builder = builder.fromDataset(resource);
|
|
84
|
+
}
|
|
85
|
+
for (const resource of input.queries ?? []) {
|
|
75
86
|
builder = builder.fromQuery(params.queryDomain, {
|
|
76
|
-
query:
|
|
77
|
-
title:
|
|
78
|
-
explanation:
|
|
87
|
+
query: resource.query,
|
|
88
|
+
title: resource.title,
|
|
89
|
+
explanation: resource.explanation,
|
|
79
90
|
});
|
|
80
91
|
}
|
|
81
92
|
if (input.output === "object") {
|