@ekairos/dataset 1.22.82-beta.development.0 → 1.22.84-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/agentMaterializers.d.ts +2 -2
- package/dist/builder/context.d.ts +7 -0
- package/dist/builder/context.js +192 -0
- package/dist/builder/instructions.d.ts +3 -3
- package/dist/builder/instructions.js +10 -10
- package/dist/builder/materialize.d.ts +12 -11
- package/dist/builder/materialize.js +122 -121
- package/dist/builder/materializeQuery.d.ts +3 -2
- package/dist/builder/materializeQuery.js +10 -19
- package/dist/builder/persistence.d.ts +4 -5
- package/dist/builder/persistence.js +20 -19
- package/dist/builder/types.d.ts +31 -24
- package/dist/completeDataset.steps.d.ts +9 -8
- package/dist/completeDataset.steps.js +18 -11
- package/dist/completeDataset.tool.d.ts +9 -8
- package/dist/completeDataset.tool.js +2 -1
- package/dist/contextWorkspace.d.ts +72 -0
- package/dist/contextWorkspace.js +218 -0
- package/dist/dataset.d.ts +1 -1
- package/dist/dataset.js +42 -29
- package/dist/datasetFiles.d.ts +1 -1
- package/dist/datasetFiles.js +3 -3
- package/dist/executeCommand.tool.d.ts +1 -43
- package/dist/executeCommand.tool.js +10 -3
- package/dist/file/file-dataset.agent.d.ts +2 -0
- package/dist/file/file-dataset.agent.js +51 -16
- package/dist/file/file-dataset.steps.d.ts +6 -0
- package/dist/file/file-dataset.steps.js +18 -21
- package/dist/file/file-dataset.types.d.ts +10 -0
- package/dist/file/prompts.js +16 -14
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/materializeDataset.tool.d.ts +34 -26
- package/dist/materializeDataset.tool.js +40 -29
- package/dist/schema.d.ts +12 -2
- package/dist/schema.js +6 -3
- package/dist/service.d.ts +2 -2
- package/dist/service.js +6 -3
- package/dist/transform/filepreview.d.ts +2 -2
- package/dist/transform/filepreview.js +3 -3
- package/dist/transform/prompts.js +25 -25
- package/dist/transform/transform-dataset.agent.d.ts +4 -4
- package/dist/transform/transform-dataset.agent.js +29 -30
- package/dist/transform/transform-dataset.steps.d.ts +7 -7
- package/dist/transform/transform-dataset.steps.js +20 -20
- package/dist/transform/transform-dataset.types.d.ts +13 -13
- package/dist/transform/transformDataset.js +4 -4
- package/package.json +4 -4
- /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
- /package/dist/builder/{sourceRows.js → rows.js} +0 -0
package/dist/builder/types.d.ts
CHANGED
|
@@ -1,38 +1,44 @@
|
|
|
1
1
|
import type { InstaQLParams, ValidQuery } from "@instantdb/core";
|
|
2
2
|
import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
|
|
3
3
|
import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
|
|
4
|
-
import type { ContextReactor } from "@ekairos/events";
|
|
4
|
+
import type { ContextIdentifier, ContextReactor } from "@ekairos/events";
|
|
5
5
|
import { datasetDomain } from "../schema.js";
|
|
6
|
-
export type
|
|
6
|
+
export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
|
|
7
7
|
query: InstaQLParams<DomainInstantSchema<D>>;
|
|
8
8
|
title?: string;
|
|
9
9
|
explanation?: string;
|
|
10
10
|
domain: D;
|
|
11
11
|
};
|
|
12
|
-
export type
|
|
12
|
+
export type DatasetFileResourceInput = {
|
|
13
13
|
fileId: string;
|
|
14
14
|
description?: string;
|
|
15
|
+
filename?: string;
|
|
16
|
+
mediaType?: string;
|
|
15
17
|
};
|
|
16
|
-
export type
|
|
18
|
+
export type DatasetTextResourceInput = {
|
|
17
19
|
text: string;
|
|
18
20
|
mimeType?: string;
|
|
19
21
|
name?: string;
|
|
20
22
|
description?: string;
|
|
21
23
|
};
|
|
22
|
-
export type
|
|
24
|
+
export type DatasetExistingResourceInput = {
|
|
23
25
|
datasetId: string;
|
|
24
26
|
description?: string;
|
|
25
27
|
};
|
|
26
|
-
export type
|
|
28
|
+
export type DatasetContextResourceInput = ContextIdentifier;
|
|
29
|
+
export type DatasetFileResource = {
|
|
27
30
|
kind: "file";
|
|
28
|
-
} &
|
|
29
|
-
export type
|
|
31
|
+
} & DatasetFileResourceInput;
|
|
32
|
+
export type DatasetTextResource = {
|
|
30
33
|
kind: "text";
|
|
31
|
-
} &
|
|
32
|
-
export type
|
|
34
|
+
} & DatasetTextResourceInput;
|
|
35
|
+
export type DatasetExistingResource = {
|
|
33
36
|
kind: "dataset";
|
|
34
|
-
} &
|
|
35
|
-
export type
|
|
37
|
+
} & DatasetExistingResourceInput;
|
|
38
|
+
export type DatasetContextResource = {
|
|
39
|
+
kind: "context";
|
|
40
|
+
} & DatasetContextResourceInput;
|
|
41
|
+
export type DatasetResourceInput = DatasetFileResourceInput | DatasetTextResourceInput | DatasetExistingResourceInput | DatasetContextResourceInput | DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource;
|
|
36
42
|
export type DatasetSchemaInput = {
|
|
37
43
|
title?: string;
|
|
38
44
|
description?: string;
|
|
@@ -48,9 +54,9 @@ export type DatasetBuildOptions = {
|
|
|
48
54
|
datasetId?: string;
|
|
49
55
|
durable?: boolean;
|
|
50
56
|
};
|
|
51
|
-
export type
|
|
57
|
+
export type InternalDatasetResource = DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource | ({
|
|
52
58
|
kind: "query";
|
|
53
|
-
} &
|
|
59
|
+
} & DatasetQueryResourceInput);
|
|
54
60
|
export type DatasetReaderResult = {
|
|
55
61
|
rows: any[];
|
|
56
62
|
cursor: number;
|
|
@@ -76,8 +82,8 @@ export type DatasetRuntimeEnv = {
|
|
|
76
82
|
};
|
|
77
83
|
export type AnyDatasetRuntime = EkairosRuntime<any, any, any>;
|
|
78
84
|
export type DatasetRuntimeHandle<Runtime extends AnyDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
|
|
79
|
-
export type
|
|
80
|
-
export type
|
|
85
|
+
export type CompatibleQueryDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
|
|
86
|
+
export type DatasetQueryResourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
|
|
81
87
|
query: Q;
|
|
82
88
|
title?: string;
|
|
83
89
|
explanation?: string;
|
|
@@ -85,9 +91,10 @@ export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends Va
|
|
|
85
91
|
export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
|
|
86
92
|
runtime: Runtime;
|
|
87
93
|
env: Runtime["env"] & DatasetRuntimeEnv;
|
|
88
|
-
|
|
94
|
+
resources: InternalDatasetResource[];
|
|
89
95
|
title?: string;
|
|
90
96
|
sandboxId?: string;
|
|
97
|
+
contextId?: string;
|
|
91
98
|
outputSchema?: DatasetSchemaInput;
|
|
92
99
|
output: DatasetOutput;
|
|
93
100
|
inferSchema: boolean;
|
|
@@ -101,8 +108,7 @@ export type MaterializeRowsParams = {
|
|
|
101
108
|
sandboxId?: string;
|
|
102
109
|
title?: string;
|
|
103
110
|
instructions?: string;
|
|
104
|
-
|
|
105
|
-
sourceKinds: string[];
|
|
111
|
+
contextId: string;
|
|
106
112
|
analysis?: any;
|
|
107
113
|
rows: any[];
|
|
108
114
|
schema?: DatasetSchemaInput;
|
|
@@ -111,11 +117,12 @@ export type MaterializeRowsParams = {
|
|
|
111
117
|
};
|
|
112
118
|
export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
|
|
113
119
|
readonly datasetId: string;
|
|
114
|
-
fromFile(
|
|
115
|
-
fromText(
|
|
116
|
-
fromDataset(
|
|
117
|
-
|
|
118
|
-
|
|
120
|
+
fromFile(resource: DatasetFileResourceInput): DatasetBuilder<Runtime>;
|
|
121
|
+
fromText(resource: DatasetTextResourceInput): DatasetBuilder<Runtime>;
|
|
122
|
+
fromDataset(resource: DatasetExistingResourceInput): DatasetBuilder<Runtime>;
|
|
123
|
+
fromContext(context: DatasetContextResourceInput): DatasetBuilder<Runtime>;
|
|
124
|
+
from(...resources: DatasetResourceInput[]): DatasetBuilder<Runtime>;
|
|
125
|
+
fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleQueryDomain<Runtime, D>, resource: DatasetQueryResourceOptions<D, Q>): DatasetBuilder<Runtime>;
|
|
119
126
|
title(title: string): DatasetBuilder<Runtime>;
|
|
120
127
|
sandbox(input: {
|
|
121
128
|
sandboxId: string;
|
|
@@ -3,8 +3,9 @@ export interface PersistDatasetStepParams {
|
|
|
3
3
|
sandboxId: string;
|
|
4
4
|
runtime: any;
|
|
5
5
|
summary?: string;
|
|
6
|
+
outputPath?: string;
|
|
6
7
|
}
|
|
7
|
-
export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary }: PersistDatasetStepParams): Promise<{
|
|
8
|
+
export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }: PersistDatasetStepParams): Promise<{
|
|
8
9
|
success: boolean;
|
|
9
10
|
validation?: RowValidationEntry[];
|
|
10
11
|
validationTruncated?: number;
|
|
@@ -23,18 +24,18 @@ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summ
|
|
|
23
24
|
validation: RowValidationEntry[] | undefined;
|
|
24
25
|
error: string;
|
|
25
26
|
message: string;
|
|
26
|
-
|
|
27
|
-
|
|
27
|
+
records?: undefined;
|
|
28
|
+
summary?: undefined;
|
|
28
29
|
} | {
|
|
29
30
|
success: boolean;
|
|
30
31
|
status: string;
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
message: string;
|
|
32
|
+
records: number;
|
|
33
|
+
summary: string;
|
|
34
|
+
validRows?: undefined;
|
|
35
|
+
rowRecordCount?: undefined;
|
|
36
36
|
validation?: undefined;
|
|
37
37
|
error?: undefined;
|
|
38
|
+
message?: undefined;
|
|
38
39
|
}>;
|
|
39
40
|
type RowValidationEntry = {
|
|
40
41
|
index: number;
|
|
@@ -13,14 +13,15 @@ function getAjv() {
|
|
|
13
13
|
}
|
|
14
14
|
return ajvInstance;
|
|
15
15
|
}
|
|
16
|
-
export async function persistDatasetStep({ runtime, datasetId, sandboxId, summary }) {
|
|
16
|
+
export async function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }) {
|
|
17
17
|
"use step";
|
|
18
|
-
const
|
|
18
|
+
const resolvedOutputPath = outputPath ?? getDatasetOutputPath(datasetId);
|
|
19
|
+
const storagePath = resolveExecutionStoragePath(resolvedOutputPath, datasetId);
|
|
19
20
|
if (summary) {
|
|
20
21
|
console.log(`[Dataset ${datasetId}] Persisting completed dataset: ${summary}`);
|
|
21
22
|
}
|
|
22
23
|
try {
|
|
23
|
-
await ensureFileExists(runtime, sandboxId,
|
|
24
|
+
await ensureFileExists(runtime, sandboxId, resolvedOutputPath);
|
|
24
25
|
}
|
|
25
26
|
catch (error) {
|
|
26
27
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -85,7 +86,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
85
86
|
const validationResult = await validateJsonlRows({
|
|
86
87
|
runtime,
|
|
87
88
|
sandboxId,
|
|
88
|
-
outputPath,
|
|
89
|
+
outputPath: resolvedOutputPath,
|
|
89
90
|
validator,
|
|
90
91
|
schema: schemaJson,
|
|
91
92
|
datasetId,
|
|
@@ -96,7 +97,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
96
97
|
const totalValidRows = validationResult.validRowCount ?? 0;
|
|
97
98
|
const rowRecordCount = validationResult.rowRecordCount ?? totalValidRows;
|
|
98
99
|
console.log(`[Dataset ${datasetId}] Reading file content for upload`);
|
|
99
|
-
const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path:
|
|
100
|
+
const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: resolvedOutputPath });
|
|
100
101
|
if (!fileRead.contentBase64) {
|
|
101
102
|
console.error(`[Dataset ${datasetId}] Empty file content`);
|
|
102
103
|
return {
|
|
@@ -113,6 +114,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
113
114
|
const uploadResult = await service.uploadDatasetOutputFile({
|
|
114
115
|
datasetId,
|
|
115
116
|
fileBuffer: Buffer.from(fileRead.contentBase64, "base64"),
|
|
117
|
+
storagePath,
|
|
116
118
|
});
|
|
117
119
|
if (!uploadResult.ok) {
|
|
118
120
|
console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
|
|
@@ -150,13 +152,18 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
|
|
|
150
152
|
return {
|
|
151
153
|
success: true,
|
|
152
154
|
status: "completed",
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
fileId: uploadResult.data.fileId,
|
|
156
|
-
storagePath: uploadResult.data.storagePath,
|
|
157
|
-
message: "Dataset creation completed and uploaded to storage",
|
|
155
|
+
records: totalValidRows,
|
|
156
|
+
summary: summary ?? `Dataset completed with ${totalValidRows} records.`,
|
|
158
157
|
};
|
|
159
158
|
}
|
|
159
|
+
/**
 * Derives the storage path handed to the upload call from the sandbox output path.
 *
 * Backslashes are converted to forward slashes first. A path that starts with
 * "/tmp/ekairos/contexts/" keeps everything after the "/tmp/ekairos" prefix
 * (so the result begins with "/contexts/"); any other path maps to the
 * per-dataset default "/dataset/<datasetId>/output.jsonl".
 *
 * @param outputPath Sandbox path of the output file; null/undefined is treated as "".
 * @param datasetId Dataset id used to build the default storage path.
 * @returns The storage path string.
 */
function resolveExecutionStoragePath(outputPath, datasetId) {
    const sandboxPrefix = "/tmp/ekairos";
    const forwardSlashed = String(outputPath ?? "").replace(/\\/g, "/");
    const isContextWorkspacePath = forwardSlashed.startsWith("/tmp/ekairos/contexts/");
    return isContextWorkspacePath
        ? forwardSlashed.slice(sandboxPrefix.length)
        : `/dataset/${datasetId}/output.jsonl`;
}
|
|
160
167
|
async function ensureFileExists(runtime, sandboxId, path) {
|
|
161
168
|
const result = await runDatasetSandboxCommandStep({
|
|
162
169
|
runtime,
|
|
@@ -306,7 +313,7 @@ function buildValidationFailureSummary(params) {
|
|
|
306
313
|
}
|
|
307
314
|
function buildRepairInstructions(summary) {
|
|
308
315
|
const instructions = [
|
|
309
|
-
"Rewrite output.jsonl using the schema as the
|
|
316
|
+
"Rewrite output.jsonl using the schema as the authority. Do not use input file headers as JSON keys unless they exactly match schema property names.",
|
|
310
317
|
"Each non-empty line must be a JSON object shaped as {\"type\":\"row\",\"data\":{...}}.",
|
|
311
318
|
"Populate every required top-level and nested required path from failureSummary.requiredPaths.",
|
|
312
319
|
"For enum fields, emit exactly one allowed literal from failureSummary.enumConstraints or failureSummary.enumFailures.",
|
|
@@ -2,8 +2,9 @@ interface CompleteDatasetToolParams {
|
|
|
2
2
|
datasetId: string;
|
|
3
3
|
sandboxId: string;
|
|
4
4
|
runtime: any;
|
|
5
|
+
outputPath?: string;
|
|
5
6
|
}
|
|
6
|
-
export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime }: CompleteDatasetToolParams): import("ai").Tool<{
|
|
7
|
+
export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }: CompleteDatasetToolParams): import("ai").Tool<{
|
|
7
8
|
summary: string;
|
|
8
9
|
}, {
|
|
9
10
|
success: boolean;
|
|
@@ -82,18 +83,18 @@ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtim
|
|
|
82
83
|
}[] | undefined;
|
|
83
84
|
error: string;
|
|
84
85
|
message: string;
|
|
85
|
-
|
|
86
|
-
|
|
86
|
+
records?: undefined;
|
|
87
|
+
summary?: undefined;
|
|
87
88
|
} | {
|
|
88
89
|
success: boolean;
|
|
89
90
|
status: string;
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
message: string;
|
|
91
|
+
records: number;
|
|
92
|
+
summary: string;
|
|
93
|
+
validRows?: undefined;
|
|
94
|
+
rowRecordCount?: undefined;
|
|
95
95
|
validation?: undefined;
|
|
96
96
|
error?: undefined;
|
|
97
|
+
message?: undefined;
|
|
97
98
|
}>;
|
|
98
99
|
export declare function didCompleteDatasetSucceed(event: {
|
|
99
100
|
content?: {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { tool } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { persistDatasetStep } from "./completeDataset.steps.js";
|
|
4
|
-
export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
|
|
4
|
+
export function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }) {
|
|
5
5
|
return tool({
|
|
6
6
|
description: "Mark the dataset as completed. Use only when output.jsonl has been successfully generated and is ready for validation.",
|
|
7
7
|
inputSchema: z.object({
|
|
@@ -17,6 +17,7 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
|
|
|
17
17
|
datasetId,
|
|
18
18
|
sandboxId,
|
|
19
19
|
summary,
|
|
20
|
+
outputPath,
|
|
20
21
|
});
|
|
21
22
|
},
|
|
22
23
|
});
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
export type ContextWorkspaceFileRole = "input" | "output" | "artifact";
|
|
2
|
+
export type ContextWorkspaceFileInput = {
|
|
3
|
+
fileId: string;
|
|
4
|
+
filename?: string;
|
|
5
|
+
mediaType?: string;
|
|
6
|
+
role?: ContextWorkspaceFileRole;
|
|
7
|
+
sourceEventId?: string;
|
|
8
|
+
sourcePartIndex?: number;
|
|
9
|
+
};
|
|
10
|
+
export type PreparedContextWorkspaceFile = {
|
|
11
|
+
fileId: string;
|
|
12
|
+
filename: string;
|
|
13
|
+
mediaType?: string;
|
|
14
|
+
role: ContextWorkspaceFileRole;
|
|
15
|
+
path: string;
|
|
16
|
+
sourceEventId?: string;
|
|
17
|
+
sourcePartIndex?: number;
|
|
18
|
+
};
|
|
19
|
+
export type PreparedContextExecutionWorkspace = {
|
|
20
|
+
contextId: string;
|
|
21
|
+
executionId: string;
|
|
22
|
+
sandboxId: string;
|
|
23
|
+
root: string;
|
|
24
|
+
contextRoot: string;
|
|
25
|
+
eventsDir: string;
|
|
26
|
+
outputDir: string;
|
|
27
|
+
scriptsDir: string;
|
|
28
|
+
tmpDir: string;
|
|
29
|
+
manifestPath: string;
|
|
30
|
+
files: PreparedContextWorkspaceFile[];
|
|
31
|
+
};
|
|
32
|
+
export declare function getContextWorkspaceBase(): string;
|
|
33
|
+
export declare function getContextExecutionWorkspaceRoot(params: {
|
|
34
|
+
contextId: string;
|
|
35
|
+
executionId: string;
|
|
36
|
+
root?: string;
|
|
37
|
+
}): string;
|
|
38
|
+
export declare function getContextWorkspaceRoot(params: {
|
|
39
|
+
contextId: string;
|
|
40
|
+
root?: string;
|
|
41
|
+
}): string;
|
|
42
|
+
export declare function getContextEventsDir(params: {
|
|
43
|
+
contextId: string;
|
|
44
|
+
root?: string;
|
|
45
|
+
}): string;
|
|
46
|
+
export declare function getContextExecutionWorkspaceDirs(params: {
|
|
47
|
+
contextId: string;
|
|
48
|
+
executionId: string;
|
|
49
|
+
root?: string;
|
|
50
|
+
}): {
|
|
51
|
+
root: string;
|
|
52
|
+
contextRoot: string;
|
|
53
|
+
eventsDir: string;
|
|
54
|
+
outputDir: string;
|
|
55
|
+
scriptsDir: string;
|
|
56
|
+
tmpDir: string;
|
|
57
|
+
manifestPath: string;
|
|
58
|
+
};
|
|
59
|
+
export declare function getContextExecutionWorkspaceStandardDirs(params: {
|
|
60
|
+
contextId: string;
|
|
61
|
+
executionId: string;
|
|
62
|
+
root?: string;
|
|
63
|
+
}): string[];
|
|
64
|
+
export declare function extractContextWorkspaceFilesFromEventItems(eventItems: unknown[]): ContextWorkspaceFileInput[];
|
|
65
|
+
export declare function prepareContextExecutionWorkspaceStep(params: {
|
|
66
|
+
runtime: any;
|
|
67
|
+
sandboxId: string;
|
|
68
|
+
contextId: string;
|
|
69
|
+
executionId: string;
|
|
70
|
+
files: ContextWorkspaceFileInput[];
|
|
71
|
+
root?: string;
|
|
72
|
+
}): Promise<PreparedContextExecutionWorkspace>;
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import { readInstantFileStep } from "./file/steps.js";
|
|
2
|
+
import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
|
|
3
|
+
const CONTEXT_WORKSPACE_BASE = "/tmp/ekairos/contexts";
|
|
4
|
+
const WORKSPACE_MANIFEST_FILE_NAME = "manifest.json";
|
|
5
|
+
/**
 * Removes every trailing "/" from a path string.
 *
 * The previous version dropped only a single slash, so an input such as
 * "root//" came back as "root/" and later `${root}/output` joins produced
 * a double slash. Inputs with zero or one trailing slash behave as before.
 *
 * @param value Path string to trim.
 * @returns The path without trailing slashes ("/" becomes "").
 */
function trimTrailingSlash(value) {
    let end = value.length;
    // Walk back over the run of trailing slashes instead of checking just one.
    while (end > 0 && value[end - 1] === "/") {
        end -= 1;
    }
    return end === value.length ? value : value.slice(0, end);
}
|
|
8
|
+
/**
 * Reduces an arbitrary value to a single safe path segment.
 *
 * Steps: stringify (null/undefined become ""), trim, convert backslashes to
 * forward slashes, keep only the last non-empty "/"-separated piece, replace
 * every character outside [a-zA-Z0-9_.-] with "_", collapse runs of "_", and
 * cap the length at 160 characters.
 *
 * The segments "." and ".." are rejected in favour of the fallback. They
 * consist solely of allowed characters, so they previously passed through
 * unchanged and could walk out of the intended directory once interpolated
 * into a path.
 *
 * @param value Raw value to turn into a segment.
 * @param fallback Returned when nothing usable remains.
 * @returns The sanitized segment, or `fallback`.
 */
function sanitizePathSegment(value, fallback) {
    const segments = String(value ?? "")
        .trim()
        .replace(/\\/g, "/")
        .split("/")
        .filter(Boolean);
    const normalized = segments[segments.length - 1]
        ?.replace(/[^a-zA-Z0-9_.-]/g, "_")
        .replace(/_+/g, "_")
        .slice(0, 160);
    // "." and ".." are directory references, not names; never emit them.
    if (!normalized || normalized === "." || normalized === "..") {
        return fallback;
    }
    return normalized;
}
|
|
20
|
+
/**
 * Extracts a sanitized filename from a Content-Disposition style value.
 *
 * Resolution order:
 *   1. an extended `filename*=UTF-8''...` parameter, percent-decoded;
 *   2. a plain `filename=...` parameter (optionally quoted);
 *   3. the whole trimmed value.
 * Whatever is found goes through `sanitizePathSegment`; an empty input
 * returns `fallback` directly.
 *
 * A malformed percent sequence in `filename*` (for example a lone "%") makes
 * `decodeURIComponent` throw a URIError. That used to abort the caller; the
 * undecoded text is now sanitized instead.
 *
 * @param value Header value; null/undefined is treated as "".
 * @param fallback Name to use when nothing usable is found.
 * @returns A sanitized filename, or `fallback`.
 */
function filenameFromContentDisposition(value, fallback) {
    const raw = String(value ?? "").trim();
    if (!raw) {
        return fallback;
    }
    const filenameStar = raw.match(/filename\*=UTF-8''([^;]+)/i)?.[1];
    if (filenameStar) {
        let decoded;
        try {
            decoded = decodeURIComponent(filenameStar);
        }
        catch {
            // Invalid percent-encoding: keep the raw text, sanitizing will neutralize "%".
            decoded = filenameStar;
        }
        return sanitizePathSegment(decoded, fallback);
    }
    const filename = raw.match(/filename="?([^";]+)"?/i)?.[1];
    return sanitizePathSegment(filename ?? raw, fallback);
}
|
|
31
|
+
/**
 * Builds the directory for one part of one event:
 * "<eventsDir>/<sanitized event id>/parts/<index>".
 *
 * The event id falls back to "event" when it sanitizes to nothing. The index
 * is floored and clamped to be non-negative; a non-finite index becomes 0.
 *
 * @param params Object with `eventsDir`, `sourceEventId` and `sourcePartIndex`.
 * @returns The part directory path.
 */
function resolveContextEventPartDir(params) {
    const eventSegment = sanitizePathSegment(params.sourceEventId, "event");
    let partNumber = 0;
    if (Number.isFinite(params.sourcePartIndex)) {
        partNumber = Math.max(0, Math.floor(params.sourcePartIndex));
    }
    return `${params.eventsDir}/${eventSegment}/parts/${partNumber}`;
}
|
|
38
|
+
/**
 * Returns the path where a part's file content is stored: the part directory
 * followed by "/file".
 *
 * @param params Same object accepted by `resolveContextEventPartDir`.
 * @returns The file path inside the part directory.
 */
function resolveWorkspaceFilePath(params) {
    const partDir = resolveContextEventPartDir(params);
    return `${partDir}/file`;
}
|
|
41
|
+
/**
 * Returns the base directory for all context workspaces, i.e. the
 * CONTEXT_WORKSPACE_BASE constant passed through `trimTrailingSlash`.
 *
 * @returns The base directory path.
 */
export function getContextWorkspaceBase() {
    const base = trimTrailingSlash(CONTEXT_WORKSPACE_BASE);
    return base;
}
|
|
44
|
+
/**
 * Resolves the root directory of one execution's workspace.
 *
 * An explicit, truthy `params.root` wins and is returned after
 * `trimTrailingSlash`. Otherwise the path is
 * "<base>/<contextId>/executions/<executionId>", with both ids sanitized
 * (falling back to "context" and "execution" respectively).
 *
 * @param params Object with `contextId`, `executionId` and optional `root`.
 * @returns The execution workspace root.
 */
export function getContextExecutionWorkspaceRoot(params) {
    const { root } = params;
    if (root) {
        return trimTrailingSlash(root);
    }
    const contextSegment = sanitizePathSegment(params.contextId, "context");
    const executionSegment = sanitizePathSegment(params.executionId, "execution");
    const base = getContextWorkspaceBase();
    return `${base}/${contextSegment}/executions/${executionSegment}`;
}
|
|
51
|
+
/**
 * Resolves the root directory of a context's workspace.
 *
 * An explicit, truthy `params.root` is returned after `trimTrailingSlash`;
 * otherwise the result is "<base>/<contextId>" with the id sanitized
 * (fallback "context").
 *
 * @param params Object with `contextId` and optional `root`.
 * @returns The context workspace root.
 */
export function getContextWorkspaceRoot(params) {
    return params.root
        ? trimTrailingSlash(params.root)
        : `${getContextWorkspaceBase()}/${sanitizePathSegment(params.contextId, "context")}`;
}
|
|
57
|
+
/**
 * Returns the "events" directory located directly under the context
 * workspace root.
 *
 * @param params Object with `contextId` and optional `root`.
 * @returns The events directory path.
 */
export function getContextEventsDir(params) {
    const contextRoot = getContextWorkspaceRoot(params);
    return `${contextRoot}/events`;
}
|
|
60
|
+
/**
 * Computes every well-known path of an execution workspace.
 *
 * `root` is the execution root, `contextRoot` and `eventsDir` come from the
 * context-level helpers, and `outputDir`, `scriptsDir`, `tmpDir` and
 * `manifestPath` sit directly under the execution root.
 *
 * @param params Object with `contextId`, `executionId` and optional `root`.
 * @returns An object holding the seven paths.
 */
export function getContextExecutionWorkspaceDirs(params) {
    const executionRoot = getContextExecutionWorkspaceRoot(params);
    const workspace = {
        root: executionRoot,
        contextRoot: getContextWorkspaceRoot(params),
        eventsDir: getContextEventsDir(params),
        outputDir: `${executionRoot}/output`,
        scriptsDir: `${executionRoot}/scripts`,
        tmpDir: `${executionRoot}/tmp`,
        manifestPath: `${executionRoot}/${WORKSPACE_MANIFEST_FILE_NAME}`,
    };
    return workspace;
}
|
|
74
|
+
/**
 * Lists the directories of an execution workspace in a fixed order:
 * context root, events dir, execution root, output, scripts, tmp.
 * The manifest path is a file and is therefore not included.
 *
 * @param params Object with `contextId`, `executionId` and optional `root`.
 * @returns The six directory paths.
 */
export function getContextExecutionWorkspaceStandardDirs(params) {
    const { contextRoot, eventsDir, root, outputDir, scriptsDir, tmpDir } = getContextExecutionWorkspaceDirs(params);
    return [contextRoot, eventsDir, root, outputDir, scriptsDir, tmpDir];
}
|
|
78
|
+
/**
 * Scans event items for file references and returns them as workspace file
 * inputs.
 *
 * For each item, `content.parts` is read when it is an array (otherwise the
 * item contributes nothing). Every part is handed to `collectPartFiles`
 * together with the item's `id` (as trimmed text, or undefined) and the
 * part's index.
 *
 * @param eventItems Items to inspect; non-object entries yield no files.
 * @returns The collected file inputs, in encounter order.
 */
export function extractContextWorkspaceFilesFromEventItems(eventItems) {
    const collected = [];
    for (const eventItem of eventItems) {
        const eventRecord = asRecord(eventItem);
        const candidateParts = asRecord(eventRecord?.content)?.parts;
        const eventParts = Array.isArray(candidateParts) ? candidateParts : [];
        eventParts.forEach((eventPart, index) => {
            const origin = {
                files: collected,
                sourceEventId: asText(eventRecord?.id),
                sourcePartIndex: index,
            };
            collectPartFiles(eventPart, origin);
        });
    }
    return collected;
}
|
|
95
|
+
/**
 * Creates the execution workspace inside the sandbox and copies the given
 * files into it.
 *
 * Sequence:
 *   1. one `mkdir -p` for the standard directories plus the de-duplicated
 *      part directory of every file input;
 *   2. for each input with a non-blank `fileId`: read the file, write its
 *      content to "<partDir>/file", then write "<partDir>/metadata.json";
 *   3. write the manifest (ids, directory paths, prepared files) to
 *      `manifestPath` and return it.
 *
 * The part directory is keyed by `sourceEventId` (falling back to the file
 * id) and `sourcePartIndex` (falling back to 0).
 *
 * NOTE(review): inputs that share the same event id and part index resolve to
 * the same "<partDir>/file" path, so a later one appears to overwrite an
 * earlier one — confirm whether that is intended.
 *
 * @param params Object with `runtime`, `sandboxId`, `contextId`,
 *   `executionId`, `files` and optional `root`.
 * @returns The manifest object that was written to the sandbox.
 */
export async function prepareContextExecutionWorkspaceStep(params) {
    "use step";
    const dirs = getContextExecutionWorkspaceDirs(params);
    const partDirFor = (sourceEventId, sourcePartIndex) => resolveContextEventPartDir({
        eventsDir: dirs.eventsDir,
        sourceEventId,
        sourcePartIndex,
    });
    // A Set keeps first-seen order while dropping repeated part directories.
    const uniquePartDirs = new Set();
    for (const fileInput of params.files) {
        uniquePartDirs.add(partDirFor(fileInput.sourceEventId ?? fileInput.fileId, fileInput.sourcePartIndex ?? 0));
    }
    await runDatasetSandboxCommandStep({
        runtime: params.runtime,
        sandboxId: params.sandboxId,
        cmd: "mkdir",
        args: ["-p", ...getContextExecutionWorkspaceStandardDirs(params), ...uniquePartDirs],
    });
    const preparedFiles = [];
    for (const fileInput of params.files) {
        const fileId = String(fileInput.fileId ?? "").trim();
        if (!fileId) {
            continue;
        }
        const file = await readInstantFileStep({ runtime: params.runtime, fileId });
        const defaultName = `${fileId}.bin`;
        const filename = sanitizePathSegment(fileInput.filename ?? filenameFromContentDisposition(file.contentDisposition, defaultName), defaultName);
        const partDir = partDirFor(fileInput.sourceEventId ?? fileId, fileInput.sourcePartIndex ?? 0);
        const path = `${partDir}/file`;
        const role = fileInput.role ?? "input";
        const { mediaType, sourceEventId, sourcePartIndex } = fileInput;
        await writeDatasetSandboxFilesStep({
            runtime: params.runtime,
            sandboxId: params.sandboxId,
            files: [{ path, contentBase64: file.contentBase64 }],
        });
        const metadata = { fileId, filename, mediaType, role, sourceEventId, sourcePartIndex };
        await writeDatasetSandboxTextFilesStep({
            runtime: params.runtime,
            sandboxId: params.sandboxId,
            files: [
                {
                    path: `${partDir}/metadata.json`,
                    content: JSON.stringify(metadata, null, 2),
                },
            ],
        });
        preparedFiles.push({ fileId, filename, mediaType, role, path, sourceEventId, sourcePartIndex });
    }
    const manifest = {
        contextId: params.contextId,
        executionId: params.executionId,
        sandboxId: params.sandboxId,
        ...dirs,
        files: preparedFiles,
    };
    await writeDatasetSandboxTextFilesStep({
        runtime: params.runtime,
        sandboxId: params.sandboxId,
        files: [{ path: dirs.manifestPath, content: JSON.stringify(manifest, null, 2) }],
    });
    return manifest;
}
|
|
178
|
+
/**
 * Collects file references from a single event part.
 *
 * A part whose `type` is "file" is recorded directly. Otherwise, when
 * `content.blocks` is an array, every block whose `type` is "file" is
 * recorded. Anything else is ignored.
 *
 * @param value The part to inspect; non-object values are ignored.
 * @param params Object with the target `files` array plus `sourceEventId`
 *   and `sourcePartIndex`, forwarded unchanged to `pushFileRecord`.
 */
function collectPartFiles(value, params) {
    const part = asRecord(value);
    if (!part) {
        return;
    }
    if (part.type === "file") {
        pushFileRecord(part, params);
        return;
    }
    const blocks = asRecord(part.content)?.blocks;
    if (!Array.isArray(blocks)) {
        return;
    }
    for (const candidate of blocks) {
        const block = asRecord(candidate);
        if (block?.type === "file") {
            pushFileRecord(block, params);
        }
    }
}
|
|
198
|
+
/**
 * Appends one file input to `params.files`, built from a file record.
 *
 * Records without a non-blank string `fileId` are dropped. `filename` and
 * `mediaType` are kept only when they are non-blank strings; the role is
 * always "input".
 *
 * @param record Object carrying `fileId`, `filename` and `mediaType`.
 * @param params Object with the target `files` array, `sourceEventId` and
 *   `sourcePartIndex`.
 */
function pushFileRecord(record, params) {
    const fileId = asText(record.fileId);
    if (!fileId) {
        return;
    }
    const entry = {
        fileId,
        filename: asText(record.filename),
        mediaType: asText(record.mediaType),
        role: "input",
        sourceEventId: params.sourceEventId,
        sourcePartIndex: params.sourcePartIndex,
    };
    params.files.push(entry);
}
|
|
211
|
+
/**
 * Returns `value` itself when it is a non-null, non-array object; otherwise
 * returns null.
 *
 * @param value Anything.
 * @returns The same object reference, or null.
 */
function asRecord(value) {
    const isObjectLike = Boolean(value) && typeof value === "object";
    if (isObjectLike && !Array.isArray(value)) {
        return value;
    }
    return null;
}
|
|
216
|
+
/**
 * Returns the trimmed string when `value` is a string containing
 * non-whitespace characters; otherwise returns undefined.
 *
 * @param value Anything.
 * @returns The trimmed text, or undefined.
 */
function asText(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
}
|
package/dist/dataset.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import type { AnyDatasetRuntime, DatasetBuilder, DatasetBuilderOptions, DatasetRuntimeHandle } from "./builder/types.js";
|
|
2
|
-
export type { AnyDatasetRuntime,
|
|
2
|
+
export type { AnyDatasetRuntime, CompatibleQueryDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingResource, DatasetExistingResourceInput, DatasetFileResource, DatasetFileResourceInput, DatasetMode, DatasetOutput, DatasetQueryResourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextResource, DatasetResourceInput, DatasetTextResourceInput, } from "./builder/types.js";
|
|
3
3
|
export declare function dataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime & DatasetRuntimeHandle<Runtime>, options?: DatasetBuilderOptions): DatasetBuilder<Runtime>;
|