@ekairos/dataset 1.22.82-beta.development.0 → 1.22.84-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/dist/builder/agentMaterializers.d.ts +2 -2
  2. package/dist/builder/context.d.ts +7 -0
  3. package/dist/builder/context.js +192 -0
  4. package/dist/builder/instructions.d.ts +3 -3
  5. package/dist/builder/instructions.js +10 -10
  6. package/dist/builder/materialize.d.ts +12 -11
  7. package/dist/builder/materialize.js +122 -121
  8. package/dist/builder/materializeQuery.d.ts +3 -2
  9. package/dist/builder/materializeQuery.js +10 -19
  10. package/dist/builder/persistence.d.ts +4 -5
  11. package/dist/builder/persistence.js +20 -19
  12. package/dist/builder/types.d.ts +31 -24
  13. package/dist/completeDataset.steps.d.ts +9 -8
  14. package/dist/completeDataset.steps.js +18 -11
  15. package/dist/completeDataset.tool.d.ts +9 -8
  16. package/dist/completeDataset.tool.js +2 -1
  17. package/dist/contextWorkspace.d.ts +72 -0
  18. package/dist/contextWorkspace.js +218 -0
  19. package/dist/dataset.d.ts +1 -1
  20. package/dist/dataset.js +42 -29
  21. package/dist/datasetFiles.d.ts +1 -1
  22. package/dist/datasetFiles.js +3 -3
  23. package/dist/executeCommand.tool.d.ts +1 -43
  24. package/dist/executeCommand.tool.js +10 -3
  25. package/dist/file/file-dataset.agent.d.ts +2 -0
  26. package/dist/file/file-dataset.agent.js +51 -16
  27. package/dist/file/file-dataset.steps.d.ts +6 -0
  28. package/dist/file/file-dataset.steps.js +18 -21
  29. package/dist/file/file-dataset.types.d.ts +10 -0
  30. package/dist/file/prompts.js +16 -14
  31. package/dist/index.d.ts +1 -0
  32. package/dist/index.js +1 -0
  33. package/dist/materializeDataset.tool.d.ts +34 -26
  34. package/dist/materializeDataset.tool.js +40 -29
  35. package/dist/schema.d.ts +12 -2
  36. package/dist/schema.js +6 -3
  37. package/dist/service.d.ts +2 -2
  38. package/dist/service.js +6 -3
  39. package/dist/transform/filepreview.d.ts +2 -2
  40. package/dist/transform/filepreview.js +3 -3
  41. package/dist/transform/prompts.js +25 -25
  42. package/dist/transform/transform-dataset.agent.d.ts +4 -4
  43. package/dist/transform/transform-dataset.agent.js +29 -30
  44. package/dist/transform/transform-dataset.steps.d.ts +7 -7
  45. package/dist/transform/transform-dataset.steps.js +20 -20
  46. package/dist/transform/transform-dataset.types.d.ts +13 -13
  47. package/dist/transform/transformDataset.js +4 -4
  48. package/package.json +4 -4
  49. /package/dist/builder/{sourceRows.d.ts → rows.d.ts} +0 -0
  50. /package/dist/builder/{sourceRows.js → rows.js} +0 -0
@@ -1,38 +1,44 @@
1
1
  import type { InstaQLParams, ValidQuery } from "@instantdb/core";
2
2
  import type { DomainInstantSchema, DomainSchemaResult } from "@ekairos/domain";
3
3
  import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
4
- import type { ContextReactor } from "@ekairos/events";
4
+ import type { ContextIdentifier, ContextReactor } from "@ekairos/events";
5
5
  import { datasetDomain } from "../schema.js";
6
- export type DatasetQuerySourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
6
+ export type DatasetQueryResourceInput<D extends DomainSchemaResult = DomainSchemaResult> = {
7
7
  query: InstaQLParams<DomainInstantSchema<D>>;
8
8
  title?: string;
9
9
  explanation?: string;
10
10
  domain: D;
11
11
  };
12
- export type DatasetFileSourceInput = {
12
+ export type DatasetFileResourceInput = {
13
13
  fileId: string;
14
14
  description?: string;
15
+ filename?: string;
16
+ mediaType?: string;
15
17
  };
16
- export type DatasetTextSourceInput = {
18
+ export type DatasetTextResourceInput = {
17
19
  text: string;
18
20
  mimeType?: string;
19
21
  name?: string;
20
22
  description?: string;
21
23
  };
22
- export type DatasetExistingSourceInput = {
24
+ export type DatasetExistingResourceInput = {
23
25
  datasetId: string;
24
26
  description?: string;
25
27
  };
26
- export type DatasetFileSource = {
28
+ export type DatasetContextResourceInput = ContextIdentifier;
29
+ export type DatasetFileResource = {
27
30
  kind: "file";
28
- } & DatasetFileSourceInput;
29
- export type DatasetTextSource = {
31
+ } & DatasetFileResourceInput;
32
+ export type DatasetTextResource = {
30
33
  kind: "text";
31
- } & DatasetTextSourceInput;
32
- export type DatasetExistingSource = {
34
+ } & DatasetTextResourceInput;
35
+ export type DatasetExistingResource = {
33
36
  kind: "dataset";
34
- } & DatasetExistingSourceInput;
35
- export type DatasetSourceInput = DatasetFileSourceInput | DatasetTextSourceInput | DatasetExistingSourceInput | DatasetFileSource | DatasetTextSource | DatasetExistingSource;
37
+ } & DatasetExistingResourceInput;
38
+ export type DatasetContextResource = {
39
+ kind: "context";
40
+ } & DatasetContextResourceInput;
41
+ export type DatasetResourceInput = DatasetFileResourceInput | DatasetTextResourceInput | DatasetExistingResourceInput | DatasetContextResourceInput | DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource;
36
42
  export type DatasetSchemaInput = {
37
43
  title?: string;
38
44
  description?: string;
@@ -48,9 +54,9 @@ export type DatasetBuildOptions = {
48
54
  datasetId?: string;
49
55
  durable?: boolean;
50
56
  };
51
- export type InternalSource = DatasetFileSource | DatasetTextSource | DatasetExistingSource | ({
57
+ export type InternalDatasetResource = DatasetFileResource | DatasetTextResource | DatasetExistingResource | DatasetContextResource | ({
52
58
  kind: "query";
53
- } & DatasetQuerySourceInput);
59
+ } & DatasetQueryResourceInput);
54
60
  export type DatasetReaderResult = {
55
61
  rows: any[];
56
62
  cursor: number;
@@ -76,8 +82,8 @@ export type DatasetRuntimeEnv = {
76
82
  };
77
83
  export type AnyDatasetRuntime = EkairosRuntime<any, any, any>;
78
84
  export type DatasetRuntimeHandle<Runtime extends AnyDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
79
- export type CompatibleSourceDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
80
- export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
85
+ export type CompatibleQueryDomain<Runtime extends AnyDatasetRuntime, D extends DomainSchemaResult> = RuntimeForDomain<Runtime, D> extends never ? never : D;
86
+ export type DatasetQueryResourceOptions<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>> = {
81
87
  query: Q;
82
88
  title?: string;
83
89
  explanation?: string;
@@ -85,9 +91,10 @@ export type DatasetQuerySourceOptions<D extends DomainSchemaResult, Q extends Va
85
91
  export type DatasetBuilderState<Runtime extends AnyDatasetRuntime> = {
86
92
  runtime: Runtime;
87
93
  env: Runtime["env"] & DatasetRuntimeEnv;
88
- sources: InternalSource[];
94
+ resources: InternalDatasetResource[];
89
95
  title?: string;
90
96
  sandboxId?: string;
97
+ contextId?: string;
91
98
  outputSchema?: DatasetSchemaInput;
92
99
  output: DatasetOutput;
93
100
  inferSchema: boolean;
@@ -101,8 +108,7 @@ export type MaterializeRowsParams = {
101
108
  sandboxId?: string;
102
109
  title?: string;
103
110
  instructions?: string;
104
- sources: any[];
105
- sourceKinds: string[];
111
+ contextId: string;
106
112
  analysis?: any;
107
113
  rows: any[];
108
114
  schema?: DatasetSchemaInput;
@@ -111,11 +117,12 @@ export type MaterializeRowsParams = {
111
117
  };
112
118
  export type DatasetBuilder<Runtime extends AnyDatasetRuntime> = {
113
119
  readonly datasetId: string;
114
- fromFile(source: DatasetFileSourceInput): DatasetBuilder<Runtime>;
115
- fromText(source: DatasetTextSourceInput): DatasetBuilder<Runtime>;
116
- fromDataset(source: DatasetExistingSourceInput): DatasetBuilder<Runtime>;
117
- from(...sources: DatasetSourceInput[]): DatasetBuilder<Runtime>;
118
- fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleSourceDomain<Runtime, D>, source: DatasetQuerySourceOptions<D, Q>): DatasetBuilder<Runtime>;
120
+ fromFile(resource: DatasetFileResourceInput): DatasetBuilder<Runtime>;
121
+ fromText(resource: DatasetTextResourceInput): DatasetBuilder<Runtime>;
122
+ fromDataset(resource: DatasetExistingResourceInput): DatasetBuilder<Runtime>;
123
+ fromContext(context: DatasetContextResourceInput): DatasetBuilder<Runtime>;
124
+ from(...resources: DatasetResourceInput[]): DatasetBuilder<Runtime>;
125
+ fromQuery<D extends DomainSchemaResult, Q extends ValidQuery<Q, DomainInstantSchema<D>>>(domain: D & CompatibleQueryDomain<Runtime, D>, resource: DatasetQueryResourceOptions<D, Q>): DatasetBuilder<Runtime>;
119
126
  title(title: string): DatasetBuilder<Runtime>;
120
127
  sandbox(input: {
121
128
  sandboxId: string;
@@ -3,8 +3,9 @@ export interface PersistDatasetStepParams {
3
3
  sandboxId: string;
4
4
  runtime: any;
5
5
  summary?: string;
6
+ outputPath?: string;
6
7
  }
7
- export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary }: PersistDatasetStepParams): Promise<{
8
+ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }: PersistDatasetStepParams): Promise<{
8
9
  success: boolean;
9
10
  validation?: RowValidationEntry[];
10
11
  validationTruncated?: number;
@@ -23,18 +24,18 @@ export declare function persistDatasetStep({ runtime, datasetId, sandboxId, summ
23
24
  validation: RowValidationEntry[] | undefined;
24
25
  error: string;
25
26
  message: string;
26
- fileId?: undefined;
27
- storagePath?: undefined;
27
+ records?: undefined;
28
+ summary?: undefined;
28
29
  } | {
29
30
  success: boolean;
30
31
  status: string;
31
- validRows: number;
32
- rowRecordCount: number;
33
- fileId: string;
34
- storagePath: string;
35
- message: string;
32
+ records: number;
33
+ summary: string;
34
+ validRows?: undefined;
35
+ rowRecordCount?: undefined;
36
36
  validation?: undefined;
37
37
  error?: undefined;
38
+ message?: undefined;
38
39
  }>;
39
40
  type RowValidationEntry = {
40
41
  index: number;
@@ -13,14 +13,15 @@ function getAjv() {
13
13
  }
14
14
  return ajvInstance;
15
15
  }
16
- export async function persistDatasetStep({ runtime, datasetId, sandboxId, summary }) {
16
+ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summary, outputPath }) {
17
17
  "use step";
18
- const outputPath = getDatasetOutputPath(datasetId);
18
+ const resolvedOutputPath = outputPath ?? getDatasetOutputPath(datasetId);
19
+ const storagePath = resolveExecutionStoragePath(resolvedOutputPath, datasetId);
19
20
  if (summary) {
20
21
  console.log(`[Dataset ${datasetId}] Persisting completed dataset: ${summary}`);
21
22
  }
22
23
  try {
23
- await ensureFileExists(runtime, sandboxId, outputPath);
24
+ await ensureFileExists(runtime, sandboxId, resolvedOutputPath);
24
25
  }
25
26
  catch (error) {
26
27
  const message = error instanceof Error ? error.message : String(error);
@@ -85,7 +86,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
85
86
  const validationResult = await validateJsonlRows({
86
87
  runtime,
87
88
  sandboxId,
88
- outputPath,
89
+ outputPath: resolvedOutputPath,
89
90
  validator,
90
91
  schema: schemaJson,
91
92
  datasetId,
@@ -96,7 +97,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
96
97
  const totalValidRows = validationResult.validRowCount ?? 0;
97
98
  const rowRecordCount = validationResult.rowRecordCount ?? totalValidRows;
98
99
  console.log(`[Dataset ${datasetId}] Reading file content for upload`);
99
- const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: outputPath });
100
+ const fileRead = await readDatasetSandboxFileStep({ runtime, sandboxId, path: resolvedOutputPath });
100
101
  if (!fileRead.contentBase64) {
101
102
  console.error(`[Dataset ${datasetId}] Empty file content`);
102
103
  return {
@@ -113,6 +114,7 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
113
114
  const uploadResult = await service.uploadDatasetOutputFile({
114
115
  datasetId,
115
116
  fileBuffer: Buffer.from(fileRead.contentBase64, "base64"),
117
+ storagePath,
116
118
  });
117
119
  if (!uploadResult.ok) {
118
120
  console.error(`[Dataset ${datasetId}] File upload failed: ${uploadResult.error}`);
@@ -150,13 +152,18 @@ export async function persistDatasetStep({ runtime, datasetId, sandboxId, summar
150
152
  return {
151
153
  success: true,
152
154
  status: "completed",
153
- validRows: totalValidRows,
154
- rowRecordCount,
155
- fileId: uploadResult.data.fileId,
156
- storagePath: uploadResult.data.storagePath,
157
- message: "Dataset creation completed and uploaded to storage",
155
+ records: totalValidRows,
156
+ summary: summary ?? `Dataset completed with ${totalValidRows} records.`,
158
157
  };
159
158
  }
159
/**
 * Derives the storage path for an uploaded dataset output file.
 *
 * Backslashes in `outputPath` are converted to forward slashes first. When the
 * result begins with "/tmp/ekairos/contexts/", the "/tmp/ekairos" prefix is
 * dropped and the remainder (starting at "/contexts/...") is returned.
 * Any other path, including a missing one, maps to the per-dataset default.
 *
 * @param outputPath Sandbox path of the output file; may be null/undefined.
 * @param datasetId Dataset identifier used for the default storage path.
 * @returns The storage path string.
 */
function resolveExecutionStoragePath(outputPath, datasetId) {
    const posixPath = String(outputPath ?? "").replace(/\\/g, "/");
    const livesInContextWorkspace = posixPath.startsWith("/tmp/ekairos/contexts/");
    if (!livesInContextWorkspace) {
        return `/dataset/${datasetId}/output.jsonl`;
    }
    // Keep everything after the sandbox prefix so the "/contexts/..." layout is preserved.
    const sandboxPrefixLength = "/tmp/ekairos".length;
    return posixPath.slice(sandboxPrefixLength);
}
160
167
  async function ensureFileExists(runtime, sandboxId, path) {
161
168
  const result = await runDatasetSandboxCommandStep({
162
169
  runtime,
@@ -306,7 +313,7 @@ function buildValidationFailureSummary(params) {
306
313
  }
307
314
  function buildRepairInstructions(summary) {
308
315
  const instructions = [
309
- "Rewrite output.jsonl using the schema as the source of truth. Do not use source file headers as JSON keys unless they exactly match schema property names.",
316
+ "Rewrite output.jsonl using the schema as the authority. Do not use input file headers as JSON keys unless they exactly match schema property names.",
310
317
  "Each non-empty line must be a JSON object shaped as {\"type\":\"row\",\"data\":{...}}.",
311
318
  "Populate every required top-level and nested required path from failureSummary.requiredPaths.",
312
319
  "For enum fields, emit exactly one allowed literal from failureSummary.enumConstraints or failureSummary.enumFailures.",
@@ -2,8 +2,9 @@ interface CompleteDatasetToolParams {
2
2
  datasetId: string;
3
3
  sandboxId: string;
4
4
  runtime: any;
5
+ outputPath?: string;
5
6
  }
6
- export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime }: CompleteDatasetToolParams): import("ai").Tool<{
7
+ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }: CompleteDatasetToolParams): import("ai").Tool<{
7
8
  summary: string;
8
9
  }, {
9
10
  success: boolean;
@@ -82,18 +83,18 @@ export declare function createCompleteDatasetTool({ datasetId, sandboxId, runtim
82
83
  }[] | undefined;
83
84
  error: string;
84
85
  message: string;
85
- fileId?: undefined;
86
- storagePath?: undefined;
86
+ records?: undefined;
87
+ summary?: undefined;
87
88
  } | {
88
89
  success: boolean;
89
90
  status: string;
90
- validRows: number;
91
- rowRecordCount: number;
92
- fileId: string;
93
- storagePath: string;
94
- message: string;
91
+ records: number;
92
+ summary: string;
93
+ validRows?: undefined;
94
+ rowRecordCount?: undefined;
95
95
  validation?: undefined;
96
96
  error?: undefined;
97
+ message?: undefined;
97
98
  }>;
98
99
  export declare function didCompleteDatasetSucceed(event: {
99
100
  content?: {
@@ -1,7 +1,7 @@
1
1
  import { tool } from "ai";
2
2
  import { z } from "zod";
3
3
  import { persistDatasetStep } from "./completeDataset.steps.js";
4
- export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
4
+ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime, outputPath }) {
5
5
  return tool({
6
6
  description: "Mark the dataset as completed. Use only when output.jsonl has been successfully generated and is ready for validation.",
7
7
  inputSchema: z.object({
@@ -17,6 +17,7 @@ export function createCompleteDatasetTool({ datasetId, sandboxId, runtime }) {
17
17
  datasetId,
18
18
  sandboxId,
19
19
  summary,
20
+ outputPath,
20
21
  });
21
22
  },
22
23
  });
@@ -0,0 +1,72 @@
1
+ export type ContextWorkspaceFileRole = "input" | "output" | "artifact";
2
+ export type ContextWorkspaceFileInput = {
3
+ fileId: string;
4
+ filename?: string;
5
+ mediaType?: string;
6
+ role?: ContextWorkspaceFileRole;
7
+ sourceEventId?: string;
8
+ sourcePartIndex?: number;
9
+ };
10
+ export type PreparedContextWorkspaceFile = {
11
+ fileId: string;
12
+ filename: string;
13
+ mediaType?: string;
14
+ role: ContextWorkspaceFileRole;
15
+ path: string;
16
+ sourceEventId?: string;
17
+ sourcePartIndex?: number;
18
+ };
19
+ export type PreparedContextExecutionWorkspace = {
20
+ contextId: string;
21
+ executionId: string;
22
+ sandboxId: string;
23
+ root: string;
24
+ contextRoot: string;
25
+ eventsDir: string;
26
+ outputDir: string;
27
+ scriptsDir: string;
28
+ tmpDir: string;
29
+ manifestPath: string;
30
+ files: PreparedContextWorkspaceFile[];
31
+ };
32
+ export declare function getContextWorkspaceBase(): string;
33
+ export declare function getContextExecutionWorkspaceRoot(params: {
34
+ contextId: string;
35
+ executionId: string;
36
+ root?: string;
37
+ }): string;
38
+ export declare function getContextWorkspaceRoot(params: {
39
+ contextId: string;
40
+ root?: string;
41
+ }): string;
42
+ export declare function getContextEventsDir(params: {
43
+ contextId: string;
44
+ root?: string;
45
+ }): string;
46
+ export declare function getContextExecutionWorkspaceDirs(params: {
47
+ contextId: string;
48
+ executionId: string;
49
+ root?: string;
50
+ }): {
51
+ root: string;
52
+ contextRoot: string;
53
+ eventsDir: string;
54
+ outputDir: string;
55
+ scriptsDir: string;
56
+ tmpDir: string;
57
+ manifestPath: string;
58
+ };
59
+ export declare function getContextExecutionWorkspaceStandardDirs(params: {
60
+ contextId: string;
61
+ executionId: string;
62
+ root?: string;
63
+ }): string[];
64
+ export declare function extractContextWorkspaceFilesFromEventItems(eventItems: unknown[]): ContextWorkspaceFileInput[];
65
+ export declare function prepareContextExecutionWorkspaceStep(params: {
66
+ runtime: any;
67
+ sandboxId: string;
68
+ contextId: string;
69
+ executionId: string;
70
+ files: ContextWorkspaceFileInput[];
71
+ root?: string;
72
+ }): Promise<PreparedContextExecutionWorkspace>;
@@ -0,0 +1,218 @@
1
+ import { readInstantFileStep } from "./file/steps.js";
2
+ import { runDatasetSandboxCommandStep, writeDatasetSandboxFilesStep, writeDatasetSandboxTextFilesStep, } from "./sandbox/steps.js";
3
+ const CONTEXT_WORKSPACE_BASE = "/tmp/ekairos/contexts";
4
+ const WORKSPACE_MANIFEST_FILE_NAME = "manifest.json";
5
/**
 * Removes a single trailing "/" from `value` when one is present.
 * Only the last character is considered, so "a//" becomes "a/".
 */
function trimTrailingSlash(value) {
    if (!value.endsWith("/")) {
        return value;
    }
    return value.slice(0, value.length - 1);
}
8
/**
 * Reduces an arbitrary value to a single safe path segment.
 *
 * The value is stringified and trimmed, backslashes are treated as path
 * separators, and only the last non-empty segment is kept. Characters outside
 * [a-zA-Z0-9_.-] become "_", runs of "_" collapse to one, and the result is
 * capped at 160 characters.
 *
 * @param value Candidate segment (any type; null/undefined become "").
 * @param fallback Returned when no usable segment remains.
 * @returns A segment safe to interpolate into a path, or `fallback`.
 */
function sanitizePathSegment(value, fallback) {
    const segments = String(value ?? "")
        .trim()
        .replace(/\\/g, "/")
        .split("/")
        .filter(Boolean);
    const normalized = segments[segments.length - 1]
        ?.replace(/[^a-zA-Z0-9_.-]/g, "_")
        .replace(/_+/g, "_")
        .slice(0, 160);
    // "." and ".." consist only of allowed characters but are directory
    // references, not names; letting them through would allow the resulting
    // path to point at or above its parent directory.
    if (!normalized || normalized === "." || normalized === "..") {
        return fallback;
    }
    return normalized;
}
20
/**
 * Extracts a sanitized filename from a Content-Disposition style value.
 *
 * Preference order:
 *   1. `filename*=UTF-8''<percent-encoded>` (decoded, then sanitized);
 *   2. `filename="..."` / `filename=...`;
 *   3. the raw value itself.
 *
 * @param value Header value; null/undefined/blank yields `fallback`.
 * @param fallback Name returned when nothing usable can be extracted.
 * @returns A sanitized path segment or `fallback`.
 */
function filenameFromContentDisposition(value, fallback) {
    const raw = String(value ?? "").trim();
    if (!raw)
        return fallback;
    const filenameStar = raw.match(/filename\*=UTF-8''([^;]+)/i)?.[1];
    if (filenameStar) {
        let decoded;
        try {
            decoded = decodeURIComponent(filenameStar);
        }
        catch {
            // decodeURIComponent throws URIError on malformed percent-encoding.
            // A bad header must not abort the caller; fall through to the
            // plain `filename=` parameter (or the undecoded value) instead.
            decoded = undefined;
        }
        if (decoded !== undefined) {
            return sanitizePathSegment(decoded, fallback);
        }
    }
    const filename = raw.match(/filename="?([^";]+)"?/i)?.[1];
    // `filenameStar` is only still in play here when decoding failed above.
    return sanitizePathSegment(filename ?? filenameStar ?? raw, fallback);
}
31
/**
 * Builds the directory path for one part of one context event:
 * `<eventsDir>/<sanitized event id>/parts/<part index>`.
 *
 * The event id falls back to "event" when it sanitizes to nothing. The part
 * index is floored and clamped to be non-negative; non-finite values become 0.
 *
 * @param params Object with `eventsDir`, `sourceEventId` and `sourcePartIndex`.
 * @returns The part directory path.
 */
function resolveContextEventPartDir(params) {
    const eventSegment = sanitizePathSegment(params.sourceEventId, "event");
    let partIndex = 0;
    if (Number.isFinite(params.sourcePartIndex)) {
        partIndex = Math.max(0, Math.floor(params.sourcePartIndex));
    }
    return `${params.eventsDir}/${eventSegment}/parts/${partIndex}`;
}
38
/**
 * Returns the path of the payload file inside an event part directory.
 * The file is always named "file"; `params` is forwarded unchanged to
 * `resolveContextEventPartDir`.
 */
function resolveWorkspaceFilePath(params) {
    const partDir = resolveContextEventPartDir(params);
    return `${partDir}/file`;
}
41
/**
 * Returns the base directory under which context workspaces are created,
 * without a trailing slash.
 */
export function getContextWorkspaceBase() {
    const base = trimTrailingSlash(CONTEXT_WORKSPACE_BASE);
    return base;
}
44
/**
 * Resolves the root directory of a single execution's workspace.
 *
 * An explicit `params.root` wins and is returned with one trailing slash
 * removed. Otherwise the path is
 * `<base>/<contextId>/executions/<executionId>`, with both ids sanitized
 * (falling back to "context" / "execution").
 *
 * @param params Object with `contextId`, `executionId` and optional `root`.
 * @returns The execution workspace root path.
 */
export function getContextExecutionWorkspaceRoot(params) {
    const explicitRoot = params.root;
    if (explicitRoot) {
        return trimTrailingSlash(explicitRoot);
    }
    const contextSegment = sanitizePathSegment(params.contextId, "context");
    const executionSegment = sanitizePathSegment(params.executionId, "execution");
    const base = getContextWorkspaceBase();
    return `${base}/${contextSegment}/executions/${executionSegment}`;
}
51
/**
 * Resolves the root directory shared by all executions of a context.
 *
 * An explicit `params.root` wins and is returned with one trailing slash
 * removed. Otherwise the path is `<base>/<contextId>` with the id sanitized
 * (falling back to "context").
 *
 * @param params Object with `contextId` and optional `root`.
 * @returns The context workspace root path.
 */
export function getContextWorkspaceRoot(params) {
    const explicitRoot = params.root;
    if (explicitRoot) {
        return trimTrailingSlash(explicitRoot);
    }
    const contextSegment = sanitizePathSegment(params.contextId, "context");
    const base = getContextWorkspaceBase();
    return `${base}/${contextSegment}`;
}
57
/**
 * Returns the "events" directory located directly under the context
 * workspace root resolved from `params`.
 */
export function getContextEventsDir(params) {
    const contextRoot = getContextWorkspaceRoot(params);
    return `${contextRoot}/events`;
}
60
/**
 * Computes every well-known path of an execution workspace.
 *
 * `root`, `outputDir`, `scriptsDir`, `tmpDir` and `manifestPath` are derived
 * from the execution root; `contextRoot` and `eventsDir` from the context
 * root. Property order is kept stable because the result is spread into the
 * serialized manifest.
 *
 * @param params Object with `contextId`, `executionId` and optional `root`.
 * @returns An object holding the resolved paths.
 */
export function getContextExecutionWorkspaceDirs(params) {
    const executionRoot = getContextExecutionWorkspaceRoot(params);
    const sharedContextRoot = getContextWorkspaceRoot(params);
    const sharedEventsDir = getContextEventsDir(params);
    const layout = {
        root: executionRoot,
        contextRoot: sharedContextRoot,
        eventsDir: sharedEventsDir,
        outputDir: `${executionRoot}/output`,
        scriptsDir: `${executionRoot}/scripts`,
        tmpDir: `${executionRoot}/tmp`,
        manifestPath: `${executionRoot}/${WORKSPACE_MANIFEST_FILE_NAME}`,
    };
    return layout;
}
74
/**
 * Lists the directories that make up an execution workspace, in the order
 * context root, events, execution root, output, scripts, tmp.
 * The manifest path is a file and is therefore not included.
 */
export function getContextExecutionWorkspaceStandardDirs(params) {
    const { contextRoot, eventsDir, root, outputDir, scriptsDir, tmpDir } = getContextExecutionWorkspaceDirs(params);
    return [contextRoot, eventsDir, root, outputDir, scriptsDir, tmpDir];
}
78
/**
 * Walks event items and gathers a workspace file input for every file
 * reference found in their `content.parts`.
 *
 * Items that are not plain objects, or whose `content.parts` is not an array,
 * contribute nothing. Each discovered file records the owning item's `id`
 * (when it is a non-blank string) and the index of the part it came from.
 *
 * @param eventItems Event items to scan.
 * @returns File inputs in discovery order.
 */
export function extractContextWorkspaceFilesFromEventItems(eventItems) {
    const collected = [];
    for (const eventItem of eventItems) {
        const eventRecord = asRecord(eventItem);
        const candidateParts = asRecord(eventRecord?.content)?.parts;
        if (!Array.isArray(candidateParts)) {
            continue;
        }
        const sourceEventId = asText(eventRecord?.id);
        for (let partIndex = 0; partIndex < candidateParts.length; partIndex += 1) {
            collectPartFiles(candidateParts[partIndex], {
                files: collected,
                sourceEventId,
                sourcePartIndex: partIndex,
            });
        }
    }
    return collected;
}
95
/**
 * Materializes an execution workspace inside a sandbox.
 *
 * Steps, in order:
 *   1. `mkdir -p` the standard workspace directories plus one directory per
 *      distinct event part referenced by `params.files`;
 *   2. for each file with a non-blank `fileId`: read it, write its bytes to
 *      `<partDir>/file` and a JSON description to `<partDir>/metadata.json`;
 *   3. write the manifest JSON to the execution's manifest path.
 *
 * @param params Object with `runtime`, `sandboxId`, `contextId`,
 *   `executionId`, `files` and optional `root`.
 * @returns The manifest object that was written.
 */
export async function prepareContextExecutionWorkspaceStep(params) {
    "use step";
    const dirs = getContextExecutionWorkspaceDirs(params);
    // Describes where a file input lives; the event id falls back to the file id.
    const locate = (fileInput, fallbackEventId) => ({
        eventsDir: dirs.eventsDir,
        sourceEventId: fileInput.sourceEventId ?? fallbackEventId,
        sourcePartIndex: fileInput.sourcePartIndex ?? 0,
    });
    // A Set keeps first-seen order while dropping duplicate part directories.
    const uniquePartDirs = new Set();
    for (const fileInput of params.files) {
        uniquePartDirs.add(resolveContextEventPartDir(locate(fileInput, fileInput.fileId)));
    }
    await runDatasetSandboxCommandStep({
        runtime: params.runtime,
        sandboxId: params.sandboxId,
        cmd: "mkdir",
        args: ["-p", ...getContextExecutionWorkspaceStandardDirs(params), ...uniquePartDirs],
    });
    const preparedFiles = [];
    for (const fileInput of params.files) {
        const fileId = String(fileInput.fileId ?? "").trim();
        if (!fileId) {
            // Entries without a usable id are silently skipped.
            continue;
        }
        const file = await readInstantFileStep({ runtime: params.runtime, fileId });
        const defaultName = `${fileId}.bin`;
        const filename = sanitizePathSegment(fileInput.filename ?? filenameFromContentDisposition(file.contentDisposition, defaultName), defaultName);
        const location = locate(fileInput, fileId);
        // NOTE(review): the path depends only on event id and part index, so two
        // files from the same part resolve to the same location — presumably the
        // later write replaces the earlier one; confirm against the sandbox API.
        const path = resolveWorkspaceFilePath(location);
        const metadataPath = `${resolveContextEventPartDir(location)}/metadata.json`;
        const role = fileInput.role ?? "input";
        await writeDatasetSandboxFilesStep({
            runtime: params.runtime,
            sandboxId: params.sandboxId,
            files: [{ path, contentBase64: file.contentBase64 }],
        });
        const metadata = {
            fileId,
            filename,
            mediaType: fileInput.mediaType,
            role,
            sourceEventId: fileInput.sourceEventId,
            sourcePartIndex: fileInput.sourcePartIndex,
        };
        await writeDatasetSandboxTextFilesStep({
            runtime: params.runtime,
            sandboxId: params.sandboxId,
            files: [
                {
                    path: metadataPath,
                    content: JSON.stringify(metadata, null, 2),
                },
            ],
        });
        preparedFiles.push({
            fileId,
            filename,
            mediaType: fileInput.mediaType,
            role,
            path,
            sourceEventId: fileInput.sourceEventId,
            sourcePartIndex: fileInput.sourcePartIndex,
        });
    }
    const manifest = {
        contextId: params.contextId,
        executionId: params.executionId,
        sandboxId: params.sandboxId,
        ...dirs,
        files: preparedFiles,
    };
    const manifestJson = JSON.stringify(manifest, null, 2);
    await writeDatasetSandboxTextFilesStep({
        runtime: params.runtime,
        sandboxId: params.sandboxId,
        files: [
            {
                path: dirs.manifestPath,
                content: manifestJson,
            },
        ],
    });
    return manifest;
}
178
/**
 * Collects file references from a single event part into `params.files`.
 *
 * A part whose `type` is "file" is recorded directly. Otherwise, when the
 * part has a `content.blocks` array, every block whose `type` is "file" is
 * recorded. Anything else is ignored.
 *
 * @param value The part to inspect (any type).
 * @param params Object with the `files` accumulator, `sourceEventId` and
 *   `sourcePartIndex`.
 */
function collectPartFiles(value, params) {
    const part = asRecord(value);
    if (!part) {
        return;
    }
    if (part.type === "file") {
        pushFileRecord(part, params);
        return;
    }
    const blocks = asRecord(part.content)?.blocks;
    if (!Array.isArray(blocks)) {
        return;
    }
    blocks
        .map((candidate) => asRecord(candidate))
        .filter((block) => block?.type === "file")
        .forEach((block) => pushFileRecord(block, params));
}
198
/**
 * Appends one file reference to `params.files`, tagged with role "input".
 * Records without a non-blank string `fileId` are ignored; `filename` and
 * `mediaType` are included only when they are non-blank strings.
 *
 * @param record Plain object describing the file.
 * @param params Object with the `files` accumulator, `sourceEventId` and
 *   `sourcePartIndex`.
 */
function pushFileRecord(record, params) {
    const fileId = asText(record.fileId);
    if (!fileId) {
        return;
    }
    const { files, sourceEventId, sourcePartIndex } = params;
    const entry = {
        fileId,
        filename: asText(record.filename),
        mediaType: asText(record.mediaType),
        role: "input",
        sourceEventId,
        sourcePartIndex,
    };
    files.push(entry);
}
211
/**
 * Returns `value` itself when it is a non-null, non-array object;
 * returns null for everything else (primitives, null, undefined, arrays).
 */
function asRecord(value) {
    if (value === null || typeof value !== "object") {
        return null;
    }
    if (Array.isArray(value)) {
        return null;
    }
    return value;
}
216
/**
 * Returns the trimmed form of `value` when it is a string with at least one
 * non-whitespace character; otherwise returns undefined.
 */
function asText(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
}
package/dist/dataset.d.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  import type { AnyDatasetRuntime, DatasetBuilder, DatasetBuilderOptions, DatasetRuntimeHandle } from "./builder/types.js";
2
- export type { AnyDatasetRuntime, CompatibleSourceDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingSource, DatasetExistingSourceInput, DatasetFileSource, DatasetFileSourceInput, DatasetMode, DatasetOutput, DatasetQuerySourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextSource, DatasetSourceInput, DatasetTextSourceInput, } from "./builder/types.js";
2
+ export type { AnyDatasetRuntime, CompatibleQueryDomain, DatasetBuilder, DatasetBuilderOptions, DatasetBuildOptions, DatasetBuildResult, DatasetExistingResource, DatasetExistingResourceInput, DatasetFileResource, DatasetFileResourceInput, DatasetMode, DatasetOutput, DatasetQueryResourceInput, DatasetReader, DatasetReaderResult, DatasetRuntimeEnv, DatasetRuntimeHandle, DatasetSchemaInput, DatasetTextResource, DatasetResourceInput, DatasetTextResourceInput, } from "./builder/types.js";
3
3
  export declare function dataset<Runtime extends AnyDatasetRuntime>(runtime: Runtime & DatasetRuntimeHandle<Runtime>, options?: DatasetBuilderOptions): DatasetBuilder<Runtime>;