@ekairos/dataset 1.22.34-beta.development.0 → 1.22.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/README.md +347 -0
  2. package/dist/agents.d.ts +8 -0
  3. package/dist/agents.js +8 -0
  4. package/dist/builder/agentMaterializers.d.ts +9 -0
  5. package/dist/builder/agentMaterializers.js +10 -0
  6. package/dist/builder/context.d.ts +15 -0
  7. package/dist/builder/context.js +251 -0
  8. package/dist/builder/instructions.d.ts +5 -0
  9. package/dist/builder/instructions.js +40 -0
  10. package/dist/builder/materialize.d.ts +83 -0
  11. package/dist/builder/materialize.js +548 -0
  12. package/dist/builder/materializeQuery.d.ts +12 -0
  13. package/dist/builder/materializeQuery.js +31 -0
  14. package/dist/builder/persistence.d.ts +22 -0
  15. package/dist/builder/persistence.js +153 -0
  16. package/dist/builder/rows.d.ts +7 -0
  17. package/dist/builder/rows.js +56 -0
  18. package/dist/builder/schemaInference.d.ts +3 -0
  19. package/dist/builder/schemaInference.js +61 -0
  20. package/dist/builder/types.d.ts +140 -0
  21. package/dist/builder/types.js +1 -0
  22. package/dist/clearDataset.tool.d.ts +2 -3
  23. package/dist/clearDataset.tool.js +13 -17
  24. package/dist/completeDataset.steps.d.ts +117 -0
  25. package/dist/completeDataset.steps.js +487 -0
  26. package/dist/completeDataset.tool.d.ts +132 -7
  27. package/dist/completeDataset.tool.js +46 -192
  28. package/dist/contextResources.d.ts +31 -0
  29. package/dist/contextResources.js +151 -0
  30. package/dist/contextWorkspace.d.ts +79 -0
  31. package/dist/contextWorkspace.js +234 -0
  32. package/dist/dataset/steps.d.ts +39 -15
  33. package/dist/dataset/steps.js +96 -39
  34. package/dist/dataset.d.ts +3 -67
  35. package/dist/dataset.js +129 -520
  36. package/dist/datasetFiles.d.ts +5 -1
  37. package/dist/datasetFiles.js +29 -27
  38. package/dist/domain.d.ts +1 -2
  39. package/dist/domain.js +1 -6
  40. package/dist/executeCommand.tool.d.ts +2 -30
  41. package/dist/executeCommand.tool.js +165 -39
  42. package/dist/file/file-dataset.agent.d.ts +19 -56
  43. package/dist/file/file-dataset.agent.js +176 -132
  44. package/dist/file/file-dataset.steps.d.ts +27 -0
  45. package/dist/file/file-dataset.steps.js +47 -0
  46. package/dist/file/file-dataset.types.d.ts +64 -0
  47. package/dist/file/file-dataset.types.js +1 -0
  48. package/dist/file/filepreview.d.ts +5 -35
  49. package/dist/file/filepreview.js +60 -107
  50. package/dist/file/filepreview.types.d.ts +31 -0
  51. package/dist/file/filepreview.types.js +1 -0
  52. package/dist/file/generateSchema.tool.d.ts +2 -3
  53. package/dist/file/generateSchema.tool.js +11 -15
  54. package/dist/file/index.d.ts +1 -2
  55. package/dist/file/index.js +1 -18
  56. package/dist/file/prompts.d.ts +2 -3
  57. package/dist/file/prompts.js +134 -27
  58. package/dist/file/scripts.generated.d.ts +1 -0
  59. package/dist/file/scripts.generated.js +11 -0
  60. package/dist/file/steps.d.ts +1 -2
  61. package/dist/file/steps.js +9 -7
  62. package/dist/id.d.ts +1 -0
  63. package/dist/id.js +10 -0
  64. package/dist/index.d.ts +8 -7
  65. package/dist/index.js +8 -23
  66. package/dist/materializeDataset.tool.d.ts +52 -32
  67. package/dist/materializeDataset.tool.js +81 -65
  68. package/dist/query/index.d.ts +1 -2
  69. package/dist/query/index.js +1 -18
  70. package/dist/query/queryDomain.d.ts +3 -4
  71. package/dist/query/queryDomain.js +3 -40
  72. package/dist/query/queryDomain.step.d.ts +1 -1
  73. package/dist/query/queryDomain.step.js +13 -13
  74. package/dist/sandbox/steps.d.ts +23 -15
  75. package/dist/sandbox/steps.js +73 -76
  76. package/dist/sandbox.steps.d.ts +1 -2
  77. package/dist/sandbox.steps.js +1 -18
  78. package/dist/schema.d.ts +13 -13
  79. package/dist/schema.js +25 -37
  80. package/dist/service.d.ts +8 -5
  81. package/dist/service.js +70 -15
  82. package/dist/skill.d.ts +0 -1
  83. package/dist/skill.js +12 -17
  84. package/dist/transform/filepreview.d.ts +2 -3
  85. package/dist/transform/filepreview.js +9 -26
  86. package/dist/transform/index.d.ts +2 -3
  87. package/dist/transform/index.js +2 -8
  88. package/dist/transform/prompts.d.ts +1 -34
  89. package/dist/transform/prompts.js +58 -43
  90. package/dist/transform/transform-dataset.agent.d.ts +20 -45
  91. package/dist/transform/transform-dataset.agent.js +146 -89
  92. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  93. package/dist/transform/transform-dataset.steps.js +61 -0
  94. package/dist/transform/transform-dataset.types.d.ts +95 -0
  95. package/dist/transform/transform-dataset.types.js +1 -0
  96. package/dist/transform/transformDataset.d.ts +3 -3
  97. package/dist/transform/transformDataset.js +15 -18
  98. package/dist/writeDatasetRows.tool.d.ts +188 -0
  99. package/dist/writeDatasetRows.tool.js +258 -0
  100. package/package.json +36 -11
  101. package/dist/clearDataset.tool.d.ts.map +0 -1
  102. package/dist/clearDataset.tool.js.map +0 -1
  103. package/dist/completeDataset.tool.d.ts.map +0 -1
  104. package/dist/completeDataset.tool.js.map +0 -1
  105. package/dist/dataset/steps.d.ts.map +0 -1
  106. package/dist/dataset/steps.js.map +0 -1
  107. package/dist/dataset.d.ts.map +0 -1
  108. package/dist/dataset.js.map +0 -1
  109. package/dist/datasetFiles.d.ts.map +0 -1
  110. package/dist/datasetFiles.js.map +0 -1
  111. package/dist/domain.d.ts.map +0 -1
  112. package/dist/domain.js.map +0 -1
  113. package/dist/executeCommand.tool.d.ts.map +0 -1
  114. package/dist/executeCommand.tool.js.map +0 -1
  115. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  116. package/dist/file/file-dataset.agent.js.map +0 -1
  117. package/dist/file/filepreview.d.ts.map +0 -1
  118. package/dist/file/filepreview.js.map +0 -1
  119. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  120. package/dist/file/generateSchema.tool.js.map +0 -1
  121. package/dist/file/index.d.ts.map +0 -1
  122. package/dist/file/index.js.map +0 -1
  123. package/dist/file/prompts.d.ts.map +0 -1
  124. package/dist/file/prompts.js.map +0 -1
  125. package/dist/file/steps.d.ts.map +0 -1
  126. package/dist/file/steps.js.map +0 -1
  127. package/dist/index.d.ts.map +0 -1
  128. package/dist/index.js.map +0 -1
  129. package/dist/materializeDataset.tool.d.ts.map +0 -1
  130. package/dist/materializeDataset.tool.js.map +0 -1
  131. package/dist/query/index.d.ts.map +0 -1
  132. package/dist/query/index.js.map +0 -1
  133. package/dist/query/queryDomain.d.ts.map +0 -1
  134. package/dist/query/queryDomain.js.map +0 -1
  135. package/dist/query/queryDomain.step.d.ts.map +0 -1
  136. package/dist/query/queryDomain.step.js.map +0 -1
  137. package/dist/sandbox/steps.d.ts.map +0 -1
  138. package/dist/sandbox/steps.js.map +0 -1
  139. package/dist/sandbox.steps.d.ts.map +0 -1
  140. package/dist/sandbox.steps.js.map +0 -1
  141. package/dist/schema.d.ts.map +0 -1
  142. package/dist/schema.js.map +0 -1
  143. package/dist/service.d.ts.map +0 -1
  144. package/dist/service.js.map +0 -1
  145. package/dist/skill.d.ts.map +0 -1
  146. package/dist/skill.js.map +0 -1
  147. package/dist/transform/filepreview.d.ts.map +0 -1
  148. package/dist/transform/filepreview.js.map +0 -1
  149. package/dist/transform/index.d.ts.map +0 -1
  150. package/dist/transform/index.js.map +0 -1
  151. package/dist/transform/prompts.d.ts.map +0 -1
  152. package/dist/transform/prompts.js.map +0 -1
  153. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  154. package/dist/transform/transform-dataset.agent.js.map +0 -1
  155. package/dist/transform/transformDataset.d.ts.map +0 -1
  156. package/dist/transform/transformDataset.js.map +0 -1
package/README.md ADDED
@@ -0,0 +1,347 @@
1
+ # @ekairos/dataset
2
+
3
+ Runtime-first dataset materialization for Ekairos domains.
4
+
5
+ `@ekairos/dataset` replaces the older `@ekairos/structure` flow with a domain-owned dataset API. It persists dataset metadata and output rows in InstantDB, while file parsing and transformations run through sandbox-backed reactors when work cannot be materialized directly.
6
+
7
+ ## Mental Model
8
+
9
+ A dataset build has two responsibilities:
10
+
11
+ 1. Read or produce source rows from one or more sources.
12
+ 2. Persist the resulting dataset under `datasetDomain`.
13
+
14
+ The caller passes a typed `EkairosRuntime`. The runtime must include `datasetDomain`; query sources also require the runtime to include the queried source domain.
15
+
16
+ ```ts
17
+ import { domain } from "@ekairos/domain"
18
+ import { EkairosRuntime } from "@ekairos/domain/runtime"
19
+ import { dataset, datasetDomain } from "@ekairos/dataset"
20
+
21
+ import { sourceDomain } from "./source.domain"
22
+
23
+ const appDomain = domain("app")
24
+ .includes(datasetDomain)
25
+ .includes(sourceDomain)
26
+ .withSchema({ entities: {}, links: {}, rooms: {} })
27
+
28
+ class AppRuntime extends EkairosRuntime<{ orgId: string }, typeof appDomain, any> {
29
+ protected getDomain() {
30
+ return appDomain
31
+ }
32
+
33
+ protected resolveDb() {
34
+ return db
35
+ }
36
+ }
37
+
38
+ const runtime = new AppRuntime({ orgId: "org_1" })
39
+ ```
40
+
41
+ Use `appDomain.toInstantSchema()` to provision or push the InstantDB schema. Dataset itself does not own global DB access; it uses `runtime.use(datasetDomain)` internally.
42
+
43
+ ## Basic Usage
44
+
45
+ ```ts
46
+ const result = await dataset(runtime, { datasetId: "products_v1" })
47
+ .from({ kind: "text", text: "sku,price\nA1,10", mimeType: "text/csv" })
48
+ .auto()
49
+ .asRows()
50
+ .sandbox({ sandboxId })
51
+ .reactor(reactor)
52
+ .build()
53
+
54
+ console.log(result.datasetId)
55
+ console.log(result.previewRows)
56
+ ```
57
+
58
+ `dataset(runtime, { datasetId })` mirrors the old `structure(env, { datasetId })` style. You can also pass the id at build time:
59
+
60
+ ```ts
61
+ await dataset(runtime)
62
+ .from({ kind: "dataset", datasetId: "source_dataset_v1" })
63
+ .schema(productSchema)
64
+ .sandbox({ sandboxId })
65
+ .reactor(reactor)
66
+ .build({ datasetId: "normalized_products_v1" })
67
+ ```
68
+
69
+ ## Sources
70
+
71
+ Use `.from(...)` for structure-compatible sources:
72
+
73
+ ```ts
74
+ dataset(runtime).from(
75
+ { kind: "file", fileId: "file_1", description: "Supplier CSV" },
76
+ { kind: "text", text: "sku,price\nA1,10", mimeType: "text/csv", name: "inline.csv" },
77
+ { kind: "dataset", datasetId: "existing_dataset_v1" },
78
+ )
79
+ ```
80
+
81
+ The builder also keeps explicit source methods:
82
+
83
+ ```ts
84
+ dataset(runtime)
85
+ .fromFile({ fileId: "file_1" })
86
+ .fromText({ text: "raw input", name: "input.txt" })
87
+ .fromDataset({ datasetId: "existing_dataset_v1" })
88
+ ```
89
+
90
+ Query sources must use `.fromQuery(sourceDomain, ...)` because they need a second domain:
91
+
92
+ ```ts
93
+ const snapshot = await dataset(runtime, { datasetId: "open_items_v1" })
94
+ .fromQuery(sourceDomain, {
95
+ query: {
96
+ source_items: {
97
+ $: {
98
+ where: { status: "open" },
99
+ fields: ["title", "quantity"],
100
+ limit: 100,
101
+ },
102
+ },
103
+ },
104
+ title: "Open items",
105
+ explanation: "Snapshot of open source items",
106
+ })
107
+ .build()
108
+ ```
109
+
110
+ The query is typed with the same InstantDB query types used by `db.query`. Unknown entities, fields, filters, and link shapes fail at compile time.
111
+
112
+ ## Runtime Compatibility
113
+
114
+ The runtime compatibility check compares both the domain name and its schema, not the name alone.
115
+
116
+ ```ts
117
+ const appDomain = domain("app")
118
+ .includes(datasetDomain)
119
+ .includes(sourceDomain)
120
+ .withSchema({ entities: {}, links: {}, rooms: {} })
121
+
122
+ dataset(runtime).fromQuery(sourceDomain, { query: { source_items: {} } })
123
+ ```
124
+
125
+ Subdomains are supported transitively. If domain `B` includes domain `A`, and the runtime root includes `B`, then `.fromQuery(A, ...)` is accepted.
126
+
127
+ A different domain with the same name but incompatible schema is rejected. A runtime that includes only `datasetDomain` can persist datasets but cannot query a source domain through `.fromQuery(...)`.
128
+
129
+ ## Output Modes
130
+
131
+ Rows are the default output:
132
+
133
+ ```ts
134
+ await dataset(runtime)
135
+ .from({ kind: "dataset", datasetId: "source_v1" })
136
+ .asRows()
137
+ .build({ datasetId: "rows_v1" })
138
+ ```
139
+
140
+ Object output is represented as a single-row dataset:
141
+
142
+ ```ts
143
+ const result = await dataset(runtime, { datasetId: "summary_v1" })
144
+ .from({ kind: "dataset", datasetId: "orders_v1" })
145
+ .instructions("Summarize orders by currency.")
146
+ .schema(summarySchema)
147
+ .asObject()
148
+ .sandbox({ sandboxId })
149
+ .reactor(reactor)
150
+ .build()
151
+
152
+ console.log(result.object)
153
+ ```
154
+
155
+ `asObject()` forces a one-row output contract. The persisted dataset still uses JSONL rows, and the returned result exposes the row as both `firstRow` and `object`.
156
+
157
+ ## Schema Modes
158
+
159
+ Use `schema(...)` when the output contract is known:
160
+
161
+ ```ts
162
+ const productSchema = {
163
+ title: "ProductRow",
164
+ description: "One product row",
165
+ schema: {
166
+ type: "object",
167
+ additionalProperties: false,
168
+ properties: {
169
+ sku: { type: "string" },
170
+ price: { type: "number" },
171
+ },
172
+ required: ["sku", "price"],
173
+ },
174
+ }
175
+
176
+ await dataset(runtime)
177
+ .from({ kind: "file", fileId })
178
+ .schema(productSchema)
179
+ .sandbox({ sandboxId })
180
+ .reactor(reactor)
181
+ .build({ datasetId: "products_v1" })
182
+ ```
183
+
184
+ Use `auto()` or `inferSchema()` when the builder should infer a schema from the materialized rows:
185
+
186
+ ```ts
187
+ await dataset(runtime)
188
+ .from({ kind: "text", text: csv, mimeType: "text/csv" })
189
+ .auto()
190
+ .sandbox({ sandboxId })
191
+ .reactor(reactor)
192
+ .build({ datasetId: "auto_products_v1" })
193
+ ```
194
+
195
+ `auto()` is an alias for `inferSchema()`.
196
+
197
+ ## Sandbox And Reactor Requirements
198
+
199
+ Some builds can materialize directly:
200
+
201
+ - A single query source without custom instructions can run without sandbox or reactor.
202
+
203
+ Other builds require sandbox execution and a reactor:
204
+
205
+ - File sources
206
+ - Text sources
207
+ - Existing dataset transformations
208
+ - Multiple sources
209
+ - Query sources with custom instructions
210
+ - Any build that needs agent-driven parsing or transformation
211
+
212
+ If a required sandbox or reactor is missing, the builder throws `dataset_sandbox_required` or `dataset_reactor_required`.
213
+
214
+ ## Build Result
215
+
216
+ ```ts
217
+ type DatasetBuildResult = {
218
+ datasetId: string
219
+ dataset: any
220
+ previewRows: any[]
221
+ reader: {
222
+ read(cursor?: number, limit?: number): Promise<{
223
+ rows: any[]
224
+ cursor: number
225
+ done: boolean
226
+ }>
227
+ read(params?: { cursor?: number; limit?: number }): Promise<{
228
+ rows: any[]
229
+ cursor: number
230
+ done: boolean
231
+ }>
232
+ }
233
+ firstRow?: any | null
234
+ object?: any | null
235
+ }
236
+ ```
237
+
238
+ Read more rows with the returned reader:
239
+
240
+ ```ts
241
+ const page = await result.reader.read({ cursor: 0, limit: 100 })
242
+ ```
243
+
244
+ Use `.first()` when the build must produce zero or one row:
245
+
246
+ ```ts
247
+ const result = await dataset(runtime)
248
+ .fromQuery(sourceDomain, { query: { source_items: { $: { limit: 1 } } } })
249
+ .first()
250
+ .build({ datasetId: "single_item_v1" })
251
+
252
+ console.log(result.firstRow)
253
+ ```
254
+
255
+ If more than one row is produced, the builder throws `dataset_first_expected_zero_or_one_row`.
256
+
257
+ ## Declarative Tool
258
+
259
+ `createMaterializeDatasetTool` exposes the same materialization contract as an AI SDK tool. It is useful when a reactor or agent needs to request dataset builds declaratively.
260
+
261
+ ```ts
262
+ import { createMaterializeDatasetTool } from "@ekairos/dataset"
263
+
264
+ const materializeDataset = createMaterializeDatasetTool({
265
+ runtime,
266
+ reactor,
267
+ queryDomain: sourceDomain,
268
+ })
269
+ ```
270
+
271
+ Tool input:
272
+
273
+ ```ts
274
+ {
275
+ datasetId?: string
276
+ sandboxId?: string
277
+ title?: string
278
+ sources: Array<
279
+ | { kind: "file"; fileId: string; description?: string }
280
+ | { kind: "text"; text: string; mimeType?: string; name?: string; description?: string }
281
+ | { kind: "dataset"; datasetId: string; description?: string }
282
+ | { kind: "query"; query: Record<string, any>; title?: string; explanation?: string }
283
+ >
284
+ instructions?: string
285
+ mode?: "auto" | "schema"
286
+ output?: "rows" | "object"
287
+ schema?: DatasetSchemaInput
288
+ first?: boolean
289
+ }
290
+ ```
291
+
292
+ The tool returns only `{ datasetId }`.
293
+
294
+ The tool runtime must include `datasetDomain`, and its `queryDomain` must also be compatible with that same runtime.
295
+
296
+ ## Replacing Structure
297
+
298
+ Old structure-style chain:
299
+
300
+ ```ts
301
+ await structure(env, { datasetId: "summary_v1" })
302
+ .from({ kind: "text", text, mimeType: "text/plain", name: "input.txt" })
303
+ .instructions("Return a summary object.")
304
+ .schema(summarySchema)
305
+ .asObject()
306
+ .build()
307
+ ```
308
+
309
+ Dataset replacement:
310
+
311
+ ```ts
312
+ await dataset(runtime, { datasetId: "summary_v1" })
313
+ .from({ kind: "text", text, mimeType: "text/plain", name: "input.txt" })
314
+ .instructions("Return a summary object.")
315
+ .schema(summarySchema)
316
+ .asObject()
317
+ .sandbox({ sandboxId })
318
+ .reactor(reactor)
319
+ .build()
320
+ ```
321
+
322
+ Key differences:
323
+
324
+ - Pass `runtime`, not `env`.
325
+ - The runtime must include `datasetDomain`.
326
+ - Query sources must be explicit: `.fromQuery(sourceDomain, { query })`.
327
+ - Object output is stored as a one-row dataset, not as structure context content.
328
+ - Sandbox and reactor are explicit when parsing or transforming is required.
329
+
330
+ ## Exports
331
+
332
+ Main exports:
333
+
334
+ - `dataset`
335
+ - `datasetDomain`
336
+ - `createMaterializeDatasetTool`
337
+ - `materializeDatasetToolInputSchema`
338
+ - `DatasetBuilder`
339
+ - `DatasetBuildResult`
340
+ - `DatasetSourceInput`
341
+ - `DatasetSchemaInput`
342
+ - `DatasetOutput`
343
+ - `DatasetMode`
344
+
345
+ ## Internal Notes
346
+
347
+ Implementation notes live in `src/README.md`. Public callers should use the root package API from `@ekairos/dataset`.
@@ -0,0 +1,8 @@
1
+ import "./builder/materialize.js";
2
+ export * from "./dataset.js";
3
+ export * from "./domain.js";
4
+ export * from "./materializeDataset.tool.js";
5
+ export * from "./schema.js";
6
+ export * from "./service.js";
7
+ export * from "./file/file-dataset.agent.js";
8
+ export * from "./transform/index.js";
package/dist/agents.js ADDED
@@ -0,0 +1,8 @@
1
+ import "./builder/materialize.js";
2
+ export * from "./dataset.js";
3
+ export * from "./domain.js";
4
+ export * from "./materializeDataset.tool.js";
5
+ export * from "./schema.js";
6
+ export * from "./service.js";
7
+ export * from "./file/file-dataset.agent.js";
8
+ export * from "./transform/index.js";
@@ -0,0 +1,9 @@
1
+ import type { AnyDatasetRuntime, DatasetBuilderState, InternalDatasetResource } from "./types.js";
2
/**
 * Contract for the agent-backed materializers that can be registered with
 * `registerDatasetAgentMaterializers`.
 */
export type DatasetAgentMaterializers = {
    /**
     * Materializes a single `file` or `text` resource into the target dataset.
     *
     * @param state Builder state for the current dataset build.
     * @param resource The file or text resource to materialize.
     * @param targetDatasetId Id of the dataset to write to.
     * @returns A string — presumably the materialized dataset id; confirm against the implementation.
     */
    materializeSingleFileLikeResource<Runtime extends AnyDatasetRuntime>(
        state: DatasetBuilderState<Runtime>,
        resource: Extract<InternalDatasetResource, { kind: "file" | "text" }>,
        targetDatasetId: string,
    ): Promise<string>;

    /**
     * Materializes a derived dataset from the builder state.
     *
     * @param state Builder state for the current dataset build.
     * @param targetDatasetId Id of the dataset to write to.
     * @returns A string — presumably the materialized dataset id; confirm against the implementation.
     */
    materializeDerivedDataset<Runtime extends AnyDatasetRuntime>(
        state: DatasetBuilderState<Runtime>,
        targetDatasetId: string,
    ): Promise<string>;
};
8
/**
 * Stores the given materializers as the module-wide registration,
 * replacing any previously registered set.
 *
 * @param materializers Implementations to make available through `getDatasetAgentMaterializers`.
 */
export declare function registerDatasetAgentMaterializers(
    materializers: DatasetAgentMaterializers,
): void;
9
/**
 * Returns the currently registered agent materializers.
 *
 * @throws Error with message `dataset_agent_materializers_not_registered`
 *   when nothing has been registered yet.
 */
export declare function getDatasetAgentMaterializers(): DatasetAgentMaterializers;
@@ -0,0 +1,10 @@
1
/** Module-level slot for the registered materializers; stays `null` until registration. */
let registeredMaterializers = null;

/**
 * Records the materializers to hand out from `getDatasetAgentMaterializers`.
 * A later call overwrites an earlier registration.
 *
 * @param materializers The materializer implementations to register.
 */
export function registerDatasetAgentMaterializers(materializers) {
    registeredMaterializers = materializers;
}

/**
 * Returns the registered materializers.
 *
 * @returns The value passed to the most recent registration call.
 * @throws Error `dataset_agent_materializers_not_registered` when the slot is empty.
 */
export function getDatasetAgentMaterializers() {
    if (registeredMaterializers) {
        return registeredMaterializers;
    }
    throw new Error("dataset_agent_materializers_not_registered");
}
@@ -0,0 +1,15 @@
1
+ import type { AnyDatasetRuntime, InternalDatasetResource } from "./types.js";
2
/** Result of resolving the resources of a dataset build into an execution context. */
type DatasetContextResolution = {
    /** Id of the context created (or reused) for the dataset. */
    contextId: string;
    /** Resources in the builder's internal representation. */
    resources: InternalDatasetResource[];
    /** The same resources as the records persisted on the context. */
    contextResources: DatasetContextResourceRecord[];
};
7
/**
 * Shape of one resource entry stored on a context. Four descriptive fields
 * are always present; any further keys are kind-specific and untyped.
 */
type DatasetContextResourceRecord = {
    /** Key identifying the resource within its context. */
    key: string;
    /** Resource kind discriminator (e.g. "file", "text", "dataset", "query"). */
    type: string;
    /** Display name of the resource. */
    name: string;
    /** Description of the resource. */
    description: string;
    /** Additional kind-specific fields. */
    [key: string]: unknown;
};
14
/**
 * Resolves the resources of a dataset build into a context and returns the
 * context id together with both resource representations.
 *
 * @param runtime Runtime used to reach the underlying stores.
 * @param datasetId Id of the dataset being built.
 * @param resources Resources declared for the build.
 */
export declare function resolveDatasetResourceContext<Runtime extends AnyDatasetRuntime>(
    runtime: Runtime,
    datasetId: string,
    resources: InternalDatasetResource[],
): Promise<DatasetContextResolution>;
15
+ export {};
@@ -0,0 +1,251 @@
1
+ import { eventsDomain } from "@ekairos/events";
2
+ import { createDatasetId } from "../id.js";
3
+ import { datasetDomain } from "../schema.js";
4
+ import { DatasetService } from "../service.js";
5
+ import { getDomainDescriptor } from "./rows.js";
6
/**
 * Builds the `where` clause used to look up a context reference.
 *
 * @param context Reference object carrying either an `id` or a `key` property.
 * @returns `{ id }` when the reference has an `id` property, otherwise `{ key }`.
 */
function getContextWhere(context) {
    if ("id" in context) {
        return { id: context.id };
    }
    return { key: context.key };
}
9
/**
 * Resolves the database handle scoped to the events domain.
 *
 * @param runtime Runtime exposing `use(domain)`.
 * @returns The `db` property of the scoped events-domain handle.
 */
async function getEventsDb(runtime) {
    const { db } = await runtime.use(eventsDomain);
    return db;
}
13
/**
 * Resolves the database handle scoped to the dataset domain.
 *
 * @param runtime Runtime exposing `use(domain)`.
 * @returns The `db` property of the scoped dataset-domain handle.
 */
async function getDatasetDb(runtime) {
    const { db } = await runtime.use(datasetDomain);
    return db;
}
17
/**
 * Derives the context key for a resource from its kind, its position and a
 * kind-specific identifier.
 *
 * @param index Zero-based position of the resource in the resource list.
 * @param resource Resource whose `kind` selects the key format.
 * @returns `<kind>:<index>:<identifier>` for known kinds, `resource:<index>` otherwise.
 */
function resourceKey(index, resource) {
    switch (resource.kind) {
        case "file":
            return `file:${index}:${resource.fileId}`;
        case "text":
            // Unnamed text falls back to the literal "inline".
            return `text:${index}:${resource.name ?? "inline"}`;
        case "dataset":
            return `dataset:${index}:${resource.datasetId}`;
        case "query":
            return `query:${index}:${resource.title ?? "query"}`;
        default:
            return `resource:${index}`;
    }
}
28
/**
 * Picks a display name for a resource, using a one-based positional label
 * when the resource carries no name of its own.
 *
 * @param index Zero-based position of the resource in the resource list.
 * @param resource Resource whose `kind` selects the naming rule.
 * @returns The resource's own name/title/id, or `<Kind> <index + 1>`.
 */
function resourceName(index, resource) {
    const ordinal = index + 1;
    switch (resource.kind) {
        case "file":
            return resource.filename ?? `File ${ordinal}`;
        case "text":
            return resource.name ?? `Text ${ordinal}`;
        case "dataset":
            return resource.datasetId;
        case "query":
            return resource.title ?? `Query ${ordinal}`;
        default:
            return `Resource ${ordinal}`;
    }
}
39
/**
 * Chooses the description stored for a resource.
 *
 * Preference order: a non-blank `description`, then (query resources only)
 * a non-blank `explanation`, then a generic sentence naming the kind.
 *
 * @param resource Resource to describe.
 * @returns The trimmed description text or the generic fallback.
 */
function resourceDescription(resource) {
    const ownDescription = "description" in resource && typeof resource.description === "string"
        ? resource.description.trim()
        : "";
    if (ownDescription) {
        return ownDescription;
    }
    if (resource.kind === "query" && typeof resource.explanation === "string") {
        const fromExplanation = resource.explanation.trim();
        if (fromExplanation) {
            return fromExplanation;
        }
    }
    return `Dataset ${resource.kind} resource.`;
}
48
/**
 * Converts a builder resource into the record stored on a context.
 *
 * Every record carries `key`, `type`, `name` and `description`; the
 * remaining fields depend on the resource kind.
 *
 * @param index Zero-based position of the resource in the resource list.
 * @param resource Builder resource to convert.
 * @returns The context resource record.
 */
function resourceToContextResource(index, resource) {
    const common = {
        key: resourceKey(index, resource),
        type: resource.kind,
        name: resourceName(index, resource),
        description: resourceDescription(resource),
    };
    switch (resource.kind) {
        case "file":
            return {
                ...common,
                fileId: resource.fileId,
                filename: resource.filename,
                mediaType: resource.mediaType,
            };
        case "text":
            return {
                ...common,
                text: resource.text,
                mimeType: resource.mimeType,
            };
        case "dataset":
            return {
                ...common,
                datasetId: resource.datasetId,
            };
        case "query":
            return {
                ...common,
                query: resource.query,
                title: resource.title,
                explanation: resource.explanation,
                // Spread last, so descriptor fields win over same-named keys above.
                ...getDomainDescriptor(resource.domain),
            };
        default:
            return common;
    }
}
87
/**
 * Creates or refreshes the execution context of a dataset.
 *
 * The context is looked up by the key `dataset:<datasetId>`. When one exists
 * its id is reused and the record is updated in place; otherwise a new id is
 * generated. Dataset-typed resources are enriched before being stored.
 *
 * `createdAt` is written only when a new context is created, so refreshing
 * an existing context no longer overwrites its original creation timestamp.
 *
 * @param params `{ runtime, datasetId, resources }` — the runtime used to
 *   reach the events store, the dataset id, and the context resource records.
 * @returns `{ contextId }` of the created or reused context.
 */
async function createDatasetResourceContextStep(params) {
    "use step";
    const db = await getEventsDb(params.runtime);
    const contextKey = `dataset:${params.datasetId}`;
    const existing = await db.query({
        event_contexts: {
            $: { where: { key: contextKey }, limit: 1 },
        },
    });
    const existingContextId = existing.event_contexts?.[0]?.id;
    const isNewContext = existingContextId == null;
    const contextId = isNewContext ? createDatasetId() : existingContextId;
    const now = new Date();
    const resources = await enrichDatasetContextResources(params.runtime, params.resources);
    await db.transact([
        db.tx.event_contexts[contextId].update({
            key: contextKey,
            // Stamp the creation time once; later refreshes only move updatedAt.
            ...(isNewContext ? { createdAt: now } : {}),
            updatedAt: now,
            name: `Dataset ${params.datasetId}`,
            status: "open_idle",
            content: {
                datasetId: params.datasetId,
                resourceCount: resources.length,
            },
            resources,
            description: `Dataset execution context for ${params.datasetId}.`,
            goal: "Produce the dataset output from the resources declared in this context.",
        }),
    ]);
    return {
        contextId,
    };
}
119
/**
 * Attaches a row preview to every dataset-typed context resource.
 *
 * When no resource is dataset-typed, the input array is returned as is and
 * the dataset store is never touched. Otherwise each dataset resource gets
 * `previewRows`/`previewLimit` on success or `previewError` on failure;
 * other resources pass through unchanged. Previews are fetched one at a
 * time, in list order.
 *
 * @param runtime Runtime used to reach the dataset store.
 * @param resources Context resource records.
 * @returns The original array, or a new array of enriched records.
 */
async function enrichDatasetContextResources(runtime, resources) {
    const previewLimit = 20;
    const isDatasetResource = (entry) => entry.type === "dataset" && typeof entry.datasetId === "string";
    if (!resources.some(isDatasetResource)) {
        return resources;
    }
    const db = await getDatasetDb(runtime);
    const service = new DatasetService(db);
    const enriched = [];
    for (const entry of resources) {
        if (!isDatasetResource(entry)) {
            enriched.push(entry);
            continue;
        }
        const preview = await service.previewRows(entry.datasetId, previewLimit);
        enriched.push(preview.ok
            ? { ...entry, previewRows: preview.data, previewLimit }
            : { ...entry, previewError: preview.error });
    }
    return enriched;
}
147
/**
 * Converts a stored context resource record back into a builder resource.
 *
 * - `file` with a non-blank `fileId`  -> file resource (id trimmed)
 * - `dataset` with a non-blank `datasetId` -> dataset resource (id trimmed)
 * - `text` with a string `text` -> text resource (`mimeType` defaults to "text/plain")
 * - `query` -> rejected
 * - anything else, including file/dataset records with a blank id -> a text
 *   resource whose body is the JSON serialization of the record
 *
 * @param resource Context resource record.
 * @returns The corresponding builder resource.
 * @throws Error `dataset_context_query_resource_requires_builder_shortcut` for query records.
 */
function contextResourceToDatasetResource(resource) {
    const { type, description } = resource;
    const stringOrUndefined = (value) => (typeof value === "string" ? value : undefined);
    const trimmedOrEmpty = (value) => (typeof value === "string" ? value.trim() : "");

    if (type === "file") {
        const fileId = trimmedOrEmpty(resource.fileId);
        if (fileId) {
            return {
                kind: "file",
                fileId,
                description,
                filename: stringOrUndefined(resource.filename),
                mediaType: stringOrUndefined(resource.mediaType),
            };
        }
    }
    if (type === "dataset") {
        const datasetId = trimmedOrEmpty(resource.datasetId);
        if (datasetId) {
            return {
                kind: "dataset",
                datasetId,
                description,
            };
        }
    }
    if (type === "text" && typeof resource.text === "string") {
        return {
            kind: "text",
            text: resource.text,
            mimeType: stringOrUndefined(resource.mimeType) ?? "text/plain",
            name: resource.name,
            description,
        };
    }
    if (type === "query") {
        throw new Error("dataset_context_query_resource_requires_builder_shortcut");
    }
    // Unknown or incomplete record: hand it over as a JSON text resource.
    return {
        kind: "text",
        text: JSON.stringify({ resource }, null, 2),
        mimeType: "application/vnd.ekairos.context-resource+json",
        name: `${resource.key}.context-resource.json`,
        description,
    };
}
188
/**
 * Loads an existing context and returns its resources in both forms.
 *
 * The context is selected by id or key (see `getContextWhere`). Each stored
 * record is copied with `sourceContextId` / `sourceResourceKey` filled in
 * when absent, so the origin of a resource survives being copied into
 * another context.
 *
 * @param params `{ runtime, context }` — runtime used to reach the events
 *   store and the context reference to look up.
 * @returns `{ contextId, resources, contextResources }` for the source context.
 * @throws Error `dataset_context_not_found` when no matching context exists.
 * @throws Error `dataset_context_resources_required` when the context has no resources.
 */
async function readExistingContext(params) {
    "use step";
    const db = await getEventsDb(params.runtime);
    const where = getContextWhere(params.context);
    const res = await db.query({
        event_contexts: {
            $: { where, limit: 1 },
        },
    });
    const row = res?.event_contexts?.[0];
    if (!row?.id) {
        throw new Error("dataset_context_not_found");
    }
    const storedResources = Array.isArray(row.resources) ? row.resources : [];
    if (storedResources.length === 0) {
        throw new Error("dataset_context_resources_required");
    }
    const sourceContextId = String(row.id);
    const contextResources = storedResources.map((record) => ({
        ...record,
        // Keep provenance already recorded on the record; otherwise point at this context.
        sourceContextId: record.sourceContextId ?? sourceContextId,
        sourceResourceKey: record.sourceResourceKey ?? record.key,
    }));
    const resources = storedResources.map((record) => contextResourceToDatasetResource(record));
    return {
        contextId: sourceContextId,
        resources,
        contextResources,
    };
}
220
/**
 * Resolves the resources of a dataset build into a dataset context.
 *
 * Without a `context` resource, the given resources are converted to context
 * records and stored on the dataset's context. With a single `context`
 * resource (which must be the only resource), the referenced context is read
 * and its resources are copied onto the dataset's context.
 *
 * @param runtime Runtime used to reach the underlying stores.
 * @param datasetId Id of the dataset being built.
 * @param resources Resources declared for the build.
 * @returns `{ contextId, resources, contextResources }` where `contextId`
 *   is the id of the dataset's own context.
 * @throws Error `dataset_context_resource_must_be_unique` for more than one context resource.
 * @throws Error `dataset_context_resource_is_exclusive` when a context resource is mixed with others.
 */
export async function resolveDatasetResourceContext(runtime, datasetId, resources) {
    const contextRefs = resources.filter((candidate) => candidate.kind === "context");
    if (contextRefs.length > 1) {
        throw new Error("dataset_context_resource_must_be_unique");
    }
    if (contextRefs.length === 0) {
        const contextResources = resources.map((item, position) => resourceToContextResource(position, item));
        const { contextId } = await createDatasetResourceContextStep({
            runtime,
            datasetId,
            resources: contextResources,
        });
        return { contextId, resources, contextResources };
    }
    // Exactly one context reference: it may not be combined with other resources.
    if (resources.length > 1) {
        throw new Error("dataset_context_resource_is_exclusive");
    }
    const [contextRef] = contextRefs;
    const source = await readExistingContext({ runtime, context: contextRef });
    const { contextId } = await createDatasetResourceContextStep({
        runtime,
        datasetId,
        resources: source.contextResources,
    });
    return { ...source, contextId };
}
@@ -0,0 +1,5 @@
1
+ import type { DatasetSchemaInput } from "./types.js";
2
/**
 * Returns the default instruction sentence for building a dataset from a file resource.
 *
 * The result is one of two fixed sentences. NOTE(review): presumably the
 * schema-specific sentence is returned when `schema` is provided — confirm
 * against the implementation.
 *
 * @param schema Optional output schema.
 */
export declare function buildFileDefaultInstructions(
    schema?: DatasetSchemaInput,
):
    | "Create a dataset from the resource file and ensure each output row matches the provided dataset schema exactly."
    | "Create a dataset representing the resource content as structured rows.";
3
/**
 * Returns the instruction sentence for representing a raw resource as
 * structured rows without business transformations.
 *
 * The result is one of two fixed sentences, one mentioning text content and
 * one mentioning file content. NOTE(review): presumably selected by
 * `resourceKind` — confirm against the implementation.
 *
 * @param resourceKind Kind of the raw resource.
 */
export declare function buildRawResourceInstructions(
    resourceKind: "file" | "text",
):
    | "Create a dataset representing the raw text content as structured rows without applying business transformations."
    | "Create a dataset representing the raw file content as structured rows without applying business transformations.";
4
/**
 * Builds the instruction text for a transformation build.
 *
 * @param resourceCount Number of resources taking part in the transformation.
 * @param userInstructions Optional caller-supplied instructions.
 * @param schema Optional output schema.
 * @returns The instruction text.
 */
export declare function buildTransformInstructions(
    resourceCount: number,
    userInstructions?: string,
    schema?: DatasetSchemaInput,
): string;
5
/**
 * Builds the instruction text for an object-output build.
 *
 * @param userInstructions Optional caller-supplied instructions.
 * @returns The instruction text.
 */
export declare function buildObjectOutputInstructions(
    userInstructions?: string,
): string;