@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. package/README.md +347 -0
  2. package/dist/agents.d.ts +8 -0
  3. package/dist/agents.js +8 -0
  4. package/dist/builder/agentMaterializers.d.ts +9 -0
  5. package/dist/builder/agentMaterializers.js +10 -0
  6. package/dist/builder/context.d.ts +15 -0
  7. package/dist/builder/context.js +251 -0
  8. package/dist/builder/instructions.d.ts +5 -0
  9. package/dist/builder/instructions.js +40 -0
  10. package/dist/builder/materialize.d.ts +83 -0
  11. package/dist/builder/materialize.js +548 -0
  12. package/dist/builder/materializeQuery.d.ts +12 -0
  13. package/dist/builder/materializeQuery.js +31 -0
  14. package/dist/builder/persistence.d.ts +22 -0
  15. package/dist/builder/persistence.js +192 -0
  16. package/dist/builder/rows.d.ts +7 -0
  17. package/dist/builder/rows.js +56 -0
  18. package/dist/builder/schemaInference.d.ts +3 -0
  19. package/dist/builder/schemaInference.js +61 -0
  20. package/dist/builder/types.d.ts +144 -0
  21. package/dist/builder/types.js +1 -0
  22. package/dist/clearDataset.tool.d.ts +2 -3
  23. package/dist/clearDataset.tool.js +13 -17
  24. package/dist/completeDataset.steps.d.ts +117 -0
  25. package/dist/completeDataset.steps.js +537 -0
  26. package/dist/completeDataset.tool.d.ts +132 -7
  27. package/dist/completeDataset.tool.js +46 -192
  28. package/dist/contextResources.d.ts +31 -0
  29. package/dist/contextResources.js +151 -0
  30. package/dist/contextWorkspace.d.ts +79 -0
  31. package/dist/contextWorkspace.js +234 -0
  32. package/dist/dataset/steps.d.ts +39 -15
  33. package/dist/dataset/steps.js +96 -39
  34. package/dist/dataset.d.ts +3 -67
  35. package/dist/dataset.js +129 -521
  36. package/dist/datasetFiles.d.ts +5 -1
  37. package/dist/datasetFiles.js +29 -27
  38. package/dist/defineNotation.tool.d.ts +49 -0
  39. package/dist/defineNotation.tool.js +154 -0
  40. package/dist/domain.d.ts +1 -2
  41. package/dist/domain.js +1 -6
  42. package/dist/executeCommand.tool.d.ts +2 -30
  43. package/dist/executeCommand.tool.js +165 -39
  44. package/dist/file/file-dataset.agent.d.ts +19 -56
  45. package/dist/file/file-dataset.agent.js +181 -134
  46. package/dist/file/file-dataset.steps.d.ts +27 -0
  47. package/dist/file/file-dataset.steps.js +47 -0
  48. package/dist/file/file-dataset.types.d.ts +64 -0
  49. package/dist/file/file-dataset.types.js +1 -0
  50. package/dist/file/filepreview.d.ts +5 -35
  51. package/dist/file/filepreview.js +60 -107
  52. package/dist/file/filepreview.types.d.ts +31 -0
  53. package/dist/file/filepreview.types.js +1 -0
  54. package/dist/file/generateSchema.tool.d.ts +2 -3
  55. package/dist/file/generateSchema.tool.js +11 -15
  56. package/dist/file/index.d.ts +1 -2
  57. package/dist/file/index.js +1 -18
  58. package/dist/file/prompts.d.ts +2 -3
  59. package/dist/file/prompts.js +152 -32
  60. package/dist/file/scripts.generated.d.ts +1 -0
  61. package/dist/file/scripts.generated.js +11 -0
  62. package/dist/file/steps.d.ts +1 -2
  63. package/dist/file/steps.js +9 -7
  64. package/dist/id.d.ts +1 -0
  65. package/dist/id.js +10 -0
  66. package/dist/index.d.ts +9 -7
  67. package/dist/index.js +9 -23
  68. package/dist/materializeDataset.tool.d.ts +51 -31
  69. package/dist/materializeDataset.tool.js +81 -65
  70. package/dist/notation.d.ts +205 -0
  71. package/dist/notation.js +424 -0
  72. package/dist/query/index.d.ts +1 -2
  73. package/dist/query/index.js +1 -18
  74. package/dist/query/queryDomain.d.ts +3 -4
  75. package/dist/query/queryDomain.js +3 -40
  76. package/dist/query/queryDomain.step.d.ts +1 -1
  77. package/dist/query/queryDomain.step.js +24 -13
  78. package/dist/sandbox/steps.d.ts +23 -15
  79. package/dist/sandbox/steps.js +73 -76
  80. package/dist/sandbox.steps.d.ts +1 -2
  81. package/dist/sandbox.steps.js +1 -18
  82. package/dist/schema.d.ts +15 -13
  83. package/dist/schema.js +27 -37
  84. package/dist/service.d.ts +12 -5
  85. package/dist/service.js +88 -15
  86. package/dist/skill.d.ts +0 -1
  87. package/dist/skill.js +12 -17
  88. package/dist/transform/filepreview.d.ts +2 -3
  89. package/dist/transform/filepreview.js +9 -26
  90. package/dist/transform/index.d.ts +2 -3
  91. package/dist/transform/index.js +2 -8
  92. package/dist/transform/prompts.d.ts +1 -34
  93. package/dist/transform/prompts.js +66 -46
  94. package/dist/transform/transform-dataset.agent.d.ts +20 -45
  95. package/dist/transform/transform-dataset.agent.js +151 -91
  96. package/dist/transform/transform-dataset.steps.d.ts +30 -0
  97. package/dist/transform/transform-dataset.steps.js +61 -0
  98. package/dist/transform/transform-dataset.types.d.ts +95 -0
  99. package/dist/transform/transform-dataset.types.js +1 -0
  100. package/dist/transform/transformDataset.d.ts +3 -3
  101. package/dist/transform/transformDataset.js +15 -18
  102. package/dist/writeDatasetRows.tool.d.ts +188 -0
  103. package/dist/writeDatasetRows.tool.js +258 -0
  104. package/package.json +33 -8
  105. package/dist/clearDataset.tool.d.ts.map +0 -1
  106. package/dist/clearDataset.tool.js.map +0 -1
  107. package/dist/completeDataset.tool.d.ts.map +0 -1
  108. package/dist/completeDataset.tool.js.map +0 -1
  109. package/dist/dataset/steps.d.ts.map +0 -1
  110. package/dist/dataset/steps.js.map +0 -1
  111. package/dist/dataset.d.ts.map +0 -1
  112. package/dist/dataset.js.map +0 -1
  113. package/dist/datasetFiles.d.ts.map +0 -1
  114. package/dist/datasetFiles.js.map +0 -1
  115. package/dist/domain.d.ts.map +0 -1
  116. package/dist/domain.js.map +0 -1
  117. package/dist/eventsReactRuntime.d.ts +0 -22
  118. package/dist/eventsReactRuntime.d.ts.map +0 -1
  119. package/dist/eventsReactRuntime.js +0 -29
  120. package/dist/eventsReactRuntime.js.map +0 -1
  121. package/dist/executeCommand.tool.d.ts.map +0 -1
  122. package/dist/executeCommand.tool.js.map +0 -1
  123. package/dist/file/file-dataset.agent.d.ts.map +0 -1
  124. package/dist/file/file-dataset.agent.js.map +0 -1
  125. package/dist/file/filepreview.d.ts.map +0 -1
  126. package/dist/file/filepreview.js.map +0 -1
  127. package/dist/file/generateSchema.tool.d.ts.map +0 -1
  128. package/dist/file/generateSchema.tool.js.map +0 -1
  129. package/dist/file/index.d.ts.map +0 -1
  130. package/dist/file/index.js.map +0 -1
  131. package/dist/file/prompts.d.ts.map +0 -1
  132. package/dist/file/prompts.js.map +0 -1
  133. package/dist/file/steps.d.ts.map +0 -1
  134. package/dist/file/steps.js.map +0 -1
  135. package/dist/index.d.ts.map +0 -1
  136. package/dist/index.js.map +0 -1
  137. package/dist/materializeDataset.tool.d.ts.map +0 -1
  138. package/dist/materializeDataset.tool.js.map +0 -1
  139. package/dist/query/index.d.ts.map +0 -1
  140. package/dist/query/index.js.map +0 -1
  141. package/dist/query/queryDomain.d.ts.map +0 -1
  142. package/dist/query/queryDomain.js.map +0 -1
  143. package/dist/query/queryDomain.step.d.ts.map +0 -1
  144. package/dist/query/queryDomain.step.js.map +0 -1
  145. package/dist/sandbox/steps.d.ts.map +0 -1
  146. package/dist/sandbox/steps.js.map +0 -1
  147. package/dist/sandbox.steps.d.ts.map +0 -1
  148. package/dist/sandbox.steps.js.map +0 -1
  149. package/dist/schema.d.ts.map +0 -1
  150. package/dist/schema.js.map +0 -1
  151. package/dist/service.d.ts.map +0 -1
  152. package/dist/service.js.map +0 -1
  153. package/dist/skill.d.ts.map +0 -1
  154. package/dist/skill.js.map +0 -1
  155. package/dist/transform/filepreview.d.ts.map +0 -1
  156. package/dist/transform/filepreview.js.map +0 -1
  157. package/dist/transform/index.d.ts.map +0 -1
  158. package/dist/transform/index.js.map +0 -1
  159. package/dist/transform/prompts.d.ts.map +0 -1
  160. package/dist/transform/prompts.js.map +0 -1
  161. package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
  162. package/dist/transform/transform-dataset.agent.js.map +0 -1
  163. package/dist/transform/transformDataset.d.ts.map +0 -1
  164. package/dist/transform/transformDataset.js.map +0 -1
@@ -1,26 +1,23 @@
1
- "use strict";
2
1
  // Plain build API using template literals and XML
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.buildFileDatasetPrompt = buildFileDatasetPrompt;
5
- const xmlbuilder2_1 = require("xmlbuilder2");
6
- const datasetFiles_1 = require("../datasetFiles");
2
+ import { create } from "xmlbuilder2";
3
+ import { getDatasetWorkstation, getDatasetOutputPath } from "../datasetFiles.js";
7
4
  function buildRole() {
8
- let xml = (0, xmlbuilder2_1.create)()
5
+ let xml = create()
9
6
  .ele("Role")
10
7
  .txt("You are a dataset creator for a SINGLE file. Your goal is to convert the file content into a validated JSONL dataset where each line represents one record.")
11
8
  .up();
12
9
  return xml.end({ prettyPrint: true, headless: true });
13
10
  }
14
11
  function buildGoal() {
15
- let xml = (0, xmlbuilder2_1.create)()
12
+ let xml = create()
16
13
  .ele("Goal")
17
- .txt("Convert the source file into a validated JSONL dataset (output.jsonl) where each line is a JSON object conforming to a generated schema. The schema describes ONE data record structure. Extract ONLY data records; exclude any header sections, metadata, or summary information from the file.")
14
+ .txt("Convert the input file into a validated JSONL dataset (output.jsonl) where each line is a JSON object conforming to a generated schema. The schema describes ONE data record structure. Extract ONLY data records; exclude any header sections, metadata, or summary information from the file.")
18
15
  .up();
19
16
  return xml.end({ prettyPrint: true, headless: true });
20
17
  }
21
- function buildSourceInfo(context) {
22
- let xml = (0, xmlbuilder2_1.create)()
23
- .ele("Source")
18
+ function buildResourceInfo(context) {
19
+ let xml = create()
20
+ .ele("FileResource")
24
21
  .ele("Type").txt("file").up()
25
22
  .ele("FileId").txt(context.fileId).up()
26
23
  .ele("DatasetId").txt(context.datasetId).up()
@@ -29,7 +26,7 @@ function buildSourceInfo(context) {
29
26
  return xml;
30
27
  }
31
28
  function buildFilePreviewSection(preview) {
32
- let xml = (0, xmlbuilder2_1.create)()
29
+ let xml = create()
33
30
  .ele("FilePreview")
34
31
  .ele("TotalRows").txt(String(preview.totalRows)).up();
35
32
  if (preview.metadata) {
@@ -91,8 +88,9 @@ function buildErrorsSection(errors) {
91
88
  if (errors.length === 0) {
92
89
  return null;
93
90
  }
94
- let xml = (0, xmlbuilder2_1.create)()
95
- .ele("PreviousErrors");
91
+ let xml = create()
92
+ .ele("PreviousErrors")
93
+ .ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch input column names into schema keys piecemeal.").up();
96
94
  for (const error of errors) {
97
95
  xml = xml.ele("Error").txt(error).up();
98
96
  }
@@ -100,10 +98,10 @@ function buildErrorsSection(errors) {
100
98
  return xml;
101
99
  }
102
100
  function buildContextSection(context) {
103
- let xml = (0, xmlbuilder2_1.create)()
101
+ let xml = create()
104
102
  .ele("Context");
105
- const sourceXml = buildSourceInfo(context);
106
- xml = xml.import(sourceXml.first());
103
+ const resourceXml = buildResourceInfo(context);
104
+ xml = xml.import(resourceXml.first());
107
105
  if (context.filePreview) {
108
106
  const previewXml = buildFilePreviewSection(context.filePreview);
109
107
  xml = xml.import(previewXml.first());
@@ -117,46 +115,159 @@ function buildContextSection(context) {
117
115
  xml = xml.up();
118
116
  return xml.end({ prettyPrint: true, headless: true });
119
117
  }
118
+ function asRecord(value) {
119
+ return value && typeof value === "object" && !Array.isArray(value)
120
+ ? value
121
+ : null;
122
+ }
123
+ function getSchemaObject(context) {
124
+ return asRecord(context.schema?.schema);
125
+ }
126
+ function joinSchemaPath(basePath, key) {
127
+ return basePath === "$" ? `$.${key}` : `${basePath}.${key}`;
128
+ }
129
+ function collectSchemaContract(schema, path = "$", contract = {
130
+ requiredPaths: [],
131
+ propertyPaths: [],
132
+ enumConstraints: [],
133
+ closedObjectPaths: [],
134
+ }) {
135
+ const record = asRecord(schema);
136
+ if (!record) {
137
+ return contract;
138
+ }
139
+ if (Array.isArray(record.enum)) {
140
+ contract.enumConstraints.push({
141
+ path,
142
+ values: record.enum.map((value) => JSON.stringify(value)),
143
+ });
144
+ }
145
+ const properties = asRecord(record.properties);
146
+ if (properties) {
147
+ if (record.additionalProperties === false) {
148
+ contract.closedObjectPaths.push(path);
149
+ }
150
+ const required = Array.isArray(record.required)
151
+ ? record.required.filter((value) => typeof value === "string")
152
+ : [];
153
+ for (const key of required) {
154
+ contract.requiredPaths.push(joinSchemaPath(path, key));
155
+ }
156
+ for (const [key, childSchema] of Object.entries(properties)) {
157
+ const childPath = joinSchemaPath(path, key);
158
+ contract.propertyPaths.push(childPath);
159
+ collectSchemaContract(childSchema, childPath, contract);
160
+ }
161
+ }
162
+ if (record.items) {
163
+ collectSchemaContract(record.items, `${path}[]`, contract);
164
+ }
165
+ for (const keyword of ["oneOf", "anyOf", "allOf"]) {
166
+ if (Array.isArray(record[keyword])) {
167
+ for (const childSchema of record[keyword]) {
168
+ collectSchemaContract(childSchema, path, contract);
169
+ }
170
+ }
171
+ }
172
+ return contract;
173
+ }
174
+ function appendLimitedList(xml, elementName, itemName, values, maxItems) {
175
+ let node = xml.ele(elementName);
176
+ for (const value of values.slice(0, maxItems)) {
177
+ node = node.ele(itemName).txt(value).up();
178
+ }
179
+ if (values.length > maxItems) {
180
+ node = node.ele("Truncated").txt(String(values.length - maxItems)).up();
181
+ }
182
+ return node.up();
183
+ }
120
184
  function buildSchemaSection(context) {
121
- if (!context.schema) {
185
+ const schema = getSchemaObject(context);
186
+ if (!context.schema || !schema) {
122
187
  return "";
123
188
  }
124
- let xml = (0, xmlbuilder2_1.create)()
189
+ const contract = collectSchemaContract(schema);
190
+ let xml = create()
125
191
  .com("Schema section: This defines the structure of ONE RECORD (row). Each line in the JSONL output must conform to this schema.")
126
192
  .ele("Schema")
127
193
  .ele("Title").txt(context.schema.title || "").up()
128
- .ele("Description").txt(context.schema.description || "").up()
129
- .ele("JsonSchema").txt(JSON.stringify(context.schema.schema, null, 2)).up()
194
+ .ele("Description").txt(context.schema.description || "").up();
195
+ xml = xml
196
+ .ele("SchemaContract")
197
+ .ele("Purpose").txt("Compact output contract derived from JSON Schema. Use this before writing output.jsonl.").up()
198
+ .ele("Rule").txt("Use only schema property keys in data objects. Input headers are input labels, not output keys.").up()
199
+ .ele("Rule").txt("Required paths are required everywhere, including nested objects and array items.").up()
200
+ .ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize input labels to the closest valid enum literal; never emit a value outside the enum.").up();
201
+ xml = appendLimitedList(xml, "RequiredPaths", "Path", contract.requiredPaths, 120);
202
+ xml = appendLimitedList(xml, "PropertyPaths", "Path", contract.propertyPaths, 160);
203
+ let enumsXml = xml.ele("EnumConstraints");
204
+ for (const constraint of contract.enumConstraints.slice(0, 80)) {
205
+ let enumXml = enumsXml.ele("Enum", { path: constraint.path });
206
+ for (const value of constraint.values.slice(0, 80)) {
207
+ enumXml = enumXml.ele("Value").txt(value).up();
208
+ }
209
+ if (constraint.values.length > 80) {
210
+ enumXml = enumXml.ele("Truncated").txt(String(constraint.values.length - 80)).up();
211
+ }
212
+ enumsXml = enumXml.up();
213
+ }
214
+ if (contract.enumConstraints.length > 80) {
215
+ enumsXml = enumsXml.ele("Truncated").txt(String(contract.enumConstraints.length - 80)).up();
216
+ }
217
+ xml = enumsXml.up();
218
+ xml = appendLimitedList(xml, "ClosedObjectPaths", "Path", contract.closedObjectPaths, 80);
219
+ xml = xml
220
+ .up()
221
+ .ele("JsonSchema").txt(JSON.stringify(schema, null, 2)).up()
130
222
  .up();
131
223
  return xml.end({ prettyPrint: true, headless: true });
132
224
  }
133
225
  function buildInstructions(context) {
134
- const datasetWorkstation = (0, datasetFiles_1.getDatasetWorkstation)(context.datasetId);
135
- const outputPath = (0, datasetFiles_1.getDatasetOutputPath)(context.datasetId);
226
+ const datasetWorkstation = context.sandboxConfig.scriptsDir
227
+ ? context.sandboxConfig.scriptsDir.replace(/\/scripts$/, "")
228
+ : getDatasetWorkstation(context.datasetId);
229
+ const outputPath = context.sandboxConfig.outputPath ?? getDatasetOutputPath(context.datasetId);
136
230
  const hasProvidedSchema = Boolean(context.schema?.schema);
137
231
  const currentTask = hasProvidedSchema
138
232
  ? "Review FilePreview section, use the provided schema as the output contract, then parse the file and generate the dataset"
139
233
  : "Review FilePreview section to understand file structure, then generate JSON Schema for a SINGLE RECORD, then parse the file and generate the dataset";
140
- let xml = (0, xmlbuilder2_1.create)()
234
+ let xml = create()
141
235
  .ele("Instructions")
142
236
  .ele("Workflow")
143
237
  .ele("Step", { number: "1", name: "Inspect File" })
144
238
  .ele("Action").txt("Review the FilePreview section in Context to understand the file structure").up()
145
239
  .ele("Note").txt("FilePreview contains: TotalRows (total data rows), Metadata (file properties with JSON output), Head (first N raw file lines), Tail (last N lines if present), Mid (middle sample for large files). Each section shows Description, Script (full Python code), Command, Stdout (raw content), Stderr. This allows you to understand the exact file format.").up()
146
240
  .up();
241
+ xml = xml
242
+ .ele("Step", { number: "2", name: "Define the Dataset (PLAN FIRST)" })
243
+ .ele("Action").txt("Call defineNotation with the INITIAL formal definition of the dataset as a set, derived from the file preview: D = { r | r ∈ File ∧ <constraints> } in LaTeX, the symbols it binds (sets, variables, functions) and the predicates the set satisfies").up()
244
+ .ele("Requirements")
245
+ .ele("Requirement").txt("The definition and the materialization (schema + parsing code + rows) are TWO CO-EQUAL FACES of the dataset. The definition is the dataset stated intensionally — author it FIRST; it is your PLAN and the code is built to realize it").up()
246
+ .ele("Requirement").txt("Use set-builder notation, quantifiers and arithmetic in LaTeX (e.g. D = \\{(c, q, p) \\mid q \\in \\mathbb{Z}^{+},\\; p \\in \\mathbb{R}_{\\geq 0}\\})").up()
247
+ .ele("Requirement").txt("Declare every discovered set and variable as a symbol with a one-line meaning").up()
248
+ .ele("Requirement").txt("Predicates are formal claims we trust; they may be semantic (e.g. 'x es una frase relevante'). Only for the few that are purely arithmetic (row counts, field types, ranges, uniqueness, aggregates) you MAY add a checkJson for optional advisory evidence — leave every other claim without checkJson").up()
249
+ .ele("Requirement").txt("REFINE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call defineNotation again with the updated definition and the reason. The definition is not fixed up front — discovery is the point").up()
250
+ .ele("Requirement").txt("Before calling completeDataset, call defineNotation one last time with final=true so the definition becomes the RESULT — it describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
251
+ .up()
252
+ .up();
147
253
  if (hasProvidedSchema) {
148
254
  xml = xml
149
- .ele("Step", { number: "2", name: "Use Provided Schema" })
255
+ .ele("Step", { number: "3", name: "Use Provided Schema" })
150
256
  .ele("Action").txt("Use the provided schema as the output contract for every row in output.jsonl").up()
151
257
  .ele("Requirements")
152
258
  .ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
259
+ .ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
260
+ .ele("Requirement").txt("Build a schema-first mapping from input columns to schema fields before writing output.jsonl. Do not use raw input headers as JSON keys unless they are exactly schema keys").up()
261
+ .ele("Requirement").txt("For nested required fields, populate the required child keys inside each nested object or array item; top-level validity is not enough").up()
262
+ .ele("Requirement").txt("For enum fields, emit exactly one allowed enum literal from SchemaContract; normalize labels or abbreviations into allowed literals").up()
263
+ .ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the input language").up()
153
264
  .ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
154
265
  .up()
155
266
  .up();
156
267
  }
157
268
  else {
158
269
  xml = xml
159
- .ele("Step", { number: "2", name: "Generate JSON Schema" })
270
+ .ele("Step", { number: "3", name: "Generate JSON Schema" })
160
271
  .ele("Action").txt("Call generateSchema to create a JSON Schema for a SINGLE DATA RECORD (one row of data)").up()
161
272
  .ele("Requirements")
162
273
  .ele("Requirement").txt("Schema describes ONE DATA RECORD structure only (type: object, not array)").up()
@@ -168,23 +279,28 @@ function buildInstructions(context) {
168
279
  .up();
169
280
  }
170
281
  xml = xml
171
- .ele("Step", { number: "3", name: "Generate Dataset JSONL" })
282
+ .ele("Step", { number: "4", name: "Generate Dataset JSONL" })
172
283
  .ele("Action").txt(`Use executeCommand to parse the file and generate output.jsonl in the dataset workstation`).up()
173
284
  .ele("Requirements")
174
285
  .ele("Requirement").txt("Parse ALL data rows/records from the file (exclude header sections and metadata)").up()
175
286
  .ele("Requirement").txt("Output JSONL format: each line is {\"type\": \"row\", \"data\": {...record...}}").up()
287
+ .ele("Requirement").txt("When a schema is provided, each data object must contain the exact required schema keys and must not use translated or synonymous keys").up()
288
+ .ele("Requirement").txt("When validation returns zero valid rows, treat the previous output as structurally wrong and rewrite output.jsonl from the SchemaContract, not by applying small patches").up()
176
289
  .ele("Requirement").txt("Extract ONLY data records; skip any header lines, summary sections, or file metadata").up()
177
290
  .ele("Requirement").txt(`Save output to: ${outputPath}`).up()
178
291
  .ele("Requirement").txt("Use descriptive scriptName in snake_case (e.g., 'parse_csv_to_jsonl')").up()
179
292
  .up()
180
293
  .up()
181
- .ele("Step", { number: "4", name: "Complete and Validate" })
182
- .ele("Action").txt("Call completeDataset to validate the dataset").up()
183
- .ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns error details if validation fails.").up()
294
+ .ele("Step", { number: "5", name: "Complete and Validate" })
295
+ .ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced rows), then call completeDataset to validate the dataset").up()
296
+ .ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
184
297
  .up()
185
298
  .up()
186
299
  .ele("Rules")
300
+ .ele("Rule").txt("The formal definition (defineNotation) and the materialization (schema + code + rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completion").up()
187
301
  .ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
302
+ .ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the input language").up()
303
+ .ele("Rule").txt("Original/input language applies to extracted values only, not to JSON object keys").up()
188
304
  .ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
189
305
  .ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
190
306
  .ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()
@@ -197,7 +313,7 @@ function buildInstructions(context) {
197
313
  .up();
198
314
  return xml.end({ prettyPrint: true, headless: true });
199
315
  }
200
- function buildFileDatasetPrompt(context) {
316
+ export function buildFileDatasetPrompt(context) {
201
317
  const sections = [];
202
318
  sections.push(buildRole());
203
319
  sections.push("");
@@ -205,7 +321,11 @@ function buildFileDatasetPrompt(context) {
205
321
  sections.push("");
206
322
  sections.push(buildContextSection(context));
207
323
  sections.push("");
324
+ const schemaSection = buildSchemaSection(context);
325
+ if (schemaSection) {
326
+ sections.push(schemaSection);
327
+ sections.push("");
328
+ }
208
329
  sections.push(buildInstructions(context));
209
330
  return sections.join("\n");
210
331
  }
211
- //# sourceMappingURL=prompts.js.map
@@ -0,0 +1 @@
1
+ export declare const PYTHON_SCRIPT_BASE64_BY_NAME: Readonly<Record<string, string>>;
@@ -0,0 +1,11 @@
1
+ // Generated by packages/dataset/scripts/generate-python-scripts-module.js.
2
+ // Do not edit by hand.
3
+ export const PYTHON_SCRIPT_BASE64_BY_NAME = Object.freeze({
4
+ "file_metadata.py": "aW1wb3J0IHN5cwppbXBvcnQgb3MKaW1wb3J0IGpzb24KaW1wb3J0IGNzdgoKZmlsZV9wYXRoID0gc3lzLmFyZ3ZbMV0KCmluZm8gPSB7CiAgICAiZmlsZV9uYW1lIjogb3MucGF0aC5iYXNlbmFtZShmaWxlX3BhdGgpLAogICAgImV4dGVuc2lvbiI6IG9zLnBhdGguc3BsaXRleHQoZmlsZV9wYXRoKVsxXS5sb3dlcigpLAogICAgInNpemVfYnl0ZXMiOiBvcy5wYXRoLmdldHNpemUoZmlsZV9wYXRoKSBpZiBvcy5wYXRoLmV4aXN0cyhmaWxlX3BhdGgpIGVsc2UgTm9uZQp9CgppZiBpbmZvWyJzaXplX2J5dGVzIl0gaXMgbm90IE5vbmU6CiAgICB1bml0cyA9IFsiQiIsICJLQiIsICJNQiIsICJHQiIsICJUQiIsICJQQiJdCiAgICBzaXplID0gZmxvYXQoaW5mb1sic2l6ZV9ieXRlcyJdKQogICAgaWR4ID0gMAogICAgd2hpbGUgc2l6ZSA+PSAxMDI0IGFuZCBpZHggPCBsZW4odW5pdHMpIC0gMToKICAgICAgICBzaXplIC89IDEwMjQKICAgICAgICBpZHggKz0gMQogICAgaW5mb1sic2l6ZV9odW1hbiJdID0gZiJ7c2l6ZTouMmZ9IHt1bml0c1tpZHhdfSIKCnJvd19lc3RpbWF0ZSA9IE5vbmUKY29sdW1uX2VzdGltYXRlID0gTm9uZQpoZWFkZXJfcHJldmlldyA9IE5vbmUKCnRyeToKICAgIGlmIGluZm9bImV4dGVuc2lvbiJdIGluIFsiLnhsc3giLCAiLnhscyJdOgogICAgICAgIGltcG9ydCBvcGVucHl4bAogICAgICAgIHdiID0gb3BlbnB5eGwubG9hZF93b3JrYm9vayhmaWxlX3BhdGgsIHJlYWRfb25seT1UcnVlLCBkYXRhX29ubHk9RmFsc2UpCiAgICAgICAgc2hlZXRzID0gW10KICAgICAgICBmaXJzdF9zaGVldF9yb3dzID0gMAogICAgICAgIGZpcnN0X3NoZWV0X2NvbHVtbnMgPSAwCiAgICAgICAgZm9yIGlkeCwgd3MgaW4gZW51bWVyYXRlKHdiLndvcmtzaGVldHMpOgogICAgICAgICAgICByb3dzID0gd3MubWF4X3JvdyBvciAwCiAgICAgICAgICAgIGNvbHMgPSB3cy5tYXhfY29sdW1uIG9yIDAKICAgICAgICAgICAgaWYgaWR4ID09IDA6CiAgICAgICAgICAgICAgICBmaXJzdF9zaGVldF9yb3dzID0gcm93cwogICAgICAgICAgICAgICAgZmlyc3Rfc2hlZXRfY29sdW1ucyA9IGNvbHMKICAgICAgICAgICAgc2hlZXRfaW5mbyA9IHsibmFtZSI6IHdzLnRpdGxlLCAicm93cyI6IHJvd3MsICJjb2x1bW5zIjogY29sc30KICAgICAgICAgICAgc2hlZXRzLmFwcGVuZChzaGVldF9pbmZvKQogICAgICAgIGluZm9bInNoZWV0X3N0YXRzIl0gPSBzaGVldHMKICAgICAgICByb3dfZXN0aW1hdGUgPSBmaXJzdF9zaGVldF9yb3dzCiAgICAgICAgY29sdW1uX2VzdGltYXRlID0gZmlyc3Rfc2hlZXRfY29sdW1ucyBpZiBmaXJzdF9zaGVldF9jb2x1bW5zID4gMCBlbHNlIE5vbmUKICAgICAgICB0cnk6CiAgICAgICAgICAgIGZpcnN0X3NoZWV0ID0gd2Iud29ya3NoZWV0c1swXQogICAgICAgICAgICBoZWFkZXJfcHJldmlldyA9IFtzdHIoY2VsbC52YWx1ZSkgaWYgY2VsbC52YWx1ZSBpcyBub3QgTm9uZSBlbHNlICIiI
GZvciBjZWxsIGluIG5leHQoZmlyc3Rfc2hlZXQuaXRlcl9yb3dzKG1pbl9yb3c9MSwgbWF4X3Jvdz0xKSldCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbjoKICAgICAgICAgICAgaGVhZGVyX3ByZXZpZXcgPSBOb25lCiAgICBlbHNlOgogICAgICAgIHJvd19jb3VudCA9IDAKICAgICAgICB3aXRoIG9wZW4oZmlsZV9wYXRoLCAicmIiKSBhcyBmaDoKICAgICAgICAgICAgZm9yIF8gaW4gZmg6CiAgICAgICAgICAgICAgICByb3dfY291bnQgKz0gMQogICAgICAgIHJvd19lc3RpbWF0ZSA9IHJvd19jb3VudCAtIDEgaWYgcm93X2NvdW50ID4gMCBlbHNlIDAKICAgICAgICB3aXRoIG9wZW4oZmlsZV9wYXRoLCAiciIsIGVuY29kaW5nPSJ1dGYtOCIsIGVycm9ycz0iaWdub3JlIikgYXMgZmg6CiAgICAgICAgICAgIHJlYWRlciA9IGNzdi5yZWFkZXIoZmgpCiAgICAgICAgICAgIGhlYWRlcl9wcmV2aWV3ID0gbmV4dChyZWFkZXIsIFtdKQogICAgICAgIGNvbHVtbl9lc3RpbWF0ZSA9IGxlbihoZWFkZXJfcHJldmlldykgaWYgaGVhZGVyX3ByZXZpZXcgZWxzZSBOb25lCmV4Y2VwdCBFeGNlcHRpb24gYXMgZXJyb3I6CiAgICBpbmZvWyJtZXRhZGF0YV9lcnJvciJdID0gc3RyKGVycm9yKQoKaWYgcm93X2VzdGltYXRlIGlzIG5vdCBOb25lOgogICAgaW5mb1sicm93X2NvdW50X2VzdGltYXRlIl0gPSByb3dfZXN0aW1hdGUKaWYgY29sdW1uX2VzdGltYXRlIGlzIG5vdCBOb25lOgogICAgaW5mb1siY29sdW1uX2NvdW50X2VzdGltYXRlIl0gPSBjb2x1bW5fZXN0aW1hdGUKaWYgaGVhZGVyX3ByZXZpZXcgaXMgbm90IE5vbmU6CiAgICBpbmZvWyJoZWFkZXJfcHJldmlldyJdID0gaGVhZGVyX3ByZXZpZXcKCnByaW50KGpzb24uZHVtcHMoaW5mbywgZW5zdXJlX2FzY2lpPUZhbHNlKSkKCg==",
5
+ "preview_head_csv.py": "aW1wb3J0IHN5cwppbXBvcnQgY3N2CmltcG9ydCB4bWwuZXRyZWUuRWxlbWVudFRyZWUgYXMgRVQKCmZpbGVfcGF0aCA9IHN5cy5hcmd2WzFdCm5fcm93cyA9IGludChzeXMuYXJndlsyXSkgaWYgbGVuKHN5cy5hcmd2KSA+IDIgZWxzZSAxMAoKcm9vdCA9IEVULkVsZW1lbnQoInJvd3MiKQoKd2l0aCBvcGVuKGZpbGVfcGF0aCwgInIiLCBlbmNvZGluZz0idXRmLTgiLCBlcnJvcnM9Imlnbm9yZSIpIGFzIGZoOgogICAgcmVhZGVyID0gY3N2LnJlYWRlcihmaCkKICAgIGZvciByb3dfaWR4LCByb3dfZGF0YSBpbiBlbnVtZXJhdGUocmVhZGVyKToKICAgICAgICBpZiByb3dfaWR4ID49IG5fcm93czoKICAgICAgICAgICAgYnJlYWsKICAgICAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihyb3dfaWR4KSkKICAgICAgICBmb3IgY29sX2lkeCwgY2VsbF92YWx1ZSBpbiBlbnVtZXJhdGUocm93X2RhdGEpOgogICAgICAgICAgICBjZWxsX2VsZW0gPSBFVC5TdWJFbGVtZW50KHJvd19lbGVtLCAiY2VsbCIsIGNvbHVtbj1zdHIoY29sX2lkeCkpCiAgICAgICAgICAgIGNlbGxfZWxlbS50ZXh0ID0gY2VsbF92YWx1ZSBpZiBjZWxsX3ZhbHVlIGVsc2UgIiIKCnRyZWUgPSBFVC5FbGVtZW50VHJlZShyb290KQpFVC5pbmRlbnQodHJlZSwgc3BhY2U9IiAgIikKcHJpbnQoJzw/eG1sIHZlcnNpb249IjEuMCIgZW5jb2Rpbmc9IlVURi04Ij8+JykKcHJpbnQoRVQudG9zdHJpbmcocm9vdCwgZW5jb2Rpbmc9InVuaWNvZGUiKSkKCg==",
6
+ "preview_head_excel.py": "aW1wb3J0IHN5cwppbXBvcnQgb3BlbnB5eGwKaW1wb3J0IHhtbC5ldHJlZS5FbGVtZW50VHJlZSBhcyBFVAoKZmlsZV9wYXRoID0gc3lzLmFyZ3ZbMV0Kbl9yb3dzID0gaW50KHN5cy5hcmd2WzJdKSBpZiBsZW4oc3lzLmFyZ3YpID4gMiBlbHNlIDEwCgp3YiA9IG9wZW5weXhsLmxvYWRfd29ya2Jvb2soZmlsZV9wYXRoLCByZWFkX29ubHk9VHJ1ZSwgZGF0YV9vbmx5PVRydWUpCndzID0gd2Iud29ya3NoZWV0c1swXQoKcm9vdCA9IEVULkVsZW1lbnQoInJvd3MiKQoKcm93c19wcmludGVkID0gMApmb3Igcm93X2lkeCwgcm93IGluIGVudW1lcmF0ZSh3cy5pdGVyX3Jvd3MoKSk6CiAgICBpZiByb3dzX3ByaW50ZWQgPj0gbl9yb3dzOgogICAgICAgIGJyZWFrCiAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihyb3dfaWR4KSkKICAgIAogICAgZm9yIGNvbF9pZHgsIGNlbGwgaW4gZW51bWVyYXRlKHJvdyk6CiAgICAgICAgY2VsbF9lbGVtID0gRVQuU3ViRWxlbWVudChyb3dfZWxlbSwgImNlbGwiLCBjb2x1bW49c3RyKGNvbF9pZHgpKQogICAgICAgIGNlbGxfdmFsdWUgPSBjZWxsLnZhbHVlCiAgICAgICAgY2VsbF9lbGVtLnRleHQgPSBzdHIoY2VsbF92YWx1ZSkgaWYgY2VsbF92YWx1ZSBpcyBub3QgTm9uZSBlbHNlICIiCiAgICAKICAgIHJvd3NfcHJpbnRlZCArPSAxCgp0cmVlID0gRVQuRWxlbWVudFRyZWUocm9vdCkKRVQuaW5kZW50KHRyZWUsIHNwYWNlPSIgICIpCnByaW50KCc8P3htbCB2ZXJzaW9uPSIxLjAiIGVuY29kaW5nPSJVVEYtOCI/PicpCnByaW50KEVULnRvc3RyaW5nKHJvb3QsIGVuY29kaW5nPSJ1bmljb2RlIikpCgo=",
7
+ "preview_mid_csv.py": "aW1wb3J0IHN5cwppbXBvcnQgY3N2CmltcG9ydCByYW5kb20KaW1wb3J0IHhtbC5ldHJlZS5FbGVtZW50VHJlZSBhcyBFVAoKZmlsZV9wYXRoID0gc3lzLmFyZ3ZbMV0Kc3RhcnRfcm93ID0gaW50KHN5cy5hcmd2WzJdKQplbmRfcm93ID0gaW50KHN5cy5hcmd2WzNdKQpuX3NhbXBsZXMgPSBpbnQoc3lzLmFyZ3ZbNF0pIGlmIGxlbihzeXMuYXJndikgPiA0IGVsc2UgMTAKCnJvb3QgPSBFVC5FbGVtZW50KCJyb3dzIikKCndpdGggb3BlbihmaWxlX3BhdGgsICJyIiwgZW5jb2Rpbmc9InV0Zi04IiwgZXJyb3JzPSJpZ25vcmUiKSBhcyBmaDoKICAgIHJlYWRlciA9IGNzdi5yZWFkZXIoZmgpCiAgICBhbGxfcm93cyA9IGxpc3QocmVhZGVyKQoKbWlkX3NlY3Rpb24gPSBhbGxfcm93c1tzdGFydF9yb3c6ZW5kX3Jvd10KCmlmIGxlbihtaWRfc2VjdGlvbikgPD0gbl9zYW1wbGVzOgogICAgc2FtcGxlZF93aXRoX2lkeCA9IFsoaSwgcm93KSBmb3IgaSwgcm93IGluIGVudW1lcmF0ZShtaWRfc2VjdGlvbildCmVsc2U6CiAgICByYW5kb20uc2VlZCg0MikKICAgIHNhbXBsZWRfd2l0aF9pZHggPSBzb3J0ZWQocmFuZG9tLnNhbXBsZShsaXN0KGVudW1lcmF0ZShtaWRfc2VjdGlvbikpLCBuX3NhbXBsZXMpLCBrZXk9bGFtYmRhIHg6IHhbMF0pCgpmb3IgcmVsYXRpdmVfaWR4LCByb3dfZGF0YSBpbiBzYW1wbGVkX3dpdGhfaWR4OgogICAgYWN0dWFsX2lkeCA9IHN0YXJ0X3JvdyArIHJlbGF0aXZlX2lkeAogICAgcm93X2VsZW0gPSBFVC5TdWJFbGVtZW50KHJvb3QsICJyb3ciLCBpbmRleD1zdHIoYWN0dWFsX2lkeCkpCiAgICBmb3IgY29sX2lkeCwgY2VsbF92YWx1ZSBpbiBlbnVtZXJhdGUocm93X2RhdGEpOgogICAgICAgIGNlbGxfZWxlbSA9IEVULlN1YkVsZW1lbnQocm93X2VsZW0sICJjZWxsIiwgY29sdW1uPXN0cihjb2xfaWR4KSkKICAgICAgICBjZWxsX2VsZW0udGV4dCA9IGNlbGxfdmFsdWUgaWYgY2VsbF92YWx1ZSBlbHNlICIiCgp0cmVlID0gRVQuRWxlbWVudFRyZWUocm9vdCkKRVQuaW5kZW50KHRyZWUsIHNwYWNlPSIgICIpCnByaW50KCc8P3htbCB2ZXJzaW9uPSIxLjAiIGVuY29kaW5nPSJVVEYtOCI/PicpCnByaW50KEVULnRvc3RyaW5nKHJvb3QsIGVuY29kaW5nPSJ1bmljb2RlIikpCgo=",
8
+ "preview_mid_excel.py": "aW1wb3J0IHN5cwppbXBvcnQgcmFuZG9tCmltcG9ydCBvcGVucHl4bAppbXBvcnQgeG1sLmV0cmVlLkVsZW1lbnRUcmVlIGFzIEVUCgpmaWxlX3BhdGggPSBzeXMuYXJndlsxXQpzdGFydF9yb3cgPSBpbnQoc3lzLmFyZ3ZbMl0pCmVuZF9yb3cgPSBpbnQoc3lzLmFyZ3ZbM10pCm5fc2FtcGxlcyA9IGludChzeXMuYXJndls0XSkgaWYgbGVuKHN5cy5hcmd2KSA+IDQgZWxzZSAxMAoKd2IgPSBvcGVucHl4bC5sb2FkX3dvcmtib29rKGZpbGVfcGF0aCwgcmVhZF9vbmx5PVRydWUsIGRhdGFfb25seT1UcnVlKQp3cyA9IHdiLndvcmtzaGVldHNbMF0KCnJvb3QgPSBFVC5FbGVtZW50KCJyb3dzIikKCmFsbF9yb3dzID0gbGlzdCh3cy5pdGVyX3Jvd3MoKSkKbWlkX3NlY3Rpb24gPSBhbGxfcm93c1tzdGFydF9yb3c6ZW5kX3Jvd10KCmlmIGxlbihtaWRfc2VjdGlvbikgPD0gbl9zYW1wbGVzOgogICAgc2FtcGxlZF93aXRoX2lkeCA9IFsoaSwgcm93KSBmb3IgaSwgcm93IGluIGVudW1lcmF0ZShtaWRfc2VjdGlvbildCmVsc2U6CiAgICByYW5kb20uc2VlZCg0MikKICAgIGluZGljZXMgPSBzb3J0ZWQocmFuZG9tLnNhbXBsZShyYW5nZShsZW4obWlkX3NlY3Rpb24pKSwgbl9zYW1wbGVzKSkKICAgIHNhbXBsZWRfd2l0aF9pZHggPSBbKGksIG1pZF9zZWN0aW9uW2ldKSBmb3IgaSBpbiBpbmRpY2VzXQoKZm9yIHJlbGF0aXZlX2lkeCwgcm93IGluIHNhbXBsZWRfd2l0aF9pZHg6CiAgICBhY3R1YWxfaWR4ID0gc3RhcnRfcm93ICsgcmVsYXRpdmVfaWR4CiAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihhY3R1YWxfaWR4KSkKICAgIAogICAgZm9yIGNvbF9pZHgsIGNlbGwgaW4gZW51bWVyYXRlKHJvdyk6CiAgICAgICAgY2VsbF9lbGVtID0gRVQuU3ViRWxlbWVudChyb3dfZWxlbSwgImNlbGwiLCBjb2x1bW49c3RyKGNvbF9pZHgpKQogICAgICAgIGNlbGxfdmFsdWUgPSBjZWxsLnZhbHVlCiAgICAgICAgY2VsbF9lbGVtLnRleHQgPSBzdHIoY2VsbF92YWx1ZSkgaWYgY2VsbF92YWx1ZSBpcyBub3QgTm9uZSBlbHNlICIiCgp0cmVlID0gRVQuRWxlbWVudFRyZWUocm9vdCkKRVQuaW5kZW50KHRyZWUsIHNwYWNlPSIgICIpCnByaW50KCc8P3htbCB2ZXJzaW9uPSIxLjAiIGVuY29kaW5nPSJVVEYtOCI/PicpCnByaW50KEVULnRvc3RyaW5nKHJvb3QsIGVuY29kaW5nPSJ1bmljb2RlIikpCgo=",
9
+ "preview_tail_csv.py": "aW1wb3J0IHN5cwppbXBvcnQgY3N2CmltcG9ydCB4bWwuZXRyZWUuRWxlbWVudFRyZWUgYXMgRVQKZnJvbSBjb2xsZWN0aW9ucyBpbXBvcnQgZGVxdWUKCmZpbGVfcGF0aCA9IHN5cy5hcmd2WzFdCm5fcm93cyA9IGludChzeXMuYXJndlsyXSkgaWYgbGVuKHN5cy5hcmd2KSA+IDIgZWxzZSAxMAoKcm9vdCA9IEVULkVsZW1lbnQoInJvd3MiKQoKd2l0aCBvcGVuKGZpbGVfcGF0aCwgInIiLCBlbmNvZGluZz0idXRmLTgiLCBlcnJvcnM9Imlnbm9yZSIpIGFzIGZoOgogICAgcmVhZGVyID0gY3N2LnJlYWRlcihmaCkKICAgIGFsbF9yb3dzID0gbGlzdChyZWFkZXIpCiAgICB0YWlsX3Jvd3MgPSBhbGxfcm93c1stbl9yb3dzOl0gaWYgbGVuKGFsbF9yb3dzKSA+IG5fcm93cyBlbHNlIGFsbF9yb3dzCiAgICBzdGFydF9pbmRleCA9IGxlbihhbGxfcm93cykgLSBsZW4odGFpbF9yb3dzKQogICAgCiAgICBmb3IgaWR4LCByb3dfZGF0YSBpbiBlbnVtZXJhdGUodGFpbF9yb3dzKToKICAgICAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihzdGFydF9pbmRleCArIGlkeCkpCiAgICAgICAgZm9yIGNvbF9pZHgsIGNlbGxfdmFsdWUgaW4gZW51bWVyYXRlKHJvd19kYXRhKToKICAgICAgICAgICAgY2VsbF9lbGVtID0gRVQuU3ViRWxlbWVudChyb3dfZWxlbSwgImNlbGwiLCBjb2x1bW49c3RyKGNvbF9pZHgpKQogICAgICAgICAgICBjZWxsX2VsZW0udGV4dCA9IGNlbGxfdmFsdWUgaWYgY2VsbF92YWx1ZSBlbHNlICIiCgp0cmVlID0gRVQuRWxlbWVudFRyZWUocm9vdCkKRVQuaW5kZW50KHRyZWUsIHNwYWNlPSIgICIpCnByaW50KCc8P3htbCB2ZXJzaW9uPSIxLjAiIGVuY29kaW5nPSJVVEYtOCI/PicpCnByaW50KEVULnRvc3RyaW5nKHJvb3QsIGVuY29kaW5nPSJ1bmljb2RlIikpCgo=",
10
+ "preview_tail_excel.py": "aW1wb3J0IHN5cwppbXBvcnQgb3BlbnB5eGwKaW1wb3J0IHhtbC5ldHJlZS5FbGVtZW50VHJlZSBhcyBFVAoKZmlsZV9wYXRoID0gc3lzLmFyZ3ZbMV0Kbl9yb3dzID0gaW50KHN5cy5hcmd2WzJdKSBpZiBsZW4oc3lzLmFyZ3YpID4gMiBlbHNlIDEwCgp3YiA9IG9wZW5weXhsLmxvYWRfd29ya2Jvb2soZmlsZV9wYXRoLCByZWFkX29ubHk9VHJ1ZSwgZGF0YV9vbmx5PVRydWUpCndzID0gd2Iud29ya3NoZWV0c1swXQoKcm9vdCA9IEVULkVsZW1lbnQoInJvd3MiKQoKYWxsX3Jvd3MgPSBsaXN0KHdzLml0ZXJfcm93cygpKQp0YWlsX3Jvd3MgPSBhbGxfcm93c1stbl9yb3dzOl0gaWYgbGVuKGFsbF9yb3dzKSA+IG5fcm93cyBlbHNlIGFsbF9yb3dzCnN0YXJ0X2luZGV4ID0gbGVuKGFsbF9yb3dzKSAtIGxlbih0YWlsX3Jvd3MpCgpmb3IgaWR4LCByb3cgaW4gZW51bWVyYXRlKHRhaWxfcm93cyk6CiAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihzdGFydF9pbmRleCArIGlkeCkpCiAgICAKICAgIGZvciBjb2xfaWR4LCBjZWxsIGluIGVudW1lcmF0ZShyb3cpOgogICAgICAgIGNlbGxfZWxlbSA9IEVULlN1YkVsZW1lbnQocm93X2VsZW0sICJjZWxsIiwgY29sdW1uPXN0cihjb2xfaWR4KSkKICAgICAgICBjZWxsX3ZhbHVlID0gY2VsbC52YWx1ZQogICAgICAgIGNlbGxfZWxlbS50ZXh0ID0gc3RyKGNlbGxfdmFsdWUpIGlmIGNlbGxfdmFsdWUgaXMgbm90IE5vbmUgZWxzZSAiIgoKdHJlZSA9IEVULkVsZW1lbnRUcmVlKHJvb3QpCkVULmluZGVudCh0cmVlLCBzcGFjZT0iICAiKQpwcmludCgnPD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4nKQpwcmludChFVC50b3N0cmluZyhyb290LCBlbmNvZGluZz0idW5pY29kZSIpKQoK"
11
+ });
@@ -1,9 +1,8 @@
1
1
  export declare function readInstantFileStep(params: {
2
- env: any;
2
+ runtime: any;
3
3
  fileId: string;
4
4
  }): Promise<{
5
5
  url: string;
6
6
  contentDisposition?: string;
7
7
  contentBase64: string;
8
8
  }>;
9
- //# sourceMappingURL=steps.d.ts.map
@@ -1,10 +1,13 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.readInstantFileStep = readInstantFileStep;
4
- const runtime_1 = require("@ekairos/events/runtime");
5
- async function readInstantFileStep(params) {
1
/**
 * Resolves the database handle exposed by a runtime object.
 *
 * `runtime.db` may be either the handle itself or a function returning the
 * handle (synchronously or as a promise). When it is a function it is invoked
 * with `runtime` as `this`.
 *
 * @param runtime Object carrying a `db` property or `db()` method.
 * @returns The resolved database handle.
 * @throws {Error} If `runtime` is falsy, or if no database handle can be
 *   resolved from `runtime.db`.
 */
async function getRuntimeDb(runtime) {
    if (!runtime) {
        throw new Error("Dataset file step requires runtime.");
    }
    const db = runtime.db;
    // Await in both branches so a promise-valued `db` property is validated too.
    const resolved = await (typeof db === "function" ? db.call(runtime) : db);
    if (resolved == null) {
        // Fail here with a clear message instead of letting the caller hit
        // "Cannot read properties of undefined" on the first query.
        throw new Error("Dataset file step requires runtime.db.");
    }
    return resolved;
}
8
+ export async function readInstantFileStep(params) {
6
9
  "use step";
7
- const db = (await (0, runtime_1.getContextRuntime)(params.env)).db;
10
+ const db = await getRuntimeDb(params.runtime);
8
11
  const fileQuery = await db.query({
9
12
  $files: { $: { where: { id: params.fileId }, limit: 1 } },
10
13
  });
@@ -20,4 +23,3 @@ async function readInstantFileStep(params) {
20
23
  contentBase64: Buffer.from(fileBuffer).toString("base64"),
21
24
  };
22
25
  }
23
- //# sourceMappingURL=steps.js.map
package/dist/id.d.ts ADDED
@@ -0,0 +1 @@
1
+ export declare function createDatasetId(): string;
package/dist/id.js ADDED
@@ -0,0 +1,10 @@
1
+ export function createDatasetId() {
2
+ const uuid = globalThis.crypto?.randomUUID?.();
3
+ if (typeof uuid === "string" && uuid.length > 0)
4
+ return uuid;
5
+ return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
6
+ const r = (Math.random() * 16) | 0;
7
+ const v = c === "x" ? r : (r & 0x3) | 0x8;
8
+ return v.toString(16);
9
+ });
10
+ }
package/dist/index.d.ts CHANGED
@@ -1,7 +1,9 @@
1
- export * from "./dataset";
2
- export * from "./domain";
3
- export * from "./materializeDataset.tool";
4
- export * from "./schema";
5
- export * from "./service";
6
- export * from "./skill";
7
- //# sourceMappingURL=index.d.ts.map
1
+ export * from "./dataset.js";
2
+ export * from "./contextWorkspace.js";
3
+ export * from "./domain.js";
4
+ export * from "./materializeDataset.tool.js";
5
+ export * from "./notation.js";
6
+ export * from "./schema.js";
7
+ export * from "./service.js";
8
+ export { registerFileParseContext } from "./file/file-dataset.agent.js";
9
+ export { registerTransformDatasetContext } from "./transform/index.js";
package/dist/index.js CHANGED
@@ -1,23 +1,9 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
- for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
- };
16
- Object.defineProperty(exports, "__esModule", { value: true });
17
- __exportStar(require("./dataset"), exports);
18
- __exportStar(require("./domain"), exports);
19
- __exportStar(require("./materializeDataset.tool"), exports);
20
- __exportStar(require("./schema"), exports);
21
- __exportStar(require("./service"), exports);
22
- __exportStar(require("./skill"), exports);
23
- //# sourceMappingURL=index.js.map
1
+ export * from "./dataset.js";
2
+ export * from "./contextWorkspace.js";
3
+ export * from "./domain.js";
4
+ export * from "./materializeDataset.tool.js";
5
+ export * from "./notation.js";
6
+ export * from "./schema.js";
7
+ export * from "./service.js";
8
+ export { registerFileParseContext } from "./file/file-dataset.agent.js";
9
+ export { registerTransformDatasetContext } from "./transform/index.js";
@@ -1,31 +1,45 @@
1
1
  import { z } from "zod";
2
2
  import type { DomainSchemaResult } from "@ekairos/domain";
3
+ import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
3
4
  import type { ContextReactor } from "@ekairos/events";
5
+ import { datasetDomain } from "./schema.js";
4
6
  declare const materializeDatasetToolInputSchema: z.ZodObject<{
5
7
  datasetId: z.ZodOptional<z.ZodString>;
6
8
  sandboxId: z.ZodOptional<z.ZodString>;
7
9
  title: z.ZodOptional<z.ZodString>;
8
- sources: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
9
- kind: z.ZodLiteral<"file">;
10
+ context: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
11
+ id: z.ZodString;
12
+ }, z.core.$strip>, z.ZodObject<{
13
+ key: z.ZodString;
14
+ }, z.core.$strip>]>>;
15
+ files: z.ZodOptional<z.ZodArray<z.ZodObject<{
16
+ description: z.ZodOptional<z.ZodString>;
10
17
  fileId: z.ZodString;
18
+ }, z.core.$strip>>>;
19
+ texts: z.ZodOptional<z.ZodArray<z.ZodObject<{
20
+ name: z.ZodOptional<z.ZodString>;
11
21
  description: z.ZodOptional<z.ZodString>;
12
- }, z.core.$strip>, z.ZodObject<{
13
- kind: z.ZodLiteral<"text">;
14
22
  text: z.ZodString;
15
23
  mimeType: z.ZodOptional<z.ZodString>;
16
- name: z.ZodOptional<z.ZodString>;
17
- description: z.ZodOptional<z.ZodString>;
18
- }, z.core.$strip>, z.ZodObject<{
19
- kind: z.ZodLiteral<"dataset">;
24
+ }, z.core.$strip>>>;
25
+ datasets: z.ZodOptional<z.ZodArray<z.ZodObject<{
20
26
  datasetId: z.ZodString;
21
27
  description: z.ZodOptional<z.ZodString>;
22
- }, z.core.$strip>, z.ZodObject<{
23
- kind: z.ZodLiteral<"query">;
24
- query: z.ZodRecord<z.ZodString, z.ZodAny>;
28
+ }, z.core.$strip>>>;
29
+ queries: z.ZodOptional<z.ZodArray<z.ZodObject<{
25
30
  title: z.ZodOptional<z.ZodString>;
31
+ query: z.ZodRecord<z.ZodString, z.ZodAny>;
26
32
  explanation: z.ZodOptional<z.ZodString>;
27
- }, z.core.$strip>], "kind">>;
33
+ }, z.core.$strip>>>;
28
34
  instructions: z.ZodOptional<z.ZodString>;
35
+ mode: z.ZodOptional<z.ZodEnum<{
36
+ schema: "schema";
37
+ auto: "auto";
38
+ }>>;
39
+ output: z.ZodOptional<z.ZodEnum<{
40
+ object: "object";
41
+ rows: "rows";
42
+ }>>;
29
43
  schema: z.ZodOptional<z.ZodObject<{
30
44
  title: z.ZodOptional<z.ZodString>;
31
45
  description: z.ZodOptional<z.ZodString>;
@@ -33,38 +47,45 @@ declare const materializeDatasetToolInputSchema: z.ZodObject<{
33
47
  }, z.core.$strip>>;
34
48
  first: z.ZodOptional<z.ZodBoolean>;
35
49
  }, z.core.$strip>;
36
- export declare function createMaterializeDatasetTool<Env extends {
37
- orgId: string;
38
- }>(params: {
39
- env: Env;
50
+ type AnyMaterializeDatasetRuntime = EkairosRuntime<any, any, any>;
51
+ type MaterializeDatasetRuntimeHandle<Runtime extends AnyMaterializeDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
52
+ type CompatibleToolQueryDomain<Runtime extends AnyMaterializeDatasetRuntime, QueryDomain extends DomainSchemaResult> = RuntimeForDomain<Runtime, QueryDomain> extends never ? never : QueryDomain;
53
+ export declare function createMaterializeDatasetTool<Runtime extends AnyMaterializeDatasetRuntime, QueryDomain extends DomainSchemaResult>(params: {
54
+ runtime: Runtime & MaterializeDatasetRuntimeHandle<Runtime>;
40
55
  reactor?: ContextReactor<any, any>;
41
- queryDomain: DomainSchemaResult;
56
+ queryDomain: QueryDomain & CompatibleToolQueryDomain<Runtime, QueryDomain>;
42
57
  toolName?: string;
43
58
  }): import("ai").Tool<{
44
- sources: ({
45
- kind: "file";
59
+ datasetId?: string | undefined;
60
+ sandboxId?: string | undefined;
61
+ title?: string | undefined;
62
+ context?: {
63
+ id: string;
64
+ } | {
65
+ key: string;
66
+ } | undefined;
67
+ files?: {
46
68
  fileId: string;
47
69
  description?: string | undefined;
48
- } | {
49
- kind: "text";
70
+ }[] | undefined;
71
+ texts?: {
50
72
  text: string;
51
- mimeType?: string | undefined;
52
73
  name?: string | undefined;
53
74
  description?: string | undefined;
54
- } | {
55
- kind: "dataset";
75
+ mimeType?: string | undefined;
76
+ }[] | undefined;
77
+ datasets?: {
56
78
  datasetId: string;
57
79
  description?: string | undefined;
58
- } | {
59
- kind: "query";
80
+ }[] | undefined;
81
+ queries?: {
60
82
  query: Record<string, any>;
61
83
  title?: string | undefined;
62
84
  explanation?: string | undefined;
63
- })[];
64
- datasetId?: string | undefined;
65
- sandboxId?: string | undefined;
66
- title?: string | undefined;
85
+ }[] | undefined;
67
86
  instructions?: string | undefined;
87
+ mode?: "schema" | "auto" | undefined;
88
+ output?: "object" | "rows" | undefined;
68
89
  schema?: {
69
90
  schema: any;
70
91
  title?: string | undefined;
@@ -75,4 +96,3 @@ export declare function createMaterializeDatasetTool<Env extends {
75
96
  datasetId: string;
76
97
  }>;
77
98
  export { materializeDatasetToolInputSchema };
78
- //# sourceMappingURL=materializeDataset.tool.d.ts.map