@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +347 -0
- package/dist/agents.d.ts +8 -0
- package/dist/agents.js +8 -0
- package/dist/builder/agentMaterializers.d.ts +9 -0
- package/dist/builder/agentMaterializers.js +10 -0
- package/dist/builder/context.d.ts +15 -0
- package/dist/builder/context.js +251 -0
- package/dist/builder/instructions.d.ts +5 -0
- package/dist/builder/instructions.js +40 -0
- package/dist/builder/materialize.d.ts +83 -0
- package/dist/builder/materialize.js +548 -0
- package/dist/builder/materializeQuery.d.ts +12 -0
- package/dist/builder/materializeQuery.js +31 -0
- package/dist/builder/persistence.d.ts +22 -0
- package/dist/builder/persistence.js +192 -0
- package/dist/builder/rows.d.ts +7 -0
- package/dist/builder/rows.js +56 -0
- package/dist/builder/schemaInference.d.ts +3 -0
- package/dist/builder/schemaInference.js +61 -0
- package/dist/builder/types.d.ts +144 -0
- package/dist/builder/types.js +1 -0
- package/dist/clearDataset.tool.d.ts +2 -3
- package/dist/clearDataset.tool.js +13 -17
- package/dist/completeDataset.steps.d.ts +117 -0
- package/dist/completeDataset.steps.js +537 -0
- package/dist/completeDataset.tool.d.ts +132 -7
- package/dist/completeDataset.tool.js +46 -192
- package/dist/contextResources.d.ts +31 -0
- package/dist/contextResources.js +151 -0
- package/dist/contextWorkspace.d.ts +79 -0
- package/dist/contextWorkspace.js +234 -0
- package/dist/dataset/steps.d.ts +39 -15
- package/dist/dataset/steps.js +96 -39
- package/dist/dataset.d.ts +3 -67
- package/dist/dataset.js +129 -521
- package/dist/datasetFiles.d.ts +5 -1
- package/dist/datasetFiles.js +29 -27
- package/dist/defineNotation.tool.d.ts +49 -0
- package/dist/defineNotation.tool.js +154 -0
- package/dist/domain.d.ts +1 -2
- package/dist/domain.js +1 -6
- package/dist/executeCommand.tool.d.ts +2 -30
- package/dist/executeCommand.tool.js +165 -39
- package/dist/file/file-dataset.agent.d.ts +19 -56
- package/dist/file/file-dataset.agent.js +181 -134
- package/dist/file/file-dataset.steps.d.ts +27 -0
- package/dist/file/file-dataset.steps.js +47 -0
- package/dist/file/file-dataset.types.d.ts +64 -0
- package/dist/file/file-dataset.types.js +1 -0
- package/dist/file/filepreview.d.ts +5 -35
- package/dist/file/filepreview.js +60 -107
- package/dist/file/filepreview.types.d.ts +31 -0
- package/dist/file/filepreview.types.js +1 -0
- package/dist/file/generateSchema.tool.d.ts +2 -3
- package/dist/file/generateSchema.tool.js +11 -15
- package/dist/file/index.d.ts +1 -2
- package/dist/file/index.js +1 -18
- package/dist/file/prompts.d.ts +2 -3
- package/dist/file/prompts.js +152 -32
- package/dist/file/scripts.generated.d.ts +1 -0
- package/dist/file/scripts.generated.js +11 -0
- package/dist/file/steps.d.ts +1 -2
- package/dist/file/steps.js +9 -7
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/index.d.ts +9 -7
- package/dist/index.js +9 -23
- package/dist/materializeDataset.tool.d.ts +51 -31
- package/dist/materializeDataset.tool.js +81 -65
- package/dist/notation.d.ts +205 -0
- package/dist/notation.js +424 -0
- package/dist/query/index.d.ts +1 -2
- package/dist/query/index.js +1 -18
- package/dist/query/queryDomain.d.ts +3 -4
- package/dist/query/queryDomain.js +3 -40
- package/dist/query/queryDomain.step.d.ts +1 -1
- package/dist/query/queryDomain.step.js +24 -13
- package/dist/sandbox/steps.d.ts +23 -15
- package/dist/sandbox/steps.js +73 -76
- package/dist/sandbox.steps.d.ts +1 -2
- package/dist/sandbox.steps.js +1 -18
- package/dist/schema.d.ts +15 -13
- package/dist/schema.js +27 -37
- package/dist/service.d.ts +12 -5
- package/dist/service.js +88 -15
- package/dist/skill.d.ts +0 -1
- package/dist/skill.js +12 -17
- package/dist/transform/filepreview.d.ts +2 -3
- package/dist/transform/filepreview.js +9 -26
- package/dist/transform/index.d.ts +2 -3
- package/dist/transform/index.js +2 -8
- package/dist/transform/prompts.d.ts +1 -34
- package/dist/transform/prompts.js +66 -46
- package/dist/transform/transform-dataset.agent.d.ts +20 -45
- package/dist/transform/transform-dataset.agent.js +151 -91
- package/dist/transform/transform-dataset.steps.d.ts +30 -0
- package/dist/transform/transform-dataset.steps.js +61 -0
- package/dist/transform/transform-dataset.types.d.ts +95 -0
- package/dist/transform/transform-dataset.types.js +1 -0
- package/dist/transform/transformDataset.d.ts +3 -3
- package/dist/transform/transformDataset.js +15 -18
- package/dist/writeDatasetRows.tool.d.ts +188 -0
- package/dist/writeDatasetRows.tool.js +258 -0
- package/package.json +33 -8
- package/dist/clearDataset.tool.d.ts.map +0 -1
- package/dist/clearDataset.tool.js.map +0 -1
- package/dist/completeDataset.tool.d.ts.map +0 -1
- package/dist/completeDataset.tool.js.map +0 -1
- package/dist/dataset/steps.d.ts.map +0 -1
- package/dist/dataset/steps.js.map +0 -1
- package/dist/dataset.d.ts.map +0 -1
- package/dist/dataset.js.map +0 -1
- package/dist/datasetFiles.d.ts.map +0 -1
- package/dist/datasetFiles.js.map +0 -1
- package/dist/domain.d.ts.map +0 -1
- package/dist/domain.js.map +0 -1
- package/dist/eventsReactRuntime.d.ts +0 -22
- package/dist/eventsReactRuntime.d.ts.map +0 -1
- package/dist/eventsReactRuntime.js +0 -29
- package/dist/eventsReactRuntime.js.map +0 -1
- package/dist/executeCommand.tool.d.ts.map +0 -1
- package/dist/executeCommand.tool.js.map +0 -1
- package/dist/file/file-dataset.agent.d.ts.map +0 -1
- package/dist/file/file-dataset.agent.js.map +0 -1
- package/dist/file/filepreview.d.ts.map +0 -1
- package/dist/file/filepreview.js.map +0 -1
- package/dist/file/generateSchema.tool.d.ts.map +0 -1
- package/dist/file/generateSchema.tool.js.map +0 -1
- package/dist/file/index.d.ts.map +0 -1
- package/dist/file/index.js.map +0 -1
- package/dist/file/prompts.d.ts.map +0 -1
- package/dist/file/prompts.js.map +0 -1
- package/dist/file/steps.d.ts.map +0 -1
- package/dist/file/steps.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/materializeDataset.tool.d.ts.map +0 -1
- package/dist/materializeDataset.tool.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/queryDomain.d.ts.map +0 -1
- package/dist/query/queryDomain.js.map +0 -1
- package/dist/query/queryDomain.step.d.ts.map +0 -1
- package/dist/query/queryDomain.step.js.map +0 -1
- package/dist/sandbox/steps.d.ts.map +0 -1
- package/dist/sandbox/steps.js.map +0 -1
- package/dist/sandbox.steps.d.ts.map +0 -1
- package/dist/sandbox.steps.js.map +0 -1
- package/dist/schema.d.ts.map +0 -1
- package/dist/schema.js.map +0 -1
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js.map +0 -1
- package/dist/skill.d.ts.map +0 -1
- package/dist/skill.js.map +0 -1
- package/dist/transform/filepreview.d.ts.map +0 -1
- package/dist/transform/filepreview.js.map +0 -1
- package/dist/transform/index.d.ts.map +0 -1
- package/dist/transform/index.js.map +0 -1
- package/dist/transform/prompts.d.ts.map +0 -1
- package/dist/transform/prompts.js.map +0 -1
- package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
- package/dist/transform/transform-dataset.agent.js.map +0 -1
- package/dist/transform/transformDataset.d.ts.map +0 -1
- package/dist/transform/transformDataset.js.map +0 -1
package/dist/file/prompts.js
CHANGED
|
@@ -1,26 +1,23 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
// Plain build API using template literals and XML
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const xmlbuilder2_1 = require("xmlbuilder2");
|
|
6
|
-
const datasetFiles_1 = require("../datasetFiles");
|
|
2
|
+
import { create } from "xmlbuilder2";
|
|
3
|
+
import { getDatasetWorkstation, getDatasetOutputPath } from "../datasetFiles.js";
|
|
7
4
|
function buildRole() {
|
|
8
|
-
let xml =
|
|
5
|
+
let xml = create()
|
|
9
6
|
.ele("Role")
|
|
10
7
|
.txt("You are a dataset creator for a SINGLE file. Your goal is to convert the file content into a validated JSONL dataset where each line represents one record.")
|
|
11
8
|
.up();
|
|
12
9
|
return xml.end({ prettyPrint: true, headless: true });
|
|
13
10
|
}
|
|
14
11
|
function buildGoal() {
|
|
15
|
-
let xml =
|
|
12
|
+
let xml = create()
|
|
16
13
|
.ele("Goal")
|
|
17
|
-
.txt("Convert the
|
|
14
|
+
.txt("Convert the input file into a validated JSONL dataset (output.jsonl) where each line is a JSON object conforming to a generated schema. The schema describes ONE data record structure. Extract ONLY data records; exclude any header sections, metadata, or summary information from the file.")
|
|
18
15
|
.up();
|
|
19
16
|
return xml.end({ prettyPrint: true, headless: true });
|
|
20
17
|
}
|
|
21
|
-
function
|
|
22
|
-
let xml =
|
|
23
|
-
.ele("
|
|
18
|
+
function buildResourceInfo(context) {
|
|
19
|
+
let xml = create()
|
|
20
|
+
.ele("FileResource")
|
|
24
21
|
.ele("Type").txt("file").up()
|
|
25
22
|
.ele("FileId").txt(context.fileId).up()
|
|
26
23
|
.ele("DatasetId").txt(context.datasetId).up()
|
|
@@ -29,7 +26,7 @@ function buildSourceInfo(context) {
|
|
|
29
26
|
return xml;
|
|
30
27
|
}
|
|
31
28
|
function buildFilePreviewSection(preview) {
|
|
32
|
-
let xml =
|
|
29
|
+
let xml = create()
|
|
33
30
|
.ele("FilePreview")
|
|
34
31
|
.ele("TotalRows").txt(String(preview.totalRows)).up();
|
|
35
32
|
if (preview.metadata) {
|
|
@@ -91,8 +88,9 @@ function buildErrorsSection(errors) {
|
|
|
91
88
|
if (errors.length === 0) {
|
|
92
89
|
return null;
|
|
93
90
|
}
|
|
94
|
-
let xml =
|
|
95
|
-
.ele("PreviousErrors")
|
|
91
|
+
let xml = create()
|
|
92
|
+
.ele("PreviousErrors")
|
|
93
|
+
.ele("Instruction").txt("Treat these as repair feedback from the previous validation attempt. Rewrite output.jsonl from the schema contract; do not patch input column names into schema keys piecemeal.").up();
|
|
96
94
|
for (const error of errors) {
|
|
97
95
|
xml = xml.ele("Error").txt(error).up();
|
|
98
96
|
}
|
|
@@ -100,10 +98,10 @@ function buildErrorsSection(errors) {
|
|
|
100
98
|
return xml;
|
|
101
99
|
}
|
|
102
100
|
function buildContextSection(context) {
|
|
103
|
-
let xml =
|
|
101
|
+
let xml = create()
|
|
104
102
|
.ele("Context");
|
|
105
|
-
const
|
|
106
|
-
xml = xml.import(
|
|
103
|
+
const resourceXml = buildResourceInfo(context);
|
|
104
|
+
xml = xml.import(resourceXml.first());
|
|
107
105
|
if (context.filePreview) {
|
|
108
106
|
const previewXml = buildFilePreviewSection(context.filePreview);
|
|
109
107
|
xml = xml.import(previewXml.first());
|
|
@@ -117,46 +115,159 @@ function buildContextSection(context) {
|
|
|
117
115
|
xml = xml.up();
|
|
118
116
|
return xml.end({ prettyPrint: true, headless: true });
|
|
119
117
|
}
|
|
118
|
+
function asRecord(value) {
|
|
119
|
+
return value && typeof value === "object" && !Array.isArray(value)
|
|
120
|
+
? value
|
|
121
|
+
: null;
|
|
122
|
+
}
|
|
123
|
+
function getSchemaObject(context) {
|
|
124
|
+
return asRecord(context.schema?.schema);
|
|
125
|
+
}
|
|
126
|
+
function joinSchemaPath(basePath, key) {
|
|
127
|
+
return basePath === "$" ? `$.${key}` : `${basePath}.${key}`;
|
|
128
|
+
}
|
|
129
|
+
function collectSchemaContract(schema, path = "$", contract = {
|
|
130
|
+
requiredPaths: [],
|
|
131
|
+
propertyPaths: [],
|
|
132
|
+
enumConstraints: [],
|
|
133
|
+
closedObjectPaths: [],
|
|
134
|
+
}) {
|
|
135
|
+
const record = asRecord(schema);
|
|
136
|
+
if (!record) {
|
|
137
|
+
return contract;
|
|
138
|
+
}
|
|
139
|
+
if (Array.isArray(record.enum)) {
|
|
140
|
+
contract.enumConstraints.push({
|
|
141
|
+
path,
|
|
142
|
+
values: record.enum.map((value) => JSON.stringify(value)),
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
const properties = asRecord(record.properties);
|
|
146
|
+
if (properties) {
|
|
147
|
+
if (record.additionalProperties === false) {
|
|
148
|
+
contract.closedObjectPaths.push(path);
|
|
149
|
+
}
|
|
150
|
+
const required = Array.isArray(record.required)
|
|
151
|
+
? record.required.filter((value) => typeof value === "string")
|
|
152
|
+
: [];
|
|
153
|
+
for (const key of required) {
|
|
154
|
+
contract.requiredPaths.push(joinSchemaPath(path, key));
|
|
155
|
+
}
|
|
156
|
+
for (const [key, childSchema] of Object.entries(properties)) {
|
|
157
|
+
const childPath = joinSchemaPath(path, key);
|
|
158
|
+
contract.propertyPaths.push(childPath);
|
|
159
|
+
collectSchemaContract(childSchema, childPath, contract);
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
if (record.items) {
|
|
163
|
+
collectSchemaContract(record.items, `${path}[]`, contract);
|
|
164
|
+
}
|
|
165
|
+
for (const keyword of ["oneOf", "anyOf", "allOf"]) {
|
|
166
|
+
if (Array.isArray(record[keyword])) {
|
|
167
|
+
for (const childSchema of record[keyword]) {
|
|
168
|
+
collectSchemaContract(childSchema, path, contract);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
return contract;
|
|
173
|
+
}
|
|
174
|
+
function appendLimitedList(xml, elementName, itemName, values, maxItems) {
|
|
175
|
+
let node = xml.ele(elementName);
|
|
176
|
+
for (const value of values.slice(0, maxItems)) {
|
|
177
|
+
node = node.ele(itemName).txt(value).up();
|
|
178
|
+
}
|
|
179
|
+
if (values.length > maxItems) {
|
|
180
|
+
node = node.ele("Truncated").txt(String(values.length - maxItems)).up();
|
|
181
|
+
}
|
|
182
|
+
return node.up();
|
|
183
|
+
}
|
|
120
184
|
function buildSchemaSection(context) {
|
|
121
|
-
|
|
185
|
+
const schema = getSchemaObject(context);
|
|
186
|
+
if (!context.schema || !schema) {
|
|
122
187
|
return "";
|
|
123
188
|
}
|
|
124
|
-
|
|
189
|
+
const contract = collectSchemaContract(schema);
|
|
190
|
+
let xml = create()
|
|
125
191
|
.com("Schema section: This defines the structure of ONE RECORD (row). Each line in the JSONL output must conform to this schema.")
|
|
126
192
|
.ele("Schema")
|
|
127
193
|
.ele("Title").txt(context.schema.title || "").up()
|
|
128
|
-
.ele("Description").txt(context.schema.description || "").up()
|
|
129
|
-
|
|
194
|
+
.ele("Description").txt(context.schema.description || "").up();
|
|
195
|
+
xml = xml
|
|
196
|
+
.ele("SchemaContract")
|
|
197
|
+
.ele("Purpose").txt("Compact output contract derived from JSON Schema. Use this before writing output.jsonl.").up()
|
|
198
|
+
.ele("Rule").txt("Use only schema property keys in data objects. Input headers are input labels, not output keys.").up()
|
|
199
|
+
.ele("Rule").txt("Required paths are required everywhere, including nested objects and array items.").up()
|
|
200
|
+
.ele("Rule").txt("Enum fields must use exactly one of the listed literal values. Normalize input labels to the closest valid enum literal; never emit a value outside the enum.").up();
|
|
201
|
+
xml = appendLimitedList(xml, "RequiredPaths", "Path", contract.requiredPaths, 120);
|
|
202
|
+
xml = appendLimitedList(xml, "PropertyPaths", "Path", contract.propertyPaths, 160);
|
|
203
|
+
let enumsXml = xml.ele("EnumConstraints");
|
|
204
|
+
for (const constraint of contract.enumConstraints.slice(0, 80)) {
|
|
205
|
+
let enumXml = enumsXml.ele("Enum", { path: constraint.path });
|
|
206
|
+
for (const value of constraint.values.slice(0, 80)) {
|
|
207
|
+
enumXml = enumXml.ele("Value").txt(value).up();
|
|
208
|
+
}
|
|
209
|
+
if (constraint.values.length > 80) {
|
|
210
|
+
enumXml = enumXml.ele("Truncated").txt(String(constraint.values.length - 80)).up();
|
|
211
|
+
}
|
|
212
|
+
enumsXml = enumXml.up();
|
|
213
|
+
}
|
|
214
|
+
if (contract.enumConstraints.length > 80) {
|
|
215
|
+
enumsXml = enumsXml.ele("Truncated").txt(String(contract.enumConstraints.length - 80)).up();
|
|
216
|
+
}
|
|
217
|
+
xml = enumsXml.up();
|
|
218
|
+
xml = appendLimitedList(xml, "ClosedObjectPaths", "Path", contract.closedObjectPaths, 80);
|
|
219
|
+
xml = xml
|
|
220
|
+
.up()
|
|
221
|
+
.ele("JsonSchema").txt(JSON.stringify(schema, null, 2)).up()
|
|
130
222
|
.up();
|
|
131
223
|
return xml.end({ prettyPrint: true, headless: true });
|
|
132
224
|
}
|
|
133
225
|
function buildInstructions(context) {
|
|
134
|
-
const datasetWorkstation =
|
|
135
|
-
|
|
226
|
+
const datasetWorkstation = context.sandboxConfig.scriptsDir
|
|
227
|
+
? context.sandboxConfig.scriptsDir.replace(/\/scripts$/, "")
|
|
228
|
+
: getDatasetWorkstation(context.datasetId);
|
|
229
|
+
const outputPath = context.sandboxConfig.outputPath ?? getDatasetOutputPath(context.datasetId);
|
|
136
230
|
const hasProvidedSchema = Boolean(context.schema?.schema);
|
|
137
231
|
const currentTask = hasProvidedSchema
|
|
138
232
|
? "Review FilePreview section, use the provided schema as the output contract, then parse the file and generate the dataset"
|
|
139
233
|
: "Review FilePreview section to understand file structure, then generate JSON Schema for a SINGLE RECORD, then parse the file and generate the dataset";
|
|
140
|
-
let xml =
|
|
234
|
+
let xml = create()
|
|
141
235
|
.ele("Instructions")
|
|
142
236
|
.ele("Workflow")
|
|
143
237
|
.ele("Step", { number: "1", name: "Inspect File" })
|
|
144
238
|
.ele("Action").txt("Review the FilePreview section in Context to understand the file structure").up()
|
|
145
239
|
.ele("Note").txt("FilePreview contains: TotalRows (total data rows), Metadata (file properties with JSON output), Head (first N raw file lines), Tail (last N lines if present), Mid (middle sample for large files). Each section shows Description, Script (full Python code), Command, Stdout (raw content), Stderr. This allows you to understand the exact file format.").up()
|
|
146
240
|
.up();
|
|
241
|
+
xml = xml
|
|
242
|
+
.ele("Step", { number: "2", name: "Define the Dataset (PLAN FIRST)" })
|
|
243
|
+
.ele("Action").txt("Call defineNotation with the INITIAL formal definition of the dataset as a set, derived from the file preview: D = { r | r ∈ File ∧ <constraints> } in LaTeX, the symbols it binds (sets, variables, functions) and the predicates the set satisfies").up()
|
|
244
|
+
.ele("Requirements")
|
|
245
|
+
.ele("Requirement").txt("The definition and the materialization (schema + parsing code + rows) are TWO CO-EQUAL FACES of the dataset. The definition is the dataset stated intensionally — author it FIRST; it is your PLAN and the code is built to realize it").up()
|
|
246
|
+
.ele("Requirement").txt("Use set-builder notation, quantifiers and arithmetic in LaTeX (e.g. D = \\{(c, q, p) \\mid q \\in \\mathbb{Z}^{+},\\; p \\in \\mathbb{R}_{\\geq 0}\\})").up()
|
|
247
|
+
.ele("Requirement").txt("Declare every discovered set and variable as a symbol with a one-line meaning").up()
|
|
248
|
+
.ele("Requirement").txt("Predicates are formal claims we trust; they may be semantic (e.g. 'x es una frase relevante'). Only for the few that are purely arithmetic (row counts, field types, ranges, uniqueness, aggregates) you MAY add a checkJson for optional advisory evidence — leave every other claim without checkJson").up()
|
|
249
|
+
.ele("Requirement").txt("REFINE: every time the analysis discovers a new set, variable, constraint or correction (new columns, unexpected types, excluded sections), call defineNotation again with the updated definition and the reason. The definition is not fixed up front — discovery is the point").up()
|
|
250
|
+
.ele("Requirement").txt("Before calling completeDataset, call defineNotation one last time with final=true so the definition becomes the RESULT — it describes EXACTLY the dataset you produced; any arithmetic predicates get optional advisory evidence afterwards (never a pass/fail verdict — the dataset's validity is trusted)").up()
|
|
251
|
+
.up()
|
|
252
|
+
.up();
|
|
147
253
|
if (hasProvidedSchema) {
|
|
148
254
|
xml = xml
|
|
149
|
-
.ele("Step", { number: "
|
|
255
|
+
.ele("Step", { number: "3", name: "Use Provided Schema" })
|
|
150
256
|
.ele("Action").txt("Use the provided schema as the output contract for every row in output.jsonl").up()
|
|
151
257
|
.ele("Requirements")
|
|
152
258
|
.ele("Requirement").txt("Every output row must conform exactly to the provided schema").up()
|
|
259
|
+
.ele("Requirement").txt("Every data object MUST use the exact property names from the provided JSON Schema required/properties keys").up()
|
|
260
|
+
.ele("Requirement").txt("Build a schema-first mapping from input columns to schema fields before writing output.jsonl. Do not use raw input headers as JSON keys unless they are exactly schema keys").up()
|
|
261
|
+
.ele("Requirement").txt("For nested required fields, populate the required child keys inside each nested object or array item; top-level validity is not enough").up()
|
|
262
|
+
.ele("Requirement").txt("For enum fields, emit exactly one allowed enum literal from SchemaContract; normalize labels or abbreviations into allowed literals").up()
|
|
263
|
+
.ele("Requirement").txt("Do not translate, localize, rename, camelize differently, or infer alternative field names. Field names are a technical contract; only field values may preserve the input language").up()
|
|
153
264
|
.ele("Requirement").txt("Do not call generateSchema when a schema is already provided").up()
|
|
154
265
|
.up()
|
|
155
266
|
.up();
|
|
156
267
|
}
|
|
157
268
|
else {
|
|
158
269
|
xml = xml
|
|
159
|
-
.ele("Step", { number: "
|
|
270
|
+
.ele("Step", { number: "3", name: "Generate JSON Schema" })
|
|
160
271
|
.ele("Action").txt("Call generateSchema to create a JSON Schema for a SINGLE DATA RECORD (one row of data)").up()
|
|
161
272
|
.ele("Requirements")
|
|
162
273
|
.ele("Requirement").txt("Schema describes ONE DATA RECORD structure only (type: object, not array)").up()
|
|
@@ -168,23 +279,28 @@ function buildInstructions(context) {
|
|
|
168
279
|
.up();
|
|
169
280
|
}
|
|
170
281
|
xml = xml
|
|
171
|
-
.ele("Step", { number: "
|
|
282
|
+
.ele("Step", { number: "4", name: "Generate Dataset JSONL" })
|
|
172
283
|
.ele("Action").txt(`Use executeCommand to parse the file and generate output.jsonl in the dataset workstation`).up()
|
|
173
284
|
.ele("Requirements")
|
|
174
285
|
.ele("Requirement").txt("Parse ALL data rows/records from the file (exclude header sections and metadata)").up()
|
|
175
286
|
.ele("Requirement").txt("Output JSONL format: each line is {\"type\": \"row\", \"data\": {...record...}}").up()
|
|
287
|
+
.ele("Requirement").txt("When a schema is provided, each data object must contain the exact required schema keys and must not use translated or synonymous keys").up()
|
|
288
|
+
.ele("Requirement").txt("When validation returns zero valid rows, treat the previous output as structurally wrong and rewrite output.jsonl from the SchemaContract, not by applying small patches").up()
|
|
176
289
|
.ele("Requirement").txt("Extract ONLY data records; skip any header lines, summary sections, or file metadata").up()
|
|
177
290
|
.ele("Requirement").txt(`Save output to: ${outputPath}`).up()
|
|
178
291
|
.ele("Requirement").txt("Use descriptive scriptName in snake_case (e.g., 'parse_csv_to_jsonl')").up()
|
|
179
292
|
.up()
|
|
180
293
|
.up()
|
|
181
|
-
.ele("Step", { number: "
|
|
182
|
-
.ele("Action").txt("Call completeDataset to validate the dataset").up()
|
|
183
|
-
.ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns
|
|
294
|
+
.ele("Step", { number: "5", name: "Complete and Validate" })
|
|
295
|
+
.ele("Action").txt("Call defineNotation with final=true (the definition as RESULT, matching the produced rows), then call completeDataset to validate the dataset").up()
|
|
296
|
+
.ele("Behavior").txt("Validates that output.jsonl exists and all records conform to the schema stored in database. Returns success:false with validation details if validation fails. If validation fails, inspect validation errors, rewrite output.jsonl, and call completeDataset again. Do not stop until completeDataset returns success:true.").up()
|
|
184
297
|
.up()
|
|
185
298
|
.up()
|
|
186
299
|
.ele("Rules")
|
|
300
|
+
.ele("Rule").txt("The formal definition (defineNotation) and the materialization (schema + code + rows) are co-equal faces of the dataset: author the definition first as the PLAN, refine it on every discovery, finalize it as the RESULT before completion").up()
|
|
187
301
|
.ele("Rule").txt("Schema defines ONE DATA RECORD structure (not array, not header)").up()
|
|
302
|
+
.ele("Rule").txt("Schema property names are authoritative. Never translate or rename keys such as itemName, quantity, or unit into the input language").up()
|
|
303
|
+
.ele("Rule").txt("Original/input language applies to extracted values only, not to JSON object keys").up()
|
|
188
304
|
.ele("Rule").txt("Datasets contain ONLY data records; exclude all header sections and file metadata").up()
|
|
189
305
|
.ele("Rule").txt("JSONL format: each line = separate JSON object representing one data record").up()
|
|
190
306
|
.ele("Rule").txt("FilePreview shows raw file content - use Script to understand data extraction").up()
|
|
@@ -197,7 +313,7 @@ function buildInstructions(context) {
|
|
|
197
313
|
.up();
|
|
198
314
|
return xml.end({ prettyPrint: true, headless: true });
|
|
199
315
|
}
|
|
200
|
-
function buildFileDatasetPrompt(context) {
|
|
316
|
+
export function buildFileDatasetPrompt(context) {
|
|
201
317
|
const sections = [];
|
|
202
318
|
sections.push(buildRole());
|
|
203
319
|
sections.push("");
|
|
@@ -205,7 +321,11 @@ function buildFileDatasetPrompt(context) {
|
|
|
205
321
|
sections.push("");
|
|
206
322
|
sections.push(buildContextSection(context));
|
|
207
323
|
sections.push("");
|
|
324
|
+
const schemaSection = buildSchemaSection(context);
|
|
325
|
+
if (schemaSection) {
|
|
326
|
+
sections.push(schemaSection);
|
|
327
|
+
sections.push("");
|
|
328
|
+
}
|
|
208
329
|
sections.push(buildInstructions(context));
|
|
209
330
|
return sections.join("\n");
|
|
210
331
|
}
|
|
211
|
-
//# sourceMappingURL=prompts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const PYTHON_SCRIPT_BASE64_BY_NAME: Readonly<Record<string, string>>;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// Generated by packages/dataset/scripts/generate-python-scripts-module.js.
|
|
2
|
+
// Do not edit by hand.
|
|
3
|
+
export const PYTHON_SCRIPT_BASE64_BY_NAME = Object.freeze({
|
|
4
|
+
"file_metadata.py": "aW1wb3J0IHN5cwppbXBvcnQgb3MKaW1wb3J0IGpzb24KaW1wb3J0IGNzdgoKZmlsZV9wYXRoID0gc3lzLmFyZ3ZbMV0KCmluZm8gPSB7CiAgICAiZmlsZV9uYW1lIjogb3MucGF0aC5iYXNlbmFtZShmaWxlX3BhdGgpLAogICAgImV4dGVuc2lvbiI6IG9zLnBhdGguc3BsaXRleHQoZmlsZV9wYXRoKVsxXS5sb3dlcigpLAogICAgInNpemVfYnl0ZXMiOiBvcy5wYXRoLmdldHNpemUoZmlsZV9wYXRoKSBpZiBvcy5wYXRoLmV4aXN0cyhmaWxlX3BhdGgpIGVsc2UgTm9uZQp9CgppZiBpbmZvWyJzaXplX2J5dGVzIl0gaXMgbm90IE5vbmU6CiAgICB1bml0cyA9IFsiQiIsICJLQiIsICJNQiIsICJHQiIsICJUQiIsICJQQiJdCiAgICBzaXplID0gZmxvYXQoaW5mb1sic2l6ZV9ieXRlcyJdKQogICAgaWR4ID0gMAogICAgd2hpbGUgc2l6ZSA+PSAxMDI0IGFuZCBpZHggPCBsZW4odW5pdHMpIC0gMToKICAgICAgICBzaXplIC89IDEwMjQKICAgICAgICBpZHggKz0gMQogICAgaW5mb1sic2l6ZV9odW1hbiJdID0gZiJ7c2l6ZTouMmZ9IHt1bml0c1tpZHhdfSIKCnJvd19lc3RpbWF0ZSA9IE5vbmUKY29sdW1uX2VzdGltYXRlID0gTm9uZQpoZWFkZXJfcHJldmlldyA9IE5vbmUKCnRyeToKICAgIGlmIGluZm9bImV4dGVuc2lvbiJdIGluIFsiLnhsc3giLCAiLnhscyJdOgogICAgICAgIGltcG9ydCBvcGVucHl4bAogICAgICAgIHdiID0gb3BlbnB5eGwubG9hZF93b3JrYm9vayhmaWxlX3BhdGgsIHJlYWRfb25seT1UcnVlLCBkYXRhX29ubHk9RmFsc2UpCiAgICAgICAgc2hlZXRzID0gW10KICAgICAgICBmaXJzdF9zaGVldF9yb3dzID0gMAogICAgICAgIGZpcnN0X3NoZWV0X2NvbHVtbnMgPSAwCiAgICAgICAgZm9yIGlkeCwgd3MgaW4gZW51bWVyYXRlKHdiLndvcmtzaGVldHMpOgogICAgICAgICAgICByb3dzID0gd3MubWF4X3JvdyBvciAwCiAgICAgICAgICAgIGNvbHMgPSB3cy5tYXhfY29sdW1uIG9yIDAKICAgICAgICAgICAgaWYgaWR4ID09IDA6CiAgICAgICAgICAgICAgICBmaXJzdF9zaGVldF9yb3dzID0gcm93cwogICAgICAgICAgICAgICAgZmlyc3Rfc2hlZXRfY29sdW1ucyA9IGNvbHMKICAgICAgICAgICAgc2hlZXRfaW5mbyA9IHsibmFtZSI6IHdzLnRpdGxlLCAicm93cyI6IHJvd3MsICJjb2x1bW5zIjogY29sc30KICAgICAgICAgICAgc2hlZXRzLmFwcGVuZChzaGVldF9pbmZvKQogICAgICAgIGluZm9bInNoZWV0X3N0YXRzIl0gPSBzaGVldHMKICAgICAgICByb3dfZXN0aW1hdGUgPSBmaXJzdF9zaGVldF9yb3dzCiAgICAgICAgY29sdW1uX2VzdGltYXRlID0gZmlyc3Rfc2hlZXRfY29sdW1ucyBpZiBmaXJzdF9zaGVldF9jb2x1bW5zID4gMCBlbHNlIE5vbmUKICAgICAgICB0cnk6CiAgICAgICAgICAgIGZpcnN0X3NoZWV0ID0gd2Iud29ya3NoZWV0c1swXQogICAgICAgICAgICBoZWFkZXJfcHJldmlldyA9IFtzdHIoY2VsbC52YWx1ZSkgaWYgY2VsbC52YWx1ZSBpcyBub3QgTm9uZSBlbHNlICIiIGZvciBjZWxsIGluIG5leHQoZmlyc3Rfc2hlZXQuaXRlcl9yb3dzKG1pbl9yb3c9MSwgbWF4X3Jvdz0xKSldCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbjoKICAgICAgICAgICAgaGVhZGVyX3ByZXZpZXcgPSBOb25lCiAgICBlbHNlOgogICAgICAgIHJvd19jb3VudCA9IDAKICAgICAgICB3aXRoIG9wZW4oZmlsZV9wYXRoLCAicmIiKSBhcyBmaDoKICAgICAgICAgICAgZm9yIF8gaW4gZmg6CiAgICAgICAgICAgICAgICByb3dfY291bnQgKz0gMQogICAgICAgIHJvd19lc3RpbWF0ZSA9IHJvd19jb3VudCAtIDEgaWYgcm93X2NvdW50ID4gMCBlbHNlIDAKICAgICAgICB3aXRoIG9wZW4oZmlsZV9wYXRoLCAiciIsIGVuY29kaW5nPSJ1dGYtOCIsIGVycm9ycz0iaWdub3JlIikgYXMgZmg6CiAgICAgICAgICAgIHJlYWRlciA9IGNzdi5yZWFkZXIoZmgpCiAgICAgICAgICAgIGhlYWRlcl9wcmV2aWV3ID0gbmV4dChyZWFkZXIsIFtdKQogICAgICAgIGNvbHVtbl9lc3RpbWF0ZSA9IGxlbihoZWFkZXJfcHJldmlldykgaWYgaGVhZGVyX3ByZXZpZXcgZWxzZSBOb25lCmV4Y2VwdCBFeGNlcHRpb24gYXMgZXJyb3I6CiAgICBpbmZvWyJtZXRhZGF0YV9lcnJvciJdID0gc3RyKGVycm9yKQoKaWYgcm93X2VzdGltYXRlIGlzIG5vdCBOb25lOgogICAgaW5mb1sicm93X2NvdW50X2VzdGltYXRlIl0gPSByb3dfZXN0aW1hdGUKaWYgY29sdW1uX2VzdGltYXRlIGlzIG5vdCBOb25lOgogICAgaW5mb1siY29sdW1uX2NvdW50X2VzdGltYXRlIl0gPSBjb2x1bW5fZXN0aW1hdGUKaWYgaGVhZGVyX3ByZXZpZXcgaXMgbm90IE5vbmU6CiAgICBpbmZvWyJoZWFkZXJfcHJldmlldyJdID0gaGVhZGVyX3ByZXZpZXcKCnByaW50KGpzb24uZHVtcHMoaW5mbywgZW5zdXJlX2FzY2lpPUZhbHNlKSkKCg==",
|
|
5
|
+
"preview_head_csv.py": "aW1wb3J0IHN5cwppbXBvcnQgY3N2CmltcG9ydCB4bWwuZXRyZWUuRWxlbWVudFRyZWUgYXMgRVQKCmZpbGVfcGF0aCA9IHN5cy5hcmd2WzFdCm5fcm93cyA9IGludChzeXMuYXJndlsyXSkgaWYgbGVuKHN5cy5hcmd2KSA+IDIgZWxzZSAxMAoKcm9vdCA9IEVULkVsZW1lbnQoInJvd3MiKQoKd2l0aCBvcGVuKGZpbGVfcGF0aCwgInIiLCBlbmNvZGluZz0idXRmLTgiLCBlcnJvcnM9Imlnbm9yZSIpIGFzIGZoOgogICAgcmVhZGVyID0gY3N2LnJlYWRlcihmaCkKICAgIGZvciByb3dfaWR4LCByb3dfZGF0YSBpbiBlbnVtZXJhdGUocmVhZGVyKToKICAgICAgICBpZiByb3dfaWR4ID49IG5fcm93czoKICAgICAgICAgICAgYnJlYWsKICAgICAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihyb3dfaWR4KSkKICAgICAgICBmb3IgY29sX2lkeCwgY2VsbF92YWx1ZSBpbiBlbnVtZXJhdGUocm93X2RhdGEpOgogICAgICAgICAgICBjZWxsX2VsZW0gPSBFVC5TdWJFbGVtZW50KHJvd19lbGVtLCAiY2VsbCIsIGNvbHVtbj1zdHIoY29sX2lkeCkpCiAgICAgICAgICAgIGNlbGxfZWxlbS50ZXh0ID0gY2VsbF92YWx1ZSBpZiBjZWxsX3ZhbHVlIGVsc2UgIiIKCnRyZWUgPSBFVC5FbGVtZW50VHJlZShyb290KQpFVC5pbmRlbnQodHJlZSwgc3BhY2U9IiAgIikKcHJpbnQoJzw/eG1sIHZlcnNpb249IjEuMCIgZW5jb2Rpbmc9IlVURi04Ij8+JykKcHJpbnQoRVQudG9zdHJpbmcocm9vdCwgZW5jb2Rpbmc9InVuaWNvZGUiKSkKCg==",
|
|
6
|
+
"preview_head_excel.py": "aW1wb3J0IHN5cwppbXBvcnQgb3BlbnB5eGwKaW1wb3J0IHhtbC5ldHJlZS5FbGVtZW50VHJlZSBhcyBFVAoKZmlsZV9wYXRoID0gc3lzLmFyZ3ZbMV0Kbl9yb3dzID0gaW50KHN5cy5hcmd2WzJdKSBpZiBsZW4oc3lzLmFyZ3YpID4gMiBlbHNlIDEwCgp3YiA9IG9wZW5weXhsLmxvYWRfd29ya2Jvb2soZmlsZV9wYXRoLCByZWFkX29ubHk9VHJ1ZSwgZGF0YV9vbmx5PVRydWUpCndzID0gd2Iud29ya3NoZWV0c1swXQoKcm9vdCA9IEVULkVsZW1lbnQoInJvd3MiKQoKcm93c19wcmludGVkID0gMApmb3Igcm93X2lkeCwgcm93IGluIGVudW1lcmF0ZSh3cy5pdGVyX3Jvd3MoKSk6CiAgICBpZiByb3dzX3ByaW50ZWQgPj0gbl9yb3dzOgogICAgICAgIGJyZWFrCiAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihyb3dfaWR4KSkKICAgIAogICAgZm9yIGNvbF9pZHgsIGNlbGwgaW4gZW51bWVyYXRlKHJvdyk6CiAgICAgICAgY2VsbF9lbGVtID0gRVQuU3ViRWxlbWVudChyb3dfZWxlbSwgImNlbGwiLCBjb2x1bW49c3RyKGNvbF9pZHgpKQogICAgICAgIGNlbGxfdmFsdWUgPSBjZWxsLnZhbHVlCiAgICAgICAgY2VsbF9lbGVtLnRleHQgPSBzdHIoY2VsbF92YWx1ZSkgaWYgY2VsbF92YWx1ZSBpcyBub3QgTm9uZSBlbHNlICIiCiAgICAKICAgIHJvd3NfcHJpbnRlZCArPSAxCgp0cmVlID0gRVQuRWxlbWVudFRyZWUocm9vdCkKRVQuaW5kZW50KHRyZWUsIHNwYWNlPSIgICIpCnByaW50KCc8P3htbCB2ZXJzaW9uPSIxLjAiIGVuY29kaW5nPSJVVEYtOCI/PicpCnByaW50KEVULnRvc3RyaW5nKHJvb3QsIGVuY29kaW5nPSJ1bmljb2RlIikpCgo=",
|
|
7
|
+
"preview_mid_csv.py": "aW1wb3J0IHN5cwppbXBvcnQgY3N2CmltcG9ydCByYW5kb20KaW1wb3J0IHhtbC5ldHJlZS5FbGVtZW50VHJlZSBhcyBFVAoKZmlsZV9wYXRoID0gc3lzLmFyZ3ZbMV0Kc3RhcnRfcm93ID0gaW50KHN5cy5hcmd2WzJdKQplbmRfcm93ID0gaW50KHN5cy5hcmd2WzNdKQpuX3NhbXBsZXMgPSBpbnQoc3lzLmFyZ3ZbNF0pIGlmIGxlbihzeXMuYXJndikgPiA0IGVsc2UgMTAKCnJvb3QgPSBFVC5FbGVtZW50KCJyb3dzIikKCndpdGggb3BlbihmaWxlX3BhdGgsICJyIiwgZW5jb2Rpbmc9InV0Zi04IiwgZXJyb3JzPSJpZ25vcmUiKSBhcyBmaDoKICAgIHJlYWRlciA9IGNzdi5yZWFkZXIoZmgpCiAgICBhbGxfcm93cyA9IGxpc3QocmVhZGVyKQoKbWlkX3NlY3Rpb24gPSBhbGxfcm93c1tzdGFydF9yb3c6ZW5kX3Jvd10KCmlmIGxlbihtaWRfc2VjdGlvbikgPD0gbl9zYW1wbGVzOgogICAgc2FtcGxlZF93aXRoX2lkeCA9IFsoaSwgcm93KSBmb3IgaSwgcm93IGluIGVudW1lcmF0ZShtaWRfc2VjdGlvbildCmVsc2U6CiAgICByYW5kb20uc2VlZCg0MikKICAgIHNhbXBsZWRfd2l0aF9pZHggPSBzb3J0ZWQocmFuZG9tLnNhbXBsZShsaXN0KGVudW1lcmF0ZShtaWRfc2VjdGlvbikpLCBuX3NhbXBsZXMpLCBrZXk9bGFtYmRhIHg6IHhbMF0pCgpmb3IgcmVsYXRpdmVfaWR4LCByb3dfZGF0YSBpbiBzYW1wbGVkX3dpdGhfaWR4OgogICAgYWN0dWFsX2lkeCA9IHN0YXJ0X3JvdyArIHJlbGF0aXZlX2lkeAogICAgcm93X2VsZW0gPSBFVC5TdWJFbGVtZW50KHJvb3QsICJyb3ciLCBpbmRleD1zdHIoYWN0dWFsX2lkeCkpCiAgICBmb3IgY29sX2lkeCwgY2VsbF92YWx1ZSBpbiBlbnVtZXJhdGUocm93X2RhdGEpOgogICAgICAgIGNlbGxfZWxlbSA9IEVULlN1YkVsZW1lbnQocm93X2VsZW0sICJjZWxsIiwgY29sdW1uPXN0cihjb2xfaWR4KSkKICAgICAgICBjZWxsX2VsZW0udGV4dCA9IGNlbGxfdmFsdWUgaWYgY2VsbF92YWx1ZSBlbHNlICIiCgp0cmVlID0gRVQuRWxlbWVudFRyZWUocm9vdCkKRVQuaW5kZW50KHRyZWUsIHNwYWNlPSIgICIpCnByaW50KCc8P3htbCB2ZXJzaW9uPSIxLjAiIGVuY29kaW5nPSJVVEYtOCI/PicpCnByaW50KEVULnRvc3RyaW5nKHJvb3QsIGVuY29kaW5nPSJ1bmljb2RlIikpCgo=",
|
|
8
|
+
"preview_mid_excel.py": "aW1wb3J0IHN5cwppbXBvcnQgcmFuZG9tCmltcG9ydCBvcGVucHl4bAppbXBvcnQgeG1sLmV0cmVlLkVsZW1lbnRUcmVlIGFzIEVUCgpmaWxlX3BhdGggPSBzeXMuYXJndlsxXQpzdGFydF9yb3cgPSBpbnQoc3lzLmFyZ3ZbMl0pCmVuZF9yb3cgPSBpbnQoc3lzLmFyZ3ZbM10pCm5fc2FtcGxlcyA9IGludChzeXMuYXJndls0XSkgaWYgbGVuKHN5cy5hcmd2KSA+IDQgZWxzZSAxMAoKd2IgPSBvcGVucHl4bC5sb2FkX3dvcmtib29rKGZpbGVfcGF0aCwgcmVhZF9vbmx5PVRydWUsIGRhdGFfb25seT1UcnVlKQp3cyA9IHdiLndvcmtzaGVldHNbMF0KCnJvb3QgPSBFVC5FbGVtZW50KCJyb3dzIikKCmFsbF9yb3dzID0gbGlzdCh3cy5pdGVyX3Jvd3MoKSkKbWlkX3NlY3Rpb24gPSBhbGxfcm93c1tzdGFydF9yb3c6ZW5kX3Jvd10KCmlmIGxlbihtaWRfc2VjdGlvbikgPD0gbl9zYW1wbGVzOgogICAgc2FtcGxlZF93aXRoX2lkeCA9IFsoaSwgcm93KSBmb3IgaSwgcm93IGluIGVudW1lcmF0ZShtaWRfc2VjdGlvbildCmVsc2U6CiAgICByYW5kb20uc2VlZCg0MikKICAgIGluZGljZXMgPSBzb3J0ZWQocmFuZG9tLnNhbXBsZShyYW5nZShsZW4obWlkX3NlY3Rpb24pKSwgbl9zYW1wbGVzKSkKICAgIHNhbXBsZWRfd2l0aF9pZHggPSBbKGksIG1pZF9zZWN0aW9uW2ldKSBmb3IgaSBpbiBpbmRpY2VzXQoKZm9yIHJlbGF0aXZlX2lkeCwgcm93IGluIHNhbXBsZWRfd2l0aF9pZHg6CiAgICBhY3R1YWxfaWR4ID0gc3RhcnRfcm93ICsgcmVsYXRpdmVfaWR4CiAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihhY3R1YWxfaWR4KSkKICAgIAogICAgZm9yIGNvbF9pZHgsIGNlbGwgaW4gZW51bWVyYXRlKHJvdyk6CiAgICAgICAgY2VsbF9lbGVtID0gRVQuU3ViRWxlbWVudChyb3dfZWxlbSwgImNlbGwiLCBjb2x1bW49c3RyKGNvbF9pZHgpKQogICAgICAgIGNlbGxfdmFsdWUgPSBjZWxsLnZhbHVlCiAgICAgICAgY2VsbF9lbGVtLnRleHQgPSBzdHIoY2VsbF92YWx1ZSkgaWYgY2VsbF92YWx1ZSBpcyBub3QgTm9uZSBlbHNlICIiCgp0cmVlID0gRVQuRWxlbWVudFRyZWUocm9vdCkKRVQuaW5kZW50KHRyZWUsIHNwYWNlPSIgICIpCnByaW50KCc8P3htbCB2ZXJzaW9uPSIxLjAiIGVuY29kaW5nPSJVVEYtOCI/PicpCnByaW50KEVULnRvc3RyaW5nKHJvb3QsIGVuY29kaW5nPSJ1bmljb2RlIikpCgo=",
|
|
9
|
+
"preview_tail_csv.py": "aW1wb3J0IHN5cwppbXBvcnQgY3N2CmltcG9ydCB4bWwuZXRyZWUuRWxlbWVudFRyZWUgYXMgRVQKZnJvbSBjb2xsZWN0aW9ucyBpbXBvcnQgZGVxdWUKCmZpbGVfcGF0aCA9IHN5cy5hcmd2WzFdCm5fcm93cyA9IGludChzeXMuYXJndlsyXSkgaWYgbGVuKHN5cy5hcmd2KSA+IDIgZWxzZSAxMAoKcm9vdCA9IEVULkVsZW1lbnQoInJvd3MiKQoKd2l0aCBvcGVuKGZpbGVfcGF0aCwgInIiLCBlbmNvZGluZz0idXRmLTgiLCBlcnJvcnM9Imlnbm9yZSIpIGFzIGZoOgogICAgcmVhZGVyID0gY3N2LnJlYWRlcihmaCkKICAgIGFsbF9yb3dzID0gbGlzdChyZWFkZXIpCiAgICB0YWlsX3Jvd3MgPSBhbGxfcm93c1stbl9yb3dzOl0gaWYgbGVuKGFsbF9yb3dzKSA+IG5fcm93cyBlbHNlIGFsbF9yb3dzCiAgICBzdGFydF9pbmRleCA9IGxlbihhbGxfcm93cykgLSBsZW4odGFpbF9yb3dzKQogICAgCiAgICBmb3IgaWR4LCByb3dfZGF0YSBpbiBlbnVtZXJhdGUodGFpbF9yb3dzKToKICAgICAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihzdGFydF9pbmRleCArIGlkeCkpCiAgICAgICAgZm9yIGNvbF9pZHgsIGNlbGxfdmFsdWUgaW4gZW51bWVyYXRlKHJvd19kYXRhKToKICAgICAgICAgICAgY2VsbF9lbGVtID0gRVQuU3ViRWxlbWVudChyb3dfZWxlbSwgImNlbGwiLCBjb2x1bW49c3RyKGNvbF9pZHgpKQogICAgICAgICAgICBjZWxsX2VsZW0udGV4dCA9IGNlbGxfdmFsdWUgaWYgY2VsbF92YWx1ZSBlbHNlICIiCgp0cmVlID0gRVQuRWxlbWVudFRyZWUocm9vdCkKRVQuaW5kZW50KHRyZWUsIHNwYWNlPSIgICIpCnByaW50KCc8P3htbCB2ZXJzaW9uPSIxLjAiIGVuY29kaW5nPSJVVEYtOCI/PicpCnByaW50KEVULnRvc3RyaW5nKHJvb3QsIGVuY29kaW5nPSJ1bmljb2RlIikpCgo=",
|
|
10
|
+
"preview_tail_excel.py": "aW1wb3J0IHN5cwppbXBvcnQgb3BlbnB5eGwKaW1wb3J0IHhtbC5ldHJlZS5FbGVtZW50VHJlZSBhcyBFVAoKZmlsZV9wYXRoID0gc3lzLmFyZ3ZbMV0Kbl9yb3dzID0gaW50KHN5cy5hcmd2WzJdKSBpZiBsZW4oc3lzLmFyZ3YpID4gMiBlbHNlIDEwCgp3YiA9IG9wZW5weXhsLmxvYWRfd29ya2Jvb2soZmlsZV9wYXRoLCByZWFkX29ubHk9VHJ1ZSwgZGF0YV9vbmx5PVRydWUpCndzID0gd2Iud29ya3NoZWV0c1swXQoKcm9vdCA9IEVULkVsZW1lbnQoInJvd3MiKQoKYWxsX3Jvd3MgPSBsaXN0KHdzLml0ZXJfcm93cygpKQp0YWlsX3Jvd3MgPSBhbGxfcm93c1stbl9yb3dzOl0gaWYgbGVuKGFsbF9yb3dzKSA+IG5fcm93cyBlbHNlIGFsbF9yb3dzCnN0YXJ0X2luZGV4ID0gbGVuKGFsbF9yb3dzKSAtIGxlbih0YWlsX3Jvd3MpCgpmb3IgaWR4LCByb3cgaW4gZW51bWVyYXRlKHRhaWxfcm93cyk6CiAgICByb3dfZWxlbSA9IEVULlN1YkVsZW1lbnQocm9vdCwgInJvdyIsIGluZGV4PXN0cihzdGFydF9pbmRleCArIGlkeCkpCiAgICAKICAgIGZvciBjb2xfaWR4LCBjZWxsIGluIGVudW1lcmF0ZShyb3cpOgogICAgICAgIGNlbGxfZWxlbSA9IEVULlN1YkVsZW1lbnQocm93X2VsZW0sICJjZWxsIiwgY29sdW1uPXN0cihjb2xfaWR4KSkKICAgICAgICBjZWxsX3ZhbHVlID0gY2VsbC52YWx1ZQogICAgICAgIGNlbGxfZWxlbS50ZXh0ID0gc3RyKGNlbGxfdmFsdWUpIGlmIGNlbGxfdmFsdWUgaXMgbm90IE5vbmUgZWxzZSAiIgoKdHJlZSA9IEVULkVsZW1lbnRUcmVlKHJvb3QpCkVULmluZGVudCh0cmVlLCBzcGFjZT0iICAiKQpwcmludCgnPD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4nKQpwcmludChFVC50b3N0cmluZyhyb290LCBlbmNvZGluZz0idW5pY29kZSIpKQoK"
|
|
11
|
+
});
|
package/dist/file/steps.d.ts
CHANGED
package/dist/file/steps.js
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
async function getRuntimeDb(runtime) {
|
|
2
|
+
if (!runtime) {
|
|
3
|
+
throw new Error("Dataset file step requires runtime.");
|
|
4
|
+
}
|
|
5
|
+
const db = runtime.db;
|
|
6
|
+
return typeof db === "function" ? await db.call(runtime) : db;
|
|
7
|
+
}
|
|
8
|
+
export async function readInstantFileStep(params) {
|
|
6
9
|
"use step";
|
|
7
|
-
const db =
|
|
10
|
+
const db = await getRuntimeDb(params.runtime);
|
|
8
11
|
const fileQuery = await db.query({
|
|
9
12
|
$files: { $: { where: { id: params.fileId }, limit: 1 } },
|
|
10
13
|
});
|
|
@@ -20,4 +23,3 @@ async function readInstantFileStep(params) {
|
|
|
20
23
|
contentBase64: Buffer.from(fileBuffer).toString("base64"),
|
|
21
24
|
};
|
|
22
25
|
}
|
|
23
|
-
//# sourceMappingURL=steps.js.map
|
package/dist/id.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function createDatasetId(): string;
|
package/dist/id.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export function createDatasetId() {
|
|
2
|
+
const uuid = globalThis.crypto?.randomUUID?.();
|
|
3
|
+
if (typeof uuid === "string" && uuid.length > 0)
|
|
4
|
+
return uuid;
|
|
5
|
+
return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
|
|
6
|
+
const r = (Math.random() * 16) | 0;
|
|
7
|
+
const v = c === "x" ? r : (r & 0x3) | 0x8;
|
|
8
|
+
return v.toString(16);
|
|
9
|
+
});
|
|
10
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
export * from "./dataset";
|
|
2
|
-
export * from "./
|
|
3
|
-
export * from "./
|
|
4
|
-
export * from "./
|
|
5
|
-
export * from "./
|
|
6
|
-
export * from "./
|
|
7
|
-
|
|
1
|
+
export * from "./dataset.js";
|
|
2
|
+
export * from "./contextWorkspace.js";
|
|
3
|
+
export * from "./domain.js";
|
|
4
|
+
export * from "./materializeDataset.tool.js";
|
|
5
|
+
export * from "./notation.js";
|
|
6
|
+
export * from "./schema.js";
|
|
7
|
+
export * from "./service.js";
|
|
8
|
+
export { registerFileParseContext } from "./file/file-dataset.agent.js";
|
|
9
|
+
export { registerTransformDatasetContext } from "./transform/index.js";
|
package/dist/index.js
CHANGED
|
@@ -1,23 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
-
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
-
};
|
|
16
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./dataset"), exports);
|
|
18
|
-
__exportStar(require("./domain"), exports);
|
|
19
|
-
__exportStar(require("./materializeDataset.tool"), exports);
|
|
20
|
-
__exportStar(require("./schema"), exports);
|
|
21
|
-
__exportStar(require("./service"), exports);
|
|
22
|
-
__exportStar(require("./skill"), exports);
|
|
23
|
-
//# sourceMappingURL=index.js.map
|
|
1
|
+
export * from "./dataset.js";
|
|
2
|
+
export * from "./contextWorkspace.js";
|
|
3
|
+
export * from "./domain.js";
|
|
4
|
+
export * from "./materializeDataset.tool.js";
|
|
5
|
+
export * from "./notation.js";
|
|
6
|
+
export * from "./schema.js";
|
|
7
|
+
export * from "./service.js";
|
|
8
|
+
export { registerFileParseContext } from "./file/file-dataset.agent.js";
|
|
9
|
+
export { registerTransformDatasetContext } from "./transform/index.js";
|
|
@@ -1,31 +1,45 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import type { DomainSchemaResult } from "@ekairos/domain";
|
|
3
|
+
import type { EkairosRuntime, RuntimeForDomain } from "@ekairos/domain/runtime";
|
|
3
4
|
import type { ContextReactor } from "@ekairos/events";
|
|
5
|
+
import { datasetDomain } from "./schema.js";
|
|
4
6
|
declare const materializeDatasetToolInputSchema: z.ZodObject<{
|
|
5
7
|
datasetId: z.ZodOptional<z.ZodString>;
|
|
6
8
|
sandboxId: z.ZodOptional<z.ZodString>;
|
|
7
9
|
title: z.ZodOptional<z.ZodString>;
|
|
8
|
-
|
|
9
|
-
|
|
10
|
+
context: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
|
|
11
|
+
id: z.ZodString;
|
|
12
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
13
|
+
key: z.ZodString;
|
|
14
|
+
}, z.core.$strip>]>>;
|
|
15
|
+
files: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
16
|
+
description: z.ZodOptional<z.ZodString>;
|
|
10
17
|
fileId: z.ZodString;
|
|
18
|
+
}, z.core.$strip>>>;
|
|
19
|
+
texts: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
20
|
+
name: z.ZodOptional<z.ZodString>;
|
|
11
21
|
description: z.ZodOptional<z.ZodString>;
|
|
12
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
13
|
-
kind: z.ZodLiteral<"text">;
|
|
14
22
|
text: z.ZodString;
|
|
15
23
|
mimeType: z.ZodOptional<z.ZodString>;
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
19
|
-
kind: z.ZodLiteral<"dataset">;
|
|
24
|
+
}, z.core.$strip>>>;
|
|
25
|
+
datasets: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
20
26
|
datasetId: z.ZodString;
|
|
21
27
|
description: z.ZodOptional<z.ZodString>;
|
|
22
|
-
}, z.core.$strip
|
|
23
|
-
|
|
24
|
-
query: z.ZodRecord<z.ZodString, z.ZodAny>;
|
|
28
|
+
}, z.core.$strip>>>;
|
|
29
|
+
queries: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
25
30
|
title: z.ZodOptional<z.ZodString>;
|
|
31
|
+
query: z.ZodRecord<z.ZodString, z.ZodAny>;
|
|
26
32
|
explanation: z.ZodOptional<z.ZodString>;
|
|
27
|
-
}, z.core.$strip
|
|
33
|
+
}, z.core.$strip>>>;
|
|
28
34
|
instructions: z.ZodOptional<z.ZodString>;
|
|
35
|
+
mode: z.ZodOptional<z.ZodEnum<{
|
|
36
|
+
schema: "schema";
|
|
37
|
+
auto: "auto";
|
|
38
|
+
}>>;
|
|
39
|
+
output: z.ZodOptional<z.ZodEnum<{
|
|
40
|
+
object: "object";
|
|
41
|
+
rows: "rows";
|
|
42
|
+
}>>;
|
|
29
43
|
schema: z.ZodOptional<z.ZodObject<{
|
|
30
44
|
title: z.ZodOptional<z.ZodString>;
|
|
31
45
|
description: z.ZodOptional<z.ZodString>;
|
|
@@ -33,38 +47,45 @@ declare const materializeDatasetToolInputSchema: z.ZodObject<{
|
|
|
33
47
|
}, z.core.$strip>>;
|
|
34
48
|
first: z.ZodOptional<z.ZodBoolean>;
|
|
35
49
|
}, z.core.$strip>;
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
50
|
+
type AnyMaterializeDatasetRuntime = EkairosRuntime<any, any, any>;
|
|
51
|
+
type MaterializeDatasetRuntimeHandle<Runtime extends AnyMaterializeDatasetRuntime> = RuntimeForDomain<Runtime, typeof datasetDomain>;
|
|
52
|
+
type CompatibleToolQueryDomain<Runtime extends AnyMaterializeDatasetRuntime, QueryDomain extends DomainSchemaResult> = RuntimeForDomain<Runtime, QueryDomain> extends never ? never : QueryDomain;
|
|
53
|
+
export declare function createMaterializeDatasetTool<Runtime extends AnyMaterializeDatasetRuntime, QueryDomain extends DomainSchemaResult>(params: {
|
|
54
|
+
runtime: Runtime & MaterializeDatasetRuntimeHandle<Runtime>;
|
|
40
55
|
reactor?: ContextReactor<any, any>;
|
|
41
|
-
queryDomain:
|
|
56
|
+
queryDomain: QueryDomain & CompatibleToolQueryDomain<Runtime, QueryDomain>;
|
|
42
57
|
toolName?: string;
|
|
43
58
|
}): import("ai").Tool<{
|
|
44
|
-
|
|
45
|
-
|
|
59
|
+
datasetId?: string | undefined;
|
|
60
|
+
sandboxId?: string | undefined;
|
|
61
|
+
title?: string | undefined;
|
|
62
|
+
context?: {
|
|
63
|
+
id: string;
|
|
64
|
+
} | {
|
|
65
|
+
key: string;
|
|
66
|
+
} | undefined;
|
|
67
|
+
files?: {
|
|
46
68
|
fileId: string;
|
|
47
69
|
description?: string | undefined;
|
|
48
|
-
} |
|
|
49
|
-
|
|
70
|
+
}[] | undefined;
|
|
71
|
+
texts?: {
|
|
50
72
|
text: string;
|
|
51
|
-
mimeType?: string | undefined;
|
|
52
73
|
name?: string | undefined;
|
|
53
74
|
description?: string | undefined;
|
|
54
|
-
|
|
55
|
-
|
|
75
|
+
mimeType?: string | undefined;
|
|
76
|
+
}[] | undefined;
|
|
77
|
+
datasets?: {
|
|
56
78
|
datasetId: string;
|
|
57
79
|
description?: string | undefined;
|
|
58
|
-
} |
|
|
59
|
-
|
|
80
|
+
}[] | undefined;
|
|
81
|
+
queries?: {
|
|
60
82
|
query: Record<string, any>;
|
|
61
83
|
title?: string | undefined;
|
|
62
84
|
explanation?: string | undefined;
|
|
63
|
-
}
|
|
64
|
-
datasetId?: string | undefined;
|
|
65
|
-
sandboxId?: string | undefined;
|
|
66
|
-
title?: string | undefined;
|
|
85
|
+
}[] | undefined;
|
|
67
86
|
instructions?: string | undefined;
|
|
87
|
+
mode?: "schema" | "auto" | undefined;
|
|
88
|
+
output?: "object" | "rows" | undefined;
|
|
68
89
|
schema?: {
|
|
69
90
|
schema: any;
|
|
70
91
|
title?: string | undefined;
|
|
@@ -75,4 +96,3 @@ export declare function createMaterializeDatasetTool<Env extends {
|
|
|
75
96
|
datasetId: string;
|
|
76
97
|
}>;
|
|
77
98
|
export { materializeDatasetToolInputSchema };
|
|
78
|
-
//# sourceMappingURL=materializeDataset.tool.d.ts.map
|