npm - @ekairos/dataset - Versions diffs - 1.22.39-beta.development.0 → 1.22.39 - Mend

@ekairos/dataset 1.22.39-beta.development.0 → 1.22.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (164)

package/README.md +347 -0
package/dist/agents.d.ts +8 -0
package/dist/agents.js +8 -0
package/dist/builder/agentMaterializers.d.ts +9 -0
package/dist/builder/agentMaterializers.js +10 -0
package/dist/builder/context.d.ts +15 -0
package/dist/builder/context.js +251 -0
package/dist/builder/instructions.d.ts +5 -0
package/dist/builder/instructions.js +40 -0
package/dist/builder/materialize.d.ts +83 -0
package/dist/builder/materialize.js +548 -0
package/dist/builder/materializeQuery.d.ts +12 -0
package/dist/builder/materializeQuery.js +31 -0
package/dist/builder/persistence.d.ts +22 -0
package/dist/builder/persistence.js +192 -0
package/dist/builder/rows.d.ts +7 -0
package/dist/builder/rows.js +56 -0
package/dist/builder/schemaInference.d.ts +3 -0
package/dist/builder/schemaInference.js +61 -0
package/dist/builder/types.d.ts +144 -0
package/dist/builder/types.js +1 -0
package/dist/clearDataset.tool.d.ts +2 -3
package/dist/clearDataset.tool.js +13 -17
package/dist/completeDataset.steps.d.ts +117 -0
package/dist/completeDataset.steps.js +537 -0
package/dist/completeDataset.tool.d.ts +132 -7
package/dist/completeDataset.tool.js +46 -192
package/dist/contextResources.d.ts +31 -0
package/dist/contextResources.js +151 -0
package/dist/contextWorkspace.d.ts +79 -0
package/dist/contextWorkspace.js +234 -0
package/dist/dataset/steps.d.ts +39 -15
package/dist/dataset/steps.js +96 -39
package/dist/dataset.d.ts +3 -67
package/dist/dataset.js +129 -521
package/dist/datasetFiles.d.ts +5 -1
package/dist/datasetFiles.js +29 -27
package/dist/defineNotation.tool.d.ts +49 -0
package/dist/defineNotation.tool.js +154 -0
package/dist/domain.d.ts +1 -2
package/dist/domain.js +1 -6
package/dist/executeCommand.tool.d.ts +2 -30
package/dist/executeCommand.tool.js +165 -39
package/dist/file/file-dataset.agent.d.ts +19 -56
package/dist/file/file-dataset.agent.js +181 -134
package/dist/file/file-dataset.steps.d.ts +27 -0
package/dist/file/file-dataset.steps.js +47 -0
package/dist/file/file-dataset.types.d.ts +64 -0
package/dist/file/file-dataset.types.js +1 -0
package/dist/file/filepreview.d.ts +5 -35
package/dist/file/filepreview.js +60 -107
package/dist/file/filepreview.types.d.ts +31 -0
package/dist/file/filepreview.types.js +1 -0
package/dist/file/generateSchema.tool.d.ts +2 -3
package/dist/file/generateSchema.tool.js +11 -15
package/dist/file/index.d.ts +1 -2
package/dist/file/index.js +1 -18
package/dist/file/prompts.d.ts +2 -3
package/dist/file/prompts.js +152 -32
package/dist/file/scripts.generated.d.ts +1 -0
package/dist/file/scripts.generated.js +11 -0
package/dist/file/steps.d.ts +1 -2
package/dist/file/steps.js +9 -7
package/dist/id.d.ts +1 -0
package/dist/id.js +10 -0
package/dist/index.d.ts +9 -7
package/dist/index.js +9 -23
package/dist/materializeDataset.tool.d.ts +51 -31
package/dist/materializeDataset.tool.js +81 -65
package/dist/notation.d.ts +205 -0
package/dist/notation.js +424 -0
package/dist/query/index.d.ts +1 -2
package/dist/query/index.js +1 -18
package/dist/query/queryDomain.d.ts +3 -4
package/dist/query/queryDomain.js +3 -40
package/dist/query/queryDomain.step.d.ts +1 -1
package/dist/query/queryDomain.step.js +24 -13
package/dist/sandbox/steps.d.ts +23 -15
package/dist/sandbox/steps.js +73 -76
package/dist/sandbox.steps.d.ts +1 -2
package/dist/sandbox.steps.js +1 -18
package/dist/schema.d.ts +15 -13
package/dist/schema.js +27 -37
package/dist/service.d.ts +12 -5
package/dist/service.js +88 -15
package/dist/skill.d.ts +0 -1
package/dist/skill.js +12 -17
package/dist/transform/filepreview.d.ts +2 -3
package/dist/transform/filepreview.js +9 -26
package/dist/transform/index.d.ts +2 -3
package/dist/transform/index.js +2 -8
package/dist/transform/prompts.d.ts +1 -34
package/dist/transform/prompts.js +66 -46
package/dist/transform/transform-dataset.agent.d.ts +20 -45
package/dist/transform/transform-dataset.agent.js +151 -91
package/dist/transform/transform-dataset.steps.d.ts +30 -0
package/dist/transform/transform-dataset.steps.js +61 -0
package/dist/transform/transform-dataset.types.d.ts +95 -0
package/dist/transform/transform-dataset.types.js +1 -0
package/dist/transform/transformDataset.d.ts +3 -3
package/dist/transform/transformDataset.js +15 -18
package/dist/writeDatasetRows.tool.d.ts +188 -0
package/dist/writeDatasetRows.tool.js +258 -0
package/package.json +33 -8
package/dist/clearDataset.tool.d.ts.map +0 -1
package/dist/clearDataset.tool.js.map +0 -1
package/dist/completeDataset.tool.d.ts.map +0 -1
package/dist/completeDataset.tool.js.map +0 -1
package/dist/dataset/steps.d.ts.map +0 -1
package/dist/dataset/steps.js.map +0 -1
package/dist/dataset.d.ts.map +0 -1
package/dist/dataset.js.map +0 -1
package/dist/datasetFiles.d.ts.map +0 -1
package/dist/datasetFiles.js.map +0 -1
package/dist/domain.d.ts.map +0 -1
package/dist/domain.js.map +0 -1
package/dist/eventsReactRuntime.d.ts +0 -22
package/dist/eventsReactRuntime.d.ts.map +0 -1
package/dist/eventsReactRuntime.js +0 -29
package/dist/eventsReactRuntime.js.map +0 -1
package/dist/executeCommand.tool.d.ts.map +0 -1
package/dist/executeCommand.tool.js.map +0 -1
package/dist/file/file-dataset.agent.d.ts.map +0 -1
package/dist/file/file-dataset.agent.js.map +0 -1
package/dist/file/filepreview.d.ts.map +0 -1
package/dist/file/filepreview.js.map +0 -1
package/dist/file/generateSchema.tool.d.ts.map +0 -1
package/dist/file/generateSchema.tool.js.map +0 -1
package/dist/file/index.d.ts.map +0 -1
package/dist/file/index.js.map +0 -1
package/dist/file/prompts.d.ts.map +0 -1
package/dist/file/prompts.js.map +0 -1
package/dist/file/steps.d.ts.map +0 -1
package/dist/file/steps.js.map +0 -1
package/dist/index.d.ts.map +0 -1
package/dist/index.js.map +0 -1
package/dist/materializeDataset.tool.d.ts.map +0 -1
package/dist/materializeDataset.tool.js.map +0 -1
package/dist/query/index.d.ts.map +0 -1
package/dist/query/index.js.map +0 -1
package/dist/query/queryDomain.d.ts.map +0 -1
package/dist/query/queryDomain.js.map +0 -1
package/dist/query/queryDomain.step.d.ts.map +0 -1
package/dist/query/queryDomain.step.js.map +0 -1
package/dist/sandbox/steps.d.ts.map +0 -1
package/dist/sandbox/steps.js.map +0 -1
package/dist/sandbox.steps.d.ts.map +0 -1
package/dist/sandbox.steps.js.map +0 -1
package/dist/schema.d.ts.map +0 -1
package/dist/schema.js.map +0 -1
package/dist/service.d.ts.map +0 -1
package/dist/service.js.map +0 -1
package/dist/skill.d.ts.map +0 -1
package/dist/skill.js.map +0 -1
package/dist/transform/filepreview.d.ts.map +0 -1
package/dist/transform/filepreview.js.map +0 -1
package/dist/transform/index.d.ts.map +0 -1
package/dist/transform/index.js.map +0 -1
package/dist/transform/prompts.d.ts.map +0 -1
package/dist/transform/prompts.js.map +0 -1
package/dist/transform/transform-dataset.agent.d.ts.map +0 -1
package/dist/transform/transform-dataset.agent.js.map +0 -1
package/dist/transform/transformDataset.d.ts.map +0 -1
package/dist/transform/transformDataset.js.map +0 -1

package/dist/materializeDataset.tool.js CHANGED Viewed

@@ -1,88 +1,104 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.materializeDatasetToolInputSchema = void 0;
-exports.createMaterializeDatasetTool = createMaterializeDatasetTool;
-const ai_1 = require("ai");
-const zod_1 = require("zod");
-const dataset_1 = require("./dataset");
-const fileSourceSchema = zod_1.z.object({
-    kind: zod_1.z.literal("file"),
-    fileId: zod_1.z.string(),
-    description: zod_1.z.string().optional(),
+import { tool } from "ai";
+import { z } from "zod";
+import { dataset } from "./dataset.js";
+const fileResourceSchema = z.object({
+    kind: z.literal("file"),
+    fileId: z.string(),
+    description: z.string().optional(),
 });
-const textSourceSchema = zod_1.z.object({
-    kind: zod_1.z.literal("text"),
-    text: zod_1.z.string(),
-    mimeType: zod_1.z.string().optional(),
-    name: zod_1.z.string().optional(),
-    description: zod_1.z.string().optional(),
+const textResourceSchema = z.object({
+    kind: z.literal("text"),
+    text: z.string(),
+    mimeType: z.string().optional(),
+    name: z.string().optional(),
+    description: z.string().optional(),
 });
-const datasetSourceSchema = zod_1.z.object({
-    kind: zod_1.z.literal("dataset"),
-    datasetId: zod_1.z.string(),
-    description: zod_1.z.string().optional(),
+const datasetResourceSchema = z.object({
+    kind: z.literal("dataset"),
+    datasetId: z.string(),
+    description: z.string().optional(),
 });
-const querySourceSchema = zod_1.z.object({
-    kind: zod_1.z.literal("query"),
-    query: zod_1.z.record(zod_1.z.string(), zod_1.z.any()),
-    title: zod_1.z.string().optional(),
-    explanation: zod_1.z.string().optional(),
+const queryResourceSchema = z.object({
+    kind: z.literal("query"),
+    query: z.record(z.string(), z.any()),
+    title: z.string().optional(),
+    explanation: z.string().optional(),
 });
-const datasetSchemaSchema = zod_1.z.object({
-    title: zod_1.z.string().optional(),
-    description: zod_1.z.string().optional(),
-    schema: zod_1.z.any(),
+const contextInputSchema = z.union([
+    z.object({ id: z.string() }),
+    z.object({ key: z.string() }),
+]);
+const datasetSchemaSchema = z.object({
+    title: z.string().optional(),
+    description: z.string().optional(),
+    schema: z.any(),
 });
-const materializeDatasetToolInputSchema = zod_1.z.object({
-    datasetId: zod_1.z.string().optional(),
-    sandboxId: zod_1.z.string().optional(),
-    title: zod_1.z.string().optional(),
-    sources: zod_1.z.array(zod_1.z.discriminatedUnion("kind", [
-        fileSourceSchema,
-        textSourceSchema,
-        datasetSourceSchema,
-        querySourceSchema,
-    ])).min(1),
-    instructions: zod_1.z.string().optional(),
+const materializeDatasetToolInputSchema = z.object({
+    datasetId: z.string().optional(),
+    sandboxId: z.string().optional(),
+    title: z.string().optional(),
+    context: contextInputSchema.optional(),
+    files: z.array(fileResourceSchema.omit({ kind: true })).optional(),
+    texts: z.array(textResourceSchema.omit({ kind: true })).optional(),
+    datasets: z.array(datasetResourceSchema.omit({ kind: true })).optional(),
+    queries: z.array(queryResourceSchema.omit({ kind: true })).optional(),
+    instructions: z.string().optional(),
+    mode: z.enum(["auto", "schema"]).optional(),
+    output: z.enum(["rows", "object"]).optional(),
     schema: datasetSchemaSchema.optional(),
-    first: zod_1.z.boolean().optional(),
+    first: z.boolean().optional(),
 });
-exports.materializeDatasetToolInputSchema = materializeDatasetToolInputSchema;
-function createMaterializeDatasetTool(params) {
-    return (0, ai_1.tool)({
-        description: "Materialize a dataset from declarative sources. Returns only the target datasetId. Query sources use the preconfigured runtime domain.",
+export function createMaterializeDatasetTool(params) {
+    return tool({
+        description: "Materialize a dataset from declarative resources. Returns only the target datasetId. Query resources use the preconfigured runtime domain.",
         inputSchema: materializeDatasetToolInputSchema,
         execute: async (input) => {
-            let builder = (0, dataset_1.dataset)(params.env);
+            let builder = dataset(params.runtime);
             if (input.title?.trim()) {
                 builder = builder.title(input.title);
             }
             if (input.sandboxId?.trim()) {
                 builder = builder.sandbox({ sandboxId: input.sandboxId });
             }
-            for (const source of input.sources) {
-                if (source.kind === "file") {
-                    builder = builder.fromFile(source);
-                    continue;
-                }
-                if (source.kind === "text") {
-                    builder = builder.fromText(source);
-                    continue;
-                }
-                if (source.kind === "dataset") {
-                    builder = builder.fromDataset(source);
-                    continue;
-                }
+            const materialCount = (input.files?.length ?? 0) +
+                (input.texts?.length ?? 0) +
+                (input.datasets?.length ?? 0) +
+                (input.queries?.length ?? 0);
+            if (input.context && materialCount > 0) {
+                throw new Error("dataset_context_resource_is_exclusive");
+            }
+            if (!input.context && materialCount === 0) {
+                throw new Error("dataset_context_or_material_required");
+            }
+            if (input.context) {
+                builder = builder.fromContext(input.context);
+            }
+            for (const resource of input.files ?? []) {
+                builder = builder.fromFile(resource);
+            }
+            for (const resource of input.texts ?? []) {
+                builder = builder.fromText(resource);
+            }
+            for (const resource of input.datasets ?? []) {
+                builder = builder.fromDataset(resource);
+            }
+            for (const resource of input.queries ?? []) {
                 builder = builder.fromQuery(params.queryDomain, {
-                    query: source.query,
-                    title: source.title,
-                    explanation: source.explanation,
+                    query: resource.query,
+                    title: resource.title,
+                    explanation: resource.explanation,
                 });
             }
+            if (input.output === "object") {
+                builder = builder.asObject();
+            }
+            else {
+                builder = builder.asRows();
+            }
             if (input.schema) {
                 builder = builder.schema(input.schema);
             }
-            else {
+            else if (input.mode === "auto" || input.mode === undefined) {
                 builder = builder.inferSchema();
             }
             if (input.instructions?.trim()) {
@@ -99,4 +115,4 @@ function createMaterializeDatasetTool(params) {
         },
     });
 }
-//# sourceMappingURL=materializeDataset.tool.js.map
+export { materializeDatasetToolInputSchema };

package/dist/notation.d.ts ADDED Viewed

@@ -0,0 +1,205 @@
+/**
+ * Formal notation for datasets — the dataset stated intensionally.
+ *
+ * A dataset has TWO CO-EQUAL FACES at the same level:
+ * - its formal DEFINITION (this notation: the proposition that defines the
+ *   set, in LaTeX), and
+ * - its MATERIALIZATION (the rows + the code that produces them).
+ * The notation is not a comment about the data; it IS the dataset, written
+ * as a logical statement. The materialization is the same set written
+ * extensionally. Neither is subordinate to the other.
+ *
+ * The SAME notation plays two roles across the lifecycle: it is the PLAN
+ * (status "plan": stated first, the materialization is built to realize it)
+ * and, once finalized, the RESULT (status "result": it describes exactly
+ * what was produced). It is iterated in between — every revision keeps the
+ * prior version in `history`, so the discovery trail stays visible.
+ *
+ * The definition is a logical proposition, possibly DERIVED (a syllogism),
+ * so it is NOT, in general, mechanically verifiable: a predicate may be
+ * semantic ("x es una frase divertida") and the set is still well-formed.
+ * We TRUST the formality and the produced dataset — there is no verdict.
+ *
+ * SOME predicates happen to be arithmetic (a row count, a field type, a
+ * preserved total). For those, and only those, we attach OPTIONAL evidence
+ * computed over the rows. It is advisory: a contradiction is a hint, never
+ * a claim that the dataset is invalid. Predicates with no arithmetic form
+ * are "asserted" — trusted. Nothing here blocks or changes a build; the
+ * notation rides on dataset_datasets.notation.
+ */
+export type DatasetNotationSymbolKind = "set" | "variable" | "function" | "constant" | "predicate";
+export type DatasetNotationSymbol = {
+    /** plain identifier, e.g. "D", "w", "Orders" */
+    name: string;
+    /** LaTeX for the symbol, e.g. "\\mathcal{D}" (defaults to the name) */
+    latex?: string;
+    kind: DatasetNotationSymbolKind;
+    description: string;
+};
+export type NotationCmpOp = "=" | "!=" | "<" | "<=" | ">" | ">=";
+/**
+ * OPTIONAL arithmetic evidence for the subset of predicates that happen to
+ * be mechanical (counts, types, ranges, totals). Evaluated over the rows;
+ * field access supports dot-paths into nested records ("company.taxId").
+ * Leaf checks are dataset-level propositions; and/or/not/implies compose
+ * them propositionally. A predicate WITHOUT a check is a formal/semantic
+ * claim we trust — that is the normal case, not an exception.
+ */
+export type NotationCheck = {
+    kind: "row_count";
+    op: NotationCmpOp;
+    value: number;
+} | {
+    kind: "field_type";
+    field: string;
+    type: "number" | "integer" | "string" | "boolean";
+    allowNull?: boolean;
+} | {
+    kind: "field_range";
+    field: string;
+    min?: number;
+    max?: number;
+} | {
+    kind: "field_in";
+    field: string;
+    values: Array<string | number | boolean>;
+} | {
+    kind: "field_nonnull";
+    field: string;
+} | {
+    kind: "field_matches";
+    field: string;
+    pattern: string;
+} | {
+    kind: "unique";
+    fields: string[];
+} | {
+    kind: "aggregate";
+    fn: "sum" | "count" | "min" | "max" | "avg";
+    /** omit for fn = "count" */
+    field?: string;
+    op: NotationCmpOp;
+    value: number;
+    /** absolute tolerance for float comparison (default 1e-9) */
+    tolerance?: number;
+} | {
+    kind: "and";
+    checks: NotationCheck[];
+} | {
+    kind: "or";
+    checks: NotationCheck[];
+} | {
+    kind: "not";
+    check: NotationCheck;
+} | {
+    kind: "implies";
+    if: NotationCheck;
+    then: NotationCheck;
+};
+export type DatasetNotationPredicate = {
+    /** stable id within the notation, e.g. "p1", "rowCount" */
+    id: string;
+    description: string;
+    /** the claim in LaTeX, e.g. "\\forall r \\in D:\\; r.amount > 0" */
+    latex: string;
+    /**
+     * OPTIONAL arithmetic form. Absent (the common case) = a formal/semantic
+     * claim we trust without mechanical checking.
+     */
+    check?: NotationCheck;
+};
+/**
+ * Advisory evidence for one predicate. Never a verdict on the dataset:
+ * - "asserted"     formal/semantic claim, trusted, no mechanical check
+ * - "supported"    arithmetic evidence agrees with the stated claim
+ * - "contradicted" arithmetic evidence disagrees — a hint, not a failure
+ */
+export type DatasetNotationCheckResult = {
+    predicateId: string;
+    status: "asserted" | "supported" | "contradicted";
+    detail?: string;
+};
+export type DatasetNotationRevision = {
+    version: number;
+    latex: string;
+    /** why this revision happened — the discovery that triggered it */
+    reason: string;
+    at: number;
+};
+/**
+ * The role the notation currently plays — the two ends of its life:
+ * - "plan":   stated before/while building; the materialization realizes it
+ * - "result": finalized; it describes the dataset that was produced
+ * There is intentionally NO "verified"/"violated" verdict — validity is
+ * trusted, not proven. Iteration is tracked by `version`/`history`; advisory
+ * arithmetic evidence lives in `checks`, separate from this role.
+ */
+export type DatasetNotationStatus = "plan" | "result";
+export type DatasetNotation = {
+    version: number;
+    status: DatasetNotationStatus;
+    /** the main definition: the dataset as a set, in LaTeX */
+    latex: string;
+    symbols: DatasetNotationSymbol[];
+    predicates: DatasetNotationPredicate[];
+    /** advisory per-predicate evidence (asserted/supported/contradicted) */
+    checks?: DatasetNotationCheckResult[];
+    /** when the advisory evidence was last computed */
+    evidenceAt?: number;
+    history: DatasetNotationRevision[];
+};
+export type NotationRevisionInput = {
+    latex: string;
+    symbols?: DatasetNotationSymbol[];
+    predicates?: DatasetNotationPredicate[];
+    reason: string;
+    /** "final" marks the notation as describing the produced dataset */
+    final?: boolean;
+};
+/**
+ * Iterate the notation: every revision bumps the version and appends to
+ * history, so the discovery trail (sets/variables found along the way)
+ * stays visible.
+ */
+export declare function reviseDatasetNotation(previous: DatasetNotation | null | undefined, input: NotationRevisionInput): DatasetNotation;
+/** escape an identifier for use inside \text{} */
+export declare function latexIdentifier(name: string): string;
+type JsonSchemaLike = {
+    title?: string;
+    schema?: Record<string, any>;
+    properties?: Record<string, any>;
+};
+/**
+ * A query-backed dataset has a complete deterministic description: the
+ * dataset is the image of a known query over a known domain. No model is
+ * involved, so here the formal definition and its predicates derive
+ * mechanically from the query, the inferred schema and the row count — and
+ * those predicates DO carry arithmetic evidence (the special case where the
+ * formal claims happen to be fully mechanical).
+ */
+export declare function inferQueryNotation(params: {
+    entityNames: string[];
+    rowCount: number;
+    schema?: JsonSchemaLike | null;
+    explanation?: string;
+}): DatasetNotation;
+type CheckOutcome = {
+    ok: boolean;
+    detail: string;
+};
+export declare function evaluateNotationCheck(rows: any[], check: NotationCheck): CheckOutcome;
+/**
+ * Annotate a notation with ADVISORY arithmetic evidence over the produced
+ * rows. Never throws, never blocks, and never changes the notation's
+ * lifecycle status — the dataset's validity is trusted, not proven here.
+ *
+ * Each predicate is reported as:
+ * - "asserted"     no arithmetic form (formal/semantic claim, trusted)
+ * - "supported"    arithmetic evidence agrees
+ * - "contradicted" arithmetic evidence disagrees (a hint to look, not a
+ *                  verdict that the dataset is wrong)
+ * A check that can't be evaluated stays "asserted" — we don't downgrade a
+ * trusted claim because of a malformed mechanical form.
+ */
+export declare function annotateNotationEvidence(notation: DatasetNotation, rows: any[]): DatasetNotation;
+export {};