npm - @eagleoutice/flowr - Versions diffs - 2.2.16 → 2.4.0 - Mend

@eagleoutice/flowr 2.2.16 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147)

package/README.md +48 -20
package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
package/abstract-interpretation/data-frame/absint-info.js +31 -0
package/abstract-interpretation/data-frame/absint-visitor.d.ts +58 -0
package/abstract-interpretation/data-frame/absint-visitor.js +171 -0
package/abstract-interpretation/data-frame/domain.d.ts +107 -0
package/abstract-interpretation/data-frame/domain.js +315 -0
package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
package/abstract-interpretation/data-frame/resolve-args.js +118 -0
package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
package/abstract-interpretation/data-frame/semantics.js +363 -0
package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
package/abstract-interpretation/data-frame/shape-inference.js +111 -0
package/benchmark/slicer.d.ts +15 -1
package/benchmark/slicer.js +137 -0
package/benchmark/stats/print.js +123 -45
package/benchmark/stats/size-of.d.ts +7 -0
package/benchmark/stats/size-of.js +1 -0
package/benchmark/stats/stats.d.ts +30 -1
package/benchmark/stats/stats.js +4 -2
package/benchmark/summarizer/data.d.ts +33 -2
package/benchmark/summarizer/first-phase/input.js +5 -1
package/benchmark/summarizer/first-phase/process.js +47 -1
package/benchmark/summarizer/second-phase/graph.js +1 -1
package/benchmark/summarizer/second-phase/process.js +102 -4
package/cli/benchmark-app.d.ts +2 -0
package/cli/benchmark-app.js +2 -0
package/cli/benchmark-helper-app.d.ts +2 -0
package/cli/benchmark-helper-app.js +10 -3
package/cli/common/options.js +4 -0
package/cli/repl/commands/repl-query.js +1 -1
package/cli/repl/server/connection.js +14 -5
package/config.d.ts +31 -0
package/config.js +21 -1
package/control-flow/basic-cfg-guided-visitor.d.ts +1 -2
package/control-flow/basic-cfg-guided-visitor.js +0 -6
package/control-flow/cfg-simplification.d.ts +6 -0
package/control-flow/cfg-simplification.js +18 -9
package/control-flow/control-flow-graph.d.ts +3 -8
package/control-flow/control-flow-graph.js +5 -6
package/control-flow/dfg-cfg-guided-visitor.js +1 -1
package/control-flow/extract-cfg.d.ts +2 -2
package/control-flow/extract-cfg.js +52 -63
package/control-flow/semantic-cfg-guided-visitor.d.ts +1 -1
package/control-flow/semantic-cfg-guided-visitor.js +1 -1
package/core/steps/all/static-slicing/00-slice.d.ts +7 -1
package/core/steps/all/static-slicing/00-slice.js +9 -3
package/core/steps/pipeline/default-pipelines.d.ts +74 -74
package/dataflow/environments/built-in.d.ts +7 -5
package/dataflow/environments/built-in.js +16 -13
package/dataflow/eval/resolve/alias-tracking.js +2 -2
package/dataflow/eval/resolve/resolve.d.ts +53 -9
package/dataflow/eval/resolve/resolve.js +132 -38
package/dataflow/graph/dataflowgraph-builder.js +2 -2
package/dataflow/graph/graph.js +1 -1
package/dataflow/graph/invert-dfg.d.ts +2 -0
package/dataflow/graph/invert-dfg.js +17 -0
package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +1 -0
package/dataflow/internal/process/functions/call/built-in/built-in-source.js +4 -0
package/documentation/doc-util/doc-query.js +11 -1
package/documentation/doc-util/doc-search.js +2 -2
package/documentation/print-cfg-wiki.js +3 -4
package/documentation/print-core-wiki.js +2 -2
package/documentation/print-dataflow-graph-wiki.js +7 -0
package/documentation/print-faq-wiki.js +4 -0
package/documentation/print-interface-wiki.js +11 -0
package/documentation/print-linter-wiki.js +36 -4
package/documentation/print-linting-and-testing-wiki.js +13 -1
package/documentation/print-onboarding-wiki.js +4 -0
package/documentation/print-query-wiki.js +29 -3
package/linter/linter-executor.js +1 -2
package/linter/linter-format.d.ts +26 -4
package/linter/linter-format.js +25 -6
package/linter/linter-rules.d.ts +63 -12
package/linter/linter-rules.js +5 -1
package/linter/rules/absolute-path.d.ts +4 -7
package/linter/rules/absolute-path.js +9 -6
package/linter/rules/dataframe-access-validation.d.ts +55 -0
package/linter/rules/dataframe-access-validation.js +118 -0
package/linter/rules/dead-code.d.ts +43 -0
package/linter/rules/dead-code.js +50 -0
package/linter/rules/deprecated-functions.d.ts +3 -2
package/linter/rules/deprecated-functions.js +3 -1
package/linter/rules/file-path-validity.d.ts +4 -4
package/linter/rules/file-path-validity.js +8 -6
package/linter/rules/naming-convention.d.ts +5 -4
package/linter/rules/naming-convention.js +8 -2
package/linter/rules/seeded-randomness.d.ts +4 -3
package/linter/rules/seeded-randomness.js +3 -1
package/linter/rules/unused-definition.d.ts +2 -0
package/linter/rules/unused-definition.js +3 -1
package/package.json +2 -2
package/queries/catalog/dependencies-query/dependencies-query-executor.js +6 -1
package/queries/catalog/dependencies-query/function-info/read-functions.js +1 -0
package/queries/catalog/dependencies-query/function-info/write-functions.js +1 -0
package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
package/queries/catalog/linter-query/linter-query-format.js +1 -1
package/queries/catalog/location-map-query/location-map-query-executor.js +7 -5
package/queries/catalog/location-map-query/location-map-query-format.d.ts +3 -0
package/queries/catalog/location-map-query/location-map-query-format.js +1 -0
package/queries/catalog/search-query/search-query-executor.js +1 -1
package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -1
package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +3 -0
package/queries/catalog/static-slice-query/static-slice-query-format.js +3 -1
package/queries/query-print.d.ts +1 -1
package/queries/query-print.js +0 -1
package/queries/query.d.ts +77 -6
package/queries/query.js +26 -11
package/search/flowr-search-builder.d.ts +6 -6
package/search/flowr-search-executor.d.ts +2 -2
package/search/flowr-search-executor.js +1 -1
package/search/flowr-search.d.ts +13 -8
package/search/flowr-search.js +21 -0
package/search/search-executor/search-enrichers.d.ts +87 -20
package/search/search-executor/search-enrichers.js +44 -5
package/search/search-executor/search-generators.d.ts +4 -4
package/search/search-executor/search-generators.js +12 -7
package/search/search-executor/search-mappers.js +3 -2
package/search/search-executor/search-transformer.d.ts +3 -3
package/search/search-executor/search-transformer.js +2 -2
package/slicing/static/static-slicer.d.ts +4 -2
package/slicing/static/static-slicer.js +10 -4
package/util/collections/arrays.d.ts +2 -0
package/util/collections/arrays.js +9 -0
package/util/files.d.ts +8 -2
package/util/files.js +22 -4
package/util/mermaid/dfg.js +4 -2
package/util/r-value.d.ts +23 -0
package/util/r-value.js +113 -0
package/util/range.d.ts +1 -0
package/util/range.js +5 -1
package/util/version.js +1 -1
package/util/cfg/cfg.d.ts +0 -0
package/util/cfg/cfg.js +0 -2

package/abstract-interpretation/data-frame/semantics.js ADDED Viewed

@@ -0,0 +1,363 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.DataFrameOperationNames = exports.ConstraintType = void 0;
+exports.applyDataFrameSemantics = applyDataFrameSemantics;
+exports.getConstraintType = getConstraintType;
+const assert_1 = require("../../util/assert");
+const domain_1 = require("./domain");
+/**
+ * Enumerates how the constraints inferred by an abstract data frame operation are to be interpreted.
+ *
+ * - `OperandPrecondition` (0): the inferred constraints must hold for the operand at the point of the operation
+ * - `OperandModification` (1): the inferred constraints are applied to the operand during the operation
+ * - `ResultPostcondition` (2): the inferred constraints must hold for the returned result of the operation
+ */
+var ConstraintType;
+(function (target) {
+    // numeric enum layout: every member is registered as name -> index and as index -> name
+    const members = ['OperandPrecondition', 'OperandModification', 'ResultPostcondition'];
+    members.forEach((name, index) => {
+        target[name] = index;
+        target[index] = name;
+    });
+})(ConstraintType || (exports.ConstraintType = ConstraintType = {}));
+/**
+ * Dispatch table of all abstract data frame operations.
+ * Every operation name maps to the function applying its abstract semantics (`apply`)
+ * and to the {@link ConstraintType} describing how the inferred constraints are to be interpreted (`type`).
+ * The order of the rows below determines the order of the exported operation names.
+ */
+const DataFrameSemanticsMapper = Object.fromEntries([
+    ['create', applyCreateSemantics, ConstraintType.ResultPostcondition],
+    ['read', applyReadSemantics, ConstraintType.ResultPostcondition],
+    ['accessCols', applyAccessColsSemantics, ConstraintType.OperandPrecondition],
+    ['accessRows', applyAccessRowsSemantics, ConstraintType.OperandPrecondition],
+    ['assignCols', applyAssignColsSemantics, ConstraintType.OperandModification],
+    ['assignRows', applyAssignRowsSemantics, ConstraintType.OperandModification],
+    ['setColNames', applySetColNamesSemantics, ConstraintType.OperandModification],
+    ['addCols', applyAddColsSemantics, ConstraintType.ResultPostcondition],
+    ['addRows', applyAddRowsSemantics, ConstraintType.ResultPostcondition],
+    ['removeCols', applyRemoveColsSemantics, ConstraintType.ResultPostcondition],
+    ['removeRows', applyRemoveRowsSemantics, ConstraintType.ResultPostcondition],
+    ['concatCols', applyConcatColsSemantics, ConstraintType.ResultPostcondition],
+    ['concatRows', applyConcatRowsSemantics, ConstraintType.ResultPostcondition],
+    ['subsetCols', applySubsetColsSemantics, ConstraintType.ResultPostcondition],
+    ['subsetRows', applySubsetRowsSemantics, ConstraintType.ResultPostcondition],
+    ['filterRows', applyFilterRowsSemantics, ConstraintType.ResultPostcondition],
+    ['mutateCols', applyMutateColsSemantics, ConstraintType.ResultPostcondition],
+    ['groupBy', applyGroupBySemantics, ConstraintType.ResultPostcondition],
+    ['summarize', applySummarizeSemantics, ConstraintType.ResultPostcondition],
+    ['join', applyJoinSemantics, ConstraintType.ResultPostcondition],
+    ['unknown', applyUnknownSemantics, ConstraintType.ResultPostcondition],
+    ['identity', applyIdentitySemantics, ConstraintType.ResultPostcondition]
+].map(([operation, apply, type]) => [operation, { apply, type }]));
+/** The names of all abstract data frame operations */
+exports.DataFrameOperationNames = Object.keys(DataFrameSemanticsMapper);
+/**
+ * Entry point for evaluating an abstract data frame operation on the data frame shape domain.
+ * The arguments are expected to be sanitized already according to the original concrete data frame function
+ * (e.g. duplicate/invalid column names have been replaced).
+ *
+ * @param operation - Name of the abstract operation whose semantics are applied
+ * @param value     - Abstract data frame shape of the operand of the operation
+ * @param args      - Arguments passed to the semantics applier of the operation
+ * @param options   - Optional additional options passed to the semantics applier
+ * @returns The new data frame shape constraints produced by the applier.
+ * How they are to be interpreted depends on the {@link ConstraintType} of the operation.
+ */
+function applyDataFrameSemantics(operation, value, args, options) {
+    // look up the mapper entry and delegate to its applier
+    return DataFrameSemanticsMapper[operation].apply(value, args, options);
+}
+/**
+ * Looks up the default {@link ConstraintType} registered for an abstract data frame operation.
+ *
+ * @param operation - Name of the abstract operation
+ * @returns The constraint type stored in the mapper entry of the operation
+ */
+function getConstraintType(operation) {
+    const { type } = DataFrameSemanticsMapper[operation];
+    return type;
+}
+/**
+ * Abstract semantics of creating a data frame; the operand shape `value` is not used.
+ *
+ * @param value - Operand shape (ignored)
+ * @param args  - `colnames`: the column names (the list or single entries may be missing);
+ *                `rows`: the row count as an interval array or as a single number (anything else counts as unknown)
+ * @returns A shape with `colnames` only if every name is defined (otherwise `ColNamesTop`),
+ * a column interval of exactly the number of names (`IntervalTop` without a name list),
+ * and a row interval of `rows`, `[rows, rows]`, or `IntervalTop`
+ */
+function applyCreateSemantics(value, { colnames, rows }) {
+    const count = colnames?.length;
+    const namesKnown = colnames?.every(assert_1.isNotUndefined);
+    let rowInterval = domain_1.IntervalTop;
+    if (Array.isArray(rows)) {
+        rowInterval = rows;
+    }
+    else if (typeof rows === 'number') {
+        rowInterval = [rows, rows];
+    }
+    return {
+        colnames: namesKnown ? colnames : domain_1.ColNamesTop,
+        cols: count === undefined ? domain_1.IntervalTop : [count, count],
+        rows: rowInterval
+    };
+}
+/**
+ * Abstract semantics of reading a data frame.
+ * Behaves exactly like creating a data frame from the given `colnames` and `rows`.
+ */
+function applyReadSemantics(value, args) {
+    const { colnames, rows } = args;
+    return applyCreateSemantics(value, { colnames, rows });
+}
+/**
+ * Abstract semantics of accessing columns of a data frame.
+ *
+ * @param value - Operand shape
+ * @param args  - `columns`: the accessed columns, either all given by name (strings) or all by index (numbers)
+ * @returns For names, the shape with the names joined into `colnames`;
+ * for indices, the shape with `cols` combined with every index via `maxInterval`;
+ * otherwise (missing or mixed `columns`) the unchanged operand shape
+ */
+function applyAccessColsSemantics(value, { columns }) {
+    const allOfType = (type) => columns?.every(col => typeof col === type);
+    if (allOfType('string')) {
+        const colnames = (0, domain_1.joinColNames)(value.colnames, columns);
+        return { ...value, colnames };
+    }
+    if (allOfType('number')) {
+        let cols = value.cols;
+        columns.forEach(index => {
+            cols = (0, domain_1.maxInterval)(cols, [index, index]);
+        });
+        return { ...value, cols };
+    }
+    return value;
+}
+/**
+ * Abstract semantics of accessing rows of a data frame.
+ *
+ * @param value - Operand shape
+ * @param args  - `rows`: the accessed row indices, or `undefined` if unknown
+ * @returns The shape with `rows` combined with every index via `maxInterval`,
+ * or the unchanged operand shape if no indices are known
+ */
+function applyAccessRowsSemantics(value, { rows }) {
+    if (rows === undefined) {
+        return value;
+    }
+    let bounds = value.rows;
+    rows.forEach(index => {
+        bounds = (0, domain_1.maxInterval)(bounds, [index, index]);
+    });
+    return { ...value, rows: bounds };
+}
+/**
+ * Abstract semantics of assigning to columns of a data frame.
+ *
+ * @param value - Operand shape
+ * @param args  - `columns`: the assigned columns, either all given by name (strings) or all by index (numbers)
+ * @returns For names, the names are joined into `colnames` and between zero and all of them may be added to `cols`
+ * (with the number of names as additional `maxInterval` operand);
+ * for indices, `colnames` becomes `ColNamesTop` and `cols` is combined with every index via `maxInterval`;
+ * otherwise `colnames` becomes `ColNamesTop` and `cols` is widened via `extendIntervalToInfinity`
+ */
+function applyAssignColsSemantics(value, { columns }) {
+    const allOfType = (type) => columns?.every(col => typeof col === type);
+    if (allOfType('string')) {
+        const count = columns.length;
+        const colnames = (0, domain_1.joinColNames)(value.colnames, columns);
+        const grown = (0, domain_1.addInterval)(value.cols, [0, count]);
+        return { ...value, colnames, cols: (0, domain_1.maxInterval)(grown, [count, count]) };
+    }
+    if (allOfType('number')) {
+        let cols = value.cols;
+        columns.forEach(index => {
+            cols = (0, domain_1.maxInterval)(cols, [index, index]);
+        });
+        return { ...value, colnames: domain_1.ColNamesTop, cols };
+    }
+    // unknown or mixed columns: nothing is known about the names and the column count may grow arbitrarily
+    return {
+        ...value,
+        colnames: domain_1.ColNamesTop,
+        cols: (0, domain_1.extendIntervalToInfinity)(value.cols)
+    };
+}
+/**
+ * Abstract semantics of assigning to rows of a data frame.
+ *
+ * @param value - Operand shape
+ * @param args  - `rows`: the assigned row indices, or `undefined` if unknown
+ * @returns The shape with `rows` combined with every index via `maxInterval`,
+ * or with `rows` widened via `extendIntervalToInfinity` if no indices are known
+ */
+function applyAssignRowsSemantics(value, { rows }) {
+    if (rows === undefined) {
+        return { ...value, rows: (0, domain_1.extendIntervalToInfinity)(value.rows) };
+    }
+    let bounds = value.rows;
+    rows.forEach(index => {
+        bounds = (0, domain_1.maxInterval)(bounds, [index, index]);
+    });
+    return { ...value, rows: bounds };
+}
+/**
+ * Abstract semantics of setting the column names of a data frame.
+ *
+ * @param value   - Operand shape
+ * @param args    - `colnames`: the new column names (the list or single entries may be missing)
+ * @param options - With `partial`, fully known names are joined into the existing `colnames`;
+ *                  without it, they replace `colnames`, but only if there are at least as many names as the upper bound of `cols`
+ * @returns The shape with updated `colnames`; `ColNamesTop` whenever the names are not fully known (or do not cover all columns)
+ */
+function applySetColNamesSemantics(value, { colnames }, options) {
+    const namesKnown = colnames?.every(assert_1.isNotUndefined);
+    let names;
+    if (options?.partial) {
+        names = namesKnown ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop;
+    }
+    else {
+        const count = colnames?.length;
+        const coversAllCols = value.cols !== domain_1.IntervalBottom && count !== undefined && count >= value.cols[1];
+        names = coversAllCols && namesKnown ? colnames : domain_1.ColNamesTop;
+    }
+    return { ...value, colnames: names };
+}
+/**
+ * Abstract semantics of adding columns to a data frame.
+ *
+ * @param value - Operand shape
+ * @param args  - `colnames`: the names of the added columns (the list or single entries may be missing)
+ * @returns The shape with the names joined into `colnames` (`ColNamesTop` if not all names are known)
+ * and the number of names added to `cols` (widened via `extendIntervalToInfinity` without a name list)
+ */
+function applyAddColsSemantics(value, { colnames }) {
+    const added = colnames?.length;
+    const namesKnown = colnames?.every(assert_1.isNotUndefined);
+    const names = namesKnown ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop;
+    const cols = added === undefined
+        ? (0, domain_1.extendIntervalToInfinity)(value.cols)
+        : (0, domain_1.addInterval)(value.cols, [added, added]);
+    return { ...value, colnames: names, cols };
+}
+/**
+ * Abstract semantics of adding rows to a data frame.
+ *
+ * @param value - Operand shape
+ * @param args  - `rows`: the number of added rows, or `undefined` if unknown
+ * @returns The shape with `rows` increased by exactly `rows`, or widened via `extendIntervalToInfinity` if unknown
+ */
+function applyAddRowsSemantics(value, { rows }) {
+    if (rows === undefined) {
+        return { ...value, rows: (0, domain_1.extendIntervalToInfinity)(value.rows) };
+    }
+    return { ...value, rows: (0, domain_1.addInterval)(value.rows, [rows, rows]) };
+}
+/**
+ * Abstract semantics of removing columns from a data frame.
+ *
+ * @param value   - Operand shape
+ * @param args    - `colnames`: the names of the removed columns (the list or single entries may be missing)
+ * @param options - With `maybe`, `[count, 0]` instead of `[count, count]` is subtracted from `cols`
+ * @returns The shape with the defined names subtracted from `colnames` (unchanged without a name list)
+ * and the number of names subtracted from `cols` (widened via `extendIntervalToZero` without a name list)
+ */
+function applyRemoveColsSemantics(value, { colnames }, options) {
+    const count = colnames?.length;
+    const names = colnames === undefined
+        ? value.colnames
+        : (0, domain_1.subtractColNames)(value.colnames, colnames.filter(assert_1.isNotUndefined));
+    let cols;
+    if (count === undefined) {
+        cols = (0, domain_1.extendIntervalToZero)(value.cols);
+    }
+    else {
+        // a possible removal subtracts [count, 0], a definite removal subtracts [count, count]
+        cols = (0, domain_1.subtractInterval)(value.cols, [count, options?.maybe ? 0 : count]);
+    }
+    return { ...value, colnames: names, cols };
+}
+/**
+ * Abstract semantics of removing rows from a data frame.
+ * Only the row interval of the operand changes; column names and column interval are kept.
+ *
+ * @param value   - Operand shape
+ * @param args    - `rows`: the number of removed rows, or `undefined` if unknown
+ * @param options - With `maybe`, `[rows, 0]` instead of `[rows, rows]` is subtracted from the row interval
+ *                  (same pattern as the `maybe` option of `applyRemoveColsSemantics`)
+ * @returns The shape with the removed rows subtracted from `rows`,
+ * or with `rows` widened via `extendIntervalToZero` if the number of removed rows is unknown
+ */
+function applyRemoveRowsSemantics(value, { rows }, options) {
+    if (options?.maybe) {
+        return {
+            ...value,
+            // must update the row interval from the row interval: the previous version
+            // wrote the result to `cols` (computed from `value.cols`) and left `rows` untouched
+            rows: rows !== undefined ? (0, domain_1.subtractInterval)(value.rows, [rows, 0]) : (0, domain_1.extendIntervalToZero)(value.rows)
+        };
+    }
+    return {
+        ...value,
+        rows: rows !== undefined ? (0, domain_1.subtractInterval)(value.rows, [rows, rows]) : (0, domain_1.extendIntervalToZero)(value.rows)
+    };
+}
+/**
+ * Abstract semantics of concatenating the columns of another data frame to the operand.
+ *
+ * @param value - Operand shape
+ * @param args  - `other`: the shape of the data frame whose columns are appended
+ * @returns The shape with the column names of both shapes joined and their column intervals added
+ */
+function applyConcatColsSemantics(value, { other }) {
+    const colnames = (0, domain_1.joinColNames)(value.colnames, other.colnames);
+    const cols = (0, domain_1.addInterval)(value.cols, other.cols);
+    return { ...value, colnames, cols };
+}
+/**
+ * Abstract semantics of concatenating the rows of another data frame to the operand.
+ *
+ * @param value - Operand shape
+ * @param args  - `other`: the shape of the data frame whose rows are appended
+ * @returns The shape with the row intervals of both shapes added.
+ * If the lower bound of the operand's column interval is 0, the column names and column intervals of both shapes are joined as well.
+ */
+function applyConcatRowsSemantics(value, { other }) {
+    const mayHaveNoCols = value.cols !== domain_1.IntervalBottom && value.cols[0] === 0;
+    if (!mayHaveNoCols) {
+        return { ...value, rows: (0, domain_1.addInterval)(value.rows, other.rows) };
+    }
+    const colnames = (0, domain_1.joinColNames)(value.colnames, other.colnames);
+    const cols = (0, domain_1.joinInterval)(value.cols, other.cols);
+    const rows = (0, domain_1.addInterval)(value.rows, other.rows);
+    return { ...value, colnames, cols, rows };
+}
+/**
+ * Abstract semantics of selecting a subset of the columns of a data frame.
+ *
+ * @param value   - Operand shape
+ * @param args    - `colnames`: the names of the selected columns (the list or single entries may be missing)
+ * @param options - `duplicateCols`: `colnames` becomes `ColNamesTop` and `cols` is exactly the number of names (`IntervalTop` if unknown);
+ *                  `renamedCols`: `colnames` becomes `ColNamesTop`, `cols` is limited as in the default case
+ * @returns In the default case, the shape with `colnames` met with the fully known names (unchanged otherwise)
+ * and `cols` limited by the number of names via `minInterval` (widened via `extendIntervalToZero` without a name list)
+ */
+function applySubsetColsSemantics(value, { colnames }, options) {
+    const count = colnames?.length;
+    if (options?.duplicateCols) {
+        const exact = count === undefined ? domain_1.IntervalTop : [count, count];
+        return { ...value, colnames: domain_1.ColNamesTop, cols: exact };
+    }
+    if (options?.renamedCols) {
+        const limited = count === undefined
+            ? (0, domain_1.extendIntervalToZero)(value.cols)
+            : (0, domain_1.minInterval)(value.cols, [count, count]);
+        return { ...value, colnames: domain_1.ColNamesTop, cols: limited };
+    }
+    const names = colnames?.every(assert_1.isNotUndefined)
+        ? (0, domain_1.meetColNames)(value.colnames, colnames)
+        : value.colnames;
+    const cols = count === undefined
+        ? (0, domain_1.extendIntervalToZero)(value.cols)
+        : (0, domain_1.minInterval)(value.cols, [count, count]);
+    return { ...value, colnames: names, cols };
+}
+/**
+ * Abstract semantics of selecting a subset of the rows of a data frame.
+ *
+ * @param value   - Operand shape
+ * @param args    - `rows`: the number of selected rows, or `undefined` if unknown
+ * @param options - With `duplicateRows`, `rows` is exactly the number of selected rows (`IntervalTop` if unknown)
+ * @returns In the default case, the shape with `rows` limited by the number of selected rows via `minInterval`
+ * (widened via `extendIntervalToZero` if unknown)
+ */
+function applySubsetRowsSemantics(value, { rows }, options) {
+    const known = rows !== undefined;
+    let selected;
+    if (options?.duplicateRows) {
+        selected = known ? [rows, rows] : domain_1.IntervalTop;
+    }
+    else if (known) {
+        selected = (0, domain_1.minInterval)(value.rows, [rows, rows]);
+    }
+    else {
+        selected = (0, domain_1.extendIntervalToZero)(value.rows);
+    }
+    return { ...value, rows: selected };
+}
+/**
+ * Abstract semantics of filtering the rows of a data frame by a condition.
+ *
+ * @param value - Operand shape
+ * @param args  - `condition`: the statically known value of the filter condition, if any
+ * @returns The shape with unchanged `rows` for a truthy condition, `[0, 0]` for a condition that is exactly `false`,
+ * and `rows` widened via `extendIntervalToZero` in every other case
+ */
+function applyFilterRowsSemantics(value, { condition }) {
+    let rows;
+    if (condition) {
+        rows = value.rows;
+    }
+    else if (condition === false) {
+        rows = [0, 0];
+    }
+    else {
+        rows = (0, domain_1.extendIntervalToZero)(value.rows);
+    }
+    return { ...value, rows };
+}
+/**
+ * Abstract semantics of mutating (adding or overwriting) columns of a data frame.
+ *
+ * @param value - Operand shape
+ * @param args  - `colnames`: the names of the mutated columns (the list or single entries may be missing)
+ * @returns The shape with the names joined into `colnames` (`ColNamesTop` if not all names are known).
+ * Between zero and all of the names may be added to `cols` (with the number of names as additional `maxInterval` operand);
+ * without a name list, `cols` is widened via `extendIntervalToInfinity`.
+ */
+function applyMutateColsSemantics(value, { colnames }) {
+    const count = colnames?.length;
+    const names = colnames?.every(assert_1.isNotUndefined)
+        ? (0, domain_1.joinColNames)(value.colnames, colnames)
+        : domain_1.ColNamesTop;
+    let cols;
+    if (count === undefined) {
+        cols = (0, domain_1.extendIntervalToInfinity)(value.cols);
+    }
+    else {
+        const grown = (0, domain_1.addInterval)(value.cols, [0, count]);
+        cols = (0, domain_1.maxInterval)(grown, [count, count]);
+    }
+    return { ...value, colnames: names, cols };
+}
+/**
+ * Abstract semantics of grouping a data frame by columns.
+ *
+ * @param value   - Operand shape
+ * @param args    - `by`: the grouping columns (single entries may be `undefined`)
+ * @param options - With `mutatedCols`, the grouping columns may be new columns
+ * @returns Without `mutatedCols`, the unchanged operand shape, as grouping only marks columns as groups.
+ * With `mutatedCols`, the grouping columns are joined into `colnames` (`ColNamesTop` if not all are known)
+ * and between zero and all of them are added to `cols`.
+ */
+function applyGroupBySemantics(value, { by }, options) {
+    if (!options?.mutatedCols) {
+        return value;
+    }
+    const names = by.every(assert_1.isNotUndefined)
+        ? (0, domain_1.joinColNames)(value.colnames, by)
+        : domain_1.ColNamesTop;
+    const cols = (0, domain_1.addInterval)(value.cols, [0, by.length]);
+    return { ...value, colnames: names, cols };
+}
+/**
+ * Abstract semantics of summarizing a data frame.
+ *
+ * @param value - Operand shape
+ * @param args  - `colnames`: the names of the summary columns (the list or single entries may be missing)
+ * @returns The shape with the names joined into `colnames` (`ColNamesTop` if not all names are known),
+ * `cols` derived from the operand's column interval and the number of summary columns
+ * (widened via `extendIntervalToInfinity` without a name list),
+ * and `rows` computed as `maxInterval(minInterval(value.rows, [1, Infinity]), [0, 1])`
+ */
+function applySummarizeSemantics(value, { colnames }) {
+    const cols = colnames?.length;
+    return {
+        ...value,
+        colnames: colnames?.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop,
+        // the fallback must widen the column interval; the previous version widened `value.rows` here,
+        // which made the resulting column count depend on the row count of the operand
+        cols: cols !== undefined ? (0, domain_1.minInterval)((0, domain_1.addInterval)(value.cols, [0, cols]), [cols, Infinity]) : (0, domain_1.extendIntervalToInfinity)(value.cols),
+        rows: (0, domain_1.maxInterval)((0, domain_1.minInterval)(value.rows, [1, Infinity]), [0, 1])
+    };
+}
+/**
+ * Abstract semantics of joining the operand data frame with another data frame.
+ *
+ * @param value   - Operand shape (the left side of the join)
+ * @param args    - `other`: the shape of the data frame to join with (the right side);
+ *                  `by`: the columns to join by (single entries may be `undefined`), or `undefined` if unknown
+ * @param options - `join`: the join type (`'inner'`, `'left'`, `'right'`, or `'full'`, defaults to `'inner'`);
+ *                  `natural`: whether the join is a natural join
+ * @returns The shape of the join result: `colnames` are the joined names of both sides (`ColNamesTop` if columns may be renamed),
+ * `cols` is computed from the column intervals of both sides and the number of `by` columns,
+ * and `rows` depends on the join type and on whether the result may be a Cartesian product
+ */
+function applyJoinSemantics(value, { other, by }, options) {
+    // Merge two intervals by creating the maximum of the lower bounds and adding the upper bounds
+    const mergeInterval = (interval1, interval2) => {
+        if (interval1 === domain_1.IntervalBottom || interval2 === domain_1.IntervalBottom) {
+            return domain_1.IntervalBottom;
+        }
+        else {
+            return [Math.max(interval1[0], interval2[0]), interval1[1] + interval2[1]];
+        }
+    };
+    // Creating the Cartesian product of two intervals by keeping the lower bound and multiplying the upper bounds
+    const productInterval = (lower, interval1, interval2) => {
+        if (lower === domain_1.IntervalBottom || interval1 === domain_1.IntervalBottom || interval2 === domain_1.IntervalBottom) {
+            return domain_1.IntervalBottom;
+        }
+        else {
+            const upper1 = interval1[1];
+            const upper2 = interval2[1];
+            // `0 * Infinity` evaluates to `NaN` in JavaScript; a product with an upper bound of 0 has at most 0 elements
+            const upper = upper1 === 0 || upper2 === 0 ? 0 : upper1 * upper2;
+            return [lower[0], upper];
+        }
+    };
+    const commonCols = (0, domain_1.meetColNames)(value.colnames, other.colnames);
+    let duplicateCols; // whether columns may be renamed due to occurrence in both data frames
+    let productRows; // whether the resulting rows may be a Cartesian product of the rows of the data frames
+    if (options?.natural) {
+        duplicateCols = false;
+        productRows = commonCols !== domain_1.ColNamesTop && commonCols.length === 0;
+    }
+    else if (by === undefined) {
+        duplicateCols = true;
+        productRows = true;
+    }
+    else if (by.length === 0) {
+        duplicateCols = commonCols === domain_1.ColNamesTop || commonCols.length > 0;
+        productRows = true;
+    }
+    else if (by.every(assert_1.isNotUndefined)) {
+        // all join columns are known: only the remaining common columns can clash
+        const remainingCols = (0, domain_1.subtractColNames)(commonCols, by);
+        duplicateCols = remainingCols === domain_1.ColNamesTop || remainingCols.length > 0;
+        productRows = false;
+    }
+    else {
+        duplicateCols = true;
+        productRows = false;
+    }
+    const joinType = options?.join ?? 'inner';
+    let rows;
+    switch (joinType) {
+        case 'inner':
+            rows = (0, domain_1.extendIntervalToZero)((0, domain_1.minInterval)(value.rows, other.rows));
+            break;
+        case 'left':
+            rows = value.rows;
+            break;
+        case 'right':
+            rows = other.rows;
+            break;
+        case 'full':
+            rows = mergeInterval(value.rows, other.rows);
+            break;
+        default:
+            (0, assert_1.assertUnreachable)(joinType);
+    }
+    const byCols = by?.length;
+    return {
+        ...value,
+        colnames: duplicateCols ? domain_1.ColNamesTop : (0, domain_1.joinColNames)(value.colnames, other.colnames),
+        cols: byCols !== undefined ? (0, domain_1.subtractInterval)((0, domain_1.addInterval)(value.cols, other.cols), [byCols, byCols]) : mergeInterval(value.cols, other.cols),
+        rows: productRows ? productInterval(rows, value.rows, other.rows) : rows
+    };
+}
+/**
+ * Abstract semantics of an operation that does not change the shape of a data frame.
+ *
+ * @param value - Operand shape
+ * @param _args - Unused (the operation takes no arguments)
+ * @returns The operand shape itself
+ */
+function applyIdentitySemantics(value, _args) {
+    return value;
+}
+/**
+ * Abstract semantics of an operation with unknown effect on a data frame.
+ *
+ * @param _value - Unused operand shape
+ * @param _args  - Unused (the operation takes no arguments)
+ * @returns Always `DataFrameTop`
+ */
+function applyUnknownSemantics(_value, _args) {
+    const top = domain_1.DataFrameTop;
+    return top;
+}
+//# sourceMappingURL=semantics.js.map

package/abstract-interpretation/data-frame/shape-inference.d.ts ADDED Viewed

@@ -0,0 +1,38 @@
+import type { FlowrConfigOptions } from '../../config';
+import { type ControlFlowInformation } from '../../control-flow/control-flow-graph';
+import type { DataflowGraph } from '../../dataflow/graph/graph';
+import type { RNode } from '../../r-bridge/lang-4.x/ast/model/model';
+import type { NormalizedAst, ParentInformation } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
+import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
+import { type AbstractInterpretationInfo } from './absint-info';
+import { type DataFrameDomain, type DataFrameStateDomain } from './domain';
+/**
+ * Infers the shape of data frames by performing abstract interpretation using the control flow graph of a program.
+ * This directly attaches the inferred data frames shapes to the AST (see {@link AbstractInterpretationInfo}).
+ *
+ * @param cfinfo - The control flow information containing the control flow graph
+ * @param dfg    - The data flow graph to resolve variable origins and function arguments
+ * @param ast    - The abstract syntax tree to resolve node IDs to AST nodes
+ * @param config - The flowR configuration to use for the shape inference
+ * @returns The abstract data frame state at the exit node of the control flow graph (see {@link DataFrameStateDomain}).
+ * The abstract data frame states for all other nodes are attached to the AST.
+ */
+export declare function inferDataFrameShapes(cfinfo: ControlFlowInformation, dfg: DataflowGraph, ast: NormalizedAst<ParentInformation & AbstractInterpretationInfo>, config: FlowrConfigOptions): DataFrameStateDomain;
+/**
+ * Resolves the abstract data frame shape of a node in the AST.
+ * This requires that the data frame shape inference has been executed before using {@link inferDataFrameShapes}.
+ *
+ * @param id     - The node or node ID to get the data frame shape for
+ * @param dfg    - The data flow graph used to resolve the data frame shape
+ * @param domain - An optional abstract data frame state domain used to resolve the data frame shape (defaults to the state at the requested node)
+ * @returns The abstract data frame shape of the node, or `undefined` if no data frame shape was inferred for the node
+ */
+export declare function resolveIdToDataFrameShape(id: RNode<ParentInformation & AbstractInterpretationInfo> | NodeId | undefined, dfg: DataflowGraph | undefined, domain?: DataFrameStateDomain): DataFrameDomain | undefined;
+/**
+ * Gets all origins of a variable in the data flow graph that have already been visited.
+ *
+ * @param node - The node to get the origins for
+ * @param dfg  - The data flow graph for resolving the origins
+ * @returns The origins nodes of the variable
+ */
+export declare function getVariableOrigins(node: NodeId, dfg: DataflowGraph): RNode<ParentInformation & AbstractInterpretationInfo>[];

package/abstract-interpretation/data-frame/shape-inference.js ADDED Viewed

@@ -0,0 +1,111 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.inferDataFrameShapes = inferDataFrameShapes;
+exports.resolveIdToDataFrameShape = resolveIdToDataFrameShape;
+exports.getVariableOrigins = getVariableOrigins;
+const control_flow_graph_1 = require("../../control-flow/control-flow-graph");
+const vertex_1 = require("../../dataflow/graph/vertex");
+const dfg_get_origin_1 = require("../../dataflow/origin/dfg-get-origin");
+const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
+const type_1 = require("../../r-bridge/lang-4.x/ast/model/type");
+const assert_1 = require("../../util/assert");
+const absint_info_1 = require("./absint-info");
+const absint_visitor_1 = require("./absint-visitor");
+const domain_1 = require("./domain");
+/**
+ * Runs the data frame shape inference on a program by abstract interpretation along its control flow graph.
+ * The inferred data frame shapes are attached directly to the AST nodes (see {@link AbstractInterpretationInfo}).
+ *
+ * @param cfinfo - Control flow information containing the control flow graph and its exit points
+ * @param dfg    - Data flow graph used to resolve variable origins and function arguments
+ * @param ast    - Normalized AST used to resolve node IDs to AST nodes
+ * @param config - flowR configuration used for the shape inference
+ * @returns The join of the abstract data frame states stored at the AST nodes of all exit points (see {@link DataFrameStateDomain});
+ * an exit node without a stored state contributes an empty state.
+ * The abstract states of all other nodes are available on the AST only.
+ */
+function inferDataFrameShapes(cfinfo, dfg, ast, config) {
+    new absint_visitor_1.DataFrameShapeInferenceVisitor({
+        controlFlow: cfinfo,
+        dfg,
+        normalizedAst: ast,
+        flowrConfig: config
+    }).start();
+    // collect the state of every exit point that has both a CFG vertex and an AST node
+    const exitStates = [];
+    cfinfo.exitPoints.forEach(exitId => {
+        const vertex = cfinfo.graph.getVertex(exitId);
+        if (!(0, assert_1.isNotUndefined)(vertex)) {
+            return;
+        }
+        const node = ast.idMap.get((0, control_flow_graph_1.getVertexRootId)(vertex));
+        if (!(0, assert_1.isNotUndefined)(node)) {
+            return;
+        }
+        exitStates.push(node.info.dataFrame?.domain ?? new Map());
+    });
+    return (0, domain_1.joinDataFrameStates)(...exitStates);
+}
+/**
+ * Looks up the abstract data frame shape of an AST node.
+ * The shape inference ({@link inferDataFrameShapes}) must have been executed beforehand.
+ *
+ * A shape stored for the node in the state is returned directly. Otherwise the shape is derived structurally:
+ * symbols join the shapes of their variable origins, arguments resolve their value, expression lists their last child,
+ * pipes their right-hand side, and if-then-else constructs join the shapes of both branches.
+ *
+ * @param id     - The node or node ID to get the data frame shape for
+ * @param dfg    - The data flow graph used to resolve the data frame shape
+ * @param domain - Optional abstract data frame state used for the lookup (defaults to the state stored at the requested node)
+ * @returns The abstract data frame shape of the node, or `undefined` if no data frame shape was inferred for the node
+ */
+function resolveIdToDataFrameShape(id, dfg, domain) {
+    let node;
+    if (id === undefined || typeof id === 'object') {
+        node = id;
+    }
+    else {
+        node = dfg?.idMap?.get(id);
+    }
+    domain ??= node?.info.dataFrame?.domain;
+    if (dfg === undefined || node === undefined || domain === undefined) {
+        return undefined;
+    }
+    const nodeId = node.info.id;
+    if (domain.has(nodeId)) {
+        return domain.get(nodeId);
+    }
+    const vertex = dfg.getVertex(nodeId);
+    const call = vertex?.tag === vertex_1.VertexType.FunctionCall ? vertex : undefined;
+    const origins = Array.isArray(call?.origin) ? call.origin : [];
+    const resolve = (target) => resolveIdToDataFrameShape(target, dfg, domain);
+    // joins the shapes only if there is at least one and none of them is unresolved
+    const joinIfComplete = (shapes) => {
+        if (shapes.length > 0 && shapes.every(assert_1.isNotUndefined)) {
+            return (0, domain_1.joinDataFrames)(...shapes);
+        }
+        return undefined;
+    };
+    // the first matching case decides the result; later cases are not tried even if it yields `undefined`
+    if (node.type === type_1.RType.Symbol) {
+        return joinIfComplete(getVariableOrigins(nodeId, dfg).map(origin => domain.get(origin.info.id)));
+    }
+    if (node.type === type_1.RType.Argument && node.value !== undefined) {
+        return resolve(node.value);
+    }
+    if (node.type === type_1.RType.ExpressionList && node.children.length > 0) {
+        return resolve(node.children[node.children.length - 1]);
+    }
+    if (node.type === type_1.RType.Pipe) {
+        return resolve(node.rhs);
+    }
+    if (origins.includes('builtin:pipe')) {
+        if (node.type === type_1.RType.BinaryOp) {
+            return resolve(node.rhs);
+        }
+        if (call?.args.length === 2 && call?.args[1] !== r_function_call_1.EmptyArgument) {
+            return resolve(call.args[1].nodeId);
+        }
+        return undefined;
+    }
+    if (node.type === type_1.RType.IfThenElse) {
+        if (node.otherwise === undefined) {
+            return undefined;
+        }
+        return joinIfComplete([node.then, node.otherwise].map(branch => resolve(branch)));
+    }
+    if (origins.includes('builtin:if-then-else') && call?.args.every(arg => arg !== r_function_call_1.EmptyArgument)) {
+        if (call.args.length !== 3) {
+            return undefined;
+        }
+        // the arguments at index 1 and 2 are resolved and joined
+        return joinIfComplete(call.args.slice(1, 3).map(branch => resolve(branch.nodeId)));
+    }
+    return undefined;
+}
+/**
+ * Collects the origins of a variable in the data flow graph that have already been visited by the shape inference.
+ *
+ * @param node - The ID of the node to get the origins for
+ * @param dfg  - The data flow graph for resolving the origins
+ * @returns The AST nodes of all read-variable origins that carry an abstract data frame state
+ * and are not marked as `Unassigned`; an empty array if the node has no origins
+ */
+function getVariableOrigins(node, dfg) {
+    const origins = (0, dfg_get_origin_1.getOriginInDfg)(dfg, node);
+    if (origins === undefined || origins === null) {
+        return [];
+    }
+    const visited = [];
+    origins.forEach(entry => {
+        if (entry.type !== 0 /* OriginType.ReadVariableOrigin */) {
+            return;
+        }
+        const origin = dfg.idMap?.get(entry.id);
+        if (!(0, assert_1.isNotUndefined)(origin)) {
+            return;
+        }
+        // only origins that have already been visited carry an abstract state
+        if (origin.info.dataFrame?.domain === undefined) {
+            return;
+        }
+        // skip origins whose assignment has not been processed yet
+        if ((0, absint_info_1.hasDataFrameInfoMarker)(origin, absint_info_1.DataFrameInfoMarker.Unassigned)) {
+            return;
+        }
+        visited.push(origin);
+    });
+    return visited;
+}
+//# sourceMappingURL=shape-inference.js.map

package/benchmark/slicer.d.ts CHANGED Viewed

@@ -6,7 +6,7 @@ import type { MergeableRecord } from '../util/objects';
 import type { DataflowInformation } from '../dataflow/info';
 import type { SliceResult } from '../slicing/static/slicer-types';
 import type { ReconstructionResult } from '../reconstruct/reconstruct';
-import type { PerSliceStats, SlicerStats } from './stats/stats';
+import type { PerSliceStats, SlicerStats, SlicerStatsDfShape } from './stats/stats';
 import type { NormalizedAst } from '../r-bridge/lang-4.x/ast/model/processing/decorate';
 import type { SlicingCriteria } from '../slicing/criterion/parse';
 import type { RParseRequestFromFile, RParseRequestFromText } from '../r-bridge/retriever';
@@ -57,10 +57,12 @@ export declare class BenchmarkSlicer {
     private readonly perSliceMeasurements;
     private readonly deltas;
     private readonly parserName;
+    private config;
     private stats;
     private loadedXml;
     private dataflow;
     private normalizedAst;
+    private controlFlow;
     private totalStopwatch;
     private finished;
     private executor;
@@ -95,8 +97,20 @@ export declare class BenchmarkSlicer {
      * @returns The per slice stats retrieved for this slicing criteria
      */
     slice(...slicingCriteria: SlicingCriteria): Promise<BenchmarkSingleSliceStats>;
+    /**
+     * Extract the control flow graph using {@link extractCFG}
+     */
+    extractCFG(): void;
+    /**
+     * Infer the shape of data frames using abstract interpretation with {@link inferDataFrameShapes}
+     *
+     * @returns The statistics of the data frame shape inference
+     */
+    inferDataFrameShapes(): SlicerStatsDfShape;
+    private getInferredSize;
     /** Bridging the gap between the new internal and the old names for the benchmarking */
     private measureCommonStep;
+    private measureSimpleStep;
     private measureSliceStep;
     private guardActive;
     /**