npm - @eagleoutice/flowr - Versions diffs - 2.2.15 → 2.3.0 - Mend

@eagleoutice/flowr 2.2.15 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (255)

package/README.md +226 -6
package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
package/abstract-interpretation/data-frame/absint-info.js +31 -0
package/abstract-interpretation/data-frame/absint-visitor.d.ts +59 -0
package/abstract-interpretation/data-frame/absint-visitor.js +173 -0
package/abstract-interpretation/data-frame/domain.d.ts +107 -0
package/abstract-interpretation/data-frame/domain.js +315 -0
package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
package/abstract-interpretation/data-frame/resolve-args.js +118 -0
package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
package/abstract-interpretation/data-frame/semantics.js +366 -0
package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
package/abstract-interpretation/data-frame/shape-inference.js +117 -0
package/benchmark/slicer.d.ts +18 -2
package/benchmark/slicer.js +143 -5
package/benchmark/stats/print.js +123 -45
package/benchmark/stats/size-of.d.ts +7 -0
package/benchmark/stats/size-of.js +1 -0
package/benchmark/stats/stats.d.ts +30 -1
package/benchmark/stats/stats.js +4 -2
package/benchmark/summarizer/data.d.ts +33 -2
package/benchmark/summarizer/first-phase/input.js +5 -1
package/benchmark/summarizer/first-phase/process.d.ts +2 -1
package/benchmark/summarizer/first-phase/process.js +49 -3
package/benchmark/summarizer/second-phase/process.js +101 -3
package/cli/benchmark-app.d.ts +2 -0
package/cli/benchmark-app.js +5 -1
package/cli/benchmark-helper-app.d.ts +2 -0
package/cli/benchmark-helper-app.js +13 -8
package/cli/common/options.js +4 -0
package/cli/export-quads-app.js +2 -1
package/cli/flowr.js +58 -57
package/cli/repl/commands/repl-cfg.js +13 -13
package/cli/repl/commands/repl-commands.js +2 -2
package/cli/repl/commands/repl-dataflow.js +10 -10
package/cli/repl/commands/repl-execute.d.ts +2 -3
package/cli/repl/commands/repl-execute.js +4 -4
package/cli/repl/commands/repl-lineage.js +4 -4
package/cli/repl/commands/repl-main.d.ts +12 -1
package/cli/repl/commands/repl-normalize.js +6 -6
package/cli/repl/commands/repl-parse.js +2 -2
package/cli/repl/commands/repl-query.js +9 -9
package/cli/repl/commands/repl-version.js +1 -1
package/cli/repl/core.d.ts +5 -2
package/cli/repl/core.js +10 -8
package/cli/repl/server/connection.d.ts +3 -1
package/cli/repl/server/connection.js +7 -5
package/cli/repl/server/server.d.ts +3 -2
package/cli/repl/server/server.js +4 -2
package/cli/script-core/statistics-core.d.ts +2 -1
package/cli/script-core/statistics-core.js +2 -2
package/cli/script-core/statistics-helper-core.d.ts +2 -1
package/cli/script-core/statistics-helper-core.js +5 -4
package/cli/slicer-app.js +4 -2
package/cli/statistics-app.js +2 -1
package/cli/statistics-helper-app.js +2 -1
package/config.d.ts +43 -10
package/config.js +47 -43
package/control-flow/cfg-dead-code.js +45 -2
package/control-flow/cfg-simplification.d.ts +2 -0
package/control-flow/control-flow-graph.d.ts +2 -0
package/control-flow/control-flow-graph.js +8 -0
package/control-flow/dfg-cfg-guided-visitor.d.ts +5 -3
package/control-flow/dfg-cfg-guided-visitor.js +15 -4
package/control-flow/extract-cfg.d.ts +4 -2
package/control-flow/extract-cfg.js +4 -3
package/control-flow/semantic-cfg-guided-visitor.d.ts +20 -2
package/control-flow/semantic-cfg-guided-visitor.js +24 -4
package/core/pipeline-executor.d.ts +4 -1
package/core/pipeline-executor.js +6 -5
package/core/steps/all/core/10-normalize.d.ts +2 -0
package/core/steps/all/core/10-normalize.js +1 -1
package/core/steps/all/core/11-normalize-tree-sitter.d.ts +2 -1
package/core/steps/all/core/11-normalize-tree-sitter.js +2 -2
package/core/steps/all/core/20-dataflow.d.ts +2 -1
package/core/steps/all/core/20-dataflow.js +2 -2
package/core/steps/all/static-slicing/00-slice.d.ts +2 -1
package/core/steps/all/static-slicing/00-slice.js +2 -2
package/core/steps/pipeline/default-pipelines.d.ts +32 -31
package/core/steps/pipeline/default-pipelines.js +8 -8
package/core/steps/pipeline-step.d.ts +2 -1
package/dataflow/environments/built-in-config.d.ts +3 -3
package/dataflow/environments/built-in.d.ts +11 -3
package/dataflow/environments/built-in.js +5 -3
package/dataflow/environments/default-builtin-config.js +4 -2
package/dataflow/environments/define.d.ts +2 -1
package/dataflow/environments/define.js +4 -5
package/dataflow/environments/remove.d.ts +6 -0
package/dataflow/environments/remove.js +29 -0
package/dataflow/eval/resolve/alias-tracking.d.ts +7 -2
package/dataflow/eval/resolve/alias-tracking.js +11 -8
package/dataflow/eval/resolve/resolve-argument.d.ts +8 -0
package/dataflow/eval/resolve/resolve-argument.js +118 -0
package/dataflow/eval/resolve/resolve.d.ts +65 -18
package/dataflow/eval/resolve/resolve.js +144 -48
package/dataflow/eval/values/string/string-constants.d.ts +1 -1
package/dataflow/eval/values/string/string-constants.js +7 -2
package/dataflow/extractor.d.ts +2 -1
package/dataflow/extractor.js +2 -1
package/dataflow/internal/process/functions/call/built-in/built-in-access.js +5 -6
package/dataflow/internal/process/functions/call/built-in/built-in-apply.js +1 -1
package/dataflow/internal/process/functions/call/built-in/built-in-assignment.d.ts +4 -2
package/dataflow/internal/process/functions/call/built-in/built-in-assignment.js +11 -11
package/dataflow/internal/process/functions/call/built-in/built-in-eval.js +10 -11
package/dataflow/internal/process/functions/call/built-in/built-in-expression-list.js +7 -2
package/dataflow/internal/process/functions/call/built-in/built-in-for-loop.js +2 -3
package/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.js +1 -1
package/dataflow/internal/process/functions/call/built-in/built-in-list.js +2 -2
package/dataflow/internal/process/functions/call/built-in/built-in-replacement.js +2 -3
package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +6 -3
package/dataflow/internal/process/functions/call/built-in/built-in-source.js +19 -15
package/dataflow/internal/process/functions/call/built-in/built-in-vector.js +2 -2
package/dataflow/internal/process/functions/call/built-in/built-in-while-loop.js +1 -1
package/dataflow/internal/process/functions/call/common.js +1 -1
package/dataflow/internal/process/functions/process-parameter.js +1 -1
package/dataflow/origin/dfg-get-symbol-refs.d.ts +21 -0
package/dataflow/origin/dfg-get-symbol-refs.js +50 -0
package/dataflow/processor.d.ts +5 -0
package/documentation/doc-util/doc-cfg.js +4 -3
package/documentation/doc-util/doc-code.d.ts +1 -1
package/documentation/doc-util/doc-dfg.js +3 -2
package/documentation/doc-util/doc-functions.d.ts +24 -0
package/documentation/doc-util/doc-functions.js +65 -0
package/documentation/doc-util/doc-normalized-ast.js +3 -2
package/documentation/doc-util/doc-print.d.ts +5 -0
package/documentation/doc-util/doc-print.js +36 -0
package/documentation/doc-util/doc-query.js +13 -2
package/documentation/doc-util/doc-repl.js +2 -1
package/documentation/doc-util/doc-search.js +3 -2
package/documentation/doc-util/doc-types.d.ts +28 -6
package/documentation/doc-util/doc-types.js +89 -45
package/documentation/print-cfg-wiki.js +6 -7
package/documentation/print-core-wiki.js +5 -5
package/documentation/print-dataflow-graph-wiki.js +10 -10
package/documentation/print-engines-wiki.js +1 -2
package/documentation/print-faq-wiki.js +8 -2
package/documentation/print-interface-wiki.js +12 -2
package/documentation/print-linter-issue.d.ts +1 -0
package/documentation/print-linter-issue.js +71 -0
package/documentation/print-linter-wiki.js +223 -34
package/documentation/print-linting-and-testing-wiki.js +2 -4
package/documentation/print-normalized-ast-wiki.js +3 -3
package/documentation/print-query-wiki.js +18 -2
package/documentation/print-readme.js +24 -1
package/documentation/print-search-wiki.js +1 -2
package/linter/linter-executor.d.ts +3 -1
package/linter/linter-executor.js +3 -2
package/linter/linter-format.d.ts +67 -7
package/linter/linter-format.js +12 -1
package/linter/linter-rules.d.ts +178 -16
package/linter/linter-rules.js +14 -4
package/linter/linter-tags.d.ts +80 -0
package/linter/linter-tags.js +85 -0
package/linter/rules/absolute-path.d.ts +71 -0
package/linter/rules/absolute-path.js +177 -0
package/linter/rules/dataframe-access-validation.d.ts +53 -0
package/linter/rules/dataframe-access-validation.js +116 -0
package/linter/rules/deprecated-functions.d.ts +43 -0
package/linter/rules/deprecated-functions.js +58 -0
package/linter/rules/{2-file-path-validity.d.ts → file-path-validity.d.ts} +16 -6
package/linter/rules/{2-file-path-validity.js → file-path-validity.js} +21 -13
package/linter/rules/naming-convention.d.ts +71 -0
package/linter/rules/naming-convention.js +168 -0
package/linter/rules/seeded-randomness.d.ts +65 -0
package/linter/rules/seeded-randomness.js +122 -0
package/linter/rules/unused-definition.d.ts +41 -0
package/linter/rules/unused-definition.js +105 -0
package/package.json +5 -2
package/queries/base-query-format.d.ts +2 -0
package/queries/catalog/call-context-query/call-context-query-executor.d.ts +1 -1
package/queries/catalog/call-context-query/call-context-query-executor.js +2 -2
package/queries/catalog/cluster-query/cluster-query-format.d.ts +1 -1
package/queries/catalog/config-query/config-query-executor.d.ts +1 -1
package/queries/catalog/config-query/config-query-executor.js +2 -3
package/queries/catalog/control-flow-query/control-flow-query-executor.d.ts +1 -1
package/queries/catalog/control-flow-query/control-flow-query-executor.js +2 -2
package/queries/catalog/control-flow-query/control-flow-query-format.d.ts +1 -1
package/queries/catalog/dataflow-lens-query/dataflow-lens-query-format.d.ts +1 -1
package/queries/catalog/dataflow-query/dataflow-query-format.d.ts +1 -1
package/queries/catalog/dependencies-query/dependencies-query-executor.js +4 -116
package/queries/catalog/dependencies-query/dependencies-query-format.d.ts +1 -1
package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
package/queries/catalog/happens-before-query/happens-before-query-format.d.ts +1 -1
package/queries/catalog/id-map-query/id-map-query-format.d.ts +1 -1
package/queries/catalog/lineage-query/lineage-query-format.d.ts +1 -1
package/queries/catalog/linter-query/linter-query-executor.d.ts +1 -1
package/queries/catalog/linter-query/linter-query-executor.js +2 -2
package/queries/catalog/linter-query/linter-query-format.d.ts +1 -1
package/queries/catalog/linter-query/linter-query-format.js +16 -12
package/queries/catalog/normalized-ast-query/normalized-ast-query-format.d.ts +1 -1
package/queries/catalog/origin-query/origin-query-format.d.ts +1 -1
package/queries/catalog/project-query/project-query-format.d.ts +1 -1
package/queries/catalog/resolve-value-query/resolve-value-query-executor.d.ts +1 -1
package/queries/catalog/resolve-value-query/resolve-value-query-executor.js +2 -2
package/queries/catalog/resolve-value-query/resolve-value-query-format.d.ts +1 -1
package/queries/catalog/search-query/search-query-executor.d.ts +1 -1
package/queries/catalog/search-query/search-query-executor.js +2 -2
package/queries/catalog/search-query/search-query-format.d.ts +1 -1
package/queries/catalog/static-slice-query/static-slice-query-executor.d.ts +1 -1
package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -2
package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +1 -1
package/queries/query.d.ts +76 -16
package/queries/query.js +2 -0
package/r-bridge/lang-4.x/ast/parser/json/parser.d.ts +2 -1
package/r-bridge/lang-4.x/ast/parser/json/parser.js +4 -2
package/r-bridge/lang-4.x/convert-values.js +2 -1
package/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.d.ts +3 -1
package/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.js +4 -4
package/r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize.d.ts +1 -1
package/r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize.js +7 -5
package/r-bridge/shell.d.ts +3 -2
package/r-bridge/shell.js +4 -5
package/search/flowr-search-builder.d.ts +6 -2
package/search/flowr-search-builder.js +7 -0
package/search/flowr-search-filters.d.ts +32 -8
package/search/flowr-search-filters.js +42 -15
package/search/flowr-search.d.ts +4 -0
package/search/search-executor/search-enrichers.d.ts +7 -3
package/search/search-executor/search-enrichers.js +29 -20
package/search/search-executor/search-generators.js +1 -1
package/search/search-executor/search-transformer.d.ts +2 -0
package/search/search-executor/search-transformer.js +10 -1
package/slicing/static/static-slicer.d.ts +1 -1
package/slicing/static/static-slicer.js +2 -3
package/statistics/statistics.d.ts +3 -1
package/statistics/statistics.js +5 -4
package/util/containers.d.ts +12 -9
package/util/containers.js +12 -9
package/util/files.d.ts +8 -2
package/util/files.js +22 -4
package/util/objects.d.ts +5 -4
package/util/r-value.d.ts +23 -0
package/util/r-value.js +113 -0
package/util/range.d.ts +5 -1
package/util/range.js +11 -3
package/util/text/strings.d.ts +6 -0
package/util/text/strings.js +35 -0
package/util/version.js +1 -1
package/linter/rules/1-deprecated-functions.d.ts +0 -34
package/linter/rules/1-deprecated-functions.js +0 -54
package/util/cfg/cfg.d.ts +0 -0
package/util/cfg/cfg.js +0 -2

package/benchmark/slicer.js CHANGED

@@ -10,6 +10,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.BenchmarkSlicer = exports.benchmarkLogger = void 0;
 const stopwatch_1 = require("./stopwatch");
 const fs_1 = __importDefault(require("fs"));
+const seedrandom_1 = __importDefault(require("seedrandom"));
 const log_1 = require("../util/log");
 const assert_1 = require("../util/assert");
 const strings_1 = require("../util/text/strings");
@@ -24,6 +25,11 @@ const tree_sitter_types_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitte
 const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor");
 const vertex_1 = require("../dataflow/graph/vertex");
 const arrays_1 = require("../util/collections/arrays");
+const config_1 = require("../config");
+const extract_cfg_1 = require("../control-flow/extract-cfg");
+const absint_info_1 = require("../abstract-interpretation/data-frame/absint-info");
+const domain_1 = require("../abstract-interpretation/data-frame/domain");
+const shape_inference_1 = require("../abstract-interpretation/data-frame/shape-inference");
 /**
  * The logger to be used for benchmarking as a global object.
  */
@@ -34,10 +40,12 @@ class BenchmarkSlicer {
     perSliceMeasurements = new Map();
     deltas = new Map();
     parserName;
+    config;
     stats;
     loadedXml;
     dataflow;
     normalizedAst;
+    controlFlow;
     totalStopwatch;
     finished = false;
     // Yes, this is unclean, but we know that we assign the executor during the initialization and this saves us from having to check for nullability every time
@@ -51,15 +59,16 @@ class BenchmarkSlicer {
      * Initialize the slicer on the given request.
      * Can only be called once for each instance.
      */
-    async init(request, autoSelectIf, threshold) {
+    async init(request, config, autoSelectIf, threshold) {
         (0, assert_1.guard)(this.stats === undefined, 'cannot initialize the slicer twice');
+        this.config = config;
         // we know these are in sync so we just cast to one of them
         this.parser = await this.commonMeasurements.measure('initialize R session', async () => {
             if (this.parserName === 'r-shell') {
-                return new shell_1.RShell();
+                return new shell_1.RShell((0, config_1.getEngineConfig)(config, 'r-shell'));
             }
             else {
-                await tree_sitter_executor_1.TreeSitterExecutor.initTreeSitter();
+                await tree_sitter_executor_1.TreeSitterExecutor.initTreeSitter((0, config_1.getEngineConfig)(config, 'tree-sitter'));
                 return new tree_sitter_executor_1.TreeSitterExecutor();
             }
         });
@@ -68,7 +77,7 @@ class BenchmarkSlicer {
             criterion: [],
             autoSelectIf,
             threshold,
-        });
+        }, config);
         this.loadedXml = (await this.measureCommonStep('parse', 'retrieve AST from R code')).parsed;
         this.normalizedAst = await this.measureCommonStep('normalize', 'normalize R AST');
         this.dataflow = await this.measureCommonStep('dataflow', 'produce dataflow information');
@@ -256,6 +265,112 @@ class BenchmarkSlicer {
             code: stats.reconstructedCode
         };
     }
+    /**
+     * Extract the control flow graph using {@link extractCFG}
+     */
+    extractCFG() {
+        exports.benchmarkLogger.trace('try to extract the control flow graph');
+        this.guardActive();
+        (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined for control flow extraction');
+        (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined for control flow extraction');
+        (0, assert_1.guard)(this.config !== undefined, 'config should be defined for control flow extraction');
+        const ast = this.normalizedAst;
+        const dfg = this.dataflow.graph;
+        const config = this.config;
+        this.controlFlow = this.measureSimpleStep('extract control flow graph', () => (0, extract_cfg_1.extractCfg)(ast, config, dfg));
+    }
+    /**
+     * Infer the shape of data frames using abstract interpretation with {@link inferDataFrameShapes}
+     *
+     * @returns The statistics of the data frame shape inference
+     */
+    inferDataFrameShapes() {
+        exports.benchmarkLogger.trace('try to infer shapes for data frames');
+        (0, assert_1.guard)(this.stats !== undefined && !this.finished, 'need to call init before, and can not do after finish!');
+        (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined for data frame shape inference');
+        (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined for data frame shape inference');
+        (0, assert_1.guard)(this.controlFlow !== undefined, 'controlFlow should be defined for data frame shape inference');
+        (0, assert_1.guard)(this.config !== undefined, 'config should be defined for data frame shape inference');
+        const ast = this.normalizedAst;
+        const dfg = this.dataflow.graph;
+        const cfinfo = this.controlFlow;
+        const config = this.config;
+        const stats = {
+            numberOfDataFrameFiles: 0,
+            numberOfNonDataFrameFiles: 0,
+            numberOfResultConstraints: 0,
+            numberOfResultingValues: 0,
+            numberOfResultingTop: 0,
+            numberOfResultingBottom: 0,
+            numberOfEmptyNodes: 0,
+            numberOfOperationNodes: 0,
+            numberOfValueNodes: 0,
+            sizeOfInfo: 0,
+            perNodeStats: new Map()
+        };
+        const result = this.measureSimpleStep('infer data frame shapes', () => (0, shape_inference_1.inferDataFrameShapes)(cfinfo, dfg, ast, config));
+        stats.numberOfResultConstraints = result.size;
+        for (const value of result.values()) {
+            if ((0, domain_1.equalDataFrameDomain)(value, domain_1.DataFrameTop)) {
+                stats.numberOfResultingTop++;
+            }
+            else if ((0, domain_1.equalDataFrameDomain)(value, domain_1.DataFrameBottom)) {
+                stats.numberOfResultingBottom++;
+            }
+            else {
+                stats.numberOfResultingValues++;
+            }
+        }
+        (0, visitor_1.visitAst)(this.normalizedAst.ast, (node) => {
+            if (node.info.dataFrame === undefined) {
+                return;
+            }
+            stats.sizeOfInfo += (0, size_of_1.safeSizeOf)([node.info.dataFrame]);
+            const expression = (0, absint_info_1.hasDataFrameExpressionInfo)(node) ? node.info.dataFrame : undefined;
+            const value = node.info.dataFrame.domain?.get(node.info.id);
+            // Only store per-node information for nodes representing expressions or nodes with abstract values
+            if (expression === undefined && value === undefined) {
+                stats.numberOfEmptyNodes++;
+                return;
+            }
+            const nodeStats = {
+                numberOfEntries: node.info.dataFrame?.domain?.size ?? 0
+            };
+            if (expression !== undefined) {
+                nodeStats.mappedOperations = expression.operations.map(op => op.operation);
+                stats.numberOfOperationNodes++;
+            }
+            if (value !== undefined) {
+                nodeStats.inferredColNames = value.colnames === domain_1.ColNamesTop ? 'top' : value.colnames.length;
+                nodeStats.inferredColCount = this.getInferredSize(value.cols);
+                nodeStats.inferredRowCount = this.getInferredSize(value.rows);
+                nodeStats.approxRangeColCount = value.cols === domain_1.IntervalBottom ? 0 : value.cols[1] - value.cols[0];
+                nodeStats.approxRangeRowCount = value.rows === domain_1.IntervalBottom ? 0 : value.rows[1] - value.rows[0];
+                stats.numberOfValueNodes++;
+            }
+            stats.perNodeStats.set(node.info.id, nodeStats);
+        });
+        if (stats.numberOfOperationNodes > 0) {
+            stats.numberOfDataFrameFiles = 1;
+        }
+        else {
+            stats.numberOfNonDataFrameFiles = 1;
+        }
+        this.stats.dataFrameShape = stats;
+        return stats;
+    }
+    getInferredSize(value) {
+        if ((0, domain_1.equalInterval)(value, domain_1.IntervalTop)) {
+            return 'top';
+        }
+        else if (value === domain_1.IntervalBottom) {
+            return 'bottom';
+        }
+        else if (!isFinite(value[1])) {
+            return 'infinite';
+        }
+        return Math.floor((value[0] + value[1]) / 2);
+    }
     /** Bridging the gap between the new internal and the old names for the benchmarking */
     async measureCommonStep(expectedStep, keyToMeasure) {
         const memoryInit = process.memoryUsage();
@@ -269,6 +384,18 @@ class BenchmarkSlicer {
         });
         return result;
     }
+    measureSimpleStep(keyToMeasure, measurement) {
+        const memoryInit = process.memoryUsage();
+        const result = this.commonMeasurements.measure(keyToMeasure, measurement);
+        const memoryEnd = process.memoryUsage();
+        this.deltas.set(keyToMeasure, {
+            heap: memoryEnd.heapUsed - memoryInit.heapUsed,
+            rss: memoryEnd.rss - memoryInit.rss,
+            external: memoryEnd.external - memoryInit.external,
+            buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
+        });
+        return result;
+    }
     async measureSliceStep(expectedStep, measure, keyToMeasure) {
         const { result } = await measure.measureAsync(keyToMeasure, () => this.executor.nextStep(expectedStep));
         return result;
@@ -301,7 +428,8 @@ class BenchmarkSlicer {
                 allCriteria = (0, arrays_1.equidistantSampling)(allCriteria, sampleCount, 'ceil');
             }
             else {
-                allCriteria.sort(() => Math.random() - 0.5);
+                const random = options.seed ? (0, seedrandom_1.default)(options.seed) : Math.random;
+                allCriteria.sort(() => random() - 0.5);
                 allCriteria.length = Math.min(allCriteria.length, sampleCount);
             }
         }
@@ -327,6 +455,8 @@ class BenchmarkSlicer {
         const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
         const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
         const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
+        const controlFlowTime = Number(this.stats.commonMeasurements.get('extract control flow graph'));
+        const dataFrameShapeTime = Number(this.stats.commonMeasurements.get('infer data frame shapes'));
         this.stats.retrieveTimePerToken = {
             raw: retrieveTime / this.stats.input.numberOfRTokens,
             normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
@@ -343,6 +473,14 @@ class BenchmarkSlicer {
             raw: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
             normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
         };
+        this.stats.controlFlowTimePerToken = !isNaN(controlFlowTime) ? {
+            raw: controlFlowTime / this.stats.input.numberOfRTokens,
+            normalized: controlFlowTime / this.stats.input.numberOfNormalizedTokens,
+        } : undefined;
+        this.stats.dataFrameShapeTimePerToken = !isNaN(dataFrameShapeTime) ? {
+            raw: dataFrameShapeTime / this.stats.input.numberOfRTokens,
+            normalized: dataFrameShapeTime / this.stats.input.numberOfNormalizedTokens,
+        } : undefined;
         return {
             stats: this.stats,
             parse: typeof this.loadedXml === 'string' ? this.loadedXml : JSON.stringify(this.loadedXml),

package/benchmark/stats/print.js CHANGED

@@ -84,22 +84,34 @@ function convertNumberToNiceBytes(x) {
 function stats2string(stats) {
     let result = `
 Request: ${JSON.stringify(stats.request)}
-Shell init time:              ${print(stats.commonMeasurements, 'initialize R session')}
-AST retrieval:                ${print(stats.commonMeasurements, 'retrieve AST from R code')}
-AST retrieval per token:      ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
-AST retrieval per R token:    ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
-AST normalization:            ${print(stats.commonMeasurements, 'normalize R AST')}
-AST normalization per token:  ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
-AST normalization per R token:${formatNanoseconds(stats.normalizeTimePerToken.raw)}
-Dataflow creation:            ${print(stats.commonMeasurements, 'produce dataflow information')}
-Dataflow creation per token:  ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
-Dataflow creation per R token:${formatNanoseconds(stats.dataflowTimePerToken.raw)}
-Total common time per token:  ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
-Total common time per R token:${formatNanoseconds(stats.totalCommonTimePerToken.raw)}
-Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:`;
+Shell init time:                      ${print(stats.commonMeasurements, 'initialize R session')}
+AST retrieval:                        ${print(stats.commonMeasurements, 'retrieve AST from R code')}
+AST retrieval per token:              ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
+AST retrieval per R token:            ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
+AST normalization:                    ${print(stats.commonMeasurements, 'normalize R AST')}
+AST normalization per token:          ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
+AST normalization per R token:        ${formatNanoseconds(stats.normalizeTimePerToken.raw)}
+Dataflow creation:                    ${print(stats.commonMeasurements, 'produce dataflow information')}
+Dataflow creation per token:          ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
+Dataflow creation per R token:        ${formatNanoseconds(stats.dataflowTimePerToken.raw)}
+Total common time per token:          ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
+Total common time per R token:        ${formatNanoseconds(stats.totalCommonTimePerToken.raw)}`;
+    if (stats.commonMeasurements.has('extract control flow graph') && stats.controlFlowTimePerToken !== undefined) {
+        result += `
+Control flow extraction:              ${print(stats.commonMeasurements, 'extract control flow graph')}
+Control flow extraction per token:    ${formatNanoseconds(stats.controlFlowTimePerToken.normalized)}
+Control flow extraction per R token:  ${formatNanoseconds(stats.controlFlowTimePerToken.raw)}`;
+    }
+    if (stats.commonMeasurements.has('infer data frame shapes') && stats.dataFrameShapeTimePerToken !== undefined) {
+        result += `
+Dataframe shape inference:            ${print(stats.commonMeasurements, 'infer data frame shapes')}
+Dataframe shape inference per token:  ${formatNanoseconds(stats.dataFrameShapeTimePerToken.normalized)}
+Dataframe shape inference per R token:${formatNanoseconds(stats.dataFrameShapeTimePerToken.raw)}`;
+    }
     if (stats.perSliceMeasurements.numberOfSlices > 0) {
         result += `
+Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:
   Total:                              ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
   Slice creation:                     ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
   Slice creation per token in slice:  ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.normalized)}
@@ -110,7 +122,7 @@ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.pe
   Total per token in slice:           ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.normalized)}
   Total per R token in slice:         ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.raw)}
   Used Slice Criteria Sizes:          ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
-  Result Slice Sizes:
+  Result Slice Sizes:
     Number of lines:                     ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.lines)}
     Number of non-empty lines:           ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonEmptyLines)}
     Number of characters:                ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.characters)}
@@ -120,12 +132,12 @@ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.pe
     Number of R tokens (w/o comments):   ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.tokensNoComments)}
     Normalized R tokens:                 ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokens)}
     Normalized R tokens (w/o comments):  ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokensNoComments)}
-    Number of dataflow nodes:            ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}
-`;
+    Number of dataflow nodes:            ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}`;
     }
-    return `${result}
-Shell close:                  ${print(stats.commonMeasurements, 'close R session')}
-Total:                        ${print(stats.commonMeasurements, 'total')}
+    result += `
+Shell close:                         ${print(stats.commonMeasurements, 'close R session')}
+Total:                               ${print(stats.commonMeasurements, 'total')}
 Input:
   Number of lines:                     ${pad(stats.input.numberOfLines)}
@@ -148,9 +160,64 @@ Dataflow:
   Number of stored Env indices:  ${pad(stats.dataflow.storedEnvIndices)}
   Number of overwritten indices: ${pad(stats.dataflow.overwrittenIndices)}
   Size of graph:                 ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
+    if (stats.dataFrameShape !== undefined) {
+        result += `
+Dataframe shape inference:
+  Number of resulting constraints:${pad(stats.dataFrameShape.numberOfResultConstraints)}
+  Number of operation nodes:      ${pad(stats.dataFrameShape.numberOfOperationNodes)}
+  Number of abstract value nodes: ${pad(stats.dataFrameShape.numberOfValueNodes)}
+  Number of entries per node:     ${pad(stats.dataFrameShape.numberOfEntriesPerNode.mean)}
+  Number of operations:           ${pad(stats.dataFrameShape.numberOfOperations)}
+  Number of total values:         ${pad(stats.dataFrameShape.numberOfTotalValues)}
+  Number of total top:            ${pad(stats.dataFrameShape.numberOfTotalTop)}
+  Inferred column names per node: ${pad(stats.dataFrameShape.inferredColNames.mean)}
+  Number of column names values:  ${pad(stats.dataFrameShape.numberOfColNamesValues)}
+  Number of column names Top:     ${pad(stats.dataFrameShape.numberOfColNamesTop)}
+  Inferred column count per node: ${pad(stats.dataFrameShape.inferredColCount.mean)}
+  Number of column count values:  ${pad(stats.dataFrameShape.numberOfColCountValues)}
+  Number of column count Top:     ${pad(stats.dataFrameShape.numberOfColCountTop)}
+  Number of column count infinite:${pad(stats.dataFrameShape.numberOfColCountInfinite)}
+  Inferred row count per node:    ${pad(stats.dataFrameShape.inferredRowCount.mean)}
+  Number of row count values:     ${pad(stats.dataFrameShape.numberOfRowCountValues)}
+  Number of row count Top:        ${pad(stats.dataFrameShape.numberOfRowCountTop)}
+  Number of row count infinite:   ${pad(stats.dataFrameShape.numberOfRowCountInfinite)}
+  Size of data frame shape info:  ${convertNumberToNiceBytes(stats.dataFrameShape.sizeOfInfo)}`;
+    }
+    return result;
 }
 function ultimateStats2String(stats) {
-    const slice = stats.totalSlices > 0 ? `Slice summary for:
+    let result = `
+Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
+Shell init time:                      ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
+AST retrieval:                        ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
+AST retrieval per token:              ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
+AST retrieval per R token:            ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
+AST normalization:                    ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
+AST normalization per token:          ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
+AST normalization per R token:        ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
+Dataflow creation:                    ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
+Dataflow creation per token:          ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
+Dataflow creation per R token:        ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
+Total common time per token:          ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
+Total common time per R token:        ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}`;
+    if (stats.commonMeasurements.has('extract control flow graph') && stats.controlFlowTimePerToken !== undefined) {
+        result += `
+Control flow extraction:              ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('extract control flow graph'))}
+Control flow extraction per token:    ${formatSummarizedTimeMeasure(stats.controlFlowTimePerToken.normalized)}
+Control flow extraction per R token:  ${formatSummarizedTimeMeasure(stats.controlFlowTimePerToken.raw)}`;
+    }
+    if (stats.commonMeasurements.has('infer data frame shapes') && stats.dataFrameShapeTimePerToken !== undefined) {
+        result += `
+Dataframe shape inference:            ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('infer data frame shapes'))}
+Dataframe shape inference per token:  ${formatSummarizedTimeMeasure(stats.dataFrameShapeTimePerToken.normalized)}
+Dataframe shape inference per R token:${formatSummarizedTimeMeasure(stats.dataFrameShapeTimePerToken.raw)}`;
+    }
+    // Used Slice Criteria Sizes: ${formatSummarizedMeasure(stats.perSliceMeasurements.sliceCriteriaSizes)}
+    if (stats.totalSlices > 0) {
+        result += `
+Slicing summary for ${stats.totalSlices} slice${stats.totalSlices !== 1 ? 's' : ''}:
   Total:                              ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
   Slice creation:                     ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
   Slice creation per token in slice:  ${formatSummarizedTimeMeasure(stats.sliceTimePerToken.normalized)}
@@ -161,29 +228,14 @@ function ultimateStats2String(stats) {
   Total per token in slice:           ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.normalized)}
   Total per R token in slice:         ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.raw)}
   Failed to Re-Parse:                 ${pad(stats.failedToRepParse)}/${stats.totalSlices}
-  Times hit Threshold:                ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
+  Times hit Threshold:                ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
 ${reduction2String('Reductions', stats.reduction)}
-${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}` : 'No slices';
-    // Used Slice Criteria Sizes: ${formatSummarizedMeasure(stats.perSliceMeasurements.sliceCriteriaSizes)}
-    return `
-Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
-Shell init time:              ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
-AST retrieval:                ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
-AST retrieval per token:      ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
-AST retrieval per R token:    ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
-AST normalization:            ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
-AST normalization per token:  ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
-AST normalization per R token:${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
-Dataflow creation:            ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
-Dataflow creation per token:  ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
-Dataflow creation per R token:${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
-Total common time per token:  ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
-Total common time per R token:${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}
-${slice}
+${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}`;
+    }
+    result += `
-Shell close:                  ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
-Total:                        ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
+Shell close:                        ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
+Total:                              ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
 Input:
   Number of lines:                     ${formatSummarizedMeasure(stats.input.numberOfLines)}
@@ -205,12 +257,38 @@ Dataflow:
   Number of stored Vtx indices:  ${formatSummarizedMeasure(stats.dataflow.storedVertexIndices)}
   Number of stored Env indices:  ${formatSummarizedMeasure(stats.dataflow.storedEnvIndices)}
   Number of overwritten indices: ${formatSummarizedMeasure(stats.dataflow.overwrittenIndices)}
-  Size of graph:                 ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
-`;
+  Size of graph:                 ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}`;
+    if (stats.dataFrameShape !== undefined) {
+        result += `
+Dataframe shape inference:
+  Number of resulting constraints:${formatSummarizedMeasure(stats.dataFrameShape.numberOfResultConstraints)}
+  Number of operation nodes:      ${formatSummarizedMeasure(stats.dataFrameShape.numberOfOperationNodes)}
+  Number of abstract value nodes: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfValueNodes)}
+  Number of entries per node:     ${formatSummarizedMeasure(stats.dataFrameShape.numberOfEntriesPerNode)}
+  Number of operations:           ${formatSummarizedMeasure(stats.dataFrameShape.numberOfOperations)}
+  Number of total values:         ${formatSummarizedMeasure(stats.dataFrameShape.numberOfTotalValues)}
+  Number of total top:            ${formatSummarizedMeasure(stats.dataFrameShape.numberOfTotalTop)}
+  Inferred column names per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredColNames)}
+  Number of column names values:  ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColNamesValues)}
+  Number of column names top:     ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColNamesTop)}
+  Inferred column count per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredColCount)}
+  Number of column count exact:   ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountExact)}
+  Number of column count values:  ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountValues)}
+  Number of column count top:     ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountTop)}
+  Number of column count infinite:${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountInfinite)}
+  Inferred row count per node:    ${formatSummarizedMeasure(stats.dataFrameShape.inferredRowCount)}
+  Number of row count exact:      ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountExact)}
+  Number of row count values:     ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountValues)}
+  Number of row count top:        ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountTop)}
+  Number of row count infinite:   ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountInfinite)}
+  Size of data frame shape info:  ${formatSummarizedMeasure(stats.dataFrameShape.sizeOfInfo, convertNumberToNiceBytes)}`;
+    }
+    return result;
 }
 function reduction2String(title, reduction) {
     return `
-  ${title} (reduced by x%):
+  ${title} (reduced by x%):
     Number of lines:                     ${formatSummarizedMeasure(reduction.numberOfLines, asPercentage)}
     Number of lines no auto:             ${formatSummarizedMeasure(reduction.numberOfLinesNoAutoSelection, asPercentage)}
     Number of characters:                ${formatSummarizedMeasure(reduction.numberOfCharacters, asPercentage)}

package/benchmark/stats/size-of.d.ts CHANGED Viewed

@@ -1,3 +1,10 @@
 import type { DataflowGraph } from '../../dataflow/graph/graph';
 /** Returns the size of the given df graph in bytes (without sharing in-memory) */
 export declare function getSizeOfDfGraph(df: DataflowGraph): number;
+/**
+ * Calculates the size of an array in bytes.
+ *
+ * @param array - The array to calculate the size of.
+ * @returns The size of the array in bytes.
+ */
+export declare function safeSizeOf<T>(array: T[]): number;

package/benchmark/stats/size-of.js CHANGED Viewed

@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.getSizeOfDfGraph = getSizeOfDfGraph;
+exports.safeSizeOf = safeSizeOf;
 const environment_1 = require("../../dataflow/environments/environment");
 const vertex_1 = require("../../dataflow/graph/vertex");
 const identifier_1 = require("../../dataflow/environments/identifier");

package/benchmark/stats/stats.d.ts CHANGED Viewed

@@ -4,7 +4,10 @@ import type { ReconstructionResult } from '../../reconstruct/reconstruct';
 import type { RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge/retriever';
 import type { TimePerToken } from '../summarizer/data';
 import type { MergeableRecord } from '../../util/objects';
-export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
+import type { DataFrameOperationName } from '../../abstract-interpretation/data-frame/semantics';
+export declare const RequiredSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
+export declare const OptionalSlicerMeasurements: readonly ["extract control flow graph", "infer data frame shapes"];
+export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total", "extract control flow graph", "infer data frame shapes"];
 export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number];
 export declare const PerSliceMeasurements: readonly ["static slicing", "reconstruct code", "total"];
 export type PerSliceMeasurements = typeof PerSliceMeasurements[number];
@@ -41,6 +44,29 @@ export interface SlicerStatsDataflow<T = number> {
     storedEnvIndices: T;
     overwrittenIndices: T;
 }
+export interface SlicerStatsDfShape<T = number> {
+    numberOfDataFrameFiles: T extends number ? 0 | 1 : number;
+    numberOfNonDataFrameFiles: T extends number ? 0 | 1 : number;
+    numberOfResultConstraints: T;
+    numberOfResultingValues: T;
+    numberOfResultingTop: T;
+    numberOfResultingBottom: T;
+    numberOfEmptyNodes: T;
+    numberOfOperationNodes: T;
+    numberOfValueNodes: T;
+    sizeOfInfo: T;
+    perNodeStats: Map<NodeId, PerNodeStatsDfShape<T>>;
+}
+export interface PerNodeStatsDfShape<T = number> {
+    numberOfEntries: T;
+    mappedOperations?: DataFrameOperationName[];
+    inferredColNames?: T | 'top';
+    inferredColCount?: T | 'bottom' | 'infinite' | 'top';
+    inferredRowCount?: T | 'bottom' | 'infinite' | 'top';
+    /** difference between upper and lower bound of interval domain (to estimate approximation) */
+    approxRangeColCount?: T;
+    approxRangeRowCount?: T;
+}
 /**
  * Please note that these measurements can be negative, as there is no guarantee that the memory usage will increase
  * (e.g., because of garbage collection).
@@ -61,8 +87,11 @@ export interface SlicerStats {
     request: RParseRequestFromFile | RParseRequestFromText;
     input: SlicerStatsInput;
     dataflow: SlicerStatsDataflow;
+    dataFrameShape?: SlicerStatsDfShape;
     retrieveTimePerToken: TimePerToken<number>;
     normalizeTimePerToken: TimePerToken<number>;
     dataflowTimePerToken: TimePerToken<number>;
     totalCommonTimePerToken: TimePerToken<number>;
+    controlFlowTimePerToken?: TimePerToken<number>;
+    dataFrameShapeTimePerToken?: TimePerToken<number>;
 }

package/benchmark/stats/stats.js CHANGED Viewed

@@ -1,6 +1,8 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.PerSliceMeasurements = exports.CommonSlicerMeasurements = void 0;
-exports.CommonSlicerMeasurements = ['initialize R session', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'];
+exports.PerSliceMeasurements = exports.CommonSlicerMeasurements = exports.OptionalSlicerMeasurements = exports.RequiredSlicerMeasurements = void 0;
+exports.RequiredSlicerMeasurements = ['initialize R session', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'];
+exports.OptionalSlicerMeasurements = ['extract control flow graph', 'infer data frame shapes'];
+exports.CommonSlicerMeasurements = [...exports.RequiredSlicerMeasurements, ...exports.OptionalSlicerMeasurements];
 exports.PerSliceMeasurements = ['static slicing', 'reconstruct code', 'total'];
 //# sourceMappingURL=stats.js.map

package/benchmark/summarizer/data.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
+import type { DataFrameOperationName } from '../../abstract-interpretation/data-frame/semantics';
 import type { SummarizedMeasurement } from '../../util/summarizer';
-import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
+import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDfShape, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
 export interface SliceSizeCollection {
     lines: number[];
     nonEmptyLines: number[];
@@ -21,7 +22,8 @@ export interface SliceSizeCollection {
  */
 export type SummarizedSlicerStats = {
     perSliceMeasurements: SummarizedPerSliceStats;
-} & Omit<SlicerStats, 'perSliceMeasurements'>;
+    dataFrameShape?: SummarizedDfShapeStats;
+} & Omit<SlicerStats, 'perSliceMeasurements' | 'dataFrameShape'>;
 export interface Reduction<T = number> {
     numberOfLines: T;
     numberOfLinesNoAutoSelection: T;
@@ -62,6 +64,8 @@ export interface UltimateSlicerStats {
     normalizeTimePerToken: TimePerToken;
     dataflowTimePerToken: TimePerToken;
     totalCommonTimePerToken: TimePerToken;
+    controlFlowTimePerToken?: TimePerToken;
+    dataFrameShapeTimePerToken?: TimePerToken;
     sliceTimePerToken: TimePerToken;
     reconstructTimePerToken: TimePerToken;
     totalPerSliceTimePerToken: TimePerToken;
@@ -74,4 +78,31 @@ export interface UltimateSlicerStats {
     reductionNoFluff: Reduction<SummarizedMeasurement>;
     input: SlicerStatsInput<SummarizedMeasurement>;
     dataflow: SlicerStatsDataflow<SummarizedMeasurement>;
+    dataFrameShape?: SummarizedDfShapeStats<SummarizedMeasurement>;
+}
+export interface SummarizedDfShapeStats<T = number> extends Omit<SlicerStatsDfShape<T>, 'perNodeStats'> {
+    numberOfEntriesPerNode: SummarizedMeasurement;
+    numberOfOperations: T;
+    numberOfTotalValues: T;
+    numberOfTotalTop: T;
+    numberOfTotalBottom: T;
+    inferredColNames: SummarizedMeasurement;
+    numberOfColNamesValues: T;
+    numberOfColNamesTop: T;
+    numberOfColNamesBottom: T;
+    inferredColCount: SummarizedMeasurement;
+    numberOfColCountExact: T;
+    numberOfColCountValues: T;
+    numberOfColCountTop: T;
+    numberOfColCountInfinite: T;
+    numberOfColCountBottom: T;
+    approxRangeColCount: SummarizedMeasurement;
+    inferredRowCount: SummarizedMeasurement;
+    numberOfRowCountExact: T;
+    numberOfRowCountValues: T;
+    numberOfRowCountTop: T;
+    numberOfRowCountInfinite: T;
+    numberOfRowCountBottom: T;
+    approxRangeRowCount: SummarizedMeasurement;
+    perOperationNumber: Map<DataFrameOperationName, T>;
 }

package/benchmark/summarizer/first-phase/input.js CHANGED Viewed

@@ -28,7 +28,11 @@ async function processRunMeasurement(line, fileNum, lineNum, textOutputAppendPat
                 return [k, BigInt(v.slice(0, -1))];
             })),
             perSliceMeasurements: new Map(got.stats.perSliceMeasurements
-                .map(([k, v]) => mapPerSliceStats(k, v)))
+                .map(([k, v]) => mapPerSliceStats(k, v))),
+            dataFrameShape: got.stats.dataFrameShape !== undefined ? {
+                ...got.stats.dataFrameShape,
+                perNodeStats: new Map(got.stats.dataFrameShape.perNodeStats)
+            } : undefined
         }
     };
     const totalSlices = got.stats.perSliceMeasurements.size;

package/benchmark/summarizer/first-phase/process.d.ts CHANGED Viewed

@@ -2,11 +2,12 @@ import type { Reduction, SummarizedSlicerStats, TimePerToken } from '../data';
 import type { SummarizedMeasurement } from '../../../util/summarizer';
 import type { PerSliceStats, SlicerStats } from '../../stats/stats';
 import type { SlicingCriteria } from '../../../slicing/criterion/parse';
+import type { RShellEngineConfig } from '../../../config';
 /**
  * Summarizes the given stats by calculating the min, max, median, mean, and the standard deviation for each measurement.
  * @see Slicer
  */
-export declare function summarizeSlicerStats(stats: SlicerStats, report?: (criteria: SlicingCriteria, stats: PerSliceStats) => void): Promise<Readonly<SummarizedSlicerStats>>;
+export declare function summarizeSlicerStats(stats: SlicerStats, report?: (criteria: SlicingCriteria, stats: PerSliceStats) => void, engineConf?: RShellEngineConfig): Promise<Readonly<SummarizedSlicerStats>>;
 export declare function summarizeSummarizedMeasurement(data: SummarizedMeasurement[]): SummarizedMeasurement;
 export declare function summarizeSummarizedReductions(reductions: Reduction<SummarizedMeasurement>[]): Reduction<SummarizedMeasurement>;
 export declare function summarizeSummarizedTimePerToken(times: TimePerToken[]): TimePerToken;