npm - @eagleoutice/flowr - Versions diffs - 2.2.10 → 2.2.12 - Mend

@eagleoutice/flowr 2.2.10 → 2.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (132) hide show

package/README.md CHANGED Viewed

@@ -48,7 +48,7 @@ It offers a wide variety of features, for example:
     ```shell
     $ docker run -it --rm eagleoutice/flowr # or npm run flowr
-    flowR repl using flowR v2.2.9, R v4.4.0 (r-shell engine)
+    flowR repl using flowR v2.2.11, R v4.4.3 (r-shell engine)
     R> :slicer test/testfiles/example.R --criterion "11@sum"
     ```
@@ -95,7 +95,7 @@ It offers a wide variety of features, for example:
 * 🚀 **fast data- and control-flow graphs**\
-  Within just <i><span title="This measurement is automatically fetched from the latest benchmark!">122.2 ms</span></i> (as of Feb 21, 2025),
+  Within just <i><span title="This measurement is automatically fetched from the latest benchmark!">117.9 ms</span></i> (as of Mar 2, 2025),
   _flowR_ can analyze the data- and control-flow of the average real-world R script. See the [benchmarks](https://flowr-analysis.github.io/flowr/wiki/stats/benchmark) for more information,
   and consult the [wiki pages](https://github.com/flowr-analysis/flowr/wiki/Dataflow-Graph) for more details on the dataflow graph.
@@ -131,7 +131,7 @@ It offers a wide variety of features, for example:
     ```shell
     $ docker run -it --rm eagleoutice/flowr # or npm run flowr
-    flowR repl using flowR v2.2.9, R v4.4.0 (r-shell engine)
+    flowR repl using flowR v2.2.11, R v4.4.3 (r-shell engine)
     R> :dataflow* test/testfiles/example.R
     ```
@@ -377,7 +377,7 @@ It offers a wide variety of features, for example:
     ```
-    (The analysis required _21.60 ms_ (including parse and normalize, using the [r-shell](https://github.com/flowr-analysis/flowr/wiki/Engines) engine) within the generation environment.)
+    (The analysis required _22.14 ms_ (including parse and normalize, using the [r-shell](https://github.com/flowr-analysis/flowr/wiki/Engines) engine) within the generation environment.)

package/benchmark/slicer.d.ts CHANGED Viewed

@@ -43,6 +43,13 @@ export interface BenchmarkSingleSliceStats extends MergeableRecord {
     /** the final code, as the result of the 'reconstruct' step */
     code: ReconstructionResult;
 }
+/**
+ * The type of sampling strategy to use when slicing all possible variables.
+ *
+ * - `'random'`: Randomly select the given number of slicing criteria.
+ * - `'equidistant'`: Select the given number of slicing criteria in an equidistant manner.
+ */
+export type SamplingStrategy = 'random' | 'equidistant';
 export declare class BenchmarkSlicer {
     /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
     private readonly commonMeasurements;
@@ -62,38 +69,58 @@ export declare class BenchmarkSlicer {
      * Initialize the slicer on the given request.
      * Can only be called once for each instance.
      */
-    init(request: RParseRequestFromFile | RParseRequestFromText, autoSelectIf?: AutoSelectPredicate): Promise<void>;
+    init(request: RParseRequestFromFile | RParseRequestFromText, autoSelectIf?: AutoSelectPredicate, threshold?: number): Promise<void>;
     private calculateStatsAfterInit;
     /**
-   * Slice for the given {@link SlicingCriteria}.
-   * @see SingleSlicingCriterion
-   *
-   * @returns The per slice stats retrieved for this slicing criteria
-   */
+     * Counts the number of stored indices in the dataflow graph created by the pointer analysis.
+     */
+    private countStoredVertexIndices;
+    /**
+     * Counts the number of stored indices (created by the pointer analysis) in the definitions held in the memory of the current environment.
+     */
+    private countStoredEnvIndices;
+    /**
+     * Counts the number of stored indices in the passed definitions.
+     */
+    private countStoredIndices;
+    /**
+     * Recursively counts the number of indices and sub-indices in the given collection.
+     */
+    private countIndices;
+    /**
+     * Slice for the given {@link SlicingCriteria}.
+     * @see SingleSlicingCriterion
+     *
+     * @returns The per slice stats retrieved for this slicing criteria
+     */
     slice(...slicingCriteria: SlicingCriteria): Promise<BenchmarkSingleSliceStats>;
     /** Bridging the gap between the new internal and the old names for the benchmarking */
     private measureCommonStep;
     private measureSliceStep;
     private guardActive;
     /**
-   * Call {@link slice} for all slicing criteria that match the given filter.
-   * See {@link collectAllSlicingCriteria} for details.
-   * <p>
-   * the `report` function will be called *before* each *individual* slice is performed.
-   *
-   * @returns The number of slices that were produced
-   *
-   * @see collectAllSlicingCriteria
-   * @see SlicingCriteriaFilter
-   */
-    sliceForAll(filter: SlicingCriteriaFilter, report?: (current: number, total: number, allCriteria: SlicingCriteria[]) => void, sampleRandom?: number): Promise<number>;
+     * Call {@link slice} for all slicing criteria that match the given filter.
+     * See {@link collectAllSlicingCriteria} for details.
+     * <p>
+     * the `report` function will be called *before* each *individual* slice is performed.
+     *
+     * @returns The number of slices that were produced
+     *
+     * @see collectAllSlicingCriteria
+     * @see SlicingCriteriaFilter
+     */
+    sliceForAll(filter: SlicingCriteriaFilter, report?: (current: number, total: number, allCriteria: SlicingCriteria[]) => void, options?: {
+        sampleCount?: number;
+        maxSliceCount?: number;
+        sampleStrategy?: SamplingStrategy;
+    }): Promise<number>;
     /**
-   * Retrieves the final stats and closes the shell session.
-   * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
-   */
+     * Retrieves the final stats and closes the shell session.
+     * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
+     */
     finish(): BenchmarkSlicerStats;
     /**
-   * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
-   */
+     * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
+     */
     ensureSessionClosed(): void;
 }

package/benchmark/slicer.js CHANGED Viewed

@@ -22,6 +22,8 @@ const size_of_1 = require("./stats/size-of");
 const shell_1 = require("../r-bridge/shell");
 const tree_sitter_types_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-types");
 const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor");
+const vertex_1 = require("../dataflow/graph/vertex");
+const arrays_1 = require("../util/arrays");
 /**
  * The logger to be used for benchmarking as a global object.
  */
@@ -49,7 +51,7 @@ class BenchmarkSlicer {
      * Initialize the slicer on the given request.
      * Can only be called once for each instance.
      */
-    async init(request, autoSelectIf) {
+    async init(request, autoSelectIf, threshold) {
         (0, assert_1.guard)(this.stats === undefined, 'cannot initialize the slicer twice');
         // we know these are in sync so we just cast to one of them
         this.parser = await this.commonMeasurements.measure('initialize R session', async () => {
@@ -64,7 +66,8 @@ class BenchmarkSlicer {
         this.executor = (0, default_pipelines_1.createSlicePipeline)(this.parser, {
             request: { ...request },
             criterion: [],
-            autoSelectIf
+            autoSelectIf,
+            threshold,
         });
         this.loadedXml = (await this.measureCommonStep('parse', 'retrieve AST from R code')).parsed;
         this.normalizedAst = await this.measureCommonStep('normalize', 'normalize R AST');
@@ -127,6 +130,9 @@ class BenchmarkSlicer {
             }
             return false;
         });
+        const storedVertexIndices = this.countStoredVertexIndices();
+        const storedEnvIndices = this.countStoredEnvIndices();
+        const overwrittenIndices = storedVertexIndices - storedEnvIndices;
         const split = loadedContent.split('\n');
         const nonWhitespace = (0, strings_1.withoutWhitespace)(loadedContent).length;
         this.stats = {
@@ -150,7 +156,10 @@ class BenchmarkSlicer {
                 numberOfEdges: numberOfEdges,
                 numberOfCalls: numberOfCalls,
                 numberOfFunctionDefinitions: numberOfDefinitions,
-                sizeOfObject: (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph)
+                sizeOfObject: (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph),
+                storedVertexIndices: storedVertexIndices,
+                storedEnvIndices: storedEnvIndices,
+                overwrittenIndices: overwrittenIndices,
             },
             // these are all properly initialized in finish()
             commonMeasurements: new Map(),
@@ -161,11 +170,52 @@ class BenchmarkSlicer {
         };
     }
     /**
-   * Slice for the given {@link SlicingCriteria}.
-   * @see SingleSlicingCriterion
-   *
-   * @returns The per slice stats retrieved for this slicing criteria
-   */
+     * Counts the number of stored indices in the dataflow graph created by the pointer analysis.
+     */
+    countStoredVertexIndices() {
+        return this.countStoredIndices(this.dataflow?.out.map(ref => ref) ?? []);
+    }
+    /**
+     * Counts the number of stored indices (created by the pointer analysis) in the definitions held in the memory of the current environment.
+     */
+    countStoredEnvIndices() {
+        return this.countStoredIndices(this.dataflow?.environment.current.memory.values()
+            ?.flatMap(def => def)
+            .map(def => def) ?? []);
+    }
+    /**
+     * Counts the number of stored indices in the passed definitions.
+     */
+    countStoredIndices(definitions) {
+        let numberOfIndices = 0;
+        for (const reference of definitions) {
+            if (reference.indicesCollection) {
+                numberOfIndices += this.countIndices(reference.indicesCollection);
+            }
+        }
+        return numberOfIndices;
+    }
+    /**
+     * Recursively counts the number of indices and sub-indices in the given collection.
+     */
+    countIndices(collection) {
+        let numberOfIndices = 0;
+        for (const indices of collection ?? []) {
+            for (const index of indices.indices) {
+                numberOfIndices++;
+                if ((0, vertex_1.isParentContainerIndex)(index)) {
+                    numberOfIndices += this.countIndices(index.subIndices);
+                }
+            }
+        }
+        return numberOfIndices;
+    }
+    /**
+     * Slice for the given {@link SlicingCriteria}.
+     * @see SingleSlicingCriterion
+     *
+     * @returns The per slice stats retrieved for this slicing criteria
+     */
     async slice(...slicingCriteria) {
         exports.benchmarkLogger.trace(`try to slice for criteria ${JSON.stringify(slicingCriteria)}`);
         this.guardActive();
@@ -227,23 +277,33 @@ class BenchmarkSlicer {
         (0, assert_1.guard)(this.stats !== undefined && !this.finished, 'need to call init before, and can not do after finish!');
     }
     /**
-   * Call {@link slice} for all slicing criteria that match the given filter.
-   * See {@link collectAllSlicingCriteria} for details.
-   * <p>
-   * the `report` function will be called *before* each *individual* slice is performed.
-   *
-   * @returns The number of slices that were produced
-   *
-   * @see collectAllSlicingCriteria
-   * @see SlicingCriteriaFilter
-   */
-    async sliceForAll(filter, report = () => { }, sampleRandom = -1) {
+     * Call {@link slice} for all slicing criteria that match the given filter.
+     * See {@link collectAllSlicingCriteria} for details.
+     * <p>
+     * the `report` function will be called *before* each *individual* slice is performed.
+     *
+     * @returns The number of slices that were produced
+     *
+     * @see collectAllSlicingCriteria
+     * @see SlicingCriteriaFilter
+     */
+    async sliceForAll(filter, report = () => { }, options = {}) {
+        const { sampleCount, maxSliceCount, sampleStrategy } = { sampleCount: -1, maxSliceCount: -1, sampleStrategy: 'random', ...options };
         this.guardActive();
         let count = 0;
-        const allCriteria = [...(0, collect_all_1.collectAllSlicingCriteria)(this.normalizedAst.ast, filter)];
-        if (sampleRandom > 0) {
-            allCriteria.sort(() => Math.random() - 0.5);
-            allCriteria.length = Math.min(allCriteria.length, sampleRandom);
+        let allCriteria = [...(0, collect_all_1.collectAllSlicingCriteria)(this.normalizedAst.ast, filter)];
+        // Cancel slicing if the number of slices exceeds the limit
+        if (maxSliceCount > 0 && allCriteria.length > maxSliceCount) {
+            return -allCriteria.length;
+        }
+        if (sampleCount > 0) {
+            if (sampleStrategy === 'equidistant') {
+                allCriteria = (0, arrays_1.equidistantSampling)(allCriteria, sampleCount, 'ceil');
+            }
+            else {
+                allCriteria.sort(() => Math.random() - 0.5);
+                allCriteria.length = Math.min(allCriteria.length, sampleCount);
+            }
         }
         for (const slicingCriteria of allCriteria) {
             report(count, allCriteria.length, allCriteria);
@@ -253,9 +313,9 @@ class BenchmarkSlicer {
         return count;
     }
     /**
-   * Retrieves the final stats and closes the shell session.
-   * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
-   */
+     * Retrieves the final stats and closes the shell session.
+     * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
+     */
     finish() {
         (0, assert_1.guard)(this.stats !== undefined, 'need to call init before finish');
         if (!this.finished) {
@@ -291,8 +351,8 @@ class BenchmarkSlicer {
         };
     }
     /**
-   * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
-   */
+     * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
+     */
     ensureSessionClosed() {
         this.parser?.close();
     }

package/benchmark/stats/print.js CHANGED Viewed

@@ -140,11 +140,14 @@ Input:
   Normalized R tokens (w/o comments):  ${pad(stats.input.numberOfNormalizedTokensNoComments)}
 Dataflow:
-  Number of nodes:            ${pad(stats.dataflow.numberOfNodes)}
-  Number of edges:            ${pad(stats.dataflow.numberOfEdges)}
-  Number of calls:            ${pad(stats.dataflow.numberOfCalls)}
-  Number of function defs:    ${pad(stats.dataflow.numberOfFunctionDefinitions)}
-  Size of graph:              ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
+  Number of nodes:               ${pad(stats.dataflow.numberOfNodes)}
+  Number of edges:               ${pad(stats.dataflow.numberOfEdges)}
+  Number of calls:               ${pad(stats.dataflow.numberOfCalls)}
+  Number of function defs:       ${pad(stats.dataflow.numberOfFunctionDefinitions)}
+  Number of stored Vtx indices:  ${pad(stats.dataflow.storedVertexIndices)}
+  Number of stored Env indices:  ${pad(stats.dataflow.storedEnvIndices)}
+  Number of overwritten indices: ${pad(stats.dataflow.overwrittenIndices)}
+  Size of graph:                 ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
 }
 function ultimateStats2String(stats) {
     const slice = stats.totalSlices > 0 ? `Slice summary for:
@@ -195,11 +198,14 @@ Input:
   Normalized R tokens (w/o comments):  ${formatSummarizedMeasure(stats.input.numberOfNormalizedTokensNoComments)}
 Dataflow:
-  Number of nodes:            ${formatSummarizedMeasure(stats.dataflow.numberOfNodes)}
-  Number of edges:            ${formatSummarizedMeasure(stats.dataflow.numberOfEdges)}
-  Number of calls:            ${formatSummarizedMeasure(stats.dataflow.numberOfCalls)}
-  Number of function defs:    ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}
-  Size of graph:              ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
+  Number of nodes:               ${formatSummarizedMeasure(stats.dataflow.numberOfNodes)}
+  Number of edges:               ${formatSummarizedMeasure(stats.dataflow.numberOfEdges)}
+  Number of calls:               ${formatSummarizedMeasure(stats.dataflow.numberOfCalls)}
+  Number of function defs:       ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}
+  Number of stored Vtx indices:  ${formatSummarizedMeasure(stats.dataflow.storedVertexIndices)}
+  Number of stored Env indices:  ${formatSummarizedMeasure(stats.dataflow.storedEnvIndices)}
+  Number of overwritten indices: ${formatSummarizedMeasure(stats.dataflow.overwrittenIndices)}
+  Size of graph:                 ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
 `;
 }
 function reduction2String(title, reduction) {

package/benchmark/stats/size-of.js CHANGED Viewed

@@ -65,6 +65,23 @@ function getSizeOfDfGraph(df) {
         });
         verts.push(vertex);
     }
-    return (0, object_sizeof_1.default)([...verts, ...df.edges()]);
+    return safeSizeOf([...verts, ...df.edges()]);
+}
+/**
+ * Calculates the size of an array in bytes.
+ *
+ * @param array - The array to calculate the size of.
+ * @returns The size of the array in bytes.
+ */
+function safeSizeOf(array) {
+    const size = (0, object_sizeof_1.default)(array);
+    if (typeof size === 'number') {
+        return size;
+    }
+    // the sizeOf method returns an error object, when the size could not be calculated
+    // in this case, we split the array in half and calculate the size of each half recursively
+    const chunkSize = Math.ceil(array.length / 2);
+    // subtract 1, because of the separate stringification of the array
+    return safeSizeOf(array.slice(0, chunkSize)) + safeSizeOf(array.slice(chunkSize)) - 1;
 }
 //# sourceMappingURL=size-of.js.map

package/benchmark/stats/stats.d.ts CHANGED Viewed

@@ -37,6 +37,9 @@ export interface SlicerStatsDataflow<T = number> {
     numberOfCalls: T;
     numberOfFunctionDefinitions: T;
     sizeOfObject: T;
+    storedVertexIndices: T;
+    storedEnvIndices: T;
+    overwrittenIndices: T;
 }
 /**
  * Please note that these measurements can be negative as there is no guarantee that the memory usage will increase

package/benchmark/summarizer/second-phase/process.js CHANGED Viewed

@@ -85,7 +85,10 @@ function summarizeAllSummarizedStats(stats) {
             numberOfFunctionDefinitions: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfFunctionDefinitions)),
             numberOfCalls: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfCalls)),
             numberOfEdges: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfEdges)),
-            sizeOfObject: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.sizeOfObject))
+            sizeOfObject: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.sizeOfObject)),
+            storedVertexIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.storedVertexIndices)),
+            storedEnvIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.storedEnvIndices)),
+            overwrittenIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.overwrittenIndices)),
         }
     };
 }
@@ -125,7 +128,10 @@ function summarizeAllUltimateStats(stats) {
             numberOfFunctionDefinitions: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfFunctionDefinitions)),
             numberOfCalls: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfCalls)),
             numberOfEdges: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfEdges)),
-            sizeOfObject: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.sizeOfObject))
+            sizeOfObject: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.sizeOfObject)),
+            storedVertexIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.storedVertexIndices)),
+            storedEnvIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.storedEnvIndices)),
+            overwrittenIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.overwrittenIndices)),
         }
     };
 }

package/cli/benchmark-app.d.ts CHANGED Viewed

@@ -9,4 +9,9 @@ export interface BenchmarkCliOptions {
     limit?: number;
     runs?: number;
     parser: KnownParserName;
+    'enable-pointer-tracking': boolean;
+    'max-file-slices': number;
+    threshold?: number;
+    'per-file-time-limit'?: number;
+    'sampling-strategy': string;
 }

package/cli/benchmark-app.js CHANGED Viewed

@@ -42,10 +42,17 @@ async function benchmark() {
     console.log(`Using ${options.parallel} parallel executors`);
     // we do not use the limit argument to be able to pick the limit randomly
     const files = [];
-    for (const input of options.input) {
-        for await (const file of (0, files_1.allRFiles)(input)) {
-            files.push({ request: file, baseDir: input });
-        }
+    const firstFile = options.input[0];
+    // Check whether input is single JSON file containing a list of paths
+    if (options.input.length === 1 && fs_1.default.statSync(firstFile).isFile() && firstFile.endsWith('.json')) {
+        console.log('Input is a single JSON file. Assuming it contains a list of files to process');
+        const content = fs_1.default.readFileSync(firstFile, 'utf8');
+        const paths = JSON.parse(content);
+        const baseDir = findCommonBaseDir(paths);
+        await collectFiles(files, paths, () => baseDir);
+    }
+    else {
+        await collectFiles(files, options.input, (p) => p);
     }
     if (options.limit) {
         log_1.log.info(`limiting to ${options.limit} files`);
@@ -59,18 +66,54 @@ async function benchmark() {
         '--file-id', `${i}`,
         '--output', path_1.default.join(options.output, path_1.default.relative(f.baseDir, `${f.request.content}.json`)),
         '--slice', options.slice, ...verboseAdd,
-        '--parser', options.parser
+        '--parser', options.parser,
+        ...(options['enable-pointer-tracking'] ? ['--enable-pointer-tracking'] : []),
+        '--max-slices', `${options['max-file-slices']}`,
+        ...(options.threshold ? ['--threshold', `${options.threshold}`] : []),
+        '--sampling-strategy', options['sampling-strategy'],
     ]);
     const runs = options.runs ?? 1;
     for (let i = 1; i <= runs; i++) {
         console.log(`Run ${i} of ${runs}`);
         const pool = new parallel_1.LimitedThreadPool(`${__dirname}/benchmark-helper-app`,
         // we reverse here "for looks", since the helper pops from the end, and we want file ids to be ascending :D
-        args.map(a => [...a, '--run-num', `${i}`]).reverse(), limit, options.parallel);
+        args.map(a => [...a, '--run-num', `${i}`]).reverse(), limit, options.parallel, options['per-file-time-limit']);
         await pool.run();
         const stats = pool.getStats();
         console.log(`Run ${i} of ${runs}: Benchmarked ${stats.counter} files, skipped ${stats.skipped.length} files due to errors`);
     }
 }
+/**
+ * Collect all R files from the given paths.
+ *
+ * @param files - list of files to append to
+ * @param paths - list of paths to search for R files
+ * @param getBaseDir - function to get the base directory of a path
+ */
+async function collectFiles(files, paths, getBaseDir) {
+    for (const input of paths) {
+        for await (const file of (0, files_1.allRFiles)(input)) {
+            files.push({ request: file, baseDir: getBaseDir(input) });
+        }
+    }
+}
+/**
+ * Find the common base directory of a list of paths.
+ *
+ * @param paths - list of paths
+ * @returns the common base directory
+ */
+function findCommonBaseDir(paths) {
+    const baseDirs = paths.map(f => path_1.default.dirname(f));
+    return baseDirs.reduce((acc, dir) => {
+        const split = dir.split(path_1.default.sep);
+        const accSplit = acc.split(path_1.default.sep);
+        let i = 0;
+        while (i < split.length && i < accSplit.length && split[i] === accSplit[i]) {
+            i++;
+        }
+        return split.slice(0, i).join(path_1.default.sep);
+    }, baseDirs[0]);
+}
 void benchmark();
 //# sourceMappingURL=benchmark-app.js.map

package/cli/benchmark-helper-app.d.ts CHANGED Viewed

@@ -8,4 +8,8 @@ export interface SingleBenchmarkCliOptions {
     slice: string;
     output?: string;
     parser: KnownParserName;
+    'enable-pointer-tracking': boolean;
+    'max-slices': number;
+    threshold?: number;
+    'sampling-strategy': string;
 }

package/cli/benchmark-helper-app.js CHANGED Viewed

@@ -11,6 +11,7 @@ const script_1 = require("./common/script");
 const slicer_1 = require("../benchmark/slicer");
 const all_variables_1 = require("../slicing/criterion/filters/all-variables");
 const path_1 = __importDefault(require("path"));
+const config_1 = require("../config");
 const options = (0, script_1.processCommandLineArgs)('benchmark-helper', [], {
     subtitle: 'Will slice for all possible variables, signal by exit code if slicing was successful, and can be run standalone',
     examples: [
@@ -23,6 +24,7 @@ if (options.verbose) {
 }
 const numberRegex = /^\d+$/;
 (0, assert_1.guard)(options.slice === 'all' || options.slice === 'no' || numberRegex.test(options.slice), 'slice must be either all, no, or a number');
+(0, assert_1.guard)(options['sampling-strategy'] === 'random' || options['sampling-strategy'] === 'equidistant', 'sample-strategy must be either random or equidistant');
 async function benchmark() {
     // we do not use the limit argument to be able to pick the limit randomly
     (0, assert_1.guard)(options.input !== undefined, 'No input file given');
@@ -31,18 +33,31 @@ async function benchmark() {
     // prefix for printing to console, includes file id and run number if present
     const prefix = `[${options.input}${options['file-id'] !== undefined ? ` (file ${options['file-id']}, run ${options['run-num']})` : ''}]`;
     console.log(`${prefix} Appending output to ${options.output}`);
-    fs_1.default.mkdirSync(path_1.default.parse(options.output).dir, { recursive: true });
+    const directory = path_1.default.parse(options.output).dir;
+    // ensure the directory exists if path contains one
+    if (directory !== '') {
+        fs_1.default.mkdirSync(directory, { recursive: true });
+    }
+    // Enable pointer analysis if requested, otherwise disable it
+    if (options['enable-pointer-tracking']) {
+        (0, config_1.amendConfig)({ solver: { ...(0, config_1.getConfig)().solver, pointerTracking: true, } });
+    }
+    else {
+        (0, config_1.amendConfig)({ solver: { ...(0, config_1.getConfig)().solver, pointerTracking: false, } });
+    }
     // ensure the file exists
     const fileStat = fs_1.default.statSync(options.input);
     (0, assert_1.guard)(fileStat.isFile(), `File ${options.input} does not exist or is no file`);
     const request = { request: 'file', content: options.input };
+    const maxSlices = options['max-slices'] ?? -1;
     const slicer = new slicer_1.BenchmarkSlicer(options.parser);
     try {
-        await slicer.init(request);
+        await slicer.init(request, undefined, options.threshold);
         // ${escape}1F${escape}1G${escape}2K for line reset
         if (options.slice === 'all') {
-            const count = await slicer.sliceForAll(all_variables_1.DefaultAllVariablesFilter, (i, total, arr) => console.log(`${prefix} Slicing ${i + 1}/${total} [${JSON.stringify(arr[i])}]`));
+            const count = await slicer.sliceForAll(all_variables_1.DefaultAllVariablesFilter, (i, total, arr) => console.log(`${prefix} Slicing ${i + 1}/${total} [${JSON.stringify(arr[i])}]`), { maxSliceCount: maxSlices });
             console.log(`${prefix} Completed Slicing`);
+            (0, assert_1.guard)(count >= 0, `Number of slices exceeded limit of ${maxSlices} with ${-count} slices, skipping in count`);
             (0, assert_1.guard)(count > 0, `No possible slices found for ${options.input}, skipping in count`);
         }
         else if (options.slice === 'no') {
@@ -51,8 +66,9 @@ async function benchmark() {
         else {
             const limit = parseInt(options.slice);
             console.log(`${prefix} Slicing up to ${limit} possible slices`);
-            const count = await slicer.sliceForAll(all_variables_1.DefaultAllVariablesFilter, (i, total, arr) => console.log(`${prefix} Slicing ${i + 1}/${total} [${JSON.stringify(arr[i])}]`), limit);
+            const count = await slicer.sliceForAll(all_variables_1.DefaultAllVariablesFilter, (i, total, arr) => console.log(`${prefix} Slicing ${i + 1}/${total} [${JSON.stringify(arr[i])}]`), { sampleCount: limit, maxSliceCount: maxSlices, sampleStrategy: options['sampling-strategy'] });
             console.log(`${prefix} Completed Slicing`);
+            (0, assert_1.guard)(count >= 0, `Number of slices exceeded limit of ${maxSlices} with ${-count} slices, skipping in count`);
             (0, assert_1.guard)(count > 0, `No possible slices found for ${options.input}, skipping in count`);
         }
         const { stats } = slicer.finish();

package/cli/common/options.js CHANGED Viewed

@@ -16,11 +16,16 @@ exports.benchmarkOptions = [
     { name: 'help', alias: 'h', type: Boolean, description: 'Print this usage guide' },
     { name: 'limit', alias: 'l', type: Number, description: 'Limit the number of files to process (if given, this will choose these files randomly and add the chosen names to the output' },
     { name: 'runs', alias: 'r', type: Number, description: 'The amount of benchmark runs that should be done, out of which an average will be calculated' },
-    { name: 'input', alias: 'i', type: String, description: 'Pass a folder or file as src to read from', multiple: true, defaultOption: true, defaultValue: [], typeLabel: '{underline files/folders}' },
+    { name: 'input', alias: 'i', type: String, description: 'Pass a folder or file as src to read from. Alternatively, pass a single JSON file that contains a list of paths.', multiple: true, defaultOption: true, defaultValue: [], typeLabel: '{underline files/folders}' },
     { name: 'parallel', alias: 'p', type: String, description: 'Number of parallel executors (defaults to {italic max(cpu.count-1, 1)})', defaultValue: Math.max(os_1.default.cpus().length - 1, 1), typeLabel: '{underline number}' },
     { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
-    { name: 'output', alias: 'o', type: String, description: `Directory to write all the measurements to in a per-file-basis (defaults to {italic benchmark-${StartTimeString}})`, defaultValue: `benchmark-${StartTimeString}`, typeLabel: '{underline file}' },
-    { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' }
+    { name: 'output', alias: 'o', type: String, description: `Folder to write all the measurements to in a per-file-basis (defaults to {italic benchmark-${StartTimeString}})`, defaultValue: `benchmark-${StartTimeString}`, typeLabel: '{underline folder}' },
+    { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' },
+    { name: 'enable-pointer-tracking', type: Boolean, description: 'Run dataflow analysis with pointer tracking', defaultValue: false },
+    { name: 'max-file-slices', type: Number, description: 'If file has more than passed number of slices, the file is not processed', defaultValue: -1, typeLabel: '{underline number}' },
+    { name: 'threshold', alias: 't', type: Number, description: 'How many re-visits of the same node are ok?', defaultValue: undefined, typeLabel: '{underline number}' },
+    { name: 'per-file-time-limit', type: Number, description: 'Time limit in milliseconds to process single file (disabled by default)', defaultValue: undefined, typeLabel: '{underline number}' },
+    { name: 'sampling-strategy', type: String, description: 'Which strategy to use, when sampling is enabled', defaultValue: 'random', typeLabel: '{underline random/edquidistant}' },
 ];
 exports.benchmarkHelperOptions = [
     { name: 'verbose', alias: 'v', type: Boolean, description: 'Run with verbose logging [do not use for the real benchmark as this affects the time measurements, but only to find errors]' },
@@ -30,7 +35,11 @@ exports.benchmarkHelperOptions = [
     { name: 'run-num', alias: 'r', type: Number, description: 'The n-th time that the file with the given file-id is being benchmarked' },
     { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
     { name: 'output', alias: 'o', type: String, description: 'File to write the measurements to (appends a single line in JSON format)', typeLabel: '{underline file}' },
-    { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' }
+    { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' },
+    { name: 'enable-pointer-tracking', type: Boolean, description: 'Run dataflow analysis with pointer tracking', defaultValue: false },
+    { name: 'max-slices', type: Number, description: 'If file has more than passed number of slices, the file is not processed', defaultValue: -1, typeLabel: '{underline number}' },
+    { name: 'threshold', alias: 't', type: Number, description: 'How many re-visits of the same node are ok?', defaultValue: undefined, typeLabel: '{underline number}' },
+    { name: 'sampling-strategy', type: String, description: 'Which strategy to use, when sampling is enabled', defaultValue: 'random', typeLabel: '{underline random/edquidistant}' },
 ];
 exports.exportQuadsOptions = [
     { name: 'verbose', alias: 'v', type: Boolean, description: 'Run with verbose logging' },

package/cli/repl/commands/repl-commands.js CHANGED Viewed

@@ -81,6 +81,8 @@ const _commands = {
     'normalize*': repl_normalize_1.normalizeStarCommand,
     'dataflow': repl_dataflow_1.dataflowCommand,
     'dataflow*': repl_dataflow_1.dataflowStarCommand,
+    'dataflowsimple': repl_dataflow_1.dataflowSimplifiedCommand,
+    'dataflowsimple*': repl_dataflow_1.dataflowSimpleStarCommand,
     'controlflow': repl_cfg_1.controlflowCommand,
     'controlflow*': repl_cfg_1.controlflowStarCommand,
     'lineage': repl_lineage_1.lineageCommand,

package/cli/repl/commands/repl-dataflow.d.ts CHANGED Viewed

@@ -1,3 +1,5 @@
 import type { ReplCommand } from './repl-main';
 export declare const dataflowCommand: ReplCommand;
 export declare const dataflowStarCommand: ReplCommand;
+export declare const dataflowSimplifiedCommand: ReplCommand;
+export declare const dataflowSimpleStarCommand: ReplCommand;