@eagleoutice/flowr 2.0.2 → 2.0.4

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -51,6 +51,7 @@ export declare class BenchmarkSlicer {
  /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
  private readonly commonMeasurements;
  private readonly perSliceMeasurements;
+ private readonly deltas;
  private readonly shell;
  private stats;
  private loadedXml;
@@ -19,6 +19,7 @@ const default_pipelines_1 = require("../core/steps/pipeline/default-pipelines");
  const retriever_1 = require("../r-bridge/retriever");
  const collect_all_1 = require("../slicing/criterion/collect-all");
  const visitor_1 = require("../r-bridge/lang-4.x/ast/model/processing/visitor");
+ const size_of_1 = require("./stats/size-of");
  exports.benchmarkLogger = log_1.log.getSubLogger({ name: 'benchmark' });
  /**
  * A slicer that can be used to slice exactly one file (multiple times).
@@ -33,6 +34,7 @@ class BenchmarkSlicer {
  /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
  commonMeasurements = new stopwatch_1.Measurements();
  perSliceMeasurements = new Map();
+ deltas = new Map();
  shell;
  stats;
  loadedXml;
@@ -105,8 +107,8 @@ class BenchmarkSlicer {
  const split = loadedContent.split('\n');
  const nonWhitespace = (0, strings_1.withoutWhitespace)(loadedContent).length;
  this.stats = {
- commonMeasurements: new Map(),
  perSliceMeasurements: this.perSliceMeasurements,
+ memory: this.deltas,
  request,
  input: {
  numberOfLines: split.length,
@@ -124,8 +126,15 @@ class BenchmarkSlicer {
  numberOfNodes: [...this.dataflow.graph.vertices(true)].length,
  numberOfEdges: numberOfEdges,
  numberOfCalls: numberOfCalls,
- numberOfFunctionDefinitions: numberOfDefinitions
- }
+ numberOfFunctionDefinitions: numberOfDefinitions,
+ sizeOfObject: (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph)
+ },
+ // these are all properly initialized in finish()
+ commonMeasurements: new Map(),
+ retrieveTimePerToken: { raw: 0, normalized: 0 },
+ normalizeTimePerToken: { raw: 0, normalized: 0 },
+ dataflowTimePerToken: { raw: 0, normalized: 0 },
+ totalCommonTimePerToken: { raw: 0, normalized: 0 }
  };
  }
  /**
@@ -177,7 +186,15 @@ class BenchmarkSlicer {
  }
  /** Bridging the gap between the new internal and the old names for the benchmarking */
  async measureCommonStep(expectedStep, keyToMeasure) {
+ const memoryInit = process.memoryUsage();
  const { result } = await this.commonMeasurements.measureAsync(keyToMeasure, () => this.pipeline.nextStep(expectedStep));
+ const memoryEnd = process.memoryUsage();
+ this.deltas.set(keyToMeasure, {
+ heap: memoryEnd.heapUsed - memoryInit.heapUsed,
+ rss: memoryEnd.rss - memoryInit.rss,
+ external: memoryEnd.external - memoryInit.external,
+ buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
+ });
  return result;
  }
  async measureSliceStep(expectedStep, measure, keyToMeasure) {
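The patched `measureCommonStep` above records a memory delta around each pipeline step. A minimal, self-contained sketch of that pattern (the wrapper name is illustrative, not flowR's API; `process.memoryUsage()` is the standard Node.js call):

```ts
// Snapshot process.memoryUsage() before and after an async step and store
// the field-wise differences, as the patched measureCommonStep does.
// Deltas can be negative if garbage collection runs during the step.
interface MemoryDelta { heap: number; rss: number; external: number; buffs: number }

async function withMemoryDelta<T>(step: () => Promise<T>): Promise<{ result: T, delta: MemoryDelta }> {
	const before = process.memoryUsage();
	const result = await step();
	const after = process.memoryUsage();
	return {
		result,
		delta: {
			heap:     after.heapUsed     - before.heapUsed,
			rss:      after.rss          - before.rss,
			external: after.external     - before.external,
			buffs:    after.arrayBuffers - before.arrayBuffers
		}
	};
}
```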
@@ -221,6 +238,25 @@ class BenchmarkSlicer {
  this.finished = true;
  }
  this.stats.commonMeasurements = this.commonMeasurements.get();
+ const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
+ const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
+ const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
+ this.stats.retrieveTimePerToken = {
+ raw: retrieveTime / this.stats.input.numberOfRTokens,
+ normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
+ };
+ this.stats.normalizeTimePerToken = {
+ raw: normalizeTime / this.stats.input.numberOfRTokens,
+ normalized: normalizeTime / this.stats.input.numberOfNormalizedTokens
+ };
+ this.stats.dataflowTimePerToken = {
+ raw: dataflowTime / this.stats.input.numberOfRTokens,
+ normalized: dataflowTime / this.stats.input.numberOfNormalizedTokens
+ };
+ this.stats.totalCommonTimePerToken = {
+ raw: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
+ normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
+ };
  return {
  stats: this.stats,
  parse: this.loadedXml,
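The `finish()` additions above derive per-token costs by dividing each phase's elapsed time by the input's two token counts. A small sketch of that derivation (the `TimePerToken` shape mirrors the diff; the numbers in the example are made up):

```ts
// `raw` divides by the R-token count of the original source, `normalized`
// by the token count of the normalized AST, matching the fields in the diff.
interface TimePerToken<T = number> { raw: T; normalized: T }

function perToken(elapsedNanos: number, numberOfRTokens: number, numberOfNormalizedTokens: number): TimePerToken {
	return {
		raw:        elapsedNanos / numberOfRTokens,
		normalized: elapsedNanos / numberOfNormalizedTokens
	};
}

// e.g. a 1 ms dataflow phase on 200 R tokens / 350 normalized tokens:
perToken(1_000_000, 200, 350); // { raw: 5000, normalized: ≈2857.1 } ns per token
```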
@@ -1,4 +1,5 @@
  import type { SummarizedSlicerStats, UltimateSlicerStats } from '../summarizer/data';
+ export declare function formatNanoseconds(nanoseconds: bigint | number): string;
  /**
  * Converts the given stats to a human-readable string.
  * You may have to {@link summarizeSlicerStats | summarize} the stats first.
@@ -1,26 +1,29 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.ultimateStats2String = exports.stats2string = void 0;
+ exports.ultimateStats2String = exports.stats2string = exports.formatNanoseconds = void 0;
  const assert_1 = require("../../util/assert");
  const padSize = 15;
  function pad(string) {
  return String(string).padStart(padSize, ' ');
  }
- function divWithRest(dividend, divisor) {
- return [dividend / divisor, dividend % divisor];
- }
  function formatNanoseconds(nanoseconds) {
  if (nanoseconds < 0) {
  return '??';
  }
- const [seconds, rest] = divWithRest(typeof nanoseconds === 'number' ? BigInt(Math.round(nanoseconds)) : nanoseconds, BigInt(1e9));
- const [milliseconds, remainingNanoseconds] = divWithRest(rest, BigInt(1e6));
- const secondsStr = seconds > 0 ? `${String(seconds).padStart(2, '0')}.` : '';
- const millisecondsStr = seconds > 0 ? `${String(milliseconds).padStart(3, '0')}:` : `${String(milliseconds)}:`;
- const nanoStr = String(remainingNanoseconds).padEnd(3, '0').substring(0, 3);
- const unit = seconds === 0n ? 'ms' : ' s'; /* space for padding */
- return pad(`${secondsStr}${millisecondsStr}${nanoStr}${unit}`);
+ const wholeNanos = typeof nanoseconds === 'bigint' ? nanoseconds : BigInt(Math.round(nanoseconds));
+ const nanos = wholeNanos % BigInt(1e+6);
+ const wholeMillis = wholeNanos / BigInt(1e+6);
+ const millis = wholeMillis % BigInt(1000);
+ const wholeSeconds = wholeMillis / BigInt(1000);
+ if (wholeSeconds > 0) {
+ const nanoString = nanos > 0 ? `:${nanos}` : '';
+ return pad(`${wholeSeconds}.${String(millis).padStart(3, '0')}${nanoString} s`);
+ }
+ else {
+ return pad(`${millis}:${String(nanos).padStart(6, '0')}ms`);
+ }
  }
+ exports.formatNanoseconds = formatNanoseconds;
  function print(measurements, key) {
  const time = measurements.get(key);
  (0, assert_1.guard)(time !== undefined, `Measurement for ${JSON.stringify(key)} not found`);
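Worked through, the rewritten formatter splits a duration into leftover nanoseconds, milliseconds, and whole seconds via bigint division and picks one of two layouts. A usage sketch of the now-exported function (the import path is an assumption inferred from the declaration file above; the outputs are left-padded to 15 characters by `pad`):

```ts
import { formatNanoseconds } from '@eagleoutice/flowr/benchmark/stats/print'; // path is an assumption

// Below one second: "<millis>:<nanos padded to 6>ms"
formatNanoseconds(5_000_000n);     // "     5:000000ms"
// At or above one second: "<secs>.<millis padded to 3>[:<leftover nanos>] s"
formatNanoseconds(1_234_567_890n); // " 1.234:567890 s"
// Negative durations are treated as unknown:
formatNanoseconds(-1);             // "??"
```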
@@ -61,6 +64,16 @@ function printCountSummarizedMeasurements(stats) {
  const range = `${stats.min} - ${stats.max}`.padStart(padSize, ' ');
  return `${range} (median: ${stats.median}, mean: ${stats.mean}, std: ${stats.std})`;
  }
+ const units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'];
+ // based on https://stackoverflow.com/a/39906526
+ function convertNumberToNiceBytes(x) {
+ let n = Math.abs(x);
+ let l = 0;
+ while (n >= 1024 && ++l) {
+ n = n / 1024;
+ }
+ return pad((x < 0 ? '-' : '') + n.toFixed(n < 10 && l > 0 ? 1 : 0) + ' ' + units[l]);
+ }
  /**
  * Converts the given stats to a human-readable string.
  * You may have to {@link summarizeSlicerStats | summarize} the stats first.
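The new `convertNumberToNiceBytes` helper walks up the binary prefixes and keeps one decimal only for small scaled values. A self-contained mirror of its logic (padding omitted) with worked examples:

```ts
const units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'];

function niceBytes(x: number): string {
	let n = Math.abs(x);
	let l = 0;
	while (n >= 1024) { // equivalent to the `n >= 1024 && ++l` loop in the diff
		n /= 1024;
		l++;
	}
	// one decimal only when scaled and below 10, as in the original
	return (x < 0 ? '-' : '') + n.toFixed(n < 10 && l > 0 ? 1 : 0) + ' ' + units[l];
}

niceBytes(512);     // "512 bytes"
niceBytes(1536);    // "1.5 KiB"
niceBytes(1048576); // "1.0 MiB"
niceBytes(-2048);   // "-2.0 KiB"
```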
@@ -70,16 +83,30 @@ function stats2string(stats) {
  Request: ${JSON.stringify(stats.request)}
  Shell init time: ${print(stats.commonMeasurements, 'initialize R session')}
  AST retrieval: ${print(stats.commonMeasurements, 'retrieve AST from R code')}
+ AST retrieval per token: ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
+ AST retrieval per R token: ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
  AST normalization: ${print(stats.commonMeasurements, 'normalize R AST')}
+ AST normalization per token: ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
+ AST normalization per R token:${formatNanoseconds(stats.normalizeTimePerToken.raw)}
  Dataflow creation: ${print(stats.commonMeasurements, 'produce dataflow information')}
+ Dataflow creation per token: ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
+ Dataflow creation per R token:${formatNanoseconds(stats.dataflowTimePerToken.raw)}
+ Total common time per token: ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
+ Total common time per R token:${formatNanoseconds(stats.totalCommonTimePerToken.raw)}

  Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:`;
  if (stats.perSliceMeasurements.numberOfSlices > 0) {
  result += `
- Total: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
- Slice creation: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
- Reconstruction: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'reconstruct code')}
- Used Slice Criteria Sizes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
+ Total: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
+ Slice creation: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
+ Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.normalized)}
+ Slice creation per R token in slice:${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.raw)}
+ Reconstruction: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'reconstruct code')}
+ Reconstruction per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.reconstructTimePerToken.normalized)}
+ Reconstruction per R token in slice:${formatSummarizedTimeMeasure(stats.perSliceMeasurements.reconstructTimePerToken.raw)}
+ Total per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.normalized)}
+ Total per R token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.raw)}
+ Used Slice Criteria Sizes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
  Result Slice Sizes:
  Number of lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.lines)}
  Number of non-empty lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonEmptyLines)}
@@ -113,7 +140,8 @@ Dataflow:
  Number of nodes: ${pad(stats.dataflow.numberOfNodes)}
  Number of edges: ${pad(stats.dataflow.numberOfEdges)}
  Number of calls: ${pad(stats.dataflow.numberOfCalls)}
- Number of function defs: ${pad(stats.dataflow.numberOfFunctionDefinitions)}`;
+ Number of function defs: ${pad(stats.dataflow.numberOfFunctionDefinitions)}
+ Size of graph: ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
  }
  exports.stats2string = stats2string;
  function ultimateStats2String(stats) {
@@ -122,15 +150,29 @@ function ultimateStats2String(stats) {
  Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
  Shell init time: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
  AST retrieval: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
+ AST retrieval per token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
+ AST retrieval per R token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
  AST normalization: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
+ AST normalization per token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
+ AST normalization per R token:${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
  Dataflow creation: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
+ Dataflow creation per token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
+ Dataflow creation per R token:${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
+ Total common time per token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
+ Total common time per R token:${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}

  Slice summary for:
- Total: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
- Slice creation: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
- Reconstruction: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('reconstruct code'))}
- Failed to Re-Parse: ${pad(stats.failedToRepParse)}/${stats.totalSlices}
- Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
+ Total: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
+ Slice creation: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
+ Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.sliceTimePerToken.normalized)}
+ Slice creation per R token in slice:${formatSummarizedTimeMeasure(stats.sliceTimePerToken.raw)}
+ Reconstruction: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('reconstruct code'))}
+ Reconstruction per token in slice: ${formatSummarizedTimeMeasure(stats.reconstructTimePerToken.normalized)}
+ Reconstruction per R token in slice:${formatSummarizedTimeMeasure(stats.reconstructTimePerToken.raw)}
+ Total per token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.normalized)}
+ Total per R token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.raw)}
+ Failed to Re-Parse: ${pad(stats.failedToRepParse)}/${stats.totalSlices}
+ Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
  ${reduction2String('Reductions', stats.reduction)}
  ${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}

@@ -153,7 +195,9 @@ Dataflow:
  Number of nodes: ${formatSummarizedMeasure(stats.dataflow.numberOfNodes)}
  Number of edges: ${formatSummarizedMeasure(stats.dataflow.numberOfEdges)}
  Number of calls: ${formatSummarizedMeasure(stats.dataflow.numberOfCalls)}
- Number of function defs: ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}`;
+ Number of function defs: ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}
+ Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
+ `;
  }
  exports.ultimateStats2String = ultimateStats2String;
  function reduction2String(title, reduction) {
@@ -0,0 +1,3 @@
+ import type { DataflowGraph } from '../../dataflow/graph/graph';
+ /** Returns the size of the given df graph in bytes (without sharing in-memory) */
+ export declare function getSizeOfDfGraph(df: DataflowGraph): number;
@@ -0,0 +1,68 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.getSizeOfDfGraph = void 0;
+ const environment_1 = require("../../dataflow/environments/environment");
+ const object_sizeof_1 = __importDefault(require("object-sizeof"));
+ /* we have to kill all processors linked in the default environment as they cannot be serialized and they are shared anyway */
+ function killBuiltInEnv(env) {
+ if (env === undefined) {
+ return undefined;
+ }
+ else if (env.id === environment_1.BuiltInEnvironment.id) {
+ /* in this case, the reference would be shared for sure */
+ return {
+ id: env.id,
+ parent: killBuiltInEnv(env.parent),
+ memory: new Map()
+ };
+ }
+ const memory = new Map();
+ for (const [k, v] of env.memory) {
+ memory.set(k, v.filter(v => !v.kind.startsWith('built-in') && !('processor' in v)));
+ }
+ return {
+ id: env.id,
+ parent: killBuiltInEnv(env.parent),
+ memory
+ };
+ }
+ /** Returns the size of the given df graph in bytes (without sharing in-memory) */
+ function getSizeOfDfGraph(df) {
+ const verts = [];
+ for (const [, v] of df.vertices(true)) {
+ let vertex = v;
+ if (vertex.environment) {
+ vertex = {
+ ...vertex,
+ environment: {
+ ...vertex.environment,
+ current: killBuiltInEnv(v.environment.current)
+ }
+ };
+ }
+ if (vertex.tag === "function-definition" /* VertexType.FunctionDefinition */) {
+ vertex = {
+ ...vertex,
+ subflow: {
+ ...vertex.subflow,
+ environment: {
+ ...vertex.subflow.environment,
+ current: killBuiltInEnv(vertex.subflow.environment.current)
+ }
+ }
+ };
+ }
+ vertex = {
+ ...vertex,
+ /* shared anyway by using constants */
+ tag: 0
+ };
+ verts.push(vertex);
+ }
+ return (0, object_sizeof_1.default)([...verts, ...df.edges()]);
+ }
+ exports.getSizeOfDfGraph = getSizeOfDfGraph;
+ //# sourceMappingURL=size-of.js.map
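The new `size-of` module strips shared built-in environment entries from a copy of each vertex before handing vertices and edges to the `object-sizeof` package, so the estimate reflects only graph-local memory. The core idea in miniature (the vertex shape here is a stand-in, not flowR's type):

```ts
import sizeof from 'object-sizeof'; // the dependency added in this release

// A shared lookup table that many vertices reference; counting it once per
// vertex would wildly overstate the graph's footprint.
const builtIns = new Map<string, unknown>([['print', { processor: () => { /* ... */ } }]]);

const vertex = { id: 0, tag: 'value', environment: builtIns };

// Replace the shared part with an empty stand-in on a *copy*, then measure:
const measurable = { ...vertex, environment: new Map() };
console.log(sizeof(measurable)); // byte estimate for the vertex itself
```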
@@ -2,6 +2,8 @@ import type { SingleSlicingCriterion, SlicingCriteria } from '../../slicing/crit
  import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
  import type { ReconstructionResult } from '../../reconstruct/reconstruct';
  import type { RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge/retriever';
+ import type { TimePerToken } from '../summarizer/data';
+ import type { MergeableRecord } from '../../util/objects';
  export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
  export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number];
  export declare const PerSliceMeasurements: readonly ["static slicing", "reconstruct code", "total"];
@@ -34,6 +36,17 @@ export interface SlicerStatsDataflow<T = number> {
  numberOfEdges: T;
  numberOfCalls: T;
  numberOfFunctionDefinitions: T;
+ sizeOfObject: T;
+ }
+ /**
+ * Please note, that these measurement can be negative as there is no guarantee that the memory usage will increase
+ * due to, e.g., garbage collection.
+ */
+ export interface BenchmarkMemoryMeasurement<T = number> extends MergeableRecord {
+ heap: T;
+ rss: T;
+ external: T;
+ buffs: T;
  }
  /**
  * The statistics that are collected by the {@link BenchmarkSlicer} and used for benchmarking.
@@ -41,7 +54,12 @@ export interface SlicerStatsDataflow<T = number> {
  export interface SlicerStats {
  commonMeasurements: Map<CommonSlicerMeasurements, ElapsedTime>;
  perSliceMeasurements: Map<SlicingCriteria, PerSliceStats>;
+ memory: Map<CommonSlicerMeasurements, BenchmarkMemoryMeasurement>;
  request: RParseRequestFromFile | RParseRequestFromText;
  input: SlicerStatsInput;
  dataflow: SlicerStatsDataflow;
+ retrieveTimePerToken: TimePerToken<number>;
+ normalizeTimePerToken: TimePerToken<number>;
+ dataflowTimePerToken: TimePerToken<number>;
+ totalCommonTimePerToken: TimePerToken<number>;
  }
@@ -31,12 +31,19 @@ export interface Reduction<T = number> {
  numberOfNormalizedTokens: T;
  numberOfDataflowNodes: T;
  }
+ export interface TimePerToken<T = SummarizedMeasurement> {
+ raw: T;
+ normalized: T;
+ }
  export interface SummarizedPerSliceStats {
  /** number of total slicing calls */
  numberOfSlices: number;
  /** statistics on the used slicing criteria (number of ids within criteria etc.) */
  sliceCriteriaSizes: SummarizedMeasurement;
  measurements: Map<PerSliceMeasurements, SummarizedMeasurement>;
+ sliceTimePerToken: TimePerToken;
+ reconstructTimePerToken: TimePerToken;
+ totalPerSliceTimePerToken: TimePerToken;
  reduction: Reduction<SummarizedMeasurement>;
  /** reduction, but without taking into account comments and empty lines */
  reductionNoFluff: Reduction<SummarizedMeasurement>;
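The default type parameter of `TimePerToken` is what lets one shape describe both a single run and a summary: a single run stores plain numbers, the summarized form a `SummarizedMeasurement` per component. Illustratively (the `SummarizedMeasurement` fields are taken from their use elsewhere in this diff; the values are made up):

```ts
interface SummarizedMeasurement { min: number; max: number; median: number; mean: number; std: number }
interface TimePerToken<T = SummarizedMeasurement> { raw: T; normalized: T }

// per-run value, as stored in SlicerStats (TimePerToken<number>):
const oneRun: TimePerToken<number> = { raw: 5000, normalized: 2857 };

// summarized value, as stored in SummarizedPerSliceStats (default parameter):
const summary: TimePerToken = {
	raw:        { min: 4100, max: 5800, median: 5000, mean: 4966, std: 512 },
	normalized: { min: 2300, max: 3400, median: 2857, mean: 2850, std: 330 }
};
```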
@@ -51,6 +58,13 @@ export interface UltimateSlicerStats {
  totalSlices: number;
  commonMeasurements: Map<CommonSlicerMeasurements, SummarizedMeasurement>;
  perSliceMeasurements: Map<PerSliceMeasurements, SummarizedMeasurement>;
+ retrieveTimePerToken: TimePerToken;
+ normalizeTimePerToken: TimePerToken;
+ dataflowTimePerToken: TimePerToken;
+ totalCommonTimePerToken: TimePerToken;
+ sliceTimePerToken: TimePerToken;
+ reconstructTimePerToken: TimePerToken;
+ totalPerSliceTimePerToken: TimePerToken;
  /** sum */
  failedToRepParse: number;
  /** sum */
@@ -1,3 +1,3 @@
  /// <reference types="node" />
- export declare function processRunMeasurement(line: Buffer, fileNum: number, lineNum: number, summarizedText: string, outputPath: string): Promise<void>;
- export declare function processSummarizedFileMeasurement(file: string, summariesFile: string, outputPath: string): void;
+ export declare function processRunMeasurement(line: Buffer, fileNum: number, lineNum: number, textOutputAppendPath: string, rawOutputPath: string): Promise<void>;
+ export declare function processSummarizedRunMeasurement(runNum: number, summarizedFiles: string[], appendPath: string): void;
@@ -3,27 +3,24 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.processSummarizedFileMeasurement = exports.processRunMeasurement = void 0;
+ exports.processSummarizedRunMeasurement = exports.processRunMeasurement = void 0;
  const fs_1 = __importDefault(require("fs"));
  const process_1 = require("../second-phase/process");
  const process_2 = require("./process");
  const assert_1 = require("../../../util/assert");
  const ansi_1 = require("../../../util/ansi");
  const json_1 = require("../../../util/json");
- const files_1 = require("../../../util/files");
  const print_1 = require("../../stats/print");
- async function processRunMeasurement(line, fileNum, lineNum, summarizedText, outputPath) {
+ async function processRunMeasurement(line, fileNum, lineNum, textOutputAppendPath, rawOutputPath) {
  let got = JSON.parse(line.toString());
  console.log(`[file ${fileNum}, line ${lineNum}] Summarize for ${got.filename}`);
  // now we have to recover the maps and bigints :C
  got = {
- filename: got.filename,
- 'file-id': got['file-id'],
- 'run-num': got['run-num'],
+ ...got,
  stats: {
- input: got.stats.input,
- request: got.stats.request,
- dataflow: got.stats.dataflow,
+ ...got.stats,
+ memory: new Map(got.stats.memory
+ .map(([k, v]) => [k, v])),
  commonMeasurements: new Map(got.stats.commonMeasurements
  .map(([k, v]) => {
  (0, assert_1.guard)(v.endsWith('n'), 'Expected a bigint');
@@ -38,28 +35,31 @@ async function processRunMeasurement(line, fileNum, lineNum, summarizedText, out
  let atSliceNumber = 0;
  const summarized = await (0, process_2.summarizeSlicerStats)(got.stats, (criterion, stats) => {
  console.log(`${ansi_1.escape}1F${ansi_1.escape}1G${ansi_1.escape}2K [${++atSliceNumber}/${totalSlices}] Summarizing ${JSON.stringify(criterion)} (reconstructed has ${stats.reconstructedCode.code.length} characters)`);
+ if (stats.reconstructedCode.code.length < 50) {
+ console.log(`Reconstructed code: ${stats.reconstructedCode.code}`);
+ }
  });
- console.log(` - Append raw summary to ${outputPath}`);
- fs_1.default.appendFileSync(outputPath, `${JSON.stringify({
+ console.log(` - Write raw summary to ${rawOutputPath}`);
+ fs_1.default.writeFileSync(rawOutputPath, `${JSON.stringify({
  filename: got.filename,
  'file-id': got['file-id'],
  'run-num': got['run-num'],
  summarize: summarized
  }, json_1.jsonReplacer)}\n`);
- console.log(` - Append textual summary to ${summarizedText}`);
- fs_1.default.appendFileSync(summarizedText, `${(0, print_1.stats2string)(summarized)}\n`);
+ console.log(` - Append textual summary to ${textOutputAppendPath}`);
+ fs_1.default.appendFileSync(textOutputAppendPath, `${(0, print_1.stats2string)(summarized)}\n`);
  }
  exports.processRunMeasurement = processRunMeasurement;
- function processSummarizedFileMeasurement(file, summariesFile, outputPath) {
- console.log(`Summarize all runs for ${file}`);
+ function processSummarizedRunMeasurement(runNum, summarizedFiles, appendPath) {
+ console.log(`Summarizing all file statistics for run ${runNum}`);
  const summaries = [];
- (0, files_1.readLineByLineSync)(summariesFile, l => (0, process_1.processNextSummary)(l, summaries));
- fs_1.default.appendFileSync(outputPath, `${JSON.stringify({
- filename: file,
- summarize: (0, process_1.summarizeAllSummarizedStats)(summaries)
- }, json_1.jsonReplacer)}\n`);
+ for (const file of summarizedFiles) {
+ (0, process_1.processNextSummary)(fs_1.default.readFileSync(file), summaries);
+ }
+ fs_1.default.appendFileSync(appendPath, `${JSON.stringify((0, process_1.summarizeAllSummarizedStats)(summaries), json_1.jsonReplacer)}\n`);
+ console.log(`Appended summary of run ${runNum} to ${appendPath}`);
  }
- exports.processSummarizedFileMeasurement = processSummarizedFileMeasurement;
+ exports.processSummarizedRunMeasurement = processSummarizedRunMeasurement;
  function mapPerSliceStats(k, v) {
  return [k, {
  reconstructedCode: v.reconstructedCode,
@@ -1,4 +1,4 @@
- import type { Reduction, SummarizedSlicerStats } from '../data';
+ import type { Reduction, SummarizedSlicerStats, TimePerToken } from '../data';
  import type { SummarizedMeasurement } from '../../../util/summarizer';
  import type { PerSliceStats, SlicerStats } from '../../stats/stats';
  import type { SlicingCriteria } from '../../../slicing/criterion/parse';
@@ -9,3 +9,5 @@ import type { SlicingCriteria } from '../../../slicing/criterion/parse';
  export declare function summarizeSlicerStats(stats: SlicerStats, report?: (criteria: SlicingCriteria, stats: PerSliceStats) => void): Promise<Readonly<SummarizedSlicerStats>>;
  export declare function summarizeSummarizedMeasurement(data: SummarizedMeasurement[]): SummarizedMeasurement;
  export declare function summarizeSummarizedReductions(reductions: Reduction<SummarizedMeasurement>[]): Reduction<SummarizedMeasurement>;
+ export declare function summarizeSummarizedTimePerToken(times: TimePerToken[]): TimePerToken;
+ export declare function summarizeTimePerToken(times: TimePerToken<number>[]): TimePerToken;
@@ -26,7 +26,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.summarizeSummarizedReductions = exports.summarizeSummarizedMeasurement = exports.summarizeSlicerStats = void 0;
+ exports.summarizeTimePerToken = exports.summarizeSummarizedTimePerToken = exports.summarizeSummarizedReductions = exports.summarizeSummarizedMeasurement = exports.summarizeSlicerStats = void 0;
  const tmp = __importStar(require("tmp"));
  const fs_1 = __importDefault(require("fs"));
  const defaultmap_1 = require("../../../util/defaultmap");
@@ -92,10 +92,12 @@ function calculateReductionForSlice(input, dataflow, perSlice, ignoreFluff) {
  */
  async function summarizeSlicerStats(stats, report = () => {
  }) {
- const perSliceStats = stats.perSliceMeasurements;
  const collect = new defaultmap_1.DefaultMap(() => []);
  const sizeOfSliceCriteria = [];
  const reParseShellSession = new shell_1.RShell();
+ const sliceTimes = [];
+ const reconstructTimes = [];
+ const totalTimes = [];
  const reductions = [];
  const reductionsNoFluff = [];
  let failedOutputs = 0;
@@ -114,7 +116,7 @@ async function summarizeSlicerStats(stats, report = () => {
  dataflowNodes: []
  };
  let timesHitThreshold = 0;
- for (const [criteria, perSliceStat] of perSliceStats) {
+ for (const [criteria, perSliceStat] of stats.perSliceMeasurements) {
  report(criteria, perSliceStat);
  for (const measure of perSliceStat.measurements) {
  collect.get(measure[0]).push(Number(measure[1]));
@@ -177,6 +179,20 @@ async function summarizeSlicerStats(stats, report = () => {
  };
  reductions.push(calculateReductionForSlice(stats.input, stats.dataflow, perSlice, false));
  reductionsNoFluff.push(calculateReductionForSlice(stats.input, stats.dataflow, perSlice, true));
+ const sliceTime = Number(perSliceStat.measurements.get('static slicing'));
+ const reconstructTime = Number(perSliceStat.measurements.get('reconstruct code'));
+ sliceTimes.push({
+ raw: sliceTime / numberOfRTokens,
+ normalized: sliceTime / numberOfNormalizedTokens
+ });
+ reconstructTimes.push({
+ raw: reconstructTime / numberOfRTokens,
+ normalized: reconstructTime / numberOfNormalizedTokens
+ });
+ totalTimes.push({
+ raw: (sliceTime + reconstructTime) / numberOfRTokens,
+ normalized: (sliceTime + reconstructTime) / numberOfNormalizedTokens
+ });
  }
  catch (e) {
  console.error(` ! Failed to re-parse the output of the slicer for ${JSON.stringify(criteria)}`); //, e
@@ -194,13 +210,16 @@ async function summarizeSlicerStats(stats, report = () => {
  return {
  ...stats,
  perSliceMeasurements: {
- numberOfSlices: perSliceStats.size,
+ numberOfSlices: stats.perSliceMeasurements.size,
  sliceCriteriaSizes: (0, summarizer_1.summarizeMeasurement)(sizeOfSliceCriteria),
  measurements: summarized,
  failedToRepParse: failedOutputs,
  timesHitThreshold,
  reduction: summarizeReductions(reductions),
  reductionNoFluff: summarizeReductions(reductionsNoFluff),
+ sliceTimePerToken: summarizeTimePerToken(sliceTimes),
+ reconstructTimePerToken: summarizeTimePerToken(reconstructTimes),
+ totalPerSliceTimePerToken: summarizeTimePerToken(totalTimes),
  sliceSize: {
  lines: (0, summarizer_1.summarizeMeasurement)(sliceSize.lines),
  nonEmptyLines: (0, summarizer_1.summarizeMeasurement)(sliceSize.nonEmptyLines),
@@ -220,6 +239,7 @@ async function summarizeSlicerStats(stats, report = () => {
  }
  exports.summarizeSlicerStats = summarizeSlicerStats;
  function summarizeSummarizedMeasurement(data) {
+ data = data.filter(assert_1.isNotUndefined);
  const min = data.map(d => d.min).filter(assert_1.isNotUndefined).reduce((a, b) => Math.min(a, b), Infinity);
  const max = data.map(d => d.max).filter(assert_1.isNotUndefined).reduce((a, b) => Math.max(a, b), -Infinity);
  // calculate median of medians (don't just average the median!)
@@ -255,4 +275,18 @@ function summarizeReductions(reductions) {
  numberOfDataflowNodes: (0, summarizer_1.summarizeMeasurement)(reductions.map(r => r.numberOfDataflowNodes).filter(assert_1.isNotUndefined))
  };
  }
+ function summarizeSummarizedTimePerToken(times) {
+ return {
+ raw: summarizeSummarizedMeasurement(times.map(t => t.raw)),
+ normalized: summarizeSummarizedMeasurement(times.map(t => t.normalized)),
+ };
+ }
+ exports.summarizeSummarizedTimePerToken = summarizeSummarizedTimePerToken;
+ function summarizeTimePerToken(times) {
+ return {
+ raw: (0, summarizer_1.summarizeMeasurement)(times.map(t => t.raw)),
+ normalized: (0, summarizer_1.summarizeMeasurement)(times.map(t => t.normalized)),
+ };
+ }
+ exports.summarizeTimePerToken = summarizeTimePerToken;
  //# sourceMappingURL=process.js.map
@@ -47,6 +47,13 @@ function writeGraphOutput(ultimate, outputGraphPath) {
  value: ultimate.reduction.numberOfNormalizedTokens.mean,
  extra: `std: ${ultimate.reduction.numberOfNormalizedTokens.std}`
  });
+ data.push({
+ name: 'memory (df-graph)',
+ unit: 'Bytes',
+ value: Number(ultimate.dataflow.sizeOfObject.mean),
+ range: Number(ultimate.dataflow.sizeOfObject.std),
+ extra: `median: ${(ultimate.dataflow.sizeOfObject.median).toFixed(2)}`
+ });
  // write the output file
  fs_1.default.writeFileSync(outputGraphPath, JSON.stringify(data, json_1.jsonReplacer));
  }
@@ -9,6 +9,14 @@ const stats_1 = require("../../stats/stats");
  function summarizeAllSummarizedStats(stats) {
  const commonMeasurements = new defaultmap_1.DefaultMap(() => []);
  const perSliceMeasurements = new defaultmap_1.DefaultMap(() => []);
+ const sliceTimesPerToken = [];
+ const reconstructTimesPerToken = [];
+ const totalPerSliceTimesPerToken = [];
+ const retrieveTimesPerToken = [];
+ const normalizeTimesPerToken = [];
+ const dataflowTimesPerToken = [];
+ const totalCommonTimesPerToken = [];
+ const memory = new defaultmap_1.DefaultMap(() => []);
  const reductions = [];
  const reductionsNoFluff = [];
  const inputs = [];
@@ -23,6 +31,16 @@ function summarizeAllSummarizedStats(stats) {
  for (const [k, v] of stat.perSliceMeasurements.measurements) {
  perSliceMeasurements.get(k).push(v);
  }
+ sliceTimesPerToken.push(stat.perSliceMeasurements.sliceTimePerToken);
+ reconstructTimesPerToken.push(stat.perSliceMeasurements.reconstructTimePerToken);
+ totalPerSliceTimesPerToken.push(stat.perSliceMeasurements.totalPerSliceTimePerToken);
+ retrieveTimesPerToken.push(stat.retrieveTimePerToken);
+ normalizeTimesPerToken.push(stat.normalizeTimePerToken);
+ dataflowTimesPerToken.push(stat.dataflowTimePerToken);
+ totalCommonTimesPerToken.push(stat.totalCommonTimePerToken);
+ for (const [k, v] of stat.memory) {
+ memory.get(k).push(v);
+ }
  reductions.push(stat.perSliceMeasurements.reduction);
  reductionsNoFluff.push(stat.perSliceMeasurements.reductionNoFluff);
  inputs.push(stat.input);
@@ -36,6 +54,13 @@ function summarizeAllSummarizedStats(stats) {
  totalSlices: totalSlices,
  commonMeasurements: new Map([...commonMeasurements.entries()].map(([k, v]) => [k, (0, summarizer_1.summarizeMeasurement)(v)])),
  perSliceMeasurements: new Map([...perSliceMeasurements.entries()].map(([k, v]) => [k, (0, process_1.summarizeSummarizedMeasurement)(v)])),
+ sliceTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(sliceTimesPerToken),
+ reconstructTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(reconstructTimesPerToken),
+ totalPerSliceTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(totalPerSliceTimesPerToken),
+ retrieveTimePerToken: (0, process_1.summarizeTimePerToken)(retrieveTimesPerToken),
+ normalizeTimePerToken: (0, process_1.summarizeTimePerToken)(normalizeTimesPerToken),
+ dataflowTimePerToken: (0, process_1.summarizeTimePerToken)(dataflowTimesPerToken),
+ totalCommonTimePerToken: (0, process_1.summarizeTimePerToken)(totalCommonTimesPerToken),
  failedToRepParse,
  timesHitThreshold,
  reduction: (0, process_1.summarizeSummarizedReductions)(reductions),
@@ -56,7 +81,8 @@ function summarizeAllSummarizedStats(stats) {
  numberOfNodes: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfNodes)),
  numberOfFunctionDefinitions: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfFunctionDefinitions)),
  numberOfCalls: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfCalls)),
- numberOfEdges: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfEdges))
+ numberOfEdges: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfEdges)),
+ sizeOfObject: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.sizeOfObject))
  }
  };
  }
@@ -71,6 +97,13 @@ function summarizeAllUltimateStats(stats) {
  // average out / summarize other measurements
  commonMeasurements: new Map(stats_1.CommonSlicerMeasurements.map(m => [m, (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.commonMeasurements.get(m)))])),
  perSliceMeasurements: new Map(stats_1.PerSliceMeasurements.map(m => [m, (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.perSliceMeasurements.get(m)))])),
+ sliceTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.sliceTimePerToken)),
+ reconstructTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.reconstructTimePerToken)),
+ totalPerSliceTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.totalPerSliceTimePerToken)),
+ retrieveTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.retrieveTimePerToken)),
+ normalizeTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.normalizeTimePerToken)),
+ dataflowTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.dataflowTimePerToken)),
+ totalCommonTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.totalCommonTimePerToken)),
  reduction: (0, process_1.summarizeSummarizedReductions)(stats.map(s => s.reduction)),
  reductionNoFluff: (0, process_1.summarizeSummarizedReductions)(stats.map(s => s.reductionNoFluff)),
  input: {
@@ -89,7 +122,8 @@ function summarizeAllUltimateStats(stats) {
  numberOfNodes: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfNodes)),
  numberOfFunctionDefinitions: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfFunctionDefinitions)),
  numberOfCalls: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfCalls)),
- numberOfEdges: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfEdges))
+ numberOfEdges: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfEdges)),
+ sizeOfObject: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.sizeOfObject))
  }
  };
  }
@@ -98,23 +132,19 @@ function processNextSummary(line, allSummarized) {
  let got = JSON.parse(line.toString());
  got = {
  summarize: {
- input: got.summarize.input,
- request: got.summarize.request,
- dataflow: got.summarize.dataflow,
+ ...got.summarize,
+ // restore maps
+ memory: new Map(got.summarize.memory
+ .map(([k, v]) => [k, v])),
  commonMeasurements: new Map(got.summarize.commonMeasurements
  .map(([k, v]) => {
  (0, assert_1.guard)(v.endsWith('n'), 'Expected a bigint');
  return [k, BigInt(v.slice(0, -1))];
  })),
  perSliceMeasurements: {
- numberOfSlices: got.summarize.perSliceMeasurements.numberOfSlices,
- sliceCriteriaSizes: got.summarize.perSliceMeasurements.sliceCriteriaSizes,
+ ...got.summarize.perSliceMeasurements,
+ // restore maps
  measurements: new Map(got.summarize.perSliceMeasurements.measurements),
- reduction: got.summarize.perSliceMeasurements.reduction,
- reductionNoFluff: got.summarize.perSliceMeasurements.reductionNoFluff,
- timesHitThreshold: got.summarize.perSliceMeasurements.timesHitThreshold,
- failedToRepParse: got.summarize.perSliceMeasurements.failedToRepParse,
- sliceSize: got.summarize.perSliceMeasurements.sliceSize
  }
  }
  };
@@ -124,20 +154,12 @@ exports.processNextSummary = processNextSummary;
  function processNextUltimateSummary(line, allSummarized) {
  let got = JSON.parse(line.toString());
  got = {
- summarize: {
- totalRequests: got.summarize.totalRequests,
- totalSlices: got.summarize.totalSlices,
- commonMeasurements: new Map(got.summarize.commonMeasurements),
- perSliceMeasurements: new Map(got.summarize.perSliceMeasurements),
- failedToRepParse: got.summarize.failedToRepParse,
- timesHitThreshold: got.summarize.timesHitThreshold,
- reduction: got.summarize.reduction,
- reductionNoFluff: got.summarize.reductionNoFluff,
- input: got.summarize.input,
- dataflow: got.summarize.dataflow,
- }
+ ...got,
+ // restore maps
+ commonMeasurements: new Map(got.commonMeasurements),
+ perSliceMeasurements: new Map(got.perSliceMeasurements),
  };
- allSummarized.push(got.summarize);
+ allSummarized.push(got);
  }
  exports.processNextUltimateSummary = processNextUltimateSummary;
  //# sourceMappingURL=process.js.map
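These reader changes both follow the same round-trip pattern: `Map`s do not survive `JSON.stringify`/`JSON.parse`, so they are serialized as entry arrays (via the `jsonReplacer` used throughout) and restored with `new Map(...)` while everything else is kept with a spread. In miniature:

```ts
// Maps serialize to entry arrays on write and are rebuilt on read;
// the spread keeps all other (plain) fields exactly as parsed.
const stats = { name: 'x', commonMeasurements: new Map([['total', 42]]) };

const line = JSON.stringify(stats, (_k, v) => v instanceof Map ? [...v.entries()] : v);
const got = JSON.parse(line);
const restored = {
	...got, // keep everything else as parsed
	commonMeasurements: new Map<string, number>(got.commonMeasurements)
};
```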
@@ -28,4 +28,5 @@ export declare class BenchmarkSummarizer extends Summarizer<UltimateSlicerStats,
  preparationPhase(): Promise<void>;
  summarizePhase(): Promise<UltimateSlicerStats>;
  private removeIfExists;
+ private summaryFile;
  }
@@ -13,31 +13,41 @@ const summarizer_1 = require("../../util/summarizer");
  const files_1 = require("../../util/files");
  const json_1 = require("../../util/json");
  const print_1 = require("../stats/print");
+ const defaultmap_1 = require("../../util/defaultmap");
  class BenchmarkSummarizer extends summarizer_1.Summarizer {
  constructor(config) {
  super(config);
  }
  async preparationPhase() {
- this.removeIfExists(`${this.config.intermediateOutputPath}.json`);
+ this.removeIfExists(this.summaryFile());
  this.removeIfExists(this.config.intermediateOutputPath);
  fs_1.default.mkdirSync(this.config.intermediateOutputPath);
- const dirContent = fs_1.default.readdirSync(this.config.inputPath);
- for (let i = 0; i < dirContent.length; i++) {
- const filePath = path_1.default.join(this.config.inputPath, dirContent[i]);
- const outputPath = path_1.default.join(this.config.intermediateOutputPath, dirContent[i]);
+ const filesToSummarize = fs_1.default.readdirSync(this.config.inputPath);
+ const outputPathsPerRun = new defaultmap_1.DefaultMap(() => []);
+ for (let i = 0; i < filesToSummarize.length; i++) {
+ const fileInputPath = path_1.default.join(this.config.inputPath, filesToSummarize[i]);
+ const outputDir = path_1.default.join(this.config.intermediateOutputPath, path_1.default.parse(filesToSummarize[i]).name);
+ fs_1.default.mkdirSync(outputDir);
+ const textOutputPath = path_1.default.join(outputDir, 'summary.log');
  // generate measurements for each run
- await (0, files_1.readLineByLine)(filePath, (line, lineNumber) => (0, input_1.processRunMeasurement)(line, i, lineNumber, `${outputPath}.log`, outputPath));
- // generate combined measurements for the file
- (0, input_1.processSummarizedFileMeasurement)(filePath, outputPath, `${this.config.intermediateOutputPath}.json`);
+ await (0, files_1.readLineByLine)(fileInputPath, (line, lineNumber) => {
+ const runOutputPath = path_1.default.join(outputDir, `run-${lineNumber}.json`);
+ outputPathsPerRun.get(lineNumber).push(runOutputPath);
+ return (0, input_1.processRunMeasurement)(line, i, lineNumber, textOutputPath, runOutputPath);
+ });
+ }
+ // generate combined measurements for each file per run
+ for (const [run, paths] of outputPathsPerRun.entries()) {
+ (0, input_1.processSummarizedRunMeasurement)(run, paths, this.summaryFile());
  }
  this.log('Done summarizing');
  }
  // eslint-disable-next-line @typescript-eslint/require-await -- just to obey the structure
  async summarizePhase() {
- this.log(`Summarizing all summaries from ${this.config.inputPath}...`);
+ this.log(`Summarizing all summaries from ${this.summaryFile()}...`);
  this.removeIfExists(this.config.outputPath);
  const summaries = [];
- (0, files_1.readLineByLineSync)(`${this.config.intermediateOutputPath}.json`, (l) => (0, process_1.processNextUltimateSummary)(l, summaries));
+ (0, files_1.readLineByLineSync)(this.summaryFile(), (l) => (0, process_1.processNextUltimateSummary)(l, summaries));
  const ultimate = (0, process_1.summarizeAllUltimateStats)(summaries);
  this.log(`Writing ultimate summary to ${this.config.outputPath}`);
  fs_1.default.writeFileSync(this.config.outputPath, JSON.stringify(ultimate, json_1.jsonReplacer));
@@ -53,6 +63,9 @@ class BenchmarkSummarizer extends summarizer_1.Summarizer {
  fs_1.default.rmSync(path, { recursive: true });
  }
  }
+ summaryFile() {
+ return `${this.config.intermediateOutputPath}.json`;
+ }
  }
  exports.BenchmarkSummarizer = BenchmarkSummarizer;
  //# sourceMappingURL=summarizer.js.map
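The restructured `preparationPhase` pivots the intermediate outputs: instead of one combined file per input, it collects the per-line (i.e., per-run) JSON paths of all inputs and then summarizes each run across files. A sketch of the grouping idiom with a map that creates missing entries on demand (flowR ships its own `DefaultMap` utility; this stand-in only illustrates the behavior):

```ts
// Map that lazily creates a default value on first access, so grouping
// becomes a single push per (run, path) pair.
class DefaultMap<K, V> extends Map<K, V> {
	constructor(private readonly makeDefault: () => V) { super(); }
	get(key: K): V {
		if (!super.has(key)) { super.set(key, this.makeDefault()); }
		return super.get(key) as V;
	}
}

const outputPathsPerRun = new DefaultMap<number, string[]>(() => []);
outputPathsPerRun.get(0).push('out/file-a/run-0.json');
outputPathsPerRun.get(0).push('out/file-b/run-0.json');
outputPathsPerRun.get(1).push('out/file-a/run-1.json');
// run 0 now aggregates the first line of every input file, run 1 the second, ...
```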
@@ -9,6 +9,7 @@ import type { DataflowGraph } from '../graph/graph';
  import type { ControlDependency } from '../info';
  export declare function makeReferenceMaybe(ref: IdentifierReference, graph: DataflowGraph, environments: REnvironmentInformation, includeDefs: boolean, defaultCd?: ControlDependency | undefined): IdentifierReference;
  export declare function makeAllMaybe(references: readonly IdentifierReference[] | undefined, graph: DataflowGraph, environments: REnvironmentInformation, includeDefs: boolean, defaultCd?: ControlDependency | undefined): IdentifierReference[];
+ export type EnvironmentMemory = Map<Identifier, IdentifierDefinition[]>;
  export interface IEnvironment {
  /** unique and internally generated identifier -- will not be used for comparison but assists debugging for tracking identities */
  readonly id: string;
@@ -17,7 +18,7 @@ export interface IEnvironment {
  /**
  * Maps to exactly one definition of an identifier if the source is known, otherwise to a list of all possible definitions
  */
- memory: Map<Identifier, IdentifierDefinition[]>;
+ memory: EnvironmentMemory;
  }
  export declare class Environment implements IEnvironment {
  readonly id: string;
@@ -2,7 +2,6 @@
  * An edge consist of:
  * - the target node (i.e., the variable or processing node),
  * - a type (if it is read or used in the context), and
- * - an attribute (if this edge exists for every program execution or if it is only one possible execution path).
  */
  export interface DataflowGraphEdge {
  types: EdgeTypeBits;
@@ -40,8 +39,8 @@ export declare const enum EdgeTypeName {
  DefinedBy = "defined-by",
  Calls = "calls",
  Returns = "returns",
- DefinesOnCall = "defined-by-on-call",
- DefinedByOnCall = "defines-on-call",
+ DefinesOnCall = "defines-on-call",
+ DefinedByOnCall = "defined-by-on-call",
  Argument = "argument",
  SideEffectOnCall = "side-effect-on-call",
  NonStandardEvaluation = "non-standard-evaluation"
@@ -6,8 +6,8 @@ const edgeTypeToHumanReadableName = new Map([
  [2 /* EdgeType.DefinedBy */, "defined-by" /* EdgeTypeName.DefinedBy */],
  [4 /* EdgeType.Calls */, "calls" /* EdgeTypeName.Calls */],
  [8 /* EdgeType.Returns */, "returns" /* EdgeTypeName.Returns */],
- [16 /* EdgeType.DefinesOnCall */, "defined-by-on-call" /* EdgeTypeName.DefinesOnCall */],
- [32 /* EdgeType.DefinedByOnCall */, "defines-on-call" /* EdgeTypeName.DefinedByOnCall */],
+ [16 /* EdgeType.DefinesOnCall */, "defines-on-call" /* EdgeTypeName.DefinesOnCall */],
+ [32 /* EdgeType.DefinedByOnCall */, "defined-by-on-call" /* EdgeTypeName.DefinedByOnCall */],
  [64 /* EdgeType.Argument */, "argument" /* EdgeTypeName.Argument */],
  [128 /* EdgeType.SideEffectOnCall */, "side-effect-on-call" /* EdgeTypeName.SideEffectOnCall */],
  [256 /* EdgeType.NonStandardEvaluation */, "non-standard-evaluation" /* EdgeTypeName.NonStandardEvaluation */]
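The two hunks above fix swapped display names: `EdgeType.DefinesOnCall` (bit 16) had printed as "defined-by-on-call" and vice versa. Since the numeric edge types are powers of two packed into one bit mask, a lookup like this table decodes them; a small sketch with the corrected 2.0.4 mapping (subset of the table above):

```ts
// Edge types are single bits, so one number can carry several of them.
const edgeTypeToName = new Map<number, string>([
	[2,  'defined-by'],
	[4,  'calls'],
	[8,  'returns'],
	[16, 'defines-on-call'],    // corrected in 2.0.4
	[32, 'defined-by-on-call'], // corrected in 2.0.4
	[64, 'argument']
]);

function decodeEdgeTypes(bits: number): string[] {
	return [...edgeTypeToName.entries()]
		.filter(([bit]) => (bits & bit) !== 0)
		.map(([, name]) => name);
}

decodeEdgeTypes(16 | 64); // ['defines-on-call', 'argument']
```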
@@ -7,6 +7,7 @@ const arrays_1 = require("../../util/arrays");
  const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
  const environment_1 = require("../environments/environment");
  const clone_1 = require("../environments/clone");
+ const built_in_1 = require("../environments/built-in");
  function isPositionalArgument(arg) {
  return arg !== r_function_call_1.EmptyArgument && arg.name === undefined;
  }
@@ -159,7 +160,6 @@ class DataflowGraph {
  const environment = vertex.environment === undefined ? fallback : (0, clone_1.cloneEnvironmentInformation)(vertex.environment);
  this.vertexInformation.set(vertex.id, {
  ...vertex,
- when: vertex.controlDependencies ?? 'always',
  environment
  });
  if (asRoot) {
@@ -171,11 +171,12 @@ class DataflowGraph {
  * Will insert a new edge into the graph,
  * if the direction of the edge is of no importance (`same-read-read` or `same-def-def`), source
  * and target will be sorted so that `from` has the lower, and `to` the higher id (default ordering).
+ * Please note, that this will never make edges to {@link BuiltIn} as they are not part of the graph.
  */
  addEdge(from, to, edgeInfo) {
  const { fromId, toId } = extractEdgeIds(from, to);
  const { type, ...rest } = edgeInfo;
- if (fromId === toId) {
+ if (fromId === toId || toId === built_in_1.BuiltIn) {
  return this;
  }
  /* we now that we pass all required arguments */
@@ -11,7 +11,6 @@ function processValue(value, data) {
  graph: new graph_1.DataflowGraph(data.completeAst.idMap).addVertex({
  tag: "value" /* VertexType.Value */,
  id: value.info.id,
- value: value.lexeme,
  controlDependencies: data.controlDependencies
  }),
  exitPoints: [{ nodeId: value.info.id, type: 0 /* ExitPointType.Default */, controlDependencies: data.controlDependencies }],
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@eagleoutice/flowr",
- "version": "2.0.2",
+ "version": "2.0.4",
  "description": "Static Dataflow Analyzer and Program Slicer for the R Programming Language",
  "types": "dist/src/index.d.ts",
  "repository": {
@@ -228,6 +228,7 @@
  "n-readlines": "^1.0.1",
  "n3": "^1.17.2",
  "object-hash": "^3.0.0",
+ "object-sizeof": "^2.6.4",
  "rotating-file-stream": "^3.1.1",
  "semver": "^7.5.4",
  "tar": "^7.1.0",
@@ -7,7 +7,6 @@ type Mark = MarkVertex | MarkEdge;
  interface MermaidGraph {
  nodeLines: string[];
  edgeLines: string[];
- hasBuiltIn: boolean;
  includeEnvironments: boolean;
  mark: ReadonlySet<Mark> | undefined;
  /** in the form of from-\>to because I am lazy, see {@link encodeEdge} */
@@ -7,7 +7,6 @@ const graph_1 = require("../../dataflow/graph/graph");
  const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
  const edge_1 = require("../../dataflow/graph/edge");
  const environment_1 = require("../../dataflow/environments/environment");
- const built_in_1 = require("../../dataflow/environments/built-in");
  function formatRange(range) {
  if (range === undefined) {
  return '??-??';
@@ -151,9 +150,6 @@ function vertexToMermaid(info, mermaid, id, idPrefix, mark) {
  if (edgeTypes.has('CD-True')) {
  mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:gray,color:gray;`);
  }
- if (target === built_in_1.BuiltIn) {
- mermaid.hasBuiltIn = true;
- }
  }
  }
  if (info.tag === 'function-definition') {
@@ -162,15 +158,12 @@ function vertexToMermaid(info, mermaid, id, idPrefix, mark) {
  }
  // make the passing of root ids more performant again
  function graphToMermaidGraph(rootIds, { graph, prefix = 'flowchart TD', idPrefix = '', includeEnvironments = true, mark, rootGraph, presentEdges = new Set() }) {
- const mermaid = { nodeLines: prefix === null ? [] : [prefix], edgeLines: [], presentEdges, hasBuiltIn: false, mark, rootGraph: rootGraph ?? graph, includeEnvironments };
+ const mermaid = { nodeLines: prefix === null ? [] : [prefix], edgeLines: [], presentEdges, mark, rootGraph: rootGraph ?? graph, includeEnvironments };
  for (const [id, info] of graph.vertices(true)) {
  if (rootIds.has(id)) {
  vertexToMermaid(info, mermaid, id, idPrefix, mark);
  }
  }
- if (mermaid.hasBuiltIn) {
- mermaid.nodeLines.push(` ${idPrefix}${built_in_1.BuiltIn}["Built-in"]`);
- }
  return mermaid;
  }
  function graphToMermaid(config) {
package/util/version.js CHANGED
@@ -3,7 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
  exports.flowrVersion = void 0;
  const semver_1 = require("semver");
  // this is automatically replaced with the current version by release-it
- const version = '2.0.2';
+ const version = '2.0.4';
  function flowrVersion() {
  return new semver_1.SemVer(version);
  }