npm - @eagleoutice/flowr - Versions diffs - 2.0.1 → 2.0.3 - Mend

@eagleoutice/flowr 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/benchmark/slicer.d.ts +1 -0
package/benchmark/slicer.js +69 -8
package/benchmark/stats/print.d.ts +1 -0
package/benchmark/stats/print.js +94 -31
package/benchmark/stats/size-of.d.ts +3 -0
package/benchmark/stats/size-of.js +68 -0
package/benchmark/stats/stats.d.ts +23 -0
package/benchmark/summarizer/data.d.ts +24 -1
package/benchmark/summarizer/first-phase/input.d.ts +2 -2
package/benchmark/summarizer/first-phase/input.js +21 -21
package/benchmark/summarizer/first-phase/process.d.ts +4 -2
package/benchmark/summarizer/first-phase/process.js +120 -33
package/benchmark/summarizer/second-phase/graph.js +7 -0
package/benchmark/summarizer/second-phase/process.js +65 -27
package/benchmark/summarizer/summarizer.d.ts +1 -0
package/benchmark/summarizer/summarizer.js +23 -10
package/cli/repl/commands/commands.js +19 -1
package/cli/slicer-app.js +1 -1
package/dataflow/environments/append.js +1 -2
package/dataflow/environments/built-in.js +2 -1
package/dataflow/environments/clone.js +1 -1
package/dataflow/environments/diff.d.ts +1 -1
package/dataflow/environments/diff.js +16 -18
package/dataflow/environments/environment.d.ts +6 -8
package/dataflow/environments/environment.js +5 -8
package/dataflow/environments/identifier.d.ts +2 -1
package/dataflow/environments/overwrite.js +1 -2
package/dataflow/environments/scoping.js +1 -1
package/dataflow/graph/diff.js +11 -6
package/dataflow/graph/edge.d.ts +2 -3
package/dataflow/graph/edge.js +2 -2
package/dataflow/graph/graph.d.ts +6 -2
package/dataflow/graph/graph.js +16 -9
package/dataflow/graph/vertex.d.ts +2 -1
package/dataflow/info.d.ts +10 -1
package/dataflow/info.js +54 -2
package/dataflow/internal/linker.d.ts +1 -1
package/dataflow/internal/linker.js +1 -2
package/dataflow/internal/process/functions/call/built-in/built-in-assignment.js +5 -5
package/dataflow/internal/process/functions/call/built-in/built-in-for-loop.js +1 -1
package/dataflow/internal/process/functions/call/built-in/built-in-function-definition.js +21 -25
package/dataflow/internal/process/functions/call/built-in/built-in-get.js +6 -1
package/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.js +10 -8
package/dataflow/internal/process/functions/call/built-in/built-in-logical-bin-op.d.ts +1 -0
package/dataflow/internal/process/functions/call/built-in/built-in-logical-bin-op.js +1 -2
package/dataflow/internal/process/functions/call/built-in/built-in-while-loop.js +1 -1
package/dataflow/internal/process/functions/call/default-call-handling.js +1 -1
package/dataflow/internal/process/functions/call/unnamed-call-handling.js +1 -1
package/dataflow/internal/process/process-value.js +0 -1
package/dataflow/processor.d.ts +2 -3
package/package.json +5 -2
package/r-bridge/data/data.d.ts +1 -1
package/r-bridge/data/data.js +1 -1
package/r-bridge/lang-4.x/ast/model/nodes/r-function-call.d.ts +2 -2
package/r-bridge/lang-4.x/ast/model/operators.js +1 -1
package/r-bridge/lang-4.x/ast/model/processing/decorate.js +1 -1
package/r-bridge/lang-4.x/ast/model/processing/stateful-fold.js +1 -1
package/r-bridge/lang-4.x/ast/model/processing/visitor.js +2 -2
package/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-call.js +2 -2
package/r-bridge/lang-4.x/ast/parser/xml/internal/operators/normalize-binary.js +1 -1
package/r-bridge/retriever.d.ts +1 -1
package/r-bridge/retriever.js +3 -2
package/r-bridge/shell.js +2 -1
package/reconstruct/reconstruct.d.ts +3 -3
package/reconstruct/reconstruct.js +40 -41
package/slicing/criterion/filters/all-variables.js +1 -1
package/slicing/static/static-slicer.js +2 -2
package/statistics/features/common-syntax-probability.js +1 -1
package/statistics/features/supported/control-flow/control-flow.js +1 -1
package/statistics/features/supported/defined-functions/defined-functions.js +1 -1
package/statistics/features/supported/loops/loops.js +1 -1
package/statistics/features/supported/used-functions/used-functions.js +1 -1
package/util/assert.d.ts +1 -1
package/util/mermaid/ast.js +4 -0
package/util/mermaid/dfg.d.ts +0 -1
package/util/mermaid/dfg.js +16 -13
package/util/mermaid/mermaid.js +21 -1
package/util/version.js +1 -1

package/benchmark/slicer.d.ts CHANGED Viewed

@@ -51,6 +51,7 @@ export declare class BenchmarkSlicer {
     /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
     private readonly commonMeasurements;
     private readonly perSliceMeasurements;
+    private readonly deltas;
     private readonly shell;
     private stats;
     private loadedXml;

package/benchmark/slicer.js CHANGED Viewed

@@ -17,8 +17,9 @@ const strings_1 = require("../util/strings");
 const shell_1 = require("../r-bridge/shell");
 const default_pipelines_1 = require("../core/steps/pipeline/default-pipelines");
 const retriever_1 = require("../r-bridge/retriever");
-const collect_1 = require("../r-bridge/lang-4.x/ast/model/collect");
 const collect_all_1 = require("../slicing/criterion/collect-all");
+const visitor_1 = require("../r-bridge/lang-4.x/ast/model/processing/visitor");
+const size_of_1 = require("./stats/size-of");
 exports.benchmarkLogger = log_1.log.getSubLogger({ name: 'benchmark' });
 /**
  * A slicer that can be used to slice exactly one file (multiple times).
@@ -33,6 +34,7 @@ class BenchmarkSlicer {
     /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
     commonMeasurements = new stopwatch_1.Measurements();
     perSliceMeasurements = new Map();
+    deltas = new Map();
     shell;
     stats;
     loadedXml;
@@ -67,6 +69,7 @@ class BenchmarkSlicer {
         const loadedContent = request.request === 'text' ? request.content : fs_1.default.readFileSync(request.content, 'utf-8');
         // retrieve number of R tokens - flowr_parsed should still contain the last parsed code
         const numberOfRTokens = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(this.shell);
+        const numberOfRTokensNoComments = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(this.shell, true);
         (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined after initialization');
         (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined after initialization');
         // collect dataflow graph size
@@ -84,23 +87,54 @@ class BenchmarkSlicer {
                 numberOfDefinitions++;
             }
         }
+        let nodes = 0;
+        let nodesNoComments = 0;
+        let commentChars = 0;
+        let commentCharsNoWhitespace = 0;
+        (0, visitor_1.visitAst)(this.normalizedAst.ast, t => {
+            nodes++;
+            const comments = t.info.additionalTokens?.filter(t => t.type === "RComment" /* RType.Comment */);
+            if (comments && comments.length > 0) {
+                const content = comments.map(c => c.lexeme ?? '').join('');
+                commentChars += content.length;
+                commentCharsNoWhitespace += (0, strings_1.withoutWhitespace)(content).length;
+            }
+            else {
+                nodesNoComments++;
+            }
+            return false;
+        });
+        const split = loadedContent.split('\n');
+        const nonWhitespace = (0, strings_1.withoutWhitespace)(loadedContent).length;
         this.stats = {
-            commonMeasurements: new Map(),
             perSliceMeasurements: this.perSliceMeasurements,
+            memory: this.deltas,
             request,
             input: {
-                numberOfLines: loadedContent.split('\n').length,
+                numberOfLines: split.length,
+                numberOfNonEmptyLines: split.filter(l => l.trim().length > 0).length,
                 numberOfCharacters: loadedContent.length,
-                numberOfNonWhitespaceCharacters: (0, strings_1.withoutWhitespace)(loadedContent).length,
+                numberOfCharactersNoComments: loadedContent.length - commentChars,
+                numberOfNonWhitespaceCharacters: nonWhitespace,
+                numberOfNonWhitespaceCharactersNoComments: nonWhitespace - commentCharsNoWhitespace,
                 numberOfRTokens: numberOfRTokens,
-                numberOfNormalizedTokens: [...(0, collect_1.collectAllIds)(this.normalizedAst.ast)].length
+                numberOfRTokensNoComments: numberOfRTokensNoComments,
+                numberOfNormalizedTokens: nodes,
+                numberOfNormalizedTokensNoComments: nodesNoComments
             },
             dataflow: {
                 numberOfNodes: [...this.dataflow.graph.vertices(true)].length,
                 numberOfEdges: numberOfEdges,
                 numberOfCalls: numberOfCalls,
-                numberOfFunctionDefinitions: numberOfDefinitions
-            }
+                numberOfFunctionDefinitions: numberOfDefinitions,
+                sizeOfObject: (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph)
+            },
+            // these are all properly initialized in finish()
+            commonMeasurements: new Map(),
+            retrieveTimePerToken: { raw: 0, normalized: 0 },
+            normalizeTimePerToken: { raw: 0, normalized: 0 },
+            dataflowTimePerToken: { raw: 0, normalized: 0 },
+            totalCommonTimePerToken: { raw: 0, normalized: 0 }
         };
     }
     /**
@@ -122,7 +156,7 @@ class BenchmarkSlicer {
             timesHitThreshold: 0,
             reconstructedCode: {
                 code: '',
-                autoSelected: 0
+                linesWithAutoSelected: 0
             }
         };
         this.perSliceMeasurements.set(slicingCriteria, stats);
@@ -152,7 +186,15 @@ class BenchmarkSlicer {
     }
     /** Bridging the gap between the new internal and the old names for the benchmarking */
     async measureCommonStep(expectedStep, keyToMeasure) {
+        const memoryInit = process.memoryUsage();
         const { result } = await this.commonMeasurements.measureAsync(keyToMeasure, () => this.pipeline.nextStep(expectedStep));
+        const memoryEnd = process.memoryUsage();
+        this.deltas.set(keyToMeasure, {
+            heap: memoryEnd.heapUsed - memoryInit.heapUsed,
+            rss: memoryEnd.rss - memoryInit.rss,
+            external: memoryEnd.external - memoryInit.external,
+            buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
+        });
         return result;
     }
     async measureSliceStep(expectedStep, measure, keyToMeasure) {
@@ -196,6 +238,25 @@ class BenchmarkSlicer {
             this.finished = true;
         }
         this.stats.commonMeasurements = this.commonMeasurements.get();
+        const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
+        const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
+        const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
+        this.stats.retrieveTimePerToken = {
+            raw: retrieveTime / this.stats.input.numberOfRTokens,
+            normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
+        };
+        this.stats.normalizeTimePerToken = {
+            raw: normalizeTime / this.stats.input.numberOfRTokens,
+            normalized: normalizeTime / this.stats.input.numberOfNormalizedTokens
+        };
+        this.stats.dataflowTimePerToken = {
+            raw: dataflowTime / this.stats.input.numberOfRTokens,
+            normalized: dataflowTime / this.stats.input.numberOfNormalizedTokens
+        };
+        this.stats.totalCommonTimePerToken = {
+            raw: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
+            normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
+        };
         return {
             stats: this.stats,
             parse: this.loadedXml,

package/benchmark/stats/print.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { SummarizedSlicerStats, UltimateSlicerStats } from '../summarizer/data';
+export declare function formatNanoseconds(nanoseconds: bigint | number): string;
 /**
  * Converts the given stats to a human-readable string.
  * You may have to {@link summarizeSlicerStats | summarize} the stats first.

package/benchmark/stats/print.js CHANGED Viewed

@@ -1,26 +1,29 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.ultimateStats2String = exports.stats2string = void 0;
+exports.ultimateStats2String = exports.stats2string = exports.formatNanoseconds = void 0;
 const assert_1 = require("../../util/assert");
 const padSize = 15;
 function pad(string) {
     return String(string).padStart(padSize, ' ');
 }
-function divWithRest(dividend, divisor) {
-    return [dividend / divisor, dividend % divisor];
-}
 function formatNanoseconds(nanoseconds) {
     if (nanoseconds < 0) {
         return '??';
     }
-    const [seconds, rest] = divWithRest(typeof nanoseconds === 'number' ? BigInt(Math.round(nanoseconds)) : nanoseconds, BigInt(1e9));
-    const [milliseconds, remainingNanoseconds] = divWithRest(rest, BigInt(1e6));
-    const secondsStr = seconds > 0 ? `${String(seconds).padStart(2, '0')}.` : '';
-    const millisecondsStr = seconds > 0 ? `${String(milliseconds).padStart(3, '0')}:` : `${String(milliseconds)}:`;
-    const nanoStr = String(remainingNanoseconds).padEnd(3, '0').substring(0, 3);
-    const unit = seconds === 0n ? 'ms' : ' s'; /* space for padding */
-    return pad(`${secondsStr}${millisecondsStr}${nanoStr}${unit}`);
+    const wholeNanos = typeof nanoseconds === 'bigint' ? nanoseconds : BigInt(Math.round(nanoseconds));
+    const nanos = wholeNanos % BigInt(1e+6);
+    const wholeMillis = wholeNanos / BigInt(1e+6);
+    const millis = wholeMillis % BigInt(1000);
+    const wholeSeconds = wholeMillis / BigInt(1000);
+    if (wholeSeconds > 0) {
+        const nanoString = nanos > 0 ? `:${nanos}` : '';
+        return pad(`${wholeSeconds}.${String(millis).padStart(3, '0')}${nanoString} s`);
+    }
+    else {
+        return pad(`${millis}:${String(nanos).padStart(6, '0')}ms`);
+    }
 }
+exports.formatNanoseconds = formatNanoseconds;
 function print(measurements, key) {
     const time = measurements.get(key);
     (0, assert_1.guard)(time !== undefined, `Measurement for ${JSON.stringify(key)} not found`);
@@ -40,6 +43,7 @@ function asPercentage(num) {
     if (isNaN(num)) {
         return '??%';
     }
+    (0, assert_1.guard)(num >= 0 && num <= 1, `Percentage ${num} should be between 0 and 1`);
     return pad(`${roundTo(num * 100, 3)}%`);
 }
 function asFloat(num) {
@@ -60,6 +64,16 @@ function printCountSummarizedMeasurements(stats) {
     const range = `${stats.min} - ${stats.max}`.padStart(padSize, ' ');
     return `${range} (median: ${stats.median}, mean: ${stats.mean}, std: ${stats.std})`;
 }
+const units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'];
+// based on https://stackoverflow.com/a/39906526
+function convertNumberToNiceBytes(x) {
+    let n = Math.abs(x);
+    let l = 0;
+    while (n >= 1024 && ++l) {
+        n = n / 1024;
+    }
+    return pad((x < 0 ? '-' : '') + n.toFixed(n < 10 && l > 0 ? 1 : 0) + ' ' + units[l]);
+}
 /**
  * Converts the given stats to a human-readable string.
  * You may have to {@link summarizeSlicerStats | summarize} the stats first.
@@ -69,23 +83,40 @@ function stats2string(stats) {
 Request: ${JSON.stringify(stats.request)}
 Shell init time:              ${print(stats.commonMeasurements, 'initialize R session')}
 AST retrieval:                ${print(stats.commonMeasurements, 'retrieve AST from R code')}
+AST retrieval per token:      ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
+AST retrieval per R token:    ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
 AST normalization:            ${print(stats.commonMeasurements, 'normalize R AST')}
+AST normalization per token:  ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
+AST normalization per R token:${formatNanoseconds(stats.normalizeTimePerToken.raw)}
 Dataflow creation:            ${print(stats.commonMeasurements, 'produce dataflow information')}
+Dataflow creation per token:  ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
+Dataflow creation per R token:${formatNanoseconds(stats.dataflowTimePerToken.raw)}
+Total common time per token:  ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
+Total common time per R token:${formatNanoseconds(stats.totalCommonTimePerToken.raw)}
 Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:`;
     if (stats.perSliceMeasurements.numberOfSlices > 0) {
         result += `
-  Total:                      ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
-  Slice creation:             ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
-  Reconstruction:             ${printSummarizedMeasurements(stats.perSliceMeasurements, 'reconstruct code')}
-  Used Slice Criteria Sizes:  ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
+  Total:                              ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
+  Slice creation:                     ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
+  Slice creation per token in slice:  ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.normalized)}
+  Slice creation per R token in slice:${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.raw)}
+  Reconstruction:                     ${printSummarizedMeasurements(stats.perSliceMeasurements, 'reconstruct code')}
+  Reconstruction per token in slice:  ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.reconstructTimePerToken.normalized)}
+  Reconstruction per R token in slice:${formatSummarizedTimeMeasure(stats.perSliceMeasurements.reconstructTimePerToken.raw)}
+  Total per token in slice:           ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.normalized)}
+  Total per R token in slice:         ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.raw)}
+  Used Slice Criteria Sizes:          ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
   Result Slice Sizes:
     Number of lines:                     ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.lines)}
+    Number of non-empty lines:           ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonEmptyLines)}
     Number of characters:                ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.characters)}
     Number of non whitespace characters: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonWhitespaceCharacters)}
-    Number of auto selected:             ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.autoSelected)}
+    Number of auto selected lines:       ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.linesWithAutoSelected)}
     Number of R tokens:                  ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.tokens)}
+    Number of R tokens (w/o comments):   ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.tokensNoComments)}
     Normalized R tokens:                 ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokens)}
+    Normalized R tokens (w/o comments):  ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokensNoComments)}
     Number of dataflow nodes:            ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}
 `;
     }
@@ -95,16 +126,22 @@ Total:                        ${print(stats.commonMeasurements, 'total')}
 Input:
   Number of lines:                     ${pad(stats.input.numberOfLines)}
+  Number of non empty lines:           ${pad(stats.input.numberOfNonEmptyLines)}
   Number of characters:                ${pad(stats.input.numberOfCharacters)}
+  Number of characters (w/o comments): ${pad(stats.input.numberOfCharactersNoComments)}
   Number of non whitespace characters: ${pad(stats.input.numberOfNonWhitespaceCharacters)}
+  Number of n. w. c. (w/o comments):   ${pad(stats.input.numberOfNonWhitespaceCharactersNoComments)}
   Number of tokens:                    ${pad(stats.input.numberOfRTokens)}
+  Number of tokens (w/o comments):     ${pad(stats.input.numberOfRTokensNoComments)}
   Normalized R tokens:                 ${pad(stats.input.numberOfNormalizedTokens)}
+  Normalized R tokens (w/o comments):  ${pad(stats.input.numberOfNormalizedTokensNoComments)}
 Dataflow:
   Number of nodes:            ${pad(stats.dataflow.numberOfNodes)}
   Number of edges:            ${pad(stats.dataflow.numberOfEdges)}
   Number of calls:            ${pad(stats.dataflow.numberOfCalls)}
-  Number of function defs:    ${pad(stats.dataflow.numberOfFunctionDefinitions)}`;
+  Number of function defs:    ${pad(stats.dataflow.numberOfFunctionDefinitions)}
+  Size of graph:              ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
 }
 exports.stats2string = stats2string;
 function ultimateStats2String(stats) {
@@ -113,39 +150,65 @@ function ultimateStats2String(stats) {
 Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
 Shell init time:              ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
 AST retrieval:                ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
+AST retrieval per token:      ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
+AST retrieval per R token:    ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
 AST normalization:            ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
+AST normalization per token:  ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
+AST normalization per R token:${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
 Dataflow creation:            ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
+Dataflow creation per token:  ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
+Dataflow creation per R token:${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
+Total common time per token:  ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
+Total common time per R token:${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}
 Slice summary for:
-  Total:                      ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
-  Slice creation:             ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
-  Reconstruction:             ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('reconstruct code'))}
-  Failed to Re-Parse:         ${pad(stats.failedToRepParse)}/${stats.totalSlices}
-  Times hit Threshold:        ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
-  Reductions (reduced by x%):
-    Number of lines:                     ${formatSummarizedMeasure(stats.reduction.numberOfLines, asPercentage)}
-    Number of lines no auto:             ${formatSummarizedMeasure(stats.reduction.numberOfLinesNoAutoSelection, asPercentage)}
-    Number of characters:                ${formatSummarizedMeasure(stats.reduction.numberOfCharacters, asPercentage)}
-    Number of non whitespace characters: ${formatSummarizedMeasure(stats.reduction.numberOfNonWhitespaceCharacters, asPercentage)}
-    Number of R tokens:                  ${formatSummarizedMeasure(stats.reduction.numberOfRTokens, asPercentage)}
-    Normalized R tokens:                 ${formatSummarizedMeasure(stats.reduction.numberOfNormalizedTokens, asPercentage)}
-    Number of dataflow nodes:            ${formatSummarizedMeasure(stats.reduction.numberOfDataflowNodes, asPercentage)}
+  Total:                              ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
+  Slice creation:                     ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
+  Slice creation per token in slice:  ${formatSummarizedTimeMeasure(stats.sliceTimePerToken.normalized)}
+  Slice creation per R token in slice:${formatSummarizedTimeMeasure(stats.sliceTimePerToken.raw)}
+  Reconstruction:                     ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('reconstruct code'))}
+  Reconstruction per token in slice:  ${formatSummarizedTimeMeasure(stats.reconstructTimePerToken.normalized)}
+  Reconstruction per R token in slice:${formatSummarizedTimeMeasure(stats.reconstructTimePerToken.raw)}
+  Total per token in slice:           ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.normalized)}
+  Total per R token in slice:         ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.raw)}
+  Failed to Re-Parse:                 ${pad(stats.failedToRepParse)}/${stats.totalSlices}
+  Times hit Threshold:                ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
+${reduction2String('Reductions', stats.reduction)}
+${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}
 Shell close:                  ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
 Total:                        ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
 Input:
   Number of lines:                     ${formatSummarizedMeasure(stats.input.numberOfLines)}
+  Number of non empty lines:           ${formatSummarizedMeasure(stats.input.numberOfNonEmptyLines)}
   Number of characters:                ${formatSummarizedMeasure(stats.input.numberOfCharacters)}
+  Number of characters (w/o comments): ${formatSummarizedMeasure(stats.input.numberOfCharactersNoComments)}
   Number of non whitespace characters: ${formatSummarizedMeasure(stats.input.numberOfNonWhitespaceCharacters)}
+  Number of n. w. c. (w/o comments):   ${formatSummarizedMeasure(stats.input.numberOfNonWhitespaceCharactersNoComments)}
   Number of tokens:                    ${formatSummarizedMeasure(stats.input.numberOfRTokens)}
+  Number of tokens (w/o comments):     ${formatSummarizedMeasure(stats.input.numberOfRTokensNoComments)}
   Normalized R tokens:                 ${formatSummarizedMeasure(stats.input.numberOfNormalizedTokens)}
+  Normalized R tokens (w/o comments):  ${formatSummarizedMeasure(stats.input.numberOfNormalizedTokensNoComments)}
 Dataflow:
   Number of nodes:            ${formatSummarizedMeasure(stats.dataflow.numberOfNodes)}
   Number of edges:            ${formatSummarizedMeasure(stats.dataflow.numberOfEdges)}
   Number of calls:            ${formatSummarizedMeasure(stats.dataflow.numberOfCalls)}
-  Number of function defs:    ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}`;
+  Number of function defs:    ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}
+  Size of graph:              ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
+`;
 }
 exports.ultimateStats2String = ultimateStats2String;
+function reduction2String(title, reduction) {
+    return `
+  ${title} (reduced by x%):
+    Number of lines:                     ${formatSummarizedMeasure(reduction.numberOfLines, asPercentage)}
+    Number of lines no auto:             ${formatSummarizedMeasure(reduction.numberOfLinesNoAutoSelection, asPercentage)}
+    Number of characters:                ${formatSummarizedMeasure(reduction.numberOfCharacters, asPercentage)}
+    Number of non whitespace characters: ${formatSummarizedMeasure(reduction.numberOfNonWhitespaceCharacters, asPercentage)}
+    Number of R tokens:                  ${formatSummarizedMeasure(reduction.numberOfRTokens, asPercentage)}
+    Normalized R tokens:                 ${formatSummarizedMeasure(reduction.numberOfNormalizedTokens, asPercentage)}
+    Number of dataflow nodes:            ${formatSummarizedMeasure(reduction.numberOfDataflowNodes, asPercentage)}`;
+}
 //# sourceMappingURL=print.js.map

package/benchmark/stats/size-of.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { DataflowGraph } from '../../dataflow/graph/graph';
+/** Returns the size of the given df graph in bytes (without sharing in-memory) */
+export declare function getSizeOfDfGraph(df: DataflowGraph): number;

package/benchmark/stats/size-of.js ADDED Viewed

@@ -0,0 +1,68 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.getSizeOfDfGraph = void 0;
+const environment_1 = require("../../dataflow/environments/environment");
+const object_sizeof_1 = __importDefault(require("object-sizeof"));
+/* we have to kill all processors linked in the default environment as they cannot be serialized and they are shared anyway */
+function killBuiltInEnv(env) {
+    if (env === undefined) {
+        return undefined;
+    }
+    else if (env.id === environment_1.BuiltInEnvironment.id) {
+        /* in this case, the reference would be shared for sure */
+        return {
+            id: env.id,
+            parent: killBuiltInEnv(env.parent),
+            memory: new Map()
+        };
+    }
+    const memory = new Map();
+    for (const [k, v] of env.memory) {
+        memory.set(k, v.filter(v => !v.kind.startsWith('built-in') && !('processor' in v)));
+    }
+    return {
+        id: env.id,
+        parent: killBuiltInEnv(env.parent),
+        memory
+    };
+}
+/** Returns the size of the given df graph in bytes (without sharing in-memory) */
+function getSizeOfDfGraph(df) {
+    const verts = [];
+    for (const [, v] of df.vertices(true)) {
+        let vertex = v;
+        if (vertex.environment) {
+            vertex = {
+                ...vertex,
+                environment: {
+                    ...vertex.environment,
+                    current: killBuiltInEnv(v.environment.current)
+                }
+            };
+        }
+        if (vertex.tag === "function-definition" /* VertexType.FunctionDefinition */) {
+            vertex = {
+                ...vertex,
+                subflow: {
+                    ...vertex.subflow,
+                    environment: {
+                        ...vertex.subflow.environment,
+                        current: killBuiltInEnv(vertex.subflow.environment.current)
+                    }
+                }
+            };
+        }
+        vertex = {
+            ...vertex,
+            /* shared anyway by using constants */
+            tag: 0
+        };
+        verts.push(vertex);
+    }
+    return (0, object_sizeof_1.default)([...verts, ...df.edges()]);
+}
+exports.getSizeOfDfGraph = getSizeOfDfGraph;
+//# sourceMappingURL=size-of.js.map

package/benchmark/stats/stats.d.ts CHANGED Viewed

@@ -2,6 +2,8 @@ import type { SingleSlicingCriterion, SlicingCriteria } from '../../slicing/crit
 import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
 import type { ReconstructionResult } from '../../reconstruct/reconstruct';
 import type { RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge/retriever';
+import type { TimePerToken } from '../summarizer/data';
+import type { MergeableRecord } from '../../util/objects';
 export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
 export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number];
 export declare const PerSliceMeasurements: readonly ["static slicing", "reconstruct code", "total"];
@@ -19,16 +21,32 @@ export interface PerSliceStats {
 }
 export interface SlicerStatsInput<T = number> {
     numberOfLines: T;
+    numberOfNonEmptyLines: T; // NOTE(review): presumably excludes blank lines — confirm against the collecting code
     numberOfCharacters: T;
+    numberOfCharactersNoComments: T; // NOTE(review): all "NoComments" fields presumably ignore comments — confirm
     numberOfNonWhitespaceCharacters: T;
+    numberOfNonWhitespaceCharactersNoComments: T;
     numberOfRTokens: T;
+    numberOfRTokensNoComments: T;
     numberOfNormalizedTokens: T;
+    numberOfNormalizedTokensNoComments: T;
 }
 export interface SlicerStatsDataflow<T = number> {
     numberOfNodes: T;
     numberOfEdges: T;
     numberOfCalls: T;
     numberOfFunctionDefinitions: T;
+    sizeOfObject: T; // NOTE(review): presumably the byte size of the dataflow graph (cf. getSizeOfDfGraph) — confirm
+}
+/**
+ * Please note that these measurements can be negative, as there is no guarantee that the memory usage
+ * will increase (e.g., because of garbage collection).
+ * NOTE(review): the fields resemble the output of Node's `process.memoryUsage()` — confirm the exact mapping.
+*/
+export interface BenchmarkMemoryMeasurement<T = number> extends MergeableRecord {
+    heap: T;
+    rss: T;
+    external: T;
+    buffs: T;
 }
 /**
  * The statistics that are collected by the {@link BenchmarkSlicer} and used for benchmarking.
@@ -36,7 +54,12 @@ export interface SlicerStatsDataflow<T = number> {
 export interface SlicerStats {
     commonMeasurements: Map<CommonSlicerMeasurements, ElapsedTime>;
     perSliceMeasurements: Map<SlicingCriteria, PerSliceStats>;
+    memory: Map<CommonSlicerMeasurements, BenchmarkMemoryMeasurement>; // memory measurements, keyed like commonMeasurements
     request: RParseRequestFromFile | RParseRequestFromText;
     input: SlicerStatsInput;
     dataflow: SlicerStatsDataflow;
+    retrieveTimePerToken: TimePerToken<number>; // each *TimePerToken field holds plain numbers here, not summarized measurements
+    normalizeTimePerToken: TimePerToken<number>;
+    dataflowTimePerToken: TimePerToken<number>;
+    totalCommonTimePerToken: TimePerToken<number>;
 }

package/benchmark/summarizer/data.d.ts CHANGED Viewed

@@ -2,13 +2,18 @@ import type { SummarizedMeasurement } from '../../util/summarizer';
 import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
 export interface SliceSizeCollection {
     lines: number[];
+    nonEmptyLines: number[]; // NOTE(review): presumably line counts without blank lines — confirm
     characters: number[];
+    charactersNoComments: number[]; // NOTE(review): all "NoComments" fields presumably ignore comments — confirm
     nonWhitespaceCharacters: number[];
+    nonWhitespaceCharactersNoComments: number[];
     /** like library statements during reconstruction */
-    autoSelected: number[];
+    linesWithAutoSelected: number[]; // takes the place of the former `autoSelected` field
     dataflowNodes: number[];
     tokens: number[];
+    tokensNoComments: number[];
     normalizedTokens: number[];
+    normalizedTokensNoComments: number[];
 }
 /**
  * @see SlicerStats
@@ -26,13 +31,22 @@ export interface Reduction<T = number> {
     numberOfNormalizedTokens: T;
     numberOfDataflowNodes: T;
 }
+export interface TimePerToken<T = SummarizedMeasurement> {
+    raw: T; // NOTE(review): presumably relative to the number of raw R tokens — confirm
+    normalized: T; // NOTE(review): presumably relative to the number of normalized tokens — confirm
+}
 export interface SummarizedPerSliceStats {
     /** number of total slicing calls */
     numberOfSlices: number;
     /** statistics on the used slicing criteria (number of ids within criteria etc.) */
     sliceCriteriaSizes: SummarizedMeasurement;
     measurements: Map<PerSliceMeasurements, SummarizedMeasurement>;
+    sliceTimePerToken: TimePerToken;
+    reconstructTimePerToken: TimePerToken;
+    totalPerSliceTimePerToken: TimePerToken;
     reduction: Reduction<SummarizedMeasurement>;
+    /** reduction, but without taking into account comments and empty lines */
+    reductionNoFluff: Reduction<SummarizedMeasurement>;
     failedToRepParse: number;
     timesHitThreshold: number;
     sliceSize: {
@@ -44,11 +58,20 @@ export interface UltimateSlicerStats {
     totalSlices: number;
     commonMeasurements: Map<CommonSlicerMeasurements, SummarizedMeasurement>;
     perSliceMeasurements: Map<PerSliceMeasurements, SummarizedMeasurement>;
+    retrieveTimePerToken: TimePerToken;
+    normalizeTimePerToken: TimePerToken;
+    dataflowTimePerToken: TimePerToken;
+    totalCommonTimePerToken: TimePerToken;
+    sliceTimePerToken: TimePerToken;
+    reconstructTimePerToken: TimePerToken;
+    totalPerSliceTimePerToken: TimePerToken;
     /** sum */
     failedToRepParse: number;
     /** sum */
     timesHitThreshold: number;
     reduction: Reduction<SummarizedMeasurement>;
+    /** reduction, but without taking into account comments and empty lines */
+    reductionNoFluff: Reduction<SummarizedMeasurement>;
     input: SlicerStatsInput<SummarizedMeasurement>;
     dataflow: SlicerStatsDataflow<SummarizedMeasurement>;
 }

package/benchmark/summarizer/first-phase/input.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
 /// <reference types="node" />
-export declare function processRunMeasurement(line: Buffer, fileNum: number, lineNum: number, summarizedText: string, outputPath: string): Promise<void>;
-export declare function processSummarizedFileMeasurement(file: string, summariesFile: string, outputPath: string): void;
+export declare function processRunMeasurement(line: Buffer, fileNum: number, lineNum: number, textOutputAppendPath: string, rawOutputPath: string): Promise<void>; // same parameter types as before; only the last two parameter names changed
+export declare function processSummarizedRunMeasurement(runNum: number, summarizedFiles: string[], appendPath: string): void; // declared where processSummarizedFileMeasurement(file, summariesFile, outputPath) used to be