@eagleoutice/flowr 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/benchmark/slicer.d.ts +1 -0
  2. package/benchmark/slicer.js +69 -8
  3. package/benchmark/stats/print.d.ts +1 -0
  4. package/benchmark/stats/print.js +94 -31
  5. package/benchmark/stats/size-of.d.ts +3 -0
  6. package/benchmark/stats/size-of.js +68 -0
  7. package/benchmark/stats/stats.d.ts +23 -0
  8. package/benchmark/summarizer/data.d.ts +24 -1
  9. package/benchmark/summarizer/first-phase/input.d.ts +2 -2
  10. package/benchmark/summarizer/first-phase/input.js +21 -21
  11. package/benchmark/summarizer/first-phase/process.d.ts +4 -2
  12. package/benchmark/summarizer/first-phase/process.js +120 -33
  13. package/benchmark/summarizer/second-phase/graph.js +7 -0
  14. package/benchmark/summarizer/second-phase/process.js +65 -27
  15. package/benchmark/summarizer/summarizer.d.ts +1 -0
  16. package/benchmark/summarizer/summarizer.js +23 -10
  17. package/cli/repl/commands/commands.js +19 -1
  18. package/cli/slicer-app.js +1 -1
  19. package/dataflow/environments/append.js +1 -2
  20. package/dataflow/environments/built-in.js +2 -1
  21. package/dataflow/environments/clone.js +1 -1
  22. package/dataflow/environments/diff.d.ts +1 -1
  23. package/dataflow/environments/diff.js +16 -18
  24. package/dataflow/environments/environment.d.ts +6 -8
  25. package/dataflow/environments/environment.js +5 -8
  26. package/dataflow/environments/identifier.d.ts +2 -1
  27. package/dataflow/environments/overwrite.js +1 -2
  28. package/dataflow/environments/scoping.js +1 -1
  29. package/dataflow/graph/diff.js +11 -6
  30. package/dataflow/graph/edge.d.ts +2 -3
  31. package/dataflow/graph/edge.js +2 -2
  32. package/dataflow/graph/graph.d.ts +6 -2
  33. package/dataflow/graph/graph.js +16 -9
  34. package/dataflow/graph/vertex.d.ts +2 -1
  35. package/dataflow/info.d.ts +10 -1
  36. package/dataflow/info.js +54 -2
  37. package/dataflow/internal/linker.d.ts +1 -1
  38. package/dataflow/internal/linker.js +1 -2
  39. package/dataflow/internal/process/functions/call/built-in/built-in-assignment.js +5 -5
  40. package/dataflow/internal/process/functions/call/built-in/built-in-for-loop.js +1 -1
  41. package/dataflow/internal/process/functions/call/built-in/built-in-function-definition.js +21 -25
  42. package/dataflow/internal/process/functions/call/built-in/built-in-get.js +6 -1
  43. package/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.js +10 -8
  44. package/dataflow/internal/process/functions/call/built-in/built-in-logical-bin-op.d.ts +1 -0
  45. package/dataflow/internal/process/functions/call/built-in/built-in-logical-bin-op.js +1 -2
  46. package/dataflow/internal/process/functions/call/built-in/built-in-while-loop.js +1 -1
  47. package/dataflow/internal/process/functions/call/default-call-handling.js +1 -1
  48. package/dataflow/internal/process/functions/call/unnamed-call-handling.js +1 -1
  49. package/dataflow/internal/process/process-value.js +0 -1
  50. package/dataflow/processor.d.ts +2 -3
  51. package/package.json +5 -2
  52. package/r-bridge/data/data.d.ts +1 -1
  53. package/r-bridge/data/data.js +1 -1
  54. package/r-bridge/lang-4.x/ast/model/nodes/r-function-call.d.ts +2 -2
  55. package/r-bridge/lang-4.x/ast/model/operators.js +1 -1
  56. package/r-bridge/lang-4.x/ast/model/processing/decorate.js +1 -1
  57. package/r-bridge/lang-4.x/ast/model/processing/stateful-fold.js +1 -1
  58. package/r-bridge/lang-4.x/ast/model/processing/visitor.js +2 -2
  59. package/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-call.js +2 -2
  60. package/r-bridge/lang-4.x/ast/parser/xml/internal/operators/normalize-binary.js +1 -1
  61. package/r-bridge/retriever.d.ts +1 -1
  62. package/r-bridge/retriever.js +3 -2
  63. package/r-bridge/shell.js +2 -1
  64. package/reconstruct/reconstruct.d.ts +3 -3
  65. package/reconstruct/reconstruct.js +40 -41
  66. package/slicing/criterion/filters/all-variables.js +1 -1
  67. package/slicing/static/static-slicer.js +2 -2
  68. package/statistics/features/common-syntax-probability.js +1 -1
  69. package/statistics/features/supported/control-flow/control-flow.js +1 -1
  70. package/statistics/features/supported/defined-functions/defined-functions.js +1 -1
  71. package/statistics/features/supported/loops/loops.js +1 -1
  72. package/statistics/features/supported/used-functions/used-functions.js +1 -1
  73. package/util/assert.d.ts +1 -1
  74. package/util/mermaid/ast.js +4 -0
  75. package/util/mermaid/dfg.d.ts +0 -1
  76. package/util/mermaid/dfg.js +16 -13
  77. package/util/mermaid/mermaid.js +21 -1
  78. package/util/version.js +1 -1
@@ -51,6 +51,7 @@ export declare class BenchmarkSlicer {
51
51
  /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
52
52
  private readonly commonMeasurements;
53
53
  private readonly perSliceMeasurements;
54
+ private readonly deltas;
54
55
  private readonly shell;
55
56
  private stats;
56
57
  private loadedXml;
@@ -17,8 +17,9 @@ const strings_1 = require("../util/strings");
17
17
  const shell_1 = require("../r-bridge/shell");
18
18
  const default_pipelines_1 = require("../core/steps/pipeline/default-pipelines");
19
19
  const retriever_1 = require("../r-bridge/retriever");
20
- const collect_1 = require("../r-bridge/lang-4.x/ast/model/collect");
21
20
  const collect_all_1 = require("../slicing/criterion/collect-all");
21
+ const visitor_1 = require("../r-bridge/lang-4.x/ast/model/processing/visitor");
22
+ const size_of_1 = require("./stats/size-of");
22
23
  exports.benchmarkLogger = log_1.log.getSubLogger({ name: 'benchmark' });
23
24
  /**
24
25
  * A slicer that can be used to slice exactly one file (multiple times).
@@ -33,6 +34,7 @@ class BenchmarkSlicer {
33
34
  /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
34
35
  commonMeasurements = new stopwatch_1.Measurements();
35
36
  perSliceMeasurements = new Map();
37
+ deltas = new Map();
36
38
  shell;
37
39
  stats;
38
40
  loadedXml;
@@ -67,6 +69,7 @@ class BenchmarkSlicer {
67
69
  const loadedContent = request.request === 'text' ? request.content : fs_1.default.readFileSync(request.content, 'utf-8');
68
70
  // retrieve number of R tokens - flowr_parsed should still contain the last parsed code
69
71
  const numberOfRTokens = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(this.shell);
72
+ const numberOfRTokensNoComments = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(this.shell, true);
70
73
  (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined after initialization');
71
74
  (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined after initialization');
72
75
  // collect dataflow graph size
@@ -84,23 +87,54 @@ class BenchmarkSlicer {
84
87
  numberOfDefinitions++;
85
88
  }
86
89
  }
90
+ let nodes = 0;
91
+ let nodesNoComments = 0;
92
+ let commentChars = 0;
93
+ let commentCharsNoWhitespace = 0;
94
+ (0, visitor_1.visitAst)(this.normalizedAst.ast, t => {
95
+ nodes++;
96
+ const comments = t.info.additionalTokens?.filter(t => t.type === "RComment" /* RType.Comment */);
97
+ if (comments && comments.length > 0) {
98
+ const content = comments.map(c => c.lexeme ?? '').join('');
99
+ commentChars += content.length;
100
+ commentCharsNoWhitespace += (0, strings_1.withoutWhitespace)(content).length;
101
+ }
102
+ else {
103
+ nodesNoComments++;
104
+ }
105
+ return false;
106
+ });
107
+ const split = loadedContent.split('\n');
108
+ const nonWhitespace = (0, strings_1.withoutWhitespace)(loadedContent).length;
87
109
  this.stats = {
88
- commonMeasurements: new Map(),
89
110
  perSliceMeasurements: this.perSliceMeasurements,
111
+ memory: this.deltas,
90
112
  request,
91
113
  input: {
92
- numberOfLines: loadedContent.split('\n').length,
114
+ numberOfLines: split.length,
115
+ numberOfNonEmptyLines: split.filter(l => l.trim().length > 0).length,
93
116
  numberOfCharacters: loadedContent.length,
94
- numberOfNonWhitespaceCharacters: (0, strings_1.withoutWhitespace)(loadedContent).length,
117
+ numberOfCharactersNoComments: loadedContent.length - commentChars,
118
+ numberOfNonWhitespaceCharacters: nonWhitespace,
119
+ numberOfNonWhitespaceCharactersNoComments: nonWhitespace - commentCharsNoWhitespace,
95
120
  numberOfRTokens: numberOfRTokens,
96
- numberOfNormalizedTokens: [...(0, collect_1.collectAllIds)(this.normalizedAst.ast)].length
121
+ numberOfRTokensNoComments: numberOfRTokensNoComments,
122
+ numberOfNormalizedTokens: nodes,
123
+ numberOfNormalizedTokensNoComments: nodesNoComments
97
124
  },
98
125
  dataflow: {
99
126
  numberOfNodes: [...this.dataflow.graph.vertices(true)].length,
100
127
  numberOfEdges: numberOfEdges,
101
128
  numberOfCalls: numberOfCalls,
102
- numberOfFunctionDefinitions: numberOfDefinitions
103
- }
129
+ numberOfFunctionDefinitions: numberOfDefinitions,
130
+ sizeOfObject: (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph)
131
+ },
132
+ // these are all properly initialized in finish()
133
+ commonMeasurements: new Map(),
134
+ retrieveTimePerToken: { raw: 0, normalized: 0 },
135
+ normalizeTimePerToken: { raw: 0, normalized: 0 },
136
+ dataflowTimePerToken: { raw: 0, normalized: 0 },
137
+ totalCommonTimePerToken: { raw: 0, normalized: 0 }
104
138
  };
105
139
  }
106
140
  /**
@@ -122,7 +156,7 @@ class BenchmarkSlicer {
122
156
  timesHitThreshold: 0,
123
157
  reconstructedCode: {
124
158
  code: '',
125
- autoSelected: 0
159
+ linesWithAutoSelected: 0
126
160
  }
127
161
  };
128
162
  this.perSliceMeasurements.set(slicingCriteria, stats);
@@ -152,7 +186,15 @@ class BenchmarkSlicer {
152
186
  }
153
187
  /** Bridging the gap between the new internal and the old names for the benchmarking */
154
188
  async measureCommonStep(expectedStep, keyToMeasure) {
189
+ const memoryInit = process.memoryUsage();
155
190
  const { result } = await this.commonMeasurements.measureAsync(keyToMeasure, () => this.pipeline.nextStep(expectedStep));
191
+ const memoryEnd = process.memoryUsage();
192
+ this.deltas.set(keyToMeasure, {
193
+ heap: memoryEnd.heapUsed - memoryInit.heapUsed,
194
+ rss: memoryEnd.rss - memoryInit.rss,
195
+ external: memoryEnd.external - memoryInit.external,
196
+ buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
197
+ });
156
198
  return result;
157
199
  }
158
200
  async measureSliceStep(expectedStep, measure, keyToMeasure) {
@@ -196,6 +238,25 @@ class BenchmarkSlicer {
196
238
  this.finished = true;
197
239
  }
198
240
  this.stats.commonMeasurements = this.commonMeasurements.get();
241
+ const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
242
+ const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
243
+ const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
244
+ this.stats.retrieveTimePerToken = {
245
+ raw: retrieveTime / this.stats.input.numberOfRTokens,
246
+ normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
247
+ };
248
+ this.stats.normalizeTimePerToken = {
249
+ raw: normalizeTime / this.stats.input.numberOfRTokens,
250
+ normalized: normalizeTime / this.stats.input.numberOfNormalizedTokens
251
+ };
252
+ this.stats.dataflowTimePerToken = {
253
+ raw: dataflowTime / this.stats.input.numberOfRTokens,
254
+ normalized: dataflowTime / this.stats.input.numberOfNormalizedTokens
255
+ };
256
+ this.stats.totalCommonTimePerToken = {
257
+ raw: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
258
+ normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
259
+ };
199
260
  return {
200
261
  stats: this.stats,
201
262
  parse: this.loadedXml,
@@ -1,4 +1,5 @@
1
1
  import type { SummarizedSlicerStats, UltimateSlicerStats } from '../summarizer/data';
2
+ export declare function formatNanoseconds(nanoseconds: bigint | number): string;
2
3
  /**
3
4
  * Converts the given stats to a human-readable string.
4
5
  * You may have to {@link summarizeSlicerStats | summarize} the stats first.
@@ -1,26 +1,29 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ultimateStats2String = exports.stats2string = void 0;
3
+ exports.ultimateStats2String = exports.stats2string = exports.formatNanoseconds = void 0;
4
4
  const assert_1 = require("../../util/assert");
5
5
  const padSize = 15;
6
6
  function pad(string) {
7
7
  return String(string).padStart(padSize, ' ');
8
8
  }
9
- function divWithRest(dividend, divisor) {
10
- return [dividend / divisor, dividend % divisor];
11
- }
12
9
  function formatNanoseconds(nanoseconds) {
13
10
  if (nanoseconds < 0) {
14
11
  return '??';
15
12
  }
16
- const [seconds, rest] = divWithRest(typeof nanoseconds === 'number' ? BigInt(Math.round(nanoseconds)) : nanoseconds, BigInt(1e9));
17
- const [milliseconds, remainingNanoseconds] = divWithRest(rest, BigInt(1e6));
18
- const secondsStr = seconds > 0 ? `${String(seconds).padStart(2, '0')}.` : '';
19
- const millisecondsStr = seconds > 0 ? `${String(milliseconds).padStart(3, '0')}:` : `${String(milliseconds)}:`;
20
- const nanoStr = String(remainingNanoseconds).padEnd(3, '0').substring(0, 3);
21
- const unit = seconds === 0n ? 'ms' : ' s'; /* space for padding */
22
- return pad(`${secondsStr}${millisecondsStr}${nanoStr}${unit}`);
13
+ const wholeNanos = typeof nanoseconds === 'bigint' ? nanoseconds : BigInt(Math.round(nanoseconds));
14
+ const nanos = wholeNanos % BigInt(1e+6);
15
+ const wholeMillis = wholeNanos / BigInt(1e+6);
16
+ const millis = wholeMillis % BigInt(1000);
17
+ const wholeSeconds = wholeMillis / BigInt(1000);
18
+ if (wholeSeconds > 0) {
19
+ const nanoString = nanos > 0 ? `:${nanos}` : '';
20
+ return pad(`${wholeSeconds}.${String(millis).padStart(3, '0')}${nanoString} s`);
21
+ }
22
+ else {
23
+ return pad(`${millis}:${String(nanos).padStart(6, '0')}ms`);
24
+ }
23
25
  }
26
+ exports.formatNanoseconds = formatNanoseconds;
24
27
  function print(measurements, key) {
25
28
  const time = measurements.get(key);
26
29
  (0, assert_1.guard)(time !== undefined, `Measurement for ${JSON.stringify(key)} not found`);
@@ -40,6 +43,7 @@ function asPercentage(num) {
40
43
  if (isNaN(num)) {
41
44
  return '??%';
42
45
  }
46
+ (0, assert_1.guard)(num >= 0 && num <= 1, `Percentage ${num} should be between 0 and 1`);
43
47
  return pad(`${roundTo(num * 100, 3)}%`);
44
48
  }
45
49
  function asFloat(num) {
@@ -60,6 +64,16 @@ function printCountSummarizedMeasurements(stats) {
60
64
  const range = `${stats.min} - ${stats.max}`.padStart(padSize, ' ');
61
65
  return `${range} (median: ${stats.median}, mean: ${stats.mean}, std: ${stats.std})`;
62
66
  }
67
+ const units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'];
68
+ // based on https://stackoverflow.com/a/39906526
69
+ function convertNumberToNiceBytes(x) {
70
+ let n = Math.abs(x);
71
+ let l = 0;
72
+ while (n >= 1024 && ++l) {
73
+ n = n / 1024;
74
+ }
75
+ return pad((x < 0 ? '-' : '') + n.toFixed(n < 10 && l > 0 ? 1 : 0) + ' ' + units[l]);
76
+ }
63
77
  /**
64
78
  * Converts the given stats to a human-readable string.
65
79
  * You may have to {@link summarizeSlicerStats | summarize} the stats first.
@@ -69,23 +83,40 @@ function stats2string(stats) {
69
83
  Request: ${JSON.stringify(stats.request)}
70
84
  Shell init time: ${print(stats.commonMeasurements, 'initialize R session')}
71
85
  AST retrieval: ${print(stats.commonMeasurements, 'retrieve AST from R code')}
86
+ AST retrieval per token: ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
87
+ AST retrieval per R token: ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
72
88
  AST normalization: ${print(stats.commonMeasurements, 'normalize R AST')}
89
+ AST normalization per token: ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
90
+ AST normalization per R token:${formatNanoseconds(stats.normalizeTimePerToken.raw)}
73
91
  Dataflow creation: ${print(stats.commonMeasurements, 'produce dataflow information')}
92
+ Dataflow creation per token: ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
93
+ Dataflow creation per R token:${formatNanoseconds(stats.dataflowTimePerToken.raw)}
94
+ Total common time per token: ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
95
+ Total common time per R token:${formatNanoseconds(stats.totalCommonTimePerToken.raw)}
74
96
 
75
97
  Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:`;
76
98
  if (stats.perSliceMeasurements.numberOfSlices > 0) {
77
99
  result += `
78
- Total: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
79
- Slice creation: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
80
- Reconstruction: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'reconstruct code')}
81
- Used Slice Criteria Sizes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
100
+ Total: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
101
+ Slice creation: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
102
+ Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.normalized)}
103
+ Slice creation per R token in slice:${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.raw)}
104
+ Reconstruction: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'reconstruct code')}
105
+ Reconstruction per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.reconstructTimePerToken.normalized)}
106
+ Reconstruction per R token in slice:${formatSummarizedTimeMeasure(stats.perSliceMeasurements.reconstructTimePerToken.raw)}
107
+ Total per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.normalized)}
108
+ Total per R token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.raw)}
109
+ Used Slice Criteria Sizes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
82
110
  Result Slice Sizes:
83
111
  Number of lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.lines)}
112
+ Number of non-empty lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonEmptyLines)}
84
113
  Number of characters: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.characters)}
85
114
  Number of non whitespace characters: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonWhitespaceCharacters)}
86
- Number of auto selected: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.autoSelected)}
115
+ Number of auto selected lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.linesWithAutoSelected)}
87
116
  Number of R tokens: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.tokens)}
117
+ Number of R tokens (w/o comments): ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.tokensNoComments)}
88
118
  Normalized R tokens: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokens)}
119
+ Normalized R tokens (w/o comments): ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokensNoComments)}
89
120
  Number of dataflow nodes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}
90
121
  `;
91
122
  }
@@ -95,16 +126,22 @@ Total: ${print(stats.commonMeasurements, 'total')}
95
126
 
96
127
  Input:
97
128
  Number of lines: ${pad(stats.input.numberOfLines)}
129
+ Number of non empty lines: ${pad(stats.input.numberOfNonEmptyLines)}
98
130
  Number of characters: ${pad(stats.input.numberOfCharacters)}
131
+ Number of characters (w/o comments): ${pad(stats.input.numberOfCharactersNoComments)}
99
132
  Number of non whitespace characters: ${pad(stats.input.numberOfNonWhitespaceCharacters)}
133
+ Number of n. w. c. (w/o comments): ${pad(stats.input.numberOfNonWhitespaceCharactersNoComments)}
100
134
  Number of tokens: ${pad(stats.input.numberOfRTokens)}
135
+ Number of tokens (w/o comments): ${pad(stats.input.numberOfRTokensNoComments)}
101
136
  Normalized R tokens: ${pad(stats.input.numberOfNormalizedTokens)}
137
+ Normalized R tokens (w/o comments): ${pad(stats.input.numberOfNormalizedTokensNoComments)}
102
138
 
103
139
  Dataflow:
104
140
  Number of nodes: ${pad(stats.dataflow.numberOfNodes)}
105
141
  Number of edges: ${pad(stats.dataflow.numberOfEdges)}
106
142
  Number of calls: ${pad(stats.dataflow.numberOfCalls)}
107
- Number of function defs: ${pad(stats.dataflow.numberOfFunctionDefinitions)}`;
143
+ Number of function defs: ${pad(stats.dataflow.numberOfFunctionDefinitions)}
144
+ Size of graph: ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
108
145
  }
109
146
  exports.stats2string = stats2string;
110
147
  function ultimateStats2String(stats) {
@@ -113,39 +150,65 @@ function ultimateStats2String(stats) {
113
150
  Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
114
151
  Shell init time: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
115
152
  AST retrieval: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
153
+ AST retrieval per token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
154
+ AST retrieval per R token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
116
155
  AST normalization: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
156
+ AST normalization per token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
157
+ AST normalization per R token:${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
117
158
  Dataflow creation: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
159
+ Dataflow creation per token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
160
+ Dataflow creation per R token:${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
161
+ Total common time per token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
162
+ Total common time per R token:${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}
118
163
 
119
164
  Slice summary for:
120
- Total: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
121
- Slice creation: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
122
- Reconstruction: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('reconstruct code'))}
123
- Failed to Re-Parse: ${pad(stats.failedToRepParse)}/${stats.totalSlices}
124
- Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
125
- Reductions (reduced by x%):
126
- Number of lines: ${formatSummarizedMeasure(stats.reduction.numberOfLines, asPercentage)}
127
- Number of lines no auto: ${formatSummarizedMeasure(stats.reduction.numberOfLinesNoAutoSelection, asPercentage)}
128
- Number of characters: ${formatSummarizedMeasure(stats.reduction.numberOfCharacters, asPercentage)}
129
- Number of non whitespace characters: ${formatSummarizedMeasure(stats.reduction.numberOfNonWhitespaceCharacters, asPercentage)}
130
- Number of R tokens: ${formatSummarizedMeasure(stats.reduction.numberOfRTokens, asPercentage)}
131
- Normalized R tokens: ${formatSummarizedMeasure(stats.reduction.numberOfNormalizedTokens, asPercentage)}
132
- Number of dataflow nodes: ${formatSummarizedMeasure(stats.reduction.numberOfDataflowNodes, asPercentage)}
165
+ Total: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
166
+ Slice creation: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
167
+ Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.sliceTimePerToken.normalized)}
168
+ Slice creation per R token in slice:${formatSummarizedTimeMeasure(stats.sliceTimePerToken.raw)}
169
+ Reconstruction: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('reconstruct code'))}
170
+ Reconstruction per token in slice: ${formatSummarizedTimeMeasure(stats.reconstructTimePerToken.normalized)}
171
+ Reconstruction per R token in slice:${formatSummarizedTimeMeasure(stats.reconstructTimePerToken.raw)}
172
+ Total per token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.normalized)}
173
+ Total per R token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.raw)}
174
+ Failed to Re-Parse: ${pad(stats.failedToRepParse)}/${stats.totalSlices}
175
+ Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
176
+ ${reduction2String('Reductions', stats.reduction)}
177
+ ${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}
133
178
 
134
179
  Shell close: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
135
180
  Total: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
136
181
 
137
182
  Input:
138
183
  Number of lines: ${formatSummarizedMeasure(stats.input.numberOfLines)}
184
+ Number of non empty lines: ${formatSummarizedMeasure(stats.input.numberOfNonEmptyLines)}
139
185
  Number of characters: ${formatSummarizedMeasure(stats.input.numberOfCharacters)}
186
+ Number of characters (w/o comments): ${formatSummarizedMeasure(stats.input.numberOfCharactersNoComments)}
140
187
  Number of non whitespace characters: ${formatSummarizedMeasure(stats.input.numberOfNonWhitespaceCharacters)}
188
+ Number of n. w. c. (w/o comments): ${formatSummarizedMeasure(stats.input.numberOfNonWhitespaceCharactersNoComments)}
141
189
  Number of tokens: ${formatSummarizedMeasure(stats.input.numberOfRTokens)}
190
+ Number of tokens (w/o comments): ${formatSummarizedMeasure(stats.input.numberOfRTokensNoComments)}
142
191
  Normalized R tokens: ${formatSummarizedMeasure(stats.input.numberOfNormalizedTokens)}
192
+ Normalized R tokens (w/o comments): ${formatSummarizedMeasure(stats.input.numberOfNormalizedTokensNoComments)}
143
193
 
144
194
  Dataflow:
145
195
  Number of nodes: ${formatSummarizedMeasure(stats.dataflow.numberOfNodes)}
146
196
  Number of edges: ${formatSummarizedMeasure(stats.dataflow.numberOfEdges)}
147
197
  Number of calls: ${formatSummarizedMeasure(stats.dataflow.numberOfCalls)}
148
- Number of function defs: ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}`;
198
+ Number of function defs: ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}
199
+ Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
200
+ `;
149
201
  }
150
202
  exports.ultimateStats2String = ultimateStats2String;
203
+ function reduction2String(title, reduction) {
204
+ return `
205
+ ${title} (reduced by x%):
206
+ Number of lines: ${formatSummarizedMeasure(reduction.numberOfLines, asPercentage)}
207
+ Number of lines no auto: ${formatSummarizedMeasure(reduction.numberOfLinesNoAutoSelection, asPercentage)}
208
+ Number of characters: ${formatSummarizedMeasure(reduction.numberOfCharacters, asPercentage)}
209
+ Number of non whitespace characters: ${formatSummarizedMeasure(reduction.numberOfNonWhitespaceCharacters, asPercentage)}
210
+ Number of R tokens: ${formatSummarizedMeasure(reduction.numberOfRTokens, asPercentage)}
211
+ Normalized R tokens: ${formatSummarizedMeasure(reduction.numberOfNormalizedTokens, asPercentage)}
212
+ Number of dataflow nodes: ${formatSummarizedMeasure(reduction.numberOfDataflowNodes, asPercentage)}`;
213
+ }
151
214
  //# sourceMappingURL=print.js.map
@@ -0,0 +1,3 @@
1
+ import type { DataflowGraph } from '../../dataflow/graph/graph';
2
+ /** Returns the size of the given df graph in bytes (without sharing in-memory) */
3
+ export declare function getSizeOfDfGraph(df: DataflowGraph): number;
@@ -0,0 +1,68 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getSizeOfDfGraph = void 0;
7
+ const environment_1 = require("../../dataflow/environments/environment");
8
+ const object_sizeof_1 = __importDefault(require("object-sizeof"));
9
+ /* we have to kill all processors linked in the default environment as they cannot be serialized and they are shared anyway */
10
+ function killBuiltInEnv(env) {
11
+ if (env === undefined) {
12
+ return undefined;
13
+ }
14
+ else if (env.id === environment_1.BuiltInEnvironment.id) {
15
+ /* in this case, the reference would be shared for sure */
16
+ return {
17
+ id: env.id,
18
+ parent: killBuiltInEnv(env.parent),
19
+ memory: new Map()
20
+ };
21
+ }
22
+ const memory = new Map();
23
+ for (const [k, v] of env.memory) {
24
+ memory.set(k, v.filter(v => !v.kind.startsWith('built-in') && !('processor' in v)));
25
+ }
26
+ return {
27
+ id: env.id,
28
+ parent: killBuiltInEnv(env.parent),
29
+ memory
30
+ };
31
+ }
32
+ /** Returns the size of the given df graph in bytes (without sharing in-memory) */
33
+ function getSizeOfDfGraph(df) {
34
+ const verts = [];
35
+ for (const [, v] of df.vertices(true)) {
36
+ let vertex = v;
37
+ if (vertex.environment) {
38
+ vertex = {
39
+ ...vertex,
40
+ environment: {
41
+ ...vertex.environment,
42
+ current: killBuiltInEnv(v.environment.current)
43
+ }
44
+ };
45
+ }
46
+ if (vertex.tag === "function-definition" /* VertexType.FunctionDefinition */) {
47
+ vertex = {
48
+ ...vertex,
49
+ subflow: {
50
+ ...vertex.subflow,
51
+ environment: {
52
+ ...vertex.subflow.environment,
53
+ current: killBuiltInEnv(vertex.subflow.environment.current)
54
+ }
55
+ }
56
+ };
57
+ }
58
+ vertex = {
59
+ ...vertex,
60
+ /* shared anyway by using constants */
61
+ tag: 0
62
+ };
63
+ verts.push(vertex);
64
+ }
65
+ return (0, object_sizeof_1.default)([...verts, ...df.edges()]);
66
+ }
67
+ exports.getSizeOfDfGraph = getSizeOfDfGraph;
68
+ //# sourceMappingURL=size-of.js.map
@@ -2,6 +2,8 @@ import type { SingleSlicingCriterion, SlicingCriteria } from '../../slicing/crit
2
2
  import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
3
3
  import type { ReconstructionResult } from '../../reconstruct/reconstruct';
4
4
  import type { RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge/retriever';
5
+ import type { TimePerToken } from '../summarizer/data';
6
+ import type { MergeableRecord } from '../../util/objects';
5
7
  export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
6
8
  export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number];
7
9
  export declare const PerSliceMeasurements: readonly ["static slicing", "reconstruct code", "total"];
@@ -19,16 +21,32 @@ export interface PerSliceStats {
19
21
  }
20
22
  export interface SlicerStatsInput<T = number> {
21
23
  numberOfLines: T;
24
+ numberOfNonEmptyLines: T;
22
25
  numberOfCharacters: T;
26
+ numberOfCharactersNoComments: T;
23
27
  numberOfNonWhitespaceCharacters: T;
28
+ numberOfNonWhitespaceCharactersNoComments: T;
24
29
  numberOfRTokens: T;
30
+ numberOfRTokensNoComments: T;
25
31
  numberOfNormalizedTokens: T;
32
+ numberOfNormalizedTokensNoComments: T;
26
33
  }
27
34
  export interface SlicerStatsDataflow<T = number> {
28
35
  numberOfNodes: T;
29
36
  numberOfEdges: T;
30
37
  numberOfCalls: T;
31
38
  numberOfFunctionDefinitions: T;
39
+ sizeOfObject: T;
40
+ }
41
+ /**
42
+ * Please note that these measurements can be negative as there is no guarantee that the memory usage will increase
43
+ * due to, e.g., garbage collection.
44
+ */
45
+ export interface BenchmarkMemoryMeasurement<T = number> extends MergeableRecord {
46
+ heap: T;
47
+ rss: T;
48
+ external: T;
49
+ buffs: T;
32
50
  }
33
51
  /**
34
52
  * The statistics that are collected by the {@link BenchmarkSlicer} and used for benchmarking.
@@ -36,7 +54,12 @@ export interface SlicerStatsDataflow<T = number> {
36
54
  export interface SlicerStats {
37
55
  commonMeasurements: Map<CommonSlicerMeasurements, ElapsedTime>;
38
56
  perSliceMeasurements: Map<SlicingCriteria, PerSliceStats>;
57
+ memory: Map<CommonSlicerMeasurements, BenchmarkMemoryMeasurement>;
39
58
  request: RParseRequestFromFile | RParseRequestFromText;
40
59
  input: SlicerStatsInput;
41
60
  dataflow: SlicerStatsDataflow;
61
+ retrieveTimePerToken: TimePerToken<number>;
62
+ normalizeTimePerToken: TimePerToken<number>;
63
+ dataflowTimePerToken: TimePerToken<number>;
64
+ totalCommonTimePerToken: TimePerToken<number>;
42
65
  }
@@ -2,13 +2,18 @@ import type { SummarizedMeasurement } from '../../util/summarizer';
2
2
  import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
3
3
  export interface SliceSizeCollection {
4
4
  lines: number[];
5
+ nonEmptyLines: number[];
5
6
  characters: number[];
7
+ charactersNoComments: number[];
6
8
  nonWhitespaceCharacters: number[];
9
+ nonWhitespaceCharactersNoComments: number[];
7
10
  /** like library statements during reconstruction */
8
- autoSelected: number[];
11
+ linesWithAutoSelected: number[];
9
12
  dataflowNodes: number[];
10
13
  tokens: number[];
14
+ tokensNoComments: number[];
11
15
  normalizedTokens: number[];
16
+ normalizedTokensNoComments: number[];
12
17
  }
13
18
  /**
14
19
  * @see SlicerStats
@@ -26,13 +31,22 @@ export interface Reduction<T = number> {
26
31
  numberOfNormalizedTokens: T;
27
32
  numberOfDataflowNodes: T;
28
33
  }
34
+ export interface TimePerToken<T = SummarizedMeasurement> {
35
+ raw: T;
36
+ normalized: T;
37
+ }
29
38
  export interface SummarizedPerSliceStats {
30
39
  /** number of total slicing calls */
31
40
  numberOfSlices: number;
32
41
  /** statistics on the used slicing criteria (number of ids within criteria etc.) */
33
42
  sliceCriteriaSizes: SummarizedMeasurement;
34
43
  measurements: Map<PerSliceMeasurements, SummarizedMeasurement>;
44
+ sliceTimePerToken: TimePerToken;
45
+ reconstructTimePerToken: TimePerToken;
46
+ totalPerSliceTimePerToken: TimePerToken;
35
47
  reduction: Reduction<SummarizedMeasurement>;
48
+ /** reduction, but without taking into account comments and empty lines */
49
+ reductionNoFluff: Reduction<SummarizedMeasurement>;
36
50
  failedToRepParse: number;
37
51
  timesHitThreshold: number;
38
52
  sliceSize: {
@@ -44,11 +58,20 @@ export interface UltimateSlicerStats {
44
58
  totalSlices: number;
45
59
  commonMeasurements: Map<CommonSlicerMeasurements, SummarizedMeasurement>;
46
60
  perSliceMeasurements: Map<PerSliceMeasurements, SummarizedMeasurement>;
61
+ retrieveTimePerToken: TimePerToken;
62
+ normalizeTimePerToken: TimePerToken;
63
+ dataflowTimePerToken: TimePerToken;
64
+ totalCommonTimePerToken: TimePerToken;
65
+ sliceTimePerToken: TimePerToken;
66
+ reconstructTimePerToken: TimePerToken;
67
+ totalPerSliceTimePerToken: TimePerToken;
47
68
  /** sum */
48
69
  failedToRepParse: number;
49
70
  /** sum */
50
71
  timesHitThreshold: number;
51
72
  reduction: Reduction<SummarizedMeasurement>;
73
+ /** reduction, but without taking into account comments and empty lines */
74
+ reductionNoFluff: Reduction<SummarizedMeasurement>;
52
75
  input: SlicerStatsInput<SummarizedMeasurement>;
53
76
  dataflow: SlicerStatsDataflow<SummarizedMeasurement>;
54
77
  }
@@ -1,3 +1,3 @@
1
1
  /// <reference types="node" />
2
- export declare function processRunMeasurement(line: Buffer, fileNum: number, lineNum: number, summarizedText: string, outputPath: string): Promise<void>;
3
- export declare function processSummarizedFileMeasurement(file: string, summariesFile: string, outputPath: string): void;
2
+ export declare function processRunMeasurement(line: Buffer, fileNum: number, lineNum: number, textOutputAppendPath: string, rawOutputPath: string): Promise<void>;
3
+ export declare function processSummarizedRunMeasurement(runNum: number, summarizedFiles: string[], appendPath: string): void;