@eagleoutice/flowr 2.2.16 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/README.md +48 -20
  2. package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
  3. package/abstract-interpretation/data-frame/absint-info.js +31 -0
  4. package/abstract-interpretation/data-frame/absint-visitor.d.ts +58 -0
  5. package/abstract-interpretation/data-frame/absint-visitor.js +171 -0
  6. package/abstract-interpretation/data-frame/domain.d.ts +107 -0
  7. package/abstract-interpretation/data-frame/domain.js +315 -0
  8. package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
  9. package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
  10. package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
  11. package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
  12. package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
  13. package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
  14. package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
  15. package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
  16. package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
  17. package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
  18. package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
  19. package/abstract-interpretation/data-frame/resolve-args.js +118 -0
  20. package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
  21. package/abstract-interpretation/data-frame/semantics.js +363 -0
  22. package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
  23. package/abstract-interpretation/data-frame/shape-inference.js +111 -0
  24. package/benchmark/slicer.d.ts +15 -1
  25. package/benchmark/slicer.js +137 -0
  26. package/benchmark/stats/print.js +123 -45
  27. package/benchmark/stats/size-of.d.ts +7 -0
  28. package/benchmark/stats/size-of.js +1 -0
  29. package/benchmark/stats/stats.d.ts +30 -1
  30. package/benchmark/stats/stats.js +4 -2
  31. package/benchmark/summarizer/data.d.ts +33 -2
  32. package/benchmark/summarizer/first-phase/input.js +5 -1
  33. package/benchmark/summarizer/first-phase/process.js +47 -1
  34. package/benchmark/summarizer/second-phase/graph.js +1 -1
  35. package/benchmark/summarizer/second-phase/process.js +102 -4
  36. package/cli/benchmark-app.d.ts +2 -0
  37. package/cli/benchmark-app.js +2 -0
  38. package/cli/benchmark-helper-app.d.ts +2 -0
  39. package/cli/benchmark-helper-app.js +10 -3
  40. package/cli/common/options.js +4 -0
  41. package/cli/repl/commands/repl-query.js +1 -1
  42. package/cli/repl/server/connection.js +14 -5
  43. package/config.d.ts +31 -0
  44. package/config.js +21 -1
  45. package/control-flow/basic-cfg-guided-visitor.d.ts +1 -2
  46. package/control-flow/basic-cfg-guided-visitor.js +0 -6
  47. package/control-flow/cfg-simplification.d.ts +6 -0
  48. package/control-flow/cfg-simplification.js +18 -9
  49. package/control-flow/control-flow-graph.d.ts +3 -8
  50. package/control-flow/control-flow-graph.js +5 -6
  51. package/control-flow/dfg-cfg-guided-visitor.js +1 -1
  52. package/control-flow/extract-cfg.d.ts +2 -2
  53. package/control-flow/extract-cfg.js +52 -63
  54. package/control-flow/semantic-cfg-guided-visitor.d.ts +1 -1
  55. package/control-flow/semantic-cfg-guided-visitor.js +1 -1
  56. package/core/steps/all/static-slicing/00-slice.d.ts +7 -1
  57. package/core/steps/all/static-slicing/00-slice.js +9 -3
  58. package/core/steps/pipeline/default-pipelines.d.ts +74 -74
  59. package/dataflow/environments/built-in.d.ts +7 -5
  60. package/dataflow/environments/built-in.js +16 -13
  61. package/dataflow/eval/resolve/alias-tracking.js +2 -2
  62. package/dataflow/eval/resolve/resolve.d.ts +53 -9
  63. package/dataflow/eval/resolve/resolve.js +132 -38
  64. package/dataflow/graph/dataflowgraph-builder.js +2 -2
  65. package/dataflow/graph/graph.js +1 -1
  66. package/dataflow/graph/invert-dfg.d.ts +2 -0
  67. package/dataflow/graph/invert-dfg.js +17 -0
  68. package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +1 -0
  69. package/dataflow/internal/process/functions/call/built-in/built-in-source.js +4 -0
  70. package/documentation/doc-util/doc-query.js +11 -1
  71. package/documentation/doc-util/doc-search.js +2 -2
  72. package/documentation/print-cfg-wiki.js +3 -4
  73. package/documentation/print-core-wiki.js +2 -2
  74. package/documentation/print-dataflow-graph-wiki.js +7 -0
  75. package/documentation/print-faq-wiki.js +4 -0
  76. package/documentation/print-interface-wiki.js +11 -0
  77. package/documentation/print-linter-wiki.js +36 -4
  78. package/documentation/print-linting-and-testing-wiki.js +13 -1
  79. package/documentation/print-onboarding-wiki.js +4 -0
  80. package/documentation/print-query-wiki.js +29 -3
  81. package/linter/linter-executor.js +1 -2
  82. package/linter/linter-format.d.ts +26 -4
  83. package/linter/linter-format.js +25 -6
  84. package/linter/linter-rules.d.ts +63 -12
  85. package/linter/linter-rules.js +5 -1
  86. package/linter/rules/absolute-path.d.ts +4 -7
  87. package/linter/rules/absolute-path.js +9 -6
  88. package/linter/rules/dataframe-access-validation.d.ts +55 -0
  89. package/linter/rules/dataframe-access-validation.js +118 -0
  90. package/linter/rules/dead-code.d.ts +43 -0
  91. package/linter/rules/dead-code.js +50 -0
  92. package/linter/rules/deprecated-functions.d.ts +3 -2
  93. package/linter/rules/deprecated-functions.js +3 -1
  94. package/linter/rules/file-path-validity.d.ts +4 -4
  95. package/linter/rules/file-path-validity.js +8 -6
  96. package/linter/rules/naming-convention.d.ts +5 -4
  97. package/linter/rules/naming-convention.js +8 -2
  98. package/linter/rules/seeded-randomness.d.ts +4 -3
  99. package/linter/rules/seeded-randomness.js +3 -1
  100. package/linter/rules/unused-definition.d.ts +2 -0
  101. package/linter/rules/unused-definition.js +3 -1
  102. package/package.json +2 -2
  103. package/queries/catalog/dependencies-query/dependencies-query-executor.js +6 -1
  104. package/queries/catalog/dependencies-query/function-info/read-functions.js +1 -0
  105. package/queries/catalog/dependencies-query/function-info/write-functions.js +1 -0
  106. package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
  107. package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
  108. package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
  109. package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
  110. package/queries/catalog/linter-query/linter-query-format.js +1 -1
  111. package/queries/catalog/location-map-query/location-map-query-executor.js +7 -5
  112. package/queries/catalog/location-map-query/location-map-query-format.d.ts +3 -0
  113. package/queries/catalog/location-map-query/location-map-query-format.js +1 -0
  114. package/queries/catalog/search-query/search-query-executor.js +1 -1
  115. package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -1
  116. package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +3 -0
  117. package/queries/catalog/static-slice-query/static-slice-query-format.js +3 -1
  118. package/queries/query-print.d.ts +1 -1
  119. package/queries/query-print.js +0 -1
  120. package/queries/query.d.ts +77 -6
  121. package/queries/query.js +26 -11
  122. package/search/flowr-search-builder.d.ts +6 -6
  123. package/search/flowr-search-executor.d.ts +2 -2
  124. package/search/flowr-search-executor.js +1 -1
  125. package/search/flowr-search.d.ts +13 -8
  126. package/search/flowr-search.js +21 -0
  127. package/search/search-executor/search-enrichers.d.ts +87 -20
  128. package/search/search-executor/search-enrichers.js +44 -5
  129. package/search/search-executor/search-generators.d.ts +4 -4
  130. package/search/search-executor/search-generators.js +12 -7
  131. package/search/search-executor/search-mappers.js +3 -2
  132. package/search/search-executor/search-transformer.d.ts +3 -3
  133. package/search/search-executor/search-transformer.js +2 -2
  134. package/slicing/static/static-slicer.d.ts +4 -2
  135. package/slicing/static/static-slicer.js +10 -4
  136. package/util/collections/arrays.d.ts +2 -0
  137. package/util/collections/arrays.js +9 -0
  138. package/util/files.d.ts +8 -2
  139. package/util/files.js +22 -4
  140. package/util/mermaid/dfg.js +4 -2
  141. package/util/r-value.d.ts +23 -0
  142. package/util/r-value.js +113 -0
  143. package/util/range.d.ts +1 -0
  144. package/util/range.js +5 -1
  145. package/util/version.js +1 -1
  146. package/util/cfg/cfg.d.ts +0 -0
  147. package/util/cfg/cfg.js +0 -2
@@ -26,6 +26,10 @@ const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-si
26
26
  const vertex_1 = require("../dataflow/graph/vertex");
27
27
  const arrays_1 = require("../util/collections/arrays");
28
28
  const config_1 = require("../config");
29
+ const extract_cfg_1 = require("../control-flow/extract-cfg");
30
+ const absint_info_1 = require("../abstract-interpretation/data-frame/absint-info");
31
+ const domain_1 = require("../abstract-interpretation/data-frame/domain");
32
+ const shape_inference_1 = require("../abstract-interpretation/data-frame/shape-inference");
29
33
  /**
30
34
  * The logger to be used for benchmarking as a global object.
31
35
  */
@@ -36,10 +40,12 @@ class BenchmarkSlicer {
36
40
  perSliceMeasurements = new Map();
37
41
  deltas = new Map();
38
42
  parserName;
43
+ config;
39
44
  stats;
40
45
  loadedXml;
41
46
  dataflow;
42
47
  normalizedAst;
48
+ controlFlow;
43
49
  totalStopwatch;
44
50
  finished = false;
45
51
  // Yes, this is unclean, but we know that we assign the executor during the initialization and this saves us from having to check for nullability every time
@@ -55,6 +61,7 @@ class BenchmarkSlicer {
55
61
  */
56
62
  async init(request, config, autoSelectIf, threshold) {
57
63
  (0, assert_1.guard)(this.stats === undefined, 'cannot initialize the slicer twice');
64
+ this.config = config;
58
65
  // we know these are in sync so we just cast to one of them
59
66
  this.parser = await this.commonMeasurements.measure('initialize R session', async () => {
60
67
  if (this.parserName === 'r-shell') {
@@ -258,6 +265,114 @@ class BenchmarkSlicer {
258
265
  code: stats.reconstructedCode
259
266
  };
260
267
  }
268
+ /**
269
+ * Extract the control flow graph using {@link extractCFG}
270
+ */
271
+ extractCFG() {
272
+ exports.benchmarkLogger.trace('try to extract the control flow graph');
273
+ this.guardActive();
274
+ (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined for control flow extraction');
275
+ (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined for control flow extraction');
276
+ (0, assert_1.guard)(this.config !== undefined, 'config should be defined for control flow extraction');
277
+ const ast = this.normalizedAst;
278
+ const dfg = this.dataflow.graph;
279
+ const config = this.config;
280
+ this.controlFlow = this.measureSimpleStep('extract control flow graph', () => (0, extract_cfg_1.extractCfg)(ast, config, dfg));
281
+ }
282
+ /**
283
+ * Infer the shape of data frames using abstract interpretation with {@link inferDataFrameShapes}
284
+ *
285
+ * @returns The statistics of the data frame shape inference
286
+ */
287
+ inferDataFrameShapes() {
288
+ exports.benchmarkLogger.trace('try to infer shapes for data frames');
289
+ (0, assert_1.guard)(this.stats !== undefined && !this.finished, 'need to call init before, and can not do after finish!');
290
+ (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined for data frame shape inference');
291
+ (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined for data frame shape inference');
292
+ (0, assert_1.guard)(this.controlFlow !== undefined, 'controlFlow should be defined for data frame shape inference');
293
+ (0, assert_1.guard)(this.config !== undefined, 'config should be defined for data frame shape inference');
294
+ const ast = this.normalizedAst;
295
+ const dfg = this.dataflow.graph;
296
+ const cfinfo = this.controlFlow;
297
+ const config = this.config;
298
+ const stats = {
299
+ numberOfDataFrameFiles: 0,
300
+ numberOfNonDataFrameFiles: 0,
301
+ numberOfResultConstraints: 0,
302
+ numberOfResultingValues: 0,
303
+ numberOfResultingTop: 0,
304
+ numberOfResultingBottom: 0,
305
+ numberOfEmptyNodes: 0,
306
+ numberOfOperationNodes: 0,
307
+ numberOfValueNodes: 0,
308
+ sizeOfInfo: 0,
309
+ perNodeStats: new Map()
310
+ };
311
+ const result = this.measureSimpleStep('infer data frame shapes', () => (0, shape_inference_1.inferDataFrameShapes)(cfinfo, dfg, ast, config));
312
+ stats.numberOfResultConstraints = result.size;
313
+ for (const value of result.values()) {
314
+ if ((0, domain_1.equalDataFrameDomain)(value, domain_1.DataFrameTop)) {
315
+ stats.numberOfResultingTop++;
316
+ }
317
+ else if ((0, domain_1.equalDataFrameDomain)(value, domain_1.DataFrameBottom)) {
318
+ stats.numberOfResultingBottom++;
319
+ }
320
+ else {
321
+ stats.numberOfResultingValues++;
322
+ }
323
+ }
324
+ (0, visitor_1.visitAst)(this.normalizedAst.ast, (node) => {
325
+ if (node.info.dataFrame === undefined) {
326
+ return;
327
+ }
328
+ stats.sizeOfInfo += (0, size_of_1.safeSizeOf)([node.info.dataFrame]);
329
+ const expression = (0, absint_info_1.hasDataFrameExpressionInfo)(node) ? node.info.dataFrame : undefined;
330
+ const value = node.info.dataFrame.domain?.get(node.info.id);
331
+ // Only store per-node information for nodes representing expressions or nodes with abstract values
332
+ if (expression === undefined && value === undefined) {
333
+ stats.numberOfEmptyNodes++;
334
+ return;
335
+ }
336
+ const nodeStats = {
337
+ numberOfEntries: node.info.dataFrame?.domain?.size ?? 0
338
+ };
339
+ if (expression !== undefined) {
340
+ nodeStats.mappedOperations = expression.operations.map(op => op.operation);
341
+ stats.numberOfOperationNodes++;
342
+ if (value !== undefined) {
343
+ nodeStats.inferredColNames = value.colnames === domain_1.ColNamesTop ? 'top' : value.colnames.length;
344
+ nodeStats.inferredColCount = this.getInferredSize(value.cols);
345
+ nodeStats.inferredRowCount = this.getInferredSize(value.rows);
346
+ nodeStats.approxRangeColCount = value.cols === domain_1.IntervalBottom ? 0 : value.cols[1] - value.cols[0];
347
+ nodeStats.approxRangeRowCount = value.rows === domain_1.IntervalBottom ? 0 : value.rows[1] - value.rows[0];
348
+ }
349
+ }
350
+ if (value !== undefined) {
351
+ stats.numberOfValueNodes++;
352
+ }
353
+ stats.perNodeStats.set(node.info.id, nodeStats);
354
+ });
355
+ if (stats.numberOfOperationNodes > 0) {
356
+ stats.numberOfDataFrameFiles = 1;
357
+ }
358
+ else {
359
+ stats.numberOfNonDataFrameFiles = 1;
360
+ }
361
+ this.stats.dataFrameShape = stats;
362
+ return stats;
363
+ }
364
+ getInferredSize(value) {
365
+ if ((0, domain_1.equalInterval)(value, domain_1.IntervalTop)) {
366
+ return 'top';
367
+ }
368
+ else if (value === domain_1.IntervalBottom) {
369
+ return 'bottom';
370
+ }
371
+ else if (!isFinite(value[1])) {
372
+ return 'infinite';
373
+ }
374
+ return Math.floor((value[0] + value[1]) / 2);
375
+ }
261
376
  /** Bridging the gap between the new internal and the old names for the benchmarking */
262
377
  async measureCommonStep(expectedStep, keyToMeasure) {
263
378
  const memoryInit = process.memoryUsage();
@@ -271,6 +386,18 @@ class BenchmarkSlicer {
271
386
  });
272
387
  return result;
273
388
  }
389
+ measureSimpleStep(keyToMeasure, measurement) {
390
+ const memoryInit = process.memoryUsage();
391
+ const result = this.commonMeasurements.measure(keyToMeasure, measurement);
392
+ const memoryEnd = process.memoryUsage();
393
+ this.deltas.set(keyToMeasure, {
394
+ heap: memoryEnd.heapUsed - memoryInit.heapUsed,
395
+ rss: memoryEnd.rss - memoryInit.rss,
396
+ external: memoryEnd.external - memoryInit.external,
397
+ buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
398
+ });
399
+ return result;
400
+ }
274
401
  async measureSliceStep(expectedStep, measure, keyToMeasure) {
275
402
  const { result } = await measure.measureAsync(keyToMeasure, () => this.executor.nextStep(expectedStep));
276
403
  return result;
@@ -330,6 +457,8 @@ class BenchmarkSlicer {
330
457
  const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
331
458
  const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
332
459
  const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
460
+ const controlFlowTime = Number(this.stats.commonMeasurements.get('extract control flow graph'));
461
+ const dataFrameShapeTime = Number(this.stats.commonMeasurements.get('infer data frame shapes'));
333
462
  this.stats.retrieveTimePerToken = {
334
463
  raw: retrieveTime / this.stats.input.numberOfRTokens,
335
464
  normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
@@ -346,6 +475,14 @@ class BenchmarkSlicer {
346
475
  raw: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
347
476
  normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
348
477
  };
478
+ this.stats.controlFlowTimePerToken = !isNaN(controlFlowTime) ? {
479
+ raw: controlFlowTime / this.stats.input.numberOfRTokens,
480
+ normalized: controlFlowTime / this.stats.input.numberOfNormalizedTokens,
481
+ } : undefined;
482
+ this.stats.dataFrameShapeTimePerToken = !isNaN(dataFrameShapeTime) ? {
483
+ raw: dataFrameShapeTime / this.stats.input.numberOfRTokens,
484
+ normalized: dataFrameShapeTime / this.stats.input.numberOfNormalizedTokens,
485
+ } : undefined;
349
486
  return {
350
487
  stats: this.stats,
351
488
  parse: typeof this.loadedXml === 'string' ? this.loadedXml : JSON.stringify(this.loadedXml),
@@ -84,22 +84,34 @@ function convertNumberToNiceBytes(x) {
84
84
  function stats2string(stats) {
85
85
  let result = `
86
86
  Request: ${JSON.stringify(stats.request)}
87
- Shell init time: ${print(stats.commonMeasurements, 'initialize R session')}
88
- AST retrieval: ${print(stats.commonMeasurements, 'retrieve AST from R code')}
89
- AST retrieval per token: ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
90
- AST retrieval per R token: ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
91
- AST normalization: ${print(stats.commonMeasurements, 'normalize R AST')}
92
- AST normalization per token: ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
93
- AST normalization per R token:${formatNanoseconds(stats.normalizeTimePerToken.raw)}
94
- Dataflow creation: ${print(stats.commonMeasurements, 'produce dataflow information')}
95
- Dataflow creation per token: ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
96
- Dataflow creation per R token:${formatNanoseconds(stats.dataflowTimePerToken.raw)}
97
- Total common time per token: ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
98
- Total common time per R token:${formatNanoseconds(stats.totalCommonTimePerToken.raw)}
99
-
100
- Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:`;
87
+ Shell init time: ${print(stats.commonMeasurements, 'initialize R session')}
88
+ AST retrieval: ${print(stats.commonMeasurements, 'retrieve AST from R code')}
89
+ AST retrieval per token: ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
90
+ AST retrieval per R token: ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
91
+ AST normalization: ${print(stats.commonMeasurements, 'normalize R AST')}
92
+ AST normalization per token: ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
93
+ AST normalization per R token: ${formatNanoseconds(stats.normalizeTimePerToken.raw)}
94
+ Dataflow creation: ${print(stats.commonMeasurements, 'produce dataflow information')}
95
+ Dataflow creation per token: ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
96
+ Dataflow creation per R token: ${formatNanoseconds(stats.dataflowTimePerToken.raw)}
97
+ Total common time per token: ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
98
+ Total common time per R token: ${formatNanoseconds(stats.totalCommonTimePerToken.raw)}`;
99
+ if (stats.commonMeasurements.has('extract control flow graph') && stats.controlFlowTimePerToken !== undefined) {
100
+ result += `
101
+ Control flow extraction: ${print(stats.commonMeasurements, 'extract control flow graph')}
102
+ Control flow extraction per token: ${formatNanoseconds(stats.controlFlowTimePerToken.normalized)}
103
+ Control flow extraction per R token: ${formatNanoseconds(stats.controlFlowTimePerToken.raw)}`;
104
+ }
105
+ if (stats.commonMeasurements.has('infer data frame shapes') && stats.dataFrameShapeTimePerToken !== undefined) {
106
+ result += `
107
+ Dataframe shape inference: ${print(stats.commonMeasurements, 'infer data frame shapes')}
108
+ Dataframe shape inference per token: ${formatNanoseconds(stats.dataFrameShapeTimePerToken.normalized)}
109
+ Dataframe shape inference per R token:${formatNanoseconds(stats.dataFrameShapeTimePerToken.raw)}`;
110
+ }
101
111
  if (stats.perSliceMeasurements.numberOfSlices > 0) {
102
112
  result += `
113
+
114
+ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:
103
115
  Total: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
104
116
  Slice creation: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
105
117
  Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.normalized)}
@@ -110,7 +122,7 @@ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.pe
110
122
  Total per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.normalized)}
111
123
  Total per R token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.raw)}
112
124
  Used Slice Criteria Sizes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
113
- Result Slice Sizes:
125
+ Result Slice Sizes:
114
126
  Number of lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.lines)}
115
127
  Number of non-empty lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonEmptyLines)}
116
128
  Number of characters: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.characters)}
@@ -120,12 +132,12 @@ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.pe
120
132
  Number of R tokens (w/o comments): ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.tokensNoComments)}
121
133
  Normalized R tokens: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokens)}
122
134
  Normalized R tokens (w/o comments): ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokensNoComments)}
123
- Number of dataflow nodes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}
124
- `;
135
+ Number of dataflow nodes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}`;
125
136
  }
126
- return `${result}
127
- Shell close: ${print(stats.commonMeasurements, 'close R session')}
128
- Total: ${print(stats.commonMeasurements, 'total')}
137
+ result += `
138
+
139
+ Shell close: ${print(stats.commonMeasurements, 'close R session')}
140
+ Total: ${print(stats.commonMeasurements, 'total')}
129
141
 
130
142
  Input:
131
143
  Number of lines: ${pad(stats.input.numberOfLines)}
@@ -148,9 +160,64 @@ Dataflow:
148
160
  Number of stored Env indices: ${pad(stats.dataflow.storedEnvIndices)}
149
161
  Number of overwritten indices: ${pad(stats.dataflow.overwrittenIndices)}
150
162
  Size of graph: ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
163
+ if (stats.dataFrameShape !== undefined) {
164
+ result += `
165
+
166
+ Dataframe shape inference:
167
+ Number of resulting constraints:${pad(stats.dataFrameShape.numberOfResultConstraints)}
168
+ Number of operation nodes: ${pad(stats.dataFrameShape.numberOfOperationNodes)}
169
+ Number of abstract value nodes: ${pad(stats.dataFrameShape.numberOfValueNodes)}
170
+ Number of entries per node: ${pad(stats.dataFrameShape.numberOfEntriesPerNode.mean)}
171
+ Number of operations: ${pad(stats.dataFrameShape.numberOfOperations)}
172
+ Number of total values: ${pad(stats.dataFrameShape.numberOfTotalValues)}
173
+ Number of total top: ${pad(stats.dataFrameShape.numberOfTotalTop)}
174
+ Inferred column names per node: ${pad(stats.dataFrameShape.inferredColNames.mean)}
175
+ Number of column names values: ${pad(stats.dataFrameShape.numberOfColNamesValues)}
176
+ Number of column names Top: ${pad(stats.dataFrameShape.numberOfColNamesTop)}
177
+ Inferred column count per node: ${pad(stats.dataFrameShape.inferredColCount.mean)}
178
+ Number of column count values: ${pad(stats.dataFrameShape.numberOfColCountValues)}
179
+ Number of column count Top: ${pad(stats.dataFrameShape.numberOfColCountTop)}
180
+ Number of column count infinite:${pad(stats.dataFrameShape.numberOfColCountInfinite)}
181
+ Inferred row count per node: ${pad(stats.dataFrameShape.inferredRowCount.mean)}
182
+ Number of row count values: ${pad(stats.dataFrameShape.numberOfRowCountValues)}
183
+ Number of row count Top: ${pad(stats.dataFrameShape.numberOfRowCountTop)}
184
+ Number of row count infinite: ${pad(stats.dataFrameShape.numberOfRowCountInfinite)}
185
+ Size of data frame shape info: ${convertNumberToNiceBytes(stats.dataFrameShape.sizeOfInfo)}`;
186
+ }
187
+ return result;
151
188
  }
152
189
  function ultimateStats2String(stats) {
153
- const slice = stats.totalSlices > 0 ? `Slice summary for:
190
+ let result = `
191
+ Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
192
+ Shell init time: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
193
+ AST retrieval: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
194
+ AST retrieval per token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
195
+ AST retrieval per R token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
196
+ AST normalization: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
197
+ AST normalization per token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
198
+ AST normalization per R token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
199
+ Dataflow creation: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
200
+ Dataflow creation per token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
201
+ Dataflow creation per R token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
202
+ Total common time per token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
203
+ Total common time per R token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}`;
204
+ if (stats.commonMeasurements.has('extract control flow graph') && stats.controlFlowTimePerToken !== undefined) {
205
+ result += `
206
+ Control flow extraction: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('extract control flow graph'))}
207
+ Control flow extraction per token: ${formatSummarizedTimeMeasure(stats.controlFlowTimePerToken.normalized)}
208
+ Control flow extraction per R token: ${formatSummarizedTimeMeasure(stats.controlFlowTimePerToken.raw)}`;
209
+ }
210
+ if (stats.commonMeasurements.has('infer data frame shapes') && stats.dataFrameShapeTimePerToken !== undefined) {
211
+ result += `
212
+ Dataframe shape inference: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('infer data frame shapes'))}
213
+ Dataframe shape inference per token: ${formatSummarizedTimeMeasure(stats.dataFrameShapeTimePerToken.normalized)}
214
+ Dataframe shape inference per R token:${formatSummarizedTimeMeasure(stats.dataFrameShapeTimePerToken.raw)}`;
215
+ }
216
+ // Used Slice Criteria Sizes: ${formatSummarizedMeasure(stats.perSliceMeasurements.sliceCriteriaSizes)}
217
+ if (stats.totalSlices > 0) {
218
+ result += `
219
+
220
+ Slicing summary for ${stats.totalSlices} slice${stats.totalSlices !== 1 ? 's' : ''}:
154
221
  Total: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
155
222
  Slice creation: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
156
223
  Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.sliceTimePerToken.normalized)}
@@ -161,29 +228,14 @@ function ultimateStats2String(stats) {
161
228
  Total per token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.normalized)}
162
229
  Total per R token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.raw)}
163
230
  Failed to Re-Parse: ${pad(stats.failedToRepParse)}/${stats.totalSlices}
164
- Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
231
+ Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
165
232
  ${reduction2String('Reductions', stats.reduction)}
166
- ${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}` : 'No slices';
167
- // Used Slice Criteria Sizes: ${formatSummarizedMeasure(stats.perSliceMeasurements.sliceCriteriaSizes)}
168
- return `
169
- Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
170
- Shell init time: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
171
- AST retrieval: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
172
- AST retrieval per token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
173
- AST retrieval per R token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
174
- AST normalization: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
175
- AST normalization per token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
176
- AST normalization per R token:${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
177
- Dataflow creation: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
178
- Dataflow creation per token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
179
- Dataflow creation per R token:${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
180
- Total common time per token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
181
- Total common time per R token:${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}
182
-
183
- ${slice}
233
+ ${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}`;
234
+ }
235
+ result += `
184
236
 
185
- Shell close: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
186
- Total: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
237
+ Shell close: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
238
+ Total: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
187
239
 
188
240
  Input:
189
241
  Number of lines: ${formatSummarizedMeasure(stats.input.numberOfLines)}
@@ -205,12 +257,38 @@ Dataflow:
205
257
  Number of stored Vtx indices: ${formatSummarizedMeasure(stats.dataflow.storedVertexIndices)}
206
258
  Number of stored Env indices: ${formatSummarizedMeasure(stats.dataflow.storedEnvIndices)}
207
259
  Number of overwritten indices: ${formatSummarizedMeasure(stats.dataflow.overwrittenIndices)}
208
- Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
209
- `;
260
+ Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}`;
261
+ if (stats.dataFrameShape !== undefined) {
262
+ result += `
263
+
264
+ Dataframe shape inference:
265
+ Number of resulting constraints:${formatSummarizedMeasure(stats.dataFrameShape.numberOfResultConstraints)}
266
+ Number of operation nodes: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfOperationNodes)}
267
+ Number of abstract value nodes: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfValueNodes)}
268
+ Number of entries per node: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfEntriesPerNode)}
269
+ Number of operations: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfOperations)}
270
+ Number of total values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfTotalValues)}
271
+ Number of total top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfTotalTop)}
272
+ Inferred column names per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredColNames)}
273
+ Number of column names values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColNamesValues)}
274
+ Number of column names top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColNamesTop)}
275
+ Inferred column count per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredColCount)}
276
+ Number of column count exact: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountExact)}
277
+ Number of column count values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountValues)}
278
+ Number of column count top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountTop)}
279
+ Number of column count infinite:${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountInfinite)}
280
+ Inferred row count per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredRowCount)}
281
+ Number of row count exact: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountExact)}
282
+ Number of row count values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountValues)}
283
+ Number of row count top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountTop)}
284
+ Number of row count infinite: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountInfinite)}
285
+ Size of data frame shape info: ${formatSummarizedMeasure(stats.dataFrameShape.sizeOfInfo, convertNumberToNiceBytes)}`;
286
+ }
287
+ return result;
210
288
  }
211
289
  function reduction2String(title, reduction) {
212
290
  return `
213
- ${title} (reduced by x%):
291
+ ${title} (reduced by x%):
214
292
  Number of lines: ${formatSummarizedMeasure(reduction.numberOfLines, asPercentage)}
215
293
  Number of lines no auto: ${formatSummarizedMeasure(reduction.numberOfLinesNoAutoSelection, asPercentage)}
216
294
  Number of characters: ${formatSummarizedMeasure(reduction.numberOfCharacters, asPercentage)}
@@ -1,3 +1,10 @@
1
1
  import type { DataflowGraph } from '../../dataflow/graph/graph';
2
2
  /** Returns the size of the given df graph in bytes (without sharing in-memory) */
3
3
  export declare function getSizeOfDfGraph(df: DataflowGraph): number;
4
+ /**
5
+ * Calculates the size of an array in bytes.
6
+ *
7
+ * @param array - The array to calculate the size of.
8
+ * @returns The size of the array in bytes.
9
+ */
10
+ export declare function safeSizeOf<T>(array: T[]): number;
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.getSizeOfDfGraph = getSizeOfDfGraph;
7
+ exports.safeSizeOf = safeSizeOf;
7
8
  const environment_1 = require("../../dataflow/environments/environment");
8
9
  const vertex_1 = require("../../dataflow/graph/vertex");
9
10
  const identifier_1 = require("../../dataflow/environments/identifier");
@@ -4,7 +4,10 @@ import type { ReconstructionResult } from '../../reconstruct/reconstruct';
4
4
  import type { RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge/retriever';
5
5
  import type { TimePerToken } from '../summarizer/data';
6
6
  import type { MergeableRecord } from '../../util/objects';
7
- export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
7
+ import type { DataFrameOperationName } from '../../abstract-interpretation/data-frame/semantics';
8
+ export declare const RequiredSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
9
+ export declare const OptionalSlicerMeasurements: readonly ["extract control flow graph", "infer data frame shapes"];
10
+ export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total", "extract control flow graph", "infer data frame shapes"];
8
11
  export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number];
9
12
  export declare const PerSliceMeasurements: readonly ["static slicing", "reconstruct code", "total"];
10
13
  export type PerSliceMeasurements = typeof PerSliceMeasurements[number];
@@ -41,6 +44,29 @@ export interface SlicerStatsDataflow<T = number> {
41
44
  storedEnvIndices: T;
42
45
  overwrittenIndices: T;
43
46
  }
47
+ export interface SlicerStatsDfShape<T = number> {
48
+ numberOfDataFrameFiles: T extends number ? 0 | 1 : number;
49
+ numberOfNonDataFrameFiles: T extends number ? 0 | 1 : number;
50
+ numberOfResultConstraints: T;
51
+ numberOfResultingValues: T;
52
+ numberOfResultingTop: T;
53
+ numberOfResultingBottom: T;
54
+ numberOfEmptyNodes: T;
55
+ numberOfOperationNodes: T;
56
+ numberOfValueNodes: T;
57
+ sizeOfInfo: T;
58
+ perNodeStats: Map<NodeId, PerNodeStatsDfShape<T>>;
59
+ }
60
+ export interface PerNodeStatsDfShape<T = number> {
61
+ numberOfEntries: T;
62
+ mappedOperations?: DataFrameOperationName[];
63
+ inferredColNames?: T | 'top';
64
+ inferredColCount?: T | 'bottom' | 'infinite' | 'top';
65
+ inferredRowCount?: T | 'bottom' | 'infinite' | 'top';
66
+ /** difference between upper and lower bound of interval domain (to estimate approximation) */
67
+ approxRangeColCount?: T;
68
+ approxRangeRowCount?: T;
69
+ }
44
70
  /**
45
71
  * Please note that these measurements can be negative, as there is no guarantee that the memory usage will increase
46
72
  * due to, e.g., garbage collection.
@@ -61,8 +87,11 @@ export interface SlicerStats {
61
87
  request: RParseRequestFromFile | RParseRequestFromText;
62
88
  input: SlicerStatsInput;
63
89
  dataflow: SlicerStatsDataflow;
90
+ dataFrameShape?: SlicerStatsDfShape;
64
91
  retrieveTimePerToken: TimePerToken<number>;
65
92
  normalizeTimePerToken: TimePerToken<number>;
66
93
  dataflowTimePerToken: TimePerToken<number>;
67
94
  totalCommonTimePerToken: TimePerToken<number>;
95
+ controlFlowTimePerToken?: TimePerToken<number>;
96
+ dataFrameShapeTimePerToken?: TimePerToken<number>;
68
97
  }
@@ -1,6 +1,8 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PerSliceMeasurements = exports.CommonSlicerMeasurements = void 0;
4
- exports.CommonSlicerMeasurements = ['initialize R session', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'];
3
+ exports.PerSliceMeasurements = exports.CommonSlicerMeasurements = exports.OptionalSlicerMeasurements = exports.RequiredSlicerMeasurements = void 0;
4
+ exports.RequiredSlicerMeasurements = ['initialize R session', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'];
5
+ exports.OptionalSlicerMeasurements = ['extract control flow graph', 'infer data frame shapes'];
6
+ exports.CommonSlicerMeasurements = [...exports.RequiredSlicerMeasurements, ...exports.OptionalSlicerMeasurements];
5
7
  exports.PerSliceMeasurements = ['static slicing', 'reconstruct code', 'total'];
6
8
  //# sourceMappingURL=stats.js.map
@@ -1,5 +1,6 @@
1
+ import type { DataFrameOperationName } from '../../abstract-interpretation/data-frame/semantics';
1
2
  import type { SummarizedMeasurement } from '../../util/summarizer';
2
- import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
3
+ import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDfShape, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
3
4
  export interface SliceSizeCollection {
4
5
  lines: number[];
5
6
  nonEmptyLines: number[];
@@ -21,7 +22,8 @@ export interface SliceSizeCollection {
21
22
  */
22
23
  export type SummarizedSlicerStats = {
23
24
  perSliceMeasurements: SummarizedPerSliceStats;
24
- } & Omit<SlicerStats, 'perSliceMeasurements'>;
25
+ dataFrameShape?: SummarizedDfShapeStats;
26
+ } & Omit<SlicerStats, 'perSliceMeasurements' | 'dataFrameShape'>;
25
27
  export interface Reduction<T = number> {
26
28
  numberOfLines: T;
27
29
  numberOfLinesNoAutoSelection: T;
@@ -62,6 +64,8 @@ export interface UltimateSlicerStats {
62
64
  normalizeTimePerToken: TimePerToken;
63
65
  dataflowTimePerToken: TimePerToken;
64
66
  totalCommonTimePerToken: TimePerToken;
67
+ controlFlowTimePerToken?: TimePerToken;
68
+ dataFrameShapeTimePerToken?: TimePerToken;
65
69
  sliceTimePerToken: TimePerToken;
66
70
  reconstructTimePerToken: TimePerToken;
67
71
  totalPerSliceTimePerToken: TimePerToken;
@@ -74,4 +78,31 @@ export interface UltimateSlicerStats {
74
78
  reductionNoFluff: Reduction<SummarizedMeasurement>;
75
79
  input: SlicerStatsInput<SummarizedMeasurement>;
76
80
  dataflow: SlicerStatsDataflow<SummarizedMeasurement>;
81
+ dataFrameShape?: SummarizedDfShapeStats<SummarizedMeasurement>;
82
+ }
83
+ export interface SummarizedDfShapeStats<T = number> extends Omit<SlicerStatsDfShape<T>, 'perNodeStats'> {
84
+ numberOfEntriesPerNode: SummarizedMeasurement;
85
+ numberOfOperations: T;
86
+ numberOfTotalValues: T;
87
+ numberOfTotalTop: T;
88
+ numberOfTotalBottom: T;
89
+ inferredColNames: SummarizedMeasurement;
90
+ numberOfColNamesValues: T;
91
+ numberOfColNamesTop: T;
92
+ numberOfColNamesBottom: T;
93
+ inferredColCount: SummarizedMeasurement;
94
+ numberOfColCountExact: T;
95
+ numberOfColCountValues: T;
96
+ numberOfColCountTop: T;
97
+ numberOfColCountInfinite: T;
98
+ numberOfColCountBottom: T;
99
+ approxRangeColCount: SummarizedMeasurement;
100
+ inferredRowCount: SummarizedMeasurement;
101
+ numberOfRowCountExact: T;
102
+ numberOfRowCountValues: T;
103
+ numberOfRowCountTop: T;
104
+ numberOfRowCountInfinite: T;
105
+ numberOfRowCountBottom: T;
106
+ approxRangeRowCount: SummarizedMeasurement;
107
+ perOperationNumber: Map<DataFrameOperationName, T>;
77
108
  }
@@ -28,7 +28,11 @@ async function processRunMeasurement(line, fileNum, lineNum, textOutputAppendPat
28
28
  return [k, BigInt(v.slice(0, -1))];
29
29
  })),
30
30
  perSliceMeasurements: new Map(got.stats.perSliceMeasurements
31
- .map(([k, v]) => mapPerSliceStats(k, v)))
31
+ .map(([k, v]) => mapPerSliceStats(k, v))),
32
+ dataFrameShape: got.stats.dataFrameShape !== undefined ? {
33
+ ...got.stats.dataFrameShape,
34
+ perNodeStats: new Map(got.stats.dataFrameShape.perNodeStats)
35
+ } : undefined
32
36
  }
33
37
  };
34
38
  const totalSlices = got.stats.perSliceMeasurements.size;
@@ -53,6 +53,7 @@ const retriever_1 = require("../../../r-bridge/retriever");
53
53
  const visitor_1 = require("../../../r-bridge/lang-4.x/ast/model/processing/visitor");
54
54
  const type_1 = require("../../../r-bridge/lang-4.x/ast/model/type");
55
55
  const arrays_1 = require("../../../util/collections/arrays");
56
+ const semantics_1 = require("../../../abstract-interpretation/data-frame/semantics");
56
57
  const tempfile = (() => {
57
58
  let _tempfile = undefined;
58
59
  return () => {
@@ -250,9 +251,54 @@ async function summarizeSlicerStats(stats, report = () => {
250
251
  normalizedTokensNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.normalizedTokensNoComments),
251
252
  dataflowNodes: (0, summarizer_1.summarizeMeasurement)(sliceSize.dataflowNodes)
252
253
  }
253
- }
254
+ },
255
+ dataFrameShape: stats.dataFrameShape ? summarizeDfShapeStats(stats.dataFrameShape) : undefined
256
+ };
257
+ }
258
/**
 * Collapses the per-node data frame shape statistics of one run into aggregate figures.
 *
 * Every property of the argument except `perNodeStats` is copied to the result
 * unchanged; the per-node records are replaced by counts and summarized measurements.
 *
 * @param {object} shapeStats - Shape statistics. `perNodeStats` must expose `values()`
 *   returning an iterable of per-node records (a `Map` does).
 * @returns {object} The copied properties plus the aggregated per-node figures.
 */
function summarizeDfShapeStats({ perNodeStats, ...stats }) {
    // `Array.from` accepts any iterable, so this does not depend on the
    // iterator-helper method `toArray()` being available on the iterator.
    const nodeStats = Array.from(perNodeStats.values());
    const isTop = (value) => value === 'top';
    const isInfinite = (value) => value === 'infinite';
    const isBottom = (value) => value === 'bottom';
    const isValue = (value) => value !== undefined && !isTop(value) && !isInfinite(value) && !isBottom(value);
    const isZero = (value) => value === 0;
    const count = (values, predicate) => values.filter(predicate).length;
    // `Number.isFinite` does not coerce its argument: besides infinite ranges it also
    // drops `undefined` and `null`, which the global `isFinite` would treat as 0.
    const finiteOnly = (ranges) => ranges.filter(range => Number.isFinite(range));
    // Project every per-node column once instead of re-mapping it for each counter.
    const colNames = nodeStats.map(s => s.inferredColNames);
    const colCounts = nodeStats.map(s => s.inferredColCount);
    const rowCounts = nodeStats.map(s => s.inferredRowCount);
    const colRanges = nodeStats.map(s => s.approxRangeColCount);
    const rowRanges = nodeStats.map(s => s.approxRangeRowCount);
    return {
        ...stats,
        numberOfEntriesPerNode: (0, summarizer_1.summarizeMeasurement)(nodeStats.map(s => s.numberOfEntries)),
        numberOfOperations: (0, arrays_1.arraySum)(nodeStats.map(s => s.mappedOperations?.length).filter(assert_1.isNotUndefined)),
        // The "total" counters only count nodes where all three inferred properties agree.
        numberOfTotalValues: count(nodeStats, s => isValue(s.inferredColNames) && isValue(s.inferredColCount) && isValue(s.inferredRowCount)),
        numberOfTotalTop: count(nodeStats, s => isTop(s.inferredColNames) && isTop(s.inferredColCount) && isTop(s.inferredRowCount)),
        numberOfTotalBottom: count(nodeStats, s => s.inferredColNames === 0 && isBottom(s.inferredColCount) && isBottom(s.inferredRowCount)),
        inferredColNames: (0, summarizer_1.summarizeMeasurement)(colNames.filter(isValue)),
        numberOfColNamesValues: count(colNames, isValue),
        numberOfColNamesTop: count(colNames, isTop),
        numberOfColNamesBottom: count(colNames, isZero),
        inferredColCount: (0, summarizer_1.summarizeMeasurement)(colCounts.filter(isValue)),
        // A range of exactly 0 between the bounds is counted as an exact result.
        numberOfColCountExact: count(colRanges, isZero),
        numberOfColCountValues: count(colCounts, isValue),
        numberOfColCountTop: count(colCounts, isTop),
        numberOfColCountInfinite: count(colCounts, isInfinite),
        numberOfColCountBottom: count(colCounts, isBottom),
        approxRangeColCount: (0, summarizer_1.summarizeMeasurement)(finiteOnly(colRanges)),
        inferredRowCount: (0, summarizer_1.summarizeMeasurement)(rowCounts.filter(isValue)),
        numberOfRowCountExact: count(rowRanges, isZero),
        numberOfRowCountValues: count(rowCounts, isValue),
        numberOfRowCountTop: count(rowCounts, isTop),
        numberOfRowCountInfinite: count(rowCounts, isInfinite),
        numberOfRowCountBottom: count(rowCounts, isBottom),
        approxRangeRowCount: (0, summarizer_1.summarizeMeasurement)(finiteOnly(rowRanges)),
        perOperationNumber: summarizePerOperationStats(nodeStats),
    };
}
292
/**
 * Tallies how often each data frame operation occurs across the given per-node statistics.
 *
 * The tally starts with an entry of 0 for every name in `DataFrameOperationNames`,
 * so names that never occur are still part of the result.
 *
 * @param nodeStats - Iterable of per-node records; a missing `mappedOperations` counts as no operations.
 * @returns A Map from operation name to its number of occurrences.
 */
function summarizePerOperationStats(nodeStats) {
    const tally = new Map();
    for (const knownName of semantics_1.DataFrameOperationNames) {
        tally.set(knownName, 0);
    }
    const bump = (op) => tally.set(op, (tally.get(op) ?? 0) + 1);
    for (const { mappedOperations } of nodeStats) {
        for (const op of mappedOperations ?? []) {
            bump(op);
        }
    }
    return tally;
}
256
302
  function summarizeSummarizedMeasurement(data) {
257
303
  data = data.filter(assert_1.isNotUndefined);
258
304
  const min = Math.min(...data.map(d => d.min).filter(assert_1.isNotUndefined));
@@ -11,7 +11,7 @@ function writeGraphOutput(ultimate, outputGraphPath) {
11
11
  const data = [];
12
12
  for (const { name, measurements } of [{ name: 'per-file', measurements: ultimate.commonMeasurements }, { name: 'per-slice', measurements: ultimate.perSliceMeasurements }]) {
13
13
  for (const [point, measurement] of measurements) {
14
- if (point === 'close R session' || point === 'initialize R session') {
14
+ if (point === 'close R session' || point === 'initialize R session' || !measurement?.mean || !measurement?.std) {
15
15
  continue;
16
16
  }
17
17
  const pointName = point === 'total' ? `total ${name}` : point;