@eagleoutice/flowr 2.2.15 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. package/README.md +226 -6
  2. package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
  3. package/abstract-interpretation/data-frame/absint-info.js +31 -0
  4. package/abstract-interpretation/data-frame/absint-visitor.d.ts +59 -0
  5. package/abstract-interpretation/data-frame/absint-visitor.js +173 -0
  6. package/abstract-interpretation/data-frame/domain.d.ts +107 -0
  7. package/abstract-interpretation/data-frame/domain.js +315 -0
  8. package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
  9. package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
  10. package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
  11. package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
  12. package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
  13. package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
  14. package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
  15. package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
  16. package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
  17. package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
  18. package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
  19. package/abstract-interpretation/data-frame/resolve-args.js +118 -0
  20. package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
  21. package/abstract-interpretation/data-frame/semantics.js +366 -0
  22. package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
  23. package/abstract-interpretation/data-frame/shape-inference.js +117 -0
  24. package/benchmark/slicer.d.ts +18 -2
  25. package/benchmark/slicer.js +143 -5
  26. package/benchmark/stats/print.js +123 -45
  27. package/benchmark/stats/size-of.d.ts +7 -0
  28. package/benchmark/stats/size-of.js +1 -0
  29. package/benchmark/stats/stats.d.ts +30 -1
  30. package/benchmark/stats/stats.js +4 -2
  31. package/benchmark/summarizer/data.d.ts +33 -2
  32. package/benchmark/summarizer/first-phase/input.js +5 -1
  33. package/benchmark/summarizer/first-phase/process.d.ts +2 -1
  34. package/benchmark/summarizer/first-phase/process.js +49 -3
  35. package/benchmark/summarizer/second-phase/process.js +101 -3
  36. package/cli/benchmark-app.d.ts +2 -0
  37. package/cli/benchmark-app.js +5 -1
  38. package/cli/benchmark-helper-app.d.ts +2 -0
  39. package/cli/benchmark-helper-app.js +13 -8
  40. package/cli/common/options.js +4 -0
  41. package/cli/export-quads-app.js +2 -1
  42. package/cli/flowr.js +58 -57
  43. package/cli/repl/commands/repl-cfg.js +13 -13
  44. package/cli/repl/commands/repl-commands.js +2 -2
  45. package/cli/repl/commands/repl-dataflow.js +10 -10
  46. package/cli/repl/commands/repl-execute.d.ts +2 -3
  47. package/cli/repl/commands/repl-execute.js +4 -4
  48. package/cli/repl/commands/repl-lineage.js +4 -4
  49. package/cli/repl/commands/repl-main.d.ts +12 -1
  50. package/cli/repl/commands/repl-normalize.js +6 -6
  51. package/cli/repl/commands/repl-parse.js +2 -2
  52. package/cli/repl/commands/repl-query.js +9 -9
  53. package/cli/repl/commands/repl-version.js +1 -1
  54. package/cli/repl/core.d.ts +5 -2
  55. package/cli/repl/core.js +10 -8
  56. package/cli/repl/server/connection.d.ts +3 -1
  57. package/cli/repl/server/connection.js +7 -5
  58. package/cli/repl/server/server.d.ts +3 -2
  59. package/cli/repl/server/server.js +4 -2
  60. package/cli/script-core/statistics-core.d.ts +2 -1
  61. package/cli/script-core/statistics-core.js +2 -2
  62. package/cli/script-core/statistics-helper-core.d.ts +2 -1
  63. package/cli/script-core/statistics-helper-core.js +5 -4
  64. package/cli/slicer-app.js +4 -2
  65. package/cli/statistics-app.js +2 -1
  66. package/cli/statistics-helper-app.js +2 -1
  67. package/config.d.ts +43 -10
  68. package/config.js +47 -43
  69. package/control-flow/cfg-dead-code.js +45 -2
  70. package/control-flow/cfg-simplification.d.ts +2 -0
  71. package/control-flow/control-flow-graph.d.ts +2 -0
  72. package/control-flow/control-flow-graph.js +8 -0
  73. package/control-flow/dfg-cfg-guided-visitor.d.ts +5 -3
  74. package/control-flow/dfg-cfg-guided-visitor.js +15 -4
  75. package/control-flow/extract-cfg.d.ts +4 -2
  76. package/control-flow/extract-cfg.js +4 -3
  77. package/control-flow/semantic-cfg-guided-visitor.d.ts +20 -2
  78. package/control-flow/semantic-cfg-guided-visitor.js +24 -4
  79. package/core/pipeline-executor.d.ts +4 -1
  80. package/core/pipeline-executor.js +6 -5
  81. package/core/steps/all/core/10-normalize.d.ts +2 -0
  82. package/core/steps/all/core/10-normalize.js +1 -1
  83. package/core/steps/all/core/11-normalize-tree-sitter.d.ts +2 -1
  84. package/core/steps/all/core/11-normalize-tree-sitter.js +2 -2
  85. package/core/steps/all/core/20-dataflow.d.ts +2 -1
  86. package/core/steps/all/core/20-dataflow.js +2 -2
  87. package/core/steps/all/static-slicing/00-slice.d.ts +2 -1
  88. package/core/steps/all/static-slicing/00-slice.js +2 -2
  89. package/core/steps/pipeline/default-pipelines.d.ts +32 -31
  90. package/core/steps/pipeline/default-pipelines.js +8 -8
  91. package/core/steps/pipeline-step.d.ts +2 -1
  92. package/dataflow/environments/built-in-config.d.ts +3 -3
  93. package/dataflow/environments/built-in.d.ts +11 -3
  94. package/dataflow/environments/built-in.js +5 -3
  95. package/dataflow/environments/default-builtin-config.js +4 -2
  96. package/dataflow/environments/define.d.ts +2 -1
  97. package/dataflow/environments/define.js +4 -5
  98. package/dataflow/environments/remove.d.ts +6 -0
  99. package/dataflow/environments/remove.js +29 -0
  100. package/dataflow/eval/resolve/alias-tracking.d.ts +7 -2
  101. package/dataflow/eval/resolve/alias-tracking.js +11 -8
  102. package/dataflow/eval/resolve/resolve-argument.d.ts +8 -0
  103. package/dataflow/eval/resolve/resolve-argument.js +118 -0
  104. package/dataflow/eval/resolve/resolve.d.ts +65 -18
  105. package/dataflow/eval/resolve/resolve.js +144 -48
  106. package/dataflow/eval/values/string/string-constants.d.ts +1 -1
  107. package/dataflow/eval/values/string/string-constants.js +7 -2
  108. package/dataflow/extractor.d.ts +2 -1
  109. package/dataflow/extractor.js +2 -1
  110. package/dataflow/internal/process/functions/call/built-in/built-in-access.js +5 -6
  111. package/dataflow/internal/process/functions/call/built-in/built-in-apply.js +1 -1
  112. package/dataflow/internal/process/functions/call/built-in/built-in-assignment.d.ts +4 -2
  113. package/dataflow/internal/process/functions/call/built-in/built-in-assignment.js +11 -11
  114. package/dataflow/internal/process/functions/call/built-in/built-in-eval.js +10 -11
  115. package/dataflow/internal/process/functions/call/built-in/built-in-expression-list.js +7 -2
  116. package/dataflow/internal/process/functions/call/built-in/built-in-for-loop.js +2 -3
  117. package/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.js +1 -1
  118. package/dataflow/internal/process/functions/call/built-in/built-in-list.js +2 -2
  119. package/dataflow/internal/process/functions/call/built-in/built-in-replacement.js +2 -3
  120. package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +6 -3
  121. package/dataflow/internal/process/functions/call/built-in/built-in-source.js +19 -15
  122. package/dataflow/internal/process/functions/call/built-in/built-in-vector.js +2 -2
  123. package/dataflow/internal/process/functions/call/built-in/built-in-while-loop.js +1 -1
  124. package/dataflow/internal/process/functions/call/common.js +1 -1
  125. package/dataflow/internal/process/functions/process-parameter.js +1 -1
  126. package/dataflow/origin/dfg-get-symbol-refs.d.ts +21 -0
  127. package/dataflow/origin/dfg-get-symbol-refs.js +50 -0
  128. package/dataflow/processor.d.ts +5 -0
  129. package/documentation/doc-util/doc-cfg.js +4 -3
  130. package/documentation/doc-util/doc-code.d.ts +1 -1
  131. package/documentation/doc-util/doc-dfg.js +3 -2
  132. package/documentation/doc-util/doc-functions.d.ts +24 -0
  133. package/documentation/doc-util/doc-functions.js +65 -0
  134. package/documentation/doc-util/doc-normalized-ast.js +3 -2
  135. package/documentation/doc-util/doc-print.d.ts +5 -0
  136. package/documentation/doc-util/doc-print.js +36 -0
  137. package/documentation/doc-util/doc-query.js +13 -2
  138. package/documentation/doc-util/doc-repl.js +2 -1
  139. package/documentation/doc-util/doc-search.js +3 -2
  140. package/documentation/doc-util/doc-types.d.ts +28 -6
  141. package/documentation/doc-util/doc-types.js +89 -45
  142. package/documentation/print-cfg-wiki.js +6 -7
  143. package/documentation/print-core-wiki.js +5 -5
  144. package/documentation/print-dataflow-graph-wiki.js +10 -10
  145. package/documentation/print-engines-wiki.js +1 -2
  146. package/documentation/print-faq-wiki.js +8 -2
  147. package/documentation/print-interface-wiki.js +12 -2
  148. package/documentation/print-linter-issue.d.ts +1 -0
  149. package/documentation/print-linter-issue.js +71 -0
  150. package/documentation/print-linter-wiki.js +223 -34
  151. package/documentation/print-linting-and-testing-wiki.js +2 -4
  152. package/documentation/print-normalized-ast-wiki.js +3 -3
  153. package/documentation/print-query-wiki.js +18 -2
  154. package/documentation/print-readme.js +24 -1
  155. package/documentation/print-search-wiki.js +1 -2
  156. package/linter/linter-executor.d.ts +3 -1
  157. package/linter/linter-executor.js +3 -2
  158. package/linter/linter-format.d.ts +67 -7
  159. package/linter/linter-format.js +12 -1
  160. package/linter/linter-rules.d.ts +178 -16
  161. package/linter/linter-rules.js +14 -4
  162. package/linter/linter-tags.d.ts +80 -0
  163. package/linter/linter-tags.js +85 -0
  164. package/linter/rules/absolute-path.d.ts +71 -0
  165. package/linter/rules/absolute-path.js +177 -0
  166. package/linter/rules/dataframe-access-validation.d.ts +53 -0
  167. package/linter/rules/dataframe-access-validation.js +116 -0
  168. package/linter/rules/deprecated-functions.d.ts +43 -0
  169. package/linter/rules/deprecated-functions.js +58 -0
  170. package/linter/rules/{2-file-path-validity.d.ts → file-path-validity.d.ts} +16 -6
  171. package/linter/rules/{2-file-path-validity.js → file-path-validity.js} +21 -13
  172. package/linter/rules/naming-convention.d.ts +71 -0
  173. package/linter/rules/naming-convention.js +168 -0
  174. package/linter/rules/seeded-randomness.d.ts +65 -0
  175. package/linter/rules/seeded-randomness.js +122 -0
  176. package/linter/rules/unused-definition.d.ts +41 -0
  177. package/linter/rules/unused-definition.js +105 -0
  178. package/package.json +5 -2
  179. package/queries/base-query-format.d.ts +2 -0
  180. package/queries/catalog/call-context-query/call-context-query-executor.d.ts +1 -1
  181. package/queries/catalog/call-context-query/call-context-query-executor.js +2 -2
  182. package/queries/catalog/cluster-query/cluster-query-format.d.ts +1 -1
  183. package/queries/catalog/config-query/config-query-executor.d.ts +1 -1
  184. package/queries/catalog/config-query/config-query-executor.js +2 -3
  185. package/queries/catalog/control-flow-query/control-flow-query-executor.d.ts +1 -1
  186. package/queries/catalog/control-flow-query/control-flow-query-executor.js +2 -2
  187. package/queries/catalog/control-flow-query/control-flow-query-format.d.ts +1 -1
  188. package/queries/catalog/dataflow-lens-query/dataflow-lens-query-format.d.ts +1 -1
  189. package/queries/catalog/dataflow-query/dataflow-query-format.d.ts +1 -1
  190. package/queries/catalog/dependencies-query/dependencies-query-executor.js +4 -116
  191. package/queries/catalog/dependencies-query/dependencies-query-format.d.ts +1 -1
  192. package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
  193. package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
  194. package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
  195. package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
  196. package/queries/catalog/happens-before-query/happens-before-query-format.d.ts +1 -1
  197. package/queries/catalog/id-map-query/id-map-query-format.d.ts +1 -1
  198. package/queries/catalog/lineage-query/lineage-query-format.d.ts +1 -1
  199. package/queries/catalog/linter-query/linter-query-executor.d.ts +1 -1
  200. package/queries/catalog/linter-query/linter-query-executor.js +2 -2
  201. package/queries/catalog/linter-query/linter-query-format.d.ts +1 -1
  202. package/queries/catalog/linter-query/linter-query-format.js +16 -12
  203. package/queries/catalog/normalized-ast-query/normalized-ast-query-format.d.ts +1 -1
  204. package/queries/catalog/origin-query/origin-query-format.d.ts +1 -1
  205. package/queries/catalog/project-query/project-query-format.d.ts +1 -1
  206. package/queries/catalog/resolve-value-query/resolve-value-query-executor.d.ts +1 -1
  207. package/queries/catalog/resolve-value-query/resolve-value-query-executor.js +2 -2
  208. package/queries/catalog/resolve-value-query/resolve-value-query-format.d.ts +1 -1
  209. package/queries/catalog/search-query/search-query-executor.d.ts +1 -1
  210. package/queries/catalog/search-query/search-query-executor.js +2 -2
  211. package/queries/catalog/search-query/search-query-format.d.ts +1 -1
  212. package/queries/catalog/static-slice-query/static-slice-query-executor.d.ts +1 -1
  213. package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -2
  214. package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +1 -1
  215. package/queries/query.d.ts +76 -16
  216. package/queries/query.js +2 -0
  217. package/r-bridge/lang-4.x/ast/parser/json/parser.d.ts +2 -1
  218. package/r-bridge/lang-4.x/ast/parser/json/parser.js +4 -2
  219. package/r-bridge/lang-4.x/convert-values.js +2 -1
  220. package/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.d.ts +3 -1
  221. package/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.js +4 -4
  222. package/r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize.d.ts +1 -1
  223. package/r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize.js +7 -5
  224. package/r-bridge/shell.d.ts +3 -2
  225. package/r-bridge/shell.js +4 -5
  226. package/search/flowr-search-builder.d.ts +6 -2
  227. package/search/flowr-search-builder.js +7 -0
  228. package/search/flowr-search-filters.d.ts +32 -8
  229. package/search/flowr-search-filters.js +42 -15
  230. package/search/flowr-search.d.ts +4 -0
  231. package/search/search-executor/search-enrichers.d.ts +7 -3
  232. package/search/search-executor/search-enrichers.js +29 -20
  233. package/search/search-executor/search-generators.js +1 -1
  234. package/search/search-executor/search-transformer.d.ts +2 -0
  235. package/search/search-executor/search-transformer.js +10 -1
  236. package/slicing/static/static-slicer.d.ts +1 -1
  237. package/slicing/static/static-slicer.js +2 -3
  238. package/statistics/statistics.d.ts +3 -1
  239. package/statistics/statistics.js +5 -4
  240. package/util/containers.d.ts +12 -9
  241. package/util/containers.js +12 -9
  242. package/util/files.d.ts +8 -2
  243. package/util/files.js +22 -4
  244. package/util/objects.d.ts +5 -4
  245. package/util/r-value.d.ts +23 -0
  246. package/util/r-value.js +113 -0
  247. package/util/range.d.ts +5 -1
  248. package/util/range.js +11 -3
  249. package/util/text/strings.d.ts +6 -0
  250. package/util/text/strings.js +35 -0
  251. package/util/version.js +1 -1
  252. package/linter/rules/1-deprecated-functions.d.ts +0 -34
  253. package/linter/rules/1-deprecated-functions.js +0 -54
  254. package/util/cfg/cfg.d.ts +0 -0
  255. package/util/cfg/cfg.js +0 -2
@@ -10,6 +10,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
10
10
  exports.BenchmarkSlicer = exports.benchmarkLogger = void 0;
11
11
  const stopwatch_1 = require("./stopwatch");
12
12
  const fs_1 = __importDefault(require("fs"));
13
+ const seedrandom_1 = __importDefault(require("seedrandom"));
13
14
  const log_1 = require("../util/log");
14
15
  const assert_1 = require("../util/assert");
15
16
  const strings_1 = require("../util/text/strings");
@@ -24,6 +25,11 @@ const tree_sitter_types_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitte
24
25
  const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor");
25
26
  const vertex_1 = require("../dataflow/graph/vertex");
26
27
  const arrays_1 = require("../util/collections/arrays");
28
+ const config_1 = require("../config");
29
+ const extract_cfg_1 = require("../control-flow/extract-cfg");
30
+ const absint_info_1 = require("../abstract-interpretation/data-frame/absint-info");
31
+ const domain_1 = require("../abstract-interpretation/data-frame/domain");
32
+ const shape_inference_1 = require("../abstract-interpretation/data-frame/shape-inference");
27
33
  /**
28
34
  * The logger to be used for benchmarking as a global object.
29
35
  */
@@ -34,10 +40,12 @@ class BenchmarkSlicer {
34
40
  perSliceMeasurements = new Map();
35
41
  deltas = new Map();
36
42
  parserName;
43
+ config;
37
44
  stats;
38
45
  loadedXml;
39
46
  dataflow;
40
47
  normalizedAst;
48
+ controlFlow;
41
49
  totalStopwatch;
42
50
  finished = false;
43
51
  // Yes, this is unclean, but we know that we assign the executor during the initialization and this saves us from having to check for nullability every time
@@ -51,15 +59,16 @@ class BenchmarkSlicer {
51
59
  * Initialize the slicer on the given request.
52
60
  * Can only be called once for each instance.
53
61
  */
54
- async init(request, autoSelectIf, threshold) {
62
+ async init(request, config, autoSelectIf, threshold) {
55
63
  (0, assert_1.guard)(this.stats === undefined, 'cannot initialize the slicer twice');
64
+ this.config = config;
56
65
  // we know these are in sync so we just cast to one of them
57
66
  this.parser = await this.commonMeasurements.measure('initialize R session', async () => {
58
67
  if (this.parserName === 'r-shell') {
59
- return new shell_1.RShell();
68
+ return new shell_1.RShell((0, config_1.getEngineConfig)(config, 'r-shell'));
60
69
  }
61
70
  else {
62
- await tree_sitter_executor_1.TreeSitterExecutor.initTreeSitter();
71
+ await tree_sitter_executor_1.TreeSitterExecutor.initTreeSitter((0, config_1.getEngineConfig)(config, 'tree-sitter'));
63
72
  return new tree_sitter_executor_1.TreeSitterExecutor();
64
73
  }
65
74
  });
@@ -68,7 +77,7 @@ class BenchmarkSlicer {
68
77
  criterion: [],
69
78
  autoSelectIf,
70
79
  threshold,
71
- });
80
+ }, config);
72
81
  this.loadedXml = (await this.measureCommonStep('parse', 'retrieve AST from R code')).parsed;
73
82
  this.normalizedAst = await this.measureCommonStep('normalize', 'normalize R AST');
74
83
  this.dataflow = await this.measureCommonStep('dataflow', 'produce dataflow information');
@@ -256,6 +265,112 @@ class BenchmarkSlicer {
256
265
  code: stats.reconstructedCode
257
266
  };
258
267
  }
268
+ /**
269
+ * Extract the control flow graph using {@link extractCFG}
270
+ */
271
+ extractCFG() {
272
+ exports.benchmarkLogger.trace('try to extract the control flow graph');
273
+ this.guardActive();
274
+ (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined for control flow extraction');
275
+ (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined for control flow extraction');
276
+ (0, assert_1.guard)(this.config !== undefined, 'config should be defined for control flow extraction');
277
+ const ast = this.normalizedAst;
278
+ const dfg = this.dataflow.graph;
279
+ const config = this.config;
280
+ this.controlFlow = this.measureSimpleStep('extract control flow graph', () => (0, extract_cfg_1.extractCfg)(ast, config, dfg));
281
+ }
282
+ /**
283
+ * Infer the shape of data frames using abstract interpretation with {@link inferDataFrameShapes}
284
+ *
285
+ * @returns The statistics of the data frame shape inference
286
+ */
287
+ inferDataFrameShapes() {
288
+ exports.benchmarkLogger.trace('try to infer shapes for data frames');
289
+ (0, assert_1.guard)(this.stats !== undefined && !this.finished, 'need to call init before, and can not do after finish!');
290
+ (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined for data frame shape inference');
291
+ (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined for data frame shape inference');
292
+ (0, assert_1.guard)(this.controlFlow !== undefined, 'controlFlow should be defined for data frame shape inference');
293
+ (0, assert_1.guard)(this.config !== undefined, 'config should be defined for data frame shape inference');
294
+ const ast = this.normalizedAst;
295
+ const dfg = this.dataflow.graph;
296
+ const cfinfo = this.controlFlow;
297
+ const config = this.config;
298
+ const stats = {
299
+ numberOfDataFrameFiles: 0,
300
+ numberOfNonDataFrameFiles: 0,
301
+ numberOfResultConstraints: 0,
302
+ numberOfResultingValues: 0,
303
+ numberOfResultingTop: 0,
304
+ numberOfResultingBottom: 0,
305
+ numberOfEmptyNodes: 0,
306
+ numberOfOperationNodes: 0,
307
+ numberOfValueNodes: 0,
308
+ sizeOfInfo: 0,
309
+ perNodeStats: new Map()
310
+ };
311
+ const result = this.measureSimpleStep('infer data frame shapes', () => (0, shape_inference_1.inferDataFrameShapes)(cfinfo, dfg, ast, config));
312
+ stats.numberOfResultConstraints = result.size;
313
+ for (const value of result.values()) {
314
+ if ((0, domain_1.equalDataFrameDomain)(value, domain_1.DataFrameTop)) {
315
+ stats.numberOfResultingTop++;
316
+ }
317
+ else if ((0, domain_1.equalDataFrameDomain)(value, domain_1.DataFrameBottom)) {
318
+ stats.numberOfResultingBottom++;
319
+ }
320
+ else {
321
+ stats.numberOfResultingValues++;
322
+ }
323
+ }
324
+ (0, visitor_1.visitAst)(this.normalizedAst.ast, (node) => {
325
+ if (node.info.dataFrame === undefined) {
326
+ return;
327
+ }
328
+ stats.sizeOfInfo += (0, size_of_1.safeSizeOf)([node.info.dataFrame]);
329
+ const expression = (0, absint_info_1.hasDataFrameExpressionInfo)(node) ? node.info.dataFrame : undefined;
330
+ const value = node.info.dataFrame.domain?.get(node.info.id);
331
+ // Only store per-node information for nodes representing expressions or nodes with abstract values
332
+ if (expression === undefined && value === undefined) {
333
+ stats.numberOfEmptyNodes++;
334
+ return;
335
+ }
336
+ const nodeStats = {
337
+ numberOfEntries: node.info.dataFrame?.domain?.size ?? 0
338
+ };
339
+ if (expression !== undefined) {
340
+ nodeStats.mappedOperations = expression.operations.map(op => op.operation);
341
+ stats.numberOfOperationNodes++;
342
+ }
343
+ if (value !== undefined) {
344
+ nodeStats.inferredColNames = value.colnames === domain_1.ColNamesTop ? 'top' : value.colnames.length;
345
+ nodeStats.inferredColCount = this.getInferredSize(value.cols);
346
+ nodeStats.inferredRowCount = this.getInferredSize(value.rows);
347
+ nodeStats.approxRangeColCount = value.cols === domain_1.IntervalBottom ? 0 : value.cols[1] - value.cols[0];
348
+ nodeStats.approxRangeRowCount = value.rows === domain_1.IntervalBottom ? 0 : value.rows[1] - value.rows[0];
349
+ stats.numberOfValueNodes++;
350
+ }
351
+ stats.perNodeStats.set(node.info.id, nodeStats);
352
+ });
353
+ if (stats.numberOfOperationNodes > 0) {
354
+ stats.numberOfDataFrameFiles = 1;
355
+ }
356
+ else {
357
+ stats.numberOfNonDataFrameFiles = 1;
358
+ }
359
+ this.stats.dataFrameShape = stats;
360
+ return stats;
361
+ }
362
+ getInferredSize(value) {
363
+ if ((0, domain_1.equalInterval)(value, domain_1.IntervalTop)) {
364
+ return 'top';
365
+ }
366
+ else if (value === domain_1.IntervalBottom) {
367
+ return 'bottom';
368
+ }
369
+ else if (!isFinite(value[1])) {
370
+ return 'infinite';
371
+ }
372
+ return Math.floor((value[0] + value[1]) / 2);
373
+ }
259
374
  /** Bridging the gap between the new internal and the old names for the benchmarking */
260
375
  async measureCommonStep(expectedStep, keyToMeasure) {
261
376
  const memoryInit = process.memoryUsage();
@@ -269,6 +384,18 @@ class BenchmarkSlicer {
269
384
  });
270
385
  return result;
271
386
  }
387
+ measureSimpleStep(keyToMeasure, measurement) {
388
+ const memoryInit = process.memoryUsage();
389
+ const result = this.commonMeasurements.measure(keyToMeasure, measurement);
390
+ const memoryEnd = process.memoryUsage();
391
+ this.deltas.set(keyToMeasure, {
392
+ heap: memoryEnd.heapUsed - memoryInit.heapUsed,
393
+ rss: memoryEnd.rss - memoryInit.rss,
394
+ external: memoryEnd.external - memoryInit.external,
395
+ buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
396
+ });
397
+ return result;
398
+ }
272
399
  async measureSliceStep(expectedStep, measure, keyToMeasure) {
273
400
  const { result } = await measure.measureAsync(keyToMeasure, () => this.executor.nextStep(expectedStep));
274
401
  return result;
@@ -301,7 +428,8 @@ class BenchmarkSlicer {
301
428
  allCriteria = (0, arrays_1.equidistantSampling)(allCriteria, sampleCount, 'ceil');
302
429
  }
303
430
  else {
304
- allCriteria.sort(() => Math.random() - 0.5);
431
+ const random = options.seed ? (0, seedrandom_1.default)(options.seed) : Math.random;
432
+ allCriteria.sort(() => random() - 0.5);
305
433
  allCriteria.length = Math.min(allCriteria.length, sampleCount);
306
434
  }
307
435
  }
@@ -327,6 +455,8 @@ class BenchmarkSlicer {
327
455
  const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
328
456
  const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
329
457
  const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
458
+ const controlFlowTime = Number(this.stats.commonMeasurements.get('extract control flow graph'));
459
+ const dataFrameShapeTime = Number(this.stats.commonMeasurements.get('infer data frame shapes'));
330
460
  this.stats.retrieveTimePerToken = {
331
461
  raw: retrieveTime / this.stats.input.numberOfRTokens,
332
462
  normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
@@ -343,6 +473,14 @@ class BenchmarkSlicer {
343
473
  raw: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
344
474
  normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
345
475
  };
476
+ this.stats.controlFlowTimePerToken = !isNaN(controlFlowTime) ? {
477
+ raw: controlFlowTime / this.stats.input.numberOfRTokens,
478
+ normalized: controlFlowTime / this.stats.input.numberOfNormalizedTokens,
479
+ } : undefined;
480
+ this.stats.dataFrameShapeTimePerToken = !isNaN(dataFrameShapeTime) ? {
481
+ raw: dataFrameShapeTime / this.stats.input.numberOfRTokens,
482
+ normalized: dataFrameShapeTime / this.stats.input.numberOfNormalizedTokens,
483
+ } : undefined;
346
484
  return {
347
485
  stats: this.stats,
348
486
  parse: typeof this.loadedXml === 'string' ? this.loadedXml : JSON.stringify(this.loadedXml),
@@ -84,22 +84,34 @@ function convertNumberToNiceBytes(x) {
84
84
  function stats2string(stats) {
85
85
  let result = `
86
86
  Request: ${JSON.stringify(stats.request)}
87
- Shell init time: ${print(stats.commonMeasurements, 'initialize R session')}
88
- AST retrieval: ${print(stats.commonMeasurements, 'retrieve AST from R code')}
89
- AST retrieval per token: ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
90
- AST retrieval per R token: ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
91
- AST normalization: ${print(stats.commonMeasurements, 'normalize R AST')}
92
- AST normalization per token: ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
93
- AST normalization per R token:${formatNanoseconds(stats.normalizeTimePerToken.raw)}
94
- Dataflow creation: ${print(stats.commonMeasurements, 'produce dataflow information')}
95
- Dataflow creation per token: ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
96
- Dataflow creation per R token:${formatNanoseconds(stats.dataflowTimePerToken.raw)}
97
- Total common time per token: ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
98
- Total common time per R token:${formatNanoseconds(stats.totalCommonTimePerToken.raw)}
99
-
100
- Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:`;
87
+ Shell init time: ${print(stats.commonMeasurements, 'initialize R session')}
88
+ AST retrieval: ${print(stats.commonMeasurements, 'retrieve AST from R code')}
89
+ AST retrieval per token: ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
90
+ AST retrieval per R token: ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
91
+ AST normalization: ${print(stats.commonMeasurements, 'normalize R AST')}
92
+ AST normalization per token: ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
93
+ AST normalization per R token: ${formatNanoseconds(stats.normalizeTimePerToken.raw)}
94
+ Dataflow creation: ${print(stats.commonMeasurements, 'produce dataflow information')}
95
+ Dataflow creation per token: ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
96
+ Dataflow creation per R token: ${formatNanoseconds(stats.dataflowTimePerToken.raw)}
97
+ Total common time per token: ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
98
+ Total common time per R token: ${formatNanoseconds(stats.totalCommonTimePerToken.raw)}`;
99
+ if (stats.commonMeasurements.has('extract control flow graph') && stats.controlFlowTimePerToken !== undefined) {
100
+ result += `
101
+ Control flow extraction: ${print(stats.commonMeasurements, 'extract control flow graph')}
102
+ Control flow extraction per token: ${formatNanoseconds(stats.controlFlowTimePerToken.normalized)}
103
+ Control flow extraction per R token: ${formatNanoseconds(stats.controlFlowTimePerToken.raw)}`;
104
+ }
105
+ if (stats.commonMeasurements.has('infer data frame shapes') && stats.dataFrameShapeTimePerToken !== undefined) {
106
+ result += `
107
+ Dataframe shape inference: ${print(stats.commonMeasurements, 'infer data frame shapes')}
108
+ Dataframe shape inference per token: ${formatNanoseconds(stats.dataFrameShapeTimePerToken.normalized)}
109
+ Dataframe shape inference per R token:${formatNanoseconds(stats.dataFrameShapeTimePerToken.raw)}`;
110
+ }
101
111
  if (stats.perSliceMeasurements.numberOfSlices > 0) {
102
112
  result += `
113
+
114
+ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:
103
115
  Total: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
104
116
  Slice creation: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
105
117
  Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.normalized)}
@@ -110,7 +122,7 @@ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.pe
110
122
  Total per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.normalized)}
111
123
  Total per R token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.raw)}
112
124
  Used Slice Criteria Sizes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
113
- Result Slice Sizes:
125
+ Result Slice Sizes:
114
126
  Number of lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.lines)}
115
127
  Number of non-empty lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonEmptyLines)}
116
128
  Number of characters: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.characters)}
@@ -120,12 +132,12 @@ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.pe
120
132
  Number of R tokens (w/o comments): ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.tokensNoComments)}
121
133
  Normalized R tokens: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokens)}
122
134
  Normalized R tokens (w/o comments): ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokensNoComments)}
123
- Number of dataflow nodes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}
124
- `;
135
+ Number of dataflow nodes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}`;
125
136
  }
126
- return `${result}
127
- Shell close: ${print(stats.commonMeasurements, 'close R session')}
128
- Total: ${print(stats.commonMeasurements, 'total')}
137
+ result += `
138
+
139
+ Shell close: ${print(stats.commonMeasurements, 'close R session')}
140
+ Total: ${print(stats.commonMeasurements, 'total')}
129
141
 
130
142
  Input:
131
143
  Number of lines: ${pad(stats.input.numberOfLines)}
@@ -148,9 +160,64 @@ Dataflow:
148
160
  Number of stored Env indices: ${pad(stats.dataflow.storedEnvIndices)}
149
161
  Number of overwritten indices: ${pad(stats.dataflow.overwrittenIndices)}
150
162
  Size of graph: ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
163
+ if (stats.dataFrameShape !== undefined) {
164
+ result += `
165
+
166
+ Dataframe shape inference:
167
+ Number of resulting constraints:${pad(stats.dataFrameShape.numberOfResultConstraints)}
168
+ Number of operation nodes: ${pad(stats.dataFrameShape.numberOfOperationNodes)}
169
+ Number of abstract value nodes: ${pad(stats.dataFrameShape.numberOfValueNodes)}
170
+ Number of entries per node: ${pad(stats.dataFrameShape.numberOfEntriesPerNode.mean)}
171
+ Number of operations: ${pad(stats.dataFrameShape.numberOfOperations)}
172
+ Number of total values: ${pad(stats.dataFrameShape.numberOfTotalValues)}
173
+ Number of total top: ${pad(stats.dataFrameShape.numberOfTotalTop)}
174
+ Inferred column names per node: ${pad(stats.dataFrameShape.inferredColNames.mean)}
175
+ Number of column names values: ${pad(stats.dataFrameShape.numberOfColNamesValues)}
176
+ Number of column names Top: ${pad(stats.dataFrameShape.numberOfColNamesTop)}
177
+ Inferred column count per node: ${pad(stats.dataFrameShape.inferredColCount.mean)}
178
+ Number of column count values: ${pad(stats.dataFrameShape.numberOfColCountValues)}
179
+ Number of column count Top: ${pad(stats.dataFrameShape.numberOfColCountTop)}
180
+ Number of column count infinite:${pad(stats.dataFrameShape.numberOfColCountInfinite)}
181
+ Inferred row count per node: ${pad(stats.dataFrameShape.inferredRowCount.mean)}
182
+ Number of row count values: ${pad(stats.dataFrameShape.numberOfRowCountValues)}
183
+ Number of row count Top: ${pad(stats.dataFrameShape.numberOfRowCountTop)}
184
+ Number of row count infinite: ${pad(stats.dataFrameShape.numberOfRowCountInfinite)}
185
+ Size of data frame shape info: ${convertNumberToNiceBytes(stats.dataFrameShape.sizeOfInfo)}`;
186
+ }
187
+ return result;
151
188
  }
152
189
  function ultimateStats2String(stats) {
153
- const slice = stats.totalSlices > 0 ? `Slice summary for:
190
+ let result = `
191
+ Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
192
+ Shell init time: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
193
+ AST retrieval: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
194
+ AST retrieval per token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
195
+ AST retrieval per R token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
196
+ AST normalization: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
197
+ AST normalization per token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
198
+ AST normalization per R token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
199
+ Dataflow creation: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
200
+ Dataflow creation per token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
201
+ Dataflow creation per R token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
202
+ Total common time per token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
203
+ Total common time per R token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}`;
204
+ if (stats.commonMeasurements.has('extract control flow graph') && stats.controlFlowTimePerToken !== undefined) {
205
+ result += `
206
+ Control flow extraction: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('extract control flow graph'))}
207
+ Control flow extraction per token: ${formatSummarizedTimeMeasure(stats.controlFlowTimePerToken.normalized)}
208
+ Control flow extraction per R token: ${formatSummarizedTimeMeasure(stats.controlFlowTimePerToken.raw)}`;
209
+ }
210
+ if (stats.commonMeasurements.has('infer data frame shapes') && stats.dataFrameShapeTimePerToken !== undefined) {
211
+ result += `
212
+ Dataframe shape inference: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('infer data frame shapes'))}
213
+ Dataframe shape inference per token: ${formatSummarizedTimeMeasure(stats.dataFrameShapeTimePerToken.normalized)}
214
+ Dataframe shape inference per R token:${formatSummarizedTimeMeasure(stats.dataFrameShapeTimePerToken.raw)}`;
215
+ }
216
+ // Used Slice Criteria Sizes: ${formatSummarizedMeasure(stats.perSliceMeasurements.sliceCriteriaSizes)}
217
+ if (stats.totalSlices > 0) {
218
+ result += `
219
+
220
+ Slicing summary for ${stats.totalSlices} slice${stats.totalSlices !== 1 ? 's' : ''}:
154
221
  Total: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
155
222
  Slice creation: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
156
223
  Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.sliceTimePerToken.normalized)}
@@ -161,29 +228,14 @@ function ultimateStats2String(stats) {
161
228
  Total per token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.normalized)}
162
229
  Total per R token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.raw)}
163
230
  Failed to Re-Parse: ${pad(stats.failedToRepParse)}/${stats.totalSlices}
164
- Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
231
+ Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
165
232
  ${reduction2String('Reductions', stats.reduction)}
166
- ${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}` : 'No slices';
167
- // Used Slice Criteria Sizes: ${formatSummarizedMeasure(stats.perSliceMeasurements.sliceCriteriaSizes)}
168
- return `
169
- Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
170
- Shell init time: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
171
- AST retrieval: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
172
- AST retrieval per token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
173
- AST retrieval per R token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
174
- AST normalization: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
175
- AST normalization per token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
176
- AST normalization per R token:${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
177
- Dataflow creation: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
178
- Dataflow creation per token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
179
- Dataflow creation per R token:${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
180
- Total common time per token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
181
- Total common time per R token:${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}
182
-
183
- ${slice}
233
+ ${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}`;
234
+ }
235
+ result += `
184
236
 
185
- Shell close: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
186
- Total: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
237
+ Shell close: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
238
+ Total: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
187
239
 
188
240
  Input:
189
241
  Number of lines: ${formatSummarizedMeasure(stats.input.numberOfLines)}
@@ -205,12 +257,38 @@ Dataflow:
205
257
  Number of stored Vtx indices: ${formatSummarizedMeasure(stats.dataflow.storedVertexIndices)}
206
258
  Number of stored Env indices: ${formatSummarizedMeasure(stats.dataflow.storedEnvIndices)}
207
259
  Number of overwritten indices: ${formatSummarizedMeasure(stats.dataflow.overwrittenIndices)}
208
- Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
209
- `;
260
+ Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}`;
261
+ if (stats.dataFrameShape !== undefined) {
262
+ result += `
263
+
264
+ Dataframe shape inference:
265
+ Number of resulting constraints:${formatSummarizedMeasure(stats.dataFrameShape.numberOfResultConstraints)}
266
+ Number of operation nodes: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfOperationNodes)}
267
+ Number of abstract value nodes: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfValueNodes)}
268
+ Number of entries per node: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfEntriesPerNode)}
269
+ Number of operations: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfOperations)}
270
+ Number of total values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfTotalValues)}
271
+ Number of total top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfTotalTop)}
272
+ Inferred column names per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredColNames)}
273
+ Number of column names values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColNamesValues)}
274
+ Number of column names top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColNamesTop)}
275
+ Inferred column count per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredColCount)}
276
+ Number of column count exact: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountExact)}
277
+ Number of column count values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountValues)}
278
+ Number of column count top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountTop)}
279
+ Number of column count infinite:${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountInfinite)}
280
+ Inferred row count per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredRowCount)}
281
+ Number of row count exact: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountExact)}
282
+ Number of row count values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountValues)}
283
+ Number of row count top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountTop)}
284
+ Number of row count infinite: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountInfinite)}
285
+ Size of data frame shape info: ${formatSummarizedMeasure(stats.dataFrameShape.sizeOfInfo, convertNumberToNiceBytes)}`;
286
+ }
287
+ return result;
210
288
  }
211
289
  function reduction2String(title, reduction) {
212
290
  return `
213
- ${title} (reduced by x%):
291
+ ${title} (reduced by x%):
214
292
  Number of lines: ${formatSummarizedMeasure(reduction.numberOfLines, asPercentage)}
215
293
  Number of lines no auto: ${formatSummarizedMeasure(reduction.numberOfLinesNoAutoSelection, asPercentage)}
216
294
  Number of characters: ${formatSummarizedMeasure(reduction.numberOfCharacters, asPercentage)}
@@ -1,3 +1,10 @@
1
1
  import type { DataflowGraph } from '../../dataflow/graph/graph';
2
2
  /** Returns the size of the given df graph in bytes (without sharing in-memory) */
3
3
  export declare function getSizeOfDfGraph(df: DataflowGraph): number;
4
+ /**
5
+ * Calculates the size of an array in bytes.
6
+ *
7
+ * @param array - The array to calculate the size of.
8
+ * @returns The size of the array in bytes.
9
+ */
10
+ export declare function safeSizeOf<T>(array: T[]): number;
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.getSizeOfDfGraph = getSizeOfDfGraph;
7
+ exports.safeSizeOf = safeSizeOf;
7
8
  const environment_1 = require("../../dataflow/environments/environment");
8
9
  const vertex_1 = require("../../dataflow/graph/vertex");
9
10
  const identifier_1 = require("../../dataflow/environments/identifier");
@@ -4,7 +4,10 @@ import type { ReconstructionResult } from '../../reconstruct/reconstruct';
4
4
  import type { RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge/retriever';
5
5
  import type { TimePerToken } from '../summarizer/data';
6
6
  import type { MergeableRecord } from '../../util/objects';
7
- export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
7
+ import type { DataFrameOperationName } from '../../abstract-interpretation/data-frame/semantics';
8
+ export declare const RequiredSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
9
+ export declare const OptionalSlicerMeasurements: readonly ["extract control flow graph", "infer data frame shapes"];
10
+ export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total", "extract control flow graph", "infer data frame shapes"];
8
11
  export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number];
9
12
  export declare const PerSliceMeasurements: readonly ["static slicing", "reconstruct code", "total"];
10
13
  export type PerSliceMeasurements = typeof PerSliceMeasurements[number];
@@ -41,6 +44,29 @@ export interface SlicerStatsDataflow<T = number> {
41
44
  storedEnvIndices: T;
42
45
  overwrittenIndices: T;
43
46
  }
47
+ export interface SlicerStatsDfShape<T = number> {
48
+ numberOfDataFrameFiles: T extends number ? 0 | 1 : number;
49
+ numberOfNonDataFrameFiles: T extends number ? 0 | 1 : number;
50
+ numberOfResultConstraints: T;
51
+ numberOfResultingValues: T;
52
+ numberOfResultingTop: T;
53
+ numberOfResultingBottom: T;
54
+ numberOfEmptyNodes: T;
55
+ numberOfOperationNodes: T;
56
+ numberOfValueNodes: T;
57
+ sizeOfInfo: T;
58
+ perNodeStats: Map<NodeId, PerNodeStatsDfShape<T>>;
59
+ }
60
+ export interface PerNodeStatsDfShape<T = number> {
61
+ numberOfEntries: T;
62
+ mappedOperations?: DataFrameOperationName[];
63
+ inferredColNames?: T | 'top';
64
+ inferredColCount?: T | 'bottom' | 'infinite' | 'top';
65
+ inferredRowCount?: T | 'bottom' | 'infinite' | 'top';
66
+ /** difference between upper and lower bound of interval domain (to estimate approximation) */
67
+ approxRangeColCount?: T;
68
+ approxRangeRowCount?: T;
69
+ }
44
70
  /**
45
71
  * Please note, that these measurement can be negative as there is no guarantee that the memory usage will increase
46
72
  * due to, e.g., garbage collection.
@@ -61,8 +87,11 @@ export interface SlicerStats {
61
87
  request: RParseRequestFromFile | RParseRequestFromText;
62
88
  input: SlicerStatsInput;
63
89
  dataflow: SlicerStatsDataflow;
90
+ dataFrameShape?: SlicerStatsDfShape;
64
91
  retrieveTimePerToken: TimePerToken<number>;
65
92
  normalizeTimePerToken: TimePerToken<number>;
66
93
  dataflowTimePerToken: TimePerToken<number>;
67
94
  totalCommonTimePerToken: TimePerToken<number>;
95
+ controlFlowTimePerToken?: TimePerToken<number>;
96
+ dataFrameShapeTimePerToken?: TimePerToken<number>;
68
97
  }
@@ -1,6 +1,8 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PerSliceMeasurements = exports.CommonSlicerMeasurements = void 0;
4
- exports.CommonSlicerMeasurements = ['initialize R session', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'];
3
+ exports.PerSliceMeasurements = exports.CommonSlicerMeasurements = exports.OptionalSlicerMeasurements = exports.RequiredSlicerMeasurements = void 0;
4
+ exports.RequiredSlicerMeasurements = ['initialize R session', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'];
5
+ exports.OptionalSlicerMeasurements = ['extract control flow graph', 'infer data frame shapes'];
6
+ exports.CommonSlicerMeasurements = [...exports.RequiredSlicerMeasurements, ...exports.OptionalSlicerMeasurements];
5
7
  exports.PerSliceMeasurements = ['static slicing', 'reconstruct code', 'total'];
6
8
  //# sourceMappingURL=stats.js.map
@@ -1,5 +1,6 @@
1
+ import type { DataFrameOperationName } from '../../abstract-interpretation/data-frame/semantics';
1
2
  import type { SummarizedMeasurement } from '../../util/summarizer';
2
- import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
3
+ import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDfShape, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
3
4
  export interface SliceSizeCollection {
4
5
  lines: number[];
5
6
  nonEmptyLines: number[];
@@ -21,7 +22,8 @@ export interface SliceSizeCollection {
21
22
  */
22
23
  export type SummarizedSlicerStats = {
23
24
  perSliceMeasurements: SummarizedPerSliceStats;
24
- } & Omit<SlicerStats, 'perSliceMeasurements'>;
25
+ dataFrameShape?: SummarizedDfShapeStats;
26
+ } & Omit<SlicerStats, 'perSliceMeasurements' | 'dataFrameShape'>;
25
27
  export interface Reduction<T = number> {
26
28
  numberOfLines: T;
27
29
  numberOfLinesNoAutoSelection: T;
@@ -62,6 +64,8 @@ export interface UltimateSlicerStats {
62
64
  normalizeTimePerToken: TimePerToken;
63
65
  dataflowTimePerToken: TimePerToken;
64
66
  totalCommonTimePerToken: TimePerToken;
67
+ controlFlowTimePerToken?: TimePerToken;
68
+ dataFrameShapeTimePerToken?: TimePerToken;
65
69
  sliceTimePerToken: TimePerToken;
66
70
  reconstructTimePerToken: TimePerToken;
67
71
  totalPerSliceTimePerToken: TimePerToken;
@@ -74,4 +78,31 @@ export interface UltimateSlicerStats {
74
78
  reductionNoFluff: Reduction<SummarizedMeasurement>;
75
79
  input: SlicerStatsInput<SummarizedMeasurement>;
76
80
  dataflow: SlicerStatsDataflow<SummarizedMeasurement>;
81
+ dataFrameShape?: SummarizedDfShapeStats<SummarizedMeasurement>;
82
+ }
83
+ export interface SummarizedDfShapeStats<T = number> extends Omit<SlicerStatsDfShape<T>, 'perNodeStats'> {
84
+ numberOfEntriesPerNode: SummarizedMeasurement;
85
+ numberOfOperations: T;
86
+ numberOfTotalValues: T;
87
+ numberOfTotalTop: T;
88
+ numberOfTotalBottom: T;
89
+ inferredColNames: SummarizedMeasurement;
90
+ numberOfColNamesValues: T;
91
+ numberOfColNamesTop: T;
92
+ numberOfColNamesBottom: T;
93
+ inferredColCount: SummarizedMeasurement;
94
+ numberOfColCountExact: T;
95
+ numberOfColCountValues: T;
96
+ numberOfColCountTop: T;
97
+ numberOfColCountInfinite: T;
98
+ numberOfColCountBottom: T;
99
+ approxRangeColCount: SummarizedMeasurement;
100
+ inferredRowCount: SummarizedMeasurement;
101
+ numberOfRowCountExact: T;
102
+ numberOfRowCountValues: T;
103
+ numberOfRowCountTop: T;
104
+ numberOfRowCountInfinite: T;
105
+ numberOfRowCountBottom: T;
106
+ approxRangeRowCount: SummarizedMeasurement;
107
+ perOperationNumber: Map<DataFrameOperationName, T>;
77
108
  }
@@ -28,7 +28,11 @@ async function processRunMeasurement(line, fileNum, lineNum, textOutputAppendPat
28
28
  return [k, BigInt(v.slice(0, -1))];
29
29
  })),
30
30
  perSliceMeasurements: new Map(got.stats.perSliceMeasurements
31
- .map(([k, v]) => mapPerSliceStats(k, v)))
31
+ .map(([k, v]) => mapPerSliceStats(k, v))),
32
+ dataFrameShape: got.stats.dataFrameShape !== undefined ? {
33
+ ...got.stats.dataFrameShape,
34
+ perNodeStats: new Map(got.stats.dataFrameShape.perNodeStats)
35
+ } : undefined
32
36
  }
33
37
  };
34
38
  const totalSlices = got.stats.perSliceMeasurements.size;
@@ -2,11 +2,12 @@ import type { Reduction, SummarizedSlicerStats, TimePerToken } from '../data';
2
2
  import type { SummarizedMeasurement } from '../../../util/summarizer';
3
3
  import type { PerSliceStats, SlicerStats } from '../../stats/stats';
4
4
  import type { SlicingCriteria } from '../../../slicing/criterion/parse';
5
+ import type { RShellEngineConfig } from '../../../config';
5
6
  /**
6
7
  * Summarizes the given stats by calculating the min, max, median, mean, and the standard deviation for each measurement.
7
8
  * @see Slicer
8
9
  */
9
- export declare function summarizeSlicerStats(stats: SlicerStats, report?: (criteria: SlicingCriteria, stats: PerSliceStats) => void): Promise<Readonly<SummarizedSlicerStats>>;
10
+ export declare function summarizeSlicerStats(stats: SlicerStats, report?: (criteria: SlicingCriteria, stats: PerSliceStats) => void, engineConf?: RShellEngineConfig): Promise<Readonly<SummarizedSlicerStats>>;
10
11
  export declare function summarizeSummarizedMeasurement(data: SummarizedMeasurement[]): SummarizedMeasurement;
11
12
  export declare function summarizeSummarizedReductions(reductions: Reduction<SummarizedMeasurement>[]): Reduction<SummarizedMeasurement>;
12
13
  export declare function summarizeSummarizedTimePerToken(times: TimePerToken[]): TimePerToken;