@eagleoutice/flowr 2.2.10 → 2.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/README.md +4 -4
  2. package/benchmark/slicer.d.ts +49 -22
  3. package/benchmark/slicer.js +88 -28
  4. package/benchmark/stats/print.js +16 -10
  5. package/benchmark/stats/size-of.js +18 -1
  6. package/benchmark/stats/stats.d.ts +3 -0
  7. package/benchmark/summarizer/second-phase/process.js +8 -2
  8. package/cli/benchmark-app.d.ts +5 -0
  9. package/cli/benchmark-app.js +49 -6
  10. package/cli/benchmark-helper-app.d.ts +4 -0
  11. package/cli/benchmark-helper-app.js +20 -4
  12. package/cli/common/options.js +13 -4
  13. package/cli/repl/commands/repl-commands.js +2 -0
  14. package/cli/repl/commands/repl-dataflow.d.ts +2 -0
  15. package/cli/repl/commands/repl-dataflow.js +35 -1
  16. package/cli/repl/server/compact.d.ts +2 -2
  17. package/cli/repl/server/compact.js +3 -3
  18. package/cli/repl/server/messages/message-analysis.d.ts +2 -2
  19. package/cli/repl/server/messages/message-analysis.js +2 -2
  20. package/config.d.ts +27 -2
  21. package/config.js +30 -4
  22. package/dataflow/environments/built-in-config.d.ts +5 -2
  23. package/dataflow/environments/built-in-config.js +8 -2
  24. package/dataflow/environments/built-in.d.ts +8 -1
  25. package/dataflow/environments/built-in.js +8 -1
  26. package/dataflow/environments/clone.d.ts +5 -0
  27. package/dataflow/environments/clone.js +5 -0
  28. package/dataflow/environments/default-builtin-config.js +96 -10
  29. package/dataflow/environments/define.d.ts +5 -1
  30. package/dataflow/environments/define.js +36 -10
  31. package/dataflow/environments/environment.js +4 -2
  32. package/dataflow/environments/overwrite.js +4 -0
  33. package/dataflow/environments/remove.d.ts +6 -0
  34. package/dataflow/environments/remove.js +24 -0
  35. package/dataflow/environments/resolve-by-name.js +1 -1
  36. package/dataflow/extractor.d.ts +1 -1
  37. package/dataflow/extractor.js +8 -6
  38. package/dataflow/graph/dataflowgraph-builder.d.ts +76 -6
  39. package/dataflow/graph/dataflowgraph-builder.js +102 -6
  40. package/dataflow/graph/edge.js +4 -1
  41. package/dataflow/graph/graph.d.ts +12 -1
  42. package/dataflow/graph/graph.js +37 -0
  43. package/dataflow/graph/vertex.d.ts +42 -2
  44. package/dataflow/graph/vertex.js +32 -0
  45. package/dataflow/internal/linker.js +3 -1
  46. package/dataflow/internal/process/functions/call/argument/unpack-argument.d.ts +3 -0
  47. package/dataflow/internal/process/functions/call/argument/unpack-argument.js +4 -10
  48. package/dataflow/internal/process/functions/call/built-in/built-in-access.d.ts +1 -0
  49. package/dataflow/internal/process/functions/call/built-in/built-in-access.js +55 -45
  50. package/dataflow/internal/process/functions/call/built-in/built-in-apply.d.ts +6 -4
  51. package/dataflow/internal/process/functions/call/built-in/built-in-apply.js +27 -8
  52. package/dataflow/internal/process/functions/call/built-in/built-in-assignment.js +37 -7
  53. package/dataflow/internal/process/functions/call/built-in/built-in-eval.d.ts +10 -0
  54. package/dataflow/internal/process/functions/call/built-in/built-in-eval.js +140 -0
  55. package/dataflow/internal/process/functions/call/built-in/built-in-expression-list.js +4 -3
  56. package/dataflow/internal/process/functions/call/built-in/built-in-list.js +51 -17
  57. package/dataflow/internal/process/functions/call/built-in/built-in-pipe.js +21 -3
  58. package/dataflow/internal/process/functions/call/built-in/built-in-replacement.d.ts +3 -0
  59. package/dataflow/internal/process/functions/call/built-in/built-in-replacement.js +83 -29
  60. package/dataflow/internal/process/functions/call/built-in/built-in-rm.d.ts +7 -0
  61. package/dataflow/internal/process/functions/call/built-in/built-in-rm.js +41 -0
  62. package/dataflow/internal/process/functions/call/built-in/built-in-source.js +20 -6
  63. package/dataflow/internal/process/functions/call/built-in/built-in-vector.d.ts +15 -0
  64. package/dataflow/internal/process/functions/call/built-in/built-in-vector.js +75 -0
  65. package/dataflow/internal/process/functions/call/common.d.ts +1 -1
  66. package/dataflow/internal/process/functions/call/common.js +4 -2
  67. package/dataflow/internal/process/functions/call/named-call-handling.d.ts +2 -0
  68. package/dataflow/internal/process/functions/call/named-call-handling.js +9 -5
  69. package/dataflow/internal/process/process-named-call.d.ts +3 -0
  70. package/dataflow/internal/process/process-named-call.js +3 -0
  71. package/dataflow/processor.d.ts +7 -7
  72. package/documentation/data/server/doc-data-server-messages.js +2 -2
  73. package/documentation/doc-util/doc-cfg.d.ts +11 -2
  74. package/documentation/doc-util/doc-cfg.js +35 -6
  75. package/documentation/doc-util/doc-code.js +10 -2
  76. package/documentation/print-capabilities-markdown.js +1 -1
  77. package/documentation/print-cfg-wiki.d.ts +1 -0
  78. package/documentation/print-cfg-wiki.js +84 -0
  79. package/documentation/print-core-wiki.js +2 -2
  80. package/documentation/print-interface-wiki.js +4 -0
  81. package/documentation/print-query-wiki.js +22 -3
  82. package/package.json +4 -3
  83. package/queries/catalog/call-context-query/call-context-query-executor.js +13 -0
  84. package/queries/catalog/call-context-query/call-context-query-format.d.ts +4 -0
  85. package/queries/catalog/call-context-query/call-context-query-format.js +1 -0
  86. package/queries/catalog/call-context-query/identify-link-to-last-call-relation.js +1 -1
  87. package/queries/catalog/dependencies-query/dependencies-query-executor.js +13 -5
  88. package/queries/catalog/dependencies-query/dependencies-query-format.d.ts +1 -25
  89. package/queries/catalog/dependencies-query/dependencies-query-format.js +2 -145
  90. package/queries/catalog/dependencies-query/function-info/function-info.d.ts +24 -0
  91. package/queries/catalog/dependencies-query/function-info/function-info.js +10 -0
  92. package/queries/catalog/dependencies-query/function-info/library-functions.d.ts +2 -0
  93. package/queries/catalog/dependencies-query/function-info/library-functions.js +18 -0
  94. package/queries/catalog/dependencies-query/function-info/read-functions.d.ts +2 -0
  95. package/queries/catalog/dependencies-query/function-info/read-functions.js +101 -0
  96. package/queries/catalog/dependencies-query/function-info/source-functions.d.ts +2 -0
  97. package/queries/catalog/dependencies-query/function-info/source-functions.js +11 -0
  98. package/queries/catalog/dependencies-query/function-info/write-functions.d.ts +2 -0
  99. package/queries/catalog/dependencies-query/function-info/write-functions.js +87 -0
  100. package/queries/catalog/location-map-query/location-map-query-executor.d.ts +1 -1
  101. package/queries/catalog/location-map-query/location-map-query-executor.js +38 -3
  102. package/queries/catalog/location-map-query/location-map-query-format.d.ts +10 -1
  103. package/queries/catalog/location-map-query/location-map-query-format.js +5 -1
  104. package/queries/catalog/project-query/project-query-executor.d.ts +3 -0
  105. package/queries/catalog/project-query/project-query-executor.js +17 -0
  106. package/queries/catalog/project-query/project-query-format.d.ts +67 -0
  107. package/queries/catalog/project-query/project-query-format.js +26 -0
  108. package/queries/query.d.ts +60 -1
  109. package/queries/query.js +3 -1
  110. package/r-bridge/data/data.d.ts +2 -2
  111. package/r-bridge/data/data.js +2 -2
  112. package/slicing/static/fingerprint.js +8 -1
  113. package/slicing/static/slice-call.d.ts +1 -1
  114. package/slicing/static/slice-call.js +5 -16
  115. package/slicing/static/slicer-types.d.ts +2 -0
  116. package/slicing/static/static-slicer.d.ts +4 -2
  117. package/slicing/static/static-slicer.js +24 -18
  118. package/slicing/static/visiting-queue.d.ts +7 -1
  119. package/slicing/static/visiting-queue.js +20 -6
  120. package/util/arrays.d.ts +23 -0
  121. package/util/arrays.js +41 -0
  122. package/util/cfg/visitor.d.ts +1 -1
  123. package/util/cfg/visitor.js +2 -2
  124. package/util/{list-access.d.ts → containers.d.ts} +24 -4
  125. package/util/{list-access.js → containers.js} +42 -12
  126. package/util/mermaid/ast.js +12 -1
  127. package/util/mermaid/cfg.js +2 -2
  128. package/util/parallel.d.ts +2 -1
  129. package/util/parallel.js +11 -2
  130. package/util/prefix.d.ts +13 -0
  131. package/util/prefix.js +34 -0
  132. package/util/version.js +1 -1
package/README.md CHANGED
@@ -48,7 +48,7 @@ It offers a wide variety of features, for example:
48
48
 
49
49
  ```shell
50
50
  $ docker run -it --rm eagleoutice/flowr # or npm run flowr
51
- flowR repl using flowR v2.2.9, R v4.4.0 (r-shell engine)
51
+ flowR repl using flowR v2.2.11, R v4.4.3 (r-shell engine)
52
52
  R> :slicer test/testfiles/example.R --criterion "11@sum"
53
53
  ```
54
54
 
@@ -95,7 +95,7 @@ It offers a wide variety of features, for example:
95
95
 
96
96
 
97
97
  * 🚀 **fast data- and control-flow graphs**\
98
- Within just <i><span title="This measurement is automatically fetched from the latest benchmark!">122.2 ms</span></i> (as of Feb 21, 2025),
98
+ Within just <i><span title="This measurement is automatically fetched from the latest benchmark!">117.9 ms</span></i> (as of Mar 2, 2025),
99
99
  _flowR_ can analyze the data- and control-flow of the average real-world R script. See the [benchmarks](https://flowr-analysis.github.io/flowr/wiki/stats/benchmark) for more information,
100
100
  and consult the [wiki pages](https://github.com/flowr-analysis/flowr/wiki/Dataflow-Graph) for more details on the dataflow graph.
101
101
 
@@ -131,7 +131,7 @@ It offers a wide variety of features, for example:
131
131
 
132
132
  ```shell
133
133
  $ docker run -it --rm eagleoutice/flowr # or npm run flowr
134
- flowR repl using flowR v2.2.9, R v4.4.0 (r-shell engine)
134
+ flowR repl using flowR v2.2.11, R v4.4.3 (r-shell engine)
135
135
  R> :dataflow* test/testfiles/example.R
136
136
  ```
137
137
 
@@ -377,7 +377,7 @@ It offers a wide variety of features, for example:
377
377
  ```
378
378
 
379
379
 
380
- (The analysis required _21.60 ms_ (including parse and normalize, using the [r-shell](https://github.com/flowr-analysis/flowr/wiki/Engines) engine) within the generation environment.)
380
+ (The analysis required _22.14 ms_ (including parse and normalize, using the [r-shell](https://github.com/flowr-analysis/flowr/wiki/Engines) engine) within the generation environment.)
381
381
 
382
382
 
383
383
 
@@ -43,6 +43,13 @@ export interface BenchmarkSingleSliceStats extends MergeableRecord {
43
43
  /** the final code, as the result of the 'reconstruct' step */
44
44
  code: ReconstructionResult;
45
45
  }
46
+ /**
47
+ * The type of sampling strategy to use when slicing all possible variables.
48
+ *
49
+ * - `'random'`: Randomly select the given number of slicing criteria.
50
+ * - `'equidistant'`: Select the given number of slicing criteria in an equidistant manner.
51
+ */
52
+ export type SamplingStrategy = 'random' | 'equidistant';
46
53
  export declare class BenchmarkSlicer {
47
54
  /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
48
55
  private readonly commonMeasurements;
@@ -62,38 +69,58 @@ export declare class BenchmarkSlicer {
62
69
  * Initialize the slicer on the given request.
63
70
  * Can only be called once for each instance.
64
71
  */
65
- init(request: RParseRequestFromFile | RParseRequestFromText, autoSelectIf?: AutoSelectPredicate): Promise<void>;
72
+ init(request: RParseRequestFromFile | RParseRequestFromText, autoSelectIf?: AutoSelectPredicate, threshold?: number): Promise<void>;
66
73
  private calculateStatsAfterInit;
67
74
  /**
68
- * Slice for the given {@link SlicingCriteria}.
69
- * @see SingleSlicingCriterion
70
- *
71
- * @returns The per slice stats retrieved for this slicing criteria
72
- */
75
+ * Counts the number of stored indices in the dataflow graph created by the pointer analysis.
76
+ */
77
+ private countStoredVertexIndices;
78
+ /**
79
+ * Counts the number of stored indices in the definitions of the current environment created by the pointer analysis.
80
+ */
81
+ private countStoredEnvIndices;
82
+ /**
83
+ * Counts the number of stored indices in the passed definitions.
84
+ */
85
+ private countStoredIndices;
86
+ /**
87
+ * Recursively counts the number of indices and sub-indices in the given collection.
88
+ */
89
+ private countIndices;
90
+ /**
91
+ * Slice for the given {@link SlicingCriteria}.
92
+ * @see SingleSlicingCriterion
93
+ *
94
+ * @returns The per slice stats retrieved for this slicing criteria
95
+ */
73
96
  slice(...slicingCriteria: SlicingCriteria): Promise<BenchmarkSingleSliceStats>;
74
97
  /** Bridging the gap between the new internal and the old names for the benchmarking */
75
98
  private measureCommonStep;
76
99
  private measureSliceStep;
77
100
  private guardActive;
78
101
  /**
79
- * Call {@link slice} for all slicing criteria that match the given filter.
80
- * See {@link collectAllSlicingCriteria} for details.
81
- * <p>
82
- * the `report` function will be called *before* each *individual* slice is performed.
83
- *
84
- * @returns The number of slices that were produced
85
- *
86
- * @see collectAllSlicingCriteria
87
- * @see SlicingCriteriaFilter
88
- */
89
- sliceForAll(filter: SlicingCriteriaFilter, report?: (current: number, total: number, allCriteria: SlicingCriteria[]) => void, sampleRandom?: number): Promise<number>;
102
+ * Call {@link slice} for all slicing criteria that match the given filter.
103
+ * See {@link collectAllSlicingCriteria} for details.
104
+ * <p>
105
+ * the `report` function will be called *before* each *individual* slice is performed.
106
+ *
107
+ * @returns The number of slices that were produced
108
+ *
109
+ * @see collectAllSlicingCriteria
110
+ * @see SlicingCriteriaFilter
111
+ */
112
+ sliceForAll(filter: SlicingCriteriaFilter, report?: (current: number, total: number, allCriteria: SlicingCriteria[]) => void, options?: {
113
+ sampleCount?: number;
114
+ maxSliceCount?: number;
115
+ sampleStrategy?: SamplingStrategy;
116
+ }): Promise<number>;
90
117
  /**
91
- * Retrieves the final stats and closes the shell session.
92
- * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
93
- */
118
+ * Retrieves the final stats and closes the shell session.
119
+ * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
120
+ */
94
121
  finish(): BenchmarkSlicerStats;
95
122
  /**
96
- * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
97
- */
123
+ * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
124
+ */
98
125
  ensureSessionClosed(): void;
99
126
  }
@@ -22,6 +22,8 @@ const size_of_1 = require("./stats/size-of");
22
22
  const shell_1 = require("../r-bridge/shell");
23
23
  const tree_sitter_types_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-types");
24
24
  const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor");
25
+ const vertex_1 = require("../dataflow/graph/vertex");
26
+ const arrays_1 = require("../util/arrays");
25
27
  /**
26
28
  * The logger to be used for benchmarking as a global object.
27
29
  */
@@ -49,7 +51,7 @@ class BenchmarkSlicer {
49
51
  * Initialize the slicer on the given request.
50
52
  * Can only be called once for each instance.
51
53
  */
52
- async init(request, autoSelectIf) {
54
+ async init(request, autoSelectIf, threshold) {
53
55
  (0, assert_1.guard)(this.stats === undefined, 'cannot initialize the slicer twice');
54
56
  // we know these are in sync so we just cast to one of them
55
57
  this.parser = await this.commonMeasurements.measure('initialize R session', async () => {
@@ -64,7 +66,8 @@ class BenchmarkSlicer {
64
66
  this.executor = (0, default_pipelines_1.createSlicePipeline)(this.parser, {
65
67
  request: { ...request },
66
68
  criterion: [],
67
- autoSelectIf
69
+ autoSelectIf,
70
+ threshold,
68
71
  });
69
72
  this.loadedXml = (await this.measureCommonStep('parse', 'retrieve AST from R code')).parsed;
70
73
  this.normalizedAst = await this.measureCommonStep('normalize', 'normalize R AST');
@@ -127,6 +130,9 @@ class BenchmarkSlicer {
127
130
  }
128
131
  return false;
129
132
  });
133
+ const storedVertexIndices = this.countStoredVertexIndices();
134
+ const storedEnvIndices = this.countStoredEnvIndices();
135
+ const overwrittenIndices = storedVertexIndices - storedEnvIndices;
130
136
  const split = loadedContent.split('\n');
131
137
  const nonWhitespace = (0, strings_1.withoutWhitespace)(loadedContent).length;
132
138
  this.stats = {
@@ -150,7 +156,10 @@ class BenchmarkSlicer {
150
156
  numberOfEdges: numberOfEdges,
151
157
  numberOfCalls: numberOfCalls,
152
158
  numberOfFunctionDefinitions: numberOfDefinitions,
153
- sizeOfObject: (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph)
159
+ sizeOfObject: (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph),
160
+ storedVertexIndices: storedVertexIndices,
161
+ storedEnvIndices: storedEnvIndices,
162
+ overwrittenIndices: overwrittenIndices,
154
163
  },
155
164
  // these are all properly initialized in finish()
156
165
  commonMeasurements: new Map(),
@@ -161,11 +170,52 @@ class BenchmarkSlicer {
161
170
  };
162
171
  }
163
172
  /**
164
- * Slice for the given {@link SlicingCriteria}.
165
- * @see SingleSlicingCriterion
166
- *
167
- * @returns The per slice stats retrieved for this slicing criteria
168
- */
173
+ * Counts the number of stored indices in the dataflow graph created by the pointer analysis.
174
+ */
175
+ countStoredVertexIndices() {
176
+ return this.countStoredIndices(this.dataflow?.out.map(ref => ref) ?? []);
177
+ }
178
+ /**
179
+ * Counts the number of stored indices in the definitions of the current environment created by the pointer analysis.
180
+ */
181
+ countStoredEnvIndices() {
182
+ return this.countStoredIndices(this.dataflow?.environment.current.memory.values()
183
+ ?.flatMap(def => def)
184
+ .map(def => def) ?? []);
185
+ }
186
+ /**
187
+ * Counts the number of stored indices in the passed definitions.
188
+ */
189
+ countStoredIndices(definitions) {
190
+ let numberOfIndices = 0;
191
+ for (const reference of definitions) {
192
+ if (reference.indicesCollection) {
193
+ numberOfIndices += this.countIndices(reference.indicesCollection);
194
+ }
195
+ }
196
+ return numberOfIndices;
197
+ }
198
+ /**
199
+ * Recursively counts the number of indices and sub-indices in the given collection.
200
+ */
201
+ countIndices(collection) {
202
+ let numberOfIndices = 0;
203
+ for (const indices of collection ?? []) {
204
+ for (const index of indices.indices) {
205
+ numberOfIndices++;
206
+ if ((0, vertex_1.isParentContainerIndex)(index)) {
207
+ numberOfIndices += this.countIndices(index.subIndices);
208
+ }
209
+ }
210
+ }
211
+ return numberOfIndices;
212
+ }
213
+ /**
214
+ * Slice for the given {@link SlicingCriteria}.
215
+ * @see SingleSlicingCriterion
216
+ *
217
+ * @returns The per slice stats retrieved for this slicing criteria
218
+ */
169
219
  async slice(...slicingCriteria) {
170
220
  exports.benchmarkLogger.trace(`try to slice for criteria ${JSON.stringify(slicingCriteria)}`);
171
221
  this.guardActive();
@@ -227,23 +277,33 @@ class BenchmarkSlicer {
227
277
  (0, assert_1.guard)(this.stats !== undefined && !this.finished, 'need to call init before, and can not do after finish!');
228
278
  }
229
279
  /**
230
- * Call {@link slice} for all slicing criteria that match the given filter.
231
- * See {@link collectAllSlicingCriteria} for details.
232
- * <p>
233
- * the `report` function will be called *before* each *individual* slice is performed.
234
- *
235
- * @returns The number of slices that were produced
236
- *
237
- * @see collectAllSlicingCriteria
238
- * @see SlicingCriteriaFilter
239
- */
240
- async sliceForAll(filter, report = () => { }, sampleRandom = -1) {
280
+ * Call {@link slice} for all slicing criteria that match the given filter.
281
+ * See {@link collectAllSlicingCriteria} for details.
282
+ * <p>
283
+ * the `report` function will be called *before* each *individual* slice is performed.
284
+ *
285
+ * @returns The number of slices that were produced
286
+ *
287
+ * @see collectAllSlicingCriteria
288
+ * @see SlicingCriteriaFilter
289
+ */
290
+ async sliceForAll(filter, report = () => { }, options = {}) {
291
+ const { sampleCount, maxSliceCount, sampleStrategy } = { sampleCount: -1, maxSliceCount: -1, sampleStrategy: 'random', ...options };
241
292
  this.guardActive();
242
293
  let count = 0;
243
- const allCriteria = [...(0, collect_all_1.collectAllSlicingCriteria)(this.normalizedAst.ast, filter)];
244
- if (sampleRandom > 0) {
245
- allCriteria.sort(() => Math.random() - 0.5);
246
- allCriteria.length = Math.min(allCriteria.length, sampleRandom);
294
+ let allCriteria = [...(0, collect_all_1.collectAllSlicingCriteria)(this.normalizedAst.ast, filter)];
295
+ // Cancel slicing if the number of slices exceeds the limit
296
+ if (maxSliceCount > 0 && allCriteria.length > maxSliceCount) {
297
+ return -allCriteria.length;
298
+ }
299
+ if (sampleCount > 0) {
300
+ if (sampleStrategy === 'equidistant') {
301
+ allCriteria = (0, arrays_1.equidistantSampling)(allCriteria, sampleCount, 'ceil');
302
+ }
303
+ else {
304
+ allCriteria.sort(() => Math.random() - 0.5);
305
+ allCriteria.length = Math.min(allCriteria.length, sampleCount);
306
+ }
247
307
  }
248
308
  for (const slicingCriteria of allCriteria) {
249
309
  report(count, allCriteria.length, allCriteria);
@@ -253,9 +313,9 @@ class BenchmarkSlicer {
253
313
  return count;
254
314
  }
255
315
  /**
256
- * Retrieves the final stats and closes the shell session.
257
- * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
258
- */
316
+ * Retrieves the final stats and closes the shell session.
317
+ * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
318
+ */
259
319
  finish() {
260
320
  (0, assert_1.guard)(this.stats !== undefined, 'need to call init before finish');
261
321
  if (!this.finished) {
@@ -291,8 +351,8 @@ class BenchmarkSlicer {
291
351
  };
292
352
  }
293
353
  /**
294
- * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
295
- */
354
+ * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
355
+ */
296
356
  ensureSessionClosed() {
297
357
  this.parser?.close();
298
358
  }
@@ -140,11 +140,14 @@ Input:
140
140
  Normalized R tokens (w/o comments): ${pad(stats.input.numberOfNormalizedTokensNoComments)}
141
141
 
142
142
  Dataflow:
143
- Number of nodes: ${pad(stats.dataflow.numberOfNodes)}
144
- Number of edges: ${pad(stats.dataflow.numberOfEdges)}
145
- Number of calls: ${pad(stats.dataflow.numberOfCalls)}
146
- Number of function defs: ${pad(stats.dataflow.numberOfFunctionDefinitions)}
147
- Size of graph: ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
143
+ Number of nodes: ${pad(stats.dataflow.numberOfNodes)}
144
+ Number of edges: ${pad(stats.dataflow.numberOfEdges)}
145
+ Number of calls: ${pad(stats.dataflow.numberOfCalls)}
146
+ Number of function defs: ${pad(stats.dataflow.numberOfFunctionDefinitions)}
147
+ Number of stored Vtx indices: ${pad(stats.dataflow.storedVertexIndices)}
148
+ Number of stored Env indices: ${pad(stats.dataflow.storedEnvIndices)}
149
+ Number of overwritten indices: ${pad(stats.dataflow.overwrittenIndices)}
150
+ Size of graph: ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
148
151
  }
149
152
  function ultimateStats2String(stats) {
150
153
  const slice = stats.totalSlices > 0 ? `Slice summary for:
@@ -195,11 +198,14 @@ Input:
195
198
  Normalized R tokens (w/o comments): ${formatSummarizedMeasure(stats.input.numberOfNormalizedTokensNoComments)}
196
199
 
197
200
  Dataflow:
198
- Number of nodes: ${formatSummarizedMeasure(stats.dataflow.numberOfNodes)}
199
- Number of edges: ${formatSummarizedMeasure(stats.dataflow.numberOfEdges)}
200
- Number of calls: ${formatSummarizedMeasure(stats.dataflow.numberOfCalls)}
201
- Number of function defs: ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}
202
- Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
201
+ Number of nodes: ${formatSummarizedMeasure(stats.dataflow.numberOfNodes)}
202
+ Number of edges: ${formatSummarizedMeasure(stats.dataflow.numberOfEdges)}
203
+ Number of calls: ${formatSummarizedMeasure(stats.dataflow.numberOfCalls)}
204
+ Number of function defs: ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}
205
+ Number of stored Vtx indices: ${formatSummarizedMeasure(stats.dataflow.storedVertexIndices)}
206
+ Number of stored Env indices: ${formatSummarizedMeasure(stats.dataflow.storedEnvIndices)}
207
+ Number of overwritten indices: ${formatSummarizedMeasure(stats.dataflow.overwrittenIndices)}
208
+ Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
203
209
  `;
204
210
  }
205
211
  function reduction2String(title, reduction) {
@@ -65,6 +65,23 @@ function getSizeOfDfGraph(df) {
65
65
  });
66
66
  verts.push(vertex);
67
67
  }
68
- return (0, object_sizeof_1.default)([...verts, ...df.edges()]);
68
+ return safeSizeOf([...verts, ...df.edges()]);
69
+ }
70
+ /**
71
+ * Calculates the size of an array in bytes.
72
+ *
73
+ * @param array - The array to calculate the size of.
74
+ * @returns The size of the array in bytes.
75
+ */
76
+ function safeSizeOf(array) {
77
+ const size = (0, object_sizeof_1.default)(array);
78
+ if (typeof size === 'number') {
79
+ return size;
80
+ }
81
+ // the sizeOf method returns an error object, when the size could not be calculated
82
+ // in this case, we split the array in half and calculate the size of each half recursively
83
+ const chunkSize = Math.ceil(array.length / 2);
84
+ // subtract 1, because of the separate stringification of the array
85
+ return safeSizeOf(array.slice(0, chunkSize)) + safeSizeOf(array.slice(chunkSize)) - 1;
69
86
  }
70
87
  //# sourceMappingURL=size-of.js.map
@@ -37,6 +37,9 @@ export interface SlicerStatsDataflow<T = number> {
37
37
  numberOfCalls: T;
38
38
  numberOfFunctionDefinitions: T;
39
39
  sizeOfObject: T;
40
+ storedVertexIndices: T;
41
+ storedEnvIndices: T;
42
+ overwrittenIndices: T;
40
43
  }
41
44
  /**
42
45
  * Please note that these measurements can be negative, as there is no guarantee that the memory usage will increase
@@ -85,7 +85,10 @@ function summarizeAllSummarizedStats(stats) {
85
85
  numberOfFunctionDefinitions: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfFunctionDefinitions)),
86
86
  numberOfCalls: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfCalls)),
87
87
  numberOfEdges: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.numberOfEdges)),
88
- sizeOfObject: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.sizeOfObject))
88
+ sizeOfObject: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.sizeOfObject)),
89
+ storedVertexIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.storedVertexIndices)),
90
+ storedEnvIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.storedEnvIndices)),
91
+ overwrittenIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.overwrittenIndices)),
89
92
  }
90
93
  };
91
94
  }
@@ -125,7 +128,10 @@ function summarizeAllUltimateStats(stats) {
125
128
  numberOfFunctionDefinitions: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfFunctionDefinitions)),
126
129
  numberOfCalls: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfCalls)),
127
130
  numberOfEdges: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.numberOfEdges)),
128
- sizeOfObject: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.sizeOfObject))
131
+ sizeOfObject: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.sizeOfObject)),
132
+ storedVertexIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.storedVertexIndices)),
133
+ storedEnvIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.storedEnvIndices)),
134
+ overwrittenIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.overwrittenIndices)),
129
135
  }
130
136
  };
131
137
  }
@@ -9,4 +9,9 @@ export interface BenchmarkCliOptions {
9
9
  limit?: number;
10
10
  runs?: number;
11
11
  parser: KnownParserName;
12
+ 'enable-pointer-tracking': boolean;
13
+ 'max-file-slices': number;
14
+ threshold?: number;
15
+ 'per-file-time-limit'?: number;
16
+ 'sampling-strategy': string;
12
17
  }
@@ -42,10 +42,17 @@ async function benchmark() {
42
42
  console.log(`Using ${options.parallel} parallel executors`);
43
43
  // we do not use the limit argument to be able to pick the limit randomly
44
44
  const files = [];
45
- for (const input of options.input) {
46
- for await (const file of (0, files_1.allRFiles)(input)) {
47
- files.push({ request: file, baseDir: input });
48
- }
45
+ const firstFile = options.input[0];
46
+ // Check whether the input is a single JSON file containing a list of paths
47
+ if (options.input.length === 1 && fs_1.default.statSync(firstFile).isFile() && firstFile.endsWith('.json')) {
48
+ console.log('Input is a single JSON file. Assuming it contains a list of files to process');
49
+ const content = fs_1.default.readFileSync(firstFile, 'utf8');
50
+ const paths = JSON.parse(content);
51
+ const baseDir = findCommonBaseDir(paths);
52
+ await collectFiles(files, paths, () => baseDir);
53
+ }
54
+ else {
55
+ await collectFiles(files, options.input, (p) => p);
49
56
  }
50
57
  if (options.limit) {
51
58
  log_1.log.info(`limiting to ${options.limit} files`);
@@ -59,18 +66,54 @@ async function benchmark() {
59
66
  '--file-id', `${i}`,
60
67
  '--output', path_1.default.join(options.output, path_1.default.relative(f.baseDir, `${f.request.content}.json`)),
61
68
  '--slice', options.slice, ...verboseAdd,
62
- '--parser', options.parser
69
+ '--parser', options.parser,
70
+ ...(options['enable-pointer-tracking'] ? ['--enable-pointer-tracking'] : []),
71
+ '--max-slices', `${options['max-file-slices']}`,
72
+ ...(options.threshold ? ['--threshold', `${options.threshold}`] : []),
73
+ '--sampling-strategy', options['sampling-strategy'],
63
74
  ]);
64
75
  const runs = options.runs ?? 1;
65
76
  for (let i = 1; i <= runs; i++) {
66
77
  console.log(`Run ${i} of ${runs}`);
67
78
  const pool = new parallel_1.LimitedThreadPool(`${__dirname}/benchmark-helper-app`,
68
79
  // we reverse here "for looks", since the helper pops from the end, and we want file ids to be ascending :D
69
- args.map(a => [...a, '--run-num', `${i}`]).reverse(), limit, options.parallel);
80
+ args.map(a => [...a, '--run-num', `${i}`]).reverse(), limit, options.parallel, options['per-file-time-limit']);
70
81
  await pool.run();
71
82
  const stats = pool.getStats();
72
83
  console.log(`Run ${i} of ${runs}: Benchmarked ${stats.counter} files, skipped ${stats.skipped.length} files due to errors`);
73
84
  }
74
85
  }
86
/**
 * Gather every R file reachable from the given search paths.
 *
 * Each discovered file is appended to `files` together with the base directory
 * that `getBaseDir` reports for the search path it was found under.
 *
 * @param files - output list; one `{ request, baseDir }` entry is pushed per discovered file
 * @param paths - search paths that are handed to `allRFiles` one after another
 * @param getBaseDir - maps a search path to the base directory recorded for its files
 */
async function collectFiles(files, paths, getBaseDir) {
    for (const searchPath of paths) {
        const discovered = (0, files_1.allRFiles)(searchPath);
        for await (const request of discovered) {
            // the base directory is looked up once per discovered file
            const baseDir = getBaseDir(searchPath);
            files.push({ request: request, baseDir: baseDir });
        }
    }
}
100
/**
 * Find the common base directory of a list of paths.
 *
 * The directory part of every path is split at the platform separator and the
 * longest run of leading segments shared by all of them is joined back together.
 *
 * @param paths - list of paths
 * @returns the common base directory. If the paths only share the leading separator
 *          (e.g. `/a/x.R` and `/b/y.R`), the separator itself is returned so the result
 *          still denotes the root. If nothing is shared, or `paths` is empty, the empty
 *          string is returned.
 */
function findCommonBaseDir(paths) {
    if (paths.length === 0) {
        return '';
    }
    const sep = path_1.default.sep;
    const baseDirs = paths.map(f => path_1.default.dirname(f));
    let common = baseDirs[0].split(sep);
    for (const dir of baseDirs) {
        const segments = dir.split(sep);
        let shared = 0;
        while (shared < common.length && shared < segments.length && common[shared] === segments[shared]) {
            shared++;
        }
        common = common.slice(0, shared);
    }
    // A lone empty segment is what precedes a leading separator: all directories start at the
    // root but agree on nothing below it. Joining would yield '', which path.relative() resolves
    // against the current working directory, so return the root explicitly instead.
    if (common.length === 1 && common[0] === '') {
        return sep;
    }
    return common.join(sep);
}
75
118
  void benchmark();
76
119
  //# sourceMappingURL=benchmark-app.js.map
@@ -8,4 +8,8 @@ export interface SingleBenchmarkCliOptions {
8
8
  slice: string;
9
9
  output?: string;
10
10
  parser: KnownParserName;
11
+ 'enable-pointer-tracking': boolean;
12
+ 'max-slices': number;
13
+ threshold?: number;
14
+ 'sampling-strategy': string;
11
15
  }
@@ -11,6 +11,7 @@ const script_1 = require("./common/script");
11
11
  const slicer_1 = require("../benchmark/slicer");
12
12
  const all_variables_1 = require("../slicing/criterion/filters/all-variables");
13
13
  const path_1 = __importDefault(require("path"));
14
+ const config_1 = require("../config");
14
15
  const options = (0, script_1.processCommandLineArgs)('benchmark-helper', [], {
15
16
  subtitle: 'Will slice for all possible variables, signal by exit code if slicing was successful, and can be run standalone',
16
17
  examples: [
@@ -23,6 +24,7 @@ if (options.verbose) {
23
24
  }
24
25
  const numberRegex = /^\d+$/;
25
26
  (0, assert_1.guard)(options.slice === 'all' || options.slice === 'no' || numberRegex.test(options.slice), 'slice must be either all, no, or a number');
27
+ (0, assert_1.guard)(options['sampling-strategy'] === 'random' || options['sampling-strategy'] === 'equidistant', 'sample-strategy must be either random or equidistant');
26
28
  async function benchmark() {
27
29
  // we do not use the limit argument to be able to pick the limit randomly
28
30
  (0, assert_1.guard)(options.input !== undefined, 'No input file given');
@@ -31,18 +33,31 @@ async function benchmark() {
31
33
  // prefix for printing to console, includes file id and run number if present
32
34
  const prefix = `[${options.input}${options['file-id'] !== undefined ? ` (file ${options['file-id']}, run ${options['run-num']})` : ''}]`;
33
35
  console.log(`${prefix} Appending output to ${options.output}`);
34
- fs_1.default.mkdirSync(path_1.default.parse(options.output).dir, { recursive: true });
36
+ const directory = path_1.default.parse(options.output).dir;
37
+ // ensure the directory exists if path contains one
38
+ if (directory !== '') {
39
+ fs_1.default.mkdirSync(directory, { recursive: true });
40
+ }
41
+ // Enable pointer analysis if requested, otherwise disable it
42
+ if (options['enable-pointer-tracking']) {
43
+ (0, config_1.amendConfig)({ solver: { ...(0, config_1.getConfig)().solver, pointerTracking: true, } });
44
+ }
45
+ else {
46
+ (0, config_1.amendConfig)({ solver: { ...(0, config_1.getConfig)().solver, pointerTracking: false, } });
47
+ }
35
48
  // ensure the file exists
36
49
  const fileStat = fs_1.default.statSync(options.input);
37
50
  (0, assert_1.guard)(fileStat.isFile(), `File ${options.input} does not exist or is no file`);
38
51
  const request = { request: 'file', content: options.input };
52
+ const maxSlices = options['max-slices'] ?? -1;
39
53
  const slicer = new slicer_1.BenchmarkSlicer(options.parser);
40
54
  try {
41
- await slicer.init(request);
55
+ await slicer.init(request, undefined, options.threshold);
42
56
  // ${escape}1F${escape}1G${escape}2K for line reset
43
57
  if (options.slice === 'all') {
44
- const count = await slicer.sliceForAll(all_variables_1.DefaultAllVariablesFilter, (i, total, arr) => console.log(`${prefix} Slicing ${i + 1}/${total} [${JSON.stringify(arr[i])}]`));
58
+ const count = await slicer.sliceForAll(all_variables_1.DefaultAllVariablesFilter, (i, total, arr) => console.log(`${prefix} Slicing ${i + 1}/${total} [${JSON.stringify(arr[i])}]`), { maxSliceCount: maxSlices });
45
59
  console.log(`${prefix} Completed Slicing`);
60
+ (0, assert_1.guard)(count >= 0, `Number of slices exceeded limit of ${maxSlices} with ${-count} slices, skipping in count`);
46
61
  (0, assert_1.guard)(count > 0, `No possible slices found for ${options.input}, skipping in count`);
47
62
  }
48
63
  else if (options.slice === 'no') {
@@ -51,8 +66,9 @@ async function benchmark() {
51
66
  else {
52
67
  const limit = parseInt(options.slice);
53
68
  console.log(`${prefix} Slicing up to ${limit} possible slices`);
54
- const count = await slicer.sliceForAll(all_variables_1.DefaultAllVariablesFilter, (i, total, arr) => console.log(`${prefix} Slicing ${i + 1}/${total} [${JSON.stringify(arr[i])}]`), limit);
69
+ const count = await slicer.sliceForAll(all_variables_1.DefaultAllVariablesFilter, (i, total, arr) => console.log(`${prefix} Slicing ${i + 1}/${total} [${JSON.stringify(arr[i])}]`), { sampleCount: limit, maxSliceCount: maxSlices, sampleStrategy: options['sampling-strategy'] });
55
70
  console.log(`${prefix} Completed Slicing`);
71
+ (0, assert_1.guard)(count >= 0, `Number of slices exceeded limit of ${maxSlices} with ${-count} slices, skipping in count`);
56
72
  (0, assert_1.guard)(count > 0, `No possible slices found for ${options.input}, skipping in count`);
57
73
  }
58
74
  const { stats } = slicer.finish();
@@ -16,11 +16,16 @@ exports.benchmarkOptions = [
16
16
  { name: 'help', alias: 'h', type: Boolean, description: 'Print this usage guide' },
17
17
  { name: 'limit', alias: 'l', type: Number, description: 'Limit the number of files to process (if given, this will choose these files randomly and add the chosen names to the output' },
18
18
  { name: 'runs', alias: 'r', type: Number, description: 'The amount of benchmark runs that should be done, out of which an average will be calculated' },
19
- { name: 'input', alias: 'i', type: String, description: 'Pass a folder or file as src to read from', multiple: true, defaultOption: true, defaultValue: [], typeLabel: '{underline files/folders}' },
19
+ { name: 'input', alias: 'i', type: String, description: 'Pass a folder or file as src to read from. Alternatively, pass a single JSON file that contains a list of paths.', multiple: true, defaultOption: true, defaultValue: [], typeLabel: '{underline files/folders}' },
20
20
  { name: 'parallel', alias: 'p', type: String, description: 'Number of parallel executors (defaults to {italic max(cpu.count-1, 1)})', defaultValue: Math.max(os_1.default.cpus().length - 1, 1), typeLabel: '{underline number}' },
21
21
  { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
22
- { name: 'output', alias: 'o', type: String, description: `Directory to write all the measurements to in a per-file-basis (defaults to {italic benchmark-${StartTimeString}})`, defaultValue: `benchmark-${StartTimeString}`, typeLabel: '{underline file}' },
23
- { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' }
22
+ { name: 'output', alias: 'o', type: String, description: `Folder to write all the measurements to in a per-file-basis (defaults to {italic benchmark-${StartTimeString}})`, defaultValue: `benchmark-${StartTimeString}`, typeLabel: '{underline folder}' },
23
+ { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' },
24
+ { name: 'enable-pointer-tracking', type: Boolean, description: 'Run dataflow analysis with pointer tracking', defaultValue: false },
25
+ { name: 'max-file-slices', type: Number, description: 'If file has more than passed number of slices, the file is not processed', defaultValue: -1, typeLabel: '{underline number}' },
26
+ { name: 'threshold', alias: 't', type: Number, description: 'How many re-visits of the same node are ok?', defaultValue: undefined, typeLabel: '{underline number}' },
27
+ { name: 'per-file-time-limit', type: Number, description: 'Time limit in milliseconds to process single file (disabled by default)', defaultValue: undefined, typeLabel: '{underline number}' },
28
+ { name: 'sampling-strategy', type: String, description: 'Which strategy to use, when sampling is enabled', defaultValue: 'random', typeLabel: '{underline random/edquidistant}' },
24
29
  ];
25
30
  exports.benchmarkHelperOptions = [
26
31
  { name: 'verbose', alias: 'v', type: Boolean, description: 'Run with verbose logging [do not use for the real benchmark as this affects the time measurements, but only to find errors]' },
@@ -30,7 +35,11 @@ exports.benchmarkHelperOptions = [
30
35
  { name: 'run-num', alias: 'r', type: Number, description: 'The n-th time that the file with the given file-id is being benchmarked' },
31
36
  { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
32
37
  { name: 'output', alias: 'o', type: String, description: 'File to write the measurements to (appends a single line in JSON format)', typeLabel: '{underline file}' },
33
- { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' }
38
+ { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' },
39
+ { name: 'enable-pointer-tracking', type: Boolean, description: 'Run dataflow analysis with pointer tracking', defaultValue: false },
40
+ { name: 'max-slices', type: Number, description: 'If file has more than passed number of slices, the file is not processed', defaultValue: -1, typeLabel: '{underline number}' },
41
+ { name: 'threshold', alias: 't', type: Number, description: 'How many re-visits of the same node are ok?', defaultValue: undefined, typeLabel: '{underline number}' },
42
+ { name: 'sampling-strategy', type: String, description: 'Which strategy to use, when sampling is enabled', defaultValue: 'random', typeLabel: '{underline random/edquidistant}' },
34
43
  ];
35
44
  exports.exportQuadsOptions = [
36
45
  { name: 'verbose', alias: 'v', type: Boolean, description: 'Run with verbose logging' },
@@ -81,6 +81,8 @@ const _commands = {
81
81
  'normalize*': repl_normalize_1.normalizeStarCommand,
82
82
  'dataflow': repl_dataflow_1.dataflowCommand,
83
83
  'dataflow*': repl_dataflow_1.dataflowStarCommand,
84
+ 'dataflowsimple': repl_dataflow_1.dataflowSimplifiedCommand,
85
+ 'dataflowsimple*': repl_dataflow_1.dataflowSimpleStarCommand,
84
86
  'controlflow': repl_cfg_1.controlflowCommand,
85
87
  'controlflow*': repl_cfg_1.controlflowStarCommand,
86
88
  'lineage': repl_lineage_1.lineageCommand,
@@ -1,3 +1,5 @@
1
1
  import type { ReplCommand } from './repl-main';
2
2
  export declare const dataflowCommand: ReplCommand;
3
3
  export declare const dataflowStarCommand: ReplCommand;
4
+ export declare const dataflowSimplifiedCommand: ReplCommand;
5
+ export declare const dataflowSimpleStarCommand: ReplCommand;