@eagleoutice/flowr 2.3.0 → 2.4.0

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
Files changed (101)
  1. package/README.md +41 -29
  2. package/abstract-interpretation/data-frame/absint-visitor.d.ts +2 -3
  3. package/abstract-interpretation/data-frame/absint-visitor.js +14 -16
  4. package/abstract-interpretation/data-frame/mappers/function-mapper.js +3 -3
  5. package/abstract-interpretation/data-frame/semantics.d.ts +1 -1
  6. package/abstract-interpretation/data-frame/semantics.js +7 -10
  7. package/abstract-interpretation/data-frame/shape-inference.js +2 -8
  8. package/benchmark/slicer.js +7 -5
  9. package/benchmark/summarizer/second-phase/graph.js +1 -1
  10. package/benchmark/summarizer/second-phase/process.js +1 -1
  11. package/cli/benchmark-app.d.ts +1 -0
  12. package/cli/benchmark-app.js +1 -0
  13. package/cli/benchmark-helper-app.d.ts +1 -0
  14. package/cli/benchmark-helper-app.js +4 -3
  15. package/cli/common/options.js +2 -0
  16. package/cli/repl/commands/repl-query.js +1 -1
  17. package/cli/repl/server/connection.js +14 -5
  18. package/control-flow/basic-cfg-guided-visitor.d.ts +1 -2
  19. package/control-flow/basic-cfg-guided-visitor.js +0 -6
  20. package/control-flow/cfg-simplification.d.ts +6 -0
  21. package/control-flow/cfg-simplification.js +18 -9
  22. package/control-flow/control-flow-graph.d.ts +2 -8
  23. package/control-flow/control-flow-graph.js +1 -6
  24. package/control-flow/extract-cfg.d.ts +2 -2
  25. package/control-flow/extract-cfg.js +52 -63
  26. package/core/steps/all/static-slicing/00-slice.d.ts +7 -1
  27. package/core/steps/all/static-slicing/00-slice.js +9 -3
  28. package/core/steps/pipeline/default-pipelines.d.ts +74 -74
  29. package/dataflow/environments/built-in.d.ts +2 -2
  30. package/dataflow/environments/built-in.js +13 -12
  31. package/dataflow/graph/dataflowgraph-builder.js +2 -2
  32. package/dataflow/graph/graph.js +1 -1
  33. package/dataflow/graph/invert-dfg.d.ts +2 -0
  34. package/dataflow/graph/invert-dfg.js +17 -0
  35. package/documentation/doc-util/doc-query.js +1 -1
  36. package/documentation/doc-util/doc-search.js +2 -2
  37. package/documentation/print-cfg-wiki.js +3 -4
  38. package/documentation/print-core-wiki.js +2 -2
  39. package/documentation/print-dataflow-graph-wiki.js +7 -0
  40. package/documentation/print-faq-wiki.js +4 -0
  41. package/documentation/print-linter-wiki.js +32 -4
  42. package/documentation/print-linting-and-testing-wiki.js +13 -1
  43. package/documentation/print-onboarding-wiki.js +4 -0
  44. package/documentation/print-query-wiki.js +12 -3
  45. package/linter/linter-executor.js +1 -2
  46. package/linter/linter-format.d.ts +26 -4
  47. package/linter/linter-format.js +25 -6
  48. package/linter/linter-rules.d.ts +40 -12
  49. package/linter/linter-rules.js +3 -1
  50. package/linter/rules/absolute-path.d.ts +4 -7
  51. package/linter/rules/absolute-path.js +9 -6
  52. package/linter/rules/dataframe-access-validation.d.ts +3 -1
  53. package/linter/rules/dataframe-access-validation.js +3 -1
  54. package/linter/rules/dead-code.d.ts +43 -0
  55. package/linter/rules/dead-code.js +50 -0
  56. package/linter/rules/deprecated-functions.d.ts +3 -2
  57. package/linter/rules/deprecated-functions.js +3 -1
  58. package/linter/rules/file-path-validity.d.ts +4 -4
  59. package/linter/rules/file-path-validity.js +8 -6
  60. package/linter/rules/naming-convention.d.ts +4 -3
  61. package/linter/rules/naming-convention.js +3 -1
  62. package/linter/rules/seeded-randomness.d.ts +4 -3
  63. package/linter/rules/seeded-randomness.js +3 -1
  64. package/linter/rules/unused-definition.d.ts +2 -0
  65. package/linter/rules/unused-definition.js +3 -1
  66. package/package.json +1 -1
  67. package/queries/catalog/dependencies-query/dependencies-query-executor.js +6 -1
  68. package/queries/catalog/dependencies-query/function-info/read-functions.js +1 -0
  69. package/queries/catalog/dependencies-query/function-info/write-functions.js +1 -0
  70. package/queries/catalog/linter-query/linter-query-format.js +1 -1
  71. package/queries/catalog/location-map-query/location-map-query-executor.js +7 -5
  72. package/queries/catalog/location-map-query/location-map-query-format.d.ts +3 -0
  73. package/queries/catalog/location-map-query/location-map-query-format.js +1 -0
  74. package/queries/catalog/search-query/search-query-executor.js +1 -1
  75. package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -1
  76. package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +3 -0
  77. package/queries/catalog/static-slice-query/static-slice-query-format.js +3 -1
  78. package/queries/query-print.d.ts +1 -1
  79. package/queries/query-print.js +0 -1
  80. package/queries/query.d.ts +16 -5
  81. package/queries/query.js +24 -11
  82. package/search/flowr-search-builder.d.ts +6 -6
  83. package/search/flowr-search-executor.d.ts +2 -2
  84. package/search/flowr-search-executor.js +1 -1
  85. package/search/flowr-search.d.ts +13 -8
  86. package/search/flowr-search.js +21 -0
  87. package/search/search-executor/search-enrichers.d.ts +87 -20
  88. package/search/search-executor/search-enrichers.js +44 -5
  89. package/search/search-executor/search-generators.d.ts +4 -4
  90. package/search/search-executor/search-generators.js +12 -7
  91. package/search/search-executor/search-mappers.js +3 -2
  92. package/search/search-executor/search-transformer.d.ts +3 -3
  93. package/search/search-executor/search-transformer.js +2 -2
  94. package/slicing/static/static-slicer.d.ts +4 -2
  95. package/slicing/static/static-slicer.js +10 -4
  96. package/util/collections/arrays.d.ts +2 -0
  97. package/util/collections/arrays.js +9 -0
  98. package/util/mermaid/dfg.js +4 -2
  99. package/util/range.d.ts +1 -0
  100. package/util/range.js +5 -1
  101. package/util/version.js +1 -1
package/README.md CHANGED
@@ -24,7 +24,7 @@ It offers a wide variety of features, for example:
  
  ```shell
  $ docker run -it --rm eagleoutice/flowr # or npm run flowr
- flowR repl using flowR v2.2.16, R v4.5.0 (r-shell engine)
+ flowR repl using flowR v2.3.0, R v4.5.0 (r-shell engine)
  R> :query @linter "read.csv(\"/root/x.txt\")"
  ```
  
@@ -33,17 +33,17 @@ It offers a wide variety of features, for example:
  
  
  ```text
- Query: linter (2 ms)
+ Query: linter (3 ms)
  ╰ **Deprecated Functions** (deprecated-functions):
  ╰ _Metadata_: <code>{"totalDeprecatedCalls":0,"totalDeprecatedFunctionDefinitions":0,"searchTimeMs":0,"processTimeMs":0}</code>
  ╰ **File Path Validity** (file-path-validity):
- definitely:
+ certain:
  ╰ Path `/root/x.txt` at 1.1-23
  ╰ _Metadata_: <code>{"totalReads":1,"totalUnknown":0,"totalWritesBeforeAlways":0,"totalValid":0,"searchTimeMs":1,"processTimeMs":0}</code>
  ╰ **Seeded Randomness** (seeded-randomness):
  ╰ _Metadata_: <code>{"consumerCalls":0,"callsWithFunctionProducers":0,"callsWithAssignmentProducers":0,"callsWithNonConstantProducers":0,"searchTimeMs":0,"processTimeMs":0}</code>
  ╰ **Absolute Paths** (absolute-file-paths):
- definitely:
+ certain:
  ╰ Path `/root/x.txt` at 1.1-23
  ╰ _Metadata_: <code>{"totalConsidered":1,"totalUnknown":0,"searchTimeMs":1,"processTimeMs":0}</code>
  ╰ **Unused Definitions** (unused-definitions):
@@ -52,7 +52,9 @@ It offers a wide variety of features, for example:
  ╰ _Metadata_: <code>{"numMatches":0,"numBreak":0,"searchTimeMs":0,"processTimeMs":0}</code>
  ╰ **Dataframe Access Validation** (dataframe-access-validation):
  ╰ _Metadata_: <code>{"numOperations":0,"numAccesses":0,"totalAccessed":0,"searchTimeMs":0,"processTimeMs":0}</code>
- All queries together required ≈2 ms (1ms accuracy, total 7 ms)
+ **Dead Code** (dead-code):
+ ╰ _Metadata_: <code>{"consideredNodes":5,"searchTimeMs":1,"processTimeMs":0}</code>
+ All queries together required ≈3 ms (1ms accuracy, total 9 ms)
  ```
  
  
@@ -74,30 +76,32 @@ It offers a wide variety of features, for example:
  
  _Results (prettified and summarized):_
  
- Query: **linter** (12 ms)\
+ Query: **linter** (14 ms)\
  &nbsp;&nbsp;&nbsp;╰ **Deprecated Functions** (deprecated-functions):\
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"totalDeprecatedCalls":0,"totalDeprecatedFunctionDefinitions":0,"searchTimeMs":1,"processTimeMs":0}</code>\
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"totalDeprecatedCalls":0,"totalDeprecatedFunctionDefinitions":0,"searchTimeMs":2,"processTimeMs":0}</code>\
  &nbsp;&nbsp;&nbsp;╰ **File Path Validity** (file-path-validity):\
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ definitely:\
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ certain:\
  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ Path `/root/x.txt` at 1.1-23\
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"totalReads":1,"totalUnknown":0,"totalWritesBeforeAlways":0,"totalValid":0,"searchTimeMs":3,"processTimeMs":1}</code>\
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"totalReads":1,"totalUnknown":0,"totalWritesBeforeAlways":0,"totalValid":0,"searchTimeMs":4,"processTimeMs":1}</code>\
  &nbsp;&nbsp;&nbsp;╰ **Seeded Randomness** (seeded-randomness):\
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"consumerCalls":0,"callsWithFunctionProducers":0,"callsWithAssignmentProducers":0,"callsWithNonConstantProducers":0,"searchTimeMs":1,"processTimeMs":0}</code>\
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"consumerCalls":0,"callsWithFunctionProducers":0,"callsWithAssignmentProducers":0,"callsWithNonConstantProducers":0,"searchTimeMs":0,"processTimeMs":1}</code>\
  &nbsp;&nbsp;&nbsp;╰ **Absolute Paths** (absolute-file-paths):\
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ definitely:\
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ certain:\
  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ Path `/root/x.txt` at 1.1-23\
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"totalConsidered":1,"totalUnknown":0,"searchTimeMs":2,"processTimeMs":0}</code>\
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"totalConsidered":1,"totalUnknown":0,"searchTimeMs":1,"processTimeMs":1}</code>\
  &nbsp;&nbsp;&nbsp;╰ **Unused Definitions** (unused-definitions):\
  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"totalConsidered":0,"searchTimeMs":0,"processTimeMs":0}</code>\
  &nbsp;&nbsp;&nbsp;╰ **Naming Convention** (naming-convention):\
  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"numMatches":0,"numBreak":0,"searchTimeMs":0,"processTimeMs":0}</code>\
  &nbsp;&nbsp;&nbsp;╰ **Dataframe Access Validation** (dataframe-access-validation):\
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"numOperations":0,"numAccesses":0,"totalAccessed":0,"searchTimeMs":0,"processTimeMs":3}</code>\
- _All queries together required ≈13 ms (1ms accuracy, total 207 ms)_
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"numOperations":0,"numAccesses":0,"totalAccessed":0,"searchTimeMs":0,"processTimeMs":2}</code>\
+ &nbsp;&nbsp;&nbsp;╰ **Dead Code** (dead-code):\
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;╰ _Metadata_: <code>{"consideredNodes":5,"searchTimeMs":0,"processTimeMs":0}</code>\
+ _All queries together required ≈14 ms (1ms accuracy, total 214 ms)_
  
  <details> <summary style="color:gray">Show Detailed Results as Json</summary>
  
- The analysis required _207.2 ms_ (including parsing and normalization and the query) within the generation environment.
+ The analysis required _213.8 ms_ (including parsing and normalization and the query) within the generation environment.
  
  In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR.
  Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki/Interface) wiki page for more information on how to get those.
@@ -114,7 +118,7 @@ It offers a wide variety of features, for example:
  ".meta": {
  "totalDeprecatedCalls": 0,
  "totalDeprecatedFunctionDefinitions": 0,
- "searchTimeMs": 1,
+ "searchTimeMs": 2,
  "processTimeMs": 0
  }
  },
@@ -128,7 +132,7 @@ It offers a wide variety of features, for example:
  23
  ],
  "filePath": "/root/x.txt",
- "certainty": "definitely"
+ "certainty": "certain"
  }
  ],
  ".meta": {
@@ -136,7 +140,7 @@ It offers a wide variety of features, for example:
  "totalUnknown": 0,
  "totalWritesBeforeAlways": 0,
  "totalValid": 0,
- "searchTimeMs": 3,
+ "searchTimeMs": 4,
  "processTimeMs": 1
  }
  },
@@ -147,14 +151,14 @@ It offers a wide variety of features, for example:
  "callsWithFunctionProducers": 0,
  "callsWithAssignmentProducers": 0,
  "callsWithNonConstantProducers": 0,
- "searchTimeMs": 1,
- "processTimeMs": 0
+ "searchTimeMs": 0,
+ "processTimeMs": 1
  }
  },
  "absolute-file-paths": {
  "results": [
  {
- "certainty": "definitely",
+ "certainty": "certain",
  "filePath": "/root/x.txt",
  "range": [
  1,
@@ -167,8 +171,8 @@ It offers a wide variety of features, for example:
  ".meta": {
  "totalConsidered": 1,
  "totalUnknown": 0,
- "searchTimeMs": 2,
- "processTimeMs": 0
+ "searchTimeMs": 1,
+ "processTimeMs": 1
  }
  },
  "unused-definitions": {
@@ -195,16 +199,24 @@ It offers a wide variety of features, for example:
  "numAccesses": 0,
  "totalAccessed": 0,
  "searchTimeMs": 0,
- "processTimeMs": 3
+ "processTimeMs": 2
+ }
+ },
+ "dead-code": {
+ "results": [],
+ ".meta": {
+ "consideredNodes": 5,
+ "searchTimeMs": 0,
+ "processTimeMs": 0
  }
  }
  },
  ".meta": {
- "timing": 12
+ "timing": 14
  }
  },
  ".meta": {
- "timing": 13
+ "timing": 14
  }
  }
  ```
@@ -271,7 +283,7 @@ It offers a wide variety of features, for example:
  
  ```shell
  $ docker run -it --rm eagleoutice/flowr # or npm run flowr
- flowR repl using flowR v2.2.16, R v4.5.0 (r-shell engine)
+ flowR repl using flowR v2.3.0, R v4.5.0 (r-shell engine)
  R> :slicer test/testfiles/example.R --criterion "11@sum"
  ```
  
@@ -318,7 +330,7 @@ It offers a wide variety of features, for example:
  
  
  * 🚀 **fast data- and control-flow graphs**\
- Within just <i><span title="This measurement is automatically fetched from the latest benchmark!">136.1 ms</span></i> (as of Jul 12, 2025),
+ Within just <i><span title="This measurement is automatically fetched from the latest benchmark!">136.8 ms</span></i> (as of Jul 21, 2025),
  _flowR_ can analyze the data- and control-flow of the average real-world R script. See the [benchmarks](https://flowr-analysis.github.io/flowr/wiki/stats/benchmark) for more information,
  and consult the [wiki pages](https://github.com/flowr-analysis/flowr/wiki/Dataflow-Graph) for more details on the dataflow graph.
  
@@ -354,7 +366,7 @@ It offers a wide variety of features, for example:
  
  ```shell
  $ docker run -it --rm eagleoutice/flowr # or npm run flowr
- flowR repl using flowR v2.2.16, R v4.5.0 (r-shell engine)
+ flowR repl using flowR v2.3.0, R v4.5.0 (r-shell engine)
  R> :dataflow* test/testfiles/example.R
  ```
  
@@ -47,9 +47,8 @@ export declare class DataFrameShapeInferenceVisitor<OtherInfo = NoInfo, ControlF
  source: NodeId | undefined;
  target: NodeId | undefined;
  }): void;
- private processOperation;
- private processDataFrameAssignment;
- private processDataFrameExpression;
+ private applyDataFrameAssignment;
+ private applyDataFrameExpression;
  /** We only process vertices of leaf nodes and exit vertices (no entry nodes of complex nodes) */
  private shouldSkipVertex;
  /** Get all AST nodes for the predecessor vertices that are leaf nodes and exit vertices */
@@ -31,7 +31,7 @@ class DataFrameShapeInferenceVisitor extends semantic_cfg_guided_visitor_1.Seman
  }
  visitNode(nodeId) {
  const vertex = this.getCfgVertex(nodeId);
- // skip vertices representing mid markers or entries of complex nodes
+ // skip vertices representing entries of complex nodes
  if (vertex === undefined || this.shouldSkipVertex(vertex)) {
  return true;
  }
@@ -40,7 +40,7 @@ class DataFrameShapeInferenceVisitor extends semantic_cfg_guided_visitor_1.Seman
  this.onVisitNode(nodeId);
  const visitedCount = this.visited.get(vertex.id) ?? 0;
  this.visited.set(vertex.id, visitedCount + 1);
- // only continue visitor if the node has not been visited before or the data frame value of the node changed
+ // only continue visiting if the node has not been visited before or the data frame value of the node changed
  return visitedCount === 0 || !(0, domain_1.equalDataFrameState)(this.oldDomain, this.newDomain);
  }
  visitDataflowNode(vertex) {
@@ -69,7 +69,7 @@ class DataFrameShapeInferenceVisitor extends semantic_cfg_guided_visitor_1.Seman
  const sourceNode = this.getNormalizedAst(source);
  if (node !== undefined && (0, assignment_mapper_1.isAssignmentTarget)(targetNode) && sourceNode !== undefined) {
  node.info.dataFrame = (0, assignment_mapper_1.mapDataFrameVariableAssignment)(targetNode, sourceNode, this.config.dfg);
- this.processOperation(node);
+ this.applyDataFrameAssignment(node);
  this.clearUnassignedInfo(targetNode);
  }
  }
@@ -77,14 +77,14 @@ class DataFrameShapeInferenceVisitor extends semantic_cfg_guided_visitor_1.Seman
  const node = this.getNormalizedAst(call.id);
  if (node !== undefined) {
  node.info.dataFrame = (0, access_mapper_1.mapDataFrameAccess)(node, this.config.dfg);
- this.processOperation(node);
+ this.applyDataFrameExpression(node);
  }
  }
  onDefaultFunctionCall({ call }) {
  const node = this.getNormalizedAst(call.id);
  if (node !== undefined) {
  node.info.dataFrame = (0, function_mapper_1.mapDataFrameFunctionCall)(node, this.config.dfg, this.config.flowrConfig);
- this.processOperation(node);
+ this.applyDataFrameExpression(node);
  }
  }
  onReplacementCall({ call, source, target }) {
@@ -93,19 +93,14 @@ class DataFrameShapeInferenceVisitor extends semantic_cfg_guided_visitor_1.Seman
  const sourceNode = this.getNormalizedAst(source);
  if (node !== undefined && targetNode !== undefined && sourceNode !== undefined) {
  node.info.dataFrame = (0, replacement_mapper_1.mapDataFrameReplacementFunction)(node, sourceNode, this.config.dfg);
- this.processOperation(node);
+ this.applyDataFrameExpression(node);
  this.clearUnassignedInfo(targetNode);
  }
  }
- processOperation(node) {
- if ((0, absint_info_1.hasDataFrameAssignmentInfo)(node)) {
- this.processDataFrameAssignment(node);
- }
- else if ((0, absint_info_1.hasDataFrameExpressionInfo)(node)) {
- this.processDataFrameExpression(node);
+ applyDataFrameAssignment(node) {
+ if (!(0, absint_info_1.hasDataFrameAssignmentInfo)(node)) {
+ return;
  }
- }
- processDataFrameAssignment(node) {
  const value = (0, shape_inference_1.resolveIdToDataFrameShape)(node.info.dataFrame.expression, this.config.dfg, this.newDomain);
  if (value !== undefined) {
  this.newDomain.set(node.info.dataFrame.identifier, value);
@@ -116,11 +111,14 @@ class DataFrameShapeInferenceVisitor extends semantic_cfg_guided_visitor_1.Seman
  }
  }
  }
- processDataFrameExpression(node) {
+ applyDataFrameExpression(node) {
+ if (!(0, absint_info_1.hasDataFrameExpressionInfo)(node)) {
+ return;
+ }
  let value = domain_1.DataFrameTop;
  for (const { operation, operand, type, options, ...args } of node.info.dataFrame.operations) {
  const operandValue = operand !== undefined ? (0, shape_inference_1.resolveIdToDataFrameShape)(operand, this.config.dfg, this.newDomain) : value;
- value = (0, semantics_1.applySemantics)(operation, operandValue ?? domain_1.DataFrameTop, args, options);
+ value = (0, semantics_1.applyDataFrameSemantics)(operation, operandValue ?? domain_1.DataFrameTop, args, options);
  const constraintType = type ?? (0, semantics_1.getConstraintType)(operation);
  if (operand !== undefined && constraintType === semantics_1.ConstraintType.OperandModification) {
  this.newDomain.set(operand, value);
@@ -214,8 +214,8 @@ const DataFrameFunctionParamsMapper = {
  critical: [{ pos: -1, name: 'row.names' }]
  },
  'as.data.frame': {
- critical: [],
- dataFrame: { pos: 0, name: 'x' }
+ dataFrame: { pos: 0, name: 'x' },
+ critical: []
  },
  'read.table': {
  fileName: { pos: 0, name: 'file' },
@@ -1097,7 +1097,7 @@ function mapDataFrameUnknown(args, params, info) {
  return;
  }
  return [{
- operation: 'identity',
+ operation: 'unknown',
  operand: dataFrame.value.info.id,
  ...(params.constraintType !== undefined ? { type: params.constraintType } : {})
  }];
@@ -123,7 +123,7 @@ export type DataFrameOperationOptions<N extends DataFrameOperationName> = Parame
  * @returns The resulting new data frame shape constraints.
  * The semantic type of the resulting constraints depends on the {@link ConstraintType} of the abstract operation.
  */
- export declare function applySemantics<Name extends DataFrameOperationName>(operation: Name, value: DataFrameDomain, args: DataFrameOperationArgs<Name>, options?: DataFrameOperationOptions<Name>): DataFrameDomain;
+ export declare function applyDataFrameSemantics<Name extends DataFrameOperationName>(operation: Name, value: DataFrameDomain, args: DataFrameOperationArgs<Name>, options?: DataFrameOperationOptions<Name>): DataFrameDomain;
  /**
  * Gets the default resulting constraint type for an abstract data frame operation.
  */
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.DataFrameOperationNames = exports.ConstraintType = void 0;
- exports.applySemantics = applySemantics;
+ exports.applyDataFrameSemantics = applyDataFrameSemantics;
  exports.getConstraintType = getConstraintType;
  const assert_1 = require("../../util/assert");
  const domain_1 = require("./domain");
@@ -58,7 +58,7 @@ exports.DataFrameOperationNames = Object.keys(DataFrameSemanticsMapper);
  * @returns The resulting new data frame shape constraints.
  * The semantic type of the resulting constraints depends on the {@link ConstraintType} of the abstract operation.
  */
- function applySemantics(operation, value, args, options) {
+ function applyDataFrameSemantics(operation, value, args, options) {
  const applier = DataFrameSemanticsMapper[operation];
  return applier.apply(value, args, options);
  }
@@ -266,14 +266,11 @@ function applyGroupBySemantics(value, { by }, options) {
  return {
  ...value,
  colnames: by.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, by) : domain_1.ColNamesTop,
- cols: (0, domain_1.addInterval)(value.cols, [0, by.length]),
- rows: (0, domain_1.extendIntervalToZero)(value.rows)
+ cols: (0, domain_1.addInterval)(value.cols, [0, by.length])
  };
  }
- return {
- ...value,
- rows: (0, domain_1.extendIntervalToZero)(value.rows)
- };
+ // Group by only marks columns as groups but does not change the shape itself
+ return value;
  }
  function applySummarizeSemantics(value, { colnames }) {
  const cols = colnames?.length;
@@ -281,7 +278,7 @@ function applySummarizeSemantics(value, { colnames }) {
  ...value,
  colnames: colnames?.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop,
  cols: cols !== undefined ? (0, domain_1.minInterval)((0, domain_1.addInterval)(value.cols, [0, cols]), [cols, Infinity]) : (0, domain_1.extendIntervalToInfinity)(value.rows),
- rows: (0, domain_1.maxInterval)((0, domain_1.minInterval)(value.rows, [1, Infinity]), [1, 1])
+ rows: (0, domain_1.maxInterval)((0, domain_1.minInterval)(value.rows, [1, Infinity]), [0, 1])
  };
  }
  function applyJoinSemantics(value, { other, by }, options) {
@@ -304,7 +301,7 @@ function applyJoinSemantics(value, { other, by }, options) {
  }
  };
  const commonCols = (0, domain_1.meetColNames)(value.colnames, other.colnames);
- let duplicateCols; // whether columns may be renamed due to occurance in both data frames
+ let duplicateCols; // whether columns may be renamed due to occurrence in both data frames
  let productRows; // whether the resulting rows may be a Cartesian product of the rows of the data frames
  if (options?.natural) {
  duplicateCols = false;
@@ -76,10 +76,7 @@ function resolveIdToDataFrameShape(id, dfg, domain) {
  }
  }
  else if (node.type === type_1.RType.IfThenElse) {
- if (node.otherwise === undefined) {
- return resolveIdToDataFrameShape(node.then, dfg, domain) !== undefined ? domain_1.DataFrameTop : undefined;
- }
- else {
+ if (node.otherwise !== undefined) {
  const values = [node.then, node.otherwise].map(entry => resolveIdToDataFrameShape(entry, dfg, domain));
  if (values.length > 0 && values.every(assert_1.isNotUndefined)) {
  return (0, domain_1.joinDataFrames)(...values);
@@ -87,10 +84,7 @@ function resolveIdToDataFrameShape(id, dfg, domain) {
  }
  }
  else if (origins.includes('builtin:if-then-else') && call?.args.every(arg => arg !== r_function_call_1.EmptyArgument)) {
- if (call.args.length === 2) {
- return resolveIdToDataFrameShape(call.args[1].nodeId, dfg, domain) !== undefined ? domain_1.DataFrameTop : undefined;
- }
- else if (call.args.length === 3) {
+ if (call.args.length === 3) {
  const values = call.args.slice(1, 3).map(entry => resolveIdToDataFrameShape(entry.nodeId, dfg, domain));
  if (values.length > 0 && values.every(assert_1.isNotUndefined)) {
  return (0, domain_1.joinDataFrames)(...values);
@@ -339,13 +339,15 @@ class BenchmarkSlicer {
  if (expression !== undefined) {
  nodeStats.mappedOperations = expression.operations.map(op => op.operation);
  stats.numberOfOperationNodes++;
+ if (value !== undefined) {
+ nodeStats.inferredColNames = value.colnames === domain_1.ColNamesTop ? 'top' : value.colnames.length;
+ nodeStats.inferredColCount = this.getInferredSize(value.cols);
+ nodeStats.inferredRowCount = this.getInferredSize(value.rows);
+ nodeStats.approxRangeColCount = value.cols === domain_1.IntervalBottom ? 0 : value.cols[1] - value.cols[0];
+ nodeStats.approxRangeRowCount = value.rows === domain_1.IntervalBottom ? 0 : value.rows[1] - value.rows[0];
+ }
  }
  if (value !== undefined) {
- nodeStats.inferredColNames = value.colnames === domain_1.ColNamesTop ? 'top' : value.colnames.length;
- nodeStats.inferredColCount = this.getInferredSize(value.cols);
- nodeStats.inferredRowCount = this.getInferredSize(value.rows);
- nodeStats.approxRangeColCount = value.cols === domain_1.IntervalBottom ? 0 : value.cols[1] - value.cols[0];
- nodeStats.approxRangeRowCount = value.rows === domain_1.IntervalBottom ? 0 : value.rows[1] - value.rows[0];
  stats.numberOfValueNodes++;
  }
  stats.perNodeStats.set(node.info.id, nodeStats);
@@ -11,7 +11,7 @@ function writeGraphOutput(ultimate, outputGraphPath) {
  const data = [];
  for (const { name, measurements } of [{ name: 'per-file', measurements: ultimate.commonMeasurements }, { name: 'per-slice', measurements: ultimate.perSliceMeasurements }]) {
  for (const [point, measurement] of measurements) {
- if (point === 'close R session' || point === 'initialize R session') {
+ if (point === 'close R session' || point === 'initialize R session' || !measurement?.mean || !measurement?.std) {
  continue;
  }
  const pointName = point === 'total' ? `total ${name}` : point;
@@ -152,7 +152,7 @@ function summarizeAllUltimateStats(stats) {
  failedToRepParse: Math.max(...stats.map(s => s.failedToRepParse)),
  timesHitThreshold: Math.max(...stats.map(s => s.timesHitThreshold)),
  // average out / summarize other measurements
- commonMeasurements: new Map(stats_1.CommonSlicerMeasurements.map(m => [m, (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.commonMeasurements.get(m)))])),
+ commonMeasurements: new Map(stats_1.CommonSlicerMeasurements.filter(m => stats.some(s => s.commonMeasurements.has(m))).map(m => [m, (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.commonMeasurements.get(m)))])),
  perSliceMeasurements: new Map(stats_1.PerSliceMeasurements.map(m => [m, (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.perSliceMeasurements.get(m)))])),
  sliceTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.sliceTimePerToken)),
  reconstructTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.reconstructTimePerToken)),
@@ -16,4 +16,5 @@ export interface BenchmarkCliOptions {
  threshold?: number;
  'per-file-time-limit'?: number;
  'sampling-strategy': string;
+ cfg?: boolean;
  }
@@ -75,6 +75,7 @@ async function benchmark() {
  ...(options.threshold ? ['--threshold', `${options.threshold}`] : []),
  '--sampling-strategy', options['sampling-strategy'],
  ...(options.seed ? ['--seed', options.seed] : []),
+ ...(options.cfg ? ['--cfg'] : []),
  ]);
  const runs = options.runs ?? 1;
  for (let i = 1; i <= runs; i++) {
@@ -11,6 +11,7 @@ export interface SingleBenchmarkCliOptions {
  'dataframe-shape-inference': boolean;
  'enable-pointer-tracking': boolean;
  'max-slices': number;
+ 'cfg': boolean;
  threshold?: number;
  'sampling-strategy': string;
  seed?: string;
@@ -34,7 +34,7 @@ async function benchmark() {
  const prefix = `[${options.input}${options['file-id'] !== undefined ? ` (file ${options['file-id']}, run ${options['run-num']})` : ''}]`;
  console.log(`${prefix} Appending output to ${options.output}`);
  const directory = path_1.default.parse(options.output).dir;
- // ensure the directory exists if path contains one
+ // ensure the directory exists if the path contains one
  if (directory !== '') {
  fs_1.default.mkdirSync(directory, { recursive: true });
  }
@@ -69,9 +69,10 @@ async function benchmark() {
  (0, assert_1.guard)(count >= 0, `Number of slices exceeded limit of ${maxSlices} with ${-count} slices, skipping in count`);
  (0, assert_1.guard)(count > 0, `No possible slices found for ${options.input}, skipping in count`);
  }
- if (options['dataframe-shape-inference']) {
- console.log(`${prefix} Extracting control flow graph for data frame shape inference`);
+ if (options['cfg'] || options['dataframe-shape-inference']) {
  slicer.extractCFG();
+ }
+ if (options['dataframe-shape-inference']) {
  console.log(`${prefix} Performing shape inference for data frames`);
  slicer.inferDataFrameShapes();
  console.log(`${prefix} Completed data frame shape inference`);
@@ -28,6 +28,7 @@ exports.benchmarkOptions = [
  { name: 'threshold', alias: 't', type: Number, description: 'How many re-visits of the same node are ok?', defaultValue: undefined, typeLabel: '{underline number}' },
  { name: 'per-file-time-limit', type: Number, description: 'Time limit in milliseconds to process single file (disabled by default)', defaultValue: undefined, typeLabel: '{underline number}' },
  { name: 'sampling-strategy', type: String, description: 'Which strategy to use, when sampling is enabled', defaultValue: 'random', typeLabel: '{underline random/equidistant}' },
+ { name: 'cfg', alias: 'c', type: Boolean, description: 'Extract the control flow graph of the file (benchmark it too)' }
  ];
  exports.benchmarkHelperOptions = [
  { name: 'verbose', alias: 'v', type: Boolean, description: 'Run with verbose logging [do not use for the real benchmark as this affects the time measurements, but only to find errors]' },
@@ -36,6 +37,7 @@ exports.benchmarkHelperOptions = [
  { name: 'file-id', alias: 'd', type: Number, description: 'A numeric file id that can be used to match an input and run-num to a file' },
  { name: 'run-num', alias: 'r', type: Number, description: 'The n-th time that the file with the given file-id is being benchmarked' },
  { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
+ { name: 'cfg', alias: 'c', type: Boolean, description: 'Extract the control flow graph of the file (benchmark it too)' },
  { name: 'output', alias: 'o', type: String, description: 'File to write the measurements to (appends a single line in JSON format)', typeLabel: '{underline file}' },
  { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' },
  { name: 'dataframe-shape-inference', type: Boolean, description: 'Infer the shape of data frames using abstract interpretation (includes control flow graph extraction)', defaultValue: false },
@@ -59,7 +59,7 @@ async function processQueryArgs(line, parser, output, config) {
  }
  const processed = await getDataflow(config, parser, args.join(' '));
  return {
- query: (0, query_1.executeQueries)({ dataflow: processed.dataflow, ast: processed.normalize, config: config }, parsedQuery),
+ query: await Promise.resolve((0, query_1.executeQueries)({ dataflow: processed.dataflow, ast: processed.normalize, config: config }, parsedQuery)),
  processed
  };
  }
@@ -348,11 +348,20 @@ class FlowRServerConnection {
  const { dataflow: dfg, normalize: ast } = fileInformation.pipeline.getResults(true);
  (0, assert_1.guard)(dfg !== undefined, `Dataflow graph must be present (request: ${request.filetoken})`);
  (0, assert_1.guard)(ast !== undefined, `AST must be present (request: ${request.filetoken})`);
- const results = (0, query_1.executeQueries)({ dataflow: dfg, ast, config: this.config }, request.query);
- (0, send_1.sendMessage)(this.socket, {
- type: 'response-query',
- id: request.id,
- results
+ void Promise.resolve((0, query_1.executeQueries)({ dataflow: dfg, ast, config: this.config }, request.query)).then(results => {
+ (0, send_1.sendMessage)(this.socket, {
+ type: 'response-query',
+ id: request.id,
+ results
+ });
+ }).catch(e => {
+ this.logger.error(`[${this.name}] Error while executing query: ${String(e)}`);
+ (0, send_1.sendMessage)(this.socket, {
+ id: request.id,
+ type: 'error',
+ fatal: false,
+ reason: `Error while executing query: ${String(e)}`
+ });
  });
  }
  }
@@ -1,4 +1,4 @@
- import type { CfgBasicBlockVertex, CfgEndMarkerVertex, CfgExpressionVertex, CfgMidMarkerVertex, CfgSimpleVertex, CfgStatementVertex, ControlFlowInformation } from './control-flow-graph';
+ import type { CfgBasicBlockVertex, CfgEndMarkerVertex, CfgExpressionVertex, CfgSimpleVertex, CfgStatementVertex, ControlFlowInformation } from './control-flow-graph';
  import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id';
  export interface BasicCfgGuidedVisitorConfiguration<ControlFlow extends ControlFlowInformation = ControlFlowInformation> {
  readonly controlFlow: ControlFlow;
@@ -34,6 +34,5 @@ export declare class BasicCfgGuidedVisitor<ControlFlow extends ControlFlowInform
  protected onBasicBlockNode(node: CfgBasicBlockVertex): void;
  protected onStatementNode(_node: CfgStatementVertex): void;
  protected onExpressionNode(_node: CfgExpressionVertex): void;
- protected onMidMarkerNode(_node: CfgMidMarkerVertex): void;
  protected onEndMarkerNode(_node: CfgEndMarkerVertex): void;
  }
@@ -72,9 +72,6 @@ class BasicCfgGuidedVisitor {
  case control_flow_graph_1.CfgVertexType.Expression:
  this.onExpressionNode(vertex);
  break;
- case control_flow_graph_1.CfgVertexType.MidMarker:
- this.onMidMarkerNode(vertex);
- break;
  case control_flow_graph_1.CfgVertexType.EndMarker:
  this.onEndMarkerNode(vertex);
  break;
@@ -103,9 +100,6 @@ class BasicCfgGuidedVisitor {
  onExpressionNode(_node) {
  /* does nothing by default */
  }
- onMidMarkerNode(_node) {
- /* does nothing by default */
- }
  onEndMarkerNode(_node) {
  /* does nothing by default */
  }
@@ -1,6 +1,7 @@
  import type { ControlFlowInformation } from './control-flow-graph';
  import type { NormalizedAst } from '../r-bridge/lang-4.x/ast/model/processing/decorate';
  import type { DataflowGraph } from '../dataflow/graph/graph';
+ import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id';
  import { cfgAnalyzeDeadCode } from './cfg-dead-code';
  import type { FlowrConfigOptions } from '../config';
  export interface CfgPassInfo {
@@ -28,4 +29,9 @@ export declare function simplifyControlFlowInformation(cfg: ControlFlowInformati
  declare function cfgRemoveDeadCode(cfg: ControlFlowInformation, _info?: CfgPassInfo): ControlFlowInformation;
  declare function uniqueControlFlowSets(cfg: ControlFlowInformation, _info?: CfgPassInfo): ControlFlowInformation;
  declare function toBasicBlocks(cfg: ControlFlowInformation, _info?: CfgPassInfo): ControlFlowInformation;
+ /**
+ * Uses {@link visitCfgInOrder} to find all nodes that are reachable from the control flow graph's {@link ControlFlowInformation.entryPoints} and returns them as a set.
+ * @param cfg - The control flow graph whose reachable nodes to find.
+ */
+ export declare function cfgFindAllReachable(cfg: ControlFlowInformation): Set<NodeId>;
  export {};