@eagleoutice/flowr 2.2.16 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/README.md +35 -19
  2. package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
  3. package/abstract-interpretation/data-frame/absint-info.js +31 -0
  4. package/abstract-interpretation/data-frame/absint-visitor.d.ts +59 -0
  5. package/abstract-interpretation/data-frame/absint-visitor.js +173 -0
  6. package/abstract-interpretation/data-frame/domain.d.ts +107 -0
  7. package/abstract-interpretation/data-frame/domain.js +315 -0
  8. package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
  9. package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
  10. package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
  11. package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
  12. package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
  13. package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
  14. package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
  15. package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
  16. package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
  17. package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
  18. package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
  19. package/abstract-interpretation/data-frame/resolve-args.js +118 -0
  20. package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
  21. package/abstract-interpretation/data-frame/semantics.js +366 -0
  22. package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
  23. package/abstract-interpretation/data-frame/shape-inference.js +117 -0
  24. package/benchmark/slicer.d.ts +15 -1
  25. package/benchmark/slicer.js +135 -0
  26. package/benchmark/stats/print.js +123 -45
  27. package/benchmark/stats/size-of.d.ts +7 -0
  28. package/benchmark/stats/size-of.js +1 -0
  29. package/benchmark/stats/stats.d.ts +30 -1
  30. package/benchmark/stats/stats.js +4 -2
  31. package/benchmark/summarizer/data.d.ts +33 -2
  32. package/benchmark/summarizer/first-phase/input.js +5 -1
  33. package/benchmark/summarizer/first-phase/process.js +47 -1
  34. package/benchmark/summarizer/second-phase/process.js +101 -3
  35. package/cli/benchmark-app.d.ts +1 -0
  36. package/cli/benchmark-app.js +1 -0
  37. package/cli/benchmark-helper-app.d.ts +1 -0
  38. package/cli/benchmark-helper-app.js +8 -2
  39. package/cli/common/options.js +2 -0
  40. package/config.d.ts +31 -0
  41. package/config.js +21 -1
  42. package/control-flow/control-flow-graph.d.ts +1 -0
  43. package/control-flow/control-flow-graph.js +4 -0
  44. package/control-flow/dfg-cfg-guided-visitor.js +1 -1
  45. package/control-flow/semantic-cfg-guided-visitor.d.ts +1 -1
  46. package/control-flow/semantic-cfg-guided-visitor.js +1 -1
  47. package/dataflow/environments/built-in.d.ts +5 -3
  48. package/dataflow/environments/built-in.js +3 -1
  49. package/dataflow/eval/resolve/alias-tracking.js +2 -2
  50. package/dataflow/eval/resolve/resolve.d.ts +53 -9
  51. package/dataflow/eval/resolve/resolve.js +132 -38
  52. package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +1 -0
  53. package/dataflow/internal/process/functions/call/built-in/built-in-source.js +4 -0
  54. package/documentation/doc-util/doc-query.js +10 -0
  55. package/documentation/print-interface-wiki.js +11 -0
  56. package/documentation/print-linter-wiki.js +4 -0
  57. package/documentation/print-query-wiki.js +17 -0
  58. package/linter/linter-rules.d.ts +25 -2
  59. package/linter/linter-rules.js +3 -1
  60. package/linter/rules/absolute-path.d.ts +1 -1
  61. package/linter/rules/dataframe-access-validation.d.ts +53 -0
  62. package/linter/rules/dataframe-access-validation.js +116 -0
  63. package/linter/rules/naming-convention.d.ts +1 -1
  64. package/linter/rules/naming-convention.js +5 -1
  65. package/package.json +2 -2
  66. package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
  67. package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
  68. package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
  69. package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
  70. package/queries/query.d.ts +61 -1
  71. package/queries/query.js +2 -0
  72. package/util/files.d.ts +8 -2
  73. package/util/files.js +22 -4
  74. package/util/r-value.d.ts +23 -0
  75. package/util/r-value.js +113 -0
  76. package/util/version.js +1 -1
  77. package/util/cfg/cfg.d.ts +0 -0
  78. package/util/cfg/cfg.js +0 -2
@@ -9,6 +9,8 @@ const defaultmap_1 = require("../../../util/collections/defaultmap");
9
9
  const summarizer_1 = require("../../../util/summarizer");
10
10
  const assert_1 = require("../../../util/assert");
11
11
  const stats_1 = require("../../stats/stats");
12
+ const semantics_1 = require("../../../abstract-interpretation/data-frame/semantics");
13
+ const arrays_1 = require("../../../util/collections/arrays");
12
14
  function summarizeAllSummarizedStats(stats) {
13
15
  const commonMeasurements = new defaultmap_1.DefaultMap(() => []);
14
16
  const perSliceMeasurements = new defaultmap_1.DefaultMap(() => []);
@@ -19,11 +21,14 @@ function summarizeAllSummarizedStats(stats) {
19
21
  const normalizeTimesPerToken = [];
20
22
  const dataflowTimesPerToken = [];
21
23
  const totalCommonTimesPerToken = [];
24
+ const controlFlowTimePerToken = [];
25
+ const dataFrameShapeTimePerToken = [];
22
26
  const memory = new defaultmap_1.DefaultMap(() => []);
23
27
  const reductions = [];
24
28
  const reductionsNoFluff = [];
25
29
  const inputs = [];
26
30
  const dataflows = [];
31
+ const dataFrameShapes = [];
27
32
  let failedToRepParse = 0;
28
33
  let timesHitThreshold = 0;
29
34
  let totalSlices = 0;
@@ -41,6 +46,12 @@ function summarizeAllSummarizedStats(stats) {
41
46
  normalizeTimesPerToken.push(stat.normalizeTimePerToken);
42
47
  dataflowTimesPerToken.push(stat.dataflowTimePerToken);
43
48
  totalCommonTimesPerToken.push(stat.totalCommonTimePerToken);
49
+ if (stat.controlFlowTimePerToken !== undefined) {
50
+ controlFlowTimePerToken.push(stat.controlFlowTimePerToken);
51
+ }
52
+ if (stat.dataFrameShapeTimePerToken !== undefined) {
53
+ dataFrameShapeTimePerToken.push(stat.dataFrameShapeTimePerToken);
54
+ }
44
55
  for (const [k, v] of stat.memory) {
45
56
  memory.get(k).push(v);
46
57
  }
@@ -48,6 +59,9 @@ function summarizeAllSummarizedStats(stats) {
48
59
  reductionsNoFluff.push(stat.perSliceMeasurements.reductionNoFluff);
49
60
  inputs.push(stat.input);
50
61
  dataflows.push(stat.dataflow);
62
+ if (stat.dataFrameShape !== undefined) {
63
+ dataFrameShapes.push(stat.dataFrameShape);
64
+ }
51
65
  failedToRepParse += stat.perSliceMeasurements.failedToRepParse;
52
66
  totalSlices += stat.perSliceMeasurements.numberOfSlices;
53
67
  timesHitThreshold += stat.perSliceMeasurements.timesHitThreshold;
@@ -64,6 +78,8 @@ function summarizeAllSummarizedStats(stats) {
64
78
  normalizeTimePerToken: (0, process_1.summarizeTimePerToken)(normalizeTimesPerToken),
65
79
  dataflowTimePerToken: (0, process_1.summarizeTimePerToken)(dataflowTimesPerToken),
66
80
  totalCommonTimePerToken: (0, process_1.summarizeTimePerToken)(totalCommonTimesPerToken),
81
+ controlFlowTimePerToken: controlFlowTimePerToken.length > 0 ? (0, process_1.summarizeTimePerToken)(controlFlowTimePerToken) : undefined,
82
+ dataFrameShapeTimePerToken: dataFrameShapeTimePerToken.length > 0 ? (0, process_1.summarizeTimePerToken)(dataFrameShapeTimePerToken) : undefined,
67
83
  failedToRepParse,
68
84
  timesHitThreshold,
69
85
  reduction: (0, process_1.summarizeSummarizedReductions)(reductions),
@@ -89,7 +105,43 @@ function summarizeAllSummarizedStats(stats) {
89
105
  storedVertexIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.storedVertexIndices)),
90
106
  storedEnvIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.storedEnvIndices)),
91
107
  overwrittenIndices: (0, summarizer_1.summarizeMeasurement)(dataflows.map(d => d.overwrittenIndices)),
92
- }
108
+ },
109
+ dataFrameShape: stats.some(s => s.dataFrameShape !== undefined) ? {
110
+ numberOfDataFrameFiles: (0, arrays_1.arraySum)(stats.map(s => s.dataFrameShape?.numberOfDataFrameFiles).filter(assert_1.isNotUndefined)),
111
+ numberOfNonDataFrameFiles: (0, arrays_1.arraySum)(stats.map(s => s.dataFrameShape?.numberOfNonDataFrameFiles).filter(assert_1.isNotUndefined)),
112
+ numberOfResultConstraints: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfResultConstraints).filter(assert_1.isNotUndefined)),
113
+ numberOfResultingValues: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfResultingValues).filter(assert_1.isNotUndefined)),
114
+ numberOfResultingTop: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfResultingTop).filter(assert_1.isNotUndefined)),
115
+ numberOfResultingBottom: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfResultingBottom).filter(assert_1.isNotUndefined)),
116
+ numberOfEmptyNodes: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfEmptyNodes).filter(assert_1.isNotUndefined)),
117
+ numberOfOperationNodes: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfOperationNodes).filter(assert_1.isNotUndefined)),
118
+ numberOfValueNodes: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfValueNodes).filter(assert_1.isNotUndefined)),
119
+ sizeOfInfo: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.sizeOfInfo).filter(assert_1.isNotUndefined)),
120
+ numberOfEntriesPerNode: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfEntriesPerNode).filter(assert_1.isNotUndefined)),
121
+ numberOfOperations: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfOperations).filter(assert_1.isNotUndefined)),
122
+ numberOfTotalValues: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfTotalValues).filter(assert_1.isNotUndefined)),
123
+ numberOfTotalTop: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfTotalTop).filter(assert_1.isNotUndefined)),
124
+ numberOfTotalBottom: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfTotalBottom).filter(assert_1.isNotUndefined)),
125
+ inferredColNames: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.inferredColNames).filter(assert_1.isNotUndefined)),
126
+ numberOfColNamesValues: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColNamesValues).filter(assert_1.isNotUndefined)),
127
+ numberOfColNamesTop: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColNamesTop).filter(assert_1.isNotUndefined)),
128
+ numberOfColNamesBottom: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColNamesBottom).filter(assert_1.isNotUndefined)),
129
+ inferredColCount: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.inferredColCount).filter(assert_1.isNotUndefined)),
130
+ numberOfColCountExact: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountExact).filter(assert_1.isNotUndefined)),
131
+ numberOfColCountValues: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountValues).filter(assert_1.isNotUndefined)),
132
+ numberOfColCountTop: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountTop).filter(assert_1.isNotUndefined)),
133
+ numberOfColCountInfinite: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountInfinite).filter(assert_1.isNotUndefined)),
134
+ numberOfColCountBottom: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountBottom).filter(assert_1.isNotUndefined)),
135
+ approxRangeColCount: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.approxRangeColCount).filter(assert_1.isNotUndefined)),
136
+ inferredRowCount: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.inferredRowCount).filter(assert_1.isNotUndefined)),
137
+ numberOfRowCountExact: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountExact).filter(assert_1.isNotUndefined)),
138
+ numberOfRowCountValues: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountValues).filter(assert_1.isNotUndefined)),
139
+ numberOfRowCountTop: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountTop).filter(assert_1.isNotUndefined)),
140
+ numberOfRowCountInfinite: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountInfinite).filter(assert_1.isNotUndefined)),
141
+ numberOfRowCountBottom: (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountBottom).filter(assert_1.isNotUndefined)),
142
+ approxRangeRowCount: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.approxRangeRowCount).filter(assert_1.isNotUndefined)),
143
+ perOperationNumber: new Map(semantics_1.DataFrameOperationNames.map(n => [n, (0, summarizer_1.summarizeMeasurement)(stats.map(s => s.dataFrameShape?.perOperationNumber.get(n) ?? 0))]))
144
+ } : undefined
93
145
  };
94
146
  }
95
147
  function summarizeAllUltimateStats(stats) {
@@ -109,6 +161,8 @@ function summarizeAllUltimateStats(stats) {
109
161
  normalizeTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.normalizeTimePerToken)),
110
162
  dataflowTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.dataflowTimePerToken)),
111
163
  totalCommonTimePerToken: (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.totalCommonTimePerToken)),
164
+ controlFlowTimePerToken: stats.some(s => s.controlFlowTimePerToken !== undefined) ? (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.controlFlowTimePerToken).filter(assert_1.isNotUndefined)) : undefined,
165
+ dataFrameShapeTimePerToken: stats.some(s => s.dataFrameShapeTimePerToken !== undefined) ? (0, process_1.summarizeSummarizedTimePerToken)(stats.map(s => s.dataFrameShapeTimePerToken).filter(assert_1.isNotUndefined)) : undefined,
112
166
  reduction: (0, process_1.summarizeSummarizedReductions)(stats.map(s => s.reduction)),
113
167
  reductionNoFluff: (0, process_1.summarizeSummarizedReductions)(stats.map(s => s.reductionNoFluff)),
114
168
  input: {
@@ -132,7 +186,43 @@ function summarizeAllUltimateStats(stats) {
132
186
  storedVertexIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.storedVertexIndices)),
133
187
  storedEnvIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.storedEnvIndices)),
134
188
  overwrittenIndices: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataflow.overwrittenIndices)),
135
- }
189
+ },
190
+ dataFrameShape: stats.some(s => s.dataFrameShape !== undefined) ? {
191
+ numberOfDataFrameFiles: (0, arrays_1.arraySum)(stats.map(s => s.dataFrameShape?.numberOfDataFrameFiles).filter(assert_1.isNotUndefined)),
192
+ numberOfNonDataFrameFiles: (0, arrays_1.arraySum)(stats.map(s => s.dataFrameShape?.numberOfNonDataFrameFiles).filter(assert_1.isNotUndefined)),
193
+ numberOfResultConstraints: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfResultConstraints).filter(assert_1.isNotUndefined)),
194
+ numberOfResultingValues: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfResultingValues).filter(assert_1.isNotUndefined)),
195
+ numberOfResultingTop: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfResultingTop).filter(assert_1.isNotUndefined)),
196
+ numberOfResultingBottom: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfResultingBottom).filter(assert_1.isNotUndefined)),
197
+ numberOfEmptyNodes: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfEmptyNodes).filter(assert_1.isNotUndefined)),
198
+ numberOfOperationNodes: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfOperationNodes).filter(assert_1.isNotUndefined)),
199
+ numberOfValueNodes: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfValueNodes).filter(assert_1.isNotUndefined)),
200
+ sizeOfInfo: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.sizeOfInfo).filter(assert_1.isNotUndefined)),
201
+ numberOfEntriesPerNode: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfEntriesPerNode).filter(assert_1.isNotUndefined)),
202
+ numberOfOperations: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfOperations).filter(assert_1.isNotUndefined)),
203
+ numberOfTotalValues: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfTotalValues).filter(assert_1.isNotUndefined)),
204
+ numberOfTotalTop: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfTotalTop).filter(assert_1.isNotUndefined)),
205
+ numberOfTotalBottom: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfTotalBottom).filter(assert_1.isNotUndefined)),
206
+ inferredColNames: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.inferredColNames).filter(assert_1.isNotUndefined)),
207
+ numberOfColNamesValues: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColNamesValues).filter(assert_1.isNotUndefined)),
208
+ numberOfColNamesTop: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColNamesTop).filter(assert_1.isNotUndefined)),
209
+ numberOfColNamesBottom: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColNamesBottom).filter(assert_1.isNotUndefined)),
210
+ inferredColCount: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.inferredColCount).filter(assert_1.isNotUndefined)),
211
+ numberOfColCountExact: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountExact).filter(assert_1.isNotUndefined)),
212
+ numberOfColCountValues: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountValues).filter(assert_1.isNotUndefined)),
213
+ numberOfColCountTop: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountTop).filter(assert_1.isNotUndefined)),
214
+ numberOfColCountInfinite: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountInfinite).filter(assert_1.isNotUndefined)),
215
+ numberOfColCountBottom: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfColCountBottom).filter(assert_1.isNotUndefined)),
216
+ approxRangeColCount: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.approxRangeColCount).filter(assert_1.isNotUndefined)),
217
+ inferredRowCount: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.inferredRowCount).filter(assert_1.isNotUndefined)),
218
+ numberOfRowCountExact: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountExact).filter(assert_1.isNotUndefined)),
219
+ numberOfRowCountValues: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountValues).filter(assert_1.isNotUndefined)),
220
+ numberOfRowCountTop: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountTop).filter(assert_1.isNotUndefined)),
221
+ numberOfRowCountInfinite: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountInfinite).filter(assert_1.isNotUndefined)),
222
+ numberOfRowCountBottom: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.numberOfRowCountBottom).filter(assert_1.isNotUndefined)),
223
+ approxRangeRowCount: (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.approxRangeRowCount).filter(assert_1.isNotUndefined)),
224
+ perOperationNumber: new Map(semantics_1.DataFrameOperationNames.map(n => [n, (0, process_1.summarizeSummarizedMeasurement)(stats.map(s => s.dataFrameShape?.perOperationNumber.get(n)).filter(assert_1.isNotUndefined))]))
225
+ } : undefined
136
226
  };
137
227
  }
138
228
  function processNextSummary(line, allSummarized) {
@@ -152,7 +242,11 @@ function processNextSummary(line, allSummarized) {
152
242
  ...got.summarize.perSliceMeasurements,
153
243
  // restore maps
154
244
  measurements: new Map(got.summarize.perSliceMeasurements.measurements),
155
- }
245
+ },
246
+ dataFrameShape: got.summarize.dataFrameShape !== undefined ? {
247
+ ...got.summarize.dataFrameShape,
248
+ perOperationNumber: new Map(got.summarize.dataFrameShape.perOperationNumber)
249
+ } : undefined
156
250
  }
157
251
  };
158
252
  allSummarized.push(got.summarize);
@@ -164,6 +258,10 @@ function processNextUltimateSummary(line, allSummarized) {
164
258
  // restore maps
165
259
  commonMeasurements: new Map(got.commonMeasurements),
166
260
  perSliceMeasurements: new Map(got.perSliceMeasurements),
261
+ dataFrameShape: got.dataFrameShape !== undefined ? {
262
+ ...got.dataFrameShape,
263
+ perOperationNumber: new Map(got.dataFrameShape.perOperationNumber)
264
+ } : undefined
167
265
  };
168
266
  allSummarized.push(got);
169
267
  }
@@ -10,6 +10,7 @@ export interface BenchmarkCliOptions {
10
10
  runs?: number;
11
11
  seed?: string;
12
12
  parser: KnownParserName;
13
+ 'dataframe-shape-inference': boolean;
13
14
  'enable-pointer-tracking': boolean;
14
15
  'max-file-slices': number;
15
16
  threshold?: number;
@@ -69,6 +69,7 @@ async function benchmark() {
69
69
  '--output', path_1.default.join(options.output, path_1.default.relative(f.baseDir, `${f.request.content}.json`)),
70
70
  '--slice', options.slice, ...verboseAdd,
71
71
  '--parser', options.parser,
72
+ ...(options['dataframe-shape-inference'] ? ['--dataframe-shape-inference'] : []),
72
73
  ...(options['enable-pointer-tracking'] ? ['--enable-pointer-tracking'] : []),
73
74
  '--max-slices', `${options['max-file-slices']}`,
74
75
  ...(options.threshold ? ['--threshold', `${options.threshold}`] : []),
@@ -8,6 +8,7 @@ export interface SingleBenchmarkCliOptions {
8
8
  slice: string;
9
9
  output?: string;
10
10
  parser: KnownParserName;
11
+ 'dataframe-shape-inference': boolean;
11
12
  'enable-pointer-tracking': boolean;
12
13
  'max-slices': number;
13
14
  threshold?: number;
@@ -39,8 +39,7 @@ async function benchmark() {
39
39
  fs_1.default.mkdirSync(directory, { recursive: true });
40
40
  }
41
41
  // Enable pointer analysis if requested, otherwise disable it
42
- const config = (0, config_1.getConfig)();
43
- (0, config_1.amendConfig)(config, c => {
42
+ const config = (0, config_1.amendConfig)((0, config_1.getConfig)(), c => {
44
43
  c.solver.pointerTracking = options['enable-pointer-tracking'];
45
44
  return c;
46
45
  });
@@ -70,6 +69,13 @@ async function benchmark() {
70
69
  (0, assert_1.guard)(count >= 0, `Number of slices exceeded limit of ${maxSlices} with ${-count} slices, skipping in count`);
71
70
  (0, assert_1.guard)(count > 0, `No possible slices found for ${options.input}, skipping in count`);
72
71
  }
72
+ if (options['dataframe-shape-inference']) {
73
+ console.log(`${prefix} Extracting control flow graph for data frame shape inference`);
74
+ slicer.extractCFG();
75
+ console.log(`${prefix} Performing shape inference for data frames`);
76
+ slicer.inferDataFrameShapes();
77
+ console.log(`${prefix} Completed data frame shape inference`);
78
+ }
73
79
  const { stats } = slicer.finish();
74
80
  const output = {
75
81
  filename: options.input,
@@ -22,6 +22,7 @@ exports.benchmarkOptions = [
22
22
  { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
23
23
  { name: 'output', alias: 'o', type: String, description: `Folder to write all the measurements to in a per-file-basis (defaults to {italic benchmark-${StartTimeString}})`, defaultValue: `benchmark-${StartTimeString}`, typeLabel: '{underline folder}' },
24
24
  { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' },
25
+ { name: 'dataframe-shape-inference', type: Boolean, description: 'Infer the shape of data frames using abstract interpretation (includes control flow graph extraction)', defaultValue: false },
25
26
  { name: 'enable-pointer-tracking', type: Boolean, description: 'Run dataflow analysis with pointer tracking', defaultValue: false },
26
27
  { name: 'max-file-slices', type: Number, description: 'If file has more than passed number of slices, the file is not processed', defaultValue: -1, typeLabel: '{underline number}' },
27
28
  { name: 'threshold', alias: 't', type: Number, description: 'How many re-visits of the same node are ok?', defaultValue: undefined, typeLabel: '{underline number}' },
@@ -37,6 +38,7 @@ exports.benchmarkHelperOptions = [
37
38
  { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
38
39
  { name: 'output', alias: 'o', type: String, description: 'File to write the measurements to (appends a single line in JSON format)', typeLabel: '{underline file}' },
39
40
  { name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' },
41
+ { name: 'dataframe-shape-inference', type: Boolean, description: 'Infer the shape of data frames using abstract interpretation (includes control flow graph extraction)', defaultValue: false },
40
42
  { name: 'enable-pointer-tracking', type: Boolean, description: 'Run dataflow analysis with pointer tracking', defaultValue: false },
41
43
  { name: 'max-slices', type: Number, description: 'If file has more than passed number of slices, the file is not processed', defaultValue: -1, typeLabel: '{underline number}' },
42
44
  { name: 'threshold', alias: 't', type: Number, description: 'How many re-visits of the same node are ok?', defaultValue: undefined, typeLabel: '{underline number}' },
package/config.d.ts CHANGED
@@ -145,6 +145,37 @@ export interface FlowrConfigOptions extends MergeableRecord {
145
145
  readonly threshold?: number;
146
146
  };
147
147
  };
148
+ /**
149
+ * Configuration options for abstract interpretation
150
+ */
151
+ readonly abstractInterpretation: {
152
+ /**
153
+ * The configuration of the shape inference for data frames
154
+ */
155
+ readonly dataFrame: {
156
+ /**
157
+ * The maximum number of columns names to infer for data frames before over-approximating the column names to top
158
+ */
159
+ readonly maxColNames: number;
160
+ /**
161
+ * The threshold for the number of visitations of a node at which widening should be performed to ensure the termination of the fixpoint iteration
162
+ */
163
+ readonly wideningThreshold: number;
164
+ /**
165
+ * Configuration options for reading data frame shapes from loaded external data files, such as CSV files
166
+ */
167
+ readonly readLoadedData: {
168
+ /**
169
+ * Whether data frame shapes should be extracted from loaded external data files, such as CSV files
170
+ */
171
+ readonly readExternalFiles: boolean;
172
+ /**
173
+ * The maximum number of lines to read when extracting data frame shapes from loaded files, such as CSV files
174
+ */
175
+ readonly maxReadLines: number;
176
+ };
177
+ };
178
+ };
148
179
  }
149
180
  export interface TreeSitterEngineConfig extends MergeableRecord {
150
181
  readonly type: 'tree-sitter';
package/config.js CHANGED
@@ -81,6 +81,16 @@ exports.defaultConfigOptions = {
81
81
  slicer: {
82
82
  threshold: 50
83
83
  }
84
+ },
85
+ abstractInterpretation: {
86
+ dataFrame: {
87
+ maxColNames: 50,
88
+ wideningThreshold: 4,
89
+ readLoadedData: {
90
+ readExternalFiles: true,
91
+ maxReadLines: 1e6
92
+ }
93
+ }
84
94
  }
85
95
  };
86
96
  exports.flowrConfigFileSchema = joi_1.default.object({
@@ -120,7 +130,17 @@ exports.flowrConfigFileSchema = joi_1.default.object({
120
130
  slicer: joi_1.default.object({
121
131
  threshold: joi_1.default.number().optional().description('The maximum number of iterations to perform on a single function call during slicing.')
122
132
  }).optional().description('The configuration for the slicer.')
123
- }).description('How to resolve constants, constraints, cells, ...')
133
+ }).description('How to resolve constants, constraints, cells, ...'),
134
+ abstractInterpretation: joi_1.default.object({
135
+ dataFrame: joi_1.default.object({
136
+ maxColNames: joi_1.default.number().min(0).description('The maximum number of columns names to infer for data frames before over-approximating the column names to top.'),
137
+ wideningThreshold: joi_1.default.number().min(1).description('The threshold for the number of visitations of a node at which widening should be performed to ensure the termination of the fixpoint iteration.'),
138
+ readLoadedData: joi_1.default.object({
139
+ readExternalFiles: joi_1.default.boolean().description('Whether data frame shapes should be extracted from loaded external files, such as CSV files.'),
140
+ maxReadLines: joi_1.default.number().min(1).description('The maximum number of lines to read when extracting data frame shapes from loaded files, such as CSV files.')
141
+ }).description('Configuration options for reading data frame shapes from loaded external data files, such as CSV files.')
142
+ }).description('The configuration of the shape inference for data frames.')
143
+ }).description('The configuration options for abstract interpretation.')
124
144
  }).description('The configuration file format for flowR.');
125
145
  function parseConfig(jsonString) {
126
146
  try {
@@ -68,6 +68,7 @@ export interface CfgBasicBlockVertex extends CfgBaseVertex {
68
68
  export type CfgSimpleVertex = CfgStatementVertex | CfgExpressionVertex | CfgBasicBlockVertex | CfgMidMarkerVertex | CfgEndMarkerVertex;
69
69
  export declare function equalVertex(a: CfgSimpleVertex, b: CfgSimpleVertex): boolean;
70
70
  export declare function isMarkerVertex(vertex: CfgSimpleVertex): vertex is CfgMidMarkerVertex | CfgEndMarkerVertex;
71
+ export declare function getVertexRootId(vertex: CfgSimpleVertex): NodeId;
71
72
  interface CfgFlowDependencyEdge extends MergeableRecord {
72
73
  label: CfgEdgeType.Fd;
73
74
  }
@@ -4,6 +4,7 @@ exports.ControlFlowGraph = exports.CfgVertexType = void 0;
4
4
  exports.edgeTypeToString = edgeTypeToString;
5
5
  exports.equalVertex = equalVertex;
6
6
  exports.isMarkerVertex = isMarkerVertex;
7
+ exports.getVertexRootId = getVertexRootId;
7
8
  exports.emptyControlFlowInformation = emptyControlFlowInformation;
8
9
  const assert_1 = require("../util/assert");
9
10
  var CfgVertexType;
@@ -47,6 +48,9 @@ function equalVertex(a, b) {
47
48
  function isMarkerVertex(vertex) {
48
49
  return vertex.type === CfgVertexType.MidMarker || vertex.type === CfgVertexType.EndMarker;
49
50
  }
51
+ function getVertexRootId(vertex) {
52
+ return isMarkerVertex(vertex) ? vertex.root : vertex.id;
53
+ }
50
54
  /**
51
55
  * This class represents the control flow graph of an R program.
52
56
  * The control flow may be hierarchical when confronted with function definitions (see {@link CfgSimpleVertex} and {@link CFG#rootVertexIds|rootVertexIds()}).
@@ -36,7 +36,7 @@ class DataflowAwareCfgGuidedVisitor extends basic_cfg_guided_visitor_1.BasicCfgG
36
36
  }
37
37
  }
38
38
  visitDataflowNode(node) {
39
- const dfgVertex = this.getDataflowGraph((0, control_flow_graph_1.isMarkerVertex)(node) ? node.root : node.id);
39
+ const dfgVertex = this.getDataflowGraph((0, control_flow_graph_1.getVertexRootId)(node));
40
40
  if (!dfgVertex) {
41
41
  this.visitUnknown(node);
42
42
  return;
@@ -45,7 +45,7 @@ export declare class SemanticCfgGuidedVisitor<OtherInfo = NoInfo, ControlFlow ex
45
45
  /**
46
46
  * A helper function to get the normalized AST node for the given id or fail if it does not exist.
47
47
  */
48
- protected getNormalizedAst(id: NodeId): RNode<OtherInfo & ParentInformation> | undefined;
48
+ protected getNormalizedAst(id: NodeId | undefined): RNode<OtherInfo & ParentInformation> | undefined;
49
49
  /**
50
50
  * See {@link DataflowAwareCfgGuidedVisitor#visitValue} for the base implementation.
51
51
  * This now dispatches the value to the appropriate event handler based on its type.
@@ -35,7 +35,7 @@ class SemanticCfgGuidedVisitor extends dfg_cfg_guided_visitor_1.DataflowAwareCfg
35
35
  * A helper function to get the normalized AST node for the given id or fail if it does not exist.
36
36
  */
37
37
  getNormalizedAst(id) {
38
- return this.config.normalizedAst.idMap.get(id);
38
+ return id !== undefined ? this.config.normalizedAst.idMap.get(id) : undefined;
39
39
  }
40
40
  /**
41
41
  * See {@link DataflowAwareCfgGuidedVisitor#visitValue} for the base implementation.
@@ -30,7 +30,7 @@ import { processRm } from '../internal/process/functions/call/built-in/built-in-
30
30
  import { processEvalCall } from '../internal/process/functions/call/built-in/built-in-eval';
31
31
  import type { REnvironmentInformation } from './environment';
32
32
  import type { Value } from '../eval/values/r-value';
33
- import { resolveAsVector } from '../eval/resolve/resolve';
33
+ import { resolveAsVector, resolveAsSeq, resolveAsMinus, resolveAsPlus } from '../eval/resolve/resolve';
34
34
  import type { DataflowGraph } from '../graph/graph';
35
35
  import type { VariableResolve } from '../../config';
36
36
  export type BuiltIn = `built-in:${string}`;
@@ -62,7 +62,7 @@ export interface DefaultBuiltInProcessorConfiguration extends ForceArguments {
62
62
  */
63
63
  readonly useAsProcessor?: UseAsProcessors;
64
64
  }
65
- export type BuiltInEvalHandler = (resolve: VariableResolve, a: RNodeWithParent, env: REnvironmentInformation, graph?: DataflowGraph, map?: AstIdMap) => Value;
65
+ export type BuiltInEvalHandler = (resolve: VariableResolve, a: RNodeWithParent, env?: REnvironmentInformation, graph?: DataflowGraph, map?: AstIdMap) => Value;
66
66
  declare function defaultBuiltInProcessor<OtherInfo>(name: RSymbol<OtherInfo & ParentInformation>, args: readonly RFunctionArgument<OtherInfo & ParentInformation>[], rootId: NodeId, data: DataflowProcessorInformation<OtherInfo & ParentInformation>, config: DefaultBuiltInProcessorConfiguration): DataflowInformation;
67
67
  export declare function registerBuiltInFunctions<Config extends object, Proc extends BuiltInIdentifierProcessorWithConfig<Config>>(both: boolean, names: readonly Identifier[], processor: Proc, config: Config): void;
68
68
  export declare const BuiltInProcessorMapper: {
@@ -90,7 +90,9 @@ export declare const BuiltInProcessorMapper: {
90
90
  };
91
91
  export declare const BuiltInEvalHandlerMapper: {
92
92
  readonly 'built-in:c': typeof resolveAsVector;
93
- readonly 'builtin:vector': typeof resolveAsVector;
93
+ readonly 'built-in::': typeof resolveAsSeq;
94
+ readonly 'built-in:+': typeof resolveAsPlus;
95
+ readonly 'built-in:-': typeof resolveAsMinus;
94
96
  };
95
97
  export type BuiltInMappingName = keyof typeof BuiltInProcessorMapper;
96
98
  export type ConfigOfBuiltInMappingName<N extends BuiltInMappingName> = Parameters<typeof BuiltInProcessorMapper[N]>[4];
@@ -144,7 +144,9 @@ exports.BuiltInProcessorMapper = {
144
144
  };
145
145
  exports.BuiltInEvalHandlerMapper = {
146
146
  'built-in:c': resolve_1.resolveAsVector,
147
- 'builtin:vector': resolve_1.resolveAsVector
147
+ 'built-in::': resolve_1.resolveAsSeq,
148
+ 'built-in:+': resolve_1.resolveAsPlus,
149
+ 'built-in:-': resolve_1.resolveAsMinus
148
150
  };
149
151
  exports.BuiltInMemory = new Map();
150
152
  exports.EmptyBuiltInMemory = new Map();
@@ -139,6 +139,8 @@ function resolveIdToValue(id, { environment, graph, idMap, full = true, resolve
139
139
  return r_value_1.Top;
140
140
  }
141
141
  case type_1.RType.FunctionCall:
142
+ case type_1.RType.BinaryOp:
143
+ case type_1.RType.UnaryOp:
142
144
  return (0, set_constants_1.setFrom)((0, resolve_1.resolveNode)(resolve, node, environment, graph, idMap));
143
145
  case type_1.RType.String:
144
146
  case type_1.RType.Number:
@@ -261,8 +263,6 @@ function isNestedInLoop(node, ast) {
261
263
  function trackAliasesInGraph(id, graph, idMap) {
262
264
  idMap ??= graph.idMap;
263
265
  (0, assert_1.guard)(idMap !== undefined, 'The ID map is required to get the lineage of a node');
264
- const start = graph.getVertex(id);
265
- (0, assert_1.guard)(start !== undefined, 'Unable to find start for alias tracking');
266
266
  const queue = new visiting_queue_1.VisitingQueue(25);
267
267
  const clean = (0, environment_1.initializeCleanEnvironments)();
268
268
  const cleanFingerprint = (0, fingerprint_1.envFingerprint)(clean);
@@ -1,7 +1,8 @@
1
1
  import type { AstIdMap, RNodeWithParent } from '../../../r-bridge/lang-4.x/ast/model/processing/decorate';
2
2
  import type { REnvironmentInformation } from '../../environments/environment';
3
3
  import type { DataflowGraph } from '../../graph/graph';
4
- import type { Value } from '../values/r-value';
4
+ import type { Lift, Value, ValueNumber, ValueVector } from '../values/r-value';
5
+ import { Top } from '../values/r-value';
5
6
  import type { VariableResolve } from '../../../config';
6
7
  /**
7
8
  * Helper function used by {@link resolveIdToValue}, please use that instead, if
@@ -22,16 +23,59 @@ export declare function resolveNode(resolve: VariableResolve, a: RNodeWithParent
22
23
  * Helper function used by {@link resolveIdToValue}, please use that instead, if
23
24
  * you want to resolve the value of an identifier / node
24
25
  *
25
- * This function converts an r-node to a Value Vector {@link vectorFrom}
26
- * It also recursively resolves any symbols, values, function calls (only c), in
27
- * order to construct the value of the vector to resolve by calling {@link resolveIdToValue}
28
- * or {@link resolveNode}
26
+ * This function resolves a vector function call `c` to a {@link ValueVector}
27
+ * by recursively resolving the values of the arguments by calling {@link resolveIdToValue}
29
28
  *
30
- * @param a - Node of the vector to resolve
31
- * @param env - Environment to use
32
29
  * @param resolve - Variable resolve mode
30
+ * @param node - Node of the vector function to resolve
31
+ * @param env - Environment to use
33
32
  * @param graph - Dataflow graph
34
- * @param map - Idmap of Dataflow Graph
33
+ * @param map - Id map of the dataflow graph
35
34
  * @returns ValueVector or Top
36
35
  */
37
- export declare function resolveAsVector(resolve: VariableResolve, a: RNodeWithParent, env: REnvironmentInformation, graph?: DataflowGraph, map?: AstIdMap): Value;
36
+ export declare function resolveAsVector(resolve: VariableResolve, node: RNodeWithParent, environment?: REnvironmentInformation, graph?: DataflowGraph, idMap?: AstIdMap): ValueVector<Lift<Value[]>> | typeof Top;
37
+ /**
38
+ * Helper function used by {@link resolveIdToValue}, please use that instead, if
39
+ * you want to resolve the value of an identifier / node
40
+ *
41
+ * This function resolves a binary sequence operator `:` to a {@link ValueVector} of {@link ValueNumber}s
42
+ * by recursively resolving the values of the arguments by calling {@link resolveIdToValue}
43
+ *
44
+ * @param resolve - Variable resolve mode
45
+ * @param operator - Node of the sequence operator to resolve
46
+ * @param env - Environment to use
47
+ * @param graph - Dataflow graph
48
+ * @param map - Id map of the dataflow graph
49
+ * @returns ValueVector of ValueNumbers or Top
50
+ */
51
+ export declare function resolveAsSeq(resolve: VariableResolve, operator: RNodeWithParent, environment?: REnvironmentInformation, graph?: DataflowGraph, idMap?: AstIdMap): ValueVector<Lift<ValueNumber[]>> | typeof Top;
52
+ /**
53
+ * Helper function used by {@link resolveIdToValue}, please use that instead, if
54
+ * you want to resolve the value of an identifier / node
55
+ *
56
+ * This function resolves a unary plus operator `+` to a {@link ValueNumber} or {@link ValueVector} of ValueNumbers
57
+ * by recursively resolving the values of the arguments by calling {@link resolveIdToValue}
58
+ *
59
+ * @param resolve - Variable resolve mode
60
+ * @param operator - Node of the plus operator to resolve
61
+ * @param env - Environment to use
62
+ * @param graph - Dataflow graph
63
+ * @param map - Id map of the dataflow graph
64
+ * @returns ValueNumber, ValueVector of ValueNumbers, or Top
65
+ */
66
+ export declare function resolveAsPlus(resolve: VariableResolve, operator: RNodeWithParent, environment?: REnvironmentInformation, graph?: DataflowGraph, idMap?: AstIdMap): ValueNumber | ValueVector<Lift<ValueNumber[]>> | typeof Top;
67
+ /**
68
+ * Helper function used by {@link resolveIdToValue}, please use that instead, if
69
+ * you want to resolve the value of an identifier / node
70
+ *
71
+ * This function resolves a unary minus operator `-` to a {@link ValueNumber} or {@link ValueVector} of ValueNumbers
72
+ * by recursively resolving the values of the arguments by calling {@link resolveIdToValue}
73
+ *
74
+ * @param resolve - Variable resolve mode
75
+ * @param operator - Node of the minus operator to resolve
76
+ * @param env - Environment to use
77
+ * @param graph - Dataflow graph
78
+ * @param map - Id map of the dataflow graph
79
+ * @returns ValueNumber, ValueVector of ValueNumbers, or Top
80
+ */
81
+ export declare function resolveAsMinus(resolve: VariableResolve, operator: RNodeWithParent, environment?: REnvironmentInformation, graph?: DataflowGraph, idMap?: AstIdMap): ValueNumber | ValueVector<Lift<ValueNumber[]>> | typeof Top;