@eagleoutice/flowr 2.2.16 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -20
- package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
- package/abstract-interpretation/data-frame/absint-info.js +31 -0
- package/abstract-interpretation/data-frame/absint-visitor.d.ts +58 -0
- package/abstract-interpretation/data-frame/absint-visitor.js +171 -0
- package/abstract-interpretation/data-frame/domain.d.ts +107 -0
- package/abstract-interpretation/data-frame/domain.js +315 -0
- package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
- package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
- package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
- package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
- package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
- package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
- package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
- package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
- package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
- package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
- package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
- package/abstract-interpretation/data-frame/resolve-args.js +118 -0
- package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
- package/abstract-interpretation/data-frame/semantics.js +363 -0
- package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
- package/abstract-interpretation/data-frame/shape-inference.js +111 -0
- package/benchmark/slicer.d.ts +15 -1
- package/benchmark/slicer.js +137 -0
- package/benchmark/stats/print.js +123 -45
- package/benchmark/stats/size-of.d.ts +7 -0
- package/benchmark/stats/size-of.js +1 -0
- package/benchmark/stats/stats.d.ts +30 -1
- package/benchmark/stats/stats.js +4 -2
- package/benchmark/summarizer/data.d.ts +33 -2
- package/benchmark/summarizer/first-phase/input.js +5 -1
- package/benchmark/summarizer/first-phase/process.js +47 -1
- package/benchmark/summarizer/second-phase/graph.js +1 -1
- package/benchmark/summarizer/second-phase/process.js +102 -4
- package/cli/benchmark-app.d.ts +2 -0
- package/cli/benchmark-app.js +2 -0
- package/cli/benchmark-helper-app.d.ts +2 -0
- package/cli/benchmark-helper-app.js +10 -3
- package/cli/common/options.js +4 -0
- package/cli/repl/commands/repl-query.js +1 -1
- package/cli/repl/server/connection.js +14 -5
- package/config.d.ts +31 -0
- package/config.js +21 -1
- package/control-flow/basic-cfg-guided-visitor.d.ts +1 -2
- package/control-flow/basic-cfg-guided-visitor.js +0 -6
- package/control-flow/cfg-simplification.d.ts +6 -0
- package/control-flow/cfg-simplification.js +18 -9
- package/control-flow/control-flow-graph.d.ts +3 -8
- package/control-flow/control-flow-graph.js +5 -6
- package/control-flow/dfg-cfg-guided-visitor.js +1 -1
- package/control-flow/extract-cfg.d.ts +2 -2
- package/control-flow/extract-cfg.js +52 -63
- package/control-flow/semantic-cfg-guided-visitor.d.ts +1 -1
- package/control-flow/semantic-cfg-guided-visitor.js +1 -1
- package/core/steps/all/static-slicing/00-slice.d.ts +7 -1
- package/core/steps/all/static-slicing/00-slice.js +9 -3
- package/core/steps/pipeline/default-pipelines.d.ts +74 -74
- package/dataflow/environments/built-in.d.ts +7 -5
- package/dataflow/environments/built-in.js +16 -13
- package/dataflow/eval/resolve/alias-tracking.js +2 -2
- package/dataflow/eval/resolve/resolve.d.ts +53 -9
- package/dataflow/eval/resolve/resolve.js +132 -38
- package/dataflow/graph/dataflowgraph-builder.js +2 -2
- package/dataflow/graph/graph.js +1 -1
- package/dataflow/graph/invert-dfg.d.ts +2 -0
- package/dataflow/graph/invert-dfg.js +17 -0
- package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +1 -0
- package/dataflow/internal/process/functions/call/built-in/built-in-source.js +4 -0
- package/documentation/doc-util/doc-query.js +11 -1
- package/documentation/doc-util/doc-search.js +2 -2
- package/documentation/print-cfg-wiki.js +3 -4
- package/documentation/print-core-wiki.js +2 -2
- package/documentation/print-dataflow-graph-wiki.js +7 -0
- package/documentation/print-faq-wiki.js +4 -0
- package/documentation/print-interface-wiki.js +11 -0
- package/documentation/print-linter-wiki.js +36 -4
- package/documentation/print-linting-and-testing-wiki.js +13 -1
- package/documentation/print-onboarding-wiki.js +4 -0
- package/documentation/print-query-wiki.js +29 -3
- package/linter/linter-executor.js +1 -2
- package/linter/linter-format.d.ts +26 -4
- package/linter/linter-format.js +25 -6
- package/linter/linter-rules.d.ts +63 -12
- package/linter/linter-rules.js +5 -1
- package/linter/rules/absolute-path.d.ts +4 -7
- package/linter/rules/absolute-path.js +9 -6
- package/linter/rules/dataframe-access-validation.d.ts +55 -0
- package/linter/rules/dataframe-access-validation.js +118 -0
- package/linter/rules/dead-code.d.ts +43 -0
- package/linter/rules/dead-code.js +50 -0
- package/linter/rules/deprecated-functions.d.ts +3 -2
- package/linter/rules/deprecated-functions.js +3 -1
- package/linter/rules/file-path-validity.d.ts +4 -4
- package/linter/rules/file-path-validity.js +8 -6
- package/linter/rules/naming-convention.d.ts +5 -4
- package/linter/rules/naming-convention.js +8 -2
- package/linter/rules/seeded-randomness.d.ts +4 -3
- package/linter/rules/seeded-randomness.js +3 -1
- package/linter/rules/unused-definition.d.ts +2 -0
- package/linter/rules/unused-definition.js +3 -1
- package/package.json +2 -2
- package/queries/catalog/dependencies-query/dependencies-query-executor.js +6 -1
- package/queries/catalog/dependencies-query/function-info/read-functions.js +1 -0
- package/queries/catalog/dependencies-query/function-info/write-functions.js +1 -0
- package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
- package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
- package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
- package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
- package/queries/catalog/linter-query/linter-query-format.js +1 -1
- package/queries/catalog/location-map-query/location-map-query-executor.js +7 -5
- package/queries/catalog/location-map-query/location-map-query-format.d.ts +3 -0
- package/queries/catalog/location-map-query/location-map-query-format.js +1 -0
- package/queries/catalog/search-query/search-query-executor.js +1 -1
- package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -1
- package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +3 -0
- package/queries/catalog/static-slice-query/static-slice-query-format.js +3 -1
- package/queries/query-print.d.ts +1 -1
- package/queries/query-print.js +0 -1
- package/queries/query.d.ts +77 -6
- package/queries/query.js +26 -11
- package/search/flowr-search-builder.d.ts +6 -6
- package/search/flowr-search-executor.d.ts +2 -2
- package/search/flowr-search-executor.js +1 -1
- package/search/flowr-search.d.ts +13 -8
- package/search/flowr-search.js +21 -0
- package/search/search-executor/search-enrichers.d.ts +87 -20
- package/search/search-executor/search-enrichers.js +44 -5
- package/search/search-executor/search-generators.d.ts +4 -4
- package/search/search-executor/search-generators.js +12 -7
- package/search/search-executor/search-mappers.js +3 -2
- package/search/search-executor/search-transformer.d.ts +3 -3
- package/search/search-executor/search-transformer.js +2 -2
- package/slicing/static/static-slicer.d.ts +4 -2
- package/slicing/static/static-slicer.js +10 -4
- package/util/collections/arrays.d.ts +2 -0
- package/util/collections/arrays.js +9 -0
- package/util/files.d.ts +8 -2
- package/util/files.js +22 -4
- package/util/mermaid/dfg.js +4 -2
- package/util/r-value.d.ts +23 -0
- package/util/r-value.js +113 -0
- package/util/range.d.ts +1 -0
- package/util/range.js +5 -1
- package/util/version.js +1 -1
- package/util/cfg/cfg.d.ts +0 -0
- package/util/cfg/cfg.js +0 -2
package/benchmark/slicer.js
CHANGED
|
@@ -26,6 +26,10 @@ const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-si
|
|
|
26
26
|
const vertex_1 = require("../dataflow/graph/vertex");
|
|
27
27
|
const arrays_1 = require("../util/collections/arrays");
|
|
28
28
|
const config_1 = require("../config");
|
|
29
|
+
const extract_cfg_1 = require("../control-flow/extract-cfg");
|
|
30
|
+
const absint_info_1 = require("../abstract-interpretation/data-frame/absint-info");
|
|
31
|
+
const domain_1 = require("../abstract-interpretation/data-frame/domain");
|
|
32
|
+
const shape_inference_1 = require("../abstract-interpretation/data-frame/shape-inference");
|
|
29
33
|
/**
|
|
30
34
|
* The logger to be used for benchmarking as a global object.
|
|
31
35
|
*/
|
|
@@ -36,10 +40,12 @@ class BenchmarkSlicer {
|
|
|
36
40
|
perSliceMeasurements = new Map();
|
|
37
41
|
deltas = new Map();
|
|
38
42
|
parserName;
|
|
43
|
+
config;
|
|
39
44
|
stats;
|
|
40
45
|
loadedXml;
|
|
41
46
|
dataflow;
|
|
42
47
|
normalizedAst;
|
|
48
|
+
controlFlow;
|
|
43
49
|
totalStopwatch;
|
|
44
50
|
finished = false;
|
|
45
51
|
// Yes, this is unclean, but we know that we assign the executor during the initialization and this saves us from having to check for nullability every time
|
|
@@ -55,6 +61,7 @@ class BenchmarkSlicer {
|
|
|
55
61
|
*/
|
|
56
62
|
async init(request, config, autoSelectIf, threshold) {
|
|
57
63
|
(0, assert_1.guard)(this.stats === undefined, 'cannot initialize the slicer twice');
|
|
64
|
+
this.config = config;
|
|
58
65
|
// we know these are in sync so we just cast to one of them
|
|
59
66
|
this.parser = await this.commonMeasurements.measure('initialize R session', async () => {
|
|
60
67
|
if (this.parserName === 'r-shell') {
|
|
@@ -258,6 +265,114 @@ class BenchmarkSlicer {
|
|
|
258
265
|
code: stats.reconstructedCode
|
|
259
266
|
};
|
|
260
267
|
}
|
|
268
|
+
/**
|
|
269
|
+
* Extract the control flow graph using {@link extractCFG}
|
|
270
|
+
*/
|
|
271
|
+
extractCFG() {
|
|
272
|
+
exports.benchmarkLogger.trace('try to extract the control flow graph');
|
|
273
|
+
this.guardActive();
|
|
274
|
+
(0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined for control flow extraction');
|
|
275
|
+
(0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined for control flow extraction');
|
|
276
|
+
(0, assert_1.guard)(this.config !== undefined, 'config should be defined for control flow extraction');
|
|
277
|
+
const ast = this.normalizedAst;
|
|
278
|
+
const dfg = this.dataflow.graph;
|
|
279
|
+
const config = this.config;
|
|
280
|
+
this.controlFlow = this.measureSimpleStep('extract control flow graph', () => (0, extract_cfg_1.extractCfg)(ast, config, dfg));
|
|
281
|
+
}
|
|
282
|
+
/**
|
|
283
|
+
* Infer the shape of data frames using abstract interpretation with {@link inferDataFrameShapes}
|
|
284
|
+
*
|
|
285
|
+
* @returns The statistics of the data frame shape inference
|
|
286
|
+
*/
|
|
287
|
+
inferDataFrameShapes() {
|
|
288
|
+
exports.benchmarkLogger.trace('try to infer shapes for data frames');
|
|
289
|
+
(0, assert_1.guard)(this.stats !== undefined && !this.finished, 'need to call init before, and can not do after finish!');
|
|
290
|
+
(0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined for data frame shape inference');
|
|
291
|
+
(0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined for data frame shape inference');
|
|
292
|
+
(0, assert_1.guard)(this.controlFlow !== undefined, 'controlFlow should be defined for data frame shape inference');
|
|
293
|
+
(0, assert_1.guard)(this.config !== undefined, 'config should be defined for data frame shape inference');
|
|
294
|
+
const ast = this.normalizedAst;
|
|
295
|
+
const dfg = this.dataflow.graph;
|
|
296
|
+
const cfinfo = this.controlFlow;
|
|
297
|
+
const config = this.config;
|
|
298
|
+
const stats = {
|
|
299
|
+
numberOfDataFrameFiles: 0,
|
|
300
|
+
numberOfNonDataFrameFiles: 0,
|
|
301
|
+
numberOfResultConstraints: 0,
|
|
302
|
+
numberOfResultingValues: 0,
|
|
303
|
+
numberOfResultingTop: 0,
|
|
304
|
+
numberOfResultingBottom: 0,
|
|
305
|
+
numberOfEmptyNodes: 0,
|
|
306
|
+
numberOfOperationNodes: 0,
|
|
307
|
+
numberOfValueNodes: 0,
|
|
308
|
+
sizeOfInfo: 0,
|
|
309
|
+
perNodeStats: new Map()
|
|
310
|
+
};
|
|
311
|
+
const result = this.measureSimpleStep('infer data frame shapes', () => (0, shape_inference_1.inferDataFrameShapes)(cfinfo, dfg, ast, config));
|
|
312
|
+
stats.numberOfResultConstraints = result.size;
|
|
313
|
+
for (const value of result.values()) {
|
|
314
|
+
if ((0, domain_1.equalDataFrameDomain)(value, domain_1.DataFrameTop)) {
|
|
315
|
+
stats.numberOfResultingTop++;
|
|
316
|
+
}
|
|
317
|
+
else if ((0, domain_1.equalDataFrameDomain)(value, domain_1.DataFrameBottom)) {
|
|
318
|
+
stats.numberOfResultingBottom++;
|
|
319
|
+
}
|
|
320
|
+
else {
|
|
321
|
+
stats.numberOfResultingValues++;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
(0, visitor_1.visitAst)(this.normalizedAst.ast, (node) => {
|
|
325
|
+
if (node.info.dataFrame === undefined) {
|
|
326
|
+
return;
|
|
327
|
+
}
|
|
328
|
+
stats.sizeOfInfo += (0, size_of_1.safeSizeOf)([node.info.dataFrame]);
|
|
329
|
+
const expression = (0, absint_info_1.hasDataFrameExpressionInfo)(node) ? node.info.dataFrame : undefined;
|
|
330
|
+
const value = node.info.dataFrame.domain?.get(node.info.id);
|
|
331
|
+
// Only store per-node information for nodes representing expressions or nodes with abstract values
|
|
332
|
+
if (expression === undefined && value === undefined) {
|
|
333
|
+
stats.numberOfEmptyNodes++;
|
|
334
|
+
return;
|
|
335
|
+
}
|
|
336
|
+
const nodeStats = {
|
|
337
|
+
numberOfEntries: node.info.dataFrame?.domain?.size ?? 0
|
|
338
|
+
};
|
|
339
|
+
if (expression !== undefined) {
|
|
340
|
+
nodeStats.mappedOperations = expression.operations.map(op => op.operation);
|
|
341
|
+
stats.numberOfOperationNodes++;
|
|
342
|
+
if (value !== undefined) {
|
|
343
|
+
nodeStats.inferredColNames = value.colnames === domain_1.ColNamesTop ? 'top' : value.colnames.length;
|
|
344
|
+
nodeStats.inferredColCount = this.getInferredSize(value.cols);
|
|
345
|
+
nodeStats.inferredRowCount = this.getInferredSize(value.rows);
|
|
346
|
+
nodeStats.approxRangeColCount = value.cols === domain_1.IntervalBottom ? 0 : value.cols[1] - value.cols[0];
|
|
347
|
+
nodeStats.approxRangeRowCount = value.rows === domain_1.IntervalBottom ? 0 : value.rows[1] - value.rows[0];
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
if (value !== undefined) {
|
|
351
|
+
stats.numberOfValueNodes++;
|
|
352
|
+
}
|
|
353
|
+
stats.perNodeStats.set(node.info.id, nodeStats);
|
|
354
|
+
});
|
|
355
|
+
if (stats.numberOfOperationNodes > 0) {
|
|
356
|
+
stats.numberOfDataFrameFiles = 1;
|
|
357
|
+
}
|
|
358
|
+
else {
|
|
359
|
+
stats.numberOfNonDataFrameFiles = 1;
|
|
360
|
+
}
|
|
361
|
+
this.stats.dataFrameShape = stats;
|
|
362
|
+
return stats;
|
|
363
|
+
}
|
|
364
|
+
getInferredSize(value) {
|
|
365
|
+
if ((0, domain_1.equalInterval)(value, domain_1.IntervalTop)) {
|
|
366
|
+
return 'top';
|
|
367
|
+
}
|
|
368
|
+
else if (value === domain_1.IntervalBottom) {
|
|
369
|
+
return 'bottom';
|
|
370
|
+
}
|
|
371
|
+
else if (!isFinite(value[1])) {
|
|
372
|
+
return 'infinite';
|
|
373
|
+
}
|
|
374
|
+
return Math.floor((value[0] + value[1]) / 2);
|
|
375
|
+
}
|
|
261
376
|
/** Bridging the gap between the new internal and the old names for the benchmarking */
|
|
262
377
|
async measureCommonStep(expectedStep, keyToMeasure) {
|
|
263
378
|
const memoryInit = process.memoryUsage();
|
|
@@ -271,6 +386,18 @@ class BenchmarkSlicer {
|
|
|
271
386
|
});
|
|
272
387
|
return result;
|
|
273
388
|
}
|
|
389
|
+
measureSimpleStep(keyToMeasure, measurement) {
|
|
390
|
+
const memoryInit = process.memoryUsage();
|
|
391
|
+
const result = this.commonMeasurements.measure(keyToMeasure, measurement);
|
|
392
|
+
const memoryEnd = process.memoryUsage();
|
|
393
|
+
this.deltas.set(keyToMeasure, {
|
|
394
|
+
heap: memoryEnd.heapUsed - memoryInit.heapUsed,
|
|
395
|
+
rss: memoryEnd.rss - memoryInit.rss,
|
|
396
|
+
external: memoryEnd.external - memoryInit.external,
|
|
397
|
+
buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
|
|
398
|
+
});
|
|
399
|
+
return result;
|
|
400
|
+
}
|
|
274
401
|
async measureSliceStep(expectedStep, measure, keyToMeasure) {
|
|
275
402
|
const { result } = await measure.measureAsync(keyToMeasure, () => this.executor.nextStep(expectedStep));
|
|
276
403
|
return result;
|
|
@@ -330,6 +457,8 @@ class BenchmarkSlicer {
|
|
|
330
457
|
const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
|
|
331
458
|
const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
|
|
332
459
|
const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
|
|
460
|
+
const controlFlowTime = Number(this.stats.commonMeasurements.get('extract control flow graph'));
|
|
461
|
+
const dataFrameShapeTime = Number(this.stats.commonMeasurements.get('infer data frame shapes'));
|
|
333
462
|
this.stats.retrieveTimePerToken = {
|
|
334
463
|
raw: retrieveTime / this.stats.input.numberOfRTokens,
|
|
335
464
|
normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
|
|
@@ -346,6 +475,14 @@ class BenchmarkSlicer {
|
|
|
346
475
|
raw: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
|
|
347
476
|
normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
|
|
348
477
|
};
|
|
478
|
+
this.stats.controlFlowTimePerToken = !isNaN(controlFlowTime) ? {
|
|
479
|
+
raw: controlFlowTime / this.stats.input.numberOfRTokens,
|
|
480
|
+
normalized: controlFlowTime / this.stats.input.numberOfNormalizedTokens,
|
|
481
|
+
} : undefined;
|
|
482
|
+
this.stats.dataFrameShapeTimePerToken = !isNaN(dataFrameShapeTime) ? {
|
|
483
|
+
raw: dataFrameShapeTime / this.stats.input.numberOfRTokens,
|
|
484
|
+
normalized: dataFrameShapeTime / this.stats.input.numberOfNormalizedTokens,
|
|
485
|
+
} : undefined;
|
|
349
486
|
return {
|
|
350
487
|
stats: this.stats,
|
|
351
488
|
parse: typeof this.loadedXml === 'string' ? this.loadedXml : JSON.stringify(this.loadedXml),
|
package/benchmark/stats/print.js
CHANGED
|
@@ -84,22 +84,34 @@ function convertNumberToNiceBytes(x) {
|
|
|
84
84
|
function stats2string(stats) {
|
|
85
85
|
let result = `
|
|
86
86
|
Request: ${JSON.stringify(stats.request)}
|
|
87
|
-
Shell init time:
|
|
88
|
-
AST retrieval:
|
|
89
|
-
AST retrieval per token:
|
|
90
|
-
AST retrieval per R token:
|
|
91
|
-
AST normalization:
|
|
92
|
-
AST normalization per token:
|
|
93
|
-
AST normalization per R token
|
|
94
|
-
Dataflow creation:
|
|
95
|
-
Dataflow creation per token:
|
|
96
|
-
Dataflow creation per R token
|
|
97
|
-
Total common time per token:
|
|
98
|
-
Total common time per R token
|
|
99
|
-
|
|
100
|
-
|
|
87
|
+
Shell init time: ${print(stats.commonMeasurements, 'initialize R session')}
|
|
88
|
+
AST retrieval: ${print(stats.commonMeasurements, 'retrieve AST from R code')}
|
|
89
|
+
AST retrieval per token: ${formatNanoseconds(stats.retrieveTimePerToken.normalized)}
|
|
90
|
+
AST retrieval per R token: ${formatNanoseconds(stats.retrieveTimePerToken.raw)}
|
|
91
|
+
AST normalization: ${print(stats.commonMeasurements, 'normalize R AST')}
|
|
92
|
+
AST normalization per token: ${formatNanoseconds(stats.normalizeTimePerToken.normalized)}
|
|
93
|
+
AST normalization per R token: ${formatNanoseconds(stats.normalizeTimePerToken.raw)}
|
|
94
|
+
Dataflow creation: ${print(stats.commonMeasurements, 'produce dataflow information')}
|
|
95
|
+
Dataflow creation per token: ${formatNanoseconds(stats.dataflowTimePerToken.normalized)}
|
|
96
|
+
Dataflow creation per R token: ${formatNanoseconds(stats.dataflowTimePerToken.raw)}
|
|
97
|
+
Total common time per token: ${formatNanoseconds(stats.totalCommonTimePerToken.normalized)}
|
|
98
|
+
Total common time per R token: ${formatNanoseconds(stats.totalCommonTimePerToken.raw)}`;
|
|
99
|
+
if (stats.commonMeasurements.has('extract control flow graph') && stats.controlFlowTimePerToken !== undefined) {
|
|
100
|
+
result += `
|
|
101
|
+
Control flow extraction: ${print(stats.commonMeasurements, 'extract control flow graph')}
|
|
102
|
+
Control flow extraction per token: ${formatNanoseconds(stats.controlFlowTimePerToken.normalized)}
|
|
103
|
+
Control flow extraction per R token: ${formatNanoseconds(stats.controlFlowTimePerToken.raw)}`;
|
|
104
|
+
}
|
|
105
|
+
if (stats.commonMeasurements.has('infer data frame shapes') && stats.dataFrameShapeTimePerToken !== undefined) {
|
|
106
|
+
result += `
|
|
107
|
+
Dataframe shape inference: ${print(stats.commonMeasurements, 'infer data frame shapes')}
|
|
108
|
+
Dataframe shape inference per token: ${formatNanoseconds(stats.dataFrameShapeTimePerToken.normalized)}
|
|
109
|
+
Dataframe shape inference per R token:${formatNanoseconds(stats.dataFrameShapeTimePerToken.raw)}`;
|
|
110
|
+
}
|
|
101
111
|
if (stats.perSliceMeasurements.numberOfSlices > 0) {
|
|
102
112
|
result += `
|
|
113
|
+
|
|
114
|
+
Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.perSliceMeasurements.numberOfSlices !== 1 ? 's' : ''}:
|
|
103
115
|
Total: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'total')}
|
|
104
116
|
Slice creation: ${printSummarizedMeasurements(stats.perSliceMeasurements, 'static slicing')}
|
|
105
117
|
Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.sliceTimePerToken.normalized)}
|
|
@@ -110,7 +122,7 @@ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.pe
|
|
|
110
122
|
Total per token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.normalized)}
|
|
111
123
|
Total per R token in slice: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.totalPerSliceTimePerToken.raw)}
|
|
112
124
|
Used Slice Criteria Sizes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceCriteriaSizes)}
|
|
113
|
-
Result Slice Sizes:
|
|
125
|
+
Result Slice Sizes:
|
|
114
126
|
Number of lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.lines)}
|
|
115
127
|
Number of non-empty lines: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.nonEmptyLines)}
|
|
116
128
|
Number of characters: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.characters)}
|
|
@@ -120,12 +132,12 @@ Slicing summary for ${stats.perSliceMeasurements.numberOfSlices} slice${stats.pe
|
|
|
120
132
|
Number of R tokens (w/o comments): ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.tokensNoComments)}
|
|
121
133
|
Normalized R tokens: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokens)}
|
|
122
134
|
Normalized R tokens (w/o comments): ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.normalizedTokensNoComments)}
|
|
123
|
-
Number of dataflow nodes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}
|
|
124
|
-
`;
|
|
135
|
+
Number of dataflow nodes: ${printCountSummarizedMeasurements(stats.perSliceMeasurements.sliceSize.dataflowNodes)}`;
|
|
125
136
|
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
137
|
+
result += `
|
|
138
|
+
|
|
139
|
+
Shell close: ${print(stats.commonMeasurements, 'close R session')}
|
|
140
|
+
Total: ${print(stats.commonMeasurements, 'total')}
|
|
129
141
|
|
|
130
142
|
Input:
|
|
131
143
|
Number of lines: ${pad(stats.input.numberOfLines)}
|
|
@@ -148,9 +160,64 @@ Dataflow:
|
|
|
148
160
|
Number of stored Env indices: ${pad(stats.dataflow.storedEnvIndices)}
|
|
149
161
|
Number of overwritten indices: ${pad(stats.dataflow.overwrittenIndices)}
|
|
150
162
|
Size of graph: ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`;
|
|
163
|
+
if (stats.dataFrameShape !== undefined) {
|
|
164
|
+
result += `
|
|
165
|
+
|
|
166
|
+
Dataframe shape inference:
|
|
167
|
+
Number of resulting constraints:${pad(stats.dataFrameShape.numberOfResultConstraints)}
|
|
168
|
+
Number of operation nodes: ${pad(stats.dataFrameShape.numberOfOperationNodes)}
|
|
169
|
+
Number of abstract value nodes: ${pad(stats.dataFrameShape.numberOfValueNodes)}
|
|
170
|
+
Number of entries per node: ${pad(stats.dataFrameShape.numberOfEntriesPerNode.mean)}
|
|
171
|
+
Number of operations: ${pad(stats.dataFrameShape.numberOfOperations)}
|
|
172
|
+
Number of total values: ${pad(stats.dataFrameShape.numberOfTotalValues)}
|
|
173
|
+
Number of total top: ${pad(stats.dataFrameShape.numberOfTotalTop)}
|
|
174
|
+
Inferred column names per node: ${pad(stats.dataFrameShape.inferredColNames.mean)}
|
|
175
|
+
Number of column names values: ${pad(stats.dataFrameShape.numberOfColNamesValues)}
|
|
176
|
+
Number of column names Top: ${pad(stats.dataFrameShape.numberOfColNamesTop)}
|
|
177
|
+
Inferred column count per node: ${pad(stats.dataFrameShape.inferredColCount.mean)}
|
|
178
|
+
Number of column count values: ${pad(stats.dataFrameShape.numberOfColCountValues)}
|
|
179
|
+
Number of column count Top: ${pad(stats.dataFrameShape.numberOfColCountTop)}
|
|
180
|
+
Number of column count infinite:${pad(stats.dataFrameShape.numberOfColCountInfinite)}
|
|
181
|
+
Inferred row count per node: ${pad(stats.dataFrameShape.inferredRowCount.mean)}
|
|
182
|
+
Number of row count values: ${pad(stats.dataFrameShape.numberOfRowCountValues)}
|
|
183
|
+
Number of row count Top: ${pad(stats.dataFrameShape.numberOfRowCountTop)}
|
|
184
|
+
Number of row count infinite: ${pad(stats.dataFrameShape.numberOfRowCountInfinite)}
|
|
185
|
+
Size of data frame shape info: ${convertNumberToNiceBytes(stats.dataFrameShape.sizeOfInfo)}`;
|
|
186
|
+
}
|
|
187
|
+
return result;
|
|
151
188
|
}
|
|
152
189
|
function ultimateStats2String(stats) {
|
|
153
|
-
|
|
190
|
+
let result = `
|
|
191
|
+
Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
|
|
192
|
+
Shell init time: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
|
|
193
|
+
AST retrieval: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
|
|
194
|
+
AST retrieval per token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
|
|
195
|
+
AST retrieval per R token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
|
|
196
|
+
AST normalization: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
|
|
197
|
+
AST normalization per token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
|
|
198
|
+
AST normalization per R token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
|
|
199
|
+
Dataflow creation: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
|
|
200
|
+
Dataflow creation per token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
|
|
201
|
+
Dataflow creation per R token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
|
|
202
|
+
Total common time per token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
|
|
203
|
+
Total common time per R token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}`;
|
|
204
|
+
if (stats.commonMeasurements.has('extract control flow graph') && stats.controlFlowTimePerToken !== undefined) {
|
|
205
|
+
result += `
|
|
206
|
+
Control flow extraction: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('extract control flow graph'))}
|
|
207
|
+
Control flow extraction per token: ${formatSummarizedTimeMeasure(stats.controlFlowTimePerToken.normalized)}
|
|
208
|
+
Control flow extraction per R token: ${formatSummarizedTimeMeasure(stats.controlFlowTimePerToken.raw)}`;
|
|
209
|
+
}
|
|
210
|
+
if (stats.commonMeasurements.has('infer data frame shapes') && stats.dataFrameShapeTimePerToken !== undefined) {
|
|
211
|
+
result += `
|
|
212
|
+
Dataframe shape inference: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('infer data frame shapes'))}
|
|
213
|
+
Dataframe shape inference per token: ${formatSummarizedTimeMeasure(stats.dataFrameShapeTimePerToken.normalized)}
|
|
214
|
+
Dataframe shape inference per R token:${formatSummarizedTimeMeasure(stats.dataFrameShapeTimePerToken.raw)}`;
|
|
215
|
+
}
|
|
216
|
+
// Used Slice Criteria Sizes: ${formatSummarizedMeasure(stats.perSliceMeasurements.sliceCriteriaSizes)}
|
|
217
|
+
if (stats.totalSlices > 0) {
|
|
218
|
+
result += `
|
|
219
|
+
|
|
220
|
+
Slicing summary for ${stats.totalSlices} slice${stats.totalSlices !== 1 ? 's' : ''}:
|
|
154
221
|
Total: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('total'))}
|
|
155
222
|
Slice creation: ${formatSummarizedTimeMeasure(stats.perSliceMeasurements.get('static slicing'))}
|
|
156
223
|
Slice creation per token in slice: ${formatSummarizedTimeMeasure(stats.sliceTimePerToken.normalized)}
|
|
@@ -161,29 +228,14 @@ function ultimateStats2String(stats) {
|
|
|
161
228
|
Total per token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.normalized)}
|
|
162
229
|
Total per R token in slice: ${formatSummarizedTimeMeasure(stats.totalPerSliceTimePerToken.raw)}
|
|
163
230
|
Failed to Re-Parse: ${pad(stats.failedToRepParse)}/${stats.totalSlices}
|
|
164
|
-
Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
|
|
231
|
+
Times hit Threshold: ${pad(stats.timesHitThreshold)}/${stats.totalSlices}
|
|
165
232
|
${reduction2String('Reductions', stats.reduction)}
|
|
166
|
-
${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
Summarized: ${stats.totalRequests} requests and ${stats.totalSlices} slices
|
|
170
|
-
Shell init time: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('initialize R session'))}
|
|
171
|
-
AST retrieval: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('retrieve AST from R code'))}
|
|
172
|
-
AST retrieval per token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.normalized)}
|
|
173
|
-
AST retrieval per R token: ${formatSummarizedTimeMeasure(stats.retrieveTimePerToken.raw)}
|
|
174
|
-
AST normalization: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('normalize R AST'))}
|
|
175
|
-
AST normalization per token: ${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.normalized)}
|
|
176
|
-
AST normalization per R token:${formatSummarizedTimeMeasure(stats.normalizeTimePerToken.raw)}
|
|
177
|
-
Dataflow creation: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('produce dataflow information'))}
|
|
178
|
-
Dataflow creation per token: ${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.normalized)}
|
|
179
|
-
Dataflow creation per R token:${formatSummarizedTimeMeasure(stats.dataflowTimePerToken.raw)}
|
|
180
|
-
Total common time per token: ${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.normalized)}
|
|
181
|
-
Total common time per R token:${formatSummarizedTimeMeasure(stats.totalCommonTimePerToken.raw)}
|
|
182
|
-
|
|
183
|
-
${slice}
|
|
233
|
+
${reduction2String('Reductions without comments and empty lines', stats.reductionNoFluff)}`;
|
|
234
|
+
}
|
|
235
|
+
result += `
|
|
184
236
|
|
|
185
|
-
Shell close:
|
|
186
|
-
Total:
|
|
237
|
+
Shell close: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('close R session'))}
|
|
238
|
+
Total: ${formatSummarizedTimeMeasure(stats.commonMeasurements.get('total'))}
|
|
187
239
|
|
|
188
240
|
Input:
|
|
189
241
|
Number of lines: ${formatSummarizedMeasure(stats.input.numberOfLines)}
|
|
@@ -205,12 +257,38 @@ Dataflow:
|
|
|
205
257
|
Number of stored Vtx indices: ${formatSummarizedMeasure(stats.dataflow.storedVertexIndices)}
|
|
206
258
|
Number of stored Env indices: ${formatSummarizedMeasure(stats.dataflow.storedEnvIndices)}
|
|
207
259
|
Number of overwritten indices: ${formatSummarizedMeasure(stats.dataflow.overwrittenIndices)}
|
|
208
|
-
Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
|
|
209
|
-
|
|
260
|
+
Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}`;
|
|
261
|
+
if (stats.dataFrameShape !== undefined) {
|
|
262
|
+
result += `
|
|
263
|
+
|
|
264
|
+
Dataframe shape inference:
|
|
265
|
+
Number of resulting constraints:${formatSummarizedMeasure(stats.dataFrameShape.numberOfResultConstraints)}
|
|
266
|
+
Number of operation nodes: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfOperationNodes)}
|
|
267
|
+
Number of abstract value nodes: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfValueNodes)}
|
|
268
|
+
Number of entries per node: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfEntriesPerNode)}
|
|
269
|
+
Number of operations: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfOperations)}
|
|
270
|
+
Number of total values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfTotalValues)}
|
|
271
|
+
Number of total top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfTotalTop)}
|
|
272
|
+
Inferred column names per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredColNames)}
|
|
273
|
+
Number of column names values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColNamesValues)}
|
|
274
|
+
Number of column names top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColNamesTop)}
|
|
275
|
+
Inferred column count per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredColCount)}
|
|
276
|
+
Number of column count exact: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountExact)}
|
|
277
|
+
Number of column count values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountValues)}
|
|
278
|
+
Number of column count top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountTop)}
|
|
279
|
+
Number of column count infinite:${formatSummarizedMeasure(stats.dataFrameShape.numberOfColCountInfinite)}
|
|
280
|
+
Inferred row count per node: ${formatSummarizedMeasure(stats.dataFrameShape.inferredRowCount)}
|
|
281
|
+
Number of row count exact: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountExact)}
|
|
282
|
+
Number of row count values: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountValues)}
|
|
283
|
+
Number of row count top: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountTop)}
|
|
284
|
+
Number of row count infinite: ${formatSummarizedMeasure(stats.dataFrameShape.numberOfRowCountInfinite)}
|
|
285
|
+
Size of data frame shape info: ${formatSummarizedMeasure(stats.dataFrameShape.sizeOfInfo, convertNumberToNiceBytes)}`;
|
|
286
|
+
}
|
|
287
|
+
return result;
|
|
210
288
|
}
|
|
211
289
|
function reduction2String(title, reduction) {
|
|
212
290
|
return `
|
|
213
|
-
${title} (reduced by x%):
|
|
291
|
+
${title} (reduced by x%):
|
|
214
292
|
Number of lines: ${formatSummarizedMeasure(reduction.numberOfLines, asPercentage)}
|
|
215
293
|
Number of lines no auto: ${formatSummarizedMeasure(reduction.numberOfLinesNoAutoSelection, asPercentage)}
|
|
216
294
|
Number of characters: ${formatSummarizedMeasure(reduction.numberOfCharacters, asPercentage)}
|
|
@@ -1,3 +1,10 @@
|
|
|
1
1
|
import type { DataflowGraph } from '../../dataflow/graph/graph';
|
|
2
2
|
/** Returns the size of the given df graph in bytes (without sharing in-memory) */
|
|
3
3
|
export declare function getSizeOfDfGraph(df: DataflowGraph): number;
|
|
4
|
+
/**
|
|
5
|
+
* Calculates the size of an array in bytes.
|
|
6
|
+
*
|
|
7
|
+
* @param array - The array to calculate the size of.
|
|
8
|
+
* @returns The size of the array in bytes.
|
|
9
|
+
*/
|
|
10
|
+
export declare function safeSizeOf<T>(array: T[]): number;
|
|
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.getSizeOfDfGraph = getSizeOfDfGraph;
|
|
7
|
+
exports.safeSizeOf = safeSizeOf;
|
|
7
8
|
const environment_1 = require("../../dataflow/environments/environment");
|
|
8
9
|
const vertex_1 = require("../../dataflow/graph/vertex");
|
|
9
10
|
const identifier_1 = require("../../dataflow/environments/identifier");
|
|
@@ -4,7 +4,10 @@ import type { ReconstructionResult } from '../../reconstruct/reconstruct';
|
|
|
4
4
|
import type { RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge/retriever';
|
|
5
5
|
import type { TimePerToken } from '../summarizer/data';
|
|
6
6
|
import type { MergeableRecord } from '../../util/objects';
|
|
7
|
-
|
|
7
|
+
import type { DataFrameOperationName } from '../../abstract-interpretation/data-frame/semantics';
|
|
8
|
+
export declare const RequiredSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total"];
|
|
9
|
+
export declare const OptionalSlicerMeasurements: readonly ["extract control flow graph", "infer data frame shapes"];
|
|
10
|
+
export declare const CommonSlicerMeasurements: readonly ["initialize R session", "retrieve AST from R code", "normalize R AST", "produce dataflow information", "close R session", "total", "extract control flow graph", "infer data frame shapes"];
|
|
8
11
|
export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number];
|
|
9
12
|
export declare const PerSliceMeasurements: readonly ["static slicing", "reconstruct code", "total"];
|
|
10
13
|
export type PerSliceMeasurements = typeof PerSliceMeasurements[number];
|
|
@@ -41,6 +44,29 @@ export interface SlicerStatsDataflow<T = number> {
|
|
|
41
44
|
storedEnvIndices: T;
|
|
42
45
|
overwrittenIndices: T;
|
|
43
46
|
}
|
|
47
|
+
export interface SlicerStatsDfShape<T = number> {
|
|
48
|
+
numberOfDataFrameFiles: T extends number ? 0 | 1 : number;
|
|
49
|
+
numberOfNonDataFrameFiles: T extends number ? 0 | 1 : number;
|
|
50
|
+
numberOfResultConstraints: T;
|
|
51
|
+
numberOfResultingValues: T;
|
|
52
|
+
numberOfResultingTop: T;
|
|
53
|
+
numberOfResultingBottom: T;
|
|
54
|
+
numberOfEmptyNodes: T;
|
|
55
|
+
numberOfOperationNodes: T;
|
|
56
|
+
numberOfValueNodes: T;
|
|
57
|
+
sizeOfInfo: T;
|
|
58
|
+
perNodeStats: Map<NodeId, PerNodeStatsDfShape<T>>;
|
|
59
|
+
}
|
|
60
|
+
export interface PerNodeStatsDfShape<T = number> {
|
|
61
|
+
numberOfEntries: T;
|
|
62
|
+
mappedOperations?: DataFrameOperationName[];
|
|
63
|
+
inferredColNames?: T | 'top';
|
|
64
|
+
inferredColCount?: T | 'bottom' | 'infinite' | 'top';
|
|
65
|
+
inferredRowCount?: T | 'bottom' | 'infinite' | 'top';
|
|
66
|
+
/** difference between upper and lower bound of interval domain (to estimate approximation) */
|
|
67
|
+
approxRangeColCount?: T;
|
|
68
|
+
approxRangeRowCount?: T;
|
|
69
|
+
}
|
|
44
70
|
/**
|
|
45
71
|
* Please note, that these measurement can be negative as there is no guarantee that the memory usage will increase
|
|
46
72
|
* due to, e.g., garbage collection.
|
|
@@ -61,8 +87,11 @@ export interface SlicerStats {
|
|
|
61
87
|
request: RParseRequestFromFile | RParseRequestFromText;
|
|
62
88
|
input: SlicerStatsInput;
|
|
63
89
|
dataflow: SlicerStatsDataflow;
|
|
90
|
+
dataFrameShape?: SlicerStatsDfShape;
|
|
64
91
|
retrieveTimePerToken: TimePerToken<number>;
|
|
65
92
|
normalizeTimePerToken: TimePerToken<number>;
|
|
66
93
|
dataflowTimePerToken: TimePerToken<number>;
|
|
67
94
|
totalCommonTimePerToken: TimePerToken<number>;
|
|
95
|
+
controlFlowTimePerToken?: TimePerToken<number>;
|
|
96
|
+
dataFrameShapeTimePerToken?: TimePerToken<number>;
|
|
68
97
|
}
|
package/benchmark/stats/stats.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.PerSliceMeasurements = exports.CommonSlicerMeasurements = void 0;
|
|
4
|
-
exports.
|
|
3
|
+
exports.PerSliceMeasurements = exports.CommonSlicerMeasurements = exports.OptionalSlicerMeasurements = exports.RequiredSlicerMeasurements = void 0;
|
|
4
|
+
exports.RequiredSlicerMeasurements = ['initialize R session', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'];
|
|
5
|
+
exports.OptionalSlicerMeasurements = ['extract control flow graph', 'infer data frame shapes'];
|
|
6
|
+
exports.CommonSlicerMeasurements = [...exports.RequiredSlicerMeasurements, ...exports.OptionalSlicerMeasurements];
|
|
5
7
|
exports.PerSliceMeasurements = ['static slicing', 'reconstruct code', 'total'];
|
|
6
8
|
//# sourceMappingURL=stats.js.map
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
import type { DataFrameOperationName } from '../../abstract-interpretation/data-frame/semantics';
|
|
1
2
|
import type { SummarizedMeasurement } from '../../util/summarizer';
|
|
2
|
-
import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
|
|
3
|
+
import type { CommonSlicerMeasurements, PerSliceMeasurements, SlicerStats, SlicerStatsDfShape, SlicerStatsDataflow, SlicerStatsInput } from '../stats/stats';
|
|
3
4
|
export interface SliceSizeCollection {
|
|
4
5
|
lines: number[];
|
|
5
6
|
nonEmptyLines: number[];
|
|
@@ -21,7 +22,8 @@ export interface SliceSizeCollection {
|
|
|
21
22
|
*/
|
|
22
23
|
export type SummarizedSlicerStats = {
|
|
23
24
|
perSliceMeasurements: SummarizedPerSliceStats;
|
|
24
|
-
|
|
25
|
+
dataFrameShape?: SummarizedDfShapeStats;
|
|
26
|
+
} & Omit<SlicerStats, 'perSliceMeasurements' | 'dataFrameShape'>;
|
|
25
27
|
export interface Reduction<T = number> {
|
|
26
28
|
numberOfLines: T;
|
|
27
29
|
numberOfLinesNoAutoSelection: T;
|
|
@@ -62,6 +64,8 @@ export interface UltimateSlicerStats {
|
|
|
62
64
|
normalizeTimePerToken: TimePerToken;
|
|
63
65
|
dataflowTimePerToken: TimePerToken;
|
|
64
66
|
totalCommonTimePerToken: TimePerToken;
|
|
67
|
+
controlFlowTimePerToken?: TimePerToken;
|
|
68
|
+
dataFrameShapeTimePerToken?: TimePerToken;
|
|
65
69
|
sliceTimePerToken: TimePerToken;
|
|
66
70
|
reconstructTimePerToken: TimePerToken;
|
|
67
71
|
totalPerSliceTimePerToken: TimePerToken;
|
|
@@ -74,4 +78,31 @@ export interface UltimateSlicerStats {
|
|
|
74
78
|
reductionNoFluff: Reduction<SummarizedMeasurement>;
|
|
75
79
|
input: SlicerStatsInput<SummarizedMeasurement>;
|
|
76
80
|
dataflow: SlicerStatsDataflow<SummarizedMeasurement>;
|
|
81
|
+
dataFrameShape?: SummarizedDfShapeStats<SummarizedMeasurement>;
|
|
82
|
+
}
|
|
83
|
+
export interface SummarizedDfShapeStats<T = number> extends Omit<SlicerStatsDfShape<T>, 'perNodeStats'> {
|
|
84
|
+
numberOfEntriesPerNode: SummarizedMeasurement;
|
|
85
|
+
numberOfOperations: T;
|
|
86
|
+
numberOfTotalValues: T;
|
|
87
|
+
numberOfTotalTop: T;
|
|
88
|
+
numberOfTotalBottom: T;
|
|
89
|
+
inferredColNames: SummarizedMeasurement;
|
|
90
|
+
numberOfColNamesValues: T;
|
|
91
|
+
numberOfColNamesTop: T;
|
|
92
|
+
numberOfColNamesBottom: T;
|
|
93
|
+
inferredColCount: SummarizedMeasurement;
|
|
94
|
+
numberOfColCountExact: T;
|
|
95
|
+
numberOfColCountValues: T;
|
|
96
|
+
numberOfColCountTop: T;
|
|
97
|
+
numberOfColCountInfinite: T;
|
|
98
|
+
numberOfColCountBottom: T;
|
|
99
|
+
approxRangeColCount: SummarizedMeasurement;
|
|
100
|
+
inferredRowCount: SummarizedMeasurement;
|
|
101
|
+
numberOfRowCountExact: T;
|
|
102
|
+
numberOfRowCountValues: T;
|
|
103
|
+
numberOfRowCountTop: T;
|
|
104
|
+
numberOfRowCountInfinite: T;
|
|
105
|
+
numberOfRowCountBottom: T;
|
|
106
|
+
approxRangeRowCount: SummarizedMeasurement;
|
|
107
|
+
perOperationNumber: Map<DataFrameOperationName, T>;
|
|
77
108
|
}
|
|
@@ -28,7 +28,11 @@ async function processRunMeasurement(line, fileNum, lineNum, textOutputAppendPat
|
|
|
28
28
|
return [k, BigInt(v.slice(0, -1))];
|
|
29
29
|
})),
|
|
30
30
|
perSliceMeasurements: new Map(got.stats.perSliceMeasurements
|
|
31
|
-
.map(([k, v]) => mapPerSliceStats(k, v)))
|
|
31
|
+
.map(([k, v]) => mapPerSliceStats(k, v))),
|
|
32
|
+
dataFrameShape: got.stats.dataFrameShape !== undefined ? {
|
|
33
|
+
...got.stats.dataFrameShape,
|
|
34
|
+
perNodeStats: new Map(got.stats.dataFrameShape.perNodeStats)
|
|
35
|
+
} : undefined
|
|
32
36
|
}
|
|
33
37
|
};
|
|
34
38
|
const totalSlices = got.stats.perSliceMeasurements.size;
|
|
@@ -53,6 +53,7 @@ const retriever_1 = require("../../../r-bridge/retriever");
|
|
|
53
53
|
const visitor_1 = require("../../../r-bridge/lang-4.x/ast/model/processing/visitor");
|
|
54
54
|
const type_1 = require("../../../r-bridge/lang-4.x/ast/model/type");
|
|
55
55
|
const arrays_1 = require("../../../util/collections/arrays");
|
|
56
|
+
const semantics_1 = require("../../../abstract-interpretation/data-frame/semantics");
|
|
56
57
|
const tempfile = (() => {
|
|
57
58
|
let _tempfile = undefined;
|
|
58
59
|
return () => {
|
|
@@ -250,9 +251,54 @@ async function summarizeSlicerStats(stats, report = () => {
|
|
|
250
251
|
normalizedTokensNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.normalizedTokensNoComments),
|
|
251
252
|
dataflowNodes: (0, summarizer_1.summarizeMeasurement)(sliceSize.dataflowNodes)
|
|
252
253
|
}
|
|
253
|
-
}
|
|
254
|
+
},
|
|
255
|
+
dataFrameShape: stats.dataFrameShape ? summarizeDfShapeStats(stats.dataFrameShape) : undefined
|
|
256
|
+
};
|
|
257
|
+
}
|
|
258
|
+
function summarizeDfShapeStats({ perNodeStats, ...stats }) {
|
|
259
|
+
const nodeStats = perNodeStats.values().toArray();
|
|
260
|
+
const isTop = (value) => value === 'top';
|
|
261
|
+
const isInfinite = (value) => value === 'infinite';
|
|
262
|
+
const isBottom = (value) => value === 'bottom';
|
|
263
|
+
const isValue = (value) => value !== undefined && !isTop(value) && !isInfinite(value) && !isBottom(value);
|
|
264
|
+
return {
|
|
265
|
+
...stats,
|
|
266
|
+
numberOfEntriesPerNode: (0, summarizer_1.summarizeMeasurement)(nodeStats.map(s => s.numberOfEntries)),
|
|
267
|
+
numberOfOperations: (0, arrays_1.arraySum)(nodeStats.map(s => s.mappedOperations?.length).filter(assert_1.isNotUndefined)),
|
|
268
|
+
numberOfTotalValues: nodeStats.filter(s => isValue(s.inferredColNames) && isValue(s.inferredColCount) && isValue(s.inferredRowCount)).length,
|
|
269
|
+
numberOfTotalTop: nodeStats.filter(s => isTop(s.inferredColNames) && isTop(s.inferredColCount) && isTop(s.inferredRowCount)).length,
|
|
270
|
+
numberOfTotalBottom: nodeStats.filter(s => s.inferredColNames === 0 && isBottom(s.inferredColCount) && isBottom(s.inferredRowCount)).length,
|
|
271
|
+
inferredColNames: (0, summarizer_1.summarizeMeasurement)(nodeStats.map(s => s.inferredColNames).filter(isValue)),
|
|
272
|
+
numberOfColNamesValues: nodeStats.map(s => s.inferredColNames).filter(isValue).length,
|
|
273
|
+
numberOfColNamesTop: nodeStats.map(s => s.inferredColNames).filter(isTop).length,
|
|
274
|
+
numberOfColNamesBottom: nodeStats.map(s => s.inferredColNames).filter(number => number === 0).length,
|
|
275
|
+
inferredColCount: (0, summarizer_1.summarizeMeasurement)(nodeStats.map(s => s.inferredColCount).filter(isValue)),
|
|
276
|
+
numberOfColCountExact: nodeStats.map(s => s.approxRangeColCount).filter(range => range === 0).length,
|
|
277
|
+
numberOfColCountValues: nodeStats.map(s => s.inferredColCount).filter(isValue).length,
|
|
278
|
+
numberOfColCountTop: nodeStats.map(s => s.inferredColCount).filter(isTop).length,
|
|
279
|
+
numberOfColCountInfinite: nodeStats.map(s => s.inferredColCount).filter(isInfinite).length,
|
|
280
|
+
numberOfColCountBottom: nodeStats.map(s => s.inferredColCount).filter(isBottom).length,
|
|
281
|
+
approxRangeColCount: (0, summarizer_1.summarizeMeasurement)(nodeStats.map(s => s.approxRangeColCount).filter(assert_1.isNotUndefined).filter(isFinite)),
|
|
282
|
+
inferredRowCount: (0, summarizer_1.summarizeMeasurement)(nodeStats.map(s => s.inferredRowCount).filter(isValue)),
|
|
283
|
+
numberOfRowCountExact: nodeStats.map(s => s.approxRangeRowCount).filter(range => range === 0).length,
|
|
284
|
+
numberOfRowCountValues: nodeStats.map(s => s.inferredRowCount).filter(isValue).length,
|
|
285
|
+
numberOfRowCountTop: nodeStats.map(s => s.inferredRowCount).filter(isTop).length,
|
|
286
|
+
numberOfRowCountInfinite: nodeStats.map(s => s.inferredRowCount).filter(isInfinite).length,
|
|
287
|
+
numberOfRowCountBottom: nodeStats.map(s => s.inferredRowCount).filter(isBottom).length,
|
|
288
|
+
approxRangeRowCount: (0, summarizer_1.summarizeMeasurement)(nodeStats.map(s => s.approxRangeRowCount).filter(assert_1.isNotUndefined).filter(isFinite)),
|
|
289
|
+
perOperationNumber: summarizePerOperationStats(nodeStats),
|
|
254
290
|
};
|
|
255
291
|
}
|
|
292
|
+
function summarizePerOperationStats(nodeStats) {
|
|
293
|
+
const perOperationNumber = new Map(semantics_1.DataFrameOperationNames.map(name => [name, 0]));
|
|
294
|
+
for (const stat of nodeStats) {
|
|
295
|
+
for (const operation of stat.mappedOperations ?? []) {
|
|
296
|
+
const value = perOperationNumber.get(operation) ?? 0;
|
|
297
|
+
perOperationNumber.set(operation, value + 1);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
return perOperationNumber;
|
|
301
|
+
}
|
|
256
302
|
function summarizeSummarizedMeasurement(data) {
|
|
257
303
|
data = data.filter(assert_1.isNotUndefined);
|
|
258
304
|
const min = Math.min(...data.map(d => d.min).filter(assert_1.isNotUndefined));
|
|
@@ -11,7 +11,7 @@ function writeGraphOutput(ultimate, outputGraphPath) {
|
|
|
11
11
|
const data = [];
|
|
12
12
|
for (const { name, measurements } of [{ name: 'per-file', measurements: ultimate.commonMeasurements }, { name: 'per-slice', measurements: ultimate.perSliceMeasurements }]) {
|
|
13
13
|
for (const [point, measurement] of measurements) {
|
|
14
|
-
if (point === 'close R session' || point === 'initialize R session') {
|
|
14
|
+
if (point === 'close R session' || point === 'initialize R session' || !measurement?.mean || !measurement?.std) {
|
|
15
15
|
continue;
|
|
16
16
|
}
|
|
17
17
|
const pointName = point === 'total' ? `total ${name}` : point;
|