@eagleoutice/flowr 1.4.2 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/print/slice-diff-ansi.js +7 -7
- package/core/steps.d.ts +352 -11
- package/core/steps.js +4 -4
- package/index.d.ts +0 -2
- package/index.js +0 -2
- package/package.json +65 -215
- package/util/{summarizer/summarizer.d.ts → summarizer.d.ts} +15 -1
- package/util/summarizer.js +37 -0
- package/util/version.d.ts +2 -0
- package/util/version.js +10 -0
- package/benchmark/index.d.ts +0 -3
- package/benchmark/index.js +0 -20
- package/benchmark/slicer.d.ts +0 -98
- package/benchmark/slicer.js +0 -213
- package/benchmark/stats/index.d.ts +0 -10
- package/benchmark/stats/index.js +0 -27
- package/benchmark/stats/print.d.ts +0 -7
- package/benchmark/stats/print.js +0 -155
- package/benchmark/stats/stats.d.ts +0 -40
- package/benchmark/stats/stats.js +0 -6
- package/benchmark/stopwatch.d.ts +0 -35
- package/benchmark/stopwatch.js +0 -79
- package/cli/benchmark-app.d.ts +0 -10
- package/cli/benchmark-app.js +0 -67
- package/cli/benchmark-helper-app.d.ts +0 -9
- package/cli/benchmark-helper-app.js +0 -69
- package/cli/common/features.d.ts +0 -3
- package/cli/common/features.js +0 -30
- package/cli/common/index.d.ts +0 -2
- package/cli/common/index.js +0 -19
- package/cli/common/options.d.ts +0 -20
- package/cli/common/options.js +0 -85
- package/cli/common/script.d.ts +0 -21
- package/cli/common/script.js +0 -61
- package/cli/common/scripts-info.d.ts +0 -25
- package/cli/common/scripts-info.js +0 -83
- package/cli/export-quads-app.d.ts +0 -7
- package/cli/export-quads-app.js +0 -42
- package/cli/repl/commands/cfg.d.ts +0 -3
- package/cli/repl/commands/cfg.js +0 -37
- package/cli/repl/commands/commands.d.ts +0 -13
- package/cli/repl/commands/commands.js +0 -142
- package/cli/repl/commands/dataflow.d.ts +0 -3
- package/cli/repl/commands/dataflow.js +0 -34
- package/cli/repl/commands/execute.d.ts +0 -4
- package/cli/repl/commands/execute.js +0 -27
- package/cli/repl/commands/index.d.ts +0 -2
- package/cli/repl/commands/index.js +0 -19
- package/cli/repl/commands/main.d.ts +0 -39
- package/cli/repl/commands/main.js +0 -14
- package/cli/repl/commands/normalize.d.ts +0 -3
- package/cli/repl/commands/normalize.js +0 -34
- package/cli/repl/commands/parse.d.ts +0 -2
- package/cli/repl/commands/parse.js +0 -109
- package/cli/repl/commands/quit.d.ts +0 -2
- package/cli/repl/commands/quit.js +0 -15
- package/cli/repl/commands/version.d.ts +0 -16
- package/cli/repl/commands/version.js +0 -33
- package/cli/repl/core.d.ts +0 -41
- package/cli/repl/core.js +0 -172
- package/cli/repl/execute.d.ts +0 -28
- package/cli/repl/execute.js +0 -79
- package/cli/repl/index.d.ts +0 -5
- package/cli/repl/index.js +0 -22
- package/cli/repl/prompt.d.ts +0 -2
- package/cli/repl/prompt.js +0 -9
- package/cli/repl/server/connection.d.ts +0 -21
- package/cli/repl/server/connection.js +0 -213
- package/cli/repl/server/messages/analysis.d.ts +0 -71
- package/cli/repl/server/messages/analysis.js +0 -21
- package/cli/repl/server/messages/error.d.ts +0 -11
- package/cli/repl/server/messages/error.js +0 -3
- package/cli/repl/server/messages/hello.d.ts +0 -20
- package/cli/repl/server/messages/hello.js +0 -3
- package/cli/repl/server/messages/index.d.ts +0 -1
- package/cli/repl/server/messages/index.js +0 -3
- package/cli/repl/server/messages/messages.d.ts +0 -35
- package/cli/repl/server/messages/messages.js +0 -40
- package/cli/repl/server/messages/repl.d.ts +0 -33
- package/cli/repl/server/messages/repl.js +0 -37
- package/cli/repl/server/messages/slice.d.ts +0 -25
- package/cli/repl/server/messages/slice.js +0 -37
- package/cli/repl/server/net.d.ts +0 -49
- package/cli/repl/server/net.js +0 -63
- package/cli/repl/server/send.d.ts +0 -4
- package/cli/repl/server/send.js +0 -18
- package/cli/repl/server/server.d.ts +0 -20
- package/cli/repl/server/server.js +0 -66
- package/cli/repl/server/validate.d.ts +0 -15
- package/cli/repl/server/validate.js +0 -34
- package/cli/slicer-app.d.ts +0 -11
- package/cli/slicer-app.js +0 -81
- package/cli/statistics-app.d.ts +0 -11
- package/cli/statistics-app.js +0 -98
- package/cli/statistics-helper-app.d.ts +0 -11
- package/cli/statistics-helper-app.js +0 -82
- package/cli/summarizer-app.d.ts +0 -18
- package/cli/summarizer-app.js +0 -66
- package/flowr.d.ts +0 -29
- package/flowr.js +0 -142
- package/statistics/features/common-syntax-probability.d.ts +0 -31
- package/statistics/features/common-syntax-probability.js +0 -156
- package/statistics/features/feature.d.ts +0 -175
- package/statistics/features/feature.js +0 -30
- package/statistics/features/index.d.ts +0 -1
- package/statistics/features/index.js +0 -18
- package/statistics/features/post-processing.d.ts +0 -12
- package/statistics/features/post-processing.js +0 -21
- package/statistics/features/supported/assignments/assignments.d.ts +0 -11
- package/statistics/features/supported/assignments/assignments.js +0 -53
- package/statistics/features/supported/assignments/index.d.ts +0 -1
- package/statistics/features/supported/assignments/index.js +0 -6
- package/statistics/features/supported/assignments/post-process.d.ts +0 -3
- package/statistics/features/supported/assignments/post-process.js +0 -125
- package/statistics/features/supported/comments/comments.d.ts +0 -18
- package/statistics/features/supported/comments/comments.js +0 -133
- package/statistics/features/supported/comments/index.d.ts +0 -1
- package/statistics/features/supported/comments/index.js +0 -6
- package/statistics/features/supported/comments/post-process.d.ts +0 -3
- package/statistics/features/supported/comments/post-process.js +0 -50
- package/statistics/features/supported/control-flow/control-flow.d.ts +0 -17
- package/statistics/features/supported/control-flow/control-flow.js +0 -67
- package/statistics/features/supported/control-flow/index.d.ts +0 -1
- package/statistics/features/supported/control-flow/index.js +0 -6
- package/statistics/features/supported/control-flow/post-process.d.ts +0 -3
- package/statistics/features/supported/control-flow/post-process.js +0 -65
- package/statistics/features/supported/data-access/data-access.d.ts +0 -15
- package/statistics/features/supported/data-access/data-access.js +0 -118
- package/statistics/features/supported/data-access/index.d.ts +0 -1
- package/statistics/features/supported/data-access/index.js +0 -6
- package/statistics/features/supported/data-access/post-process.d.ts +0 -3
- package/statistics/features/supported/data-access/post-process.js +0 -107
- package/statistics/features/supported/defined-functions/defined-functions.d.ts +0 -35
- package/statistics/features/supported/defined-functions/defined-functions.js +0 -139
- package/statistics/features/supported/defined-functions/index.d.ts +0 -1
- package/statistics/features/supported/defined-functions/index.js +0 -6
- package/statistics/features/supported/defined-functions/post-process.d.ts +0 -6
- package/statistics/features/supported/defined-functions/post-process.js +0 -177
- package/statistics/features/supported/expression-list/expression-list.d.ts +0 -9
- package/statistics/features/supported/expression-list/expression-list.js +0 -36
- package/statistics/features/supported/expression-list/index.d.ts +0 -1
- package/statistics/features/supported/expression-list/index.js +0 -6
- package/statistics/features/supported/expression-list/post-process.d.ts +0 -3
- package/statistics/features/supported/expression-list/post-process.js +0 -44
- package/statistics/features/supported/index.d.ts +0 -10
- package/statistics/features/supported/index.js +0 -27
- package/statistics/features/supported/loops/index.d.ts +0 -1
- package/statistics/features/supported/loops/index.js +0 -6
- package/statistics/features/supported/loops/loops.d.ts +0 -20
- package/statistics/features/supported/loops/loops.js +0 -79
- package/statistics/features/supported/loops/post-process.d.ts +0 -3
- package/statistics/features/supported/loops/post-process.js +0 -72
- package/statistics/features/supported/used-functions/index.d.ts +0 -1
- package/statistics/features/supported/used-functions/index.js +0 -6
- package/statistics/features/supported/used-functions/post-process.d.ts +0 -6
- package/statistics/features/supported/used-functions/post-process.js +0 -179
- package/statistics/features/supported/used-functions/used-functions.d.ts +0 -24
- package/statistics/features/supported/used-functions/used-functions.js +0 -95
- package/statistics/features/supported/used-packages/index.d.ts +0 -1
- package/statistics/features/supported/used-packages/index.js +0 -6
- package/statistics/features/supported/used-packages/post-process.d.ts +0 -3
- package/statistics/features/supported/used-packages/post-process.js +0 -121
- package/statistics/features/supported/used-packages/used-packages.d.ts +0 -16
- package/statistics/features/supported/used-packages/used-packages.js +0 -130
- package/statistics/features/supported/values/index.d.ts +0 -1
- package/statistics/features/supported/values/index.js +0 -6
- package/statistics/features/supported/values/post-process.d.ts +0 -3
- package/statistics/features/supported/values/post-process.js +0 -72
- package/statistics/features/supported/values/values.d.ts +0 -14
- package/statistics/features/supported/values/values.js +0 -101
- package/statistics/features/supported/variables/index.d.ts +0 -1
- package/statistics/features/supported/variables/index.js +0 -6
- package/statistics/features/supported/variables/post-process.d.ts +0 -9
- package/statistics/features/supported/variables/post-process.js +0 -122
- package/statistics/features/supported/variables/variables.d.ts +0 -15
- package/statistics/features/supported/variables/variables.js +0 -70
- package/statistics/index.d.ts +0 -6
- package/statistics/index.js +0 -24
- package/statistics/meta-statistics.d.ts +0 -33
- package/statistics/meta-statistics.js +0 -17
- package/statistics/output/file-provider.d.ts +0 -37
- package/statistics/output/file-provider.js +0 -97
- package/statistics/output/index.d.ts +0 -4
- package/statistics/output/index.js +0 -21
- package/statistics/output/print-stats.d.ts +0 -17
- package/statistics/output/print-stats.js +0 -69
- package/statistics/output/statistics-file.d.ts +0 -37
- package/statistics/output/statistics-file.js +0 -69
- package/statistics/statistics.d.ts +0 -24
- package/statistics/statistics.js +0 -108
- package/util/summarizer/auto-detect.d.ts +0 -2
- package/util/summarizer/auto-detect.js +0 -32
- package/util/summarizer/benchmark/data.d.ts +0 -66
- package/util/summarizer/benchmark/data.js +0 -13
- package/util/summarizer/benchmark/first-phase/input.d.ts +0 -3
- package/util/summarizer/benchmark/first-phase/input.js +0 -75
- package/util/summarizer/benchmark/first-phase/process.d.ts +0 -11
- package/util/summarizer/benchmark/first-phase/process.js +0 -217
- package/util/summarizer/benchmark/second-phase/graph.d.ts +0 -2
- package/util/summarizer/benchmark/second-phase/graph.js +0 -54
- package/util/summarizer/benchmark/second-phase/process.d.ts +0 -6
- package/util/summarizer/benchmark/second-phase/process.js +0 -126
- package/util/summarizer/benchmark/summarizer.d.ts +0 -31
- package/util/summarizer/benchmark/summarizer.js +0 -58
- package/util/summarizer/statistics/first-phase/process.d.ts +0 -6
- package/util/summarizer/statistics/first-phase/process.js +0 -81
- package/util/summarizer/statistics/post-process/clusterer.d.ts +0 -26
- package/util/summarizer/statistics/post-process/clusterer.js +0 -43
- package/util/summarizer/statistics/post-process/file-based-count.d.ts +0 -17
- package/util/summarizer/statistics/post-process/file-based-count.js +0 -49
- package/util/summarizer/statistics/post-process/histogram.d.ts +0 -59
- package/util/summarizer/statistics/post-process/histogram.js +0 -128
- package/util/summarizer/statistics/post-process/index.d.ts +0 -4
- package/util/summarizer/statistics/post-process/index.js +0 -21
- package/util/summarizer/statistics/post-process/post-process-output.d.ts +0 -16
- package/util/summarizer/statistics/post-process/post-process-output.js +0 -103
- package/util/summarizer/statistics/second-phase/process.d.ts +0 -11
- package/util/summarizer/statistics/second-phase/process.js +0 -117
- package/util/summarizer/statistics/summarizer.d.ts +0 -35
- package/util/summarizer/statistics/summarizer.js +0 -135
- package/util/summarizer/summarizer.js +0 -13
- /package/{statistics/output → util}/ansi.d.ts +0 -0
- /package/{statistics/output → util}/ansi.js +0 -0
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
import type { ClusterReport } from './clusterer';
|
|
2
|
-
export interface FileBasedTable {
|
|
3
|
-
header: string[];
|
|
4
|
-
rows: string[][];
|
|
5
|
-
}
|
|
6
|
-
/**
|
|
7
|
-
* The purpose of this function is to reformat {@link ClusterReport} in a way that lists file-based contributions.
|
|
8
|
-
* E.g., "the file with id 12 contained the assignment with `<-` 3 times".
|
|
9
|
-
* Feature Values are listed in the header.
|
|
10
|
-
*
|
|
11
|
-
* @param report - the report to reformat
|
|
12
|
-
*/
|
|
13
|
-
export declare function fileBasedCount(report: ClusterReport): FileBasedTable;
|
|
14
|
-
/**
|
|
15
|
-
* The threshold will cap off values larger than the threshold.
|
|
16
|
-
*/
|
|
17
|
-
export declare function writeFileBasedCountToFile(table: FileBasedTable, filepath: string): void;
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.writeFileBasedCountToFile = exports.fileBasedCount = void 0;
|
|
7
|
-
const fs_1 = __importDefault(require("fs"));
|
|
8
|
-
/**
|
|
9
|
-
* The purpose of this function is to reformat {@link ClusterReport} in a way that lists file-based contributions.
|
|
10
|
-
* E.g., "the file with id 12 contained the assignment with `<-` 3 times".
|
|
11
|
-
* Feature Values are listed in the header.
|
|
12
|
-
*
|
|
13
|
-
* @param report - the report to reformat
|
|
14
|
-
*/
|
|
15
|
-
function fileBasedCount(report) {
|
|
16
|
-
const values = report.valueInfoMap;
|
|
17
|
-
const contexts = [...report.valueInfoMap.values()];
|
|
18
|
-
const header = [...values.keys()].map(k => `"${k}"`);
|
|
19
|
-
const rows = [];
|
|
20
|
-
for (const id of report.contextIdMap.values()) {
|
|
21
|
-
rows.push(contexts.map(c => `${c.get(id)}`));
|
|
22
|
-
}
|
|
23
|
-
return {
|
|
24
|
-
header: header,
|
|
25
|
-
rows: rows
|
|
26
|
-
};
|
|
27
|
-
}
|
|
28
|
-
exports.fileBasedCount = fileBasedCount;
|
|
29
|
-
/**
|
|
30
|
-
* The threshold will cap off values larger than the threshold.
|
|
31
|
-
*/
|
|
32
|
-
function writeFileBasedCountToFile(table, filepath) {
|
|
33
|
-
const handle = fs_1.default.openSync(filepath, 'w');
|
|
34
|
-
const header = table.header.join('\t');
|
|
35
|
-
fs_1.default.writeSync(handle, `${header}\n`);
|
|
36
|
-
let max = 0;
|
|
37
|
-
function processEntry(r) {
|
|
38
|
-
const val = Number(r);
|
|
39
|
-
max = Math.max(val, max);
|
|
40
|
-
return r;
|
|
41
|
-
}
|
|
42
|
-
for (const row of table.rows) {
|
|
43
|
-
fs_1.default.writeSync(handle, row.map(processEntry).join('\t') + '\n');
|
|
44
|
-
}
|
|
45
|
-
fs_1.default.writeSync(handle, `%%% max: ${max}\n`);
|
|
46
|
-
fs_1.default.closeSync(handle);
|
|
47
|
-
}
|
|
48
|
-
exports.writeFileBasedCountToFile = writeFileBasedCountToFile;
|
|
49
|
-
//# sourceMappingURL=file-based-count.js.map
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
import type { ClusterReport } from './clusterer';
|
|
2
|
-
import type { Table } from '../../../files';
|
|
3
|
-
/**
|
|
4
|
-
* A conventional histogram (e.g., created by {@link histogramFromNumbers}).
|
|
5
|
-
* Can be converted to a {@link Table} by {@link histograms2table}.
|
|
6
|
-
* As described in {@link histogramFromNumbers}, there always will be a special bin for minimum.
|
|
7
|
-
*/
|
|
8
|
-
export interface Histogram {
|
|
9
|
-
/** A name intended for humans to know what the histogram is about. */
|
|
10
|
-
readonly name: string;
|
|
11
|
-
/** Values located in each bin */
|
|
12
|
-
bins: number[];
|
|
13
|
-
/** The configured size of each bin (stored explicitly to avoid semantic confusion with floating point arithmetic/problems with different rounding schemes) */
|
|
14
|
-
binSize: number;
|
|
15
|
-
/** Minimum value encountered (inclusive minimum of the underlying value range) */
|
|
16
|
-
min: number;
|
|
17
|
-
/** Maximum value encountered (inclusive maximum of the underlying value range) */
|
|
18
|
-
max: number;
|
|
19
|
-
/** Average of the included numbers */
|
|
20
|
-
mean: number;
|
|
21
|
-
/** Standard deviation of the included numbers */
|
|
22
|
-
std: number;
|
|
23
|
-
/** Median of the included numbers */
|
|
24
|
-
median: number;
|
|
25
|
-
}
|
|
26
|
-
/**
|
|
27
|
-
* Produces column-wise histogram-information based on a {@link ClusterReport}.
|
|
28
|
-
*
|
|
29
|
-
* Let's suppose you want histograms for the Assignments feature.
|
|
30
|
-
* By default, for each clustered value, a histogram is produced (can be configured by `filter`).
|
|
31
|
-
*
|
|
32
|
-
* @param report - The report to collect histogram information from
|
|
33
|
-
* @param binSize - Size of each bin (see {@link histogramFromNumbers} for details on why we do not specify the bin-count)
|
|
34
|
-
* @param relateValuesToNumberOfLines - If true, each value (like `<-` appeared in file 'x' exactly `N` times) will be divided by the number of lines in the file 'x'.
|
|
35
|
-
* @param filter - If given, only produce histograms for the given values
|
|
36
|
-
*/
|
|
37
|
-
export declare function histogramsFromClusters(report: ClusterReport, binSize: number, relateValuesToNumberOfLines: boolean, ...filter: string[]): Histogram[];
|
|
38
|
-
/**
|
|
39
|
-
* Produces a histogram from a list of numbers.
|
|
40
|
-
* Because we need to create several histograms of different datasets and want to compare them, we do not accept the
|
|
41
|
-
* number of bins desired and calculate the bin-size from the data (via `Math.ceil((max - min + 1) / bins)`).
|
|
42
|
-
* Instead, we require the bin-size to be given.
|
|
43
|
-
* There *always* will be an extra bin for the minimum value.
|
|
44
|
-
*/
|
|
45
|
-
export declare function histogramFromNumbers(name: string, binSize: number, values: number[]): Histogram;
|
|
46
|
-
/**
|
|
47
|
-
* Takes an array of histograms created by {@link histogramFromNumbers} and produces a CSV table from it.
|
|
48
|
-
* They must have the same bin-size for this function to work.
|
|
49
|
-
*
|
|
50
|
-
* The table has the following columns:
|
|
51
|
-
* - `bin` - The corresponding bin number
|
|
52
|
-
* - `from` - The exclusive lower bound of the bin
|
|
53
|
-
* - `to` - The inclusive upper bound of the bin
|
|
54
|
-
* - a column with the name of each histogram, containing its count of values in the corresponding bin
|
|
55
|
-
*
|
|
56
|
-
* @param histograms - The histograms to convert (assumed to have the same ranges and bins)
|
|
57
|
-
* @param countAsDensity - If true, the count is divided by the total number of values (individually for each histogram, similar to pgfplots `hist/density` option)
|
|
58
|
-
*/
|
|
59
|
-
export declare function histograms2table(histograms: Histogram[], countAsDensity?: boolean): Table;
|
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.histograms2table = exports.histogramFromNumbers = exports.histogramsFromClusters = void 0;
|
|
7
|
-
const defaultmap_1 = require("../../../defaultmap");
|
|
8
|
-
const assert_1 = require("../../../assert");
|
|
9
|
-
const bimap_1 = require("../../../bimap");
|
|
10
|
-
const fs_1 = __importDefault(require("fs"));
|
|
11
|
-
const benchmark_1 = require("../../../../benchmark");
|
|
12
|
-
/**
|
|
13
|
-
* Produces column-wise histogram-information based on a {@link ClusterReport}.
|
|
14
|
-
*
|
|
15
|
-
* Let's suppose you want histograms for the Assignments feature.
|
|
16
|
-
* By default, for each clustered value, a histogram is produced (can be configured by `filter`).
|
|
17
|
-
*
|
|
18
|
-
* @param report - The report to collect histogram information from
|
|
19
|
-
* @param binSize - Size of each bin (see {@link histogramFromNumbers} for details on why we do not specify the bin-count)
|
|
20
|
-
* @param relateValuesToNumberOfLines - If true, each value (like `<-` appeared in file 'x' exactly `N` times) will be divided by the number of lines in the file 'x'.
|
|
21
|
-
* @param filter - If given, only produce histograms for the given values
|
|
22
|
-
*/
|
|
23
|
-
function histogramsFromClusters(report, binSize, relateValuesToNumberOfLines, ...filter) {
|
|
24
|
-
const contexts = [...report.valueInfoMap.entries()];
|
|
25
|
-
const filenameFromId = new bimap_1.BiMap(report.contextIdMap.entries());
|
|
26
|
-
// first, we collect the number of appearances for each value
|
|
27
|
-
const valueCounts = new defaultmap_1.DefaultMap(() => []);
|
|
28
|
-
for (const id of report.contextIdMap.values()) {
|
|
29
|
-
// calculate the number of lines within the file given by the id
|
|
30
|
-
const filename = filenameFromId.getKey(id);
|
|
31
|
-
(0, assert_1.guard)(filename !== undefined, `filename for id ${id} is undefined`);
|
|
32
|
-
const numberOfLines = relateValuesToNumberOfLines ? fs_1.default.readFileSync(filename, 'utf-8').split('\n').length : 1;
|
|
33
|
-
for (const [value, counts] of contexts) {
|
|
34
|
-
valueCounts.get(value).push(counts.get(id) / numberOfLines);
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
return [...valueCounts.entries()].map(([name, counts]) => filter.length === 0 || filter.includes(name) ? histogramFromNumbers(name, binSize, counts) : undefined).filter(assert_1.isNotUndefined);
|
|
38
|
-
}
|
|
39
|
-
exports.histogramsFromClusters = histogramsFromClusters;
|
|
40
|
-
/**
|
|
41
|
-
* Produces a histogram from a list of numbers.
|
|
42
|
-
* Because we need to create several histograms of different datasets and want to compare them, we do not accept the
|
|
43
|
-
* number of bins desired and calculate the bin-size from the data (via `Math.ceil((max - min + 1) / bins)`).
|
|
44
|
-
* Instead, we require the bin-size to be given.
|
|
45
|
-
* There *always* will be an extra bin for the minimum value.
|
|
46
|
-
*/
|
|
47
|
-
function histogramFromNumbers(name, binSize, values) {
|
|
48
|
-
(0, assert_1.guard)(binSize > 0, `binSize must be greater than 0, but was ${binSize}`);
|
|
49
|
-
(0, assert_1.guard)(values.length > 0, 'values must not be empty');
|
|
50
|
-
const summarized = (0, benchmark_1.summarizeMeasurement)(values);
|
|
51
|
-
const numberOfBins = Math.ceil((summarized.max - summarized.min + 1) / binSize) + 1;
|
|
52
|
-
const histogram = new Array(numberOfBins).fill(0);
|
|
53
|
-
for (const v of values) {
|
|
54
|
-
const bin = v === summarized.min ? 0 : Math.floor((v - summarized.min) / binSize) + 1;
|
|
55
|
-
histogram[bin]++;
|
|
56
|
-
}
|
|
57
|
-
return {
|
|
58
|
-
name: name,
|
|
59
|
-
bins: histogram,
|
|
60
|
-
binSize,
|
|
61
|
-
...summarized
|
|
62
|
-
};
|
|
63
|
-
}
|
|
64
|
-
exports.histogramFromNumbers = histogramFromNumbers;
|
|
65
|
-
/**
|
|
66
|
-
* Takes an array of histograms created by {@link histogramFromNumbers} and produces a CSV table from it.
|
|
67
|
-
* They must have the same bin-size for this function to work.
|
|
68
|
-
*
|
|
69
|
-
* The table has the following columns:
|
|
70
|
-
* - `bin` - The corresponding bin number
|
|
71
|
-
* - `from` - The exclusive lower bound of the bin
|
|
72
|
-
* - `to` - The inclusive upper bound of the bin
|
|
73
|
-
* - a column with the name of each histogram, containing its count of values in the corresponding bin
|
|
74
|
-
*
|
|
75
|
-
* @param histograms - The histograms to convert (assumed to have the same ranges and bins)
|
|
76
|
-
* @param countAsDensity - If true, the count is divided by the total number of values (individually for each histogram, similar to pgfplots `hist/density` option)
|
|
77
|
-
*/
|
|
78
|
-
function histograms2table(histograms, countAsDensity = false) {
|
|
79
|
-
(0, assert_1.guard)(histograms.length > 0, 'there must be at least one histogram to convert to a table');
|
|
80
|
-
const mostBins = guardForLargestBinSize(histograms);
|
|
81
|
-
const header = ['bin', 'from', 'to', ...histograms.map(h => JSON.stringify(h.name))];
|
|
82
|
-
const sums = histograms.map(h => h.bins.reduce((a, b) => a + b, 0));
|
|
83
|
-
const rows = [];
|
|
84
|
-
for (let binIndex = 0; binIndex < mostBins; binIndex++) {
|
|
85
|
-
const row = new Array(histograms.length + 3);
|
|
86
|
-
row[0] = String(binIndex);
|
|
87
|
-
if (binIndex === 0) {
|
|
88
|
-
row[1] = histograms[0].min.toFixed(3);
|
|
89
|
-
row[2] = histograms[0].min.toFixed(3);
|
|
90
|
-
}
|
|
91
|
-
else {
|
|
92
|
-
row[1] = String((binIndex - 1) * histograms[0].binSize + histograms[0].min);
|
|
93
|
-
row[2] = String((binIndex) * histograms[0].binSize + histograms[0].min);
|
|
94
|
-
}
|
|
95
|
-
// fill remaining columns
|
|
96
|
-
writeRoResultsForHistograms(histograms, binIndex, row, countAsDensity, sums);
|
|
97
|
-
rows.push(row);
|
|
98
|
-
}
|
|
99
|
-
return {
|
|
100
|
-
header: header,
|
|
101
|
-
rows: rows
|
|
102
|
-
};
|
|
103
|
-
}
|
|
104
|
-
exports.histograms2table = histograms2table;
|
|
105
|
-
function guardForLargestBinSize(histograms) {
|
|
106
|
-
const first = histograms[0];
|
|
107
|
-
let mostBins = first.bins.length;
|
|
108
|
-
for (let i = 1; i < histograms.length; i++) {
|
|
109
|
-
(0, assert_1.guard)(histograms[i].binSize === first.binSize, `histograms must have the same bin-size, but ${histograms[i].name} has ${histograms[i].binSize} instead of ${first.binSize}`);
|
|
110
|
-
if (histograms[i].bins.length > mostBins) {
|
|
111
|
-
mostBins = histograms[i].bins.length;
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
return mostBins;
|
|
115
|
-
}
|
|
116
|
-
function writeRoResultsForHistograms(histograms, binIndex, row, countAsDensity, sums) {
|
|
117
|
-
for (let j = 0; j < histograms.length; j++) {
|
|
118
|
-
const bins = histograms[j].bins;
|
|
119
|
-
// does not have to be performant...
|
|
120
|
-
if (binIndex >= bins.length) {
|
|
121
|
-
row[j + 3] = '0'; /* in a histogram, 0 is the best default value for bins that are not present -- no value appeared in the corresponding bin */
|
|
122
|
-
}
|
|
123
|
-
else {
|
|
124
|
-
row[j + 3] = String(countAsDensity ? bins[binIndex] / sums[j] : bins[binIndex]);
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
//# sourceMappingURL=histogram.js.map
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
-
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
-
};
|
|
16
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./clusterer"), exports);
|
|
18
|
-
__exportStar(require("./post-process-output"), exports);
|
|
19
|
-
__exportStar(require("./histogram"), exports);
|
|
20
|
-
__exportStar(require("./file-based-count"), exports);
|
|
21
|
-
//# sourceMappingURL=index.js.map
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import type { FeatureSelection } from '../../../../statistics';
|
|
2
|
-
import type { ClusterReport } from './clusterer';
|
|
3
|
-
/**
|
|
4
|
-
* Post process the collections in a given folder, reducing them in a memory preserving way.
|
|
5
|
-
*
|
|
6
|
-
* @param filepath - Path to the root file of the data collection like `statistics-out/top-2023-01-01-00-00-00/`
|
|
7
|
-
* @param features - Collection of features to post process, expects corresponding folders to exist
|
|
8
|
-
*
|
|
9
|
-
* @returns non-aggregated reports for each sub-key of each feature
|
|
10
|
-
*/
|
|
11
|
-
export declare function postProcessFeatureFolder(filepath: string, features: FeatureSelection): ClusterReport[];
|
|
12
|
-
/**
|
|
13
|
-
* Prints the report to the console, but limits the output to the `limit` entries with the highest counts.
|
|
14
|
-
* The names of these entries (like `->`) are returned, so they can be used to filter the following histograms.
|
|
15
|
-
*/
|
|
16
|
-
export declare function printClusterReport(report: ClusterReport, limit?: number): string[];
|
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.printClusterReport = exports.postProcessFeatureFolder = void 0;
|
|
7
|
-
const statistics_1 = require("../../../../statistics");
|
|
8
|
-
const path_1 = __importDefault(require("path"));
|
|
9
|
-
const log_1 = require("../../../log");
|
|
10
|
-
const fs_1 = __importDefault(require("fs"));
|
|
11
|
-
const clusterer_1 = require("./clusterer");
|
|
12
|
-
const r_bridge_1 = require("../../../../r-bridge");
|
|
13
|
-
const defaultmap_1 = require("../../../defaultmap");
|
|
14
|
-
/**
|
|
15
|
-
* Post process the collections in a given folder, reducing them in a memory preserving way.
|
|
16
|
-
*
|
|
17
|
-
* @param filepath - Path to the root file of the data collection like `statistics-out/top-2023-01-01-00-00-00/`
|
|
18
|
-
* @param features - Collection of features to post process, expects corresponding folders to exist
|
|
19
|
-
*
|
|
20
|
-
* @returns non-aggregated reports for each sub-key of each feature
|
|
21
|
-
*/
|
|
22
|
-
function postProcessFeatureFolder(filepath, features) {
|
|
23
|
-
if (!fs_1.default.existsSync(filepath)) {
|
|
24
|
-
log_1.log.warn(`Folder for ${filepath} does not exist, skipping post processing`);
|
|
25
|
-
return [];
|
|
26
|
-
}
|
|
27
|
-
const results = [];
|
|
28
|
-
for (const feature of features) {
|
|
29
|
-
const result = processFeatureFolder(filepath, feature);
|
|
30
|
-
if (result.length > 0) {
|
|
31
|
-
results.push(...result);
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
return results;
|
|
35
|
-
}
|
|
36
|
-
exports.postProcessFeatureFolder = postProcessFeatureFolder;
|
|
37
|
-
/**
 * Handles the folder of one feature (such as `Assignments/`) inside the collection root.
 *
 * @param filepath - The collection root, identical to the argument of {@link postProcessFeatureFolder}
 * @param feature  - Key of the feature whose folder should be processed
 *
 * @returns one report per sub-key for which a report could be produced;
 *          an empty array if the feature folder is missing
 */
function processFeatureFolder(filepath, feature) {
    const info = statistics_1.ALL_FEATURES[feature];
    const featureFolder = path_1.default.join(filepath, info.name);
    if (!fs_1.default.existsSync(featureFolder)) {
        log_1.log.warn(`Folder for ${feature} does not exist at ${featureFolder} skipping post processing of this feature`);
        return [];
    }
    log_1.log.info(`Processing ${feature} at ${featureFolder}`);
    // one id map is shared by all sub-keys of this feature
    const sharedContextIds = new defaultmap_1.DefaultMap((0, r_bridge_1.deterministicCountingIdGenerator)());
    return Object.keys(info.initialValue)
        .map(subKey => processFeatureSubKey(featureFolder, subKey, sharedContextIds))
        .filter(report => report !== undefined);
}
|
|
62
|
-
/**
 * Clusters the statistics file of a single sub-key within a feature folder.
 *
 * @param featurePath  - Folder of the feature that owns the sub-key
 * @param subKey       - Name of the sub-key; combined with `defaultStatisticsFileSuffix` to form the file name
 * @param contextIdMap - Id map that is handed on to `clusterStatisticsOutput`
 *
 * @returns the result of `clusterStatisticsOutput`, or `undefined` if the file does not exist
 */
function processFeatureSubKey(featurePath, subKey, contextIdMap) {
    const statisticsFile = path_1.default.join(featurePath, `${subKey}${statistics_1.defaultStatisticsFileSuffix}`);
    if (fs_1.default.existsSync(statisticsFile)) {
        return (0, clusterer_1.clusterStatisticsOutput)(statisticsFile, contextIdMap);
    }
    log_1.log.warn(`Folder for ${subKey} does not exist at ${statisticsFile} skipping post processing of this key`);
    return undefined;
}
|
|
70
|
-
/**
 * Writes a cluster report to the console, restricted to the `limit` entries with the highest total counts.
 *
 * @param report - The report to print; its `filepath` and `valueInfoMap` are read
 * @param limit  - Maximum number of entries to print (defaults to 1000)
 *
 * @returns the names of the printed entries (like `->`), ordered by descending count,
 *          so they can be used to filter the following histograms
 */
function printClusterReport(report, limit = 1000) {
    console.log('\n\n\n');
    console.log(report.filepath);
    // condense every entry to its name, summed count and number of distinct values
    const rows = [];
    for (const [name, values] of [...report.valueInfoMap.entries()]) {
        let count = 0;
        for (const amount of [...values.values()]) {
            count = count + amount;
        }
        rows.push({ name, count, unique: values.size() });
    }
    const top = rows.sort((left, right) => right.count - left.count).slice(0, limit);
    // column widths, so that all printed rows line up
    let longestName = 0;
    let longestCount = 0;
    let longestUnique = 0;
    for (const { name, count, unique } of top) {
        longestName = Math.max(longestName, name.length);
        longestCount = Math.max(longestCount, count.toLocaleString().length);
        longestUnique = Math.max(longestUnique, unique.toLocaleString().length);
    }
    const whiteForeground = () => ({ color: 7 /* Colors.White */, effect: statistics_1.ColorEffect.Foreground });
    for (const { name, count, unique } of top) {
        const paddedName = `${name}`.padEnd(longestName, ' ');
        const paddedCount = count.toLocaleString().padStart(longestCount, ' ');
        const paddedUnique = unique.toLocaleString().padStart(longestUnique, ' ');
        const uniqueLabel = statistics_1.formatter.format('unique', whiteForeground());
        const uniquePart = `\t (${paddedUnique} ${uniqueLabel})`;
        const boldName = statistics_1.formatter.format(paddedName, { style: 1 /* FontStyles.Bold */ });
        const totalLabel = statistics_1.formatter.format('total', whiteForeground());
        let line = `\t${boldName}\t ${paddedCount} ${totalLabel}`;
        // the unique count is only shown if it adds information
        if (count !== unique) {
            line += uniquePart;
        }
        console.log(line);
    }
    return top.map(entry => entry.name);
}
|
|
102
|
-
exports.printClusterReport = printClusterReport;
|
|
103
|
-
//# sourceMappingURL=post-process-output.js.map
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import type { CommonSummarizerConfiguration } from '../../summarizer';
|
|
2
|
-
import type { StatisticsSummarizerConfiguration } from '../summarizer';
|
|
3
|
-
/**
 * Post-processes a collection folder to produce the final summaries.
 *
 * @param logger     - Receives the progress messages
 * @param filepath   - Root of the data collection (the folder that holds all archives)
 * @param config     - The summarizer configuration
 * @param outputPath - Destination for the results; not necessarily the root folder set in the configuration
 */
export declare function postProcessFeatureFolder(
    logger: CommonSummarizerConfiguration['logger'],
    filepath: string,
    config: StatisticsSummarizerConfiguration,
    outputPath: string
): void;
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.postProcessFeatureFolder = void 0;
|
|
7
|
-
const fs_1 = __importDefault(require("fs"));
|
|
8
|
-
const path_1 = __importDefault(require("path"));
|
|
9
|
-
const statistics_1 = require("../../../../statistics");
|
|
10
|
-
const files_1 = require("../../../files");
|
|
11
|
-
const assert_1 = require("../../../assert");
|
|
12
|
-
const time_1 = require("../../../time");
|
|
13
|
-
const data_1 = require("../../benchmark/data");
|
|
14
|
-
const process_1 = require("../../benchmark/first-phase/process");
|
|
15
|
-
const arrays_1 = require("../../../arrays");
|
|
16
|
-
/**
 * Runs the `postProcess` hook of every feature listed in `config.featuresToUse`.
 * Features without such a hook are skipped with a log message.
 *
 * @param config                 - Summarizer configuration; `featuresToUse` is iterated and the object is handed to each hook
 * @param filepath               - Input root; each feature reads from the sub-folder named after it
 * @param outputPath             - Output root; a sub-folder per feature is created if it does not exist yet
 * @param logger                 - Callback for progress messages
 * @param metaFeatureInformation - Handed on unchanged to each hook
 */
function postProcessFeatures(config, filepath, outputPath, logger, metaFeatureInformation) {
    for (const featureName of config.featuresToUse) {
        const info = statistics_1.ALL_FEATURES[featureName];
        const inputFolder = path_1.default.join(filepath, info.name);
        const outputFolder = path_1.default.join(outputPath, info.name);
        if (info.postProcess) {
            logger(` Post processing of ${featureName}...`);
            const outputFolderExists = fs_1.default.existsSync(outputFolder);
            if (!outputFolderExists) {
                fs_1.default.mkdirSync(outputFolder, { recursive: true });
            }
            // `global.gc` is only available when node was started with --expose-gc
            if (global.gc) {
                logger(` [${(0, time_1.date2string)(new Date())}] Running garbage collection (--expose-gc)`);
                global.gc();
            }
            info.postProcess(inputFolder, metaFeatureInformation, outputFolder, config);
        }
        else {
            logger(` Skipping post processing of ${featureName} as no post processing behavior is defined`);
        }
    }
}
|
|
36
|
-
/**
 * Writes `meta/stats.csv` below `outputPath`: a header, one row per entry of
 * `metaFeatureInformation`, and a closing `all` row that combines all entries.
 *
 * @param config                 - Not read by this function
 * @param filepath               - Not read by this function
 * @param outputPath             - Output root; the `meta` sub-folder is created if it does not exist yet
 * @param logger                 - Not read by this function
 * @param metaFeatureInformation - Iterable of `[file, info]` pairs; `info.stats` supplies the values of each row
 */
function postProcessMeta(config, filepath, outputPath, logger, metaFeatureInformation) {
    // summarize a list of numbers and render the summary as csv columns
    const toCsv = (values) => (0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)(values));
    const totals = {
        successfulParsed: [],
        processingTimeMs: [],
        failedRequests: [],
        // minimum line lengths of 1 and the like may stem from differing line endings
        lines: [],
        characters: [],
        numberOfNormalizedNodes: []
    };
    const metaFolder = path_1.default.join(outputPath, 'meta');
    if (!fs_1.default.existsSync(metaFolder)) {
        fs_1.default.mkdirSync(metaFolder, { recursive: true });
    }
    const out = fs_1.default.createWriteStream(path_1.default.join(outputPath, 'meta', 'stats.csv'));
    const processingHeader = (0, data_1.summarizedMeasurement2CsvHeader)('processing');
    const lineLengthHeader = (0, data_1.summarizedMeasurement2CsvHeader)('line-length');
    const linesHeader = (0, data_1.summarizedMeasurement2CsvHeader)('lines');
    const charactersHeader = (0, data_1.summarizedMeasurement2CsvHeader)('characters');
    out.write(`file,successfulParsed,${processingHeader},failedRequests,${lineLengthHeader},${linesHeader},${charactersHeader},numberOfNormalizedNodes\n`);
    for (const [file, info] of metaFeatureInformation) {
        // the character count could also be derived later on, computing it here keeps it explicit
        const characters = (0, arrays_1.sum)(info.stats.lines[0]);
        const stats = info.stats;
        const lineLengths = stats.lines[0];
        const processingCsv = toCsv(stats.processingTimeMs);
        const lineLengthCsv = toCsv(lineLengths);
        const lineCountCsv = toCsv([lineLengths.length]);
        const charactersCsv = toCsv([characters]);
        out.write(`${JSON.stringify(file)},${stats.successfulParsed},${processingCsv},`
            + `${stats.failedRequests.length},${lineLengthCsv},${lineCountCsv},${charactersCsv},${stats.numberOfNormalizedNodes[0]}\n`);
        totals.successfulParsed.push(stats.successfulParsed);
        totals.processingTimeMs.push(...stats.processingTimeMs);
        totals.failedRequests.push(stats.failedRequests.length);
        totals.lines.push(lineLengths);
        totals.characters.push(characters);
        totals.numberOfNormalizedNodes.push(stats.numberOfNormalizedNodes[0]);
    }
    const allParsed = (0, arrays_1.sum)(totals.successfulParsed);
    const allProcessingCsv = toCsv(totals.processingTimeMs);
    const allFailed = (0, arrays_1.sum)(totals.failedRequests);
    const allLineLengthCsv = toCsv(totals.lines.flat());
    const allLineCountCsv = toCsv(totals.lines.map(l => l.length));
    const allCharactersCsv = toCsv(totals.characters);
    const allNodes = (0, arrays_1.sum)(totals.numberOfNormalizedNodes);
    out.write(`all,${allParsed},${allProcessingCsv},`
        + `${allFailed},${allLineLengthCsv},${allLineCountCsv},${allCharactersCsv},${allNodes}\n`);
    out.close();
}
|
|
67
|
-
/**
 * Post-processes a collection folder to produce the final summaries:
 * the meta information is read first, then the features and the meta statistics are processed.
 *
 * @param logger     - Receives the progress messages
 * @param filepath   - Root of the data collection (the folder that holds all archives);
 *                     nothing happens apart from a log message if it does not exist
 * @param config     - The summarizer configuration
 * @param outputPath - Destination for the results (created if missing); not necessarily the configured root folder
 */
function postProcessFeatureFolder(logger, filepath, config, outputPath) {
    const inputExists = fs_1.default.existsSync(filepath);
    if (!inputExists) {
        logger(` Folder for ${filepath} does not exist, skipping post processing`);
        return;
    }
    const outputExists = fs_1.default.existsSync(outputPath);
    if (!outputExists) {
        fs_1.default.mkdirSync(outputPath, { recursive: true });
    }
    const featuresFile = path_1.default.join(filepath, 'meta', 'features.txt');
    const statsFile = path_1.default.join(filepath, 'meta', 'stats.txt');
    const metaFeatureInformation = extractMetaInformationFrom(logger, featuresFile, statsFile);
    postProcessFeatures(config, filepath, outputPath, logger, metaFeatureInformation);
    postProcessMeta(config, filepath, outputPath, logger, metaFeatureInformation);
}
|
|
87
|
-
exports.postProcessFeatureFolder = postProcessFeatureFolder;
|
|
88
|
-
/**
 * Reads the two meta files line by line (one JSON document per non-empty line) and merges them per file.
 *
 * @param logger           - Receives the progress messages
 * @param metaFeaturesPath - File whose lines provide `file` and `content`; `content` becomes the map value
 * @param metaStatsPath    - File whose lines provide `file` and `content`; `content` is stored as `stats`
 *                           on the value already registered for `file`
 *
 * @returns a map from `file` to the merged information; the guard fails if the stats file
 *          refers to a `file` that the features file did not contain
 */
function extractMetaInformationFrom(logger, metaFeaturesPath, metaStatsPath) {
    const storage = new Map();
    const timestamp = () => (0, time_1.date2string)(new Date());
    // shared reading loop: skips empty lines, reports progress and hands the parsed line to `consume`
    const forEachEntry = (path, label, consume) => {
        (0, files_1.readLineByLineSync)(path, (line, lineNumber) => {
            if (line.length === 0) {
                return;
            }
            if (lineNumber % 2_500 === 0) {
                logger(` [${timestamp()}] ${lineNumber} ${label} lines processed`);
            }
            consume(JSON.parse(line.toString()), lineNumber);
        });
    };
    logger(` [${timestamp()}] Collect feature statistics`);
    forEachEntry(metaFeaturesPath, 'meta feature', (meta) => {
        storage.set(meta.file, meta.content);
    });
    logger(` [${timestamp()}] Collect meta statistics`);
    forEachEntry(metaStatsPath, 'meta statistics', (meta, lineNumber) => {
        const known = storage.get(meta.file);
        (0, assert_1.guard)(known !== undefined, () => `Expected to find meta information for ${meta.file} in line ${lineNumber + 1} of ${metaFeaturesPath}`);
        known.stats = meta.content;
    });
    logger(` [${timestamp()}] Done collecting meta information`);
    return storage;
}
|
|
117
|
-
//# sourceMappingURL=process.js.map
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import type { CommonSummarizerConfiguration } from '../summarizer';
|
|
2
|
-
import { Summarizer } from '../summarizer';
|
|
3
|
-
import type { FeatureSelection } from '../../../statistics';
|
|
4
|
-
/**
 * Configuration of the statistics summarizer, extending the options shared by all summarizers.
 */
export interface StatisticsSummarizerConfiguration extends CommonSummarizerConfiguration {
    /** Folder from which all zips are read */
    inputPath: string;
    /** The features for which summaries are extracted */
    featuresToUse: FeatureSelection;
    /** Where the preparation phase stores its intermediate results */
    intermediateOutputPath: string;
    /** Where the summarization phase stores its final results */
    outputPath: string;
    /** Number of folders to skip in order to find the project root */
    projectSkip: number;
}
|
|
26
|
-
export declare const statisticsFileNameRegex: RegExp;
|
|
27
|
-
/**
 * Summarizer for the statistics, configured by a {@link StatisticsSummarizerConfiguration}.
 */
export declare class StatisticsSummarizer extends Summarizer<unknown, StatisticsSummarizerConfiguration> {
    constructor(config: StatisticsSummarizerConfiguration);

    private removeIfExists;

    /**
     * Preparation phase: in essence, all files are merged into a single one by appending their lines.
     */
    preparationPhase(useTypeClassification: boolean): Promise<void>;

    summarizePhase(): Promise<unknown>;
}
|