@eagleoutice/flowr 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300) hide show
  1. package/README.md +3 -3
  2. package/abstract-interpretation/processor.js +9 -3
  3. package/config.d.ts +16 -0
  4. package/config.js +75 -0
  5. package/core/input.d.ts +1 -1
  6. package/core/output.d.ts +1 -2
  7. package/core/print/parse-printer.d.ts +1 -2
  8. package/core/print/parse-printer.js +6 -4
  9. package/core/print/slice-diff-ansi.js +7 -7
  10. package/core/slicer.js +4 -8
  11. package/core/steps.d.ts +355 -31
  12. package/core/steps.js +7 -14
  13. package/dataflow/environments/environment.js +8 -0
  14. package/dataflow/environments/register.js +1 -0
  15. package/dataflow/extractor.d.ts +2 -2
  16. package/dataflow/extractor.js +10 -2
  17. package/dataflow/internal/process/functions/function-call.js +7 -1
  18. package/dataflow/internal/process/functions/source.d.ts +8 -0
  19. package/dataflow/internal/process/functions/source.js +81 -0
  20. package/dataflow/processor.d.ts +10 -1
  21. package/index.d.ts +0 -2
  22. package/index.js +0 -2
  23. package/package.json +75 -202
  24. package/r-bridge/lang-4.x/ast/index.d.ts +1 -0
  25. package/r-bridge/lang-4.x/ast/index.js +3 -0
  26. package/r-bridge/lang-4.x/ast/model/processing/decorate.d.ts +2 -0
  27. package/r-bridge/lang-4.x/ast/model/processing/decorate.js +6 -1
  28. package/r-bridge/lang-4.x/ast/parser/json/format.d.ts +14 -0
  29. package/r-bridge/lang-4.x/ast/parser/json/format.js +26 -0
  30. package/r-bridge/lang-4.x/ast/parser/json/parser.d.ts +7 -0
  31. package/r-bridge/lang-4.x/ast/parser/json/parser.js +57 -0
  32. package/r-bridge/lang-4.x/ast/parser/xml/data.d.ts +0 -3
  33. package/r-bridge/lang-4.x/ast/parser/xml/index.d.ts +0 -2
  34. package/r-bridge/lang-4.x/ast/parser/xml/index.js +0 -2
  35. package/r-bridge/lang-4.x/ast/parser/xml/input-format.d.ts +5 -1
  36. package/r-bridge/lang-4.x/ast/parser/xml/input-format.js +7 -10
  37. package/r-bridge/lang-4.x/ast/parser/xml/internal/access.js +2 -2
  38. package/r-bridge/lang-4.x/ast/parser/xml/internal/control/if-then-else.js +1 -1
  39. package/r-bridge/lang-4.x/ast/parser/xml/internal/control/if-then.js +2 -2
  40. package/r-bridge/lang-4.x/ast/parser/xml/internal/expression/expression.js +4 -4
  41. package/r-bridge/lang-4.x/ast/parser/xml/internal/functions/argument.js +2 -2
  42. package/r-bridge/lang-4.x/ast/parser/xml/internal/functions/call.js +4 -4
  43. package/r-bridge/lang-4.x/ast/parser/xml/internal/functions/definition.js +2 -2
  44. package/r-bridge/lang-4.x/ast/parser/xml/internal/functions/parameter.js +2 -2
  45. package/r-bridge/lang-4.x/ast/parser/xml/internal/index.d.ts +0 -1
  46. package/r-bridge/lang-4.x/ast/parser/xml/internal/index.js +0 -1
  47. package/r-bridge/lang-4.x/ast/parser/xml/internal/loops/break.js +2 -2
  48. package/r-bridge/lang-4.x/ast/parser/xml/internal/loops/for.js +3 -6
  49. package/r-bridge/lang-4.x/ast/parser/xml/internal/loops/next.js +2 -2
  50. package/r-bridge/lang-4.x/ast/parser/xml/internal/loops/repeat.js +2 -2
  51. package/r-bridge/lang-4.x/ast/parser/xml/internal/loops/while.js +2 -2
  52. package/r-bridge/lang-4.x/ast/parser/xml/internal/meta.d.ts +6 -11
  53. package/r-bridge/lang-4.x/ast/parser/xml/internal/meta.js +15 -23
  54. package/r-bridge/lang-4.x/ast/parser/xml/internal/operators/binary.js +6 -6
  55. package/r-bridge/lang-4.x/ast/parser/xml/internal/operators/unary.js +3 -3
  56. package/r-bridge/lang-4.x/ast/parser/xml/internal/other/comment.js +2 -2
  57. package/r-bridge/lang-4.x/ast/parser/xml/internal/other/line-directive.js +2 -2
  58. package/r-bridge/lang-4.x/ast/parser/xml/internal/structure/elements.js +3 -3
  59. package/r-bridge/lang-4.x/ast/parser/xml/internal/structure/root.js +3 -4
  60. package/r-bridge/lang-4.x/ast/parser/xml/internal/structure/single-element.js +2 -2
  61. package/r-bridge/lang-4.x/ast/parser/xml/internal/values/number.js +2 -2
  62. package/r-bridge/lang-4.x/ast/parser/xml/internal/values/string.js +2 -2
  63. package/r-bridge/lang-4.x/ast/parser/xml/internal/values/symbol.js +4 -4
  64. package/r-bridge/lang-4.x/values.d.ts +0 -1
  65. package/r-bridge/lang-4.x/values.js +14 -6
  66. package/r-bridge/retriever.d.ts +25 -21
  67. package/r-bridge/retriever.js +73 -23
  68. package/r-bridge/shell-executor.d.ts +3 -17
  69. package/r-bridge/shell-executor.js +9 -78
  70. package/r-bridge/shell.d.ts +5 -27
  71. package/r-bridge/shell.js +31 -92
  72. package/{statistics/output → util}/ansi.js +1 -1
  73. package/util/args.d.ts +8 -4
  74. package/util/args.js +11 -4
  75. package/util/cfg/visitor.js +1 -1
  76. package/util/files.d.ts +6 -0
  77. package/util/files.js +11 -1
  78. package/util/log.js +3 -0
  79. package/util/{summarizer/summarizer.d.ts → summarizer.d.ts} +15 -1
  80. package/util/summarizer.js +37 -0
  81. package/util/version.d.ts +2 -0
  82. package/util/version.js +10 -0
  83. package/benchmark/index.d.ts +0 -3
  84. package/benchmark/index.js +0 -20
  85. package/benchmark/slicer.d.ts +0 -101
  86. package/benchmark/slicer.js +0 -225
  87. package/benchmark/stats/index.d.ts +0 -10
  88. package/benchmark/stats/index.js +0 -27
  89. package/benchmark/stats/print.d.ts +0 -7
  90. package/benchmark/stats/print.js +0 -157
  91. package/benchmark/stats/stats.d.ts +0 -41
  92. package/benchmark/stats/stats.js +0 -6
  93. package/benchmark/stopwatch.d.ts +0 -35
  94. package/benchmark/stopwatch.js +0 -79
  95. package/cli/benchmark-app.d.ts +0 -9
  96. package/cli/benchmark-app.js +0 -52
  97. package/cli/benchmark-helper-app.d.ts +0 -7
  98. package/cli/benchmark-helper-app.js +0 -60
  99. package/cli/common/features.d.ts +0 -3
  100. package/cli/common/features.js +0 -30
  101. package/cli/common/index.d.ts +0 -2
  102. package/cli/common/index.js +0 -19
  103. package/cli/common/options.d.ts +0 -20
  104. package/cli/common/options.js +0 -82
  105. package/cli/common/script.d.ts +0 -21
  106. package/cli/common/script.js +0 -61
  107. package/cli/common/scripts-info.d.ts +0 -24
  108. package/cli/common/scripts-info.js +0 -69
  109. package/cli/export-quads-app.d.ts +0 -7
  110. package/cli/export-quads-app.js +0 -46
  111. package/cli/repl/commands/cfg.d.ts +0 -3
  112. package/cli/repl/commands/cfg.js +0 -37
  113. package/cli/repl/commands/commands.d.ts +0 -11
  114. package/cli/repl/commands/commands.js +0 -103
  115. package/cli/repl/commands/dataflow.d.ts +0 -3
  116. package/cli/repl/commands/dataflow.js +0 -34
  117. package/cli/repl/commands/execute.d.ts +0 -4
  118. package/cli/repl/commands/execute.js +0 -27
  119. package/cli/repl/commands/index.d.ts +0 -2
  120. package/cli/repl/commands/index.js +0 -19
  121. package/cli/repl/commands/main.d.ts +0 -39
  122. package/cli/repl/commands/main.js +0 -14
  123. package/cli/repl/commands/normalize.d.ts +0 -3
  124. package/cli/repl/commands/normalize.js +0 -34
  125. package/cli/repl/commands/parse.d.ts +0 -2
  126. package/cli/repl/commands/parse.js +0 -109
  127. package/cli/repl/commands/quit.d.ts +0 -2
  128. package/cli/repl/commands/quit.js +0 -12
  129. package/cli/repl/commands/version.d.ts +0 -16
  130. package/cli/repl/commands/version.js +0 -33
  131. package/cli/repl/core.d.ts +0 -39
  132. package/cli/repl/core.js +0 -116
  133. package/cli/repl/execute.d.ts +0 -28
  134. package/cli/repl/execute.js +0 -79
  135. package/cli/repl/index.d.ts +0 -5
  136. package/cli/repl/index.js +0 -22
  137. package/cli/repl/prompt.d.ts +0 -2
  138. package/cli/repl/prompt.js +0 -9
  139. package/cli/repl/server/connection.d.ts +0 -21
  140. package/cli/repl/server/connection.js +0 -218
  141. package/cli/repl/server/messages/analysis.d.ts +0 -71
  142. package/cli/repl/server/messages/analysis.js +0 -21
  143. package/cli/repl/server/messages/error.d.ts +0 -11
  144. package/cli/repl/server/messages/error.js +0 -3
  145. package/cli/repl/server/messages/hello.d.ts +0 -20
  146. package/cli/repl/server/messages/hello.js +0 -3
  147. package/cli/repl/server/messages/index.d.ts +0 -1
  148. package/cli/repl/server/messages/index.js +0 -3
  149. package/cli/repl/server/messages/messages.d.ts +0 -35
  150. package/cli/repl/server/messages/messages.js +0 -40
  151. package/cli/repl/server/messages/repl.d.ts +0 -33
  152. package/cli/repl/server/messages/repl.js +0 -37
  153. package/cli/repl/server/messages/slice.d.ts +0 -25
  154. package/cli/repl/server/messages/slice.js +0 -37
  155. package/cli/repl/server/net.d.ts +0 -49
  156. package/cli/repl/server/net.js +0 -63
  157. package/cli/repl/server/send.d.ts +0 -4
  158. package/cli/repl/server/send.js +0 -18
  159. package/cli/repl/server/server.d.ts +0 -20
  160. package/cli/repl/server/server.js +0 -66
  161. package/cli/repl/server/validate.d.ts +0 -15
  162. package/cli/repl/server/validate.js +0 -34
  163. package/cli/slicer-app.d.ts +0 -11
  164. package/cli/slicer-app.js +0 -81
  165. package/cli/statistics-app.d.ts +0 -11
  166. package/cli/statistics-app.js +0 -98
  167. package/cli/statistics-helper-app.d.ts +0 -11
  168. package/cli/statistics-helper-app.js +0 -83
  169. package/cli/summarizer-app.d.ts +0 -18
  170. package/cli/summarizer-app.js +0 -67
  171. package/flowr.d.ts +0 -27
  172. package/flowr.js +0 -137
  173. package/r-bridge/lang-4.x/ast/parser/xml/config.d.ts +0 -25
  174. package/r-bridge/lang-4.x/ast/parser/xml/config.js +0 -16
  175. package/r-bridge/lang-4.x/ast/parser/xml/internal/xml-to-json.d.ts +0 -9
  176. package/r-bridge/lang-4.x/ast/parser/xml/internal/xml-to-json.js +0 -51
  177. package/r-bridge/lang-4.x/ast/parser/xml/parser.d.ts +0 -17
  178. package/r-bridge/lang-4.x/ast/parser/xml/parser.js +0 -30
  179. package/statistics/features/common-syntax-probability.d.ts +0 -31
  180. package/statistics/features/common-syntax-probability.js +0 -156
  181. package/statistics/features/feature.d.ts +0 -175
  182. package/statistics/features/feature.js +0 -30
  183. package/statistics/features/index.d.ts +0 -1
  184. package/statistics/features/index.js +0 -18
  185. package/statistics/features/post-processing.d.ts +0 -12
  186. package/statistics/features/post-processing.js +0 -21
  187. package/statistics/features/supported/assignments/assignments.d.ts +0 -11
  188. package/statistics/features/supported/assignments/assignments.js +0 -53
  189. package/statistics/features/supported/assignments/index.d.ts +0 -1
  190. package/statistics/features/supported/assignments/index.js +0 -6
  191. package/statistics/features/supported/assignments/post-process.d.ts +0 -3
  192. package/statistics/features/supported/assignments/post-process.js +0 -125
  193. package/statistics/features/supported/comments/comments.d.ts +0 -18
  194. package/statistics/features/supported/comments/comments.js +0 -133
  195. package/statistics/features/supported/comments/index.d.ts +0 -1
  196. package/statistics/features/supported/comments/index.js +0 -6
  197. package/statistics/features/supported/comments/post-process.d.ts +0 -3
  198. package/statistics/features/supported/comments/post-process.js +0 -50
  199. package/statistics/features/supported/control-flow/control-flow.d.ts +0 -17
  200. package/statistics/features/supported/control-flow/control-flow.js +0 -67
  201. package/statistics/features/supported/control-flow/index.d.ts +0 -1
  202. package/statistics/features/supported/control-flow/index.js +0 -6
  203. package/statistics/features/supported/control-flow/post-process.d.ts +0 -3
  204. package/statistics/features/supported/control-flow/post-process.js +0 -65
  205. package/statistics/features/supported/data-access/data-access.d.ts +0 -15
  206. package/statistics/features/supported/data-access/data-access.js +0 -118
  207. package/statistics/features/supported/data-access/index.d.ts +0 -1
  208. package/statistics/features/supported/data-access/index.js +0 -6
  209. package/statistics/features/supported/data-access/post-process.d.ts +0 -3
  210. package/statistics/features/supported/data-access/post-process.js +0 -107
  211. package/statistics/features/supported/defined-functions/defined-functions.d.ts +0 -35
  212. package/statistics/features/supported/defined-functions/defined-functions.js +0 -139
  213. package/statistics/features/supported/defined-functions/index.d.ts +0 -1
  214. package/statistics/features/supported/defined-functions/index.js +0 -6
  215. package/statistics/features/supported/defined-functions/post-process.d.ts +0 -6
  216. package/statistics/features/supported/defined-functions/post-process.js +0 -177
  217. package/statistics/features/supported/expression-list/expression-list.d.ts +0 -9
  218. package/statistics/features/supported/expression-list/expression-list.js +0 -36
  219. package/statistics/features/supported/expression-list/index.d.ts +0 -1
  220. package/statistics/features/supported/expression-list/index.js +0 -6
  221. package/statistics/features/supported/expression-list/post-process.d.ts +0 -3
  222. package/statistics/features/supported/expression-list/post-process.js +0 -44
  223. package/statistics/features/supported/index.d.ts +0 -10
  224. package/statistics/features/supported/index.js +0 -27
  225. package/statistics/features/supported/loops/index.d.ts +0 -1
  226. package/statistics/features/supported/loops/index.js +0 -6
  227. package/statistics/features/supported/loops/loops.d.ts +0 -20
  228. package/statistics/features/supported/loops/loops.js +0 -79
  229. package/statistics/features/supported/loops/post-process.d.ts +0 -3
  230. package/statistics/features/supported/loops/post-process.js +0 -72
  231. package/statistics/features/supported/used-functions/index.d.ts +0 -1
  232. package/statistics/features/supported/used-functions/index.js +0 -6
  233. package/statistics/features/supported/used-functions/post-process.d.ts +0 -6
  234. package/statistics/features/supported/used-functions/post-process.js +0 -179
  235. package/statistics/features/supported/used-functions/used-functions.d.ts +0 -24
  236. package/statistics/features/supported/used-functions/used-functions.js +0 -95
  237. package/statistics/features/supported/used-packages/index.d.ts +0 -1
  238. package/statistics/features/supported/used-packages/index.js +0 -6
  239. package/statistics/features/supported/used-packages/post-process.d.ts +0 -3
  240. package/statistics/features/supported/used-packages/post-process.js +0 -121
  241. package/statistics/features/supported/used-packages/used-packages.d.ts +0 -16
  242. package/statistics/features/supported/used-packages/used-packages.js +0 -130
  243. package/statistics/features/supported/values/index.d.ts +0 -1
  244. package/statistics/features/supported/values/index.js +0 -6
  245. package/statistics/features/supported/values/post-process.d.ts +0 -3
  246. package/statistics/features/supported/values/post-process.js +0 -72
  247. package/statistics/features/supported/values/values.d.ts +0 -14
  248. package/statistics/features/supported/values/values.js +0 -101
  249. package/statistics/features/supported/variables/index.d.ts +0 -1
  250. package/statistics/features/supported/variables/index.js +0 -6
  251. package/statistics/features/supported/variables/post-process.d.ts +0 -9
  252. package/statistics/features/supported/variables/post-process.js +0 -122
  253. package/statistics/features/supported/variables/variables.d.ts +0 -15
  254. package/statistics/features/supported/variables/variables.js +0 -70
  255. package/statistics/index.d.ts +0 -6
  256. package/statistics/index.js +0 -24
  257. package/statistics/meta-statistics.d.ts +0 -33
  258. package/statistics/meta-statistics.js +0 -17
  259. package/statistics/output/file-provider.d.ts +0 -37
  260. package/statistics/output/file-provider.js +0 -97
  261. package/statistics/output/index.d.ts +0 -4
  262. package/statistics/output/index.js +0 -21
  263. package/statistics/output/print-stats.d.ts +0 -17
  264. package/statistics/output/print-stats.js +0 -69
  265. package/statistics/output/statistics-file.d.ts +0 -37
  266. package/statistics/output/statistics-file.js +0 -69
  267. package/statistics/statistics.d.ts +0 -24
  268. package/statistics/statistics.js +0 -109
  269. package/util/summarizer/auto-detect.d.ts +0 -2
  270. package/util/summarizer/auto-detect.js +0 -32
  271. package/util/summarizer/benchmark/data.d.ts +0 -66
  272. package/util/summarizer/benchmark/data.js +0 -13
  273. package/util/summarizer/benchmark/first-phase/input.d.ts +0 -2
  274. package/util/summarizer/benchmark/first-phase/input.js +0 -59
  275. package/util/summarizer/benchmark/first-phase/process.d.ts +0 -10
  276. package/util/summarizer/benchmark/first-phase/process.js +0 -208
  277. package/util/summarizer/benchmark/second-phase/graph.d.ts +0 -2
  278. package/util/summarizer/benchmark/second-phase/graph.js +0 -54
  279. package/util/summarizer/benchmark/second-phase/process.d.ts +0 -4
  280. package/util/summarizer/benchmark/second-phase/process.js +0 -89
  281. package/util/summarizer/benchmark/summarizer.d.ts +0 -35
  282. package/util/summarizer/benchmark/summarizer.js +0 -49
  283. package/util/summarizer/statistics/first-phase/process.d.ts +0 -6
  284. package/util/summarizer/statistics/first-phase/process.js +0 -81
  285. package/util/summarizer/statistics/post-process/clusterer.d.ts +0 -26
  286. package/util/summarizer/statistics/post-process/clusterer.js +0 -43
  287. package/util/summarizer/statistics/post-process/file-based-count.d.ts +0 -17
  288. package/util/summarizer/statistics/post-process/file-based-count.js +0 -49
  289. package/util/summarizer/statistics/post-process/histogram.d.ts +0 -59
  290. package/util/summarizer/statistics/post-process/histogram.js +0 -128
  291. package/util/summarizer/statistics/post-process/index.d.ts +0 -4
  292. package/util/summarizer/statistics/post-process/index.js +0 -21
  293. package/util/summarizer/statistics/post-process/post-process-output.d.ts +0 -16
  294. package/util/summarizer/statistics/post-process/post-process-output.js +0 -104
  295. package/util/summarizer/statistics/second-phase/process.d.ts +0 -11
  296. package/util/summarizer/statistics/second-phase/process.js +0 -117
  297. package/util/summarizer/statistics/summarizer.d.ts +0 -35
  298. package/util/summarizer/statistics/summarizer.js +0 -135
  299. package/util/summarizer/summarizer.js +0 -13
  300. /package/{statistics/output → util}/ansi.d.ts +0 -0
@@ -1,43 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.clusterStatisticsOutput = void 0;
7
- /**
8
- * Reading the given file line by line and expecting constructs of {@link StatisticsOutputFormat},
9
- * this module is responsible for identifying interesting groups of same data.
10
- *
11
- * @module
12
- */
13
- const n_readlines_1 = __importDefault(require("n-readlines"));
14
- const defaultmap_1 = require("../../../defaultmap");
15
- const r_bridge_1 = require("../../../../r-bridge");
16
- /**
17
- * Takes a statistics file like `statistics-out/top-2023-01-01-00-00-00/Assignments/assignmentOperator.txt` and clusters the values by context
18
- *
19
- * @param filepath - Filepath of the statistics file
20
- * @param contextIdMap - The id map to use, can use an existing one to reuse ids for same contexts spreading over multiple input files.
21
- * `undefined` is used for unknown contexts. This map allows us to reference contexts with a way shorter identifier (vs. the full file path).
22
- */
23
- function clusterStatisticsOutput(filepath, contextIdMap = new defaultmap_1.DefaultMap((0, r_bridge_1.deterministicCountingIdGenerator)())) {
24
- const lineReader = new n_readlines_1.default(filepath);
25
- // for each value we store the context ids it was seen in (may list the same context multiple times if more often) - this serves as a counter as well
26
- const valueInfoMap = new defaultmap_1.DefaultMap(() => new defaultmap_1.DefaultMap(() => 0));
27
- let line;
28
- // eslint-disable-next-line no-cond-assign
29
- while (line = lineReader.next()) {
30
- const json = JSON.parse(line.toString());
31
- const contextId = contextIdMap.get(json[1]);
32
- const value = valueInfoMap.get(json[0]);
33
- // step the counter accordingly
34
- value.set(contextId, value.get(contextId) + 1);
35
- }
36
- return {
37
- filepath,
38
- contextIdMap,
39
- valueInfoMap
40
- };
41
- }
42
- exports.clusterStatisticsOutput = clusterStatisticsOutput;
43
- //# sourceMappingURL=clusterer.js.map
@@ -1,17 +0,0 @@
1
- import type { ClusterReport } from './clusterer';
2
- export interface FileBasedTable {
3
- header: string[];
4
- rows: string[][];
5
- }
6
- /**
7
- * The purpose of this function is to reformat {@link ClusterReport} in way that lists file-based contributions.
8
- * E.g., "the file with id 12 contained the assignment with `<-` 3 times".
9
- * Feature Values are listed in the header.
10
- *
11
- * @param report - the report to reformat
12
- */
13
- export declare function fileBasedCount(report: ClusterReport): FileBasedTable;
14
- /**
15
- * The threshold will cap of values larger to the threshold.
16
- */
17
- export declare function writeFileBasedCountToFile(table: FileBasedTable, filepath: string): void;
@@ -1,49 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.writeFileBasedCountToFile = exports.fileBasedCount = void 0;
7
- const fs_1 = __importDefault(require("fs"));
8
- /**
9
- * The purpose of this function is to reformat {@link ClusterReport} in way that lists file-based contributions.
10
- * E.g., "the file with id 12 contained the assignment with `<-` 3 times".
11
- * Feature Values are listed in the header.
12
- *
13
- * @param report - the report to reformat
14
- */
15
- function fileBasedCount(report) {
16
- const values = report.valueInfoMap;
17
- const contexts = [...report.valueInfoMap.values()];
18
- const header = [...values.keys()].map(k => `"${k}"`);
19
- const rows = [];
20
- for (const id of report.contextIdMap.values()) {
21
- rows.push(contexts.map(c => `${c.get(id)}`));
22
- }
23
- return {
24
- header: header,
25
- rows: rows
26
- };
27
- }
28
- exports.fileBasedCount = fileBasedCount;
29
- /**
30
- * The threshold will cap of values larger to the threshold.
31
- */
32
- function writeFileBasedCountToFile(table, filepath) {
33
- const handle = fs_1.default.openSync(filepath, 'w');
34
- const header = table.header.join('\t');
35
- fs_1.default.writeSync(handle, `${header}\n`);
36
- let max = 0;
37
- function processEntry(r) {
38
- const val = Number(r);
39
- max = Math.max(val, max);
40
- return r;
41
- }
42
- for (const row of table.rows) {
43
- fs_1.default.writeSync(handle, row.map(processEntry).join('\t') + '\n');
44
- }
45
- fs_1.default.writeSync(handle, `%%% max: ${max}\n`);
46
- fs_1.default.closeSync(handle);
47
- }
48
- exports.writeFileBasedCountToFile = writeFileBasedCountToFile;
49
- //# sourceMappingURL=file-based-count.js.map
@@ -1,59 +0,0 @@
1
- import type { ClusterReport } from './clusterer';
2
- import type { Table } from '../../../files';
3
- /**
4
- * A conventional histogram (e.g., created by {@link histogramFromNumbers}).
5
- * Can be converted to a {@link Table} by {@link histograms2table}.
6
- * As described in {@link histogramFromNumbers}, there always will be a special bin for minimum.
7
- */
8
- export interface Histogram {
9
- /** A name intended for humans to know what the histogram is about. */
10
- readonly name: string;
11
- /** Values located in each bin */
12
- bins: number[];
13
- /** The configured size of each bin (stored explicitly to avoid semantic confusion with floating point arithmetic/problems with different rounding schemes) */
14
- binSize: number;
15
- /** Minimum value encountered (inclusive minimum of the underlying value range) */
16
- min: number;
17
- /** Maximum value encountered (inclusive maximum of the underlying value range) */
18
- max: number;
19
- /** Average of the included numbers */
20
- mean: number;
21
- /** Standard deviation of the included numbers */
22
- std: number;
23
- /** Median of the included numbers */
24
- median: number;
25
- }
26
- /**
27
- * Produces column-wise histogram-information based on a {@link ClusterReport}.
28
- *
29
- * Let's suppose you want histograms for the Assignments feature.
30
- * By default, for each clustered value, a histogram is produced (can be configured by `filter`).
31
- *
32
- * @param report - The report to collect histogram information from
33
- * @param binSize - Size of each bin (see {@link histogramFromNumbers} for details on why we do not specify the bin-count)
34
- * @param relateValuesToNumberOfLines - If true, each value (like `<-` appeared in file 'x' exactly `N` times) will be divided by the number of lines in the file 'x'.
35
- * @param filter - If given, only produce histograms for the given values
36
- */
37
- export declare function histogramsFromClusters(report: ClusterReport, binSize: number, relateValuesToNumberOfLines: boolean, ...filter: string[]): Histogram[];
38
- /**
39
- * Produces a histogram from a list of numbers.
40
- * Because we need to create several histograms of different datasets and want to compare them, we do not accept the
41
- * number of bins desired and calculate the bin-size from the data (via `Math.ceil((max - min + 1) / bins)`).
42
- * Instead, we require the bin-size to be given.
43
- * There *always* will be an extra bin for the minimum value.
44
- */
45
- export declare function histogramFromNumbers(name: string, binSize: number, values: number[]): Histogram;
46
- /**
47
- * Takes an array of histograms created by {@link histogramFromNumbers} and produces a CSV table from it.
48
- * They must have the same bin-size for this function to work.
49
- *
50
- * The table has the following columns:
51
- * - `bin` - The corresponding bin number
52
- * - `from` - The exclusive lower bound of the bin
53
- * - `to` - The inclusive upper bound of the bin
54
- * - a column with the name of each histogram, containing its count of values in the corresponding bin
55
- *
56
- * @param histograms - The histogram to convert (assumed to have the same ranges and bins)
57
- * @param countAsDensity - If true, the count is divided by the total number of values (individually for each histogram, similar to pgfplots `hist/density` option)
58
- */
59
- export declare function histograms2table(histograms: Histogram[], countAsDensity?: boolean): Table;
@@ -1,128 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.histograms2table = exports.histogramFromNumbers = exports.histogramsFromClusters = void 0;
7
- const defaultmap_1 = require("../../../defaultmap");
8
- const assert_1 = require("../../../assert");
9
- const bimap_1 = require("../../../bimap");
10
- const fs_1 = __importDefault(require("fs"));
11
- const benchmark_1 = require("../../../../benchmark");
12
- /**
13
- * Produces column-wise histogram-information based on a {@link ClusterReport}.
14
- *
15
- * Let's suppose you want histograms for the Assignments feature.
16
- * By default, for each clustered value, a histogram is produced (can be configured by `filter`).
17
- *
18
- * @param report - The report to collect histogram information from
19
- * @param binSize - Size of each bin (see {@link histogramFromNumbers} for details on why we do not specify the bin-count)
20
- * @param relateValuesToNumberOfLines - If true, each value (like `<-` appeared in file 'x' exactly `N` times) will be divided by the number of lines in the file 'x'.
21
- * @param filter - If given, only produce histograms for the given values
22
- */
23
- function histogramsFromClusters(report, binSize, relateValuesToNumberOfLines, ...filter) {
24
- const contexts = [...report.valueInfoMap.entries()];
25
- const filenameFromId = new bimap_1.BiMap(report.contextIdMap.entries());
26
- // first, we collect the number of appearances for each value
27
- const valueCounts = new defaultmap_1.DefaultMap(() => []);
28
- for (const id of report.contextIdMap.values()) {
29
- // calculate the number of lines within the file given by the id
30
- const filename = filenameFromId.getKey(id);
31
- (0, assert_1.guard)(filename !== undefined, `filename for id ${id} is undefined`);
32
- const numberOfLines = relateValuesToNumberOfLines ? fs_1.default.readFileSync(filename, 'utf-8').split('\n').length : 1;
33
- for (const [value, counts] of contexts) {
34
- valueCounts.get(value).push(counts.get(id) / numberOfLines);
35
- }
36
- }
37
- return [...valueCounts.entries()].map(([name, counts]) => filter.length === 0 || filter.includes(name) ? histogramFromNumbers(name, binSize, counts) : undefined).filter(assert_1.isNotUndefined);
38
- }
39
- exports.histogramsFromClusters = histogramsFromClusters;
40
- /**
41
- * Produces a histogram from a list of numbers.
42
- * Because we need to create several histograms of different datasets and want to compare them, we do not accept the
43
- * number of bins desired and calculate the bin-size from the data (via `Math.ceil((max - min + 1) / bins)`).
44
- * Instead, we require the bin-size to be given.
45
- * There *always* will be an extra bin for the minimum value.
46
- */
47
- function histogramFromNumbers(name, binSize, values) {
48
- (0, assert_1.guard)(binSize > 0, `binSize must be greater than 0, but was ${binSize}`);
49
- (0, assert_1.guard)(values.length > 0, 'values must not be empty');
50
- const summarized = (0, benchmark_1.summarizeMeasurement)(values);
51
- const numberOfBins = Math.ceil((summarized.max - summarized.min + 1) / binSize) + 1;
52
- const histogram = new Array(numberOfBins).fill(0);
53
- for (const v of values) {
54
- const bin = v === summarized.min ? 0 : Math.floor((v - summarized.min) / binSize) + 1;
55
- histogram[bin]++;
56
- }
57
- return {
58
- name: name,
59
- bins: histogram,
60
- binSize,
61
- ...summarized
62
- };
63
- }
64
- exports.histogramFromNumbers = histogramFromNumbers;
65
- /**
66
- * Takes an array of histograms created by {@link histogramFromNumbers} and produces a CSV table from it.
67
- * They must have the same bin-size for this function to work.
68
- *
69
- * The table has the following columns:
70
- * - `bin` - The corresponding bin number
71
- * - `from` - The exclusive lower bound of the bin
72
- * - `to` - The inclusive upper bound of the bin
73
- * - a column with the name of each histogram, containing its count of values in the corresponding bin
74
- *
75
- * @param histograms - The histogram to convert (assumed to have the same ranges and bins)
76
- * @param countAsDensity - If true, the count is divided by the total number of values (individually for each histogram, similar to pgfplots `hist/density` option)
77
- */
78
- function histograms2table(histograms, countAsDensity = false) {
79
- (0, assert_1.guard)(histograms.length > 0, 'there must be at least one histogram to convert to a table');
80
- const mostBins = guardForLargestBinSize(histograms);
81
- const header = ['bin', 'from', 'to', ...histograms.map(h => JSON.stringify(h.name))];
82
- const sums = histograms.map(h => h.bins.reduce((a, b) => a + b, 0));
83
- const rows = [];
84
- for (let binIndex = 0; binIndex < mostBins; binIndex++) {
85
- const row = new Array(histograms.length + 3);
86
- row[0] = String(binIndex);
87
- if (binIndex === 0) {
88
- row[1] = histograms[0].min.toFixed(3);
89
- row[2] = histograms[0].min.toFixed(3);
90
- }
91
- else {
92
- row[1] = String((binIndex - 1) * histograms[0].binSize + histograms[0].min);
93
- row[2] = String((binIndex) * histograms[0].binSize + histograms[0].min);
94
- }
95
- // fill remaining columns
96
- writeRoResultsForHistograms(histograms, binIndex, row, countAsDensity, sums);
97
- rows.push(row);
98
- }
99
- return {
100
- header: header,
101
- rows: rows
102
- };
103
- }
104
- exports.histograms2table = histograms2table;
105
- function guardForLargestBinSize(histograms) {
106
- const first = histograms[0];
107
- let mostBins = first.bins.length;
108
- for (let i = 1; i < histograms.length; i++) {
109
- (0, assert_1.guard)(histograms[i].binSize === first.binSize, `histograms must have the same bin-size, but ${histograms[i].name} has ${histograms[i].binSize} instead of ${first.binSize}`);
110
- if (histograms[i].bins.length > mostBins) {
111
- mostBins = histograms[i].bins.length;
112
- }
113
- }
114
- return mostBins;
115
- }
116
- function writeRoResultsForHistograms(histograms, binIndex, row, countAsDensity, sums) {
117
- for (let j = 0; j < histograms.length; j++) {
118
- const bins = histograms[j].bins;
119
- // does not have to be performant...
120
- if (binIndex >= bins.length) {
121
- row[j + 3] = '0'; /* in a histogram, 0 is the best default value for bins that are not present -- no value appeared in the corresponding bin */
122
- }
123
- else {
124
- row[j + 3] = String(countAsDensity ? bins[binIndex] / sums[j] : bins[binIndex]);
125
- }
126
- }
127
- }
128
- //# sourceMappingURL=histogram.js.map
@@ -1,4 +0,0 @@
1
- export * from './clusterer';
2
- export * from './post-process-output';
3
- export * from './histogram';
4
- export * from './file-based-count';
@@ -1,21 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
- for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
- };
16
- Object.defineProperty(exports, "__esModule", { value: true });
17
- __exportStar(require("./clusterer"), exports);
18
- __exportStar(require("./post-process-output"), exports);
19
- __exportStar(require("./histogram"), exports);
20
- __exportStar(require("./file-based-count"), exports);
21
- //# sourceMappingURL=index.js.map
@@ -1,16 +0,0 @@
1
- import type { FeatureSelection } from '../../../../statistics';
2
- import type { ClusterReport } from './clusterer';
3
- /**
4
- * Post process the collections in a given folder, reducing them in a memory preserving way.
5
- *
6
- * @param filepath - Path to the root file of the data collection like `statistics-out/top-2023-01-01-00-00-00/`
7
- * @param features - Collection of features to post process, expects corresponding folders to exist
8
- *
9
- * @returns non-aggregated reports for each sub-key of each feature
10
- */
11
- export declare function postProcessFeatureFolder(filepath: string, features: FeatureSelection): ClusterReport[];
12
- /**
13
- * Prints the report to the console, but limits the output to the `limit` entries with the highest counts.
14
- * The names of these entries (like `->`) are returned, so they can be used to filter the following histograms.
15
- */
16
- export declare function printClusterReport(report: ClusterReport, limit?: number): string[];
@@ -1,104 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.printClusterReport = exports.postProcessFeatureFolder = void 0;
7
- const statistics_1 = require("../../../../statistics");
8
- const path_1 = __importDefault(require("path"));
9
- const log_1 = require("../../../log");
10
- const fs_1 = __importDefault(require("fs"));
11
- const clusterer_1 = require("./clusterer");
12
- const statistics_2 = require("../../../../statistics");
13
- const r_bridge_1 = require("../../../../r-bridge");
14
- const defaultmap_1 = require("../../../defaultmap");
15
- /**
16
- * Post process the collections in a given folder, reducing them in a memory preserving way.
17
- *
18
- * @param filepath - Path to the root file of the data collection like `statistics-out/top-2023-01-01-00-00-00/`
19
- * @param features - Collection of features to post process, expects corresponding folders to exist
20
- *
21
- * @returns non-aggregated reports for each sub-key of each feature
22
- */
23
- function postProcessFeatureFolder(filepath, features) {
24
- if (!fs_1.default.existsSync(filepath)) {
25
- log_1.log.warn(`Folder for ${filepath} does not exist, skipping post processing`);
26
- return [];
27
- }
28
- const results = [];
29
- for (const feature of features) {
30
- const result = processFeatureFolder(filepath, feature);
31
- if (result.length > 0) {
32
- results.push(...result);
33
- }
34
- }
35
- return results;
36
- }
37
- exports.postProcessFeatureFolder = postProcessFeatureFolder;
38
- /**
39
- * Process a single feature folder like `Assignments/`
40
- *
41
- * @param filepath - Same as the input to {@link postProcessFeatureFolder}
42
- * @param feature - The (single) feature to process
43
- */
44
- function processFeatureFolder(filepath, feature) {
45
- const featureInfo = statistics_1.ALL_FEATURES[feature];
46
- const targetPath = path_1.default.join(filepath, featureInfo.name);
47
- if (!fs_1.default.existsSync(targetPath)) {
48
- log_1.log.warn(`Folder for ${feature} does not exist at ${targetPath} skipping post processing of this feature`);
49
- return [];
50
- }
51
- log_1.log.info(`Processing ${feature} at ${targetPath}`);
52
- const contextIdMap = new defaultmap_1.DefaultMap((0, r_bridge_1.deterministicCountingIdGenerator)());
53
- const featureSubKeys = Object.keys(featureInfo.initialValue);
54
- const reports = [];
55
- for (const subKey of featureSubKeys) {
56
- const value = processFeatureSubKey(targetPath, subKey, contextIdMap);
57
- if (value !== undefined) {
58
- reports.push(value);
59
- }
60
- }
61
- return reports;
62
- }
63
- function processFeatureSubKey(featurePath, subKey, contextIdMap) {
64
- const targetPath = path_1.default.join(featurePath, `${subKey}${statistics_2.defaultStatisticsFileSuffix}`);
65
- if (!fs_1.default.existsSync(targetPath)) {
66
- log_1.log.warn(`Folder for ${subKey} does not exist at ${targetPath} skipping post processing of this key`);
67
- return undefined;
68
- }
69
- return (0, clusterer_1.clusterStatisticsOutput)(targetPath, contextIdMap);
70
- }
71
- /**
72
- * Prints the report to the console, but limits the output to the `limit` entries with the highest counts.
73
- * The names of these entries (like `->`) are returned, so they can be used to filter the following histograms.
74
- */
75
- function printClusterReport(report, limit = 1000) {
76
- console.log('\n\n\n');
77
- console.log(report.filepath);
78
- const shortStats = [...report.valueInfoMap.entries()].map(([name, values]) => {
79
- return {
80
- name,
81
- count: [...values.values()].reduce((a, b) => a + b, 0),
82
- unique: values.size()
83
- };
84
- }).sort((a, b) => b.count - a.count).slice(0, limit);
85
- const { longestName, longestCount, longestUnique } = shortStats.reduce((acc, { name, count, unique }) => {
86
- return {
87
- longestName: Math.max(acc.longestName, name.length),
88
- longestCount: Math.max(acc.longestCount, count.toLocaleString().length),
89
- longestUnique: Math.max(acc.longestUnique, unique.toLocaleString().length),
90
- };
91
- }, { longestName: 0, longestCount: 0, longestUnique: 0 });
92
- for (const { name, count, unique } of shortStats) {
93
- const strId = `${name}`.padEnd(longestName, ' ');
94
- const strCount = count.toLocaleString().padStart(longestCount, ' ');
95
- const strUnique = unique.toLocaleString().padStart(longestUnique, ' ');
96
- const uniqueSuffix = `\t (${strUnique} ${statistics_2.formatter.format('unique', { color: 7 /* Colors.White */, effect: statistics_2.ColorEffect.Foreground })})`;
97
- console.log(`\t${statistics_2.formatter.format(strId, { style: 1 /* FontStyles.Bold */ })}\t ${strCount} ` +
98
- `${statistics_2.formatter.format('total', { color: 7 /* Colors.White */, effect: statistics_2.ColorEffect.Foreground })}`
99
- + (count !== unique ? uniqueSuffix : ''));
100
- }
101
- return shortStats.map(({ name }) => name);
102
- }
103
- exports.printClusterReport = printClusterReport;
104
- //# sourceMappingURL=post-process-output.js.map
@@ -1,11 +0,0 @@
1
- import type { CommonSummarizerConfiguration } from '../../summarizer';
2
- import type { StatisticsSummarizerConfiguration } from '../summarizer';
3
- /**
4
- * Post process the collections in a given folder, retrieving the final summaries.
5
- *
6
- * @param logger - The logger to use for outputs
7
- * @param filepath - Path to the root file of the data collection (contains all the archives)
8
- * @param config - Configuration of the summarizer
9
- * @param outputPath - The final outputPath to write the result to (may differ from the configured root folder)
10
- */
11
- export declare function postProcessFeatureFolder(logger: CommonSummarizerConfiguration['logger'], filepath: string, config: StatisticsSummarizerConfiguration, outputPath: string): void;
@@ -1,117 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.postProcessFeatureFolder = void 0;
7
- const fs_1 = __importDefault(require("fs"));
8
- const path_1 = __importDefault(require("path"));
9
- const statistics_1 = require("../../../../statistics");
10
- const files_1 = require("../../../files");
11
- const assert_1 = require("../../../assert");
12
- const time_1 = require("../../../time");
13
- const data_1 = require("../../benchmark/data");
14
- const process_1 = require("../../benchmark/first-phase/process");
15
- const arrays_1 = require("../../../arrays");
16
- function postProcessFeatures(config, filepath, outputPath, logger, metaFeatureInformation) {
17
- for (const featureName of config.featuresToUse) {
18
- const featureInfo = statistics_1.ALL_FEATURES[featureName];
19
- const targetPath = path_1.default.join(filepath, featureInfo.name);
20
- const targetFeature = path_1.default.join(outputPath, featureInfo.name);
21
- if (!featureInfo.postProcess) {
22
- logger(` Skipping post processing of ${featureName} as no post processing behavior is defined`);
23
- continue;
24
- }
25
- logger(` Post processing of ${featureName}...`);
26
- if (!fs_1.default.existsSync(targetFeature)) {
27
- fs_1.default.mkdirSync(targetFeature, { recursive: true });
28
- }
29
- if (global.gc) {
30
- logger(` [${(0, time_1.date2string)(new Date())}] Running garbage collection (--expose-gc)`);
31
- global.gc();
32
- }
33
- featureInfo.postProcess(targetPath, metaFeatureInformation, targetFeature, config);
34
- }
35
- }
36
- function postProcessMeta(config, filepath, outputPath, logger, metaFeatureInformation) {
37
- const fileStatisticsSummary = {
38
- successfulParsed: [],
39
- processingTimeMs: [],
40
- failedRequests: [],
41
- // min lengths of 1 etc. could come from different line endings
42
- lines: [],
43
- characters: [],
44
- numberOfNormalizedNodes: []
45
- };
46
- if (!fs_1.default.existsSync(path_1.default.join(outputPath, 'meta'))) {
47
- fs_1.default.mkdirSync(path_1.default.join(outputPath, 'meta'), { recursive: true });
48
- }
49
- const out = fs_1.default.createWriteStream(path_1.default.join(outputPath, 'meta', 'stats.csv'));
50
- out.write(`file,successfulParsed,${(0, data_1.summarizedMeasurement2CsvHeader)('processing')},failedRequests,${(0, data_1.summarizedMeasurement2CsvHeader)('line-length')},${(0, data_1.summarizedMeasurement2CsvHeader)('lines')},${(0, data_1.summarizedMeasurement2CsvHeader)('characters')},numberOfNormalizedNodes\n`);
51
- for (const [file, info] of metaFeatureInformation) {
52
- // we could retrieve these by summing later as well :thinking: however, this makes it more explicit
53
- const characters = (0, arrays_1.sum)(info.stats.lines[0]);
54
- out.write(`${JSON.stringify(file)},${info.stats.successfulParsed},${(0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)(info.stats.processingTimeMs))},`
55
- + `${info.stats.failedRequests.length},${(0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)(info.stats.lines[0]))},${(0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)([info.stats.lines[0].length]))},${(0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)([characters]))},${info.stats.numberOfNormalizedNodes[0]}\n`);
56
- fileStatisticsSummary.successfulParsed.push(info.stats.successfulParsed);
57
- fileStatisticsSummary.processingTimeMs.push(...info.stats.processingTimeMs);
58
- fileStatisticsSummary.failedRequests.push(info.stats.failedRequests.length);
59
- fileStatisticsSummary.lines.push(info.stats.lines[0]);
60
- fileStatisticsSummary.characters.push(characters);
61
- fileStatisticsSummary.numberOfNormalizedNodes.push(info.stats.numberOfNormalizedNodes[0]);
62
- }
63
- out.write(`all,${(0, arrays_1.sum)(fileStatisticsSummary.successfulParsed)},${(0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)(fileStatisticsSummary.processingTimeMs))},`
64
- + `${(0, arrays_1.sum)(fileStatisticsSummary.failedRequests)},${(0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)(fileStatisticsSummary.lines.flat()))},${(0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)(fileStatisticsSummary.lines.map(l => l.length)))},${(0, data_1.summarizedMeasurement2Csv)((0, process_1.summarizeMeasurement)(fileStatisticsSummary.characters))},${(0, arrays_1.sum)(fileStatisticsSummary.numberOfNormalizedNodes)}\n`);
65
- out.close();
66
- }
67
- /**
68
- * Post process the collections in a given folder, retrieving the final summaries.
69
- *
70
- * @param logger - The logger to use for outputs
71
- * @param filepath - Path to the root file of the data collection (contains all the archives)
72
- * @param config - Configuration of the summarizer
73
- * @param outputPath - The final outputPath to write the result to (may differ from the configured root folder)
74
- */
75
- function postProcessFeatureFolder(logger, filepath, config, outputPath) {
76
- if (!fs_1.default.existsSync(filepath)) {
77
- logger(` Folder for ${filepath} does not exist, skipping post processing`);
78
- return;
79
- }
80
- if (!fs_1.default.existsSync(outputPath)) {
81
- fs_1.default.mkdirSync(outputPath, { recursive: true });
82
- }
83
- const metaFeatureInformation = extractMetaInformationFrom(logger, path_1.default.join(filepath, 'meta', 'features.txt'), path_1.default.join(filepath, 'meta', 'stats.txt'));
84
- postProcessFeatures(config, filepath, outputPath, logger, metaFeatureInformation);
85
- postProcessMeta(config, filepath, outputPath, logger, metaFeatureInformation);
86
- }
87
- exports.postProcessFeatureFolder = postProcessFeatureFolder;
88
- function extractMetaInformationFrom(logger, metaFeaturesPath, metaStatsPath) {
89
- const storage = new Map();
90
- logger(` [${(0, time_1.date2string)(new Date())}] Collect feature statistics`);
91
- (0, files_1.readLineByLineSync)(metaFeaturesPath, (line, lineNumber) => {
92
- if (line.length === 0) {
93
- return;
94
- }
95
- if (lineNumber % 2_500 === 0) {
96
- logger(` [${(0, time_1.date2string)(new Date())}] ${lineNumber} meta feature lines processed`);
97
- }
98
- const meta = JSON.parse(line.toString());
99
- storage.set(meta.file, meta.content);
100
- });
101
- logger(` [${(0, time_1.date2string)(new Date())}] Collect meta statistics`);
102
- (0, files_1.readLineByLineSync)(metaStatsPath, (line, lineNumber) => {
103
- if (line.length === 0) {
104
- return;
105
- }
106
- if (lineNumber % 2_500 === 0) {
107
- logger(` [${(0, time_1.date2string)(new Date())}] ${lineNumber} meta statistics lines processed`);
108
- }
109
- const meta = JSON.parse(line.toString());
110
- const existing = storage.get(meta.file);
111
- (0, assert_1.guard)(existing !== undefined, () => `Expected to find meta information for ${meta.file} in line ${lineNumber + 1} of ${metaFeaturesPath}`);
112
- existing.stats = meta.content;
113
- });
114
- logger(` [${(0, time_1.date2string)(new Date())}] Done collecting meta information`);
115
- return storage;
116
- }
117
- //# sourceMappingURL=process.js.map
@@ -1,35 +0,0 @@
1
- import type { CommonSummarizerConfiguration } from '../summarizer';
2
- import { Summarizer } from '../summarizer';
3
- import type { FeatureSelection } from '../../../statistics';
4
- export interface StatisticsSummarizerConfiguration extends CommonSummarizerConfiguration {
5
- /**
6
- * The input path to read all zips from
7
- */
8
- inputPath: string;
9
- /**
10
- * Features to extract the summaries for
11
- */
12
- featuresToUse: FeatureSelection;
13
- /**
14
- * Path for the intermediate results of the preparation phase
15
- */
16
- intermediateOutputPath: string;
17
- /**
18
- * Path for the final results of the summarization phase
19
- */
20
- outputPath: string;
21
- /**
22
- * How many folders to skip to find the project root
23
- */
24
- projectSkip: number;
25
- }
26
- export declare const statisticsFileNameRegex: RegExp;
27
- export declare class StatisticsSummarizer extends Summarizer<unknown, StatisticsSummarizerConfiguration> {
28
- constructor(config: StatisticsSummarizerConfiguration);
29
- private removeIfExists;
30
- /**
31
- * The preparation phase essentially merges all files into one by just attaching lines together!
32
- */
33
- preparationPhase(useTypeClassification: boolean): Promise<void>;
34
- summarizePhase(): Promise<unknown>;
35
- }