@eagleoutice/flowr 2.2.15 → 2.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. package/README.md +226 -6
  2. package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
  3. package/abstract-interpretation/data-frame/absint-info.js +31 -0
  4. package/abstract-interpretation/data-frame/absint-visitor.d.ts +59 -0
  5. package/abstract-interpretation/data-frame/absint-visitor.js +173 -0
  6. package/abstract-interpretation/data-frame/domain.d.ts +107 -0
  7. package/abstract-interpretation/data-frame/domain.js +315 -0
  8. package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
  9. package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
  10. package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
  11. package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
  12. package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
  13. package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
  14. package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
  15. package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
  16. package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
  17. package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
  18. package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
  19. package/abstract-interpretation/data-frame/resolve-args.js +118 -0
  20. package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
  21. package/abstract-interpretation/data-frame/semantics.js +366 -0
  22. package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
  23. package/abstract-interpretation/data-frame/shape-inference.js +117 -0
  24. package/benchmark/slicer.d.ts +18 -2
  25. package/benchmark/slicer.js +143 -5
  26. package/benchmark/stats/print.js +123 -45
  27. package/benchmark/stats/size-of.d.ts +7 -0
  28. package/benchmark/stats/size-of.js +1 -0
  29. package/benchmark/stats/stats.d.ts +30 -1
  30. package/benchmark/stats/stats.js +4 -2
  31. package/benchmark/summarizer/data.d.ts +33 -2
  32. package/benchmark/summarizer/first-phase/input.js +5 -1
  33. package/benchmark/summarizer/first-phase/process.d.ts +2 -1
  34. package/benchmark/summarizer/first-phase/process.js +49 -3
  35. package/benchmark/summarizer/second-phase/process.js +101 -3
  36. package/cli/benchmark-app.d.ts +2 -0
  37. package/cli/benchmark-app.js +5 -1
  38. package/cli/benchmark-helper-app.d.ts +2 -0
  39. package/cli/benchmark-helper-app.js +13 -8
  40. package/cli/common/options.js +4 -0
  41. package/cli/export-quads-app.js +2 -1
  42. package/cli/flowr.js +58 -57
  43. package/cli/repl/commands/repl-cfg.js +13 -13
  44. package/cli/repl/commands/repl-commands.js +2 -2
  45. package/cli/repl/commands/repl-dataflow.js +10 -10
  46. package/cli/repl/commands/repl-execute.d.ts +2 -3
  47. package/cli/repl/commands/repl-execute.js +4 -4
  48. package/cli/repl/commands/repl-lineage.js +4 -4
  49. package/cli/repl/commands/repl-main.d.ts +12 -1
  50. package/cli/repl/commands/repl-normalize.js +6 -6
  51. package/cli/repl/commands/repl-parse.js +2 -2
  52. package/cli/repl/commands/repl-query.js +9 -9
  53. package/cli/repl/commands/repl-version.js +1 -1
  54. package/cli/repl/core.d.ts +5 -2
  55. package/cli/repl/core.js +10 -8
  56. package/cli/repl/server/connection.d.ts +3 -1
  57. package/cli/repl/server/connection.js +7 -5
  58. package/cli/repl/server/server.d.ts +3 -2
  59. package/cli/repl/server/server.js +4 -2
  60. package/cli/script-core/statistics-core.d.ts +2 -1
  61. package/cli/script-core/statistics-core.js +2 -2
  62. package/cli/script-core/statistics-helper-core.d.ts +2 -1
  63. package/cli/script-core/statistics-helper-core.js +5 -4
  64. package/cli/slicer-app.js +4 -2
  65. package/cli/statistics-app.js +2 -1
  66. package/cli/statistics-helper-app.js +2 -1
  67. package/config.d.ts +43 -10
  68. package/config.js +47 -43
  69. package/control-flow/cfg-dead-code.js +45 -2
  70. package/control-flow/cfg-simplification.d.ts +2 -0
  71. package/control-flow/control-flow-graph.d.ts +2 -0
  72. package/control-flow/control-flow-graph.js +8 -0
  73. package/control-flow/dfg-cfg-guided-visitor.d.ts +5 -3
  74. package/control-flow/dfg-cfg-guided-visitor.js +15 -4
  75. package/control-flow/extract-cfg.d.ts +4 -2
  76. package/control-flow/extract-cfg.js +4 -3
  77. package/control-flow/semantic-cfg-guided-visitor.d.ts +20 -2
  78. package/control-flow/semantic-cfg-guided-visitor.js +24 -4
  79. package/core/pipeline-executor.d.ts +4 -1
  80. package/core/pipeline-executor.js +6 -5
  81. package/core/steps/all/core/10-normalize.d.ts +2 -0
  82. package/core/steps/all/core/10-normalize.js +1 -1
  83. package/core/steps/all/core/11-normalize-tree-sitter.d.ts +2 -1
  84. package/core/steps/all/core/11-normalize-tree-sitter.js +2 -2
  85. package/core/steps/all/core/20-dataflow.d.ts +2 -1
  86. package/core/steps/all/core/20-dataflow.js +2 -2
  87. package/core/steps/all/static-slicing/00-slice.d.ts +2 -1
  88. package/core/steps/all/static-slicing/00-slice.js +2 -2
  89. package/core/steps/pipeline/default-pipelines.d.ts +32 -31
  90. package/core/steps/pipeline/default-pipelines.js +8 -8
  91. package/core/steps/pipeline-step.d.ts +2 -1
  92. package/dataflow/environments/built-in-config.d.ts +3 -3
  93. package/dataflow/environments/built-in.d.ts +11 -3
  94. package/dataflow/environments/built-in.js +5 -3
  95. package/dataflow/environments/default-builtin-config.js +4 -2
  96. package/dataflow/environments/define.d.ts +2 -1
  97. package/dataflow/environments/define.js +4 -5
  98. package/dataflow/environments/remove.d.ts +6 -0
  99. package/dataflow/environments/remove.js +29 -0
  100. package/dataflow/eval/resolve/alias-tracking.d.ts +7 -2
  101. package/dataflow/eval/resolve/alias-tracking.js +11 -8
  102. package/dataflow/eval/resolve/resolve-argument.d.ts +8 -0
  103. package/dataflow/eval/resolve/resolve-argument.js +118 -0
  104. package/dataflow/eval/resolve/resolve.d.ts +65 -18
  105. package/dataflow/eval/resolve/resolve.js +144 -48
  106. package/dataflow/eval/values/string/string-constants.d.ts +1 -1
  107. package/dataflow/eval/values/string/string-constants.js +7 -2
  108. package/dataflow/extractor.d.ts +2 -1
  109. package/dataflow/extractor.js +2 -1
  110. package/dataflow/internal/process/functions/call/built-in/built-in-access.js +5 -6
  111. package/dataflow/internal/process/functions/call/built-in/built-in-apply.js +1 -1
  112. package/dataflow/internal/process/functions/call/built-in/built-in-assignment.d.ts +4 -2
  113. package/dataflow/internal/process/functions/call/built-in/built-in-assignment.js +11 -11
  114. package/dataflow/internal/process/functions/call/built-in/built-in-eval.js +10 -11
  115. package/dataflow/internal/process/functions/call/built-in/built-in-expression-list.js +7 -2
  116. package/dataflow/internal/process/functions/call/built-in/built-in-for-loop.js +2 -3
  117. package/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.js +1 -1
  118. package/dataflow/internal/process/functions/call/built-in/built-in-list.js +2 -2
  119. package/dataflow/internal/process/functions/call/built-in/built-in-replacement.js +2 -3
  120. package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +6 -3
  121. package/dataflow/internal/process/functions/call/built-in/built-in-source.js +19 -15
  122. package/dataflow/internal/process/functions/call/built-in/built-in-vector.js +2 -2
  123. package/dataflow/internal/process/functions/call/built-in/built-in-while-loop.js +1 -1
  124. package/dataflow/internal/process/functions/call/common.js +1 -1
  125. package/dataflow/internal/process/functions/process-parameter.js +1 -1
  126. package/dataflow/origin/dfg-get-symbol-refs.d.ts +21 -0
  127. package/dataflow/origin/dfg-get-symbol-refs.js +50 -0
  128. package/dataflow/processor.d.ts +5 -0
  129. package/documentation/doc-util/doc-cfg.js +4 -3
  130. package/documentation/doc-util/doc-code.d.ts +1 -1
  131. package/documentation/doc-util/doc-dfg.js +3 -2
  132. package/documentation/doc-util/doc-functions.d.ts +24 -0
  133. package/documentation/doc-util/doc-functions.js +65 -0
  134. package/documentation/doc-util/doc-normalized-ast.js +3 -2
  135. package/documentation/doc-util/doc-print.d.ts +5 -0
  136. package/documentation/doc-util/doc-print.js +36 -0
  137. package/documentation/doc-util/doc-query.js +13 -2
  138. package/documentation/doc-util/doc-repl.js +2 -1
  139. package/documentation/doc-util/doc-search.js +3 -2
  140. package/documentation/doc-util/doc-types.d.ts +28 -6
  141. package/documentation/doc-util/doc-types.js +89 -45
  142. package/documentation/print-cfg-wiki.js +6 -7
  143. package/documentation/print-core-wiki.js +5 -5
  144. package/documentation/print-dataflow-graph-wiki.js +10 -10
  145. package/documentation/print-engines-wiki.js +1 -2
  146. package/documentation/print-faq-wiki.js +8 -2
  147. package/documentation/print-interface-wiki.js +12 -2
  148. package/documentation/print-linter-issue.d.ts +1 -0
  149. package/documentation/print-linter-issue.js +71 -0
  150. package/documentation/print-linter-wiki.js +223 -34
  151. package/documentation/print-linting-and-testing-wiki.js +2 -4
  152. package/documentation/print-normalized-ast-wiki.js +3 -3
  153. package/documentation/print-query-wiki.js +18 -2
  154. package/documentation/print-readme.js +24 -1
  155. package/documentation/print-search-wiki.js +1 -2
  156. package/linter/linter-executor.d.ts +3 -1
  157. package/linter/linter-executor.js +3 -2
  158. package/linter/linter-format.d.ts +67 -7
  159. package/linter/linter-format.js +12 -1
  160. package/linter/linter-rules.d.ts +178 -16
  161. package/linter/linter-rules.js +14 -4
  162. package/linter/linter-tags.d.ts +80 -0
  163. package/linter/linter-tags.js +85 -0
  164. package/linter/rules/absolute-path.d.ts +71 -0
  165. package/linter/rules/absolute-path.js +177 -0
  166. package/linter/rules/dataframe-access-validation.d.ts +53 -0
  167. package/linter/rules/dataframe-access-validation.js +116 -0
  168. package/linter/rules/deprecated-functions.d.ts +43 -0
  169. package/linter/rules/deprecated-functions.js +58 -0
  170. package/linter/rules/{2-file-path-validity.d.ts → file-path-validity.d.ts} +16 -6
  171. package/linter/rules/{2-file-path-validity.js → file-path-validity.js} +21 -13
  172. package/linter/rules/naming-convention.d.ts +71 -0
  173. package/linter/rules/naming-convention.js +168 -0
  174. package/linter/rules/seeded-randomness.d.ts +65 -0
  175. package/linter/rules/seeded-randomness.js +122 -0
  176. package/linter/rules/unused-definition.d.ts +41 -0
  177. package/linter/rules/unused-definition.js +105 -0
  178. package/package.json +5 -2
  179. package/queries/base-query-format.d.ts +2 -0
  180. package/queries/catalog/call-context-query/call-context-query-executor.d.ts +1 -1
  181. package/queries/catalog/call-context-query/call-context-query-executor.js +2 -2
  182. package/queries/catalog/cluster-query/cluster-query-format.d.ts +1 -1
  183. package/queries/catalog/config-query/config-query-executor.d.ts +1 -1
  184. package/queries/catalog/config-query/config-query-executor.js +2 -3
  185. package/queries/catalog/control-flow-query/control-flow-query-executor.d.ts +1 -1
  186. package/queries/catalog/control-flow-query/control-flow-query-executor.js +2 -2
  187. package/queries/catalog/control-flow-query/control-flow-query-format.d.ts +1 -1
  188. package/queries/catalog/dataflow-lens-query/dataflow-lens-query-format.d.ts +1 -1
  189. package/queries/catalog/dataflow-query/dataflow-query-format.d.ts +1 -1
  190. package/queries/catalog/dependencies-query/dependencies-query-executor.js +4 -116
  191. package/queries/catalog/dependencies-query/dependencies-query-format.d.ts +1 -1
  192. package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
  193. package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
  194. package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
  195. package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
  196. package/queries/catalog/happens-before-query/happens-before-query-format.d.ts +1 -1
  197. package/queries/catalog/id-map-query/id-map-query-format.d.ts +1 -1
  198. package/queries/catalog/lineage-query/lineage-query-format.d.ts +1 -1
  199. package/queries/catalog/linter-query/linter-query-executor.d.ts +1 -1
  200. package/queries/catalog/linter-query/linter-query-executor.js +2 -2
  201. package/queries/catalog/linter-query/linter-query-format.d.ts +1 -1
  202. package/queries/catalog/linter-query/linter-query-format.js +16 -12
  203. package/queries/catalog/normalized-ast-query/normalized-ast-query-format.d.ts +1 -1
  204. package/queries/catalog/origin-query/origin-query-format.d.ts +1 -1
  205. package/queries/catalog/project-query/project-query-format.d.ts +1 -1
  206. package/queries/catalog/resolve-value-query/resolve-value-query-executor.d.ts +1 -1
  207. package/queries/catalog/resolve-value-query/resolve-value-query-executor.js +2 -2
  208. package/queries/catalog/resolve-value-query/resolve-value-query-format.d.ts +1 -1
  209. package/queries/catalog/search-query/search-query-executor.d.ts +1 -1
  210. package/queries/catalog/search-query/search-query-executor.js +2 -2
  211. package/queries/catalog/search-query/search-query-format.d.ts +1 -1
  212. package/queries/catalog/static-slice-query/static-slice-query-executor.d.ts +1 -1
  213. package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -2
  214. package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +1 -1
  215. package/queries/query.d.ts +76 -16
  216. package/queries/query.js +2 -0
  217. package/r-bridge/lang-4.x/ast/parser/json/parser.d.ts +2 -1
  218. package/r-bridge/lang-4.x/ast/parser/json/parser.js +4 -2
  219. package/r-bridge/lang-4.x/convert-values.js +2 -1
  220. package/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.d.ts +3 -1
  221. package/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.js +4 -4
  222. package/r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize.d.ts +1 -1
  223. package/r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize.js +7 -5
  224. package/r-bridge/shell.d.ts +3 -2
  225. package/r-bridge/shell.js +4 -5
  226. package/search/flowr-search-builder.d.ts +6 -2
  227. package/search/flowr-search-builder.js +7 -0
  228. package/search/flowr-search-filters.d.ts +32 -8
  229. package/search/flowr-search-filters.js +42 -15
  230. package/search/flowr-search.d.ts +4 -0
  231. package/search/search-executor/search-enrichers.d.ts +7 -3
  232. package/search/search-executor/search-enrichers.js +29 -20
  233. package/search/search-executor/search-generators.js +1 -1
  234. package/search/search-executor/search-transformer.d.ts +2 -0
  235. package/search/search-executor/search-transformer.js +10 -1
  236. package/slicing/static/static-slicer.d.ts +1 -1
  237. package/slicing/static/static-slicer.js +2 -3
  238. package/statistics/statistics.d.ts +3 -1
  239. package/statistics/statistics.js +5 -4
  240. package/util/containers.d.ts +12 -9
  241. package/util/containers.js +12 -9
  242. package/util/files.d.ts +8 -2
  243. package/util/files.js +22 -4
  244. package/util/objects.d.ts +5 -4
  245. package/util/r-value.d.ts +23 -0
  246. package/util/r-value.js +113 -0
  247. package/util/range.d.ts +5 -1
  248. package/util/range.js +11 -3
  249. package/util/text/strings.d.ts +6 -0
  250. package/util/text/strings.js +35 -0
  251. package/util/version.js +1 -1
  252. package/linter/rules/1-deprecated-functions.d.ts +0 -34
  253. package/linter/rules/1-deprecated-functions.js +0 -54
  254. package/util/cfg/cfg.d.ts +0 -0
  255. package/util/cfg/cfg.js +0 -2
@@ -0,0 +1,1219 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.mapDataFrameFunctionCall = mapDataFrameFunctionCall;
4
+ const config_1 = require("../../../config");
5
+ const make_argument_1 = require("../../../dataflow/internal/process/functions/call/argument/make-argument");
6
+ const built_in_source_1 = require("../../../dataflow/internal/process/functions/call/built-in/built-in-source");
7
+ const r_function_call_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
8
+ const type_1 = require("../../../r-bridge/lang-4.x/ast/model/type");
9
+ const retriever_1 = require("../../../r-bridge/retriever");
10
+ const assert_1 = require("../../../util/assert");
11
+ const files_1 = require("../../../util/files");
12
+ const domain_1 = require("../domain");
13
+ const resolve_args_1 = require("../resolve-args");
14
+ const shape_inference_1 = require("../shape-inference");
15
+ const arguments_1 = require("./arguments");
16
/**
 * Represents the different types of data frames in R.
 *
 * Every member maps a symbolic name to the string used for that data frame type
 * ('data.frame', 'tibble' or 'data.table').
 * The object is frozen so that the enumeration cannot be modified at runtime.
 */
const DataFrameType = Object.freeze({
    DataFrame: 'data.frame',
    Tibble: 'tibble',
    DataTable: 'data.table'
});
25
/**
 * Lookup table from the name of each supported concrete data frame function to its mapper function,
 * together with the library the function originates from and the type of the data frame it returns.
 *
 * The table is assembled from groups of function names that share the same mapper, library and
 * return type; every name still receives its own `{ mapper, library, returnType }` entry object,
 * and names are inserted in the order in which they are listed below.
 */
const DataFrameFunctionMapper = {};
for (const [functionNames, mapper, library, returnType] of [
    [['data.frame'], mapDataFrameCreate, 'base', DataFrameType.DataFrame],
    [['as.data.frame'], mapDataFrameConvert, 'base', DataFrameType.DataFrame],
    [['read.table', 'read.csv', 'read.csv2', 'read.delim', 'read.delim2'], mapDataFrameRead, 'utils', DataFrameType.DataFrame],
    [['read_table', 'read_csv', 'read_csv2', 'read_tsv', 'read_delim'], mapDataFrameRead, 'readr', DataFrameType.Tibble],
    [['cbind'], mapDataFrameColBind, 'base', DataFrameType.DataFrame],
    [['rbind'], mapDataFrameRowBind, 'base', DataFrameType.DataFrame],
    [['head', 'tail'], mapDataFrameHeadTail, 'utils', DataFrameType.DataFrame],
    [['subset'], mapDataFrameSubset, 'base', DataFrameType.DataFrame],
    [['filter'], mapDataFrameFilter, 'dplyr', DataFrameType.DataFrame],
    [['select'], mapDataFrameSelect, 'dplyr', DataFrameType.DataFrame],
    [['mutate'], mapDataFrameMutate, 'dplyr', DataFrameType.DataFrame],
    [['transform'], mapDataFrameMutate, 'base', DataFrameType.DataFrame],
    [['group_by'], mapDataFrameGroupBy, 'dplyr', DataFrameType.Tibble],
    [['summarise', 'summarize'], mapDataFrameSummarize, 'dplyr', DataFrameType.DataFrame],
    [['inner_join', 'left_join', 'right_join', 'full_join'], mapDataFrameJoin, 'dplyr', DataFrameType.DataFrame],
    [['merge'], mapDataFrameJoin, 'base', DataFrameType.DataFrame],
    [['relocate', 'arrange'], mapDataFrameIdentity, 'dplyr', DataFrameType.DataFrame]
]) {
    for (const functionName of functionNames) {
        DataFrameFunctionMapper[functionName] = { mapper, library, returnType };
    }
}
62
/**
 * List of other data frame functions that are not explicitly supported but may return data frames.
 *
 * Every entry groups function names sharing the same properties:
 * - `type`: either 'entry_point' or 'transformation'
 * - `names`: the names of the functions covered by the entry
 * - `library`: the library the functions originate from
 * - `returnType`: the type of the returned data frame
 * - `dataFrame` (only on some 'transformation' entries): a `{ pos, name }` pair,
 *   presumably the position and name of the data frame argument (verify against the consumers of this list)
 */
const OtherDataFrameFunctions = [
    {
        type: 'entry_point',
        names: ['anova', 'AIC', 'BIC'],
        // `anova`, `AIC` and `BIC` are provided by R's `stats` package ('anova' is a function, not a library)
        library: 'stats',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['Anova', 'Manova'],
        library: 'car',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['lmer'],
        library: 'lme4',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['data_frame', 'as_data_frame'],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['tbl', 'as.tbl'],
        library: 'dplyr',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['read_fwf', 'read_log'],
        library: 'readr',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['read_excel', 'read_xls', 'read_xlsx'],
        library: 'readxl',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['tibble', 'tibble_row', 'as_tibble', 'tribble'],
        library: 'tibble',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['data.table', 'as.data.table', 'fread'],
        library: 'data.table',
        returnType: DataFrameType.DataTable
    }, {
        type: 'transformation',
        names: ['na.omit'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'object' }
    }, {
        type: 'transformation',
        names: ['unique', 't'],
        library: 'base',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        type: 'transformation',
        names: ['aggregate'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        type: 'transformation',
        names: ['with', 'within'],
        library: 'base',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['reshape'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['melt'],
        library: 'reshape2',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: [
            'transmute', 'distinct', 'distinct_prepare', 'group_by_prepare', 'rename', 'rename_with', 'reframe',
            'slice', 'slice_head', 'slice_tail', 'slice_min', 'slice_max', 'slice_sample'
        ],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: '.data' }
    }, {
        type: 'transformation',
        names: [
            'filter_if', 'filter_at', 'filter_all', 'select_if', 'select_at', 'select_all',
            'mutate_if', 'mutate_at', 'mutate_all', 'transmute_if', 'transmute_at', 'transmute_all',
            'distinct_if', 'distinct_at', 'distinct_all', 'group_by_if', 'group_by_at', 'group_by_all',
            'summarize_if', 'summarise_if', 'summarize_at', 'summarise_at', 'summarize_all', 'summarise_all',
            'arrange_if', 'arrange_at', 'arrange_all', 'rename_if', 'rename_at', 'rename_all'
        ],
        library: 'dplyr',
        returnType: DataFrameType.Tibble,
        dataFrame: { pos: 0, name: '.tbl' }
    }, {
        type: 'transformation',
        names: [
            'semi_join', 'anti_join', 'nest_join', 'cross_join',
            'ungroup', 'count', 'tally', 'add_count', 'add_tally',
            'rows_insert', 'rows_append', 'rows_update', 'rows_patch', 'rows_upsert', 'rows_delete'
        ],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        // no `dataFrame` location is given for the bind functions
        type: 'transformation',
        names: ['bind_cols', 'bind_rows'],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'transformation',
        names: [
            'drop_na', 'replace_na', 'pivot_longer', 'pivot_wider',
            'separate', 'separate_wider_position', 'separate_wider_delim', 'unite'
        ],
        library: 'tidyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['add_column', 'add_row', 'add_case'],
        library: 'tibble',
        returnType: DataFrameType.Tibble,
        dataFrame: { pos: 0, name: '.data' }
    }, {
        type: 'transformation',
        names: ['melt', 'dcast'],
        library: 'data.table',
        returnType: DataFrameType.DataTable,
        dataFrame: { pos: 0, name: 'data' }
    }
];
206
+ /**
207
+ * Mapper for defining the location of all relevant function parameters for each supported data frame function of {@link DataFrameFunctionMapper}.
208
+ */
209
+ const DataFrameFunctionParamsMapper = {
210
+ 'data.frame': {
211
+ checkNames: { pos: -1, name: 'check.names', default: true },
212
+ noDupNames: { pos: -1, name: 'check.names', default: true },
213
+ special: ['row.names', 'check.rows', 'check.names', 'fix.empty.names', 'stringsAsFactors'],
214
+ critical: [{ pos: -1, name: 'row.names' }]
215
+ },
216
+ 'as.data.frame': {
217
+ critical: [],
218
+ dataFrame: { pos: 0, name: 'x' }
219
+ },
220
+ 'read.table': {
221
+ fileName: { pos: 0, name: 'file' },
222
+ header: { pos: 1, name: 'header', default: false },
223
+ separator: { pos: 2, name: 'sep', default: '\\s' },
224
+ quote: { pos: 3, name: 'quote', default: '"\'' },
225
+ skipLines: { pos: 12, name: 'skip', default: 0 },
226
+ checkNames: { pos: 13, name: 'check.names', default: true },
227
+ noDupNames: { pos: 13, name: 'check.names', default: true },
228
+ comment: { pos: 17, name: 'comment.char', default: '#' },
229
+ text: { pos: 23, name: 'text' },
230
+ critical: [
231
+ { pos: 6, name: 'row.names' },
232
+ { pos: 7, name: 'col.names' },
233
+ { pos: 11, name: 'nrows', default: -1 },
234
+ { pos: 15, name: 'strip.white', default: false },
235
+ { pos: 16, name: 'blank.lines.skip', default: true },
236
+ { pos: 18, name: 'allow.escapes', default: false },
237
+ ]
238
+ },
239
+ 'read.csv': {
240
+ fileName: { pos: 0, name: 'file' },
241
+ header: { pos: 1, name: 'header', default: true },
242
+ separator: { pos: 2, name: 'sep', default: ',' },
243
+ quote: { pos: 3, name: 'quote', default: '"' },
244
+ comment: { pos: 6, name: 'comment.char', default: '' },
245
+ skipLines: { pos: -1, name: 'skip', default: 0 },
246
+ checkNames: { pos: -1, name: 'check.names', default: true },
247
+ noDupNames: { pos: -1, name: 'check.names', default: true },
248
+ text: { pos: -1, name: 'text' },
249
+ critical: [
250
+ { pos: -1, name: 'row.names' },
251
+ { pos: -1, name: 'col.names' },
252
+ { pos: -1, name: 'nrows', default: -1 },
253
+ { pos: -1, name: 'strip.white', default: false },
254
+ { pos: -1, name: 'blank.lines.skip', default: true },
255
+ { pos: -1, name: 'allow.escapes', default: false },
256
+ ]
257
+ },
258
+ 'read.csv2': {
259
+ fileName: { pos: 0, name: 'file' },
260
+ header: { pos: 1, name: 'header', default: true },
261
+ separator: { pos: 2, name: 'sep', default: ';' },
262
+ quote: { pos: 3, name: 'quote', default: '"' },
263
+ comment: { pos: 6, name: 'comment.char', default: '' },
264
+ skipLines: { pos: -1, name: 'skip', default: 0 },
265
+ checkNames: { pos: -1, name: 'check.names', default: true },
266
+ noDupNames: { pos: -1, name: 'check.names', default: true },
267
+ text: { pos: -1, name: 'text' },
268
+ critical: [
269
+ { pos: -1, name: 'row.names' },
270
+ { pos: -1, name: 'col.names' },
271
+ { pos: -1, name: 'nrows', default: -1 },
272
+ { pos: -1, name: 'strip.white', default: false },
273
+ { pos: -1, name: 'blank.lines.skip', default: true },
274
+ { pos: -1, name: 'allow.escapes', default: false },
275
+ ]
276
+ },
277
+ 'read.delim': {
278
+ fileName: { pos: 0, name: 'file' },
279
+ header: { pos: 1, name: 'header', default: true },
280
+ separator: { pos: 2, name: 'sep', default: '\\t' },
281
+ quote: { pos: 3, name: 'quote', default: '"' },
282
+ comment: { pos: 6, name: 'comment.char', default: '' },
283
+ skipLines: { pos: -1, name: 'skip', default: 0 },
284
+ checkNames: { pos: -1, name: 'check.names', default: true },
285
+ noDupNames: { pos: -1, name: 'check.names', default: true },
286
+ text: { pos: -1, name: 'text' },
287
+ critical: [
288
+ { pos: -1, name: 'row.names' },
289
+ { pos: -1, name: 'col.names' },
290
+ { pos: -1, name: 'nrows', default: -1 },
291
+ { pos: -1, name: 'strip.white', default: false },
292
+ { pos: -1, name: 'blank.lines.skip', default: true },
293
+ { pos: -1, name: 'allow.escapes', default: false },
294
+ ]
295
+ },
296
+ 'read.delim2': {
297
+ fileName: { pos: 0, name: 'file' },
298
+ header: { pos: 1, name: 'header', default: true },
299
+ separator: { pos: 2, name: 'sep', default: '\\t' },
300
+ quote: { pos: 3, name: 'quote', default: '"' },
301
+ comment: { pos: 6, name: 'comment.char', default: '' },
302
+ skipLines: { pos: -1, name: 'skip', default: 0 },
303
+ checkNames: { pos: -1, name: 'check.names', default: true },
304
+ noDupNames: { pos: -1, name: 'check.names', default: true },
305
+ text: { pos: -1, name: 'text' },
306
+ critical: [
307
+ { pos: -1, name: 'row.names' },
308
+ { pos: -1, name: 'col.names' },
309
+ { pos: -1, name: 'nrows', default: -1 },
310
+ { pos: -1, name: 'strip.white', default: false },
311
+ { pos: -1, name: 'blank.lines.skip', default: true },
312
+ { pos: -1, name: 'allow.escapes', default: false },
313
+ ]
314
+ },
315
+ 'read_table': {
316
+ fileName: { pos: 0, name: 'file' },
317
+ header: { pos: 1, name: 'col_names', default: true },
318
+ separator: { pos: -1, default: '\\s' },
319
+ quote: { pos: -1, default: '"' },
320
+ skipLines: { pos: 5, name: 'skip', default: 0 },
321
+ comment: { pos: 9, name: 'comment', default: '' },
322
+ checkNames: { pos: -1, default: false },
323
+ noDupNames: { pos: -1, default: true },
324
+ critical: [
325
+ { pos: 6, name: 'n_max', default: Infinity },
326
+ { pos: 11, name: 'skip_empty_rows', default: true }
327
+ ],
328
+ noEmptyNames: true
329
+ },
330
+ 'read_csv': {
331
+ fileName: { pos: 0, name: 'file' },
332
+ header: { pos: 1, name: 'col_names', default: true },
333
+ separator: { pos: -1, default: ',' },
334
+ quote: { pos: 8, name: 'quote', default: '"' },
335
+ comment: { pos: 9, name: 'comment', default: '' },
336
+ skipLines: { pos: 11, name: 'skip', default: 0 },
337
+ checkNames: { pos: -1, default: false },
338
+ noDupNames: { pos: -1, default: true },
339
+ critical: [
340
+ { pos: 3, name: 'col_select' },
341
+ { pos: 4, name: 'id' },
342
+ { pos: 10, name: 'trim_ws', default: true },
343
+ { pos: 12, name: 'n_max', default: Infinity },
344
+ { pos: 14, name: 'name_repair', default: 'unique' },
345
+ { pos: 18, name: 'skip_empty_rows', default: true }
346
+ ],
347
+ noEmptyNames: true
348
+ },
349
+ 'read_csv2': {
350
+ fileName: { pos: 0, name: 'file' },
351
+ header: { pos: 1, name: 'col_names', default: true },
352
+ separator: { pos: -1, default: ';' },
353
+ quote: { pos: 8, name: 'quote', default: '"' },
354
+ comment: { pos: 9, name: 'comment', default: '' },
355
+ skipLines: { pos: 11, name: 'skip', default: 0 },
356
+ checkNames: { pos: -1, default: false },
357
+ noDupNames: { pos: -1, default: true },
358
+ critical: [
359
+ { pos: 3, name: 'col_select' },
360
+ { pos: 4, name: 'id' },
361
+ { pos: 10, name: 'trim_ws', default: true },
362
+ { pos: 12, name: 'n_max', default: Infinity },
363
+ { pos: 14, name: 'name_repair', default: 'unique' },
364
+ { pos: 18, name: 'skip_empty_rows', default: true }
365
+ ],
366
+ noEmptyNames: true
367
+ },
368
+ 'read_tsv': {
369
+ fileName: { pos: 0, name: 'file' },
370
+ header: { pos: 1, name: 'col_names', default: true },
371
+ separator: { pos: -1, default: '\\t' },
372
+ quote: { pos: 8, name: 'quote', default: '"' },
373
+ comment: { pos: 9, name: 'comment', default: '' },
374
+ skipLines: { pos: 11, name: 'skip', default: 0 },
375
+ checkNames: { pos: -1, default: false },
376
+ noDupNames: { pos: -1, default: true },
377
+ critical: [
378
+ { pos: 3, name: 'col_select' },
379
+ { pos: 4, name: 'id' },
380
+ { pos: 10, name: 'trim_ws', default: true },
381
+ { pos: 12, name: 'n_max', default: Infinity },
382
+ { pos: 14, name: 'name_repair', default: 'unique' },
383
+ { pos: 18, name: 'skip_empty_rows', default: true }
384
+ ],
385
+ noEmptyNames: true
386
+ },
387
+ 'read_delim': {
388
+ fileName: { pos: 0, name: 'file' },
389
+ separator: { pos: 1, name: 'delim', default: '\t' },
390
+ quote: { pos: 2, name: 'quote', default: '"' },
391
+ header: { pos: 5, name: 'col_names', default: true },
392
+ comment: { pos: 12, name: 'comment', default: '' },
393
+ skipLines: { pos: 14, name: 'skip', default: 0 },
394
+ checkNames: { pos: -1, default: false },
395
+ noDupNames: { pos: -1, default: true },
396
+ critical: [
397
+ { pos: 3, name: 'escape_backslash', default: false },
398
+ { pos: 4, name: 'escape_double', default: true },
399
+ { pos: 7, name: 'col_select' },
400
+ { pos: 8, name: 'id' },
401
+ { pos: 13, name: 'trim_ws', default: false },
402
+ { pos: 15, name: 'n_max', default: Infinity },
403
+ { pos: 17, name: 'name_repair', default: 'unique' },
404
+ { pos: 21, name: 'skip_empty_rows', default: true }
405
+ ],
406
+ noEmptyNames: true
407
+ },
408
+ 'cbind': {
409
+ special: ['deparse.level', 'make.row.names', 'stringsAsFactors', 'factor.exclude']
410
+ },
411
+ 'rbind': {
412
+ special: ['deparse.level', 'make.row.names', 'stringsAsFactors', 'factor.exclude']
413
+ },
414
+ 'head': {
415
+ dataFrame: { pos: 0, name: 'x' },
416
+ amount: { pos: 1, name: 'n', default: 6 }
417
+ },
418
+ 'tail': {
419
+ dataFrame: { pos: 0, name: 'x' },
420
+ amount: { pos: 1, name: 'n', default: 6 }
421
+ },
422
+ 'subset': {
423
+ dataFrame: { pos: 0, name: 'x' },
424
+ subset: { pos: 1, name: 'subset' },
425
+ select: { pos: 2, name: 'select' },
426
+ drop: { pos: 3, name: 'drop', default: false }
427
+ },
428
+ 'filter': {
429
+ dataFrame: { pos: 0, name: '.data' },
430
+ special: ['.by', '.preserve']
431
+ },
432
+ 'select': {
433
+ dataFrame: { pos: 0, name: '.data' },
434
+ special: []
435
+ },
436
+ 'mutate': {
437
+ dataFrame: { pos: 0, name: '.data' },
438
+ special: ['.by', '.keep', '.before', '.after'],
439
+ critical: [{ pos: -1, name: '.keep' }],
440
+ checkNames: false,
441
+ noDupNames: false
442
+ },
443
+ 'transform': {
444
+ dataFrame: { pos: 0, name: '_data' },
445
+ special: [],
446
+ checkNames: true,
447
+ noDupNames: true
448
+ },
449
+ 'group_by': {
450
+ dataFrame: { pos: 0, name: '.data' },
451
+ by: { pos: 1 },
452
+ special: ['.add', '.drop']
453
+ },
454
+ 'summarise': {
455
+ dataFrame: { pos: 0, name: '.data' },
456
+ special: ['.by', '.groups']
457
+ },
458
+ 'summarize': {
459
+ dataFrame: { pos: 0, name: '.data' },
460
+ special: ['.by', '.groups']
461
+ },
462
+ 'inner_join': {
463
+ dataFrame: { pos: 0, name: 'x' },
464
+ otherDataFrame: { pos: 1, name: 'y' },
465
+ by: { pos: 2, name: 'by' },
466
+ joinAll: { pos: -1, default: false },
467
+ joinLeft: { pos: -1, default: false },
468
+ joinRight: { pos: -1, default: false },
469
+ critical: [{ pos: -1, name: 'keep' }]
470
+ },
471
+ 'left_join': {
472
+ dataFrame: { pos: 0, name: 'x' },
473
+ otherDataFrame: { pos: 1, name: 'y' },
474
+ by: { pos: 2, name: 'by' },
475
+ joinAll: { pos: -1, default: false },
476
+ joinLeft: { pos: -1, default: true },
477
+ joinRight: { pos: -1, default: false },
478
+ critical: [{ pos: -1, name: 'keep' }]
479
+ },
480
+ 'right_join': {
481
+ dataFrame: { pos: 0, name: 'x' },
482
+ otherDataFrame: { pos: 1, name: 'y' },
483
+ by: { pos: 2, name: 'by' },
484
+ joinAll: { pos: -1, default: false },
485
+ joinLeft: { pos: -1, default: false },
486
+ joinRight: { pos: -1, default: true },
487
+ critical: [{ pos: -1, name: 'keep' }]
488
+ },
489
+ 'full_join': {
490
+ dataFrame: { pos: 0, name: 'x' },
491
+ otherDataFrame: { pos: 1, name: 'y' },
492
+ by: { pos: 2, name: 'by' },
493
+ joinAll: { pos: -1, default: true },
494
+ joinLeft: { pos: -1, default: false },
495
+ joinRight: { pos: -1, default: false },
496
+ critical: [{ pos: -1, name: 'keep' }]
497
+ },
498
+ 'merge': {
499
+ dataFrame: { pos: 0, name: 'x' },
500
+ otherDataFrame: { pos: 1, name: 'y' },
501
+ by: { pos: 2, name: 'by' },
502
+ joinAll: { pos: 5, name: 'all', default: false },
503
+ joinLeft: { pos: 6, name: 'all.x', default: false },
504
+ joinRight: { pos: 7, name: 'all.y', default: false },
505
+ critical: [
506
+ { pos: 3, name: 'by.x' },
507
+ { pos: 4, name: 'by.y' }
508
+ ]
509
+ },
510
+ 'relocate': {
511
+ dataFrame: { pos: 0, name: '.data' },
512
+ special: ['.before', '.after'],
513
+ disallowNamedArgs: true
514
+ },
515
+ 'arrange': {
516
+ dataFrame: { pos: 0, name: '.data' },
517
+ special: ['.by_group', '.locale']
518
+ }
519
+ };
520
/**
 * Translates a named function call into the abstract data frame operations it stands for.
 *
 * Functions registered in `DataFrameFunctionMapper` are delegated to their dedicated mapper, unless
 * `hasCriticalArgument` reports a critical argument, in which case a single `unknown` operation is produced.
 * Functions found via `getOtherDataFrameFunction` are mapped to `unknown` (type `entry_point`) or passed to
 * `mapDataFrameUnknown` (types `transformation` and `modification`).
 *
 * @param node - The R node of the function call
 * @param dfg - The data flow graph for resolving the arguments
 * @param config - The flowR configuration to use
 * @returns Data frame expression info containing the mapped abstract data frame operations, or `undefined` if the node does not represent a data frame function call
 */
function mapDataFrameFunctionCall(node, dfg, config) {
    const isNamedCall = node.type === type_1.RType.FunctionCall && node.named;
    if (!isNamedCall) {
        return undefined;
    }
    const resolveInfo = { graph: dfg, idMap: dfg.idMap, full: true, resolve: config_1.VariableResolve.Alias };
    const calledName = node.functionName.content;
    let operations;
    if (isDataFrameFunction(calledName)) {
        const mapper = DataFrameFunctionMapper[calledName].mapper;
        const params = DataFrameFunctionParamsMapper[calledName];
        const callArgs = (0, arguments_1.getFunctionArguments)(node, dfg);
        operations = (0, arguments_1.hasCriticalArgument)(callArgs, params.critical, resolveInfo)
            ? [{ operation: 'unknown', operand: undefined }]
            : mapper(callArgs, params, resolveInfo, config);
    }
    else {
        const mapping = getOtherDataFrameFunction(calledName);
        if (mapping === undefined) {
            return undefined;
        }
        switch (mapping.type) {
            case 'entry_point':
                operations = [{ operation: 'unknown', operand: undefined }];
                break;
            case 'transformation':
            case 'modification': {
                const callArgs = (0, arguments_1.getFunctionArguments)(node, dfg);
                operations = mapDataFrameUnknown(callArgs, mapping, resolveInfo);
                break;
            }
            default:
                (0, assert_1.assertUnreachable)(mapping);
        }
    }
    // a mapper may return `undefined` to signal that the call does not operate on a data frame
    return operations === undefined ? undefined : { type: 'expression', operations };
}
566
/**
 * Checks whether a function name is an own key of `DataFrameFunctionMapper`.
 *
 * @param functionName - The name of the called function
 * @returns `true` if `DataFrameFunctionMapper` has an own property with that name
 */
function isDataFrameFunction(functionName) {
    // an own-property check is required, as `functionName in DataFrameFunctionMapper` would also be true for inherited keys like "toString"
    return Object.hasOwn(DataFrameFunctionMapper, functionName);
}
570
/**
 * Looks up the first entry of `OtherDataFrameFunctions` whose `names` list contains the given function name.
 *
 * @param functionName - The name of the called function
 * @returns The matching entry, or `undefined` if no entry lists the name
 */
function getOtherDataFrameFunction(functionName) {
    for (const candidate of OtherDataFrameFunctions) {
        if (candidate.names.includes(functionName)) {
            return candidate;
        }
    }
    return undefined;
}
573
/**
 * Maps a data frame creation call to a single `create` operation.
 *
 * The row count is the largest resolved argument vector length (at least 0) and is only known if the
 * length of every argument can be resolved. The column names are only known if, in addition, the
 * `checkNames` and `noDupNames` arguments resolve to booleans.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`checkNames`, `noDupNames`, and `special` are read)
 * @param info - Argument resolve information
 * @returns An array with exactly one `create` operation
 */
function mapDataFrameCreate(args, params, info) {
    const checkNames = (0, arguments_1.getArgumentValue)(args, params.checkNames, info);
    const noDupNames = (0, arguments_1.getArgumentValue)(args, params.noDupNames, info);
    const columnArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const names = columnArgs.map(arg => (0, resolve_args_1.resolveIdToArgName)(arg, info));
    const lengths = columnArgs.map(arg => (0, resolve_args_1.resolveIdToArgVectorLength)(arg, info));
    const allVectors = lengths.every(assert_1.isNotUndefined);
    const rows = allVectors ? Math.max(...lengths, 0) : undefined;
    const flagsResolved = typeof checkNames === 'boolean' && typeof noDupNames === 'boolean';
    let colnames;
    if (!allVectors || !flagsResolved) {
        // over-approximate the column names if arguments are present but cannot be resolved to values
        colnames = undefined;
    }
    else if (rows === 0) {
        colnames = [];
    }
    else {
        colnames = (0, arguments_1.filterValidNames)(names, checkNames, noDupNames);
    }
    return [{ operation: 'create', operand: undefined, colnames, rows }];
}
599
/**
 * Maps a data frame conversion call to an `identity` operation on its data frame argument.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`dataFrame` is read)
 * @param info - Argument resolve information
 * @returns One `identity` operation, or one `unknown` operation if the argument is empty or has no value
 */
function mapDataFrameConvert(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    const hasValue = dataFrame !== r_function_call_1.EmptyArgument && dataFrame?.value !== undefined;
    if (hasValue) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    return [{ operation: 'unknown', operand: undefined }];
}
609
/**
 * Maps a data frame read call to a single `read` operation.
 *
 * If a request for the content can be created, reading is enabled in the configuration, and all relevant
 * arguments resolve to constants, the content is scanned line by line to derive column names and row count.
 * Otherwise, both the column names and the row count are left `undefined`.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the read function
 * @param info - Argument resolve information
 * @param config - The flowR configuration (`abstractInterpretation.dataFrame.readLoadedData` is consulted)
 * @returns An array with exactly one `read` operation
 */
function mapDataFrameRead(args, params, info, config) {
    const fileNameArg = (0, arguments_1.getFunctionArgument)(args, params.fileName, info);
    const textArg = params.text ? (0, arguments_1.getFunctionArgument)(args, params.text, info) : undefined;
    const { source, request } = getRequestFromRead(fileNameArg, textArg, params, info, config);
    const header = (0, arguments_1.getArgumentValue)(args, params.header, info);
    const separator = (0, arguments_1.getArgumentValue)(args, params.separator, info);
    const quote = (0, arguments_1.getArgumentValue)(args, params.quote, info);
    const comment = (0, arguments_1.getArgumentValue)(args, params.comment, info);
    const skipLines = (0, arguments_1.getArgumentValue)(args, params.skipLines, info);
    const checkNames = (0, arguments_1.getArgumentValue)(args, params.checkNames, info);
    const noDupNames = (0, arguments_1.getArgumentValue)(args, params.noDupNames, info);
    const readOperation = (colnames, rows) => [{ operation: 'read', operand: undefined, source, colnames, rows }];
    const hasConstantArguments = typeof header === 'boolean'
        && [separator, quote, comment].every(value => typeof value === 'string')
        && typeof skipLines === 'number'
        && [checkNames, noDupNames].every(value => typeof value === 'boolean');
    if (request === undefined || !config.abstractInterpretation.dataFrame.readLoadedData.readExternalFiles || !hasConstantArguments) {
        return readOperation(undefined, undefined);
    }
    const commentPattern = new RegExp(`\\s*[${(0, arguments_1.escapeRegExp)(comment, true)}].*`);
    let firstEntries = undefined;
    let firstEntriesLine = 0;
    let dataRows = 0;
    const visitLine = (line, lineNumber) => {
        const raw = line.toString();
        // comments are only stripped if a (non-empty) comment char is set
        const content = comment ? raw.replace(commentPattern, '') : raw;
        const isRelevant = content.length > 0 && lineNumber >= (skipLines ?? 0);
        if (!isRelevant) {
            return;
        }
        if (firstEntries === undefined) {
            firstEntries = getEntriesFromCsvLine(content, separator, quote, comment);
            firstEntriesLine = lineNumber;
        }
        // with a header, the first relevant line does not count as a data row
        if (!header || lineNumber > firstEntriesLine) {
            dataRows++;
        }
    };
    const allLines = parseRequestContent(request, visitLine, config.abstractInterpretation.dataFrame.readLoadedData.maxReadLines);
    let colnames;
    if (header) {
        colnames = (0, arguments_1.filterValidNames)(firstEntries, checkNames, noDupNames, params.noEmptyNames);
    }
    else if (firstEntries !== undefined) {
        colnames = new Array(firstEntries.length).fill(undefined);
    }
    // if not all lines were parsed, the counted rows are only a lower bound
    return readOperation(colnames, allLines ? dataRows : [dataRows, Infinity]);
}
663
/**
 * Maps a column-binding call to abstract operations.
 *
 * Every further argument that resolves to a data frame shape yields a `concatCols` operation; the remaining
 * arguments are collected into one trailing `addCols` operation.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`special` is read)
 * @param info - Argument resolve information (its `graph` is used to resolve data frame shapes)
 * @returns The mapped operations, or `undefined` if no argument is a data frame argument
 */
function mapDataFrameColBind(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = effectiveArgs.find(arg => (0, arguments_1.isDataFrameArgument)(arg, info));
    if (dataFrame === undefined) {
        return undefined;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    // only the first operation refers to the data frame argument
    let operand = dataFrame.value;
    let addedNames = [];
    for (const arg of effectiveArgs) {
        if (arg === dataFrame || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const otherShape = (0, shape_inference_1.resolveIdToDataFrameShape)(arg.value, info.graph);
        if (otherShape !== undefined) {
            operations.push({ operation: 'concatCols', operand: operand?.info.id, other: otherShape });
            operand = undefined;
            continue;
        }
        if ((0, resolve_args_1.resolveIdToArgValue)(arg, info) === undefined) {
            // added columns are top if argument cannot be resolved to constant (vector-like) value
            addedNames = undefined;
            continue;
        }
        const name = (0, resolve_args_1.resolveIdToArgName)(arg, info);
        addedNames?.push(name);
    }
    if (addedNames === undefined || addedNames.length > 0) {
        operations.push({ operation: 'addCols', operand: operand?.info.id, colnames: addedNames });
    }
    return operations;
}
705
/**
 * Maps a row-binding call to abstract operations.
 *
 * Every further argument that resolves to a data frame shape yields a `concatRows` operation; each remaining
 * argument that resolves to a value counts as one added row of a trailing `addRows` operation.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`special` is read)
 * @param info - Argument resolve information (its `graph` is used to resolve data frame shapes)
 * @returns The mapped operations, or `undefined` if no argument is a data frame argument
 */
function mapDataFrameRowBind(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = effectiveArgs.find(arg => (0, arguments_1.isDataFrameArgument)(arg, info));
    if (dataFrame === undefined) {
        return undefined;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    // only the first operation refers to the data frame argument
    let operand = dataFrame.value;
    let addedRows = 0;
    for (const arg of effectiveArgs) {
        if (arg === dataFrame || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const otherShape = (0, shape_inference_1.resolveIdToDataFrameShape)(arg.value, info.graph);
        if (otherShape !== undefined) {
            operations.push({ operation: 'concatRows', operand: operand?.info.id, other: otherShape });
            operand = undefined;
        }
        else if ((0, resolve_args_1.resolveIdToArgValue)(arg, info) === undefined) {
            // number of added rows is top if arguments cannot be resolved to constant (vector-like) value
            addedRows = undefined;
        }
        else if (addedRows !== undefined) {
            addedRows += 1;
        }
    }
    if (addedRows === undefined || addedRows > 0) {
        operations.push({ operation: 'addRows', operand: operand?.info.id, rows: addedRows });
    }
    return operations;
}
746
/**
 * Maps a `head`/`tail`-like call to row (and optionally column) subsetting operations.
 *
 * A numeric amount selects rows; an array of up to two numbers selects rows and columns.
 * Non-negative amounts map to `subsetRows`/`subsetCols`, negative amounts to `removeRows`/`removeCols`.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`dataFrame` and `amount` are read)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameHeadTail(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const result = [];
    const amount = (0, arguments_1.getArgumentValue)(args, params.amount, info);
    let rows = undefined;
    let cols = undefined;
    if (typeof amount === 'number') {
        rows = amount;
    }
    else if (Array.isArray(amount) && amount.length <= 2 && amount.every(value => typeof value === 'number')) {
        rows = amount[0];
        cols = amount[1];
    }
    result.push({
        operation: rows === undefined || rows >= 0 ? 'subsetRows' : 'removeRows',
        operand: dataFrame.value.info.id,
        rows: rows !== undefined ? Math.abs(rows) : undefined
    });
    if (cols !== undefined) {
        const colCount = Math.abs(cols);
        // `Array(n)` throws a RangeError unless `n` is an integer in [0, 2^32 - 1], so a fractional, NaN,
        // or infinite amount must not be used as array length; the column names are over-approximated instead
        const isValidLength = Number.isInteger(colCount) && colCount <= 2 ** 32 - 1;
        result.push({
            operation: cols >= 0 ? 'subsetCols' : 'removeCols',
            operand: undefined,
            colnames: isValidLength ? Array(colCount).fill(undefined) : undefined
        });
    }
    return result;
}
776
/**
 * Maps a `subset`-like call to column access, row filter, and column selection operations.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`dataFrame`, `subset`, `select`, and `drop` are read)
 * @param info - Argument resolve information (its `graph` is used to find unresolved symbols)
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameSubset(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    if (args.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    // the operand is reset as soon as an operation produces a new (intermediate) data frame
    let operand = dataFrame.value;
    const filterArg = (0, arguments_1.getFunctionArgument)(args, params.subset, info);
    const filterValue = (0, resolve_args_1.resolveIdToArgValue)(filterArg, info);
    const selectArg = (0, arguments_1.getFunctionArgument)(args, params.select, info);
    const dropArg = (0, arguments_1.getFunctionArgument)(args, params.drop, info);
    const condition = typeof filterValue === 'boolean' ? filterValue : undefined;
    const filterNames = (0, arguments_1.getUnresolvedSymbolsInExpression)(filterArg, info.graph);
    const { selectedCols, unselectedCols } = getSelectedColumns([selectArg], info);
    const accessedCols = [...filterNames, ...selectedCols ?? [], ...unselectedCols ?? []];
    const namedAccess = accessedCols.filter(col => typeof col === 'string');
    const indexedAccess = accessedCols.filter(col => typeof col === 'number');
    const mixedAccess = namedAccess.length > 0 && indexedAccess.length > 0;
    const duplicateCols = accessedCols.some((col, index) => col !== undefined && accessedCols.indexOf(col) !== index);
    if (namedAccess.length > 0) {
        operations.push({ operation: 'accessCols', operand: operand?.info.id, columns: namedAccess });
    }
    if (indexedAccess.length > 0) {
        operations.push({ operation: 'accessCols', operand: operand?.info.id, columns: indexedAccess.map(col => Math.abs(col)) });
    }
    if (filterArg !== undefined && filterArg !== r_function_call_1.EmptyArgument) {
        operations.push({ operation: 'filterRows', operand: operand?.info.id, condition });
        operand = undefined;
    }
    if (dropArg && accessedCols.length <= 1) {
        return operations;
    }
    if (unselectedCols === undefined || unselectedCols.length > 0) {
        operations.push({
            operation: 'removeCols',
            operand: operand?.info.id,
            colnames: unselectedCols?.map(col => typeof col === 'string' ? col : undefined)
        });
        operand = undefined;
    }
    if (selectedCols === undefined || selectedCols.length > 0) {
        const subsetOperation = {
            operation: 'subsetCols',
            operand: operand?.info.id,
            colnames: selectedCols?.map(col => typeof col === 'string' ? col : undefined)
        };
        if (duplicateCols || mixedAccess) {
            subsetOperation.options = { duplicateCols: true };
        }
        operations.push(subsetOperation);
    }
    return operations;
}
839
/**
 * Maps a `filter`-like call to an optional `accessCols` operation followed by a `filterRows` operation.
 *
 * The filter condition is only known if every filter argument resolves to a boolean, in which case it is
 * the conjunction of all resolved values.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`special` and `dataFrame` are read)
 * @param info - Argument resolve information (its `graph` is used to find unresolved symbols)
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameFilter(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const conditionArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const conditionValues = conditionArgs.map(arg => (0, resolve_args_1.resolveIdToArgValue)(arg, info));
    const accessedNames = conditionArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    const allBoolean = conditionValues.every(value => typeof value === 'boolean');
    const condition = allBoolean ? !conditionValues.includes(false) : undefined;
    const operations = [];
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand: dataFrame.value.info.id, columns: accessedNames });
    }
    operations.push({ operation: 'filterRows', operand: dataFrame.value.info.id, condition });
    return operations;
}
867
/**
 * Maps a `select`-like call to column access, `removeCols`, and `subsetCols` operations.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`special` and `dataFrame` are read)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameSelect(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    const operations = [];
    // the operand is reset as soon as an operation produces a new (intermediate) data frame
    let operand = dataFrame.value;
    const selectArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const selection = getSelectedColumns(selectArgs, info);
    let selectedCols = selection.selectedCols;
    let unselectedCols = selection.unselectedCols;
    const accessedCols = [...selectedCols ?? [], ...unselectedCols ?? []];
    const namedAccess = accessedCols.filter(col => typeof col === 'string');
    const indexedAccess = accessedCols.filter(col => typeof col === 'number');
    const mixedAccess = namedAccess.length > 0 && indexedAccess.length > 0;
    const duplicateAccess = accessedCols.some(col => col !== undefined && accessedCols.indexOf(col) !== accessedCols.lastIndexOf(col));
    const renamedCols = selectArgs.some(arguments_1.isNamedArgument);
    if (mixedAccess || duplicateAccess) {
        // map to top if columns are selected mixed by string and number, or are selected duplicate
        selectedCols = undefined;
        unselectedCols = [];
    }
    if (namedAccess.length > 0) {
        operations.push({ operation: 'accessCols', operand: operand?.info.id, columns: namedAccess });
    }
    if (indexedAccess.length > 0) {
        operations.push({ operation: 'accessCols', operand: operand?.info.id, columns: indexedAccess.map(col => Math.abs(col)) });
    }
    if (unselectedCols === undefined || unselectedCols.length > 0) {
        operations.push({
            operation: 'removeCols',
            operand: operand?.info.id,
            colnames: unselectedCols?.map(col => typeof col === 'string' ? col : undefined)
        });
        operand = undefined;
    }
    if (selectedCols === undefined || selectedCols.length > 0 || unselectedCols?.length === 0) {
        const subsetOperation = {
            operation: 'subsetCols',
            operand: operand?.info.id,
            colnames: selectedCols?.map(col => typeof col === 'string' ? col : undefined)
        };
        if (renamedCols) {
            subsetOperation.options = { renamedCols: true };
        }
        operations.push(subsetOperation);
    }
    return operations;
}
919
/**
 * Maps a `mutate`-like call to column access, `removeCols`, and `mutateCols` operations.
 *
 * Arguments for which `isRNull` holds are treated as deleted columns, all other arguments as mutated columns.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`special`, `dataFrame`, `checkNames`, and `noDupNames` are read)
 * @param info - Argument resolve information (its `graph` is used to find unresolved symbols)
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameMutate(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    // the operand is reset as soon as an operation produces a new (intermediate) data frame
    let operand = dataFrame.value;
    const mutateArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const deletedArgNames = mutateArgs
        .filter(arguments_1.isRNull)
        .map(arg => (0, resolve_args_1.resolveIdToArgName)(arg, info));
    const mutatedArgNames = mutateArgs
        .filter(arg => !(0, arguments_1.isRNull)(arg))
        .map(arg => (0, resolve_args_1.resolveIdToArgName)(arg, info));
    // only column names that are not created by mutation are preconditions on the operand
    const accessedNames = mutateArgs
        .flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph))
        .filter(name => !mutatedArgNames?.includes(name));
    const deletedCols = (0, arguments_1.filterValidNames)(deletedArgNames, params.checkNames, params.noDupNames);
    const mutatedCols = (0, arguments_1.filterValidNames)(mutatedArgNames, params.checkNames, params.noDupNames);
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand: operand?.info.id, columns: accessedNames });
    }
    if (deletedCols === undefined || deletedCols.length > 0) {
        operations.push({
            operation: 'removeCols',
            operand: operand?.info.id,
            colnames: deletedCols,
            options: { maybe: true }
        });
        operand = undefined;
    }
    if (mutatedCols === undefined || mutatedCols.length > 0 || deletedCols?.length === 0) {
        operations.push({ operation: 'mutateCols', operand: operand?.info.id, colnames: mutatedCols });
    }
    return operations;
}
969
/**
 * Maps a `group_by`-like call to an optional `accessCols` operation followed by a `groupBy` operation.
 *
 * The `mutatedCols` option is set if any grouping argument is named or cannot be resolved to a name.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`special` and `dataFrame` are read)
 * @param info - Argument resolve information (its `graph` is used to find unresolved symbols)
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameGroupBy(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const groupArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const accessedNames = groupArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    const byNames = groupArgs.map(arg => {
        if ((0, arguments_1.isNamedArgument)(arg)) {
            return (0, resolve_args_1.resolveIdToArgName)(arg, info);
        }
        return (0, resolve_args_1.resolveIdToArgValueSymbolName)(arg, info);
    });
    const mutatedCols = groupArgs.some(arguments_1.isNamedArgument) || byNames.some(assert_1.isUndefined);
    const operations = [];
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand: dataFrame.value.info.id, columns: accessedNames });
    }
    const groupOperation = { operation: 'groupBy', operand: dataFrame.value.info.id, by: byNames };
    if (mutatedCols) {
        groupOperation.options = { mutatedCols: true };
    }
    operations.push(groupOperation);
    return operations;
}
998
/**
 * Maps a `summarize`-like call to an optional `accessCols` operation followed by a `summarize` operation.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`special` and `dataFrame` are read)
 * @param info - Argument resolve information (its `graph` is used to find unresolved symbols)
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameSummarize(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    const summaryArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const accessedNames = summaryArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    const summarizedCols = summaryArgs.map(arg => (0, resolve_args_1.resolveIdToArgName)(arg, info));
    const operations = [];
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand: dataFrame.value.info.id, columns: accessedNames });
    }
    operations.push({ operation: 'summarize', operand: dataFrame.value.info.id, colnames: summarizedCols });
    return operations;
}
1022
/**
 * Maps a join or merge call to optional `accessCols` operations followed by a `join` operation.
 *
 * The join is mapped to `unknown` if any of the `joinAll`, `joinLeft`, or `joinRight` values does not
 * resolve to a boolean. The `natural` option is set if no `by` argument is present.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`dataFrame`, `otherDataFrame`, `by`, and the join flags are read)
 * @param info - Argument resolve information (its `graph` is used to resolve the other data frame shape)
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameJoin(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    const joinAll = (0, arguments_1.getArgumentValue)(args, params.joinAll, info);
    const joinLeft = (0, arguments_1.getArgumentValue)(args, params.joinLeft, info);
    const joinRight = (0, arguments_1.getArgumentValue)(args, params.joinRight, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    if (args.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const flagsResolved = typeof joinAll === 'boolean' && typeof joinLeft === 'boolean' && typeof joinRight === 'boolean';
    if (!flagsResolved) {
        return [{ operation: 'unknown', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    const otherArg = (0, arguments_1.getFunctionArgument)(args, params.otherDataFrame, info);
    const byArg = (0, arguments_1.getFunctionArgument)(args, params.by, info);
    const otherDataFrame = (0, shape_inference_1.resolveIdToDataFrameShape)(otherArg, info.graph) ?? domain_1.DataFrameTop;
    const joinType = getJoinType(joinAll, joinLeft, joinRight);
    const isString = (value) => typeof value === 'string';
    const isNumber = (value) => typeof value === 'number';
    // join columns stay `undefined` unless they resolve to names only or to indices only
    let byCols;
    if (byArg !== undefined) {
        const byValue = (0, resolve_args_1.resolveIdToArgValue)(byArg, info);
        if (isString(byValue) || isNumber(byValue)) {
            byCols = [byValue];
        }
        else if (Array.isArray(byValue) && (byValue.every(by => isString(by)) || byValue.every(by => isNumber(by)))) {
            byCols = byValue;
        }
    }
    if (byCols?.some(by => isString(by))) {
        operations.push({
            operation: 'accessCols',
            operand: dataFrame.value.info.id,
            columns: byCols.filter(by => isString(by))
        });
    }
    if (byCols?.some(by => isNumber(by))) {
        operations.push({
            operation: 'accessCols',
            operand: dataFrame.value.info.id,
            columns: byCols.filter(by => isNumber(by))
        });
    }
    operations.push({
        operation: 'join',
        operand: dataFrame.value.info.id,
        other: otherDataFrame,
        by: byCols?.map(by => isString(by) ? by : undefined),
        options: { join: joinType, natural: byArg === undefined }
    });
    return operations;
}
1074
/**
 * Maps a call that leaves the shape of its data frame argument untouched to an `identity` operation.
 *
 * If `params.disallowNamedArgs` is set and any effective argument is named, the call is mapped to
 * `unknown` instead.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description (`special`, `dataFrame`, and `disallowNamedArgs` are read)
 * @param info - Argument resolve information
 * @returns The mapped operation, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameIdentity(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    const hasDisallowedNamedArg = params.disallowNamedArgs && effectiveArgs.some(arguments_1.isNamedArgument);
    const operation = hasDisallowedNamedArg ? 'unknown' : 'identity';
    return [{ operation, operand: dataFrame.value.info.id }];
}
1088
/**
 * Maps a data frame function without a dedicated mapper to an `identity` operation on its data frame argument.
 *
 * The data frame argument is taken from `params.dataFrame` if that is defined, otherwise the first
 * argument accepted by `isDataFrameArgument` is used.
 *
 * @param args - The arguments of the function call
 * @param params - The function mapping (`dataFrame` and `constraintType` are read)
 * @param info - Argument resolve information
 * @returns The mapped operation (carrying `type` if `params.constraintType` is defined), or `undefined` if there is no data frame argument
 */
function mapDataFrameUnknown(args, params, info) {
    const dataFrame = params.dataFrame !== undefined
        ? (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info)
        : args.find(arg => (0, arguments_1.isDataFrameArgument)(arg, info));
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return undefined;
    }
    const operation = { operation: 'identity', operand: dataFrame.value.info.id };
    if (params.constraintType !== undefined) {
        operation.type = params.constraintType;
    }
    return [operation];
}
1105
/**
 * Determines the source and the content request of a data frame read call.
 *
 * A present file name argument takes precedence over the text argument. A file name that resolves to exactly
 * one source yields a request for that source; otherwise, if the function has no `text` parameter and the
 * unescaped file name contains a newline, the file name itself is used as inline content.
 *
 * @param fileNameArg - The file name argument of the call, if any
 * @param textArg - The text argument of the call, if any
 * @param params - The parameter description (`text` is read)
 * @param info - Argument resolve information
 * @param config - The flowR configuration (`solver.resolveSource` is read)
 * @returns The `source` and `request`, each of which may be `undefined`
 */
function getRequestFromRead(fileNameArg, textArg, params, info, config) {
    const isPresent = (arg) => arg !== undefined && arg !== r_function_call_1.EmptyArgument;
    if (isPresent(fileNameArg)) {
        const fileName = (0, resolve_args_1.resolveIdToArgValue)(fileNameArg, info);
        if (typeof fileName !== 'string') {
            return { source: undefined, request: undefined };
        }
        const referenceChain = fileNameArg.info.file ? [(0, retriever_1.requestFromInput)(`file://${fileNameArg.info.file}`)] : [];
        const sources = (0, built_in_source_1.findSource)(config.solver.resolveSource, fileName, { referenceChain });
        if (sources?.length === 1) {
            // create request from resolved source file path
            const resolvedSource = sources[0];
            return { source: resolvedSource, request: (0, built_in_source_1.getSourceProvider)().createRequest(resolvedSource) };
        }
        if (params.text === undefined && (0, resolve_args_1.unescapeSpecialChars)(fileName).includes('\n')) {
            // create request from string if file name argument contains newline
            return { source: fileName, request: (0, retriever_1.requestFromInput)((0, resolve_args_1.unescapeSpecialChars)(fileName)) };
        }
        return { source: fileName, request: undefined };
    }
    if (isPresent(textArg)) {
        const text = (0, resolve_args_1.resolveIdToArgValue)(textArg, info);
        if (typeof text === 'string') {
            return { source: text, request: (0, retriever_1.requestFromInput)((0, resolve_args_1.unescapeSpecialChars)(text)) };
        }
    }
    return { source: undefined, request: undefined };
}
1134
/**
 * Feeds the content of a request to `parser`, one line at a time.
 *
 * - `'text'` requests: `request.content` is split at `\n` and every line is passed to `parser`
 *   together with its zero-based index. At most `maxLines` lines are passed.
 * - `'file'` requests: the work is delegated to `readLineByLineSync`, with `request.content` as first argument.
 *
 * @param request  - The request with its `request` type tag and its `content`
 * @param parser   - Callback invoked as `parser(line, index)` for text requests
 * @param maxLines - Optional upper bound on the number of lines handed to `parser` (no bound if omitted)
 * @returns For text requests, `true` if every line was handed to `parser` and `false` if the content was
 *          truncated by `maxLines`; for file requests, the result of `readLineByLineSync`
 */
function parseRequestContent(request, parser, maxLines) {
    const requestType = request.request;
    switch (requestType) {
        case 'text': {
            const lines = request.content.split('\n');
            const limit = maxLines ?? Infinity;
            const lineCount = Math.min(lines.length, limit);
            // honor the line limit for inline text as well, so that both request types are bounded alike
            for (let index = 0; index < lineCount; index++) {
                parser(lines[index], index);
            }
            // NOTE(review): assumes `false` means "not all lines were read", as presumably returned by
            // `readLineByLineSync` when `maxLines` is exceeded — confirm against that helper
            return lines.length <= limit;
        }
        case 'file':
            return (0, files_1.readLineByLineSync)(request.content, parser, maxLines);
        default:
            (0, assert_1.assertUnreachable)(requestType);
    }
}
1146
/**
 * Gets all entries from a line of a CSV file using a custom separator char, quote char, and comment char.
 *
 * Quoted entries may contain the separator; a doubled quote char inside a quoted entry stands for a single quote char.
 * If a comment char is given, everything from its first occurrence to the end of the line is dropped before splitting.
 *
 * @param line    - The line to split
 * @param sep     - The separator char (defaults to `,`)
 * @param quote   - The quote char (defaults to `"`)
 * @param comment - The comment char (defaults to the empty string, i.e., no comment handling)
 * @param trim    - Whether each entry is trimmed (defaults to `true`)
 * @returns The entries of the line in order of appearance
 */
function getEntriesFromCsvLine(line, sep = ',', quote = '"', comment = '', trim = true) {
    // only allow tokens like `\s`, `\t`, or `\n` in separator, quote, and comment chars
    const sepClass = (0, arguments_1.escapeRegExp)(sep, true);
    const quoteClass = (0, arguments_1.escapeRegExp)(quote, true);
    const commentClass = (0, arguments_1.escapeRegExp)(comment, true);
    // do not allow unquoted empty entries in whitespace-separated files
    const unquotedQuantifier = sepClass === '\\s' ? '+' : '*';
    const content = commentClass ? line.replace(new RegExp(`[${commentClass}].*`), '') : line;
    const quotedEntry = `[${quoteClass}]((?:[^${quoteClass}]|[${quoteClass}]{2})*)[${quoteClass}]`;
    const unquotedEntry = `([^${sepClass}]${unquotedQuantifier})`;
    // an entry must start at the beginning of the line or directly after a separator
    const entryPattern = new RegExp(`(?<=^|[${sepClass}])(?:${quotedEntry}|${unquotedEntry})`, 'g');
    // regex for doubled quotes like `""` or `''`
    const doubledQuotePattern = new RegExp(`([${quoteClass}])\\1`, 'g');
    return Array.from(content.matchAll(entryPattern), (match) => {
        const entry = match[1]?.replace(doubledQuotePattern, '$1') ?? match[2];
        return trim ? entry.trim() : entry;
    });
}
1163
/**
 * Resolves all selected columns in a select expression, such as `id`, `"id"`, `1`, `c(id, name)`, `c("id", "name")`, `1:2`, `-id`, `-1`, `-c(id, name)`, `c(-1, -2)`, etc.
 *
 * Calls to `c` are resolved recursively, a unary `-` swaps the selected and unselected columns of its operand,
 * and a `:` range is resolved to its values (non-negative values are selected, the absolute values of negative ones are unselected).
 * Resolving negations and ranges requires `info.idMap`.
 *
 * @param args - The arguments of the select expression
 * @param info - Resolve information that is passed on to the resolve helpers
 * @returns An object `{ selectedCols, unselectedCols }`; `selectedCols` is `undefined` as soon as an argument
 *          cannot be handled, and either list is `undefined` if a nested expression yields an `undefined` list
 */
function getSelectedColumns(args, info) {
    // joining with an unknown list yields an unknown list
    const append = (known, extra) => {
        if (known === undefined || extra === undefined) {
            return undefined;
        }
        return [...known, ...extra];
    };
    let included = [];
    let excluded = [];
    for (const arg of args) {
        if (arg === undefined || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const node = arg.value;
        const nodeType = node?.type;
        if (nodeType === type_1.RType.FunctionCall && node.named && node.functionName.content === 'c') {
            const nested = getSelectedColumns(node.arguments, info);
            included = append(included, nested.selectedCols);
            excluded = append(excluded, nested.unselectedCols);
        }
        else if (nodeType === type_1.RType.UnaryOp && node.operator === '-' && info.idMap !== undefined) {
            // a negation swaps the roles of the selected and unselected columns of its operand
            const negated = getSelectedColumns([(0, make_argument_1.toUnnamedArgument)(node.operand, info.idMap)], info);
            included = append(included, negated.unselectedCols);
            excluded = append(excluded, negated.selectedCols);
        }
        else if (nodeType === type_1.RType.BinaryOp && node.operator === ':' && info.idMap !== undefined) {
            const indices = (0, resolve_args_1.resolveIdToArgValue)((0, make_argument_1.toUnnamedArgument)(node, info.idMap), { ...info, resolve: config_1.VariableResolve.Disabled });
            if (Array.isArray(indices) && indices.every(index => typeof index === 'number')) {
                const nonNegative = [];
                const negative = [];
                for (const index of indices) {
                    if (index >= 0) {
                        nonNegative.push(index);
                    }
                    else if (index < 0) {
                        negative.push(Math.abs(index));
                    }
                }
                included = append(included, nonNegative);
                excluded = append(excluded, negative);
            }
            else {
                included = undefined;
            }
        }
        else if (nodeType === type_1.RType.Symbol || nodeType === type_1.RType.String) {
            if (included !== undefined) {
                included.push((0, resolve_args_1.resolveIdToArgValueSymbolName)(arg, info));
            }
        }
        else if (nodeType === type_1.RType.Number) {
            if (included !== undefined) {
                included.push(node.content.num);
            }
        }
        else {
            included = undefined;
        }
    }
    return { selectedCols: included, unselectedCols: excluded };
}
1205
/**
 * Derives the join type from the flags that state which side's rows are kept.
 *
 * @param joinAll   - Whether the rows of both sides are kept
 * @param joinLeft  - Whether the rows of the left side are kept
 * @param joinRight - Whether the rows of the right side are kept
 * @returns `'full'` if `joinAll` is set or both sides are kept, `'left'` or `'right'` if only that side is kept, and `'inner'` otherwise
 */
function getJoinType(joinAll, joinLeft, joinRight) {
    if (joinAll) {
        return 'full';
    }
    if (joinLeft) {
        return joinRight ? 'full' : 'left';
    }
    return joinRight ? 'right' : 'inner';
}
1219
+ //# sourceMappingURL=function-mapper.js.map