@eagleoutice/flowr 2.2.16 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/README.md +48 -20
  2. package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
  3. package/abstract-interpretation/data-frame/absint-info.js +31 -0
  4. package/abstract-interpretation/data-frame/absint-visitor.d.ts +58 -0
  5. package/abstract-interpretation/data-frame/absint-visitor.js +171 -0
  6. package/abstract-interpretation/data-frame/domain.d.ts +107 -0
  7. package/abstract-interpretation/data-frame/domain.js +315 -0
  8. package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
  9. package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
  10. package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
  11. package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
  12. package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
  13. package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
  14. package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
  15. package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
  16. package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
  17. package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
  18. package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
  19. package/abstract-interpretation/data-frame/resolve-args.js +118 -0
  20. package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
  21. package/abstract-interpretation/data-frame/semantics.js +363 -0
  22. package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
  23. package/abstract-interpretation/data-frame/shape-inference.js +111 -0
  24. package/benchmark/slicer.d.ts +15 -1
  25. package/benchmark/slicer.js +137 -0
  26. package/benchmark/stats/print.js +123 -45
  27. package/benchmark/stats/size-of.d.ts +7 -0
  28. package/benchmark/stats/size-of.js +1 -0
  29. package/benchmark/stats/stats.d.ts +30 -1
  30. package/benchmark/stats/stats.js +4 -2
  31. package/benchmark/summarizer/data.d.ts +33 -2
  32. package/benchmark/summarizer/first-phase/input.js +5 -1
  33. package/benchmark/summarizer/first-phase/process.js +47 -1
  34. package/benchmark/summarizer/second-phase/graph.js +1 -1
  35. package/benchmark/summarizer/second-phase/process.js +102 -4
  36. package/cli/benchmark-app.d.ts +2 -0
  37. package/cli/benchmark-app.js +2 -0
  38. package/cli/benchmark-helper-app.d.ts +2 -0
  39. package/cli/benchmark-helper-app.js +10 -3
  40. package/cli/common/options.js +4 -0
  41. package/cli/repl/commands/repl-query.js +1 -1
  42. package/cli/repl/server/connection.js +14 -5
  43. package/config.d.ts +31 -0
  44. package/config.js +21 -1
  45. package/control-flow/basic-cfg-guided-visitor.d.ts +1 -2
  46. package/control-flow/basic-cfg-guided-visitor.js +0 -6
  47. package/control-flow/cfg-simplification.d.ts +6 -0
  48. package/control-flow/cfg-simplification.js +18 -9
  49. package/control-flow/control-flow-graph.d.ts +3 -8
  50. package/control-flow/control-flow-graph.js +5 -6
  51. package/control-flow/dfg-cfg-guided-visitor.js +1 -1
  52. package/control-flow/extract-cfg.d.ts +2 -2
  53. package/control-flow/extract-cfg.js +52 -63
  54. package/control-flow/semantic-cfg-guided-visitor.d.ts +1 -1
  55. package/control-flow/semantic-cfg-guided-visitor.js +1 -1
  56. package/core/steps/all/static-slicing/00-slice.d.ts +7 -1
  57. package/core/steps/all/static-slicing/00-slice.js +9 -3
  58. package/core/steps/pipeline/default-pipelines.d.ts +74 -74
  59. package/dataflow/environments/built-in.d.ts +7 -5
  60. package/dataflow/environments/built-in.js +16 -13
  61. package/dataflow/eval/resolve/alias-tracking.js +2 -2
  62. package/dataflow/eval/resolve/resolve.d.ts +53 -9
  63. package/dataflow/eval/resolve/resolve.js +132 -38
  64. package/dataflow/graph/dataflowgraph-builder.js +2 -2
  65. package/dataflow/graph/graph.js +1 -1
  66. package/dataflow/graph/invert-dfg.d.ts +2 -0
  67. package/dataflow/graph/invert-dfg.js +17 -0
  68. package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +1 -0
  69. package/dataflow/internal/process/functions/call/built-in/built-in-source.js +4 -0
  70. package/documentation/doc-util/doc-query.js +11 -1
  71. package/documentation/doc-util/doc-search.js +2 -2
  72. package/documentation/print-cfg-wiki.js +3 -4
  73. package/documentation/print-core-wiki.js +2 -2
  74. package/documentation/print-dataflow-graph-wiki.js +7 -0
  75. package/documentation/print-faq-wiki.js +4 -0
  76. package/documentation/print-interface-wiki.js +11 -0
  77. package/documentation/print-linter-wiki.js +36 -4
  78. package/documentation/print-linting-and-testing-wiki.js +13 -1
  79. package/documentation/print-onboarding-wiki.js +4 -0
  80. package/documentation/print-query-wiki.js +29 -3
  81. package/linter/linter-executor.js +1 -2
  82. package/linter/linter-format.d.ts +26 -4
  83. package/linter/linter-format.js +25 -6
  84. package/linter/linter-rules.d.ts +63 -12
  85. package/linter/linter-rules.js +5 -1
  86. package/linter/rules/absolute-path.d.ts +4 -7
  87. package/linter/rules/absolute-path.js +9 -6
  88. package/linter/rules/dataframe-access-validation.d.ts +55 -0
  89. package/linter/rules/dataframe-access-validation.js +118 -0
  90. package/linter/rules/dead-code.d.ts +43 -0
  91. package/linter/rules/dead-code.js +50 -0
  92. package/linter/rules/deprecated-functions.d.ts +3 -2
  93. package/linter/rules/deprecated-functions.js +3 -1
  94. package/linter/rules/file-path-validity.d.ts +4 -4
  95. package/linter/rules/file-path-validity.js +8 -6
  96. package/linter/rules/naming-convention.d.ts +5 -4
  97. package/linter/rules/naming-convention.js +8 -2
  98. package/linter/rules/seeded-randomness.d.ts +4 -3
  99. package/linter/rules/seeded-randomness.js +3 -1
  100. package/linter/rules/unused-definition.d.ts +2 -0
  101. package/linter/rules/unused-definition.js +3 -1
  102. package/package.json +2 -2
  103. package/queries/catalog/dependencies-query/dependencies-query-executor.js +6 -1
  104. package/queries/catalog/dependencies-query/function-info/read-functions.js +1 -0
  105. package/queries/catalog/dependencies-query/function-info/write-functions.js +1 -0
  106. package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
  107. package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
  108. package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
  109. package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
  110. package/queries/catalog/linter-query/linter-query-format.js +1 -1
  111. package/queries/catalog/location-map-query/location-map-query-executor.js +7 -5
  112. package/queries/catalog/location-map-query/location-map-query-format.d.ts +3 -0
  113. package/queries/catalog/location-map-query/location-map-query-format.js +1 -0
  114. package/queries/catalog/search-query/search-query-executor.js +1 -1
  115. package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -1
  116. package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +3 -0
  117. package/queries/catalog/static-slice-query/static-slice-query-format.js +3 -1
  118. package/queries/query-print.d.ts +1 -1
  119. package/queries/query-print.js +0 -1
  120. package/queries/query.d.ts +77 -6
  121. package/queries/query.js +26 -11
  122. package/search/flowr-search-builder.d.ts +6 -6
  123. package/search/flowr-search-executor.d.ts +2 -2
  124. package/search/flowr-search-executor.js +1 -1
  125. package/search/flowr-search.d.ts +13 -8
  126. package/search/flowr-search.js +21 -0
  127. package/search/search-executor/search-enrichers.d.ts +87 -20
  128. package/search/search-executor/search-enrichers.js +44 -5
  129. package/search/search-executor/search-generators.d.ts +4 -4
  130. package/search/search-executor/search-generators.js +12 -7
  131. package/search/search-executor/search-mappers.js +3 -2
  132. package/search/search-executor/search-transformer.d.ts +3 -3
  133. package/search/search-executor/search-transformer.js +2 -2
  134. package/slicing/static/static-slicer.d.ts +4 -2
  135. package/slicing/static/static-slicer.js +10 -4
  136. package/util/collections/arrays.d.ts +2 -0
  137. package/util/collections/arrays.js +9 -0
  138. package/util/files.d.ts +8 -2
  139. package/util/files.js +22 -4
  140. package/util/mermaid/dfg.js +4 -2
  141. package/util/r-value.d.ts +23 -0
  142. package/util/r-value.js +113 -0
  143. package/util/range.d.ts +1 -0
  144. package/util/range.js +5 -1
  145. package/util/version.js +1 -1
  146. package/util/cfg/cfg.d.ts +0 -0
  147. package/util/cfg/cfg.js +0 -2
@@ -0,0 +1,1219 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.mapDataFrameFunctionCall = mapDataFrameFunctionCall;
4
+ const config_1 = require("../../../config");
5
+ const make_argument_1 = require("../../../dataflow/internal/process/functions/call/argument/make-argument");
6
+ const built_in_source_1 = require("../../../dataflow/internal/process/functions/call/built-in/built-in-source");
7
+ const r_function_call_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
8
+ const type_1 = require("../../../r-bridge/lang-4.x/ast/model/type");
9
+ const retriever_1 = require("../../../r-bridge/retriever");
10
+ const assert_1 = require("../../../util/assert");
11
+ const files_1 = require("../../../util/files");
12
+ const domain_1 = require("../domain");
13
+ const resolve_args_1 = require("../resolve-args");
14
+ const shape_inference_1 = require("../shape-inference");
15
+ const arguments_1 = require("./arguments");
16
/**
 * Enumeration of the R data frame flavours distinguished by this module.
 * Each member maps a symbolic name to the corresponding R type name.
 */
const DataFrameType = {
    DataFrame: 'data.frame',
    Tibble: 'tibble',
    DataTable: 'data.table'
};
25
/**
 * Lookup table for the explicitly supported R data frame functions.
 *
 * Every key is an R function name; the value records the mapper function that models the call,
 * the library the function originates from, and the type of data frame the call returns.
 * The table is written as `[name, mapper, library, returnType]` rows and converted into
 * `{ mapper, library, returnType }` entries, keeping the row order as key order.
 */
const DataFrameFunctionMapper = Object.fromEntries([
    // creation and conversion
    ['data.frame', mapDataFrameCreate, 'base', DataFrameType.DataFrame],
    ['as.data.frame', mapDataFrameConvert, 'base', DataFrameType.DataFrame],
    // file readers of `utils`
    ['read.table', mapDataFrameRead, 'utils', DataFrameType.DataFrame],
    ['read.csv', mapDataFrameRead, 'utils', DataFrameType.DataFrame],
    ['read.csv2', mapDataFrameRead, 'utils', DataFrameType.DataFrame],
    ['read.delim', mapDataFrameRead, 'utils', DataFrameType.DataFrame],
    ['read.delim2', mapDataFrameRead, 'utils', DataFrameType.DataFrame],
    // file readers of `readr`
    ['read_table', mapDataFrameRead, 'readr', DataFrameType.Tibble],
    ['read_csv', mapDataFrameRead, 'readr', DataFrameType.Tibble],
    ['read_csv2', mapDataFrameRead, 'readr', DataFrameType.Tibble],
    ['read_tsv', mapDataFrameRead, 'readr', DataFrameType.Tibble],
    ['read_delim', mapDataFrameRead, 'readr', DataFrameType.Tibble],
    // binding
    ['cbind', mapDataFrameColBind, 'base', DataFrameType.DataFrame],
    ['rbind', mapDataFrameRowBind, 'base', DataFrameType.DataFrame],
    // row/column selection
    ['head', mapDataFrameHeadTail, 'utils', DataFrameType.DataFrame],
    ['tail', mapDataFrameHeadTail, 'utils', DataFrameType.DataFrame],
    ['subset', mapDataFrameSubset, 'base', DataFrameType.DataFrame],
    ['filter', mapDataFrameFilter, 'dplyr', DataFrameType.DataFrame],
    ['select', mapDataFrameSelect, 'dplyr', DataFrameType.DataFrame],
    // column modification
    ['mutate', mapDataFrameMutate, 'dplyr', DataFrameType.DataFrame],
    ['transform', mapDataFrameMutate, 'base', DataFrameType.DataFrame],
    // grouping and summarizing
    ['group_by', mapDataFrameGroupBy, 'dplyr', DataFrameType.Tibble],
    ['summarise', mapDataFrameSummarize, 'dplyr', DataFrameType.DataFrame],
    ['summarize', mapDataFrameSummarize, 'dplyr', DataFrameType.DataFrame],
    // joins
    ['inner_join', mapDataFrameJoin, 'dplyr', DataFrameType.DataFrame],
    ['left_join', mapDataFrameJoin, 'dplyr', DataFrameType.DataFrame],
    ['right_join', mapDataFrameJoin, 'dplyr', DataFrameType.DataFrame],
    ['full_join', mapDataFrameJoin, 'dplyr', DataFrameType.DataFrame],
    ['merge', mapDataFrameJoin, 'base', DataFrameType.DataFrame],
    // handled by the identity mapper
    ['relocate', mapDataFrameIdentity, 'dplyr', DataFrameType.DataFrame],
    ['arrange', mapDataFrameIdentity, 'dplyr', DataFrameType.DataFrame]
].map(([name, mapper, library, returnType]) => [name, { mapper, library, returnType }]));
62
/**
 * List of other data frame functions that are not explicitly supported but may return data frames.
 *
 * Each entry groups function `names` of one `library` together with the `returnType` of the result:
 * - `type: 'entry_point'` entries are mapped to a single 'unknown' operation without an operand
 *   by `mapDataFrameFunctionCall`.
 * - `type: 'transformation'` entries are passed to `mapDataFrameUnknown` together with the call arguments;
 *   `dataFrame` gives the position and name of the data frame parameter (absent for `bind_cols`/`bind_rows`).
 *
 * `getOtherDataFrameFunction` returns the first entry whose `names` contain the function name, so the order
 * matters for names that occur more than once (`melt` resolves to the `reshape2` entry).
 */
const OtherDataFrameFunctions = [
    {
        type: 'entry_point',
        names: ['anova', 'AIC', 'BIC'],
        // `anova`, `AIC` and `BIC` are generics of the `stats` package (there is no `anova` library)
        library: 'stats',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['Anova', 'Manova'],
        library: 'car',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['lmer'],
        library: 'lme4',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['data_frame', 'as_data_frame'],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['tbl', 'as.tbl'],
        library: 'dplyr',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['read_fwf', 'read_log'],
        library: 'readr',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['read_excel', 'read_xls', 'read_xlsx'],
        library: 'readxl',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['tibble', 'tibble_row', 'as_tibble', 'tribble'],
        library: 'tibble',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['data.table', 'as.data.table', 'fread'],
        library: 'data.table',
        returnType: DataFrameType.DataTable
    }, {
        type: 'transformation',
        names: ['na.omit'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'object' }
    }, {
        type: 'transformation',
        names: ['unique', 't'],
        library: 'base',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        type: 'transformation',
        names: ['aggregate'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        type: 'transformation',
        names: ['with', 'within'],
        library: 'base',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['reshape'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['melt'],
        library: 'reshape2',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: [
            'transmute', 'distinct', 'distinct_prepare', 'group_by_prepare', 'rename', 'rename_with', 'reframe',
            'slice', 'slice_head', 'slice_tail', 'slice_min', 'slice_max', 'slice_sample'
        ],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: '.data' }
    }, {
        type: 'transformation',
        names: [
            'filter_if', 'filter_at', 'filter_all', 'select_if', 'select_at', 'select_all',
            'mutate_if', 'mutate_at', 'mutate_all', 'transmute_if', 'transmute_at', 'transmute_all',
            'distinct_if', 'distinct_at', 'distinct_all', 'group_by_if', 'group_by_at', 'group_by_all',
            'summarize_if', 'summarise_if', 'summarize_at', 'summarise_at', 'summarize_all', 'summarise_all',
            'arrange_if', 'arrange_at', 'arrange_all', 'rename_if', 'rename_at', 'rename_all'
        ],
        library: 'dplyr',
        returnType: DataFrameType.Tibble,
        dataFrame: { pos: 0, name: '.tbl' }
    }, {
        type: 'transformation',
        names: [
            'semi_join', 'anti_join', 'nest_join', 'cross_join',
            'ungroup', 'count', 'tally', 'add_count', 'add_tally',
            'rows_insert', 'rows_append', 'rows_update', 'rows_patch', 'rows_upsert', 'rows_delete'
        ],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        type: 'transformation',
        names: ['bind_cols', 'bind_rows'],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'transformation',
        names: [
            'drop_na', 'replace_na', 'pivot_longer', 'pivot_wider',
            'separate', 'separate_wider_position', 'separate_wider_delim', 'unite'
        ],
        library: 'tidyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['add_column', 'add_row', 'add_case'],
        library: 'tibble',
        returnType: DataFrameType.Tibble,
        dataFrame: { pos: 0, name: '.data' }
    }, {
        type: 'transformation',
        names: ['melt', 'dcast'],
        library: 'data.table',
        returnType: DataFrameType.DataTable,
        dataFrame: { pos: 0, name: 'data' }
    }
];
206
/**
 * Mapper for defining the location of all relevant function parameters for each supported data frame function of {@link DataFrameFunctionMapper}.
 *
 * A parameter location is an object `{ pos, name?, default? }`. Entries that have no positional slot use `pos: -1`
 * (presumably meaning "only addressable by name, or fixed to `default`" - confirm against `./arguments`).
 * Besides the function-specific parameters, an entry may contain:
 * - `critical`: parameter locations handed to `hasCriticalArgument`; if that check succeeds,
 *   `mapDataFrameFunctionCall` maps the call to a single 'unknown' operation instead of invoking the mapper.
 * - `special`: argument names handed to `getEffectiveArgs` by the mappers
 *   (presumably arguments that do not contribute columns/operands - confirm against `./arguments`).
 *
 * Families of functions that share one parameter layout are produced by local factories; every factory call
 * returns fresh objects, so no entry shares state with another one.
 */
const DataFrameFunctionParamsMapper = (() => {
    /** Layout shared by `read.csv`, `read.csv2`, `read.delim`, and `read.delim2`, which only differ in the default separator. */
    const utilsReadWrapper = (separator) => ({
        fileName: { pos: 0, name: 'file' },
        header: { pos: 1, name: 'header', default: true },
        separator: { pos: 2, name: 'sep', default: separator },
        quote: { pos: 3, name: 'quote', default: '"' },
        comment: { pos: 6, name: 'comment.char', default: '' },
        skipLines: { pos: -1, name: 'skip', default: 0 },
        checkNames: { pos: -1, name: 'check.names', default: true },
        noDupNames: { pos: -1, name: 'check.names', default: true },
        text: { pos: -1, name: 'text' },
        critical: [
            { pos: -1, name: 'row.names' },
            { pos: -1, name: 'col.names' },
            { pos: -1, name: 'nrows', default: -1 },
            { pos: -1, name: 'strip.white', default: false },
            { pos: -1, name: 'blank.lines.skip', default: true },
            // the R parameter is spelled `allowEscapes` (not `allow.escapes`)
            { pos: -1, name: 'allowEscapes', default: false }
        ]
    });
    /** Layout shared by `read_csv`, `read_csv2`, and `read_tsv`, which only differ in the fixed separator. */
    const readrDelimitedReader = (separator) => ({
        fileName: { pos: 0, name: 'file' },
        header: { pos: 1, name: 'col_names', default: true },
        separator: { pos: -1, default: separator },
        quote: { pos: 8, name: 'quote', default: '"' },
        comment: { pos: 9, name: 'comment', default: '' },
        skipLines: { pos: 11, name: 'skip', default: 0 },
        checkNames: { pos: -1, default: false },
        noDupNames: { pos: -1, default: true },
        critical: [
            { pos: 3, name: 'col_select' },
            { pos: 4, name: 'id' },
            { pos: 10, name: 'trim_ws', default: true },
            { pos: 12, name: 'n_max', default: Infinity },
            { pos: 14, name: 'name_repair', default: 'unique' },
            { pos: 18, name: 'skip_empty_rows', default: true }
        ],
        noEmptyNames: true
    });
    /** Layout shared by `cbind` and `rbind`. */
    const bindParams = () => ({
        special: ['deparse.level', 'make.row.names', 'stringsAsFactors', 'factor.exclude']
    });
    /** Layout shared by `head` and `tail`. */
    const headTailParams = () => ({
        dataFrame: { pos: 0, name: 'x' },
        amount: { pos: 1, name: 'n', default: 6 }
    });
    /** Layout shared by `summarise` and `summarize`. */
    const summarizeParams = () => ({
        dataFrame: { pos: 0, name: '.data' },
        special: ['.by', '.groups']
    });
    /** Layout shared by the dplyr joins, which only differ in the fixed values of the three join flags. */
    const dplyrJoinParams = (joinAll, joinLeft, joinRight) => ({
        dataFrame: { pos: 0, name: 'x' },
        otherDataFrame: { pos: 1, name: 'y' },
        by: { pos: 2, name: 'by' },
        joinAll: { pos: -1, default: joinAll },
        joinLeft: { pos: -1, default: joinLeft },
        joinRight: { pos: -1, default: joinRight },
        critical: [{ pos: -1, name: 'keep' }]
    });
    return {
        'data.frame': {
            // both flags are driven by the same R argument `check.names`
            checkNames: { pos: -1, name: 'check.names', default: true },
            noDupNames: { pos: -1, name: 'check.names', default: true },
            special: ['row.names', 'check.rows', 'check.names', 'fix.empty.names', 'stringsAsFactors'],
            critical: [{ pos: -1, name: 'row.names' }]
        },
        'as.data.frame': {
            dataFrame: { pos: 0, name: 'x' },
            critical: []
        },
        'read.table': {
            fileName: { pos: 0, name: 'file' },
            header: { pos: 1, name: 'header', default: false },
            separator: { pos: 2, name: 'sep', default: '\\s' },
            quote: { pos: 3, name: 'quote', default: '"\'' },
            skipLines: { pos: 12, name: 'skip', default: 0 },
            checkNames: { pos: 13, name: 'check.names', default: true },
            noDupNames: { pos: 13, name: 'check.names', default: true },
            comment: { pos: 17, name: 'comment.char', default: '#' },
            text: { pos: 23, name: 'text' },
            critical: [
                { pos: 6, name: 'row.names' },
                { pos: 7, name: 'col.names' },
                { pos: 11, name: 'nrows', default: -1 },
                { pos: 15, name: 'strip.white', default: false },
                { pos: 16, name: 'blank.lines.skip', default: true },
                // the R parameter is spelled `allowEscapes` (not `allow.escapes`)
                { pos: 18, name: 'allowEscapes', default: false }
            ]
        },
        'read.csv': utilsReadWrapper(','),
        'read.csv2': utilsReadWrapper(';'),
        'read.delim': utilsReadWrapper('\\t'),
        'read.delim2': utilsReadWrapper('\\t'),
        'read_table': {
            fileName: { pos: 0, name: 'file' },
            header: { pos: 1, name: 'col_names', default: true },
            separator: { pos: -1, default: '\\s' },
            quote: { pos: -1, default: '"' },
            skipLines: { pos: 5, name: 'skip', default: 0 },
            comment: { pos: 9, name: 'comment', default: '' },
            checkNames: { pos: -1, default: false },
            noDupNames: { pos: -1, default: true },
            critical: [
                { pos: 6, name: 'n_max', default: Infinity },
                { pos: 11, name: 'skip_empty_rows', default: true }
            ],
            noEmptyNames: true
        },
        'read_csv': readrDelimitedReader(','),
        'read_csv2': readrDelimitedReader(';'),
        'read_tsv': readrDelimitedReader('\\t'),
        'read_delim': {
            fileName: { pos: 0, name: 'file' },
            separator: { pos: 1, name: 'delim', default: '\t' },
            quote: { pos: 2, name: 'quote', default: '"' },
            header: { pos: 5, name: 'col_names', default: true },
            comment: { pos: 12, name: 'comment', default: '' },
            skipLines: { pos: 14, name: 'skip', default: 0 },
            checkNames: { pos: -1, default: false },
            noDupNames: { pos: -1, default: true },
            critical: [
                { pos: 3, name: 'escape_backslash', default: false },
                { pos: 4, name: 'escape_double', default: true },
                { pos: 7, name: 'col_select' },
                { pos: 8, name: 'id' },
                { pos: 13, name: 'trim_ws', default: false },
                { pos: 15, name: 'n_max', default: Infinity },
                { pos: 17, name: 'name_repair', default: 'unique' },
                { pos: 21, name: 'skip_empty_rows', default: true }
            ],
            noEmptyNames: true
        },
        'cbind': bindParams(),
        'rbind': bindParams(),
        'head': headTailParams(),
        'tail': headTailParams(),
        'subset': {
            dataFrame: { pos: 0, name: 'x' },
            subset: { pos: 1, name: 'subset' },
            select: { pos: 2, name: 'select' },
            drop: { pos: 3, name: 'drop', default: false }
        },
        'filter': {
            dataFrame: { pos: 0, name: '.data' },
            special: ['.by', '.preserve']
        },
        'select': {
            dataFrame: { pos: 0, name: '.data' },
            special: []
        },
        'mutate': {
            dataFrame: { pos: 0, name: '.data' },
            special: ['.by', '.keep', '.before', '.after'],
            critical: [{ pos: -1, name: '.keep' }],
            checkNames: false,
            noDupNames: false
        },
        'transform': {
            dataFrame: { pos: 0, name: '_data' },
            special: [],
            checkNames: true,
            noDupNames: true
        },
        'group_by': {
            dataFrame: { pos: 0, name: '.data' },
            by: { pos: 1 },
            special: ['.add', '.drop']
        },
        'summarise': summarizeParams(),
        'summarize': summarizeParams(),
        'inner_join': dplyrJoinParams(false, false, false),
        'left_join': dplyrJoinParams(false, true, false),
        'right_join': dplyrJoinParams(false, false, true),
        'full_join': dplyrJoinParams(true, false, false),
        'merge': {
            dataFrame: { pos: 0, name: 'x' },
            otherDataFrame: { pos: 1, name: 'y' },
            by: { pos: 2, name: 'by' },
            joinAll: { pos: 5, name: 'all', default: false },
            joinLeft: { pos: 6, name: 'all.x', default: false },
            joinRight: { pos: 7, name: 'all.y', default: false },
            critical: [
                { pos: 3, name: 'by.x' },
                { pos: 4, name: 'by.y' }
            ]
        },
        'relocate': {
            dataFrame: { pos: 0, name: '.data' },
            special: ['.before', '.after'],
            disallowNamedArgs: true
        },
        'arrange': {
            dataFrame: { pos: 0, name: '.data' },
            special: ['.by_group', '.locale']
        }
    };
})();
520
/**
 * Translates a concrete data frame function call into abstract data frame operations.
 *
 * Functions listed in `DataFrameFunctionMapper` are handled by their dedicated mapper
 * (unless a critical argument is present, in which case the result is `unknown`).
 * Other known data frame functions are mapped to `unknown` operations.
 *
 * @param node   - The R node of the function call
 * @param dfg    - The data flow graph for resolving the arguments
 * @param config - The flowR configuration to use
 * @returns Data frame expression info containing the mapped abstract data frame operations,
 * or `undefined` if the node does not represent a data frame function call
 */
function mapDataFrameFunctionCall(node, dfg, config) {
    const isNamedCall = node.type === type_1.RType.FunctionCall && node.named;
    if (!isNamedCall) {
        return;
    }
    const resolveInfo = { graph: dfg, idMap: dfg.idMap, full: true, resolve: config_1.VariableResolve.Alias };
    const calledName = node.functionName.content;
    let operations;
    if (isDataFrameFunction(calledName)) {
        const { mapper } = DataFrameFunctionMapper[calledName];
        const params = DataFrameFunctionParamsMapper[calledName];
        const callArgs = (0, arguments_1.getFunctionArguments)(node, dfg);
        // a critical argument makes the call unpredictable for the dedicated mapper
        operations = (0, arguments_1.hasCriticalArgument)(callArgs, params.critical, resolveInfo)
            ? [{ operation: 'unknown', operand: undefined }]
            : mapper(callArgs, params, resolveInfo, config);
    }
    else {
        const mapping = getOtherDataFrameFunction(calledName);
        if (mapping === undefined) {
            return;
        }
        switch (mapping.type) {
            case 'entry_point':
                operations = [{ operation: 'unknown', operand: undefined }];
                break;
            case 'transformation':
            case 'modification': {
                const callArgs = (0, arguments_1.getFunctionArguments)(node, dfg);
                operations = mapDataFrameUnknown(callArgs, mapping, resolveInfo);
                break;
            }
            default:
                (0, assert_1.assertUnreachable)(mapping);
        }
    }
    return operations !== undefined ? { type: 'expression', operations } : undefined;
}
566
/**
 * Checks whether a dedicated mapper is registered for the given function name.
 *
 * @param functionName - The name of the called function
 * @returns `true` if `DataFrameFunctionMapper` has an own entry for the name
 */
function isDataFrameFunction(functionName) {
    // only own keys count: the `in` operator would also accept inherited names such as "toString"
    return Object.hasOwn(DataFrameFunctionMapper, functionName);
}
570
/**
 * Looks up the entry of `OtherDataFrameFunctions` whose `names` list contains the given function name.
 *
 * @param functionName - The name of the called function
 * @returns The first matching entry, or `undefined` if there is none
 */
function getOtherDataFrameFunction(functionName) {
    for (const entry of OtherDataFrameFunctions) {
        if (entry.names.includes(functionName)) {
            return entry;
        }
    }
    return undefined;
}
573
/**
 * Maps a data frame creation call to a single `create` operation.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns A list with one `create` operation holding the inferred column names and row count
 */
function mapDataFrameCreate(args, params, info) {
    const checkNames = (0, arguments_1.getArgumentValue)(args, params.checkNames, info);
    const noDupNames = (0, arguments_1.getArgumentValue)(args, params.noDupNames, info);
    const columnArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const names = columnArgs.map(arg => (0, resolve_args_1.resolveIdToArgName)(arg, info));
    const lengths = columnArgs.map(arg => (0, resolve_args_1.resolveIdToArgVectorLength)(arg, info));
    const allVectors = lengths.every(assert_1.isNotUndefined);
    const rows = allVectors ? Math.max(...lengths, 0) : undefined;
    const flagsResolved = typeof checkNames === 'boolean' && typeof noDupNames === 'boolean';
    let colnames;
    if (!allVectors || !flagsResolved) {
        // over-approximate the column names if arguments are present but cannot be resolved to values
        colnames = undefined;
    }
    else if (rows === 0) {
        colnames = [];
    }
    else {
        colnames = (0, arguments_1.filterValidNames)(names, checkNames, noDupNames);
    }
    return [{ operation: 'create', operand: undefined, colnames, rows }];
}
599
/**
 * Maps a data frame conversion call to an `identity` operation on the converted argument.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns An `identity` operation on the data frame argument, or an `unknown` operation
 * if that argument is empty or has no value
 */
function mapDataFrameConvert(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    const hasValue = dataFrame !== r_function_call_1.EmptyArgument && dataFrame?.value !== undefined;
    return hasValue
        ? [{ operation: 'identity', operand: dataFrame.value.info.id }]
        : [{ operation: 'unknown', operand: undefined }];
}
609
/**
 * Maps a data frame read call to a `read` operation.
 *
 * If a request for the content can be created, reading external files is enabled in the
 * configuration, and all relevant arguments resolve to values of the expected type, the content
 * is parsed line by line to infer the column names and the number of rows.
 * Otherwise, column names and rows are left `undefined`.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @param config - The flowR configuration to use
 * @returns A list with one `read` operation
 */
function mapDataFrameRead(args, params, info, config) {
    const valueOf = param => (0, arguments_1.getArgumentValue)(args, param, info);
    const fileNameArg = (0, arguments_1.getFunctionArgument)(args, params.fileName, info);
    const textArg = params.text ? (0, arguments_1.getFunctionArgument)(args, params.text, info) : undefined;
    const { source, request } = getRequestFromRead(fileNameArg, textArg, params, info, config);
    const header = valueOf(params.header);
    const separator = valueOf(params.separator);
    const quote = valueOf(params.quote);
    const comment = valueOf(params.comment);
    const skipLines = valueOf(params.skipLines);
    const checkNames = valueOf(params.checkNames);
    const noDupNames = valueOf(params.noDupNames);
    const validArguments = typeof header === 'boolean' && typeof separator === 'string' && typeof quote === 'string' &&
        typeof comment === 'string' && typeof skipLines === 'number' && typeof checkNames === 'boolean' &&
        typeof noDupNames === 'boolean';
    if (request === undefined || !config.abstractInterpretation.dataFrame.readLoadedData.readExternalFiles || !validArguments) {
        return [{ operation: 'read', operand: undefined, source, colnames: undefined, rows: undefined }];
    }
    const commentPattern = new RegExp(`\\s*[${(0, arguments_1.escapeRegExp)(comment, true)}].*`);
    let firstLine = undefined;
    let firstLineNumber = 0;
    let rowCount = 0;
    const parseLine = (line, lineNumber) => {
        const raw = line.toString();
        const text = comment ? raw.replace(commentPattern, '') : raw;
        const pastSkippedLines = lineNumber >= (skipLines ?? 0);
        if (text.length === 0 || !pastSkippedLines) {
            return;
        }
        if (firstLine === undefined) {
            firstLine = getEntriesFromCsvLine(text, separator, quote, comment);
            firstLineNumber = lineNumber;
        }
        // with a header, the first relevant line holds the names and is not counted as a row
        if (!header || lineNumber > firstLineNumber) {
            rowCount++;
        }
    };
    const allLines = parseRequestContent(request, parseLine, config.abstractInterpretation.dataFrame.readLoadedData.maxReadLines);
    let colnames;
    if (header) {
        colnames = (0, arguments_1.filterValidNames)(firstLine, checkNames, noDupNames, params.noEmptyNames);
    }
    else if (firstLine !== undefined) {
        colnames = new Array(firstLine.length).fill(undefined);
    }
    // if not all lines were read, the counted rows are only a lower bound
    const rows = allLines ? rowCount : [rowCount, Infinity];
    return [{ operation: 'read', operand: undefined, source, colnames, rows }];
}
663
/**
 * Maps a column-binding call to `concatCols` operations (for data frame arguments)
 * and a final `addCols` operation (for the remaining arguments).
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if no argument is a data frame
 */
function mapDataFrameColBind(args, params, info) {
    const boundArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = boundArgs.find(arg => (0, arguments_1.isDataFrameArgument)(arg, info));
    if (dataFrame === undefined) {
        return;
    }
    if (boundArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    // only the first emitted operation refers to the data frame argument
    let operand = dataFrame.value;
    let addedNames = [];
    for (const arg of boundArgs) {
        if (arg === dataFrame || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const otherShape = (0, shape_inference_1.resolveIdToDataFrameShape)(arg.value, info.graph);
        if (otherShape !== undefined) {
            operations.push({ operation: 'concatCols', operand: operand?.info.id, other: otherShape });
            operand = undefined;
        }
        else if ((0, resolve_args_1.resolveIdToArgValue)(arg, info) !== undefined) {
            const colname = (0, resolve_args_1.resolveIdToArgName)(arg, info);
            addedNames?.push(colname);
        }
        else {
            // added columns are top if argument cannot be resolved to constant (vector-like) value
            addedNames = undefined;
        }
    }
    if (addedNames === undefined || addedNames.length > 0) {
        operations.push({ operation: 'addCols', operand: operand?.info.id, colnames: addedNames });
    }
    return operations;
}
705
/**
 * Maps a row-binding call to `concatRows` operations (for data frame arguments)
 * and a final `addRows` operation (for the remaining arguments).
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if no argument is a data frame
 */
function mapDataFrameRowBind(args, params, info) {
    const boundArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = boundArgs.find(arg => (0, arguments_1.isDataFrameArgument)(arg, info));
    if (dataFrame === undefined) {
        return;
    }
    if (boundArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    // only the first emitted operation refers to the data frame argument
    let operand = dataFrame.value;
    let addedRows = 0;
    for (const arg of boundArgs) {
        if (arg === dataFrame || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const otherShape = (0, shape_inference_1.resolveIdToDataFrameShape)(arg.value, info.graph);
        if (otherShape !== undefined) {
            operations.push({ operation: 'concatRows', operand: operand?.info.id, other: otherShape });
            operand = undefined;
        }
        else if ((0, resolve_args_1.resolveIdToArgValue)(arg, info) !== undefined) {
            if (addedRows !== undefined) {
                addedRows += 1;
            }
        }
        else {
            // number of added rows is top if arguments cannot be resolved to constant (vector-like) value
            addedRows = undefined;
        }
    }
    if (addedRows === undefined || addedRows > 0) {
        operations.push({ operation: 'addRows', operand: operand?.info.id, rows: addedRows });
    }
    return operations;
}
746
/**
 * Maps a head/tail-style call to a row operation and, if a column amount is given, a column operation.
 *
 * The amount argument may resolve to a single number (rows) or to an array of up to two numbers
 * (rows and columns). Non-negative amounts subset, negative amounts remove.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameHeadTail(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const result = [];
    const amount = (0, arguments_1.getArgumentValue)(args, params.amount, info);
    let rows = undefined;
    let cols = undefined;
    if (typeof amount === 'number') {
        rows = amount;
    }
    else if (Array.isArray(amount) && amount.length <= 2 && amount.every(value => typeof value === 'number')) {
        rows = amount[0];
        cols = amount[1];
    }
    result.push({
        operation: rows === undefined || rows >= 0 ? 'subsetRows' : 'removeRows',
        operand: dataFrame.value.info.id,
        rows: rows !== undefined ? Math.abs(rows) : undefined
    });
    if (cols !== undefined) {
        const colCount = Math.abs(cols);
        // `Array(n)` throws a RangeError unless n is an integer in [0, 2^32 - 1] (e.g. for 1.5, NaN, Infinity);
        // in that case the affected columns are over-approximated as unknown instead of crashing
        const isValidLength = Number.isInteger(colCount) && colCount <= 0xFFFFFFFF;
        result.push({
            operation: cols >= 0 ? 'subsetCols' : 'removeCols',
            operand: undefined,
            colnames: isValidLength ? Array(colCount).fill(undefined) : undefined
        });
    }
    return result;
}
776
/**
 * Maps a subset call to column access, row filter, and column removal/selection operations.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameSubset(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    if (args.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const isString = col => typeof col === 'string';
    const isNumber = col => typeof col === 'number';
    const nameOrUnknown = col => (typeof col === 'string' ? col : undefined);
    const operations = [];
    // after the first shape-changing operation, later operations no longer refer to the original operand
    let operand = dataFrame.value;
    const filterArg = (0, arguments_1.getFunctionArgument)(args, params.subset, info);
    const filterValue = (0, resolve_args_1.resolveIdToArgValue)(filterArg, info);
    const selectArg = (0, arguments_1.getFunctionArgument)(args, params.select, info);
    const dropArg = (0, arguments_1.getFunctionArgument)(args, params.drop, info);
    const condition = typeof filterValue === 'boolean' ? filterValue : undefined;
    const filterNames = (0, arguments_1.getUnresolvedSymbolsInExpression)(filterArg, info.graph);
    const { selectedCols, unselectedCols } = getSelectedColumns([selectArg], info);
    const accessedCols = [...filterNames, ...(selectedCols ?? []), ...(unselectedCols ?? [])];
    const accessedByName = accessedCols.filter(isString);
    const accessedByIndex = accessedCols.filter(isNumber);
    const mixedAccess = accessedByName.length > 0 && accessedByIndex.length > 0;
    const duplicateCols = accessedCols.some((col, index) => col !== undefined && accessedCols.indexOf(col) !== index);
    if (accessedByName.length > 0) {
        operations.push({ operation: 'accessCols', operand: operand?.info.id, columns: accessedByName });
    }
    if (accessedByIndex.length > 0) {
        operations.push({
            operation: 'accessCols',
            operand: operand?.info.id,
            columns: accessedByIndex.map(col => Math.abs(col))
        });
    }
    if (filterArg !== undefined && filterArg !== r_function_call_1.EmptyArgument) {
        operations.push({ operation: 'filterRows', operand: operand?.info.id, condition: condition });
        operand = undefined;
    }
    if (!dropArg || accessedCols.length > 1) {
        if (unselectedCols === undefined || unselectedCols.length > 0) {
            operations.push({
                operation: 'removeCols',
                operand: operand?.info.id,
                colnames: unselectedCols?.map(nameOrUnknown)
            });
            operand = undefined;
        }
        if (selectedCols === undefined || selectedCols.length > 0) {
            const options = duplicateCols || mixedAccess ? { options: { duplicateCols: true } } : {};
            operations.push({
                operation: 'subsetCols',
                operand: operand?.info.id,
                colnames: selectedCols?.map(nameOrUnknown),
                ...options
            });
            operand = undefined;
        }
    }
    return operations;
}
839
/**
 * Maps a filter call to an optional `accessCols` operation followed by a `filterRows` operation.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameFilter(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const conditionArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const conditionValues = conditionArgs.map(arg => (0, resolve_args_1.resolveIdToArgValue)(arg, info));
    const accessedNames = conditionArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    // the overall condition is only known if every single condition resolves to a boolean
    const allBoolean = conditionValues.every(value => typeof value === 'boolean');
    const condition = allBoolean ? conditionValues.every(cond => cond) : undefined;
    const operations = [];
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand: dataFrame.value.info.id, columns: accessedNames });
    }
    operations.push({ operation: 'filterRows', operand: dataFrame.value.info.id, condition: condition });
    return operations;
}
867
/**
 * Maps a select call to column access, column removal, and column selection operations.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameSelect(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const nameOrUnknown = col => (typeof col === 'string' ? col : undefined);
    const operations = [];
    let operand = dataFrame.value;
    const selectArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const selection = getSelectedColumns(selectArgs, info);
    let selectedCols = selection.selectedCols;
    let unselectedCols = selection.unselectedCols;
    const accessedCols = [...(selectedCols ?? []), ...(unselectedCols ?? [])];
    const accessedByName = accessedCols.filter(col => typeof col === 'string');
    const accessedByIndex = accessedCols.filter(col => typeof col === 'number');
    const mixedAccess = accessedByName.length > 0 && accessedByIndex.length > 0;
    const duplicateAccess = accessedCols.some(col => col !== undefined && accessedCols.filter(other => other === col).length > 1);
    const renamedCols = selectArgs.some(arguments_1.isNamedArgument);
    // map to top if columns are selected mixed by string and number, or are selected duplicate
    if (mixedAccess || duplicateAccess) {
        selectedCols = undefined;
        unselectedCols = [];
    }
    if (accessedByName.length > 0) {
        operations.push({ operation: 'accessCols', operand: operand?.info.id, columns: accessedByName });
    }
    if (accessedByIndex.length > 0) {
        operations.push({
            operation: 'accessCols',
            operand: operand?.info.id,
            columns: accessedByIndex.map(col => Math.abs(col))
        });
    }
    if (unselectedCols === undefined || unselectedCols.length > 0) {
        operations.push({
            operation: 'removeCols',
            operand: operand?.info.id,
            colnames: unselectedCols?.map(nameOrUnknown)
        });
        operand = undefined;
    }
    if (selectedCols === undefined || selectedCols.length > 0 || unselectedCols?.length === 0) {
        const options = renamedCols ? { options: { renamedCols: true } } : {};
        operations.push({
            operation: 'subsetCols',
            operand: operand?.info.id,
            colnames: selectedCols?.map(nameOrUnknown),
            ...options
        });
        operand = undefined;
    }
    return operations;
}
919
/**
 * Maps a mutate call to column access, column removal (for arguments that are R `NULL`),
 * and column mutation operations.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameMutate(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const nameOf = arg => (0, resolve_args_1.resolveIdToArgName)(arg, info);
    const operations = [];
    let operand = dataFrame.value;
    const mutateArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const rawDeleted = mutateArgs.filter(arguments_1.isRNull).map(nameOf);
    const rawMutated = mutateArgs.filter(arg => !(0, arguments_1.isRNull)(arg)).map(nameOf);
    // only column names that are not created by mutation are preconditions on the operand
    const accessedNames = mutateArgs
        .flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph))
        .filter(name => !rawMutated.includes(name));
    const deletedCols = (0, arguments_1.filterValidNames)(rawDeleted, params.checkNames, params.noDupNames);
    const mutatedCols = (0, arguments_1.filterValidNames)(rawMutated, params.checkNames, params.noDupNames);
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand: operand?.info.id, columns: accessedNames });
    }
    if (deletedCols === undefined || deletedCols.length > 0) {
        operations.push({
            operation: 'removeCols',
            operand: operand?.info.id,
            colnames: deletedCols,
            options: { maybe: true }
        });
        operand = undefined;
    }
    if (mutatedCols === undefined || mutatedCols.length > 0 || deletedCols?.length === 0) {
        operations.push({ operation: 'mutateCols', operand: operand?.info.id, colnames: mutatedCols });
        operand = undefined;
    }
    return operations;
}
969
/**
 * Maps a group-by call to an optional `accessCols` operation followed by a `groupBy` operation.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameGroupBy(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const byArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const accessedNames = byArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    // named arguments contribute their argument name, unnamed ones the symbol name of their value
    const byNames = byArgs.map(arg => {
        if ((0, arguments_1.isNamedArgument)(arg)) {
            return (0, resolve_args_1.resolveIdToArgName)(arg, info);
        }
        return (0, resolve_args_1.resolveIdToArgValueSymbolName)(arg, info);
    });
    const mutatedCols = byArgs.some(arguments_1.isNamedArgument) || byNames.some(assert_1.isUndefined);
    const operations = [];
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand: dataFrame.value.info.id, columns: accessedNames });
    }
    const options = mutatedCols ? { options: { mutatedCols: true } } : {};
    operations.push({ operation: 'groupBy', operand: dataFrame.value.info.id, by: byNames, ...options });
    return operations;
}
998
/**
 * Maps a summarize call to an optional `accessCols` operation followed by a `summarize` operation.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameSummarize(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const summarizeArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const accessedNames = summarizeArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    const summarizedCols = summarizeArgs.map(arg => (0, resolve_args_1.resolveIdToArgName)(arg, info));
    const operations = [];
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand: dataFrame.value.info.id, columns: accessedNames });
    }
    operations.push({ operation: 'summarize', operand: dataFrame.value.info.id, colnames: summarizedCols });
    return operations;
}
1022
/**
 * Maps a join/merge call to optional `accessCols` operations on the join columns followed by a `join` operation.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns The mapped operations, an `unknown` operation if the join flags cannot be resolved to booleans,
 * or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameJoin(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    const joinAll = (0, arguments_1.getArgumentValue)(args, params.joinAll, info);
    const joinLeft = (0, arguments_1.getArgumentValue)(args, params.joinLeft, info);
    const joinRight = (0, arguments_1.getArgumentValue)(args, params.joinRight, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    if (args.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const flagsResolved = [joinAll, joinLeft, joinRight].every(flag => typeof flag === 'boolean');
    if (!flagsResolved) {
        return [{ operation: 'unknown', operand: dataFrame.value.info.id }];
    }
    const isString = value => typeof value === 'string';
    const isNumber = value => typeof value === 'number';
    const operations = [];
    const otherArg = (0, arguments_1.getFunctionArgument)(args, params.otherDataFrame, info);
    const byArg = (0, arguments_1.getFunctionArgument)(args, params.by, info);
    const otherDataFrame = (0, shape_inference_1.resolveIdToDataFrameShape)(otherArg, info.graph) ?? domain_1.DataFrameTop;
    const joinType = getJoinType(joinAll, joinLeft, joinRight);
    let byCols;
    if (byArg !== undefined) {
        const byValue = (0, resolve_args_1.resolveIdToArgValue)(byArg, info);
        if (isString(byValue) || isNumber(byValue)) {
            byCols = [byValue];
        }
        else if (Array.isArray(byValue) && (byValue.every(by => isString(by)) || byValue.every(by => isNumber(by)))) {
            byCols = byValue;
        }
    }
    if (byCols?.some(by => isString(by))) {
        operations.push({
            operation: 'accessCols',
            operand: dataFrame.value.info.id,
            columns: byCols.filter(by => isString(by))
        });
    }
    if (byCols?.some(by => isNumber(by))) {
        operations.push({
            operation: 'accessCols',
            operand: dataFrame.value.info.id,
            columns: byCols.filter(by => isNumber(by))
        });
    }
    operations.push({
        operation: 'join',
        operand: dataFrame.value.info.id,
        other: otherDataFrame,
        by: byCols?.map(by => (isString(by) ? by : undefined)),
        // without a `by` argument, the join is marked as natural
        options: { join: joinType, natural: byArg === undefined }
    });
    return operations;
}
1074
/**
 * Maps a call that keeps the shape of its data frame argument to an `identity` operation.
 *
 * @param args   - The arguments of the function call
 * @param params - The parameter description of the called function
 * @param info   - Information for resolving argument values
 * @returns An `identity` operation, an `unknown` operation if named arguments are disallowed but present,
 * or `undefined` if the data frame argument is not a data frame
 */
function mapDataFrameIdentity(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const hasForbiddenNamedArg = params.disallowNamedArgs && effectiveArgs.some(arguments_1.isNamedArgument);
    const operation = hasForbiddenNamedArg ? 'unknown' : 'identity';
    return [{ operation, operand: dataFrame.value.info.id }];
}
1088
/**
 * Maps a call of a data frame function without dedicated semantics to an `unknown` operation.
 *
 * The data frame operand is taken from `params.dataFrame` if that is defined,
 * otherwise the first argument that is a data frame is used.
 *
 * @param args   - The arguments of the function call
 * @param params - The mapping entry of the called function
 * @param info   - Information for resolving argument values
 * @returns An `unknown` operation (with the constraint type, if defined),
 * or `undefined` if no data frame argument is found
 */
function mapDataFrameUnknown(args, params, info) {
    const dataFrame = params.dataFrame !== undefined
        ? (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info)
        : args.find(arg => (0, arguments_1.isDataFrameArgument)(arg, info));
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const operation = { operation: 'unknown', operand: dataFrame.value.info.id };
    if (params.constraintType !== undefined) {
        operation.type = params.constraintType;
    }
    return [operation];
}
1105
/**
 * Determines the source and the content request for a data frame read call.
 *
 * The file name argument takes precedence; the text argument is only considered
 * if the file name argument is missing or empty.
 *
 * @param fileNameArg - The file name argument of the call (may be missing or empty)
 * @param textArg     - The text argument of the call (may be missing or empty)
 * @param params      - The parameter description of the called function
 * @param info        - Information for resolving argument values
 * @param config      - The flowR configuration to use
 * @returns The resolved `source` and the `request` for its content (each possibly `undefined`)
 */
function getRequestFromRead(fileNameArg, textArg, params, info, config) {
    const isPresent = arg => arg !== undefined && arg !== r_function_call_1.EmptyArgument;
    let source;
    let request;
    if (isPresent(fileNameArg)) {
        const fileName = (0, resolve_args_1.resolveIdToArgValue)(fileNameArg, info);
        if (typeof fileName === 'string') {
            source = fileName;
            const referenceChain = fileNameArg.info.file
                ? [(0, retriever_1.requestFromInput)(`file://${fileNameArg.info.file}`)]
                : [];
            const sources = (0, built_in_source_1.findSource)(config.solver.resolveSource, fileName, { referenceChain });
            if (sources?.length === 1) {
                // create request from resolved source file path
                source = sources[0];
                request = (0, built_in_source_1.getSourceProvider)().createRequest(source);
            }
            else if (params.text === undefined) {
                // create request from string if file name argument contains newline
                const inlineContent = (0, resolve_args_1.unescapeSpecialChars)(fileName);
                if (inlineContent.includes('\n')) {
                    request = (0, retriever_1.requestFromInput)(inlineContent);
                }
            }
        }
    }
    else if (isPresent(textArg)) {
        const text = (0, resolve_args_1.resolveIdToArgValue)(textArg, info);
        if (typeof text === 'string') {
            source = text;
            request = (0, retriever_1.requestFromInput)((0, resolve_args_1.unescapeSpecialChars)(text));
        }
    }
    return { source, request };
}
1134
/**
 * Feeds the content of a request line by line to a parser callback.
 *
 * @param request  - The request; `request.request` is either `'text'` or `'file'`
 * @param parser   - Callback invoked for the lines of the content
 * @param maxLines - Line limit passed on when reading a file request
 * @returns `true` for text requests; for file requests the result of `readLineByLineSync`
 */
function parseRequestContent(request, parser, maxLines) {
    const requestType = request.request;
    if (requestType === 'text') {
        const lines = request.content.split('\n');
        for (const [lineNumber, line] of lines.entries()) {
            parser(line, lineNumber, lines);
        }
        return true;
    }
    if (requestType === 'file') {
        return (0, files_1.readLineByLineSync)(request.content, parser, maxLines);
    }
    (0, assert_1.assertUnreachable)(requestType);
}
1146
/**
 * Gets all entries from a line of a CSV file using a custom separator char, quote char, and comment char.
 *
 * @param line    - The line to split into entries
 * @param sep     - The separator char(s) between entries
 * @param quote   - The quote char(s) that may enclose an entry
 * @param comment - The comment char(s); everything from the first one onwards is dropped
 * @param trim    - Whether to trim surrounding whitespace of each entry
 * @returns The entries of the line, with enclosing quotes removed and doubled quotes collapsed
 */
function getEntriesFromCsvLine(line, sep = ',', quote = '"', comment = '', trim = true) {
    // only allow tokens like `\s`, `\t`, or `\n` in separator, quote, and comment chars
    const sepClass = (0, arguments_1.escapeRegExp)(sep, true);
    const quoteClass = (0, arguments_1.escapeRegExp)(quote, true);
    const commentClass = (0, arguments_1.escapeRegExp)(comment, true);
    // do not allow unquoted empty entries in whitespace-separated files
    const quantifier = sepClass === '\\s' ? '+' : '*';
    const commentPattern = new RegExp(`[${commentClass}].*`);
    const entryPattern = new RegExp(`(?<=^|[${sepClass}])(?:[${quoteClass}]((?:[^${quoteClass}]|[${quoteClass}]{2})*)[${quoteClass}]|([^${sepClass}]${quantifier}))`, 'g');
    // regex for doubled quotes like `""` or `''`
    const doubledQuotePattern = new RegExp(`([${quoteClass}])\\1`, 'g');
    const content = commentClass ? line.replace(commentPattern, '') : line;
    return Array.from(content.matchAll(entryPattern), match => {
        // group 1 holds a quoted entry, group 2 an unquoted one
        const entry = match[1]?.replace(doubledQuotePattern, '$1') ?? match[2];
        return trim ? entry.trim() : entry;
    });
}
1163
/**
 * Resolves all selected columns in a select expression, such as `id`, `"id"`, `1`, `c(id, name)`, `c("id", "name")`, `1:2`, `-id`, `-1`, `-c(id, name)`, `c(-1, -2)`, etc.
 *
 * @param args - The arguments that make up the select expression
 * @param info - Information for resolving argument values
 * @returns The selected and the unselected (negated) columns; a list is `undefined` if it cannot be resolved
 */
function getSelectedColumns(args, info) {
    // joining with an unresolved list yields an unresolved list
    const joinColumns = (left, right) => (left !== undefined && right !== undefined ? [...left, ...right] : undefined);
    let selectedCols = [];
    let unselectedCols = [];
    for (const arg of args) {
        if (arg === undefined || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const kind = arg.value?.type;
        if (kind === type_1.RType.FunctionCall && arg.value.named && arg.value.functionName.content === 'c') {
            const nested = getSelectedColumns(arg.value.arguments, info);
            selectedCols = joinColumns(selectedCols, nested.selectedCols);
            unselectedCols = joinColumns(unselectedCols, nested.unselectedCols);
        }
        else if (kind === type_1.RType.UnaryOp && arg.value.operator === '-' && info.idMap !== undefined) {
            // negation swaps the roles of selected and unselected columns of the operand
            const negated = getSelectedColumns([(0, make_argument_1.toUnnamedArgument)(arg.value.operand, info.idMap)], info);
            selectedCols = joinColumns(selectedCols, negated.unselectedCols);
            unselectedCols = joinColumns(unselectedCols, negated.selectedCols);
        }
        else if (kind === type_1.RType.BinaryOp && arg.value.operator === ':' && info.idMap !== undefined) {
            const rangeArg = (0, make_argument_1.toUnnamedArgument)(arg.value, info.idMap);
            const values = (0, resolve_args_1.resolveIdToArgValue)(rangeArg, { ...info, resolve: config_1.VariableResolve.Disabled });
            if (Array.isArray(values) && values.every(value => typeof value === 'number')) {
                const included = values.filter(value => value >= 0);
                const excluded = values.filter(value => value < 0).map(value => Math.abs(value));
                selectedCols = joinColumns(selectedCols, included);
                unselectedCols = joinColumns(unselectedCols, excluded);
            }
            else {
                selectedCols = undefined;
            }
        }
        else if (kind === type_1.RType.Symbol || kind === type_1.RType.String) {
            selectedCols?.push((0, resolve_args_1.resolveIdToArgValueSymbolName)(arg, info));
        }
        else if (kind === type_1.RType.Number) {
            selectedCols?.push(arg.value.content.num);
        }
        else {
            selectedCols = undefined;
        }
    }
    return { selectedCols, unselectedCols };
}
1205
/**
 * Derives the join type from the three join flags.
 *
 * @param joinAll   - Whether all rows of both data frames are kept
 * @param joinLeft  - Whether all rows of the left data frame are kept
 * @param joinRight - Whether all rows of the right data frame are kept
 * @returns `'full'`, `'left'`, `'right'`, or `'inner'`
 */
function getJoinType(joinAll, joinLeft, joinRight) {
    if (joinAll) {
        return 'full';
    }
    if (joinLeft) {
        return joinRight ? 'full' : 'left';
    }
    return joinRight ? 'right' : 'inner';
}
1219
+ //# sourceMappingURL=function-mapper.js.map