@eagleoutice/flowr 2.2.16 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -20
- package/abstract-interpretation/data-frame/absint-info.d.ts +109 -0
- package/abstract-interpretation/data-frame/absint-info.js +31 -0
- package/abstract-interpretation/data-frame/absint-visitor.d.ts +58 -0
- package/abstract-interpretation/data-frame/absint-visitor.js +171 -0
- package/abstract-interpretation/data-frame/domain.d.ts +107 -0
- package/abstract-interpretation/data-frame/domain.js +315 -0
- package/abstract-interpretation/data-frame/mappers/access-mapper.d.ts +17 -0
- package/abstract-interpretation/data-frame/mappers/access-mapper.js +166 -0
- package/abstract-interpretation/data-frame/mappers/arguments.d.ts +117 -0
- package/abstract-interpretation/data-frame/mappers/arguments.js +188 -0
- package/abstract-interpretation/data-frame/mappers/assignment-mapper.d.ts +20 -0
- package/abstract-interpretation/data-frame/mappers/assignment-mapper.js +34 -0
- package/abstract-interpretation/data-frame/mappers/function-mapper.d.ts +261 -0
- package/abstract-interpretation/data-frame/mappers/function-mapper.js +1219 -0
- package/abstract-interpretation/data-frame/mappers/replacement-mapper.d.ts +12 -0
- package/abstract-interpretation/data-frame/mappers/replacement-mapper.js +206 -0
- package/abstract-interpretation/data-frame/resolve-args.d.ts +42 -0
- package/abstract-interpretation/data-frame/resolve-args.js +118 -0
- package/abstract-interpretation/data-frame/semantics.d.ts +213 -0
- package/abstract-interpretation/data-frame/semantics.js +363 -0
- package/abstract-interpretation/data-frame/shape-inference.d.ts +38 -0
- package/abstract-interpretation/data-frame/shape-inference.js +111 -0
- package/benchmark/slicer.d.ts +15 -1
- package/benchmark/slicer.js +137 -0
- package/benchmark/stats/print.js +123 -45
- package/benchmark/stats/size-of.d.ts +7 -0
- package/benchmark/stats/size-of.js +1 -0
- package/benchmark/stats/stats.d.ts +30 -1
- package/benchmark/stats/stats.js +4 -2
- package/benchmark/summarizer/data.d.ts +33 -2
- package/benchmark/summarizer/first-phase/input.js +5 -1
- package/benchmark/summarizer/first-phase/process.js +47 -1
- package/benchmark/summarizer/second-phase/graph.js +1 -1
- package/benchmark/summarizer/second-phase/process.js +102 -4
- package/cli/benchmark-app.d.ts +2 -0
- package/cli/benchmark-app.js +2 -0
- package/cli/benchmark-helper-app.d.ts +2 -0
- package/cli/benchmark-helper-app.js +10 -3
- package/cli/common/options.js +4 -0
- package/cli/repl/commands/repl-query.js +1 -1
- package/cli/repl/server/connection.js +14 -5
- package/config.d.ts +31 -0
- package/config.js +21 -1
- package/control-flow/basic-cfg-guided-visitor.d.ts +1 -2
- package/control-flow/basic-cfg-guided-visitor.js +0 -6
- package/control-flow/cfg-simplification.d.ts +6 -0
- package/control-flow/cfg-simplification.js +18 -9
- package/control-flow/control-flow-graph.d.ts +3 -8
- package/control-flow/control-flow-graph.js +5 -6
- package/control-flow/dfg-cfg-guided-visitor.js +1 -1
- package/control-flow/extract-cfg.d.ts +2 -2
- package/control-flow/extract-cfg.js +52 -63
- package/control-flow/semantic-cfg-guided-visitor.d.ts +1 -1
- package/control-flow/semantic-cfg-guided-visitor.js +1 -1
- package/core/steps/all/static-slicing/00-slice.d.ts +7 -1
- package/core/steps/all/static-slicing/00-slice.js +9 -3
- package/core/steps/pipeline/default-pipelines.d.ts +74 -74
- package/dataflow/environments/built-in.d.ts +7 -5
- package/dataflow/environments/built-in.js +16 -13
- package/dataflow/eval/resolve/alias-tracking.js +2 -2
- package/dataflow/eval/resolve/resolve.d.ts +53 -9
- package/dataflow/eval/resolve/resolve.js +132 -38
- package/dataflow/graph/dataflowgraph-builder.js +2 -2
- package/dataflow/graph/graph.js +1 -1
- package/dataflow/graph/invert-dfg.d.ts +2 -0
- package/dataflow/graph/invert-dfg.js +17 -0
- package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +1 -0
- package/dataflow/internal/process/functions/call/built-in/built-in-source.js +4 -0
- package/documentation/doc-util/doc-query.js +11 -1
- package/documentation/doc-util/doc-search.js +2 -2
- package/documentation/print-cfg-wiki.js +3 -4
- package/documentation/print-core-wiki.js +2 -2
- package/documentation/print-dataflow-graph-wiki.js +7 -0
- package/documentation/print-faq-wiki.js +4 -0
- package/documentation/print-interface-wiki.js +11 -0
- package/documentation/print-linter-wiki.js +36 -4
- package/documentation/print-linting-and-testing-wiki.js +13 -1
- package/documentation/print-onboarding-wiki.js +4 -0
- package/documentation/print-query-wiki.js +29 -3
- package/linter/linter-executor.js +1 -2
- package/linter/linter-format.d.ts +26 -4
- package/linter/linter-format.js +25 -6
- package/linter/linter-rules.d.ts +63 -12
- package/linter/linter-rules.js +5 -1
- package/linter/rules/absolute-path.d.ts +4 -7
- package/linter/rules/absolute-path.js +9 -6
- package/linter/rules/dataframe-access-validation.d.ts +55 -0
- package/linter/rules/dataframe-access-validation.js +118 -0
- package/linter/rules/dead-code.d.ts +43 -0
- package/linter/rules/dead-code.js +50 -0
- package/linter/rules/deprecated-functions.d.ts +3 -2
- package/linter/rules/deprecated-functions.js +3 -1
- package/linter/rules/file-path-validity.d.ts +4 -4
- package/linter/rules/file-path-validity.js +8 -6
- package/linter/rules/naming-convention.d.ts +5 -4
- package/linter/rules/naming-convention.js +8 -2
- package/linter/rules/seeded-randomness.d.ts +4 -3
- package/linter/rules/seeded-randomness.js +3 -1
- package/linter/rules/unused-definition.d.ts +2 -0
- package/linter/rules/unused-definition.js +3 -1
- package/package.json +2 -2
- package/queries/catalog/dependencies-query/dependencies-query-executor.js +6 -1
- package/queries/catalog/dependencies-query/function-info/read-functions.js +1 -0
- package/queries/catalog/dependencies-query/function-info/write-functions.js +1 -0
- package/queries/catalog/df-shape-query/df-shape-query-executor.d.ts +3 -0
- package/queries/catalog/df-shape-query/df-shape-query-executor.js +46 -0
- package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +72 -0
- package/queries/catalog/df-shape-query/df-shape-query-format.js +31 -0
- package/queries/catalog/linter-query/linter-query-format.js +1 -1
- package/queries/catalog/location-map-query/location-map-query-executor.js +7 -5
- package/queries/catalog/location-map-query/location-map-query-format.d.ts +3 -0
- package/queries/catalog/location-map-query/location-map-query-format.js +1 -0
- package/queries/catalog/search-query/search-query-executor.js +1 -1
- package/queries/catalog/static-slice-query/static-slice-query-executor.js +2 -1
- package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +3 -0
- package/queries/catalog/static-slice-query/static-slice-query-format.js +3 -1
- package/queries/query-print.d.ts +1 -1
- package/queries/query-print.js +0 -1
- package/queries/query.d.ts +77 -6
- package/queries/query.js +26 -11
- package/search/flowr-search-builder.d.ts +6 -6
- package/search/flowr-search-executor.d.ts +2 -2
- package/search/flowr-search-executor.js +1 -1
- package/search/flowr-search.d.ts +13 -8
- package/search/flowr-search.js +21 -0
- package/search/search-executor/search-enrichers.d.ts +87 -20
- package/search/search-executor/search-enrichers.js +44 -5
- package/search/search-executor/search-generators.d.ts +4 -4
- package/search/search-executor/search-generators.js +12 -7
- package/search/search-executor/search-mappers.js +3 -2
- package/search/search-executor/search-transformer.d.ts +3 -3
- package/search/search-executor/search-transformer.js +2 -2
- package/slicing/static/static-slicer.d.ts +4 -2
- package/slicing/static/static-slicer.js +10 -4
- package/util/collections/arrays.d.ts +2 -0
- package/util/collections/arrays.js +9 -0
- package/util/files.d.ts +8 -2
- package/util/files.js +22 -4
- package/util/mermaid/dfg.js +4 -2
- package/util/r-value.d.ts +23 -0
- package/util/r-value.js +113 -0
- package/util/range.d.ts +1 -0
- package/util/range.js +5 -1
- package/util/version.js +1 -1
- package/util/cfg/cfg.d.ts +0 -0
- package/util/cfg/cfg.js +0 -2
|
@@ -0,0 +1,1219 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.mapDataFrameFunctionCall = mapDataFrameFunctionCall;
|
|
4
|
+
const config_1 = require("../../../config");
|
|
5
|
+
const make_argument_1 = require("../../../dataflow/internal/process/functions/call/argument/make-argument");
|
|
6
|
+
const built_in_source_1 = require("../../../dataflow/internal/process/functions/call/built-in/built-in-source");
|
|
7
|
+
const r_function_call_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
|
|
8
|
+
const type_1 = require("../../../r-bridge/lang-4.x/ast/model/type");
|
|
9
|
+
const retriever_1 = require("../../../r-bridge/retriever");
|
|
10
|
+
const assert_1 = require("../../../util/assert");
|
|
11
|
+
const files_1 = require("../../../util/files");
|
|
12
|
+
const domain_1 = require("../domain");
|
|
13
|
+
const resolve_args_1 = require("../resolve-args");
|
|
14
|
+
const shape_inference_1 = require("../shape-inference");
|
|
15
|
+
const arguments_1 = require("./arguments");
|
|
16
|
+
/**
 * The kinds of data frame objects in R that the mappers in this file distinguish.
 * Each member maps a symbolic name to the R-side name of that data frame flavour.
 */
const DataFrameType = {
    DataFrame: 'data.frame',
    Tibble: 'tibble',
    DataTable: 'data.table'
};
|
|
25
|
+
/**
 * Lookup table from the name of each explicitly supported R data frame function
 * to the mapper function handling it, the library the function is attributed to,
 * and the type of data frame it is recorded as returning.
 *
 * The keys of this table are what {@link isDataFrameFunction} tests against, and
 * {@link mapDataFrameFunctionCall} reads the `mapper` of the matching entry.
 */
const DataFrameFunctionMapper = (() => {
    const { DataFrame, Tibble } = DataFrameType;
    // every entry has the same three fields, in this order
    const entry = (mapper, library, returnType) => ({ mapper, library, returnType });
    return {
        // creation and conversion
        'data.frame': entry(mapDataFrameCreate, 'base', DataFrame),
        'as.data.frame': entry(mapDataFrameConvert, 'base', DataFrame),
        // readers attributed to utils
        'read.table': entry(mapDataFrameRead, 'utils', DataFrame),
        'read.csv': entry(mapDataFrameRead, 'utils', DataFrame),
        'read.csv2': entry(mapDataFrameRead, 'utils', DataFrame),
        'read.delim': entry(mapDataFrameRead, 'utils', DataFrame),
        'read.delim2': entry(mapDataFrameRead, 'utils', DataFrame),
        // readers attributed to readr
        'read_table': entry(mapDataFrameRead, 'readr', Tibble),
        'read_csv': entry(mapDataFrameRead, 'readr', Tibble),
        'read_csv2': entry(mapDataFrameRead, 'readr', Tibble),
        'read_tsv': entry(mapDataFrameRead, 'readr', Tibble),
        'read_delim': entry(mapDataFrameRead, 'readr', Tibble),
        // binding and slicing
        'cbind': entry(mapDataFrameColBind, 'base', DataFrame),
        'rbind': entry(mapDataFrameRowBind, 'base', DataFrame),
        'head': entry(mapDataFrameHeadTail, 'utils', DataFrame),
        'tail': entry(mapDataFrameHeadTail, 'utils', DataFrame),
        'subset': entry(mapDataFrameSubset, 'base', DataFrame),
        // row/column transformations
        'filter': entry(mapDataFrameFilter, 'dplyr', DataFrame),
        'select': entry(mapDataFrameSelect, 'dplyr', DataFrame),
        'mutate': entry(mapDataFrameMutate, 'dplyr', DataFrame),
        'transform': entry(mapDataFrameMutate, 'base', DataFrame),
        'group_by': entry(mapDataFrameGroupBy, 'dplyr', Tibble),
        'summarise': entry(mapDataFrameSummarize, 'dplyr', DataFrame),
        'summarize': entry(mapDataFrameSummarize, 'dplyr', DataFrame),
        // joins
        'inner_join': entry(mapDataFrameJoin, 'dplyr', DataFrame),
        'left_join': entry(mapDataFrameJoin, 'dplyr', DataFrame),
        'right_join': entry(mapDataFrameJoin, 'dplyr', DataFrame),
        'full_join': entry(mapDataFrameJoin, 'dplyr', DataFrame),
        'merge': entry(mapDataFrameJoin, 'base', DataFrame),
        // functions handled by the identity mapper
        'relocate': entry(mapDataFrameIdentity, 'dplyr', DataFrame),
        'arrange': entry(mapDataFrameIdentity, 'dplyr', DataFrame)
    };
})();
|
|
62
|
+
/**
 * List of other data frame functions that are not explicitly supported but may return data frames.
 *
 * Every entry groups function `names` with the `library` they are attributed to and
 * the `returnType` recorded for them. The `type` decides how {@link mapDataFrameFunctionCall}
 * treats a call:
 * - `'entry_point'`: the call is mapped to a single `unknown` operation without an operand.
 * - `'transformation'`: the call's arguments and the entry are handed to `mapDataFrameUnknown`.
 *
 * The optional `dataFrame` field holds a `pos` and `name` - presumably the position and
 * parameter name of the data frame argument; confirm against `mapDataFrameUnknown`.
 *
 * NOTE(review): `melt` is listed twice (for `reshape2` and for `data.table`). Which entry
 * wins depends on how `getOtherDataFrameFunction` searches this list - verify.
 */
const OtherDataFrameFunctions = [
    {
        type: 'entry_point',
        names: ['anova', 'AIC', 'BIC'],
        // `anova`, `AIC`, and `BIC` are exported by R's `stats` package (there is no `anova` package)
        library: 'stats',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['Anova', 'Manova'],
        library: 'car',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['lmer'],
        library: 'lme4',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['data_frame', 'as_data_frame'],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'entry_point',
        names: ['tbl', 'as.tbl'],
        library: 'dplyr',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['read_fwf', 'read_log'],
        library: 'readr',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['read_excel', 'read_xls', 'read_xlsx'],
        library: 'readxl',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['tibble', 'tibble_row', 'as_tibble', 'tribble'],
        library: 'tibble',
        returnType: DataFrameType.Tibble
    }, {
        type: 'entry_point',
        names: ['data.table', 'as.data.table', 'fread'],
        library: 'data.table',
        returnType: DataFrameType.DataTable
    }, {
        type: 'transformation',
        names: ['na.omit'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'object' }
    }, {
        type: 'transformation',
        names: ['unique', 't'],
        library: 'base',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        type: 'transformation',
        names: ['aggregate'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        type: 'transformation',
        names: ['with', 'within'],
        library: 'base',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['reshape'],
        library: 'stats',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['melt'],
        library: 'reshape2',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: [
            'transmute', 'distinct', 'distinct_prepare', 'group_by_prepare', 'rename', 'rename_with', 'reframe',
            'slice', 'slice_head', 'slice_tail', 'slice_min', 'slice_max', 'slice_sample'
        ],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: '.data' }
    }, {
        type: 'transformation',
        names: [
            'filter_if', 'filter_at', 'filter_all', 'select_if', 'select_at', 'select_all',
            'mutate_if', 'mutate_at', 'mutate_all', 'transmute_if', 'transmute_at', 'transmute_all',
            'distinct_if', 'distinct_at', 'distinct_all', 'group_by_if', 'group_by_at', 'group_by_all',
            'summarize_if', 'summarise_if', 'summarize_at', 'summarise_at', 'summarize_all', 'summarise_all',
            'arrange_if', 'arrange_at', 'arrange_all', 'rename_if', 'rename_at', 'rename_all'
        ],
        library: 'dplyr',
        returnType: DataFrameType.Tibble,
        dataFrame: { pos: 0, name: '.tbl' }
    }, {
        type: 'transformation',
        names: [
            'semi_join', 'anti_join', 'nest_join', 'cross_join',
            'ungroup', 'count', 'tally', 'add_count', 'add_tally',
            'rows_insert', 'rows_append', 'rows_update', 'rows_patch', 'rows_upsert', 'rows_delete'
        ],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'x' }
    }, {
        // no `dataFrame` location is recorded for the bind functions
        type: 'transformation',
        names: ['bind_cols', 'bind_rows'],
        library: 'dplyr',
        returnType: DataFrameType.DataFrame
    }, {
        type: 'transformation',
        names: [
            'drop_na', 'replace_na', 'pivot_longer', 'pivot_wider',
            'separate', 'separate_wider_position', 'separate_wider_delim', 'unite'
        ],
        library: 'tidyr',
        returnType: DataFrameType.DataFrame,
        dataFrame: { pos: 0, name: 'data' }
    }, {
        type: 'transformation',
        names: ['add_column', 'add_row', 'add_case'],
        library: 'tibble',
        returnType: DataFrameType.Tibble,
        dataFrame: { pos: 0, name: '.data' }
    }, {
        type: 'transformation',
        names: ['melt', 'dcast'],
        library: 'data.table',
        returnType: DataFrameType.DataTable,
        dataFrame: { pos: 0, name: 'data' }
    }
];
|
|
206
|
+
/**
 * Parameter descriptions for every function listed in {@link DataFrameFunctionMapper}.
 *
 * Most fields are parameter locations of the form `{ pos, name?, default? }`.
 * Presumably `pos` is the positional index of the parameter and `pos: -1` marks a
 * parameter without a usable position (name-only or fixed to its default) - confirm
 * against the argument helpers in `./arguments`.
 *
 * The `critical` list is passed to `hasCriticalArgument` by {@link mapDataFrameFunctionCall};
 * when that check succeeds, the call is mapped to a single `unknown` operation instead of
 * being handed to its mapper.
 *
 * Functions that share the exact same parameter layout are produced by the small local
 * factories below; each factory call returns fresh objects, so no entry shares state
 * with another.
 *
 * NOTE(review): `read_delim` uses a literal tab character (`'\t'`) as the separator default,
 * whereas `read.delim`, `read.delim2`, and `read_tsv` use the two-character sequence `'\\t'` -
 * confirm whether this difference is intended.
 */
const DataFrameFunctionParamsMapper = (() => {
    // read.csv, read.csv2, read.delim, and read.delim2 differ only in the default separator
    const utilsDelimitedReader = (defaultSeparator) => ({
        fileName: { pos: 0, name: 'file' },
        header: { pos: 1, name: 'header', default: true },
        separator: { pos: 2, name: 'sep', default: defaultSeparator },
        quote: { pos: 3, name: 'quote', default: '"' },
        comment: { pos: 6, name: 'comment.char', default: '' },
        skipLines: { pos: -1, name: 'skip', default: 0 },
        checkNames: { pos: -1, name: 'check.names', default: true },
        noDupNames: { pos: -1, name: 'check.names', default: true },
        text: { pos: -1, name: 'text' },
        critical: [
            { pos: -1, name: 'row.names' },
            { pos: -1, name: 'col.names' },
            { pos: -1, name: 'nrows', default: -1 },
            { pos: -1, name: 'strip.white', default: false },
            { pos: -1, name: 'blank.lines.skip', default: true },
            { pos: -1, name: 'allow.escapes', default: false }
        ]
    });
    // read_csv, read_csv2, and read_tsv differ only in the (fixed) separator
    const readrDelimitedReader = (fixedSeparator) => ({
        fileName: { pos: 0, name: 'file' },
        header: { pos: 1, name: 'col_names', default: true },
        separator: { pos: -1, default: fixedSeparator },
        quote: { pos: 8, name: 'quote', default: '"' },
        comment: { pos: 9, name: 'comment', default: '' },
        skipLines: { pos: 11, name: 'skip', default: 0 },
        checkNames: { pos: -1, default: false },
        noDupNames: { pos: -1, default: true },
        critical: [
            { pos: 3, name: 'col_select' },
            { pos: 4, name: 'id' },
            { pos: 10, name: 'trim_ws', default: true },
            { pos: 12, name: 'n_max', default: Infinity },
            { pos: 14, name: 'name_repair', default: 'unique' },
            { pos: 18, name: 'skip_empty_rows', default: true }
        ],
        noEmptyNames: true
    });
    // cbind and rbind list the same special argument names
    const bindParams = () => ({
        special: ['deparse.level', 'make.row.names', 'stringsAsFactors', 'factor.exclude']
    });
    // head and tail take the data frame first and the amount `n` second
    const headTailParams = () => ({
        dataFrame: { pos: 0, name: 'x' },
        amount: { pos: 1, name: 'n', default: 6 }
    });
    // summarise and summarize are spelling variants with identical parameters
    const summarizeParams = () => ({
        dataFrame: { pos: 0, name: '.data' },
        special: ['.by', '.groups']
    });
    // the dplyr joins differ only in the fixed values of the three join flags
    const dplyrJoinParams = (joinAll, joinLeft, joinRight) => ({
        dataFrame: { pos: 0, name: 'x' },
        otherDataFrame: { pos: 1, name: 'y' },
        by: { pos: 2, name: 'by' },
        joinAll: { pos: -1, default: joinAll },
        joinLeft: { pos: -1, default: joinLeft },
        joinRight: { pos: -1, default: joinRight },
        critical: [{ pos: -1, name: 'keep' }]
    });
    return {
        'data.frame': {
            checkNames: { pos: -1, name: 'check.names', default: true },
            noDupNames: { pos: -1, name: 'check.names', default: true },
            special: ['row.names', 'check.rows', 'check.names', 'fix.empty.names', 'stringsAsFactors'],
            critical: [{ pos: -1, name: 'row.names' }]
        },
        'as.data.frame': {
            dataFrame: { pos: 0, name: 'x' },
            critical: []
        },
        'read.table': {
            fileName: { pos: 0, name: 'file' },
            header: { pos: 1, name: 'header', default: false },
            separator: { pos: 2, name: 'sep', default: '\\s' },
            quote: { pos: 3, name: 'quote', default: '"\'' },
            skipLines: { pos: 12, name: 'skip', default: 0 },
            checkNames: { pos: 13, name: 'check.names', default: true },
            noDupNames: { pos: 13, name: 'check.names', default: true },
            comment: { pos: 17, name: 'comment.char', default: '#' },
            text: { pos: 23, name: 'text' },
            critical: [
                { pos: 6, name: 'row.names' },
                { pos: 7, name: 'col.names' },
                { pos: 11, name: 'nrows', default: -1 },
                { pos: 15, name: 'strip.white', default: false },
                { pos: 16, name: 'blank.lines.skip', default: true },
                { pos: 18, name: 'allow.escapes', default: false }
            ]
        },
        'read.csv': utilsDelimitedReader(','),
        'read.csv2': utilsDelimitedReader(';'),
        'read.delim': utilsDelimitedReader('\\t'),
        'read.delim2': utilsDelimitedReader('\\t'),
        'read_table': {
            fileName: { pos: 0, name: 'file' },
            header: { pos: 1, name: 'col_names', default: true },
            separator: { pos: -1, default: '\\s' },
            quote: { pos: -1, default: '"' },
            skipLines: { pos: 5, name: 'skip', default: 0 },
            comment: { pos: 9, name: 'comment', default: '' },
            checkNames: { pos: -1, default: false },
            noDupNames: { pos: -1, default: true },
            critical: [
                { pos: 6, name: 'n_max', default: Infinity },
                { pos: 11, name: 'skip_empty_rows', default: true }
            ],
            noEmptyNames: true
        },
        'read_csv': readrDelimitedReader(','),
        'read_csv2': readrDelimitedReader(';'),
        'read_tsv': readrDelimitedReader('\\t'),
        'read_delim': {
            fileName: { pos: 0, name: 'file' },
            separator: { pos: 1, name: 'delim', default: '\t' },
            quote: { pos: 2, name: 'quote', default: '"' },
            header: { pos: 5, name: 'col_names', default: true },
            comment: { pos: 12, name: 'comment', default: '' },
            skipLines: { pos: 14, name: 'skip', default: 0 },
            checkNames: { pos: -1, default: false },
            noDupNames: { pos: -1, default: true },
            critical: [
                { pos: 3, name: 'escape_backslash', default: false },
                { pos: 4, name: 'escape_double', default: true },
                { pos: 7, name: 'col_select' },
                { pos: 8, name: 'id' },
                { pos: 13, name: 'trim_ws', default: false },
                { pos: 15, name: 'n_max', default: Infinity },
                { pos: 17, name: 'name_repair', default: 'unique' },
                { pos: 21, name: 'skip_empty_rows', default: true }
            ],
            noEmptyNames: true
        },
        'cbind': bindParams(),
        'rbind': bindParams(),
        'head': headTailParams(),
        'tail': headTailParams(),
        'subset': {
            dataFrame: { pos: 0, name: 'x' },
            subset: { pos: 1, name: 'subset' },
            select: { pos: 2, name: 'select' },
            drop: { pos: 3, name: 'drop', default: false }
        },
        'filter': {
            dataFrame: { pos: 0, name: '.data' },
            special: ['.by', '.preserve']
        },
        'select': {
            dataFrame: { pos: 0, name: '.data' },
            special: []
        },
        'mutate': {
            dataFrame: { pos: 0, name: '.data' },
            special: ['.by', '.keep', '.before', '.after'],
            critical: [{ pos: -1, name: '.keep' }],
            checkNames: false,
            noDupNames: false
        },
        'transform': {
            dataFrame: { pos: 0, name: '_data' },
            special: [],
            checkNames: true,
            noDupNames: true
        },
        'group_by': {
            dataFrame: { pos: 0, name: '.data' },
            by: { pos: 1 },
            special: ['.add', '.drop']
        },
        'summarise': summarizeParams(),
        'summarize': summarizeParams(),
        'inner_join': dplyrJoinParams(false, false, false),
        'left_join': dplyrJoinParams(false, true, false),
        'right_join': dplyrJoinParams(false, false, true),
        'full_join': dplyrJoinParams(true, false, false),
        'merge': {
            dataFrame: { pos: 0, name: 'x' },
            otherDataFrame: { pos: 1, name: 'y' },
            by: { pos: 2, name: 'by' },
            joinAll: { pos: 5, name: 'all', default: false },
            joinLeft: { pos: 6, name: 'all.x', default: false },
            joinRight: { pos: 7, name: 'all.y', default: false },
            critical: [
                { pos: 3, name: 'by.x' },
                { pos: 4, name: 'by.y' }
            ]
        },
        'relocate': {
            dataFrame: { pos: 0, name: '.data' },
            special: ['.before', '.after'],
            disallowNamedArgs: true
        },
        'arrange': {
            dataFrame: { pos: 0, name: '.data' },
            special: ['.by_group', '.locale']
        }
    };
})();
|
|
520
|
+
/**
 * Translates a concrete R data frame function call into abstract data frame operations.
 *
 * Explicitly supported functions (see {@link DataFrameFunctionMapper}) are delegated to their
 * mapper, unless `hasCriticalArgument` reports a critical argument, in which case a single
 * `unknown` operation is produced. Functions found via `getOtherDataFrameFunction` are either
 * mapped to a single `unknown` operation (`entry_point`) or handed to `mapDataFrameUnknown`
 * (`transformation`/`modification`).
 *
 * @param node - The R node of the function call
 * @param dfg - The data flow graph used to obtain and resolve the call's arguments
 * @param config - The flowR configuration, forwarded to the selected mapper
 * @returns An object `{ type: 'expression', operations }` with the mapped operations, or `undefined`
 * if the node is not a named function call, the called function is not a known data frame function,
 * or no operations were produced
 */
function mapDataFrameFunctionCall(node, dfg, config) {
    const isNamedCall = node.type === type_1.RType.FunctionCall && node.named;
    if (!isNamedCall) {
        return;
    }
    const resolveInfo = { graph: dfg, idMap: dfg.idMap, full: true, resolve: config_1.VariableResolve.Alias };
    const calledName = node.functionName.content;
    // a call whose effect is not modeled is represented by one operand-less `unknown` operation
    const unknownOperations = () => [{ operation: 'unknown', operand: undefined }];
    let operations;
    if (isDataFrameFunction(calledName)) {
        const { mapper } = DataFrameFunctionMapper[calledName];
        const params = DataFrameFunctionParamsMapper[calledName];
        const args = (0, arguments_1.getFunctionArguments)(node, dfg);
        operations = (0, arguments_1.hasCriticalArgument)(args, params.critical, resolveInfo)
            ? unknownOperations()
            : mapper(args, params, resolveInfo, config);
    }
    else {
        const mapping = getOtherDataFrameFunction(calledName);
        if (mapping === undefined) {
            return;
        }
        switch (mapping.type) {
            case 'entry_point':
                operations = unknownOperations();
                break;
            case 'transformation':
            case 'modification': {
                const args = (0, arguments_1.getFunctionArguments)(node, dfg);
                operations = mapDataFrameUnknown(args, mapping, resolveInfo);
                break;
            }
            default:
                (0, assert_1.assertUnreachable)(mapping);
        }
    }
    return operations === undefined ? undefined : { type: 'expression', operations };
}
|
|
566
|
+
/**
 * Checks whether a function name is an own key of `DataFrameFunctionMapper`.
 */
function isDataFrameFunction(functionName) {
    // only own properties count: an `in` check would also accept inherited names like "toString"
    return Object.hasOwn(DataFrameFunctionMapper, functionName);
}
|
|
570
|
+
/**
 * Returns the first entry of `OtherDataFrameFunctions` whose `names` list contains the function name,
 * or `undefined` if there is no such entry.
 */
function getOtherDataFrameFunction(functionName) {
    for (const candidate of OtherDataFrameFunctions) {
        if (candidate.names.includes(functionName)) {
            return candidate;
        }
    }
    return undefined;
}
|
|
573
|
+
/**
 * Maps a data frame creation call to a single `create` operation.
 *
 * The row count is the largest resolved argument length (or 0 without arguments) and is `undefined`
 * as soon as one argument length cannot be resolved.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`checkNames`, `noDupNames`, `special`)
 * @param info - Argument resolve information
 * @returns The mapped `create` operation
 */
function mapDataFrameCreate(args, params, info) {
    const { getArgumentValue, getEffectiveArgs, filterValidNames } = arguments_1;
    const { resolveIdToArgName, resolveIdToArgVectorLength } = resolve_args_1;
    const checkNames = getArgumentValue(args, params.checkNames, info);
    const noDupNames = getArgumentValue(args, params.noDupNames, info);
    const columnArgs = getEffectiveArgs(args, params.special);
    const argNames = columnArgs.map(arg => resolveIdToArgName(arg, info));
    const argLengths = columnArgs.map(arg => resolveIdToArgVectorLength(arg, info));
    const allVectors = argLengths.every(assert_1.isNotUndefined);
    const rows = allVectors ? Math.max(...argLengths, 0) : undefined;
    const flagsResolved = typeof checkNames === 'boolean' && typeof noDupNames === 'boolean';
    // over-approximate the column names (leave them `undefined`) if arguments are present but cannot be resolved to values
    let colnames;
    if (allVectors && flagsResolved) {
        colnames = rows === 0 ? [] : filterValidNames(argNames, checkNames, noDupNames);
    }
    return [{
            operation: 'create',
            operand: undefined,
            colnames,
            rows
        }];
}
|
|
599
|
+
/**
 * Maps a data frame conversion call to an `identity` operation on the converted argument,
 * or to an `unknown` operation without operand if that argument is empty or has no value.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`)
 * @param info - Argument resolve information
 * @returns The mapped operation
 */
function mapDataFrameConvert(args, params, info) {
    const source = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    const hasValue = source !== r_function_call_1.EmptyArgument && source?.value !== undefined;
    return hasValue
        ? [{ operation: 'identity', operand: source.value.info.id }]
        : [{ operation: 'unknown', operand: undefined }];
}
|
|
609
|
+
/**
 * Maps a call that reads a data frame from a file or from inline text to a `read` operation.
 *
 * If no request can be created, reading external files is disabled in the configuration, or one of the
 * parsing arguments cannot be resolved to a value of the expected type, the column names and row count
 * stay `undefined`. Otherwise the content is parsed line by line to obtain both.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the read function
 * @param info - Argument resolve information
 * @param config - The flowR configuration (`solver.resolveSource`, `abstractInterpretation.dataFrame.readLoadedData`)
 * @returns The mapped `read` operation
 */
function mapDataFrameRead(args, params, info, config) {
    const { getFunctionArgument, getArgumentValue, escapeRegExp, filterValidNames } = arguments_1;
    const fileNameArg = getFunctionArgument(args, params.fileName, info);
    const textArg = params.text ? getFunctionArgument(args, params.text, info) : undefined;
    const { source, request } = getRequestFromRead(fileNameArg, textArg, params, info, config);
    const header = getArgumentValue(args, params.header, info);
    const separator = getArgumentValue(args, params.separator, info);
    const quote = getArgumentValue(args, params.quote, info);
    const comment = getArgumentValue(args, params.comment, info);
    const skipLines = getArgumentValue(args, params.skipLines, info);
    const checkNames = getArgumentValue(args, params.checkNames, info);
    const noDupNames = getArgumentValue(args, params.noDupNames, info);
    const validArguments = [header, checkNames, noDupNames].every(value => typeof value === 'boolean') &&
        [separator, quote, comment].every(value => typeof value === 'string') &&
        typeof skipLines === 'number';
    if (request === undefined || !config.abstractInterpretation.dataFrame.readLoadedData.readExternalFiles || !validArguments) {
        return [{
                operation: 'read',
                operand: undefined,
                source,
                colnames: undefined,
                rows: undefined
            }];
    }
    const readConfig = config.abstractInterpretation.dataFrame.readLoadedData;
    const commentPattern = new RegExp(`\\s*[${escapeRegExp(comment, true)}].*`);
    let headerEntries = undefined;
    let headerLineNumber = 0;
    let rowCount = 0;
    const countLine = (line, lineNumber) => {
        const raw = line.toString();
        const text = comment ? raw.replace(commentPattern, '') : raw;
        // ignore lines that are empty after removing comments, and lines before the skip offset
        if (!(text.length > 0) || !(lineNumber >= (skipLines ?? 0))) {
            return;
        }
        if (headerEntries === undefined) {
            headerEntries = getEntriesFromCsvLine(text, separator, quote, comment);
            headerLineNumber = lineNumber;
        }
        // with a header, the first parsed line is not a data row
        if (!header || lineNumber > headerLineNumber) {
            rowCount++;
        }
    };
    const allLines = parseRequestContent(request, countLine, readConfig.maxReadLines);
    let colnames;
    if (header) {
        colnames = filterValidNames(headerEntries, checkNames, noDupNames, params.noEmptyNames);
    }
    else if (headerEntries !== undefined) {
        // without header only the number of columns is known
        colnames = Array.from({ length: headerEntries.length }, () => undefined);
    }
    return [{
            operation: 'read',
            operand: undefined,
            source,
            colnames,
            // the counted rows are only a lower bound if not all lines have been read
            rows: allLines ? rowCount : [rowCount, Infinity]
        }];
}
|
|
663
|
+
/**
 * Maps a column-binding call to `concatCols` operations (for arguments with a resolvable data frame shape)
 * followed by an optional `addCols` operation (for all other arguments).
 *
 * Only the first emitted operation refers to the data frame argument as operand;
 * the operand of every later operation is `undefined`.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`special`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if no argument is a data frame
 */
function mapDataFrameColBind(args, params, info) {
    const boundArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = boundArgs.find(arg => (0, arguments_1.isDataFrameArgument)(arg, info));
    if (dataFrame === undefined) {
        return;
    }
    if (boundArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    let operand = dataFrame.value;
    let colnames = [];
    for (const arg of boundArgs) {
        if (arg === dataFrame || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const otherShape = (0, shape_inference_1.resolveIdToDataFrameShape)(arg.value, info.graph);
        if (otherShape !== undefined) {
            operations.push({
                operation: 'concatCols',
                operand: operand?.info.id,
                other: otherShape
            });
            operand = undefined;
            continue;
        }
        if ((0, resolve_args_1.resolveIdToArgValue)(arg, info) === undefined) {
            // added columns are top if argument cannot be resolved to constant (vector-like) value
            colnames = undefined;
            continue;
        }
        const colname = (0, resolve_args_1.resolveIdToArgName)(arg, info);
        colnames?.push(colname);
    }
    if (colnames === undefined || colnames.length > 0) {
        operations.push({
            operation: 'addCols',
            operand: operand?.info.id,
            colnames
        });
    }
    return operations;
}
|
|
705
|
+
/**
 * Maps a row-binding call to `concatRows` operations (for arguments with a resolvable data frame shape)
 * followed by an optional `addRows` operation (for all other arguments).
 *
 * Only the first emitted operation refers to the data frame argument as operand;
 * the operand of every later operation is `undefined`.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`special`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if no argument is a data frame
 */
function mapDataFrameRowBind(args, params, info) {
    const boundArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = boundArgs.find(arg => (0, arguments_1.isDataFrameArgument)(arg, info));
    if (dataFrame === undefined) {
        return;
    }
    if (boundArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const operations = [];
    let operand = dataFrame.value;
    let rows = 0;
    for (const arg of boundArgs) {
        if (arg === dataFrame || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const otherShape = (0, shape_inference_1.resolveIdToDataFrameShape)(arg.value, info.graph);
        if (otherShape !== undefined) {
            operations.push({
                operation: 'concatRows',
                operand: operand?.info.id,
                other: otherShape
            });
            operand = undefined;
        }
        else if ((0, resolve_args_1.resolveIdToArgValue)(arg, info) === undefined) {
            // number of added rows is top if arguments cannot be resolved to constant (vector-like) value
            rows = undefined;
        }
        else if (rows !== undefined) {
            // every resolvable non-data-frame argument counts as one added row
            rows += 1;
        }
    }
    if (rows === undefined || rows > 0) {
        operations.push({
            operation: 'addRows',
            operand: operand?.info.id,
            rows
        });
    }
    return operations;
}
|
|
746
|
+
/**
 * Maps a head/tail-like call to a row operation and, if a column amount is given, a column operation.
 *
 * The amount argument may resolve to a single number (rows) or to a vector of at most two numbers
 * (rows, columns). Non-negative amounts map to `subsetRows`/`subsetCols`, negative amounts map to
 * `removeRows`/`removeCols` with the absolute amount.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `amount`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameHeadTail(args, params, info) {
    const dataFrame = (0, arguments_1.getFunctionArgument)(args, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const result = [];
    const amount = (0, arguments_1.getArgumentValue)(args, params.amount, info);
    let rows = undefined;
    let cols = undefined;
    if (typeof amount === 'number') {
        rows = amount;
    }
    else if (Array.isArray(amount) && amount.length <= 2 && amount.every(value => typeof value === 'number')) {
        rows = amount[0];
        cols = amount[1];
    }
    result.push({
        operation: rows === undefined || rows >= 0 ? 'subsetRows' : 'removeRows',
        operand: dataFrame.value.info.id,
        rows: rows !== undefined ? Math.abs(rows) : undefined
    });
    if (cols !== undefined) {
        const colCount = Math.abs(cols);
        // `Array(n)` throws a RangeError for non-integer, NaN, infinite, or too large lengths,
        // so such amounts are over-approximated with unknown column names instead of crashing
        const isValidLength = Number.isInteger(colCount) && colCount < 2 ** 32;
        result.push({
            operation: cols >= 0 ? 'subsetCols' : 'removeCols',
            operand: undefined,
            colnames: isValidLength ? Array(colCount).fill(undefined) : undefined
        });
    }
    return result;
}
|
|
776
|
+
/**
 * Maps a call with `subset`, `select`, and `drop` parameters to column access, row filter,
 * and column removal/selection operations.
 *
 * Only the operations up to and including the first shape-changing one refer to the data frame argument
 * as operand; the operand of every later operation is `undefined`.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `subset`, `select`, `drop`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameSubset(args, params, info) {
    const { getFunctionArgument, isDataFrameArgument, getUnresolvedSymbolsInExpression } = arguments_1;
    const dataFrame = getFunctionArgument(args, params.dataFrame, info);
    if (!isDataFrameArgument(dataFrame, info)) {
        return;
    }
    if (args.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const isName = col => typeof col === 'string';
    const isIndex = col => typeof col === 'number';
    // columns selected by index have no known name
    const nameOrUnknown = col => isName(col) ? col : undefined;
    const operations = [];
    let operand = dataFrame.value;
    const filterArg = getFunctionArgument(args, params.subset, info);
    const filterValue = (0, resolve_args_1.resolveIdToArgValue)(filterArg, info);
    const selectArg = getFunctionArgument(args, params.select, info);
    const dropArg = getFunctionArgument(args, params.drop, info);
    const filterNames = getUnresolvedSymbolsInExpression(filterArg, info.graph);
    const { selectedCols, unselectedCols } = getSelectedColumns([selectArg], info);
    const accessedCols = [...filterNames, ...(selectedCols ?? []), ...(unselectedCols ?? [])];
    const namedCols = accessedCols.filter(col => isName(col));
    const indexedCols = accessedCols.filter(col => isIndex(col));
    const mixedAccess = namedCols.length > 0 && indexedCols.length > 0;
    const duplicateCols = accessedCols.some((col, position) => col !== undefined && accessedCols.indexOf(col) !== position);
    if (namedCols.length > 0) {
        operations.push({
            operation: 'accessCols',
            operand: operand?.info.id,
            columns: namedCols
        });
    }
    if (indexedCols.length > 0) {
        operations.push({
            operation: 'accessCols',
            operand: operand?.info.id,
            columns: indexedCols.map(col => Math.abs(col))
        });
    }
    if (filterArg !== undefined && filterArg !== r_function_call_1.EmptyArgument) {
        operations.push({
            operation: 'filterRows',
            operand: operand?.info.id,
            // the condition is only known if the filter resolves to a constant boolean
            condition: typeof filterValue === 'boolean' ? filterValue : undefined
        });
        operand = undefined;
    }
    if (dropArg && !(accessedCols.length > 1)) {
        return operations;
    }
    if (unselectedCols === undefined || unselectedCols.length > 0) {
        operations.push({
            operation: 'removeCols',
            operand: operand?.info.id,
            colnames: unselectedCols?.map(col => nameOrUnknown(col))
        });
        operand = undefined;
    }
    if (selectedCols === undefined || selectedCols.length > 0) {
        const subsetOperation = {
            operation: 'subsetCols',
            operand: operand?.info.id,
            colnames: selectedCols?.map(col => nameOrUnknown(col))
        };
        if (duplicateCols || mixedAccess) {
            subsetOperation.options = { duplicateCols: true };
        }
        operations.push(subsetOperation);
    }
    return operations;
}
|
|
839
|
+
/**
 * Maps a filter call to an optional `accessCols` operation (for the unresolved symbols in the filter
 * expressions) and a `filterRows` operation.
 *
 * The filter condition is only known if every filter argument resolves to a boolean; it is then `true`
 * exactly if none of them is `false`.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `special`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameFilter(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const operand = dataFrame.value.info.id;
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand }];
    }
    const filterArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const filterValues = filterArgs.map(arg => (0, resolve_args_1.resolveIdToArgValue)(arg, info));
    const accessedNames = filterArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    const allBoolean = filterValues.every(value => typeof value === 'boolean');
    const condition = allBoolean ? !filterValues.includes(false) : undefined;
    const operations = [];
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand, columns: accessedNames });
    }
    operations.push({ operation: 'filterRows', operand, condition: condition });
    return operations;
}
|
|
867
|
+
/**
 * Maps a select call to column access operations followed by `removeCols` and/or `subsetCols` operations.
 *
 * Only the operations up to and including the first shape-changing one refer to the data frame argument
 * as operand; the operand of every later operation is `undefined`.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `special`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameSelect(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const isName = col => typeof col === 'string';
    const isIndex = col => typeof col === 'number';
    // columns selected by index have no known name
    const nameOrUnknown = col => isName(col) ? col : undefined;
    const operations = [];
    let operand = dataFrame.value;
    const selectArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const selection = getSelectedColumns(selectArgs, info);
    let selectedCols = selection.selectedCols;
    let unselectedCols = selection.unselectedCols;
    const accessedCols = [...(selectedCols ?? []), ...(unselectedCols ?? [])];
    const namedCols = accessedCols.filter(col => isName(col));
    const indexedCols = accessedCols.filter(col => isIndex(col));
    const mixedAccess = namedCols.length > 0 && indexedCols.length > 0;
    // a column is accessed more than once if its first and last occurrence differ
    const duplicateAccess = accessedCols.some(col => col !== undefined && accessedCols.indexOf(col) !== accessedCols.lastIndexOf(col));
    const renamedCols = selectArgs.some(arguments_1.isNamedArgument);
    // map to top if columns are selected mixed by string and number, or are selected duplicate
    if (mixedAccess || duplicateAccess) {
        selectedCols = undefined;
        unselectedCols = [];
    }
    if (namedCols.length > 0) {
        operations.push({
            operation: 'accessCols',
            operand: operand?.info.id,
            columns: namedCols
        });
    }
    if (indexedCols.length > 0) {
        operations.push({
            operation: 'accessCols',
            operand: operand?.info.id,
            columns: indexedCols.map(col => Math.abs(col))
        });
    }
    if (unselectedCols === undefined || unselectedCols.length > 0) {
        operations.push({
            operation: 'removeCols',
            operand: operand?.info.id,
            colnames: unselectedCols?.map(col => nameOrUnknown(col))
        });
        operand = undefined;
    }
    if (selectedCols === undefined || selectedCols.length > 0 || unselectedCols?.length === 0) {
        const subsetOperation = {
            operation: 'subsetCols',
            operand: operand?.info.id,
            colnames: selectedCols?.map(col => nameOrUnknown(col))
        };
        if (renamedCols) {
            subsetOperation.options = { renamedCols: true };
        }
        operations.push(subsetOperation);
    }
    return operations;
}
|
|
919
|
+
/**
 * Maps a mutate call to an optional `accessCols` operation, a `removeCols` operation for arguments
 * recognized by `isRNull`, and a `mutateCols` operation for all other arguments.
 *
 * Only the operations up to and including the first shape-changing one refer to the data frame argument
 * as operand; the operand of every later operation is `undefined`.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `special`, `checkNames`, `noDupNames`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameMutate(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand: dataFrame.value.info.id }];
    }
    const argName = arg => (0, resolve_args_1.resolveIdToArgName)(arg, info);
    const operations = [];
    let operand = dataFrame.value;
    const mutateArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const deletedNames = mutateArgs
        .filter(arguments_1.isRNull)
        .map(arg => argName(arg));
    const mutatedNames = mutateArgs
        .filter(arg => !(0, arguments_1.isRNull)(arg))
        .map(arg => argName(arg));
    // only column names that are not created by mutation are preconditions on the operand
    const accessedNames = mutateArgs
        .flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph))
        .filter(name => !mutatedNames.includes(name));
    const deletedCols = (0, arguments_1.filterValidNames)(deletedNames, params.checkNames, params.noDupNames);
    const mutatedCols = (0, arguments_1.filterValidNames)(mutatedNames, params.checkNames, params.noDupNames);
    if (accessedNames.length > 0) {
        operations.push({
            operation: 'accessCols',
            operand: operand?.info.id,
            columns: accessedNames
        });
    }
    if (deletedCols === undefined || deletedCols.length > 0) {
        operations.push({
            operation: 'removeCols',
            operand: operand?.info.id,
            colnames: deletedCols,
            options: { maybe: true }
        });
        operand = undefined;
    }
    if (mutatedCols === undefined || mutatedCols.length > 0 || deletedCols?.length === 0) {
        operations.push({
            operation: 'mutateCols',
            operand: operand?.info.id,
            colnames: mutatedCols
        });
    }
    return operations;
}
|
|
969
|
+
/**
 * Maps a group-by call to an optional `accessCols` operation (for the unresolved symbols in the
 * grouping expressions) and a `groupBy` operation.
 *
 * The `mutatedCols` option is set if any grouping argument is named or any grouping name is undefined.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `special`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameGroupBy(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const operand = dataFrame.value.info.id;
    if (effectiveArgs.length === 1) {
        return [{ operation: 'identity', operand }];
    }
    const byArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const accessedNames = byArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    const byNames = byArgs.map(arg => {
        // named arguments group by the argument name, unnamed ones by the symbol name of their value
        if ((0, arguments_1.isNamedArgument)(arg)) {
            return (0, resolve_args_1.resolveIdToArgName)(arg, info);
        }
        return (0, resolve_args_1.resolveIdToArgValueSymbolName)(arg, info);
    });
    const mutatedCols = byArgs.some(arguments_1.isNamedArgument) || byNames.some(assert_1.isUndefined);
    const operations = [];
    if (accessedNames.length > 0) {
        operations.push({ operation: 'accessCols', operand, columns: accessedNames });
    }
    const groupOperation = { operation: 'groupBy', operand, by: byNames };
    if (mutatedCols) {
        groupOperation.options = { mutatedCols: true };
    }
    operations.push(groupOperation);
    return operations;
}
|
|
998
|
+
/**
 * Maps a summarize call to an optional `accessCols` operation (for the unresolved symbols in the
 * summarize expressions) and a `summarize` operation with the argument names as column names.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `special`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameSummarize(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const operand = dataFrame.value.info.id;
    const summarizeArgs = effectiveArgs.filter(arg => arg !== dataFrame);
    const accessedNames = summarizeArgs.flatMap(arg => (0, arguments_1.getUnresolvedSymbolsInExpression)(arg, info.graph));
    const summarizedCols = summarizeArgs.map(arg => (0, resolve_args_1.resolveIdToArgName)(arg, info));
    const accessOperations = accessedNames.length > 0
        ? [{ operation: 'accessCols', operand, columns: accessedNames }]
        : [];
    return [...accessOperations, { operation: 'summarize', operand, colnames: summarizedCols }];
}
|
|
1022
|
+
/**
 * Maps a join/merge call to optional `accessCols` operations for the join columns and a `join` operation.
 *
 * If one of the join flags (`joinAll`, `joinLeft`, `joinRight`) does not resolve to a boolean,
 * a single `unknown` operation on the data frame is returned instead.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `otherDataFrame`, `by`, `joinAll`, `joinLeft`, `joinRight`)
 * @param info - Argument resolve information
 * @returns The mapped operations, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameJoin(args, params, info) {
    const { getFunctionArgument, getArgumentValue, isDataFrameArgument } = arguments_1;
    const dataFrame = getFunctionArgument(args, params.dataFrame, info);
    const joinAll = getArgumentValue(args, params.joinAll, info);
    const joinLeft = getArgumentValue(args, params.joinLeft, info);
    const joinRight = getArgumentValue(args, params.joinRight, info);
    if (!isDataFrameArgument(dataFrame, info)) {
        return;
    }
    const operand = dataFrame.value.info.id;
    if (args.length === 1) {
        return [{ operation: 'identity', operand }];
    }
    const flagsResolved = [joinAll, joinLeft, joinRight].every(flag => typeof flag === 'boolean');
    if (!flagsResolved) {
        return [{ operation: 'unknown', operand }];
    }
    const isName = by => typeof by === 'string';
    const isIndex = by => typeof by === 'number';
    const otherArg = getFunctionArgument(args, params.otherDataFrame, info);
    const byArg = getFunctionArgument(args, params.by, info);
    const otherDataFrame = (0, shape_inference_1.resolveIdToDataFrameShape)(otherArg, info.graph) ?? domain_1.DataFrameTop;
    const joinType = getJoinType(joinAll, joinLeft, joinRight);
    // the join columns stay unknown unless they resolve to a single string/number or a homogeneous vector of them
    let byCols;
    if (byArg !== undefined) {
        const byValue = (0, resolve_args_1.resolveIdToArgValue)(byArg, info);
        if (isName(byValue) || isIndex(byValue)) {
            byCols = [byValue];
        }
        else if (Array.isArray(byValue) && (byValue.every(by => isName(by)) || byValue.every(by => isIndex(by)))) {
            byCols = byValue;
        }
    }
    const operations = [];
    const namedBy = byCols?.filter(by => isName(by)) ?? [];
    const indexedBy = byCols?.filter(by => isIndex(by)) ?? [];
    if (namedBy.length > 0) {
        operations.push({ operation: 'accessCols', operand, columns: namedBy });
    }
    if (indexedBy.length > 0) {
        operations.push({ operation: 'accessCols', operand, columns: indexedBy });
    }
    operations.push({
        operation: 'join',
        operand,
        other: otherDataFrame,
        by: byCols?.map(by => isName(by) ? by : undefined),
        options: { join: joinType, natural: byArg === undefined }
    });
    return operations;
}
|
|
1074
|
+
/**
 * Maps a call that does not change the shape of its data frame argument to an `identity` operation.
 *
 * If the function disallows named arguments (`params.disallowNamedArgs`) and a named argument is present,
 * an `unknown` operation on the data frame is returned instead.
 *
 * @param args - The arguments of the function call
 * @param params - The parameter description of the function (`dataFrame`, `special`, `disallowNamedArgs`)
 * @param info - Argument resolve information
 * @returns The mapped operation, or `undefined` if the data frame parameter is not a data frame argument
 */
function mapDataFrameIdentity(args, params, info) {
    const effectiveArgs = (0, arguments_1.getEffectiveArgs)(args, params.special);
    const dataFrame = (0, arguments_1.getFunctionArgument)(effectiveArgs, params.dataFrame, info);
    if (!(0, arguments_1.isDataFrameArgument)(dataFrame, info)) {
        return;
    }
    const hasForbiddenNamedArg = params.disallowNamedArgs && effectiveArgs.some(arguments_1.isNamedArgument);
    return [{
            operation: hasForbiddenNamedArg ? 'unknown' : 'identity',
            operand: dataFrame.value.info.id
        }];
}
|
|
1088
|
+
/**
 * Maps a call of a data frame function without dedicated mapper to an `unknown` operation on its data frame argument.
 *
 * The data frame argument is taken from `params.dataFrame` if given, otherwise the first argument
 * that is a data frame argument is used.
 *
 * @param args - The arguments of the function call
 * @param params - The mapping entry of the function (`dataFrame`, `constraintType`)
 * @param info - Argument resolve information
 * @returns The mapped operation, or `undefined` if there is no data frame argument
 */
function mapDataFrameUnknown(args, params, info) {
    const { getFunctionArgument, isDataFrameArgument } = arguments_1;
    const dataFrame = params.dataFrame !== undefined
        ? getFunctionArgument(args, params.dataFrame, info)
        : args.find(arg => isDataFrameArgument(arg, info));
    if (!isDataFrameArgument(dataFrame, info)) {
        return;
    }
    const operation = { operation: 'unknown', operand: dataFrame.value.info.id };
    if (params.constraintType !== undefined) {
        operation.type = params.constraintType;
    }
    return [operation];
}
|
|
1105
|
+
/**
 * Determines the source and the parse request for a data frame read call.
 *
 * A present file name argument takes precedence over the text argument. For a file name, a request is
 * only created if `findSource` yields exactly one source, or if the function has no text parameter and
 * the unescaped file name contains a newline (the file name is then treated as inline content).
 *
 * @param fileNameArg - The file name argument of the call
 * @param textArg - The text argument of the call
 * @param params - The parameter description of the read function (`text`)
 * @param info - Argument resolve information
 * @param config - The flowR configuration (`solver.resolveSource`)
 * @returns The resolved `source` and `request`, each `undefined` if it cannot be determined
 */
function getRequestFromRead(fileNameArg, textArg, params, info, config) {
    const isPresent = arg => arg !== undefined && arg !== r_function_call_1.EmptyArgument;
    let source;
    let request;
    if (isPresent(fileNameArg)) {
        const fileName = (0, resolve_args_1.resolveIdToArgValue)(fileNameArg, info);
        if (typeof fileName !== 'string') {
            return { source, request };
        }
        source = fileName;
        const referenceChain = fileNameArg.info.file ? [(0, retriever_1.requestFromInput)(`file://${fileNameArg.info.file}`)] : [];
        const sources = (0, built_in_source_1.findSource)(config.solver.resolveSource, fileName, { referenceChain });
        if (sources?.length === 1) {
            source = sources[0];
            // create request from resolved source file path
            request = (0, built_in_source_1.getSourceProvider)().createRequest(source);
        }
        else if (params.text === undefined) {
            const inlineContent = (0, resolve_args_1.unescapeSpecialChars)(fileName);
            if (inlineContent.includes('\n')) {
                // create request from string if file name argument contains newline
                request = (0, retriever_1.requestFromInput)(inlineContent);
            }
        }
        return { source, request };
    }
    if (isPresent(textArg)) {
        const text = (0, resolve_args_1.resolveIdToArgValue)(textArg, info);
        if (typeof text === 'string') {
            source = text;
            request = (0, retriever_1.requestFromInput)((0, resolve_args_1.unescapeSpecialChars)(text));
        }
    }
    return { source, request };
}
|
|
1134
|
+
/**
 * Feeds the content of a request line by line to a parser callback.
 *
 * Text requests are split at `\n` and always passed to the parser completely (`maxLines` is not applied);
 * file requests are delegated to `readLineByLineSync` together with `maxLines`.
 *
 * @param request - The request to parse (`request` is `'text'` or `'file'`, `content` is the text or file path)
 * @param parser - Callback receiving each line and its index
 * @param maxLines - Line limit that is passed on for file requests
 * @returns `true` for text requests, the result of `readLineByLineSync` for file requests
 */
function parseRequestContent(request, parser, maxLines) {
    const requestType = request.request;
    if (requestType === 'text') {
        const lines = request.content.split('\n');
        const lineCount = lines.length;
        for (let index = 0; index < lineCount; index++) {
            parser(lines[index], index, lines);
        }
        return true;
    }
    if (requestType === 'file') {
        return (0, files_1.readLineByLineSync)(request.content, parser, maxLines);
    }
    (0, assert_1.assertUnreachable)(requestType);
}
|
|
1146
|
+
/**
 * Gets all entries from a line of a CSV file using a custom separator char, quote char, and comment char
 *
 * Quoted entries may contain the separator and doubled quote chars (like `""`), which are replaced by a
 * single quote char. Everything from the first comment char on is removed before the entries are extracted.
 *
 * @param line - The line to split into entries
 * @param sep - The separator char(s)
 * @param quote - The quote char(s)
 * @param comment - The comment char(s), an empty string disables comment handling
 * @param trim - Whether to trim whitespace around each entry
 * @returns The entries of the line in order of appearance
 */
function getEntriesFromCsvLine(line, sep = ',', quote = '"', comment = '', trim = true) {
    // only allow tokens like `\s`, `\t`, or `\n` in separator, quote, and comment chars
    const sepChars = (0, arguments_1.escapeRegExp)(sep, true);
    const quoteChars = (0, arguments_1.escapeRegExp)(quote, true);
    const commentChars = (0, arguments_1.escapeRegExp)(comment, true);
    const quantifier = sepChars === '\\s' ? '+' : '*'; // do not allow unquoted empty entries in whitespace-separated files
    const LineCommentRegex = new RegExp(`[${commentChars}].*`);
    const CsvEntryRegex = new RegExp(`(?<=^|[${sepChars}])(?:[${quoteChars}]((?:[^${quoteChars}]|[${quoteChars}]{2})*)[${quoteChars}]|([^${sepChars}]${quantifier}))`, 'g');
    const DoubleQuoteRegex = new RegExp(`([${quoteChars}])\\1`, 'g'); // regex for doubled quotes like `""` or `''`
    const content = commentChars ? line.replace(LineCommentRegex, '') : line;
    // `Array.from` with a map function avoids the iterator helper methods (`map`, `toArray`)
    // that are not available on the `matchAll` iterator in older runtimes
    return Array.from(content.matchAll(CsvEntryRegex), match => {
        // group 1 holds a quoted entry, group 2 an unquoted entry
        const entry = match[1]?.replace(DoubleQuoteRegex, '$1') ?? match[2];
        return trim ? entry.trim() : entry;
    });
}
|
|
1163
|
+
/**
 * Resolves all selected columns in a select expression, such as `id`, `"id"`, `1`, `c(id, name)`, `c("id", "name")`, `1:2`, `-id`, `-1`, `-c(id, name)`, `c(-1, -2)`, etc.
 *
 * Calls of `c` are resolved recursively, a unary minus swaps the selected and unselected columns of its
 * operand, and a `:` range is resolved to its values with variable resolving disabled.
 * A column list becomes `undefined` as soon as one expression cannot be resolved.
 *
 * @param args - The arguments of the select expression (`undefined` and empty arguments are skipped)
 * @param info - Argument resolve information
 * @returns The selected and unselected columns as names or indices, each `undefined` if unknown
 */
function getSelectedColumns(args, info) {
    const { RType } = type_1;
    // concatenation that keeps a column list unknown once one of its parts is unknown
    const concat = (known, added) => (known === undefined || added === undefined ? undefined : [...known, ...added]);
    let selectedCols = [];
    let unselectedCols = [];
    for (const arg of args) {
        if (arg === undefined || arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const value = arg.value;
        const kind = value?.type;
        const hasIdMap = info.idMap !== undefined;
        if (kind === RType.FunctionCall && value.named && value.functionName.content === 'c') {
            const nested = getSelectedColumns(value.arguments, info);
            selectedCols = concat(selectedCols, nested.selectedCols);
            unselectedCols = concat(unselectedCols, nested.unselectedCols);
        }
        else if (kind === RType.UnaryOp && value.operator === '-' && hasIdMap) {
            // negation swaps the roles of selected and unselected columns
            const negated = getSelectedColumns([(0, make_argument_1.toUnnamedArgument)(value.operand, info.idMap)], info);
            selectedCols = concat(selectedCols, negated.unselectedCols);
            unselectedCols = concat(unselectedCols, negated.selectedCols);
        }
        else if (kind === RType.BinaryOp && value.operator === ':' && hasIdMap) {
            const rangeArg = (0, make_argument_1.toUnnamedArgument)(value, info.idMap);
            const values = (0, resolve_args_1.resolveIdToArgValue)(rangeArg, { ...info, resolve: config_1.VariableResolve.Disabled });
            const isNumericRange = Array.isArray(values) && values.every(entry => typeof entry === 'number');
            if (isNumericRange) {
                selectedCols = concat(selectedCols, values.filter(entry => entry >= 0));
                unselectedCols = concat(unselectedCols, values.filter(entry => entry < 0).map(entry => Math.abs(entry)));
            }
            else {
                selectedCols = undefined;
            }
        }
        else if (kind === RType.Symbol || kind === RType.String) {
            selectedCols?.push((0, resolve_args_1.resolveIdToArgValueSymbolName)(arg, info));
        }
        else if (kind === RType.Number) {
            selectedCols?.push(value.content.num);
        }
        else {
            selectedCols = undefined;
        }
    }
    return { selectedCols, unselectedCols };
}
|
|
1205
|
+
/**
 * Maps the three join flags to a join type.
 *
 * Returns `'full'` if `joinAll` is set or both `joinLeft` and `joinRight` are set,
 * `'left'` if only `joinLeft` is set, `'right'` if only `joinRight` is set, and `'inner'` otherwise.
 */
function getJoinType(joinAll, joinLeft, joinRight) {
    if (joinAll) {
        return 'full';
    }
    if (joinLeft) {
        return joinRight ? 'full' : 'left';
    }
    return joinRight ? 'right' : 'inner';
}
|
|
1219
|
+
//# sourceMappingURL=function-mapper.js.map
|