@eagleoutice/flowr 2.10.2 → 2.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -23
- package/documentation/wiki-cfg.js +3 -3
- package/documentation/wiki-linter.js +1 -1
- package/documentation/wiki-query.js +29 -0
- package/linter/linter-rules.d.ts +12 -12
- package/linter/linter-rules.js +2 -2
- package/linter/rules/network-functions.d.ts +1 -1
- package/linter/rules/network-functions.js +8 -2
- package/linter/rules/{problematic-eval.d.ts → problematic-inputs.d.ts} +15 -17
- package/linter/rules/problematic-inputs.js +111 -0
- package/linter/rules/seeded-randomness.d.ts +1 -1
- package/linter/rules/seeded-randomness.js +8 -1
- package/package.json +4 -4
- package/queries/catalog/call-context-query/call-context-query-executor.js +2 -2
- package/queries/catalog/call-context-query/call-context-query-format.d.ts +1 -1
- package/queries/catalog/call-context-query/call-context-query-format.js +1 -2
- package/queries/catalog/dependencies-query/function-info/read-functions.js +6 -0
- package/queries/catalog/dependencies-query/function-info/write-functions.js +8 -1
- package/queries/catalog/input-sources-query/input-sources-query-executor.js +27 -5
- package/queries/catalog/input-sources-query/input-sources-query-format.js +10 -7
- package/queries/catalog/input-sources-query/simple-input-classifier.d.ts +36 -8
- package/queries/catalog/input-sources-query/simple-input-classifier.js +59 -28
- package/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.d.ts +1 -1
- package/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.js +1 -1
- package/util/version.js +1 -1
- package/linter/rules/problematic-eval.js +0 -83
package/README.md
CHANGED
|
@@ -24,7 +24,7 @@ It offers a wide variety of features, for example:
|
|
|
24
24
|
|
|
25
25
|
```shell
|
|
26
26
|
$ docker run -it --rm eagleoutice/flowr # or npm run flowr
|
|
27
|
-
flowR repl using flowR v2.10.
|
|
27
|
+
flowR repl using flowR v2.10.3, R grammar v14 (tree-sitter engine)
|
|
28
28
|
R> :query @linter "read.csv(\"/root/x.txt\")"
|
|
29
29
|
```
|
|
30
30
|
|
|
@@ -58,7 +58,7 @@ It offers a wide variety of features, for example:
|
|
|
58
58
|
╰ Metadata: consideredNodes: 5, searchTimeMs: 0, processTimeMs: 0
|
|
59
59
|
╰ Useless Loops (useless-loop):
|
|
60
60
|
╰ Metadata: numOfUselessLoops: 0, searchTimeMs: 0, processTimeMs: 0
|
|
61
|
-
╰ Problematic
|
|
61
|
+
╰ Problematic inputs (problematic-inputs):
|
|
62
62
|
╰ Metadata: searchTimeMs: 0, processTimeMs: 0
|
|
63
63
|
╰ Stop without call.=False argument (stop-call):
|
|
64
64
|
╰ Metadata: consideredNodes: 0, searchTimeMs: 0, processTimeMs: 0
|
|
@@ -86,15 +86,15 @@ It offers a wide variety of features, for example:
|
|
|
86
86
|
|
|
87
87
|
_Results (prettified and summarized):_
|
|
88
88
|
|
|
89
|
-
Query: **linter** (
|
|
89
|
+
Query: **linter** (2 ms)\
|
|
90
90
|
╰ **Deprecated Functions** (deprecated-functions):\
|
|
91
|
-
╰ _Metadata_: <code>totalCalls: 0, totalFunctionDefinitions: 0, searchTimeMs:
|
|
91
|
+
╰ _Metadata_: <code>totalCalls: 0, totalFunctionDefinitions: 0, searchTimeMs: 0, processTimeMs: 0</code>\
|
|
92
92
|
╰ **File Path Validity** (file-path-validity):\
|
|
93
93
|
╰ certain:\
|
|
94
94
|
╰ Path `/root/x.txt` at 1.1-23\
|
|
95
|
-
╰ _Metadata_: <code>totalReads: 1, totalUnknown: 0, totalWritesBeforeAlways: 0, totalValid: 0, searchTimeMs:
|
|
95
|
+
╰ _Metadata_: <code>totalReads: 1, totalUnknown: 0, totalWritesBeforeAlways: 0, totalValid: 0, searchTimeMs: 1, processTimeMs: 0</code>\
|
|
96
96
|
╰ **Seeded Randomness** (seeded-randomness):\
|
|
97
|
-
╰ _Metadata_: <code>consumerCalls: 0, callsWithFunctionProducers: 0, callsWithAssignmentProducers: 0, callsWithNonConstantProducers: 0, callsWithOtherBranchProducers: 0, searchTimeMs:
|
|
97
|
+
╰ _Metadata_: <code>consumerCalls: 0, callsWithFunctionProducers: 0, callsWithAssignmentProducers: 0, callsWithNonConstantProducers: 0, callsWithOtherBranchProducers: 0, searchTimeMs: 0, processTimeMs: 0</code>\
|
|
98
98
|
╰ **Absolute Paths** (absolute-file-paths):\
|
|
99
99
|
╰ certain:\
|
|
100
100
|
╰ Path `/root/x.txt` at 1.1-23\
|
|
@@ -104,24 +104,24 @@ It offers a wide variety of features, for example:
|
|
|
104
104
|
╰ **Naming Convention** (naming-convention):\
|
|
105
105
|
╰ _Metadata_: <code>numMatches: 0, numBreak: 0, searchTimeMs: 0, processTimeMs: 0</code>\
|
|
106
106
|
╰ **Network Functions** (network-functions):\
|
|
107
|
-
╰ _Metadata_: <code>totalCalls: 0, totalFunctionDefinitions: 0, searchTimeMs: 0, processTimeMs:
|
|
107
|
+
╰ _Metadata_: <code>totalCalls: 0, totalFunctionDefinitions: 0, searchTimeMs: 0, processTimeMs: 1</code>\
|
|
108
108
|
╰ **Dataframe Access Validation** (dataframe-access-validation):\
|
|
109
|
-
╰ _Metadata_: <code>numOperations: 0, numAccesses: 0, totalAccessed: 0, searchTimeMs: 0, processTimeMs:
|
|
109
|
+
╰ _Metadata_: <code>numOperations: 0, numAccesses: 0, totalAccessed: 0, searchTimeMs: 0, processTimeMs: 0</code>\
|
|
110
110
|
╰ **Dead Code** (dead-code):\
|
|
111
111
|
╰ _Metadata_: <code>consideredNodes: 5, searchTimeMs: 0, processTimeMs: 0</code>\
|
|
112
112
|
╰ **Useless Loops** (useless-loop):\
|
|
113
113
|
╰ _Metadata_: <code>numOfUselessLoops: 0, searchTimeMs: 0, processTimeMs: 0</code>\
|
|
114
|
-
╰ **Problematic
|
|
114
|
+
╰ **Problematic inputs** (problematic-inputs):\
|
|
115
115
|
╰ _Metadata_: <code>searchTimeMs: 0, processTimeMs: 0</code>\
|
|
116
116
|
╰ **Stop without call.=False argument** (stop-call):\
|
|
117
117
|
╰ _Metadata_: <code>consideredNodes: 0, searchTimeMs: 0, processTimeMs: 0</code>\
|
|
118
118
|
╰ **Roxygen Arguments** (roxygen-arguments):\
|
|
119
119
|
╰ _Metadata_: <code>searchTimeMs: 0, processTimeMs: 0</code>\
|
|
120
|
-
_All queries together required ≈
|
|
120
|
+
_All queries together required ≈2 ms (1ms accuracy, total 2 ms)_
|
|
121
121
|
|
|
122
122
|
<details> <summary style="color:gray">Show Detailed Results as Json</summary>
|
|
123
123
|
|
|
124
|
-
The analysis required
|
|
124
|
+
The analysis required _2.2 ms_ (including parsing and normalization and the query) within the generation environment.
|
|
125
125
|
|
|
126
126
|
In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR.
|
|
127
127
|
Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki/Interface) wiki page for more information on how to get those.
|
|
@@ -138,7 +138,7 @@ It offers a wide variety of features, for example:
|
|
|
138
138
|
".meta": {
|
|
139
139
|
"totalCalls": 0,
|
|
140
140
|
"totalFunctionDefinitions": 0,
|
|
141
|
-
"searchTimeMs":
|
|
141
|
+
"searchTimeMs": 0,
|
|
142
142
|
"processTimeMs": 0
|
|
143
143
|
}
|
|
144
144
|
},
|
|
@@ -161,7 +161,7 @@ It offers a wide variety of features, for example:
|
|
|
161
161
|
"totalUnknown": 0,
|
|
162
162
|
"totalWritesBeforeAlways": 0,
|
|
163
163
|
"totalValid": 0,
|
|
164
|
-
"searchTimeMs":
|
|
164
|
+
"searchTimeMs": 1,
|
|
165
165
|
"processTimeMs": 0
|
|
166
166
|
}
|
|
167
167
|
},
|
|
@@ -173,7 +173,7 @@ It offers a wide variety of features, for example:
|
|
|
173
173
|
"callsWithAssignmentProducers": 0,
|
|
174
174
|
"callsWithNonConstantProducers": 0,
|
|
175
175
|
"callsWithOtherBranchProducers": 0,
|
|
176
|
-
"searchTimeMs":
|
|
176
|
+
"searchTimeMs": 0,
|
|
177
177
|
"processTimeMs": 0
|
|
178
178
|
}
|
|
179
179
|
},
|
|
@@ -220,7 +220,7 @@ It offers a wide variety of features, for example:
|
|
|
220
220
|
"totalCalls": 0,
|
|
221
221
|
"totalFunctionDefinitions": 0,
|
|
222
222
|
"searchTimeMs": 0,
|
|
223
|
-
"processTimeMs":
|
|
223
|
+
"processTimeMs": 1
|
|
224
224
|
}
|
|
225
225
|
},
|
|
226
226
|
"dataframe-access-validation": {
|
|
@@ -230,7 +230,7 @@ It offers a wide variety of features, for example:
|
|
|
230
230
|
"numAccesses": 0,
|
|
231
231
|
"totalAccessed": 0,
|
|
232
232
|
"searchTimeMs": 0,
|
|
233
|
-
"processTimeMs":
|
|
233
|
+
"processTimeMs": 0
|
|
234
234
|
}
|
|
235
235
|
},
|
|
236
236
|
"dead-code": {
|
|
@@ -249,7 +249,7 @@ It offers a wide variety of features, for example:
|
|
|
249
249
|
"processTimeMs": 0
|
|
250
250
|
}
|
|
251
251
|
},
|
|
252
|
-
"problematic-
|
|
252
|
+
"problematic-inputs": {
|
|
253
253
|
"results": [],
|
|
254
254
|
".meta": {
|
|
255
255
|
"searchTimeMs": 0,
|
|
@@ -273,11 +273,11 @@ It offers a wide variety of features, for example:
|
|
|
273
273
|
}
|
|
274
274
|
},
|
|
275
275
|
".meta": {
|
|
276
|
-
"timing":
|
|
276
|
+
"timing": 2
|
|
277
277
|
}
|
|
278
278
|
},
|
|
279
279
|
".meta": {
|
|
280
|
-
"timing":
|
|
280
|
+
"timing": 2
|
|
281
281
|
}
|
|
282
282
|
}
|
|
283
283
|
```
|
|
@@ -342,7 +342,7 @@ It offers a wide variety of features, for example:
|
|
|
342
342
|
|
|
343
343
|
```shell
|
|
344
344
|
$ docker run -it --rm eagleoutice/flowr # or npm run flowr
|
|
345
|
-
flowR repl using flowR v2.10.
|
|
345
|
+
flowR repl using flowR v2.10.3, R grammar v14 (tree-sitter engine)
|
|
346
346
|
R> :query @static-slice (11@sum) file://test/testfiles/example.R
|
|
347
347
|
```
|
|
348
348
|
|
|
@@ -390,7 +390,7 @@ It offers a wide variety of features, for example:
|
|
|
390
390
|
|
|
391
391
|
|
|
392
392
|
* 🚀 **fast call-graph, data-, and control-flow graphs**\
|
|
393
|
-
Within just [<i><span title="This measurement is automatically fetched from the latest benchmark!">
|
|
393
|
+
Within just [<i><span title="This measurement is automatically fetched from the latest benchmark!">104.1 ms</span></i> (as of Apr 7, 2026)](https://flowr-analysis.github.io/flowr/wiki/stats/benchmark),
|
|
394
394
|
_flowR_ can analyze the data- and control-flow of the average real-world R script. See the [benchmarks](https://flowr-analysis.github.io/flowr/wiki/stats/benchmark) for more information,
|
|
395
395
|
and consult the [wiki pages](https://github.com/flowr-analysis/flowr/wiki/dataflow-graph) for more details on the [dataflow graphs](https://github.com/flowr-analysis/flowr/wiki/dataflow-graph) as well as [call graphs](https://github.com/flowr-analysis/flowr/wiki/dataflow-graph#perspectives-cg).
|
|
396
396
|
|
|
@@ -426,7 +426,7 @@ It offers a wide variety of features, for example:
|
|
|
426
426
|
|
|
427
427
|
```shell
|
|
428
428
|
$ docker run -it --rm eagleoutice/flowr # or npm run flowr
|
|
429
|
-
flowR repl using flowR v2.10.
|
|
429
|
+
flowR repl using flowR v2.10.3, R grammar v14 (tree-sitter engine)
|
|
430
430
|
R> :dataflow* test/testfiles/example.R
|
|
431
431
|
```
|
|
432
432
|
|
|
@@ -734,7 +734,7 @@ It offers a wide variety of features, for example:
|
|
|
734
734
|
```
|
|
735
735
|
|
|
736
736
|
|
|
737
|
-
(The analysis required
|
|
737
|
+
(The analysis required _1.6 ms_ (including parse and normalize, using the [tree-sitter](https://github.com/flowr-analysis/flowr/wiki/Engines) engine) within the generation environment.)
|
|
738
738
|
|
|
739
739
|
|
|
740
740
|
|
|
@@ -201,7 +201,7 @@ ${(0, doc_structure_1.section)('Structure of the Control Flow Graph', 2, 'cfg-st
|
|
|
201
201
|
|
|
202
202
|
You can produce your very own control flow graph with ${ctx.link(extract_cfg_1.extractCfg)}.
|
|
203
203
|
The ${ctx.link(control_flow_graph_1.ControlFlowGraph)} class describes everything required to model the control flow graph, with its edge types described by
|
|
204
|
-
${ctx.link('CfgEdge')} and its vertices by ${ctx.link('
|
|
204
|
+
${ctx.link('CfgEdge')} and its vertices by ${ctx.link('CfgVertex')}.
|
|
205
205
|
However, you should be aware of the ${ctx.link('ControlFlowInformation')} interface which adds some additional information the CFG
|
|
206
206
|
(and is used during the construction of the CFG as well):
|
|
207
207
|
|
|
@@ -221,7 +221,7 @@ ${Object.entries(control_flow_graph_1.CfgVertexType).map(([key, value]) => `- \`
|
|
|
221
221
|
We use the ${ctx.link('CfgBasicBlockVertex')} to represent [basic blocks](#cfg-basic-blocks) and separate
|
|
222
222
|
expressions (${ctx.link('CfgExpressionVertex')}) and statements (${ctx.link('CfgStatementVertex')})
|
|
223
223
|
as control flow units with and without side effects (if you want to, you can see view statements as effectful expressions).
|
|
224
|
-
The markers (${ctx.link('
|
|
224
|
+
The markers (${ctx.link('CfgMarkerVertex')}) indicate the end of larger expressions/statements.
|
|
225
225
|
|
|
226
226
|
To signal these links, the expressions and statements contain information about the attached markers:
|
|
227
227
|
|
|
@@ -239,7 +239,7 @@ ${(0, doc_structure_1.block)({
|
|
|
239
239
|
content: `
|
|
240
240
|
Every CFG vertex has a ${ctx.link('NodeId')} that links it to the [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST) (although basic blocks will find no counterpart as they are a structuring element of the CFG).
|
|
241
241
|
Additionally, it may provide information on the called functions (in case that the current element is a function call).
|
|
242
|
-
Have a look at the ${ctx.link('
|
|
242
|
+
Have a look at the ${ctx.link('CfgBaseVertexWithMarker')} interface for more information.
|
|
243
243
|
`.trim()
|
|
244
244
|
})}
|
|
245
245
|
|
|
@@ -124,7 +124,7 @@ df[6, "value"]
|
|
|
124
124
|
rule(knownParser, 'useless-loop', 'UselessLoopConfig', 'USELESS_LOOP', 'lint-useless-loop', 'for(i in c(1)) { print(i) }', tagTypes);
|
|
125
125
|
rule(knownParser, 'stop-call', 'StopWithCallConfig', 'STOP_WITH_CALL_ARG', 'lint-stop-call', 'stop(42)', tagTypes);
|
|
126
126
|
rule(knownParser, 'roxygen-arguments', 'RoxygenArgsConfig', 'ROXYGEN_ARGS', 'lint-roxygen-arguments', '#\' A function with two parameters, but only only one documented\n#\' @param a A variable\nf = function(a, b){return a;}', tagTypes);
|
|
127
|
-
rule(knownParser, 'problematic-
|
|
127
|
+
rule(knownParser, 'problematic-inputs', 'ProblematicInputsConfig', 'PROBLEMATIC_INPUTS', 'lint-problematic-inputs', `
|
|
128
128
|
function(x) {
|
|
129
129
|
eval(x)
|
|
130
130
|
}
|
|
@@ -40,6 +40,7 @@ const does_call_query_executor_1 = require("../queries/catalog/does-call-query/d
|
|
|
40
40
|
const inspect_exception_query_executor_1 = require("../queries/catalog/inspect-exceptions-query/inspect-exception-query-executor");
|
|
41
41
|
const slice_direction_1 = require("../util/slice-direction");
|
|
42
42
|
const provenance_query_executor_1 = require("../queries/catalog/provenance-query/provenance-query-executor");
|
|
43
|
+
const input_sources_query_executor_1 = require("../queries/catalog/input-sources-query/input-sources-query-executor");
|
|
43
44
|
(0, doc_query_1.registerQueryDocumentation)('call-context', {
|
|
44
45
|
name: 'Call-Context Query',
|
|
45
46
|
type: 'active',
|
|
@@ -608,6 +609,34 @@ ${await (0, doc_query_1.showQuery)(shell, exampleCode, [{
|
|
|
608
609
|
`;
|
|
609
610
|
}
|
|
610
611
|
});
|
|
612
|
+
(0, doc_query_1.registerQueryDocumentation)('input-sources', {
|
|
613
|
+
name: 'Input Sources Query',
|
|
614
|
+
type: 'active',
|
|
615
|
+
shortDescription: 'Classify the input sources of function calls',
|
|
616
|
+
functionName: input_sources_query_executor_1.executeInputSourcesQuery.name,
|
|
617
|
+
functionFile: '../queries/catalog/input-sources-query/input-sources-query-executor.ts',
|
|
618
|
+
buildExplanation: async (shell) => {
|
|
619
|
+
const exampleCode = `
|
|
620
|
+
f <- function(x) {
|
|
621
|
+
x <- x * 2
|
|
622
|
+
print(x)
|
|
623
|
+
}`.trim();
|
|
624
|
+
const criterion = '3@print';
|
|
625
|
+
return `
|
|
626
|
+
Given a [slicing criterion](${doc_files_1.FlowrWikiBaseRef}/Terminology#slicing-criterion) to
|
|
627
|
+
something like a function call, flowR classifies the types of all input sources (e.g., arguments).
|
|
628
|
+
|
|
629
|
+
To exemplify the query, consider the following code:
|
|
630
|
+
${(0, doc_code_1.codeBlock)('r', exampleCode)}
|
|
631
|
+
If you are interested in the input-sources of the \`print\` call, you can use:
|
|
632
|
+
|
|
633
|
+
${await (0, doc_query_1.showQuery)(shell, exampleCode, [{
|
|
634
|
+
type: 'input-sources',
|
|
635
|
+
criterion
|
|
636
|
+
}], { showCode: false, shorthand: (0, doc_query_1.sliceQueryShorthand)([criterion], (0, doc_escape_1.escapeNewline)(exampleCode)) })}
|
|
637
|
+
`;
|
|
638
|
+
}
|
|
639
|
+
});
|
|
611
640
|
(0, doc_query_1.registerQueryDocumentation)('dependencies', {
|
|
612
641
|
name: 'Dependencies Query',
|
|
613
642
|
type: 'active',
|
package/linter/linter-rules.d.ts
CHANGED
|
@@ -78,7 +78,7 @@ export declare const LintingRules: {
|
|
|
78
78
|
readonly type: "assignment";
|
|
79
79
|
readonly name: ".Random.seed";
|
|
80
80
|
}];
|
|
81
|
-
readonly randomnessConsumers: readonly ["jitter", "sample", "sample.int", "arima.sim", "kmeans", "princomp", "rcauchy", "rchisq", "rexp", "rgamma", "rgeom", "rlnorm", "rlogis", "rmultinom", "rnbinom", "rnorm", "rpois", "runif", "pointLabel", "some", "rbernoulli", "rdunif", "generateSeedVectors"];
|
|
81
|
+
readonly randomnessConsumers: readonly ["jitter", "sample", "sample.int", "arima.sim", "kmeans", "princomp", "rcauchy", "rchisq", "rexp", "rgamma", "rgeom", "rlnorm", "rlogis", "rmultinom", "rnbinom", "rnorm", "rpois", "runif", "pointLabel", "some", "rbernoulli", "rdunif", "generateSeedVectors", "rbeta", "rf", "rhyper", "rweibull", "rt", "rvonmises", "rwilcox", "rxor", "rhyper", "rmvnorm", "rsignrank", "randomForest", "permuted", "permute", "shuffle", "shuffleSet", "data_shuffle", "sample_frac", "sample_n", "slice_sample"];
|
|
82
82
|
};
|
|
83
83
|
readonly tags: readonly [import("./linter-tags").LintingRuleTag.Robustness, import("./linter-tags").LintingRuleTag.Reproducibility];
|
|
84
84
|
readonly certainty: import("./linter-format").LintingRuleCertainty.BestEffort;
|
|
@@ -196,7 +196,7 @@ export declare const LintingRules: {
|
|
|
196
196
|
readonly certainty: import("./linter-format").LintingRuleCertainty.BestEffort;
|
|
197
197
|
readonly description: "Marks network functions that execute network operations, such as downloading files or making HTTP requests.";
|
|
198
198
|
readonly defaultConfig: {
|
|
199
|
-
readonly fns: readonly ["read.table", "read.csv", "read.csv2", "read.delim", "read.delim2", "readRDS", "download.file", "url", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "content", "handle", "get_callback", "VERB", "fread", "gzcon", "readlines", "source", "load", "curl_download", "curl_fetch_memory", "getURL", "getForm", "read_html", "html_nodes", "html_text", "fromJSON", "read.xlsx", "drive_download", "drive_get", "s3read_using", "s3write_using", "storage_download", "AnnotationHub", "ExperimentHub"];
|
|
199
|
+
readonly fns: readonly ["read.table", "read.csv", "read.csv2", "read.delim", "read.delim2", "readRDS", "download.file", "url", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "content", "handle", "get_callback", "VERB", "fread", "gzcon", "readlines", "readLines", "source", "load", "curl_download", "curl_fetch_memory", "getURL", "getForm", "read_html", "read_xml", "html_nodes", "html_text", "fromJSON", "read.xlsx", "drive_download", "drive_get", "s3read_using", "s3write_using", "storage_download", "AnnotationHub", "ExperimentHub", "scan", "socketConnection", "request", "curl"];
|
|
200
200
|
readonly onlyTriggerWithArgument: RegExp;
|
|
201
201
|
};
|
|
202
202
|
};
|
|
@@ -281,28 +281,28 @@ export declare const LintingRules: {
|
|
|
281
281
|
};
|
|
282
282
|
};
|
|
283
283
|
};
|
|
284
|
-
readonly 'problematic-
|
|
285
|
-
readonly createSearch: (config: import("./rules/problematic-
|
|
286
|
-
readonly processSearchResult: (elements: import("../search/flowr-search").FlowrSearchElements<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../search/flowr-search").FlowrSearchElement<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>, _config: import("./rules/problematic-
|
|
284
|
+
readonly 'problematic-inputs': {
|
|
285
|
+
readonly createSearch: (config: import("./rules/problematic-inputs").ProblematicInputsConfig) => import("../search/flowr-search-builder").FlowrSearchBuilder<"from-query", [], import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../search/flowr-search").FlowrSearchElements<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../search/flowr-search").FlowrSearchElement<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>>;
|
|
286
|
+
readonly processSearchResult: (elements: import("../search/flowr-search").FlowrSearchElements<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../search/flowr-search").FlowrSearchElement<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>, _config: import("./rules/problematic-inputs").ProblematicInputsConfig, data: {
|
|
287
287
|
normalize: import("../r-bridge/lang-4.x/ast/model/processing/decorate").NormalizedAst;
|
|
288
288
|
dataflow: import("../dataflow/info").DataflowInformation;
|
|
289
289
|
cfg: import("../control-flow/control-flow-graph").ControlFlowInformation;
|
|
290
290
|
analyzer: import("../project/flowr-analyzer").ReadonlyFlowrAnalysisProvider;
|
|
291
291
|
}) => Promise<{
|
|
292
|
-
results: import("./rules/problematic-
|
|
293
|
-
".meta": import("./rules/problematic-
|
|
292
|
+
results: import("./rules/problematic-inputs").ProblematicInputsResult[];
|
|
293
|
+
".meta": import("./rules/problematic-inputs").ProblematicInputsMetadata;
|
|
294
294
|
}>;
|
|
295
295
|
readonly prettyPrint: {
|
|
296
|
-
readonly query: (result: import("./rules/problematic-
|
|
297
|
-
readonly full: (result: import("./rules/problematic-
|
|
296
|
+
readonly query: (result: import("./rules/problematic-inputs").ProblematicInputsResult) => string;
|
|
297
|
+
readonly full: (result: import("./rules/problematic-inputs").ProblematicInputsResult) => string;
|
|
298
298
|
};
|
|
299
299
|
readonly info: {
|
|
300
|
-
readonly name: "Problematic
|
|
301
|
-
readonly description: "Detects uses of eval
|
|
300
|
+
readonly name: "Problematic inputs";
|
|
301
|
+
readonly description: "Detects uses of configured dynamic calls (e.g. eval, system) whose inputs are not statically constant. Prints the computed input-sources for the call and flags usages that depend on non-constant/trusted inputs.";
|
|
302
302
|
readonly tags: readonly [import("./linter-tags").LintingRuleTag.Security, import("./linter-tags").LintingRuleTag.Smell, import("./linter-tags").LintingRuleTag.Readability, import("./linter-tags").LintingRuleTag.Performance];
|
|
303
303
|
readonly certainty: import("./linter-format").LintingRuleCertainty.BestEffort;
|
|
304
304
|
readonly defaultConfig: {
|
|
305
|
-
readonly
|
|
305
|
+
readonly consider: readonly ["^eval$", "^system$", "^system2$", "^shell$"];
|
|
306
306
|
};
|
|
307
307
|
};
|
|
308
308
|
};
|
package/linter/linter-rules.js
CHANGED
|
@@ -13,7 +13,7 @@ const useless_loop_1 = require("./rules/useless-loop");
|
|
|
13
13
|
const network_functions_1 = require("./rules/network-functions");
|
|
14
14
|
const stop_with_call_arg_1 = require("./rules/stop-with-call-arg");
|
|
15
15
|
const roxygen_arguments_1 = require("./rules/roxygen-arguments");
|
|
16
|
-
const
|
|
16
|
+
const problematic_inputs_1 = require("./rules/problematic-inputs");
|
|
17
17
|
/**
|
|
18
18
|
* The registry of currently supported linting rules.
|
|
19
19
|
* A linting rule can be executed on a dataflow pipeline result using {@link executeLintingRule}.
|
|
@@ -29,7 +29,7 @@ exports.LintingRules = {
|
|
|
29
29
|
'dataframe-access-validation': dataframe_access_validation_1.DATA_FRAME_ACCESS_VALIDATION,
|
|
30
30
|
'dead-code': dead_code_1.DEAD_CODE,
|
|
31
31
|
'useless-loop': useless_loop_1.USELESS_LOOP,
|
|
32
|
-
'problematic-
|
|
32
|
+
'problematic-inputs': problematic_inputs_1.PROBLEMATIC_INPUTS,
|
|
33
33
|
'stop-call': stop_with_call_arg_1.STOP_WITH_CALL_ARG,
|
|
34
34
|
'roxygen-arguments': roxygen_arguments_1.ROXYGEN_ARGS
|
|
35
35
|
};
|
|
@@ -31,7 +31,7 @@ export declare const NETWORK_FUNCTIONS: {
|
|
|
31
31
|
readonly certainty: LintingRuleCertainty.BestEffort;
|
|
32
32
|
readonly description: "Marks network functions that execute network operations, such as downloading files or making HTTP requests.";
|
|
33
33
|
readonly defaultConfig: {
|
|
34
|
-
readonly fns: readonly ["read.table", "read.csv", "read.csv2", "read.delim", "read.delim2", "readRDS", "download.file", "url", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "content", "handle", "get_callback", "VERB", "fread", "gzcon", "readlines", "source", "load", "curl_download", "curl_fetch_memory", "getURL", "getForm", "read_html", "html_nodes", "html_text", "fromJSON", "read.xlsx", "drive_download", "drive_get", "s3read_using", "s3write_using", "storage_download", "AnnotationHub", "ExperimentHub"];
|
|
34
|
+
readonly fns: readonly ["read.table", "read.csv", "read.csv2", "read.delim", "read.delim2", "readRDS", "download.file", "url", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "content", "handle", "get_callback", "VERB", "fread", "gzcon", "readlines", "readLines", "source", "load", "curl_download", "curl_fetch_memory", "getURL", "getForm", "read_html", "read_xml", "html_nodes", "html_text", "fromJSON", "read.xlsx", "drive_download", "drive_get", "s3read_using", "s3write_using", "storage_download", "AnnotationHub", "ExperimentHub", "scan", "socketConnection", "request", "curl"];
|
|
35
35
|
readonly onlyTriggerWithArgument: RegExp;
|
|
36
36
|
};
|
|
37
37
|
};
|
|
@@ -29,8 +29,14 @@ exports.NETWORK_FUNCTIONS = {
|
|
|
29
29
|
certainty: linter_format_1.LintingRuleCertainty.BestEffort,
|
|
30
30
|
description: 'Marks network functions that execute network operations, such as downloading files or making HTTP requests.',
|
|
31
31
|
defaultConfig: {
|
|
32
|
-
fns: [
|
|
33
|
-
|
|
32
|
+
fns: [
|
|
33
|
+
'read.table', 'read.csv', 'read.csv2', 'read.delim', 'read.delim2', 'readRDS', 'download.file', 'url', 'GET', 'POST', 'PUT',
|
|
34
|
+
'DELETE', 'PATCH', 'HEAD', 'content', 'handle', 'get_callback', 'VERB', 'fread', 'gzcon', 'readlines', 'readLines', 'source', 'load', 'curl_download',
|
|
35
|
+
'curl_fetch_memory', 'getURL', 'getForm', 'read_html', 'read_xml', 'html_nodes', 'html_text', 'fromJSON', 'read.xlsx', 'drive_download', 'drive_get',
|
|
36
|
+
's3read_using', 's3write_using', 'storage_download', 'AnnotationHub', 'ExperimentHub', 'scan',
|
|
37
|
+
'socketConnection', 'request', 'curl'
|
|
38
|
+
],
|
|
39
|
+
onlyTriggerWithArgument: /^(https?|ftps?):\/\//
|
|
34
40
|
}
|
|
35
41
|
}
|
|
36
42
|
};
|
|
@@ -6,39 +6,37 @@ import type { InputSources } from '../../queries/catalog/input-sources-query/sim
|
|
|
6
6
|
/**
|
|
7
7
|
* Describes a linting result for a problematic eval usage, including the location of the eval call and the computed input sources that lead to it.
|
|
8
8
|
*/
|
|
9
|
-
export interface
|
|
9
|
+
export interface ProblematicInputsResult extends LintingResult {
|
|
10
|
+
name: string;
|
|
10
11
|
loc: SourceLocation;
|
|
11
12
|
sources: InputSources;
|
|
12
13
|
}
|
|
13
|
-
export interface
|
|
14
|
-
|
|
15
|
-
* All calls that should be considered to be valid eval entry points, this will be interpreted as a Regex!
|
|
16
|
-
*/
|
|
17
|
-
considerAsEval: string;
|
|
14
|
+
export interface ProblematicInputsConfig extends MergeableRecord {
|
|
15
|
+
consider?: string | string[];
|
|
18
16
|
}
|
|
19
|
-
export type
|
|
20
|
-
export declare const
|
|
21
|
-
readonly createSearch: (config:
|
|
22
|
-
readonly processSearchResult: (elements: import("../../search/flowr-search").FlowrSearchElements<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../../search/flowr-search").FlowrSearchElement<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>, _config:
|
|
17
|
+
export type ProblematicInputsMetadata = MergeableRecord;
|
|
18
|
+
export declare const PROBLEMATIC_INPUTS: {
|
|
19
|
+
readonly createSearch: (config: ProblematicInputsConfig) => import("../../search/flowr-search-builder").FlowrSearchBuilder<"from-query", [], import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../../search/flowr-search").FlowrSearchElements<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../../search/flowr-search").FlowrSearchElement<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>>;
|
|
20
|
+
readonly processSearchResult: (elements: import("../../search/flowr-search").FlowrSearchElements<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../../search/flowr-search").FlowrSearchElement<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>, _config: ProblematicInputsConfig, data: {
|
|
23
21
|
normalize: import("../../r-bridge/lang-4.x/ast/model/processing/decorate").NormalizedAst;
|
|
24
22
|
dataflow: import("../../dataflow/info").DataflowInformation;
|
|
25
23
|
cfg: import("../../control-flow/control-flow-graph").ControlFlowInformation;
|
|
26
24
|
analyzer: import("../../project/flowr-analyzer").ReadonlyFlowrAnalysisProvider;
|
|
27
25
|
}) => Promise<{
|
|
28
|
-
results:
|
|
29
|
-
".meta":
|
|
26
|
+
results: ProblematicInputsResult[];
|
|
27
|
+
".meta": ProblematicInputsMetadata;
|
|
30
28
|
}>;
|
|
31
29
|
readonly prettyPrint: {
|
|
32
|
-
readonly query: (result:
|
|
33
|
-
readonly full: (result:
|
|
30
|
+
readonly query: (result: ProblematicInputsResult) => string;
|
|
31
|
+
readonly full: (result: ProblematicInputsResult) => string;
|
|
34
32
|
};
|
|
35
33
|
readonly info: {
|
|
36
|
-
readonly name: "Problematic
|
|
37
|
-
readonly description: "Detects uses of eval
|
|
34
|
+
readonly name: "Problematic inputs";
|
|
35
|
+
readonly description: "Detects uses of configured dynamic calls (e.g. eval, system) whose inputs are not statically constant. Prints the computed input-sources for the call and flags usages that depend on non-constant/trusted inputs.";
|
|
38
36
|
readonly tags: readonly [LintingRuleTag.Security, LintingRuleTag.Smell, LintingRuleTag.Readability, LintingRuleTag.Performance];
|
|
39
37
|
readonly certainty: LintingRuleCertainty.BestEffort;
|
|
40
38
|
readonly defaultConfig: {
|
|
41
|
-
readonly
|
|
39
|
+
readonly consider: readonly ["^eval$", "^system$", "^system2$", "^shell$"];
|
|
42
40
|
};
|
|
43
41
|
};
|
|
44
42
|
};
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PROBLEMATIC_INPUTS = void 0;
|
|
4
|
+
const linter_format_1 = require("../linter-format");
|
|
5
|
+
const flowr_search_builder_1 = require("../../search/flowr-search-builder");
|
|
6
|
+
const range_1 = require("../../util/range");
|
|
7
|
+
const linter_tags_1 = require("../linter-tags");
|
|
8
|
+
const simple_input_classifier_1 = require("../../queries/catalog/input-sources-query/simple-input-classifier");
|
|
9
|
+
const parse_1 = require("../../slicing/criterion/parse");
|
|
10
|
+
const defaultConsider = ['^eval$', '^system$', '^system2$', '^shell$'];
|
|
11
|
+
function normalizeConsider(cfg) {
|
|
12
|
+
if (cfg?.consider === undefined) {
|
|
13
|
+
return Array.from(defaultConsider, s => new RegExp(s));
|
|
14
|
+
}
|
|
15
|
+
if (Array.isArray(cfg.consider)) {
|
|
16
|
+
const arr = cfg.consider.length === 0 ? Array.from(defaultConsider) : cfg.consider;
|
|
17
|
+
// deduplicate while preserving order
|
|
18
|
+
return Array.from(new Set(arr), s => new RegExp(s));
|
|
19
|
+
}
|
|
20
|
+
return [new RegExp(cfg.consider)];
|
|
21
|
+
}
|
|
22
|
+
/**
 * Render a list of input sources for a lint message.
 *
 * Each source is shown with its id, its types, its trace and, if present, the
 * types of its control dependencies (`cds`).
 *
 * @param inputs - the sources to render; `undefined`/`null`/empty yields an empty result
 * @param inline - `true` (default): return one string with the sources joined by `'; '`;
 *                 `false`: return an array with one `- ...` line per source (the caller joins them)
 * @returns a string in inline mode, an array of strings otherwise
 */
function formatInputSources(inputs, inline = true) {
    if (!inputs || inputs.length === 0) {
        return inline ? '' : [];
    }
    const describe = source => {
        const typeStr = `[${source.types.join(',')}]`;
        // only mention control dependencies if there are any: an empty array is truthy
        // and would otherwise be printed as the meaningless suffix ", cds: []"
        const cdsStr = source.cds?.length ? `, cds: [${source.cds.join(',')}]` : '';
        return inline
            ? `${source.id} (type: ${typeStr}, trace: ${source.trace}${cdsStr})`
            : `- ${source.id}: type=${typeStr}, trace=${source.trace}${cdsStr}`;
    };
    const rendered = inputs.map(source => describe(source));
    return inline ? rendered.join('; ') : rendered;
}
|
|
43
|
+
// small helpers to keep checks readable
|
|
44
|
+
/**
 * Tell whether at least one of the given sources lists `InputType.Unknown` among its types.
 *
 * @param sources - classified input sources, each with a `types` array
 * @returns `true` as soon as one source with the unknown type is found, `false` otherwise
 */
function hasUnknownSource(sources) {
    for (const { types } of sources) {
        if (types.includes(simple_input_classifier_1.InputType.Unknown)) {
            return true;
        }
    }
    return false;
}
|
|
47
|
+
/**
 * Check the sources against a list of accepted input types.
 *
 * @param sources - classified input sources, each with a `types` array
 * @param allowed - the input types that are considered acceptable
 * @returns `true` if any source carries a type that is not in `allowed`;
 *          `false` if every type of every source is allowed (including when there is nothing to check)
 */
function isProblematicForAllowed(sources, allowed) {
    for (const source of sources) {
        for (const type of source.types) {
            if (!allowed.includes(type)) {
                return true;
            }
        }
    }
    return false;
}
|
|
50
|
+
exports.PROBLEMATIC_INPUTS = {
|
|
51
|
+
createSearch: config => {
|
|
52
|
+
const cfg = config;
|
|
53
|
+
const considerArr = normalizeConsider(cfg);
|
|
54
|
+
const queries = considerArr.map((name, i) => ({
|
|
55
|
+
type: 'call-context',
|
|
56
|
+
callName: name,
|
|
57
|
+
callNameExact: false,
|
|
58
|
+
subkind: `fn-${i}`
|
|
59
|
+
}));
|
|
60
|
+
return flowr_search_builder_1.Q.fromQuery(...queries);
|
|
61
|
+
},
|
|
62
|
+
processSearchResult: async (elements, _config, data) => {
|
|
63
|
+
const results = [];
|
|
64
|
+
const defaultAccept = [simple_input_classifier_1.InputType.Constant, simple_input_classifier_1.InputType.DerivedConstant];
|
|
65
|
+
for (const element of elements.getElements()) {
|
|
66
|
+
const nid = element.node.info.id;
|
|
67
|
+
const criterion = parse_1.SlicingCriterion.fromId(nid);
|
|
68
|
+
const q = { type: 'input-sources', criterion };
|
|
69
|
+
const all = await data.analyzer.query([q]);
|
|
70
|
+
const inputSourcesResult = all['input-sources'];
|
|
71
|
+
const sources = inputSourcesResult?.results?.[criterion] ?? [];
|
|
72
|
+
if (isProblematicForAllowed(sources, defaultAccept)) {
|
|
73
|
+
const certainty = hasUnknownSource(sources) ? linter_format_1.LintingResultCertainty.Uncertain : linter_format_1.LintingResultCertainty.Certain;
|
|
74
|
+
results.push({
|
|
75
|
+
involvedId: nid,
|
|
76
|
+
certainty,
|
|
77
|
+
loc: range_1.SourceLocation.fromNode(element.node) ?? range_1.SourceLocation.invalid(),
|
|
78
|
+
name: element.node.lexeme ?? '',
|
|
79
|
+
sources
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
return {
|
|
84
|
+
results,
|
|
85
|
+
'.meta': {}
|
|
86
|
+
};
|
|
87
|
+
},
|
|
88
|
+
/* helper to format input sources for pretty printing */
|
|
89
|
+
prettyPrint: {
|
|
90
|
+
[linter_format_1.LintingPrettyPrintContext.Query]: result => {
|
|
91
|
+
const inputs = result.sources ?? [];
|
|
92
|
+
const srcStr = formatInputSources(inputs, true);
|
|
93
|
+
return 'Use of configured dynamic call at ' + range_1.SourceLocation.format(result.loc) + (srcStr ? '; inputs: ' + srcStr : '');
|
|
94
|
+
},
|
|
95
|
+
[linter_format_1.LintingPrettyPrintContext.Full]: result => {
|
|
96
|
+
const inputs = result.sources ?? [];
|
|
97
|
+
const srcLines = formatInputSources(inputs, false);
|
|
98
|
+
return 'Use of configured dynamic call at ' + range_1.SourceLocation.format(result.loc) + ' is potentially problematic' + (srcLines.length ? '\nInputs:\n' + srcLines.join('\n') : '');
|
|
99
|
+
}
|
|
100
|
+
},
|
|
101
|
+
info: {
|
|
102
|
+
name: 'Problematic inputs',
|
|
103
|
+
description: 'Detects uses of configured dynamic calls (e.g. eval, system) whose inputs are not statically constant. Prints the computed input-sources for the call and flags usages that depend on non-constant/trusted inputs.',
|
|
104
|
+
tags: [linter_tags_1.LintingRuleTag.Security, linter_tags_1.LintingRuleTag.Smell, linter_tags_1.LintingRuleTag.Readability, linter_tags_1.LintingRuleTag.Performance],
|
|
105
|
+
certainty: linter_format_1.LintingRuleCertainty.BestEffort,
|
|
106
|
+
defaultConfig: {
|
|
107
|
+
consider: defaultConsider
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
};
|
|
111
|
+
//# sourceMappingURL=problematic-inputs.js.map
|
|
@@ -54,7 +54,7 @@ export declare const SEEDED_RANDOMNESS: {
|
|
|
54
54
|
readonly type: "assignment";
|
|
55
55
|
readonly name: ".Random.seed";
|
|
56
56
|
}];
|
|
57
|
-
readonly randomnessConsumers: readonly ["jitter", "sample", "sample.int", "arima.sim", "kmeans", "princomp", "rcauchy", "rchisq", "rexp", "rgamma", "rgeom", "rlnorm", "rlogis", "rmultinom", "rnbinom", "rnorm", "rpois", "runif", "pointLabel", "some", "rbernoulli", "rdunif", "generateSeedVectors"];
|
|
57
|
+
readonly randomnessConsumers: readonly ["jitter", "sample", "sample.int", "arima.sim", "kmeans", "princomp", "rcauchy", "rchisq", "rexp", "rgamma", "rgeom", "rlnorm", "rlogis", "rmultinom", "rnbinom", "rnorm", "rpois", "runif", "pointLabel", "some", "rbernoulli", "rdunif", "generateSeedVectors", "rbeta", "rf", "rhyper", "rweibull", "rt", "rvonmises", "rwilcox", "rxor", "rhyper", "rmvnorm", "rsignrank", "randomForest", "permuted", "permute", "shuffle", "shuffleSet", "data_shuffle", "sample_frac", "sample_n", "slice_sample"];
|
|
58
58
|
};
|
|
59
59
|
readonly tags: readonly [LintingRuleTag.Robustness, LintingRuleTag.Reproducibility];
|
|
60
60
|
readonly certainty: LintingRuleCertainty.BestEffort;
|
|
@@ -126,7 +126,14 @@ exports.SEEDED_RANDOMNESS = {
|
|
|
126
126
|
info: {
|
|
127
127
|
defaultConfig: {
|
|
128
128
|
randomnessProducers: [{ type: 'function', name: 'set.seed' }, { type: 'assignment', name: '.Random.seed' }],
|
|
129
|
-
randomnessConsumers: [
|
|
129
|
+
randomnessConsumers: [
|
|
130
|
+
'jitter', 'sample', 'sample.int', 'arima.sim', 'kmeans', 'princomp', 'rcauchy', 'rchisq', 'rexp',
|
|
131
|
+
'rgamma', 'rgeom', 'rlnorm', 'rlogis', 'rmultinom', 'rnbinom', 'rnorm', 'rpois', 'runif', 'pointLabel',
|
|
132
|
+
'some', 'rbernoulli', 'rdunif', 'generateSeedVectors',
|
|
133
|
+
'rbeta', 'rf', 'rhyper', 'rweibull', 'rt', 'rvonmises', 'rwilcox', 'rxor', 'rhyper', 'rmvnorm',
|
|
134
|
+
'rsignrank', 'randomForest',
|
|
135
|
+
'permuted', 'permute', 'shuffle', 'shuffleSet', 'data_shuffle', 'sample_frac', 'sample_n', 'slice_sample',
|
|
136
|
+
],
|
|
130
137
|
},
|
|
131
138
|
tags: [linter_tags_1.LintingRuleTag.Robustness, linter_tags_1.LintingRuleTag.Reproducibility],
|
|
132
139
|
// only finds proper randomness producers and consumers due to its config, but will not find all producers/consumers since not all existing randomness-related functions will be in the config
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@eagleoutice/flowr",
|
|
3
|
-
"version": "2.10.
|
|
3
|
+
"version": "2.10.4",
|
|
4
4
|
"description": "Static Dataflow Analyzer and Program Slicer for the R Programming Language",
|
|
5
5
|
"types": "dist/src/index.d.ts",
|
|
6
6
|
"repository": {
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"build": "tsc --project .",
|
|
33
33
|
"build-dev": "npm run build && npm run build:copy-wasm",
|
|
34
34
|
"build:bundle-flowr": "npm run build && esbuild --bundle dist/src/cli/flowr.js --platform=node --tree-shaking=true --minify --external:clipboardy --target=node22 --outfile=dist/src/cli/flowr.min.js && npm run build:copy-wasm",
|
|
35
|
-
"build:copy-wasm": "mkdir -p dist/node_modules/@
|
|
35
|
+
"build:copy-wasm": "mkdir -p dist/node_modules/@davisvaughan/tree-sitter-r/ && mkdir -p dist/node_modules/web-tree-sitter && cp node_modules/@davisvaughan/tree-sitter-r/tree-sitter-r.wasm dist/node_modules/@davisvaughan/tree-sitter-r/ && cp node_modules/web-tree-sitter/tree-sitter.wasm dist/node_modules/web-tree-sitter/",
|
|
36
36
|
"lint-local": "npx eslint --version && npx eslint src/ test/ --rule \"no-warning-comments: off\"",
|
|
37
37
|
"lint": "npm run license-compat -- --summary && npx eslint --version && npx eslint src/ test/",
|
|
38
38
|
"license-compat": "license-checker-rseidelsohn --onlyAllow 'MIT;MIT OR X11;GPLv2;LGPL;GNUGPL;ISC;Apache-2.0;FreeBSD;BSD-2-Clause;clearbsd;ModifiedBSD;BSD-3-Clause;Python-2.0;Unlicense;WTFPL;BlueOak-1.0.0;CC-BY-4.0;CC-BY-3.0;CC0-1.0;0BSD'",
|
|
@@ -167,7 +167,7 @@
|
|
|
167
167
|
"devDependencies": {
|
|
168
168
|
"@commitlint/cli": "^20.5.0",
|
|
169
169
|
"@commitlint/config-angular": "^20.5.0",
|
|
170
|
-
"@eagleoutice/eslint-config-flowr": "^1.0.
|
|
170
|
+
"@eagleoutice/eslint-config-flowr": "^1.0.40",
|
|
171
171
|
"@eslint/eslintrc": "^3.3.3",
|
|
172
172
|
"@eslint/js": "^9.39.2",
|
|
173
173
|
"@j-ulrich/release-it-regex-bumper": "^5.3.0",
|
|
@@ -198,7 +198,7 @@
|
|
|
198
198
|
"vitest": "^3.2.4"
|
|
199
199
|
},
|
|
200
200
|
"dependencies": {
|
|
201
|
-
"@
|
|
201
|
+
"@davisvaughan/tree-sitter-r": "^1.2.0",
|
|
202
202
|
"@jupyterlab/nbformat": "^4.5.4",
|
|
203
203
|
"@xmldom/xmldom": "^0.9.7",
|
|
204
204
|
"clipboardy": "^4.0.0",
|
|
@@ -77,14 +77,14 @@ function promoteQueryCallNames(queries) {
|
|
|
77
77
|
...q.fileFilter,
|
|
78
78
|
filter: promoteCallName(q.fileFilter.filter)
|
|
79
79
|
},
|
|
80
|
-
linkTo: Array.isArray(q.linkTo) ? q.linkTo.map(l => ({
|
|
80
|
+
linkTo: q.linkTo ? Array.isArray(q.linkTo) ? q.linkTo.map(l => ({
|
|
81
81
|
...l,
|
|
82
82
|
callName: promoteCallName(l.callName)
|
|
83
83
|
})) : {
|
|
84
84
|
...q.linkTo,
|
|
85
85
|
/* we have to add another promotion layer whenever we add something without this call name */
|
|
86
86
|
callName: promoteCallName(q.linkTo.callName)
|
|
87
|
-
}
|
|
87
|
+
} : undefined
|
|
88
88
|
};
|
|
89
89
|
}
|
|
90
90
|
else {
|
|
@@ -88,7 +88,7 @@ export type LinkTo<CallName extends CallNameTypes = CallNameTypes, AttachLinkInf
|
|
|
88
88
|
attachLinkInfo?: AttachLinkInfo;
|
|
89
89
|
};
|
|
90
90
|
export interface SubCallContextQueryFormat<CallName extends CallNameTypes = CallNameTypes, AttachLinkInfo = NoInfo> extends DefaultCallContextQueryFormat<CallName> {
|
|
91
|
-
readonly linkTo
|
|
91
|
+
readonly linkTo?: LinkTo<CallName, AttachLinkInfo> | LinkTo<CallName, AttachLinkInfo>[];
|
|
92
92
|
}
|
|
93
93
|
export interface CallContextQuerySubKindResult {
|
|
94
94
|
/** The id of the call vertex identified within the supplied dataflow graph */
|
|
@@ -29,8 +29,7 @@ exports.CallContextQueryDefinition = {
|
|
|
29
29
|
executor: call_context_query_executor_1.executeCallContextQueries,
|
|
30
30
|
asciiSummarizer: async (formatter, analyzer, queryResults, result) => {
|
|
31
31
|
const out = queryResults;
|
|
32
|
-
result.push(`Query: ${(0, ansi_1.bold)('call-context', formatter)} (${(0, time_1.printAsMs)(out['.meta'].timing, 0)})
|
|
33
|
-
result.push((0, query_print_1.asciiCallContext)(formatter, out, (await analyzer.normalize()).idMap));
|
|
32
|
+
result.push(`Query: ${(0, ansi_1.bold)('call-context', formatter)} (${(0, time_1.printAsMs)(out['.meta'].timing, 0)})`, (0, query_print_1.asciiCallContext)(formatter, out, (await analyzer.normalize()).idMap));
|
|
34
33
|
return true;
|
|
35
34
|
},
|
|
36
35
|
schema: joi_1.default.object({
|
|
@@ -108,5 +108,11 @@ exports.ReadFunctions = [
|
|
|
108
108
|
{ package: 'rpolars', name: 'pl_scan_ipc', argIdx: 0, argName: 'source', resolveValue: true, ignoreIf: 'arg-missing' },
|
|
109
109
|
{ package: 'rpolars', name: 'pl_scan_ndjson', argIdx: 0, argName: 'source', resolveValue: true, ignoreIf: 'arg-missing' },
|
|
110
110
|
{ package: 'rpolars', name: 'pl_scan_parquet', argIdx: 0, argName: 'source', resolveValue: true, ignoreIf: 'arg-missing' },
|
|
111
|
+
{ package: 'rio', name: 'import', argIdx: 0, argName: 'file', resolveValue: true },
|
|
112
|
+
{ package: 'rio', name: 'import_list', argIdx: 0, argName: 'file', resolveValue: true },
|
|
113
|
+
{ package: 'openxlsx', name: 'read.xlsx', argIdx: 0, argName: 'file', resolveValue: true },
|
|
114
|
+
{ package: 'openxlsx', name: 'loadWorkbook', argIdx: 0, argName: 'file', resolveValue: true },
|
|
115
|
+
{ package: 'readODS', name: 'read_ods', argIdx: 0, argName: 'path', resolveValue: true },
|
|
116
|
+
{ package: 'vroom', name: 'vroom', argIdx: 0, argName: 'file', resolveValue: true },
|
|
111
117
|
];
|
|
112
118
|
//# sourceMappingURL=read-functions.js.map
|
|
@@ -7,7 +7,7 @@ const OutputRedirects = [
|
|
|
7
7
|
];
|
|
8
8
|
exports.WriteFunctions = [
|
|
9
9
|
{ package: 'base', name: 'save', argName: 'file', resolveValue: true },
|
|
10
|
-
{ package: 'base', name: 'save.image', argIdx: 1, argName: 'file', resolveValue: true },
|
|
10
|
+
{ package: 'base', name: 'save.image', argIdx: 1, argName: 'file', resolveValue: true, defaultValue: '.RData' },
|
|
11
11
|
{ package: 'base', name: 'write', argIdx: 1, argName: 'file', resolveValue: true },
|
|
12
12
|
{ package: 'base', name: 'dput', argIdx: 1, argName: 'file', resolveValue: true },
|
|
13
13
|
{ package: 'base', name: 'dump', argIdx: 1, argName: 'file', resolveValue: true },
|
|
@@ -115,6 +115,13 @@ exports.WriteFunctions = [
|
|
|
115
115
|
{ package: 'rpolars', name: 'write_csv', argIdx: 0, argName: 'file', resolveValue: true, ignoreIf: 'arg-missing' },
|
|
116
116
|
{ package: 'rpolars', name: 'write_ndjson', argIdx: 0, argName: 'file', resolveValue: true, ignoreIf: 'arg-missing' },
|
|
117
117
|
{ package: 'rpolars', name: 'write_parquet', argIdx: 0, argName: 'file', resolveValue: true, ignoreIf: 'arg-missing' },
|
|
118
|
+
{ package: 'data.table', name: 'fwrite', argIdx: 1, argName: 'file', resolveValue: true },
|
|
119
|
+
{ package: 'writexl', name: 'write_xlsx', argIdx: 1, argName: 'path', resolveValue: true },
|
|
120
|
+
{ package: 'openxlsx', name: 'write.xlsx', argIdx: 1, argName: 'file', resolveValue: true },
|
|
121
|
+
{ package: 'vroom', name: 'vroom_write', argIdx: 1, argName: 'file', resolveValue: true },
|
|
122
|
+
{ package: 'vroom', name: 'vroom_write_lines', argIdx: 1, argName: 'file', resolveValue: true },
|
|
123
|
+
{ package: 'rio', name: 'export', argIdx: 1, argName: 'file', resolveValue: true },
|
|
124
|
+
{ package: 'rio', name: 'export_list', argIdx: 1, argName: 'file', resolveValue: true },
|
|
118
125
|
{ package: 'magick', name: 'image_write', argIdx: 1, argName: 'path', resolveValue: true, ignoreIf: 'arg-missing' },
|
|
119
126
|
];
|
|
120
127
|
//# sourceMappingURL=write-functions.js.map
|
|
@@ -28,6 +28,7 @@ async function executeInputSourcesQuery({ analyzer }, queries) {
|
|
|
28
28
|
const fdef = r_function_definition_1.RFunctionDefinition.rootFunctionDefinition(provenanceNode, nast.idMap);
|
|
29
29
|
const provenance = df_helper_1.Dataflow.provenanceGraph(criterionId, df.graph, fdef ? model_1.RNode.collectAllIds(fdef) : undefined);
|
|
30
30
|
results[key] = (0, simple_input_classifier_1.classifyInput)(criterionId, provenance, {
|
|
31
|
+
fullDfg: df.graph,
|
|
31
32
|
networkFns: query.config?.networkFns ?? network_functions_1.NETWORK_FUNCTIONS.info.defaultConfig.fns,
|
|
32
33
|
randomFns: query.config?.randomFns ?? seeded_randomness_1.SEEDED_RANDOMNESS.info.defaultConfig.randomnessConsumers,
|
|
33
34
|
pureFns: query.config?.pureFns ?? ['paste', 'paste0', 'parse', '+', '-', '*',
|
|
@@ -43,17 +44,38 @@ async function executeInputSourcesQuery({ analyzer }, queries) {
|
|
|
43
44
|
'min', 'max', 'range', 'sum', 'prod', 'mean', 'median', 'var', 'sd',
|
|
44
45
|
'head', 'tail', 'seq', 'rep',
|
|
45
46
|
'apply', 'lapply', 'sapply', 'vapply', 'tapply',
|
|
46
|
-
'matrix', 'array',
|
|
47
|
-
'expression', 'call', 'as.call', 'as.expression',
|
|
47
|
+
'matrix', 'array',
|
|
48
48
|
'rownames', 'colnames',
|
|
49
49
|
'list.files', 'tolower', 'toupper', 'printf',
|
|
50
50
|
'<-', '->', '=', '<<-', '->>', 'assign', 'get',
|
|
51
51
|
'[', '[[', '$', 'length<-', 'dim<-', 'names<-', 'colnames<-', 'rownames<-',
|
|
52
|
-
'as.character', 'as.numeric', 'as.logical', 'as.list', 'as.data.frame', 'as.matrix', 'as.array',
|
|
52
|
+
'as.character', 'as.numeric', 'as.logical', 'as.raw', 'as.list', 'as.data.frame', 'as.matrix', 'as.array',
|
|
53
53
|
'identity', 'invisible', 'return', 'force', 'missing',
|
|
54
|
-
'print', 'cat', 'message', 'warning', 'stop'
|
|
54
|
+
'print', 'cat', 'message', 'warning', 'stop',
|
|
55
|
+
'format', 'sprintf', 'formatC',
|
|
56
|
+
'is.na', 'is.null', 'is.numeric', 'is.character',
|
|
57
|
+
'which', 'match', 'order', 'sort', 'unique', 'duplicated', 'na.omit',
|
|
58
|
+
'grep', 'grepl', 'sub', 'gsub', 'regexpr', 'gregexpr', 'regexec', 'regmatches',
|
|
59
|
+
'as.integer', 'as.double', 'as.complex',
|
|
60
|
+
'trimws', 'seq_len', 'seq_along', 'rep.int',
|
|
61
|
+
'pmin', 'pmax', 'cumsum', 'cumprod', 'cummax', 'cummin', 'diff', 'signif',
|
|
62
|
+
'table', 'prop.table', 'xtabs',
|
|
63
|
+
'rbind', 'cbind', 't', 'crossprod', 'tcrossprod',
|
|
64
|
+
'colSums', 'rowSums', 'colMeans', 'rowMeans',
|
|
65
|
+
'solve', 'det', 'eigen',
|
|
66
|
+
'is.factor', 'is.logical', 'is.vector', 'is.matrix', 'is.data.frame',
|
|
67
|
+
],
|
|
68
|
+
readFileFns: query.config?.readFileFns ?? read_functions_1.ReadFunctions.map(f => f.name),
|
|
69
|
+
systemFns: query.config?.systemFns ?? ['system', 'system2', 'pipe', 'shell', 'shell.exec'],
|
|
70
|
+
ffiFns: query.config?.ffiFns ?? ['.C', '.Call', '.Fortran', '.External', 'dyn.load', 'sourceCpp', 'getNativeSymbolInfo'],
|
|
71
|
+
langFns: query.config?.langFns ?? [
|
|
72
|
+
'substitute', 'quote', 'bquote', 'enquote',
|
|
73
|
+
'enexpr', 'enexprs', 'enquo', 'enquos',
|
|
74
|
+
'expression', 'call', 'as.call', 'as.expression',
|
|
75
|
+
'as.name', 'as.symbol', 'alist', 'as.language', 'evalq',
|
|
76
|
+
'expr', 'quo', 'enexpr', 'ensym', 'ensyms'
|
|
55
77
|
],
|
|
56
|
-
|
|
78
|
+
optionsFns: query.config?.optionsFns ?? ['options', 'getOption', 'Sys.getenv']
|
|
57
79
|
});
|
|
58
80
|
}
|
|
59
81
|
return {
|
|
@@ -29,10 +29,10 @@ exports.InputSourcesDefinition = {
|
|
|
29
29
|
const nast = (await analyzer.normalize()).idMap;
|
|
30
30
|
for (const [key, sources] of Object.entries(out.results)) {
|
|
31
31
|
result.push(` ╰ Input Sources for ${key}`);
|
|
32
|
-
for (const { id, trace,
|
|
32
|
+
for (const { id, trace, types } of sources) {
|
|
33
33
|
const kNode = nast.get(id);
|
|
34
34
|
const kLoc = kNode ? range_1.SourceLocation.format(range_1.SourceLocation.fromNode(kNode)) : 'unknown location';
|
|
35
|
-
result.push(` ╰ ${kLoc} (id: ${id}), type: ${JSON.stringify(
|
|
35
|
+
result.push(` ╰ ${kLoc} (id: ${id}), type: ${JSON.stringify(types)}, trace: ${trace}`);
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
38
|
return true;
|
|
@@ -42,11 +42,14 @@ exports.InputSourcesDefinition = {
|
|
|
42
42
|
type: joi_1.default.string().valid('input-sources').required().description('The type of the query.'),
|
|
43
43
|
criterion: joi_1.default.string().required().description('The slicing criterion to use.'),
|
|
44
44
|
config: joi_1.default.object({
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
45
|
+
pureFns: joi_1.default.array().items(joi_1.default.string()).optional().description('Deterministic/pure functions: functions that preserve constantness of their inputs (e.g., arithmetic, parse).'),
|
|
46
|
+
networkFns: joi_1.default.array().items(joi_1.default.string()).optional().description('Functions that fetch data from the network (e.g., download.file, url connections).'),
|
|
47
|
+
randomFns: joi_1.default.array().items(joi_1.default.string()).optional().description('Functions that produce randomness (e.g., runif, rnorm).'),
|
|
48
|
+
readFileFns: joi_1.default.array().items(joi_1.default.string()).optional().description('Functions that read from the filesystem and produce data (e.g., read.csv, readRDS).'),
|
|
49
|
+
systemFns: joi_1.default.array().items(joi_1.default.string()).optional().description('Functions that execute system commands (e.g., system, system2, shell, pipe).'),
|
|
50
|
+
ffiFns: joi_1.default.array().items(joi_1.default.string()).optional().description('Functions that call native code via the R FFI (.C, .Call, .Fortran, .External, dyn.load).'),
|
|
51
|
+
langFns: joi_1.default.array().items(joi_1.default.string()).optional().description('Functions that produce language objects (e.g., substitute, quote, bquote, expression).'),
|
|
52
|
+
optionsFns: joi_1.default.array().items(joi_1.default.string()).optional().description('Functions that access or set global options (e.g., options, getOption).'),
|
|
50
53
|
}).optional()
|
|
51
54
|
}).description('Input Sources query definition'),
|
|
52
55
|
flattenInvolvedNodes: (queryResults) => {
|
|
@@ -8,13 +8,13 @@ import { Identifier } from '../../../dataflow/environments/identifier';
|
|
|
8
8
|
* joining differing lattice elements.
|
|
9
9
|
*
|
|
10
10
|
*```
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*[Param] [File] [Net]
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
11
|
+
* [ Unknown ]
|
|
12
|
+
* |
|
|
13
|
+
* [Param] [File] [Net], ...
|
|
14
|
+
* |
|
|
15
|
+
* [ DerivedConstant ]
|
|
16
|
+
* |
|
|
17
|
+
* [ Constant ]
|
|
18
18
|
*```
|
|
19
19
|
*
|
|
20
20
|
*/
|
|
@@ -23,6 +23,14 @@ export declare enum InputType {
|
|
|
23
23
|
File = "file",
|
|
24
24
|
Network = "net",
|
|
25
25
|
Random = "rand",
|
|
26
|
+
/** Calls to system/system2 and similar */
|
|
27
|
+
System = "system",
|
|
28
|
+
/** Calls to .C / Fortran interfaces */
|
|
29
|
+
Ffi = "ffi",
|
|
30
|
+
/** Language objects (quote/substitute/etc.) */
|
|
31
|
+
Lang = "lang",
|
|
32
|
+
/** Global options / option accessors (options, getOption) */
|
|
33
|
+
Options = "options",
|
|
26
34
|
Constant = "const",
|
|
27
35
|
/** Read from environment/call scope */
|
|
28
36
|
Scope = "scope",
|
|
@@ -46,7 +54,7 @@ export declare enum InputTraceType {
|
|
|
46
54
|
*/
|
|
47
55
|
export interface InputSource extends MergeableRecord {
|
|
48
56
|
id: NodeId;
|
|
49
|
-
|
|
57
|
+
types: InputType[];
|
|
50
58
|
trace: InputTraceType;
|
|
51
59
|
/** if the trace is affected by control dependencies, they are classified too, this is a duplicate free array */
|
|
52
60
|
cds?: InputType[];
|
|
@@ -81,6 +89,26 @@ export interface InputClassifierConfig extends MergeableRecord {
|
|
|
81
89
|
* Functions that read from the file system
|
|
82
90
|
*/
|
|
83
91
|
readFileFns: readonly InputClassifierFunctionIdentifier[];
|
|
92
|
+
/**
|
|
93
|
+
* Functions that call system utilities (system/system2)
|
|
94
|
+
*/
|
|
95
|
+
systemFns?: readonly InputClassifierFunctionIdentifier[];
|
|
96
|
+
/**
|
|
97
|
+
* Functions that call native code via .C/.Fortran interfaces
|
|
98
|
+
*/
|
|
99
|
+
ffiFns?: readonly InputClassifierFunctionIdentifier[];
|
|
100
|
+
/**
|
|
101
|
+
* Functions that produce language objects such as quote/substitute
|
|
102
|
+
*/
|
|
103
|
+
langFns?: readonly InputClassifierFunctionIdentifier[];
|
|
104
|
+
/**
|
|
105
|
+
* Functions that access or set global options
|
|
106
|
+
*/
|
|
107
|
+
optionsFns?: readonly InputClassifierFunctionIdentifier[];
|
|
108
|
+
/**
|
|
109
|
+
* For the scope escape analysis, pass on the full, non-reduced DFG here
|
|
110
|
+
*/
|
|
111
|
+
fullDfg?: DataflowGraph;
|
|
84
112
|
}
|
|
85
113
|
/**
|
|
86
114
|
* Takes the given id which is expected to either be:
|
|
@@ -6,6 +6,7 @@ const graph_1 = require("../../../dataflow/graph/graph");
|
|
|
6
6
|
const objects_1 = require("../../../util/objects");
|
|
7
7
|
const vertex_1 = require("../../../dataflow/graph/vertex");
|
|
8
8
|
const df_helper_1 = require("../../../dataflow/graph/df-helper");
|
|
9
|
+
const edge_1 = require("../../../dataflow/graph/edge");
|
|
9
10
|
const identifier_1 = require("../../../dataflow/environments/identifier");
|
|
10
11
|
const assert_1 = require("../../../util/assert");
|
|
11
12
|
const arrays_1 = require("../../../util/collections/arrays");
|
|
@@ -18,16 +19,20 @@ class InputClassifier {
|
|
|
18
19
|
this.dfg = dfg;
|
|
19
20
|
this.config = config;
|
|
20
21
|
}
|
|
22
|
+
isDefinedByOnCall(id) {
|
|
23
|
+
const out = (this.config.fullDfg ?? this.dfg).outgoingEdges(id) ?? new Map();
|
|
24
|
+
return out.values().some(e => edge_1.DfEdge.includesType(e, edge_1.EdgeType.DefinedByOnCall));
|
|
25
|
+
}
|
|
21
26
|
classifyEntry(vertex) {
|
|
22
27
|
const cached = this.cache.get(vertex.id);
|
|
23
28
|
if (cached) {
|
|
24
29
|
return cached;
|
|
25
30
|
}
|
|
26
31
|
// insert temporary unknown to break cycles
|
|
27
|
-
this.cache.set(vertex.id, { id: vertex.id,
|
|
32
|
+
this.cache.set(vertex.id, { id: vertex.id, types: [InputType.Unknown], trace: InputTraceType.Unknown });
|
|
28
33
|
switch (vertex.tag) {
|
|
29
34
|
case vertex_1.VertexType.Value:
|
|
30
|
-
return this.classifyCdsAndReturn(vertex, { id: vertex.id,
|
|
35
|
+
return this.classifyCdsAndReturn(vertex, { id: vertex.id, types: [InputType.Constant], trace: InputTraceType.Unknown });
|
|
31
36
|
case vertex_1.VertexType.FunctionCall:
|
|
32
37
|
return this.classifyFunctionCall(vertex);
|
|
33
38
|
case vertex_1.VertexType.VariableDefinition:
|
|
@@ -35,7 +40,7 @@ class InputClassifier {
|
|
|
35
40
|
case vertex_1.VertexType.Use:
|
|
36
41
|
return this.classifyVariable(vertex);
|
|
37
42
|
default:
|
|
38
|
-
return this.classifyCdsAndReturn(vertex, { id: vertex.id,
|
|
43
|
+
return this.classifyCdsAndReturn(vertex, { id: vertex.id, types: [InputType.Unknown], trace: InputTraceType.Unknown });
|
|
39
44
|
}
|
|
40
45
|
}
|
|
41
46
|
classifyFunctionCall(call) {
|
|
@@ -59,17 +64,29 @@ class InputClassifier {
|
|
|
59
64
|
}
|
|
60
65
|
if (!matchesList(call, this.config.pureFns)) {
|
|
61
66
|
if (matchesList(call, this.config.readFileFns)) {
|
|
62
|
-
return this.classifyCdsAndReturn(call, { id: call.id,
|
|
67
|
+
return this.classifyCdsAndReturn(call, { id: call.id, types: [InputType.File], trace: InputTraceType.Unknown });
|
|
63
68
|
}
|
|
64
69
|
else if (matchesList(call, this.config.networkFns)) {
|
|
65
|
-
return this.classifyCdsAndReturn(call, { id: call.id,
|
|
70
|
+
return this.classifyCdsAndReturn(call, { id: call.id, types: [InputType.Network], trace: InputTraceType.Unknown });
|
|
66
71
|
}
|
|
67
72
|
else if (matchesList(call, this.config.randomFns)) {
|
|
68
|
-
return this.classifyCdsAndReturn(call, { id: call.id,
|
|
73
|
+
return this.classifyCdsAndReturn(call, { id: call.id, types: [InputType.Random], trace: InputTraceType.Unknown });
|
|
74
|
+
}
|
|
75
|
+
else if (matchesList(call, this.config.systemFns)) {
|
|
76
|
+
return this.classifyCdsAndReturn(call, { id: call.id, types: [InputType.System], trace: InputTraceType.Unknown });
|
|
77
|
+
}
|
|
78
|
+
else if (matchesList(call, this.config.ffiFns)) {
|
|
79
|
+
return this.classifyCdsAndReturn(call, { id: call.id, types: [InputType.Ffi], trace: InputTraceType.Unknown });
|
|
80
|
+
}
|
|
81
|
+
else if (matchesList(call, this.config.langFns)) {
|
|
82
|
+
return this.classifyCdsAndReturn(call, { id: call.id, types: [InputType.Lang], trace: InputTraceType.Unknown });
|
|
83
|
+
}
|
|
84
|
+
else if (matchesList(call, this.config.optionsFns)) {
|
|
85
|
+
return this.classifyCdsAndReturn(call, { id: call.id, types: [InputType.Options], trace: InputTraceType.Unknown });
|
|
69
86
|
}
|
|
70
87
|
else {
|
|
71
88
|
// if it is not pure, we cannot classify based on the inputs, in that case we do not know!
|
|
72
|
-
return this.classifyCdsAndReturn(call, { id: call.id,
|
|
89
|
+
return this.classifyCdsAndReturn(call, { id: call.id, types: [InputType.Unknown], trace: InputTraceType.Unknown });
|
|
73
90
|
}
|
|
74
91
|
}
|
|
75
92
|
// Otherwise, classify by arguments; pure functions get Known/Pure handling
|
|
@@ -91,7 +108,7 @@ class InputClassifier {
|
|
|
91
108
|
}
|
|
92
109
|
const classified = this.classifyEntry(argVtx);
|
|
93
110
|
// collect all observed types from this argument
|
|
94
|
-
argTypes.push(...classified.
|
|
111
|
+
argTypes.push(...classified.types);
|
|
95
112
|
if (classified.cds) {
|
|
96
113
|
cdTypes.push(...classified.cds);
|
|
97
114
|
}
|
|
@@ -100,34 +117,40 @@ class InputClassifier {
|
|
|
100
117
|
// all arguments only contain constant-like types -> derived constant
|
|
101
118
|
const allConstLike = argTypes.length > 0 && argTypes.every(t => t === InputType.Constant || t === InputType.DerivedConstant);
|
|
102
119
|
if (allConstLike) {
|
|
103
|
-
return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id,
|
|
120
|
+
return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id, types: [InputType.DerivedConstant], trace: InputTraceType.Pure, cds }));
|
|
104
121
|
}
|
|
105
|
-
|
|
106
|
-
return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id,
|
|
122
|
+
argTypes.push(InputType.DerivedConstant);
|
|
123
|
+
return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id, types: (0, arrays_1.uniqueArray)(argTypes), trace: InputTraceType.Known, cds }));
|
|
107
124
|
}
|
|
108
125
|
classifyVariable(vtx) {
|
|
109
126
|
const origins = df_helper_1.Dataflow.origin(this.dfg, vtx.id);
|
|
110
127
|
if (origins === undefined) {
|
|
111
|
-
return this.classifyCdsAndReturn(vtx, { id: vtx.id,
|
|
128
|
+
return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: this.isDefinedByOnCall(vtx.id) ? [InputType.Scope] : [InputType.Unknown], trace: InputTraceType.Unknown });
|
|
112
129
|
}
|
|
113
130
|
const types = [];
|
|
114
131
|
const cds = [];
|
|
115
132
|
let allPure = true;
|
|
116
133
|
for (const o of origins) {
|
|
117
134
|
if (o.type === 4 /* OriginType.ConstantOrigin */) {
|
|
118
|
-
types.push(InputType.
|
|
135
|
+
types.push(InputType.DerivedConstant);
|
|
119
136
|
continue;
|
|
120
137
|
}
|
|
121
138
|
if (o.type === 0 /* OriginType.ReadVariableOrigin */ || o.type === 1 /* OriginType.WriteVariableOrigin */) {
|
|
122
139
|
const v = this.dfg.getVertex(o.id);
|
|
123
140
|
if (v) {
|
|
141
|
+
// if the referenced definition is linked via defined-by-on-call to another
|
|
142
|
+
// id (e.g., a parameter linked to a caller argument), mark it as a Scope origin
|
|
143
|
+
if (this.isDefinedByOnCall(v.id)) {
|
|
144
|
+
types.push(InputType.Scope);
|
|
145
|
+
allPure = false;
|
|
146
|
+
}
|
|
124
147
|
// if this is a variable definition that is a parameter, classify as Parameter
|
|
125
148
|
if (v.tag === vertex_1.VertexType.VariableDefinition && this.dfg.idMap?.get(v.id)?.info.role === "param-n" /* RoleInParent.ParameterName */) {
|
|
126
149
|
types.push(InputType.Parameter);
|
|
127
150
|
continue;
|
|
128
151
|
}
|
|
129
152
|
const c = this.classifyEntry(v);
|
|
130
|
-
types.push(...c.
|
|
153
|
+
types.push(...c.types);
|
|
131
154
|
if (c.cds) {
|
|
132
155
|
cds.push(...c.cds);
|
|
133
156
|
}
|
|
@@ -144,7 +167,7 @@ class InputClassifier {
|
|
|
144
167
|
const v = this.dfg.getVertex(o.id);
|
|
145
168
|
if (v) {
|
|
146
169
|
const c = this.classifyEntry(v);
|
|
147
|
-
types.push(...c.
|
|
170
|
+
types.push(...c.types);
|
|
148
171
|
if (c.cds) {
|
|
149
172
|
cds.push(...c.cds);
|
|
150
173
|
}
|
|
@@ -162,17 +185,17 @@ class InputClassifier {
|
|
|
162
185
|
}
|
|
163
186
|
const t = types.length === 0 ? [InputType.Unknown] : (0, arrays_1.uniqueArray)(types);
|
|
164
187
|
const trace = allPure ? InputTraceType.Pure : InputTraceType.Alias;
|
|
165
|
-
return this.classifyCdsAndReturn(vtx, { id: vtx.id,
|
|
188
|
+
return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: t, trace, cds: cds.length === 0 ? undefined : (0, arrays_1.uniqueArray)(cds) });
|
|
166
189
|
}
|
|
167
190
|
classifyVariableDefinition(vtx) {
|
|
168
191
|
// parameter definitions are classified as Parameter
|
|
169
192
|
if (this.dfg.idMap?.get(vtx.id)?.info.role === "param-n" /* RoleInParent.ParameterName */) {
|
|
170
|
-
return this.classifyCdsAndReturn(vtx, { id: vtx.id,
|
|
193
|
+
return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: [InputType.Parameter], trace: InputTraceType.Unknown });
|
|
171
194
|
}
|
|
172
195
|
const sources = vtx.source;
|
|
173
196
|
if (sources === undefined || sources.length === 0) {
|
|
174
197
|
// fallback to unknown if we cannot find the value
|
|
175
|
-
return this.classifyCdsAndReturn(vtx, { id: vtx.id,
|
|
198
|
+
return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: [InputType.Unknown], trace: InputTraceType.Unknown });
|
|
176
199
|
}
|
|
177
200
|
const types = [];
|
|
178
201
|
const cds = [];
|
|
@@ -181,7 +204,7 @@ class InputClassifier {
|
|
|
181
204
|
const tv = this.dfg.getVertex(tid);
|
|
182
205
|
if (tv) {
|
|
183
206
|
const c = this.classifyEntry(tv);
|
|
184
|
-
types.push(...c.
|
|
207
|
+
types.push(...c.types);
|
|
185
208
|
if (c.cds) {
|
|
186
209
|
cds.push(...c.cds);
|
|
187
210
|
}
|
|
@@ -195,7 +218,7 @@ class InputClassifier {
|
|
|
195
218
|
}
|
|
196
219
|
const t = types.length === 0 ? [InputType.Unknown] : (0, arrays_1.uniqueArray)(types);
|
|
197
220
|
const trace = allPure ? InputTraceType.Pure : InputTraceType.Alias;
|
|
198
|
-
return this.classifyCdsAndReturn(vtx, { id: vtx.id,
|
|
221
|
+
return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: t, trace, cds: cds.length === 0 ? undefined : (0, arrays_1.uniqueArray)(cds) });
|
|
199
222
|
}
|
|
200
223
|
classifyCdsAndReturn(vtx, src) {
|
|
201
224
|
if (vtx.cds) {
|
|
@@ -205,7 +228,7 @@ class InputClassifier {
|
|
|
205
228
|
return undefined;
|
|
206
229
|
}
|
|
207
230
|
const e = this.classifyEntry(cv);
|
|
208
|
-
return e.cds ? [...e.
|
|
231
|
+
return e.cds ? [...e.types, ...e.cds] : [...e.types];
|
|
209
232
|
}).filter(assert_1.isNotUndefined).concat(src.cds ?? []));
|
|
210
233
|
if (cds.length > 0) {
|
|
211
234
|
src.cds = cds;
|
|
@@ -224,13 +247,13 @@ class InputClassifier {
|
|
|
224
247
|
* joining differing lattice elements.
|
|
225
248
|
*
|
|
226
249
|
*```
|
|
227
|
-
*
|
|
228
|
-
*
|
|
229
|
-
*[Param] [File] [Net]
|
|
230
|
-
*
|
|
231
|
-
*
|
|
232
|
-
*
|
|
233
|
-
*
|
|
250
|
+
* [ Unknown ]
|
|
251
|
+
* |
|
|
252
|
+
* [Param] [File] [Net], ...
|
|
253
|
+
* |
|
|
254
|
+
* [ DerivedConstant ]
|
|
255
|
+
* |
|
|
256
|
+
* [ Constant ]
|
|
234
257
|
*```
|
|
235
258
|
*
|
|
236
259
|
*/
|
|
@@ -240,6 +263,14 @@ var InputType;
|
|
|
240
263
|
InputType["File"] = "file";
|
|
241
264
|
InputType["Network"] = "net";
|
|
242
265
|
InputType["Random"] = "rand";
|
|
266
|
+
/** Calls to system/system2 and similar */
|
|
267
|
+
InputType["System"] = "system";
|
|
268
|
+
/** Calls to .C / Fortran interfaces */
|
|
269
|
+
InputType["Ffi"] = "ffi";
|
|
270
|
+
/** Language objects (quote/substitute/etc.) */
|
|
271
|
+
InputType["Lang"] = "lang";
|
|
272
|
+
/** Global options / option accessors (options, getOption) */
|
|
273
|
+
InputType["Options"] = "options";
|
|
243
274
|
InputType["Constant"] = "const";
|
|
244
275
|
/** Read from environment/call scope */
|
|
245
276
|
InputType["Scope"] = "scope";
|
|
@@ -4,7 +4,7 @@ import type { RParseRequest } from '../../retriever';
|
|
|
4
4
|
import type { SyncParser, TreeSitterInformation } from '../../parser';
|
|
5
5
|
import type { TreeSitterEngineConfig } from '../../../config';
|
|
6
6
|
import type { ReadonlyFlowrAnalysisProvider } from '../../../project/flowr-analyzer';
|
|
7
|
-
export declare const DEFAULT_TREE_SITTER_R_WASM_PATH = "./node_modules/@
|
|
7
|
+
export declare const DEFAULT_TREE_SITTER_R_WASM_PATH = "./node_modules/@davisvaughan/tree-sitter-r/tree-sitter-r.wasm";
|
|
8
8
|
export declare const DEFAULT_TREE_SITTER_WASM_PATH = "./node_modules/web-tree-sitter/tree-sitter.wasm";
|
|
9
9
|
/**
|
|
10
10
|
* Synchronous and (way) faster alternative to the {@link RShell} using tree-sitter.
|
|
@@ -7,7 +7,7 @@ exports.TreeSitterExecutor = exports.DEFAULT_TREE_SITTER_WASM_PATH = exports.DEF
|
|
|
7
7
|
const web_tree_sitter_1 = __importDefault(require("web-tree-sitter"));
|
|
8
8
|
const log_1 = require("../../../util/log");
|
|
9
9
|
const fs_1 = __importDefault(require("fs"));
|
|
10
|
-
exports.DEFAULT_TREE_SITTER_R_WASM_PATH = './node_modules/@
|
|
10
|
+
exports.DEFAULT_TREE_SITTER_R_WASM_PATH = './node_modules/@davisvaughan/tree-sitter-r/tree-sitter-r.wasm';
|
|
11
11
|
exports.DEFAULT_TREE_SITTER_WASM_PATH = './node_modules/web-tree-sitter/tree-sitter.wasm';
|
|
12
12
|
const wasmLog = log_1.log.getSubLogger({ name: 'tree-sitter-wasm' });
|
|
13
13
|
/**
|
package/util/version.js
CHANGED
|
@@ -6,7 +6,7 @@ exports.printVersionInformation = printVersionInformation;
|
|
|
6
6
|
const semver_1 = require("semver");
|
|
7
7
|
const assert_1 = require("./assert");
|
|
8
8
|
// this is automatically replaced with the current version by release-it
|
|
9
|
-
const version = '2.10.
|
|
9
|
+
const version = '2.10.4';
|
|
10
10
|
/**
|
|
11
11
|
* Retrieves the current flowR version as a new {@link SemVer} object.
|
|
12
12
|
*/
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.PROBLEMATIC_EVAL = void 0;
|
|
4
|
-
const linter_format_1 = require("../linter-format");
|
|
5
|
-
const flowr_search_builder_1 = require("../../search/flowr-search-builder");
|
|
6
|
-
const range_1 = require("../../util/range");
|
|
7
|
-
const linter_tags_1 = require("../linter-tags");
|
|
8
|
-
const simple_input_classifier_1 = require("../../queries/catalog/input-sources-query/simple-input-classifier");
|
|
9
|
-
const query_1 = require("../../queries/query");
|
|
10
|
-
const parse_1 = require("../../slicing/criterion/parse");
|
|
11
|
-
/**
|
|
12
|
-
* Format a list of input sources either as a single-line string (inline) or a block.
|
|
13
|
-
* - inline: returns a semicolon-separated single-line summary
|
|
14
|
-
* - block: returns an array of lines (to be joined with newlines by the caller)
|
|
15
|
-
*/
|
|
16
|
-
function formatInputSources(inputs, inline = true) {
|
|
17
|
-
if (!inputs || inputs.length === 0) {
|
|
18
|
-
return inline ? '' : [];
|
|
19
|
-
}
|
|
20
|
-
if (inline) {
|
|
21
|
-
return inputs.map(s => `${s.id} (type: ${Array.isArray(s.type) ? '[' + s.type.join(',') + ']' : s.type}, trace: ${s.trace}${s.cds ? ', cds: [' + s.cds.join(',') + ']' : ''})`).join('; ');
|
|
22
|
-
}
|
|
23
|
-
return inputs.map(s => `- ${s.id}: type=${Array.isArray(s.type) ? '[' + s.type.join(',') + ']' : s.type}, trace=${s.trace}${s.cds ? ', cds=[' + s.cds.join(',') + ']' : ''}`);
|
|
24
|
-
}
|
|
25
|
-
exports.PROBLEMATIC_EVAL = {
|
|
26
|
-
/* create a search that finds calls that look like eval-like functions */
|
|
27
|
-
createSearch: config => flowr_search_builder_1.Q.fromQuery({
|
|
28
|
-
type: 'call-context',
|
|
29
|
-
callName: config.considerAsEval,
|
|
30
|
-
callNameExact: false
|
|
31
|
-
}),
|
|
32
|
-
processSearchResult: async (elements, _config, data) => {
|
|
33
|
-
const results = [];
|
|
34
|
-
for (const element of elements.getElements()) {
|
|
35
|
-
const nid = element.node.info.id;
|
|
36
|
-
// run an input-sources query for this eval-like call
|
|
37
|
-
const criterion = parse_1.SlicingCriterion.fromId(nid);
|
|
38
|
-
const q = { type: 'input-sources', criterion };
|
|
39
|
-
const all = await (0, query_1.executeQueries)({ analyzer: data.analyzer }, [q]);
|
|
40
|
-
const inputSourcesResult = all['input-sources'];
|
|
41
|
-
const sources = inputSourcesResult?.results?.[criterion] ?? [];
|
|
42
|
-
// if any input is not a constant or derived constant, flag it
|
|
43
|
-
const problematic = sources.some(s => Array.isArray(s.type)
|
|
44
|
-
? s.type.some(t => t !== simple_input_classifier_1.InputType.Constant && t !== simple_input_classifier_1.InputType.DerivedConstant)
|
|
45
|
-
: (s.type !== simple_input_classifier_1.InputType.Constant && s.type !== simple_input_classifier_1.InputType.DerivedConstant));
|
|
46
|
-
if (problematic) {
|
|
47
|
-
results.push({
|
|
48
|
-
involvedId: nid,
|
|
49
|
-
certainty: sources.some(s => Array.isArray(s.type) ? s.type.includes(simple_input_classifier_1.InputType.Unknown) : s.type === simple_input_classifier_1.InputType.Unknown) ? linter_format_1.LintingResultCertainty.Uncertain : linter_format_1.LintingResultCertainty.Certain,
|
|
50
|
-
loc: range_1.SourceLocation.fromNode(element.node) ?? range_1.SourceLocation.invalid(),
|
|
51
|
-
sources
|
|
52
|
-
});
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
return {
|
|
56
|
-
results,
|
|
57
|
-
'.meta': {}
|
|
58
|
-
};
|
|
59
|
-
},
|
|
60
|
-
/* helper to format input sources for pretty printing */
|
|
61
|
-
prettyPrint: {
|
|
62
|
-
[linter_format_1.LintingPrettyPrintContext.Query]: result => {
|
|
63
|
-
const inputs = result.sources ?? [];
|
|
64
|
-
const srcStr = formatInputSources(inputs, true);
|
|
65
|
-
return `Use of eval-like function at ${range_1.SourceLocation.format(result.loc)}${srcStr ? `; inputs: ${srcStr}` : ''}`;
|
|
66
|
-
},
|
|
67
|
-
[linter_format_1.LintingPrettyPrintContext.Full]: result => {
|
|
68
|
-
const inputs = result.sources ?? [];
|
|
69
|
-
const srcLines = formatInputSources(inputs, false);
|
|
70
|
-
return `Use of eval-like function at ${range_1.SourceLocation.format(result.loc)} is potentially problematic${srcLines.length ? '\nInputs:\n' + srcLines.join('\n') : ''}`;
|
|
71
|
-
}
|
|
72
|
-
},
|
|
73
|
-
info: {
|
|
74
|
-
name: 'Problematic eval',
|
|
75
|
-
description: 'Detects uses of eval-like functions whose inputs are not statically constant. Prints the computed input-sources for the eval and flags usages that depend on non-constant/trusted inputs.',
|
|
76
|
-
tags: [linter_tags_1.LintingRuleTag.Security, linter_tags_1.LintingRuleTag.Smell, linter_tags_1.LintingRuleTag.Readability, linter_tags_1.LintingRuleTag.Performance],
|
|
77
|
-
certainty: linter_format_1.LintingRuleCertainty.BestEffort,
|
|
78
|
-
defaultConfig: {
|
|
79
|
-
considerAsEval: '^eval$'
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
};
|
|
83
|
-
//# sourceMappingURL=problematic-eval.js.map
|