@eagleoutice/flowr 2.10.3 → 2.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -26
- package/abstract-interpretation/absint-visitor.d.ts +17 -21
- package/abstract-interpretation/absint-visitor.js +47 -48
- package/abstract-interpretation/data-frame/dataframe-domain.d.ts +0 -3
- package/abstract-interpretation/data-frame/shape-inference.d.ts +2 -1
- package/abstract-interpretation/data-frame/shape-inference.js +5 -4
- package/abstract-interpretation/domains/abstract-domain.d.ts +17 -16
- package/abstract-interpretation/domains/abstract-domain.js +25 -27
- package/abstract-interpretation/domains/bounded-set-domain.js +1 -1
- package/abstract-interpretation/domains/multi-value-state-domain.d.ts +32 -0
- package/abstract-interpretation/domains/multi-value-state-domain.js +60 -0
- package/abstract-interpretation/domains/partial-product-domain.d.ts +43 -0
- package/abstract-interpretation/domains/partial-product-domain.js +163 -0
- package/abstract-interpretation/domains/product-domain.d.ts +2 -29
- package/abstract-interpretation/domains/product-domain.js +6 -123
- package/abstract-interpretation/domains/set-range-domain.js +3 -3
- package/abstract-interpretation/domains/set-upper-bound-domain.js +1 -1
- package/abstract-interpretation/domains/singleton-domain.js +1 -1
- package/abstract-interpretation/domains/state-abstract-domain.d.ts +13 -28
- package/abstract-interpretation/domains/state-abstract-domain.js +16 -38
- package/abstract-interpretation/domains/state-domain-like.d.ts +36 -0
- package/abstract-interpretation/domains/state-domain-like.js +3 -0
- package/cli/flowr.js +11 -1
- package/config.d.ts +7 -0
- package/config.js +22 -3
- package/control-flow/semantic-cfg-guided-visitor.d.ts +4 -0
- package/control-flow/semantic-cfg-guided-visitor.js +20 -32
- package/dataflow/environments/default-builtin-config.d.ts +10 -0
- package/dataflow/environments/default-builtin-config.js +2 -1
- package/dataflow/internal/process/functions/call/built-in/built-in-eval.d.ts +2 -0
- package/dataflow/internal/process/functions/call/built-in/built-in-eval.js +38 -21
- package/documentation/doc-readme.js +13 -2
- package/documentation/wiki-absint.d.ts +1 -2
- package/documentation/wiki-absint.js +34 -10
- package/documentation/wiki-analyzer.js +3 -4
- package/documentation/wiki-interface.js +21 -16
- package/documentation/wiki-linter.js +1 -1
- package/linter/linter-rules.d.ts +12 -12
- package/linter/linter-rules.js +2 -2
- package/linter/rules/network-functions.d.ts +1 -1
- package/linter/rules/network-functions.js +8 -2
- package/linter/rules/problematic-inputs.d.ts +43 -0
- package/linter/rules/problematic-inputs.js +110 -0
- package/linter/rules/seeded-randomness.d.ts +1 -1
- package/linter/rules/seeded-randomness.js +8 -1
- package/package.json +4 -4
- package/project/flowr-analyzer-builder.d.ts +6 -3
- package/project/flowr-analyzer-builder.js +12 -5
- package/project/plugins/file-plugins/files/flowr-rmarkdown-file.d.ts +4 -3
- package/project/plugins/file-plugins/files/flowr-rmarkdown-file.js +17 -4
- package/project/plugins/flowr-analyzer-plugin.d.ts +1 -1
- package/project/plugins/flowr-analyzer-plugin.js +1 -1
- package/queries/catalog/call-context-query/call-context-query-executor.js +2 -2
- package/queries/catalog/call-context-query/call-context-query-format.d.ts +1 -1
- package/queries/catalog/call-context-query/call-context-query-format.js +1 -2
- package/queries/catalog/dependencies-query/function-info/read-functions.js +6 -0
- package/queries/catalog/dependencies-query/function-info/write-functions.js +7 -0
- package/queries/catalog/input-sources-query/input-source-functions.d.ts +6 -0
- package/queries/catalog/input-sources-query/input-source-functions.js +50 -0
- package/queries/catalog/input-sources-query/input-sources-query-executor.d.ts +1 -1
- package/queries/catalog/input-sources-query/input-sources-query-executor.js +19 -31
- package/queries/catalog/input-sources-query/input-sources-query-format.d.ts +2 -1
- package/queries/catalog/input-sources-query/input-sources-query-format.js +26 -8
- package/queries/catalog/input-sources-query/simple-input-classifier.d.ts +33 -28
- package/queries/catalog/input-sources-query/simple-input-classifier.js +192 -99
- package/r-bridge/lang-4.x/ast/model/model.d.ts +4 -4
- package/r-bridge/lang-4.x/ast/model/nodes/r-access.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-argument.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-binary-op.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-break.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-comment.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-expression-list.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-for-loop.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-function-call.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-function-definition.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-if-then-else.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-line-directive.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-logical.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-next.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-number.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-parameter.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-pipe.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-repeat-loop.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-string.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-symbol.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-unary-op.d.ts +3 -3
- package/r-bridge/lang-4.x/ast/model/nodes/r-while-loop.d.ts +3 -3
- package/util/record.d.ts +18 -3
- package/util/record.js +22 -1
- package/util/version.js +1 -1
- package/linter/rules/problematic-eval.d.ts +0 -44
- package/linter/rules/problematic-eval.js +0 -83
- package/project/plugins/flowr-analyzer-plugin-defaults.d.ts +0 -5
- package/project/plugins/flowr-analyzer-plugin-defaults.js +0 -37
|
@@ -3,8 +3,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.WikiAbsint = void 0;
|
|
4
4
|
const absint_visitor_1 = require("../abstract-interpretation/absint-visitor");
|
|
5
5
|
const abstract_domain_1 = require("../abstract-interpretation/domains/abstract-domain");
|
|
6
|
+
const bounded_set_domain_1 = require("../abstract-interpretation/domains/bounded-set-domain");
|
|
6
7
|
const interval_domain_1 = require("../abstract-interpretation/domains/interval-domain");
|
|
7
8
|
const lattice_1 = require("../abstract-interpretation/domains/lattice");
|
|
9
|
+
const multi_value_state_domain_1 = require("../abstract-interpretation/domains/multi-value-state-domain");
|
|
8
10
|
const state_abstract_domain_1 = require("../abstract-interpretation/domains/state-abstract-domain");
|
|
9
11
|
const semantic_cfg_guided_visitor_1 = require("../control-flow/semantic-cfg-guided-visitor");
|
|
10
12
|
const identifier_1 = require("../dataflow/environments/identifier");
|
|
@@ -15,10 +17,13 @@ const doc_code_1 = require("./doc-util/doc-code");
|
|
|
15
17
|
const doc_structure_1 = require("./doc-util/doc-structure");
|
|
16
18
|
const doc_maker_1 = require("./wiki-mk/doc-maker");
|
|
17
19
|
class IntervalInferenceVisitor extends absint_visitor_1.AbstractInterpretationVisitor {
|
|
20
|
+
constructor(config) {
|
|
21
|
+
super(config, state_abstract_domain_1.StateAbstractDomain.top(interval_domain_1.IntervalDomain.top()));
|
|
22
|
+
}
|
|
18
23
|
onNumberConstant({ vertex, node }) {
|
|
19
24
|
super.onNumberConstant({ vertex, node });
|
|
20
25
|
const interval = new interval_domain_1.IntervalDomain([node.content.num, node.content.num]);
|
|
21
|
-
this.
|
|
26
|
+
this.currentState.set(node.info.id, interval);
|
|
22
27
|
}
|
|
23
28
|
onFunctionCall({ call }) {
|
|
24
29
|
super.onFunctionCall({ call });
|
|
@@ -32,9 +37,9 @@ class IntervalInferenceVisitor extends absint_visitor_1.AbstractInterpretationVi
|
|
|
32
37
|
// We map the numerical operation to the resulting interval after applying the abstract semantics of the operation
|
|
33
38
|
switch (identifier_1.Identifier.getName(call.name)) {
|
|
34
39
|
case '+':
|
|
35
|
-
return this.
|
|
40
|
+
return this.currentState.set(call.id, left.add(right));
|
|
36
41
|
case '-':
|
|
37
|
-
return this.
|
|
42
|
+
return this.currentState.set(call.id, left.subtract(right));
|
|
38
43
|
}
|
|
39
44
|
}
|
|
40
45
|
}
|
|
@@ -52,7 +57,7 @@ async function inferIntervals() {
|
|
|
52
57
|
const dfg = (await analyzer.dataflow()).graph;
|
|
53
58
|
const cfg = await analyzer.controlflow(undefined, cfg_kind_1.CfgKind.NoFunctionDefs);
|
|
54
59
|
const ctx = analyzer.inspectContext();
|
|
55
|
-
const inference = new IntervalInferenceVisitor({ controlFlow: cfg, dfg: dfg, normalizedAst: ast, ctx: ctx }
|
|
60
|
+
const inference = new IntervalInferenceVisitor({ controlFlow: cfg, dfg: dfg, normalizedAst: ast, ctx: ctx });
|
|
56
61
|
inference.start();
|
|
57
62
|
const result = inference.getEndState();
|
|
58
63
|
return result.isValue() ? result.value.entries().toArray()
|
|
@@ -63,6 +68,21 @@ function nodeIdToSlicingCriterion(id, idMap) {
|
|
|
63
68
|
const node = idMap.get(id);
|
|
64
69
|
return `${node?.location?.[0]}@${node?.lexeme}`;
|
|
65
70
|
}
|
|
71
|
+
function multiValueExample() {
|
|
72
|
+
const domain = {
|
|
73
|
+
number: new interval_domain_1.IntervalDomain(interval_domain_1.IntervalTop),
|
|
74
|
+
string: new bounded_set_domain_1.BoundedSetDomain(lattice_1.Top)
|
|
75
|
+
};
|
|
76
|
+
const reduction = ({ number, string }) => {
|
|
77
|
+
if (number?.isBottom() || string?.isBottom()) {
|
|
78
|
+
return { number: domain.number.bottom(), string: domain.string.bottom() };
|
|
79
|
+
}
|
|
80
|
+
return { number, string };
|
|
81
|
+
};
|
|
82
|
+
const state = new multi_value_state_domain_1.MultiValueStateDomain(new Map(), domain, [reduction]);
|
|
83
|
+
state.setValue(0, 'number', new interval_domain_1.IntervalDomain([42, 42]));
|
|
84
|
+
state.setValue(1, 'string', new bounded_set_domain_1.BoundedSetDomain(new Set(['Hello world!'])));
|
|
85
|
+
}
|
|
66
86
|
class WikiAbsint extends doc_maker_1.DocMaker {
|
|
67
87
|
constructor() {
|
|
68
88
|
super('wiki/Abstract Interpretation.md', module.filename, 'abstract interpretation framework');
|
|
@@ -113,30 +133,34 @@ All boxes link to their respective implementation in the source code.
|
|
|
113
133
|
${(0, doc_code_1.codeBlock)('mermaid', ctx.mermaid(abstract_domain_1.AbstractDomain, { simplify: true, reverse: true }))}
|
|
114
134
|
`.trim())}
|
|
115
135
|
|
|
136
|
+
Multiple abstract domains can be combined using a ${ctx.link(multi_value_state_domain_1.MultiValueDomain)} (for example, to use an interval domain for numbers and bounded set domain for strings at the same time). A multi-value state domain (${ctx.link(multi_value_state_domain_1.MultiValueStateDomain)}) as state domain of a multi-value domain can be used to track the state of multiple value domains in a program. Additionally, is enables to define reductions on the multi-value domain to refine the inferred value for a value domain based on the other value domains in the multi-value domain. For example, the following example shows how a multi-value state domain can be defined to track numbers and strings at the same time with a simple reduction that sets both domains to bottom if one domain is bottom.
|
|
137
|
+
|
|
138
|
+
${ctx.code(multiValueExample, { dropLinesStart: 1, dropLinesEnd: 1 })}
|
|
139
|
+
|
|
116
140
|
${(0, doc_structure_1.section)('Abstract Interpretation', 2, 'abstract-interpretation')}
|
|
117
141
|
|
|
118
142
|
We perform abstract interpretation by forward-traversing the ${ctx.linkPage('wiki/Control Flow Graph', 'control flow graph')} of _flowR_ using an ${ctx.link(absint_visitor_1.AbstractInterpretationVisitor)}. For each visited control flow vertex, the visitor retrieves the current abstract state by joining the abstract states of the predecessors, applies the abstract semantics of the visited control flow vertex to the current state, and updates the abstract state of the currently visited vertex to the current state. The visitor already handles assignments and (delayed) widening at widening points. However, the visitor does not yet support interprocedural abstract interpretation.
|
|
119
143
|
|
|
120
|
-
To implement a custom abstract interpretation analysis, we can just create a new class and extend the ${ctx.link(absint_visitor_1.AbstractInterpretationVisitor)}. The abstract interpretation visitor uses a ${ctx.link(state_abstract_domain_1.StateAbstractDomain)} to capture the current abstract state at each vertex in the control flow graph.
|
|
144
|
+
To implement a custom abstract interpretation analysis, we can just create a new class and extend the ${ctx.link(absint_visitor_1.AbstractInterpretationVisitor)}. The abstract interpretation visitor uses a \`StateDomain\` (e.g., a ${ctx.link(state_abstract_domain_1.StateAbstractDomain)}) to capture the current abstract state at each vertex in the control flow graph. We can then extend the callback functions of the ${ctx.link(absint_visitor_1.AbstractInterpretationVisitor)} to implement the abstract semantics of expressions, such as ${ctx.link(`${semantic_cfg_guided_visitor_1.SemanticCfgGuidedVisitor.name}:::onNumberConstant`)}, ${ctx.link(`${absint_visitor_1.AbstractInterpretationVisitor.name}:::onFunctionCall`)} and ${ctx.link(`${semantic_cfg_guided_visitor_1.SemanticCfgGuidedVisitor.name}:::onReplacementCall`)} (make sure to still call the respective super function). The abstract interpretation visitor provides the following functions to retrieve the currently inferred values:
|
|
121
145
|
|
|
122
146
|
* ${ctx.linkM(absint_visitor_1.AbstractInterpretationVisitor, 'getAbstractValue')} to resolve the inferred abstract value for an AST node (this includes resolving symbols, pipes, and if expressions)
|
|
123
147
|
* ${ctx.linkM(absint_visitor_1.AbstractInterpretationVisitor, 'getAbstractState')} to get the inferred abstract state at an AST node mapping AST nodes to abstract values
|
|
124
148
|
* ${ctx.linkM(absint_visitor_1.AbstractInterpretationVisitor, 'getAbstractTrace')} to get the complete abstract trace mapping AST nodes to abstract states at the respective node
|
|
125
149
|
* ${ctx.linkM(absint_visitor_1.AbstractInterpretationVisitor, 'getEndState')} to get the inferred abstract state at the end of the program (at the exit points of the control flow graph)
|
|
126
150
|
|
|
127
|
-
For example, if we want to perform a (very basic) interval analysis using abstract interpretation in _flowR_, we can implement the following ${ctx.link(IntervalInferenceVisitor)} that extends ${ctx.link(absint_visitor_1.AbstractInterpretationVisitor)} using the ${ctx.link(interval_domain_1.IntervalDomain)}:
|
|
151
|
+
For example, if we want to perform a (very basic) interval analysis using abstract interpretation in _flowR_, we can implement the following ${ctx.link(IntervalInferenceVisitor)} that extends ${ctx.link(absint_visitor_1.AbstractInterpretationVisitor)} using a ${ctx.link(state_abstract_domain_1.StateAbstractDomain)} for the ${ctx.link(interval_domain_1.IntervalDomain)}:
|
|
128
152
|
|
|
129
|
-
${ctx.code(
|
|
153
|
+
${ctx.code(inferIntervals, { dropLinesStart: 1, dropLinesEnd: 5 })}
|
|
130
154
|
|
|
131
|
-
The interval inference visitor first overrides the ${ctx.link(`${semantic_cfg_guided_visitor_1.SemanticCfgGuidedVisitor.name}:::onNumberConstant`)} function to infer intervals for visited control flow vertices that represent numeric constants. For numeric constants, the resulting interval consists just of the number value of the constant. We then update the current abstract state of the visitor
|
|
155
|
+
The interval inference visitor first overrides the ${ctx.link(`${semantic_cfg_guided_visitor_1.SemanticCfgGuidedVisitor.name}:::onNumberConstant`)} function to infer intervals for visited control flow vertices that represent numeric constants. For numeric constants, the resulting interval consists just of the number value of the constant. We then update the current abstract state of the visitor by setting the inferred abstract value of the currently visited control flow vertex to the new interval.
|
|
132
156
|
|
|
133
|
-
In this simple example, we only want to support the addition and subtraction of numeric values. Therefore, we override the ${ctx.link(`${absint_visitor_1.AbstractInterpretationVisitor.name}:::onFunctionCall`)} function to apply the abstract semantics of additions and subtraction with resprect to the interval domain. For the addition and subtraction, we are only interested in function calls with exactly two non-empty arguments. We first resolve the currently inferred abstract value for the left and right operand of the function call. If we have
|
|
157
|
+
In this simple example, we only want to support the addition and subtraction of numeric values. Therefore, we override the ${ctx.link(`${absint_visitor_1.AbstractInterpretationVisitor.name}:::onFunctionCall`)} function to apply the abstract semantics of additions and subtraction with resprect to the interval domain. For the addition and subtraction, we are only interested in function calls with exactly two non-empty arguments. We first resolve the currently inferred abstract value for the left and right operand of the function call. If we have not inferred a value for one of the operands, this function call might not be a numeric function call and we ignore it. Otherwise, we check whether the function call represents an addition or subtraction and apply the abstract semantics of the operation to the left and right operand. We then again update the current abstract state of the visitor by setting the inferred abstract value of the currently visited function call vertex to the abstract value resulting from applying the abstract semantics of the operation to the operands.
|
|
134
158
|
|
|
135
159
|
If we now want to run the interval inference, we can write the following code:
|
|
136
160
|
|
|
137
161
|
${ctx.code(inferIntervals, { dropLinesStart: 1, dropLinesEnd: 5 })}
|
|
138
162
|
|
|
139
|
-
We first need a ${ctx.linkPage('wiki/Analyzer', 'flowR analyzer')} (in this case, using the ${ctx.linkPage('wiki/Engines', 'tree-sitter engine')}). In this example, we want to analyze a small example code that assigns \`42\` to the variable \`x\`, randomly assigns \`6\` or \`12\` to the variable \`y\`, and assignes the sum of \`x\` and \`y\` to the variable \`z\`. For the abstract interpretation visitor, we need to retrieve the ${ctx.linkPage('wiki/Normalized AST', 'normalized AST')}, ${ctx.linkPage('wiki/Dataflow Graph', 'dataflow graph')}, ${ctx.linkPage('wiki/Control Flow Graph', 'control flow graph')}, context of the flowR anaylzer
|
|
163
|
+
We first need a ${ctx.linkPage('wiki/Analyzer', 'flowR analyzer')} (in this case, using the ${ctx.linkPage('wiki/Engines', 'tree-sitter engine')}). In this example, we want to analyze a small example code that assigns \`42\` to the variable \`x\`, randomly assigns \`6\` or \`12\` to the variable \`y\`, and assignes the sum of \`x\` and \`y\` to the variable \`z\`. For the abstract interpretation visitor, we need to retrieve the ${ctx.linkPage('wiki/Normalized AST', 'normalized AST')}, ${ctx.linkPage('wiki/Dataflow Graph', 'dataflow graph')}, ${ctx.linkPage('wiki/Control Flow Graph', 'control flow graph')}, and context of the flowR anaylzer. For performance reasons, we construct the control flow graph without simplification passes, data flow information, and function definitions. We then create a new ${ctx.link(IntervalInferenceVisitor)} using the control flow graph, dataflow graph, normalized AST, and analyzer context, and start the visitor using ${ctx.linkM(absint_visitor_1.AbstractInterpretationVisitor, 'start', { hideClass: true })}. After the visitor is finished, we retrieve the inferred abstract state at the end of the program using ${ctx.linkM(absint_visitor_1.AbstractInterpretationVisitor, 'getEndState', { hideClass: true })}.
|
|
140
164
|
|
|
141
165
|
If we now print the inferred abstract state at the end of the program, we get the following output:
|
|
142
166
|
|
|
@@ -22,7 +22,6 @@ const flowr_analyzer_loading_order_context_1 = require("../project/context/flowr
|
|
|
22
22
|
const flowr_analyzer_dependencies_context_1 = require("../project/context/flowr-analyzer-dependencies-context");
|
|
23
23
|
const flowr_analyzer_cache_1 = require("../project/cache/flowr-analyzer-cache");
|
|
24
24
|
const pipeline_executor_1 = require("../core/pipeline-executor");
|
|
25
|
-
const flowr_analyzer_plugin_defaults_1 = require("../project/plugins/flowr-analyzer-plugin-defaults");
|
|
26
25
|
const doc_maker_1 = require("./wiki-mk/doc-maker");
|
|
27
26
|
const flowr_analyzer_rmd_file_plugin_1 = require("../project/plugins/file-plugins/notebooks/flowr-analyzer-rmd-file-plugin");
|
|
28
27
|
const flowr_analyzer_plugin_1 = require("../project/plugins/flowr-analyzer-plugin");
|
|
@@ -219,7 +218,7 @@ This indicates three ways to add a new plugin:
|
|
|
219
218
|
3. By providing a tuple of the plugin name and its constructor arguments (e.g., \`['file:rmd', [/.*.rmd/i]]\` for the ${ctx.link(flowr_analyzer_rmd_file_plugin_1.FlowrAnalyzerRmdFilePlugin)}).\\
|
|
220
219
|
This will also use the ${ctx.link(plugin_registry_1.makePlugin)} function under the hood to create the plugin instance.
|
|
221
220
|
|
|
222
|
-
Please note, that by passing \`false\` to the builder constructor, no default plugins (see ${ctx.link(
|
|
221
|
+
Please note, that by passing \`false\` to the builder constructor, no default plugins (see ${ctx.link('FlowrDefaultPlugins')}) are registered (otherwise, all of the plugins in the example above would be registered by default).
|
|
223
222
|
If you want to unregister specific plugins, you can use the ${ctx.linkM(flowr_analyzer_builder_1.FlowrAnalyzerBuilder, 'unregisterPlugins')} method.
|
|
224
223
|
|
|
225
224
|
${(0, doc_structure_1.block)({
|
|
@@ -251,7 +250,7 @@ Plugins allow you to extend the capabilities of the analyzer in many different w
|
|
|
251
250
|
For example, they can be used to support other file formats, or to provide new algorithms to determine the loading order of files in a project.
|
|
252
251
|
All plugins have to extend the ${ctx.link(flowr_analyzer_plugin_1.FlowrAnalyzerPlugin)} base class and specify their ${ctx.link('PluginType')}.
|
|
253
252
|
During the analysis, the analyzer will apply all registered plugins of the different types at the appropriate stages of the analysis.
|
|
254
|
-
If you just want to _use_ these plugins, you can usually ignore their [type](#plugin-types) and just register them with the builder as described
|
|
253
|
+
If you just want to _use_ these plugins, you can usually ignore their [type](#plugin-types) and just register them with the builder as described
|
|
255
254
|
in the [Builder Configuration](#builder-configuration) section above.
|
|
256
255
|
However, if you want to _create_ new plugins, you should be aware of the different plugin types and when they are applied during the analysis.
|
|
257
256
|
|
|
@@ -285,7 +284,7 @@ ${(0, doc_structure_1.section)('Project Discovery', 4)}
|
|
|
285
284
|
|
|
286
285
|
These plugins trigger when confronted with a project analysis request (see, ${ctx.link('RProjectAnalysisRequest')}).
|
|
287
286
|
Their job is to identify the files that belong to the project and add them to the analysis.
|
|
288
|
-
flowR provides the ${ctx.link(flowr_analyzer_project_discovery_plugin_1.FlowrAnalyzerProjectDiscoveryPlugin)} with a
|
|
287
|
+
flowR provides the ${ctx.link(flowr_analyzer_project_discovery_plugin_1.FlowrAnalyzerProjectDiscoveryPlugin)} with a
|
|
289
288
|
${ctx.link(flowr_analyzer_project_discovery_plugin_1.FlowrAnalyzerProjectDiscoveryPlugin.defaultPlugin.name)} as the default implementation that simply collects all R source files in the given folder.
|
|
290
289
|
|
|
291
290
|
Please note that all project discovery plugins should conform to the ${ctx.link(flowr_analyzer_project_discovery_plugin_1.FlowrAnalyzerProjectDiscoveryPlugin)} base class.
|
|
@@ -20,6 +20,7 @@ const doc_structure_1 = require("./doc-util/doc-structure");
|
|
|
20
20
|
const doc_maker_1 = require("./wiki-mk/doc-maker");
|
|
21
21
|
const doc_writing_code_1 = require("./data/interface/doc-writing-code");
|
|
22
22
|
const built_in_proc_name_1 = require("../dataflow/environments/built-in-proc-name");
|
|
23
|
+
const flowr_analyzer_1 = require("../project/flowr-analyzer");
|
|
23
24
|
async function explainServer(parser) {
|
|
24
25
|
(0, doc_data_server_messages_1.documentAllServerMessages)();
|
|
25
26
|
return `
|
|
@@ -42,7 +43,7 @@ ${await (0, doc_server_message_1.printServerMessages)(parser)}
|
|
|
42
43
|
### 📡 Ways of Connecting
|
|
43
44
|
|
|
44
45
|
If you are interested in clients that communicate with _flowR_, please check out the [R adapter](${doc_files_1.FlowrGithubBaseRef}/flowr-r-adapter)
|
|
45
|
-
as well as the [Visual Studio Code extension](${doc_files_1.FlowrGithubBaseRef}/vscode-flowr).
|
|
46
|
+
as well as the [Visual Studio Code extension](${doc_files_1.FlowrGithubBaseRef}/vscode-flowr).
|
|
46
47
|
|
|
47
48
|
<ol>
|
|
48
49
|
|
|
@@ -99,9 +100,9 @@ async function explainRepl(parser, ctx) {
|
|
|
99
100
|
> To execute arbitrary R commands with a repl request, _flowR_ has to be started explicitly with ${ctx.cliOption('flowr', 'r-session-access')}.
|
|
100
101
|
> Please be aware that this introduces a security risk and note that this relies on the ${ctx.linkPage('wiki/Engines', '`r-shell` engine')} .
|
|
101
102
|
|
|
102
|
-
Although primarily meant for users to explore,
|
|
103
|
-
there is nothing which forbids simply calling _flowR_ as a subprocess to use standard-in, -output, and -error
|
|
104
|
-
for communication (although you can access the REPL using the server as well,
|
|
103
|
+
Although primarily meant for users to explore,
|
|
104
|
+
there is nothing which forbids simply calling _flowR_ as a subprocess to use standard-in, -output, and -error
|
|
105
|
+
for communication (although you can access the REPL using the server as well,
|
|
105
106
|
with the [REPL Request](#message-request-repl-execution) message).
|
|
106
107
|
|
|
107
108
|
The read-eval-print loop (REPL) works relatively simple.
|
|
@@ -111,7 +112,7 @@ The best command to get started with the REPL is ${ctx.replCmd('help')}.
|
|
|
111
112
|
Besides, you can leave the REPL either with the command ${ctx.replCmd('quit')} or by pressing <kbd>Ctrl</kbd>+<kbd>C</kbd> twice.
|
|
112
113
|
When writing a *command*, you may press <kbd>Tab</kbd> to get a list of completions, if available.
|
|
113
114
|
Multiple commands can be entered in a single line by separating them with a semicolon (\`;\`), e.g. \`:parse "x<-2"; :df*\`.
|
|
114
|
-
If a command is given without R code, the REPL will re-use R code given in a previous command.
|
|
115
|
+
If a command is given without R code, the REPL will re-use R code given in a previous command.
|
|
115
116
|
The prior example will hence return first the parsed AST of the program and then the dataflow graph for \`"x <- 2"\`.
|
|
116
117
|
|
|
117
118
|
> [!NOTE]
|
|
@@ -144,7 +145,7 @@ can be used to also modify the currently active configuration of _flowR_ within
|
|
|
144
145
|
|
|
145
146
|
### Example: Retrieving the Dataflow Graph
|
|
146
147
|
|
|
147
|
-
To retrieve a URL to the [mermaid](https://mermaid.js.org/) diagram of the dataflow of a given expression,
|
|
148
|
+
To retrieve a URL to the [mermaid](https://mermaid.js.org/) diagram of the dataflow of a given expression,
|
|
148
149
|
use ${ctx.replCmd('dataflow*')} (or ${ctx.replCmd('dataflow')} to get the mermaid code in the cli):
|
|
149
150
|
|
|
150
151
|
${await (0, doc_repl_1.documentReplSession)(parser, [{
|
|
@@ -163,8 +164,8 @@ For the slicing with ${ctx.replCmd('slicer')}, you have access to the same [magi
|
|
|
163
164
|
|
|
164
165
|
### Example: Interfacing with the File System
|
|
165
166
|
|
|
166
|
-
Many commands that allow for an R-expression (like ${ctx.replCmd('dataflow*')}) allow for a file as well
|
|
167
|
-
if the argument starts with \`${retriever_1.fileProtocol}\`.
|
|
167
|
+
Many commands that allow for an R-expression (like ${ctx.replCmd('dataflow*')}) allow for a file as well
|
|
168
|
+
if the argument starts with \`${retriever_1.fileProtocol}\`.
|
|
168
169
|
If you are working from the root directory of the _flowR_ repository, the following gives you the parsed AST of the example file using the ${ctx.replCmd('parse')} command:
|
|
169
170
|
|
|
170
171
|
${await (0, doc_repl_1.documentReplSession)(parser, [{
|
|
@@ -179,7 +180,7 @@ ${(0, doc_code_1.codeBlock)('r', (0, doc_files_1.getFileContentFromRoot)('test/t
|
|
|
179
180
|
|
|
180
181
|
</details>
|
|
181
182
|
|
|
182
|
-
As _flowR_ directly transforms this AST the output focuses on being human-readable instead of being machine-readable.
|
|
183
|
+
As _flowR_ directly transforms this AST the output focuses on being human-readable instead of being machine-readable.
|
|
183
184
|
`
|
|
184
185
|
}])}
|
|
185
186
|
|
|
@@ -203,8 +204,8 @@ For more information on the available queries, please check out the ${ctx.linkPa
|
|
|
203
204
|
function explainConfigFile(ctx) {
|
|
204
205
|
return `
|
|
205
206
|
|
|
206
|
-
When running _flowR_, you may want to specify some behaviors with a dedicated configuration file.
|
|
207
|
-
By default, flowR looks for a file named \`${flowr_main_options_1.defaultConfigFile}\` in the current working directory (or any higher directory).
|
|
207
|
+
When running _flowR_, you may want to specify some behaviors with a dedicated configuration file.
|
|
208
|
+
By default, flowR looks for a file named \`${flowr_main_options_1.defaultConfigFile}\` in the current working directory (or any higher directory).
|
|
208
209
|
You can also specify a different file with ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'config-file')} or pass the configuration inline using ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'config-json')}.
|
|
209
210
|
To inspect the current configuration, you can run flowr with the ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'verbose')} flag, or use the \`config\` [Query](${doc_files_1.FlowrWikiBaseRef}/Query%20API).
|
|
210
211
|
Within the REPL this works by running the following:
|
|
@@ -216,13 +217,15 @@ ${ctx.linkO(config_1.FlowrConfig, 'amend')}.
|
|
|
216
217
|
The following summarizes the configuration options:
|
|
217
218
|
|
|
218
219
|
- \`ignoreSourceCalls\`: If set to \`true\`, _flowR_ will ignore source calls when analyzing the code, i.e., ignoring the inclusion of other files.
|
|
219
|
-
- \`semantics\`: allows to configure the way _flowR_ handles R, although we currently only support \`semantics/environment/overwriteBuiltIns\`.
|
|
220
|
-
You may use this to overwrite _flowR_'s handling of built-in function and even completely clear the preset definitions shipped with flowR.
|
|
220
|
+
- \`semantics\`: allows to configure the way _flowR_ handles R, although we currently only support \`semantics/environment/overwriteBuiltIns\`.
|
|
221
|
+
You may use this to overwrite _flowR_'s handling of built-in function and even completely clear the preset definitions shipped with flowR.
|
|
221
222
|
See [Configure BuiltIn Semantics](#configure-builtin-semantics) for more information.
|
|
222
223
|
- \`solver\`: allows to configure how _flowR_ resolves variables and their values (currently we support: ${Object.values(config_1.VariableResolve).map(v => `\`${v}\``).join(', ')}), as well as if pointer analysis should be active.
|
|
223
224
|
- \`engines\`: allows to configure the engines used by _flowR_ to interact with R code. See the [Engines wiki page](${doc_files_1.FlowrWikiBaseRef}/Engines) for more information.
|
|
224
225
|
- \`defaultEngine\`: allows to specify the default engine to use for interacting with R code. If not set, an arbitrary engine from the specified list will be used.
|
|
225
226
|
- \`abstractInterpretation\`: allows to configure how _flowR_ performs abstract interpretation, although we currently only support data frame shape inference through abstract interpretation.
|
|
227
|
+
- \`defaultPlugins\`: allows to configure which plugins to load by default when creating a new ${ctx.link(flowr_analyzer_1.FlowrAnalyzer)} instance.
|
|
228
|
+
- \`repl.plugins\`: allows to configure which plugins to load in the _flowR_ REPL. Use \`flowr:default\` to reference the plugins specified by \`defaultPlugins\`.
|
|
226
229
|
|
|
227
230
|
So you can configure _flowR_ by adding a file like the following:
|
|
228
231
|
|
|
@@ -241,9 +244,11 @@ ${(0, doc_code_1.codeBlock)('json', JSON.stringify({
|
|
|
241
244
|
}
|
|
242
245
|
}
|
|
243
246
|
},
|
|
247
|
+
defaultPlugins: ['file:description', 'versions:description'],
|
|
244
248
|
repl: {
|
|
245
249
|
quickStats: false,
|
|
246
|
-
dfProcessorHeat: false
|
|
250
|
+
dfProcessorHeat: false,
|
|
251
|
+
plugins: ['flowr:default']
|
|
247
252
|
},
|
|
248
253
|
project: {
|
|
249
254
|
resolveUnknownPathsOnDisk: true
|
|
@@ -277,9 +282,9 @@ ${(0, doc_code_1.codeBlock)('json', JSON.stringify({
|
|
|
277
282
|
|
|
278
283
|
</details>
|
|
279
284
|
|
|
280
|
-
<details>
|
|
285
|
+
<details>
|
|
281
286
|
<a id='configure-builtin-semantics'></a>
|
|
282
|
-
<summary>Configure Built-In Semantics</summary>
|
|
287
|
+
<summary>Configure Built-In Semantics</summary>
|
|
283
288
|
|
|
284
289
|
|
|
285
290
|
\`semantics/environment/overwriteBuiltins\` accepts two keys:
|
|
@@ -124,7 +124,7 @@ df[6, "value"]
|
|
|
124
124
|
rule(knownParser, 'useless-loop', 'UselessLoopConfig', 'USELESS_LOOP', 'lint-useless-loop', 'for(i in c(1)) { print(i) }', tagTypes);
|
|
125
125
|
rule(knownParser, 'stop-call', 'StopWithCallConfig', 'STOP_WITH_CALL_ARG', 'lint-stop-call', 'stop(42)', tagTypes);
|
|
126
126
|
rule(knownParser, 'roxygen-arguments', 'RoxygenArgsConfig', 'ROXYGEN_ARGS', 'lint-roxygen-arguments', '#\' A function with two parameters, but only only one documented\n#\' @param a A variable\nf = function(a, b){return a;}', tagTypes);
|
|
127
|
-
rule(knownParser, 'problematic-
|
|
127
|
+
rule(knownParser, 'problematic-inputs', 'ProblematicInputsConfig', 'PROBLEMATIC_INPUTS', 'lint-problematic-inputs', `
|
|
128
128
|
function(x) {
|
|
129
129
|
eval(x)
|
|
130
130
|
}
|
package/linter/linter-rules.d.ts
CHANGED
|
@@ -78,7 +78,7 @@ export declare const LintingRules: {
|
|
|
78
78
|
readonly type: "assignment";
|
|
79
79
|
readonly name: ".Random.seed";
|
|
80
80
|
}];
|
|
81
|
-
readonly randomnessConsumers: readonly ["jitter", "sample", "sample.int", "arima.sim", "kmeans", "princomp", "rcauchy", "rchisq", "rexp", "rgamma", "rgeom", "rlnorm", "rlogis", "rmultinom", "rnbinom", "rnorm", "rpois", "runif", "pointLabel", "some", "rbernoulli", "rdunif", "generateSeedVectors"];
|
|
81
|
+
readonly randomnessConsumers: readonly ["jitter", "sample", "sample.int", "arima.sim", "kmeans", "princomp", "rcauchy", "rchisq", "rexp", "rgamma", "rgeom", "rlnorm", "rlogis", "rmultinom", "rnbinom", "rnorm", "rpois", "runif", "pointLabel", "some", "rbernoulli", "rdunif", "generateSeedVectors", "rbeta", "rf", "rhyper", "rweibull", "rt", "rvonmises", "rwilcox", "rxor", "rhyper", "rmvnorm", "rsignrank", "randomForest", "permuted", "permute", "shuffle", "shuffleSet", "data_shuffle", "sample_frac", "sample_n", "slice_sample"];
|
|
82
82
|
};
|
|
83
83
|
readonly tags: readonly [import("./linter-tags").LintingRuleTag.Robustness, import("./linter-tags").LintingRuleTag.Reproducibility];
|
|
84
84
|
readonly certainty: import("./linter-format").LintingRuleCertainty.BestEffort;
|
|
@@ -196,7 +196,7 @@ export declare const LintingRules: {
|
|
|
196
196
|
readonly certainty: import("./linter-format").LintingRuleCertainty.BestEffort;
|
|
197
197
|
readonly description: "Marks network functions that execute network operations, such as downloading files or making HTTP requests.";
|
|
198
198
|
readonly defaultConfig: {
|
|
199
|
-
readonly fns: readonly ["read.table", "read.csv", "read.csv2", "read.delim", "read.delim2", "readRDS", "download.file", "url", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "content", "handle", "get_callback", "VERB", "fread", "gzcon", "readlines", "source", "load", "curl_download", "curl_fetch_memory", "getURL", "getForm", "read_html", "html_nodes", "html_text", "fromJSON", "read.xlsx", "drive_download", "drive_get", "s3read_using", "s3write_using", "storage_download", "AnnotationHub", "ExperimentHub"];
|
|
199
|
+
readonly fns: readonly ["read.table", "read.csv", "read.csv2", "read.delim", "read.delim2", "readRDS", "download.file", "url", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "content", "handle", "get_callback", "VERB", "fread", "gzcon", "readlines", "readLines", "source", "load", "curl_download", "curl_fetch_memory", "getURL", "getForm", "read_html", "read_xml", "html_nodes", "html_text", "fromJSON", "read.xlsx", "drive_download", "drive_get", "s3read_using", "s3write_using", "storage_download", "AnnotationHub", "ExperimentHub", "scan", "socketConnection", "request", "curl"];
|
|
200
200
|
readonly onlyTriggerWithArgument: RegExp;
|
|
201
201
|
};
|
|
202
202
|
};
|
|
@@ -281,28 +281,28 @@ export declare const LintingRules: {
|
|
|
281
281
|
};
|
|
282
282
|
};
|
|
283
283
|
};
|
|
284
|
-
readonly 'problematic-
|
|
285
|
-
readonly createSearch: (config: import("./rules/problematic-
|
|
286
|
-
readonly processSearchResult: (elements: import("../search/flowr-search").FlowrSearchElements<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../search/flowr-search").FlowrSearchElement<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>,
|
|
284
|
+
readonly 'problematic-inputs': {
|
|
285
|
+
readonly createSearch: (config: import("./rules/problematic-inputs").ProblematicInputsConfig) => import("../search/flowr-search-builder").FlowrSearchBuilder<"from-query", [], import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../search/flowr-search").FlowrSearchElements<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../search/flowr-search").FlowrSearchElement<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>>;
|
|
286
|
+
readonly processSearchResult: (elements: import("../search/flowr-search").FlowrSearchElements<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../search/flowr-search").FlowrSearchElement<import("../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>, config: import("./rules/problematic-inputs").ProblematicInputsConfig, data: {
|
|
287
287
|
normalize: import("../r-bridge/lang-4.x/ast/model/processing/decorate").NormalizedAst;
|
|
288
288
|
dataflow: import("../dataflow/info").DataflowInformation;
|
|
289
289
|
cfg: import("../control-flow/control-flow-graph").ControlFlowInformation;
|
|
290
290
|
analyzer: import("../project/flowr-analyzer").ReadonlyFlowrAnalysisProvider;
|
|
291
291
|
}) => Promise<{
|
|
292
|
-
results: import("./rules/problematic-
|
|
293
|
-
|
|
292
|
+
results: import("./rules/problematic-inputs").ProblematicInputsResult[];
|
|
293
|
+
'.meta': {};
|
|
294
294
|
}>;
|
|
295
295
|
readonly prettyPrint: {
|
|
296
|
-
readonly query: (result: import("./rules/problematic-
|
|
297
|
-
readonly full: (result: import("./rules/problematic-
|
|
296
|
+
readonly query: (result: import("./rules/problematic-inputs").ProblematicInputsResult) => string;
|
|
297
|
+
readonly full: (result: import("./rules/problematic-inputs").ProblematicInputsResult) => string;
|
|
298
298
|
};
|
|
299
299
|
readonly info: {
|
|
300
|
-
readonly name: "Problematic
|
|
301
|
-
readonly description: "Detects uses of eval
|
|
300
|
+
readonly name: "Problematic inputs";
|
|
301
|
+
readonly description: "Detects uses of configured dynamic calls (e.g. eval, system) whose inputs are not statically constant. Prints the computed input-sources for the call and flags usages that depend on non-constant/trusted inputs.";
|
|
302
302
|
readonly tags: readonly [import("./linter-tags").LintingRuleTag.Security, import("./linter-tags").LintingRuleTag.Smell, import("./linter-tags").LintingRuleTag.Readability, import("./linter-tags").LintingRuleTag.Performance];
|
|
303
303
|
readonly certainty: import("./linter-format").LintingRuleCertainty.BestEffort;
|
|
304
304
|
readonly defaultConfig: {
|
|
305
|
-
readonly
|
|
305
|
+
readonly consider: readonly ["^eval$", "^system$", "^system2$", "^shell$"];
|
|
306
306
|
};
|
|
307
307
|
};
|
|
308
308
|
};
|
package/linter/linter-rules.js
CHANGED
|
@@ -13,7 +13,7 @@ const useless_loop_1 = require("./rules/useless-loop");
|
|
|
13
13
|
const network_functions_1 = require("./rules/network-functions");
|
|
14
14
|
const stop_with_call_arg_1 = require("./rules/stop-with-call-arg");
|
|
15
15
|
const roxygen_arguments_1 = require("./rules/roxygen-arguments");
|
|
16
|
-
const
|
|
16
|
+
const problematic_inputs_1 = require("./rules/problematic-inputs");
|
|
17
17
|
/**
|
|
18
18
|
* The registry of currently supported linting rules.
|
|
19
19
|
* A linting rule can be executed on a dataflow pipeline result using {@link executeLintingRule}.
|
|
@@ -29,7 +29,7 @@ exports.LintingRules = {
|
|
|
29
29
|
'dataframe-access-validation': dataframe_access_validation_1.DATA_FRAME_ACCESS_VALIDATION,
|
|
30
30
|
'dead-code': dead_code_1.DEAD_CODE,
|
|
31
31
|
'useless-loop': useless_loop_1.USELESS_LOOP,
|
|
32
|
-
'problematic-
|
|
32
|
+
'problematic-inputs': problematic_inputs_1.PROBLEMATIC_INPUTS,
|
|
33
33
|
'stop-call': stop_with_call_arg_1.STOP_WITH_CALL_ARG,
|
|
34
34
|
'roxygen-arguments': roxygen_arguments_1.ROXYGEN_ARGS
|
|
35
35
|
};
|
|
@@ -31,7 +31,7 @@ export declare const NETWORK_FUNCTIONS: {
|
|
|
31
31
|
readonly certainty: LintingRuleCertainty.BestEffort;
|
|
32
32
|
readonly description: "Marks network functions that execute network operations, such as downloading files or making HTTP requests.";
|
|
33
33
|
readonly defaultConfig: {
|
|
34
|
-
readonly fns: readonly ["read.table", "read.csv", "read.csv2", "read.delim", "read.delim2", "readRDS", "download.file", "url", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "content", "handle", "get_callback", "VERB", "fread", "gzcon", "readlines", "source", "load", "curl_download", "curl_fetch_memory", "getURL", "getForm", "read_html", "html_nodes", "html_text", "fromJSON", "read.xlsx", "drive_download", "drive_get", "s3read_using", "s3write_using", "storage_download", "AnnotationHub", "ExperimentHub"];
|
|
34
|
+
readonly fns: readonly ["read.table", "read.csv", "read.csv2", "read.delim", "read.delim2", "readRDS", "download.file", "url", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "content", "handle", "get_callback", "VERB", "fread", "gzcon", "readlines", "readLines", "source", "load", "curl_download", "curl_fetch_memory", "getURL", "getForm", "read_html", "read_xml", "html_nodes", "html_text", "fromJSON", "read.xlsx", "drive_download", "drive_get", "s3read_using", "s3write_using", "storage_download", "AnnotationHub", "ExperimentHub", "scan", "socketConnection", "request", "curl"];
|
|
35
35
|
readonly onlyTriggerWithArgument: RegExp;
|
|
36
36
|
};
|
|
37
37
|
};
|
|
@@ -29,8 +29,14 @@ exports.NETWORK_FUNCTIONS = {
|
|
|
29
29
|
certainty: linter_format_1.LintingRuleCertainty.BestEffort,
|
|
30
30
|
description: 'Marks network functions that execute network operations, such as downloading files or making HTTP requests.',
|
|
31
31
|
defaultConfig: {
|
|
32
|
-
fns: [
|
|
33
|
-
|
|
32
|
+
fns: [
|
|
33
|
+
'read.table', 'read.csv', 'read.csv2', 'read.delim', 'read.delim2', 'readRDS', 'download.file', 'url', 'GET', 'POST', 'PUT',
|
|
34
|
+
'DELETE', 'PATCH', 'HEAD', 'content', 'handle', 'get_callback', 'VERB', 'fread', 'gzcon', 'readlines', 'readLines', 'source', 'load', 'curl_download',
|
|
35
|
+
'curl_fetch_memory', 'getURL', 'getForm', 'read_html', 'read_xml', 'html_nodes', 'html_text', 'fromJSON', 'read.xlsx', 'drive_download', 'drive_get',
|
|
36
|
+
's3read_using', 's3write_using', 'storage_download', 'AnnotationHub', 'ExperimentHub', 'scan',
|
|
37
|
+
'socketConnection', 'request', 'curl'
|
|
38
|
+
],
|
|
39
|
+
onlyTriggerWithArgument: /^(https?|ftps?):\/\//
|
|
34
40
|
}
|
|
35
41
|
}
|
|
36
42
|
};
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { type LintingResult, LintingRuleCertainty } from '../linter-format';
|
|
2
|
+
import type { MergeableRecord } from '../../util/objects';
|
|
3
|
+
import { SourceLocation } from '../../util/range';
|
|
4
|
+
import { LintingRuleTag } from '../linter-tags';
|
|
5
|
+
import type { InputClassifierConfig, InputSources } from '../../queries/catalog/input-sources-query/simple-input-classifier';
|
|
6
|
+
/**
|
|
7
|
+
* Describes a linting result of the `problematic-inputs` rule: one flagged call (e.g. `eval` or `system`), including the location of the call and the computed input sources that lead to it.
|
|
8
|
+
*/
|
|
9
|
+
export interface ProblematicInputsResult extends LintingResult {
|
|
10
|
+
/** Lexeme of the flagged call node; the rule stores an empty string when the node has no lexeme. */
name: string;
|
|
11
|
+
/** Source location of the flagged call node; the rule stores the invalid location when none can be derived from the node. */
loc: SourceLocation;
|
|
12
|
+
/** Input sources that the `input-sources` query reported for the flagged call. */
sources: InputSources;
|
|
13
|
+
}
|
|
14
|
+
/** Configuration of the `problematic-inputs` linting rule. */
export interface ProblematicInputsConfig extends MergeableRecord {
|
|
15
|
+
/** Regular-expression source(s) for the call names to inspect; when omitted or given as an empty array the rule uses its default patterns. */
consider?: string | string[];
|
|
16
|
+
/** Passed on unchanged as the `config` of the `input-sources` query issued for every matched call. */
inputFns?: InputClassifierConfig;
|
|
17
|
+
}
|
|
18
|
+
/** Metadata type of the rule; the rule currently returns an empty `.meta` object. */
export type ProblematicInputsMetadata = MergeableRecord;
|
|
19
|
+
/**
 * Declaration of the `problematic-inputs` linting rule object: search construction (`createSearch`),
 * result processing (`processSearchResult`), the two pretty printers, and the rule metadata (`info`).
 */
export declare const PROBLEMATIC_INPUTS: {
|
|
20
|
+
readonly createSearch: (config: ProblematicInputsConfig) => import("../../search/flowr-search-builder").FlowrSearchBuilder<"from-query", [], import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../../search/flowr-search").FlowrSearchElements<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../../search/flowr-search").FlowrSearchElement<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>>;
|
|
21
|
+
readonly processSearchResult: (elements: import("../../search/flowr-search").FlowrSearchElements<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation, import("../../search/flowr-search").FlowrSearchElement<import("../../r-bridge/lang-4.x/ast/model/processing/decorate").ParentInformation>[]>, config: ProblematicInputsConfig, data: {
|
|
22
|
+
normalize: import("../../r-bridge/lang-4.x/ast/model/processing/decorate").NormalizedAst;
|
|
23
|
+
dataflow: import("../../dataflow/info").DataflowInformation;
|
|
24
|
+
cfg: import("../../control-flow/control-flow-graph").ControlFlowInformation;
|
|
25
|
+
analyzer: import("../../project/flowr-analyzer").ReadonlyFlowrAnalysisProvider;
|
|
26
|
+
}) => Promise<{
|
|
27
|
+
results: ProblematicInputsResult[];
|
|
28
|
+
'.meta': {};
|
|
29
|
+
}>;
|
|
30
|
+
readonly prettyPrint: {
|
|
31
|
+
readonly query: (result: ProblematicInputsResult) => string;
|
|
32
|
+
readonly full: (result: ProblematicInputsResult) => string;
|
|
33
|
+
};
|
|
34
|
+
readonly info: {
|
|
35
|
+
readonly name: "Problematic inputs";
|
|
36
|
+
readonly description: "Detects uses of configured dynamic calls (e.g. eval, system) whose inputs are not statically constant. Prints the computed input-sources for the call and flags usages that depend on non-constant/trusted inputs.";
|
|
37
|
+
readonly tags: readonly [LintingRuleTag.Security, LintingRuleTag.Smell, LintingRuleTag.Readability, LintingRuleTag.Performance];
|
|
38
|
+
readonly certainty: LintingRuleCertainty.BestEffort;
|
|
39
|
+
readonly defaultConfig: {
|
|
40
|
+
readonly consider: readonly ["^eval$", "^system$", "^system2$", "^shell$"];
|
|
41
|
+
};
|
|
42
|
+
};
|
|
43
|
+
};
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PROBLEMATIC_INPUTS = void 0;
|
|
4
|
+
const linter_format_1 = require("../linter-format");
|
|
5
|
+
const flowr_search_builder_1 = require("../../search/flowr-search-builder");
|
|
6
|
+
const range_1 = require("../../util/range");
|
|
7
|
+
const linter_tags_1 = require("../linter-tags");
|
|
8
|
+
const simple_input_classifier_1 = require("../../queries/catalog/input-sources-query/simple-input-classifier");
|
|
9
|
+
const parse_1 = require("../../slicing/criterion/parse");
|
|
10
|
+
/** Call-name patterns the rule inspects when the configuration does not provide any. */
const defaultConsider = ['^eval$', '^system$', '^system2$', '^shell$'];
/**
 * Turn the `consider` option of the rule configuration into a list of regular expressions.
 *
 * @param cfg - rule configuration (may be missing); only its `consider` entry is read
 * @returns one `RegExp` per distinct pattern, in the order given. The default patterns are used
 *          when `consider` is missing, `null`, an empty array, or an empty string.
 * @throws SyntaxError if a pattern is not a valid regular expression (raised by `new RegExp`)
 */
function normalizeConsider(cfg) {
    const consider = cfg?.consider;
    // An empty pattern would compile to a regex that matches every call name (and `null` to /null/),
    // so every "empty" form is treated like "not configured" instead of only the empty array.
    if (consider == null || consider.length === 0) {
        return defaultConsider.map(pattern => new RegExp(pattern));
    }
    const patterns = Array.isArray(consider) ? consider : [consider];
    // a Set drops duplicates while keeping first-occurrence order
    return Array.from(new Set(patterns), pattern => new RegExp(pattern));
}
|
|
22
|
+
/**
 * Render input sources for display.
 * With `inline` set, the result is one string in which the entries are separated by `; `;
 * otherwise it is an array holding one `- …` line per entry (the caller joins the lines).
 * A missing or empty input yields `''` (inline) or `[]` (block).
 */
function formatInputSources(inputs, inline = true) {
    const nothingToShow = !inputs || inputs.length === 0;
    if (nothingToShow) {
        if (inline) {
            return '';
        }
        return [];
    }
    const describe = (src) => {
        const types = `[${src.types.join(',')}]`;
        // the `cds` part is only appended when the source carries a truthy `cds` entry
        let cds = '';
        if (src.cds) {
            cds = `, cds: [${src.cds.join(',')}]`;
        }
        if (inline) {
            return `${src.id} (type: ${types}, trace: ${src.trace}${cds})`;
        }
        return `- ${src.id}: type=${types}, trace=${src.trace}${cds}`;
    };
    const rendered = inputs.map(src => describe(src));
    return inline ? rendered.join('; ') : rendered;
}
|
|
43
|
+
// small helpers to keep checks readable
|
|
44
|
+
/** Tells whether at least one of the given sources lists `InputType.Unknown` among its types. */
function hasUnknownSource(sources) {
    const mentionsUnknown = ({ types }) => types.includes(simple_input_classifier_1.InputType.Unknown);
    return sources.some(mentionsUnknown);
}
|
|
47
|
+
/** Tells whether at least one source carries a type that is not contained in `allowed`. */
function isProblematicForAllowed(sources, allowed) {
    const isRejected = (type) => !allowed.includes(type);
    return sources.some(({ types }) => types.some(type => isRejected(type)));
}
|
|
50
|
+
exports.PROBLEMATIC_INPUTS = {
|
|
51
|
+
createSearch: config => {
|
|
52
|
+
const considerArr = normalizeConsider(config);
|
|
53
|
+
const queries = considerArr.map((name, i) => ({
|
|
54
|
+
type: 'call-context',
|
|
55
|
+
callName: name,
|
|
56
|
+
callNameExact: false,
|
|
57
|
+
subkind: `fn-${i}`
|
|
58
|
+
}));
|
|
59
|
+
return flowr_search_builder_1.Q.fromQuery(...queries);
|
|
60
|
+
},
|
|
61
|
+
processSearchResult: async (elements, config, data) => {
|
|
62
|
+
const results = [];
|
|
63
|
+
const defaultAccept = [simple_input_classifier_1.InputType.Constant, simple_input_classifier_1.InputType.DerivedConstant];
|
|
64
|
+
for (const element of elements.getElements()) {
|
|
65
|
+
const nid = element.node.info.id;
|
|
66
|
+
const criterion = parse_1.SlicingCriterion.fromId(nid);
|
|
67
|
+
const q = { type: 'input-sources', criterion, config: config.inputFns };
|
|
68
|
+
const all = await data.analyzer.query([q]);
|
|
69
|
+
const inputSourcesResult = all['input-sources'];
|
|
70
|
+
const sources = inputSourcesResult?.results?.[criterion] ?? [];
|
|
71
|
+
if (isProblematicForAllowed(sources, defaultAccept)) {
|
|
72
|
+
const certainty = hasUnknownSource(sources) ? linter_format_1.LintingResultCertainty.Uncertain : linter_format_1.LintingResultCertainty.Certain;
|
|
73
|
+
results.push({
|
|
74
|
+
involvedId: nid,
|
|
75
|
+
certainty,
|
|
76
|
+
loc: range_1.SourceLocation.fromNode(element.node) ?? range_1.SourceLocation.invalid(),
|
|
77
|
+
name: element.node.lexeme ?? '',
|
|
78
|
+
sources
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
return {
|
|
83
|
+
results,
|
|
84
|
+
'.meta': {}
|
|
85
|
+
};
|
|
86
|
+
},
|
|
87
|
+
/* helper to format input sources for pretty printing */
|
|
88
|
+
prettyPrint: {
|
|
89
|
+
[linter_format_1.LintingPrettyPrintContext.Query]: result => {
|
|
90
|
+
const inputs = result.sources ?? [];
|
|
91
|
+
const srcStr = formatInputSources(inputs, true);
|
|
92
|
+
return 'Use of configured dynamic call at ' + range_1.SourceLocation.format(result.loc) + (srcStr ? '; inputs: ' + srcStr : '');
|
|
93
|
+
},
|
|
94
|
+
[linter_format_1.LintingPrettyPrintContext.Full]: result => {
|
|
95
|
+
const inputs = result.sources ?? [];
|
|
96
|
+
const srcLines = formatInputSources(inputs, false);
|
|
97
|
+
return 'Use of configured dynamic call at ' + range_1.SourceLocation.format(result.loc) + ' is potentially problematic' + (srcLines.length ? '\nInputs:\n' + srcLines.join('\n') : '');
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
info: {
|
|
101
|
+
name: 'Problematic inputs',
|
|
102
|
+
description: 'Detects uses of configured dynamic calls (e.g. eval, system) whose inputs are not statically constant. Prints the computed input-sources for the call and flags usages that depend on non-constant/trusted inputs.',
|
|
103
|
+
tags: [linter_tags_1.LintingRuleTag.Security, linter_tags_1.LintingRuleTag.Smell, linter_tags_1.LintingRuleTag.Readability, linter_tags_1.LintingRuleTag.Performance],
|
|
104
|
+
certainty: linter_format_1.LintingRuleCertainty.BestEffort,
|
|
105
|
+
defaultConfig: {
|
|
106
|
+
consider: defaultConsider
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
};
|
|
110
|
+
//# sourceMappingURL=problematic-inputs.js.map
|
|
@@ -54,7 +54,7 @@ export declare const SEEDED_RANDOMNESS: {
|
|
|
54
54
|
readonly type: "assignment";
|
|
55
55
|
readonly name: ".Random.seed";
|
|
56
56
|
}];
|
|
57
|
-
readonly randomnessConsumers: readonly ["jitter", "sample", "sample.int", "arima.sim", "kmeans", "princomp", "rcauchy", "rchisq", "rexp", "rgamma", "rgeom", "rlnorm", "rlogis", "rmultinom", "rnbinom", "rnorm", "rpois", "runif", "pointLabel", "some", "rbernoulli", "rdunif", "generateSeedVectors"];
|
|
57
|
+
readonly randomnessConsumers: readonly ["jitter", "sample", "sample.int", "arima.sim", "kmeans", "princomp", "rcauchy", "rchisq", "rexp", "rgamma", "rgeom", "rlnorm", "rlogis", "rmultinom", "rnbinom", "rnorm", "rpois", "runif", "pointLabel", "some", "rbernoulli", "rdunif", "generateSeedVectors", "rbeta", "rf", "rhyper", "rweibull", "rt", "rvonmises", "rwilcox", "rxor", "rhyper", "rmvnorm", "rsignrank", "randomForest", "permuted", "permute", "shuffle", "shuffleSet", "data_shuffle", "sample_frac", "sample_n", "slice_sample"];
|
|
58
58
|
};
|
|
59
59
|
readonly tags: readonly [LintingRuleTag.Robustness, LintingRuleTag.Reproducibility];
|
|
60
60
|
readonly certainty: LintingRuleCertainty.BestEffort;
|
|
@@ -126,7 +126,14 @@ exports.SEEDED_RANDOMNESS = {
|
|
|
126
126
|
info: {
|
|
127
127
|
defaultConfig: {
|
|
128
128
|
randomnessProducers: [{ type: 'function', name: 'set.seed' }, { type: 'assignment', name: '.Random.seed' }],
|
|
129
|
-
randomnessConsumers: [
|
|
129
|
+
randomnessConsumers: [
|
|
130
|
+
'jitter', 'sample', 'sample.int', 'arima.sim', 'kmeans', 'princomp', 'rcauchy', 'rchisq', 'rexp',
|
|
131
|
+
'rgamma', 'rgeom', 'rlnorm', 'rlogis', 'rmultinom', 'rnbinom', 'rnorm', 'rpois', 'runif', 'pointLabel',
|
|
132
|
+
'some', 'rbernoulli', 'rdunif', 'generateSeedVectors',
|
|
133
|
+
'rbeta', 'rf', 'rhyper', 'rweibull', 'rt', 'rvonmises', 'rwilcox', 'rxor', 'rhyper', 'rmvnorm',
|
|
134
|
+
'rsignrank', 'randomForest',
|
|
135
|
+
'permuted', 'permute', 'shuffle', 'shuffleSet', 'data_shuffle', 'sample_frac', 'sample_n', 'slice_sample',
|
|
136
|
+
],
|
|
130
137
|
},
|
|
131
138
|
tags: [linter_tags_1.LintingRuleTag.Robustness, linter_tags_1.LintingRuleTag.Reproducibility],
|
|
132
139
|
// only finds proper randomness producers and consumers due to its config, but will not find all producers/consumers since not all existing deprecated functions will be in the config
|