@eagleoutice/flowr 2.2.2 → 2.2.3
This diff shows the contents of the publicly released package versions as they appear in their public registry and is provided for informational purposes only.
- package/cli/repl/commands/repl-dataflow.js +7 -4
- package/cli/repl/commands/repl-parse.js +43 -2
- package/cli/repl/print-version.d.ts +1 -0
- package/cli/repl/print-version.js +7 -2
- package/cli/repl/server/connection.js +10 -8
- package/core/pipeline-executor.d.ts +6 -0
- package/core/pipeline-executor.js +8 -0
- package/core/print/dataflow-printer.js +3 -0
- package/core/steps/all/core/01-parse-tree-sitter.d.ts +7 -0
- package/core/steps/pipeline/default-pipelines.d.ts +57 -47
- package/core/steps/pipeline/default-pipelines.js +23 -2
- package/core/steps/pipeline/pipeline.d.ts +1 -1
- package/core/steps/pipeline/pipeline.js +1 -1
- package/core/steps/pipeline-step.d.ts +1 -3
- package/dataflow/environments/resolve-by-name.d.ts +3 -2
- package/dataflow/environments/resolve-by-name.js +4 -4
- package/dataflow/extractor.d.ts +10 -0
- package/dataflow/extractor.js +10 -0
- package/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.js +1 -1
- package/dataflow/internal/process/functions/call/built-in/built-in-source.js +20 -4
- package/documentation/doc-util/doc-dfg.d.ts +5 -3
- package/documentation/doc-util/doc-dfg.js +10 -8
- package/documentation/doc-util/doc-files.d.ts +1 -1
- package/documentation/doc-util/doc-files.js +1 -1
- package/documentation/doc-util/doc-normalized-ast.d.ts +2 -1
- package/documentation/doc-util/doc-normalized-ast.js +4 -5
- package/documentation/doc-util/doc-repl.d.ts +6 -2
- package/documentation/doc-util/doc-repl.js +10 -6
- package/documentation/doc-util/doc-structure.d.ts +1 -1
- package/documentation/doc-util/doc-types.d.ts +7 -5
- package/documentation/doc-util/doc-types.js +15 -10
- package/documentation/index.d.ts +9 -0
- package/documentation/index.js +26 -0
- package/documentation/print-capabilities-markdown.js +105 -19
- package/documentation/print-core-wiki.d.ts +1 -0
- package/documentation/print-core-wiki.js +406 -0
- package/documentation/print-dataflow-graph-wiki.js +27 -27
- package/documentation/print-interface-wiki.js +1 -1
- package/documentation/print-linting-and-testing-wiki.js +26 -8
- package/documentation/print-normalized-ast-wiki.js +22 -17
- package/documentation/print-query-wiki.js +7 -7
- package/documentation/print-search-wiki.js +2 -1
- package/package.json +3 -2
- package/queries/catalog/happens-before-query/happens-before-query-format.js +1 -1
- package/queries/catalog/resolve-value-query/resolve-value-query-executor.js +1 -1
- package/queries/catalog/resolve-value-query/resolve-value-query-format.js +1 -1
- package/queries/catalog/search-query/search-query-format.js +1 -1
- package/r-bridge/data/data.d.ts +48 -7
- package/r-bridge/data/data.js +62 -8
- package/r-bridge/data/types.d.ts +7 -1
- package/r-bridge/lang-4.x/ast/model/processing/decorate.d.ts +2 -0
- package/r-bridge/lang-4.x/ast/model/processing/node-id.js +2 -5
- package/r-bridge/lang-4.x/ast/parser/json/format.d.ts +6 -0
- package/r-bridge/lang-4.x/ast/parser/json/format.js +6 -0
- package/r-bridge/lang-4.x/ast/parser/json/parser.d.ts +13 -2
- package/r-bridge/lang-4.x/ast/parser/json/parser.js +19 -3
- package/r-bridge/lang-4.x/ast/parser/main/internal/structure/normalize-root.d.ts +3 -0
- package/r-bridge/lang-4.x/ast/parser/main/internal/structure/normalize-root.js +3 -0
- package/r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize.js +6 -1
- package/r-bridge/parser.d.ts +10 -0
- package/r-bridge/parser.js +26 -2
- package/search/flowr-search-builder.d.ts +1 -2
- package/search/flowr-search-builder.js +1 -3
- package/util/mermaid/dfg.d.ts +3 -0
- package/util/mermaid/dfg.js +24 -8
- package/util/strings.d.ts +9 -0
- package/util/strings.js +14 -0
- package/util/version.js +1 -1
package/documentation/print-core-wiki.js
@@ -0,0 +1,406 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const shell_1 = require("../r-bridge/shell");
+const log_1 = require("../../test/functionality/_helper/log");
+const log_2 = require("../util/log");
+const doc_auto_gen_1 = require("./doc-util/doc-auto-gen");
+const doc_structure_1 = require("./doc-util/doc-structure");
+const doc_files_1 = require("./doc-util/doc-files");
+const doc_cli_option_1 = require("./doc-util/doc-cli-option");
+const doc_types_1 = require("./doc-util/doc-types");
+const path_1 = __importDefault(require("path"));
+const doc_code_1 = require("./doc-util/doc-code");
+const extractor_1 = require("../dataflow/extractor");
+const parser_1 = require("../r-bridge/parser");
+const parser_2 = require("../r-bridge/lang-4.x/ast/parser/json/parser");
+const doc_repl_1 = require("./doc-util/doc-repl");
+const doc_dfg_1 = require("./doc-util/doc-dfg");
+const doc_normalized_ast_1 = require("./doc-util/doc-normalized-ast");
+const init_1 = require("../r-bridge/init");
+const format_1 = require("../r-bridge/lang-4.x/ast/parser/json/format");
+const normalize_root_1 = require("../r-bridge/lang-4.x/ast/parser/main/internal/structure/normalize-root");
+const decorate_1 = require("../r-bridge/lang-4.x/ast/model/processing/decorate");
+const process_uninteresting_leaf_1 = require("../dataflow/internal/process/process-uninteresting-leaf");
+const built_in_access_1 = require("../dataflow/internal/process/functions/call/built-in/built-in-access");
+const built_in_for_loop_1 = require("../dataflow/internal/process/functions/call/built-in/built-in-for-loop");
+const built_in_repeat_loop_1 = require("../dataflow/internal/process/functions/call/built-in/built-in-repeat-loop");
+const linker_1 = require("../dataflow/internal/linker");
+const static_slicer_1 = require("../slicing/static/static-slicer");
+const info_1 = require("../dataflow/info");
+const processor_1 = require("../dataflow/processor");
+const default_pipelines_1 = require("../core/steps/pipeline/default-pipelines");
+const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor");
+const retriever_1 = require("../r-bridge/retriever");
+const json_1 = require("../util/json");
+const stateful_fold_1 = require("../r-bridge/lang-4.x/ast/model/processing/stateful-fold");
+const normalize_single_node_1 = require("../r-bridge/lang-4.x/ast/parser/main/internal/structure/normalize-single-node");
+const normalize_if_then_1 = require("../r-bridge/lang-4.x/ast/parser/main/internal/control/normalize-if-then");
+const normalize_for_1 = require("../r-bridge/lang-4.x/ast/parser/main/internal/loops/normalize-for");
+const doc_issue_1 = require("./doc-util/doc-issue");
+const pipeline_executor_1 = require("../core/pipeline-executor");
+const pipeline_1 = require("../core/steps/pipeline/pipeline");
+async function getText(shell) {
+const rversion = (await shell.usedRVersion())?.format() ?? 'unknown';
+const sampleCode = 'x <- 1; print(x)';
+const { info, program } = (0, doc_types_1.getTypesFromFolderAsMermaid)({
+rootFolder: path_1.default.resolve('./src'),
+typeName: shell_1.RShell.name,
+inlineTypes: doc_types_1.mermaidHide
+});
+return `${(0, doc_auto_gen_1.autoGenHeader)({ filename: module.filename, purpose: 'core', rVersion: rversion })}
+
+This wiki page provides an overview of the inner workings of _flowR_.
+It is mostly intended for developers that want to extend the capabilities of _flowR_
+and assumes knowledge of [TypeScript](https://www.typescriptlang.org/) and [R](https://www.r-project.org/).
+If you think parts of the wiki are missing, wrong, or outdated, please do not hesitate to [open a new issue](${doc_issue_1.NewIssueUrl})!
+In case you are new and want to develop for flowR, please check out the relevant [Setup](${doc_files_1.FlowrWikiBaseRef}/Setup#-developing-for-flowr) wiki page
+and the [Contributing Guidelines](${doc_files_1.RemoteFlowrFilePathBaseRef}/.github/CONTRIBUTING.md).
+
+${(0, doc_structure_1.block)({
+type: 'NOTE',
+content: `
+Essentially every step we explain here can be explored directly from flowR's REPL in an interactive fashion (see the [Interface](${doc_files_1.FlowrWikiBaseRef}/Interface#using-the-repl) wiki page).
+We recommend using commands like ${(0, doc_cli_option_1.getReplCommand)('parse')} or ${(0, doc_cli_option_1.getReplCommand)('dataflow*')} to explore the output of flowR using your own samples.
+As a quickstart you may use:
+
+${await (0, doc_repl_1.documentReplSession)(shell, [{
+command: `:parse "${sampleCode}"`,
+description: `Retrieves the AST from the ${(0, doc_types_1.shortLink)(shell_1.RShell.name, info)}.`
+}])}
+
+If you are brave (or desperate) enough, you can also try to use the ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'verbose')} option to be dumped with information about flowR's internals (please, never use this for benchmarking).
+See the [Getting flowR to Talk](#getting-flowr-to-talk) section below for more information.
+`
+})}
+
+* [Pipelines and their Execution](#pipelines-and-their-execution)
+* [How flowR Produces Dataflow Graphs](#how-flowr-produces-dataflow-graphs)
+* [Overview](#overview)
+* [Parsing](#parsing)
+* [Normalization](#normalization)
+* [Dataflow Graph Generation](#dataflow-graph-generation)
+* [Beyond the Dataflow Graph](#beyond-the-dataflow-graph)
+* [Static Backward Slicing](#static-backward-slicing)
+* [Getting flowR to Talk](#getting-flowr-to-talk)
+
+## Pipelines and their Execution
+
+At the core of every analysis by flowR is the ${(0, doc_types_1.shortLink)(pipeline_executor_1.PipelineExecutor.name, info)} class which takes a sequence of analysis steps (in the form of a ${(0, doc_types_1.shortLink)('Pipeline', info)}) and executes it
+on a given input. In general, these pipeline steps are analysis agnostic and may use arbitrary input and ordering. However, two important and predefined pipelines,
+the ${(0, doc_types_1.shortLink)('DEFAULT_DATAFLOW_PIPELINE', info)} and the ${(0, doc_types_1.shortLink)('TREE_SITTER_DATAFLOW_PIPELINE', info)} adequately cover the most common analysis steps
+(differentiated only by the [Engine](${doc_files_1.FlowrWikiBaseRef}/Engines) used).
+
+${(0, doc_structure_1.block)({
+type: 'TIP',
+content: `
+You can hover over most links within these wiki pages to get access to the tsdoc comment of the respective element.
+The links should direct you to the up-to-date implementation.
+`
+})}
+
+Using the [\`tree-sitter\` engine](${doc_files_1.FlowrWikiBaseRef}/Engines) you can request a dataflow analysis of a sample piece of R code like the following:
+
+${(0, doc_code_1.codeBlock)('typescript', `
+const executor = new PipelineExecutor(TREE_SITTER_DATAFLOW_PIPELINE, {
+parser: new TreeSitterExecutor(),
+request: requestFromInput('x <- 1; y <- x; print(y);')
+});
+const result = await executor.allRemainingSteps();
+`)}
+
+This is, roughly, what the ${(0, doc_types_1.shortLink)('replGetDataflow', info)} function does for the ${(0, doc_cli_option_1.getReplCommand)('dataflow')} REPL command when using the [\`tree-sitter\` engine](${doc_files_1.FlowrWikiBaseRef}/Engines).
+We create a new ${(0, doc_types_1.shortLink)(pipeline_executor_1.PipelineExecutor.name, info)} with the ${(0, doc_types_1.shortLink)('TREE_SITTER_DATAFLOW_PIPELINE', info)} and then use ${(0, doc_types_1.shortLink)(`${pipeline_executor_1.PipelineExecutor.name}::${new pipeline_executor_1.PipelineExecutor(default_pipelines_1.TREE_SITTER_PARSE_PIPELINE, { parser: new tree_sitter_executor_1.TreeSitterExecutor(), request: (0, retriever_1.requestFromInput)('') }).allRemainingSteps.name}`, info)}
+to cause the execution of all contained steps (in general, pipelines can be executed step-by-step, but this is usually not required if you just want the result).
+${(0, doc_types_1.shortLink)(retriever_1.requestFromInput.name, info)} is merely a convenience function to create a request object from a code string.
+
+In general, however, most flowR-internal functions which are tasked with generating dataflow prefer the use of ${(0, doc_types_1.shortLink)(default_pipelines_1.createDataflowPipeline.name, info)} as this function
+automatically selects the correct pipeline based on the engine used.
+
+### Understanding Pipeline Steps
+
+Everything that complies with the ${(0, doc_types_1.shortLink)('IPipelineStep', info)} interface can be used as a step in a pipeline, with the most important definition being the
+\`processor\` function, which refers to the actual work performed by the step.
+For example, the ${(0, doc_types_1.shortLink)('STATIC_DATAFLOW', info)} step ultimately relies on the ${(0, doc_types_1.shortLink)(extractor_1.produceDataFlowGraph.name, info)} function to create a [dataflow graph](${doc_files_1.FlowrWikiBaseRef}/Dataflow-Graph)
+using the [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST) of the program.
+
+### Shape of a Pipeline Step
+
+Using code, you can provide an arbitrary pipeline step to the executor, as long as it implements the ${(0, doc_types_1.shortLink)('IPipelineStep', info)} interface:
+
+${(0, doc_types_1.printHierarchy)({ program, info, root: 'IPipelineStep', maxDepth: 0 })}
+
+Every step may specify required inputs, ways of visualizing the output, and its dependencies using the ${(0, doc_types_1.shortLink)('IPipelineStepOrder', info)} interface.
+As the types may seem to be somewhat confusing or over-complicated, we recommend you to look at some existing steps, like
+the ${(0, doc_types_1.shortLink)('PARSE_WITH_R_SHELL_STEP', info)} or the ${(0, doc_types_1.shortLink)('STATIC_DATAFLOW', info)} step.
+The pipeline executor should do a good job of scheduling these steps (usually using a topological sort), and inferring the required inputs in the type system (have a look at the ${(0, doc_types_1.shortLink)(pipeline_1.createPipeline.name, info)} function if you want to know more).
+
+${(0, doc_structure_1.block)({
+type: 'NOTE',
+content: `
+Under the hood there is a step-subtype called a decoration. Such a step can be added to a pipeline to decorate the output of another one (e.g., making it more precise, re-adding debug info, ...).
+To mark a step as a decoration, you can use the \`decorates\` field in the ${(0, doc_types_1.shortLink)('IPipelineStepOrder', info)} interface.
+However, as such steps are currently not relevant for any of flowR's core analyses we will not go into detail here. It suffices to know how "real" steps work.
+`
+})}
+
+## How flowR Produces Dataflow Graphs
+
+This section focuses on the generation of a [dataflow graph](${doc_files_1.FlowrWikiBaseRef}/Dataflow-Graph) from a given R program, using the [RShell Engine](${doc_files_1.FlowrWikiBaseRef}/Engines) and hence the
+${(0, doc_types_1.shortLink)('DEFAULT_DATAFLOW_PIPELINE', info)}. The [\`tree-sitter\` engine](${doc_files_1.FlowrWikiBaseRef}/Engines) uses the ${(0, doc_types_1.shortLink)('TREE_SITTER_DATAFLOW_PIPELINE', info)},
+which replaces the parser with the integrated tree-sitter parser and hence uses a slightly adapted normalization step to produce a similar [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST).
+The [dataflow graph](${doc_files_1.FlowrWikiBaseRef}/Dataflow-Graph) should be the same for both engines (although [\`tree-sitter\`](${doc_files_1.FlowrWikiBaseRef}/Engines) is faster and may be able to parse more files).
+
+### Overview
+
+Let's have a look at the definition of the pipeline:
+
+${(0, doc_types_1.printHierarchy)({ program, info, root: 'DEFAULT_DATAFLOW_PIPELINE', maxDepth: 0 })}
+
+We can see that it relies on three steps:
+
+1. **${(0, doc_types_1.shortLink)('PARSE_WITH_R_SHELL_STEP', info, false)}** ([parsing](#parsing)): Uses the ${(0, doc_types_1.shortLink)(shell_1.RShell.name, info)} to parse the input program.\\
+_Its main function linked as the processor is the ${(0, doc_types_1.shortLink)(parser_1.parseRequests.name, info, false)} function._
+2. **${(0, doc_types_1.shortLink)('NORMALIZE', info, false)}** ([normalization](#normalization)): Normalizes the AST produced by the parser (to create a [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST)).\\
+_Its main function linked as the processor is the ${(0, doc_types_1.shortLink)(parser_2.normalize.name, info, false)} function._
+3. **${(0, doc_types_1.shortLink)('STATIC_DATAFLOW', info, false)}** ([dataflow](#dataflow-graph-generation)): Produces the actual [dataflow graph](${doc_files_1.FlowrWikiBaseRef}/Dataflow-Graph) from the normalized AST.\\
+_Its main function linked as the processor is the ${(0, doc_types_1.shortLink)(extractor_1.produceDataFlowGraph.name, info, false)} function._
+
+To explore these steps, let's use the REPL with the (very simple and contrived) R code: \`${sampleCode}\`.
+
+${await (0, doc_repl_1.documentReplSession)(shell, [{
+command: `:parse "${sampleCode}"`,
+description: `This shows the ASCII-Art representation of the parse-tree of the R code \`${sampleCode}\`, as it is provided by the ${(0, doc_types_1.shortLink)(shell_1.RShell.name, info)}. See the ${(0, doc_types_1.shortLink)(init_1.initCommand.name, info)} function for more information on how we request a parse.`
+},
+{
+command: `:normalize* "${sampleCode}"`,
+description: `Following the link, the output should show the following:\n${await (0, doc_normalized_ast_1.printNormalizedAstForCode)(shell, sampleCode, { showCode: false })}`
+},
+{
+command: `:dataflow* "${sampleCode}"`,
+description: `Following the link, the output should show the following:\n${await (0, doc_dfg_1.printDfGraphForCode)(shell, sampleCode, { showCode: false })}`
+}
+], { openOutput: false })}
+
+Especially when you are just starting with flowR, we recommend using the REPL to explore the output of the different steps.
+
+${(0, doc_structure_1.block)({
+type: 'NOTE',
+content: 'Maybe you are left with the question: What is tree-sitter doing differently? Expand the following to get more information!\n\n' + (0, doc_structure_1.details)('And what changes with tree-sitter?', `
+
+Essentially not much (from a user perspective, it does essentially everything and all differently under the hood)! Have a look at the [Engines](${doc_files_1.FlowrWikiBaseRef}/Engines) wiki page for more information on the differences between the engines.
+Below you can see the REPL commands for the tree-sitter engine (using ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'default-engine')} to set the engine to tree-sitter):
+
+${await (async () => {
+const exec = new tree_sitter_executor_1.TreeSitterExecutor();
+return await (0, doc_repl_1.documentReplSession)(exec, [{
+command: `:parse "${sampleCode}"`,
+description: `This shows the ASCII-Art representation of the parse-tree of the R code \`${sampleCode}\`, as it is provided by the ${(0, doc_types_1.shortLink)(tree_sitter_executor_1.TreeSitterExecutor.name, info)}. See the [Engines](${doc_files_1.FlowrWikiBaseRef}/Engines) wiki page for more information on the differences between the engines.`
+},
+{
+command: `:normalize* "${sampleCode}"`,
+description: `Following the link, the output should show the following:\n${await (0, doc_normalized_ast_1.printNormalizedAstForCode)(exec, sampleCode, { showCode: false })}`
+},
+{
+command: `:dataflow* "${sampleCode}"`,
+description: `Following the link, the output should show the following:\n${await (0, doc_dfg_1.printDfGraphForCode)(exec, sampleCode, { showCode: false })}`
+}], { openOutput: false, args: '--default-engine tree-sitter' });
+})()}
+`)
+})}
+
+### Parsing
+
+The parsing step uses the ${(0, doc_types_1.shortLink)(shell_1.RShell.name, info)} to parse the input program (or, of course, the ${(0, doc_types_1.shortLink)(tree_sitter_executor_1.TreeSitterExecutor.name, info)} when using the [\`tree-sitter\` engine](${doc_files_1.FlowrWikiBaseRef}/Engines)).
+To speed up the process, we use the ${(0, doc_types_1.shortLink)(init_1.initCommand.name, info)} function to compile the parsing function and rely on a
+custom serialization, which outputs the information in a CSV-like format.
+This means that the ${(0, doc_cli_option_1.getReplCommand)('parse')} command actually kind-of lies to you, as it pretty-prints the serialized version, which looks more like the following (this uses the ${(0, doc_types_1.shortLink)(retriever_1.retrieveParseDataFromRCode.name, info)} function with the sample code \`${sampleCode}\`):
+
+${(0, doc_structure_1.details)(`Raw parse output for <code>${sampleCode}</code>`, `For the code \`${sampleCode}\`:\n\n` + (0, doc_code_1.codeBlock)('csv', await (0, retriever_1.retrieveParseDataFromRCode)((0, retriever_1.requestFromInput)(sampleCode), shell)))}
+
+Beautiful, right? I thought so too! In fact, the output is a little bit nicer when we put it into a table format and add the appropriate headers:
+
+<details open>
+<summary>Parse output in table format</summary>
+
+For the code \`${sampleCode}\`:
+
+| line-start | col-start | line-end | col-end | id | parent | token type | terminal | text |
+| ---------: | --------: | -------: | ------: | -: | -----: | ---------- | -------- | ---- |
+${await (0, retriever_1.retrieveParseDataFromRCode)((0, retriever_1.requestFromInput)(sampleCode), shell).then(data => JSON.parse('[' + data + ']').map(([line1, col1, line2, col2, id, parent, type, terminal, text]) => `| ${line1} | ${col1} | ${line2} | ${col2} | ${id} | ${parent} | \`${type}\` | ${terminal} | ${text} |`).join('\n'))}
+
+</details>
+
+In fact, this data is merely what R's [\`base::parse\`](https://stat.ethz.ch/R-manual/R-devel/library/base/html/parse.html) and [\`utils::getParseData\`](https://stat.ethz.ch/R-manual/R-devel/library/utils/html/getParseData.html) functions provide.
+We then use this data in the [normalization](#normalization) step to create a [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST).
+
+If you are interested in the raw token types that we may encounter, have a look at the ${(0, doc_types_1.shortLink)('RawRType', info)} enum.
+
+### Normalization
+
+The normalization function ${(0, doc_types_1.shortLink)(parser_2.normalize.name, info)} takes the output from the previous steps and uses the ${(0, doc_types_1.shortLink)(format_1.prepareParsedData.name, info)} and
+${(0, doc_types_1.shortLink)(format_1.convertPreparedParsedData.name, info)} functions to first transform the serialized parsing output to an object.
+Next, ${(0, doc_types_1.shortLink)(normalize_root_1.normalizeRootObjToAst.name, info)} transforms this object to a normalized AST and ${(0, doc_types_1.shortLink)(decorate_1.decorateAst.name, info)} adds additional information to the AST (like roles, ids, depth, etc.).
+While looking at the mermaid visualization of such an AST is nice and usually sufficient, looking at the objects themselves shows you the full range of information the AST provides (all encompassed within the ${(0, doc_types_1.shortLink)('RNode', info)} type).
+
+Let's have a look at the normalized AST for the sample code \`${sampleCode}\` (please refer to the [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST) wiki page for more information):
+
+${(0, doc_structure_1.details)('Normalized AST for <code>x <- 1; print(x)</code>', (0, doc_code_1.codeBlock)('json', JSON.stringify((await (0, default_pipelines_1.createNormalizePipeline)(shell, { request: (0, retriever_1.requestFromInput)(sampleCode) }).allRemainingSteps()).normalize.ast, json_1.jsonReplacer, 4)))}
+
+This is… a lot! We get the type from the ${(0, doc_types_1.shortLink)('RType', info)} enum, the lexeme, location information, an id, the children of the node, and their parents.
+While the [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST) wiki page provides you with information on how to interpret this data, we will focus on how we get it from the
+table provided by the [parsing](#parsing) step.
+
+There are two important functions: ${(0, doc_types_1.shortLink)(normalize_root_1.normalizeRootObjToAst.name, info)}, which operates on the parse-output already transformed into a tree-like structure,
+and ${(0, doc_types_1.shortLink)(decorate_1.decorateAst.name, info)}, which adds additional information to the AST.
+Both follow a [fold](https://en.wikipedia.org/wiki/Fold_(higher-order_function)) pattern.
+The fold is explicit for ${(0, doc_types_1.shortLink)(decorate_1.decorateAst.name, info)}, which directly relies on the ${(0, doc_types_1.shortLink)(stateful_fold_1.foldAstStateful.name, info)} function,
+while ${(0, doc_types_1.shortLink)(normalize_root_1.normalizeRootObjToAst.name, info)} uses the fold-idiom but deviates in cases in which (for example) we require more information on other nodes to know what it should be normalized to.
+
+#### Normalizing the Object
+
+We have a handler for everything. For example, ${(0, doc_types_1.shortLink)(normalize_if_then_1.tryNormalizeIfThen.name, info)} or ${(0, doc_types_1.shortLink)(normalize_for_1.tryNormalizeFor.name, info)} to handle \`if(x) y\` or \`for(i in 1:10) x\` constructs.
+All of these handlers contain many sanity checks to be sure that we talk to an ${(0, doc_types_1.shortLink)('RShell', info)} which we can handle (as assumptions may break with newer versions).
+These functions contain the keyword \`try\` as they may fail. For example, whenever they notice late into normalization that they should actually be a different construct (R is great).
+For single nodes, we use ${(0, doc_types_1.shortLink)(normalize_single_node_1.normalizeSingleNode.name, info)} which contains a catch-all for some edge-cases in the R grammar.
+
+The output of just this pass is listed below (using the ${(0, doc_types_1.shortLink)(parser_2.normalizeButNotDecorated.name, info)} function):
+
+${(0, doc_structure_1.details)('Ast for <code>x <- 1; print(x)</code> after the first normalization', (0, doc_code_1.codeBlock)('json', JSON.stringify((0, parser_2.normalizeButNotDecorated)((await (0, default_pipelines_1.createParsePipeline)(shell, { request: (0, retriever_1.requestFromInput)(sampleCode) }).allRemainingSteps()).parse), json_1.jsonReplacer, 4)))}
+
+
+#### Decorating the AST
+
+The decoration is comparatively trivial. We take the AST, throw it into the ${(0, doc_types_1.shortLink)(decorate_1.decorateAst.name, info)} function (which, again, handles each normalized node type) and
+get:
+
+1. The AST with ids, roles, and depth information (see the [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST) wiki page for more information).
+2. A mapping of ids to nodes in the form of a ${(0, doc_types_1.shortLink)('AstIdMap', info)} object. This allows us to quickly access nodes by their id.
+
+The ids used for the AST generation are arbitrary (usually created by the ${(0, doc_types_1.shortLink)(decorate_1.deterministicCountingIdGenerator.name, info)} function) but unique and intentionally
+separated from the ids used by the R parser. For one, this detaches us from the [Engine](${doc_files_1.FlowrWikiBaseRef}/Engines) used, and secondly, it allows for much easier
+extension of the AST (e.g., when R files use [\`base::source\`](https://stat.ethz.ch/R-manual/R-devel/library/base/html/source.html) to include other R files).
+All ids conform to the ${(0, doc_types_1.shortLink)('NodeId', info)} type.
+
+### Dataflow Graph Generation
+
+The core of the dataflow graph generation works as a "stateful [fold](https://en.wikipedia.org/wiki/Fold_(higher-order_function))",
+which uses the tree-like structure of the AST to combine the dataflow information of the children, while tracking the currently active variables and control flow
+information as a “backpack” (state).
+We use the ${(0, doc_types_1.shortLink)(extractor_1.produceDataFlowGraph.name, info)} function as an entry point to the dataflow generation (the actual fold entry is in ${(0, doc_types_1.shortLink)(processor_1.processDataflowFor.name, info)}).
+The function is mainly backed by its ${(0, doc_types_1.shortLink)('processors', info)} object which maps each type in the normalized AST to an appropriate handler ("fold-function").
+
+To understand these handlers, let's start with the simplest one: ${(0, doc_types_1.shortLink)(process_uninteresting_leaf_1.processUninterestingLeaf.name, info)} signals that
+we do not care about this node and just produce an empty dataflow information (using ${(0, doc_types_1.shortLink)(info_1.initializeCleanDataflowInformation.name, info)}).
+Looking at the function showcases the general structure of a processor:
+
+${(0, doc_types_1.printHierarchy)({ program, info, root: 'processUninterestingLeaf', maxDepth: 2, openTop: true })}
+
+Every processor has the same shape. It takes the normalized node (see the [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized-AST) for more information),
+and a ${(0, doc_types_1.shortLink)('DataflowProcessorInformation', info)} object which, as some kind of "backpack", carries global information
+to every handler.
+This information is to be used to create a ${(0, doc_types_1.shortLink)('DataflowInformation', info)}:
+
+${(0, doc_types_1.printHierarchy)({ program, info, root: 'DataflowInformation', maxDepth: 2 })}
+
+Essentially, these processors should use the dataflow information from their children combined with their own semantics
+to produce a new dataflow information to pass upwards in the fold. The ${(0, doc_types_1.shortLink)('DataflowInformation', info)} contains:
+
+* the ${(0, doc_types_1.shortLink)('DataflowGraph', info)} of the current subtree
+* the currently active ${(0, doc_types_1.shortLink)('REnvironmentInformation', info)} as an abstraction of all active definitions linking to potential definition locations (see [Advanced R::Environments](https://adv-r.hadley.nz/environments.html))
+* control flow information in ${(0, doc_types_1.shortLink)('DataflowCfgInformation', info)} which is used to enrich the dataflow information with control flow information
+* and sets of currently ingoing (read), outgoing (write) and unknown ${(0, doc_types_1.shortLink)('IdentifierReference', info)}s.
+
+While all of them are essentially empty when processing an “uninteresting leaf”, handling a constant is slightly more interesting with ${(0, doc_types_1.shortLink)('processValue', info)}:
+
+${(0, doc_types_1.printHierarchy)({ program, info, root: 'processValue', maxDepth: 2, openTop: true })}
+
+Please note that we add the [value vertex](${doc_files_1.FlowrWikiBaseRef}/Dataflow-Graph#value-vertex) to the newly created dataflow graph,
+which holds a reference to the constant. If you are confused by the use of the ${(0, doc_types_1.shortLink)('ParentInformation', info)} type,
+this stems from the [AST decoration](#normalization) and signals that we have a decorated ${(0, doc_types_1.shortLink)('RNode', info)} (which may have additional information in \`OtherInfo\`).
+
+Yet again, this is not very interesting. When looking at the ${(0, doc_types_1.shortLink)('processors', info)} object you may be confused by
+many lines just mapping the node to the ${(0, doc_types_1.shortLink)('processAsNamedCall', info)} function.
+This is because during the dataflow analysis we actually "desugar" the AST, and treat syntax constructs like binary operators (e.g., \`x + y\`) as function calls (e.g. \`\` \`+\`(x, y) \`\`).
+We do this because R does it the same way and even allows overwriting these operators (including \`if\`, \`<-\`, etc.) by their name.
+By treating them like R, as function calls, we get support for these overwrites for free, courtesy of flowR's call resolution.
+
+But where are all the interesting things handled then?
+For that, we want to have a look at the built-in environment, which can be freely configured using flowR's [configuration system](${doc_files_1.FlowrWikiBaseRef}/Interface#configuring-flowr).
+FlowR's heart and soul resides in the ${(0, doc_types_1.shortLink)('DefaultBuiltinConfig', info)} object, which is used to configure the built-in environment
+by mapping function names to ${(0, doc_types_1.shortLink)('BuiltInProcessorMapper', info)} functions.
+There you can find functions like ${(0, doc_types_1.shortLink)(built_in_access_1.processAccess.name, info)} which handles the (subset) access to a variable,
+or ${(0, doc_types_1.shortLink)(built_in_for_loop_1.processForLoop.name, info)} which handles the primitive for loop construct (whenever it is not overwritten).
+
+Just as an example, we want to have a look at the ${(0, doc_types_1.shortLink)(built_in_repeat_loop_1.processRepeatLoop.name, info)} function, as it is one of the simplest built-in processors
+we have:
+
+${(0, doc_types_1.printHierarchy)({ program, info, root: 'processRepeatLoop', maxDepth: 2, openTop: true })}
+
+Similar to any other built-in processor, we get the name of the function call which caused us to land here,
+as well as the passed arguments. The \`rootId\` refers to what caused the call to happen (and is usually just the function call),
+while \`data\` is our good old backpack, carrying all the information we need to produce a dataflow graph.
+
+After a couple of common sanity checks at the beginning which we use to check whether the repeat loop is used in a way that we expect,
+we start by issuing the fold continuation by processing its arguments. Given we expect \`repeat <body>\`, we expect only a single argument.
+During the processing we make sure to stitch in the correct control dependencies, adding the repeat loop to the mix.
+For just the repeat loop the stitching is actually not necessary, but this way the handling is consistent for all looping constructs.
+
+Afterward, we take the \`processedArguments\`, perform another round of sanity checks and then use two special functions to apply the
+semantic effects of the repeat loop. We first use one of flowR's linkers to
+${(0, doc_types_1.shortLink)(linker_1.linkCircularRedefinitionsWithinALoop.name, info)} and then retrieve the active exit points with ${(0, doc_types_1.shortLink)(info_1.filterOutLoopExitPoints.name, info)}.
+
+Feel free to have a look around and explore the other handlers for now. Each of them uses the results of its children alongside the active backpack
+to produce a new dataflow information.
+
+## Beyond the Dataflow Graph
+
+Given the [dataflow graph](${doc_files_1.FlowrWikiBaseRef}/Dataflow-Graph), you can do a lot more!
+You can issue [queries](${doc_files_1.FlowrWikiBaseRef}/Query-API) to explore the graph, [search](${doc_files_1.FlowrWikiBaseRef}/Search-API) for specific elements, or, for example, request a [static backward slice](#static-backward-slicing).
+Of course, all of these endeavors work not just with the ${(0, doc_types_1.shortLink)(shell_1.RShell.name, info)} but also with the [\`tree-sitter\` engine](${doc_files_1.FlowrWikiBaseRef}/Engines).
+
+### Static Backward Slicing
+
+The slicing is available as an extra step as you can see by inspecting the ${(0, doc_types_1.shortLink)('DEFAULT_SLICING_PIPELINE', info)}.
+Besides ${(0, doc_types_1.shortLink)('STATIC_SLICE', info)} it contains a ${(0, doc_types_1.shortLink)('NAIVE_RECONSTRUCT', info)} to print the slice as (executable) R code.
+
+Your main point of interest here is the ${(0, doc_types_1.shortLink)(static_slicer_1.staticSlicing.name, info)} function which relies on a modified
+breadth-first search to collect all nodes which are part of the slice.
+For more information on how the slicing works, please refer to the [tool demonstration (Section 3.2)](https://doi.org/10.1145/3691620.3695359),
+or the [original master's thesis (Chapter 4)](https://doi.org/10.18725/OPARU-50107).
+
+You can explore the slicing using the REPL with the ${(0, doc_cli_option_1.getReplCommand)('slicer')} command:
+
+${await (0, doc_repl_1.documentReplSession)(shell, [{
+command: ':slicer test/testfiles/example.R --criterion "12@product"',
+description: 'Slice for the example file for the variable "prod" in line 12.'
+}], { openOutput: true })}
+
+## Helpful Things
+
+### Getting flowR to Talk
+
+When using flowR from the CLI, you can use the ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'verbose')} option to get more information about what flowR is doing.
+While coding, however, you can use the ${log_1.setMinLevelOfAllLogs.name} function to set the minimum level of logs to be displayed (this works with the ${(0, doc_types_1.shortLink)(log_2.FlowrLogger.name, info)} abstraction).
+In general, you can configure the levels of individual logs, such as the general \`log\` (obtained with ${(0, doc_types_1.shortLink)('getActiveLog', info)}) or the ${(0, doc_types_1.shortLink)('parseLog', info)}.
+Please note that flowR makes no guarantees that log outputs are persistent across versions, and it is up to the implementors to provide sensible logging.
+If you are an implementor and want to add logging, please make sure that there are no larger runtime implications when logging is disabled.
+Have a look at the ${(0, doc_types_1.shortLink)(log_2.expensiveTrace.name, info)} function for example, which uses a function to generate the log message only when the log level is reached.
+
+`;
+}
+/** if we run this script, we want a Markdown representation of the capabilities */
+if (require.main === module) {
+void tree_sitter_executor_1.TreeSitterExecutor.initTreeSitter().then(() => {
+(0, log_1.setMinLevelOfAllLogs)(6 /* LogLevel.Fatal */);
+const shell = new shell_1.RShell();
+void getText(shell).then(str => {
+console.log(str);
+}).finally(() => {
+shell.close();
+});
+});
+}
+//# sourceMappingURL=print-core-wiki.js.map
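The new wiki text above recommends `createDataflowPipeline` from `default-pipelines` as the engine-agnostic entry point, and elsewhere calls `createNormalizePipeline`/`createParsePipeline` with a parser and a request. A minimal sketch of how a consumer of this package might call it; the import subpaths mirror the file layout listed at the top of this diff and the exact option shape is an assumption:

```typescript
// Sketch only: names come from this diff; import subpaths and option shape are assumptions.
import { createDataflowPipeline } from '@eagleoutice/flowr/core/steps/pipeline/default-pipelines';
import { requestFromInput } from '@eagleoutice/flowr/r-bridge/retriever';
import { TreeSitterExecutor } from '@eagleoutice/flowr/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor';

async function dataflowFor(code: string) {
    // the tree-sitter engine is initialized once, as done at the bottom of print-core-wiki.js
    await TreeSitterExecutor.initTreeSitter();
    // createDataflowPipeline picks DEFAULT_DATAFLOW_PIPELINE or TREE_SITTER_DATAFLOW_PIPELINE
    // depending on the parser that is passed in
    const result = await createDataflowPipeline(new TreeSitterExecutor(), {
        request: requestFromInput(code)
    }).allRemainingSteps();
    return result.dataflow;
}
```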
package/documentation/print-dataflow-graph-wiki.js
@@ -92,7 +92,7 @@ async function getVertexExplanations(shell, vertexType) {
Describes a constant value (numbers, booleans/logicals, strings, ...).
In general, the respective vertex is more or less a dummy vertex as you can see from its implementation.

-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'DataflowGraphVertexValue' })}

${(0, doc_structure_1.block)({
type: 'NOTE',
@@ -122,7 +122,7 @@ Describes symbol/variable references which are read (or potentially read at a gi
Similar to the [value vertex](#value-vertex) described above, this is more a marker vertex as
you can see from the implementation.

-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'DataflowGraphVertexUse' })}

${(0, doc_structure_1.block)({
type: 'NOTE',
@@ -168,12 +168,12 @@ Describes any kind of function call, including unnamed calls and those that happ
In general the vertex provides you with information about
the _name_ of the called function, the passed _arguments_, and the _environment_ in which the call happens (if it is of importance).

-However, the implementation reveals that it may hold an additional \`onlyBuiltin\` flag to indicate that the call is only calling builtin functions — however, this is only a flag to improve performance
+However, the implementation reveals that it may hold an additional \`onlyBuiltin\` flag to indicate that the call is only calling builtin functions — however, this is only a flag to improve performance,
and it should not be relied on as it may under-approximate the actual calling targets (e.g., being \`false\` even though all calls resolve to builtins).

-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'DataflowGraphVertexFunctionCall' })}
The related function argument references are defined like this:
-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'FunctionArgument' })}


${(0, doc_structure_1.details)('Example: Simple Function Call (unresolved)', await (async () => {
@@ -214,7 +214,7 @@ In other words, we classify the references as ${(0, doc_general_1.lastJoin)(call
}), ', ', ', and ')}.
For more information on the types of references, please consult the implementation.

-${(0, doc_types_1.printHierarchy)({ program: identifierType.program,
+${(0, doc_types_1.printHierarchy)({ program: identifierType.program, info: identifierType.info, root: 'ReferenceType' })}
`;
})())}

@@ -289,7 +289,7 @@ However, they are actually linked with the call of the built-in function \`{\` (

${(0, doc_structure_1.details)('3) the function resolves to a mix of both', `

-Users may write
+Users may write… interesting pieces of code - for reasons we should not be interested in!
Consider a case in which you have a built-in function (like the assignment operator \`<-\`) and a user that wants to redefine the meaning of the function call _sometimes_:

${await (async () => {
@@ -333,7 +333,7 @@ Function calls are the most complicated mechanism in R as essentially everything
Even **control structures** like \`if(p) a else b\` are desugared into function calls (e.g., as \`if\`(p, a, b)).
${(0, doc_structure_1.details)('Example: <code>if</code> as a Function Call', await (0, doc_dfg_1.printDfGraphForCode)(shell, 'if(p) a else b'))}

-Similarly you should be aware of calls to **anonymous functions**, which may appear given directly (e.g. as \`(function() 1)()\`) or indirectly, with code
+Similarly, you should be aware of calls to **anonymous functions**, which may appear given directly (e.g. as \`(function() 1)()\`) or indirectly, with code
directly calling the return of another function call: \`foo()()\`.
${(0, doc_structure_1.details)('Example: Anonymous Function Call (given directly)', await (0, doc_dfg_1.printDfGraphForCode)(shell, '(function() 1)()', { mark: new Set([6, '6->4']) }))}

@@ -358,7 +358,7 @@ ${(0, doc_structure_1.details)('Example: Super Definition (<code><<-</code>)', a

The implementation is relatively sparse and similar to the other marker vertices:

-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'DataflowGraphVertexVariableDefinition' })}

Of course, there are not just operators that define variables, but also functions, like \`assign\`.

@@ -399,11 +399,11 @@ As you can see, _flowR_ is able to recognize that the initial definition of \`x\
Defining a function does do a lot of things: 1) it creates a new scope, 2) it may introduce parameters which act as promises and which are only evaluated if they are actually required in the body, 3) it may access the enclosing environments and the callstack.
The vertex object in the dataflow graph stores multiple things, including all exit points, the enclosing environment if necessary, and the information of the subflow (the "body" of the function).

-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'DataflowGraphVertexFunctionDefinition' })}
The subflow is defined like this:
-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'DataflowFunctionFlowInformation' })}
And if you are interested in the exit points, they are defined like this:
-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'ExitPoint' })}


Whenever we visualize a function definition, we use a dedicated node to represent the anonymous function object,
@@ -481,7 +481,7 @@ Besides this being a theoretically "shorter" way of defining a function, this be
}
return results.join('\n');
}
-async function getEdgesExplanations(shell) {
+async function getEdgesExplanations(shell, vertexType) {
const edgeExplanations = new Map();
edgeExplanations.set(edge_1.EdgeType.Reads, [{
shell,
@@ -495,8 +495,8 @@ ${(0, doc_structure_1.block)({
content: `
A ${linkEdgeName(edge_1.EdgeType.Reads)} edge is not a transitive closure and only links the "directly read" definition(s).
Our abstract domains resolving transitive ${linkEdgeName(edge_1.EdgeType.Reads)} edges (and for that matter, following ${linkEdgeName(edge_1.EdgeType.Returns)} as well)
-are currently tailored to what we need in _flowR_. Hence we offer a function like
-as well as
+are currently tailored to what we need in _flowR_. Hence, we offer a function like ${(0, doc_types_1.shortLink)(linker_1.getAllFunctionCallTargets.name, vertexType.info)} (defined in ${(0, doc_files_1.getFilePathMd)('../dataflow/internal/linker.ts')}),
+as well as ${(0, doc_types_1.shortLink)(resolve_by_name_1.resolvesToBuiltInConstant.name, vertexType.info)} (defined in ${(0, doc_files_1.getFilePathMd)('../dataflow/environments/resolve-by-name.ts')}) which do this for specific cases.

${(0, doc_structure_1.details)('Example: Multi-Level Reads', await (0, doc_dfg_1.printDfGraphForCode)(shell, 'x <- 3\ny <- x\nprint(y)', { mark: new Set(['9->7', '7->3', '4->0']) }))}

@@ -557,7 +557,7 @@ However, nested definitions can carry it (in the nested case, \`x\` is defined b
shell,
name: 'Returns Edge',
type: edge_1.EdgeType.Returns,
-description: 'Link the [function call](#function-call-vertex)
+description: 'Link the [function call](#function-call-vertex) to the exit points of the target definition (this may incorporate the call-context).',
code: 'foo <- function() x\nfoo()',
expectedSubgraph: (0, dataflowgraph_builder_1.emptyGraph)().returns('2@foo', '1@x')
}, []]);
@@ -582,7 +582,7 @@ f()

${dfInfo}

-The final call evaluates to \`3\` (similar to if we
+The final call evaluates to \`3\` (similar to if we defined \`x\` before the function definition).
Within a dataflow graph you can see this with two edges. The \`x\` within the function body will have a ${linkEdgeName(edge_1.EdgeType.DefinedByOnCall)}
to every definition it _may_ refer to. In turn, each call vertex calling the function which encloses the use of \`x\` will have a
${linkEdgeName(edge_1.EdgeType.DefinesOnCall)} edge to the definition(s) it causes to be active within the function body.
@@ -607,7 +607,7 @@ ${dfInfo}
type: edge_1.EdgeType.Argument,
description: `Links a [function call](#function-call-vertex) to the entry point of its arguments. If we do not know the target of such a call, we automatically assume that all arguments are read by the call as well!

-The exception to this is the [function definition](#function-definition-vertex) which does no longer hold these argument relationships (as they are
+The exception to this is the [function definition](#function-definition-vertex) which does no longer hold these argument relationships (as they are not implicit in the structure).
`,
code: 'f(x,y)',
expectedSubgraph: (0, dataflowgraph_builder_1.emptyGraph)().argument('1@f', '1@x').reads('1@f', '1@x').argument('1@f', '1@y').reads('1@f', '1@y')
@@ -678,7 +678,7 @@ async function getText(shell) {
const rversion = (await shell.usedRVersion())?.format() ?? 'unknown';
/* we collect type information on the graph */
const vertexType = (0, doc_types_1.getTypesFromFolderAsMermaid)({
-
+rootFolder: path_1.default.resolve('./src/'),
typeName: 'DataflowGraphVertexInfo',
inlineTypes: ['MergeableRecord']
});
@@ -689,8 +689,8 @@ async function getText(shell) {
});
return `${(0, doc_auto_gen_1.autoGenHeader)({ filename: module.filename, purpose: 'dataflow graph', rVersion: rversion })}

-This page briefly summarizes flowR's dataflow graph, represented by ${(0, doc_types_1.shortLink)(
-In case you want to manually build such a graph (e.g., for testing), you can use the
+This page briefly summarizes flowR's dataflow graph, represented by the ${(0, doc_types_1.shortLink)(graph_1.DataflowGraph.name, vertexType.info)}.
+In case you want to manually build such a graph (e.g., for testing), you can use the ${(0, doc_types_1.shortLink)(dataflowgraph_builder_1.DataflowGraphBuilder.name, vertexType.info)}.
This wiki page focuses on explaining what such a dataflow graph looks like!

Please be aware that the accompanied [dataflow information](#dataflow-information) returned by _flowR_ contains things besides the graph,
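The completed sentence above points readers at the `DataflowGraphBuilder` for constructing graphs by hand (e.g., in tests), and the `expectedSubgraph` snippets visible in these hunks use its `emptyGraph()` helper. A minimal sketch of that pattern; the builder calls mirror the snippets in this diff, while the import subpath is an assumption:

```typescript
// Sketch only: emptyGraph() and the .argument()/.reads() calls mirror the expectedSubgraph
// snippets in this diff; the import subpath is an assumption.
import { emptyGraph } from '@eagleoutice/flowr/dataflow/graph/dataflowgraph-builder';

// expected dataflow for `f(x,y)` when the call target of `f` is unknown:
// the call is assumed to read both of its arguments
const expected = emptyGraph()
    .argument('1@f', '1@x').reads('1@f', '1@x')
    .argument('1@f', '1@y').reads('1@f', '1@y');
```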
@@ -702,7 +702,7 @@ Additionally, you may be interested in the set of [Unknown Side Effects](#unknow
> you can either use the [Visual Studio Code extension](${doc_files_1.FlowrGithubBaseRef}/vscode-flowr) or the ${(0, doc_cli_option_1.getReplCommand)('dataflow*')}
> command in the REPL (see the [Interface wiki page](${doc_files_1.FlowrWikiBaseRef}/Interface) for more information). When using _flowR_ as a library, you may use the functions in ${(0, doc_files_1.getFilePathMd)('../util/mermaid/dfg.ts')}.
>
-> If you receive a dataflow graph in its serialized form (e.g., by talking to a [_flowR_ server](${doc_files_1.FlowrWikiBaseRef}/Interface)), you can use
+> If you receive a dataflow graph in its serialized form (e.g., by talking to a [_flowR_ server](${doc_files_1.FlowrWikiBaseRef}/Interface)), you can use ${(0, doc_types_1.shortLink)(`${graph_1.DataflowGraph.name}::${graph_1.DataflowGraph.fromJson.name}`, vertexType.info, true, 'i')} to retrieve the graph from the JSON representation.

${await (0, doc_dfg_1.printDfGraphForCode)(shell, 'x <- 3\ny <- x + 1\ny')}

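The completed sentence above names `DataflowGraph::fromJson` as the way to rehydrate a graph received in serialized form (e.g., from a flowR server). A minimal sketch of that call; the import subpath and the shape of the server payload are assumptions:

```typescript
// Sketch only: DataflowGraph.fromJson is named in this diff; the import subpath is an assumption.
import { DataflowGraph } from '@eagleoutice/flowr/dataflow/graph/graph';

// `serialized` stands for the graph portion of a flowR server response (payload shape assumed)
declare const serialized: Parameters<typeof DataflowGraph.fromJson>[0];

const graph: DataflowGraph = DataflowGraph.fromJson(serialized);
```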
@@ -743,9 +743,9 @@ The following sections present details on the different types of vertices and ed
> [!NOTE]
> Every dataflow vertex holds an \`id\` which links it to the respective node in the [normalized AST](${doc_files_1.FlowrWikiBaseRef}/Normalized%20AST).
> So if you want more information about the respective vertex, you can usually access more information
-> using the
+> using the <code>${(0, doc_types_1.shortLink)(`${graph_1.DataflowGraph.name}`, vertexType.info, false, 'i')}::idMap</code> linked to the dataflow graph:
${(0, doc_general_1.prefixLines)((0, doc_code_1.codeBlock)('ts', 'const node = graph.idMap.get(id);'), '> ')}
-> In case you just need the name (\`lexeme\`) of the respective vertex, ${
+> In case you just need the name (\`lexeme\`) of the respective vertex, ${(0, doc_types_1.shortLink)(node_id_1.recoverName.name, vertexType.info)} can help you out:
${(0, doc_general_1.prefixLines)((0, doc_code_1.codeBlock)('ts', `const name = ${node_id_1.recoverName.name}(id, graph.idMap);`), '> ')}

## Vertices
@@ -754,7 +754,7 @@ ${await getVertexExplanations(shell, vertexType)}

## Edges

-${await getEdgesExplanations(shell)}
+${await getEdgesExplanations(shell, vertexType)}

## Control Dependencies

@@ -778,7 +778,7 @@ ${(0, doc_structure_1.details)('Example: Nested Conditionals', await (0, doc_dfg
## Dataflow Information

Using _flowR's_ code interface (see the [Interface](${doc_files_1.FlowrWikiBaseRef}/Interface) wiki page for more), you can generate the dataflow information
-for a given piece of R code (in this case \`x <- 1; x + 1\`) as follows:
+for a given piece of R code (in this case \`x <- 1; x + 1\`) as follows (using the ${(0, doc_types_1.shortLink)(shell_1.RShell.name, vertexType.info)} and the ${(0, doc_types_1.shortLink)(pipeline_executor_1.PipelineExecutor.name, vertexType.info)} classes):

${(0, doc_code_1.codeBlock)('ts', `
const shell = new ${shell_1.RShell.name}()
@@ -821,7 +821,7 @@ ${(0, doc_code_1.codeBlock)('text', JSON.stringify(result.dataflow, json_1.jsonR

You may be interested in its implementation:

-${(0, doc_types_1.printHierarchy)({ program: vertexType.program,
+${(0, doc_types_1.printHierarchy)({ program: vertexType.program, info: vertexType.info, root: 'DataflowInformation' })}

Let's start by looking at the properties of the dataflow information object: ${Object.keys(result.dataflow).map(k => `\`${k}\``).join(', ')}.

package/documentation/print-interface-wiki.js
@@ -133,7 +133,7 @@ use ${(0, doc_cli_option_1.getReplCommand)('dataflow*')} (or ${(0, doc_cli_optio

${await (0, doc_repl_1.documentReplSession)(shell, [{
command: ':dataflow* y <- 1 + x',
-description: `Retrieve the dataflow graph of the expression \`y <- 1 + x\`. It looks like this:\n${await (0, doc_dfg_1.printDfGraphForCode)(shell, 'y <- 1 + x')}
+description: `Retrieve the dataflow graph of the expression \`y <- 1 + x\`. It looks like this:\n${await (0, doc_dfg_1.printDfGraphForCode)(shell, 'y <- 1 + x')}`
}])}

For the slicing with ${(0, doc_cli_option_1.getReplCommand)('slicer')}, you have access to the same [magic comments](#slice-magic-comments) as with the [slice request](#message-request-slice).
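The slicing section added in print-core-wiki.js above introduces the DEFAULT_SLICING_PIPELINE (STATIC_SLICE plus NAIVE_RECONSTRUCT) and the `line@variable` criterion format used by the `:slicer` command referenced here. A sketch of the programmatic counterpart; the `parser`/`criterion` option names, import subpaths, and result fields are assumptions not confirmed by this diff:

```typescript
// Sketch only: pipeline and step names come from this diff; option names, import subpaths,
// and result fields are assumptions.
import { PipelineExecutor } from '@eagleoutice/flowr/core/pipeline-executor';
import { DEFAULT_SLICING_PIPELINE } from '@eagleoutice/flowr/core/steps/pipeline/default-pipelines';
import { requestFromInput } from '@eagleoutice/flowr/r-bridge/retriever';
import { RShell } from '@eagleoutice/flowr/r-bridge/shell';

async function sliceExample() {
    const shell = new RShell();
    try {
        const result = await new PipelineExecutor(DEFAULT_SLICING_PIPELINE, {
            parser:    shell,
            request:   requestFromInput('x <- 1\ny <- 2\nprint(x)'),
            criterion: ['3@x'] // `line@variable`, mirroring the `12@product` criterion used with :slicer above
        }).allRemainingSteps();
        console.log(result.reconstruct.code); // assumed field: the sliced, executable R code from NAIVE_RECONSTRUCT
    } finally {
        shell.close();
    }
}
```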
|