npm - @eagleoutice/flowr - Versions diffs - 2.9.11 → 2.9.13 - Mend

@eagleoutice/flowr 2.9.11 → 2.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95)

package/README.md +31 -31
package/benchmark/slicer.d.ts +4 -2
package/benchmark/slicer.js +20 -6
package/benchmark/stats/print.js +12 -0
package/benchmark/stats/stats.d.ts +3 -2
package/benchmark/stats/stats.js +1 -1
package/benchmark/summarizer/data.d.ts +1 -0
package/benchmark/summarizer/second-phase/process.js +5 -0
package/cli/benchmark-app.d.ts +1 -0
package/cli/benchmark-app.js +1 -0
package/cli/benchmark-helper-app.d.ts +2 -1
package/cli/benchmark-helper-app.js +6 -3
package/cli/common/options.d.ts +8 -0
package/cli/common/options.js +3 -1
package/cli/common/scripts-info.d.ts +8 -0
package/cli/export-quads-app.js +1 -1
package/cli/flowr.js +3 -3
package/cli/repl/core.d.ts +3 -3
package/cli/repl/server/connection.d.ts +2 -2
package/cli/repl/server/server.d.ts +2 -2
package/cli/script-core/statistics-core.d.ts +2 -2
package/cli/script-core/statistics-helper-core.d.ts +2 -2
package/cli/script-core/statistics-helper-core.js +1 -1
package/cli/slicer-app.js +2 -2
package/cli/statistics-app.js +1 -1
package/cli/statistics-helper-app.js +1 -1
package/cli/wiki.js +2 -2
package/config.d.ts +65 -24
package/config.js +197 -161
package/control-flow/extract-cfg.js +5 -8
package/core/steps/pipeline-step.d.ts +2 -2
package/dataflow/cluster.js +12 -8
package/dataflow/eval/resolve/alias-tracking.js +12 -15
package/dataflow/graph/graph.js +8 -8
package/dataflow/graph/quads.js +4 -7
package/dataflow/internal/linker.js +5 -5
package/dataflow/internal/process/functions/call/built-in/built-in-eval.js +2 -2
package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts +1 -1
package/dataflow/internal/process/functions/call/built-in/built-in-source.js +20 -9
package/documentation/doc-readme.js +2 -2
package/documentation/wiki-analyzer.js +7 -5
package/documentation/wiki-core.js +1 -3
package/documentation/wiki-dataflow-graph.js +87 -32
package/documentation/wiki-engine.js +18 -0
package/documentation/wiki-interface.js +5 -3
package/documentation/wiki-linter.js +5 -5
package/documentation/wiki-mk/doc-context.d.ts +44 -11
package/documentation/wiki-mk/doc-context.js +19 -17
package/engines.d.ts +2 -2
package/engines.js +4 -4
package/linter/rules/dataframe-access-validation.js +5 -5
package/linter/rules/naming-convention.d.ts +1 -1
package/linter/rules/naming-convention.js +7 -3
package/package.json +3 -1
package/project/context/flowr-analyzer-context.d.ts +6 -6
package/project/context/flowr-analyzer-context.js +2 -2
package/project/context/flowr-analyzer-files-context.d.ts +2 -2
package/project/context/flowr-analyzer-files-context.js +28 -8
package/project/flowr-analyzer-builder.d.ts +10 -6
package/project/flowr-analyzer-builder.js +12 -3
package/project/flowr-analyzer.d.ts +3 -3
package/queries/catalog/config-query/config-query-format.d.ts +5 -5
package/queries/catalog/dependencies-query/function-info/library-functions.js +2 -1
package/queries/catalog/dependencies-query/function-info/read-functions.js +1 -1
package/queries/catalog/dependencies-query/function-info/visualize-functions.js +9 -1
package/queries/catalog/dependencies-query/function-info/write-functions.js +1 -0
package/queries/catalog/df-shape-query/df-shape-query-format.d.ts +2 -2
package/queries/catalog/does-call-query/does-call-query-format.d.ts +2 -2
package/queries/catalog/files-query/files-query-format.d.ts +3 -3
package/queries/catalog/inspect-exceptions-query/inspect-exception-query-format.d.ts +2 -2
package/queries/catalog/inspect-higher-order-query/inspect-higher-order-query-format.d.ts +2 -2
package/queries/catalog/inspect-recursion-query/inspect-recursion-query-format.d.ts +2 -2
package/queries/catalog/linter-query/linter-query-format.d.ts +3 -3
package/queries/catalog/location-map-query/location-map-query-format.d.ts +2 -2
package/queries/catalog/origin-query/origin-query-format.d.ts +2 -2
package/queries/catalog/resolve-value-query/resolve-value-query-executor.js +3 -3
package/queries/catalog/resolve-value-query/resolve-value-query-format.d.ts +2 -2
package/queries/catalog/resolve-value-query/resolve-value-query-format.js +4 -0
package/queries/catalog/static-slice-query/static-slice-query-format.d.ts +2 -2
package/queries/query.d.ts +18 -18
package/r-bridge/lang-4.x/ast/model/model.d.ts +7 -2
package/r-bridge/lang-4.x/ast/model/model.js +13 -0
package/r-bridge/lang-4.x/ast/parser/json/parser.d.ts +2 -2
package/r-bridge/lang-4.x/ast/parser/json/parser.js +2 -2
package/r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize.js +6 -2
package/statistics/statistics.d.ts +2 -2
package/util/mermaid/dfg.d.ts +8 -0
package/util/mermaid/dfg.js +4 -0
package/util/objects.d.ts +12 -0
package/util/objects.js +28 -0
package/util/quads.js +14 -6
package/util/range.d.ts +3 -0
package/util/range.js +3 -0
package/util/summarizer.js +1 -1
package/util/version.js +1 -1

package/control-flow/extract-cfg.js CHANGED Viewed

@@ -292,14 +292,11 @@ function cfgFor(forLoop, variable, vector, body) {
     for (const breakPoint of body.breaks) {
         graph.addEdge(control_flow_graph_1.CfgVertex.toExitId(forLoopId), breakPoint, control_flow_graph_1.CfgEdge.makeFd());
     }
-    const isNotEndless = body.exitPoints.length > 0 || body.breaks.length > 0;
-    if (isNotEndless) {
-        graph.addVertex(control_flow_graph_1.CfgVertex.makeExitMarker(forLoopId));
-        for (const e of variable.exitPoints) {
-            graph.addEdge(control_flow_graph_1.CfgVertex.toExitId(forLoopId), e, control_flow_graph_1.CfgEdge.makeCdFalse(forLoopId));
-        }
+    graph.addVertex(control_flow_graph_1.CfgVertex.makeExitMarker(forLoopId));
+    for (const e of variable.exitPoints) {
+        graph.addEdge(control_flow_graph_1.CfgVertex.toExitId(forLoopId), e, control_flow_graph_1.CfgEdge.makeCdFalse(forLoopId));
     }
-    return { graph, breaks: [], nexts: [], returns: body.returns, exitPoints: isNotEndless ? [control_flow_graph_1.CfgVertex.toExitId(forLoopId)] : [], entryPoints: [forLoopId] };
+    return { graph, breaks: [], nexts: [], returns: body.returns, exitPoints: [control_flow_graph_1.CfgVertex.toExitId(forLoopId)], entryPoints: [forLoopId] };
 }
 function cfgFunctionDefinition(fn, params, body) {
     const fnId = fn.info.id;
@@ -336,7 +333,7 @@ function cfgFunctionDefinition(fn, params, body) {
     return { graph: graph, breaks: [], nexts: [], returns: [], exitPoints: [fnId], entryPoints: [fnId] };
 }
 function cfgFunctionCall(call, name, args, down) {
-    if (call.named && call.functionName.content === 'ifelse') {
+    if (call.named && call.functionName.content === 'ifelse' && args.length > 1) {
         // special built-in handling for ifelse as it is an expression that does not short-circuit
         return cfgIfThenElse(call, args[0] === r_function_call_1.EmptyArgument ? (0, control_flow_graph_1.emptyControlFlowInformation)() : args[0], args[1] === r_function_call_1.EmptyArgument ? (0, control_flow_graph_1.emptyControlFlowInformation)() : args[1], args[2] === r_function_call_1.EmptyArgument ? (0, control_flow_graph_1.emptyControlFlowInformation)() : args[2]);
     }

package/core/steps/pipeline-step.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@
  */
 import type { MergeableRecord } from '../../util/objects';
 import type { InternalStepPrinter, IPipelineStepPrinter, StepOutputFormat } from '../print/print';
-import type { FlowrConfigOptions } from '../../config';
+import type { FlowrConfig } from '../../config';
 /**
  * This represents the format of a step processor which retrieves two things:
  *
@@ -16,7 +16,7 @@ import type { FlowrConfigOptions } from '../../config';
  * list all steps that you require as your {@link IPipelineStepOrder#dependencies|dependencies}, even if they would be
  * already covered transitively.
  */
-export type StepProcessingFunction = (results: Record<string, unknown>, input: Record<string, unknown>, config: FlowrConfigOptions) => unknown;
+export type StepProcessingFunction = (results: Record<string, unknown>, input: Record<string, unknown>, config: FlowrConfig) => unknown;
 /**
  * This represents the required execution frequency of a step.
  */

package/dataflow/cluster.js CHANGED Viewed

@@ -30,18 +30,22 @@ function makeCluster(graph, from, notReached) {
     if (info.tag === vertex_1.VertexType.FunctionDefinition) {
         for (const { nodeId } of info.exitPoints) {
             if (notReached.delete(nodeId)) {
-                makeCluster(graph, nodeId, notReached).forEach(n => nodes.add(n));
+                for (const m of makeCluster(graph, nodeId, notReached)) {
+                    nodes.add(m);
+                }
             }
         }
     }
     // cluster adjacent edges
-    for (const [dest, e] of [...graph.outgoingEdges(from) ?? [], ...graph.ingoingEdges(from) ?? []]) {
-        // don't cluster for function content if it isn't returned
-        if (edge_1.DfEdge.doesNotIncludeType(e, edge_1.EdgeType.Returns) && info.onlyBuiltin && info.name === '{') {
-            continue;
-        }
-        if (notReached.delete(dest)) {
-            makeCluster(graph, dest, notReached).forEach(n => nodes.add(n));
+    for (const edges of [graph.outgoingEdges(from), graph.ingoingEdges(from)]) {
+        for (const [dest, e] of edges ?? []) {
+            // don't cluster for function content if it isn't returned
+            if (edge_1.DfEdge.doesNotIncludeType(e, edge_1.EdgeType.Returns) && info.onlyBuiltin && info.name === '{') {
+                continue;
+            }
+            if (notReached.delete(dest)) {
+                makeCluster(graph, dest, notReached).forEach(n => nodes.add(n));
+            }
         }
     }
     return nodes;

package/dataflow/eval/resolve/alias-tracking.js CHANGED Viewed

@@ -34,7 +34,7 @@ function getFunctionCallAlias(sourceId, dataflow, environment) {
         return undefined;
     }
     const defs = (0, resolve_by_name_1.resolveByName)(identifier, environment, identifier_1.ReferenceType.Function);
-    if (defs === undefined || defs.length !== 1) {
+    if (defs?.length !== 1) {
         return undefined;
     }
     return [sourceId];
@@ -206,7 +206,7 @@ function trackAliasInEnvironments(identifier, environment, { blocked, idMap, res
             }
         }
     }
-    if (values.size == 0) {
+    if (values.size === 0) {
         return r_value_1.Top;
     }
     return (0, set_constants_1.setFrom)(...values);
@@ -246,18 +246,7 @@ function trackAliasInEnvironments(identifier, environment, { blocked, idMap, res
     }
 });
 function isNestedInLoop(node, ast) {
-    const parent = node?.info.parent;
-    if (node === undefined || !parent) {
-        return false;
-    }
-    const parentNode = ast.get(parent);
-    if (parentNode === undefined) {
-        return false;
-    }
-    if (parentNode.type === type_1.RType.WhileLoop || parentNode.type === type_1.RType.RepeatLoop || parentNode.type === type_1.RType.ForLoop) {
-        return true;
-    }
-    return isNestedInLoop(parentNode, ast);
+    return model_1.RNode.iterateParents(node, ast).some(model_1.RLoopConstructs.is);
 }
 /**
  * Please use {@link resolveIdToValue}
@@ -311,12 +300,14 @@ function trackAliasesInGraph(id, graph, ctx, idMap) {
         }
         const isFn = t === vertex_1.VertexType.FunctionCall;
         const outgoingEdges = graph.outgoingEdges(id) ?? [];
+        let foundRetuns = false;
         // travel all read and defined-by edges
         for (const [targetId, { types }] of outgoingEdges) {
             if (isFn) {
                 if (types === edge_1.EdgeType.Returns || types === edge_1.EdgeType.DefinedByOnCall || types === edge_1.EdgeType.DefinedBy) {
                     queue.add(targetId, baseEnvironment, cleanFingerprint, false);
                 }
+                foundRetuns ||= edge_1.DfEdge.includesType({ types }, edge_1.EdgeType.Returns);
                 continue;
             }
             // currently, they have to be exact!
@@ -324,6 +315,9 @@ function trackAliasesInGraph(id, graph, ctx, idMap) {
                 queue.add(targetId, baseEnvironment, cleanFingerprint, false);
             }
         }
+        if (isFn && !foundRetuns) {
+            return r_value_1.Top;
+        }
     }
     if (forceTop || resultIds.length === 0) {
         return r_value_1.Top;
@@ -332,10 +326,13 @@ function trackAliasesInGraph(id, graph, ctx, idMap) {
     for (const id of resultIds) {
         const node = idMap.get(id);
         if (node !== undefined) {
+            if (node.info.role === "param-v" /* RoleInParent.ParameterDefaultValue */ || model_1.RNode.iterateParents(node, idMap).some(p => p.info.role === "param-v" /* RoleInParent.ParameterDefaultValue */)) {
+                return r_value_1.Top;
+            }
             values.add((0, general_1.valueFromRNodeConstant)(node));
         }
     }
-    return (0, set_constants_1.setFrom)(...values);
+    return values.size === 0 ? r_value_1.Top : (0, set_constants_1.setFrom)(...values);
 }
 /**
  * Please use {@link resolveIdToValue}

package/dataflow/graph/graph.js CHANGED Viewed

@@ -402,17 +402,17 @@ class DataflowGraph {
         to = node_id_1.NodeId.normalize(to);
         const vertex = this.getVertex(from);
         (0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${from} to add control dependency`);
-        vertex.cds ??= [];
-        let hasControlDependency = false;
-        for (const { id, when: cond } of vertex.cds) {
-            if (id === to && when !== cond) {
-                hasControlDependency = true;
-                break;
+        if (vertex.cds) {
+            for (const { id, when: cond } of vertex.cds) {
+                if (id === to && when !== cond) {
+                    return this;
+                }
             }
         }
-        if (!hasControlDependency) {
-            vertex.cds.push({ id: to, when });
+        else {
+            vertex.cds = [];
         }
+        vertex.cds.push({ id: to, when });
         return this;
     }
     /** Marks the given node as having unknown side effects */

package/dataflow/graph/quads.js CHANGED Viewed

@@ -10,13 +10,10 @@ const edge_1 = require("./edge");
  */
 function df2quads(graph, config) {
     return (0, quads_1.graph2quads)({
-        rootIds: [...graph.rootIds()],
-        vertices: graph.vertices(true)
-            .map(([id, v]) => ({
-            ...v,
-            id
-        })).toArray(),
-        edges: graph.edges().flatMap(([fromId, targets]) => [...targets].map(([toId, info]) => ({
+        rootIds: Array.from(graph.rootIds()),
+        vertices: Array.from(graph.vertices(true)
+            .map(([, v]) => v)),
+        edges: graph.edges().flatMap(([fromId, targets]) => Array.from(targets).map(([toId, info]) => ({
             from: fromId,
             to: toId,
             type: Array.from(edge_1.DfEdge.typesToNames(info)),

package/dataflow/internal/linker.js CHANGED Viewed

@@ -395,18 +395,18 @@ function getAllLinkedFunctionDefinitions(functionDefinitionReadIds, dataflowGrap
             builtIns.add(cid);
             continue;
         }
-        const currentInfo = dataflowGraph.get(cid, true);
-        if (currentInfo === undefined) {
+        const vertex = dataflowGraph.getVertex(cid);
+        if (vertex === undefined) {
             continue;
         }
-        const [vertex, edges] = currentInfo;
         // Found a function definition
         if (vertex.subflow !== undefined) {
             result.add(vertex);
             continue;
         }
         let hasReturnEdge = false;
-        for (const [target, e] of edges) {
+        const outgoing = dataflowGraph.outgoingEdges(cid) ?? [];
+        for (const [target, e] of outgoing) {
             if (edge_1.DfEdge.includesType(e, edge_1.EdgeType.Returns)) {
                 hasReturnEdge = true;
                 if (!visited.has(target)) {
@@ -417,7 +417,7 @@ function getAllLinkedFunctionDefinitions(functionDefinitionReadIds, dataflowGrap
         if (vertex.tag === vertex_1.VertexType.FunctionCall || hasReturnEdge || (vertex.tag === vertex_1.VertexType.VariableDefinition && vertex.par)) {
             continue;
         }
-        for (const [target, e] of edges) {
+        for (const [target, e] of outgoing) {
             if (edge_1.DfEdge.includesType(e, LinkedFnFollowBits) && !visited.has(target)) {
                 potential.push(target);
             }

package/dataflow/internal/process/functions/call/built-in/built-in-eval.js CHANGED Viewed

@@ -45,13 +45,13 @@ function processEvalCall(name, args, rootId, data, config) {
         const idGenerator = (0, decorate_1.sourcedDeterministicCountingIdGenerator)(name.lexeme + '::' + rootId, name.location);
         data = {
             ...data,
-            cds: [...(data.cds ?? []), { id: rootId, when: true }]
+            cds: code.length > 1 ? [...(data.cds ?? []), { id: rootId, when: true }] : data.cds
         };
         const originalInfo = { ...information };
         const result = [];
         for (const c of code) {
             const codeRequest = (0, retriever_1.requestFromInput)(c);
-            const r = (0, built_in_source_1.sourceRequest)(rootId, codeRequest, data, originalInfo, idGenerator);
+            const r = (0, built_in_source_1.sourceRequest)(rootId, codeRequest, data, originalInfo, code.length > 1, idGenerator);
             result.push(r);
             // add a returns edge from the eval to the result
             for (const e of r.exitPoints) {

package/dataflow/internal/process/functions/call/built-in/built-in-source.d.ts CHANGED Viewed

@@ -45,7 +45,7 @@ export declare function processSourceCall<OtherInfo>(name: RSymbol<OtherInfo & P
  * Processes a source request with the given dataflow processor information and existing dataflow information
  * Otherwise, this can be an {@link RProjectFile} representing a standalone source file
  */
-export declare function sourceRequest<OtherInfo>(rootId: NodeId, request: RParseRequest | RProjectFile<OtherInfo & ParentInformation>, data: DataflowProcessorInformation<OtherInfo & ParentInformation>, information: DataflowInformation, getId?: IdGenerator<NoInfo>): DataflowInformation;
+export declare function sourceRequest<OtherInfo>(rootId: NodeId, request: RParseRequest | RProjectFile<OtherInfo & ParentInformation>, data: DataflowProcessorInformation<OtherInfo & ParentInformation>, information: DataflowInformation, makeMaybe: boolean, getId?: IdGenerator<NoInfo>): DataflowInformation;
 /**
  * Processes a standalone source file (i.e., not from a source function call)
  */

package/dataflow/internal/process/functions/call/built-in/built-in-source.js CHANGED Viewed

@@ -30,6 +30,7 @@ const r_value_1 = require("../../../../../eval/values/r-value");
 const unknown_side_effect_1 = require("../../../../../graph/unknown-side-effect");
 const alias_tracking_1 = require("../../../../../eval/resolve/alias-tracking");
 const built_in_1 = require("../../../../../environments/built-in");
+const edge_1 = require("../../../../../graph/edge");
 /**
  * Infers working directories based on the given option and reference chain
  */
@@ -117,8 +118,8 @@ function findSource(resolveSource, seed, data) {
             const effectivePath = explore ? path_1.default.join(explore, tryPath) : tryPath;
             const context = data.ctx.files;
             const get = context.exists(effectivePath, capitalization) ?? context.exists(returnPlatformPath(effectivePath), capitalization);
-            if (get && !found.includes(effectivePath)) {
-                found.push(returnPlatformPath(effectivePath));
+            if (get && !found.includes(returnPlatformPath(get))) {
+                found.push(returnPlatformPath(get));
             }
         }
     }
@@ -176,7 +177,7 @@ function processSourceCall(name, args, rootId, data, config) {
                 result = sourceRequest(rootId, {
                     request: 'file',
                     content: f
-                }, data, result, (0, decorate_1.sourcedDeterministicCountingIdGenerator)((findCount > 0 ? findCount + '::' : '') + f, name.location));
+                }, data, result, true, (0, decorate_1.sourcedDeterministicCountingIdGenerator)((findCount > 0 ? findCount + '::' : '') + f, name.location));
             }
             return result;
         }
@@ -189,7 +190,7 @@ function processSourceCall(name, args, rootId, data, config) {
  * Processes a source request with the given dataflow processor information and existing dataflow information
  * Otherwise, this can be an {@link RProjectFile} representing a standalone source file
  */
-function sourceRequest(rootId, request, data, information, getId) {
+function sourceRequest(rootId, request, data, information, makeMaybe, getId) {
     // parse, normalize and dataflow the sourced file
     let dataflow;
     let fst;
@@ -239,11 +240,21 @@ function sourceRequest(rootId, request, data, information, getId) {
     }
     // take the entry point as well as all the written references, and give them a control dependency to the source call to show that they are conditional
     if (!String(rootId).startsWith('file-')) {
-        if (dataflow.graph.hasVertex(dataflow.entryPoint)) {
-            dataflow.graph.addControlDependency(dataflow.entryPoint, rootId, true);
+        if (makeMaybe) {
+            if (dataflow.graph.hasVertex(dataflow.entryPoint)) {
+                dataflow.graph.addControlDependency(dataflow.entryPoint, rootId, true);
+            }
+            for (const out of dataflow.out) {
+                dataflow.graph.addControlDependency(out.nodeId, rootId, true);
+            }
         }
-        for (const out of dataflow.out) {
-            dataflow.graph.addControlDependency(out.nodeId, rootId, true);
+        else {
+            if (dataflow.graph.hasVertex(dataflow.entryPoint)) {
+                dataflow.graph.addEdge(dataflow.entryPoint, rootId, edge_1.EdgeType.Reads);
+            }
+            for (const out of dataflow.out) {
+                dataflow.graph.addEdge(out.nodeId, rootId, edge_1.EdgeType.Reads);
+            }
         }
     }
     data.ctx.files.addConsideredFile(filePath ?? '<inline>');
@@ -272,6 +283,6 @@ function standaloneSourceFile(idx, file, data, information) {
         ...data,
         environment: information.environment,
         referenceChain: [...data.referenceChain, file.filePath]
-    }, information);
+    }, information, false);
 }
 //# sourceMappingURL=built-in-source.js.map

package/documentation/doc-readme.js CHANGED Viewed

@@ -21,7 +21,7 @@ const PublicationsMain = [
         header: 'Statically Analyzing the Dataflow of R Programs (OOPSLA \'25)',
         description: 'Please cite this paper if you are using flowR in your research.',
         doi: 'https://doi.org/10.1145/3763087',
-        bibtex: `@article{10.1145/3763087,
+        bibtex: String.raw `@article{10.1145/3763087,
 	author = {Sihler, Florian and Tichy, Matthias},
 	title = {Statically Analyzing the Dataflow of R Programs},
 	year = {2025},
@@ -32,7 +32,7 @@ const PublicationsMain = [
 	number = {OOPSLA2},
 	url = {https://doi.org/10.1145/3763087},
 	doi = {10.1145/3763087},
-	abstract = {The R programming language is primarily designed for statistical computing and mostly used by researchers without a background in computer science. R provides a wide range of dynamic features and peculiarities that are difficult to analyze statically like dynamic scoping and lazy evaluation with dynamic side effects. At the same time, the R ecosystem lacks sophisticated analysis tools that support researchers in understanding and improving their code.   In this paper, we present a novel static dataflow analysis framework for the R programming language that is capable of handling the dynamic nature of R programs and produces the dataflow graph of given R programs. This graph can be essential in a range of analyses, including program slicing, which we implement as a proof of concept. The core analysis works as a stateful fold over a normalized version of the abstract syntax tree of the R program, which tracks (re-)definitions, values, function calls, side effects, external files, and a dynamic control flow to produce one dataflow graph per program.   We evaluate the correctness of our analysis using output equivalence testing on a manually curated dataset of 779 sensible slicing points from executable real-world R scripts. Additionally, we use a set of systematic test cases based on the capabilities of the R language and the implementation of the R interpreter and measure the runtimes well as the memory consumption on a set of 4,230 real-world R scripts and 20,815 packages available on R’s package manager CRAN.   Furthermore, we evaluate the recall of our program slicer, its accuracy using shrinking, and its improvement over the state of the art. We correctly analyze almost all programs in our equivalence test suite, preserving the identical output for 99.7\\% of the manually curated slicing points. On average, we require 576ms to analyze the dataflow and around 213kB to store the graph of a research script.   
This shows that our analysis is capable of analyzing real-world sources quickly and correctly. Our slicer achieves an average reduction of 84.8\\% of tokens indicating its potential to improve program comprehension.},
+	abstract = {The R programming language is primarily designed for statistical computing and mostly used by researchers without a background in computer science. R provides a wide range of dynamic features and peculiarities that are difficult to analyze statically like dynamic scoping and lazy evaluation with dynamic side effects. At the same time, the R ecosystem lacks sophisticated analysis tools that support researchers in understanding and improving their code.   In this paper, we present a novel static dataflow analysis framework for the R programming language that is capable of handling the dynamic nature of R programs and produces the dataflow graph of given R programs. This graph can be essential in a range of analyses, including program slicing, which we implement as a proof of concept. The core analysis works as a stateful fold over a normalized version of the abstract syntax tree of the R program, which tracks (re-)definitions, values, function calls, side effects, external files, and a dynamic control flow to produce one dataflow graph per program.   We evaluate the correctness of our analysis using output equivalence testing on a manually curated dataset of 779 sensible slicing points from executable real-world R scripts. Additionally, we use a set of systematic test cases based on the capabilities of the R language and the implementation of the R interpreter and measure the runtimes well as the memory consumption on a set of 4,230 real-world R scripts and 20,815 packages available on R’s package manager CRAN.   Furthermore, we evaluate the recall of our program slicer, its accuracy using shrinking, and its improvement over the state of the art. We correctly analyze almost all programs in our equivalence test suite, preserving the identical output for 99.7\% of the manually curated slicing points. On average, we require 576ms to analyze the dataflow and around 213kB to store the graph of a research script.   
This shows that our analysis is capable of analyzing real-world sources quickly and correctly. Our slicer achieves an average reduction of 84.8\% of tokens indicating its potential to improve program comprehension.},
 	journal = {Proc. ACM Program. Lang.},
 	month = oct,
 	articleno = {309},

package/documentation/wiki-analyzer.js CHANGED Viewed

@@ -29,6 +29,7 @@ const flowr_analyzer_plugin_1 = require("../project/plugins/flowr-analyzer-plugi
 const flowr_analyzer_environment_context_1 = require("../project/context/flowr-analyzer-environment-context");
 const flowr_analyzer_functions_context_1 = require("../project/context/flowr-analyzer-functions-context");
 const flowr_analyzer_meta_context_1 = require("../project/context/flowr-analyzer-meta-context");
+const config_1 = require("../config");
 async function analyzerQuickExample() {
     const analyzer = await new flowr_analyzer_builder_1.FlowrAnalyzerBuilder()
         .setEngine('tree-sitter')
@@ -172,23 +173,24 @@ The following sections highlight some of the most important configuration option
 ${(0, doc_structure_1.section)('Configuring flowR', 3)}
 You can fundamentally change the behavior of flowR using the [config file](${doc_files_1.FlowrWikiBaseRef}/Interface#configuring-flowr),
-embedded in the interface ${ctx.link('FlowrConfigOptions')}.
+embedded in the interface ${ctx.link(config_1.FlowrConfig)}.
 With the builder you can either provide a complete configuration or amend the default configuration using:
 * ${ctx.linkM(flowr_analyzer_builder_1.FlowrAnalyzerBuilder, 'setConfig')} to set a complete configuration
+* ${ctx.linkM(flowr_analyzer_builder_1.FlowrAnalyzerBuilder, 'configure')} to set the value of a specific key in the config
 * ${ctx.linkM(flowr_analyzer_builder_1.FlowrAnalyzerBuilder, 'amendConfig')} to amend the default configuration
-By default, the builder uses flowR's standard configuration obtained with ${ctx.link('defaultConfigOptions')}.
+By default, the builder uses flowR's standard configuration obtained with ${ctx.linkO(config_1.FlowrConfig, 'default')}.
 ${(0, doc_structure_1.block)({
             type: 'NOTE',
-            content: `During the analysis with the ${ctx.link(flowr_analyzer_1.FlowrAnalyzer.name)}, you can also access the configuration with
+            content: `During the analysis with the ${ctx.link(flowr_analyzer_1.FlowrAnalyzer)}, you can also access the configuration with
 		 the ${ctx.link(flowr_analyzer_context_1.FlowrAnalyzerContext)}.`
         })}
 ${(0, doc_structure_1.section)('Configuring the Engine', 3)}
-FlowR supports multiple [engines](${doc_files_1.FlowrWikiBaseRef}/Engines) for parsing and analyzing R code.
+FlowR supports multiple ${ctx.linkPage('wiki/Engines', 'engines')} for parsing and analyzing R code.
 With the builder, you can select the engine to use with:
 * ${ctx.linkM(flowr_analyzer_builder_1.FlowrAnalyzerBuilder, 'setEngine')} to set the desired engine.
@@ -292,7 +294,7 @@ ${(0, doc_structure_1.section)('File Loading', 4)}
 These plugins register for every file encountered by the [files context](#Files_Context) and determine whether and _how_ they can process the file.
 They are responsible for transforming the raw file content into a representation that flowR can work with during the analysis.
-For example, the ${ctx.link(flowr_analyzer_description_file_plugin_1.FlowrAnalyzerDescriptionFilePlugin.name)} adds support for R \`DESCRIPTION\` files by parsing their content into key-value pairs.
+For example, the ${ctx.link(flowr_analyzer_description_file_plugin_1.FlowrAnalyzerDescriptionFilePlugin)} adds support for R \`DESCRIPTION\` files by parsing their content into key-value pairs.
 These can then be used by other plugins, e.g. the ${ctx.link(flowr_analyzer_package_versions_description_file_plugin_1.FlowrAnalyzerPackageVersionsDescriptionFilePlugin)} that extracts package version information from these files.
 If multiple file plugins could apply (${ctx.link('DefaultFlowrAnalyzerFilePlugin::' + flowr_analyzer_file_plugin_1.FlowrAnalyzerFilePlugin.defaultPlugin().applies.name)}) to the same file,

package/documentation/wiki-core.js CHANGED Viewed

@@ -47,9 +47,7 @@ const log_1 = require("../../test/functionality/_helper/log");
 const log_2 = require("../util/log");
 async function makeAnalyzerExample() {
     const analyzer = await new flowr_analyzer_builder_1.FlowrAnalyzerBuilder()
-        .amendConfig(c => {
-        c.ignoreSourceCalls = true;
-    })
+        .configure('ignoreSourceCalls', true)
         .setEngine('tree-sitter')
         .build();
     analyzer.addRequest('x <- 1; y <- x; print(y);');

package/documentation/wiki-dataflow-graph.js CHANGED Viewed

@@ -39,6 +39,10 @@ const flowr_analyzer_context_1 = require("../project/context/flowr-analyzer-cont
 const doc_maker_1 = require("./wiki-mk/doc-maker");
 const flowr_analyzer_1 = require("../project/flowr-analyzer");
 const built_in_1 = require("../dataflow/environments/built-in");
+const dfg_1 = require("../util/mermaid/dfg");
+const r_number_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-number");
+const model_1 = require("../r-bridge/lang-4.x/ast/model/model");
+const range_1 = require("../util/range");
 async function subExplanation(parser, { description, code, expectedSubgraph }) {
     expectedSubgraph = await (0, doc_dfg_1.verifyExpectedSubgraph)(parser, code, expectedSubgraph);
     const marks = [];
@@ -741,12 +745,14 @@ class WikiDataflowGraph extends doc_maker_1.DocMaker {
         super('wiki/Dataflow Graph.md', module.filename, 'dataflow graph');
     }
     async text({ ctx, treeSitter }) {
+        const introExampleCode = 'x <- 3\ny <- x + 1\ny';
         return `
-This page briefly summarizes flowR's dataflow graph, represented by the ${ctx.link(graph_1.DataflowGraph)} class within the code.
-In case you want to manually build such a graph (e.g., for testing), you can use the ${ctx.link(dataflowgraph_builder_1.DataflowGraphBuilder)}.
+This page briefly summarizes flowR's dataflow graph (${ctx.link(graph_1.DataflowGraph)}).
 If you are interested in which features we support and which features are still to be worked on, please refer to our ${ctx.linkPage('wiki/Capabilities')} page.
-In summary, we discuss the following topics:
+In case you want to manually build such a graph (e.g., for testing), you can use the ${ctx.link(dataflowgraph_builder_1.DataflowGraphBuilder)}.
+In summary, we discuss the following topics in this wiki page:
+- [Reading the Visualizations](#reading-the-visualization)
 - [Vertices](#vertices)
 - [Edges](#edges)
 - [Control Dependencies](#control-dependencies)
@@ -756,37 +762,35 @@ In summary, we discuss the following topics:
     - [Call Graph Perspective](#perspectives-cg)
 - [Working with the Dataflow Graph](#dfg-working)
-Please be aware that the accompanied [dataflow information](#dataflow-information) (${ctx.link('DataflowInformation')}) returned by _flowR_ contains things besides the graph,
-like the entry and exit points of the subgraphs, and currently active references (see [below](#dataflow-information)).
-Additionally, you may be interested in the set of [Unknown Side Effects](#unknown-side-effects), marking calls which _flowR_ is unable to handle correctly.
-Potentially, you are interested in another perspective that flowR provides, the [control flow graph](${doc_files_1.FlowrWikiBaseRef}/Control%20Flow%20Graph), so please check the correpsonding
-wiki page if you are unsure.
+Please be aware that the accompanied [dataflow information](#dataflow-information) (${ctx.link('DataflowInformation')}) returned by _flowR_
+contains things besides the graph, like the entry and exit points of the subgraphs, and currently active references (see [below](#dataflow-information)).
+Additionally, you may be interested in the [Unknown Side Effects](#unknown-side-effects), marking calls which _flowR_ is unable to handle correctly.
 > [!TIP]
-> If you want to investigate the dataflow graph,
-> you can either use the [Visual Studio Code extension](${doc_files_1.FlowrVsCode}) or the ${ctx.replCmd('dataflow*')}
-> command in the REPL (see the ${ctx.linkPage('wiki/Interface', 'Interface wiki page')} for more information).
-> There is also a simplified perspective available with ${ctx.replCmd('dataflowsimple*')} that does not show everything but is easier to read.
+> To investigate the dataflow graph,
+> you can either use the ${ctx.linkPage('flowr:vscode')} or the ${ctx.replCmd('dataflow*')}
+> command in the REPL (see the ${ctx.linkPage('wiki/Interface', 'Interface wiki page')}).
+> There is also a simplified version available with ${ctx.replCmd('dataflowsimple*')} that does not show everything but is easier to read.
 > For small graphs, you can also use ${ctx.replCmd('dataflowascii')} to print the graph as ASCII art.
->
-> When using _flowR_ as a library, you may use the functions in ${(0, doc_files_1.getFilePathMd)('../util/mermaid/dfg.ts')}.
 >
-> If you receive a dataflow graph in its serialized form (e.g., by talking to a [_flowR_ server](${doc_files_1.FlowrWikiBaseRef}/Interface)), you can use ${ctx.linkM(graph_1.DataflowGraph, 'fromJson', { realNameWrapper: 'i', codeFont: true })} to retrieve the graph from the JSON representation.
+> If you receive a dataflow graph in its serialized form (e.g., by talking to a [_flowR_ server](${doc_files_1.FlowrWikiBaseRef}/Interface)), you can use ${ctx.linkM(graph_1.DataflowGraph, 'fromJson', { realNameWrapper: 'i', codeFont: true })} to recover the graph object.
 >
 > Also, check out the [${doc_files_1.FlowrGithubGroupName}/sample-analyzer-df-diff](${doc_files_1.FlowrGithubBaseRef}/sample-analyzer-df-diff) repository for a complete example project creating and comparing dataflow graphs.
-${await (0, doc_dfg_1.printDfGraphForCode)(treeSitter, 'x <- 3\ny <- x + 1\ny')}
+To get started, let's look at the graph for the following code snippet:
+${(0, doc_code_1.codeBlock)('r', introExampleCode)}
+With this code, the corresponding dataflow graph looks like this:
-The above dataflow graph showcases the general gist. We define a dataflow graph as a directed graph G = (V, E), differentiating between ${(0, doc_data_dfg_util_1.getAllVertices)().length} types of vertices V and
-${(0, doc_data_dfg_util_1.getAllEdges)().length} types of edges E allowing each vertex to have a single, and each edge to have multiple distinct types.
+${await (0, doc_dfg_1.printDfGraphForCode)(treeSitter, introExampleCode, { showCode: false })}
+The above dataflow graph showcases the general gist. We define a dataflow graph as a directed graph G&nbsp;=&nbsp;(V,&nbsp;E),
+differentiating between ${(0, doc_data_dfg_util_1.getAllVertices)().length} types of vertices&nbsp;V and
+${(0, doc_data_dfg_util_1.getAllEdges)().length} types of edges&nbsp;E, allowing each vertex to have a single type and each edge to have multiple distinct types.
 Additionally, every node may have links to its [control dependencies](#control-dependencies) (which you may view as a ${(0, text_1.nth)((0, doc_data_dfg_util_1.getAllEdges)().length + 1)} edge type,
 although they are explicitly not data dependencies and relate to the ${ctx.linkPage('wiki/Control Flow Graph')}).
-<details open>
-<summary>Vertex Types</summary>
+${(0, doc_structure_1.details)('Simplified Version of the graph', await (0, doc_dfg_1.printDfGraphForCode)(treeSitter, 'x <- 3\ny <- x + 1\ny', { simplified: true, showCode: false }))}
 The following vertices types exist:
@@ -794,20 +798,13 @@ The following vertices types exist:
 ${(0, doc_structure_1.details)('Class Diagram', 'All boxes should link to their respective implementation:\n' + (0, doc_code_1.codeBlock)('mermaid', ctx.mermaid('DataflowGraphVertexInfo', { inlineTypes: ['MergeableRecord'] })))}
-</details>
-<details open>
-<summary>Edge Types</summary>
-The following edges types exist, internally we use bitmasks to represent multiple types in a compact form:
+The following edge types exist; internally, we use bitmasks to represent multiple types in a compact form, so you
+should use the ${ctx.link('DfEdge', { codeFont: false, realNameWrapper: 'i' }, { type: 'variable' })} object and its methods to work with them:
 1. ${(0, doc_data_dfg_util_1.getAllEdges)().map(([k, v], index) => `[\`${k}\` (${v})](#${index + 1}-${k.toLowerCase().replace(/\s/g, '-')}-edge)`).join('\n1. ')}
 ${(0, doc_structure_1.details)('Class Diagram', 'All boxes should link to their respective implementation:\n' + (0, doc_code_1.codeBlock)('mermaid', ctx.mermaid('EdgeType', { inlineTypes: ['MergeableRecord'] })))}
-</details>
 From an implementation perspective all of these types are represented by respective interfaces, see ${(0, doc_files_1.getFilePathMd)('../dataflow/graph/vertex.ts')} and ${(0, doc_files_1.getFilePathMd)('../dataflow/graph/edge.ts')}.
@@ -828,6 +825,64 @@ ${(0, doc_general_1.prefixLines)((0, doc_code_1.codeBlock)('ts', `const name = $
 > For argument wrappers you can access the dataflow information for their value. For dead code, however, flowR currently contains
 > some core heuristics that remove it which cannot be reversed easily. So please open [an issue](${doc_issue_1.NewIssueUrl}) if you encounter such a case and require the node to be present in the dataflow graph.
+${(0, doc_structure_1.section)('Reading the Visualizations', 2, 'reading-the-visualization')}
+Before we dive into the details of the different vertices and edges, let's briefly talk about how to read the visualizations.
+For this, let's have a look at a very simple graph, created for the number \`42\`:
+${await (0, doc_dfg_1.printDfGraphForCode)(treeSitter, '42', { showCode: false })}
+${(0, doc_structure_1.section)('Vertex Shape', 3, 'vtx-shape')}
+The _shape_ of the vertex tells you the type of the vertex in the dataflow graph using the following scheme (the types are
+explained in more detail in the following sections):
+${(0, doc_code_1.codeBlock)('mermaid', 'flowchart TD\n' +
+            // use mermaidNodeBrackets to get open and closing bracket
+            Object.entries(vertex_1.VertexType)
+                .map(([k, v]) => {
+                const { open, close } = (0, dfg_1.mermaidNodeBrackets)(v);
+                return `   ${v}${open}${k}${close}`;
+            }).join('\n') +
+            // we add a subflow for the function definition
+            '\n    subgraph fbox ["function body"]\n   body((...))\n    end\n   fdef-->fbox')}
+${(0, doc_structure_1.section)('Syntactic Types', 3, 'vtx-synt-type')}
+Within the shape, in square brackets, you can find the syntactic type of the vertex
+which is linked to the node in the ${ctx.linkPage('wiki/Normalized AST')}.
+For more information on valid types and what to do with them, please refer to the ${ctx.linkPage('wiki/Normalized AST', 'normalized AST wiki page')}
+and the corresponding helper objects (e.g., ${ctx.link(r_number_1.RNumber, undefined, { type: 'variable' })}).
+${(0, doc_structure_1.section)('Lexeme', 3, 'vtx-lexeme')}
+Also in the first line, next to the [syntactic type](#vtx-synt-type), you can find the lexeme of the vertex (if it has one, e.g., for a variable definition or use).
+This usually represents the textual source string of the respective vertex, and is also linked to the ${ctx.linkPage('wiki/Normalized AST')}.
+You can access the lexeme too with ${ctx.linkO(model_1.RNode, 'lexeme')}.
+${(0, doc_structure_1.section)('Vertex Id', 3, 'vtx-id')}
+In the second line, you will usually find the id (in the form of a ${ctx.link(node_id_1.NodeId, undefined, { type: 'variable' })}) of the vertex,
+alongside its [control dependencies](#control-dependencies) if it has any. This id links the vertex to the respective node in the ${ctx.linkPage('wiki/Normalized AST')} (and all other perspectives created by flowR).
+To give you an example, have a look at the following graph:
+${await (0, doc_dfg_1.printDfGraphForCode)(treeSitter, 'if(u) a', { showCode: false, mark: new Set(['1']) })}
+With the _may_ prefix you can see that \`a\` has a [control dependency](#control-dependencies)
+on the \`if\`, which only triggers when the condition is \`true\` (as indicated by the \`+\` suffix).
+${(0, doc_structure_1.section)('Location', 3, 'vtx-location')}
+The third line indicates the compressed ${ctx.link(range_1.SourceRange)} of the vertex in the format \`startLine.startCharacter - endLine.endCharacter\`. If the range reads \`1.7\`,
+this is short for \`1.7-1.7\`, likewise, \`1.7-9\` is short for \`1.7-1.9\`. So, \`1.7-9\` describes something starting
+in the first line at the seventh character and ending in the first line at the ninth character.
+${(0, doc_structure_1.section)('Arguments and Additional Information', 3, 'vtx-additional-info')}
+Some vertices (e.g., [function calls](#function-call-vertex)) have additional information, like the arguments of the call.
+As you can see with the \`if\` example above alongside the [vertex id](#vtx-id),
+these vertices also have an additional line which lists the ids of the arguments in order to clear any ambiguity in case, for example,
+the mermaid graph layout mixes up the order.
 ${(0, doc_structure_1.section)('Vertices', 2, 'vertices')}
 1. ${(0, doc_data_dfg_util_1.getAllVertices)().map(([k, v]) => `[\`${k}\`](#${v.toLowerCase().replaceAll(/\s/g, '-')}-vertex)`).join('\n1. ')}
@@ -836,7 +891,7 @@ ${await getVertexExplanations(treeSitter, ctx)}
 ${(0, doc_structure_1.section)('Edges', 2, 'edges')}
-1. ${(0, doc_data_dfg_util_1.getAllEdges)().map(([k, v], index) => `[\`${k}\` (${v})](#${index + 1}-${k.toLowerCase().replace(/\s/g, '-')}-edge)`).join('\n1. ')}
+1. ${(0, doc_data_dfg_util_1.getAllEdges)().map(([k, v], index) => `[\`${k}\` (${v})](#${index + 1}-${k.toLowerCase().replaceAll(/\s/g, '-')}-edge)`).join('\n1. ')}
 ${await getEdgesExplanations(treeSitter, ctx)}