polyglot-piranha 0.3.36__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/Cargo.lock +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/Cargo.toml +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/PKG-INFO +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/utils.rs +3 -3
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/main.rs +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/capture_group_patterns.rs +6 -6
- polyglot_piranha-0.4.0/src/models/concrete_syntax/concrete_syntax.pest +22 -0
- polyglot_piranha-0.4.0/src/models/concrete_syntax/constraint_checker.rs +29 -0
- polyglot_piranha-0.4.0/src/models/concrete_syntax/cursor_utils.rs +66 -0
- polyglot_piranha-0.4.0/src/models/concrete_syntax/interpreter.rs +448 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/concrete_syntax/mod.rs +4 -0
- polyglot_piranha-0.4.0/src/models/concrete_syntax/parser.rs +236 -0
- polyglot_piranha-0.4.0/src/models/concrete_syntax/resolver.rs +92 -0
- polyglot_piranha-0.4.0/src/models/concrete_syntax/types.rs +103 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/concrete_syntax/unit_tests/interpreter_test.rs +33 -2
- polyglot_piranha-0.3.36/src/models/concrete_syntax/unit_tests/test_parser.rs → polyglot_piranha-0.4.0/src/models/concrete_syntax/unit_tests/parser_test.rs +79 -3
- polyglot_piranha-0.4.0/src/models/concrete_syntax/unit_tests/resolver_test.rs +117 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/edit.rs +2 -2
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/filter.rs +2 -2
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/matches.rs +4 -4
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/outgoing_edges.rs +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/piranha_arguments.rs +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/piranha_output.rs +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/rule.rs +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/rule_graph.rs +4 -4
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/rule_store.rs +1 -1
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/tree_sitter_utilities.rs +1 -1
- polyglot_piranha-0.3.36/src/models/concrete_syntax/concrete_syntax.pest +0 -15
- polyglot_piranha-0.3.36/src/models/concrete_syntax/interpreter.rs +0 -426
- polyglot_piranha-0.3.36/src/models/concrete_syntax/parser.rs +0 -112
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/LICENSE +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/NOTICE +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/README.md +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/plugins/pyproject.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/polyglot_piranha.pyi +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/pyproject.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/go/edges.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/go/rules.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/go/scope_config.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java/edges.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java/rules.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java/scope_config.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java_cs/edges.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java_cs/rules.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java_cs/scope_config.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/kt/edges.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/kt/rules.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/kt/scope_config.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/ruby/edges.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/ruby/rules.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/ruby/scope_config.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/scala/scope_config.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/swift/edges.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/swift/rules.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/swift/scope_config.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/README.md +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/analysis.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/mod.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/tag_analysis.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/unit_tests/tag_analysis_test.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/lib.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/default_configs.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/language.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/mod.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/scopes.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/source_code_unit.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/piranha_arguments_test.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/rule_graph_validation_test.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/rule_test.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/scopes_test.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/source_code_unit_test.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/testdata/custom_builtin/edges.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/testdata/custom_builtin/rules.toml +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/mod.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_go.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_java.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_kt.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_python.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_scala.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_scm.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_strings.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_swift.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_thrift.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_ts.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_tsx.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_yaml.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/mod.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/regex_utilities.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/unit_tests/tree_sitter_utilities_test.rs +0 -0
- {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/unit_tests/utilities_test.rs +0 -0
@@ -2,7 +2,7 @@
|
|
2
2
|
authors = ["Uber Technologies Inc."]
|
3
3
|
name = "piranha"
|
4
4
|
description = "Polyglot Piranha is a library for performing structural find and replace with deep cleanup."
|
5
|
-
version = "0.
|
5
|
+
version = "0.4.0"
|
6
6
|
edition = "2021"
|
7
7
|
include = ["pyproject.toml", "src/"]
|
8
8
|
exclude = ["legacy"]
|
@@ -59,7 +59,7 @@ pub fn get_capture_groups_from_regex(re: Regex) -> Vec<String> {
|
|
59
59
|
|
60
60
|
// Check all capture names (i.e., named groups) in the pattern
|
61
61
|
for capture_name in re.capture_names().flatten() {
|
62
|
-
let tag = format!("@{}"
|
62
|
+
let tag = format!("@{capture_name}");
|
63
63
|
tags.push(tag);
|
64
64
|
}
|
65
65
|
tags
|
@@ -112,7 +112,7 @@ pub fn get_capture_groups_from_tsq(pattern: String) -> Vec<String> {
|
|
112
112
|
})
|
113
113
|
.unwrap_or_default();
|
114
114
|
|
115
|
-
log::debug!("capture_groups: {:?}"
|
115
|
+
log::debug!("capture_groups: {capture_groups:?}");
|
116
116
|
|
117
117
|
capture_groups
|
118
118
|
}
|
@@ -166,6 +166,6 @@ pub fn get_capture_group_usage_from_tsq(pattern: String) -> Vec<String> {
|
|
166
166
|
.map(|mat| mat.as_str().to_owned())
|
167
167
|
.collect();
|
168
168
|
|
169
|
-
log::debug!("capture_groups: {:?}"
|
169
|
+
log::debug!("capture_groups: {capture_groups:?}");
|
170
170
|
capture_groups
|
171
171
|
}
|
@@ -35,7 +35,7 @@ fn main() {
|
|
35
35
|
|
36
36
|
let args = PiranhaArguments::from_cli();
|
37
37
|
|
38
|
-
debug!("Piranha Arguments are \n{:#?}"
|
38
|
+
debug!("Piranha Arguments are \n{args:#?}");
|
39
39
|
let piranha_output_summaries = execute_piranha(&args);
|
40
40
|
|
41
41
|
if let Some(path) = args.path_to_output_summary() {
|
@@ -133,7 +133,6 @@ impl CompiledCGPattern {
|
|
133
133
|
&self, node: &Node, source_code: String, recursive: bool, replace_node: Option<String>,
|
134
134
|
replace_node_idx: Option<u8>,
|
135
135
|
) -> Vec<Match> {
|
136
|
-
let code_str = source_code.as_bytes();
|
137
136
|
match self {
|
138
137
|
CompiledCGPattern::Q(query) => get_all_matches_for_query(
|
139
138
|
node,
|
@@ -147,14 +146,15 @@ impl CompiledCGPattern {
|
|
147
146
|
get_all_matches_for_regex(node, source_code, regex, recursive, replace_node)
|
148
147
|
}
|
149
148
|
CompiledCGPattern::M(concrete_syntax) => {
|
150
|
-
let
|
149
|
+
let source_code_ref = source_code.as_bytes();
|
150
|
+
let resolved_syntax = concrete_syntax.clone().resolve().unwrap();
|
151
|
+
get_all_matches_for_concrete_syntax(
|
151
152
|
node,
|
152
|
-
|
153
|
-
|
153
|
+
source_code_ref,
|
154
|
+
&resolved_syntax,
|
154
155
|
recursive,
|
155
156
|
replace_node,
|
156
|
-
)
|
157
|
-
matches.0
|
157
|
+
)
|
158
158
|
}
|
159
159
|
}
|
160
160
|
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
// Concrete Syntax Grammar for Piranha
|
2
|
+
concrete_syntax = { SOI ~ pattern ~ EOI }
|
3
|
+
pattern = { (element)+ ~ ("|>" ~ constraints)? }
|
4
|
+
|
5
|
+
// An element is either a capture or literal text
|
6
|
+
element = _{ capture | literal_text }
|
7
|
+
|
8
|
+
// Captures: :[name], :[name+], :[name*], @name
|
9
|
+
capture = { (":[" ~ identifier ~ capture_mode? ~ "]") | "@"~identifier } // FIXME: Should remove @ from the grammar, because literals may be parsed incorrectly
|
10
|
+
capture_mode = { "+" | "*" }
|
11
|
+
identifier = { (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
|
12
|
+
|
13
|
+
// Literal text - single word/token without whitespace
|
14
|
+
literal_text = { (!( ":[" | "|>" | "@" ) ~ ANY)+ }
|
15
|
+
WHITESPACE = _{ (" " | "\t" | "\r" | "\n")+ }
|
16
|
+
|
17
|
+
// Where constraints (extensible for future constraint types)
|
18
|
+
constraints = { constraint ~ ("," ~ constraint)* }
|
19
|
+
constraint = { in_constraint }
|
20
|
+
in_constraint = { capture ~ "in" ~ "[" ~ list_items? ~ "]" }
|
21
|
+
list_items = { quoted_string ~ ("," ~ quoted_string)* }
|
22
|
+
quoted_string = { "\"" ~ ( "\\\\" | "\\\"" | (!"\"" ~ ANY) )* ~ "\"" }
|
@@ -0,0 +1,29 @@
|
|
1
|
+
/*
|
2
|
+
Copyright (c) 2023 Uber Technologies, Inc.
|
3
|
+
|
4
|
+
<p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
5
|
+
except in compliance with the License. You may obtain a copy of the License at
|
6
|
+
<p>http://www.apache.org/licenses/LICENSE-2.0
|
7
|
+
|
8
|
+
<p>Unless required by applicable law or agreed to in writing, software distributed under the
|
9
|
+
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
10
|
+
express or implied. See the License for the specific language governing permissions and
|
11
|
+
limitations under the License.
|
12
|
+
*/
|
13
|
+
|
14
|
+
use crate::models::concrete_syntax::parser::CsConstraint;
|
15
|
+
use crate::models::concrete_syntax::types::CapturedNode;
|
16
|
+
|
17
|
+
/// Check if a captured node satisfies a single constraint
|
18
|
+
pub(crate) fn check_constraint(node: &CapturedNode, ctr: &CsConstraint) -> bool {
|
19
|
+
match ctr {
|
20
|
+
CsConstraint::In { items, .. } => items.contains(&node.text.to_string()),
|
21
|
+
}
|
22
|
+
}
|
23
|
+
|
24
|
+
/// Check if a captured node satisfies all constraints
|
25
|
+
pub(crate) fn satisfies_constraints(node: &CapturedNode, constraints: &[CsConstraint]) -> bool {
|
26
|
+
constraints
|
27
|
+
.iter()
|
28
|
+
.all(|constraint| check_constraint(node, constraint))
|
29
|
+
}
|
@@ -0,0 +1,66 @@
|
|
1
|
+
/*
|
2
|
+
Copyright (c) 2023 Uber Technologies, Inc.
|
3
|
+
|
4
|
+
<p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
5
|
+
except in compliance with the License. You may obtain a copy of the License at
|
6
|
+
<p>http://www.apache.org/licenses/LICENSE-2.0
|
7
|
+
|
8
|
+
<p>Unless required by applicable law or agreed to in writing, software distributed under the
|
9
|
+
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
10
|
+
express or implied. See the License for the specific language governing permissions and
|
11
|
+
limitations under the License.
|
12
|
+
*/
|
13
|
+
|
14
|
+
use crate::models::matches::Range;
|
15
|
+
use tree_sitter::{Node, TreeCursor};
|
16
|
+
|
17
|
+
/// Cursor navigation utilities for tree traversal during pattern matching.
|
18
|
+
pub struct CursorNavigator;
|
19
|
+
|
20
|
+
impl CursorNavigator {
|
21
|
+
/// Advances the cursor to the next sibling, or if no sibling exists,
|
22
|
+
/// moves up to the parent and tries to find the next sibling at that level.
|
23
|
+
pub fn find_next_sibling_or_ancestor_sibling(cursor: &mut TreeCursor) -> bool {
|
24
|
+
while !cursor.goto_next_sibling() {
|
25
|
+
if !cursor.goto_parent() {
|
26
|
+
return false;
|
27
|
+
}
|
28
|
+
}
|
29
|
+
true
|
30
|
+
}
|
31
|
+
|
32
|
+
/// Skips over comment nodes by advancing the cursor to the next non-comment sibling.
|
33
|
+
/// If the current node is not a comment, the cursor position remains unchanged.
|
34
|
+
pub fn skip_comment_nodes(cursor: &mut TreeCursor) {
|
35
|
+
let mut node = cursor.node();
|
36
|
+
while node.kind().contains("comment") && cursor.goto_next_sibling() {
|
37
|
+
node = cursor.node();
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
/// Finds the index of a target node among its parent's children.
|
42
|
+
pub fn find_child_index(target_node: &Node, parent_node: &Node) -> Option<usize> {
|
43
|
+
parent_node
|
44
|
+
.children(&mut parent_node.walk())
|
45
|
+
.enumerate()
|
46
|
+
.find(|&(_i, child)| child == *target_node)
|
47
|
+
.map(|(i, _child)| i)
|
48
|
+
}
|
49
|
+
|
50
|
+
/// Creates a range that spans from the start of the first node to the end of the second node.
|
51
|
+
pub fn span_node_ranges(first_node: &Node, last_node: &Node) -> Range {
|
52
|
+
Range::span_ranges(first_node.range(), last_node.range())
|
53
|
+
}
|
54
|
+
|
55
|
+
/// Extracts text from source code within the specified byte range.
|
56
|
+
pub fn get_text_from_range(start_byte: usize, end_byte: usize, source_code: &[u8]) -> String {
|
57
|
+
let text_slice = &source_code[start_byte..end_byte];
|
58
|
+
String::from_utf8_lossy(text_slice).to_string()
|
59
|
+
}
|
60
|
+
}
|
61
|
+
|
62
|
+
#[cfg(test)]
|
63
|
+
mod tests {
|
64
|
+
|
65
|
+
// Tests will be added as we migrate functionality
|
66
|
+
}
|
@@ -0,0 +1,448 @@
|
|
1
|
+
/*
|
2
|
+
Copyright (c) 2023 Uber Technologies, Inc.
|
3
|
+
|
4
|
+
<p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
5
|
+
except in compliance with the License. You may obtain a copy of the License at
|
6
|
+
<p>http://www.apache.org/licenses/LICENSE-2.0
|
7
|
+
|
8
|
+
<p>Unless required by applicable law or agreed to in writing, software distributed under the
|
9
|
+
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
10
|
+
express or implied. See the License for the specific language governing permissions and
|
11
|
+
limitations under the License.
|
12
|
+
*/
|
13
|
+
|
14
|
+
use crate::models::matches::Range;
|
15
|
+
|
16
|
+
use crate::models::concrete_syntax::constraint_checker::satisfies_constraints;
|
17
|
+
use crate::models::concrete_syntax::cursor_utils::CursorNavigator;
|
18
|
+
use crate::models::concrete_syntax::parser::CaptureMode;
|
19
|
+
use crate::models::concrete_syntax::parser::CsConstraint;
|
20
|
+
use crate::models::concrete_syntax::resolver::{ResolvedConcreteSyntax, ResolvedCsElement};
|
21
|
+
use crate::models::concrete_syntax::types::{CapturedNode, MatchingContext, PatternMatchResult};
|
22
|
+
use crate::models::matches::Match;
|
23
|
+
use std::collections::HashMap;
|
24
|
+
use tree_sitter::{Node, TreeCursor};
|
25
|
+
use tree_sitter_traversal::Cursor;
|
26
|
+
|
27
|
+
// =============================================================================
|
28
|
+
// PUBLIC API
|
29
|
+
// =============================================================================
|
30
|
+
|
31
|
+
pub(crate) fn get_all_matches_for_concrete_syntax(
|
32
|
+
node: &Node, source_code_ref: &[u8], cs: &ResolvedConcreteSyntax, recursive: bool,
|
33
|
+
replace_node: Option<String>,
|
34
|
+
) -> Vec<Match> {
|
35
|
+
let mut matches: Vec<Match> = Vec::new();
|
36
|
+
|
37
|
+
if let PatternMatchResult::Success {
|
38
|
+
captures: mut mapping,
|
39
|
+
consumed_nodes: _last_node_index,
|
40
|
+
range: Some(range),
|
41
|
+
} = match_sequential_siblings(&mut node.walk(), source_code_ref, &cs.pattern.sequence)
|
42
|
+
{
|
43
|
+
// If the user doesn't specify a replace node, we use '*' as the default (everything)
|
44
|
+
let replace_node_key = replace_node.clone().unwrap_or("*".to_string());
|
45
|
+
|
46
|
+
if replace_node_key == "*" {
|
47
|
+
let replace_node_match = CapturedNode {
|
48
|
+
range,
|
49
|
+
text: CursorNavigator::get_text_from_range(
|
50
|
+
range.start_byte,
|
51
|
+
range.end_byte,
|
52
|
+
source_code_ref,
|
53
|
+
),
|
54
|
+
};
|
55
|
+
mapping.insert("*".to_string(), replace_node_match);
|
56
|
+
}
|
57
|
+
|
58
|
+
let match_item = create_match_from_capture(&replace_node_key, mapping, range);
|
59
|
+
matches.push(match_item);
|
60
|
+
}
|
61
|
+
|
62
|
+
if recursive {
|
63
|
+
let mut cursor = node.walk();
|
64
|
+
for child in node.children(&mut cursor) {
|
65
|
+
let mut inner_matches = get_all_matches_for_concrete_syntax(
|
66
|
+
&child,
|
67
|
+
source_code_ref,
|
68
|
+
cs,
|
69
|
+
recursive,
|
70
|
+
replace_node.clone(),
|
71
|
+
);
|
72
|
+
matches.append(&mut inner_matches)
|
73
|
+
}
|
74
|
+
}
|
75
|
+
matches
|
76
|
+
}
|
77
|
+
|
78
|
+
// =============================================================================
|
79
|
+
// CORE MATCHING ENGINE
|
80
|
+
// =============================================================================
|
81
|
+
|
82
|
+
/// Attempts to match a pattern against sequential sibling nodes in an AST.
|
83
|
+
///
|
84
|
+
/// ## How it works:
|
85
|
+
/// Given a function body with multiple statements, try to match a pattern starting
|
86
|
+
/// at each statement position until we find a match.
|
87
|
+
///
|
88
|
+
/// ```
|
89
|
+
/// Pattern: int :[x] = 2; :[x]++;
|
90
|
+
///
|
91
|
+
/// Function Body AST:
|
92
|
+
/// ├── [0] char a = 'c' ; ← Start matching here: ❌ No match
|
93
|
+
/// ├── [1] a++; ← Start matching here: ❌ No match
|
94
|
+
/// ├── [2] int b = 2; ← Start matching here: ✅ MATCH! (spans [2] and [3]) -> Returns
|
95
|
+
/// ├── [3] b++; ←
|
96
|
+
/// ├── [4] return a; ←
|
97
|
+
/// └── [5] } ←
|
98
|
+
/// ```
|
99
|
+
///
|
100
|
+
/// The algorithm slides a "matching window" across all possible starting positions,
|
101
|
+
/// trying to match consecutive siblings against the pattern elements.
|
102
|
+
fn match_sequential_siblings(
|
103
|
+
cursor: &mut TreeCursor, source_code_ref: &[u8], cs_elements: &[ResolvedCsElement],
|
104
|
+
) -> PatternMatchResult {
|
105
|
+
let parent_node = cursor.node();
|
106
|
+
let mut child_seq_match_start = 0;
|
107
|
+
if cursor.goto_first_child() {
|
108
|
+
// Iterate through siblings to find a match
|
109
|
+
loop {
|
110
|
+
// Clone the cursor in order to attempt matching the sequence starting at cursor.node
|
111
|
+
// Cloning here is necessary other we won't be able to advance to the next sibling if the matching fails
|
112
|
+
let result = {
|
113
|
+
match_cs_pattern(
|
114
|
+
&mut MatchingContext {
|
115
|
+
cursor: cursor.clone(),
|
116
|
+
source_code: source_code_ref,
|
117
|
+
top_node: &parent_node,
|
118
|
+
},
|
119
|
+
cs_elements,
|
120
|
+
true,
|
121
|
+
)
|
122
|
+
};
|
123
|
+
|
124
|
+
// If we got a successful match, extract the mapping and index
|
125
|
+
if let PatternMatchResult::Success {
|
126
|
+
captures: mapping,
|
127
|
+
consumed_nodes: last_node_index,
|
128
|
+
range: None,
|
129
|
+
} = result
|
130
|
+
{
|
131
|
+
// Determine the last matched node. Remember, we are matching subsequences of children [n ... k]
|
132
|
+
let last_node = parent_node.child(last_node_index);
|
133
|
+
let range = Range::span_ranges(cursor.node().range(), last_node.unwrap().range());
|
134
|
+
if last_node_index != child_seq_match_start || parent_node.child_count() == 1 {
|
135
|
+
return PatternMatchResult::Success {
|
136
|
+
captures: mapping,
|
137
|
+
consumed_nodes: last_node_index,
|
138
|
+
range: Some(range),
|
139
|
+
};
|
140
|
+
}
|
141
|
+
// This is to prevent double matches when unrolling a node. i.e., matching the statement in a function body,
|
142
|
+
// as well as the statement itself when unrolled.
|
143
|
+
return PatternMatchResult::failed();
|
144
|
+
}
|
145
|
+
|
146
|
+
child_seq_match_start += 1;
|
147
|
+
if !cursor.goto_next_sibling() {
|
148
|
+
break;
|
149
|
+
}
|
150
|
+
}
|
151
|
+
} // Not currently handing matching of leaf nodes. Current semantics would never match it anyway.
|
152
|
+
PatternMatchResult::failed()
|
153
|
+
}
|
154
|
+
|
155
|
+
/// Top-level entry point for concrete-syntax matching.
|
156
|
+
///
|
157
|
+
/// Dispatches each element of the resolved pattern—capture groups or literal text—against the AST
|
158
|
+
/// at the current cursor position, recursing until the pattern is fully applied.
|
159
|
+
pub(crate) fn match_cs_pattern(
|
160
|
+
ctx: &mut MatchingContext<'_>, cs_elements: &[ResolvedCsElement], can_continue: bool,
|
161
|
+
) -> PatternMatchResult {
|
162
|
+
// Check if we finished matching! In case, we haven't `check_match_completion` returns None
|
163
|
+
if let Some(result) = check_match_completion(ctx, cs_elements, can_continue) {
|
164
|
+
return result;
|
165
|
+
}
|
166
|
+
|
167
|
+
// Skip comment nodes always
|
168
|
+
CursorNavigator::skip_comment_nodes(&mut ctx.cursor);
|
169
|
+
|
170
|
+
// Get the first element and remaining elements
|
171
|
+
let (first, rest) = cs_elements.split_first().unwrap();
|
172
|
+
|
173
|
+
match first {
|
174
|
+
ResolvedCsElement::Capture {
|
175
|
+
name,
|
176
|
+
mode,
|
177
|
+
constraints,
|
178
|
+
} => match mode {
|
179
|
+
CaptureMode::Single => match_single_capture(ctx, name, constraints, rest),
|
180
|
+
CaptureMode::OnePlus => match_one_plus_capture(ctx, name, constraints, rest),
|
181
|
+
CaptureMode::ZeroPlus => match_zero_plus_capture(ctx, name, constraints, rest),
|
182
|
+
},
|
183
|
+
ResolvedCsElement::Literal(literal_text) => match_literal(ctx, literal_text, rest),
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
fn match_literal(
|
188
|
+
ctx: &mut MatchingContext<'_>, literal_text: &str, remaining_elements: &[ResolvedCsElement],
|
189
|
+
) -> PatternMatchResult {
|
190
|
+
// We match literals against leaves
|
191
|
+
while ctx.cursor.node().child_count() != 0 {
|
192
|
+
ctx.cursor.goto_first_child();
|
193
|
+
}
|
194
|
+
|
195
|
+
let node_code = ctx.cursor.node().utf8_text(ctx.source_code).unwrap().trim();
|
196
|
+
if literal_text.starts_with(node_code) && !node_code.is_empty() {
|
197
|
+
let advance_by = node_code.len();
|
198
|
+
// Can only advance if there is still enough chars to consume
|
199
|
+
if advance_by > literal_text.len() {
|
200
|
+
return PatternMatchResult::failed();
|
201
|
+
}
|
202
|
+
|
203
|
+
let should_match = CursorNavigator::find_next_sibling_or_ancestor_sibling(&mut ctx.cursor);
|
204
|
+
|
205
|
+
// If we consumed the entire literal, continue with remaining elements
|
206
|
+
return if advance_by == literal_text.len() {
|
207
|
+
match_cs_pattern(ctx, remaining_elements, should_match)
|
208
|
+
} else {
|
209
|
+
// If we only consumed part of the literal, create a new literal with the remaining text
|
210
|
+
let remaining_literal = &literal_text[advance_by..];
|
211
|
+
let mut new_elements = vec![ResolvedCsElement::Literal(remaining_literal.to_string())];
|
212
|
+
new_elements.extend_from_slice(remaining_elements);
|
213
|
+
match_cs_pattern(ctx, &new_elements, should_match)
|
214
|
+
};
|
215
|
+
}
|
216
|
+
PatternMatchResult::failed()
|
217
|
+
}
|
218
|
+
|
219
|
+
// =============================================================================
|
220
|
+
// CAPTURE MODE HANDLERS
|
221
|
+
// =============================================================================
|
222
|
+
|
223
|
+
/// Handle single capture: :[var] - must match exactly one node
|
224
|
+
fn match_single_capture(
|
225
|
+
ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
|
226
|
+
remaining_pattern: &[ResolvedCsElement],
|
227
|
+
) -> PatternMatchResult {
|
228
|
+
match_at_all_tree_levels(ctx, var_name, constraints, remaining_pattern, false)
|
229
|
+
}
|
230
|
+
|
231
|
+
/// Handle one-plus capture: :[var+] - must match one or more nodes
|
232
|
+
fn match_one_plus_capture(
|
233
|
+
ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
|
234
|
+
remaining_pattern: &[ResolvedCsElement],
|
235
|
+
) -> PatternMatchResult {
|
236
|
+
match_at_all_tree_levels(ctx, var_name, constraints, remaining_pattern, true)
|
237
|
+
}
|
238
|
+
|
239
|
+
/// Handle zero-plus capture: :[var*] - can match zero or more nodes
|
240
|
+
fn match_zero_plus_capture(
|
241
|
+
ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
|
242
|
+
remaining_pattern: &[ResolvedCsElement],
|
243
|
+
) -> PatternMatchResult {
|
244
|
+
// First try to match with zero nodes
|
245
|
+
let mut zero_match_ctx = MatchingContext {
|
246
|
+
cursor: ctx.cursor.clone(),
|
247
|
+
source_code: ctx.source_code,
|
248
|
+
top_node: ctx.top_node,
|
249
|
+
};
|
250
|
+
let empty_capture = create_empty_captured_node();
|
251
|
+
if satisfies_constraints(&empty_capture, constraints) {
|
252
|
+
let zero_match_result = match_cs_pattern(&mut zero_match_ctx, remaining_pattern, true);
|
253
|
+
if let PatternMatchResult::Success {
|
254
|
+
captures: mut zero_captures,
|
255
|
+
consumed_nodes: last_matched_node_idx,
|
256
|
+
range: None,
|
257
|
+
} = zero_match_result
|
258
|
+
{
|
259
|
+
zero_captures.insert(var_name.to_string(), empty_capture);
|
260
|
+
return PatternMatchResult::success(zero_captures, last_matched_node_idx);
|
261
|
+
}
|
262
|
+
}
|
263
|
+
|
264
|
+
// If zero nodes didn't work, try one or more nodes
|
265
|
+
match_at_all_tree_levels(ctx, var_name, constraints, remaining_pattern, true)
|
266
|
+
}
|
267
|
+
|
268
|
+
/// Attempts to match a capture by exploring different levels of the AST hierarchy.
|
269
|
+
/// Starting from the current node, it tries to match progressively smaller subtrees
|
270
|
+
/// by traversing down to child nodes. This allows flexible matching where a capture
|
271
|
+
/// can match a large statement block or just a single expression within it.
|
272
|
+
///
|
273
|
+
/// For example, given: `int x = 1; x++;`
|
274
|
+
/// The capture `:[x]` could match:
|
275
|
+
/// - The entire statement block: `int x = 1; x++;`
|
276
|
+
/// - Just the declaration: `int x = 1;`
|
277
|
+
/// - Just the type: `int`
|
278
|
+
fn match_at_all_tree_levels(
|
279
|
+
ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
|
280
|
+
remaining_pattern: &[ResolvedCsElement], allow_horizontal_expansion: bool,
|
281
|
+
) -> PatternMatchResult {
|
282
|
+
// Try matching at different tree levels, going deeper each iteration
|
283
|
+
loop {
|
284
|
+
let result = try_match_node_range(
|
285
|
+
ctx,
|
286
|
+
var_name,
|
287
|
+
constraints,
|
288
|
+
remaining_pattern,
|
289
|
+
allow_horizontal_expansion,
|
290
|
+
);
|
291
|
+
if let PatternMatchResult::Success { .. } = result {
|
292
|
+
return result;
|
293
|
+
}
|
294
|
+
|
295
|
+
// Move one level down to try matching against smaller/deeper nodes
|
296
|
+
if !ctx.cursor.goto_first_child() {
|
297
|
+
break;
|
298
|
+
}
|
299
|
+
}
|
300
|
+
PatternMatchResult::failed()
|
301
|
+
}
|
302
|
+
|
303
|
+
/// Try to match a range of nodes starting at the current cursor position, expanding the range if needed and allowed
|
304
|
+
/// It assigns [range_start, range_end] to a capture group, and matches the rest of the cs pattern against remaining nodes
|
305
|
+
///
|
306
|
+
/// Per‐iteration diagram:
|
307
|
+
///
|
308
|
+
/// AST siblings: … ─ node₀ ─ node₁ ─ node₂ ─ node₃ ─ node₄ ─ …
|
309
|
+
/// ↑ ↑
|
310
|
+
/// range_start range_end
|
311
|
+
///
|
312
|
+
/// 1) capture = text(node₁…node₂)
|
313
|
+
/// 2) match_cs_pattern(
|
314
|
+
/// remaining_elements, // CS elements still to match
|
315
|
+
/// /* should_match */ true if node₃ exists
|
316
|
+
/// ) starting at node₃, node₄, …
|
317
|
+
fn try_match_node_range(
|
318
|
+
ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
|
319
|
+
remaining_pattern: &[ResolvedCsElement], allow_horizontal_expansion: bool,
|
320
|
+
) -> PatternMatchResult {
|
321
|
+
// 1. Initial anchors
|
322
|
+
let range_start = ctx.cursor.node();
|
323
|
+
let mut range_end = range_start;
|
324
|
+
let mut next_cursor = ctx.cursor.clone();
|
325
|
+
// 'should_match' is used to check whether there are siblings or ancestor siblings we need to match
|
326
|
+
// after we assign the current range to the capture node
|
327
|
+
let mut should_match = CursorNavigator::find_next_sibling_or_ancestor_sibling(&mut next_cursor);
|
328
|
+
let mut is_last = false;
|
329
|
+
|
330
|
+
// Helper to slice out the captured text
|
331
|
+
let make_capture = |end: &Node| CapturedNode {
|
332
|
+
range: Range::span_ranges(range_start.range(), end.range()),
|
333
|
+
text: CursorNavigator::get_text_from_range(
|
334
|
+
range_start.range().start_byte,
|
335
|
+
end.range().end_byte,
|
336
|
+
ctx.source_code,
|
337
|
+
),
|
338
|
+
};
|
339
|
+
|
340
|
+
loop {
|
341
|
+
// ——— 1) try current [start…end] slice ———
|
342
|
+
let captured = make_capture(&range_end);
|
343
|
+
if satisfies_constraints(&captured, constraints) {
|
344
|
+
let mut sub_ctx = MatchingContext {
|
345
|
+
cursor: next_cursor.clone(),
|
346
|
+
source_code: ctx.source_code,
|
347
|
+
top_node: ctx.top_node,
|
348
|
+
};
|
349
|
+
if let PatternMatchResult::Success {
|
350
|
+
mut captures,
|
351
|
+
consumed_nodes,
|
352
|
+
range: _,
|
353
|
+
} = match_cs_pattern(&mut sub_ctx, remaining_pattern, should_match)
|
354
|
+
{
|
355
|
+
// conflict check
|
356
|
+
if let Some(prev) = captures.get(var_name) {
|
357
|
+
if prev.text.trim() != captured.text.trim() {
|
358
|
+
return PatternMatchResult::failed();
|
359
|
+
}
|
360
|
+
}
|
361
|
+
captures.insert(var_name.to_owned(), captured);
|
362
|
+
return PatternMatchResult::success(captures, consumed_nodes);
|
363
|
+
}
|
364
|
+
}
|
365
|
+
|
366
|
+
// ——— 2) need to expand slice? ———
|
367
|
+
if !allow_horizontal_expansion || is_last {
|
368
|
+
return PatternMatchResult::failed();
|
369
|
+
}
|
370
|
+
|
371
|
+
// grow the range to include the node under next_cursor
|
372
|
+
range_end = next_cursor.node();
|
373
|
+
is_last = !next_cursor.goto_next_sibling();
|
374
|
+
if is_last {
|
375
|
+
// once we’re at the end of the range, check if there is any ancestor left to match
|
376
|
+
should_match = CursorNavigator::find_next_sibling_or_ancestor_sibling(&mut next_cursor);
|
377
|
+
}
|
378
|
+
}
|
379
|
+
}
|
380
|
+
|
381
|
+
// =============================================================================
|
382
|
+
// HELPER FUNCTIONS
|
383
|
+
// =============================================================================
|
384
|
+
|
385
|
+
/// Helper function to create a match from captured nodes
|
386
|
+
fn create_match_from_capture(
|
387
|
+
replace_node_key: &str, match_map: HashMap<String, CapturedNode>, _range: Range,
|
388
|
+
) -> Match {
|
389
|
+
let replace_node_match = match_map.get(replace_node_key).cloned().unwrap_or_else(|| {
|
390
|
+
panic!("The tag {replace_node_key} provided in the replace node is not present")
|
391
|
+
});
|
392
|
+
|
393
|
+
Match {
|
394
|
+
matched_string: replace_node_match.text,
|
395
|
+
range: replace_node_match.range,
|
396
|
+
matches: match_map.into_iter().map(|(k, v)| (k, v.text)).collect(),
|
397
|
+
associated_comma: None,
|
398
|
+
associated_comments: Vec::new(),
|
399
|
+
associated_leading_empty_lines: Vec::new(),
|
400
|
+
}
|
401
|
+
}
|
402
|
+
|
403
|
+
/// Handle the case where pattern is empty or nodes are exhausted
|
404
|
+
fn check_match_completion(
|
405
|
+
ctx: &mut MatchingContext<'_>, cs_elements: &[ResolvedCsElement], can_continue: bool,
|
406
|
+
) -> Option<PatternMatchResult> {
|
407
|
+
if cs_elements.is_empty() {
|
408
|
+
if !can_continue {
|
409
|
+
return Some(PatternMatchResult::success(
|
410
|
+
HashMap::new(),
|
411
|
+
ctx.top_node.child_count() - 1,
|
412
|
+
));
|
413
|
+
}
|
414
|
+
let index = find_last_matched_node(&mut ctx.cursor, ctx.top_node);
|
415
|
+
return match index {
|
416
|
+
Some(consumed_nodes) => Some(PatternMatchResult::success(HashMap::new(), consumed_nodes)),
|
417
|
+
None => Some(PatternMatchResult::failed()),
|
418
|
+
};
|
419
|
+
} else if !can_continue {
|
420
|
+
return Some(PatternMatchResult::failed());
|
421
|
+
}
|
422
|
+
None
|
423
|
+
}
|
424
|
+
|
425
|
+
/// Finds the index of the last matched node relative to the top level node.
|
426
|
+
/// Returns the index of the child node where matching concluded, or 0 if the cursor
|
427
|
+
/// is not positioned on a child of the parent node.
|
428
|
+
fn find_last_matched_node(cursor: &mut TreeCursor, parent_node: &Node) -> Option<usize> {
|
429
|
+
CursorNavigator::find_child_index(&cursor.node(), parent_node)
|
430
|
+
.map(|i| if i > 0 { i - 1 } else { 0 })
|
431
|
+
}
|
432
|
+
|
433
|
+
/// Create an empty captured node for zero-match patterns
|
434
|
+
fn create_empty_captured_node() -> CapturedNode {
|
435
|
+
CapturedNode {
|
436
|
+
range: Range {
|
437
|
+
start_byte: 0,
|
438
|
+
end_byte: 0,
|
439
|
+
start_point: crate::models::matches::Point { row: 0, column: 0 },
|
440
|
+
end_point: crate::models::matches::Point { row: 0, column: 0 },
|
441
|
+
},
|
442
|
+
text: String::new(),
|
443
|
+
}
|
444
|
+
}
|
445
|
+
|
446
|
+
#[cfg(test)]
|
447
|
+
#[path = "unit_tests/interpreter_test.rs"]
|
448
|
+
mod interpreter_test;
|