polyglot-piranha 0.3.36__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/Cargo.lock +1 -1
  2. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/Cargo.toml +1 -1
  3. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/PKG-INFO +1 -1
  4. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/utils.rs +3 -3
  5. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/main.rs +1 -1
  6. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/capture_group_patterns.rs +6 -6
  7. polyglot_piranha-0.4.0/src/models/concrete_syntax/concrete_syntax.pest +22 -0
  8. polyglot_piranha-0.4.0/src/models/concrete_syntax/constraint_checker.rs +29 -0
  9. polyglot_piranha-0.4.0/src/models/concrete_syntax/cursor_utils.rs +66 -0
  10. polyglot_piranha-0.4.0/src/models/concrete_syntax/interpreter.rs +448 -0
  11. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/concrete_syntax/mod.rs +4 -0
  12. polyglot_piranha-0.4.0/src/models/concrete_syntax/parser.rs +236 -0
  13. polyglot_piranha-0.4.0/src/models/concrete_syntax/resolver.rs +92 -0
  14. polyglot_piranha-0.4.0/src/models/concrete_syntax/types.rs +103 -0
  15. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/concrete_syntax/unit_tests/interpreter_test.rs +33 -2
  16. polyglot_piranha-0.3.36/src/models/concrete_syntax/unit_tests/test_parser.rs → polyglot_piranha-0.4.0/src/models/concrete_syntax/unit_tests/parser_test.rs +79 -3
  17. polyglot_piranha-0.4.0/src/models/concrete_syntax/unit_tests/resolver_test.rs +117 -0
  18. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/edit.rs +2 -2
  19. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/filter.rs +2 -2
  20. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/matches.rs +4 -4
  21. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/outgoing_edges.rs +1 -1
  22. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/piranha_arguments.rs +1 -1
  23. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/piranha_output.rs +1 -1
  24. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/rule.rs +1 -1
  25. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/rule_graph.rs +4 -4
  26. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/rule_store.rs +1 -1
  27. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/tree_sitter_utilities.rs +1 -1
  28. polyglot_piranha-0.3.36/src/models/concrete_syntax/concrete_syntax.pest +0 -15
  29. polyglot_piranha-0.3.36/src/models/concrete_syntax/interpreter.rs +0 -426
  30. polyglot_piranha-0.3.36/src/models/concrete_syntax/parser.rs +0 -112
  31. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/LICENSE +0 -0
  32. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/NOTICE +0 -0
  33. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/README.md +0 -0
  34. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/plugins/pyproject.toml +0 -0
  35. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/polyglot_piranha.pyi +0 -0
  36. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/pyproject.toml +0 -0
  37. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/go/edges.toml +0 -0
  38. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/go/rules.toml +0 -0
  39. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/go/scope_config.toml +0 -0
  40. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java/edges.toml +0 -0
  41. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java/rules.toml +0 -0
  42. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java/scope_config.toml +0 -0
  43. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java_cs/edges.toml +0 -0
  44. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java_cs/rules.toml +0 -0
  45. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/java_cs/scope_config.toml +0 -0
  46. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/kt/edges.toml +0 -0
  47. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/kt/rules.toml +0 -0
  48. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/kt/scope_config.toml +0 -0
  49. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/ruby/edges.toml +0 -0
  50. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/ruby/rules.toml +0 -0
  51. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/ruby/scope_config.toml +0 -0
  52. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/scala/scope_config.toml +0 -0
  53. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/swift/edges.toml +0 -0
  54. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/swift/rules.toml +0 -0
  55. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/cleanup_rules/swift/scope_config.toml +0 -0
  56. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/README.md +0 -0
  57. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/analysis.rs +0 -0
  58. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/mod.rs +0 -0
  59. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/tag_analysis.rs +0 -0
  60. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/df/unit_tests/tag_analysis_test.rs +0 -0
  61. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/lib.rs +0 -0
  62. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/default_configs.rs +0 -0
  63. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/language.rs +0 -0
  64. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/mod.rs +0 -0
  65. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/scopes.rs +0 -0
  66. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/source_code_unit.rs +0 -0
  67. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/piranha_arguments_test.rs +0 -0
  68. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/rule_graph_validation_test.rs +0 -0
  69. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/rule_test.rs +0 -0
  70. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/scopes_test.rs +0 -0
  71. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/source_code_unit_test.rs +0 -0
  72. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/testdata/custom_builtin/edges.toml +0 -0
  73. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/models/unit_tests/testdata/custom_builtin/rules.toml +0 -0
  74. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/mod.rs +0 -0
  75. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_go.rs +0 -0
  76. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_java.rs +0 -0
  77. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_kt.rs +0 -0
  78. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_python.rs +0 -0
  79. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_scala.rs +0 -0
  80. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_scm.rs +0 -0
  81. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_strings.rs +0 -0
  82. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_swift.rs +0 -0
  83. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_thrift.rs +0 -0
  84. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_ts.rs +0 -0
  85. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_tsx.rs +0 -0
  86. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/tests/test_piranha_yaml.rs +0 -0
  87. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/mod.rs +0 -0
  88. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/regex_utilities.rs +0 -0
  89. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/unit_tests/tree_sitter_utilities_test.rs +0 -0
  90. {polyglot_piranha-0.3.36 → polyglot_piranha-0.4.0}/src/utilities/unit_tests/utilities_test.rs +0 -0
@@ -687,7 +687,7 @@ dependencies = [
687
687
 
688
688
  [[package]]
689
689
  name = "piranha"
690
- version = "0.3.36"
690
+ version = "0.4.0"
691
691
  dependencies = [
692
692
  "assert_cmd",
693
693
  "cc",
@@ -2,7 +2,7 @@
2
2
  authors = ["Uber Technologies Inc."]
3
3
  name = "piranha"
4
4
  description = "Polyglot Piranha is a library for performing structural find and replace with deep cleanup."
5
- version = "0.3.36"
5
+ version = "0.4.0"
6
6
  edition = "2021"
7
7
  include = ["pyproject.toml", "src/"]
8
8
  exclude = ["legacy"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polyglot_piranha
3
- Version: 0.3.36
3
+ Version: 0.4.0
4
4
  Requires-Dist: pytest>=8.0 ; extra == 'dev'
5
5
  Provides-Extra: dev
6
6
  License-File: LICENSE
@@ -59,7 +59,7 @@ pub fn get_capture_groups_from_regex(re: Regex) -> Vec<String> {
59
59
 
60
60
  // Check all capture names (i.e., named groups) in the pattern
61
61
  for capture_name in re.capture_names().flatten() {
62
- let tag = format!("@{}", capture_name);
62
+ let tag = format!("@{capture_name}");
63
63
  tags.push(tag);
64
64
  }
65
65
  tags
@@ -112,7 +112,7 @@ pub fn get_capture_groups_from_tsq(pattern: String) -> Vec<String> {
112
112
  })
113
113
  .unwrap_or_default();
114
114
 
115
- log::debug!("capture_groups: {:?}", capture_groups);
115
+ log::debug!("capture_groups: {capture_groups:?}");
116
116
 
117
117
  capture_groups
118
118
  }
@@ -166,6 +166,6 @@ pub fn get_capture_group_usage_from_tsq(pattern: String) -> Vec<String> {
166
166
  .map(|mat| mat.as_str().to_owned())
167
167
  .collect();
168
168
 
169
- log::debug!("capture_groups: {:?}", capture_groups);
169
+ log::debug!("capture_groups: {capture_groups:?}");
170
170
  capture_groups
171
171
  }
@@ -35,7 +35,7 @@ fn main() {
35
35
 
36
36
  let args = PiranhaArguments::from_cli();
37
37
 
38
- debug!("Piranha Arguments are \n{:#?}", args);
38
+ debug!("Piranha Arguments are \n{args:#?}");
39
39
  let piranha_output_summaries = execute_piranha(&args);
40
40
 
41
41
  if let Some(path) = args.path_to_output_summary() {
@@ -133,7 +133,6 @@ impl CompiledCGPattern {
133
133
  &self, node: &Node, source_code: String, recursive: bool, replace_node: Option<String>,
134
134
  replace_node_idx: Option<u8>,
135
135
  ) -> Vec<Match> {
136
- let code_str = source_code.as_bytes();
137
136
  match self {
138
137
  CompiledCGPattern::Q(query) => get_all_matches_for_query(
139
138
  node,
@@ -147,14 +146,15 @@ impl CompiledCGPattern {
147
146
  get_all_matches_for_regex(node, source_code, regex, recursive, replace_node)
148
147
  }
149
148
  CompiledCGPattern::M(concrete_syntax) => {
150
- let matches = get_all_matches_for_concrete_syntax(
149
+ let source_code_ref = source_code.as_bytes();
150
+ let resolved_syntax = concrete_syntax.clone().resolve().unwrap();
151
+ get_all_matches_for_concrete_syntax(
151
152
  node,
152
- code_str,
153
- concrete_syntax,
153
+ source_code_ref,
154
+ &resolved_syntax,
154
155
  recursive,
155
156
  replace_node,
156
- );
157
- matches.0
157
+ )
158
158
  }
159
159
  }
160
160
  }
@@ -0,0 +1,22 @@
1
+ // Concrete Syntax Grammar for Piranha
2
+ concrete_syntax = { SOI ~ pattern ~ EOI }
3
+ pattern = { (element)+ ~ ("|>" ~ constraints)? }
4
+
5
+ // An element is either a capture or literal text
6
+ element = _{ capture | literal_text }
7
+
8
+ // Captures: :[name], :[name+], :[name*], @name
9
+ capture = { (":[" ~ identifier ~ capture_mode? ~ "]") | "@"~identifier } // FIXME: Should remove @ from the grammar, because literals may be parsed incorrectly
10
+ capture_mode = { "+" | "*" }
11
+ identifier = { (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
12
+
13
+ // Literal text - single word/token without whitespace
14
+ literal_text = { (!( ":[" | "|>" | "@" ) ~ ANY)+ }
15
+ WHITESPACE = _{ (" " | "\t" | "\r" | "\n")+ }
16
+
17
+ // Where constraints (extensible for future constraint types)
18
+ constraints = { constraint ~ ("," ~ constraint)* }
19
+ constraint = { in_constraint }
20
+ in_constraint = { capture ~ "in" ~ "[" ~ list_items? ~ "]" }
21
+ list_items = { quoted_string ~ ("," ~ quoted_string)* }
22
+ quoted_string = { "\"" ~ ( "\\\\" | "\\\"" | (!"\"" ~ ANY) )* ~ "\"" }
@@ -0,0 +1,29 @@
1
+ /*
2
+ Copyright (c) 2023 Uber Technologies, Inc.
3
+
4
+ <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
5
+ except in compliance with the License. You may obtain a copy of the License at
6
+ <p>http://www.apache.org/licenses/LICENSE-2.0
7
+
8
+ <p>Unless required by applicable law or agreed to in writing, software distributed under the
9
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
10
+ express or implied. See the License for the specific language governing permissions and
11
+ limitations under the License.
12
+ */
13
+
14
+ use crate::models::concrete_syntax::parser::CsConstraint;
15
+ use crate::models::concrete_syntax::types::CapturedNode;
16
+
17
+ /// Check if a captured node satisfies a single constraint
18
+ pub(crate) fn check_constraint(node: &CapturedNode, ctr: &CsConstraint) -> bool {
19
+ match ctr {
20
+ CsConstraint::In { items, .. } => items.contains(&node.text.to_string()),
21
+ }
22
+ }
23
+
24
+ /// Check if a captured node satisfies all constraints
25
+ pub(crate) fn satisfies_constraints(node: &CapturedNode, constraints: &[CsConstraint]) -> bool {
26
+ constraints
27
+ .iter()
28
+ .all(|constraint| check_constraint(node, constraint))
29
+ }
@@ -0,0 +1,66 @@
1
+ /*
2
+ Copyright (c) 2023 Uber Technologies, Inc.
3
+
4
+ <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
5
+ except in compliance with the License. You may obtain a copy of the License at
6
+ <p>http://www.apache.org/licenses/LICENSE-2.0
7
+
8
+ <p>Unless required by applicable law or agreed to in writing, software distributed under the
9
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
10
+ express or implied. See the License for the specific language governing permissions and
11
+ limitations under the License.
12
+ */
13
+
14
+ use crate::models::matches::Range;
15
+ use tree_sitter::{Node, TreeCursor};
16
+
17
+ /// Cursor navigation utilities for tree traversal during pattern matching.
18
+ pub struct CursorNavigator;
19
+
20
+ impl CursorNavigator {
21
+ /// Advances the cursor to the next sibling, or if no sibling exists,
22
+ /// moves up to the parent and tries to find the next sibling at that level.
23
+ pub fn find_next_sibling_or_ancestor_sibling(cursor: &mut TreeCursor) -> bool {
24
+ while !cursor.goto_next_sibling() {
25
+ if !cursor.goto_parent() {
26
+ return false;
27
+ }
28
+ }
29
+ true
30
+ }
31
+
32
+ /// Skips over comment nodes by advancing the cursor to the next non-comment sibling.
33
+ /// If the current node is not a comment, the cursor position remains unchanged.
34
+ pub fn skip_comment_nodes(cursor: &mut TreeCursor) {
35
+ let mut node = cursor.node();
36
+ while node.kind().contains("comment") && cursor.goto_next_sibling() {
37
+ node = cursor.node();
38
+ }
39
+ }
40
+
41
+ /// Finds the index of a target node among its parent's children.
42
+ pub fn find_child_index(target_node: &Node, parent_node: &Node) -> Option<usize> {
43
+ parent_node
44
+ .children(&mut parent_node.walk())
45
+ .enumerate()
46
+ .find(|&(_i, child)| child == *target_node)
47
+ .map(|(i, _child)| i)
48
+ }
49
+
50
+ /// Creates a range that spans from the start of the first node to the end of the last node.
51
+ pub fn span_node_ranges(first_node: &Node, last_node: &Node) -> Range {
52
+ Range::span_ranges(first_node.range(), last_node.range())
53
+ }
54
+
55
+ /// Extracts text from source code within the specified byte range.
56
+ pub fn get_text_from_range(start_byte: usize, end_byte: usize, source_code: &[u8]) -> String {
57
+ let text_slice = &source_code[start_byte..end_byte];
58
+ String::from_utf8_lossy(text_slice).to_string()
59
+ }
60
+ }
61
+
62
+ #[cfg(test)]
63
+ mod tests {
64
+
65
+ // Tests will be added as we migrate functionality
66
+ }
@@ -0,0 +1,448 @@
1
+ /*
2
+ Copyright (c) 2023 Uber Technologies, Inc.
3
+
4
+ <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
5
+ except in compliance with the License. You may obtain a copy of the License at
6
+ <p>http://www.apache.org/licenses/LICENSE-2.0
7
+
8
+ <p>Unless required by applicable law or agreed to in writing, software distributed under the
9
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
10
+ express or implied. See the License for the specific language governing permissions and
11
+ limitations under the License.
12
+ */
13
+
14
+ use crate::models::matches::Range;
15
+
16
+ use crate::models::concrete_syntax::constraint_checker::satisfies_constraints;
17
+ use crate::models::concrete_syntax::cursor_utils::CursorNavigator;
18
+ use crate::models::concrete_syntax::parser::CaptureMode;
19
+ use crate::models::concrete_syntax::parser::CsConstraint;
20
+ use crate::models::concrete_syntax::resolver::{ResolvedConcreteSyntax, ResolvedCsElement};
21
+ use crate::models::concrete_syntax::types::{CapturedNode, MatchingContext, PatternMatchResult};
22
+ use crate::models::matches::Match;
23
+ use std::collections::HashMap;
24
+ use tree_sitter::{Node, TreeCursor};
25
+ use tree_sitter_traversal::Cursor;
26
+
27
+ // =============================================================================
28
+ // PUBLIC API
29
+ // =============================================================================
30
+
31
+ pub(crate) fn get_all_matches_for_concrete_syntax(
32
+ node: &Node, source_code_ref: &[u8], cs: &ResolvedConcreteSyntax, recursive: bool,
33
+ replace_node: Option<String>,
34
+ ) -> Vec<Match> {
35
+ let mut matches: Vec<Match> = Vec::new();
36
+
37
+ if let PatternMatchResult::Success {
38
+ captures: mut mapping,
39
+ consumed_nodes: _last_node_index,
40
+ range: Some(range),
41
+ } = match_sequential_siblings(&mut node.walk(), source_code_ref, &cs.pattern.sequence)
42
+ {
43
+ // If the user doesn't specify a replace node, we use '*' as the default (everything)
44
+ let replace_node_key = replace_node.clone().unwrap_or("*".to_string());
45
+
46
+ if replace_node_key == "*" {
47
+ let replace_node_match = CapturedNode {
48
+ range,
49
+ text: CursorNavigator::get_text_from_range(
50
+ range.start_byte,
51
+ range.end_byte,
52
+ source_code_ref,
53
+ ),
54
+ };
55
+ mapping.insert("*".to_string(), replace_node_match);
56
+ }
57
+
58
+ let match_item = create_match_from_capture(&replace_node_key, mapping, range);
59
+ matches.push(match_item);
60
+ }
61
+
62
+ if recursive {
63
+ let mut cursor = node.walk();
64
+ for child in node.children(&mut cursor) {
65
+ let mut inner_matches = get_all_matches_for_concrete_syntax(
66
+ &child,
67
+ source_code_ref,
68
+ cs,
69
+ recursive,
70
+ replace_node.clone(),
71
+ );
72
+ matches.append(&mut inner_matches)
73
+ }
74
+ }
75
+ matches
76
+ }
77
+
78
+ // =============================================================================
79
+ // CORE MATCHING ENGINE
80
+ // =============================================================================
81
+
82
+ /// Attempts to match a pattern against sequential sibling nodes in an AST.
83
+ ///
84
+ /// ## How it works:
85
+ /// Given a function body with multiple statements, try to match a pattern starting
86
+ /// at each statement position until we find a match.
87
+ ///
88
+ /// ```
89
+ /// Pattern: int :[x] = 2; :[x]++;
90
+ ///
91
+ /// Function Body AST:
92
+ /// ├── [0] char a = 'c' ; ← Start matching here: ❌ No match
93
+ /// ├── [1] a++; ← Start matching here: ❌ No match
94
+ /// ├── [2] int b = 2; ← Start matching here: ✅ MATCH! (spans [2] and [3]) -> Returns
95
+ /// ├── [3] b++; ←
96
+ /// ├── [4] return a; ←
97
+ /// └── [5] } ←
98
+ /// ```
99
+ ///
100
+ /// The algorithm slides a "matching window" across all possible starting positions,
101
+ /// trying to match consecutive siblings against the pattern elements.
102
+ fn match_sequential_siblings(
103
+ cursor: &mut TreeCursor, source_code_ref: &[u8], cs_elements: &[ResolvedCsElement],
104
+ ) -> PatternMatchResult {
105
+ let parent_node = cursor.node();
106
+ let mut child_seq_match_start = 0;
107
+ if cursor.goto_first_child() {
108
+ // Iterate through siblings to find a match
109
+ loop {
110
+ // Clone the cursor in order to attempt matching the sequence starting at cursor.node
111
+ // Cloning here is necessary; otherwise we won't be able to advance to the next sibling if the matching fails
112
+ let result = {
113
+ match_cs_pattern(
114
+ &mut MatchingContext {
115
+ cursor: cursor.clone(),
116
+ source_code: source_code_ref,
117
+ top_node: &parent_node,
118
+ },
119
+ cs_elements,
120
+ true,
121
+ )
122
+ };
123
+
124
+ // If we got a successful match, extract the mapping and index
125
+ if let PatternMatchResult::Success {
126
+ captures: mapping,
127
+ consumed_nodes: last_node_index,
128
+ range: None,
129
+ } = result
130
+ {
131
+ // Determine the last matched node. Remember, we are matching subsequences of children [n ... k]
132
+ let last_node = parent_node.child(last_node_index);
133
+ let range = Range::span_ranges(cursor.node().range(), last_node.unwrap().range());
134
+ if last_node_index != child_seq_match_start || parent_node.child_count() == 1 {
135
+ return PatternMatchResult::Success {
136
+ captures: mapping,
137
+ consumed_nodes: last_node_index,
138
+ range: Some(range),
139
+ };
140
+ }
141
+ // This is to prevent double matches when unrolling a node. i.e., matching the statement in a function body,
142
+ // as well as the statement itself when unrolled.
143
+ return PatternMatchResult::failed();
144
+ }
145
+
146
+ child_seq_match_start += 1;
147
+ if !cursor.goto_next_sibling() {
148
+ break;
149
+ }
150
+ }
151
+ } // Not currently handling matching of leaf nodes. Current semantics would never match it anyway.
152
+ PatternMatchResult::failed()
153
+ }
154
+
155
+ /// Top-level entry point for concrete-syntax matching.
156
+ ///
157
+ /// Dispatches each element of the resolved pattern—capture groups or literal text—against the AST
158
+ /// at the current cursor position, recursing until the pattern is fully applied.
159
+ pub(crate) fn match_cs_pattern(
160
+ ctx: &mut MatchingContext<'_>, cs_elements: &[ResolvedCsElement], can_continue: bool,
161
+ ) -> PatternMatchResult {
162
+ // Check if we finished matching. If we haven't, `check_match_completion` returns None
163
+ if let Some(result) = check_match_completion(ctx, cs_elements, can_continue) {
164
+ return result;
165
+ }
166
+
167
+ // Skip comment nodes always
168
+ CursorNavigator::skip_comment_nodes(&mut ctx.cursor);
169
+
170
+ // Get the first element and remaining elements
171
+ let (first, rest) = cs_elements.split_first().unwrap();
172
+
173
+ match first {
174
+ ResolvedCsElement::Capture {
175
+ name,
176
+ mode,
177
+ constraints,
178
+ } => match mode {
179
+ CaptureMode::Single => match_single_capture(ctx, name, constraints, rest),
180
+ CaptureMode::OnePlus => match_one_plus_capture(ctx, name, constraints, rest),
181
+ CaptureMode::ZeroPlus => match_zero_plus_capture(ctx, name, constraints, rest),
182
+ },
183
+ ResolvedCsElement::Literal(literal_text) => match_literal(ctx, literal_text, rest),
184
+ }
185
+ }
186
+
187
+ fn match_literal(
188
+ ctx: &mut MatchingContext<'_>, literal_text: &str, remaining_elements: &[ResolvedCsElement],
189
+ ) -> PatternMatchResult {
190
+ // We match literals against leaves
191
+ while ctx.cursor.node().child_count() != 0 {
192
+ ctx.cursor.goto_first_child();
193
+ }
194
+
195
+ let node_code = ctx.cursor.node().utf8_text(ctx.source_code).unwrap().trim();
196
+ if literal_text.starts_with(node_code) && !node_code.is_empty() {
197
+ let advance_by = node_code.len();
198
+ // Can only advance if there are still enough chars to consume
199
+ if advance_by > literal_text.len() {
200
+ return PatternMatchResult::failed();
201
+ }
202
+
203
+ let should_match = CursorNavigator::find_next_sibling_or_ancestor_sibling(&mut ctx.cursor);
204
+
205
+ // If we consumed the entire literal, continue with remaining elements
206
+ return if advance_by == literal_text.len() {
207
+ match_cs_pattern(ctx, remaining_elements, should_match)
208
+ } else {
209
+ // If we only consumed part of the literal, create a new literal with the remaining text
210
+ let remaining_literal = &literal_text[advance_by..];
211
+ let mut new_elements = vec![ResolvedCsElement::Literal(remaining_literal.to_string())];
212
+ new_elements.extend_from_slice(remaining_elements);
213
+ match_cs_pattern(ctx, &new_elements, should_match)
214
+ };
215
+ }
216
+ PatternMatchResult::failed()
217
+ }
218
+
219
+ // =============================================================================
220
+ // CAPTURE MODE HANDLERS
221
+ // =============================================================================
222
+
223
+ /// Handle single capture: :[var] - must match exactly one node
224
+ fn match_single_capture(
225
+ ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
226
+ remaining_pattern: &[ResolvedCsElement],
227
+ ) -> PatternMatchResult {
228
+ match_at_all_tree_levels(ctx, var_name, constraints, remaining_pattern, false)
229
+ }
230
+
231
+ /// Handle one-plus capture: :[var+] - must match one or more nodes
232
+ fn match_one_plus_capture(
233
+ ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
234
+ remaining_pattern: &[ResolvedCsElement],
235
+ ) -> PatternMatchResult {
236
+ match_at_all_tree_levels(ctx, var_name, constraints, remaining_pattern, true)
237
+ }
238
+
239
+ /// Handle zero-plus capture: :[var*] - can match zero or more nodes
240
+ fn match_zero_plus_capture(
241
+ ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
242
+ remaining_pattern: &[ResolvedCsElement],
243
+ ) -> PatternMatchResult {
244
+ // First try to match with zero nodes
245
+ let mut zero_match_ctx = MatchingContext {
246
+ cursor: ctx.cursor.clone(),
247
+ source_code: ctx.source_code,
248
+ top_node: ctx.top_node,
249
+ };
250
+ let empty_capture = create_empty_captured_node();
251
+ if satisfies_constraints(&empty_capture, constraints) {
252
+ let zero_match_result = match_cs_pattern(&mut zero_match_ctx, remaining_pattern, true);
253
+ if let PatternMatchResult::Success {
254
+ captures: mut zero_captures,
255
+ consumed_nodes: last_matched_node_idx,
256
+ range: None,
257
+ } = zero_match_result
258
+ {
259
+ zero_captures.insert(var_name.to_string(), empty_capture);
260
+ return PatternMatchResult::success(zero_captures, last_matched_node_idx);
261
+ }
262
+ }
263
+
264
+ // If zero nodes didn't work, try one or more nodes
265
+ match_at_all_tree_levels(ctx, var_name, constraints, remaining_pattern, true)
266
+ }
267
+
268
+ /// Attempts to match a capture by exploring different levels of the AST hierarchy.
269
+ /// Starting from the current node, it tries to match progressively smaller subtrees
270
+ /// by traversing down to child nodes. This allows flexible matching where a capture
271
+ /// can match a large statement block or just a single expression within it.
272
+ ///
273
+ /// For example, given: `int x = 1; x++;`
274
+ /// The capture `:[x]` could match:
275
+ /// - The entire statement block: `int x = 1; x++;`
276
+ /// - Just the declaration: `int x = 1;`
277
+ /// - Just the type: `int`
278
+ fn match_at_all_tree_levels(
279
+ ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
280
+ remaining_pattern: &[ResolvedCsElement], allow_horizontal_expansion: bool,
281
+ ) -> PatternMatchResult {
282
+ // Try matching at different tree levels, going deeper each iteration
283
+ loop {
284
+ let result = try_match_node_range(
285
+ ctx,
286
+ var_name,
287
+ constraints,
288
+ remaining_pattern,
289
+ allow_horizontal_expansion,
290
+ );
291
+ if let PatternMatchResult::Success { .. } = result {
292
+ return result;
293
+ }
294
+
295
+ // Move one level down to try matching against smaller/deeper nodes
296
+ if !ctx.cursor.goto_first_child() {
297
+ break;
298
+ }
299
+ }
300
+ PatternMatchResult::failed()
301
+ }
302
+
303
+ /// Try to match a range of nodes starting at the current cursor position, expanding the range if needed and allowed
304
+ /// It assigns [range_start, range_end] to a capture group, and matches the rest of the cs pattern against remaining nodes
305
+ ///
306
+ /// Per‐iteration diagram:
307
+ ///
308
+ /// AST siblings: … ─ node₀ ─ node₁ ─ node₂ ─ node₃ ─ node₄ ─ …
309
+ /// ↑ ↑
310
+ /// range_start range_end
311
+ ///
312
+ /// 1) capture = text(node₁…node₂)
313
+ /// 2) match_cs_pattern(
314
+ /// remaining_elements, // CS elements still to match
315
+ /// /* should_match */ true if node₃ exists
316
+ /// ) starting at node₃, node₄, …
317
+ fn try_match_node_range(
318
+ ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],
319
+ remaining_pattern: &[ResolvedCsElement], allow_horizontal_expansion: bool,
320
+ ) -> PatternMatchResult {
321
+ // 1. Initial anchors
322
+ let range_start = ctx.cursor.node();
323
+ let mut range_end = range_start;
324
+ let mut next_cursor = ctx.cursor.clone();
325
+ // 'should_match' is used to check whether there are siblings or ancestor siblings we need to match
326
+ // after we assign the current range to the capture node
327
+ let mut should_match = CursorNavigator::find_next_sibling_or_ancestor_sibling(&mut next_cursor);
328
+ let mut is_last = false;
329
+
330
+ // Helper to slice out the captured text
331
+ let make_capture = |end: &Node| CapturedNode {
332
+ range: Range::span_ranges(range_start.range(), end.range()),
333
+ text: CursorNavigator::get_text_from_range(
334
+ range_start.range().start_byte,
335
+ end.range().end_byte,
336
+ ctx.source_code,
337
+ ),
338
+ };
339
+
340
+ loop {
341
+ // ——— 1) try current [start…end] slice ———
342
+ let captured = make_capture(&range_end);
343
+ if satisfies_constraints(&captured, constraints) {
344
+ let mut sub_ctx = MatchingContext {
345
+ cursor: next_cursor.clone(),
346
+ source_code: ctx.source_code,
347
+ top_node: ctx.top_node,
348
+ };
349
+ if let PatternMatchResult::Success {
350
+ mut captures,
351
+ consumed_nodes,
352
+ range: _,
353
+ } = match_cs_pattern(&mut sub_ctx, remaining_pattern, should_match)
354
+ {
355
+ // conflict check
356
+ if let Some(prev) = captures.get(var_name) {
357
+ if prev.text.trim() != captured.text.trim() {
358
+ return PatternMatchResult::failed();
359
+ }
360
+ }
361
+ captures.insert(var_name.to_owned(), captured);
362
+ return PatternMatchResult::success(captures, consumed_nodes);
363
+ }
364
+ }
365
+
366
+ // ——— 2) need to expand slice? ———
367
+ if !allow_horizontal_expansion || is_last {
368
+ return PatternMatchResult::failed();
369
+ }
370
+
371
+ // grow the range to include the node under next_cursor
372
+ range_end = next_cursor.node();
373
+ is_last = !next_cursor.goto_next_sibling();
374
+ if is_last {
375
+ // once we’re at the end of the range, check if there is any ancestor left to match
376
+ should_match = CursorNavigator::find_next_sibling_or_ancestor_sibling(&mut next_cursor);
377
+ }
378
+ }
379
+ }
380
+
381
+ // =============================================================================
382
+ // HELPER FUNCTIONS
383
+ // =============================================================================
384
+
385
+ /// Helper function to create a match from captured nodes
386
+ fn create_match_from_capture(
387
+ replace_node_key: &str, match_map: HashMap<String, CapturedNode>, _range: Range,
388
+ ) -> Match {
389
+ let replace_node_match = match_map.get(replace_node_key).cloned().unwrap_or_else(|| {
390
+ panic!("The tag {replace_node_key} provided in the replace node is not present")
391
+ });
392
+
393
+ Match {
394
+ matched_string: replace_node_match.text,
395
+ range: replace_node_match.range,
396
+ matches: match_map.into_iter().map(|(k, v)| (k, v.text)).collect(),
397
+ associated_comma: None,
398
+ associated_comments: Vec::new(),
399
+ associated_leading_empty_lines: Vec::new(),
400
+ }
401
+ }
402
+
403
+ /// Handle the case where pattern is empty or nodes are exhausted
404
+ fn check_match_completion(
405
+ ctx: &mut MatchingContext<'_>, cs_elements: &[ResolvedCsElement], can_continue: bool,
406
+ ) -> Option<PatternMatchResult> {
407
+ if cs_elements.is_empty() {
408
+ if !can_continue {
409
+ return Some(PatternMatchResult::success(
410
+ HashMap::new(),
411
+ ctx.top_node.child_count() - 1,
412
+ ));
413
+ }
414
+ let index = find_last_matched_node(&mut ctx.cursor, ctx.top_node);
415
+ return match index {
416
+ Some(consumed_nodes) => Some(PatternMatchResult::success(HashMap::new(), consumed_nodes)),
417
+ None => Some(PatternMatchResult::failed()),
418
+ };
419
+ } else if !can_continue {
420
+ return Some(PatternMatchResult::failed());
421
+ }
422
+ None
423
+ }
424
+
425
+ /// Finds the index of the last matched node relative to the top level node.
426
+ /// Returns the index of the child immediately before the cursor's node (0 if the cursor is on the
427
+ /// first child), or `None` if the cursor is not positioned on a direct child of the parent node.
428
+ fn find_last_matched_node(cursor: &mut TreeCursor, parent_node: &Node) -> Option<usize> {
429
+ CursorNavigator::find_child_index(&cursor.node(), parent_node)
430
+ .map(|i| if i > 0 { i - 1 } else { 0 })
431
+ }
432
+
433
+ /// Create an empty captured node for zero-match patterns
434
+ fn create_empty_captured_node() -> CapturedNode {
435
+ CapturedNode {
436
+ range: Range {
437
+ start_byte: 0,
438
+ end_byte: 0,
439
+ start_point: crate::models::matches::Point { row: 0, column: 0 },
440
+ end_point: crate::models::matches::Point { row: 0, column: 0 },
441
+ },
442
+ text: String::new(),
443
+ }
444
+ }
445
+
446
+ #[cfg(test)]
447
+ #[path = "unit_tests/interpreter_test.rs"]
448
+ mod interpreter_test;
@@ -11,5 +11,9 @@
11
11
  limitations under the License.
12
12
  */
13
13
 
14
+ pub(crate) mod constraint_checker;
15
+ pub(crate) mod cursor_utils;
14
16
  pub(crate) mod interpreter;
15
17
  pub(crate) mod parser;
18
+ pub(crate) mod resolver;
19
+ pub(crate) mod types;