ruby_tree_sitter 0.20.8.3 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +2 -1
  3. data/README.md +32 -18
  4. data/ext/tree_sitter/extconf.rb +1 -1
  5. data/ext/tree_sitter/input.c +1 -0
  6. data/ext/tree_sitter/language.c +131 -46
  7. data/ext/tree_sitter/logger.c +28 -12
  8. data/ext/tree_sitter/node.c +438 -130
  9. data/ext/tree_sitter/parser.c +232 -37
  10. data/ext/tree_sitter/query.c +197 -72
  11. data/ext/tree_sitter/query_cursor.c +140 -28
  12. data/ext/tree_sitter/repo.rb +1 -1
  13. data/ext/tree_sitter/tree.c +118 -34
  14. data/ext/tree_sitter/tree_cursor.c +205 -33
  15. data/ext/tree_sitter/tree_sitter.c +12 -0
  16. data/lib/tree_sitter/node.rb +23 -6
  17. data/lib/tree_sitter/version.rb +4 -2
  18. data/lib/tree_sitter.rb +1 -0
  19. data/lib/tree_stand/ast_modifier.rb +30 -0
  20. data/lib/tree_stand/breadth_first_visitor.rb +54 -0
  21. data/lib/tree_stand/config.rb +13 -0
  22. data/lib/tree_stand/node.rb +224 -0
  23. data/lib/tree_stand/parser.rb +67 -0
  24. data/lib/tree_stand/range.rb +55 -0
  25. data/lib/tree_stand/tree.rb +123 -0
  26. data/lib/tree_stand/utils/printer.rb +73 -0
  27. data/lib/tree_stand/version.rb +7 -0
  28. data/lib/tree_stand/visitor.rb +127 -0
  29. data/lib/tree_stand/visitors/tree_walker.rb +37 -0
  30. data/lib/tree_stand.rb +48 -0
  31. data/tree_sitter.gemspec +14 -11
  32. metadata +37 -108
  33. data/test/README.md +0 -15
  34. data/test/test_helper.rb +0 -9
  35. data/test/tree_sitter/js_test.rb +0 -48
  36. data/test/tree_sitter/language_test.rb +0 -73
  37. data/test/tree_sitter/logger_test.rb +0 -70
  38. data/test/tree_sitter/node_test.rb +0 -411
  39. data/test/tree_sitter/parser_test.rb +0 -140
  40. data/test/tree_sitter/query_test.rb +0 -153
  41. data/test/tree_sitter/tree_cursor_test.rb +0 -83
  42. data/test/tree_sitter/tree_test.rb +0 -51
@@ -16,9 +16,39 @@ static VALUE query_cursor_allocate(VALUE klass) {
16
16
  return res;
17
17
  }
18
18
  DATA_UNWRAP(query_cursor)
19
+ /**
20
+ * Create a new cursor for executing a given query.
21
+ *
22
+ * The cursor stores the state that is needed to iteratively search
23
+ * for matches. To use the query cursor, first call {QueryCursor#exec}
24
+ * to start running a given query on a given syntax node. Then, there are
25
+ * two options for consuming the results of the query:
26
+ * 1. Repeatedly call {QueryCursor#next_match} to iterate over all of the
27
+ * *matches* in the order that they were found. Each match contains the
28
+ * index of the pattern that matched, and an array of captures. Because
29
+ * multiple patterns can match the same set of nodes, one match may contain
30
+ * captures that appear *before* some of the captures from a previous match.
31
+ * 2. Repeatedly call {QueryCursor#next_capture} to iterate over all of the
32
+ * individual *captures* in the order that they appear. This is useful if
33
+ * you don't care about which pattern matched, and just want a single ordered
34
+ * sequence of captures.
35
+ *
36
+ * If you don't care about consuming all of the results, you can stop calling
37
+ * {QueryCursor#next_match} or {QueryCursor#next_capture} at any point.
38
+ * You can then start executing another query on another node by calling
39
+ * {QueryCursor#exec} again.
40
+ */
19
41
  DATA_PTR_NEW(cQueryCursor, TSQueryCursor, query_cursor)
20
42
  DATA_FROM_VALUE(TSQueryCursor *, query_cursor)
21
43
 
44
+ /**
45
+ * Start running a given query on a given node.
46
+ *
47
+ * @param query [Query]
48
+ * @param node [Node]
49
+ *
50
+ * @return [QueryCursor]
51
+ */
22
52
  static VALUE query_cursor_exec(VALUE self, VALUE query, VALUE node) {
23
53
  VALUE res = query_cursor_allocate(cQueryCursor);
24
54
  query_cursor_t *query_cursor = unwrap(res);
@@ -27,49 +57,78 @@ static VALUE query_cursor_exec(VALUE self, VALUE query, VALUE node) {
27
57
  return res;
28
58
  }
29
59
 
60
+ /**
61
+ * Manage the maximum number of in-progress matches allowed by this query
62
+ * cursor.
63
+ *
64
+ * Query cursors have an optional maximum capacity for storing lists of
65
+ * in-progress captures. If this capacity is exceeded, then the
66
+ * earliest-starting match will silently be dropped to make room for further
67
+ * matches. This maximum capacity is optional — by default, query cursors allow
68
+ * any number of pending matches, dynamically allocating new space for them as
69
+ * needed as the query is executed.
70
+ */
30
71
  static VALUE query_cursor_did_exceed_match_limit(VALUE self) {
31
72
  return ts_query_cursor_did_exceed_match_limit(SELF) ? Qtrue : Qfalse;
32
73
  }
33
74
 
34
- static VALUE query_cursor_get_match_limit(VALUE self) {
35
- return UINT2NUM(ts_query_cursor_match_limit(SELF));
36
- }
37
-
75
+ /**
76
+ * @param limit [Integer]
77
+ *
78
+ * @return [nil]
79
+ */
38
80
  static VALUE query_cursor_set_match_limit(VALUE self, VALUE limit) {
39
81
  ts_query_cursor_set_match_limit(SELF, NUM2UINT(limit));
40
82
  return Qnil;
41
83
  }
42
84
 
43
- static VALUE query_cursor_set_byte_range(VALUE self, VALUE from, VALUE to) {
44
- ts_query_cursor_set_byte_range(SELF, NUM2UINT(from), NUM2UINT(to));
45
- return Qnil;
46
- }
47
-
48
- static VALUE query_cursor_set_point_range(VALUE self, VALUE from, VALUE to) {
49
- ts_query_cursor_set_point_range(SELF, value_to_point(from),
50
- value_to_point(to));
51
- return Qnil;
85
+ /**
86
+ * @return [Integer]
87
+ */
88
+ static VALUE query_cursor_get_match_limit(VALUE self) {
89
+ return UINT2NUM(ts_query_cursor_match_limit(SELF));
52
90
  }
53
91
 
54
- static VALUE query_cursor_next_match(VALUE self) {
55
- TSQueryMatch match;
56
- if (ts_query_cursor_next_match(SELF, &match)) {
57
- return new_query_match(&match);
58
- } else {
59
- return Qnil;
92
+ /**
93
+ * Set the maximum start depth for a query cursor.
94
+ *
95
+ * This prevents cursors from exploring children nodes at a certain depth.
96
+ * Note if a pattern includes many children, then they will still be checked.
97
+ *
98
+ * The zero max start depth value can be used as a special behavior and
99
+ * it helps to destructure a subtree by staying on a node and using captures
100
+ * for the parts of interest. Note that a zero max start depth only limits the search
101
+ * depth for a pattern's root node but other nodes that are parts of the pattern
102
+ * may be searched at any depth, as defined by the pattern structure.
103
+ *
104
+ * @param max_start_depth [Integer|nil] set to nil to remove the maximum start
105
+ * depth.
106
+ *
107
+ * @return [nil]
108
+ */
109
+ static VALUE query_cursor_set_max_start_depth(VALUE self,
110
+ VALUE max_start_depth) {
111
+ uint32_t max = UINT32_MAX;
112
+ if (!NIL_P(max_start_depth)) {
113
+ max = NUM2UINT(max_start_depth);
60
114
  }
61
- }
62
-
63
- static VALUE query_cursor_remove_match(VALUE self, VALUE id) {
64
- ts_query_cursor_remove_match(SELF, NUM2UINT(id));
115
+ ts_query_cursor_set_max_start_depth(SELF, max);
65
116
  return Qnil;
66
117
  }
67
118
 
68
- // NOTE: maybe this is the limit of how "transparent" the bindings need to be.
119
+ // FIXME: maybe this is the limit of how "transparent" the bindings need to be.
69
120
  // Pending benchmarks, this can be very inefficient because obviously
70
121
  // ts_query_cursor_next_capture is intended to be used in a loop. Creating an
71
122
  // array of two values and returning them, intuitively speaking, seems very
72
123
  // inefficient.
124
+ // FIXME: maybe this needs to return an empty array to make for a nicer ruby
125
+ // API?
126
+ /**
127
+ * Advance to the next capture of the currently running query.
128
+ *
129
+ * @return [Array<Integer|Boolean>|nil] If there is a capture, return a tuple
130
+ * [Integer, Boolean], otherwise return +nil+.
131
+ */
73
132
  static VALUE query_cursor_next_capture(VALUE self) {
74
133
  TSQueryMatch match;
75
134
  uint32_t index;
@@ -83,21 +142,74 @@ static VALUE query_cursor_next_capture(VALUE self) {
83
142
  }
84
143
  }
85
144
 
145
+ /**
146
+ * Advance to the next match of the currently running query.
147
+ *
148
+ * @return [QueryMatch|nil] The next match, or +nil+ if there are no more matches.
149
+ */
150
+ static VALUE query_cursor_next_match(VALUE self) {
151
+ TSQueryMatch match;
152
+ if (ts_query_cursor_next_match(SELF, &match)) {
153
+ return new_query_match(&match);
154
+ } else {
155
+ return Qnil;
156
+ }
157
+ }
158
+
159
+ static VALUE query_cursor_remove_match(VALUE self, VALUE id) {
160
+ ts_query_cursor_remove_match(SELF, NUM2UINT(id));
161
+ return Qnil;
162
+ }
163
+
164
+ /**
165
+ * @param from [Integer]
166
+ * @param to [Integer]
167
+ *
168
+ * @return [nil]
169
+ */
170
+ static VALUE query_cursor_set_byte_range(VALUE self, VALUE from, VALUE to) {
171
+ ts_query_cursor_set_byte_range(SELF, NUM2UINT(from), NUM2UINT(to));
172
+ return Qnil;
173
+ }
174
+
175
+ /**
176
+ * @param from [Point]
177
+ * @param to [Point]
178
+ *
179
+ * @return [nil]
180
+ */
181
+ static VALUE query_cursor_set_point_range(VALUE self, VALUE from, VALUE to) {
182
+ ts_query_cursor_set_point_range(SELF, value_to_point(from),
183
+ value_to_point(to));
184
+ return Qnil;
185
+ }
186
+
86
187
  void init_query_cursor(void) {
87
188
  cQueryCursor = rb_define_class_under(mTreeSitter, "QueryCursor", rb_cObject);
88
189
 
89
190
  rb_define_alloc_func(cQueryCursor, query_cursor_allocate);
90
191
 
192
+ /* Module methods */
193
+ rb_define_module_function(cQueryCursor, "exec", query_cursor_exec, 2);
194
+
91
195
  /* Class methods */
196
+ // Accessors
92
197
  DECLARE_ACCESSOR(cQueryCursor, query_cursor, match_limit)
93
- rb_define_module_function(cQueryCursor, "exec", query_cursor_exec, 2);
198
+
199
+ // Other
94
200
  rb_define_method(cQueryCursor, "exceed_match_limit?",
95
201
  query_cursor_did_exceed_match_limit, 0);
202
+ rb_define_method(cQueryCursor, "match_limit", query_cursor_get_match_limit,
203
+ 0);
204
+ rb_define_method(cQueryCursor, "match_limit=", query_cursor_set_match_limit,
205
+ 1);
206
+ rb_define_method(cQueryCursor,
207
+ "max_start_depth=", query_cursor_set_max_start_depth, 1);
208
+ rb_define_method(cQueryCursor, "next_capture", query_cursor_next_capture, 0);
209
+ rb_define_method(cQueryCursor, "next_match", query_cursor_next_match, 0);
210
+ rb_define_method(cQueryCursor, "remove_match", query_cursor_remove_match, 1);
96
211
  rb_define_method(cQueryCursor, "set_byte_range", query_cursor_set_byte_range,
97
212
  2);
98
213
  rb_define_method(cQueryCursor, "set_point_range",
99
214
  query_cursor_set_point_range, 2);
100
- rb_define_method(cQueryCursor, "next_match", query_cursor_next_match, 0);
101
- rb_define_method(cQueryCursor, "remove_match", query_cursor_remove_match, 1);
102
- rb_define_method(cQueryCursor, "next_capture", query_cursor_next_capture, 0);
103
215
  }
@@ -17,7 +17,7 @@ module TreeSitter
17
17
  @url = {
18
18
  git: 'https://github.com/tree-sitter/tree-sitter',
19
19
  tar: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.tar.gz",
20
- zip: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.zip"
20
+ zip: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.zip",
21
21
  }
22
22
 
23
23
  @exe = {}
@@ -63,40 +63,115 @@ VALUE new_tree(TSTree *ptr) {
63
63
 
64
64
  DATA_FROM_VALUE(TSTree *, tree)
65
65
 
66
- static VALUE tree_copy(VALUE self) { return new_tree(ts_tree_copy(SELF)); }
66
+ /**
67
+ * Compare an old edited syntax tree to a new syntax tree representing the same
68
+ * document, returning an array of ranges whose syntactic structure has changed.
69
+ *
70
+ * For this to work correctly, the old syntax tree must have been edited such
71
+ * that its ranges match up to the new tree. Generally, you'll want to call
72
+ * this function right after calling one of the {Parser#parse} functions.
73
+ * You need to pass the old tree that was passed to parse, as well as the new
74
+ * tree that was returned from that function.
75
+ *
76
+ * @param old_tree [Tree]
77
+ * @param new_tree [Tree]
78
+ *
79
+ * @return [Array<Range>]
80
+ */
81
+ static VALUE tree_changed_ranges(VALUE _self, VALUE old_tree, VALUE new_tree) {
82
+ TSTree *old = unwrap(old_tree)->data;
83
+ TSTree *new = unwrap(new_tree)->data;
84
+ uint32_t length;
85
+ TSRange *ranges = ts_tree_get_changed_ranges(old, new, &length);
86
+ VALUE res = rb_ary_new_capa(length);
67
87
 
68
- static VALUE tree_root_node(VALUE self) {
69
- return new_node_by_val(ts_tree_root_node(SELF));
88
+ for (uint32_t i = 0; i < length; i++) {
89
+ rb_ary_push(res, new_range(&ranges[i]));
90
+ }
91
+
92
+ if (ranges) {
93
+ free(ranges);
94
+ }
95
+
96
+ return res;
70
97
  }
71
98
 
72
- static VALUE tree_language(VALUE self) {
73
- return new_language(ts_tree_language(SELF));
99
+ static VALUE tree_finalizer(VALUE _self) {
100
+ VALUE rc = rb_cv_get(cTree, "@@rc");
101
+ VALUE keys = rb_funcall(rc, rb_intern("keys"), 0);
102
+ long len = RARRAY_LEN(keys);
103
+
104
+ for (long i = 0; i < len; ++i) {
105
+ VALUE curr = RARRAY_AREF(keys, i);
106
+ unsigned int val = NUM2UINT(rb_hash_lookup(rc, curr));
107
+ if (val > 0) {
108
+ ts_tree_delete((TSTree *)NUM2ULONG(curr));
109
+ }
110
+
111
+ rb_hash_delete(rc, curr);
112
+ }
113
+
114
+ return Qnil;
74
115
  }
75
116
 
117
+ /**
118
+ * Create a shallow copy of the syntax tree. This is very fast.
119
+ *
120
+ * You need to copy a syntax tree in order to use it on more than one thread at
121
+ * a time, as syntax trees are not thread safe.
122
+ *
123
+ * @return [Tree]
124
+ */
125
+ static VALUE tree_copy(VALUE self) { return new_tree(ts_tree_copy(SELF)); }
126
+
127
+ /**
128
+ * Edit the syntax tree to keep it in sync with source code that has been
129
+ * edited.
130
+ *
131
+ * You must describe the edit both in terms of byte offsets and in terms of
132
+ * (row, column) coordinates.
133
+ *
134
+ * @param edit [InputEdit]
135
+ *
136
+ * @return [nil]
137
+ */
76
138
  static VALUE tree_edit(VALUE self, VALUE edit) {
77
139
  TSInputEdit in = value_to_input_edit(edit);
78
140
  ts_tree_edit(SELF, &in);
79
141
  return Qnil;
80
142
  }
81
143
 
82
- static VALUE tree_changed_ranges(VALUE _self, VALUE old_tree, VALUE new_tree) {
83
- TSTree *old = unwrap(old_tree)->data;
84
- TSTree *new = unwrap(new_tree)->data;
144
+ /**
145
+ * Get the array of included ranges that was used to parse the syntax tree.
146
+ *
147
+ * @return [Array<Range>]
148
+ */
149
+ static VALUE included_ranges(VALUE self) {
85
150
  uint32_t length;
86
- TSRange *ranges = ts_tree_get_changed_ranges(old, new, &length);
151
+ const TSRange *ranges = ts_tree_included_ranges(SELF, &length);
87
152
  VALUE res = rb_ary_new_capa(length);
88
-
89
153
  for (uint32_t i = 0; i < length; i++) {
90
154
  rb_ary_push(res, new_range(&ranges[i]));
91
155
  }
92
-
93
- if (ranges) {
94
- free(ranges);
95
- }
96
-
97
156
  return res;
98
157
  }
99
158
 
159
+ /**
160
+ * Get the language that was used to parse the syntax tree.
161
+ *
162
+ * @return [Language]
163
+ */
164
+ static VALUE tree_language(VALUE self) {
165
+ return new_language(ts_tree_language(SELF));
166
+ }
167
+
168
+ /**
169
+ * Write a DOT graph describing the syntax tree to the given file.
170
+ *
171
+ * @param file [String]
172
+ *
173
+ * @return [nil]
174
+ */
100
175
  static VALUE tree_print_dot_graph(VALUE self, VALUE file) {
101
176
  Check_Type(file, T_STRING);
102
177
  char *path = StringValueCStr(file);
@@ -112,22 +187,26 @@ static VALUE tree_print_dot_graph(VALUE self, VALUE file) {
112
187
  return Qnil;
113
188
  }
114
189
 
115
- static VALUE tree_finalizer(VALUE _self) {
116
- VALUE rc = rb_cv_get(cTree, "@@rc");
117
- VALUE keys = rb_funcall(rc, rb_intern("keys"), 0);
118
- long len = RARRAY_LEN(keys);
119
-
120
- for (long i = 0; i < len; ++i) {
121
- VALUE curr = RARRAY_AREF(keys, i);
122
- unsigned int val = NUM2UINT(rb_hash_lookup(rc, curr));
123
- if (val > 0) {
124
- ts_tree_delete((TSTree *)NUM2ULONG(curr));
125
- }
126
-
127
- rb_hash_delete(rc, curr);
128
- }
190
+ /**
191
+ * Get the root node of the syntax tree.
192
+ *
193
+ * @return [Node]
194
+ */
195
+ static VALUE tree_root_node(VALUE self) {
196
+ return new_node_by_val(ts_tree_root_node(SELF));
197
+ }
129
198
 
130
- return Qnil;
199
+ /**
200
+ * Get the root node of the syntax tree, but with its position
201
+ * shifted forward by the given offset.
202
+ *
203
+ * @return [Node]
204
+ */
205
+ static VALUE tree_root_node_with_offset(VALUE self, VALUE offset_bytes,
206
+ VALUE offset_extent) {
207
+ uint32_t bytes = NUM2UINT(offset_bytes);
208
+ TSPoint extent = value_to_point(offset_extent);
209
+ return new_node_by_val(ts_tree_root_node_with_offset(SELF, bytes, extent));
131
210
  }
132
211
 
133
212
  void init_tree(void) {
@@ -135,14 +214,19 @@ void init_tree(void) {
135
214
 
136
215
  rb_undef_alloc_func(cTree);
137
216
 
217
+ /* Module methods */
218
+ rb_define_module_function(cTree, "changed_ranges", tree_changed_ranges, 2);
219
+ rb_define_module_function(cTree, "finalizer", tree_finalizer, 0);
220
+
138
221
  /* Class methods */
139
222
  rb_define_method(cTree, "copy", tree_copy, 0);
140
- rb_define_method(cTree, "root_node", tree_root_node, 0);
141
- rb_define_method(cTree, "language", tree_language, 0);
142
223
  rb_define_method(cTree, "edit", tree_edit, 1);
143
- rb_define_module_function(cTree, "changed_ranges", tree_changed_ranges, 2);
224
+ rb_define_method(cTree, "included_ranges", included_ranges, 0);
225
+ rb_define_method(cTree, "language", tree_language, 0);
144
226
  rb_define_method(cTree, "print_dot_graph", tree_print_dot_graph, 1);
145
- rb_define_module_function(cTree, "finalizer", tree_finalizer, 0);
227
+ rb_define_method(cTree, "root_node", tree_root_node, 0);
228
+ rb_define_method(cTree, "root_node_with_offset", tree_root_node_with_offset,
229
+ 2);
146
230
 
147
231
  // Reference-count created trees
148
232
  VALUE rc = rb_hash_new();