ruby_tree_sitter 0.20.8.3-x86_64-linux → 1.1.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +2 -1
  3. data/README.md +32 -18
  4. data/ext/tree_sitter/extconf.rb +1 -1
  5. data/ext/tree_sitter/input.c +1 -0
  6. data/ext/tree_sitter/language.c +131 -46
  7. data/ext/tree_sitter/logger.c +28 -12
  8. data/ext/tree_sitter/node.c +438 -130
  9. data/ext/tree_sitter/parser.c +232 -37
  10. data/ext/tree_sitter/query.c +197 -72
  11. data/ext/tree_sitter/query_cursor.c +140 -28
  12. data/ext/tree_sitter/repo.rb +1 -1
  13. data/ext/tree_sitter/tree.c +118 -34
  14. data/ext/tree_sitter/tree_cursor.c +205 -33
  15. data/ext/tree_sitter/tree_sitter.c +12 -0
  16. data/lib/tree_sitter/node.rb +23 -6
  17. data/lib/tree_sitter/tree_sitter.so +0 -0
  18. data/lib/tree_sitter/version.rb +4 -2
  19. data/lib/tree_sitter.rb +1 -0
  20. data/lib/tree_stand/ast_modifier.rb +30 -0
  21. data/lib/tree_stand/breadth_first_visitor.rb +54 -0
  22. data/lib/tree_stand/config.rb +13 -0
  23. data/lib/tree_stand/node.rb +224 -0
  24. data/lib/tree_stand/parser.rb +67 -0
  25. data/lib/tree_stand/range.rb +55 -0
  26. data/lib/tree_stand/tree.rb +123 -0
  27. data/lib/tree_stand/utils/printer.rb +73 -0
  28. data/lib/tree_stand/version.rb +7 -0
  29. data/lib/tree_stand/visitor.rb +127 -0
  30. data/lib/tree_stand/visitors/tree_walker.rb +37 -0
  31. data/lib/tree_stand.rb +48 -0
  32. data/tree_sitter.gemspec +14 -11
  33. metadata +36 -107
  34. data/test/README.md +0 -15
  35. data/test/test_helper.rb +0 -9
  36. data/test/tree_sitter/js_test.rb +0 -48
  37. data/test/tree_sitter/language_test.rb +0 -73
  38. data/test/tree_sitter/logger_test.rb +0 -70
  39. data/test/tree_sitter/node_test.rb +0 -411
  40. data/test/tree_sitter/parser_test.rb +0 -140
  41. data/test/tree_sitter/query_test.rb +0 -153
  42. data/test/tree_sitter/tree_cursor_test.rb +0 -83
  43. data/test/tree_sitter/tree_test.rb +0 -51
@@ -16,9 +16,39 @@ static VALUE query_cursor_allocate(VALUE klass) {
16
16
  return res;
17
17
  }
18
18
  DATA_UNWRAP(query_cursor)
19
+ /**
20
+ * Create a new cursor for executing a given query.
21
+ *
22
+ * The cursor stores the state that is needed to iteratively search
23
+ * for matches. To use the query cursor, first call {QueryCursor#exec}
24
+ * to start running a given query on a given syntax node. Then, there are
25
+ * two options for consuming the results of the query:
26
+ * 1. Repeatedly call {QueryCursor#next_match} to iterate over all of the
27
+ * *matches* in the order that they were found. Each match contains the
28
+ * index of the pattern that matched, and an array of captures. Because
29
+ * multiple patterns can match the same set of nodes, one match may contain
30
+ * captures that appear *before* some of the captures from a previous match.
31
+ * 2. Repeatedly call {QueryCursor#next_capture} to iterate over all of the
32
+ * individual *captures* in the order that they appear. This is useful if
33
+ * you don't care about which pattern matched, and just want a single ordered
34
+ * sequence of captures.
35
+ *
36
+ * If you don't care about consuming all of the results, you can stop calling
37
+ * {QueryCursor#next_match} or {QueryCursor#next_capture} at any point.
38
+ * You can then start executing another query on another node by calling
39
+ * {QueryCursor#exec} again.
40
+ */
19
41
  DATA_PTR_NEW(cQueryCursor, TSQueryCursor, query_cursor)
20
42
  DATA_FROM_VALUE(TSQueryCursor *, query_cursor)
21
43
 
44
+ /**
45
+ * Start running a given query on a given node.
46
+ *
47
+ * @param query [Query]
48
+ * @param node [Node]
49
+ *
50
+ * @return [QueryCursor]
51
+ */
22
52
  static VALUE query_cursor_exec(VALUE self, VALUE query, VALUE node) {
23
53
  VALUE res = query_cursor_allocate(cQueryCursor);
24
54
  query_cursor_t *query_cursor = unwrap(res);
@@ -27,49 +57,78 @@ static VALUE query_cursor_exec(VALUE self, VALUE query, VALUE node) {
27
57
  return res;
28
58
  }
29
59
 
60
+ /**
61
+ * Manage the maximum number of in-progress matches allowed by this query
62
+ * cursor.
63
+ *
64
+ * Query cursors have an optional maximum capacity for storing lists of
65
+ * in-progress captures. If this capacity is exceeded, then the
66
+ * earliest-starting match will silently be dropped to make room for further
67
+ * matches. This maximum capacity is optional — by default, query cursors allow
68
+ * any number of pending matches, dynamically allocating new space for them as
69
+ * needed as the query is executed.
70
+ */
30
71
  static VALUE query_cursor_did_exceed_match_limit(VALUE self) {
31
72
  return ts_query_cursor_did_exceed_match_limit(SELF) ? Qtrue : Qfalse;
32
73
  }
33
74
 
34
- static VALUE query_cursor_get_match_limit(VALUE self) {
35
- return UINT2NUM(ts_query_cursor_match_limit(SELF));
36
- }
37
-
75
+ /**
76
+ * @param limit [Integer]
77
+ *
78
+ * @return [nil]
79
+ */
38
80
  static VALUE query_cursor_set_match_limit(VALUE self, VALUE limit) {
39
81
  ts_query_cursor_set_match_limit(SELF, NUM2UINT(limit));
40
82
  return Qnil;
41
83
  }
42
84
 
43
- static VALUE query_cursor_set_byte_range(VALUE self, VALUE from, VALUE to) {
44
- ts_query_cursor_set_byte_range(SELF, NUM2UINT(from), NUM2UINT(to));
45
- return Qnil;
46
- }
47
-
48
- static VALUE query_cursor_set_point_range(VALUE self, VALUE from, VALUE to) {
49
- ts_query_cursor_set_point_range(SELF, value_to_point(from),
50
- value_to_point(to));
51
- return Qnil;
85
+ /**
86
+ * @return [Integer]
87
+ */
88
+ static VALUE query_cursor_get_match_limit(VALUE self) {
89
+ return UINT2NUM(ts_query_cursor_match_limit(SELF));
52
90
  }
53
91
 
54
- static VALUE query_cursor_next_match(VALUE self) {
55
- TSQueryMatch match;
56
- if (ts_query_cursor_next_match(SELF, &match)) {
57
- return new_query_match(&match);
58
- } else {
59
- return Qnil;
92
+ /**
93
+ * Set the maximum start depth for a query cursor.
94
+ *
95
+ * This prevents cursors from exploring children nodes at a certain depth.
96
+ * Note that if a pattern includes many children, then they will still be checked.
97
+ *
98
+ * The zero max start depth value can be used as a special behavior and
99
+ * it helps to destructure a subtree by staying on a node and using captures
100
+ * for the parts of interest. Note that the zero max start depth only limits the search
101
+ * depth for a pattern's root node but other nodes that are parts of the pattern
102
+ * may be searched at any depth, as defined by the pattern structure.
103
+ *
104
+ * @param max_start_depth [Integer|nil] set to nil to remove the maximum start
105
+ * depth.
106
+ *
107
+ * @return [nil]
108
+ */
109
+ static VALUE query_cursor_set_max_start_depth(VALUE self,
110
+ VALUE max_start_depth) {
111
+ uint32_t max = UINT32_MAX;
112
+ if (!NIL_P(max_start_depth)) {
113
+ max = NUM2UINT(max_start_depth);
60
114
  }
61
- }
62
-
63
- static VALUE query_cursor_remove_match(VALUE self, VALUE id) {
64
- ts_query_cursor_remove_match(SELF, NUM2UINT(id));
115
+ ts_query_cursor_set_max_start_depth(SELF, max);
65
116
  return Qnil;
66
117
  }
67
118
 
68
- // NOTE: maybe this is the limit of how "transparent" the bindings need to be.
119
+ // FIXME: maybe this is the limit of how "transparent" the bindings need to be.
69
120
  // Pending benchmarks, this can be very inefficient because obviously
70
121
  // ts_query_cursor_next_capture is intended to be used in a loop. Creating an
71
122
  // array of two values and returning them, intuitively speaking, seem very
72
123
  // inefficient.
124
+ // FIXME: maybe this needs to return an empty array to make for a nicer ruby
125
+ // API?
126
+ /**
127
+ * Advance to the next capture of the currently running query.
128
+ *
129
+ * @return [Array<Integer|Boolean>|nil] If there is a capture, return a tuple
130
+ * [Integer, Boolean], otherwise return +nil+.
131
+ */
73
132
  static VALUE query_cursor_next_capture(VALUE self) {
74
133
  TSQueryMatch match;
75
134
  uint32_t index;
@@ -83,21 +142,74 @@ static VALUE query_cursor_next_capture(VALUE self) {
83
142
  }
84
143
  }
85
144
 
145
+ /**
146
+ * Advance to the next match of the currently running query.
147
+ *
148
+ * @return [QueryMatch|nil] The next match, or +nil+ if there are no more matches.
149
+ */
150
+ static VALUE query_cursor_next_match(VALUE self) {
151
+ TSQueryMatch match;
152
+ if (ts_query_cursor_next_match(SELF, &match)) {
153
+ return new_query_match(&match);
154
+ } else {
155
+ return Qnil;
156
+ }
157
+ }
158
+
159
+ static VALUE query_cursor_remove_match(VALUE self, VALUE id) {
160
+ ts_query_cursor_remove_match(SELF, NUM2UINT(id));
161
+ return Qnil;
162
+ }
163
+
164
+ /**
165
+ * @param from [Integer]
166
+ * @param to [Integer]
167
+ *
168
+ * @return [nil]
169
+ */
170
+ static VALUE query_cursor_set_byte_range(VALUE self, VALUE from, VALUE to) {
171
+ ts_query_cursor_set_byte_range(SELF, NUM2UINT(from), NUM2UINT(to));
172
+ return Qnil;
173
+ }
174
+
175
+ /**
176
+ * @param from [Point]
177
+ * @param to [Point]
178
+ *
179
+ * @return [nil]
180
+ */
181
+ static VALUE query_cursor_set_point_range(VALUE self, VALUE from, VALUE to) {
182
+ ts_query_cursor_set_point_range(SELF, value_to_point(from),
183
+ value_to_point(to));
184
+ return Qnil;
185
+ }
186
+
86
187
  void init_query_cursor(void) {
87
188
  cQueryCursor = rb_define_class_under(mTreeSitter, "QueryCursor", rb_cObject);
88
189
 
89
190
  rb_define_alloc_func(cQueryCursor, query_cursor_allocate);
90
191
 
192
+ /* Module methods */
193
+ rb_define_module_function(cQueryCursor, "exec", query_cursor_exec, 2);
194
+
91
195
  /* Class methods */
196
+ // Accessors
92
197
  DECLARE_ACCESSOR(cQueryCursor, query_cursor, match_limit)
93
- rb_define_module_function(cQueryCursor, "exec", query_cursor_exec, 2);
198
+
199
+ // Other
94
200
  rb_define_method(cQueryCursor, "exceed_match_limit?",
95
201
  query_cursor_did_exceed_match_limit, 0);
202
+ rb_define_method(cQueryCursor, "match_limit", query_cursor_get_match_limit,
203
+ 0);
204
+ rb_define_method(cQueryCursor, "match_limit=", query_cursor_set_match_limit,
205
+ 1);
206
+ rb_define_method(cQueryCursor,
207
+ "max_start_depth=", query_cursor_set_max_start_depth, 1);
208
+ rb_define_method(cQueryCursor, "next_capture", query_cursor_next_capture, 0);
209
+ rb_define_method(cQueryCursor, "next_match", query_cursor_next_match, 0);
210
+ rb_define_method(cQueryCursor, "remove_match", query_cursor_remove_match, 1);
96
211
  rb_define_method(cQueryCursor, "set_byte_range", query_cursor_set_byte_range,
97
212
  2);
98
213
  rb_define_method(cQueryCursor, "set_point_range",
99
214
  query_cursor_set_point_range, 2);
100
- rb_define_method(cQueryCursor, "next_match", query_cursor_next_match, 0);
101
- rb_define_method(cQueryCursor, "remove_match", query_cursor_remove_match, 1);
102
- rb_define_method(cQueryCursor, "next_capture", query_cursor_next_capture, 0);
103
215
  }
@@ -17,7 +17,7 @@ module TreeSitter
17
17
  @url = {
18
18
  git: 'https://github.com/tree-sitter/tree-sitter',
19
19
  tar: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.tar.gz",
20
- zip: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.zip"
20
+ zip: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.zip",
21
21
  }
22
22
 
23
23
  @exe = {}
@@ -63,40 +63,115 @@ VALUE new_tree(TSTree *ptr) {
63
63
 
64
64
  DATA_FROM_VALUE(TSTree *, tree)
65
65
 
66
- static VALUE tree_copy(VALUE self) { return new_tree(ts_tree_copy(SELF)); }
66
+ /**
67
+ * Compare an old edited syntax tree to a new syntax tree representing the same
68
+ * document, returning an array of ranges whose syntactic structure has changed.
69
+ *
70
+ * For this to work correctly, the old syntax tree must have been edited such
71
+ * that its ranges match up to the new tree. Generally, you'll want to call
72
+ * this function right after calling one of the {Parser#parse} functions.
73
+ * You need to pass the old tree that was passed to parse, as well as the new
74
+ * tree that was returned from that function.
75
+ *
76
+ * @param old_tree [Tree]
77
+ * @param new_tree [Tree]
78
+ *
79
+ * @return [Array<Range>]
80
+ */
81
+ static VALUE tree_changed_ranges(VALUE _self, VALUE old_tree, VALUE new_tree) {
82
+ TSTree *old = unwrap(old_tree)->data;
83
+ TSTree *new = unwrap(new_tree)->data;
84
+ uint32_t length;
85
+ TSRange *ranges = ts_tree_get_changed_ranges(old, new, &length);
86
+ VALUE res = rb_ary_new_capa(length);
67
87
 
68
- static VALUE tree_root_node(VALUE self) {
69
- return new_node_by_val(ts_tree_root_node(SELF));
88
+ for (uint32_t i = 0; i < length; i++) {
89
+ rb_ary_push(res, new_range(&ranges[i]));
90
+ }
91
+
92
+ if (ranges) {
93
+ free(ranges);
94
+ }
95
+
96
+ return res;
70
97
  }
71
98
 
72
- static VALUE tree_language(VALUE self) {
73
- return new_language(ts_tree_language(SELF));
99
+ static VALUE tree_finalizer(VALUE _self) {
100
+ VALUE rc = rb_cv_get(cTree, "@@rc");
101
+ VALUE keys = rb_funcall(rc, rb_intern("keys"), 0);
102
+ long len = RARRAY_LEN(keys);
103
+
104
+ for (long i = 0; i < len; ++i) {
105
+ VALUE curr = RARRAY_AREF(keys, i);
106
+ unsigned int val = NUM2UINT(rb_hash_lookup(rc, curr));
107
+ if (val > 0) {
108
+ ts_tree_delete((TSTree *)NUM2ULONG(curr));
109
+ }
110
+
111
+ rb_hash_delete(rc, curr);
112
+ }
113
+
114
+ return Qnil;
74
115
  }
75
116
 
117
+ /**
118
+ * Create a shallow copy of the syntax tree. This is very fast.
119
+ *
120
+ * You need to copy a syntax tree in order to use it on more than one thread at
121
+ * a time, as syntax trees are not thread safe.
122
+ *
123
+ * @return [Tree]
124
+ */
125
+ static VALUE tree_copy(VALUE self) { return new_tree(ts_tree_copy(SELF)); }
126
+
127
+ /**
128
+ * Edit the syntax tree to keep it in sync with source code that has been
129
+ * edited.
130
+ *
131
+ * You must describe the edit both in terms of byte offsets and in terms of
132
+ * (row, column) coordinates.
133
+ *
134
+ * @param edit [InputEdit]
135
+ *
136
+ * @return [nil]
137
+ */
76
138
  static VALUE tree_edit(VALUE self, VALUE edit) {
77
139
  TSInputEdit in = value_to_input_edit(edit);
78
140
  ts_tree_edit(SELF, &in);
79
141
  return Qnil;
80
142
  }
81
143
 
82
- static VALUE tree_changed_ranges(VALUE _self, VALUE old_tree, VALUE new_tree) {
83
- TSTree *old = unwrap(old_tree)->data;
84
- TSTree *new = unwrap(new_tree)->data;
144
+ /**
145
+ * Get the array of included ranges that was used to parse the syntax tree.
146
+ *
147
+ * @return [Array<Range>]
148
+ */
149
+ static VALUE included_ranges(VALUE self) {
85
150
  uint32_t length;
86
- TSRange *ranges = ts_tree_get_changed_ranges(old, new, &length);
151
+ const TSRange *ranges = ts_tree_included_ranges(SELF, &length);
87
152
  VALUE res = rb_ary_new_capa(length);
88
-
89
153
  for (uint32_t i = 0; i < length; i++) {
90
154
  rb_ary_push(res, new_range(&ranges[i]));
91
155
  }
92
-
93
- if (ranges) {
94
- free(ranges);
95
- }
96
-
97
156
  return res;
98
157
  }
99
158
 
159
+ /**
160
+ * Get the language that was used to parse the syntax tree.
161
+ *
162
+ * @return [Language]
163
+ */
164
+ static VALUE tree_language(VALUE self) {
165
+ return new_language(ts_tree_language(SELF));
166
+ }
167
+
168
+ /**
169
+ * Write a DOT graph describing the syntax tree to the given file.
170
+ *
171
+ * @param file [String]
172
+ *
173
+ * @return [nil]
174
+ */
100
175
  static VALUE tree_print_dot_graph(VALUE self, VALUE file) {
101
176
  Check_Type(file, T_STRING);
102
177
  char *path = StringValueCStr(file);
@@ -112,22 +187,26 @@ static VALUE tree_print_dot_graph(VALUE self, VALUE file) {
112
187
  return Qnil;
113
188
  }
114
189
 
115
- static VALUE tree_finalizer(VALUE _self) {
116
- VALUE rc = rb_cv_get(cTree, "@@rc");
117
- VALUE keys = rb_funcall(rc, rb_intern("keys"), 0);
118
- long len = RARRAY_LEN(keys);
119
-
120
- for (long i = 0; i < len; ++i) {
121
- VALUE curr = RARRAY_AREF(keys, i);
122
- unsigned int val = NUM2UINT(rb_hash_lookup(rc, curr));
123
- if (val > 0) {
124
- ts_tree_delete((TSTree *)NUM2ULONG(curr));
125
- }
126
-
127
- rb_hash_delete(rc, curr);
128
- }
190
+ /**
191
+ * Get the root node of the syntax tree.
192
+ *
193
+ * @return [Node]
194
+ */
195
+ static VALUE tree_root_node(VALUE self) {
196
+ return new_node_by_val(ts_tree_root_node(SELF));
197
+ }
129
198
 
130
- return Qnil;
199
+ /**
200
+ * Get the root node of the syntax tree, but with its position
201
+ * shifted forward by the given offset.
202
+ *
203
+ * @return [Node]
204
+ */
205
+ static VALUE tree_root_node_with_offset(VALUE self, VALUE offset_bytes,
206
+ VALUE offset_extent) {
207
+ uint32_t bytes = NUM2UINT(offset_bytes);
208
+ TSPoint extent = value_to_point(offset_extent);
209
+ return new_node_by_val(ts_tree_root_node_with_offset(SELF, bytes, extent));
131
210
  }
132
211
 
133
212
  void init_tree(void) {
@@ -135,14 +214,19 @@ void init_tree(void) {
135
214
 
136
215
  rb_undef_alloc_func(cTree);
137
216
 
217
+ /* Module methods */
218
+ rb_define_module_function(cTree, "changed_ranges", tree_changed_ranges, 2);
219
+ rb_define_module_function(cTree, "finalizer", tree_finalizer, 0);
220
+
138
221
  /* Class methods */
139
222
  rb_define_method(cTree, "copy", tree_copy, 0);
140
- rb_define_method(cTree, "root_node", tree_root_node, 0);
141
- rb_define_method(cTree, "language", tree_language, 0);
142
223
  rb_define_method(cTree, "edit", tree_edit, 1);
143
- rb_define_module_function(cTree, "changed_ranges", tree_changed_ranges, 2);
224
+ rb_define_method(cTree, "included_ranges", included_ranges, 0);
225
+ rb_define_method(cTree, "language", tree_language, 0);
144
226
  rb_define_method(cTree, "print_dot_graph", tree_print_dot_graph, 1);
145
- rb_define_module_function(cTree, "finalizer", tree_finalizer, 0);
227
+ rb_define_method(cTree, "root_node", tree_root_node, 0);
228
+ rb_define_method(cTree, "root_node_with_offset", tree_root_node_with_offset,
229
+ 2);
146
230
 
147
231
  // Reference-count created trees
148
232
  VALUE rc = rb_hash_new();