ruby_tree_sitter 1.6.0-x86_64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +22 -0
  3. data/README.md +213 -0
  4. data/ext/tree_sitter/encoding.c +29 -0
  5. data/ext/tree_sitter/extconf.rb +149 -0
  6. data/ext/tree_sitter/input.c +127 -0
  7. data/ext/tree_sitter/input_edit.c +42 -0
  8. data/ext/tree_sitter/language.c +219 -0
  9. data/ext/tree_sitter/logger.c +228 -0
  10. data/ext/tree_sitter/macros.h +163 -0
  11. data/ext/tree_sitter/node.c +623 -0
  12. data/ext/tree_sitter/parser.c +398 -0
  13. data/ext/tree_sitter/point.c +26 -0
  14. data/ext/tree_sitter/quantifier.c +43 -0
  15. data/ext/tree_sitter/query.c +289 -0
  16. data/ext/tree_sitter/query_capture.c +28 -0
  17. data/ext/tree_sitter/query_cursor.c +231 -0
  18. data/ext/tree_sitter/query_error.c +41 -0
  19. data/ext/tree_sitter/query_match.c +44 -0
  20. data/ext/tree_sitter/query_predicate_step.c +83 -0
  21. data/ext/tree_sitter/range.c +35 -0
  22. data/ext/tree_sitter/repo.rb +128 -0
  23. data/ext/tree_sitter/symbol_type.c +46 -0
  24. data/ext/tree_sitter/tree.c +234 -0
  25. data/ext/tree_sitter/tree_cursor.c +269 -0
  26. data/ext/tree_sitter/tree_sitter.c +44 -0
  27. data/ext/tree_sitter/tree_sitter.h +107 -0
  28. data/lib/tree_sitter/3.0/tree_sitter.so +0 -0
  29. data/lib/tree_sitter/3.1/tree_sitter.so +0 -0
  30. data/lib/tree_sitter/3.2/tree_sitter.so +0 -0
  31. data/lib/tree_sitter/3.3/tree_sitter.so +0 -0
  32. data/lib/tree_sitter/helpers.rb +23 -0
  33. data/lib/tree_sitter/mixins/language.rb +167 -0
  34. data/lib/tree_sitter/node.rb +167 -0
  35. data/lib/tree_sitter/query.rb +191 -0
  36. data/lib/tree_sitter/query_captures.rb +30 -0
  37. data/lib/tree_sitter/query_cursor.rb +27 -0
  38. data/lib/tree_sitter/query_match.rb +100 -0
  39. data/lib/tree_sitter/query_matches.rb +39 -0
  40. data/lib/tree_sitter/query_predicate.rb +14 -0
  41. data/lib/tree_sitter/text_predicate_capture.rb +37 -0
  42. data/lib/tree_sitter/version.rb +8 -0
  43. data/lib/tree_sitter.rb +34 -0
  44. data/lib/tree_stand/ast_modifier.rb +30 -0
  45. data/lib/tree_stand/breadth_first_visitor.rb +54 -0
  46. data/lib/tree_stand/config.rb +19 -0
  47. data/lib/tree_stand/node.rb +351 -0
  48. data/lib/tree_stand/parser.rb +87 -0
  49. data/lib/tree_stand/range.rb +55 -0
  50. data/lib/tree_stand/tree.rb +123 -0
  51. data/lib/tree_stand/utils/printer.rb +73 -0
  52. data/lib/tree_stand/version.rb +7 -0
  53. data/lib/tree_stand/visitor.rb +127 -0
  54. data/lib/tree_stand/visitors/tree_walker.rb +37 -0
  55. data/lib/tree_stand.rb +48 -0
  56. data/tree_sitter.gemspec +34 -0
  57. metadata +135 -0
@@ -0,0 +1,398 @@
1
+ #include "tree_sitter.h"
2
+
3
+ extern VALUE mTreeSitter;
4
+
5
+ VALUE cParser;
6
+
7
+ typedef struct {
8
+ TSParser *data;
9
+ size_t cancellation_flag;
10
+ } parser_t;
11
+
12
+ static void parser_free(void *ptr) {
13
+ ts_parser_delete(((parser_t *)ptr)->data);
14
+ xfree(ptr);
15
+ }
16
+
17
+ static size_t parser_memsize(const void *ptr) {
18
+ parser_t *type = (parser_t *)ptr;
19
+ return sizeof(type);
20
+ }
21
+
22
+ const rb_data_type_t parser_data_type = {
23
+ .wrap_struct_name = "parser",
24
+ .function =
25
+ {
26
+ .dmark = NULL,
27
+ .dfree = parser_free,
28
+ .dsize = parser_memsize,
29
+ .dcompact = NULL,
30
+ },
31
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
32
+ };
33
+
34
+ DATA_UNWRAP(parser)
35
+
36
+ static VALUE parser_allocate(VALUE klass) {
37
+ parser_t *parser;
38
+ VALUE res = TypedData_Make_Struct(klass, parser_t, &parser_data_type, parser);
39
+ parser->data = ts_parser_new();
40
+ return res;
41
+ }
42
+
43
+ /**
44
+ * Get the parser's current cancellation flag pointer.
45
+ *
46
+ * @return [Integer]
47
+ */
48
+ static VALUE parser_get_cancellation_flag(VALUE self) {
49
+ return SIZET2NUM(*ts_parser_cancellation_flag(SELF));
50
+ }
51
+
52
+ /**
53
+ * Set the parser's current cancellation flag pointer.
54
+ *
55
+ * If a non-null pointer is assigned, then the parser will periodically read
56
+ * from this pointer during parsing. If it reads a non-zero value, it will
57
+ * halt early, returning +nil+.
58
+ *
59
+ * @see parse
60
+ *
61
+ * @note This is not well-tested in the bindings.
62
+ *
63
+ * @return nil
64
+ */
65
+ static VALUE parser_set_cancellation_flag(VALUE self, VALUE flag) {
66
+ unwrap(self)->cancellation_flag = NUM2SIZET(flag);
67
+ ts_parser_set_cancellation_flag(SELF, &unwrap(self)->cancellation_flag);
68
+ return Qnil;
69
+ }
70
+
71
+ /**
72
+ * Get the parser's current language.
73
+ */
74
+ static VALUE parser_get_language(VALUE self) {
75
+ return new_language(ts_parser_language(SELF));
76
+ }
77
+
78
+ /**
79
+ * Set the language that the parser should use for parsing.
80
+ *
81
+ * Returns a boolean indicating whether or not the language was successfully
82
+ * assigned. True means assignment succeeded. False means there was a version
83
+ * mismatch: the language was generated with an incompatible version of the
84
+ * Tree-sitter CLI. Check the language's version using {Language#version}
85
+ * and compare it to this library's {TreeSitter::LANGUAGE_VERSION} and
86
+ * {TreeSitter::MIN_COMPATIBLE_LANGUAGE_VERSION} constants.
87
+ *
88
+ * @return [Boolean]
89
+ */
90
+ static VALUE parser_set_language(VALUE self, VALUE language) {
91
+ return ts_parser_set_language(SELF, value_to_language(language)) ? Qtrue
92
+ : Qfalse;
93
+ }
94
+
95
+ /**
96
+ * Get the ranges of text that the parser will include when parsing.
97
+ *
98
+ * @return [Array<Range>]
99
+ */
100
+ static VALUE parser_get_included_ranges(VALUE self) {
101
+ uint32_t length;
102
+ const TSRange *ranges = ts_parser_included_ranges(SELF, &length);
103
+ VALUE res = rb_ary_new_capa(length);
104
+ for (uint32_t i = 0; i < length; i++) {
105
+ rb_ary_push(res, new_range(&ranges[i]));
106
+ }
107
+ return res;
108
+ }
109
+
110
+ /**
111
+ * Set the ranges of text that the parser should include when parsing.
112
+ *
113
+ * By default, the parser will always include entire documents. This function
114
+ * allows you to parse only a *portion* of a document but still return a syntax
115
+ * tree whose ranges match up with the document as a whole. You can also pass
116
+ * multiple disjoint ranges.
117
+ *
118
+ * The second and third parameters specify the location and length of an array
119
+ * of ranges. The parser does *not* take ownership of these ranges; it copies
120
+ * the data, so it doesn't matter how these ranges are allocated.
121
+ *
122
+ * If +array+'s +length+ is zero, then the entire document will be parsed.
123
+ * Otherwise, the given ranges must be ordered from earliest to latest in the
124
+ * document, and they must not overlap. That is, the following must hold for
125
+ * all:
126
+ *
127
+ * i < length - 1: ranges[i].end_byte <= ranges[i + 1].start_byte
128
+ *
129
+ * If this requirement is not satisfied, the operation will fail, the ranges
130
+ * will not be assigned, and this function will return +false+. On success,
131
+ * this function returns +true+
132
+ *
133
+ * @param array [Array<Range>]
134
+ *
135
+ * @return [Boolean]
136
+ */
137
+ static VALUE parser_set_included_ranges(VALUE self, VALUE array) {
138
+ Check_Type(array, T_ARRAY);
139
+
140
+ long length = rb_array_len(array);
141
+ TSRange *ranges = (TSRange *)malloc(length * sizeof(TSRange));
142
+ for (long i = 0; i < length; i++) {
143
+ ranges[i] = value_to_range(rb_ary_entry(array, i));
144
+ }
145
+ bool res = ts_parser_set_included_ranges(SELF, ranges, (uint32_t)length);
146
+ if (ranges) {
147
+ free(ranges);
148
+ }
149
+ return res ? Qtrue : Qfalse;
150
+ }
151
+
152
+ /**
153
+ * Get the parser's current logger.
154
+ *
155
+ * @return [Logger]
156
+ */
157
+ static VALUE parser_get_logger(VALUE self) {
158
+ return new_logger_by_val(ts_parser_logger(SELF));
159
+ }
160
+
161
+ /**
162
+ * Set the logger that a parser should use during parsing.
163
+ *
164
+ * The parser does not take ownership over the logger payload. If a logger was
165
+ * previously assigned, the caller is responsible for releasing any memory
166
+ * owned by the previous logger.
167
+ *
168
+ * @param logger [Logger] or any object that has a +printf+, +puts+, or +write+.
169
+ *
170
+ * @return nil
171
+ */
172
+ static VALUE parser_set_logger(VALUE self, VALUE logger) {
173
+ ts_parser_set_logger(SELF, value_to_logger(logger));
174
+ return Qnil;
175
+ }
176
+
177
+ /**
178
+ * Use the parser to parse some source code and create a syntax tree.
179
+ *
180
+ * If you are parsing this document for the first time, pass +nil+ for the
181
+ * +old_tree+ parameter. Otherwise, if you have already parsed an earlier
182
+ * version of this document and the document has since been edited, pass the
183
+ * previous syntax tree so that the unchanged parts of it can be reused.
184
+ * This will save time and memory. For this to work correctly, you must have
185
+ * already edited the old syntax tree using the {Tree#edit} function in a
186
+ * way that exactly matches the source code changes.
187
+ *
188
+ * The input parameter lets you specify how to read the text. It has the
189
+ * following three fields:
190
+ * 1. +read+: A function to retrieve a chunk of text at a given byte offset
191
+ * and (row, column) position. The function should return a pointer to the
192
+ * text and write its length to the +bytes_read+ pointer. The parser does
193
+ * not take ownership of this buffer; it just borrows it until it has
194
+ * finished reading it. The function should write a zero value to the
195
+ * +bytes_read+ pointer to indicate the end of the document.
196
+ * 2. +payload+: An arbitrary pointer that will be passed to each invocation
197
+ * of the +read+ function.
198
+ * 3. +encoding+: An indication of how the text is encoded. Either
199
+ * {Encoding::UTF8} or {Encoding::UTF16}.
200
+ *
201
+ * This function returns a syntax tree on success, and +nil+ on failure. There
202
+ * are three possible reasons for failure:
203
+ * 1. The parser does not have a language assigned. Check for this using the
204
+ * {Parser#language} function.
205
+ * 2. Parsing was cancelled due to a timeout that was set by an earlier call to
206
+ * the {Parser#timeout_micros=} function. You can resume parsing from
207
+ * where the parser left out by calling {Parser#parse} again with the
208
+ * same arguments. Or you can start parsing from scratch by first calling
209
+ * {Parser#reset}.
210
+ * 3. Parsing was cancelled using a cancellation flag that was set by an
211
+ * earlier call to {Parser#cancellation_flag=}. You can resume parsing
212
+ * from where the parser left out by calling {Parser#parse} again with
213
+ * the same arguments.
214
+ *
215
+ * @note this is curently incomplete, as the {Input} class is incomplete.
216
+ *
217
+ * @param old_tree [Tree]
218
+ * @param input [Input]
219
+ *
220
+ * @return [Tree, nil] A parse tree if parsing was successful.
221
+ */
222
+ static VALUE parser_parse(VALUE self, VALUE old_tree, VALUE input) {
223
+ if (NIL_P(input)) {
224
+ return Qnil;
225
+ }
226
+
227
+ TSTree *tree = NULL;
228
+ if (!NIL_P(old_tree)) {
229
+ tree = value_to_tree(old_tree);
230
+ }
231
+
232
+ TSTree *ret = ts_parser_parse(SELF, tree, value_to_input(input));
233
+ if (ret == NULL) {
234
+ return Qnil;
235
+ } else {
236
+ return new_tree(ret);
237
+ }
238
+ }
239
+
240
+ /**
241
+ * Use the parser to parse some source code stored in one contiguous buffer.
242
+ * The first two parameters are the same as in the {Parser#parse} function
243
+ * above. The second two parameters indicate the location of the buffer and its
244
+ * length in bytes.
245
+ *
246
+ * @param old_tree [Tree]
247
+ * @param string [String]
248
+ *
249
+ * @return [Tree, nil] A parse tree if parsing was successful.
250
+ */
251
+ static VALUE parser_parse_string(VALUE self, VALUE old_tree, VALUE string) {
252
+ if (NIL_P(string)) {
253
+ return Qnil;
254
+ }
255
+
256
+ const char *str = StringValuePtr(string);
257
+ uint32_t len = (uint32_t)RSTRING_LEN(string);
258
+ TSTree *tree = NULL;
259
+ if (!NIL_P(old_tree)) {
260
+ tree = value_to_tree(old_tree);
261
+ }
262
+
263
+ TSTree *ret = ts_parser_parse_string(SELF, tree, str, len);
264
+ if (ret == NULL) {
265
+ return Qnil;
266
+ } else {
267
+ return new_tree(ret);
268
+ }
269
+ }
270
+
271
+ /**
272
+ * Use the parser to parse some source code stored in one contiguous buffer with
273
+ * a given encoding. The first four parameters work the same as in the
274
+ * {Parser#parse_string} method above. The final parameter indicates whether
275
+ * the text is encoded as {Encoding::UTF8} or {Encoding::UTF16}.
276
+ *
277
+ * @param old_tree [Tree]
278
+ * @param string [String]
279
+ * @param encoding [Encoding]
280
+ *
281
+ * @return [Tree, nil] A parse tree if parsing was successful.
282
+ */
283
+ static VALUE parser_parse_string_encoding(VALUE self, VALUE old_tree,
284
+ VALUE string, VALUE encoding) {
285
+ if (NIL_P(string)) {
286
+ return Qnil;
287
+ }
288
+
289
+ const char *str = StringValuePtr(string);
290
+ uint32_t len = (uint32_t)RSTRING_LEN(string);
291
+ TSTree *tree = NULL;
292
+ if (!NIL_P(old_tree)) {
293
+ tree = value_to_tree(old_tree);
294
+ }
295
+
296
+ TSTree *ret = ts_parser_parse_string_encoding(SELF, tree, str, len,
297
+ value_to_encoding(encoding));
298
+
299
+ if (ret == NULL) {
300
+ return Qnil;
301
+ } else {
302
+ return new_tree(ret);
303
+ }
304
+ }
305
+
306
+ /**
307
+ * Set the file descriptor to which the parser should write debugging graphs
308
+ * during parsing. The graphs are formatted in the DOT language. You may want
309
+ * to pipe these graphs directly to a +dot(1)+ process in order to generate
310
+ * SVG output. You can turn off this logging by passing a negative number.
311
+ *
312
+ * @param file [Integer, String, nil] a file name to print, or turn off by
313
+ * passing +nil+ or +-1+
314
+ *
315
+ * @return nil
316
+ */
317
+ static VALUE parser_print_dot_graphs(VALUE self, VALUE file) {
318
+ if (NIL_P(file)) {
319
+ ts_parser_print_dot_graphs(SELF, -1);
320
+ } else if (rb_integer_type_p(file) && NUM2INT(file) < 0) {
321
+ ts_parser_print_dot_graphs(SELF, NUM2INT(file));
322
+ } else {
323
+ Check_Type(file, T_STRING);
324
+ char *path = StringValueCStr(file);
325
+ int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC,
326
+ 0644); // 0644 = all read + user write
327
+ ts_parser_print_dot_graphs(SELF, fd);
328
+ }
329
+ return Qnil;
330
+ }
331
+
332
+ /**
333
+ * Instruct the parser to start the next parse from the beginning.
334
+ *
335
+ * If the parser previously failed because of a timeout or a cancellation, then
336
+ * by default, it will resume where it left off on the next call to
337
+ * {Parser#parse} or other parsing functions. If you don't want to resume,
338
+ * and instead intend to use this parser to parse some other document, you must
339
+ * call {Parser#reset} first.
340
+ *
341
+ * @return nil
342
+ */
343
+ static VALUE parser_reset(VALUE self) {
344
+ ts_parser_reset(SELF);
345
+ return Qnil;
346
+ }
347
+
348
+ /**
349
+ * Get the duration in microseconds that parsing is allowed to take.
350
+ *
351
+ * @return [Integer]
352
+ */
353
+ static VALUE parser_get_timeout_micros(VALUE self) {
354
+ return ULL2NUM(ts_parser_timeout_micros(SELF));
355
+ }
356
+
357
+ /**
358
+ * Set the maximum duration in microseconds that parsing should be allowed to
359
+ * take before halting.
360
+ *
361
+ * If parsing takes longer than this, it will halt early, returning +nil+.
362
+ *
363
+ * @see parse
364
+ *
365
+ * @param timeout [Integer]
366
+ *
367
+ * @return [nil]
368
+ */
369
+ static VALUE parser_set_timeout_micros(VALUE self, VALUE timeout) {
370
+ ts_parser_set_timeout_micros(SELF, NUM2ULL(timeout));
371
+ return Qnil;
372
+ }
373
+
374
+ void init_parser(void) {
375
+ cParser = rb_define_class_under(mTreeSitter, "Parser", rb_cObject);
376
+
377
+ rb_define_alloc_func(cParser, parser_allocate);
378
+
379
+ /* Class methods */
380
+ rb_define_method(cParser, "cancellation_flag", parser_get_cancellation_flag,
381
+ 0);
382
+ rb_define_method(cParser, "cancellation_flag=", parser_set_cancellation_flag,
383
+ 1);
384
+ rb_define_method(cParser, "included_ranges", parser_get_included_ranges, 0);
385
+ rb_define_method(cParser, "included_ranges=", parser_set_included_ranges, 1);
386
+ rb_define_method(cParser, "language", parser_get_language, 0);
387
+ rb_define_method(cParser, "language=", parser_set_language, 1);
388
+ rb_define_method(cParser, "logger", parser_get_logger, 0);
389
+ rb_define_method(cParser, "logger=", parser_set_logger, 1);
390
+ rb_define_method(cParser, "parse", parser_parse, 2);
391
+ rb_define_method(cParser, "parse_string", parser_parse_string, 2);
392
+ rb_define_method(cParser, "parse_string_encoding",
393
+ parser_parse_string_encoding, 3);
394
+ rb_define_method(cParser, "print_dot_graphs", parser_print_dot_graphs, 1);
395
+ rb_define_method(cParser, "reset", parser_reset, 0);
396
+ rb_define_method(cParser, "timeout_micros", parser_get_timeout_micros, 0);
397
+ rb_define_method(cParser, "timeout_micros=", parser_set_timeout_micros, 1);
398
+ }
@@ -0,0 +1,26 @@
1
+ #include "tree_sitter.h"
2
+
3
+ extern VALUE mTreeSitter;
4
+
5
+ VALUE cPoint;
6
+
7
+ DATA_WRAP(Point, point)
8
+ DATA_DEFINE_ACCESSOR(point, row, UINT2NUM, NUM2UINT)
9
+ DATA_DEFINE_ACCESSOR(point, column, UINT2NUM, NUM2UINT)
10
+
11
+ static VALUE point_inspect(VALUE self) {
12
+ point_t *point = unwrap(self);
13
+ return rb_sprintf("{row=%i, column=%i}", point->data.row, point->data.column);
14
+ }
15
+
16
+ void init_point(void) {
17
+ cPoint = rb_define_class_under(mTreeSitter, "Point", rb_cObject);
18
+
19
+ rb_define_alloc_func(cPoint, point_allocate);
20
+
21
+ /* Class methods */
22
+ DECLARE_ACCESSOR(cPoint, point, row)
23
+ DECLARE_ACCESSOR(cPoint, point, column)
24
+ rb_define_method(cPoint, "inspect", point_inspect, 0);
25
+ rb_define_method(cPoint, "to_s", point_inspect, 0);
26
+ }
@@ -0,0 +1,43 @@
1
+ #include "tree_sitter.h"
2
+
3
+ extern VALUE mTreeSitter;
4
+
5
+ VALUE mQuantifier;
6
+
7
+ TSQuantifier value_to_quantifier(VALUE quantifier) {
8
+ return NUM2UINT(quantifier);
9
+ }
10
+
11
+ const char *quantifier_str(TSQuantifier error) {
12
+ switch (error) {
13
+ case TSQuantifierZero:
14
+ return "Zero";
15
+ case TSQuantifierZeroOrOne:
16
+ return "ZeroOrOne";
17
+ case TSQuantifierZeroOrMore:
18
+ return "ZeroOrMore";
19
+ case TSQuantifierOne:
20
+ return "One";
21
+ case TSQuantifierOneOrMore:
22
+ return "OneOrMore";
23
+ default:
24
+ return "??";
25
+ }
26
+ }
27
+
28
+ static VALUE quantifier_name(VALUE self, VALUE quant) {
29
+ int q = NUM2UINT(quant);
30
+ return safe_str(quantifier_str(q));
31
+ }
32
+
33
+ void init_quantifier(void) {
34
+ mQuantifier = rb_define_module_under(mTreeSitter, "Quantifier");
35
+
36
+ rb_define_module_function(mTreeSitter, "quantifier_name", quantifier_name, 1);
37
+
38
+ rb_define_const(mQuantifier, "ZERO", UINT2NUM(0));
39
+ rb_define_const(mQuantifier, "ZERO_OR_ONE", UINT2NUM(1));
40
+ rb_define_const(mQuantifier, "ZERO_OR_MORE", UINT2NUM(2));
41
+ rb_define_const(mQuantifier, "ONE", UINT2NUM(3));
42
+ rb_define_const(mQuantifier, "ONE_OR_MORE", UINT2NUM(4));
43
+ }