ruby_tree_sitter 1.1.0-arm64-darwin-22

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +22 -0
  3. data/README.md +199 -0
  4. data/ext/tree_sitter/encoding.c +29 -0
  5. data/ext/tree_sitter/extconf.rb +149 -0
  6. data/ext/tree_sitter/input.c +127 -0
  7. data/ext/tree_sitter/input_edit.c +42 -0
  8. data/ext/tree_sitter/language.c +219 -0
  9. data/ext/tree_sitter/logger.c +228 -0
  10. data/ext/tree_sitter/macros.h +163 -0
  11. data/ext/tree_sitter/node.c +618 -0
  12. data/ext/tree_sitter/parser.c +398 -0
  13. data/ext/tree_sitter/point.c +26 -0
  14. data/ext/tree_sitter/quantifier.c +43 -0
  15. data/ext/tree_sitter/query.c +282 -0
  16. data/ext/tree_sitter/query_capture.c +28 -0
  17. data/ext/tree_sitter/query_cursor.c +215 -0
  18. data/ext/tree_sitter/query_error.c +41 -0
  19. data/ext/tree_sitter/query_match.c +44 -0
  20. data/ext/tree_sitter/query_predicate_step.c +83 -0
  21. data/ext/tree_sitter/range.c +35 -0
  22. data/ext/tree_sitter/repo.rb +121 -0
  23. data/ext/tree_sitter/symbol_type.c +46 -0
  24. data/ext/tree_sitter/tree.c +234 -0
  25. data/ext/tree_sitter/tree_cursor.c +269 -0
  26. data/ext/tree_sitter/tree_sitter.c +44 -0
  27. data/ext/tree_sitter/tree_sitter.h +107 -0
  28. data/lib/tree_sitter/node.rb +197 -0
  29. data/lib/tree_sitter/tree_sitter.bundle +0 -0
  30. data/lib/tree_sitter/version.rb +8 -0
  31. data/lib/tree_sitter.rb +14 -0
  32. data/lib/tree_stand/ast_modifier.rb +30 -0
  33. data/lib/tree_stand/breadth_first_visitor.rb +54 -0
  34. data/lib/tree_stand/config.rb +13 -0
  35. data/lib/tree_stand/node.rb +224 -0
  36. data/lib/tree_stand/parser.rb +67 -0
  37. data/lib/tree_stand/range.rb +55 -0
  38. data/lib/tree_stand/tree.rb +123 -0
  39. data/lib/tree_stand/utils/printer.rb +73 -0
  40. data/lib/tree_stand/version.rb +7 -0
  41. data/lib/tree_stand/visitor.rb +127 -0
  42. data/lib/tree_stand/visitors/tree_walker.rb +37 -0
  43. data/lib/tree_stand.rb +48 -0
  44. data/tree_sitter.gemspec +35 -0
  45. metadata +124 -0
@@ -0,0 +1,398 @@
1
+ #include "tree_sitter.h"
2
+
3
+ extern VALUE mTreeSitter;
4
+
5
+ VALUE cParser;
6
+
7
+ typedef struct {
8
+ TSParser *data;
9
+ size_t cancellation_flag;
10
+ } parser_t;
11
+
12
+ static void parser_free(void *ptr) {
13
+ ts_parser_delete(((parser_t *)ptr)->data);
14
+ xfree(ptr);
15
+ }
16
+
17
+ static size_t parser_memsize(const void *ptr) {
18
+ parser_t *type = (parser_t *)ptr;
19
+ return sizeof(type);
20
+ }
21
+
22
+ const rb_data_type_t parser_data_type = {
23
+ .wrap_struct_name = "parser",
24
+ .function =
25
+ {
26
+ .dmark = NULL,
27
+ .dfree = parser_free,
28
+ .dsize = parser_memsize,
29
+ .dcompact = NULL,
30
+ },
31
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
32
+ };
33
+
34
+ DATA_UNWRAP(parser)
35
+
36
+ static VALUE parser_allocate(VALUE klass) {
37
+ parser_t *parser;
38
+ VALUE res = TypedData_Make_Struct(klass, parser_t, &parser_data_type, parser);
39
+ parser->data = ts_parser_new();
40
+ return res;
41
+ }
42
+
43
+ /**
44
+ * Get the parser's current cancellation flag pointer.
45
+ *
46
+ * @return [Integer]
47
+ */
48
+ static VALUE parser_get_cancellation_flag(VALUE self) {
49
+ return SIZET2NUM(*ts_parser_cancellation_flag(SELF));
50
+ }
51
+
52
+ /**
53
+ * Set the parser's current cancellation flag pointer.
54
+ *
55
+ * If a non-null pointer is assigned, then the parser will periodically read
56
+ * from this pointer during parsing. If it reads a non-zero value, it will
57
+ * halt early, returning +nil+.
58
+ *
59
+ * @see parse
60
+ *
61
+ * @note This is not well-tested in the bindings.
62
+ *
63
+ * @return nil
64
+ */
65
+ static VALUE parser_set_cancellation_flag(VALUE self, VALUE flag) {
66
+ unwrap(self)->cancellation_flag = NUM2SIZET(flag);
67
+ ts_parser_set_cancellation_flag(SELF, &unwrap(self)->cancellation_flag);
68
+ return Qnil;
69
+ }
70
+
71
+ /**
72
+ * Get the parser's current language.
73
+ */
74
+ static VALUE parser_get_language(VALUE self) {
75
+ return new_language(ts_parser_language(SELF));
76
+ }
77
+
78
+ /**
79
+ * Set the language that the parser should use for parsing.
80
+ *
81
+ * Returns a boolean indicating whether or not the language was successfully
82
+ * assigned. True means assignment succeeded. False means there was a version
83
+ * mismatch: the language was generated with an incompatible version of the
84
+ * Tree-sitter CLI. Check the language's version using {Language#version}
85
+ * and compare it to this library's {TreeSitter::LANGUAGE_VERSION} and
86
+ * {TreeSitter::MIN_COMPATIBLE_LANGUAGE_VERSION} constants.
87
+ *
88
+ * @return [Boolean]
89
+ */
90
+ static VALUE parser_set_language(VALUE self, VALUE language) {
91
+ return ts_parser_set_language(SELF, value_to_language(language)) ? Qtrue
92
+ : Qfalse;
93
+ }
94
+
95
+ /**
96
+ * Get the ranges of text that the parser will include when parsing.
97
+ *
98
+ * @return [Array<Range>]
99
+ */
100
+ static VALUE parser_get_included_ranges(VALUE self) {
101
+ uint32_t length;
102
+ const TSRange *ranges = ts_parser_included_ranges(SELF, &length);
103
+ VALUE res = rb_ary_new_capa(length);
104
+ for (uint32_t i = 0; i < length; i++) {
105
+ rb_ary_push(res, new_range(&ranges[i]));
106
+ }
107
+ return res;
108
+ }
109
+
110
+ /**
111
+ * Set the ranges of text that the parser should include when parsing.
112
+ *
113
+ * By default, the parser will always include entire documents. This function
114
+ * allows you to parse only a *portion* of a document but still return a syntax
115
+ * tree whose ranges match up with the document as a whole. You can also pass
116
+ * multiple disjoint ranges.
117
+ *
118
+ * The second and third parameters specify the location and length of an array
119
+ * of ranges. The parser does *not* take ownership of these ranges; it copies
120
+ * the data, so it doesn't matter how these ranges are allocated.
121
+ *
122
+ * If +array+'s +length+ is zero, then the entire document will be parsed.
123
+ * Otherwise, the given ranges must be ordered from earliest to latest in the
124
+ * document, and they must not overlap. That is, the following must hold for
125
+ * all:
126
+ *
127
+ * i < length - 1: ranges[i].end_byte <= ranges[i + 1].start_byte
128
+ *
129
+ * If this requirement is not satisfied, the operation will fail, the ranges
130
+ * will not be assigned, and this function will return +false+. On success,
131
+ * this function returns +true+
132
+ *
133
+ * @param array [Array<Range>]
134
+ *
135
+ * @return [Boolean]
136
+ */
137
+ static VALUE parser_set_included_ranges(VALUE self, VALUE array) {
138
+ Check_Type(array, T_ARRAY);
139
+
140
+ long length = rb_array_len(array);
141
+ TSRange *ranges = (TSRange *)malloc(length * sizeof(TSRange));
142
+ for (long i = 0; i < length; i++) {
143
+ ranges[i] = value_to_range(rb_ary_entry(array, i));
144
+ }
145
+ bool res = ts_parser_set_included_ranges(SELF, ranges, (uint32_t)length);
146
+ if (ranges) {
147
+ free(ranges);
148
+ }
149
+ return res ? Qtrue : Qfalse;
150
+ }
151
+
152
+ /**
153
+ * Get the parser's current logger.
154
+ *
155
+ * @return [Logger]
156
+ */
157
+ static VALUE parser_get_logger(VALUE self) {
158
+ return new_logger_by_val(ts_parser_logger(SELF));
159
+ }
160
+
161
+ /**
162
+ * Set the logger that a parser should use during parsing.
163
+ *
164
+ * The parser does not take ownership over the logger payload. If a logger was
165
+ * previously assigned, the caller is responsible for releasing any memory
166
+ * owned by the previous logger.
167
+ *
168
+ * @param logger [Logger] or any object that has a +printf+, +puts+, or +write+.
169
+ *
170
+ * @return nil
171
+ */
172
+ static VALUE parser_set_logger(VALUE self, VALUE logger) {
173
+ ts_parser_set_logger(SELF, value_to_logger(logger));
174
+ return Qnil;
175
+ }
176
+
177
+ /**
178
+ * Use the parser to parse some source code and create a syntax tree.
179
+ *
180
+ * If you are parsing this document for the first time, pass +nil+ for the
181
+ * +old_tree+ parameter. Otherwise, if you have already parsed an earlier
182
+ * version of this document and the document has since been edited, pass the
183
+ * previous syntax tree so that the unchanged parts of it can be reused.
184
+ * This will save time and memory. For this to work correctly, you must have
185
+ * already edited the old syntax tree using the {Tree#edit} function in a
186
+ * way that exactly matches the source code changes.
187
+ *
188
+ * The input parameter lets you specify how to read the text. It has the
189
+ * following three fields:
190
+ * 1. +read+: A function to retrieve a chunk of text at a given byte offset
191
+ * and (row, column) position. The function should return a pointer to the
192
+ * text and write its length to the +bytes_read+ pointer. The parser does
193
+ * not take ownership of this buffer; it just borrows it until it has
194
+ * finished reading it. The function should write a zero value to the
195
+ * +bytes_read+ pointer to indicate the end of the document.
196
+ * 2. +payload+: An arbitrary pointer that will be passed to each invocation
197
+ * of the +read+ function.
198
+ * 3. +encoding+: An indication of how the text is encoded. Either
199
+ * {Encoding::UTF8} or {Encoding::UTF16}.
200
+ *
201
+ * This function returns a syntax tree on success, and +nil+ on failure. There
202
+ * are three possible reasons for failure:
203
+ * 1. The parser does not have a language assigned. Check for this using the
204
+ * {Parser#language} function.
205
+ * 2. Parsing was cancelled due to a timeout that was set by an earlier call to
206
+ * the {Parser#timeout_micros=} function. You can resume parsing from
207
+ * where the parser left out by calling {Parser#parse} again with the
208
+ * same arguments. Or you can start parsing from scratch by first calling
209
+ * {Parser#reset}.
210
+ * 3. Parsing was cancelled using a cancellation flag that was set by an
211
+ * earlier call to {Parsert#cancellation_flag=}. You can resume parsing
212
+ * from where the parser left out by calling {Parser#parse} again with
213
+ * the same arguments.
214
+ *
215
+ * @note this is curently incomplete, as the {Input} class is incomplete.
216
+ *
217
+ * @param old_tree [Tree]
218
+ * @param input [Input]
219
+ *
220
+ * @return [Tree, nil] A parse tree if parsing was successful.
221
+ */
222
+ static VALUE parser_parse(VALUE self, VALUE old_tree, VALUE input) {
223
+ if (NIL_P(input)) {
224
+ return Qnil;
225
+ }
226
+
227
+ TSTree *tree = NULL;
228
+ if (!NIL_P(old_tree)) {
229
+ tree = value_to_tree(old_tree);
230
+ }
231
+
232
+ TSTree *ret = ts_parser_parse(SELF, tree, value_to_input(input));
233
+ if (ret == NULL) {
234
+ return Qnil;
235
+ } else {
236
+ return new_tree(ret);
237
+ }
238
+ }
239
+
240
+ /**
241
+ * Use the parser to parse some source code stored in one contiguous buffer.
242
+ * The first two parameters are the same as in the {Parser#parse} function
243
+ * above. The second two parameters indicate the location of the buffer and its
244
+ * length in bytes.
245
+ *
246
+ * @param old_tree [Tree]
247
+ * @param string [String]
248
+ *
249
+ * @return [Tree, nil] A parse tree if parsing was successful.
250
+ */
251
+ static VALUE parser_parse_string(VALUE self, VALUE old_tree, VALUE string) {
252
+ if (NIL_P(string)) {
253
+ return Qnil;
254
+ }
255
+
256
+ const char *str = StringValuePtr(string);
257
+ uint32_t len = (uint32_t)RSTRING_LEN(string);
258
+ TSTree *tree = NULL;
259
+ if (!NIL_P(old_tree)) {
260
+ tree = value_to_tree(old_tree);
261
+ }
262
+
263
+ TSTree *ret = ts_parser_parse_string(SELF, tree, str, len);
264
+ if (ret == NULL) {
265
+ return Qnil;
266
+ } else {
267
+ return new_tree(ret);
268
+ }
269
+ }
270
+
271
+ /**
272
+ * Use the parser to parse some source code stored in one contiguous buffer with
273
+ * a given encoding. The first four parameters work the same as in the
274
+ * {Parser#parse_string} method above. The final parameter indicates whether
275
+ * the text is encoded as {Encoding::UTF8} or {Encoding::UTF16}.
276
+ *
277
+ * @param old_tree [Tree]
278
+ * @param string [String]
279
+ * @param encoding [Encoding]
280
+ *
281
+ * @return [Tree, nil] A parse tree if parsing was successful.
282
+ */
283
+ static VALUE parser_parse_string_encoding(VALUE self, VALUE old_tree,
284
+ VALUE string, VALUE encoding) {
285
+ if (NIL_P(string)) {
286
+ return Qnil;
287
+ }
288
+
289
+ const char *str = StringValuePtr(string);
290
+ uint32_t len = (uint32_t)RSTRING_LEN(string);
291
+ TSTree *tree = NULL;
292
+ if (!NIL_P(old_tree)) {
293
+ tree = value_to_tree(old_tree);
294
+ }
295
+
296
+ TSTree *ret = ts_parser_parse_string_encoding(SELF, tree, str, len,
297
+ value_to_encoding(encoding));
298
+
299
+ if (ret == NULL) {
300
+ return Qnil;
301
+ } else {
302
+ return new_tree(ret);
303
+ }
304
+ }
305
+
306
+ /**
307
+ * Set the file descriptor to which the parser should write debugging graphs
308
+ * during parsing. The graphs are formatted in the DOT language. You may want
309
+ * to pipe these graphs directly to a +dot(1)+ process in order to generate
310
+ * SVG output. You can turn off this logging by passing a negative number.
311
+ *
312
+ * @param file [Integer, String, nil] a file name to print, or turn off by
313
+ * passing +nil+ or +-1+
314
+ *
315
+ * @return nil
316
+ */
317
+ static VALUE parser_print_dot_graphs(VALUE self, VALUE file) {
318
+ if (NIL_P(file)) {
319
+ ts_parser_print_dot_graphs(SELF, -1);
320
+ } else if (rb_integer_type_p(file) && NUM2INT(file) < 0) {
321
+ ts_parser_print_dot_graphs(SELF, NUM2INT(file));
322
+ } else {
323
+ Check_Type(file, T_STRING);
324
+ char *path = StringValueCStr(file);
325
+ int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC,
326
+ 0644); // 0644 = all read + user write
327
+ ts_parser_print_dot_graphs(SELF, fd);
328
+ }
329
+ return Qnil;
330
+ }
331
+
332
+ /**
333
+ * Instruct the parser to start the next parse from the beginning.
334
+ *
335
+ * If the parser previously failed because of a timeout or a cancellation, then
336
+ * by default, it will resume where it left off on the next call to
337
+ * {Parser#parse} or other parsing functions. If you don't want to resume,
338
+ * and instead intend to use this parser to parse some other document, you must
339
+ * call {Parser#reset} first.
340
+ *
341
+ * @return nil
342
+ */
343
+ static VALUE parser_reset(VALUE self) {
344
+ ts_parser_reset(SELF);
345
+ return Qnil;
346
+ }
347
+
348
+ /**
349
+ * Get the duration in microseconds that parsing is allowed to take.
350
+ *
351
+ * @return [Integer]
352
+ */
353
+ static VALUE parser_get_timeout_micros(VALUE self) {
354
+ return ULL2NUM(ts_parser_timeout_micros(SELF));
355
+ }
356
+
357
+ /**
358
+ * Set the maximum duration in microseconds that parsing should be allowed to
359
+ * take before halting.
360
+ *
361
+ * If parsing takes longer than this, it will halt early, returning +nil+.
362
+ *
363
+ * @see parse
364
+ *
365
+ * @param timeout [Integer]
366
+ *
367
+ * @return [nil]
368
+ */
369
+ static VALUE parser_set_timeout_micros(VALUE self, VALUE timeout) {
370
+ ts_parser_set_timeout_micros(SELF, NUM2ULL(timeout));
371
+ return Qnil;
372
+ }
373
+
374
+ void init_parser(void) {
375
+ cParser = rb_define_class_under(mTreeSitter, "Parser", rb_cObject);
376
+
377
+ rb_define_alloc_func(cParser, parser_allocate);
378
+
379
+ /* Class methods */
380
+ rb_define_method(cParser, "cancellation_flag", parser_get_cancellation_flag,
381
+ 0);
382
+ rb_define_method(cParser, "cancellation_flag=", parser_set_cancellation_flag,
383
+ 1);
384
+ rb_define_method(cParser, "included_ranges", parser_get_included_ranges, 0);
385
+ rb_define_method(cParser, "included_ranges=", parser_set_included_ranges, 1);
386
+ rb_define_method(cParser, "language", parser_get_language, 0);
387
+ rb_define_method(cParser, "language=", parser_set_language, 1);
388
+ rb_define_method(cParser, "logger", parser_get_logger, 0);
389
+ rb_define_method(cParser, "logger=", parser_set_logger, 1);
390
+ rb_define_method(cParser, "parse", parser_parse, 2);
391
+ rb_define_method(cParser, "parse_string", parser_parse_string, 2);
392
+ rb_define_method(cParser, "parse_string_encoding",
393
+ parser_parse_string_encoding, 3);
394
+ rb_define_method(cParser, "print_dot_graphs", parser_print_dot_graphs, 1);
395
+ rb_define_method(cParser, "reset", parser_reset, 0);
396
+ rb_define_method(cParser, "timeout_micros", parser_get_timeout_micros, 0);
397
+ rb_define_method(cParser, "timeout_micros=", parser_set_timeout_micros, 1);
398
+ }
@@ -0,0 +1,26 @@
1
+ #include "tree_sitter.h"
2
+
3
+ extern VALUE mTreeSitter;
4
+
5
+ VALUE cPoint;
6
+
7
+ DATA_WRAP(Point, point)
8
+ DATA_DEFINE_ACCESSOR(point, row, UINT2NUM, NUM2UINT)
9
+ DATA_DEFINE_ACCESSOR(point, column, UINT2NUM, NUM2UINT)
10
+
11
+ static VALUE point_inspect(VALUE self) {
12
+ point_t *point = unwrap(self);
13
+ return rb_sprintf("{row=%i, column=%i}", point->data.row, point->data.column);
14
+ }
15
+
16
+ void init_point(void) {
17
+ cPoint = rb_define_class_under(mTreeSitter, "Point", rb_cObject);
18
+
19
+ rb_define_alloc_func(cPoint, point_allocate);
20
+
21
+ /* Class methods */
22
+ DECLARE_ACCESSOR(cPoint, point, row)
23
+ DECLARE_ACCESSOR(cPoint, point, column)
24
+ rb_define_method(cPoint, "inspect", point_inspect, 0);
25
+ rb_define_method(cPoint, "to_s", point_inspect, 0);
26
+ }
@@ -0,0 +1,43 @@
1
+ #include "tree_sitter.h"
2
+
3
+ extern VALUE mTreeSitter;
4
+
5
+ VALUE mQuantifier;
6
+
7
+ TSQuantifier value_to_quantifier(VALUE quantifier) {
8
+ return NUM2UINT(quantifier);
9
+ }
10
+
11
+ const char *quantifier_str(TSQuantifier error) {
12
+ switch (error) {
13
+ case TSQuantifierZero:
14
+ return "Zero";
15
+ case TSQuantifierZeroOrOne:
16
+ return "ZeroOrOne";
17
+ case TSQuantifierZeroOrMore:
18
+ return "ZeroOrMore";
19
+ case TSQuantifierOne:
20
+ return "One";
21
+ case TSQuantifierOneOrMore:
22
+ return "OneOrMore";
23
+ default:
24
+ return "??";
25
+ }
26
+ }
27
+
28
+ static VALUE quantifier_name(VALUE self, VALUE quant) {
29
+ int q = NUM2UINT(quant);
30
+ return safe_str(quantifier_str(q));
31
+ }
32
+
33
+ void init_quantifier(void) {
34
+ mQuantifier = rb_define_module_under(mTreeSitter, "Quantifier");
35
+
36
+ rb_define_module_function(mTreeSitter, "quantifier_name", quantifier_name, 1);
37
+
38
+ rb_define_const(mQuantifier, "ZERO", UINT2NUM(0));
39
+ rb_define_const(mQuantifier, "ZERO_OR_ONE", UINT2NUM(1));
40
+ rb_define_const(mQuantifier, "ZERO_OR_MORE", UINT2NUM(2));
41
+ rb_define_const(mQuantifier, "ONE", UINT2NUM(3));
42
+ rb_define_const(mQuantifier, "ONE_OR_MORE", UINT2NUM(4));
43
+ }