ruby_tree_sitter 0.20.8.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +2 -1
  3. data/README.md +32 -18
  4. data/ext/tree_sitter/extconf.rb +1 -1
  5. data/ext/tree_sitter/input.c +1 -0
  6. data/ext/tree_sitter/language.c +131 -46
  7. data/ext/tree_sitter/logger.c +28 -12
  8. data/ext/tree_sitter/node.c +438 -130
  9. data/ext/tree_sitter/parser.c +232 -37
  10. data/ext/tree_sitter/query.c +197 -72
  11. data/ext/tree_sitter/query_cursor.c +140 -28
  12. data/ext/tree_sitter/repo.rb +1 -1
  13. data/ext/tree_sitter/tree.c +118 -34
  14. data/ext/tree_sitter/tree_cursor.c +205 -33
  15. data/ext/tree_sitter/tree_sitter.c +12 -0
  16. data/lib/tree_sitter/node.rb +23 -6
  17. data/lib/tree_sitter/version.rb +4 -2
  18. data/lib/tree_sitter.rb +1 -0
  19. data/lib/tree_stand/ast_modifier.rb +30 -0
  20. data/lib/tree_stand/breadth_first_visitor.rb +54 -0
  21. data/lib/tree_stand/config.rb +13 -0
  22. data/lib/tree_stand/node.rb +224 -0
  23. data/lib/tree_stand/parser.rb +67 -0
  24. data/lib/tree_stand/range.rb +55 -0
  25. data/lib/tree_stand/tree.rb +123 -0
  26. data/lib/tree_stand/utils/printer.rb +73 -0
  27. data/lib/tree_stand/version.rb +7 -0
  28. data/lib/tree_stand/visitor.rb +127 -0
  29. data/lib/tree_stand/visitors/tree_walker.rb +37 -0
  30. data/lib/tree_stand.rb +48 -0
  31. data/tree_sitter.gemspec +14 -11
  32. metadata +37 -108
  33. data/test/README.md +0 -15
  34. data/test/test_helper.rb +0 -9
  35. data/test/tree_sitter/js_test.rb +0 -48
  36. data/test/tree_sitter/language_test.rb +0 -73
  37. data/test/tree_sitter/logger_test.rb +0 -70
  38. data/test/tree_sitter/node_test.rb +0 -411
  39. data/test/tree_sitter/parser_test.rb +0 -140
  40. data/test/tree_sitter/query_test.rb +0 -153
  41. data/test/tree_sitter/tree_cursor_test.rb +0 -83
  42. data/test/tree_sitter/tree_test.rb +0 -51
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f897961a83176c564dd214e3b8c893880302aa9552cac65b9f0c2e06e396ba7c
4
- data.tar.gz: 0ba1af8010641341f95bab2bc7182faa94181da5f862a4224ef5d87c44d1eff9
3
+ metadata.gz: '08ade679a8ed94e7099d2cd14a06b3c889c4a3961431c2421be67b362083e0ad'
4
+ data.tar.gz: 01abc16071189e237e4b20587a42a8d021e34d8be1df52361a77753b8e02dc00
5
5
  SHA512:
6
- metadata.gz: a3ff40e54f3d0b87984d14217f66a16f1c61a8c59c6a41d996e1792c513b2a8a3937f44839253d994d876bcbd6c1551b6c657b1cb6b6f3a0c84d752cf226da88
7
- data.tar.gz: fac9fc60614e9ebd14e3181a4d2519cb4da9d170ea51326585204d3b65265349b3cdf964967c57d381bf795023c7b5e839ab406ba536ee5f971228efac9d3a5e
6
+ metadata.gz: a94c4a808efefb8855c7b6385ec74f48ae1ae35a2038ef4f0736675a4181ee72a405859880508c31ebef7514f706709cdd1589c386cb0d21bce1b3f82ab4f70a
7
+ data.tar.gz: a4af07d96b5f1356a5f71e532335b44c7d5a0eb4c59a2b09cca3b49148b6437874b76ba55266161f1567b459a8b755d9ecdba625fd6d59c598ae89dfa4a1803f
data/LICENSE CHANGED
@@ -1,6 +1,7 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2018-2021 Max Brunsfeld
3
+ Copyright (c) 2022-present, Faveod SAS.
4
+ Copyright (c) 2022-present, Shopify Inc.
4
5
 
5
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -10,6 +10,10 @@ very old, unmaintained, and don't work with modern `tree-sitter` APIs.
10
10
 
11
11
  ## Usage
12
12
 
13
+ ### TreeSitter API
14
+
15
+ The TreeSitter API is a low-level Ruby binding for tree-sitter.
16
+
13
17
  ``` ruby
14
18
  require 'tree_sitter'
15
19
 
@@ -28,32 +32,39 @@ root.each do |child|
28
32
  end
29
33
  ```
30
34
 
31
- ## About
35
+ The main philosophy behind the TreeSitter bindings is to do a 1:1 mapping between
36
+ tree-sitter's `C` API and `Ruby`, which makes it easier to experiment and port
37
+ ideas from different languages/bindings.
32
38
 
33
- The main philosophy behind these bindings is to do a 1:1 mapping between
34
- tree-sitter's `C` API and `Ruby`.
39
+ But it feels like writing some managed `C` with `Ruby`, and that's why we provide
40
+ a high-level API ([TreeStand](#treestand-api)) as well.
35
41
 
36
- It doesn't mean that we're going to yield the best perormance, but this design
37
- allows us to better experiment, and easily port ideas from other projects.
42
+ ### TreeStand API
38
43
 
39
- ### Versioning
44
+ The TreeStand API is a high-level Ruby wrapper for the [TreeSitter](#treesitter-api) bindings. It
45
+ makes it easier to configure the parsers, and work with the underlying syntax tree.
46
+ ```ruby
47
+ require 'tree_stand'
40
48
 
41
- This gem follows the `tree-sitter` versioning scheme, and appends its own
42
- version at the end.
49
+ TreeStand.configure do
50
+ config.parser_path = "path/to/parser/folder/"
51
+ end
43
52
 
44
- For instance, `tree-sitter` is now at version `0.20.8`, so this gem's version
45
- will be `0.20.8.x` where x is incremented with every notable batch of
46
- bugfixes or some ruby-only additions.
53
+ sql_parser = TreeStand::Parser.new("sql")
54
+ ruby_parser = TreeStand::Parser.new("ruby")
55
+ ```
56
+
57
+ TreeStand provides an idiomatic Ruby interface to work with tree-sitter parsers.
47
58
 
48
59
  ## Dependencies
49
60
 
50
- This gem is a binding for `tree-sitter`. It doesn't have a version of
51
- `tree-sitter` baked in it.
61
+ This gem is a binding for `tree-sitter`. It doesn't have a version of
62
+ `tree-sitter` baked in it by default.
52
63
 
53
64
  You must install `tree-sitter` and make sure that their dynamic library is
54
65
  accessible from `$PATH`, or build the gem with `--disable-sys-libs`, which will
55
66
  download the latest tagged `tree-sitter` and build against it (see [Build from
56
- source](docs/Development.md#build-from-source) .)
67
+ source](docs/Contributing.md#build-from-source).)
57
68
 
58
69
  You can either install `tree-sitter` from source or through your go-to package manager.
59
70
 
@@ -88,7 +99,7 @@ brew install tree-sitter
88
99
  From [rubygems](https://rubygems.org/gems/ruby_tree_sitter), in your `Gemfile`:
89
100
 
90
101
  ```ruby
91
- gem 'ruby_tree_sitter', '~> 0.20.8.1'
102
+ gem 'ruby_tree_sitter', '~> 1.0'
92
103
  ```
93
104
 
94
105
  Or manually:
@@ -123,7 +134,7 @@ If you don't want to install from `rubygems`, `git`, or if you don't want to
123
134
  compile on install, then download a native gem from this repository's
124
135
  [releases](https://github.com/Faveod/ruby-tree-sitter/releases), or you can
125
136
  compile it yourself (see [Build from
126
- source](docs/Development.md#build-from-source) .)
137
+ source](docs/Contributing.md#build-from-source).)
127
138
 
128
139
  In that case, you'd have to point your `Gemfile` to the `gem` as such:
129
140
 
@@ -131,6 +142,9 @@ In that case, you'd have to point your `Gemfile` to the `gem` as such:
131
142
  gem 'tree_sitter', path: 'path/to/native/tree_sitter.gem'
132
143
  ```
133
144
 
145
+ ⚠️ We're currently missing a lot of platforms and architectures. Cross-build
146
+ will come back in the near future.
147
+
134
148
  ### Parsers
135
149
 
136
150
  You will have to install parsers yourself, either by:
@@ -157,7 +171,7 @@ See `examples` directory.
157
171
 
158
172
  ## Development
159
173
 
160
- See [`docs/README.md`](docs/Development.md).
174
+ See [`docs/Contributing.md`](docs/Contributing.md).
161
175
 
162
176
  ## 🚧 👷‍♀️ Notes 👷 🚧
163
177
 
@@ -172,7 +186,7 @@ don't copy them left and right, and then expect them to work without
172
186
  `SEGFAULT`ing and creating a black-hole in your living-room. Assume that you
173
187
  have to work locally with them. If you get a `SEGFAULT`, you can debug the
174
188
  native `C` code using `gdb`. You can read more on `SEGFAULT`s
175
- [here](docs/SIGSEGV.md), and debugging [here](docs/Development.md#Debugging).
189
+ [here](docs/SIGSEGV.md), and debugging [here](docs/Contributing.md#Debugging).
176
190
 
177
191
  That said, we do aim at providing an idiomatic `Ruby` interface. It should also
178
192
  provide a _safer_ interface, where you don't have to worry about when and how
@@ -30,7 +30,7 @@ dir_include, dir_lib =
30
30
  if system_tree_sitter?
31
31
  [
32
32
  %w[/opt/include /opt/local/include /usr/include /usr/local/include],
33
- %w[/opt/lib /opt/local/lib /usr/lib /usr/local/lib]
33
+ %w[/opt/lib /opt/local/lib /usr/lib /usr/local/lib],
34
34
  ]
35
35
  else
36
36
  repo = TreeSitter::Repo.new
@@ -113,6 +113,7 @@ static VALUE input_set_payload(VALUE self, VALUE payload) {
113
113
  return Qnil;
114
114
  }
115
115
 
116
+ // FIXME: Missing encoding and read!
116
117
  void init_input(void) {
117
118
  cInput = rb_define_class_under(mTreeSitter, "Input", rb_cObject);
118
119
 
@@ -25,45 +25,17 @@ VALUE new_language(const TSLanguage *language) {
25
25
  return res;
26
26
  }
27
27
 
28
- static VALUE language_symbol_count(VALUE self) {
29
- return UINT2NUM(ts_language_symbol_count(SELF));
30
- }
31
-
32
- static VALUE language_symbol_name(VALUE self, VALUE symbol) {
33
- return safe_str(ts_language_symbol_name(SELF, NUM2UINT(symbol)));
34
- }
35
-
36
- static VALUE language_symbol_for_name(VALUE self, VALUE string,
37
- VALUE is_named) {
38
- const char *str = rb_id2name(SYM2ID(string));
39
- uint32_t length = (uint32_t)strlen(str);
40
- bool named = RTEST(is_named);
41
- return UINT2NUM(ts_language_symbol_for_name(SELF, str, length, named));
42
- }
43
-
44
- static VALUE language_field_count(VALUE self) {
45
- return UINT2NUM(ts_language_field_count(SELF));
46
- }
47
-
48
- static VALUE language_field_name_for_id(VALUE self, VALUE field_id) {
49
- return safe_str(ts_language_field_name_for_id(SELF, NUM2UINT(field_id)));
50
- }
51
-
52
- static VALUE language_field_id_for_name(VALUE self, VALUE name) {
53
- TSLanguage *language = SELF;
54
- const char *str = StringValuePtr(name);
55
- uint32_t length = (uint32_t)RSTRING_LEN(name);
56
- return UINT2NUM(ts_language_field_id_for_name(language, str, length));
57
- }
58
-
59
- static VALUE language_symbol_type(VALUE self, VALUE symbol) {
60
- return new_symbol_type(ts_language_symbol_type(SELF, NUM2UINT(symbol)));
61
- }
62
-
63
- static VALUE language_version(VALUE self) {
64
- return UINT2NUM(ts_language_version(SELF));
65
- }
66
-
28
+ /**
29
+ * Load a language parser from disk.
30
+ *
31
+ * @raise [RuntimeError] if the parser was not found, or if it's incompatible
32
+ * with this gem.
33
+ *
34
+ * @param name [String] the parser's name.
35
+ * @param path [String] the parser's shared library (so, dylib) path on disk.
36
+ *
37
+ * @return [Language]
38
+ */
67
39
  static VALUE language_load(VALUE self, VALUE name, VALUE path) {
68
40
  VALUE path_s = rb_funcall(path, rb_intern("to_s"), 0);
69
41
  char *path_cstr = StringValueCStr(path_s);
@@ -113,22 +85,135 @@ static VALUE language_equal(VALUE self, VALUE other) {
113
85
  return this == that ? Qtrue : Qfalse;
114
86
  }
115
87
 
88
+ /**
89
+ * Get the number of distinct field names in the language.
90
+ *
91
+ * @return [Integer]
92
+ */
93
+ static VALUE language_field_count(VALUE self) {
94
+ return UINT2NUM(ts_language_field_count(SELF));
95
+ }
96
+
97
+ /**
98
+ * Get the numerical id for the given field name string.
99
+ *
100
+ * @param name [String]
101
+ *
102
+ * @return [Integer]
103
+ */
104
+ static VALUE language_field_id_for_name(VALUE self, VALUE name) {
105
+ TSLanguage *language = SELF;
106
+ const char *str = StringValuePtr(name);
107
+ uint32_t length = (uint32_t)RSTRING_LEN(name);
108
+ return UINT2NUM(ts_language_field_id_for_name(language, str, length));
109
+ }
110
+
111
+ /**
112
+ * Get the field name string for the given numerical id.
113
+ *
114
+ * @param field_id [Integer]
115
+ *
116
+ * @return [String]
117
+ */
118
+ static VALUE language_field_name_for_id(VALUE self, VALUE field_id) {
119
+ return safe_str(ts_language_field_name_for_id(SELF, NUM2UINT(field_id)));
120
+ }
121
+
122
+ /**
123
+ * Get the next parse state. Combine this with lookahead iterators to generate
124
+ * completion suggestions or valid symbols in error nodes. Use
125
+ * {Node#grammar_symbol} for valid symbols.
126
+ */
127
+ static VALUE language_next_state(VALUE self, VALUE state, VALUE symbol) {
128
+ uint16_t sta = (uint16_t)NUM2UINT(state);
129
+ uint16_t sym = (uint16_t)NUM2UINT(symbol);
130
+ return UINT2NUM(ts_language_next_state(SELF, sta, sym));
131
+ }
132
+
133
+ /**
134
+ * Get the number of distinct node types in the language.
135
+ *
136
+ * @return [Integer]
137
+ */
138
+ static VALUE language_symbol_count(VALUE self) {
139
+ return UINT2NUM(ts_language_symbol_count(SELF));
140
+ }
141
+
142
+ /**
143
+ * Get a node type string for the given numerical id.
144
+ *
145
+ * @param symbol [Integer]
146
+ *
147
+ * @return [String]
148
+ */
149
+ static VALUE language_symbol_name(VALUE self, VALUE symbol) {
150
+ return safe_str(ts_language_symbol_name(SELF, NUM2UINT(symbol)));
151
+ }
152
+
153
+ /**
154
+ * Get the numerical id for the given node type string.
155
+ *
156
+ * @param string [Symbol]
157
+ * @param is_named [Boolean]
158
+ *
159
+ * @return [Integer]
160
+ */
161
+ static VALUE language_symbol_for_name(VALUE self, VALUE string,
162
+ VALUE is_named) {
163
+ const char *str = rb_id2name(SYM2ID(string));
164
+ uint32_t length = (uint32_t)strlen(str);
165
+ bool named = RTEST(is_named);
166
+ return UINT2NUM(ts_language_symbol_for_name(SELF, str, length, named));
167
+ }
168
+
169
+ /**
170
+ * Check whether the given node type id belongs to named nodes, anonymous nodes,
171
+ * or a hidden nodes.
172
+ *
173
+ * Hidden nodes are never returned from the API.
174
+ *
175
+ * @see Node#named?
176
+ *
177
+ * @param symbol [Integer]
178
+ *
179
+ * @return [SymbolType]
180
+ */
181
+ static VALUE language_symbol_type(VALUE self, VALUE symbol) {
182
+ return new_symbol_type(ts_language_symbol_type(SELF, NUM2UINT(symbol)));
183
+ }
184
+
185
+ /**
186
+ * Get the ABI version number for this language. This version number is used
187
+ * to ensure that languages were generated by a compatible version of
188
+ * Tree-sitter.
189
+ *
190
+ * @see Parser#language=
191
+ */
192
+ static VALUE language_version(VALUE self) {
193
+ return UINT2NUM(ts_language_version(SELF));
194
+ }
195
+
116
196
  void init_language(void) {
117
197
  cLanguage = rb_define_class_under(mTreeSitter, "Language", rb_cObject);
118
198
 
119
199
  rb_define_alloc_func(cLanguage, language_allocate);
120
200
 
201
+ /* Module methods */
202
+ rb_define_module_function(cLanguage, "load", language_load, 2);
203
+
204
+ /* Operators */
205
+ rb_define_method(cLanguage, "==", language_equal, 1);
206
+
121
207
  /* Class methods */
122
- rb_define_method(cLanguage, "symbol_count", language_symbol_count, 0);
123
- rb_define_method(cLanguage, "symbol_name", language_symbol_name, 1);
124
- rb_define_method(cLanguage, "symbol_for_name", language_symbol_for_name, 2);
125
208
  rb_define_method(cLanguage, "field_count", language_field_count, 0);
126
- rb_define_method(cLanguage, "field_name_for_id", language_field_name_for_id,
127
- 1);
128
209
  rb_define_method(cLanguage, "field_id_for_name", language_field_id_for_name,
129
210
  1);
211
+ rb_define_method(cLanguage, "field_name_for_id", language_field_name_for_id,
212
+ 1);
213
+ rb_define_method(cLanguage, "next_state", language_next_state, 2);
214
+ rb_define_method(cLanguage, "symbol_count", language_symbol_count, 0);
215
+ rb_define_method(cLanguage, "symbol_for_name", language_symbol_for_name, 2);
216
+ rb_define_method(cLanguage, "symbol_name", language_symbol_name, 1);
130
217
  rb_define_method(cLanguage, "symbol_type", language_symbol_type, 1);
131
218
  rb_define_method(cLanguage, "version", language_version, 0);
132
- rb_define_module_function(cLanguage, "load", language_load, 2);
133
- rb_define_method(cLanguage, "==", language_equal, 1);
134
219
  }
@@ -147,19 +147,35 @@ static void logger_initialize_stderr(logger_t *logger) {
147
147
  }
148
148
  }
149
149
 
150
- // For now, we only take:
151
- // argv[0] = stream
152
- // argv[1] = format : String
153
- //
154
- // We need to add support for argv[1] : lambda/block
155
- //
156
- // case argv[1]
157
- // in lambda => lambda
158
- // in String => puts || printf || write
159
- // else => write
160
- // end
161
- //
150
+ /**
151
+ * Create a new logger.
152
+ *
153
+ * By default, it logs to stderr.
154
+ *
155
+ * You can provide your own backend. You have to make sure that it
156
+ * exposes a +printf+, +puts+, or +write+ (lookup is done in that specific
157
+ * order). {StringIO} is a perfect candidate.
158
+ *
159
+ * You can also provide a format ({String}) if your backend supports a +printf+.
160
+ *
161
+ * @example
162
+ * backend = StringIO.new
163
+ * parser.logger = TreeSitter::Logger.new(backend)
164
+ *
165
+ * @param args [Array] The first argument is always a backend. The second
166
+ * argument is the format.
167
+ */
162
168
  static VALUE logger_initialize(int argc, VALUE *argv, VALUE self) {
169
+ // TODO:
170
+ // For now, we only take:
171
+ // argv[0] = stream
172
+ // argv[1] = format : String
173
+ // We need to add support for argv[1] : lambda/block
174
+ // case argv[1]
175
+ // in lambda => lambda
176
+ // in String => puts || printf || write
177
+ // else => write
178
+ // end
163
179
  logger_t *logger = unwrap(self);
164
180
 
165
181
  VALUE payload;