ruby_tree_sitter 0.20.8.2-x86_64-darwin-20 → 1.0.0-x86_64-darwin-20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/LICENSE +2 -1
  3. data/README.md +32 -18
  4. data/ext/tree_sitter/extconf.rb +1 -43
  5. data/ext/tree_sitter/input.c +1 -0
  6. data/ext/tree_sitter/language.c +131 -46
  7. data/ext/tree_sitter/logger.c +28 -12
  8. data/ext/tree_sitter/node.c +438 -130
  9. data/ext/tree_sitter/parser.c +232 -37
  10. data/ext/tree_sitter/query.c +197 -72
  11. data/ext/tree_sitter/query_cursor.c +140 -28
  12. data/ext/tree_sitter/repo.rb +121 -0
  13. data/ext/tree_sitter/tree.c +118 -34
  14. data/ext/tree_sitter/tree_cursor.c +205 -33
  15. data/ext/tree_sitter/tree_sitter.c +12 -0
  16. data/lib/tree_sitter/node.rb +43 -9
  17. data/lib/tree_sitter/tree_sitter.bundle +0 -0
  18. data/lib/tree_sitter/version.rb +4 -2
  19. data/lib/tree_sitter.rb +1 -0
  20. data/lib/tree_stand/ast_modifier.rb +30 -0
  21. data/lib/tree_stand/breadth_first_visitor.rb +54 -0
  22. data/lib/tree_stand/config.rb +13 -0
  23. data/lib/tree_stand/node.rb +224 -0
  24. data/lib/tree_stand/parser.rb +67 -0
  25. data/lib/tree_stand/range.rb +55 -0
  26. data/lib/tree_stand/tree.rb +123 -0
  27. data/lib/tree_stand/utils/printer.rb +73 -0
  28. data/lib/tree_stand/version.rb +7 -0
  29. data/lib/tree_stand/visitor.rb +127 -0
  30. data/lib/tree_stand/visitors/tree_walker.rb +37 -0
  31. data/lib/tree_stand.rb +48 -0
  32. data/tree_sitter.gemspec +15 -12
  33. metadata +37 -107
  34. data/test/README.md +0 -15
  35. data/test/test_helper.rb +0 -9
  36. data/test/tree_sitter/js_test.rb +0 -48
  37. data/test/tree_sitter/language_test.rb +0 -73
  38. data/test/tree_sitter/logger_test.rb +0 -70
  39. data/test/tree_sitter/node_test.rb +0 -355
  40. data/test/tree_sitter/parser_test.rb +0 -140
  41. data/test/tree_sitter/query_test.rb +0 -153
  42. data/test/tree_sitter/tree_cursor_test.rb +0 -83
  43. data/test/tree_sitter/tree_test.rb +0 -51
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b842a2305f9adfe075436d7fc21572e90ccf498419cf686f5a705110da630d1
4
- data.tar.gz: 352cc3aa0d0d3ef67e60aeb023b633c400e9bb77f674695f729a5fc187cb2555
3
+ metadata.gz: c04e0d228ece9018552b37e99dc47435ea0619b04517d435d14989052a911e66
4
+ data.tar.gz: 51c6ffb6bc12033a384e3f6a1e934c16201892de2a5fb03a091cdabde0493123
5
5
  SHA512:
6
- metadata.gz: e001b5024326bb8bbcf8083b27982cdd6aff01a3671c5fbb454f519d90899bc20ba4cf67a465e932904243efaeca51077445895f0ec489ced6767e059083af02
7
- data.tar.gz: 6d4256ab4da4fe855c20bff4340f90869a7cfc414b3e2a30c6c1325c47159ef184fcb94f356dfd5792194e332b1fcada85991d8a2c3a57f502daae63693c7e5d
6
+ metadata.gz: c2312869308f29643285c83086a7288153d9943a93009fdeebae691ea2c3a0d3f53411e040638b1da85c9b53d033d7dc61659bd0c0e7c2376acaa146342afcf5
7
+ data.tar.gz: e742289452f9caac7a79565c4e004dd70040dc6fe7f431be535ea0bf098f6150a2196b64a342dfefd2beac802b46398ec12fa9b7afd443db04472328a8ed7295
data/LICENSE CHANGED
@@ -1,6 +1,7 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2018-2021 Max Brunsfeld
3
+ Copyright (c) 2022-present, Faveod SAS.
4
+ Copyright (c) 2022-present, Shopify Inc.
4
5
 
5
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -10,6 +10,10 @@ very old, unmaintained, and don't work with modern `tree-sitter` APIs.
10
10
 
11
11
  ## Usage
12
12
 
13
+ ### TreeSitter API
14
+
15
+ The TreeSitter API is a low-level Ruby binding for tree-sitter.
16
+
13
17
  ``` ruby
14
18
  require 'tree_sitter'
15
19
 
@@ -28,32 +32,39 @@ root.each do |child|
28
32
  end
29
33
  ```
30
34
 
31
- ## About
35
+ The main philosophy behind the TreeSitter bindings is to do a 1:1 mapping between
36
+ tree-sitter's `C` API and `Ruby`, which makes it easier to experiment and port
37
+ ideas from different languages/bindings.
32
38
 
33
- The main philosophy behind these bindings is to do a 1:1 mapping between
34
- tree-sitter's `C` API and `Ruby`.
39
+ But it feels like writing some managed `C` with `Ruby`, and that's why we provide
40
+ a high-level API ([TreeStand](#treestand-api)) as well.
35
41
 
36
- It doesn't mean that we're going to yield the best perormance, but this design
37
- allows us to better experiment, and easily port ideas from other projects.
42
+ ### TreeStand API
38
43
 
39
- ### Versioning
44
+ The TreeStand API is a high-level Ruby wrapper for the [TreeSitter](#treesitter-api) bindings. It
45
+ makes it easier to configure the parsers, and work with the underlying syntax tree.
46
+ ```ruby
47
+ require 'tree_stand'
40
48
 
41
- This gem follows the `tree-sitter` versioning scheme, and appends its own
42
- version at the end.
49
+ TreeStand.configure do
50
+ config.parser_path = "path/to/parser/folder/"
51
+ end
43
52
 
44
- For instance, `tree-sitter` is now at version `0.20.8`, so this gem's version
45
- will be `0.20.8.x` where x is incremented with every notable batch of
46
- bugfixes or some ruby-only additions.
53
+ sql_parser = TreeStand::Parser.new("sql")
54
+ ruby_parser = TreeStand::Parser.new("ruby")
55
+ ```
56
+
57
+ TreeStand provides an idiomatic Ruby interface to work with tree-sitter parsers.
47
58
 
48
59
  ## Dependencies
49
60
 
50
- This gem is a binding for `tree-sitter`. It doesn't have a version of
51
- `tree-sitter` baked in it.
61
+ This gem is a binding for `tree-sitter`. It doesn't have a version of
62
+ `tree-sitter` baked in it by default.
52
63
 
53
64
  You must install `tree-sitter` and make sure that their dynamic library is
54
65
  accessible from `$PATH`, or build the gem with `--disable-sys-libs`, which will
55
66
  download the latest tagged `tree-sitter` and build against it (see [Build from
56
- source](docs/Development.md#build-from-source) .)
67
+ source](docs/Contributing.md#build-from-source) .)
57
68
 
58
69
  You can either install `tree-sitter` from source or through your go-to package manager.
59
70
 
@@ -88,7 +99,7 @@ brew install tree-sitter
88
99
  From [rubygems](https://rubygems.org/gems/ruby_tree_sitter), in your `Gemfile`:
89
100
 
90
101
  ```ruby
91
- gem 'ruby_tree_sitter', '~> 0.20.8.1'
102
+ gem 'ruby_tree_sitter', '~> 1.0'
92
103
  ```
93
104
 
94
105
  Or manually:
@@ -123,7 +134,7 @@ If you don't want to install from `rubygems`, `git`, or if you don't want to
123
134
  compile on install, then download a native gem from this repository's
124
135
  [releases](https://github.com/Faveod/ruby-tree-sitter/releases), or you can
125
136
  compile it yourself (see [Build from
126
- source](docs/Development.md#build-from-source) .)
137
+ source](docs/Contributing.md#build-from-source) .)
127
138
 
128
139
  In that case, you'd have to point your `Gemfile` to the `gem` as such:
129
140
 
@@ -131,6 +142,9 @@ In that case, you'd have to point your `Gemfile` to the `gem` as such:
131
142
  gem 'tree_sitter', path: 'path/to/native/tree_sitter.gem'
132
143
  ```
133
144
 
145
+ ⚠️ We're currently missing a lot of platforms and architectures. Cross-build
146
+ will come back in the near future.
147
+
134
148
  ### Parsers
135
149
 
136
150
  You will have to install parsers yourself, either by:
@@ -157,7 +171,7 @@ See `examples` directory.
157
171
 
158
172
  ## Development
159
173
 
160
- See [`docs/README.md`](docs/Development.md).
174
+ See [`docs/README.md`](docs/Contributing.md).
161
175
 
162
176
  ## 🚧 👷‍♀️ Notes 👷 🚧
163
177
 
@@ -172,7 +186,7 @@ don't copy them left and right, and then expect them to work without
172
186
  `SEGFAULT`ing and creating a black-hole in your living-room. Assume that you
173
187
  have to work locally with them. If you get a `SEGFAULT`, you can debug the
174
188
  native `C` code using `gdb`. You can read more on `SEGFAULT`s
175
- [here](docs/SIGSEGV.md), and debugging [here](docs/Development.md#Debugging).
189
+ [here](docs/SIGSEGV.md), and debugging [here](docs/Contributing.md#Debugging).
176
190
 
177
191
  That said, we do aim at providing an idiomatic `Ruby` interface. It should also
178
192
  provide a _safer_ interface, where you don't have to worry about when and how
@@ -30,7 +30,7 @@ dir_include, dir_lib =
30
30
  if system_tree_sitter?
31
31
  [
32
32
  %w[/opt/include /opt/local/include /usr/include /usr/local/include],
33
- %w[/opt/lib /opt/local/lib /usr/lib /usr/local/lib]
33
+ %w[/opt/lib /opt/local/lib /usr/lib /usr/local/lib],
34
34
  ]
35
35
  else
36
36
  repo = TreeSitter::Repo.new
@@ -53,48 +53,6 @@ dir_include, dir_lib =
53
53
  repo.include_and_lib_dirs
54
54
  end
55
55
 
56
- # TREESITTER_SPEC = Bundler.load_gemspec('../../../../tree_sitter.gemspec')
57
-
58
- # # def version = TREESITTER_SPEC.version.gsub(/\A(\d+\.\d+\.\d+)(\.\d+)?\z/, '\1')
59
-
60
- # def version = '0.20.8'
61
-
62
- # LINUX_PLATFORM_REGEX = /linux/
63
- # DARWIN_PLATFORM_REGEX = /darwin/
64
-
65
- # def platform = RUBY_PLATFORM
66
-
67
- # def darwin?
68
- # !!(platform =~ DARWIN_PLATFORM_REGEX)
69
- # end
70
-
71
- # def dll_ext
72
- # darwin? ? 'dylib' : 'so'
73
- # end
74
-
75
- # def staging_path
76
- # '../../stage/lib/tree_sitter'
77
- # end
78
-
79
- # def downloaded_dll_path
80
- # "tree-sitter-#{version}"
81
- # end
82
-
83
- # def add_tree_sitter_dll_to_gem
84
- # puts ">>>>>>>>>>>>> #{`pwd`}"
85
- # path = "#{downloaded_dll_path}/libtree-sitter*.#{dll_ext}"
86
- # files =
87
- # Dir.glob(path)
88
- # .map { |f| Pathname(f) }
89
- # .filter { |f| !f.symlink? && f.file? }
90
- # dll = files.first
91
- # dst = Pathname(staging_path) / "libtree-sitter.#{dll_ext}"
92
- # FileUtils.cp(dll, dst)
93
- # TREESITTER_SPEC.files << dst
94
- # end
95
-
96
- # add_tree_sitter_dll_to_gem
97
-
98
56
  # ################################## #
99
57
  # Generate Makefile #
100
58
  # ################################## #
@@ -113,6 +113,7 @@ static VALUE input_set_payload(VALUE self, VALUE payload) {
113
113
  return Qnil;
114
114
  }
115
115
 
116
+ // FIXME: Missing encoding and read!
116
117
  void init_input(void) {
117
118
  cInput = rb_define_class_under(mTreeSitter, "Input", rb_cObject);
118
119
 
@@ -25,45 +25,17 @@ VALUE new_language(const TSLanguage *language) {
25
25
  return res;
26
26
  }
27
27
 
28
- static VALUE language_symbol_count(VALUE self) {
29
- return UINT2NUM(ts_language_symbol_count(SELF));
30
- }
31
-
32
- static VALUE language_symbol_name(VALUE self, VALUE symbol) {
33
- return safe_str(ts_language_symbol_name(SELF, NUM2UINT(symbol)));
34
- }
35
-
36
- static VALUE language_symbol_for_name(VALUE self, VALUE string,
37
- VALUE is_named) {
38
- const char *str = rb_id2name(SYM2ID(string));
39
- uint32_t length = (uint32_t)strlen(str);
40
- bool named = RTEST(is_named);
41
- return UINT2NUM(ts_language_symbol_for_name(SELF, str, length, named));
42
- }
43
-
44
- static VALUE language_field_count(VALUE self) {
45
- return UINT2NUM(ts_language_field_count(SELF));
46
- }
47
-
48
- static VALUE language_field_name_for_id(VALUE self, VALUE field_id) {
49
- return safe_str(ts_language_field_name_for_id(SELF, NUM2UINT(field_id)));
50
- }
51
-
52
- static VALUE language_field_id_for_name(VALUE self, VALUE name) {
53
- TSLanguage *language = SELF;
54
- const char *str = StringValuePtr(name);
55
- uint32_t length = (uint32_t)RSTRING_LEN(name);
56
- return UINT2NUM(ts_language_field_id_for_name(language, str, length));
57
- }
58
-
59
- static VALUE language_symbol_type(VALUE self, VALUE symbol) {
60
- return new_symbol_type(ts_language_symbol_type(SELF, NUM2UINT(symbol)));
61
- }
62
-
63
- static VALUE language_version(VALUE self) {
64
- return UINT2NUM(ts_language_version(SELF));
65
- }
66
-
28
+ /**
29
+ * Load a language parser from disk.
30
+ *
31
+ * @raise [RuntimeError] if the parser was not found, or if it's incompatible
32
+ * with this gem.
33
+ *
34
+ * @param name [String] the parser's name.
35
+ * @param path [String] the parser's shared library (so, dylib) path on disk.
36
+ *
37
+ * @return [Language]
38
+ */
67
39
  static VALUE language_load(VALUE self, VALUE name, VALUE path) {
68
40
  VALUE path_s = rb_funcall(path, rb_intern("to_s"), 0);
69
41
  char *path_cstr = StringValueCStr(path_s);
@@ -113,22 +85,135 @@ static VALUE language_equal(VALUE self, VALUE other) {
113
85
  return this == that ? Qtrue : Qfalse;
114
86
  }
115
87
 
88
+ /**
89
+ * Get the number of distinct field names in the language.
90
+ *
91
+ * @return [Integer]
92
+ */
93
+ static VALUE language_field_count(VALUE self) {
94
+ return UINT2NUM(ts_language_field_count(SELF));
95
+ }
96
+
97
+ /**
98
+ * Get the numerical id for the given field name string.
99
+ *
100
+ * @param name [String]
101
+ *
102
+ * @return [Integer]
103
+ */
104
+ static VALUE language_field_id_for_name(VALUE self, VALUE name) {
105
+ TSLanguage *language = SELF;
106
+ const char *str = StringValuePtr(name);
107
+ uint32_t length = (uint32_t)RSTRING_LEN(name);
108
+ return UINT2NUM(ts_language_field_id_for_name(language, str, length));
109
+ }
110
+
111
+ /**
112
+ * Get the field name string for the given numerical id.
113
+ *
114
+ * @param field_id [Integer]
115
+ *
116
+ * @return [String]
117
+ */
118
+ static VALUE language_field_name_for_id(VALUE self, VALUE field_id) {
119
+ return safe_str(ts_language_field_name_for_id(SELF, NUM2UINT(field_id)));
120
+ }
121
+
122
+ /**
123
+ * Get the next parse state. Combine this with lookahead iterators to generate
124
+ * completion suggestions or valid symbols in error nodes. Use
125
+ * {Node#grammar_symbol} for valid symbols.
126
+ */
127
+ static VALUE language_next_state(VALUE self, VALUE state, VALUE symbol) {
128
+ uint16_t sta = (uint16_t)NUM2UINT(state);
129
+ uint16_t sym = (uint16_t)NUM2UINT(symbol);
130
+ return UINT2NUM(ts_language_next_state(SELF, sta, sym));
131
+ }
132
+
133
+ /**
134
+ * Get the number of distinct node types in the language.
135
+ *
136
+ * @return [Integer]
137
+ */
138
+ static VALUE language_symbol_count(VALUE self) {
139
+ return UINT2NUM(ts_language_symbol_count(SELF));
140
+ }
141
+
142
+ /**
143
+ * Get a node type string for the given numerical id.
144
+ *
145
+ * @param symbol [Integer]
146
+ *
147
+ * @return [String]
148
+ */
149
+ static VALUE language_symbol_name(VALUE self, VALUE symbol) {
150
+ return safe_str(ts_language_symbol_name(SELF, NUM2UINT(symbol)));
151
+ }
152
+
153
+ /**
154
+ * Get the numerical id for the given node type string.
155
+ *
156
+ * @param string [Symbol]
157
+ * @param is_named [Boolean]
158
+ *
159
+ * @return [Integer]
160
+ */
161
+ static VALUE language_symbol_for_name(VALUE self, VALUE string,
162
+ VALUE is_named) {
163
+ const char *str = rb_id2name(SYM2ID(string));
164
+ uint32_t length = (uint32_t)strlen(str);
165
+ bool named = RTEST(is_named);
166
+ return UINT2NUM(ts_language_symbol_for_name(SELF, str, length, named));
167
+ }
168
+
169
+ /**
170
+ * Check whether the given node type id belongs to named nodes, anonymous nodes,
171
+ * or a hidden nodes.
172
+ *
173
+ * Hidden nodes are never returned from the API.
174
+ *
175
+ * @see Node#named?
176
+ *
177
+ * @param symbol [Integer]
178
+ *
179
+ * @return [SymbolType]
180
+ */
181
+ static VALUE language_symbol_type(VALUE self, VALUE symbol) {
182
+ return new_symbol_type(ts_language_symbol_type(SELF, NUM2UINT(symbol)));
183
+ }
184
+
185
+ /**
186
+ * Get the ABI version number for this language. This version number is used
187
+ * to ensure that languages were generated by a compatible version of
188
+ * Tree-sitter.
189
+ *
190
+ * @see Parser#language=
191
+ */
192
+ static VALUE language_version(VALUE self) {
193
+ return UINT2NUM(ts_language_version(SELF));
194
+ }
195
+
116
196
  void init_language(void) {
117
197
  cLanguage = rb_define_class_under(mTreeSitter, "Language", rb_cObject);
118
198
 
119
199
  rb_define_alloc_func(cLanguage, language_allocate);
120
200
 
201
+ /* Module methods */
202
+ rb_define_module_function(cLanguage, "load", language_load, 2);
203
+
204
+ /* Operators */
205
+ rb_define_method(cLanguage, "==", language_equal, 1);
206
+
121
207
  /* Class methods */
122
- rb_define_method(cLanguage, "symbol_count", language_symbol_count, 0);
123
- rb_define_method(cLanguage, "symbol_name", language_symbol_name, 1);
124
- rb_define_method(cLanguage, "symbol_for_name", language_symbol_for_name, 2);
125
208
  rb_define_method(cLanguage, "field_count", language_field_count, 0);
126
- rb_define_method(cLanguage, "field_name_for_id", language_field_name_for_id,
127
- 1);
128
209
  rb_define_method(cLanguage, "field_id_for_name", language_field_id_for_name,
129
210
  1);
211
+ rb_define_method(cLanguage, "field_name_for_id", language_field_name_for_id,
212
+ 1);
213
+ rb_define_method(cLanguage, "next_state", language_next_state, 2);
214
+ rb_define_method(cLanguage, "symbol_count", language_symbol_count, 0);
215
+ rb_define_method(cLanguage, "symbol_for_name", language_symbol_for_name, 2);
216
+ rb_define_method(cLanguage, "symbol_name", language_symbol_name, 1);
130
217
  rb_define_method(cLanguage, "symbol_type", language_symbol_type, 1);
131
218
  rb_define_method(cLanguage, "version", language_version, 0);
132
- rb_define_module_function(cLanguage, "load", language_load, 2);
133
- rb_define_method(cLanguage, "==", language_equal, 1);
134
219
  }
@@ -147,19 +147,35 @@ static void logger_initialize_stderr(logger_t *logger) {
147
147
  }
148
148
  }
149
149
 
150
- // For now, we only take:
151
- // argv[0] = stream
152
- // argv[1] = format : String
153
- //
154
- // We need to add support for argv[1] : lambda/block
155
- //
156
- // case argv[1]
157
- // in lambda => lambda
158
- // in String => puts || printf || write
159
- // else => write
160
- // end
161
- //
150
+ /**
151
+ * Create a new logger.
152
+ *
153
+ * By default, it logs to stderr.
154
+ *
155
+ * You can provide your proper backend. You have to make sure that it
156
+ * exposes a +printf+, +puts+, or +write+ (lookup is done in that specific
157
+ * order). {StringIO} is a perfect candidate.
158
+ *
159
+ * You can also provide a format ({String}) if your backend supports a +printf+.
160
+ *
161
+ * @example
162
+ * backend = StringIO.new
163
+ * parser.logger = TreeSitter::Logger.new(backend)
164
+ *
165
+ * @param args [Array] The first argument is always a backend. The second
166
+ * argument is the format.
167
+ */
162
168
  static VALUE logger_initialize(int argc, VALUE *argv, VALUE self) {
169
+ // TODO:
170
+ // For now, we only take:
171
+ // argv[0] = stream
172
+ // argv[1] = format : String
173
+ // We need to add support for argv[1] : lambda/block
174
+ // case argv[1]
175
+ // in lambda => lambda
176
+ // in String => puts || printf || write
177
+ // else => write
178
+ // end
163
179
  logger_t *logger = unwrap(self);
164
180
 
165
181
  VALUE payload;