ruby_tree_sitter 0.20.8.3-x86_64-linux → 1.0.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +2 -1
- data/README.md +32 -18
- data/ext/tree_sitter/extconf.rb +1 -1
- data/ext/tree_sitter/input.c +1 -0
- data/ext/tree_sitter/language.c +131 -46
- data/ext/tree_sitter/logger.c +28 -12
- data/ext/tree_sitter/node.c +438 -130
- data/ext/tree_sitter/parser.c +232 -37
- data/ext/tree_sitter/query.c +197 -72
- data/ext/tree_sitter/query_cursor.c +140 -28
- data/ext/tree_sitter/repo.rb +1 -1
- data/ext/tree_sitter/tree.c +118 -34
- data/ext/tree_sitter/tree_cursor.c +205 -33
- data/ext/tree_sitter/tree_sitter.c +12 -0
- data/lib/tree_sitter/node.rb +23 -6
- data/lib/tree_sitter/tree_sitter.so +0 -0
- data/lib/tree_sitter/version.rb +4 -2
- data/lib/tree_sitter.rb +1 -0
- data/lib/tree_stand/ast_modifier.rb +30 -0
- data/lib/tree_stand/breadth_first_visitor.rb +54 -0
- data/lib/tree_stand/config.rb +13 -0
- data/lib/tree_stand/node.rb +224 -0
- data/lib/tree_stand/parser.rb +67 -0
- data/lib/tree_stand/range.rb +55 -0
- data/lib/tree_stand/tree.rb +123 -0
- data/lib/tree_stand/utils/printer.rb +73 -0
- data/lib/tree_stand/version.rb +7 -0
- data/lib/tree_stand/visitor.rb +127 -0
- data/lib/tree_stand/visitors/tree_walker.rb +37 -0
- data/lib/tree_stand.rb +48 -0
- data/tree_sitter.gemspec +14 -11
- metadata +36 -107
- data/test/README.md +0 -15
- data/test/test_helper.rb +0 -9
- data/test/tree_sitter/js_test.rb +0 -48
- data/test/tree_sitter/language_test.rb +0 -73
- data/test/tree_sitter/logger_test.rb +0 -70
- data/test/tree_sitter/node_test.rb +0 -411
- data/test/tree_sitter/parser_test.rb +0 -140
- data/test/tree_sitter/query_test.rb +0 -153
- data/test/tree_sitter/tree_cursor_test.rb +0 -83
- data/test/tree_sitter/tree_test.rb +0 -51
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5237780e4fc6628d21e5671a2259320bbb307520a690edba10e776e57f3c886b
|
4
|
+
data.tar.gz: 730d07e399f4e5aee47e8c71a8dd346d83f77d0de4d2d00311a8cca7e2f554ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc2878922995a67d5e95029b135f3b9cefb5769e4bf82d5bc94f5e0a282f79429f4aedc7f2ea18301979aefceed576be52f940c357d3844e82e6f7fe872b5c36
|
7
|
+
data.tar.gz: 04cb391ecddc7323a53764aafda6d60789584d6b3dad0272ea9632108d2a7d55f491811e20bb7ce4241fbbd0f193a7a5f0a92899a5a2ca7b2a1e51264f9e3e3c
|
data/LICENSE
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c)
|
3
|
+
Copyright (c) 2022-present, Faveod SAS.
|
4
|
+
Copyright (c) 2022-present, Shopify Inc.
|
4
5
|
|
5
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -10,6 +10,10 @@ very old, unmaintained, and don't work with modern `tree-sitter` APIs.
|
|
10
10
|
|
11
11
|
## Usage
|
12
12
|
|
13
|
+
### TreeSitter API
|
14
|
+
|
15
|
+
The TreeSitter API is a low-level Ruby binding for tree-sitter.
|
16
|
+
|
13
17
|
``` ruby
|
14
18
|
require 'tree_sitter'
|
15
19
|
|
@@ -28,32 +32,39 @@ root.each do |child|
|
|
28
32
|
end
|
29
33
|
```
|
30
34
|
|
31
|
-
|
35
|
+
The main philosophy behind the TreeSitter bindings is to do a 1:1 mapping between
|
36
|
+
tree-sitter's `C` API and `Ruby`, which makes it easier to experiment and port
|
37
|
+
ideas from different languages/bindings.
|
32
38
|
|
33
|
-
|
34
|
-
|
39
|
+
But it feels like writing some managed `C` with `Ruby`, and that's why we provide
|
40
|
+
a high-level API ([TreeStand](#treestand-api)) as well.
|
35
41
|
|
36
|
-
|
37
|
-
allows us to better experiment, and easily port ideas from other projects.
|
42
|
+
### TreeStand API
|
38
43
|
|
39
|
-
|
44
|
+
The TreeStand API is a high-level Ruby wrapper for the [TreeSitter](#treesitter-api) bindings. It
|
45
|
+
makes it easier to configure the parsers, and work with the underlying syntax tree.
|
46
|
+
```ruby
|
47
|
+
require 'tree_stand'
|
40
48
|
|
41
|
-
|
42
|
-
|
49
|
+
TreeStand.configure do
|
50
|
+
config.parser_path = "path/to/parser/folder/"
|
51
|
+
end
|
43
52
|
|
44
|
-
|
45
|
-
|
46
|
-
|
53
|
+
sql_parser = TreeStand::Parser.new("sql")
|
54
|
+
ruby_parser = TreeStand::Parser.new("ruby")
|
55
|
+
```
|
56
|
+
|
57
|
+
TreeStand provides an idiomatic Ruby interface to work with tree-sitter parsers.
|
47
58
|
|
48
59
|
## Dependencies
|
49
60
|
|
50
|
-
This gem is a binding for `tree-sitter`.
|
51
|
-
`tree-sitter` baked in it.
|
61
|
+
This gem is a binding for `tree-sitter`. It doesn't have a version of
|
62
|
+
`tree-sitter` baked in it by default.
|
52
63
|
|
53
64
|
You must install `tree-sitter` and make sure that their dynamic library is
|
54
65
|
accessible from `$PATH`, or build the gem with `--disable-sys-libs`, which will
|
55
66
|
download the latest tagged `tree-sitter` and build against it (see [Build from
|
56
|
-
source](docs/
|
67
|
+
source](docs/Contributing.md#build-from-source).)
|
57
68
|
|
58
69
|
You can either install `tree-sitter` from source or through your go-to package manager.
|
59
70
|
|
@@ -88,7 +99,7 @@ brew install tree-sitter
|
|
88
99
|
From [rubygems](https://rubygems.org/gems/ruby_tree_sitter), in your `Gemfile`:
|
89
100
|
|
90
101
|
```ruby
|
91
|
-
gem 'ruby_tree_sitter', '~> 0
|
102
|
+
gem 'ruby_tree_sitter', '~> 1.0'
|
92
103
|
```
|
93
104
|
|
94
105
|
Or manually:
|
@@ -123,7 +134,7 @@ If you don't want to install from `rubygems`, `git`, or if you don't want to
|
|
123
134
|
compile on install, then download a native gem from this repository's
|
124
135
|
[releases](https://github.com/Faveod/ruby-tree-sitter/releases), or you can
|
125
136
|
compile it yourself (see [Build from
|
126
|
-
source](docs/
|
137
|
+
source](docs/Contributing.md#build-from-source).)
|
127
138
|
|
128
139
|
In that case, you'd have to point your `Gemfile` to the `gem` as such:
|
129
140
|
|
@@ -131,6 +142,9 @@ In that case, you'd have to point your `Gemfile` to the `gem` as such:
|
|
131
142
|
gem 'tree_sitter', path: 'path/to/native/tree_sitter.gem'
|
132
143
|
```
|
133
144
|
|
145
|
+
⚠️ We're currently missing a lot of platforms and architectures. Cross-build
|
146
|
+
will come back in the near future.
|
147
|
+
|
134
148
|
### Parsers
|
135
149
|
|
136
150
|
You will have to install parsers yourself, either by:
|
@@ -157,7 +171,7 @@ See `examples` directory.
|
|
157
171
|
|
158
172
|
## Development
|
159
173
|
|
160
|
-
See [`docs/README.md`](docs/
|
174
|
+
See [`docs/Contributing.md`](docs/Contributing.md).
|
161
175
|
|
162
176
|
## 🚧 👷♀️ Notes 👷 🚧
|
163
177
|
|
@@ -172,7 +186,7 @@ don't copy them left and right, and then expect them to work without
|
|
172
186
|
`SEGFAULT`ing and creating a black-hole in your living-room. Assume that you
|
173
187
|
have to work locally with them. If you get a `SEGFAULT`, you can debug the
|
174
188
|
native `C` code using `gdb`. You can read more on `SEGFAULT`s
|
175
|
-
[here](docs/SIGSEGV.md), and debugging [here](docs/
|
189
|
+
[here](docs/SIGSEGV.md), and debugging [here](docs/Contributing.md#Debugging).
|
176
190
|
|
177
191
|
That said, we do aim at providing an idiomatic `Ruby` interface. It should also
|
178
192
|
provide a _safer_ interface, where you don't have to worry about when and how
|
data/ext/tree_sitter/extconf.rb
CHANGED
@@ -30,7 +30,7 @@ dir_include, dir_lib =
|
|
30
30
|
if system_tree_sitter?
|
31
31
|
[
|
32
32
|
%w[/opt/include /opt/local/include /usr/include /usr/local/include],
|
33
|
-
%w[/opt/lib /opt/local/lib /usr/lib /usr/local/lib]
|
33
|
+
%w[/opt/lib /opt/local/lib /usr/lib /usr/local/lib],
|
34
34
|
]
|
35
35
|
else
|
36
36
|
repo = TreeSitter::Repo.new
|
data/ext/tree_sitter/input.c
CHANGED
data/ext/tree_sitter/language.c
CHANGED
@@ -25,45 +25,17 @@ VALUE new_language(const TSLanguage *language) {
|
|
25
25
|
return res;
|
26
26
|
}
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
uint32_t length = (uint32_t)strlen(str);
|
40
|
-
bool named = RTEST(is_named);
|
41
|
-
return UINT2NUM(ts_language_symbol_for_name(SELF, str, length, named));
|
42
|
-
}
|
43
|
-
|
44
|
-
static VALUE language_field_count(VALUE self) {
|
45
|
-
return UINT2NUM(ts_language_field_count(SELF));
|
46
|
-
}
|
47
|
-
|
48
|
-
static VALUE language_field_name_for_id(VALUE self, VALUE field_id) {
|
49
|
-
return safe_str(ts_language_field_name_for_id(SELF, NUM2UINT(field_id)));
|
50
|
-
}
|
51
|
-
|
52
|
-
static VALUE language_field_id_for_name(VALUE self, VALUE name) {
|
53
|
-
TSLanguage *language = SELF;
|
54
|
-
const char *str = StringValuePtr(name);
|
55
|
-
uint32_t length = (uint32_t)RSTRING_LEN(name);
|
56
|
-
return UINT2NUM(ts_language_field_id_for_name(language, str, length));
|
57
|
-
}
|
58
|
-
|
59
|
-
static VALUE language_symbol_type(VALUE self, VALUE symbol) {
|
60
|
-
return new_symbol_type(ts_language_symbol_type(SELF, NUM2UINT(symbol)));
|
61
|
-
}
|
62
|
-
|
63
|
-
static VALUE language_version(VALUE self) {
|
64
|
-
return UINT2NUM(ts_language_version(SELF));
|
65
|
-
}
|
66
|
-
|
28
|
+
/**
|
29
|
+
* Load a language parser from disk.
|
30
|
+
*
|
31
|
+
* @raise [RuntimeError] if the parser was not found, or if it's incompatible
|
32
|
+
* with this gem.
|
33
|
+
*
|
34
|
+
* @param name [String] the parser's name.
|
35
|
+
* @param path [String] the parser's shared library (so, dylib) path on disk.
|
36
|
+
*
|
37
|
+
* @return [Language]
|
38
|
+
*/
|
67
39
|
static VALUE language_load(VALUE self, VALUE name, VALUE path) {
|
68
40
|
VALUE path_s = rb_funcall(path, rb_intern("to_s"), 0);
|
69
41
|
char *path_cstr = StringValueCStr(path_s);
|
@@ -113,22 +85,135 @@ static VALUE language_equal(VALUE self, VALUE other) {
|
|
113
85
|
return this == that ? Qtrue : Qfalse;
|
114
86
|
}
|
115
87
|
|
88
|
+
/**
|
89
|
+
* Get the number of distinct field names in the language.
|
90
|
+
*
|
91
|
+
* @return [Integer]
|
92
|
+
*/
|
93
|
+
static VALUE language_field_count(VALUE self) {
|
94
|
+
return UINT2NUM(ts_language_field_count(SELF));
|
95
|
+
}
|
96
|
+
|
97
|
+
/**
|
98
|
+
* Get the numerical id for the given field name string.
|
99
|
+
*
|
100
|
+
* @param name [String]
|
101
|
+
*
|
102
|
+
* @return [Integer]
|
103
|
+
*/
|
104
|
+
static VALUE language_field_id_for_name(VALUE self, VALUE name) {
|
105
|
+
TSLanguage *language = SELF;
|
106
|
+
const char *str = StringValuePtr(name);
|
107
|
+
uint32_t length = (uint32_t)RSTRING_LEN(name);
|
108
|
+
return UINT2NUM(ts_language_field_id_for_name(language, str, length));
|
109
|
+
}
|
110
|
+
|
111
|
+
/**
|
112
|
+
* Get the field name string for the given numerical id.
|
113
|
+
*
|
114
|
+
* @param field_id [Integer]
|
115
|
+
*
|
116
|
+
* @return [String]
|
117
|
+
*/
|
118
|
+
static VALUE language_field_name_for_id(VALUE self, VALUE field_id) {
|
119
|
+
return safe_str(ts_language_field_name_for_id(SELF, NUM2UINT(field_id)));
|
120
|
+
}
|
121
|
+
|
122
|
+
/**
|
123
|
+
* Get the next parse state. Combine this with lookahead iterators to generate
|
124
|
+
* completion suggestions or valid symbols in error nodes. Use
|
125
|
+
* {Node#grammar_symbol} for valid symbols.
|
126
|
+
*/
|
127
|
+
static VALUE language_next_state(VALUE self, VALUE state, VALUE symbol) {
|
128
|
+
uint16_t sta = (uint16_t)NUM2UINT(state);
|
129
|
+
uint16_t sym = (uint16_t)NUM2UINT(symbol);
|
130
|
+
return UINT2NUM(ts_language_next_state(SELF, sta, sym));
|
131
|
+
}
|
132
|
+
|
133
|
+
/**
|
134
|
+
* Get the number of distinct node types in the language.
|
135
|
+
*
|
136
|
+
* @return [Integer]
|
137
|
+
*/
|
138
|
+
static VALUE language_symbol_count(VALUE self) {
|
139
|
+
return UINT2NUM(ts_language_symbol_count(SELF));
|
140
|
+
}
|
141
|
+
|
142
|
+
/**
|
143
|
+
* Get a node type string for the given numerical id.
|
144
|
+
*
|
145
|
+
* @param symbol [Integer]
|
146
|
+
*
|
147
|
+
* @return [String]
|
148
|
+
*/
|
149
|
+
static VALUE language_symbol_name(VALUE self, VALUE symbol) {
|
150
|
+
return safe_str(ts_language_symbol_name(SELF, NUM2UINT(symbol)));
|
151
|
+
}
|
152
|
+
|
153
|
+
/**
|
154
|
+
* Get the numerical id for the given node type string.
|
155
|
+
*
|
156
|
+
* @param string [Symbol]
|
157
|
+
* @param is_named [Boolean]
|
158
|
+
*
|
159
|
+
* @return [Integer]
|
160
|
+
*/
|
161
|
+
static VALUE language_symbol_for_name(VALUE self, VALUE string,
|
162
|
+
VALUE is_named) {
|
163
|
+
const char *str = rb_id2name(SYM2ID(string));
|
164
|
+
uint32_t length = (uint32_t)strlen(str);
|
165
|
+
bool named = RTEST(is_named);
|
166
|
+
return UINT2NUM(ts_language_symbol_for_name(SELF, str, length, named));
|
167
|
+
}
|
168
|
+
|
169
|
+
/**
|
170
|
+
* Check whether the given node type id belongs to named nodes, anonymous nodes,
|
171
|
+
* or hidden nodes.
|
172
|
+
*
|
173
|
+
* Hidden nodes are never returned from the API.
|
174
|
+
*
|
175
|
+
* @see Node#named?
|
176
|
+
*
|
177
|
+
* @param symbol [Integer]
|
178
|
+
*
|
179
|
+
* @return [SymbolType]
|
180
|
+
*/
|
181
|
+
static VALUE language_symbol_type(VALUE self, VALUE symbol) {
|
182
|
+
return new_symbol_type(ts_language_symbol_type(SELF, NUM2UINT(symbol)));
|
183
|
+
}
|
184
|
+
|
185
|
+
/**
|
186
|
+
* Get the ABI version number for this language. This version number is used
|
187
|
+
* to ensure that languages were generated by a compatible version of
|
188
|
+
* Tree-sitter.
|
189
|
+
*
|
190
|
+
* @see Parser#language=
|
191
|
+
*/
|
192
|
+
static VALUE language_version(VALUE self) {
|
193
|
+
return UINT2NUM(ts_language_version(SELF));
|
194
|
+
}
|
195
|
+
|
116
196
|
void init_language(void) {
|
117
197
|
cLanguage = rb_define_class_under(mTreeSitter, "Language", rb_cObject);
|
118
198
|
|
119
199
|
rb_define_alloc_func(cLanguage, language_allocate);
|
120
200
|
|
201
|
+
/* Module methods */
|
202
|
+
rb_define_module_function(cLanguage, "load", language_load, 2);
|
203
|
+
|
204
|
+
/* Operators */
|
205
|
+
rb_define_method(cLanguage, "==", language_equal, 1);
|
206
|
+
|
121
207
|
/* Class methods */
|
122
|
-
rb_define_method(cLanguage, "symbol_count", language_symbol_count, 0);
|
123
|
-
rb_define_method(cLanguage, "symbol_name", language_symbol_name, 1);
|
124
|
-
rb_define_method(cLanguage, "symbol_for_name", language_symbol_for_name, 2);
|
125
208
|
rb_define_method(cLanguage, "field_count", language_field_count, 0);
|
126
|
-
rb_define_method(cLanguage, "field_name_for_id", language_field_name_for_id,
|
127
|
-
1);
|
128
209
|
rb_define_method(cLanguage, "field_id_for_name", language_field_id_for_name,
|
129
210
|
1);
|
211
|
+
rb_define_method(cLanguage, "field_name_for_id", language_field_name_for_id,
|
212
|
+
1);
|
213
|
+
rb_define_method(cLanguage, "next_state", language_next_state, 2);
|
214
|
+
rb_define_method(cLanguage, "symbol_count", language_symbol_count, 0);
|
215
|
+
rb_define_method(cLanguage, "symbol_for_name", language_symbol_for_name, 2);
|
216
|
+
rb_define_method(cLanguage, "symbol_name", language_symbol_name, 1);
|
130
217
|
rb_define_method(cLanguage, "symbol_type", language_symbol_type, 1);
|
131
218
|
rb_define_method(cLanguage, "version", language_version, 0);
|
132
|
-
rb_define_module_function(cLanguage, "load", language_load, 2);
|
133
|
-
rb_define_method(cLanguage, "==", language_equal, 1);
|
134
219
|
}
|
data/ext/tree_sitter/logger.c
CHANGED
@@ -147,19 +147,35 @@ static void logger_initialize_stderr(logger_t *logger) {
|
|
147
147
|
}
|
148
148
|
}
|
149
149
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
150
|
+
/**
|
151
|
+
* Create a new logger.
|
152
|
+
*
|
153
|
+
* By default, it logs to stderr.
|
154
|
+
*
|
155
|
+
* You can provide your own backend. You have to make sure that it
|
156
|
+
* exposes a +printf+, +puts+, or +write+ (lookup is done in that specific
|
157
|
+
* order). {StringIO} is a perfect candidate.
|
158
|
+
*
|
159
|
+
* You can also provide a format ({String}) if your backend supports a +printf+.
|
160
|
+
*
|
161
|
+
* @example
|
162
|
+
* backend = StringIO.new
|
163
|
+
* parser.logger = TreeSitter::Logger.new(backend)
|
164
|
+
*
|
165
|
+
* @param args [Array] The first argument is always a backend. The second
|
166
|
+
* argument is the format.
|
167
|
+
*/
|
162
168
|
static VALUE logger_initialize(int argc, VALUE *argv, VALUE self) {
|
169
|
+
// TODO:
|
170
|
+
// For now, we only take:
|
171
|
+
// argv[0] = stream
|
172
|
+
// argv[1] = format : String
|
173
|
+
// We need to add support for argv[1] : lambda/block
|
174
|
+
// case argv[1]
|
175
|
+
// in lambda => lambda
|
176
|
+
// in String => puts || printf || write
|
177
|
+
// else => write
|
178
|
+
// end
|
163
179
|
logger_t *logger = unwrap(self);
|
164
180
|
|
165
181
|
VALUE payload;
|