RubyGems - ruby_tree_sitter - Versions diffs - 0.20.8.3-x86_64-linux → 1.1.0-x86_64-linux - Mend

ruby_tree_sitter 0.20.8.3-x86_64-linux → 1.1.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

checksums.yaml +4 -4
data/LICENSE +2 -1
data/README.md +32 -18
data/ext/tree_sitter/extconf.rb +1 -1
data/ext/tree_sitter/input.c +1 -0
data/ext/tree_sitter/language.c +131 -46
data/ext/tree_sitter/logger.c +28 -12
data/ext/tree_sitter/node.c +438 -130
data/ext/tree_sitter/parser.c +232 -37
data/ext/tree_sitter/query.c +197 -72
data/ext/tree_sitter/query_cursor.c +140 -28
data/ext/tree_sitter/repo.rb +1 -1
data/ext/tree_sitter/tree.c +118 -34
data/ext/tree_sitter/tree_cursor.c +205 -33
data/ext/tree_sitter/tree_sitter.c +12 -0
data/lib/tree_sitter/node.rb +23 -6
data/lib/tree_sitter/tree_sitter.so +0 -0
data/lib/tree_sitter/version.rb +4 -2
data/lib/tree_sitter.rb +1 -0
data/lib/tree_stand/ast_modifier.rb +30 -0
data/lib/tree_stand/breadth_first_visitor.rb +54 -0
data/lib/tree_stand/config.rb +13 -0
data/lib/tree_stand/node.rb +224 -0
data/lib/tree_stand/parser.rb +67 -0
data/lib/tree_stand/range.rb +55 -0
data/lib/tree_stand/tree.rb +123 -0
data/lib/tree_stand/utils/printer.rb +73 -0
data/lib/tree_stand/version.rb +7 -0
data/lib/tree_stand/visitor.rb +127 -0
data/lib/tree_stand/visitors/tree_walker.rb +37 -0
data/lib/tree_stand.rb +48 -0
data/tree_sitter.gemspec +14 -11
metadata +36 -107
data/test/README.md +0 -15
data/test/test_helper.rb +0 -9
data/test/tree_sitter/js_test.rb +0 -48
data/test/tree_sitter/language_test.rb +0 -73
data/test/tree_sitter/logger_test.rb +0 -70
data/test/tree_sitter/node_test.rb +0 -411
data/test/tree_sitter/parser_test.rb +0 -140
data/test/tree_sitter/query_test.rb +0 -153
data/test/tree_sitter/tree_cursor_test.rb +0 -83
data/test/tree_sitter/tree_test.rb +0 -51

data/ext/tree_sitter/query_cursor.c CHANGED Viewed

@@ -16,9 +16,39 @@ static VALUE query_cursor_allocate(VALUE klass) {
   return res;
 }
 DATA_UNWRAP(query_cursor)
+/**
+ * Create a new cursor for executing a given query.
+ *
+ * The cursor stores the state that is needed to iteratively search
+ * for matches. To use the query cursor, first call {QueryCursor#exec}
+ * to start running a given query on a given syntax node. Then, there are
+ * two options for consuming the results of the query:
+ * 1. Repeatedly call {QueryCursor#next_match} to iterate over all of the
+ *    *matches* in the order that they were found. Each match contains the
+ *    index of the pattern that matched, and an array of captures. Because
+ *    multiple patterns can match the same set of nodes, one match may contain
+ *    captures that appear *before* some of the captures from a previous match.
+ * 2. Repeatedly call {QueryCursor#next_capture} to iterate over all of the
+ *    individual *captures* in the order that they appear. This is useful if
+ *    you don't care about which pattern matched, and just want a single
+ *    ordered sequence of captures.
+ *
+ * If you don't care about consuming all of the results, you can stop calling
+ * {QueryCursor#next_match} or {QueryCursor#next_capture} at any point.
+ * You can then start executing another query on another node by calling
+ * {QueryCursor#exec} again.
+ */
 DATA_PTR_NEW(cQueryCursor, TSQueryCursor, query_cursor)
 DATA_FROM_VALUE(TSQueryCursor *, query_cursor)
+/**
+ * Start running a given query on a given node.
+ *
+ * @param query [Query]
+ * @param node  [Node]
+ *
+ * @return [QueryCursor]
+ */
 static VALUE query_cursor_exec(VALUE self, VALUE query, VALUE node) {
   VALUE res = query_cursor_allocate(cQueryCursor);
   query_cursor_t *query_cursor = unwrap(res);
@@ -27,49 +57,78 @@ static VALUE query_cursor_exec(VALUE self, VALUE query, VALUE node) {
   return res;
 }
+/**
+ * Manage the maximum number of in-progress matches allowed by this query
+ * cursor.
+ *
+ * Query cursors have an optional maximum capacity for storing lists of
+ * in-progress captures. If this capacity is exceeded, then the
+ * earliest-starting match will silently be dropped to make room for further
+ * matches. This maximum capacity is optional — by default, query cursors allow
+ * any number of pending matches, dynamically allocating new space for them as
+ * needed as the query is executed.
+ */
 static VALUE query_cursor_did_exceed_match_limit(VALUE self) {
   return ts_query_cursor_did_exceed_match_limit(SELF) ? Qtrue : Qfalse;
 }
-static VALUE query_cursor_get_match_limit(VALUE self) {
-  return UINT2NUM(ts_query_cursor_match_limit(SELF));
-}
+/**
+ * @param limit [Integer]
+ *
+ * @return [nil]
+ */
 static VALUE query_cursor_set_match_limit(VALUE self, VALUE limit) {
   ts_query_cursor_set_match_limit(SELF, NUM2UINT(limit));
   return Qnil;
 }
-static VALUE query_cursor_set_byte_range(VALUE self, VALUE from, VALUE to) {
-  ts_query_cursor_set_byte_range(SELF, NUM2UINT(from), NUM2UINT(to));
-  return Qnil;
-}
-static VALUE query_cursor_set_point_range(VALUE self, VALUE from, VALUE to) {
-  ts_query_cursor_set_point_range(SELF, value_to_point(from),
-                                  value_to_point(to));
-  return Qnil;
+/**
+ * @return [Integer]
+ */
+static VALUE query_cursor_get_match_limit(VALUE self) {
+  return UINT2NUM(ts_query_cursor_match_limit(SELF));
 }
-static VALUE query_cursor_next_match(VALUE self) {
-  TSQueryMatch match;
-  if (ts_query_cursor_next_match(SELF, &match)) {
-    return new_query_match(&match);
-  } else {
-    return Qnil;
+/**
+ * Set the maximum start depth for a query cursor.
+ *
+ * This prevents cursors from exploring children nodes at a certain depth.
+ * Note if a pattern includes many children, then they will still be checked.
+ *
+ * A max start depth of zero is a special case: it helps to destructure a
+ * subtree by staying on a node and using captures for the parts of interest.
+ * Note that a zero max start depth only limits the search depth for a
+ * pattern's root node; other nodes that are part of the pattern may still be
+ * searched at any depth, as defined by the pattern structure.
+ *
+ * @param max_start_depth [Integer|nil] set to nil to remove the maximum start
+ * depth.
+ *
+ * @return [nil]
+ */
+static VALUE query_cursor_set_max_start_depth(VALUE self,
+                                              VALUE max_start_depth) {
+  uint32_t max = UINT32_MAX;
+  if (!NIL_P(max_start_depth)) {
+    max = NUM2UINT(max_start_depth);
   }
-}
-static VALUE query_cursor_remove_match(VALUE self, VALUE id) {
-  ts_query_cursor_remove_match(SELF, NUM2UINT(id));
+  ts_query_cursor_set_max_start_depth(SELF, max);
   return Qnil;
 }
-// NOTE: maybe this is the limit of how "transparent" the bindings need to be.
+// FIXME: maybe this is the limit of how "transparent" the bindings need to be.
 // Pending benchmarks, this can be very inefficient because obviously
 // ts_query_cursor_next_capture is intended to be used in a loop.  Creating an
 // array of two values and returning them, intuitively speaking, seems very
 // inefficient.
+// FIXME: maybe this needs to return an empty array to make for a nicer ruby
+// API?
+/**
+ * Advance to the next capture of the currently running query.
+ *
+ * @return [Array<Integer|Boolean>|nil] If there is a capture, return a tuple
+ * [Integer, Boolean], otherwise return +nil+.
+ */
 static VALUE query_cursor_next_capture(VALUE self) {
   TSQueryMatch match;
   uint32_t index;
@@ -83,21 +142,74 @@ static VALUE query_cursor_next_capture(VALUE self) {
   }
 }
+/**
+ * Advance to the next match of the currently running query.
+ *
+ * @return [QueryMatch|nil] The next match, or +nil+ when no match remains.
+ */
+static VALUE query_cursor_next_match(VALUE self) {
+  TSQueryMatch match;
+  if (ts_query_cursor_next_match(SELF, &match)) {
+    return new_query_match(&match);
+  } else {
+    return Qnil;
+  }
+}
+static VALUE query_cursor_remove_match(VALUE self, VALUE id) {
+  ts_query_cursor_remove_match(SELF, NUM2UINT(id));
+  return Qnil;
+}
+/**
+ * @param from [Integer]
+ * @param to   [Integer]
+ *
+ * @return [nil]
+ */
+static VALUE query_cursor_set_byte_range(VALUE self, VALUE from, VALUE to) {
+  ts_query_cursor_set_byte_range(SELF, NUM2UINT(from), NUM2UINT(to));
+  return Qnil;
+}
+/**
+ * @param from [Point]
+ * @param to   [Point]
+ *
+ * @return [nil]
+ */
+static VALUE query_cursor_set_point_range(VALUE self, VALUE from, VALUE to) {
+  ts_query_cursor_set_point_range(SELF, value_to_point(from),
+                                  value_to_point(to));
+  return Qnil;
+}
 void init_query_cursor(void) {
   cQueryCursor = rb_define_class_under(mTreeSitter, "QueryCursor", rb_cObject);
   rb_define_alloc_func(cQueryCursor, query_cursor_allocate);
+  /* Module methods */
+  rb_define_module_function(cQueryCursor, "exec", query_cursor_exec, 2);
   /* Class methods */
+  // Accessors
   DECLARE_ACCESSOR(cQueryCursor, query_cursor, match_limit)
-  rb_define_module_function(cQueryCursor, "exec", query_cursor_exec, 2);
+  // Other
   rb_define_method(cQueryCursor, "exceed_match_limit?",
                    query_cursor_did_exceed_match_limit, 0);
+  rb_define_method(cQueryCursor, "match_limit", query_cursor_get_match_limit,
+                   0);
+  rb_define_method(cQueryCursor, "match_limit=", query_cursor_set_match_limit,
+                   1);
+  rb_define_method(cQueryCursor,
+                   "max_start_depth=", query_cursor_set_max_start_depth, 1);
+  rb_define_method(cQueryCursor, "next_capture", query_cursor_next_capture, 0);
+  rb_define_method(cQueryCursor, "next_match", query_cursor_next_match, 0);
+  rb_define_method(cQueryCursor, "remove_match", query_cursor_remove_match, 1);
   rb_define_method(cQueryCursor, "set_byte_range", query_cursor_set_byte_range,
                    2);
   rb_define_method(cQueryCursor, "set_point_range",
                    query_cursor_set_point_range, 2);
-  rb_define_method(cQueryCursor, "next_match", query_cursor_next_match, 0);
-  rb_define_method(cQueryCursor, "remove_match", query_cursor_remove_match, 1);
-  rb_define_method(cQueryCursor, "next_capture", query_cursor_next_capture, 0);
 }

data/ext/tree_sitter/repo.rb CHANGED Viewed

@@ -17,7 +17,7 @@ module TreeSitter
       @url = {
         git: 'https://github.com/tree-sitter/tree-sitter',
         tar: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.tar.gz",
-        zip: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.zip"
+        zip: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.zip",
       }
       @exe = {}

data/ext/tree_sitter/tree.c CHANGED Viewed

@@ -63,40 +63,115 @@ VALUE new_tree(TSTree *ptr) {
 DATA_FROM_VALUE(TSTree *, tree)
-static VALUE tree_copy(VALUE self) { return new_tree(ts_tree_copy(SELF)); }
+/**
+ * Compare an old edited syntax tree to a new syntax tree representing the same
+ * document, returning an array of ranges whose syntactic structure has changed.
+ *
+ * For this to work correctly, the old syntax tree must have been edited such
+ * that its ranges match up to the new tree. Generally, you'll want to call
+ * this function right after calling one of the {Parser#parse} functions.
+ * You need to pass the old tree that was passed to parse, as well as the new
+ * tree that was returned from that function.
+ *
+ * @param old_tree [Tree]
+ * @param new_tree [Tree]
+ *
+ * @return [Array<Range>]
+ */
+static VALUE tree_changed_ranges(VALUE _self, VALUE old_tree, VALUE new_tree) {
+  TSTree *old = unwrap(old_tree)->data;
+  TSTree *new = unwrap(new_tree)->data;
+  uint32_t length;
+  TSRange *ranges = ts_tree_get_changed_ranges(old, new, &length);
+  VALUE res = rb_ary_new_capa(length);
-static VALUE tree_root_node(VALUE self) {
-  return new_node_by_val(ts_tree_root_node(SELF));
+  for (uint32_t i = 0; i < length; i++) {
+    rb_ary_push(res, new_range(&ranges[i]));
+  }
+  if (ranges) {
+    free(ranges);
+  }
+  return res;
 }
-static VALUE tree_language(VALUE self) {
-  return new_language(ts_tree_language(SELF));
+static VALUE tree_finalizer(VALUE _self) {
+  VALUE rc = rb_cv_get(cTree, "@@rc");
+  VALUE keys = rb_funcall(rc, rb_intern("keys"), 0);
+  long len = RARRAY_LEN(keys);
+  for (long i = 0; i < len; ++i) {
+    VALUE curr = RARRAY_AREF(keys, i);
+    unsigned int val = NUM2UINT(rb_hash_lookup(rc, curr));
+    if (val > 0) {
+      ts_tree_delete((TSTree *)NUM2ULONG(curr));
+    }
+    rb_hash_delete(rc, curr);
+  }
+  return Qnil;
 }
+/**
+ * Create a shallow copy of the syntax tree. This is very fast.
+ *
+ * You need to copy a syntax tree in order to use it on more than one thread at
+ * a time, as syntax trees are not thread safe.
+ *
+ * @return [Tree]
+ */
+static VALUE tree_copy(VALUE self) { return new_tree(ts_tree_copy(SELF)); }
+/**
+ * Edit the syntax tree to keep it in sync with source code that has been
+ * edited.
+ *
+ * You must describe the edit both in terms of byte offsets and in terms of
+ * (row, column) coordinates.
+ *
+ * @param edit [InputEdit]
+ *
+ * @return [nil]
+ */
 static VALUE tree_edit(VALUE self, VALUE edit) {
   TSInputEdit in = value_to_input_edit(edit);
   ts_tree_edit(SELF, &in);
   return Qnil;
 }
-static VALUE tree_changed_ranges(VALUE _self, VALUE old_tree, VALUE new_tree) {
-  TSTree *old = unwrap(old_tree)->data;
-  TSTree *new = unwrap(new_tree)->data;
+/**
+ * Get the array of included ranges that was used to parse the syntax tree.
+ *
+ * @return [Array<Range>]
+ */
+static VALUE included_ranges(VALUE self) {
   uint32_t length;
-  TSRange *ranges = ts_tree_get_changed_ranges(old, new, &length);
+  const TSRange *ranges = ts_tree_included_ranges(SELF, &length);
   VALUE res = rb_ary_new_capa(length);
   for (uint32_t i = 0; i < length; i++) {
     rb_ary_push(res, new_range(&ranges[i]));
   }
-  if (ranges) {
-    free(ranges);
-  }
   return res;
 }
+/**
+ * Get the language that was used to parse the syntax tree.
+ *
+ * @return [Language]
+ */
+static VALUE tree_language(VALUE self) {
+  return new_language(ts_tree_language(SELF));
+}
+/**
+ * Write a DOT graph describing the syntax tree to the given file.
+ *
+ * @param file [String]
+ *
+ * @return [nil]
+ */
 static VALUE tree_print_dot_graph(VALUE self, VALUE file) {
   Check_Type(file, T_STRING);
   char *path = StringValueCStr(file);
@@ -112,22 +187,26 @@ static VALUE tree_print_dot_graph(VALUE self, VALUE file) {
   return Qnil;
 }
-static VALUE tree_finalizer(VALUE _self) {
-  VALUE rc = rb_cv_get(cTree, "@@rc");
-  VALUE keys = rb_funcall(rc, rb_intern("keys"), 0);
-  long len = RARRAY_LEN(keys);
-  for (long i = 0; i < len; ++i) {
-    VALUE curr = RARRAY_AREF(keys, i);
-    unsigned int val = NUM2UINT(rb_hash_lookup(rc, curr));
-    if (val > 0) {
-      ts_tree_delete((TSTree *)NUM2ULONG(curr));
-    }
-    rb_hash_delete(rc, curr);
-  }
+/**
+ * Get the root node of the syntax tree.
+ *
+ * @return [Node]
+ */
+static VALUE tree_root_node(VALUE self) {
+  return new_node_by_val(ts_tree_root_node(SELF));
+}
-  return Qnil;
+/**
+ * Get the root node of the syntax tree, but with its position
+ * shifted forward by the given offset.
+ *
+ * @return [Node]
+ */
+static VALUE tree_root_node_with_offset(VALUE self, VALUE offset_bytes,
+                                        VALUE offset_extent) {
+  uint32_t bytes = NUM2UINT(offset_bytes);
+  TSPoint extent = value_to_point(offset_extent);
+  return new_node_by_val(ts_tree_root_node_with_offset(SELF, bytes, extent));
 }
 void init_tree(void) {
@@ -135,14 +214,19 @@ void init_tree(void) {
   rb_undef_alloc_func(cTree);
+  /* Module methods */
+  rb_define_module_function(cTree, "changed_ranges", tree_changed_ranges, 2);
+  rb_define_module_function(cTree, "finalizer", tree_finalizer, 0);
   /* Class methods */
   rb_define_method(cTree, "copy", tree_copy, 0);
-  rb_define_method(cTree, "root_node", tree_root_node, 0);
-  rb_define_method(cTree, "language", tree_language, 0);
   rb_define_method(cTree, "edit", tree_edit, 1);
-  rb_define_module_function(cTree, "changed_ranges", tree_changed_ranges, 2);
+  rb_define_method(cTree, "included_ranges", included_ranges, 0);
+  rb_define_method(cTree, "language", tree_language, 0);
   rb_define_method(cTree, "print_dot_graph", tree_print_dot_graph, 1);
-  rb_define_module_function(cTree, "finalizer", tree_finalizer, 0);
+  rb_define_method(cTree, "root_node", tree_root_node, 0);
+  rb_define_method(cTree, "root_node_with_offset", tree_root_node_with_offset,
+                   2);
   // Reference-count created trees
   VALUE rc = rb_hash_new();