RubyGems - ruby_tree_sitter - Versions diffs - 0.20.8.3 → 1.1.0 - Mend

ruby_tree_sitter 0.20.8.3 → 1.1.0

Files changed (42) hide show

checksums.yaml +4 -4
data/LICENSE +2 -1
data/README.md +32 -18
data/ext/tree_sitter/extconf.rb +1 -1
data/ext/tree_sitter/input.c +1 -0
data/ext/tree_sitter/language.c +131 -46
data/ext/tree_sitter/logger.c +28 -12
data/ext/tree_sitter/node.c +438 -130
data/ext/tree_sitter/parser.c +232 -37
data/ext/tree_sitter/query.c +197 -72
data/ext/tree_sitter/query_cursor.c +140 -28
data/ext/tree_sitter/repo.rb +1 -1
data/ext/tree_sitter/tree.c +118 -34
data/ext/tree_sitter/tree_cursor.c +205 -33
data/ext/tree_sitter/tree_sitter.c +12 -0
data/lib/tree_sitter/node.rb +23 -6
data/lib/tree_sitter/version.rb +4 -2
data/lib/tree_sitter.rb +1 -0
data/lib/tree_stand/ast_modifier.rb +30 -0
data/lib/tree_stand/breadth_first_visitor.rb +54 -0
data/lib/tree_stand/config.rb +13 -0
data/lib/tree_stand/node.rb +224 -0
data/lib/tree_stand/parser.rb +67 -0
data/lib/tree_stand/range.rb +55 -0
data/lib/tree_stand/tree.rb +123 -0
data/lib/tree_stand/utils/printer.rb +73 -0
data/lib/tree_stand/version.rb +7 -0
data/lib/tree_stand/visitor.rb +127 -0
data/lib/tree_stand/visitors/tree_walker.rb +37 -0
data/lib/tree_stand.rb +48 -0
data/tree_sitter.gemspec +14 -11
metadata +37 -108
data/test/README.md +0 -15
data/test/test_helper.rb +0 -9
data/test/tree_sitter/js_test.rb +0 -48
data/test/tree_sitter/language_test.rb +0 -73
data/test/tree_sitter/logger_test.rb +0 -70
data/test/tree_sitter/node_test.rb +0 -411
data/test/tree_sitter/parser_test.rb +0 -140
data/test/tree_sitter/query_test.rb +0 -153
data/test/tree_sitter/tree_cursor_test.rb +0 -83
data/test/tree_sitter/tree_test.rb +0 -51

data/ext/tree_sitter/query_cursor.c CHANGED Viewed

@@ -16,9 +16,39 @@ static VALUE query_cursor_allocate(VALUE klass) {
   return res;
 }
 DATA_UNWRAP(query_cursor)
+/**
+ * Create a new cursor for executing a given query.
+ *
+ * The cursor stores the state that is needed to iteratively search
+ * for matches. To use the query cursor, first call {QueryCursor#exec}
+ * to start running a given query on a given syntax node. Then, there are
+ * two options for consuming the results of the query:
+ * 1. Repeatedly call {QueryCursor#next_match} to iterate over all of the
+ *    *matches* in the order that they were found. Each match contains the
+ *    index of the pattern that matched, and an array of captures. Because
+ *    multiple patterns can match the same set of nodes, one match may contain
+ *    captures that appear *before* some of the captures from a previous match.
+ * 2. Repeatedly call {QueryCursor#next_capture} to iterate over all of the
+ *    individual *captures* in the order that they appear. This is useful if
+ *    you don't care about which pattern matched, and just want a single
+ *    ordered sequence of captures.
+ *
+ * If you don't care about consuming all of the results, you can stop calling
+ * {QueryCursor#next_match} or {QueryCursor#next_capture} at any point.
+ * You can then start executing another query on another node by calling
+ * {QueryCursor#exec} again.
+ */
 DATA_PTR_NEW(cQueryCursor, TSQueryCursor, query_cursor)
 DATA_FROM_VALUE(TSQueryCursor *, query_cursor)
+/**
+ * Start running a given query on a given node.
+ *
+ * @param query [Query]
+ * @param node  [Node]
+ *
+ * @return [QueryCursor]
+ */
 static VALUE query_cursor_exec(VALUE self, VALUE query, VALUE node) {
   VALUE res = query_cursor_allocate(cQueryCursor);
   query_cursor_t *query_cursor = unwrap(res);
@@ -27,49 +57,78 @@ static VALUE query_cursor_exec(VALUE self, VALUE query, VALUE node) {
   return res;
 }
+/**
+ * Manage the maximum number of in-progress matches allowed by this query
+ * cursor.
+ *
+ * Query cursors have an optional maximum capacity for storing lists of
+ * in-progress captures. If this capacity is exceeded, then the
+ * earliest-starting match will silently be dropped to make room for further
+ * matches. This maximum capacity is optional — by default, query cursors allow
+ * any number of pending matches, dynamically allocating new space for them as
+ * needed as the query is executed.
+ */
 static VALUE query_cursor_did_exceed_match_limit(VALUE self) {
   return ts_query_cursor_did_exceed_match_limit(SELF) ? Qtrue : Qfalse;
 }
-static VALUE query_cursor_get_match_limit(VALUE self) {
-  return UINT2NUM(ts_query_cursor_match_limit(SELF));
-}
+/**
+ * @param limit [Integer]
+ *
+ * @return [nil]
+ */
 static VALUE query_cursor_set_match_limit(VALUE self, VALUE limit) {
   ts_query_cursor_set_match_limit(SELF, NUM2UINT(limit));
   return Qnil;
 }
-static VALUE query_cursor_set_byte_range(VALUE self, VALUE from, VALUE to) {
-  ts_query_cursor_set_byte_range(SELF, NUM2UINT(from), NUM2UINT(to));
-  return Qnil;
-}
-static VALUE query_cursor_set_point_range(VALUE self, VALUE from, VALUE to) {
-  ts_query_cursor_set_point_range(SELF, value_to_point(from),
-                                  value_to_point(to));
-  return Qnil;
+/**
+ * @return [Integer]
+ */
+static VALUE query_cursor_get_match_limit(VALUE self) {
+  return UINT2NUM(ts_query_cursor_match_limit(SELF));
 }
-static VALUE query_cursor_next_match(VALUE self) {
-  TSQueryMatch match;
-  if (ts_query_cursor_next_match(SELF, &match)) {
-    return new_query_match(&match);
-  } else {
-    return Qnil;
+/**
+ * Set the maximum start depth for a query cursor.
+ *
+ * This prevents cursors from exploring children nodes at a certain depth.
+ * Note if a pattern includes many children, then they will still be checked.
+ *
+ * A max start depth of zero is a special case: it helps to destructure a
+ * subtree by staying on a node and using captures for the parts of interest.
+ * Note that a zero max start depth only limits the search depth for a
+ * pattern's root node; other nodes that are part of the pattern may still be
+ * searched at any depth, as defined by the pattern structure.
+ *
+ * @param max_start_depth [Integer|nil] set to nil to remove the maximum start
+ * depth.
+ *
+ * @return [nil]
+ */
+static VALUE query_cursor_set_max_start_depth(VALUE self,
+                                              VALUE max_start_depth) {
+  uint32_t max = UINT32_MAX;
+  if (!NIL_P(max_start_depth)) {
+    max = NUM2UINT(max_start_depth);
   }
-}
-static VALUE query_cursor_remove_match(VALUE self, VALUE id) {
-  ts_query_cursor_remove_match(SELF, NUM2UINT(id));
+  ts_query_cursor_set_max_start_depth(SELF, max);
   return Qnil;
 }
-// NOTE: maybe this is the limit of how "transparent" the bindings need to be.
+// FIXME: maybe this is the limit of how "transparent" the bindings need to be.
 // Pending benchmarks, this can be very inefficient because obviously
 // ts_query_cursor_next_capture is intended to be used in a loop.  Creating an
 // array of two values and returning them, intuitively speaking, seems very
 // inefficient.
+// FIXME: maybe this needs to return an empty array to make for a nicer ruby
+// API?
+/**
+ * Advance to the next capture of the currently running query.
+ *
+ * @return [Array<Integer|Boolean>|nil] If there is a capture, return a tuple
+ * [Integer, Boolean], otherwise return +nil+.
+ */
 static VALUE query_cursor_next_capture(VALUE self) {
   TSQueryMatch match;
   uint32_t index;
@@ -83,21 +142,74 @@ static VALUE query_cursor_next_capture(VALUE self) {
   }
 }
+/**
+ * Advance to the next match of the currently running query.
+ *
+ * @return [QueryMatch|nil] The next match, or +nil+ if there are no more
+ * matches.
+ */
+static VALUE query_cursor_next_match(VALUE self) {
+  TSQueryMatch match;
+  if (ts_query_cursor_next_match(SELF, &match)) {
+    return new_query_match(&match);
+  } else {
+    return Qnil;
+  }
+}
+static VALUE query_cursor_remove_match(VALUE self, VALUE id) {
+  ts_query_cursor_remove_match(SELF, NUM2UINT(id));
+  return Qnil;
+}
+/**
+ * @param from [Integer]
+ * @param to   [Integer]
+ *
+ * @return [nil]
+ */
+static VALUE query_cursor_set_byte_range(VALUE self, VALUE from, VALUE to) {
+  ts_query_cursor_set_byte_range(SELF, NUM2UINT(from), NUM2UINT(to));
+  return Qnil;
+}
+/**
+ * @param from [Point]
+ * @param to   [Point]
+ *
+ * @return [nil]
+ */
+static VALUE query_cursor_set_point_range(VALUE self, VALUE from, VALUE to) {
+  ts_query_cursor_set_point_range(SELF, value_to_point(from),
+                                  value_to_point(to));
+  return Qnil;
+}
 void init_query_cursor(void) {
   cQueryCursor = rb_define_class_under(mTreeSitter, "QueryCursor", rb_cObject);
   rb_define_alloc_func(cQueryCursor, query_cursor_allocate);
+  /* Module methods */
+  rb_define_module_function(cQueryCursor, "exec", query_cursor_exec, 2);
   /* Class methods */
+  // Accessors
   DECLARE_ACCESSOR(cQueryCursor, query_cursor, match_limit)
-  rb_define_module_function(cQueryCursor, "exec", query_cursor_exec, 2);
+  // Other
   rb_define_method(cQueryCursor, "exceed_match_limit?",
                    query_cursor_did_exceed_match_limit, 0);
+  rb_define_method(cQueryCursor, "match_limit", query_cursor_get_match_limit,
+                   0);
+  rb_define_method(cQueryCursor, "match_limit=", query_cursor_set_match_limit,
+                   1);
+  rb_define_method(cQueryCursor,
+                   "max_start_depth=", query_cursor_set_max_start_depth, 1);
+  rb_define_method(cQueryCursor, "next_capture", query_cursor_next_capture, 0);
+  rb_define_method(cQueryCursor, "next_match", query_cursor_next_match, 0);
+  rb_define_method(cQueryCursor, "remove_match", query_cursor_remove_match, 1);
   rb_define_method(cQueryCursor, "set_byte_range", query_cursor_set_byte_range,
                    2);
   rb_define_method(cQueryCursor, "set_point_range",
                    query_cursor_set_point_range, 2);
-  rb_define_method(cQueryCursor, "next_match", query_cursor_next_match, 0);
-  rb_define_method(cQueryCursor, "remove_match", query_cursor_remove_match, 1);
-  rb_define_method(cQueryCursor, "next_capture", query_cursor_next_capture, 0);
 }

data/ext/tree_sitter/repo.rb CHANGED Viewed

@@ -17,7 +17,7 @@ module TreeSitter
       @url = {
         git: 'https://github.com/tree-sitter/tree-sitter',
         tar: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.tar.gz",
-        zip: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.zip"
+        zip: "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v#{@version}.zip",
       }
       @exe = {}

data/ext/tree_sitter/tree.c CHANGED Viewed

@@ -63,40 +63,115 @@ VALUE new_tree(TSTree *ptr) {
 DATA_FROM_VALUE(TSTree *, tree)
-static VALUE tree_copy(VALUE self) { return new_tree(ts_tree_copy(SELF)); }
+/**
+ * Compare an old edited syntax tree to a new syntax tree representing the same
+ * document, returning an array of ranges whose syntactic structure has changed.
+ *
+ * For this to work correctly, the old syntax tree must have been edited such
+ * that its ranges match up to the new tree. Generally, you'll want to call
+ * this function right after calling one of the {Parser#parse} functions.
+ * You need to pass the old tree that was passed to parse, as well as the new
+ * tree that was returned from that function.
+ *
+ * @param old_tree [Tree]
+ * @param new_tree [Tree]
+ *
+ * @return [Array<Range>]
+ */
+static VALUE tree_changed_ranges(VALUE _self, VALUE old_tree, VALUE new_tree) {
+  TSTree *old = unwrap(old_tree)->data;
+  TSTree *new = unwrap(new_tree)->data;
+  uint32_t length;
+  TSRange *ranges = ts_tree_get_changed_ranges(old, new, &length);
+  VALUE res = rb_ary_new_capa(length);
-static VALUE tree_root_node(VALUE self) {
-  return new_node_by_val(ts_tree_root_node(SELF));
+  for (uint32_t i = 0; i < length; i++) {
+    rb_ary_push(res, new_range(&ranges[i]));
+  }
+  if (ranges) {
+    free(ranges);
+  }
+  return res;
 }
-static VALUE tree_language(VALUE self) {
-  return new_language(ts_tree_language(SELF));
+static VALUE tree_finalizer(VALUE _self) {
+  VALUE rc = rb_cv_get(cTree, "@@rc");
+  VALUE keys = rb_funcall(rc, rb_intern("keys"), 0);
+  long len = RARRAY_LEN(keys);
+  for (long i = 0; i < len; ++i) {
+    VALUE curr = RARRAY_AREF(keys, i);
+    unsigned int val = NUM2UINT(rb_hash_lookup(rc, curr));
+    if (val > 0) {
+      ts_tree_delete((TSTree *)NUM2ULONG(curr));
+    }
+    rb_hash_delete(rc, curr);
+  }
+  return Qnil;
 }
+/**
+ * Create a shallow copy of the syntax tree. This is very fast.
+ *
+ * You need to copy a syntax tree in order to use it on more than one thread at
+ * a time, as syntax trees are not thread safe.
+ *
+ * @return [Tree]
+ */
+static VALUE tree_copy(VALUE self) { return new_tree(ts_tree_copy(SELF)); }
+/**
+ * Edit the syntax tree to keep it in sync with source code that has been
+ * edited.
+ *
+ * You must describe the edit both in terms of byte offsets and in terms of
+ * (row, column) coordinates.
+ *
+ * @param edit [InputEdit]
+ *
+ * @return [nil]
+ */
 static VALUE tree_edit(VALUE self, VALUE edit) {
   TSInputEdit in = value_to_input_edit(edit);
   ts_tree_edit(SELF, &in);
   return Qnil;
 }
-static VALUE tree_changed_ranges(VALUE _self, VALUE old_tree, VALUE new_tree) {
-  TSTree *old = unwrap(old_tree)->data;
-  TSTree *new = unwrap(new_tree)->data;
+/**
+ * Get the array of included ranges that was used to parse the syntax tree.
+ *
+ * @return [Array<Range>]
+ */
+static VALUE included_ranges(VALUE self) {
   uint32_t length;
-  TSRange *ranges = ts_tree_get_changed_ranges(old, new, &length);
+  const TSRange *ranges = ts_tree_included_ranges(SELF, &length);
   VALUE res = rb_ary_new_capa(length);
   for (uint32_t i = 0; i < length; i++) {
     rb_ary_push(res, new_range(&ranges[i]));
   }
-  if (ranges) {
-    free(ranges);
-  }
   return res;
 }
+/**
+ * Get the language that was used to parse the syntax tree.
+ *
+ * @return [Language]
+ */
+static VALUE tree_language(VALUE self) {
+  return new_language(ts_tree_language(SELF));
+}
+/**
+ * Write a DOT graph describing the syntax tree to the given file.
+ *
+ * @param file [String]
+ *
+ * @return [nil]
+ */
 static VALUE tree_print_dot_graph(VALUE self, VALUE file) {
   Check_Type(file, T_STRING);
   char *path = StringValueCStr(file);
@@ -112,22 +187,26 @@ static VALUE tree_print_dot_graph(VALUE self, VALUE file) {
   return Qnil;
 }
-static VALUE tree_finalizer(VALUE _self) {
-  VALUE rc = rb_cv_get(cTree, "@@rc");
-  VALUE keys = rb_funcall(rc, rb_intern("keys"), 0);
-  long len = RARRAY_LEN(keys);
-  for (long i = 0; i < len; ++i) {
-    VALUE curr = RARRAY_AREF(keys, i);
-    unsigned int val = NUM2UINT(rb_hash_lookup(rc, curr));
-    if (val > 0) {
-      ts_tree_delete((TSTree *)NUM2ULONG(curr));
-    }
-    rb_hash_delete(rc, curr);
-  }
+/**
+ * Get the root node of the syntax tree.
+ *
+ * @return [Node]
+ */
+static VALUE tree_root_node(VALUE self) {
+  return new_node_by_val(ts_tree_root_node(SELF));
+}
-  return Qnil;
+/**
+ * Get the root node of the syntax tree, but with its position
+ * shifted forward by the given offset.
+ *
+ * @param offset_bytes  [Integer]
+ * @param offset_extent [Point]
+ *
+ * @return [Node]
+ */
+static VALUE tree_root_node_with_offset(VALUE self, VALUE offset_bytes,
+                                        VALUE offset_extent) {
+  uint32_t bytes = NUM2UINT(offset_bytes);
+  TSPoint extent = value_to_point(offset_extent);
+  return new_node_by_val(ts_tree_root_node_with_offset(SELF, bytes, extent));
 }
 void init_tree(void) {
@@ -135,14 +214,19 @@ void init_tree(void) {
   rb_undef_alloc_func(cTree);
+  /* Module methods */
+  rb_define_module_function(cTree, "changed_ranges", tree_changed_ranges, 2);
+  rb_define_module_function(cTree, "finalizer", tree_finalizer, 0);
   /* Class methods */
   rb_define_method(cTree, "copy", tree_copy, 0);
-  rb_define_method(cTree, "root_node", tree_root_node, 0);
-  rb_define_method(cTree, "language", tree_language, 0);
   rb_define_method(cTree, "edit", tree_edit, 1);
-  rb_define_module_function(cTree, "changed_ranges", tree_changed_ranges, 2);
+  rb_define_method(cTree, "included_ranges", included_ranges, 0);
+  rb_define_method(cTree, "language", tree_language, 0);
   rb_define_method(cTree, "print_dot_graph", tree_print_dot_graph, 1);
-  rb_define_module_function(cTree, "finalizer", tree_finalizer, 0);
+  rb_define_method(cTree, "root_node", tree_root_node, 0);
+  rb_define_method(cTree, "root_node_with_offset", tree_root_node_with_offset,
+                   2);
   // Reference-count created trees
   VALUE rc = rb_hash_new();