npm - @ast-grep/lang-elixir - Versions diffs - 0.0.2 - Mend

@ast-grep/lang-elixir 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/LICENSE +21 -0
package/README.md +24 -0
package/index.d.ts +10 -0
package/index.js +9 -0
package/package.json +46 -0
package/postinstall.js +4 -0
package/prebuilds/prebuild-Linux-X64/parser.so +0 -0
package/prebuilds/prebuild-Windows-X64/parser.so +0 -0
package/prebuilds/prebuild-macOS-ARM64/parser.so +0 -0
package/src/grammar.json +6760 -0
package/src/node-types.json +3497 -0
package/src/parser.c +428397 -0
package/src/scanner.c +638 -0
package/src/tree_sitter/alloc.h +54 -0
package/src/tree_sitter/array.h +290 -0
package/src/tree_sitter/parser.h +266 -0
package/type.d.ts +2777 -0

package/src/scanner.c ADDED Viewed

@@ -0,0 +1,638 @@
+#include "tree_sitter/parser.h"
+// See references in grammar.externals
+enum TokenType { // Token kinds this scanner reports through lexer->result_symbol; also used to index valid_symbols
+  QUOTED_CONTENT_I_SINGLE, // QUOTED_CONTENT_I_*: the kinds flagged supports_interpol in quoted_content_infos
+  QUOTED_CONTENT_I_DOUBLE,
+  QUOTED_CONTENT_I_HEREDOC_SINGLE,
+  QUOTED_CONTENT_I_HEREDOC_DOUBLE,
+  QUOTED_CONTENT_I_PARENTHESIS,
+  QUOTED_CONTENT_I_CURLY,
+  QUOTED_CONTENT_I_SQUARE,
+  QUOTED_CONTENT_I_ANGLE,
+  QUOTED_CONTENT_I_BAR,
+  QUOTED_CONTENT_I_SLASH,
+  QUOTED_CONTENT_SINGLE, // QUOTED_CONTENT_* without _I_: same delimiters, supports_interpol is false
+  QUOTED_CONTENT_DOUBLE,
+  QUOTED_CONTENT_HEREDOC_SINGLE,
+  QUOTED_CONTENT_HEREDOC_DOUBLE,
+  QUOTED_CONTENT_PARENTHESIS,
+  QUOTED_CONTENT_CURLY,
+  QUOTED_CONTENT_SQUARE,
+  QUOTED_CONTENT_ANGLE,
+  QUOTED_CONTENT_BAR,
+  QUOTED_CONTENT_SLASH,
+  NEWLINE_BEFORE_DO, // line break plus following whitespace when `do` comes next
+  NEWLINE_BEFORE_BINARY_OPERATOR, // line break plus following whitespace when a binary operator comes next
+  NEWLINE_BEFORE_COMMENT, // line break plus following whitespace when '#' comes next
+  BEFORE_UNARY_OPERATOR, // marker emitted ahead of a '+' or '-' that follows inline whitespace
+  NOT_IN, // the two-word operator `not in`
+  QUOTED_ATOM_START // the ':' directly followed by a double or single quote
+};
+static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } // step past the lookahead character through the lexer's advance callback (second argument false)
+static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } // same callback with the second argument true; scan() uses it for leading inline whitespace
+// Note: some checks need several characters of lookahead and therefore
+// advance the lexer, altering its state. Such functions are named check_*
+static inline bool is_whitespace(int32_t c) { // true for space, tab, line feed or carriage return
+  return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+static inline bool is_inline_whitespace(int32_t c) { // true for space or tab only, i.e. whitespace that does not break the line
+  return c == ' ' || c == '\t';
+}
+static inline bool is_newline(int32_t c) { // true for line feed or carriage return
+  // Note: this implies \r\n is treated as two line breaks,
+  // but in our case it's fine, since multiple line breaks
+  // make no difference
+  return c == '\n' || c == '\r';
+}
+static inline bool is_digit(int32_t c) { return '0' <= c && c <= '9'; } // ASCII decimal digits only
+static inline bool check_keyword_end(TSLexer *lexer) { // true when the lookahead is ':' and the character after it is whitespace
+  if (lexer->lookahead == ':') {
+    advance(lexer); // the ':' is consumed here, so the lexer has moved even when false is returned
+    return is_whitespace(lexer->lookahead);
+  }
+  return false;
+}
+static bool check_operator_end(TSLexer *lexer) { /* Called by scan_newline once
+   an operator's characters have been consumed. Returns false when what
+   follows shows the text was not used as an operator (a keyword key, or an
+   operator name followed by '/' and a digit), true otherwise. May advance
+   the lexer while looking. */
+  // Keyword
+  if (lexer->lookahead == ':') {
+    return !check_keyword_end(lexer); // ':' followed by whitespace => keyword key, so not an operator
+  }
+  while (is_inline_whitespace(lexer->lookahead)) {
+    advance(lexer);
+  }
+  // Operator identifier with arity
+  if (lexer->lookahead == '/') {
+    advance(lexer);
+    while (is_whitespace(lexer->lookahead)) { // unlike the loop above, this one also steps over line breaks
+      advance(lexer);
+    }
+    if (is_digit(lexer->lookahead)) {
+      return false;
+    }
+  }
+  return true;
+}
+// Characters that terminate a word-like token. Whitespace is not listed
+// here; is_token_end checks it separately. The table and its length are
+// file-local (static) so the scanner does not export generically named
+// symbols that could clash with other code linked into the same binary.
+static const char token_terminators[] = {
+    // Operator starts
+    '@', '.', '+', '-', '^', '*', '/', '<', '>', '|', '~', '=', '&', '\\',
+    '%',
+    // Delimiters
+    '{', '}', '[', ']', '(', ')', '"', '\'',
+    // Separators
+    ',', ';',
+    // Comment
+    '#'};
+// Element count derived from the array itself, so it stays correct if the
+// element type ever changes.
+static const uint8_t token_terminators_length =
+    sizeof(token_terminators) / sizeof(token_terminators[0]);
+// Returns true when c is whitespace or one of token_terminators.
+// Note: this is a heuristic as we only use this to distinguish word
+// operators and we don't want to include complex Unicode ranges
+static inline bool is_token_end(int32_t c) {
+  for (uint8_t i = 0; i < token_terminators_length; i++) {
+    if (c == token_terminators[i]) {
+      return true;
+    }
+  }
+  return is_whitespace(c);
+}
+typedef struct { // Describes one quoted-content token kind for scan_quoted_content
+  const enum TokenType token_type; // symbol stored in lexer->result_symbol
+  const bool supports_interpol; // when true, "#{" and any backslash end the content token
+  const int32_t end_delimiter; // closing delimiter character
+  const uint8_t delimiter_length; // how many end_delimiter characters close the quote; 3 is treated as a heredoc
+} QuotedContentInfo;
+// One entry per quoted-content token kind. Columns: token type, supports
+// interpolation, end delimiter character, delimiter length.
+// find_quoted_token_info returns the index of the first entry whose token
+// type is currently valid. The table and its length are file-local (static)
+// so the scanner does not export these symbols.
+static const QuotedContentInfo quoted_content_infos[] = {
+    {QUOTED_CONTENT_I_SINGLE,         true,  '\'', 1},
+    {QUOTED_CONTENT_I_DOUBLE,         true,  '"',  1},
+    {QUOTED_CONTENT_I_HEREDOC_SINGLE, true,  '\'', 3},
+    {QUOTED_CONTENT_I_HEREDOC_DOUBLE, true,  '"',  3},
+    {QUOTED_CONTENT_I_PARENTHESIS,    true,  ')',  1},
+    {QUOTED_CONTENT_I_CURLY,          true,  '}',  1},
+    {QUOTED_CONTENT_I_SQUARE,         true,  ']',  1},
+    {QUOTED_CONTENT_I_ANGLE,          true,  '>',  1},
+    {QUOTED_CONTENT_I_BAR,            true,  '|',  1},
+    {QUOTED_CONTENT_I_SLASH,          true,  '/',  1},
+    {QUOTED_CONTENT_SINGLE,           false, '\'', 1},
+    {QUOTED_CONTENT_DOUBLE,           false, '"',  1},
+    {QUOTED_CONTENT_HEREDOC_SINGLE,   false, '\'', 3},
+    {QUOTED_CONTENT_HEREDOC_DOUBLE,   false, '"',  3},
+    {QUOTED_CONTENT_PARENTHESIS,      false, ')',  1},
+    {QUOTED_CONTENT_CURLY,            false, '}',  1},
+    {QUOTED_CONTENT_SQUARE,           false, ']',  1},
+    {QUOTED_CONTENT_ANGLE,            false, '>',  1},
+    {QUOTED_CONTENT_BAR,              false, '|',  1},
+    {QUOTED_CONTENT_SLASH,            false, '/',  1},
+};
+// Element count derived from the array itself rather than from the type name.
+static const uint8_t quoted_content_infos_length =
+    sizeof(quoted_content_infos) / sizeof(quoted_content_infos[0]);
+static inline int8_t find_quoted_token_info(const bool *valid_symbols) { // returns an index into quoted_content_infos, or -1 when quoted content should not be scanned
+  // Quoted tokens are mutually exclusive and only one should be valid
+  // at a time. If multiple are valid it means we parse an arbitrary
+  // code outside quotes, in which case we don't want to tokenize it as
+  // quoted content.
+  if (valid_symbols[QUOTED_CONTENT_I_SINGLE] &&
+      valid_symbols[QUOTED_CONTENT_I_DOUBLE]) { // only this one pair is tested as the sign that several kinds are valid at once
+    return -1;
+  }
+  for (uint8_t i = 0; i < quoted_content_infos_length; i++) {
+    if (valid_symbols[quoted_content_infos[i].token_type]) {
+      return i; // first valid entry wins
+    }
+  }
+  return -1;
+}
+static bool scan_quoted_content(TSLexer *lexer, const QuotedContentInfo *info) { /*
+   Scans one run of quoted content for the quote kind described by info and
+   reports it as info->token_type. The token end is marked at the top of
+   every loop iteration, before the lookahead is examined, so the character
+   that makes the scan stop is left outside the token. Returns has_content,
+   which is false only when the scan stops in the first iteration without
+   having consumed a line break. */
+  lexer->result_symbol = info->token_type;
+  bool is_heredoc = (info->delimiter_length == 3);
+  for (bool has_content = false; true; has_content = true) {
+    bool newline = false; // set when this iteration started by consuming a line break
+    if (is_newline(lexer->lookahead)) {
+      advance(lexer);
+      has_content = true;
+      newline = true;
+      while (is_whitespace(lexer->lookahead)) {
+        advance(lexer);
+      }
+    }
+    lexer->mark_end(lexer);
+    if (lexer->lookahead == info->end_delimiter) {
+      uint8_t length = 1; // number of consecutive end_delimiter characters seen so far
+      while (length < info->delimiter_length) {
+        advance(lexer);
+        if (lexer->lookahead == info->end_delimiter) {
+          length++;
+        } else {
+          break;
+        }
+      }
+      if (length == info->delimiter_length && (!is_heredoc || newline)) { // a heredoc delimiter only counts right after a line break and optional whitespace
+        return has_content;
+      }
+    } else {
+      if (lexer->lookahead == '#') {
+        advance(lexer);
+        if (info->supports_interpol && lexer->lookahead == '{') { // "#{" stops the content token
+          return has_content;
+        }
+      } else if (lexer->lookahead == '\\') {
+        advance(lexer);
+        if (is_heredoc && lexer->lookahead == '\n') {
+          // We need to know about the newline to correctly recognise
+          // heredoc end delimiter, so we intentionally ignore
+          // escaping
+        } else if (info->supports_interpol ||
+                   lexer->lookahead == info->end_delimiter) { // stop before the backslash: always when interpolating, otherwise only for an escaped end delimiter
+          return has_content;
+        }
+      } else if (lexer->lookahead == '\0') {
+        // If we reached the end of the file, this means there is no
+        // end delimiter, so the syntax is invalid. In that case we
+        // want to treat all the scanned content as quoted content.
+        return has_content;
+      } else {
+        advance(lexer);
+      }
+    }
+  }
+  return false;
+}
+static bool scan_newline(TSLexer *lexer, const bool *valid_symbols) { /*
+   Called by scan() with the lookahead on a line break. Consumes the break
+   and all following whitespace, marks the token end there, then looks ahead
+   to decide which special newline token applies. Returns true with
+   lexer->result_symbol set to NEWLINE_BEFORE_COMMENT, NEWLINE_BEFORE_DO or
+   NEWLINE_BEFORE_BINARY_OPERATOR, or false when none applies. The lookahead
+   past the marked end relies on the check_* helpers and is_token_end. */
+  advance(lexer);
+  while (is_whitespace(lexer->lookahead)) {
+    advance(lexer);
+  }
+  // Note we include all the whitespace after newline, so that the
+  // parser doesn't have to go through it again
+  lexer->mark_end(lexer);
+  if (lexer->lookahead == '#') { // not gated on valid_symbols[NEWLINE_BEFORE_COMMENT] here
+    lexer->result_symbol = NEWLINE_BEFORE_COMMENT;
+    return true;
+  }
+  if (lexer->lookahead == 'd' && valid_symbols[NEWLINE_BEFORE_DO]) {
+    lexer->result_symbol = NEWLINE_BEFORE_DO;
+    advance(lexer);
+    if (lexer->lookahead == 'o') {
+      advance(lexer);
+      return is_token_end(lexer->lookahead); // `do` must end here, not be the prefix of a longer word
+    }
+    return false;
+  }
+  if (valid_symbols[NEWLINE_BEFORE_BINARY_OPERATOR]) {
+    lexer->result_symbol = NEWLINE_BEFORE_BINARY_OPERATOR;
+    // &&, &&&
+    if (lexer->lookahead == '&') {
+      advance(lexer);
+      if (lexer->lookahead == '&') {
+        advance(lexer);
+        if (lexer->lookahead == '&') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        } else {
+          return check_operator_end(lexer);
+        }
+      }
+      // =, ==, ===, =~, =>
+    } else if (lexer->lookahead == '=') {
+      advance(lexer);
+      if (lexer->lookahead == '=') {
+        advance(lexer);
+        if (lexer->lookahead == '=') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        } else {
+          return check_operator_end(lexer);
+        }
+      } else if (lexer->lookahead == '~') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      } else if (lexer->lookahead == '>') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      } else {
+        return check_operator_end(lexer);
+      }
+      // ::
+    } else if (lexer->lookahead == ':') {
+      advance(lexer);
+      if (lexer->lookahead == ':') {
+        advance(lexer);
+        // Ignore ::: atom
+        if (lexer->lookahead == ':')
+          return false;
+        return check_operator_end(lexer);
+      }
+      // ++, +++
+    } else if (lexer->lookahead == '+') {
+      advance(lexer);
+      if (lexer->lookahead == '+') {
+        advance(lexer);
+        if (lexer->lookahead == '+') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        } else {
+          return check_operator_end(lexer);
+        }
+      }
+      // --, ---, ->
+    } else if (lexer->lookahead == '-') {
+      advance(lexer);
+      if (lexer->lookahead == '-') {
+        advance(lexer);
+        if (lexer->lookahead == '-') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        } else {
+          return check_operator_end(lexer);
+        }
+      } else if (lexer->lookahead == '>') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      }
+      // <, <=, <-, <>, <~, <~>, <|>, <<<, <<~
+    } else if (lexer->lookahead == '<') {
+      advance(lexer);
+      if (lexer->lookahead == '=' || lexer->lookahead == '-' ||
+          lexer->lookahead == '>') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      } else if (lexer->lookahead == '~') {
+        advance(lexer);
+        if (lexer->lookahead == '>') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        } else {
+          return check_operator_end(lexer);
+        }
+      } else if (lexer->lookahead == '|') {
+        advance(lexer);
+        if (lexer->lookahead == '>') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        }
+      } else if (lexer->lookahead == '<') {
+        advance(lexer);
+        if (lexer->lookahead == '<' || lexer->lookahead == '~') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        }
+      } else {
+        return check_operator_end(lexer);
+      }
+      // >, >=, >>>
+    } else if (lexer->lookahead == '>') {
+      advance(lexer);
+      if (lexer->lookahead == '=') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      } else if (lexer->lookahead == '>') {
+        advance(lexer);
+        if (lexer->lookahead == '>') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        }
+      } else {
+        return check_operator_end(lexer);
+      }
+      // ^^^
+    } else if (lexer->lookahead == '^') {
+      advance(lexer);
+      if (lexer->lookahead == '^') {
+        advance(lexer);
+        if (lexer->lookahead == '^') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        }
+      }
+      // !=, !==
+    } else if (lexer->lookahead == '!') {
+      advance(lexer);
+      if (lexer->lookahead == '=') {
+        advance(lexer);
+        if (lexer->lookahead == '=') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        } else {
+          return check_operator_end(lexer);
+        }
+      }
+      // ~>, ~>>
+    } else if (lexer->lookahead == '~') {
+      advance(lexer);
+      if (lexer->lookahead == '>') {
+        advance(lexer);
+        if (lexer->lookahead == '>') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        } else {
+          return check_operator_end(lexer);
+        }
+      }
+      // |, ||, |||, |>
+    } else if (lexer->lookahead == '|') {
+      advance(lexer);
+      if (lexer->lookahead == '|') {
+        advance(lexer);
+        if (lexer->lookahead == '|') {
+          advance(lexer);
+          return check_operator_end(lexer);
+        } else {
+          return check_operator_end(lexer);
+        }
+      } else if (lexer->lookahead == '>') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      } else {
+        return check_operator_end(lexer);
+      }
+      // *, **
+    } else if (lexer->lookahead == '*') {
+      advance(lexer);
+      if (lexer->lookahead == '*') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      } else {
+        return check_operator_end(lexer);
+      }
+      // / //
+    } else if (lexer->lookahead == '/') {
+      advance(lexer);
+      if (lexer->lookahead == '/') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      } else {
+        return check_operator_end(lexer);
+      }
+      // ., ..
+    } else if (lexer->lookahead == '.') {
+      advance(lexer);
+      if (lexer->lookahead == '.') {
+        advance(lexer);
+        // Ignore ... identifier
+        if (lexer->lookahead == '.')
+          return false;
+        return check_operator_end(lexer);
+      } else {
+        return check_operator_end(lexer);
+      }
+      // double slash
+    } else if (lexer->lookahead == '\\') {
+      advance(lexer);
+      if (lexer->lookahead == '\\') {
+        advance(lexer);
+        return check_operator_end(lexer);
+      }
+      // when
+    } else if (lexer->lookahead == 'w') {
+      advance(lexer);
+      if (lexer->lookahead == 'h') {
+        advance(lexer);
+        if (lexer->lookahead == 'e') {
+          advance(lexer);
+          if (lexer->lookahead == 'n') {
+            advance(lexer);
+            return is_token_end(lexer->lookahead) && check_operator_end(lexer); // word operators must also end at a token boundary
+          }
+        }
+      }
+      // and
+    } else if (lexer->lookahead == 'a') {
+      advance(lexer);
+      if (lexer->lookahead == 'n') {
+        advance(lexer);
+        if (lexer->lookahead == 'd') {
+          advance(lexer);
+          return is_token_end(lexer->lookahead) && check_operator_end(lexer);
+        }
+      }
+      // or
+    } else if (lexer->lookahead == 'o') {
+      advance(lexer);
+      if (lexer->lookahead == 'r') {
+        advance(lexer);
+        return is_token_end(lexer->lookahead) && check_operator_end(lexer);
+      }
+      // in
+    } else if (lexer->lookahead == 'i') {
+      advance(lexer);
+      if (lexer->lookahead == 'n') {
+        advance(lexer);
+        return is_token_end(lexer->lookahead) && check_operator_end(lexer);
+      }
+      // not in
+    } else if (lexer->lookahead == 'n') {
+      advance(lexer);
+      if (lexer->lookahead == 'o') {
+        advance(lexer);
+        if (lexer->lookahead == 't') {
+          advance(lexer);
+          while (is_inline_whitespace(lexer->lookahead)) { // zero or more spaces/tabs are accepted between `not` and `in`
+            advance(lexer);
+          }
+          if (lexer->lookahead == 'i') {
+            advance(lexer);
+            if (lexer->lookahead == 'n') {
+              advance(lexer);
+              return is_token_end(lexer->lookahead) &&
+                     check_operator_end(lexer);
+            }
+          }
+        }
+      }
+    }
+  }
+  return false; // every path that falls out of the chain above means no special newline token
+}
+static bool scan(TSLexer *lexer, const bool *valid_symbols) { /*
+   Scanner dispatch. In order: quoted content when exactly that is expected,
+   otherwise skip inline whitespace and try a special newline token, the
+   marker before a unary '+' or '-', the `not in` operator, or the start of
+   a quoted atom. Returns true when a token was recognised, with
+   lexer->result_symbol naming it. */
+  int8_t quoted_content_info_idx = find_quoted_token_info(valid_symbols);
+  // Quoted content, which matches any character except for close
+  // delimiters, escapes and interpolations
+  if (quoted_content_info_idx != -1) {
+    const QuotedContentInfo info =
+        quoted_content_infos[quoted_content_info_idx];
+    return scan_quoted_content(lexer, &info);
+  }
+  bool skipped_whitespace = false; // the unary-operator marker below is only produced after inline whitespace
+  while (is_inline_whitespace(lexer->lookahead)) {
+    skipped_whitespace = true;
+    skip(lexer);
+  }
+  // Newline, which is either tokenized as a special newline or ignored
+  if (is_newline(lexer->lookahead) &&
+      (valid_symbols[NEWLINE_BEFORE_DO] ||
+       valid_symbols[NEWLINE_BEFORE_BINARY_OPERATOR] ||
+       valid_symbols[NEWLINE_BEFORE_COMMENT])) {
+    return scan_newline(lexer, valid_symbols);
+  }
+  // before unary +
+  if (lexer->lookahead == '+') {
+    if (skipped_whitespace && valid_symbols[BEFORE_UNARY_OPERATOR]) {
+      lexer->mark_end(lexer); // end is marked before the sign is consumed, so the sign itself is not part of this token
+      advance(lexer);
+      if (lexer->lookahead == '+' || lexer->lookahead == ':' ||
+          lexer->lookahead == '/') {
+        return false;
+      }
+      if (is_whitespace(lexer->lookahead)) { // a sign followed by whitespace is not reported as unary
+        return false;
+      }
+      lexer->result_symbol = BEFORE_UNARY_OPERATOR;
+      return true;
+    }
+    // before unary -
+  } else if (lexer->lookahead == '-') {
+    if (skipped_whitespace && valid_symbols[BEFORE_UNARY_OPERATOR]) {
+      lexer->mark_end(lexer);
+      lexer->result_symbol = BEFORE_UNARY_OPERATOR;
+      advance(lexer);
+      if (lexer->lookahead == '-' || lexer->lookahead == '>' ||
+          lexer->lookahead == ':' || lexer->lookahead == '/') {
+        return false;
+      }
+      if (is_whitespace(lexer->lookahead)) {
+        return false;
+      }
+      return true;
+    }
+    // not in
+  } else if (lexer->lookahead == 'n') {
+    if (valid_symbols[NOT_IN]) {
+      lexer->result_symbol = NOT_IN;
+      advance(lexer);
+      if (lexer->lookahead == 'o') {
+        advance(lexer);
+        if (lexer->lookahead == 't') {
+          advance(lexer);
+          while (is_inline_whitespace(lexer->lookahead)) {
+            advance(lexer);
+          }
+          if (lexer->lookahead == 'i') {
+            advance(lexer);
+            if (lexer->lookahead == 'n') {
+              advance(lexer);
+              return is_token_end(lexer->lookahead); // mark_end is never called on this path
+            }
+          }
+        }
+      }
+    }
+    // quoted atom start
+  } else if (lexer->lookahead == ':') {
+    if (valid_symbols[QUOTED_ATOM_START]) {
+      advance(lexer);
+      lexer->mark_end(lexer); // token covers just the ':'
+      lexer->result_symbol = QUOTED_ATOM_START;
+      if (lexer->lookahead == '"' || lexer->lookahead == '\'') {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+// External scanner entry points for the elixir grammar. No state is kept
+// between calls: create returns NULL, serialize writes nothing and reports
+// a length of 0, and deserialize/destroy do nothing.
+//
+// create is declared with (void) so it is a real prototype; before C23 an
+// empty parameter list leaves the parameters unspecified.
+void *tree_sitter_elixir_external_scanner_create(void) { return NULL; }
+// Runs the scanner; payload is unused because there is no scanner state.
+bool tree_sitter_elixir_external_scanner_scan(void *payload, TSLexer *lexer,
+                                              const bool *valid_symbols) {
+  (void)payload;
+  return scan(lexer, valid_symbols);
+}
+// Nothing to save: always reports zero bytes written to buffer.
+unsigned tree_sitter_elixir_external_scanner_serialize(void *payload,
+                                                       char *buffer) {
+  (void)payload;
+  (void)buffer;
+  return 0;
+}
+// Nothing to restore.
+void tree_sitter_elixir_external_scanner_deserialize(void *payload,
+                                                     const char *buffer,
+                                                     unsigned length) {
+  (void)payload;
+  (void)buffer;
+  (void)length;
+}
+// Nothing to free: create never allocates.
+void tree_sitter_elixir_external_scanner_destroy(void *payload) {
+  (void)payload;
+}

package/src/tree_sitter/alloc.h ADDED Viewed

@@ -0,0 +1,54 @@
+#ifndef TREE_SITTER_ALLOC_H_
+#define TREE_SITTER_ALLOC_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+/* Allow clients to override allocation functions: every ts_* macro below is
+ * wrapped in #ifndef, so a definition made before this header is included
+ * takes precedence. */
+#ifdef TREE_SITTER_REUSE_ALLOCATOR
+extern void *(*ts_current_malloc)(size_t size); // function pointers defined elsewhere; the ts_* macros forward to them in this branch
+extern void *(*ts_current_calloc)(size_t count, size_t size);
+extern void *(*ts_current_realloc)(void *ptr, size_t size);
+extern void (*ts_current_free)(void *ptr);
+#ifndef ts_malloc
+#define ts_malloc  ts_current_malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc  ts_current_calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc ts_current_realloc
+#endif
+#ifndef ts_free
+#define ts_free    ts_current_free
+#endif
+#else // TREE_SITTER_REUSE_ALLOCATOR not defined: fall back to the <stdlib.h> allocator
+#ifndef ts_malloc
+#define ts_malloc  malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc  calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc realloc
+#endif
+#ifndef ts_free
+#define ts_free    free
+#endif
+#endif // TREE_SITTER_REUSE_ALLOCATOR
+#ifdef __cplusplus
+}
+#endif
+#endif // TREE_SITTER_ALLOC_H_