natalie_parser 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0598d42a8e0b83aa5a340d1615474a4c5a8e7b209032bf574a230778e4d3ce5e'
4
- data.tar.gz: 475cce9e2f078ab440674b48e3b99ba48f163f34b889b376cf681f74a489ce53
3
+ metadata.gz: 2de813d6f0cb6ab94d5a6bb2dd64dc8be719567d401f19ea2b6ac9e71ebaed65
4
+ data.tar.gz: 668f9574d0968781f077e407f6476f2b320195964f8c10342ab2761beb3f7587
5
5
  SHA512:
6
- metadata.gz: 59cd20f85484845036f934ca5b40c020ad35655824950c036a697bf0bc437d3ebd35eb25bacc7c6f062d096c01b58dea5a1ebd688d0d101192ee7f43b9881b25
7
- data.tar.gz: 53bcc9b34ab02943dda1602141ae949e4fa1dcf1c229dfeda8ef710b01d1d9d9c97137ce7e5b08e88b121025009c9a9fd71edbc77eead37f69f37b794fab02f8
6
+ metadata.gz: 02da9b2afc4bc76c5b76b03abb48a40bb9242e2884146bedd61b7b944f358401655aeb077bf7900eaf6c3936c3d06000115d20200b378d17bf98cc18ffd28174
7
+ data.tar.gz: 5e42458d1ab43bce763d3b929b095f9df5ab620e0c9ba25caac5abb8bd44673ded90bcbcd943b94ffe6cadf65d28f5bfebb54bb50942909c4055feb5c613cb2f
data/CHANGELOG.md CHANGED
@@ -1,5 +1,47 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.3.0 (2022-12-31)
4
+
5
+ - FIX: Add for loop variables to local scope
6
+ - FIX: Add lexer support for leading-zero (bare) octal - e.g. 0777
7
+ - FIX: Allow non-local variable in rescue
8
+ - FIX: Allow trailing comma after for loop variables
9
+ - FIX: Fix for-loop with attr assignment
10
+ - FIX: Fix lexing of symbol keys with trailing punctuation
11
+ - FIX: Fix precedence of 'do' keyword in use with 'for'
12
+ - FIX: Parse for loop with splat assignment
13
+ - FIX: Use proper make on OpenBSD
14
+
15
+ ## 2.2.0 (2022-10-24)
16
+
17
+ - FIX: Allow backreference as first arg of implicit call
18
+ - FIX: Exclude `?` and `!` from class/instance/global variable name
19
+ - FIX: Fix parsing of hashes containing assignments
20
+ - FIX: Parse aliases of unary operators and methods named as keywords
21
+ - FIX: Parse alias of keyword-like method to assignment method
22
+ - FIX: Parse alias/undef of assignment methods
23
+ - FIX: Parse backtick as method name
24
+ - FIX: Parse class/module definition in dynamic class/module
25
+ - FIX: Parse `def !`
26
+ - FIX: Parse `def foo::bar`
27
+ - FIX: Parse `foo(<<FOO)`
28
+ - FIX: Parse `for ... do` and `while ... do`
29
+ - FIX: Parse global variables beginning with `$_`
30
+ - FIX: Parse keywords/operators as method names for `::`
31
+ - FIX: Parse __LINE__ constant
32
+ - FIX: Parse more percent strings
33
+ - FIX: Parse more regexes
34
+ - FIX: Parse more stabby procs without parentheses
35
+ - FIX: Parse multiple parameters set to the same default value
36
+ - FIX: Parse parentheses-less stabby proc with keyword arg
37
+ - FIX: Parse undef of methods with special names
38
+ - FIX: Parse `unless ... then`
39
+ - FIX: Parse variable names starting with extended ASCII characters
40
+ - FIX: Raise SyntaxError for `def ====`
41
+ - FIX: Raise syntax error for `def +@.foo`
42
+ - FIX: Tokenize unterminated ternary at EOF
43
+ - FIX: Use lower precedence for block pass
44
+
3
45
  ## 2.1.0 (2022-08-12)
4
46
 
5
47
  - FEAT: Parse for loops
data/Dockerfile CHANGED
@@ -8,7 +8,7 @@ ENV LC_ALL C.UTF-8
8
8
 
9
9
  WORKDIR natalie_parser
10
10
 
11
- COPY Gemfile /natalie_parser/
11
+ COPY Gemfile /natalie_parser/
12
12
  RUN bundle install
13
13
 
14
14
  ARG CC=gcc
data/Gemfile CHANGED
@@ -6,5 +6,5 @@ source 'https://rubygems.org'
6
6
  gem 'minitest'
7
7
  gem 'minitest-focus'
8
8
  gem 'minitest-reporters'
9
- gem 'ruby_parser'
9
+ gem 'ruby_parser', '3.19.1'
10
10
  gem 'rake'
data/README.md CHANGED
@@ -1,5 +1,6 @@
1
1
  # Natalie Parser
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/natalie_parser.svg)](https://badge.fury.io/rb/natalie_parser)
3
4
  [![github build status](https://github.com/natalie-lang/natalie_parser/actions/workflows/build.yml/badge.svg)](https://github.com/natalie-lang/natalie_parser/actions?query=workflow%3ABuild+branch%3Amaster)
4
5
  [![MIT License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/natalie-lang/natalie_parser/blob/master/LICENSE)
5
6
 
data/Rakefile CHANGED
@@ -185,7 +185,8 @@ file "ext/natalie_parser/natalie_parser.#{so_ext}" => [
185
185
  else
186
186
  cxx_hacky = cxx
187
187
  end
188
- sh "CC=#{cc.inspect} CXX=#{cxx_hacky.inspect} make -C #{build_dir} -j -e V=1 2>&1 | tee #{log_file}"
188
+ make = system('which gmake 2>&1 > /dev/null') ? 'gmake' : 'make'
189
+ sh "CC=#{cc.inspect} CXX=#{cxx_hacky.inspect} #{make} -C #{build_dir} -j -e V=1 2>&1 | tee #{log_file}"
189
190
  end
190
191
 
191
192
  file 'build/fragments.hpp' => ['test/parser_test.rb', 'test/support/extract_parser_test_fragments.rb'] do
@@ -25,7 +25,7 @@ public:
25
25
 
26
26
  virtual void reset_sexp() override {
27
27
  m_sexp = rb_class_new_instance(0, nullptr, Sexp);
28
- rb_ivar_set(m_sexp, rb_intern("@file"), get_file_string(file()));
28
+ rb_ivar_set(m_sexp, rb_intern("@file"), get_file_string(*file()));
29
29
  rb_ivar_set(m_sexp, rb_intern("@line"), rb_int_new(line() + 1));
30
30
  rb_ivar_set(m_sexp, rb_intern("@column"), rb_int_new(column() + 1));
31
31
  }
@@ -139,13 +139,13 @@ public:
139
139
  private:
140
140
  VALUE m_sexp { Qnil };
141
141
 
142
- static VALUE get_file_string(SharedPtr<const String> file) {
143
- auto file_string = s_file_cache.get(*file);
142
+ static VALUE get_file_string(const String &file) {
143
+ auto file_string = s_file_cache.get(file);
144
144
  if (!file_string) {
145
- file_string = rb_str_new(file->c_str(), file->length());
145
+ file_string = rb_str_new(file.c_str(), file.length());
146
146
  // FIXME: Seems there is no way to un-register an object. :-(
147
147
  rb_gc_register_mark_object(file_string);
148
- s_file_cache.put(*file, file_string);
148
+ s_file_cache.put(file, file_string);
149
149
  }
150
150
  return file_string;
151
151
  }
@@ -27,9 +27,9 @@ VALUE initialize(int argc, VALUE *argv, VALUE self) {
27
27
  return self;
28
28
  }
29
29
 
30
- VALUE node_to_ruby(TM::SharedPtr<NatalieParser::Node> node) {
31
- NatalieParser::MRICreator creator { node.ref() };
32
- node->transform(&creator);
30
+ VALUE node_to_ruby(const NatalieParser::Node &node) {
31
+ NatalieParser::MRICreator creator { node };
32
+ node.transform(&creator);
33
33
  return creator.sexp();
34
34
  }
35
35
 
@@ -41,7 +41,7 @@ VALUE parse_on_instance(VALUE self) {
41
41
  auto parser = NatalieParser::Parser { code_string, path_string };
42
42
  try {
43
43
  auto tree = parser.tree();
44
- VALUE ast = node_to_ruby(tree);
44
+ VALUE ast = node_to_ruby(*tree);
45
45
  return ast;
46
46
  } catch (NatalieParser::Parser::SyntaxError &error) {
47
47
  rb_raise(rb_eSyntaxError, "%s", error.message());
@@ -80,6 +80,7 @@ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
80
80
  case NatalieParser::Token::Type::Constant:
81
81
  case NatalieParser::Token::Type::GlobalVariable:
82
82
  case NatalieParser::Token::Type::InstanceVariable:
83
+ case NatalieParser::Token::Type::OperatorName:
83
84
  case NatalieParser::Token::Type::Symbol:
84
85
  case NatalieParser::Token::Type::SymbolKey: {
85
86
  auto literal = token.literal_string();
@@ -70,9 +70,9 @@ protected:
70
70
  virtual bool skip_whitespace();
71
71
  virtual Token build_next_token();
72
72
  Token consume_symbol();
73
+ SharedPtr<String> consume_word();
73
74
  Token consume_word(Token::Type type);
74
- Token consume_bare_name();
75
- Token consume_constant();
75
+ Token consume_bare_name_or_constant(Token::Type type);
76
76
  Token consume_global_variable();
77
77
  Token consume_heredoc();
78
78
  Token consume_numeric();
@@ -80,11 +80,19 @@ protected:
80
80
  Token consume_nth_ref();
81
81
  long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
82
82
  long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
83
- Token consume_double_quoted_string(char, char, Token::Type begin_type = Token::Type::InterpolatedStringBegin, Token::Type end_type = Token::Type::InterpolatedStringEnd);
83
+ Token consume_double_quoted_string(char, char, Token::Type begin_type, Token::Type end_type);
84
84
  Token consume_single_quoted_string(char, char);
85
85
  Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
86
86
  Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
87
87
  Token consume_regexp(char start_char, char stop_char);
88
+ Token consume_percent_symbol(char start_char, char stop_char);
89
+ Token consume_interpolated_string(char start_char, char stop_char);
90
+ Token consume_interpolated_shell(char start_char, char stop_char);
91
+ Token consume_percent_lower_w(char start_char, char stop_char);
92
+ Token consume_percent_upper_w(char start_char, char stop_char);
93
+ Token consume_percent_lower_i(char start_char, char stop_char);
94
+ Token consume_percent_upper_i(char start_char, char stop_char);
95
+ Token consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered = true);
88
96
  SharedPtr<String> consume_non_whitespace();
89
97
 
90
98
  void utf32_codepoint_to_utf8(String &buf, long long codepoint);
@@ -95,7 +103,7 @@ protected:
95
103
  bool token_is_first_on_line() const;
96
104
 
97
105
  bool char_can_be_string_or_regexp_delimiter(char c) const {
98
- return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
106
+ return (c >= '!' && c <= '/') || c == ':' || c == ';' || c == '=' || c == '?' || c == '@' || c == '\\' || c == '~' || c == '|' || (c >= '^' && c <= '`');
99
107
  }
100
108
 
101
109
  SharedPtr<String> m_input;
@@ -131,5 +139,10 @@ protected:
131
139
  // then increment m_pair_depth
132
140
  char m_start_char { 0 };
133
141
  int m_pair_depth { 0 };
142
+
143
+ size_t m_remaining_method_names { 0 };
144
+ bool m_allow_assignment_method { false };
145
+ Token::Type m_method_name_separator { Token::Type::Invalid };
146
+ Token m_last_method_name {};
134
147
  };
135
148
  }
@@ -1,7 +1,6 @@
1
1
  #pragma once
2
2
 
3
3
  #include "natalie_parser/node/block_node.hpp"
4
- #include "natalie_parser/node/identifier_node.hpp"
5
4
  #include "natalie_parser/node/node.hpp"
6
5
  #include "natalie_parser/node/node_with_args.hpp"
7
6
  #include "tm/hashmap.hpp"
@@ -23,24 +22,24 @@ public:
23
22
  m_exceptions.push(node);
24
23
  }
25
24
 
26
- void set_exception_name(SharedPtr<IdentifierNode> name) {
25
+ void set_exception_name(SharedPtr<Node> name) {
27
26
  m_name = name;
28
27
  }
29
28
 
30
29
  void set_body(SharedPtr<BlockNode> body) { m_body = body; }
31
30
 
32
- SharedPtr<Node> name_to_node() const;
31
+ SharedPtr<Node> name_to_assignment() const;
33
32
 
34
33
  bool has_name() const { return m_name; }
35
34
 
36
- const SharedPtr<IdentifierNode> name() const { return m_name; }
35
+ const SharedPtr<Node> name() const { return m_name; }
37
36
  const Vector<SharedPtr<Node>> &exceptions() const { return m_exceptions; }
38
37
  const SharedPtr<BlockNode> body() const { return m_body; }
39
38
 
40
39
  virtual void transform(Creator *creator) const override;
41
40
 
42
41
  protected:
43
- SharedPtr<IdentifierNode> m_name {};
42
+ SharedPtr<Node> m_name {};
44
43
  Vector<SharedPtr<Node>> m_exceptions {};
45
44
  SharedPtr<BlockNode> m_body {};
46
45
  };
@@ -25,22 +25,7 @@ public:
25
25
  const SharedPtr<Node> vars() const { return m_vars; }
26
26
  const SharedPtr<BlockNode> body() const { return m_body; }
27
27
 
28
- virtual void transform(Creator *creator) const override {
29
- creator->set_type("for");
30
- creator->append(m_expr);
31
- switch (m_vars->type()) {
32
- case Node::Type::Identifier:
33
- creator->with_assignment(true, [&]() { creator->append(*m_vars); });
34
- break;
35
- case Node::Type::MultipleAssignment:
36
- creator->append(m_vars);
37
- break;
38
- default:
39
- TM_UNREACHABLE();
40
- }
41
- if (!m_body->is_empty())
42
- creator->append(m_body->without_unnecessary_nesting());
43
- }
28
+ virtual void transform(Creator *creator) const override;
44
29
 
45
30
  protected:
46
31
  SharedPtr<Node> m_expr {};
@@ -49,16 +49,22 @@ public:
49
49
 
50
50
  enum class Precedence;
51
51
 
52
+ enum class IterAllow {
53
+ NONE,
54
+ CURLY_ONLY,
55
+ CURLY_AND_BLOCK,
56
+ };
57
+
52
58
  SharedPtr<Node> tree();
53
59
 
54
60
  private:
55
- bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
61
+ bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow);
56
62
 
57
63
  Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
58
64
 
59
65
  bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
60
66
 
61
- SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &);
67
+ SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &, IterAllow = IterAllow::CURLY_AND_BLOCK);
62
68
 
63
69
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
64
70
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
@@ -67,8 +73,9 @@ private:
67
73
  SharedPtr<Node> parse_if_body(LocalsHashmap &);
68
74
  SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
69
75
 
76
+ void reinsert_collapsed_newline();
70
77
  SharedPtr<Node> parse_alias(LocalsHashmap &);
71
- SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *, bool);
78
+ SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *);
72
79
  SharedPtr<Node> parse_array(LocalsHashmap &);
73
80
  SharedPtr<Node> parse_back_ref(LocalsHashmap &);
74
81
  SharedPtr<Node> parse_begin_block(LocalsHashmap &);
@@ -96,7 +103,8 @@ private:
96
103
  Method,
97
104
  Proc,
98
105
  };
99
- void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext);
106
+ void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext, IterAllow = IterAllow::CURLY_AND_BLOCK);
107
+ SharedPtr<Node> parse_arg_default_value(LocalsHashmap &, IterAllow);
100
108
 
101
109
  SharedPtr<Node> parse_encoding(LocalsHashmap &);
102
110
  SharedPtr<Node> parse_end_block(LocalsHashmap &);
@@ -115,6 +123,7 @@ private:
115
123
  SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
116
124
  SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
117
125
  SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
126
+ SharedPtr<Node> parse_line_constant(LocalsHashmap &);
118
127
  SharedPtr<Node> parse_lit(LocalsHashmap &);
119
128
  SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
120
129
  SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
@@ -124,7 +133,7 @@ private:
124
133
  SharedPtr<Node> parse_nil(LocalsHashmap &);
125
134
  SharedPtr<Node> parse_not(LocalsHashmap &);
126
135
  SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
127
- void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
136
+ void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &, IterAllow);
128
137
  SharedPtr<Node> parse_redo(LocalsHashmap &);
129
138
  SharedPtr<Node> parse_retry(LocalsHashmap &);
130
139
  SharedPtr<Node> parse_return(LocalsHashmap &);
@@ -152,6 +161,7 @@ private:
152
161
  SharedPtr<Node> parse_assignment_expression(SharedPtr<Node>, LocalsHashmap &);
153
162
  SharedPtr<Node> parse_assignment_expression_without_multiple_values(SharedPtr<Node>, LocalsHashmap &);
154
163
  SharedPtr<Node> parse_assignment_expression(SharedPtr<Node>, LocalsHashmap &, bool);
164
+ void add_assignment_locals(SharedPtr<Node>, LocalsHashmap &);
155
165
  SharedPtr<Node> parse_assignment_expression_value(bool, LocalsHashmap &, bool);
156
166
  SharedPtr<Node> parse_assignment_identifier(bool, LocalsHashmap &);
157
167
  SharedPtr<Node> parse_call_expression_without_parens(SharedPtr<Node>, LocalsHashmap &);
@@ -103,6 +103,7 @@ public:
103
103
  NotKeyword,
104
104
  NotMatch,
105
105
  NthRef,
106
+ OperatorName,
106
107
  OrKeyword,
107
108
  Percent,
108
109
  PercentEqual,
@@ -431,6 +432,8 @@ public:
431
432
  return "!";
432
433
  case Type::NthRef:
433
434
  return "nth_ref";
435
+ case Type::OperatorName:
436
+ return "operator";
434
437
  case Type::OrKeyword:
435
438
  return "or";
436
439
  case Type::Percent:
@@ -614,6 +617,7 @@ public:
614
617
  case Token::Type::LessThanOrEqual:
615
618
  case Token::Type::Match:
616
619
  case Token::Type::Minus:
620
+ case Token::Type::Not:
617
621
  case Token::Type::NotEqual:
618
622
  case Token::Type::NotMatch:
619
623
  case Token::Type::Percent:
@@ -635,6 +639,7 @@ public:
635
639
  bool is_closing_token() const { return m_type == Type::RBracket || m_type == Type::RCurlyBrace || m_type == Type::RParen; }
636
640
  bool is_comma() const { return m_type == Type::Comma; }
637
641
  bool is_comment() const { return m_type == Type::Comment; }
642
+ bool is_constant_resolution() const { return m_type == Type::ConstantResolution; }
638
643
  bool is_def_keyword() const { return m_type == Type::DefKeyword; }
639
644
  bool is_doc() const { return m_type == Type::Doc; }
640
645
  bool is_dot() const { return m_type == Type::Dot; }
@@ -743,9 +748,23 @@ public:
743
748
  }
744
749
  }
745
750
 
751
+ bool can_be_first_arg_of_def() const {
752
+ switch (m_type) {
753
+ case Token::Type::Ampersand:
754
+ case Token::Type::BareName:
755
+ case Token::Type::Star:
756
+ case Token::Type::StarStar:
757
+ case Token::Type::SymbolKey:
758
+ return true;
759
+ default:
760
+ return false;
761
+ }
762
+ }
763
+
746
764
  bool can_be_first_arg_of_implicit_call() const {
747
765
  switch (m_type) {
748
766
  case Token::Type::Arrow:
767
+ case Token::Type::BackRef:
749
768
  case Token::Type::BareName:
750
769
  case Token::Type::BeginKeyword:
751
770
  case Token::Type::Bignum:
@@ -800,6 +819,7 @@ public:
800
819
  switch (m_type) {
801
820
  case Token::Type::Equal:
802
821
  case Token::Type::LBracket:
822
+ case Token::Type::LParen:
803
823
  return true;
804
824
  default:
805
825
  if (is_operator())
@@ -826,6 +846,7 @@ public:
826
846
 
827
847
  bool can_precede_symbol_key() const {
828
848
  switch (m_type) {
849
+ case Type::Arrow:
829
850
  case Type::BareName:
830
851
  case Type::Comma:
831
852
  case Type::Constant:
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class NatalieParser
4
- VERSION = '2.1.0'
4
+ VERSION = '2.3.0'
5
5
  end
@@ -22,7 +22,7 @@ Token InterpolatedStringLexer::build_next_token() {
22
22
  Token InterpolatedStringLexer::consume_string() {
23
23
  SharedPtr<String> buf = new String;
24
24
  while (auto c = current_char()) {
25
- if (c == '\\') {
25
+ if (c == '\\' && m_stop_char != '\\') {
26
26
  advance(); // backslash
27
27
  auto result = consume_escaped_byte(*buf);
28
28
  if (!result.first)
@@ -38,7 +38,7 @@ Token RegexpLexer::build_next_token() {
38
38
  Token RegexpLexer::consume_regexp() {
39
39
  SharedPtr<String> buf = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  switch (c) {
44
44
  case '/':
@@ -38,7 +38,7 @@ Token WordArrayLexer::build_next_token() {
38
38
  Token WordArrayLexer::consume_array() {
39
39
  m_buffer = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  advance();
44
44
  if (c == ' ') {