natalie_parser 2.1.0 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0598d42a8e0b83aa5a340d1615474a4c5a8e7b209032bf574a230778e4d3ce5e'
4
- data.tar.gz: 475cce9e2f078ab440674b48e3b99ba48f163f34b889b376cf681f74a489ce53
3
+ metadata.gz: 2de813d6f0cb6ab94d5a6bb2dd64dc8be719567d401f19ea2b6ac9e71ebaed65
4
+ data.tar.gz: 668f9574d0968781f077e407f6476f2b320195964f8c10342ab2761beb3f7587
5
5
  SHA512:
6
- metadata.gz: 59cd20f85484845036f934ca5b40c020ad35655824950c036a697bf0bc437d3ebd35eb25bacc7c6f062d096c01b58dea5a1ebd688d0d101192ee7f43b9881b25
7
- data.tar.gz: 53bcc9b34ab02943dda1602141ae949e4fa1dcf1c229dfeda8ef710b01d1d9d9c97137ce7e5b08e88b121025009c9a9fd71edbc77eead37f69f37b794fab02f8
6
+ metadata.gz: 02da9b2afc4bc76c5b76b03abb48a40bb9242e2884146bedd61b7b944f358401655aeb077bf7900eaf6c3936c3d06000115d20200b378d17bf98cc18ffd28174
7
+ data.tar.gz: 5e42458d1ab43bce763d3b929b095f9df5ab620e0c9ba25caac5abb8bd44673ded90bcbcd943b94ffe6cadf65d28f5bfebb54bb50942909c4055feb5c613cb2f
data/CHANGELOG.md CHANGED
@@ -1,5 +1,47 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.3.0 (2022-12-31)
4
+
5
+ - FIX: Add for loop variables to local scope
6
+ - FIX: Add lexer support for leading-zero (bare) octal - e.g. 0777
7
+ - FIX: Allow non-local variable in rescue
8
+ - FIX: Allow trailing comma after for loop variables
9
+ - FIX: Fix for-loop with attr assignment
10
+ - FIX: Fix lexing of symbol keys with trailing punctuation
11
+ - FIX: Fix precedence of 'do' keyword in use with 'for'
12
+ - FIX: Parse for loop with splat assignment
13
+ - FIX: Use proper make on OpenBSD
14
+
15
+ ## 2.2.0 (2022-10-24)
16
+
17
+ - FIX: Allow backreference as first arg of implicit call
18
+ - FIX: Exclude `?` and `!` from class/instance/global variable name
19
+ - FIX: Fix parsing of hashes containing assignments
20
+ - FIX: Parse aliases of unary operators and methods named as keywords
21
+ - FIX: Parse alias of keyword-like method to assignment method
22
+ - FIX: Parse alias/undef of assignment methods
23
+ - FIX: Parse backtick as method name
24
+ - FIX: Parse class/module definition in dynamic class/module
25
+ - FIX: Parse `def !`
26
+ - FIX: Parse `def foo::bar`
27
+ - FIX: Parse `foo(<<FOO)`
28
+ - FIX: Parse `for ... do` and `while ... do`
29
+ - FIX: Parse global variables beginning with `$_`
30
+ - FIX: Parse keywords/operators as method names for `::`
31
+ - FIX: Parse __LINE__ constant
32
+ - FIX: Parse more percent strings
33
+ - FIX: Parse more regexes
34
+ - FIX: Parse more stabby procs without parentheses
35
+ - FIX: Parse multiple parameters set to the same default value
36
+ - FIX: Parse parentheses-less stabby proc with keyword arg
37
+ - FIX: Parse undef of methods with special names
38
+ - FIX: Parse `unless ... then`
39
+ - FIX: Parse variable names starting with extended ASCII characters
40
+ - FIX: Raise SyntaxError for `def ====`
41
+ - FIX: Raise syntax error for `def +@.foo`
42
+ - FIX: Tokenize unterminated ternary at EOF
43
+ - FIX: Use lower precedence for block pass
44
+
3
45
  ## 2.1.0 (2022-08-12)
4
46
 
5
47
  - FEAT: Parse for loops
data/Dockerfile CHANGED
@@ -8,7 +8,7 @@ ENV LC_ALL C.UTF-8
8
8
 
9
9
  WORKDIR natalie_parser
10
10
 
11
- COPY Gemfile /natalie_parser/
11
+ COPY Gemfile /natalie_parser/
12
12
  RUN bundle install
13
13
 
14
14
  ARG CC=gcc
data/Gemfile CHANGED
@@ -6,5 +6,5 @@ source 'https://rubygems.org'
6
6
  gem 'minitest'
7
7
  gem 'minitest-focus'
8
8
  gem 'minitest-reporters'
9
- gem 'ruby_parser'
9
+ gem 'ruby_parser', '3.19.1'
10
10
  gem 'rake'
data/README.md CHANGED
@@ -1,5 +1,6 @@
1
1
  # Natalie Parser
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/natalie_parser.svg)](https://badge.fury.io/rb/natalie_parser)
3
4
  [![github build status](https://github.com/natalie-lang/natalie_parser/actions/workflows/build.yml/badge.svg)](https://github.com/natalie-lang/natalie_parser/actions?query=workflow%3ABuild+branch%3Amaster)
4
5
  [![MIT License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/natalie-lang/natalie_parser/blob/master/LICENSE)
5
6
 
data/Rakefile CHANGED
@@ -185,7 +185,8 @@ file "ext/natalie_parser/natalie_parser.#{so_ext}" => [
185
185
  else
186
186
  cxx_hacky = cxx
187
187
  end
188
- sh "CC=#{cc.inspect} CXX=#{cxx_hacky.inspect} make -C #{build_dir} -j -e V=1 2>&1 | tee #{log_file}"
188
+ make = system('which gmake 2>&1 > /dev/null') ? 'gmake' : 'make'
189
+ sh "CC=#{cc.inspect} CXX=#{cxx_hacky.inspect} #{make} -C #{build_dir} -j -e V=1 2>&1 | tee #{log_file}"
189
190
  end
190
191
 
191
192
  file 'build/fragments.hpp' => ['test/parser_test.rb', 'test/support/extract_parser_test_fragments.rb'] do
@@ -25,7 +25,7 @@ public:
25
25
 
26
26
  virtual void reset_sexp() override {
27
27
  m_sexp = rb_class_new_instance(0, nullptr, Sexp);
28
- rb_ivar_set(m_sexp, rb_intern("@file"), get_file_string(file()));
28
+ rb_ivar_set(m_sexp, rb_intern("@file"), get_file_string(*file()));
29
29
  rb_ivar_set(m_sexp, rb_intern("@line"), rb_int_new(line() + 1));
30
30
  rb_ivar_set(m_sexp, rb_intern("@column"), rb_int_new(column() + 1));
31
31
  }
@@ -139,13 +139,13 @@ public:
139
139
  private:
140
140
  VALUE m_sexp { Qnil };
141
141
 
142
- static VALUE get_file_string(SharedPtr<const String> file) {
143
- auto file_string = s_file_cache.get(*file);
142
+ static VALUE get_file_string(const String &file) {
143
+ auto file_string = s_file_cache.get(file);
144
144
  if (!file_string) {
145
- file_string = rb_str_new(file->c_str(), file->length());
145
+ file_string = rb_str_new(file.c_str(), file.length());
146
146
  // FIXME: Seems there is no way to un-register and object. :-(
147
147
  rb_gc_register_mark_object(file_string);
148
- s_file_cache.put(*file, file_string);
148
+ s_file_cache.put(file, file_string);
149
149
  }
150
150
  return file_string;
151
151
  }
@@ -27,9 +27,9 @@ VALUE initialize(int argc, VALUE *argv, VALUE self) {
27
27
  return self;
28
28
  }
29
29
 
30
- VALUE node_to_ruby(TM::SharedPtr<NatalieParser::Node> node) {
31
- NatalieParser::MRICreator creator { node.ref() };
32
- node->transform(&creator);
30
+ VALUE node_to_ruby(const NatalieParser::Node &node) {
31
+ NatalieParser::MRICreator creator { node };
32
+ node.transform(&creator);
33
33
  return creator.sexp();
34
34
  }
35
35
 
@@ -41,7 +41,7 @@ VALUE parse_on_instance(VALUE self) {
41
41
  auto parser = NatalieParser::Parser { code_string, path_string };
42
42
  try {
43
43
  auto tree = parser.tree();
44
- VALUE ast = node_to_ruby(tree);
44
+ VALUE ast = node_to_ruby(*tree);
45
45
  return ast;
46
46
  } catch (NatalieParser::Parser::SyntaxError &error) {
47
47
  rb_raise(rb_eSyntaxError, "%s", error.message());
@@ -80,6 +80,7 @@ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
80
80
  case NatalieParser::Token::Type::Constant:
81
81
  case NatalieParser::Token::Type::GlobalVariable:
82
82
  case NatalieParser::Token::Type::InstanceVariable:
83
+ case NatalieParser::Token::Type::OperatorName:
83
84
  case NatalieParser::Token::Type::Symbol:
84
85
  case NatalieParser::Token::Type::SymbolKey: {
85
86
  auto literal = token.literal_string();
@@ -70,9 +70,9 @@ protected:
70
70
  virtual bool skip_whitespace();
71
71
  virtual Token build_next_token();
72
72
  Token consume_symbol();
73
+ SharedPtr<String> consume_word();
73
74
  Token consume_word(Token::Type type);
74
- Token consume_bare_name();
75
- Token consume_constant();
75
+ Token consume_bare_name_or_constant(Token::Type type);
76
76
  Token consume_global_variable();
77
77
  Token consume_heredoc();
78
78
  Token consume_numeric();
@@ -80,11 +80,19 @@ protected:
80
80
  Token consume_nth_ref();
81
81
  long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
82
82
  long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
83
- Token consume_double_quoted_string(char, char, Token::Type begin_type = Token::Type::InterpolatedStringBegin, Token::Type end_type = Token::Type::InterpolatedStringEnd);
83
+ Token consume_double_quoted_string(char, char, Token::Type begin_type, Token::Type end_type);
84
84
  Token consume_single_quoted_string(char, char);
85
85
  Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
86
86
  Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
87
87
  Token consume_regexp(char start_char, char stop_char);
88
+ Token consume_percent_symbol(char start_char, char stop_char);
89
+ Token consume_interpolated_string(char start_char, char stop_char);
90
+ Token consume_interpolated_shell(char start_char, char stop_char);
91
+ Token consume_percent_lower_w(char start_char, char stop_char);
92
+ Token consume_percent_upper_w(char start_char, char stop_char);
93
+ Token consume_percent_lower_i(char start_char, char stop_char);
94
+ Token consume_percent_upper_i(char start_char, char stop_char);
95
+ Token consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered = true);
88
96
  SharedPtr<String> consume_non_whitespace();
89
97
 
90
98
  void utf32_codepoint_to_utf8(String &buf, long long codepoint);
@@ -95,7 +103,7 @@ protected:
95
103
  bool token_is_first_on_line() const;
96
104
 
97
105
  bool char_can_be_string_or_regexp_delimiter(char c) const {
98
- return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
106
+ return (c >= '!' && c <= '/') || c == ':' || c == ';' || c == '=' || c == '?' || c == '@' || c == '\\' || c == '~' || c == '|' || (c >= '^' && c <= '`');
99
107
  }
100
108
 
101
109
  SharedPtr<String> m_input;
@@ -131,5 +139,10 @@ protected:
131
139
  // then increment m_pair_depth
132
140
  char m_start_char { 0 };
133
141
  int m_pair_depth { 0 };
142
+
143
+ size_t m_remaining_method_names { 0 };
144
+ bool m_allow_assignment_method { false };
145
+ Token::Type m_method_name_separator { Token::Type::Invalid };
146
+ Token m_last_method_name {};
134
147
  };
135
148
  }
@@ -1,7 +1,6 @@
1
1
  #pragma once
2
2
 
3
3
  #include "natalie_parser/node/block_node.hpp"
4
- #include "natalie_parser/node/identifier_node.hpp"
5
4
  #include "natalie_parser/node/node.hpp"
6
5
  #include "natalie_parser/node/node_with_args.hpp"
7
6
  #include "tm/hashmap.hpp"
@@ -23,24 +22,24 @@ public:
23
22
  m_exceptions.push(node);
24
23
  }
25
24
 
26
- void set_exception_name(SharedPtr<IdentifierNode> name) {
25
+ void set_exception_name(SharedPtr<Node> name) {
27
26
  m_name = name;
28
27
  }
29
28
 
30
29
  void set_body(SharedPtr<BlockNode> body) { m_body = body; }
31
30
 
32
- SharedPtr<Node> name_to_node() const;
31
+ SharedPtr<Node> name_to_assignment() const;
33
32
 
34
33
  bool has_name() const { return m_name; }
35
34
 
36
- const SharedPtr<IdentifierNode> name() const { return m_name; }
35
+ const SharedPtr<Node> name() const { return m_name; }
37
36
  const Vector<SharedPtr<Node>> &exceptions() const { return m_exceptions; }
38
37
  const SharedPtr<BlockNode> body() const { return m_body; }
39
38
 
40
39
  virtual void transform(Creator *creator) const override;
41
40
 
42
41
  protected:
43
- SharedPtr<IdentifierNode> m_name {};
42
+ SharedPtr<Node> m_name {};
44
43
  Vector<SharedPtr<Node>> m_exceptions {};
45
44
  SharedPtr<BlockNode> m_body {};
46
45
  };
@@ -25,22 +25,7 @@ public:
25
25
  const SharedPtr<Node> vars() const { return m_vars; }
26
26
  const SharedPtr<BlockNode> body() const { return m_body; }
27
27
 
28
- virtual void transform(Creator *creator) const override {
29
- creator->set_type("for");
30
- creator->append(m_expr);
31
- switch (m_vars->type()) {
32
- case Node::Type::Identifier:
33
- creator->with_assignment(true, [&]() { creator->append(*m_vars); });
34
- break;
35
- case Node::Type::MultipleAssignment:
36
- creator->append(m_vars);
37
- break;
38
- default:
39
- TM_UNREACHABLE();
40
- }
41
- if (!m_body->is_empty())
42
- creator->append(m_body->without_unnecessary_nesting());
43
- }
28
+ virtual void transform(Creator *creator) const override;
44
29
 
45
30
  protected:
46
31
  SharedPtr<Node> m_expr {};
@@ -49,16 +49,22 @@ public:
49
49
 
50
50
  enum class Precedence;
51
51
 
52
+ enum class IterAllow {
53
+ NONE,
54
+ CURLY_ONLY,
55
+ CURLY_AND_BLOCK,
56
+ };
57
+
52
58
  SharedPtr<Node> tree();
53
59
 
54
60
  private:
55
- bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
61
+ bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow);
56
62
 
57
63
  Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
58
64
 
59
65
  bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
60
66
 
61
- SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &);
67
+ SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &, IterAllow = IterAllow::CURLY_AND_BLOCK);
62
68
 
63
69
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
64
70
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
@@ -67,8 +73,9 @@ private:
67
73
  SharedPtr<Node> parse_if_body(LocalsHashmap &);
68
74
  SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
69
75
 
76
+ void reinsert_collapsed_newline();
70
77
  SharedPtr<Node> parse_alias(LocalsHashmap &);
71
- SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *, bool);
78
+ SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *);
72
79
  SharedPtr<Node> parse_array(LocalsHashmap &);
73
80
  SharedPtr<Node> parse_back_ref(LocalsHashmap &);
74
81
  SharedPtr<Node> parse_begin_block(LocalsHashmap &);
@@ -96,7 +103,8 @@ private:
96
103
  Method,
97
104
  Proc,
98
105
  };
99
- void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext);
106
+ void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext, IterAllow = IterAllow::CURLY_AND_BLOCK);
107
+ SharedPtr<Node> parse_arg_default_value(LocalsHashmap &, IterAllow);
100
108
 
101
109
  SharedPtr<Node> parse_encoding(LocalsHashmap &);
102
110
  SharedPtr<Node> parse_end_block(LocalsHashmap &);
@@ -115,6 +123,7 @@ private:
115
123
  SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
116
124
  SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
117
125
  SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
126
+ SharedPtr<Node> parse_line_constant(LocalsHashmap &);
118
127
  SharedPtr<Node> parse_lit(LocalsHashmap &);
119
128
  SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
120
129
  SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
@@ -124,7 +133,7 @@ private:
124
133
  SharedPtr<Node> parse_nil(LocalsHashmap &);
125
134
  SharedPtr<Node> parse_not(LocalsHashmap &);
126
135
  SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
127
- void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
136
+ void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &, IterAllow);
128
137
  SharedPtr<Node> parse_redo(LocalsHashmap &);
129
138
  SharedPtr<Node> parse_retry(LocalsHashmap &);
130
139
  SharedPtr<Node> parse_return(LocalsHashmap &);
@@ -152,6 +161,7 @@ private:
152
161
  SharedPtr<Node> parse_assignment_expression(SharedPtr<Node>, LocalsHashmap &);
153
162
  SharedPtr<Node> parse_assignment_expression_without_multiple_values(SharedPtr<Node>, LocalsHashmap &);
154
163
  SharedPtr<Node> parse_assignment_expression(SharedPtr<Node>, LocalsHashmap &, bool);
164
+ void add_assignment_locals(SharedPtr<Node>, LocalsHashmap &);
155
165
  SharedPtr<Node> parse_assignment_expression_value(bool, LocalsHashmap &, bool);
156
166
  SharedPtr<Node> parse_assignment_identifier(bool, LocalsHashmap &);
157
167
  SharedPtr<Node> parse_call_expression_without_parens(SharedPtr<Node>, LocalsHashmap &);
@@ -103,6 +103,7 @@ public:
103
103
  NotKeyword,
104
104
  NotMatch,
105
105
  NthRef,
106
+ OperatorName,
106
107
  OrKeyword,
107
108
  Percent,
108
109
  PercentEqual,
@@ -431,6 +432,8 @@ public:
431
432
  return "!";
432
433
  case Type::NthRef:
433
434
  return "nth_ref";
435
+ case Type::OperatorName:
436
+ return "operator";
434
437
  case Type::OrKeyword:
435
438
  return "or";
436
439
  case Type::Percent:
@@ -614,6 +617,7 @@ public:
614
617
  case Token::Type::LessThanOrEqual:
615
618
  case Token::Type::Match:
616
619
  case Token::Type::Minus:
620
+ case Token::Type::Not:
617
621
  case Token::Type::NotEqual:
618
622
  case Token::Type::NotMatch:
619
623
  case Token::Type::Percent:
@@ -635,6 +639,7 @@ public:
635
639
  bool is_closing_token() const { return m_type == Type::RBracket || m_type == Type::RCurlyBrace || m_type == Type::RParen; }
636
640
  bool is_comma() const { return m_type == Type::Comma; }
637
641
  bool is_comment() const { return m_type == Type::Comment; }
642
+ bool is_constant_resolution() const { return m_type == Type::ConstantResolution; }
638
643
  bool is_def_keyword() const { return m_type == Type::DefKeyword; }
639
644
  bool is_doc() const { return m_type == Type::Doc; }
640
645
  bool is_dot() const { return m_type == Type::Dot; }
@@ -743,9 +748,23 @@ public:
743
748
  }
744
749
  }
745
750
 
751
+ bool can_be_first_arg_of_def() const {
752
+ switch (m_type) {
753
+ case Token::Type::Ampersand:
754
+ case Token::Type::BareName:
755
+ case Token::Type::Star:
756
+ case Token::Type::StarStar:
757
+ case Token::Type::SymbolKey:
758
+ return true;
759
+ default:
760
+ return false;
761
+ }
762
+ }
763
+
746
764
  bool can_be_first_arg_of_implicit_call() const {
747
765
  switch (m_type) {
748
766
  case Token::Type::Arrow:
767
+ case Token::Type::BackRef:
749
768
  case Token::Type::BareName:
750
769
  case Token::Type::BeginKeyword:
751
770
  case Token::Type::Bignum:
@@ -800,6 +819,7 @@ public:
800
819
  switch (m_type) {
801
820
  case Token::Type::Equal:
802
821
  case Token::Type::LBracket:
822
+ case Token::Type::LParen:
803
823
  return true;
804
824
  default:
805
825
  if (is_operator())
@@ -826,6 +846,7 @@ public:
826
846
 
827
847
  bool can_precede_symbol_key() const {
828
848
  switch (m_type) {
849
+ case Type::Arrow:
829
850
  case Type::BareName:
830
851
  case Type::Comma:
831
852
  case Type::Constant:
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class NatalieParser
4
- VERSION = '2.1.0'
4
+ VERSION = '2.3.0'
5
5
  end
@@ -22,7 +22,7 @@ Token InterpolatedStringLexer::build_next_token() {
22
22
  Token InterpolatedStringLexer::consume_string() {
23
23
  SharedPtr<String> buf = new String;
24
24
  while (auto c = current_char()) {
25
- if (c == '\\') {
25
+ if (c == '\\' && m_stop_char != '\\') {
26
26
  advance(); // backslash
27
27
  auto result = consume_escaped_byte(*buf);
28
28
  if (!result.first)
@@ -38,7 +38,7 @@ Token RegexpLexer::build_next_token() {
38
38
  Token RegexpLexer::consume_regexp() {
39
39
  SharedPtr<String> buf = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  switch (c) {
44
44
  case '/':
@@ -38,7 +38,7 @@ Token WordArrayLexer::build_next_token() {
38
38
  Token WordArrayLexer::consume_array() {
39
39
  m_buffer = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  advance();
44
44
  if (c == ' ') {