regexp_parser 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -448,7 +448,7 @@
448
448
  };
449
449
 
450
450
  alternation {
451
- if in_conditional and conditional_stack.length > 0 and
451
+ if in_conditional and conditional_stack.length > 0 and
452
452
  conditional_stack.last[1] == @group_depth
453
453
  emit(:conditional, :separator, *text(data, ts, te))
454
454
  else
@@ -538,7 +538,7 @@
538
538
 
539
539
  # (?#...) comments: parsed as a single expression, without introducing a
540
540
  # new nesting level. Comments may not include parentheses, escaped or not.
541
- # special case for close, action performed on all transitions to get the
541
+ # special case for close, action performed on all transitions to get the
542
542
  # correct closing count.
543
543
  # ------------------------------------------------------------------------
544
544
  group_open . group_comment $group_closed {
@@ -622,14 +622,10 @@
622
622
  end
623
623
  else
624
624
  if @spacing_stack.length > 1 and
625
- @spacing_stack.last[1] == (@group_depth + 1)
625
+ @spacing_stack.last[:depth] == (@group_depth + 1)
626
626
  @spacing_stack.pop
627
627
 
628
- @free_spacing = @spacing_stack.last[0]
629
-
630
- if @spacing_stack.length == 1
631
- @in_options = false
632
- end
628
+ @free_spacing = @spacing_stack.last[:free_spacing]
633
629
  end
634
630
 
635
631
  emit(:group, :close, *text(data, ts, te))
@@ -777,9 +773,7 @@
777
773
  # THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
778
774
  # This file was generated from lib/regexp_parser/scanner/scanner.rl
779
775
 
780
- module Regexp::Scanner
781
- %% write data;
782
-
776
+ class Regexp::Scanner
783
777
  # General scanner error (catch all)
784
778
  class ScannerError < StandardError; end
785
779
 
@@ -839,6 +833,10 @@ module Regexp::Scanner
839
833
  # This method may raise errors if a syntax error is encountered.
840
834
  # --------------------------------------------------------------------------
841
835
  def self.scan(input_object, &block)
836
+ new.scan(input_object, &block)
837
+ end
838
+
839
+ def scan(input_object, &block)
842
840
  @literal, top, stack = nil, 0, []
843
841
 
844
842
  if input_object.is_a?(Regexp)
@@ -857,11 +855,12 @@ module Regexp::Scanner
857
855
  @block = block_given? ? block : nil
858
856
 
859
857
  @in_group, @group_depth = false, 0
860
- @in_options, @spacing_stack = false, [[@free_spacing, 0]]
858
+ @spacing_stack = [{:free_spacing => @free_spacing, :depth => 0}]
861
859
 
862
860
  in_set, set_depth, set_type = false, 0, :set
863
861
  in_conditional, conditional_depth, conditional_stack = false, 0, []
864
862
 
863
+ %% write data;
865
864
  %% write init;
866
865
  %% write exec;
867
866
 
@@ -881,12 +880,25 @@ module Regexp::Scanner
881
880
  @tokens
882
881
  end
883
882
 
883
+ # Emits an array with the details of the scanned pattern
884
+ def emit(type, token, text, ts, te)
885
+ #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
886
+
887
+ emit_literal if @literal
888
+
889
+ if @block
890
+ @block.call type, token, text, ts, te
891
+ end
892
+
893
+ @tokens << [type, token, text, ts, te]
894
+ end
895
+
884
896
  private
885
897
 
886
898
  # Ragel's regex-based scan of the group options introduced a lot of
887
899
  # ambiguity, so we just ask it to find the beginning of what looks
888
900
  # like an options run and handle the rest in here.
889
- def self.scan_options(p, data, ts, te)
901
+ def scan_options(p, data, ts, te)
890
902
  text = text(data, ts, te).first
891
903
 
892
904
  options_char, options_length = true, 0
@@ -938,26 +950,26 @@ module Regexp::Scanner
938
950
  end
939
951
 
940
952
  # Copy from ts to te from data as text
941
- def self.copy(data, range)
953
+ def copy(data, range)
942
954
  data[range].pack('c*')
943
955
  end
944
956
 
945
957
  # Copy from ts to te from data as text, returning an array with the text
946
958
  # and the offsets used to copy it.
947
- def self.text(data, ts, te, soff = 0)
959
+ def text(data, ts, te, soff = 0)
948
960
  [copy(data, ts-soff..te-1), ts-soff, te]
949
961
  end
950
962
 
951
963
  # Appends one or more characters to the literal buffer, to be emitted later
952
964
  # by a call to emit_literal. Contents can be a mix of ASCII and UTF-8.
953
- def self.append_literal(data, ts, te)
965
+ def append_literal(data, ts, te)
954
966
  @literal ||= []
955
967
  @literal << text(data, ts, te)
956
968
  end
957
969
 
958
970
  # Emits the literal run collected by calls to the append_literal method,
959
971
  # using the total start (ts) and end (te) offsets of the run.
960
- def self.emit_literal
972
+ def emit_literal
961
973
  ts, te = @literal.first[1], @literal.last[2]
962
974
  text = @literal.map {|t| t[0]}.join
963
975
 
@@ -967,43 +979,34 @@ module Regexp::Scanner
967
979
  emit(:literal, :literal, text, ts, te)
968
980
  end
969
981
 
970
- def self.emit_options(text, ts, te)
971
- if text =~ /\(\?([mixdau]+)?-?([mix]+)?:/
972
- positive, negative = $1, $2
982
+ def emit_options(text, ts, te)
983
+ if text =~ /\(\?([mixdau]*)-?([mix]*)(:)?/
984
+ positive, negative, group_local = $1, $2, $3
973
985
 
974
- if positive =~ /x/
986
+ if positive.include?('x')
975
987
  @free_spacing = true
976
988
  end
977
989
 
978
990
  # If the x appears in both, treat it like ruby does, the second cancels
979
991
  # the first.
980
- if negative =~ /x/
992
+ if negative.include?('x')
981
993
  @free_spacing = false
982
994
  end
983
- end
984
-
985
- @in_options = true
986
- @spacing_stack << [@free_spacing, @group_depth]
987
-
988
- emit(:group, :options, text, ts, te)
989
- end
990
-
991
- # Emits an array with the details of the scanned pattern
992
- def self.emit(type, token, text, ts, te)
993
- #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
994
995
 
995
- emit_literal if @literal
996
-
997
- if @block
998
- @block.call type, token, text, ts, te
996
+ if group_local
997
+ @spacing_stack << {:free_spacing => @free_spacing, :depth => @group_depth}
998
+ else
999
+ # switch for parent group level
1000
+ @spacing_stack.last[:free_spacing] = @free_spacing
1001
+ end
999
1002
  end
1000
1003
 
1001
- @tokens << [type, token, text, ts, te]
1004
+ emit(:group, :options, text, ts, te)
1002
1005
  end
1003
1006
 
1004
1007
  # Centralizes and unifies the handling of validation related
1005
1008
  # errors.
1006
- def self.validation_error(type, what, reason)
1009
+ def validation_error(type, what, reason)
1007
1010
  case type
1008
1011
  when :group
1009
1012
  error = InvalidGroupError.new(what, reason)
@@ -1019,12 +1022,12 @@ module Regexp::Scanner
1019
1022
  end
1020
1023
 
1021
1024
  # Used for references with an empty name or number
1022
- def self.empty_backref_error(type, what)
1025
+ def empty_backref_error(type, what)
1023
1026
  validation_error(:backref, what, 'ref ID is empty')
1024
1027
  end
1025
1028
 
1026
1029
  # Used for named expressions with an empty name
1027
- def self.empty_name_error(type, what)
1030
+ def empty_name_error(type, what)
1028
1031
  validation_error(type, what, 'name is empty')
1029
1032
  end
1030
1033
 
@@ -3,7 +3,7 @@ module Regexp::Syntax
3
3
 
4
4
  module Backreference
5
5
  Name = [:name_ref]
6
- Number = [:number_ref, :number_rel_ref]
6
+ Number = [:number, :number_ref, :number_rel_ref]
7
7
 
8
8
  NestLevel = [:name_nest_ref, :number_nest_ref]
9
9
 
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
- module Parser
3
- VERSION = '0.4.6'
2
+ class Parser
3
+ VERSION = '0.4.7'
4
4
  end
5
5
  end
@@ -14,7 +14,7 @@ class ExpressionToH < Test::Unit::TestCase
14
14
  :starts_at => 0,
15
15
  :length => 3,
16
16
  :quantifier => nil,
17
- :options => nil,
17
+ :options => {},
18
18
  :level => nil,
19
19
  :set_level => nil,
20
20
  :conditional_level => nil,
@@ -26,7 +26,7 @@ class ExpressionToH < Test::Unit::TestCase
26
26
  :starts_at => 0,
27
27
  :length => 3,
28
28
  :quantifier => nil,
29
- :options => nil,
29
+ :options => {},
30
30
  :level => 0,
31
31
  :set_level => 0,
32
32
  :conditional_level => 0
@@ -3,6 +3,9 @@ require File.expand_path("../../helpers", __FILE__)
3
3
  class LexerRefCalls < Test::Unit::TestCase
4
4
 
5
5
  tests = {
6
+ # Traditional numerical group back-reference
7
+ '(abc)\1' => [3, :backref, :number, '\1', 5, 7, 0, 0, 0],
8
+
6
9
  # Group back-references, named, numbered, and relative
7
10
  '(?<X>abc)\k<X>' => [3, :backref, :name_ref, '\k<X>', 9, 14, 0, 0, 0],
8
11
  "(?<X>abc)\\k'X'" => [3, :backref, :name_ref, "\\k'X'", 9, 14, 0, 0, 0],
@@ -1,58 +1,62 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  class ParserErrors < Test::Unit::TestCase
4
+ def setup
5
+ @rp = Regexp::Parser.new
6
+ @rp.parse(/foo/)
7
+ end
4
8
 
5
9
  def test_parser_unknown_token_type
6
10
  assert_raise( Regexp::Parser::UnknownTokenTypeError ) {
7
- RP.parse_token(Regexp::Token.new(:foo, :bar))
11
+ @rp.__send__(:parse_token, Regexp::Token.new(:foo, :bar))
8
12
  }
9
13
  end
10
14
 
11
15
  def test_parser_unknown_set_token
12
16
  assert_raise( Regexp::Parser::UnknownTokenError ) {
13
- RP.parse_token(Regexp::Token.new(:set, :foo))
17
+ @rp.__send__(:parse_token, Regexp::Token.new(:set, :foo))
14
18
  }
15
19
  end
16
20
 
17
21
  def test_parser_unknown_meta_token
18
22
  assert_raise( Regexp::Parser::UnknownTokenError ) {
19
- RP.parse_token(Regexp::Token.new(:meta, :foo))
23
+ @rp.__send__(:parse_token, Regexp::Token.new(:meta, :foo))
20
24
  }
21
25
  end
22
26
 
23
27
  def test_parser_unknown_character_type_token
24
28
  assert_raise( Regexp::Parser::UnknownTokenError ) {
25
- RP.parse_token(Regexp::Token.new(:type, :foo))
29
+ @rp.__send__(:parse_token, Regexp::Token.new(:type, :foo))
26
30
  }
27
31
  end
28
32
 
29
33
  def test_parser_unknown_unicode_property_token
30
34
  assert_raise( Regexp::Parser::UnknownTokenError ) {
31
- RP.parse_token(Regexp::Token.new(:property, :foo))
35
+ @rp.__send__(:parse_token, Regexp::Token.new(:property, :foo))
32
36
  }
33
37
  end
34
38
 
35
39
  def test_parser_unknown_unicode_nonproperty_token
36
40
  assert_raise( Regexp::Parser::UnknownTokenError ) {
37
- RP.parse_token(Regexp::Token.new(:nonproperty, :foo))
41
+ @rp.__send__(:parse_token, Regexp::Token.new(:nonproperty, :foo))
38
42
  }
39
43
  end
40
44
 
41
45
  def test_parser_unknown_anchor_token
42
46
  assert_raise( Regexp::Parser::UnknownTokenError ) {
43
- RP.parse_token(Regexp::Token.new(:anchor, :foo))
47
+ @rp.__send__(:parse_token, Regexp::Token.new(:anchor, :foo))
44
48
  }
45
49
  end
46
50
 
47
51
  def test_parser_unknown_quantifier_token
48
52
  assert_raise( Regexp::Parser::UnknownTokenError ) {
49
- RP.parse_token(Regexp::Token.new(:quantifier, :foo))
53
+ @rp.__send__(:parse_token, Regexp::Token.new(:quantifier, :foo))
50
54
  }
51
55
  end
52
56
 
53
57
  def test_parser_unknown_group_open_token
54
58
  assert_raise( Regexp::Parser::UnknownTokenError ) {
55
- RP.parse_token(Regexp::Token.new(:group, :foo))
59
+ @rp.__send__(:parse_token, Regexp::Token.new(:group, :foo))
56
60
  }
57
61
  end
58
62
 
@@ -3,44 +3,170 @@ require File.expand_path("../../helpers", __FILE__)
3
3
  class TestParserGroups < Test::Unit::TestCase
4
4
 
5
5
  def test_parse_root_options_mi
6
- t = RP.parse((/[abc]/mi).to_s, 'ruby/1.8')
6
+ t = RP.parse(/[abc]/mi, 'ruby/1.8')
7
7
 
8
8
  assert_equal true, t.m?
9
9
  assert_equal true, t.i?
10
10
  assert_equal false, t.x?
11
11
  end
12
12
 
13
- def test_parse_nested_options_m
14
- t = RP.parse('(?xi-m:a(?m-ix:b))', 'ruby/1.8')
13
+ def test_parse_option_group
14
+ t = RP.parse(/(?m:a)/, 'ruby/1.8')
15
+
16
+ assert_equal Group::Options, t.expressions[0].class
17
+ assert_equal :options, t.expressions[0].token
18
+
19
+ assert_equal true, t.expressions[0].m?
20
+ assert_equal false, t.expressions[0].i?
21
+ assert_equal false, t.expressions[0].x?
22
+ end
23
+
24
+ def test_parse_self_defeating_option_group
25
+ t = RP.parse(/(?m-m:a)/, 'ruby/1.8')
26
+
27
+ assert_equal false, t.expressions[0].m?
28
+ assert_equal false, t.expressions[0].i?
29
+ assert_equal false, t.expressions[0].x?
30
+ end
31
+
32
+ def test_parse_nested_options_activate_one
33
+ t = RP.parse(/(?x-mi:a(?m:b))/, 'ruby/1.8')
34
+
35
+ assert_equal false, t.expressions[0].m?
36
+ assert_equal false, t.expressions[0].i?
37
+ assert_equal true, t.expressions[0].x?
15
38
 
16
39
  assert_equal true, t.expressions[0].expressions[1].m?
17
40
  assert_equal false, t.expressions[0].expressions[1].i?
18
- assert_equal false, t.expressions[0].expressions[1].x?
41
+ assert_equal true, t.expressions[0].expressions[1].x?
19
42
  end
20
43
 
21
- def test_parse_nested_options_xm
22
- t = RP.parse(/(?i-xm:a(?mx-i:b))/, 'ruby/1.8')
44
+ def test_parse_nested_options_deactivate_one
45
+ t = RP.parse(/(?ix-m:a(?-i:b))/, 'ruby/1.8')
23
46
 
24
- assert_equal true, t.expressions[0].expressions[1].m?
47
+ assert_equal false, t.expressions[0].m?
48
+ assert_equal true, t.expressions[0].i?
49
+ assert_equal true, t.expressions[0].x?
50
+
51
+ assert_equal false, t.expressions[0].expressions[1].m?
25
52
  assert_equal false, t.expressions[0].expressions[1].i?
26
53
  assert_equal true, t.expressions[0].expressions[1].x?
27
54
  end
28
55
 
29
- def test_parse_nested_options_im
30
- t = RP.parse(/(?x-mi:a(?mi-x:b))/, 'ruby/1.8')
56
+ def test_parse_nested_options_invert_all
57
+ t = RP.parse('(?xi-m:a(?m-ix:b))', 'ruby/1.8')
58
+
59
+ assert_equal false, t.expressions[0].m?
60
+ assert_equal true, t.expressions[0].i?
61
+ assert_equal true, t.expressions[0].x?
31
62
 
32
63
  assert_equal true, t.expressions[0].expressions[1].m?
33
- assert_equal true, t.expressions[0].expressions[1].i?
64
+ assert_equal false, t.expressions[0].expressions[1].i?
34
65
  assert_equal false, t.expressions[0].expressions[1].x?
35
66
  end
36
67
 
68
+ def test_parse_nested_options_affect_literal_subexpressions
69
+ t = RP.parse(/(?x-mi:a(?m:b))/, 'ruby/1.8')
70
+
71
+ # a
72
+ assert_equal false, t.expressions[0].expressions[0].m?
73
+ assert_equal false, t.expressions[0].expressions[0].i?
74
+ assert_equal true, t.expressions[0].expressions[0].x?
75
+
76
+ # b
77
+ assert_equal true, t.expressions[0].expressions[1].expressions[0].m?
78
+ assert_equal false, t.expressions[0].expressions[1].expressions[0].i?
79
+ assert_equal true, t.expressions[0].expressions[1].expressions[0].x?
80
+ end
81
+
82
+ def test_parse_option_switch_group
83
+ t = RP.parse(/a(?i-m)b/m, 'ruby/1.8')
84
+
85
+ assert_equal Group::Options, t.expressions[1].class
86
+ assert_equal :options, t.expressions[1].token
87
+ # TODO: change this ^ to :options_switch in v1.0.0
88
+
89
+ assert_equal false, t.expressions[1].m?
90
+ assert_equal true, t.expressions[1].i?
91
+ assert_equal false, t.expressions[1].x?
92
+ end
93
+
94
+ def test_parse_option_switch_affects_following_expressions
95
+ t = RP.parse(/a(?i-m)b/m, 'ruby/1.8')
96
+
97
+ # a
98
+ assert_equal true, t.expressions[0].m?
99
+ assert_equal false, t.expressions[0].i?
100
+ assert_equal false, t.expressions[0].x?
101
+
102
+ # b
103
+ assert_equal false, t.expressions[2].m?
104
+ assert_equal true, t.expressions[2].i?
105
+ assert_equal false, t.expressions[2].x?
106
+ end
107
+
108
+ def test_parse_option_switch_in_group
109
+ t = RP.parse(/(a(?i-m)b)c/m, 'ruby/1.8')
110
+
111
+ group1 = t.expressions[0]
112
+
113
+ assert_equal true, group1.m?
114
+ assert_equal false, group1.i?
115
+ assert_equal false, group1.x?
116
+
117
+ # a
118
+ assert_equal true, group1.expressions[0].m?
119
+ assert_equal false, group1.expressions[0].i?
120
+ assert_equal false, group1.expressions[0].x?
121
+
122
+ # (?i-m)
123
+ assert_equal false, group1.expressions[1].m?
124
+ assert_equal true, group1.expressions[1].i?
125
+ assert_equal false, group1.expressions[1].x?
126
+
127
+ # b
128
+ assert_equal false, group1.expressions[2].m?
129
+ assert_equal true, group1.expressions[2].i?
130
+ assert_equal false, group1.expressions[2].x?
131
+
132
+ # c
133
+ assert_equal true, t.expressions[1].m?
134
+ assert_equal false, t.expressions[1].i?
135
+ assert_equal false, t.expressions[1].x?
136
+ end
137
+
138
+ def test_parse_nested_option_switch_in_group
139
+ t = RP.parse(/((?i-m)(a(?-i)b))/m, 'ruby/1.8')
140
+
141
+ group2 = t.expressions[0].expressions[1]
142
+
143
+ assert_equal false, group2.m?
144
+ assert_equal true, group2.i?
145
+ assert_equal false, group2.x?
146
+
147
+ # a
148
+ assert_equal false, group2.expressions[0].m?
149
+ assert_equal true, group2.expressions[0].i?
150
+ assert_equal false, group2.expressions[0].x?
151
+
152
+ # (?-i)
153
+ assert_equal false, group2.expressions[1].m?
154
+ assert_equal false, group2.expressions[1].i?
155
+ assert_equal false, group2.expressions[1].x?
156
+
157
+ # b
158
+ assert_equal false, group2.expressions[2].m?
159
+ assert_equal false, group2.expressions[2].i?
160
+ assert_equal false, group2.expressions[2].x?
161
+ end
162
+
37
163
  if RUBY_VERSION >= '2.0'
38
164
  def test_parse_options_dau
39
165
  t = RP.parse('(?dua:abc)')
40
166
 
41
- assert_equal true, t.expressions[0].d?
167
+ assert_equal false, t.expressions[0].d?
42
168
  assert_equal true, t.expressions[0].a?
43
- assert_equal true, t.expressions[0].u?
169
+ assert_equal false, t.expressions[0].u?
44
170
  end
45
171
 
46
172
  def test_parse_nested_options_dau
@@ -65,12 +191,12 @@ class TestParserGroups < Test::Unit::TestCase
65
191
  assert_equal false, t.expressions[0].a?
66
192
  assert_equal false, t.expressions[0].u?
67
193
 
68
- assert_equal true, t.expressions[0].expressions[1].d?
194
+ assert_equal false, t.expressions[0].expressions[1].d?
69
195
  assert_equal true, t.expressions[0].expressions[1].a?
70
196
  assert_equal false, t.expressions[0].expressions[1].u?
71
197
  assert_equal false, t.expressions[0].expressions[1].x?
72
198
  assert_equal false, t.expressions[0].expressions[1].m?
73
- assert_equal false, t.expressions[0].expressions[1].i?
199
+ assert_equal true, t.expressions[0].expressions[1].i?
74
200
  end
75
201
  end
76
202