regexp_parser 0.4.6 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -448,7 +448,7 @@
448
448
  };
449
449
 
450
450
  alternation {
451
- if in_conditional and conditional_stack.length > 0 and
451
+ if in_conditional and conditional_stack.length > 0 and
452
452
  conditional_stack.last[1] == @group_depth
453
453
  emit(:conditional, :separator, *text(data, ts, te))
454
454
  else
@@ -538,7 +538,7 @@
538
538
 
539
539
  # (?#...) comments: parsed as a single expression, without introducing a
540
540
  # new nesting level. Comments may not include parentheses, escaped or not.
541
- # special case for close, action performed on all transitions to get the
541
+ # special case for close, action performed on all transitions to get the
542
542
  # correct closing count.
543
543
  # ------------------------------------------------------------------------
544
544
  group_open . group_comment $group_closed {
@@ -622,14 +622,10 @@
622
622
  end
623
623
  else
624
624
  if @spacing_stack.length > 1 and
625
- @spacing_stack.last[1] == (@group_depth + 1)
625
+ @spacing_stack.last[:depth] == (@group_depth + 1)
626
626
  @spacing_stack.pop
627
627
 
628
- @free_spacing = @spacing_stack.last[0]
629
-
630
- if @spacing_stack.length == 1
631
- @in_options = false
632
- end
628
+ @free_spacing = @spacing_stack.last[:free_spacing]
633
629
  end
634
630
 
635
631
  emit(:group, :close, *text(data, ts, te))
@@ -777,9 +773,7 @@
777
773
  # THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
778
774
  # This file was generated from lib/regexp_parser/scanner/scanner.rl
779
775
 
780
- module Regexp::Scanner
781
- %% write data;
782
-
776
+ class Regexp::Scanner
783
777
  # General scanner error (catch all)
784
778
  class ScannerError < StandardError; end
785
779
 
@@ -839,6 +833,10 @@ module Regexp::Scanner
839
833
  # This method may raise errors if a syntax error is encountered.
840
834
  # --------------------------------------------------------------------------
841
835
  def self.scan(input_object, &block)
836
+ new.scan(input_object, &block)
837
+ end
838
+
839
+ def scan(input_object, &block)
842
840
  @literal, top, stack = nil, 0, []
843
841
 
844
842
  if input_object.is_a?(Regexp)
@@ -857,11 +855,12 @@ module Regexp::Scanner
857
855
  @block = block_given? ? block : nil
858
856
 
859
857
  @in_group, @group_depth = false, 0
860
- @in_options, @spacing_stack = false, [[@free_spacing, 0]]
858
+ @spacing_stack = [{:free_spacing => @free_spacing, :depth => 0}]
861
859
 
862
860
  in_set, set_depth, set_type = false, 0, :set
863
861
  in_conditional, conditional_depth, conditional_stack = false, 0, []
864
862
 
863
+ %% write data;
865
864
  %% write init;
866
865
  %% write exec;
867
866
 
@@ -881,12 +880,25 @@ module Regexp::Scanner
881
880
  @tokens
882
881
  end
883
882
 
883
+ # Emits an array with the details of the scanned pattern
884
+ def emit(type, token, text, ts, te)
885
+ #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
886
+
887
+ emit_literal if @literal
888
+
889
+ if @block
890
+ @block.call type, token, text, ts, te
891
+ end
892
+
893
+ @tokens << [type, token, text, ts, te]
894
+ end
895
+
884
896
  private
885
897
 
886
898
  # Ragel's regex-based scan of the group options introduced a lot of
887
899
  # ambiguity, so we just ask it to find the beginning of what looks
888
900
  # like an options run and handle the rest in here.
889
- def self.scan_options(p, data, ts, te)
901
+ def scan_options(p, data, ts, te)
890
902
  text = text(data, ts, te).first
891
903
 
892
904
  options_char, options_length = true, 0
@@ -938,26 +950,26 @@ module Regexp::Scanner
938
950
  end
939
951
 
940
952
  # Copy from ts to te from data as text
941
- def self.copy(data, range)
953
+ def copy(data, range)
942
954
  data[range].pack('c*')
943
955
  end
944
956
 
945
957
  # Copy from ts to te from data as text, returning an array with the text
946
958
  # and the offsets used to copy it.
947
- def self.text(data, ts, te, soff = 0)
959
+ def text(data, ts, te, soff = 0)
948
960
  [copy(data, ts-soff..te-1), ts-soff, te]
949
961
  end
950
962
 
951
963
  # Appends one or more characters to the literal buffer, to be emitted later
952
964
  # by a call to emit_literal. Contents can be a mix of ASCII and UTF-8.
953
- def self.append_literal(data, ts, te)
965
+ def append_literal(data, ts, te)
954
966
  @literal ||= []
955
967
  @literal << text(data, ts, te)
956
968
  end
957
969
 
958
970
  # Emits the literal run collected by calls to the append_literal method,
959
971
  # using the total start (ts) and end (te) offsets of the run.
960
- def self.emit_literal
972
+ def emit_literal
961
973
  ts, te = @literal.first[1], @literal.last[2]
962
974
  text = @literal.map {|t| t[0]}.join
963
975
 
@@ -967,43 +979,34 @@ module Regexp::Scanner
967
979
  emit(:literal, :literal, text, ts, te)
968
980
  end
969
981
 
970
- def self.emit_options(text, ts, te)
971
- if text =~ /\(\?([mixdau]+)?-?([mix]+)?:/
972
- positive, negative = $1, $2
982
+ def emit_options(text, ts, te)
983
+ if text =~ /\(\?([mixdau]*)-?([mix]*)(:)?/
984
+ positive, negative, group_local = $1, $2, $3
973
985
 
974
- if positive =~ /x/
986
+ if positive.include?('x')
975
987
  @free_spacing = true
976
988
  end
977
989
 
978
990
  # If the x appears in both, treat it like ruby does, the second cancels
979
991
  # the first.
980
- if negative =~ /x/
992
+ if negative.include?('x')
981
993
  @free_spacing = false
982
994
  end
983
- end
984
-
985
- @in_options = true
986
- @spacing_stack << [@free_spacing, @group_depth]
987
-
988
- emit(:group, :options, text, ts, te)
989
- end
990
-
991
- # Emits an array with the details of the scanned pattern
992
- def self.emit(type, token, text, ts, te)
993
- #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
994
995
 
995
- emit_literal if @literal
996
-
997
- if @block
998
- @block.call type, token, text, ts, te
996
+ if group_local
997
+ @spacing_stack << {:free_spacing => @free_spacing, :depth => @group_depth}
998
+ else
999
+ # switch for parent group level
1000
+ @spacing_stack.last[:free_spacing] = @free_spacing
1001
+ end
999
1002
  end
1000
1003
 
1001
- @tokens << [type, token, text, ts, te]
1004
+ emit(:group, :options, text, ts, te)
1002
1005
  end
1003
1006
 
1004
1007
  # Centralizes and unifies the handling of validation related
1005
1008
  # errors.
1006
- def self.validation_error(type, what, reason)
1009
+ def validation_error(type, what, reason)
1007
1010
  case type
1008
1011
  when :group
1009
1012
  error = InvalidGroupError.new(what, reason)
@@ -1019,12 +1022,12 @@ module Regexp::Scanner
1019
1022
  end
1020
1023
 
1021
1024
  # Used for references with an empty name or number
1022
- def self.empty_backref_error(type, what)
1025
+ def empty_backref_error(type, what)
1023
1026
  validation_error(:backref, what, 'ref ID is empty')
1024
1027
  end
1025
1028
 
1026
1029
  # Used for named expressions with an empty name
1027
- def self.empty_name_error(type, what)
1030
+ def empty_name_error(type, what)
1028
1031
  validation_error(type, what, 'name is empty')
1029
1032
  end
1030
1033
 
@@ -3,7 +3,7 @@ module Regexp::Syntax
3
3
 
4
4
  module Backreference
5
5
  Name = [:name_ref]
6
- Number = [:number_ref, :number_rel_ref]
6
+ Number = [:number, :number_ref, :number_rel_ref]
7
7
 
8
8
  NestLevel = [:name_nest_ref, :number_nest_ref]
9
9
 
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
- module Parser
3
- VERSION = '0.4.6'
2
+ class Parser
3
+ VERSION = '0.4.7'
4
4
  end
5
5
  end
@@ -14,7 +14,7 @@ class ExpressionToH < Test::Unit::TestCase
14
14
  :starts_at => 0,
15
15
  :length => 3,
16
16
  :quantifier => nil,
17
- :options => nil,
17
+ :options => {},
18
18
  :level => nil,
19
19
  :set_level => nil,
20
20
  :conditional_level => nil,
@@ -26,7 +26,7 @@ class ExpressionToH < Test::Unit::TestCase
26
26
  :starts_at => 0,
27
27
  :length => 3,
28
28
  :quantifier => nil,
29
- :options => nil,
29
+ :options => {},
30
30
  :level => 0,
31
31
  :set_level => 0,
32
32
  :conditional_level => 0
@@ -3,6 +3,9 @@ require File.expand_path("../../helpers", __FILE__)
3
3
  class LexerRefCalls < Test::Unit::TestCase
4
4
 
5
5
  tests = {
6
+ # Traditional numerical group back-reference
7
+ '(abc)\1' => [3, :backref, :number, '\1', 5, 7, 0, 0, 0],
8
+
6
9
  # Group back-references, named, numbered, and relative
7
10
  '(?<X>abc)\k<X>' => [3, :backref, :name_ref, '\k<X>', 9, 14, 0, 0, 0],
8
11
  "(?<X>abc)\\k'X'" => [3, :backref, :name_ref, "\\k'X'", 9, 14, 0, 0, 0],
@@ -1,58 +1,62 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  class ParserErrors < Test::Unit::TestCase
4
+ def setup
5
+ @rp = Regexp::Parser.new
6
+ @rp.parse(/foo/)
7
+ end
4
8
 
5
9
  def test_parser_unknown_token_type
6
10
  assert_raise( Regexp::Parser::UnknownTokenTypeError ) {
7
- RP.parse_token(Regexp::Token.new(:foo, :bar))
11
+ @rp.__send__(:parse_token, Regexp::Token.new(:foo, :bar))
8
12
  }
9
13
  end
10
14
 
11
15
  def test_parser_unknown_set_token
12
16
  assert_raise( Regexp::Parser::UnknownTokenError ) {
13
- RP.parse_token(Regexp::Token.new(:set, :foo))
17
+ @rp.__send__(:parse_token, Regexp::Token.new(:set, :foo))
14
18
  }
15
19
  end
16
20
 
17
21
  def test_parser_unknown_meta_token
18
22
  assert_raise( Regexp::Parser::UnknownTokenError ) {
19
- RP.parse_token(Regexp::Token.new(:meta, :foo))
23
+ @rp.__send__(:parse_token, Regexp::Token.new(:meta, :foo))
20
24
  }
21
25
  end
22
26
 
23
27
  def test_parser_unknown_character_type_token
24
28
  assert_raise( Regexp::Parser::UnknownTokenError ) {
25
- RP.parse_token(Regexp::Token.new(:type, :foo))
29
+ @rp.__send__(:parse_token, Regexp::Token.new(:type, :foo))
26
30
  }
27
31
  end
28
32
 
29
33
  def test_parser_unknown_unicode_property_token
30
34
  assert_raise( Regexp::Parser::UnknownTokenError ) {
31
- RP.parse_token(Regexp::Token.new(:property, :foo))
35
+ @rp.__send__(:parse_token, Regexp::Token.new(:property, :foo))
32
36
  }
33
37
  end
34
38
 
35
39
  def test_parser_unknown_unicode_nonproperty_token
36
40
  assert_raise( Regexp::Parser::UnknownTokenError ) {
37
- RP.parse_token(Regexp::Token.new(:nonproperty, :foo))
41
+ @rp.__send__(:parse_token, Regexp::Token.new(:nonproperty, :foo))
38
42
  }
39
43
  end
40
44
 
41
45
  def test_parser_unknown_anchor_token
42
46
  assert_raise( Regexp::Parser::UnknownTokenError ) {
43
- RP.parse_token(Regexp::Token.new(:anchor, :foo))
47
+ @rp.__send__(:parse_token, Regexp::Token.new(:anchor, :foo))
44
48
  }
45
49
  end
46
50
 
47
51
  def test_parser_unknown_quantifier_token
48
52
  assert_raise( Regexp::Parser::UnknownTokenError ) {
49
- RP.parse_token(Regexp::Token.new(:quantifier, :foo))
53
+ @rp.__send__(:parse_token, Regexp::Token.new(:quantifier, :foo))
50
54
  }
51
55
  end
52
56
 
53
57
  def test_parser_unknown_group_open_token
54
58
  assert_raise( Regexp::Parser::UnknownTokenError ) {
55
- RP.parse_token(Regexp::Token.new(:group, :foo))
59
+ @rp.__send__(:parse_token, Regexp::Token.new(:group, :foo))
56
60
  }
57
61
  end
58
62
 
@@ -3,44 +3,170 @@ require File.expand_path("../../helpers", __FILE__)
3
3
  class TestParserGroups < Test::Unit::TestCase
4
4
 
5
5
  def test_parse_root_options_mi
6
- t = RP.parse((/[abc]/mi).to_s, 'ruby/1.8')
6
+ t = RP.parse(/[abc]/mi, 'ruby/1.8')
7
7
 
8
8
  assert_equal true, t.m?
9
9
  assert_equal true, t.i?
10
10
  assert_equal false, t.x?
11
11
  end
12
12
 
13
- def test_parse_nested_options_m
14
- t = RP.parse('(?xi-m:a(?m-ix:b))', 'ruby/1.8')
13
+ def test_parse_option_group
14
+ t = RP.parse(/(?m:a)/, 'ruby/1.8')
15
+
16
+ assert_equal Group::Options, t.expressions[0].class
17
+ assert_equal :options, t.expressions[0].token
18
+
19
+ assert_equal true, t.expressions[0].m?
20
+ assert_equal false, t.expressions[0].i?
21
+ assert_equal false, t.expressions[0].x?
22
+ end
23
+
24
+ def test_parse_self_defeating_option_group
25
+ t = RP.parse(/(?m-m:a)/, 'ruby/1.8')
26
+
27
+ assert_equal false, t.expressions[0].m?
28
+ assert_equal false, t.expressions[0].i?
29
+ assert_equal false, t.expressions[0].x?
30
+ end
31
+
32
+ def test_parse_nested_options_activate_one
33
+ t = RP.parse(/(?x-mi:a(?m:b))/, 'ruby/1.8')
34
+
35
+ assert_equal false, t.expressions[0].m?
36
+ assert_equal false, t.expressions[0].i?
37
+ assert_equal true, t.expressions[0].x?
15
38
 
16
39
  assert_equal true, t.expressions[0].expressions[1].m?
17
40
  assert_equal false, t.expressions[0].expressions[1].i?
18
- assert_equal false, t.expressions[0].expressions[1].x?
41
+ assert_equal true, t.expressions[0].expressions[1].x?
19
42
  end
20
43
 
21
- def test_parse_nested_options_xm
22
- t = RP.parse(/(?i-xm:a(?mx-i:b))/, 'ruby/1.8')
44
+ def test_parse_nested_options_deactivate_one
45
+ t = RP.parse(/(?ix-m:a(?-i:b))/, 'ruby/1.8')
23
46
 
24
- assert_equal true, t.expressions[0].expressions[1].m?
47
+ assert_equal false, t.expressions[0].m?
48
+ assert_equal true, t.expressions[0].i?
49
+ assert_equal true, t.expressions[0].x?
50
+
51
+ assert_equal false, t.expressions[0].expressions[1].m?
25
52
  assert_equal false, t.expressions[0].expressions[1].i?
26
53
  assert_equal true, t.expressions[0].expressions[1].x?
27
54
  end
28
55
 
29
- def test_parse_nested_options_im
30
- t = RP.parse(/(?x-mi:a(?mi-x:b))/, 'ruby/1.8')
56
+ def test_parse_nested_options_invert_all
57
+ t = RP.parse('(?xi-m:a(?m-ix:b))', 'ruby/1.8')
58
+
59
+ assert_equal false, t.expressions[0].m?
60
+ assert_equal true, t.expressions[0].i?
61
+ assert_equal true, t.expressions[0].x?
31
62
 
32
63
  assert_equal true, t.expressions[0].expressions[1].m?
33
- assert_equal true, t.expressions[0].expressions[1].i?
64
+ assert_equal false, t.expressions[0].expressions[1].i?
34
65
  assert_equal false, t.expressions[0].expressions[1].x?
35
66
  end
36
67
 
68
+ def test_parse_nested_options_affect_literal_subexpressions
69
+ t = RP.parse(/(?x-mi:a(?m:b))/, 'ruby/1.8')
70
+
71
+ # a
72
+ assert_equal false, t.expressions[0].expressions[0].m?
73
+ assert_equal false, t.expressions[0].expressions[0].i?
74
+ assert_equal true, t.expressions[0].expressions[0].x?
75
+
76
+ # b
77
+ assert_equal true, t.expressions[0].expressions[1].expressions[0].m?
78
+ assert_equal false, t.expressions[0].expressions[1].expressions[0].i?
79
+ assert_equal true, t.expressions[0].expressions[1].expressions[0].x?
80
+ end
81
+
82
+ def test_parse_option_switch_group
83
+ t = RP.parse(/a(?i-m)b/m, 'ruby/1.8')
84
+
85
+ assert_equal Group::Options, t.expressions[1].class
86
+ assert_equal :options, t.expressions[1].token
87
+ # TODO: change this ^ to :options_switch in v1.0.0
88
+
89
+ assert_equal false, t.expressions[1].m?
90
+ assert_equal true, t.expressions[1].i?
91
+ assert_equal false, t.expressions[1].x?
92
+ end
93
+
94
+ def test_parse_option_switch_affects_following_expressions
95
+ t = RP.parse(/a(?i-m)b/m, 'ruby/1.8')
96
+
97
+ # a
98
+ assert_equal true, t.expressions[0].m?
99
+ assert_equal false, t.expressions[0].i?
100
+ assert_equal false, t.expressions[0].x?
101
+
102
+ # b
103
+ assert_equal false, t.expressions[2].m?
104
+ assert_equal true, t.expressions[2].i?
105
+ assert_equal false, t.expressions[2].x?
106
+ end
107
+
108
+ def test_parse_option_switch_in_group
109
+ t = RP.parse(/(a(?i-m)b)c/m, 'ruby/1.8')
110
+
111
+ group1 = t.expressions[0]
112
+
113
+ assert_equal true, group1.m?
114
+ assert_equal false, group1.i?
115
+ assert_equal false, group1.x?
116
+
117
+ # a
118
+ assert_equal true, group1.expressions[0].m?
119
+ assert_equal false, group1.expressions[0].i?
120
+ assert_equal false, group1.expressions[0].x?
121
+
122
+ # (?i-m)
123
+ assert_equal false, group1.expressions[1].m?
124
+ assert_equal true, group1.expressions[1].i?
125
+ assert_equal false, group1.expressions[1].x?
126
+
127
+ # b
128
+ assert_equal false, group1.expressions[2].m?
129
+ assert_equal true, group1.expressions[2].i?
130
+ assert_equal false, group1.expressions[2].x?
131
+
132
+ # c
133
+ assert_equal true, t.expressions[1].m?
134
+ assert_equal false, t.expressions[1].i?
135
+ assert_equal false, t.expressions[1].x?
136
+ end
137
+
138
+ def test_parse_nested_option_switch_in_group
139
+ t = RP.parse(/((?i-m)(a(?-i)b))/m, 'ruby/1.8')
140
+
141
+ group2 = t.expressions[0].expressions[1]
142
+
143
+ assert_equal false, group2.m?
144
+ assert_equal true, group2.i?
145
+ assert_equal false, group2.x?
146
+
147
+ # a
148
+ assert_equal false, group2.expressions[0].m?
149
+ assert_equal true, group2.expressions[0].i?
150
+ assert_equal false, group2.expressions[0].x?
151
+
152
+ # (?-i)
153
+ assert_equal false, group2.expressions[1].m?
154
+ assert_equal false, group2.expressions[1].i?
155
+ assert_equal false, group2.expressions[1].x?
156
+
157
+ # b
158
+ assert_equal false, group2.expressions[2].m?
159
+ assert_equal false, group2.expressions[2].i?
160
+ assert_equal false, group2.expressions[2].x?
161
+ end
162
+
37
163
  if RUBY_VERSION >= '2.0'
38
164
  def test_parse_options_dau
39
165
  t = RP.parse('(?dua:abc)')
40
166
 
41
- assert_equal true, t.expressions[0].d?
167
+ assert_equal false, t.expressions[0].d?
42
168
  assert_equal true, t.expressions[0].a?
43
- assert_equal true, t.expressions[0].u?
169
+ assert_equal false, t.expressions[0].u?
44
170
  end
45
171
 
46
172
  def test_parse_nested_options_dau
@@ -65,12 +191,12 @@ class TestParserGroups < Test::Unit::TestCase
65
191
  assert_equal false, t.expressions[0].a?
66
192
  assert_equal false, t.expressions[0].u?
67
193
 
68
- assert_equal true, t.expressions[0].expressions[1].d?
194
+ assert_equal false, t.expressions[0].expressions[1].d?
69
195
  assert_equal true, t.expressions[0].expressions[1].a?
70
196
  assert_equal false, t.expressions[0].expressions[1].u?
71
197
  assert_equal false, t.expressions[0].expressions[1].x?
72
198
  assert_equal false, t.expressions[0].expressions[1].m?
73
- assert_equal false, t.expressions[0].expressions[1].i?
199
+ assert_equal true, t.expressions[0].expressions[1].i?
74
200
  end
75
201
  end
76
202