regexp_parser 1.3.0 → 1.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -1
  3. data/Gemfile +3 -3
  4. data/README.md +12 -19
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1228 -1367
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +34 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +12 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +101 -194
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
  31. data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  33. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  34. data/lib/regexp_parser/version.rb +1 -1
  35. data/regexp_parser.gemspec +3 -3
  36. data/spec/expression/base_spec.rb +94 -0
  37. data/spec/expression/clone_spec.rb +120 -0
  38. data/spec/expression/conditional_spec.rb +89 -0
  39. data/spec/expression/free_space_spec.rb +27 -0
  40. data/spec/expression/methods/match_length_spec.rb +161 -0
  41. data/spec/expression/methods/match_spec.rb +25 -0
  42. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  43. data/spec/expression/methods/tests_spec.rb +99 -0
  44. data/spec/expression/methods/traverse_spec.rb +161 -0
  45. data/spec/expression/options_spec.rb +128 -0
  46. data/spec/expression/root_spec.rb +9 -0
  47. data/spec/expression/sequence_spec.rb +9 -0
  48. data/spec/expression/subexpression_spec.rb +50 -0
  49. data/spec/expression/to_h_spec.rb +26 -0
  50. data/spec/expression/to_s_spec.rb +100 -0
  51. data/spec/lexer/all_spec.rb +22 -0
  52. data/spec/lexer/conditionals_spec.rb +53 -0
  53. data/spec/lexer/delimiters_spec.rb +68 -0
  54. data/spec/lexer/escapes_spec.rb +14 -0
  55. data/spec/lexer/keep_spec.rb +10 -0
  56. data/spec/lexer/literals_spec.rb +89 -0
  57. data/spec/lexer/nesting_spec.rb +99 -0
  58. data/spec/lexer/refcalls_spec.rb +55 -0
  59. data/spec/parser/all_spec.rb +43 -0
  60. data/spec/parser/alternation_spec.rb +88 -0
  61. data/spec/parser/anchors_spec.rb +17 -0
  62. data/spec/parser/conditionals_spec.rb +179 -0
  63. data/spec/parser/errors_spec.rb +30 -0
  64. data/spec/parser/escapes_spec.rb +121 -0
  65. data/spec/parser/free_space_spec.rb +130 -0
  66. data/spec/parser/groups_spec.rb +108 -0
  67. data/spec/parser/keep_spec.rb +6 -0
  68. data/spec/parser/posix_classes_spec.rb +8 -0
  69. data/spec/parser/properties_spec.rb +115 -0
  70. data/spec/parser/quantifiers_spec.rb +52 -0
  71. data/spec/parser/refcalls_spec.rb +112 -0
  72. data/spec/parser/set/intersections_spec.rb +127 -0
  73. data/spec/parser/set/ranges_spec.rb +111 -0
  74. data/spec/parser/sets_spec.rb +178 -0
  75. data/spec/parser/types_spec.rb +18 -0
  76. data/spec/scanner/all_spec.rb +18 -0
  77. data/spec/scanner/anchors_spec.rb +21 -0
  78. data/spec/scanner/conditionals_spec.rb +128 -0
  79. data/spec/scanner/delimiters_spec.rb +52 -0
  80. data/spec/scanner/errors_spec.rb +67 -0
  81. data/spec/scanner/escapes_spec.rb +53 -0
  82. data/spec/scanner/free_space_spec.rb +133 -0
  83. data/spec/scanner/groups_spec.rb +52 -0
  84. data/spec/scanner/keep_spec.rb +10 -0
  85. data/spec/scanner/literals_spec.rb +49 -0
  86. data/spec/scanner/meta_spec.rb +18 -0
  87. data/spec/scanner/properties_spec.rb +64 -0
  88. data/spec/scanner/quantifiers_spec.rb +20 -0
  89. data/spec/scanner/refcalls_spec.rb +36 -0
  90. data/spec/scanner/sets_spec.rb +102 -0
  91. data/spec/scanner/types_spec.rb +14 -0
  92. data/spec/spec_helper.rb +15 -0
  93. data/{test → spec}/support/runner.rb +9 -8
  94. data/spec/support/shared_examples.rb +77 -0
  95. data/{test → spec}/support/warning_extractor.rb +5 -7
  96. data/spec/syntax/syntax_spec.rb +48 -0
  97. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  98. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  99. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  100. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  101. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  102. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  103. data/spec/syntax/versions/aliases_spec.rb +37 -0
  104. data/spec/token/token_spec.rb +85 -0
  105. metadata +151 -146
  106. data/test/expression/test_all.rb +0 -12
  107. data/test/expression/test_base.rb +0 -90
  108. data/test/expression/test_clone.rb +0 -89
  109. data/test/expression/test_conditionals.rb +0 -113
  110. data/test/expression/test_free_space.rb +0 -35
  111. data/test/expression/test_set.rb +0 -84
  112. data/test/expression/test_strfregexp.rb +0 -230
  113. data/test/expression/test_subexpression.rb +0 -58
  114. data/test/expression/test_tests.rb +0 -99
  115. data/test/expression/test_to_h.rb +0 -59
  116. data/test/expression/test_to_s.rb +0 -104
  117. data/test/expression/test_traverse.rb +0 -161
  118. data/test/helpers.rb +0 -10
  119. data/test/lexer/test_all.rb +0 -41
  120. data/test/lexer/test_conditionals.rb +0 -127
  121. data/test/lexer/test_keep.rb +0 -24
  122. data/test/lexer/test_literals.rb +0 -130
  123. data/test/lexer/test_nesting.rb +0 -132
  124. data/test/lexer/test_refcalls.rb +0 -56
  125. data/test/parser/set/test_intersections.rb +0 -127
  126. data/test/parser/set/test_ranges.rb +0 -111
  127. data/test/parser/test_all.rb +0 -64
  128. data/test/parser/test_alternation.rb +0 -92
  129. data/test/parser/test_anchors.rb +0 -34
  130. data/test/parser/test_conditionals.rb +0 -187
  131. data/test/parser/test_errors.rb +0 -63
  132. data/test/parser/test_escapes.rb +0 -134
  133. data/test/parser/test_free_space.rb +0 -139
  134. data/test/parser/test_groups.rb +0 -289
  135. data/test/parser/test_keep.rb +0 -21
  136. data/test/parser/test_posix_classes.rb +0 -27
  137. data/test/parser/test_properties.rb +0 -133
  138. data/test/parser/test_quantifiers.rb +0 -301
  139. data/test/parser/test_refcalls.rb +0 -186
  140. data/test/parser/test_sets.rb +0 -179
  141. data/test/parser/test_types.rb +0 -50
  142. data/test/scanner/test_all.rb +0 -38
  143. data/test/scanner/test_anchors.rb +0 -38
  144. data/test/scanner/test_conditionals.rb +0 -184
  145. data/test/scanner/test_errors.rb +0 -91
  146. data/test/scanner/test_escapes.rb +0 -56
  147. data/test/scanner/test_free_space.rb +0 -200
  148. data/test/scanner/test_groups.rb +0 -79
  149. data/test/scanner/test_keep.rb +0 -35
  150. data/test/scanner/test_literals.rb +0 -89
  151. data/test/scanner/test_meta.rb +0 -40
  152. data/test/scanner/test_properties.rb +0 -312
  153. data/test/scanner/test_quantifiers.rb +0 -37
  154. data/test/scanner/test_refcalls.rb +0 -52
  155. data/test/scanner/test_scripts.rb +0 -53
  156. data/test/scanner/test_sets.rb +0 -119
  157. data/test/scanner/test_types.rb +0 -35
  158. data/test/scanner/test_unicode_blocks.rb +0 -30
  159. data/test/support/disable_autotest.rb +0 -8
  160. data/test/syntax/test_all.rb +0 -6
  161. data/test/syntax/test_syntax.rb +0 -61
  162. data/test/syntax/test_syntax_token_map.rb +0 -25
  163. data/test/syntax/versions/test_1.8.rb +0 -55
  164. data/test/syntax/versions/test_1.9.1.rb +0 -36
  165. data/test/syntax/versions/test_1.9.3.rb +0 -32
  166. data/test/syntax/versions/test_2.0.0.rb +0 -37
  167. data/test/syntax/versions/test_2.2.0.rb +0 -32
  168. data/test/syntax/versions/test_aliases.rb +0 -129
  169. data/test/syntax/versions/test_all.rb +0 -5
  170. data/test/test_all.rb +0 -5
  171. data/test/token/test_all.rb +0 -2
  172. data/test/token/test_token.rb +0 -107
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe(Regexp::Lexer) do
4
+ specify('lexer returns an array') do
5
+ expect(RL.lex('abc')).to be_instance_of(Array)
6
+ end
7
+
8
+ specify('lexer returns tokens') do
9
+ tokens = RL.lex('^abc+[^one]{2,3}\\b\\d\\\\C-C$')
10
+ expect(tokens).to all(be_a Regexp::Token)
11
+ expect(tokens.map { |token| token.to_a.length }).to all(eq 8)
12
+ end
13
+
14
+ specify('lexer token count') do
15
+ tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
16
+ expect(tokens.length).to eq 28
17
+ end
18
+
19
+ specify('lexer scan alias') do
20
+ expect(RL.scan(/a|b|c/)).to eq RL.lex(/a|b|c/)
21
+ end
22
+ end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Conditional lexing') do
4
+ include_examples 'lex', /(?<A>a)(?(<A>)b|c)/,
5
+ 3 => [:conditional, :open, '(?', 7, 9, 0, 0, 0],
6
+ 4 => [:conditional, :condition, '(<A>)', 9, 14, 0, 0, 1],
7
+ 6 => [:conditional, :separator, '|', 15, 16, 0, 0, 1],
8
+ 8 => [:conditional, :close, ')', 17, 18, 0, 0, 0]
9
+
10
+ include_examples 'lex', /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/,
11
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
12
+ 1 => [:group, :named, '(?<A>', 1, 6, 1, 0, 0],
13
+ 5 => [:conditional, :open, '(?', 13, 15, 2, 0, 0],
14
+ 6 => [:conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
15
+ 8 => [:conditional, :separator, '|', 21, 22, 2, 0, 1],
16
+ 10 => [:conditional, :open, '(?', 23, 25, 3, 0, 1],
17
+ 11 => [:conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
18
+ 12 => [:set, :open, '[', 30, 31, 3, 0, 2],
19
+ 13 => [:literal, :literal, 'e', 31, 32, 3, 1, 2],
20
+ 14 => [:set, :range, '-', 32, 33, 3, 1, 2],
21
+ 15 => [:literal, :literal, 'g', 33, 34, 3, 1, 2],
22
+ 16 => [:set, :close, ']', 34, 35, 3, 0, 2],
23
+ 17 => [:conditional, :separator, '|', 35, 36, 3, 0, 2],
24
+ 23 => [:conditional, :close, ')', 41, 42, 3, 0, 1],
25
+ 25 => [:conditional, :close, ')', 43, 44, 2, 0, 0],
26
+ 26 => [:group, :close, ')', 44, 45, 1, 0, 0],
27
+ 27 => [:group, :close, ')', 45, 46, 0, 0, 0]
28
+
29
+ include_examples 'lex', /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/,
30
+ 9 => [:conditional, :open, '(?', 9, 11, 0, 0, 0],
31
+ 10 => [:conditional, :condition, '(1)', 11, 14, 0, 0, 1],
32
+ 11 => [:conditional, :open, '(?', 14, 16, 0, 0, 1],
33
+ 12 => [:conditional, :condition, '(2)', 16, 19, 0, 0, 2],
34
+ 13 => [:conditional, :open, '(?', 19, 21, 0, 0, 2],
35
+ 14 => [:conditional, :condition, '(3)', 21, 24, 0, 0, 3],
36
+ 16 => [:conditional, :separator, '|', 25, 26, 0, 0, 3],
37
+ 18 => [:conditional, :close, ')', 27, 28, 0, 0, 2],
38
+ 19 => [:conditional, :close, ')', 28, 29, 0, 0, 1],
39
+ 20 => [:conditional, :separator, '|', 29, 30, 0, 0, 1],
40
+ 21 => [:conditional, :open, '(?', 30, 32, 0, 0, 1],
41
+ 22 => [:conditional, :condition, '(3)', 32, 35, 0, 0, 2],
42
+ 23 => [:conditional, :open, '(?', 35, 37, 0, 0, 2],
43
+ 24 => [:conditional, :condition, '(2)', 37, 40, 0, 0, 3],
44
+ 26 => [:conditional, :separator, '|', 41, 42, 0, 0, 3],
45
+ 28 => [:conditional, :close, ')', 43, 44, 0, 0, 2],
46
+ 29 => [:conditional, :separator, '|', 44, 45, 0, 0, 2],
47
+ 30 => [:conditional, :open, '(?', 45, 47, 0, 0, 2],
48
+ 31 => [:conditional, :condition, '(1)', 47, 50, 0, 0, 3],
49
+ 33 => [:conditional, :separator, '|', 51, 52, 0, 0, 3],
50
+ 35 => [:conditional, :close, ')', 53, 54, 0, 0, 2],
51
+ 36 => [:conditional, :close, ')', 54, 55, 0, 0, 1],
52
+ 37 => [:conditional, :close, ')', 55, 56, 0, 0, 0]
53
+ end
@@ -0,0 +1,68 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter lexing') do
4
+ include_examples 'lex', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
6
+
7
+ include_examples 'lex', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
9
+
10
+ include_examples 'lex', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
12
+
13
+ include_examples 'lex', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
15
+
16
+ include_examples 'lex', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
18
+
19
+ include_examples 'lex', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
21
+
22
+ include_examples 'lex', '}{+',
23
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
24
+ 1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
25
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
26
+
27
+ include_examples 'lex', '{{var}}',
28
+ 0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
29
+
30
+ include_examples 'lex', 'a{b}c',
31
+ 0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
32
+
33
+ include_examples 'lex', 'a{1,2',
34
+ 0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
35
+
36
+ include_examples 'lex', '({.+})',
37
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
38
+ 1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
39
+ 2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
40
+ 3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
41
+ 4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
42
+ 5 => [:group, :close, ')', 5, 6, 0, 0, 0]
43
+
44
+ include_examples 'lex', ']',
45
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
46
+
47
+ include_examples 'lex', ']]',
48
+ 0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
49
+
50
+ include_examples 'lex', ']\[',
51
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
52
+ 1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
53
+
54
+ include_examples 'lex', '()',
55
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
56
+ 1 => [:group, :close, ')', 1, 2, 0, 0, 0]
57
+
58
+ include_examples 'lex', '{abc:.+}}}[^}]]}',
59
+ 0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
60
+ 1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
61
+ 2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
62
+ 3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
63
+ 4 => [:set, :open, '[', 10, 11, 0, 0, 0],
64
+ 5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
65
+ 6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
66
+ 7 => [:set, :close, ']', 13, 14, 0, 0, 0],
67
+ 8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
68
+ end
@@ -0,0 +1,14 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Escape lexing') do
4
+ include_examples 'lex', '\u{62}',
5
+ 0 => [:escape, :codepoint_list, '\u{62}', 0, 6, 0, 0, 0]
6
+
7
+ include_examples 'lex', '\u{62 63 64}',
8
+ 0 => [:escape, :codepoint_list, '\u{62 63 64}', 0, 12, 0, 0, 0]
9
+
10
+ include_examples 'lex', '\u{62 63 64}+',
11
+ 0 => [:escape, :codepoint_list, '\u{62 63}', 0, 9, 0, 0, 0],
12
+ 1 => [:escape, :codepoint_list, '\u{64}', 9, 15, 0, 0, 0],
13
+ 2 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
14
+ end
@@ -0,0 +1,10 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Keep lexing') do
4
+ include_examples 'lex', /ab\Kcd/,
5
+ 1 => [:keep, :mark, '\K', 2, 4, 0, 0, 0]
6
+
7
+ include_examples 'lex', /(a\Kb)|(c\\\Kd)ef/,
8
+ 2 => [:keep, :mark, '\K', 2, 4, 1, 0, 0],
9
+ 9 => [:keep, :mark, '\K', 11, 13, 1, 0, 0]
10
+ end
@@ -0,0 +1,89 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal lexing') do
4
+ # ascii, single byte characters
5
+ include_examples 'lex', 'a',
6
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0]
7
+
8
+ include_examples 'lex', 'ab+',
9
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
10
+ 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
11
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
12
+
13
+ # 2 byte wide characters, Arabic
14
+ include_examples 'lex', 'ا',
15
+ 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0]
16
+
17
+ include_examples 'lex', 'aاbبcت',
18
+ 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
19
+
20
+ include_examples 'lex', 'aاbبت?',
21
+ 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
22
+ 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
23
+ 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
24
+
25
+ include_examples 'lex', 'aا?bبcت+',
26
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
27
+ 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
28
+ 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
29
+ 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
30
+ 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
31
+ 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
32
+
33
+ include_examples 'lex', 'a(اbب+)cت?',
34
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
35
+ 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
36
+ 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
37
+ 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
38
+ 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
39
+ 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
40
+ 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
41
+ 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
42
+ 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
43
+
44
+ # 3 byte wide characters, Japanese
45
+ include_examples 'lex', 'ab?れます+cd',
46
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
47
+ 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
48
+ 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
49
+ 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
50
+ 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
51
+ 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
52
+ 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0]
53
+
54
+ # 4 byte wide characters, Osmanya
55
+ include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
56
+ 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
57
+ 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
58
+ 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
59
+ 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
60
+ 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
61
+ 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
62
+ 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0]
63
+
64
+ include_examples 'lex', 'mu𝄞?si*𝄫c+',
65
+ 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
66
+ 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
67
+ 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
68
+ 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
69
+ 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
70
+ 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
71
+ 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
72
+ 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
73
+ 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
74
+
75
+ specify('lex single 2 byte char') do
76
+ tokens = RL.lex("\u0627+")
77
+ expect(tokens.count).to eq 2
78
+ end
79
+
80
+ specify('lex single 3 byte char') do
81
+ tokens = RL.lex("\u308C+")
82
+ expect(tokens.count).to eq 2
83
+ end
84
+
85
+ specify('lex single 4 byte char') do
86
+ tokens = RL.lex("\u{1D11E}+")
87
+ expect(tokens.count).to eq 2
88
+ end
89
+ end
@@ -0,0 +1,99 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Nesting lexing') do
4
+ include_examples 'lex', /(((b)))/,
5
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
6
+ 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
7
+ 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
8
+ 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
9
+ 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
10
+ 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
11
+ 6 => [:group, :close, ')', 6, 7, 0, 0, 0]
12
+
13
+ include_examples 'lex', /(\((b)\))/,
14
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
15
+ 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
16
+ 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
17
+ 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
18
+ 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
19
+ 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
20
+ 6 => [:group, :close, ')', 8, 9, 0, 0, 0]
21
+
22
+ include_examples 'lex', /(?>a(?>b(?>c)))/,
23
+ 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
24
+ 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
25
+ 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
26
+ 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
27
+ 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
28
+ 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
29
+
30
+ include_examples 'lex', /(?:a(?:b(?:c)))/,
31
+ 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
32
+ 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
33
+ 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
34
+ 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
35
+ 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
36
+ 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
37
+
38
+ include_examples 'lex', /(?=a(?!b(?<=c(?<!d))))/,
39
+ 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
40
+ 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
41
+ 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
42
+ 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
43
+ 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
44
+ 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
45
+ 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
46
+ 11 => [:group, :close, ')', 21, 22, 0, 0, 0]
47
+
48
+ include_examples 'lex', /((?#a)b(?#c)d(?#e))/,
49
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
50
+ 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
51
+ 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
52
+ 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
53
+ 6 => [:group, :close, ')', 18, 19, 0, 0, 0]
54
+
55
+ include_examples 'lex', /a[b-e]f/,
56
+ 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
57
+ 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
58
+ 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
59
+ 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
60
+ 5 => [:set, :close, ']', 5, 6, 0, 0, 0]
61
+
62
+ include_examples 'lex', /[[:word:]&&[^c]z]/,
63
+ 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
64
+ 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
65
+ 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
66
+ 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
67
+ 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
68
+ 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
69
+ 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
70
+ 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
71
+ 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
72
+
73
+ include_examples 'lex', /[\p{word}&&[^c]z]/,
74
+ 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
75
+ 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
76
+ 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
77
+ 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
78
+ 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
79
+ 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
80
+ 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
81
+ 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
82
+ 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
83
+
84
+ include_examples 'lex', /[a[b[c[d-g]]]]/,
85
+ 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
86
+ 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
87
+ 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
88
+ 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
89
+ 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
90
+ 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
91
+ 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
92
+ 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
93
+ 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
94
+ 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
95
+ 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
96
+ 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
97
+ 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
98
+ 13 => [:set, :close, ']', 13, 14, 0, 0, 0]
99
+ end
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('RefCall lexing') do
4
+ # Traditional numerical group back-reference
5
+ include_examples 'lex', '(abc)\1',
6
+ 3 => [:backref, :number, '\1', 5, 7, 0, 0, 0]
7
+
8
+ # Group back-references, named, numbered, and relative
9
+ include_examples 'lex', '(?<X>abc)\k<X>',
10
+ 3 => [:backref, :name_ref, '\k<X>', 9, 14, 0, 0, 0]
11
+ include_examples 'lex', "(?<X>abc)\\k'X'",
12
+ 3 => [:backref, :name_ref, "\\k'X'", 9, 14, 0, 0, 0]
13
+
14
+ include_examples 'lex', '(abc)\k<1>',
15
+ 3 => [:backref, :number_ref, '\k<1>', 5, 10, 0, 0, 0]
16
+ include_examples 'lex', "(abc)\\k'1'",
17
+ 3 => [:backref, :number_ref, "\\k'1'", 5, 10, 0, 0, 0]
18
+
19
+ include_examples 'lex', '(abc)\k<-1>',
20
+ 3 => [:backref, :number_rel_ref, '\k<-1>', 5, 11, 0, 0, 0]
21
+ include_examples 'lex', "(abc)\\k'-1'",
22
+ 3 => [:backref, :number_rel_ref, "\\k'-1'", 5, 11, 0, 0, 0]
23
+
24
+ # Sub-expression invocation, named, numbered, and relative
25
+ include_examples 'lex', '(?<X>abc)\g<X>',
26
+ 3 => [:backref, :name_call, '\g<X>', 9, 14, 0, 0, 0]
27
+ include_examples 'lex', "(?<X>abc)\\g'X'",
28
+ 3 => [:backref, :name_call, "\\g'X'", 9, 14, 0, 0, 0]
29
+
30
+ include_examples 'lex', '(abc)\g<1>',
31
+ 3 => [:backref, :number_call, '\g<1>', 5, 10, 0, 0, 0]
32
+ include_examples 'lex', "(abc)\\g'1'",
33
+ 3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
34
+
35
+ include_examples 'lex', '(abc)\g<-1>',
36
+ 3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
37
+ include_examples 'lex', "(abc)\\g'-1'",
38
+ 3 => [:backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0]
39
+
40
+ include_examples 'lex', '(abc)\g<+1>',
41
+ 3 => [:backref, :number_rel_call, '\g<+1>', 5, 11, 0, 0, 0]
42
+ include_examples 'lex', "(abc)\\g'+1'",
43
+ 3 => [:backref, :number_rel_call, "\\g'+1'", 5, 11, 0, 0, 0]
44
+
45
+ # Group back-references, with nesting level
46
+ include_examples 'lex', '(?<X>abc)\k<X-0>',
47
+ 3 => [:backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0]
48
+ include_examples 'lex', "(?<X>abc)\\k'X-0'",
49
+ 3 => [:backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0]
50
+
51
+ include_examples 'lex', '(abc)\k<1-0>',
52
+ 3 => [:backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0]
53
+ include_examples 'lex', "(abc)\\k'1-0'",
54
+ 3 => [:backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0]
55
+ end
@@ -0,0 +1,43 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe(Regexp::Parser) do
4
+ specify('parse returns a root expression') do
5
+ expect(RP.parse('abc')).to be_instance_of(Root)
6
+ end
7
+
8
+ specify('parse can be called with block') do
9
+ expect(RP.parse('abc') { |root| root.class }).to eq Root
10
+ end
11
+
12
+ specify('parse root contains expressions') do
13
+ root = RP.parse(/^a.c+[^one]{2,3}\b\d\\\C-C$/)
14
+ expect(root.expressions).to all(be_a Regexp::Expression::Base)
15
+ end
16
+
17
+ specify('parse root options mi') do
18
+ root = RP.parse(/[abc]/mi, 'ruby/1.8')
19
+
20
+ expect(root.m?).to be true
21
+ expect(root.i?).to be true
22
+ expect(root.x?).to be false
23
+ end
24
+
25
+ specify('parse node types') do
26
+ root = RP.parse('^(one){2,3}([^d\\]efm-qz\\,\\-]*)(ghi)+$')
27
+
28
+ expect(root[1][0]).to be_a(Literal)
29
+ expect(root[1]).to be_quantified
30
+ expect(root[2][0]).to be_a(CharacterSet)
31
+ expect(root[2]).not_to be_quantified
32
+ expect(root[3]).to be_a(Group::Capture)
33
+ expect(root[3]).to be_quantified
34
+ end
35
+
36
+ specify('parse no quantifier target raises error') do
37
+ expect { RP.parse('?abc') }.to raise_error(ArgumentError)
38
+ end
39
+
40
+ specify('parse sequence no quantifier target raises error') do
41
+ expect { RP.parse('abc|?def') }.to raise_error(ArgumentError)
42
+ end
43
+ end