regexp_parser 1.7.0 → 2.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +364 -22
  3. data/Gemfile +8 -2
  4. data/LICENSE +1 -1
  5. data/README.md +124 -88
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/error.rb +4 -0
  8. data/lib/regexp_parser/expression/base.rb +76 -0
  9. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  10. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  11. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
  14. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  15. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  16. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  17. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  18. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  19. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  20. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  21. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  22. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  23. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
  24. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  25. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  26. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  27. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  28. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  29. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  30. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  31. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  32. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  33. data/lib/regexp_parser/expression/sequence.rb +11 -47
  34. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  35. data/lib/regexp_parser/expression/shared.rb +111 -0
  36. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  37. data/lib/regexp_parser/expression.rb +14 -141
  38. data/lib/regexp_parser/lexer.rb +83 -41
  39. data/lib/regexp_parser/parser.rb +371 -429
  40. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  41. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  42. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  43. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  44. data/lib/regexp_parser/scanner/properties/long.csv +633 -0
  45. data/lib/regexp_parser/scanner/properties/short.csv +248 -0
  46. data/lib/regexp_parser/scanner/property.rl +4 -4
  47. data/lib/regexp_parser/scanner/scanner.rl +295 -368
  48. data/lib/regexp_parser/scanner.rb +1405 -1674
  49. data/lib/regexp_parser/syntax/any.rb +2 -7
  50. data/lib/regexp_parser/syntax/base.rb +92 -67
  51. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  52. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  53. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  54. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  55. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  56. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  57. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  58. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  59. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  60. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  61. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  62. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  63. data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
  64. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  65. data/lib/regexp_parser/syntax/token.rb +45 -0
  66. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  67. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  68. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  69. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  70. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  71. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  73. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  74. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  75. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  78. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  79. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  80. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  81. data/lib/regexp_parser/syntax/versions.rb +3 -1
  82. data/lib/regexp_parser/syntax.rb +8 -6
  83. data/lib/regexp_parser/token.rb +9 -20
  84. data/lib/regexp_parser/version.rb +1 -1
  85. data/lib/regexp_parser.rb +0 -2
  86. data/regexp_parser.gemspec +20 -22
  87. metadata +49 -166
  88. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  89. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  90. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  91. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  92. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  93. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  94. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  95. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  96. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  97. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  98. data/spec/expression/base_spec.rb +0 -94
  99. data/spec/expression/clone_spec.rb +0 -120
  100. data/spec/expression/conditional_spec.rb +0 -89
  101. data/spec/expression/free_space_spec.rb +0 -27
  102. data/spec/expression/methods/match_length_spec.rb +0 -161
  103. data/spec/expression/methods/match_spec.rb +0 -25
  104. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  105. data/spec/expression/methods/tests_spec.rb +0 -99
  106. data/spec/expression/methods/traverse_spec.rb +0 -161
  107. data/spec/expression/options_spec.rb +0 -128
  108. data/spec/expression/root_spec.rb +0 -9
  109. data/spec/expression/sequence_spec.rb +0 -9
  110. data/spec/expression/subexpression_spec.rb +0 -50
  111. data/spec/expression/to_h_spec.rb +0 -26
  112. data/spec/expression/to_s_spec.rb +0 -100
  113. data/spec/lexer/all_spec.rb +0 -22
  114. data/spec/lexer/conditionals_spec.rb +0 -53
  115. data/spec/lexer/escapes_spec.rb +0 -14
  116. data/spec/lexer/keep_spec.rb +0 -10
  117. data/spec/lexer/literals_spec.rb +0 -89
  118. data/spec/lexer/nesting_spec.rb +0 -99
  119. data/spec/lexer/refcalls_spec.rb +0 -55
  120. data/spec/parser/all_spec.rb +0 -43
  121. data/spec/parser/alternation_spec.rb +0 -88
  122. data/spec/parser/anchors_spec.rb +0 -17
  123. data/spec/parser/conditionals_spec.rb +0 -179
  124. data/spec/parser/errors_spec.rb +0 -30
  125. data/spec/parser/escapes_spec.rb +0 -121
  126. data/spec/parser/free_space_spec.rb +0 -130
  127. data/spec/parser/groups_spec.rb +0 -108
  128. data/spec/parser/keep_spec.rb +0 -6
  129. data/spec/parser/posix_classes_spec.rb +0 -8
  130. data/spec/parser/properties_spec.rb +0 -115
  131. data/spec/parser/quantifiers_spec.rb +0 -51
  132. data/spec/parser/refcalls_spec.rb +0 -112
  133. data/spec/parser/set/intersections_spec.rb +0 -127
  134. data/spec/parser/set/ranges_spec.rb +0 -111
  135. data/spec/parser/sets_spec.rb +0 -178
  136. data/spec/parser/types_spec.rb +0 -18
  137. data/spec/scanner/all_spec.rb +0 -18
  138. data/spec/scanner/anchors_spec.rb +0 -21
  139. data/spec/scanner/conditionals_spec.rb +0 -128
  140. data/spec/scanner/errors_spec.rb +0 -68
  141. data/spec/scanner/escapes_spec.rb +0 -53
  142. data/spec/scanner/free_space_spec.rb +0 -133
  143. data/spec/scanner/groups_spec.rb +0 -52
  144. data/spec/scanner/keep_spec.rb +0 -10
  145. data/spec/scanner/literals_spec.rb +0 -49
  146. data/spec/scanner/meta_spec.rb +0 -18
  147. data/spec/scanner/properties_spec.rb +0 -64
  148. data/spec/scanner/quantifiers_spec.rb +0 -20
  149. data/spec/scanner/refcalls_spec.rb +0 -36
  150. data/spec/scanner/sets_spec.rb +0 -102
  151. data/spec/scanner/types_spec.rb +0 -14
  152. data/spec/spec_helper.rb +0 -15
  153. data/spec/support/runner.rb +0 -42
  154. data/spec/support/shared_examples.rb +0 -77
  155. data/spec/support/warning_extractor.rb +0 -60
  156. data/spec/syntax/syntax_spec.rb +0 -48
  157. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  158. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  159. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  160. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  161. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  162. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  163. data/spec/syntax/versions/aliases_spec.rb +0 -37
  164. data/spec/token/token_spec.rb +0 -85
  165. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,26 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Expression#to_h') do
4
- specify('Root#to_h') do
5
- root = RP.parse('abc')
6
-
7
- hash = root.to_h
8
-
9
- expect(token: :root, type: :expression, text: 'abc', starts_at: 0, length: 3, quantifier: nil, options: {}, level: nil, set_level: nil, conditional_level: nil, expressions: [{ token: :literal, type: :literal, text: 'abc', starts_at: 0, length: 3, quantifier: nil, options: {}, level: 0, set_level: 0, conditional_level: 0 }]).to eq hash
10
- end
11
-
12
- specify('Quantifier#to_h') do
13
- root = RP.parse('a{2,4}')
14
- exp = root.expressions.at(0)
15
-
16
- hash = exp.quantifier.to_h
17
-
18
- expect(max: 4, min: 2, mode: :greedy, text: '{2,4}', token: :interval).to eq hash
19
- end
20
-
21
- specify('Conditional#to_h') do
22
- root = RP.parse('(?<A>a)(?(<A>)b|c)', 'ruby/2.0')
23
-
24
- expect { root.to_h }.not_to(raise_error)
25
- end
26
- end
@@ -1,100 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Expression#to_s') do
4
- specify('literal alternation') do
5
- pattern = 'abcd|ghij|klmn|pqur'
6
-
7
- expect(RP.parse(pattern).to_s).to eq pattern
8
- end
9
-
10
- specify('quantified alternations') do
11
- pattern = '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
12
-
13
- expect(RP.parse(pattern).to_s).to eq pattern
14
- end
15
-
16
- specify('quantified sets') do
17
- pattern = '[abc]+|[^def]{3,6}'
18
-
19
- expect(RP.parse(pattern).to_s).to eq pattern
20
- end
21
-
22
- specify('property sets') do
23
- pattern = '[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+'
24
-
25
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
26
- end
27
-
28
- specify('groups') do
29
- pattern = "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
30
-
31
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
32
- end
33
-
34
- specify('assertions') do
35
- pattern = '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
36
-
37
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
38
- end
39
-
40
- specify('comments') do
41
- pattern = '(?#start)a(?#middle)b(?#end)'
42
-
43
- expect(RP.parse(pattern).to_s).to eq pattern
44
- end
45
-
46
- specify('options') do
47
- pattern = '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
48
-
49
- expect(RP.parse(pattern).to_s).to eq pattern
50
- end
51
-
52
- specify('url') do
53
- pattern = ('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
54
-
55
- expect(RP.parse(pattern).to_s).to eq pattern
56
- end
57
-
58
- specify('multiline source') do
59
- multiline = /
60
- \A
61
- a? # One letter
62
- b{2,5} # Another one
63
- [c-g]+ # A set
64
- \z
65
- /x
66
-
67
- expect(RP.parse(multiline).to_s).to eq multiline.source
68
- end
69
-
70
- specify('multiline #to_s') do
71
- multiline = /
72
- \A
73
- a? # One letter
74
- b{2,5} # Another one
75
- [c-g]+ # A set
76
- \z
77
- /x
78
-
79
- expect(RP.parse(multiline.to_s).to_s).to eq multiline.to_s
80
- end
81
-
82
- # Free spacing expressions that use spaces between quantifiers and their
83
- # targets do not produce identical results due to the way quantifiers are
84
- # applied to expressions (members, not nodes) and the merging of consecutive
85
- # space nodes. This tests that they produce equivalent results.
86
- specify('multiline equivalence') do
87
- multiline = /
88
- \A
89
- a ? # One letter
90
- b {2,5} # Another one
91
- [c-g] + # A set
92
- \z
93
- /x
94
-
95
- str = 'bbbcged'
96
- root = RP.parse(multiline)
97
-
98
- expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
99
- end
100
- end
@@ -1,22 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe(Regexp::Lexer) do
4
- specify('lexer returns an array') do
5
- expect(RL.lex('abc')).to be_instance_of(Array)
6
- end
7
-
8
- specify('lexer returns tokens') do
9
- tokens = RL.lex('^abc+[^one]{2,3}\\b\\d\\\\C-C$')
10
- expect(tokens).to all(be_a Regexp::Token)
11
- expect(tokens.map { |token| token.to_a.length }).to all(eq 8)
12
- end
13
-
14
- specify('lexer token count') do
15
- tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
16
- expect(tokens.length).to eq 28
17
- end
18
-
19
- specify('lexer scan alias') do
20
- expect(RL.scan(/a|b|c/)).to eq RL.lex(/a|b|c/)
21
- end
22
- end
@@ -1,53 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Conditional lexing') do
4
- include_examples 'lex', /(?<A>a)(?(<A>)b|c)/,
5
- 3 => [:conditional, :open, '(?', 7, 9, 0, 0, 0],
6
- 4 => [:conditional, :condition, '(<A>)', 9, 14, 0, 0, 1],
7
- 6 => [:conditional, :separator, '|', 15, 16, 0, 0, 1],
8
- 8 => [:conditional, :close, ')', 17, 18, 0, 0, 0]
9
-
10
- include_examples 'lex', /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/,
11
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
12
- 1 => [:group, :named, '(?<A>', 1, 6, 1, 0, 0],
13
- 5 => [:conditional, :open, '(?', 13, 15, 2, 0, 0],
14
- 6 => [:conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
15
- 8 => [:conditional, :separator, '|', 21, 22, 2, 0, 1],
16
- 10 => [:conditional, :open, '(?', 23, 25, 3, 0, 1],
17
- 11 => [:conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
18
- 12 => [:set, :open, '[', 30, 31, 3, 0, 2],
19
- 13 => [:literal, :literal, 'e', 31, 32, 3, 1, 2],
20
- 14 => [:set, :range, '-', 32, 33, 3, 1, 2],
21
- 15 => [:literal, :literal, 'g', 33, 34, 3, 1, 2],
22
- 16 => [:set, :close, ']', 34, 35, 3, 0, 2],
23
- 17 => [:conditional, :separator, '|', 35, 36, 3, 0, 2],
24
- 23 => [:conditional, :close, ')', 41, 42, 3, 0, 1],
25
- 25 => [:conditional, :close, ')', 43, 44, 2, 0, 0],
26
- 26 => [:group, :close, ')', 44, 45, 1, 0, 0],
27
- 27 => [:group, :close, ')', 45, 46, 0, 0, 0]
28
-
29
- include_examples 'lex', /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/,
30
- 9 => [:conditional, :open, '(?', 9, 11, 0, 0, 0],
31
- 10 => [:conditional, :condition, '(1)', 11, 14, 0, 0, 1],
32
- 11 => [:conditional, :open, '(?', 14, 16, 0, 0, 1],
33
- 12 => [:conditional, :condition, '(2)', 16, 19, 0, 0, 2],
34
- 13 => [:conditional, :open, '(?', 19, 21, 0, 0, 2],
35
- 14 => [:conditional, :condition, '(3)', 21, 24, 0, 0, 3],
36
- 16 => [:conditional, :separator, '|', 25, 26, 0, 0, 3],
37
- 18 => [:conditional, :close, ')', 27, 28, 0, 0, 2],
38
- 19 => [:conditional, :close, ')', 28, 29, 0, 0, 1],
39
- 20 => [:conditional, :separator, '|', 29, 30, 0, 0, 1],
40
- 21 => [:conditional, :open, '(?', 30, 32, 0, 0, 1],
41
- 22 => [:conditional, :condition, '(3)', 32, 35, 0, 0, 2],
42
- 23 => [:conditional, :open, '(?', 35, 37, 0, 0, 2],
43
- 24 => [:conditional, :condition, '(2)', 37, 40, 0, 0, 3],
44
- 26 => [:conditional, :separator, '|', 41, 42, 0, 0, 3],
45
- 28 => [:conditional, :close, ')', 43, 44, 0, 0, 2],
46
- 29 => [:conditional, :separator, '|', 44, 45, 0, 0, 2],
47
- 30 => [:conditional, :open, '(?', 45, 47, 0, 0, 2],
48
- 31 => [:conditional, :condition, '(1)', 47, 50, 0, 0, 3],
49
- 33 => [:conditional, :separator, '|', 51, 52, 0, 0, 3],
50
- 35 => [:conditional, :close, ')', 53, 54, 0, 0, 2],
51
- 36 => [:conditional, :close, ')', 54, 55, 0, 0, 1],
52
- 37 => [:conditional, :close, ')', 55, 56, 0, 0, 0]
53
- end
@@ -1,14 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Escape lexing') do
4
- include_examples 'lex', '\u{62}',
5
- 0 => [:escape, :codepoint_list, '\u{62}', 0, 6, 0, 0, 0]
6
-
7
- include_examples 'lex', '\u{62 63 64}',
8
- 0 => [:escape, :codepoint_list, '\u{62 63 64}', 0, 12, 0, 0, 0]
9
-
10
- include_examples 'lex', '\u{62 63 64}+',
11
- 0 => [:escape, :codepoint_list, '\u{62 63}', 0, 9, 0, 0, 0],
12
- 1 => [:escape, :codepoint_list, '\u{64}', 9, 15, 0, 0, 0],
13
- 2 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
14
- end
@@ -1,10 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Keep lexing') do
4
- include_examples 'lex', /ab\Kcd/,
5
- 1 => [:keep, :mark, '\K', 2, 4, 0, 0, 0]
6
-
7
- include_examples 'lex', /(a\Kb)|(c\\\Kd)ef/,
8
- 2 => [:keep, :mark, '\K', 2, 4, 1, 0, 0],
9
- 9 => [:keep, :mark, '\K', 11, 13, 1, 0, 0]
10
- end
@@ -1,89 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Literal lexing') do
4
- # ascii, single byte characters
5
- include_examples 'lex', 'a',
6
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0]
7
-
8
- include_examples 'lex', 'ab+',
9
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
10
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
11
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
12
-
13
- # 2 byte wide characters, Arabic
14
- include_examples 'lex', 'ا',
15
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0]
16
-
17
- include_examples 'lex', 'aاbبcت',
18
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
19
-
20
- include_examples 'lex', 'aاbبت?',
21
- 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
22
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
23
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
24
-
25
- include_examples 'lex', 'aا?bبcت+',
26
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
27
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
28
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
29
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
30
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
31
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
32
-
33
- include_examples 'lex', 'a(اbب+)cت?',
34
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
35
- 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
36
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
37
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
38
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
39
- 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
40
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
41
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
42
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
43
-
44
- # 3 byte wide characters, Japanese
45
- include_examples 'lex', 'ab?れます+cd',
46
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
47
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
48
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
49
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
50
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
51
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
52
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0]
53
-
54
- # 4 byte wide characters, Osmanya
55
- include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
56
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
57
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
58
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
59
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
60
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
61
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
62
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0]
63
-
64
- include_examples 'lex', 'mu𝄞?si*𝄫c+',
65
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
66
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
67
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
68
- 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
69
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
70
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
71
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
72
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
73
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
74
-
75
- specify('lex single 2 byte char') do
76
- tokens = RL.lex("\u0627+")
77
- expect(tokens.count).to eq 2
78
- end
79
-
80
- specify('lex single 3 byte char') do
81
- tokens = RL.lex("\u308C+")
82
- expect(tokens.count).to eq 2
83
- end
84
-
85
- specify('lex single 4 byte char') do
86
- tokens = RL.lex("\u{1D11E}+")
87
- expect(tokens.count).to eq 2
88
- end
89
- end
@@ -1,99 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Nesting lexing') do
4
- include_examples 'lex', /(((b)))/,
5
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
6
- 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
7
- 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
8
- 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
9
- 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
10
- 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
11
- 6 => [:group, :close, ')', 6, 7, 0, 0, 0]
12
-
13
- include_examples 'lex', /(\((b)\))/,
14
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
15
- 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
16
- 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
17
- 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
18
- 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
19
- 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
20
- 6 => [:group, :close, ')', 8, 9, 0, 0, 0]
21
-
22
- include_examples 'lex', /(?>a(?>b(?>c)))/,
23
- 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
24
- 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
25
- 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
26
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
27
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
28
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
29
-
30
- include_examples 'lex', /(?:a(?:b(?:c)))/,
31
- 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
32
- 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
33
- 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
34
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
35
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
36
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
37
-
38
- include_examples 'lex', /(?=a(?!b(?<=c(?<!d))))/,
39
- 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
40
- 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
41
- 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
42
- 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
43
- 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
44
- 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
45
- 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
46
- 11 => [:group, :close, ')', 21, 22, 0, 0, 0]
47
-
48
- include_examples 'lex', /((?#a)b(?#c)d(?#e))/,
49
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
50
- 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
51
- 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
52
- 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
53
- 6 => [:group, :close, ')', 18, 19, 0, 0, 0]
54
-
55
- include_examples 'lex', /a[b-e]f/,
56
- 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
57
- 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
58
- 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
59
- 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
60
- 5 => [:set, :close, ']', 5, 6, 0, 0, 0]
61
-
62
- include_examples 'lex', /[[:word:]&&[^c]z]/,
63
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
64
- 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
65
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
66
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
67
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
68
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
69
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
70
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
71
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
72
-
73
- include_examples 'lex', /[\p{word}&&[^c]z]/,
74
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
75
- 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
76
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
77
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
78
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
79
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
80
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
81
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
82
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
83
-
84
- include_examples 'lex', /[a[b[c[d-g]]]]/,
85
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
86
- 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
87
- 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
88
- 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
89
- 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
90
- 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
91
- 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
92
- 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
93
- 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
94
- 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
95
- 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
96
- 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
97
- 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
98
- 13 => [:set, :close, ']', 13, 14, 0, 0, 0]
99
- end
@@ -1,55 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('RefCall lexing') do
4
- # Traditional numerical group back-reference
5
- include_examples 'lex', '(abc)\1',
6
- 3 => [:backref, :number, '\1', 5, 7, 0, 0, 0]
7
-
8
- # Group back-references, named, numbered, and relative
9
- include_examples 'lex', '(?<X>abc)\k<X>',
10
- 3 => [:backref, :name_ref, '\k<X>', 9, 14, 0, 0, 0]
11
- include_examples 'lex', "(?<X>abc)\\k'X'",
12
- 3 => [:backref, :name_ref, "\\k'X'", 9, 14, 0, 0, 0]
13
-
14
- include_examples 'lex', '(abc)\k<1>',
15
- 3 => [:backref, :number_ref, '\k<1>', 5, 10, 0, 0, 0]
16
- include_examples 'lex', "(abc)\\k'1'",
17
- 3 => [:backref, :number_ref, "\\k'1'", 5, 10, 0, 0, 0]
18
-
19
- include_examples 'lex', '(abc)\k<-1>',
20
- 3 => [:backref, :number_rel_ref, '\k<-1>', 5, 11, 0, 0, 0]
21
- include_examples 'lex', "(abc)\\k'-1'",
22
- 3 => [:backref, :number_rel_ref, "\\k'-1'", 5, 11, 0, 0, 0]
23
-
24
- # Sub-expression invocation, named, numbered, and relative
25
- include_examples 'lex', '(?<X>abc)\g<X>',
26
- 3 => [:backref, :name_call, '\g<X>', 9, 14, 0, 0, 0]
27
- include_examples 'lex', "(?<X>abc)\\g'X'",
28
- 3 => [:backref, :name_call, "\\g'X'", 9, 14, 0, 0, 0]
29
-
30
- include_examples 'lex', '(abc)\g<1>',
31
- 3 => [:backref, :number_call, '\g<1>', 5, 10, 0, 0, 0]
32
- include_examples 'lex', "(abc)\\g'1'",
33
- 3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
34
-
35
- include_examples 'lex', '(abc)\g<-1>',
36
- 3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
37
- include_examples 'lex', "(abc)\\g'-1'",
38
- 3 => [:backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0]
39
-
40
- include_examples 'lex', '(abc)\g<+1>',
41
- 3 => [:backref, :number_rel_call, '\g<+1>', 5, 11, 0, 0, 0]
42
- include_examples 'lex', "(abc)\\g'+1'",
43
- 3 => [:backref, :number_rel_call, "\\g'+1'", 5, 11, 0, 0, 0]
44
-
45
- # Group back-references, with nesting level
46
- include_examples 'lex', '(?<X>abc)\k<X-0>',
47
- 3 => [:backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0]
48
- include_examples 'lex', "(?<X>abc)\\k'X-0'",
49
- 3 => [:backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0]
50
-
51
- include_examples 'lex', '(abc)\k<1-0>',
52
- 3 => [:backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0]
53
- include_examples 'lex', "(abc)\\k'1-0'",
54
- 3 => [:backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0]
55
- end
@@ -1,43 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe(Regexp::Parser) do
4
- specify('parse returns a root expression') do
5
- expect(RP.parse('abc')).to be_instance_of(Root)
6
- end
7
-
8
- specify('parse can be called with block') do
9
- expect(RP.parse('abc') { |root| root.class }).to eq Root
10
- end
11
-
12
- specify('parse root contains expressions') do
13
- root = RP.parse(/^a.c+[^one]{2,3}\b\d\\\C-C$/)
14
- expect(root.expressions).to all(be_a Regexp::Expression::Base)
15
- end
16
-
17
- specify('parse root options mi') do
18
- root = RP.parse(/[abc]/mi, 'ruby/1.8')
19
-
20
- expect(root.m?).to be true
21
- expect(root.i?).to be true
22
- expect(root.x?).to be false
23
- end
24
-
25
- specify('parse node types') do
26
- root = RP.parse('^(one){2,3}([^d\\]efm-qz\\,\\-]*)(ghi)+$')
27
-
28
- expect(root[1][0]).to be_a(Literal)
29
- expect(root[1]).to be_quantified
30
- expect(root[2][0]).to be_a(CharacterSet)
31
- expect(root[2]).not_to be_quantified
32
- expect(root[3]).to be_a(Group::Capture)
33
- expect(root[3]).to be_quantified
34
- end
35
-
36
- specify('parse no quantifier target raises error') do
37
- expect { RP.parse('?abc') }.to raise_error(ArgumentError)
38
- end
39
-
40
- specify('parse sequence no quantifier target raises error') do
41
- expect { RP.parse('abc|?def') }.to raise_error(ArgumentError)
42
- end
43
- end
@@ -1,88 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Alternation parsing') do
4
- let(:root) { RP.parse('(ab??|cd*|ef+)*|(gh|ij|kl)?') }
5
-
6
- specify('parse alternation root') do
7
- e = root[0]
8
- expect(e).to be_a(Alternation)
9
- end
10
-
11
- specify('parse alternation alts') do
12
- alts = root[0].alternatives
13
-
14
- expect(alts[0]).to be_a(Alternative)
15
- expect(alts[1]).to be_a(Alternative)
16
-
17
- expect(alts[0][0]).to be_a(Group::Capture)
18
- expect(alts[1][0]).to be_a(Group::Capture)
19
-
20
- expect(alts.length).to eq 2
21
- end
22
-
23
- specify('parse alternation nested') do
24
- e = root[0].alternatives[0][0][0]
25
-
26
- expect(e).to be_a(Alternation)
27
- end
28
-
29
- specify('parse alternation nested sequence') do
30
- alts = root[0][0]
31
- nested = alts[0][0][0]
32
-
33
- expect(nested).to be_a(Alternative)
34
-
35
- expect(nested[0]).to be_a(Literal)
36
- expect(nested[1]).to be_a(Literal)
37
- expect(nested.expressions.length).to eq 2
38
- end
39
-
40
- specify('parse alternation nested groups') do
41
- root = RP.parse('(i|ey|([ougfd]+)|(ney))')
42
-
43
- alts = root[0][0].alternatives
44
- expect(alts.length).to eq 4
45
- end
46
-
47
- specify('parse alternation grouped alts') do
48
- root = RP.parse('ca((n)|(t)|(ll)|(b))')
49
-
50
- alts = root[1][0].alternatives
51
-
52
- expect(alts.length).to eq 4
53
-
54
- expect(alts[0]).to be_a(Alternative)
55
- expect(alts[1]).to be_a(Alternative)
56
- expect(alts[2]).to be_a(Alternative)
57
- expect(alts[3]).to be_a(Alternative)
58
- end
59
-
60
- specify('parse alternation nested grouped alts') do
61
- root = RP.parse('ca((n|t)|(ll|b))')
62
-
63
- alts = root[1][0].alternatives
64
-
65
- expect(alts.length).to eq 2
66
-
67
- expect(alts[0]).to be_a(Alternative)
68
- expect(alts[1]).to be_a(Alternative)
69
-
70
- subalts = root[1][0][0][0][0].alternatives
71
-
72
- expect(alts.length).to eq 2
73
-
74
- expect(subalts[0]).to be_a(Alternative)
75
- expect(subalts[1]).to be_a(Alternative)
76
- end
77
-
78
- specify('parse alternation continues after nesting') do
79
- root = RP.parse(/a|(b)c/)
80
-
81
- seq = root[0][1].expressions
82
-
83
- expect(seq.length).to eq 2
84
-
85
- expect(seq[0]).to be_a(Group::Capture)
86
- expect(seq[1]).to be_a(Literal)
87
- end
88
- end
@@ -1,17 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Anchor parsing') do
4
- include_examples 'parse', /^a/, 0 => [:anchor, :bol, Anchor::BOL]
5
- include_examples 'parse', /a$/, 1 => [:anchor, :eol, Anchor::EOL]
6
-
7
- include_examples 'parse', /\Aa/, 0 => [:anchor, :bos, Anchor::BOS]
8
- include_examples 'parse', /a\z/, 1 => [:anchor, :eos, Anchor::EOS]
9
- include_examples 'parse', /a\Z/, 1 => [:anchor, :eos_ob_eol, Anchor::EOSobEOL]
10
-
11
- include_examples 'parse', /a\b/, 1 => [:anchor, :word_boundary, Anchor::WordBoundary]
12
- include_examples 'parse', /a\B/, 1 => [:anchor, :nonword_boundary, Anchor::NonWordBoundary]
13
-
14
- include_examples 'parse', /a\G/, 1 => [:anchor, :match_start, Anchor::MatchStart]
15
-
16
- include_examples 'parse', /\\A/, 0 => [:escape, :backslash, EscapeSequence::Literal]
17
- end