regexp_parser 2.2.0 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -2
  3. data/LICENSE +1 -1
  4. data/README.md +2 -2
  5. data/Rakefile +5 -8
  6. data/lib/regexp_parser/expression/classes/escape_sequence.rb +12 -7
  7. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  8. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  9. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  10. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  11. data/lib/regexp_parser/scanner.rb +126 -124
  12. data/lib/regexp_parser/syntax/base.rb +3 -5
  13. data/lib/regexp_parser/syntax/token/backreference.rb +7 -2
  14. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  15. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  16. data/lib/regexp_parser/version.rb +1 -1
  17. data/regexp_parser.gemspec +20 -22
  18. metadata +11 -143
  19. data/lib/regexp_parser/scanner/properties/long.yml +0 -607
  20. data/lib/regexp_parser/scanner/properties/short.yml +0 -245
  21. data/spec/expression/base_spec.rb +0 -104
  22. data/spec/expression/clone_spec.rb +0 -152
  23. data/spec/expression/conditional_spec.rb +0 -89
  24. data/spec/expression/free_space_spec.rb +0 -27
  25. data/spec/expression/methods/match_length_spec.rb +0 -161
  26. data/spec/expression/methods/match_spec.rb +0 -25
  27. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  28. data/spec/expression/methods/tests_spec.rb +0 -99
  29. data/spec/expression/methods/traverse_spec.rb +0 -161
  30. data/spec/expression/options_spec.rb +0 -128
  31. data/spec/expression/subexpression_spec.rb +0 -50
  32. data/spec/expression/to_h_spec.rb +0 -26
  33. data/spec/expression/to_s_spec.rb +0 -108
  34. data/spec/lexer/all_spec.rb +0 -22
  35. data/spec/lexer/conditionals_spec.rb +0 -53
  36. data/spec/lexer/delimiters_spec.rb +0 -68
  37. data/spec/lexer/escapes_spec.rb +0 -14
  38. data/spec/lexer/keep_spec.rb +0 -10
  39. data/spec/lexer/literals_spec.rb +0 -64
  40. data/spec/lexer/nesting_spec.rb +0 -99
  41. data/spec/lexer/refcalls_spec.rb +0 -60
  42. data/spec/parser/all_spec.rb +0 -43
  43. data/spec/parser/alternation_spec.rb +0 -88
  44. data/spec/parser/anchors_spec.rb +0 -17
  45. data/spec/parser/conditionals_spec.rb +0 -179
  46. data/spec/parser/errors_spec.rb +0 -30
  47. data/spec/parser/escapes_spec.rb +0 -133
  48. data/spec/parser/free_space_spec.rb +0 -130
  49. data/spec/parser/groups_spec.rb +0 -108
  50. data/spec/parser/keep_spec.rb +0 -6
  51. data/spec/parser/options_spec.rb +0 -28
  52. data/spec/parser/posix_classes_spec.rb +0 -8
  53. data/spec/parser/properties_spec.rb +0 -117
  54. data/spec/parser/quantifiers_spec.rb +0 -68
  55. data/spec/parser/refcalls_spec.rb +0 -117
  56. data/spec/parser/set/intersections_spec.rb +0 -127
  57. data/spec/parser/set/ranges_spec.rb +0 -121
  58. data/spec/parser/sets_spec.rb +0 -178
  59. data/spec/parser/types_spec.rb +0 -18
  60. data/spec/scanner/all_spec.rb +0 -18
  61. data/spec/scanner/anchors_spec.rb +0 -21
  62. data/spec/scanner/conditionals_spec.rb +0 -128
  63. data/spec/scanner/delimiters_spec.rb +0 -52
  64. data/spec/scanner/errors_spec.rb +0 -67
  65. data/spec/scanner/escapes_spec.rb +0 -73
  66. data/spec/scanner/free_space_spec.rb +0 -165
  67. data/spec/scanner/groups_spec.rb +0 -61
  68. data/spec/scanner/keep_spec.rb +0 -10
  69. data/spec/scanner/literals_spec.rb +0 -39
  70. data/spec/scanner/meta_spec.rb +0 -18
  71. data/spec/scanner/options_spec.rb +0 -36
  72. data/spec/scanner/properties_spec.rb +0 -64
  73. data/spec/scanner/quantifiers_spec.rb +0 -25
  74. data/spec/scanner/refcalls_spec.rb +0 -55
  75. data/spec/scanner/sets_spec.rb +0 -151
  76. data/spec/scanner/types_spec.rb +0 -14
  77. data/spec/spec_helper.rb +0 -28
  78. data/spec/support/capturing_stderr.rb +0 -9
  79. data/spec/support/shared_examples.rb +0 -77
  80. data/spec/syntax/syntax_spec.rb +0 -48
  81. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  82. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  83. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  84. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  85. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  86. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  87. data/spec/syntax/versions/aliases_spec.rb +0 -38
  88. data/spec/token/token_spec.rb +0 -85
@@ -1,128 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Expression#options') do
4
- it 'returns a hash of options/flags that affect the expression' do
5
- exp = RP.parse(/a/ix)[0]
6
- expect(exp).to be_a Literal
7
- expect(exp.options).to eq(i: true, x: true)
8
- end
9
-
10
- it 'includes options that are locally enabled via special groups' do
11
- exp = RP.parse(/(?x)(?m:a)/i)[1][0]
12
- expect(exp).to be_a Literal
13
- expect(exp.options).to eq(i: true, m: true, x: true)
14
- end
15
-
16
- it 'excludes locally disabled options' do
17
- exp = RP.parse(/(?x)(?-im:a)/i)[1][0]
18
- expect(exp).to be_a Literal
19
- expect(exp.options).to eq(x: true)
20
- end
21
-
22
- it 'gives correct precedence to negative options' do
23
- # Negative options have precedence. E.g. /(?i-i)a/ is case-sensitive.
24
- regexp = /(?i-i:a)/
25
- expect(regexp).to match 'a'
26
- expect(regexp).not_to match 'A'
27
-
28
- exp = RP.parse(regexp)[0][0]
29
- expect(exp).to be_a Literal
30
- expect(exp.options).to eq({})
31
- end
32
-
33
- it 'correctly handles multiple negative option parts' do
34
- regexp = /(?--m--mx--) . /mx
35
- expect(regexp).to match ' . '
36
- expect(regexp).not_to match '.'
37
- expect(regexp).not_to match "\n"
38
-
39
- exp = RP.parse(regexp)[2]
40
- expect(exp.options).to eq({})
41
- end
42
-
43
- it 'gives correct precedence when encountering multiple encoding flags' do
44
- # Any encoding flag overrides all previous encoding flags. If there are
45
- # multiple encoding flags in an options string, the last one wins.
46
- # E.g. /(?dau)\w/ matches UTF8 chars but /(?dua)\w/ only ASCII chars.
47
- regexp1 = /(?dau)\w/
48
- regexp2 = /(?dua)\w/
49
- expect(regexp1).to match 'ü'
50
- expect(regexp2).not_to match 'ü'
51
-
52
- exp1 = RP.parse(regexp1)[1]
53
- exp2 = RP.parse(regexp2)[1]
54
- expect(exp1.options).to eq(u: true)
55
- expect(exp2.options).to eq(a: true)
56
- end
57
-
58
- it 'is accessible via shortcuts' do
59
- exp = Root.build
60
-
61
- expect { exp.options[:i] = true }
62
- .to change { exp.i? }.from(false).to(true)
63
- .and change { exp.ignore_case? }.from(false).to(true)
64
- .and change { exp.case_insensitive? }.from(false).to(true)
65
-
66
- expect { exp.options[:m] = true }
67
- .to change { exp.m? }.from(false).to(true)
68
- .and change { exp.multiline? }.from(false).to(true)
69
-
70
- expect { exp.options[:x] = true }
71
- .to change { exp.x? }.from(false).to(true)
72
- .and change { exp.extended? }.from(false).to(true)
73
- .and change { exp.free_spacing? }.from(false).to(true)
74
-
75
- expect { exp.options[:a] = true }
76
- .to change { exp.a? }.from(false).to(true)
77
- .and change { exp.ascii_classes? }.from(false).to(true)
78
-
79
- expect { exp.options[:d] = true }
80
- .to change { exp.d? }.from(false).to(true)
81
- .and change { exp.default_classes? }.from(false).to(true)
82
-
83
- expect { exp.options[:u] = true }
84
- .to change { exp.u? }.from(false).to(true)
85
- .and change { exp.unicode_classes? }.from(false).to(true)
86
- end
87
-
88
- RSpec.shared_examples '#options' do |regexp, path, klass|
89
- it "works for expression class #{klass}" do
90
- exp = RP.parse(/#{regexp.source}/i).dig(*path)
91
- expect(exp).to be_a(klass)
92
- expect(exp).to be_i
93
- expect(exp).not_to be_x
94
- end
95
- end
96
-
97
- include_examples '#options', //, [], Root
98
- include_examples '#options', /a/, [0], Literal
99
- include_examples '#options', /\A/, [0], Anchor::Base
100
- include_examples '#options', /\d/, [0], CharacterType::Base
101
- include_examples '#options', /\n/, [0], EscapeSequence::Base
102
- include_examples '#options', /\K/, [0], Keep::Mark
103
- include_examples '#options', /./, [0], CharacterType::Any
104
- include_examples '#options', /(a)/, [0], Group::Base
105
- include_examples '#options', /(a)/, [0, 0], Literal
106
- include_examples '#options', /(?=a)/, [0], Assertion::Base
107
- include_examples '#options', /(?=a)/, [0, 0], Literal
108
- include_examples '#options', /(a|b)/, [0], Group::Base
109
- include_examples '#options', /(a|b)/, [0, 0], Alternation
110
- include_examples '#options', /(a|b)/, [0, 0, 0], Alternative
111
- include_examples '#options', /(a|b)/, [0, 0, 0, 0], Literal
112
- include_examples '#options', /(a)\1/, [1], Backreference::Base
113
- include_examples '#options', /(a)\k<1>/, [1], Backreference::Number
114
- include_examples '#options', /(a)\g<1>/, [1], Backreference::NumberCall
115
- include_examples '#options', /[a]/, [0], CharacterSet
116
- include_examples '#options', /[a]/, [0, 0], Literal
117
- include_examples '#options', /[a-z]/, [0, 0], CharacterSet::Range
118
- include_examples '#options', /[a-z]/, [0, 0, 0], Literal
119
- include_examples '#options', /[a&&z]/, [0, 0], CharacterSet::Intersection
120
- include_examples '#options', /[a&&z]/, [0, 0, 0], CharacterSet::IntersectedSequence
121
- include_examples '#options', /[a&&z]/, [0, 0, 0, 0], Literal
122
- include_examples '#options', /[[:ascii:]]/, [0, 0], PosixClass
123
- include_examples '#options', /\p{word}/, [0], UnicodeProperty::Base
124
- include_examples '#options', /(a)(?(1)b|c)/, [1], Conditional::Expression
125
- include_examples '#options', /(a)(?(1)b|c)/, [1, 0], Conditional::Condition
126
- include_examples '#options', /(a)(?(1)b|c)/, [1, 1], Conditional::Branch
127
- include_examples '#options', /(a)(?(1)b|c)/, [1, 1, 0], Literal
128
- end
@@ -1,50 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe(Regexp::Expression::Subexpression) do
4
- specify('#ts, #te') do
5
- regx = /abcd|ghij|klmn|pqur/
6
- root = RP.parse(regx)
7
-
8
- alt = root.first
9
-
10
- { 0 => [0, 4], 1 => [5, 9], 2 => [10, 14], 3 => [15, 19] }.each do |index, span|
11
- sequence = alt[index]
12
-
13
- expect(sequence.ts).to eq span[0]
14
- expect(sequence.te).to eq span[1]
15
- end
16
- end
17
-
18
- specify('#nesting_level') do
19
- root = RP.parse(/a(b(\d|[ef-g[h]]))/)
20
-
21
- tests = {
22
- 'a' => 1,
23
- 'b' => 2,
24
- '\d|[ef-g[h]]' => 3, # alternation
25
- '\d' => 4, # first alternative
26
- '[ef-g[h]]' => 4, # second alternative
27
- 'e' => 5,
28
- 'f-g' => 5,
29
- 'f' => 6,
30
- 'g' => 6,
31
- 'h' => 6,
32
- }
33
-
34
- root.each_expression do |exp|
35
- next unless (expected_nesting_level = tests.delete(exp.to_s))
36
- expect(expected_nesting_level).to eq exp.nesting_level
37
- end
38
-
39
- expect(tests).to be_empty
40
- end
41
-
42
- specify('#dig') do
43
- root = RP.parse(/(((a)))/)
44
-
45
- expect(root.dig(0).to_s).to eq '(((a)))'
46
- expect(root.dig(0, 0, 0, 0).to_s).to eq 'a'
47
- expect(root.dig(0, 0, 0, 0, 0)).to be_nil
48
- expect(root.dig(3, 7)).to be_nil
49
- end
50
- end
@@ -1,26 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Expression#to_h') do
4
- specify('Root#to_h') do
5
- root = RP.parse('abc')
6
-
7
- hash = root.to_h
8
-
9
- expect(token: :root, type: :expression, text: 'abc', starts_at: 0, length: 3, quantifier: nil, options: {}, level: nil, set_level: nil, conditional_level: nil, expressions: [{ token: :literal, type: :literal, text: 'abc', starts_at: 0, length: 3, quantifier: nil, options: {}, level: 0, set_level: 0, conditional_level: 0 }]).to eq hash
10
- end
11
-
12
- specify('Quantifier#to_h') do
13
- root = RP.parse('a{2,4}')
14
- exp = root.expressions.at(0)
15
-
16
- hash = exp.quantifier.to_h
17
-
18
- expect(max: 4, min: 2, mode: :greedy, text: '{2,4}', token: :interval).to eq hash
19
- end
20
-
21
- specify('Conditional#to_h') do
22
- root = RP.parse('(?<A>a)(?(<A>)b|c)', 'ruby/2.0')
23
-
24
- expect { root.to_h }.not_to(raise_error)
25
- end
26
- end
@@ -1,108 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Expression#to_s') do
4
- def parse_frozen(pattern, ruby_version = nil)
5
- IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
6
- end
7
-
8
- def expect_round_trip(pattern, ruby_version = nil)
9
- parsed = parse_frozen(pattern, ruby_version)
10
-
11
- expect(parsed.to_s).to eql(pattern)
12
- end
13
-
14
- specify('literal alternation') do
15
- expect_round_trip('abcd|ghij|klmn|pqur')
16
- end
17
-
18
- specify('quantified alternations') do
19
- expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
20
- end
21
-
22
- specify('quantified sets') do
23
- expect_round_trip('[abc]+|[^def]{3,6}')
24
- end
25
-
26
- specify('property sets') do
27
- expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
28
- end
29
-
30
- specify('groups') do
31
- expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
32
- end
33
-
34
- specify('assertions') do
35
- expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
36
- end
37
-
38
- specify('comments') do
39
- expect_round_trip('(?#start)a(?#middle)b(?#end)')
40
- end
41
-
42
- specify('options') do
43
- expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
44
- end
45
-
46
- specify('url') do
47
- expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
48
- end
49
-
50
- specify('multiline source') do
51
- multiline = /
52
- \A
53
- a? # One letter
54
- b{2,5} # Another one
55
- [c-g]+ # A set
56
- \z
57
- /x
58
-
59
- expect(parse_frozen(multiline).to_s).to eql(multiline.source)
60
- end
61
-
62
- specify('multiline #to_s') do
63
- multiline = /
64
- \A
65
- a? # One letter
66
- b{2,5} # Another one
67
- [c-g]+ # A set
68
- \z
69
- /x
70
-
71
- expect_round_trip(multiline.to_s)
72
- end
73
-
74
- # Free spacing expressions that use spaces between quantifiers and their
75
- # targets do not produce identical results due to the way quantifiers are
76
- # applied to expressions (members, not nodes) and the merging of consecutive
77
- # space nodes. This tests that they produce equivalent results.
78
- specify('multiline equivalence') do
79
- multiline = /
80
- \A
81
- a ? # One letter
82
- b {2,5} # Another one
83
- [c-g] + # A set
84
- \z
85
- /x
86
-
87
- str = 'bbbcged'
88
- root = parse_frozen(multiline)
89
-
90
- expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
91
- end
92
-
93
- # special case: implicit groups used for chained quantifiers produce no parens
94
- specify 'chained quantifiers #to_s' do
95
- pattern = /a+{1}{2}/
96
- root = parse_frozen(pattern)
97
- expect(root.to_s).to eql('a+{1}{2}')
98
- end
99
-
100
- # regression test for https://github.com/ammar/regexp_parser/issues/74
101
- specify('non-ascii comment') do
102
- pattern = '(?x) 😋 # 😋'
103
- root = RP.parse(pattern)
104
- expect(root.last).to be_a(Regexp::Expression::Comment)
105
- expect(root.last.to_s).to eql('# 😋')
106
- expect(root.to_s).to eql(pattern)
107
- end
108
- end
@@ -1,22 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe(Regexp::Lexer) do
4
- specify('lexer returns an array') do
5
- expect(RL.lex('abc')).to be_instance_of(Array)
6
- end
7
-
8
- specify('lexer returns tokens') do
9
- tokens = RL.lex('^abc+[^one]{2,3}\\b\\d\\\\C-C$')
10
- expect(tokens).to all(be_a Regexp::Token)
11
- expect(tokens.map { |token| token.to_a.length }).to all(eq 8)
12
- end
13
-
14
- specify('lexer token count') do
15
- tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
16
- expect(tokens.length).to eq 28
17
- end
18
-
19
- specify('lexer scan alias') do
20
- expect(RL.scan(/a|b|c/)).to eq RL.lex(/a|b|c/)
21
- end
22
- end
@@ -1,53 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Conditional lexing') do
4
- include_examples 'lex', /(?<A>a)(?(<A>)b|c)/,
5
- 3 => [:conditional, :open, '(?', 7, 9, 0, 0, 0],
6
- 4 => [:conditional, :condition, '(<A>)', 9, 14, 0, 0, 1],
7
- 6 => [:conditional, :separator, '|', 15, 16, 0, 0, 1],
8
- 8 => [:conditional, :close, ')', 17, 18, 0, 0, 0]
9
-
10
- include_examples 'lex', /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/,
11
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
12
- 1 => [:group, :named, '(?<A>', 1, 6, 1, 0, 0],
13
- 5 => [:conditional, :open, '(?', 13, 15, 2, 0, 0],
14
- 6 => [:conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
15
- 8 => [:conditional, :separator, '|', 21, 22, 2, 0, 1],
16
- 10 => [:conditional, :open, '(?', 23, 25, 3, 0, 1],
17
- 11 => [:conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
18
- 12 => [:set, :open, '[', 30, 31, 3, 0, 2],
19
- 13 => [:literal, :literal, 'e', 31, 32, 3, 1, 2],
20
- 14 => [:set, :range, '-', 32, 33, 3, 1, 2],
21
- 15 => [:literal, :literal, 'g', 33, 34, 3, 1, 2],
22
- 16 => [:set, :close, ']', 34, 35, 3, 0, 2],
23
- 17 => [:conditional, :separator, '|', 35, 36, 3, 0, 2],
24
- 23 => [:conditional, :close, ')', 41, 42, 3, 0, 1],
25
- 25 => [:conditional, :close, ')', 43, 44, 2, 0, 0],
26
- 26 => [:group, :close, ')', 44, 45, 1, 0, 0],
27
- 27 => [:group, :close, ')', 45, 46, 0, 0, 0]
28
-
29
- include_examples 'lex', /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/,
30
- 9 => [:conditional, :open, '(?', 9, 11, 0, 0, 0],
31
- 10 => [:conditional, :condition, '(1)', 11, 14, 0, 0, 1],
32
- 11 => [:conditional, :open, '(?', 14, 16, 0, 0, 1],
33
- 12 => [:conditional, :condition, '(2)', 16, 19, 0, 0, 2],
34
- 13 => [:conditional, :open, '(?', 19, 21, 0, 0, 2],
35
- 14 => [:conditional, :condition, '(3)', 21, 24, 0, 0, 3],
36
- 16 => [:conditional, :separator, '|', 25, 26, 0, 0, 3],
37
- 18 => [:conditional, :close, ')', 27, 28, 0, 0, 2],
38
- 19 => [:conditional, :close, ')', 28, 29, 0, 0, 1],
39
- 20 => [:conditional, :separator, '|', 29, 30, 0, 0, 1],
40
- 21 => [:conditional, :open, '(?', 30, 32, 0, 0, 1],
41
- 22 => [:conditional, :condition, '(3)', 32, 35, 0, 0, 2],
42
- 23 => [:conditional, :open, '(?', 35, 37, 0, 0, 2],
43
- 24 => [:conditional, :condition, '(2)', 37, 40, 0, 0, 3],
44
- 26 => [:conditional, :separator, '|', 41, 42, 0, 0, 3],
45
- 28 => [:conditional, :close, ')', 43, 44, 0, 0, 2],
46
- 29 => [:conditional, :separator, '|', 44, 45, 0, 0, 2],
47
- 30 => [:conditional, :open, '(?', 45, 47, 0, 0, 2],
48
- 31 => [:conditional, :condition, '(1)', 47, 50, 0, 0, 3],
49
- 33 => [:conditional, :separator, '|', 51, 52, 0, 0, 3],
50
- 35 => [:conditional, :close, ')', 53, 54, 0, 0, 2],
51
- 36 => [:conditional, :close, ')', 54, 55, 0, 0, 1],
52
- 37 => [:conditional, :close, ')', 55, 56, 0, 0, 0]
53
- end
@@ -1,68 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Literal delimiter lexing') do
4
- include_examples 'lex', '}',
5
- 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
6
-
7
- include_examples 'lex', '}}',
8
- 0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
9
-
10
- include_examples 'lex', '{',
11
- 0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
12
-
13
- include_examples 'lex', '{{',
14
- 0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
15
-
16
- include_examples 'lex', '{}',
17
- 0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
18
-
19
- include_examples 'lex', '}{',
20
- 0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
21
-
22
- include_examples 'lex', '}{+',
23
- 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
24
- 1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
25
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
26
-
27
- include_examples 'lex', '{{var}}',
28
- 0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
29
-
30
- include_examples 'lex', 'a{b}c',
31
- 0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
32
-
33
- include_examples 'lex', 'a{1,2',
34
- 0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
35
-
36
- include_examples 'lex', '({.+})',
37
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
38
- 1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
39
- 2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
40
- 3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
41
- 4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
42
- 5 => [:group, :close, ')', 5, 6, 0, 0, 0]
43
-
44
- include_examples 'lex', ']',
45
- 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
46
-
47
- include_examples 'lex', ']]',
48
- 0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
49
-
50
- include_examples 'lex', ']\[',
51
- 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
52
- 1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
53
-
54
- include_examples 'lex', '()',
55
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
56
- 1 => [:group, :close, ')', 1, 2, 0, 0, 0]
57
-
58
- include_examples 'lex', '{abc:.+}}}[^}]]}',
59
- 0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
60
- 1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
61
- 2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
62
- 3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
63
- 4 => [:set, :open, '[', 10, 11, 0, 0, 0],
64
- 5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
65
- 6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
66
- 7 => [:set, :close, ']', 13, 14, 0, 0, 0],
67
- 8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
68
- end
@@ -1,14 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Escape lexing') do
4
- include_examples 'lex', '\u{62}',
5
- 0 => [:escape, :codepoint_list, '\u{62}', 0, 6, 0, 0, 0]
6
-
7
- include_examples 'lex', '\u{62 63 64}',
8
- 0 => [:escape, :codepoint_list, '\u{62 63 64}', 0, 12, 0, 0, 0]
9
-
10
- include_examples 'lex', '\u{62 63 64}+',
11
- 0 => [:escape, :codepoint_list, '\u{62 63}', 0, 9, 0, 0, 0],
12
- 1 => [:escape, :codepoint_list, '\u{64}', 9, 15, 0, 0, 0],
13
- 2 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
14
- end
@@ -1,10 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Keep lexing') do
4
- include_examples 'lex', /ab\Kcd/,
5
- 1 => [:keep, :mark, '\K', 2, 4, 0, 0, 0]
6
-
7
- include_examples 'lex', /(a\Kb)|(c\\\Kd)ef/,
8
- 2 => [:keep, :mark, '\K', 2, 4, 1, 0, 0],
9
- 9 => [:keep, :mark, '\K', 11, 13, 1, 0, 0]
10
- end
@@ -1,64 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Literal lexing') do
4
- # ascii, single byte characters
5
- include_examples 'lex', 'a',
6
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0]
7
-
8
- include_examples 'lex', 'ab+',
9
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
10
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
11
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
12
-
13
- # 2 byte wide characters
14
- include_examples 'lex', 'äöü+',
15
- 0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
16
- 1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
17
- 2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
18
-
19
- # 3 byte wide characters, Japanese
20
- include_examples 'lex', 'ab?れます+cd',
21
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
22
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
23
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
24
- 3 => [:literal, :literal, 'れま', 3, 5, 0, 0, 0],
25
- 4 => [:literal, :literal, 'す', 5, 6, 0, 0, 0],
26
- 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
27
- 6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
28
-
29
- # 4 byte wide characters, Osmanya
30
- include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
31
- 0 => [:literal, :literal, '𐒀', 0, 1, 0, 0, 0],
32
- 1 => [:literal, :literal, '𐒁', 1, 2, 0, 0, 0],
33
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
34
- 3 => [:literal, :literal, '𐒂a', 3, 5, 0, 0, 0],
35
- 4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
36
- 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
37
- 6 => [:literal, :literal, '𐒃', 7, 8, 0, 0, 0]
38
-
39
- include_examples 'lex', 'mu𝄞?si*𝄫c+',
40
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
41
- 1 => [:literal, :literal, '𝄞', 2, 3, 0, 0, 0],
42
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
43
- 3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
44
- 4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
45
- 5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
46
- 6 => [:literal, :literal, '𝄫', 7, 8, 0, 0, 0],
47
- 7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
48
- 8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
49
-
50
- specify('lex single 2 byte char') do
51
- tokens = RL.lex("\u0627+")
52
- expect(tokens.count).to eq 2
53
- end
54
-
55
- specify('lex single 3 byte char') do
56
- tokens = RL.lex("\u308C+")
57
- expect(tokens.count).to eq 2
58
- end
59
-
60
- specify('lex single 4 byte char') do
61
- tokens = RL.lex("\u{1D11E}+")
62
- expect(tokens.count).to eq 2
63
- end
64
- end
@@ -1,99 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Nesting lexing') do
4
- include_examples 'lex', /(((b)))/,
5
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
6
- 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
7
- 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
8
- 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
9
- 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
10
- 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
11
- 6 => [:group, :close, ')', 6, 7, 0, 0, 0]
12
-
13
- include_examples 'lex', /(\((b)\))/,
14
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
15
- 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
16
- 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
17
- 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
18
- 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
19
- 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
20
- 6 => [:group, :close, ')', 8, 9, 0, 0, 0]
21
-
22
- include_examples 'lex', /(?>a(?>b(?>c)))/,
23
- 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
24
- 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
25
- 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
26
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
27
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
28
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
29
-
30
- include_examples 'lex', /(?:a(?:b(?:c)))/,
31
- 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
32
- 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
33
- 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
34
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
35
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
36
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
37
-
38
- include_examples 'lex', /(?=a(?!b(?<=c(?<!d))))/,
39
- 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
40
- 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
41
- 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
42
- 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
43
- 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
44
- 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
45
- 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
46
- 11 => [:group, :close, ')', 21, 22, 0, 0, 0]
47
-
48
- include_examples 'lex', /((?#a)b(?#c)d(?#e))/,
49
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
50
- 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
51
- 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
52
- 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
53
- 6 => [:group, :close, ')', 18, 19, 0, 0, 0]
54
-
55
- include_examples 'lex', /a[b-e]f/,
56
- 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
57
- 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
58
- 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
59
- 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
60
- 5 => [:set, :close, ']', 5, 6, 0, 0, 0]
61
-
62
- include_examples 'lex', '[[:word:]&&[^c]z]',
63
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
64
- 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
65
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
66
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
67
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
68
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
69
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
70
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
71
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
72
-
73
- include_examples 'lex', '[\p{word}&&[^c]z]',
74
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
75
- 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
76
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
77
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
78
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
79
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
80
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
81
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
82
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
83
-
84
- include_examples 'lex', /[a[b[c[d-g]]]]/,
85
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
86
- 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
87
- 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
88
- 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
89
- 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
90
- 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
91
- 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
92
- 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
93
- 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
94
- 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
95
- 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
96
- 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
97
- 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
98
- 13 => [:set, :close, ']', 13, 14, 0, 0, 0]
99
- end