regexp_parser 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -2
  3. data/LICENSE +1 -1
  4. data/README.md +2 -2
  5. data/Rakefile +5 -8
  6. data/lib/regexp_parser/expression/classes/escape_sequence.rb +12 -7
  7. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  8. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  9. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  10. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  11. data/lib/regexp_parser/scanner.rb +126 -124
  12. data/lib/regexp_parser/syntax/base.rb +3 -5
  13. data/lib/regexp_parser/syntax/token/backreference.rb +7 -2
  14. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  15. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  16. data/lib/regexp_parser/version.rb +1 -1
  17. data/regexp_parser.gemspec +20 -22
  18. metadata +11 -143
  19. data/lib/regexp_parser/scanner/properties/long.yml +0 -607
  20. data/lib/regexp_parser/scanner/properties/short.yml +0 -245
  21. data/spec/expression/base_spec.rb +0 -104
  22. data/spec/expression/clone_spec.rb +0 -152
  23. data/spec/expression/conditional_spec.rb +0 -89
  24. data/spec/expression/free_space_spec.rb +0 -27
  25. data/spec/expression/methods/match_length_spec.rb +0 -161
  26. data/spec/expression/methods/match_spec.rb +0 -25
  27. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  28. data/spec/expression/methods/tests_spec.rb +0 -99
  29. data/spec/expression/methods/traverse_spec.rb +0 -161
  30. data/spec/expression/options_spec.rb +0 -128
  31. data/spec/expression/subexpression_spec.rb +0 -50
  32. data/spec/expression/to_h_spec.rb +0 -26
  33. data/spec/expression/to_s_spec.rb +0 -108
  34. data/spec/lexer/all_spec.rb +0 -22
  35. data/spec/lexer/conditionals_spec.rb +0 -53
  36. data/spec/lexer/delimiters_spec.rb +0 -68
  37. data/spec/lexer/escapes_spec.rb +0 -14
  38. data/spec/lexer/keep_spec.rb +0 -10
  39. data/spec/lexer/literals_spec.rb +0 -64
  40. data/spec/lexer/nesting_spec.rb +0 -99
  41. data/spec/lexer/refcalls_spec.rb +0 -60
  42. data/spec/parser/all_spec.rb +0 -43
  43. data/spec/parser/alternation_spec.rb +0 -88
  44. data/spec/parser/anchors_spec.rb +0 -17
  45. data/spec/parser/conditionals_spec.rb +0 -179
  46. data/spec/parser/errors_spec.rb +0 -30
  47. data/spec/parser/escapes_spec.rb +0 -133
  48. data/spec/parser/free_space_spec.rb +0 -130
  49. data/spec/parser/groups_spec.rb +0 -108
  50. data/spec/parser/keep_spec.rb +0 -6
  51. data/spec/parser/options_spec.rb +0 -28
  52. data/spec/parser/posix_classes_spec.rb +0 -8
  53. data/spec/parser/properties_spec.rb +0 -117
  54. data/spec/parser/quantifiers_spec.rb +0 -68
  55. data/spec/parser/refcalls_spec.rb +0 -117
  56. data/spec/parser/set/intersections_spec.rb +0 -127
  57. data/spec/parser/set/ranges_spec.rb +0 -121
  58. data/spec/parser/sets_spec.rb +0 -178
  59. data/spec/parser/types_spec.rb +0 -18
  60. data/spec/scanner/all_spec.rb +0 -18
  61. data/spec/scanner/anchors_spec.rb +0 -21
  62. data/spec/scanner/conditionals_spec.rb +0 -128
  63. data/spec/scanner/delimiters_spec.rb +0 -52
  64. data/spec/scanner/errors_spec.rb +0 -67
  65. data/spec/scanner/escapes_spec.rb +0 -73
  66. data/spec/scanner/free_space_spec.rb +0 -165
  67. data/spec/scanner/groups_spec.rb +0 -61
  68. data/spec/scanner/keep_spec.rb +0 -10
  69. data/spec/scanner/literals_spec.rb +0 -39
  70. data/spec/scanner/meta_spec.rb +0 -18
  71. data/spec/scanner/options_spec.rb +0 -36
  72. data/spec/scanner/properties_spec.rb +0 -64
  73. data/spec/scanner/quantifiers_spec.rb +0 -25
  74. data/spec/scanner/refcalls_spec.rb +0 -55
  75. data/spec/scanner/sets_spec.rb +0 -151
  76. data/spec/scanner/types_spec.rb +0 -14
  77. data/spec/spec_helper.rb +0 -28
  78. data/spec/support/capturing_stderr.rb +0 -9
  79. data/spec/support/shared_examples.rb +0 -77
  80. data/spec/syntax/syntax_spec.rb +0 -48
  81. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  82. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  83. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  84. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  85. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  86. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  87. data/spec/syntax/versions/aliases_spec.rb +0 -38
  88. data/spec/token/token_spec.rb +0 -85
@@ -1,128 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Expression#options') do
4
- it 'returns a hash of options/flags that affect the expression' do
5
- exp = RP.parse(/a/ix)[0]
6
- expect(exp).to be_a Literal
7
- expect(exp.options).to eq(i: true, x: true)
8
- end
9
-
10
- it 'includes options that are locally enabled via special groups' do
11
- exp = RP.parse(/(?x)(?m:a)/i)[1][0]
12
- expect(exp).to be_a Literal
13
- expect(exp.options).to eq(i: true, m: true, x: true)
14
- end
15
-
16
- it 'excludes locally disabled options' do
17
- exp = RP.parse(/(?x)(?-im:a)/i)[1][0]
18
- expect(exp).to be_a Literal
19
- expect(exp.options).to eq(x: true)
20
- end
21
-
22
- it 'gives correct precedence to negative options' do
23
- # Negative options have precedence. E.g. /(?i-i)a/ is case-sensitive.
24
- regexp = /(?i-i:a)/
25
- expect(regexp).to match 'a'
26
- expect(regexp).not_to match 'A'
27
-
28
- exp = RP.parse(regexp)[0][0]
29
- expect(exp).to be_a Literal
30
- expect(exp.options).to eq({})
31
- end
32
-
33
- it 'correctly handles multiple negative option parts' do
34
- regexp = /(?--m--mx--) . /mx
35
- expect(regexp).to match ' . '
36
- expect(regexp).not_to match '.'
37
- expect(regexp).not_to match "\n"
38
-
39
- exp = RP.parse(regexp)[2]
40
- expect(exp.options).to eq({})
41
- end
42
-
43
- it 'gives correct precedence when encountering multiple encoding flags' do
44
- # Any encoding flag overrides all previous encoding flags. If there are
45
- # multiple encoding flags in an options string, the last one wins.
46
- # E.g. /(?dau)\w/ matches UTF8 chars but /(?dua)\w/ only ASCII chars.
47
- regexp1 = /(?dau)\w/
48
- regexp2 = /(?dua)\w/
49
- expect(regexp1).to match 'ü'
50
- expect(regexp2).not_to match 'ü'
51
-
52
- exp1 = RP.parse(regexp1)[1]
53
- exp2 = RP.parse(regexp2)[1]
54
- expect(exp1.options).to eq(u: true)
55
- expect(exp2.options).to eq(a: true)
56
- end
57
-
58
- it 'is accessible via shortcuts' do
59
- exp = Root.build
60
-
61
- expect { exp.options[:i] = true }
62
- .to change { exp.i? }.from(false).to(true)
63
- .and change { exp.ignore_case? }.from(false).to(true)
64
- .and change { exp.case_insensitive? }.from(false).to(true)
65
-
66
- expect { exp.options[:m] = true }
67
- .to change { exp.m? }.from(false).to(true)
68
- .and change { exp.multiline? }.from(false).to(true)
69
-
70
- expect { exp.options[:x] = true }
71
- .to change { exp.x? }.from(false).to(true)
72
- .and change { exp.extended? }.from(false).to(true)
73
- .and change { exp.free_spacing? }.from(false).to(true)
74
-
75
- expect { exp.options[:a] = true }
76
- .to change { exp.a? }.from(false).to(true)
77
- .and change { exp.ascii_classes? }.from(false).to(true)
78
-
79
- expect { exp.options[:d] = true }
80
- .to change { exp.d? }.from(false).to(true)
81
- .and change { exp.default_classes? }.from(false).to(true)
82
-
83
- expect { exp.options[:u] = true }
84
- .to change { exp.u? }.from(false).to(true)
85
- .and change { exp.unicode_classes? }.from(false).to(true)
86
- end
87
-
88
- RSpec.shared_examples '#options' do |regexp, path, klass|
89
- it "works for expression class #{klass}" do
90
- exp = RP.parse(/#{regexp.source}/i).dig(*path)
91
- expect(exp).to be_a(klass)
92
- expect(exp).to be_i
93
- expect(exp).not_to be_x
94
- end
95
- end
96
-
97
- include_examples '#options', //, [], Root
98
- include_examples '#options', /a/, [0], Literal
99
- include_examples '#options', /\A/, [0], Anchor::Base
100
- include_examples '#options', /\d/, [0], CharacterType::Base
101
- include_examples '#options', /\n/, [0], EscapeSequence::Base
102
- include_examples '#options', /\K/, [0], Keep::Mark
103
- include_examples '#options', /./, [0], CharacterType::Any
104
- include_examples '#options', /(a)/, [0], Group::Base
105
- include_examples '#options', /(a)/, [0, 0], Literal
106
- include_examples '#options', /(?=a)/, [0], Assertion::Base
107
- include_examples '#options', /(?=a)/, [0, 0], Literal
108
- include_examples '#options', /(a|b)/, [0], Group::Base
109
- include_examples '#options', /(a|b)/, [0, 0], Alternation
110
- include_examples '#options', /(a|b)/, [0, 0, 0], Alternative
111
- include_examples '#options', /(a|b)/, [0, 0, 0, 0], Literal
112
- include_examples '#options', /(a)\1/, [1], Backreference::Base
113
- include_examples '#options', /(a)\k<1>/, [1], Backreference::Number
114
- include_examples '#options', /(a)\g<1>/, [1], Backreference::NumberCall
115
- include_examples '#options', /[a]/, [0], CharacterSet
116
- include_examples '#options', /[a]/, [0, 0], Literal
117
- include_examples '#options', /[a-z]/, [0, 0], CharacterSet::Range
118
- include_examples '#options', /[a-z]/, [0, 0, 0], Literal
119
- include_examples '#options', /[a&&z]/, [0, 0], CharacterSet::Intersection
120
- include_examples '#options', /[a&&z]/, [0, 0, 0], CharacterSet::IntersectedSequence
121
- include_examples '#options', /[a&&z]/, [0, 0, 0, 0], Literal
122
- include_examples '#options', /[[:ascii:]]/, [0, 0], PosixClass
123
- include_examples '#options', /\p{word}/, [0], UnicodeProperty::Base
124
- include_examples '#options', /(a)(?(1)b|c)/, [1], Conditional::Expression
125
- include_examples '#options', /(a)(?(1)b|c)/, [1, 0], Conditional::Condition
126
- include_examples '#options', /(a)(?(1)b|c)/, [1, 1], Conditional::Branch
127
- include_examples '#options', /(a)(?(1)b|c)/, [1, 1, 0], Literal
128
- end
@@ -1,50 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe(Regexp::Expression::Subexpression) do
4
- specify('#ts, #te') do
5
- regx = /abcd|ghij|klmn|pqur/
6
- root = RP.parse(regx)
7
-
8
- alt = root.first
9
-
10
- { 0 => [0, 4], 1 => [5, 9], 2 => [10, 14], 3 => [15, 19] }.each do |index, span|
11
- sequence = alt[index]
12
-
13
- expect(sequence.ts).to eq span[0]
14
- expect(sequence.te).to eq span[1]
15
- end
16
- end
17
-
18
- specify('#nesting_level') do
19
- root = RP.parse(/a(b(\d|[ef-g[h]]))/)
20
-
21
- tests = {
22
- 'a' => 1,
23
- 'b' => 2,
24
- '\d|[ef-g[h]]' => 3, # alternation
25
- '\d' => 4, # first alternative
26
- '[ef-g[h]]' => 4, # second alternative
27
- 'e' => 5,
28
- 'f-g' => 5,
29
- 'f' => 6,
30
- 'g' => 6,
31
- 'h' => 6,
32
- }
33
-
34
- root.each_expression do |exp|
35
- next unless (expected_nesting_level = tests.delete(exp.to_s))
36
- expect(expected_nesting_level).to eq exp.nesting_level
37
- end
38
-
39
- expect(tests).to be_empty
40
- end
41
-
42
- specify('#dig') do
43
- root = RP.parse(/(((a)))/)
44
-
45
- expect(root.dig(0).to_s).to eq '(((a)))'
46
- expect(root.dig(0, 0, 0, 0).to_s).to eq 'a'
47
- expect(root.dig(0, 0, 0, 0, 0)).to be_nil
48
- expect(root.dig(3, 7)).to be_nil
49
- end
50
- end
@@ -1,26 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Expression#to_h') do
4
- specify('Root#to_h') do
5
- root = RP.parse('abc')
6
-
7
- hash = root.to_h
8
-
9
- expect(token: :root, type: :expression, text: 'abc', starts_at: 0, length: 3, quantifier: nil, options: {}, level: nil, set_level: nil, conditional_level: nil, expressions: [{ token: :literal, type: :literal, text: 'abc', starts_at: 0, length: 3, quantifier: nil, options: {}, level: 0, set_level: 0, conditional_level: 0 }]).to eq hash
10
- end
11
-
12
- specify('Quantifier#to_h') do
13
- root = RP.parse('a{2,4}')
14
- exp = root.expressions.at(0)
15
-
16
- hash = exp.quantifier.to_h
17
-
18
- expect(max: 4, min: 2, mode: :greedy, text: '{2,4}', token: :interval).to eq hash
19
- end
20
-
21
- specify('Conditional#to_h') do
22
- root = RP.parse('(?<A>a)(?(<A>)b|c)', 'ruby/2.0')
23
-
24
- expect { root.to_h }.not_to(raise_error)
25
- end
26
- end
@@ -1,108 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Expression#to_s') do
4
- def parse_frozen(pattern, ruby_version = nil)
5
- IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
6
- end
7
-
8
- def expect_round_trip(pattern, ruby_version = nil)
9
- parsed = parse_frozen(pattern, ruby_version)
10
-
11
- expect(parsed.to_s).to eql(pattern)
12
- end
13
-
14
- specify('literal alternation') do
15
- expect_round_trip('abcd|ghij|klmn|pqur')
16
- end
17
-
18
- specify('quantified alternations') do
19
- expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
20
- end
21
-
22
- specify('quantified sets') do
23
- expect_round_trip('[abc]+|[^def]{3,6}')
24
- end
25
-
26
- specify('property sets') do
27
- expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
28
- end
29
-
30
- specify('groups') do
31
- expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
32
- end
33
-
34
- specify('assertions') do
35
- expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
36
- end
37
-
38
- specify('comments') do
39
- expect_round_trip('(?#start)a(?#middle)b(?#end)')
40
- end
41
-
42
- specify('options') do
43
- expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
44
- end
45
-
46
- specify('url') do
47
- expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
48
- end
49
-
50
- specify('multiline source') do
51
- multiline = /
52
- \A
53
- a? # One letter
54
- b{2,5} # Another one
55
- [c-g]+ # A set
56
- \z
57
- /x
58
-
59
- expect(parse_frozen(multiline).to_s).to eql(multiline.source)
60
- end
61
-
62
- specify('multiline #to_s') do
63
- multiline = /
64
- \A
65
- a? # One letter
66
- b{2,5} # Another one
67
- [c-g]+ # A set
68
- \z
69
- /x
70
-
71
- expect_round_trip(multiline.to_s)
72
- end
73
-
74
- # Free spacing expressions that use spaces between quantifiers and their
75
- # targets do not produce identical results due to the way quantifiers are
76
- # applied to expressions (members, not nodes) and the merging of consecutive
77
- # space nodes. This tests that they produce equivalent results.
78
- specify('multiline equivalence') do
79
- multiline = /
80
- \A
81
- a ? # One letter
82
- b {2,5} # Another one
83
- [c-g] + # A set
84
- \z
85
- /x
86
-
87
- str = 'bbbcged'
88
- root = parse_frozen(multiline)
89
-
90
- expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
91
- end
92
-
93
- # special case: implicit groups used for chained quantifiers produce no parens
94
- specify 'chained quantifiers #to_s' do
95
- pattern = /a+{1}{2}/
96
- root = parse_frozen(pattern)
97
- expect(root.to_s).to eql('a+{1}{2}')
98
- end
99
-
100
- # regression test for https://github.com/ammar/regexp_parser/issues/74
101
- specify('non-ascii comment') do
102
- pattern = '(?x) 😋 # 😋'
103
- root = RP.parse(pattern)
104
- expect(root.last).to be_a(Regexp::Expression::Comment)
105
- expect(root.last.to_s).to eql('# 😋')
106
- expect(root.to_s).to eql(pattern)
107
- end
108
- end
@@ -1,22 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe(Regexp::Lexer) do
4
- specify('lexer returns an array') do
5
- expect(RL.lex('abc')).to be_instance_of(Array)
6
- end
7
-
8
- specify('lexer returns tokens') do
9
- tokens = RL.lex('^abc+[^one]{2,3}\\b\\d\\\\C-C$')
10
- expect(tokens).to all(be_a Regexp::Token)
11
- expect(tokens.map { |token| token.to_a.length }).to all(eq 8)
12
- end
13
-
14
- specify('lexer token count') do
15
- tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
16
- expect(tokens.length).to eq 28
17
- end
18
-
19
- specify('lexer scan alias') do
20
- expect(RL.scan(/a|b|c/)).to eq RL.lex(/a|b|c/)
21
- end
22
- end
@@ -1,53 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Conditional lexing') do
4
- include_examples 'lex', /(?<A>a)(?(<A>)b|c)/,
5
- 3 => [:conditional, :open, '(?', 7, 9, 0, 0, 0],
6
- 4 => [:conditional, :condition, '(<A>)', 9, 14, 0, 0, 1],
7
- 6 => [:conditional, :separator, '|', 15, 16, 0, 0, 1],
8
- 8 => [:conditional, :close, ')', 17, 18, 0, 0, 0]
9
-
10
- include_examples 'lex', /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/,
11
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
12
- 1 => [:group, :named, '(?<A>', 1, 6, 1, 0, 0],
13
- 5 => [:conditional, :open, '(?', 13, 15, 2, 0, 0],
14
- 6 => [:conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
15
- 8 => [:conditional, :separator, '|', 21, 22, 2, 0, 1],
16
- 10 => [:conditional, :open, '(?', 23, 25, 3, 0, 1],
17
- 11 => [:conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
18
- 12 => [:set, :open, '[', 30, 31, 3, 0, 2],
19
- 13 => [:literal, :literal, 'e', 31, 32, 3, 1, 2],
20
- 14 => [:set, :range, '-', 32, 33, 3, 1, 2],
21
- 15 => [:literal, :literal, 'g', 33, 34, 3, 1, 2],
22
- 16 => [:set, :close, ']', 34, 35, 3, 0, 2],
23
- 17 => [:conditional, :separator, '|', 35, 36, 3, 0, 2],
24
- 23 => [:conditional, :close, ')', 41, 42, 3, 0, 1],
25
- 25 => [:conditional, :close, ')', 43, 44, 2, 0, 0],
26
- 26 => [:group, :close, ')', 44, 45, 1, 0, 0],
27
- 27 => [:group, :close, ')', 45, 46, 0, 0, 0]
28
-
29
- include_examples 'lex', /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/,
30
- 9 => [:conditional, :open, '(?', 9, 11, 0, 0, 0],
31
- 10 => [:conditional, :condition, '(1)', 11, 14, 0, 0, 1],
32
- 11 => [:conditional, :open, '(?', 14, 16, 0, 0, 1],
33
- 12 => [:conditional, :condition, '(2)', 16, 19, 0, 0, 2],
34
- 13 => [:conditional, :open, '(?', 19, 21, 0, 0, 2],
35
- 14 => [:conditional, :condition, '(3)', 21, 24, 0, 0, 3],
36
- 16 => [:conditional, :separator, '|', 25, 26, 0, 0, 3],
37
- 18 => [:conditional, :close, ')', 27, 28, 0, 0, 2],
38
- 19 => [:conditional, :close, ')', 28, 29, 0, 0, 1],
39
- 20 => [:conditional, :separator, '|', 29, 30, 0, 0, 1],
40
- 21 => [:conditional, :open, '(?', 30, 32, 0, 0, 1],
41
- 22 => [:conditional, :condition, '(3)', 32, 35, 0, 0, 2],
42
- 23 => [:conditional, :open, '(?', 35, 37, 0, 0, 2],
43
- 24 => [:conditional, :condition, '(2)', 37, 40, 0, 0, 3],
44
- 26 => [:conditional, :separator, '|', 41, 42, 0, 0, 3],
45
- 28 => [:conditional, :close, ')', 43, 44, 0, 0, 2],
46
- 29 => [:conditional, :separator, '|', 44, 45, 0, 0, 2],
47
- 30 => [:conditional, :open, '(?', 45, 47, 0, 0, 2],
48
- 31 => [:conditional, :condition, '(1)', 47, 50, 0, 0, 3],
49
- 33 => [:conditional, :separator, '|', 51, 52, 0, 0, 3],
50
- 35 => [:conditional, :close, ')', 53, 54, 0, 0, 2],
51
- 36 => [:conditional, :close, ')', 54, 55, 0, 0, 1],
52
- 37 => [:conditional, :close, ')', 55, 56, 0, 0, 0]
53
- end
@@ -1,68 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Literal delimiter lexing') do
4
- include_examples 'lex', '}',
5
- 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
6
-
7
- include_examples 'lex', '}}',
8
- 0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
9
-
10
- include_examples 'lex', '{',
11
- 0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
12
-
13
- include_examples 'lex', '{{',
14
- 0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
15
-
16
- include_examples 'lex', '{}',
17
- 0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
18
-
19
- include_examples 'lex', '}{',
20
- 0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
21
-
22
- include_examples 'lex', '}{+',
23
- 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
24
- 1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
25
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
26
-
27
- include_examples 'lex', '{{var}}',
28
- 0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
29
-
30
- include_examples 'lex', 'a{b}c',
31
- 0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
32
-
33
- include_examples 'lex', 'a{1,2',
34
- 0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
35
-
36
- include_examples 'lex', '({.+})',
37
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
38
- 1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
39
- 2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
40
- 3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
41
- 4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
42
- 5 => [:group, :close, ')', 5, 6, 0, 0, 0]
43
-
44
- include_examples 'lex', ']',
45
- 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
46
-
47
- include_examples 'lex', ']]',
48
- 0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
49
-
50
- include_examples 'lex', ']\[',
51
- 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
52
- 1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
53
-
54
- include_examples 'lex', '()',
55
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
56
- 1 => [:group, :close, ')', 1, 2, 0, 0, 0]
57
-
58
- include_examples 'lex', '{abc:.+}}}[^}]]}',
59
- 0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
60
- 1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
61
- 2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
62
- 3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
63
- 4 => [:set, :open, '[', 10, 11, 0, 0, 0],
64
- 5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
65
- 6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
66
- 7 => [:set, :close, ']', 13, 14, 0, 0, 0],
67
- 8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
68
- end
@@ -1,14 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Escape lexing') do
4
- include_examples 'lex', '\u{62}',
5
- 0 => [:escape, :codepoint_list, '\u{62}', 0, 6, 0, 0, 0]
6
-
7
- include_examples 'lex', '\u{62 63 64}',
8
- 0 => [:escape, :codepoint_list, '\u{62 63 64}', 0, 12, 0, 0, 0]
9
-
10
- include_examples 'lex', '\u{62 63 64}+',
11
- 0 => [:escape, :codepoint_list, '\u{62 63}', 0, 9, 0, 0, 0],
12
- 1 => [:escape, :codepoint_list, '\u{64}', 9, 15, 0, 0, 0],
13
- 2 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
14
- end
@@ -1,10 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Keep lexing') do
4
- include_examples 'lex', /ab\Kcd/,
5
- 1 => [:keep, :mark, '\K', 2, 4, 0, 0, 0]
6
-
7
- include_examples 'lex', /(a\Kb)|(c\\\Kd)ef/,
8
- 2 => [:keep, :mark, '\K', 2, 4, 1, 0, 0],
9
- 9 => [:keep, :mark, '\K', 11, 13, 1, 0, 0]
10
- end
@@ -1,64 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Literal lexing') do
4
- # ascii, single byte characters
5
- include_examples 'lex', 'a',
6
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0]
7
-
8
- include_examples 'lex', 'ab+',
9
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
10
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
11
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
12
-
13
- # 2 byte wide characters
14
- include_examples 'lex', 'äöü+',
15
- 0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
16
- 1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
17
- 2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
18
-
19
- # 3 byte wide characters, Japanese
20
- include_examples 'lex', 'ab?れます+cd',
21
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
22
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
23
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
24
- 3 => [:literal, :literal, 'れま', 3, 5, 0, 0, 0],
25
- 4 => [:literal, :literal, 'す', 5, 6, 0, 0, 0],
26
- 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
27
- 6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
28
-
29
- # 4 byte wide characters, Osmanya
30
- include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
31
- 0 => [:literal, :literal, '𐒀', 0, 1, 0, 0, 0],
32
- 1 => [:literal, :literal, '𐒁', 1, 2, 0, 0, 0],
33
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
34
- 3 => [:literal, :literal, '𐒂a', 3, 5, 0, 0, 0],
35
- 4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
36
- 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
37
- 6 => [:literal, :literal, '𐒃', 7, 8, 0, 0, 0]
38
-
39
- include_examples 'lex', 'mu𝄞?si*𝄫c+',
40
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
41
- 1 => [:literal, :literal, '𝄞', 2, 3, 0, 0, 0],
42
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
43
- 3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
44
- 4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
45
- 5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
46
- 6 => [:literal, :literal, '𝄫', 7, 8, 0, 0, 0],
47
- 7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
48
- 8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
49
-
50
- specify('lex single 2 byte char') do
51
- tokens = RL.lex("\u0627+")
52
- expect(tokens.count).to eq 2
53
- end
54
-
55
- specify('lex single 3 byte char') do
56
- tokens = RL.lex("\u308C+")
57
- expect(tokens.count).to eq 2
58
- end
59
-
60
- specify('lex single 4 byte char') do
61
- tokens = RL.lex("\u{1D11E}+")
62
- expect(tokens.count).to eq 2
63
- end
64
- end
@@ -1,99 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Nesting lexing') do
4
- include_examples 'lex', /(((b)))/,
5
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
6
- 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
7
- 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
8
- 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
9
- 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
10
- 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
11
- 6 => [:group, :close, ')', 6, 7, 0, 0, 0]
12
-
13
- include_examples 'lex', /(\((b)\))/,
14
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
15
- 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
16
- 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
17
- 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
18
- 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
19
- 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
20
- 6 => [:group, :close, ')', 8, 9, 0, 0, 0]
21
-
22
- include_examples 'lex', /(?>a(?>b(?>c)))/,
23
- 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
24
- 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
25
- 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
26
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
27
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
28
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
29
-
30
- include_examples 'lex', /(?:a(?:b(?:c)))/,
31
- 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
32
- 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
33
- 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
34
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
35
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
36
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
37
-
38
- include_examples 'lex', /(?=a(?!b(?<=c(?<!d))))/,
39
- 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
40
- 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
41
- 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
42
- 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
43
- 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
44
- 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
45
- 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
46
- 11 => [:group, :close, ')', 21, 22, 0, 0, 0]
47
-
48
- include_examples 'lex', /((?#a)b(?#c)d(?#e))/,
49
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
50
- 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
51
- 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
52
- 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
53
- 6 => [:group, :close, ')', 18, 19, 0, 0, 0]
54
-
55
- include_examples 'lex', /a[b-e]f/,
56
- 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
57
- 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
58
- 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
59
- 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
60
- 5 => [:set, :close, ']', 5, 6, 0, 0, 0]
61
-
62
- include_examples 'lex', '[[:word:]&&[^c]z]',
63
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
64
- 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
65
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
66
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
67
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
68
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
69
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
70
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
71
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
72
-
73
- include_examples 'lex', '[\p{word}&&[^c]z]',
74
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
75
- 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
76
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
77
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
78
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
79
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
80
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
81
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
82
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
83
-
84
- include_examples 'lex', /[a[b[c[d-g]]]]/,
85
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
86
- 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
87
- 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
88
- 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
89
- 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
90
- 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
91
- 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
92
- 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
93
- 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
94
- 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
95
- 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
96
- 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
97
- 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
98
- 13 => [:set, :close, ']', 13, 14, 0, 0, 0]
99
- end