regexp_parser 1.5.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +59 -0
  3. data/Gemfile +3 -3
  4. data/README.md +14 -6
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +6 -43
  7. data/lib/regexp_parser/expression/classes/conditional.rb +3 -2
  8. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  9. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  10. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  11. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  12. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  13. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  14. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  15. data/lib/regexp_parser/expression/sequence.rb +3 -2
  16. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  17. data/lib/regexp_parser/lexer.rb +4 -25
  18. data/lib/regexp_parser/parser.rb +40 -33
  19. data/lib/regexp_parser/scanner.rb +1208 -1353
  20. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  21. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  22. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  23. data/lib/regexp_parser/scanner/scanner.rl +116 -202
  24. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  25. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  26. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  27. data/lib/regexp_parser/version.rb +1 -1
  28. data/spec/expression/base_spec.rb +14 -0
  29. data/spec/expression/methods/match_length_spec.rb +20 -0
  30. data/spec/expression/methods/match_spec.rb +25 -0
  31. data/spec/expression/methods/tests_spec.rb +2 -0
  32. data/spec/expression/methods/traverse_spec.rb +21 -0
  33. data/spec/expression/options_spec.rb +128 -0
  34. data/spec/expression/root_spec.rb +9 -0
  35. data/spec/expression/sequence_spec.rb +9 -0
  36. data/spec/lexer/conditionals_spec.rb +49 -119
  37. data/spec/lexer/delimiters_spec.rb +68 -0
  38. data/spec/lexer/escapes_spec.rb +8 -32
  39. data/spec/lexer/keep_spec.rb +5 -17
  40. data/spec/lexer/literals_spec.rb +73 -110
  41. data/spec/lexer/nesting_spec.rb +86 -117
  42. data/spec/lexer/refcalls_spec.rb +51 -50
  43. data/spec/parser/all_spec.rb +13 -1
  44. data/spec/parser/anchors_spec.rb +9 -23
  45. data/spec/parser/conditionals_spec.rb +9 -9
  46. data/spec/parser/errors_spec.rb +22 -43
  47. data/spec/parser/escapes_spec.rb +33 -44
  48. data/spec/parser/free_space_spec.rb +25 -4
  49. data/spec/parser/groups_spec.rb +98 -257
  50. data/spec/parser/keep_spec.rb +2 -15
  51. data/spec/parser/options_spec.rb +28 -0
  52. data/spec/parser/posix_classes_spec.rb +5 -24
  53. data/spec/parser/properties_spec.rb +42 -54
  54. data/spec/parser/quantifiers_spec.rb +42 -283
  55. data/spec/parser/refcalls_spec.rb +60 -185
  56. data/spec/parser/set/intersections_spec.rb +17 -17
  57. data/spec/parser/set/ranges_spec.rb +17 -17
  58. data/spec/parser/sets_spec.rb +5 -5
  59. data/spec/parser/types_spec.rb +11 -36
  60. data/spec/scanner/anchors_spec.rb +13 -28
  61. data/spec/scanner/conditionals_spec.rb +121 -173
  62. data/spec/scanner/delimiters_spec.rb +52 -0
  63. data/spec/scanner/errors_spec.rb +64 -87
  64. data/spec/scanner/escapes_spec.rb +53 -50
  65. data/spec/scanner/free_space_spec.rb +102 -165
  66. data/spec/scanner/groups_spec.rb +45 -64
  67. data/spec/scanner/keep_spec.rb +5 -28
  68. data/spec/scanner/literals_spec.rb +45 -81
  69. data/spec/scanner/meta_spec.rb +13 -33
  70. data/spec/scanner/options_spec.rb +36 -0
  71. data/spec/scanner/properties_spec.rb +43 -286
  72. data/spec/scanner/quantifiers_spec.rb +13 -28
  73. data/spec/scanner/refcalls_spec.rb +32 -48
  74. data/spec/scanner/sets_spec.rb +88 -102
  75. data/spec/scanner/types_spec.rb +10 -25
  76. data/spec/spec_helper.rb +1 -0
  77. data/spec/support/shared_examples.rb +77 -0
  78. data/spec/syntax/syntax_spec.rb +4 -0
  79. data/spec/syntax/versions/1.8.6_spec.rb +12 -33
  80. data/spec/syntax/versions/1.9.1_spec.rb +5 -18
  81. data/spec/syntax/versions/1.9.3_spec.rb +4 -17
  82. data/spec/syntax/versions/2.0.0_spec.rb +8 -23
  83. data/spec/syntax/versions/2.2.0_spec.rb +4 -17
  84. data/spec/syntax/versions/aliases_spec.rb +27 -109
  85. metadata +28 -10
  86. data/spec/scanner/scripts_spec.rb +0 -49
  87. data/spec/scanner/unicode_blocks_spec.rb +0 -28
@@ -0,0 +1,68 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter lexing') do
4
+ include_examples 'lex', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
6
+
7
+ include_examples 'lex', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
9
+
10
+ include_examples 'lex', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
12
+
13
+ include_examples 'lex', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
15
+
16
+ include_examples 'lex', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
18
+
19
+ include_examples 'lex', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
21
+
22
+ include_examples 'lex', '}{+',
23
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
24
+ 1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
25
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
26
+
27
+ include_examples 'lex', '{{var}}',
28
+ 0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
29
+
30
+ include_examples 'lex', 'a{b}c',
31
+ 0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
32
+
33
+ include_examples 'lex', 'a{1,2',
34
+ 0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
35
+
36
+ include_examples 'lex', '({.+})',
37
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
38
+ 1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
39
+ 2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
40
+ 3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
41
+ 4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
42
+ 5 => [:group, :close, ')', 5, 6, 0, 0, 0]
43
+
44
+ include_examples 'lex', ']',
45
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
46
+
47
+ include_examples 'lex', ']]',
48
+ 0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
49
+
50
+ include_examples 'lex', ']\[',
51
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
52
+ 1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
53
+
54
+ include_examples 'lex', '()',
55
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
56
+ 1 => [:group, :close, ')', 1, 2, 0, 0, 0]
57
+
58
+ include_examples 'lex', '{abc:.+}}}[^}]]}',
59
+ 0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
60
+ 1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
61
+ 2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
62
+ 3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
63
+ 4 => [:set, :open, '[', 10, 11, 0, 0, 0],
64
+ 5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
65
+ 6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
66
+ 7 => [:set, :close, ']', 13, 14, 0, 0, 0],
67
+ 8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
68
+ end
@@ -1,38 +1,14 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Escape lexing') do
4
- tests = {
5
- '\u{62}' => {
6
- 0 => [:escape, :codepoint_list, '\u{62}', 0, 6, 0, 0, 0],
7
- },
4
+ include_examples 'lex', '\u{62}',
5
+ 0 => [:escape, :codepoint_list, '\u{62}', 0, 6, 0, 0, 0]
8
6
 
9
- '\u{62 63 64}' => {
10
- 0 => [:escape, :codepoint_list, '\u{62 63 64}', 0, 12, 0, 0, 0],
11
- },
7
+ include_examples 'lex', '\u{62 63 64}',
8
+ 0 => [:escape, :codepoint_list, '\u{62 63 64}', 0, 12, 0, 0, 0]
12
9
 
13
- '\u{62 63 64}+' => {
14
- 0 => [:escape, :codepoint_list, '\u{62 63}', 0, 9, 0, 0, 0],
15
- 1 => [:escape, :codepoint_list, '\u{64}', 9, 15, 0, 0, 0],
16
- 2 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
17
- },
18
- }
19
-
20
- tests.each_with_index do |(pattern, checks), count|
21
- specify("lex_escape_runs_#{count}") do
22
- tokens = RL.lex(pattern)
23
-
24
- checks.each do |index, (type, token, text, ts, te, level, set_level, conditional_level)|
25
- struct = tokens.at(index)
26
-
27
- expect(struct.type).to eq type
28
- expect(struct.token).to eq token
29
- expect(struct.text).to eq text
30
- expect(struct.ts).to eq ts
31
- expect(struct.te).to eq te
32
- expect(struct.level).to eq level
33
- expect(struct.set_level).to eq set_level
34
- expect(struct.conditional_level).to eq conditional_level
35
- end
36
- end
37
- end
10
+ include_examples 'lex', '\u{62 63 64}+',
11
+ 0 => [:escape, :codepoint_list, '\u{62 63}', 0, 9, 0, 0, 0],
12
+ 1 => [:escape, :codepoint_list, '\u{64}', 9, 15, 0, 0, 0],
13
+ 2 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
38
14
  end
@@ -1,22 +1,10 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Keep lexing') do
4
- specify('lex keep token') do
5
- regexp = /ab\Kcd/
6
- tokens = RL.lex(regexp)
4
+ include_examples 'lex', /ab\Kcd/,
5
+ 1 => [:keep, :mark, '\K', 2, 4, 0, 0, 0]
7
6
 
8
- expect(tokens[1].type).to eq :keep
9
- expect(tokens[1].token).to eq :mark
10
- end
11
-
12
- specify('lex keep nested') do
13
- regexp = /(a\Kb)|(c\\\Kd)ef/
14
- tokens = RL.lex(regexp)
15
-
16
- expect(tokens[2].type).to eq :keep
17
- expect(tokens[2].token).to eq :mark
18
-
19
- expect(tokens[9].type).to eq :keep
20
- expect(tokens[9].token).to eq :mark
21
- end
7
+ include_examples 'lex', /(a\Kb)|(c\\\Kd)ef/,
8
+ 2 => [:keep, :mark, '\K', 2, 4, 1, 0, 0],
9
+ 9 => [:keep, :mark, '\K', 11, 13, 1, 0, 0]
22
10
  end
@@ -1,126 +1,89 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Literal lexing') do
4
- tests = {
5
- # ascii, single byte characters
6
- 'a' => {
7
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
8
- },
9
-
10
- 'ab+' => {
11
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
12
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
13
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0],
14
- },
15
-
16
-
17
- # 2 byte wide characters, Arabic
18
- 'ا' => {
19
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0],
20
- },
21
-
22
- 'aاbبcت' => {
23
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0],
24
- },
25
-
26
- 'aاbبت?' => {
27
- 0 => [:literal, :literal, 'aا', 0, 6, 0, 0, 0],
28
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
29
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
30
- },
31
-
32
- 'aا?bبcت+' => {
33
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
34
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
35
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
36
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
37
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
38
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0],
39
- },
40
-
41
- 'a(اbب+)cت?' => {
42
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
43
- 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
44
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
45
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
46
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
47
- 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
48
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
49
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
50
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0],
51
- },
52
-
53
-
54
- # 3 byte wide characters, Japanese
55
- 'ab?れます+cd' => {
56
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
57
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
58
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
59
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
60
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
61
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
62
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0],
63
- },
64
-
65
-
66
- # 4 byte wide characters, Osmanya
67
- '𐒀𐒁?𐒂ab+𐒃' => {
68
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
69
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
70
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
71
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
72
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
73
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
74
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0],
75
- },
76
-
77
- 'mu𝄞?si*𝄫c+' => {
78
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
79
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
80
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
81
- 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
82
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
83
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
84
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
85
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
86
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
87
- },
88
- }
89
-
90
- tests.each_with_index do |(pattern, checks), count|
91
- specify("lex_literal_runs_#{count}") do
92
- tokens = RL.lex(pattern)
93
-
94
- checks.each do |index, (type, token, text, ts, te, level, set_level, conditional_level)|
95
- struct = tokens.at(index)
96
-
97
- expect(struct.type).to eq type
98
- expect(struct.token).to eq token
99
- expect(struct.text).to eq text
100
- expect(struct.ts).to eq ts
101
- expect(struct.te).to eq te
102
- expect(struct.level).to eq level
103
- expect(struct.set_level).to eq set_level
104
- expect(struct.conditional_level).to eq conditional_level
105
- end
106
- end
107
- end
4
+ # ascii, single byte characters
5
+ include_examples 'lex', 'a',
6
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0]
7
+
8
+ include_examples 'lex', 'ab+',
9
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
10
+ 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
11
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
12
+
13
+ # 2 byte wide characters, Arabic
14
+ include_examples 'lex', 'ا',
15
+ 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0]
16
+
17
+ include_examples 'lex', 'aاbبcت',
18
+ 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
19
+
20
+ include_examples 'lex', 'aاbبت?',
21
+ 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
22
+ 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
23
+ 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
24
+
25
+ include_examples 'lex', 'aا?bبcت+',
26
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
27
+ 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
28
+ 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
29
+ 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
30
+ 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
31
+ 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
32
+
33
+ include_examples 'lex', 'a(اbب+)cت?',
34
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
35
+ 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
36
+ 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
37
+ 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
38
+ 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
39
+ 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
40
+ 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
41
+ 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
42
+ 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
43
+
44
+ # 3 byte wide characters, Japanese
45
+ include_examples 'lex', 'ab?れます+cd',
46
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
47
+ 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
48
+ 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
49
+ 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
50
+ 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
51
+ 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
52
+ 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0]
53
+
54
+ # 4 byte wide characters, Osmanya
55
+ include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
56
+ 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
57
+ 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
58
+ 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
59
+ 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
60
+ 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
61
+ 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
62
+ 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0]
63
+
64
+ include_examples 'lex', 'mu𝄞?si*𝄫c+',
65
+ 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
66
+ 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
67
+ 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
68
+ 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
69
+ 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
70
+ 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
71
+ 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
72
+ 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
73
+ 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
108
74
 
109
75
  specify('lex single 2 byte char') do
110
76
  tokens = RL.lex("\u0627+")
111
-
112
- expect(tokens.length).to eq 2
77
+ expect(tokens.count).to eq 2
113
78
  end
114
79
 
115
80
  specify('lex single 3 byte char') do
116
81
  tokens = RL.lex("\u308C+")
117
-
118
- expect(tokens.length).to eq 2
82
+ expect(tokens.count).to eq 2
119
83
  end
120
84
 
121
85
  specify('lex single 4 byte char') do
122
86
  tokens = RL.lex("\u{1D11E}+")
123
-
124
- expect(tokens.length).to eq 2
87
+ expect(tokens.count).to eq 2
125
88
  end
126
89
  end
@@ -1,130 +1,99 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Nesting lexing') do
4
- tests = {
5
- '(((b)))' => {
6
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
7
- 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
8
- 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
9
- 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
10
- 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
11
- 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
12
- 6 => [:group, :close, ')', 6, 7, 0, 0, 0],
13
- },
4
+ include_examples 'lex', /(((b)))/,
5
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
6
+ 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
7
+ 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
8
+ 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
9
+ 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
10
+ 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
11
+ 6 => [:group, :close, ')', 6, 7, 0, 0, 0]
14
12
 
15
- '(\((b)\))' => {
16
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
17
- 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
18
- 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
19
- 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
20
- 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
21
- 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
22
- 6 => [:group, :close, ')', 8, 9, 0, 0, 0],
23
- },
13
+ include_examples 'lex', /(\((b)\))/,
14
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
15
+ 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
16
+ 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
17
+ 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
18
+ 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
19
+ 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
20
+ 6 => [:group, :close, ')', 8, 9, 0, 0, 0]
24
21
 
25
- '(?>a(?>b(?>c)))' => {
26
- 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
27
- 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
28
- 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
29
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
30
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
31
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0],
32
- },
22
+ include_examples 'lex', /(?>a(?>b(?>c)))/,
23
+ 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
24
+ 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
25
+ 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
26
+ 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
27
+ 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
28
+ 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
33
29
 
34
- '(?:a(?:b(?:c)))' => {
35
- 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
36
- 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
37
- 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
38
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
39
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
40
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0],
41
- },
30
+ include_examples 'lex', /(?:a(?:b(?:c)))/,
31
+ 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
32
+ 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
33
+ 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
34
+ 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
35
+ 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
36
+ 8 => [:group, :close, ')', 14, 15, 0, 0, 0]
42
37
 
43
- '(?=a(?!b(?<=c(?<!d))))' => {
44
- 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
45
- 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
46
- 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
47
- 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
48
- 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
49
- 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
50
- 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
51
- 11 => [:group, :close, ')', 21, 22, 0, 0, 0],
52
- },
38
+ include_examples 'lex', /(?=a(?!b(?<=c(?<!d))))/,
39
+ 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
40
+ 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
41
+ 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
42
+ 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
43
+ 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
44
+ 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
45
+ 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
46
+ 11 => [:group, :close, ')', 21, 22, 0, 0, 0]
53
47
 
54
- '((?#a)b(?#c)d(?#e))' => {
55
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
56
- 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
57
- 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
58
- 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
59
- 6 => [:group, :close, ')', 18, 19, 0, 0, 0],
60
- },
48
+ include_examples 'lex', /((?#a)b(?#c)d(?#e))/,
49
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
50
+ 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
51
+ 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
52
+ 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
53
+ 6 => [:group, :close, ')', 18, 19, 0, 0, 0]
61
54
 
62
- 'a[b-e]f' => {
63
- 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
64
- 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
65
- 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
66
- 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
67
- 5 => [:set, :close, ']', 5, 6, 0, 0, 0],
68
- },
55
+ include_examples 'lex', /a[b-e]f/,
56
+ 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
57
+ 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
58
+ 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
59
+ 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
60
+ 5 => [:set, :close, ']', 5, 6, 0, 0, 0]
69
61
 
70
- '[[:word:]&&[^c]z]' => {
71
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
72
- 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
73
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
74
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
75
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
76
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
77
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
78
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
79
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
80
- },
62
+ include_examples 'lex', /[[:word:]&&[^c]z]/,
63
+ 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
64
+ 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
65
+ 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
66
+ 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
67
+ 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
68
+ 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
69
+ 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
70
+ 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
71
+ 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
81
72
 
82
- '[\p{word}&&[^c]z]' => {
83
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
84
- 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
85
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
86
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
87
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
88
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
89
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
90
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
91
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
92
- },
73
+ include_examples 'lex', /[\p{word}&&[^c]z]/,
74
+ 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
75
+ 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
76
+ 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
77
+ 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
78
+ 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
79
+ 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
80
+ 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
81
+ 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
82
+ 8 => [:set, :close, ']', 16, 17, 0, 0, 0]
93
83
 
94
- '[a[b[c[d-g]]]]' => {
95
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
96
- 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
97
- 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
98
- 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
99
- 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
100
- 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
101
- 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
102
- 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
103
- 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
104
- 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
105
- 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
106
- 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
107
- 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
108
- 13 => [:set, :close, ']', 13, 14, 0, 0, 0],
109
- },
110
- }
111
-
112
- tests.each_with_index do |(pattern, checks), count|
113
- specify("lex_nesting_in_'#{pattern}'_#{count}") do
114
- tokens = RL.lex(pattern, 'ruby/1.9')
115
-
116
- checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
117
- struct = tokens.at(offset)
118
-
119
- expect(struct.type).to eq type
120
- expect(struct.token).to eq token
121
- expect(struct.text).to eq text
122
- expect(struct.ts).to eq ts
123
- expect(struct.te).to eq te
124
- expect(struct.level).to eq level
125
- expect(struct.set_level).to eq set_level
126
- expect(struct.conditional_level).to eq conditional_level
127
- end
128
- end
129
- end
84
+ include_examples 'lex', /[a[b[c[d-g]]]]/,
85
+ 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
86
+ 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
87
+ 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
88
+ 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
89
+ 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
90
+ 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
91
+ 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
92
+ 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
93
+ 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
94
+ 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
95
+ 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
96
+ 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
97
+ 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
98
+ 13 => [:set, :close, ']', 13, 14, 0, 0, 0]
130
99
  end