regexp_parser 1.3.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +53 -1
  3. data/Gemfile +3 -3
  4. data/README.md +10 -14
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  18. data/lib/regexp_parser/expression/sequence.rb +3 -6
  19. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  20. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  21. data/lib/regexp_parser/lexer.rb +30 -44
  22. data/lib/regexp_parser/parser.rb +47 -24
  23. data/lib/regexp_parser/scanner.rb +1159 -1329
  24. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  25. data/lib/regexp_parser/scanner/properties/long.yml +34 -1
  26. data/lib/regexp_parser/scanner/properties/short.yml +12 -0
  27. data/lib/regexp_parser/scanner/scanner.rl +82 -190
  28. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  29. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
  30. data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
  31. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  33. data/lib/regexp_parser/version.rb +1 -1
  34. data/regexp_parser.gemspec +3 -3
  35. data/spec/expression/base_spec.rb +94 -0
  36. data/spec/expression/clone_spec.rb +120 -0
  37. data/spec/expression/conditional_spec.rb +89 -0
  38. data/spec/expression/free_space_spec.rb +27 -0
  39. data/spec/expression/methods/match_length_spec.rb +154 -0
  40. data/spec/expression/methods/match_spec.rb +25 -0
  41. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  42. data/spec/expression/methods/tests_spec.rb +99 -0
  43. data/spec/expression/methods/traverse_spec.rb +140 -0
  44. data/spec/expression/options_spec.rb +128 -0
  45. data/spec/expression/root_spec.rb +9 -0
  46. data/spec/expression/sequence_spec.rb +9 -0
  47. data/spec/expression/subexpression_spec.rb +50 -0
  48. data/spec/expression/to_h_spec.rb +26 -0
  49. data/spec/expression/to_s_spec.rb +100 -0
  50. data/spec/lexer/all_spec.rb +22 -0
  51. data/spec/lexer/conditionals_spec.rb +53 -0
  52. data/spec/lexer/escapes_spec.rb +14 -0
  53. data/spec/lexer/keep_spec.rb +10 -0
  54. data/spec/lexer/literals_spec.rb +89 -0
  55. data/spec/lexer/nesting_spec.rb +99 -0
  56. data/spec/lexer/refcalls_spec.rb +55 -0
  57. data/spec/parser/all_spec.rb +43 -0
  58. data/spec/parser/alternation_spec.rb +88 -0
  59. data/spec/parser/anchors_spec.rb +17 -0
  60. data/spec/parser/conditionals_spec.rb +179 -0
  61. data/spec/parser/errors_spec.rb +30 -0
  62. data/spec/parser/escapes_spec.rb +121 -0
  63. data/spec/parser/free_space_spec.rb +130 -0
  64. data/spec/parser/groups_spec.rb +108 -0
  65. data/spec/parser/keep_spec.rb +6 -0
  66. data/spec/parser/posix_classes_spec.rb +8 -0
  67. data/spec/parser/properties_spec.rb +115 -0
  68. data/spec/parser/quantifiers_spec.rb +51 -0
  69. data/spec/parser/refcalls_spec.rb +112 -0
  70. data/spec/parser/set/intersections_spec.rb +127 -0
  71. data/spec/parser/set/ranges_spec.rb +111 -0
  72. data/spec/parser/sets_spec.rb +178 -0
  73. data/spec/parser/types_spec.rb +18 -0
  74. data/spec/scanner/all_spec.rb +18 -0
  75. data/spec/scanner/anchors_spec.rb +21 -0
  76. data/spec/scanner/conditionals_spec.rb +128 -0
  77. data/spec/scanner/errors_spec.rb +68 -0
  78. data/spec/scanner/escapes_spec.rb +53 -0
  79. data/spec/scanner/free_space_spec.rb +133 -0
  80. data/spec/scanner/groups_spec.rb +52 -0
  81. data/spec/scanner/keep_spec.rb +10 -0
  82. data/spec/scanner/literals_spec.rb +49 -0
  83. data/spec/scanner/meta_spec.rb +18 -0
  84. data/spec/scanner/properties_spec.rb +64 -0
  85. data/spec/scanner/quantifiers_spec.rb +20 -0
  86. data/spec/scanner/refcalls_spec.rb +36 -0
  87. data/spec/scanner/sets_spec.rb +102 -0
  88. data/spec/scanner/types_spec.rb +14 -0
  89. data/spec/spec_helper.rb +15 -0
  90. data/{test → spec}/support/runner.rb +9 -8
  91. data/spec/support/shared_examples.rb +77 -0
  92. data/{test → spec}/support/warning_extractor.rb +5 -7
  93. data/spec/syntax/syntax_spec.rb +48 -0
  94. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  95. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  96. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  97. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  98. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  99. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  100. data/spec/syntax/versions/aliases_spec.rb +37 -0
  101. data/spec/token/token_spec.rb +85 -0
  102. metadata +144 -143
  103. data/test/expression/test_all.rb +0 -12
  104. data/test/expression/test_base.rb +0 -90
  105. data/test/expression/test_clone.rb +0 -89
  106. data/test/expression/test_conditionals.rb +0 -113
  107. data/test/expression/test_free_space.rb +0 -35
  108. data/test/expression/test_set.rb +0 -84
  109. data/test/expression/test_strfregexp.rb +0 -230
  110. data/test/expression/test_subexpression.rb +0 -58
  111. data/test/expression/test_tests.rb +0 -99
  112. data/test/expression/test_to_h.rb +0 -59
  113. data/test/expression/test_to_s.rb +0 -104
  114. data/test/expression/test_traverse.rb +0 -161
  115. data/test/helpers.rb +0 -10
  116. data/test/lexer/test_all.rb +0 -41
  117. data/test/lexer/test_conditionals.rb +0 -127
  118. data/test/lexer/test_keep.rb +0 -24
  119. data/test/lexer/test_literals.rb +0 -130
  120. data/test/lexer/test_nesting.rb +0 -132
  121. data/test/lexer/test_refcalls.rb +0 -56
  122. data/test/parser/set/test_intersections.rb +0 -127
  123. data/test/parser/set/test_ranges.rb +0 -111
  124. data/test/parser/test_all.rb +0 -64
  125. data/test/parser/test_alternation.rb +0 -92
  126. data/test/parser/test_anchors.rb +0 -34
  127. data/test/parser/test_conditionals.rb +0 -187
  128. data/test/parser/test_errors.rb +0 -63
  129. data/test/parser/test_escapes.rb +0 -134
  130. data/test/parser/test_free_space.rb +0 -139
  131. data/test/parser/test_groups.rb +0 -289
  132. data/test/parser/test_keep.rb +0 -21
  133. data/test/parser/test_posix_classes.rb +0 -27
  134. data/test/parser/test_properties.rb +0 -133
  135. data/test/parser/test_quantifiers.rb +0 -301
  136. data/test/parser/test_refcalls.rb +0 -186
  137. data/test/parser/test_sets.rb +0 -179
  138. data/test/parser/test_types.rb +0 -50
  139. data/test/scanner/test_all.rb +0 -38
  140. data/test/scanner/test_anchors.rb +0 -38
  141. data/test/scanner/test_conditionals.rb +0 -184
  142. data/test/scanner/test_errors.rb +0 -91
  143. data/test/scanner/test_escapes.rb +0 -56
  144. data/test/scanner/test_free_space.rb +0 -200
  145. data/test/scanner/test_groups.rb +0 -79
  146. data/test/scanner/test_keep.rb +0 -35
  147. data/test/scanner/test_literals.rb +0 -89
  148. data/test/scanner/test_meta.rb +0 -40
  149. data/test/scanner/test_properties.rb +0 -312
  150. data/test/scanner/test_quantifiers.rb +0 -37
  151. data/test/scanner/test_refcalls.rb +0 -52
  152. data/test/scanner/test_scripts.rb +0 -53
  153. data/test/scanner/test_sets.rb +0 -119
  154. data/test/scanner/test_types.rb +0 -35
  155. data/test/scanner/test_unicode_blocks.rb +0 -30
  156. data/test/support/disable_autotest.rb +0 -8
  157. data/test/syntax/test_all.rb +0 -6
  158. data/test/syntax/test_syntax.rb +0 -61
  159. data/test/syntax/test_syntax_token_map.rb +0 -25
  160. data/test/syntax/versions/test_1.8.rb +0 -55
  161. data/test/syntax/versions/test_1.9.1.rb +0 -36
  162. data/test/syntax/versions/test_1.9.3.rb +0 -32
  163. data/test/syntax/versions/test_2.0.0.rb +0 -37
  164. data/test/syntax/versions/test_2.2.0.rb +0 -32
  165. data/test/syntax/versions/test_aliases.rb +0 -129
  166. data/test/syntax/versions/test_all.rb +0 -5
  167. data/test/test_all.rb +0 -5
  168. data/test/token/test_all.rb +0 -2
  169. data/test/token/test_token.rb +0 -107
@@ -1,10 +0,0 @@
1
- require "test/unit"
2
- require File.expand_path("../../lib/regexp_parser", __FILE__)
3
- require 'regexp_property_values'
4
-
5
- RS = Regexp::Scanner
6
- RL = Regexp::Lexer
7
- RP = Regexp::Parser
8
- RE = Regexp::Expression
9
-
10
- include Regexp::Expression
@@ -1,41 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- %w{
4
- literals nesting refcalls
5
- }.each do|tc|
6
- require File.expand_path("../test_#{tc}", __FILE__)
7
- end
8
-
9
- if RUBY_VERSION >= '2.0.0'
10
- %w{conditionals keep}.each do|tc|
11
- require File.expand_path("../test_#{tc}", __FILE__)
12
- end
13
- end
14
-
15
- class TestRegexpLexer < Test::Unit::TestCase
16
-
17
- def test_lexer_returns_an_array
18
- assert_instance_of Array, RL.lex('abc')
19
- end
20
-
21
- def test_lexer_returns_tokens
22
- tokens = RL.lex('^abc+[^one]{2,3}\b\d\\\C-C$')
23
-
24
- assert tokens.all?{ |token| token.kind_of?(Regexp::Token) },
25
- "Not all array members are tokens"
26
-
27
- assert tokens.all?{ |token| token.to_a.length == 8 },
28
- "Not all tokens have a length of 8"
29
- end
30
-
31
- def test_lexer_token_count
32
- tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
33
-
34
- assert_equal 28, tokens.length
35
- end
36
-
37
- def test_lexer_scan_alias
38
- assert_equal RL.lex(/a|b|c/), RL.scan(/a|b|c/)
39
- end
40
-
41
- end
@@ -1,127 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerConditionals < Test::Unit::TestCase
4
- if RUBY_VERSION >= '2.0'
5
-
6
- # Basic lexer output and nesting tests
7
- tests = {
8
- '(?<A>a)(?(<A>)b|c)' => [3, :conditional, :open, '(?', 7, 9, 0, 0, 0],
9
- '(?<B>a)(?(<B>)b|c)' => [4, :conditional, :condition, '(<B>)', 9, 14, 0, 0, 1],
10
- '(?<C>a)(?(<C>)b|c)' => [6, :conditional, :separator, '|', 15, 16, 0, 0, 1],
11
- '(?<D>a)(?(<D>)b|c)' => [8, :conditional, :close, ')', 17, 18, 0, 0, 0],
12
- }
13
-
14
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
15
- define_method "test_lexer_#{type}_#{token}_#{count}" do
16
- tokens = RL.lex(pattern)
17
- struct = tokens.at(index)
18
-
19
- assert_equal type, struct.type
20
- assert_equal token, struct.token
21
- assert_equal text, struct.text
22
- assert_equal ts, struct.ts
23
- assert_equal te, struct.te
24
- assert_equal level, struct.level
25
- assert_equal set_level, struct.set_level
26
- assert_equal conditional_level, struct.conditional_level
27
- end
28
- end
29
-
30
- def test_lexer_conditional_mixed_nesting
31
- regexp = /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/
32
- tokens = RL.lex(regexp)
33
-
34
- [
35
- [ 0, :group, :capture, '(', 0, 1, 0, 0, 0],
36
- [ 1, :group, :named, '(?<A>', 1, 6, 1, 0, 0],
37
-
38
- [ 5, :conditional, :open, '(?', 13, 15, 2, 0, 0],
39
- [ 6, :conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
40
- [ 8, :conditional, :separator, '|', 21, 22, 2, 0, 1],
41
-
42
- [10, :conditional, :open, '(?', 23, 25, 3, 0, 1],
43
- [11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
44
-
45
- [12, :set, :open, '[', 30, 31, 3, 0, 2],
46
- [13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
47
- [14, :set, :range, '-', 32, 33, 3, 1, 2],
48
- [15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
49
- [16, :set, :close, ']', 34, 35, 3, 0, 2],
50
-
51
- [17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
52
- [23, :conditional, :close, ')', 41, 42, 3, 0, 1],
53
- [25, :conditional, :close, ')', 43, 44, 2, 0, 0],
54
-
55
- [26, :group, :close, ')', 44, 45, 1, 0, 0],
56
- [27, :group, :close, ')', 45, 46, 0, 0, 0]
57
- ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
58
- struct = tokens.at(index)
59
-
60
- assert_equal type, struct.type
61
- assert_equal token, struct.token
62
- assert_equal text, struct.text
63
- assert_equal ts, struct.ts
64
- assert_equal te, struct.te
65
- assert_equal level, struct.level
66
- assert_equal set_level, struct.set_level
67
- assert_equal conditional_level, struct.conditional_level
68
- end
69
- end
70
-
71
- def test_lexer_conditional_deep_nesting
72
- regexp = /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/
73
- tokens = RL.lex(regexp)
74
-
75
- [
76
- [ 9, :conditional, :open, '(?', 9, 11, 0, 0, 0],
77
- [10, :conditional, :condition, '(1)', 11, 14, 0, 0, 1],
78
-
79
- [11, :conditional, :open, '(?', 14, 16, 0, 0, 1],
80
- [12, :conditional, :condition, '(2)', 16, 19, 0, 0, 2],
81
-
82
- [13, :conditional, :open, '(?', 19, 21, 0, 0, 2],
83
- [14, :conditional, :condition, '(3)', 21, 24, 0, 0, 3],
84
-
85
- [16, :conditional, :separator, '|', 25, 26, 0, 0, 3],
86
-
87
- [18, :conditional, :close, ')', 27, 28, 0, 0, 2],
88
- [19, :conditional, :close, ')', 28, 29, 0, 0, 1],
89
-
90
- [20, :conditional, :separator, '|', 29, 30, 0, 0, 1],
91
-
92
- [21, :conditional, :open, '(?', 30, 32, 0, 0, 1],
93
- [22, :conditional, :condition, '(3)', 32, 35, 0, 0, 2],
94
-
95
- [23, :conditional, :open, '(?', 35, 37, 0, 0, 2],
96
- [24, :conditional, :condition, '(2)', 37, 40, 0, 0, 3],
97
-
98
- [26, :conditional, :separator, '|', 41, 42, 0, 0, 3],
99
-
100
- [28, :conditional, :close, ')', 43, 44, 0, 0, 2],
101
-
102
- [29, :conditional, :separator, '|', 44, 45, 0, 0, 2],
103
-
104
- [30, :conditional, :open, '(?', 45, 47, 0, 0, 2],
105
- [31, :conditional, :condition, '(1)', 47, 50, 0, 0, 3],
106
-
107
- [33, :conditional, :separator, '|', 51, 52, 0, 0, 3],
108
-
109
- [35, :conditional, :close, ')', 53, 54, 0, 0, 2],
110
- [36, :conditional, :close, ')', 54, 55, 0, 0, 1],
111
- [37, :conditional, :close, ')', 55, 56, 0, 0, 0]
112
- ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
113
- struct = tokens.at(index)
114
-
115
- assert_equal type, struct.type
116
- assert_equal token, struct.token
117
- assert_equal text, struct.text
118
- assert_equal ts, struct.ts
119
- assert_equal te, struct.te
120
- assert_equal level, struct.level
121
- assert_equal set_level, struct.set_level
122
- assert_equal conditional_level, struct.conditional_level
123
- end
124
- end
125
-
126
- end # if RUBY_VERSION >= '2.0'
127
- end
@@ -1,24 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerKeep < Test::Unit::TestCase
4
-
5
- def test_lex_keep_token
6
- regexp = /ab\Kcd/
7
- tokens = RL.lex(regexp)
8
-
9
- assert_equal :keep, tokens[1].type
10
- assert_equal :mark, tokens[1].token
11
- end
12
-
13
- def test_lex_keep_nested
14
- regexp = /(a\Kb)|(c\\\Kd)ef/
15
- tokens = RL.lex(regexp)
16
-
17
- assert_equal :keep, tokens[2].type
18
- assert_equal :mark, tokens[2].token
19
-
20
- assert_equal :keep, tokens[9].type
21
- assert_equal :mark, tokens[9].token
22
- end
23
-
24
- end
@@ -1,130 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require File.expand_path("../../helpers", __FILE__)
4
-
5
- class LexerLiterals < Test::Unit::TestCase
6
-
7
- tests = {
8
- # ascii, single byte characters
9
- 'a' => {
10
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
11
- },
12
-
13
- 'ab+' => {
14
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
15
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
16
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0],
17
- },
18
-
19
-
20
- # 2 byte wide characters, Arabic
21
- 'ا' => {
22
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0],
23
- },
24
-
25
- 'aاbبcت' => {
26
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0],
27
- },
28
-
29
- 'aاbبت?' => {
30
- 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
31
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
32
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
33
- },
34
-
35
- 'aا?bبcت+' => {
36
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
37
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
38
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
39
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
40
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
41
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0],
42
- },
43
-
44
- 'a(اbب+)cت?' => {
45
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
46
- 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
47
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
48
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
49
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
50
- 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
51
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
52
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
53
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0],
54
- },
55
-
56
-
57
- # 3 byte wide characters, Japanese
58
- 'ab?れます+cd' => {
59
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
60
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
61
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
62
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
63
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
64
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
65
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0],
66
- },
67
-
68
-
69
- # 4 byte wide characters, Osmanya
70
- '𐒀𐒁?𐒂ab+𐒃' => {
71
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
72
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
73
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
74
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
75
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
76
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
77
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0],
78
- },
79
-
80
- 'mu𝄞?si*𝄫c+' => {
81
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
82
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
83
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
84
- 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
85
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
86
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
87
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
88
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
89
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
90
- },
91
- }
92
-
93
- tests.each_with_index do |(pattern, checks), count|
94
- define_method "test_lex_literal_runs_#{count}" do
95
- tokens = RL.lex(pattern)
96
-
97
- checks.each do |index, (type, token, text, ts, te, level, set_level, conditional_level)|
98
- struct = tokens.at(index)
99
-
100
- assert_equal type, struct.type
101
- assert_equal token, struct.token
102
- assert_equal text, struct.text
103
- assert_equal ts, struct.ts
104
- assert_equal te, struct.te
105
- assert_equal level, struct.level
106
- assert_equal set_level, struct.set_level
107
- assert_equal conditional_level, struct.conditional_level
108
- end
109
- end
110
- end
111
-
112
- def test_lex_single_2_byte_char
113
- tokens = RL.lex('ا+')
114
-
115
- assert_equal 2, tokens.length
116
- end
117
-
118
- def test_lex_single_3_byte_char
119
- tokens = RL.lex('れ+')
120
-
121
- assert_equal 2, tokens.length
122
- end
123
-
124
- def test_lex_single_4_byte_char
125
- tokens = RL.lex('𝄞+')
126
-
127
- assert_equal 2, tokens.length
128
- end
129
-
130
- end
@@ -1,132 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerNesting < Test::Unit::TestCase
4
-
5
- tests = {
6
- '(((b)))' => {
7
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
8
- 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
9
- 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
10
- 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
11
- 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
12
- 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
13
- 6 => [:group, :close, ')', 6, 7, 0, 0, 0],
14
- },
15
-
16
- '(\((b)\))' => {
17
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
18
- 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
19
- 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
20
- 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
21
- 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
22
- 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
23
- 6 => [:group, :close, ')', 8, 9, 0, 0, 0],
24
- },
25
-
26
- '(?>a(?>b(?>c)))' => {
27
- 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
28
- 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
29
- 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
30
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
31
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
32
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0],
33
- },
34
-
35
- '(?:a(?:b(?:c)))' => {
36
- 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
37
- 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
38
- 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
39
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
40
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
41
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0],
42
- },
43
-
44
- '(?=a(?!b(?<=c(?<!d))))' => {
45
- 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
46
- 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
47
- 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
48
- 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
49
- 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
50
- 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
51
- 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
52
- 11 => [:group, :close, ')', 21, 22, 0, 0, 0],
53
- },
54
-
55
- '((?#a)b(?#c)d(?#e))' => {
56
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
57
- 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
58
- 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
59
- 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
60
- 6 => [:group, :close, ')', 18, 19, 0, 0, 0],
61
- },
62
-
63
- 'a[b-e]f' => {
64
- 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
65
- 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
66
- 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
67
- 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
68
- 5 => [:set, :close, ']', 5, 6, 0, 0, 0],
69
- },
70
-
71
- '[[:word:]&&[^c]z]' => {
72
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
73
- 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
74
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
75
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
76
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
77
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
78
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
79
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
80
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
81
- },
82
-
83
- '[\p{word}&&[^c]z]' => {
84
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
85
- 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
86
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
87
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
88
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
89
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
90
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
91
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
92
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
93
- },
94
-
95
- '[a[b[c[d-g]]]]' => {
96
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
97
- 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
98
- 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
99
- 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
100
- 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
101
- 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
102
- 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
103
- 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
104
- 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
105
- 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
106
- 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
107
- 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
108
- 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
109
- 13 => [:set, :close, ']', 13, 14, 0, 0, 0],
110
- },
111
- }
112
-
113
- tests.each_with_index do |(pattern, checks), count|
114
- define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
115
- tokens = RL.lex(pattern, 'ruby/1.9')
116
-
117
- checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
118
- struct = tokens.at(offset)
119
-
120
- assert_equal type, struct.type
121
- assert_equal token, struct.token
122
- assert_equal text, struct.text
123
- assert_equal ts, struct.ts
124
- assert_equal te, struct.te
125
- assert_equal level, struct.level
126
- assert_equal set_level, struct.set_level
127
- assert_equal conditional_level, struct.conditional_level
128
- end
129
- end
130
- end
131
-
132
- end