regexp_parser 2.1.1 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +15 -21
  4. data/Rakefile +5 -11
  5. data/lib/regexp_parser/expression/base.rb +123 -0
  6. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  7. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  8. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  9. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  10. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  11. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  12. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  13. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  14. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  15. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  16. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  17. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  18. data/lib/regexp_parser/expression/sequence.rb +0 -1
  19. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  20. data/lib/regexp_parser/expression.rb +6 -130
  21. data/lib/regexp_parser/lexer.rb +7 -5
  22. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  23. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  24. data/lib/regexp_parser/syntax/any.rb +1 -3
  25. data/lib/regexp_parser/syntax/base.rb +9 -9
  26. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  27. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  28. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  29. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  30. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  31. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  32. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  33. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  34. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  35. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  36. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  37. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  38. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  39. data/lib/regexp_parser/syntax/token.rb +45 -0
  40. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  41. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  42. data/lib/regexp_parser/syntax.rb +1 -1
  43. data/lib/regexp_parser/token.rb +9 -20
  44. data/lib/regexp_parser/version.rb +1 -1
  45. data/lib/regexp_parser.rb +0 -2
  46. data/spec/lexer/nesting_spec.rb +2 -2
  47. data/spec/parser/escapes_spec.rb +43 -31
  48. data/spec/parser/properties_spec.rb +6 -4
  49. data/spec/parser/set/ranges_spec.rb +26 -16
  50. data/spec/scanner/escapes_spec.rb +28 -19
  51. data/spec/scanner/sets_spec.rb +9 -9
  52. data/spec/spec_helper.rb +13 -1
  53. data/spec/support/capturing_stderr.rb +9 -0
  54. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  55. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  56. data/spec/syntax/versions/aliases_spec.rb +1 -0
  57. metadata +26 -26
  58. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  59. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  60. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  61. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  62. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  63. data/spec/support/runner.rb +0 -42
  64. data/spec/support/warning_extractor.rb +0 -60
@@ -28,6 +28,7 @@ cari: carian
28
28
  cc: control
29
29
  cf: format
30
30
  cher: cherokee
31
+ chrs: chorasmian
31
32
  ci: case_ignorable
32
33
  cn: unassigned
33
34
  co: private_use
@@ -45,12 +46,17 @@ dep: deprecated
45
46
  deva: devanagari
46
47
  di: default_ignorable_code_point
47
48
  dia: diacritic
49
+ diak: dives_akuru
48
50
  dogr: dogra
49
51
  dsrt: deseret
50
52
  dupl: duployan
53
+ ebase: emoji_modifier_base
54
+ ecomp: emoji_component
51
55
  egyp: egyptian_hieroglyphs
52
56
  elba: elbasan
53
57
  elym: elymaic
58
+ emod: emoji_modifier
59
+ epres: emoji_presentation
54
60
  ethi: ethiopic
55
61
  ext: extender
56
62
  geor: georgian
@@ -89,6 +95,7 @@ kana: katakana
89
95
  khar: kharoshthi
90
96
  khmr: khmer
91
97
  khoj: khojki
98
+ kits: khitan_small_script
92
99
  knda: kannada
93
100
  kthi: kaithi
94
101
  l: letter
@@ -127,7 +134,7 @@ mroo: mro
127
134
  mtei: meetei_mayek
128
135
  mult: multani
129
136
  mymr: myanmar
130
- n: number
137
+ "n": number
131
138
  nand: nandinagari
132
139
  narb: old_north_arabian
133
140
  nbat: nabataean
@@ -226,6 +233,7 @@ xidc: xid_continue
226
233
  xids: xid_start
227
234
  xpeo: old_persian
228
235
  xsux: cuneiform
236
+ yezi: yezidi
229
237
  yiii: yi
230
238
  z: separator
231
239
  zanb: zanabazar_square
@@ -1,15 +1,13 @@
1
1
  module Regexp::Syntax
2
-
3
2
  # A syntax that always returns true, passing all tokens as implemented. This
4
3
  # is useful during development, testing, and should be useful for some types
5
4
  # of transformations as well.
6
5
  class Any < Base
7
6
  def initialize # rubocop:disable Lint/MissingSuper
8
- @implements = { :* => [:*] }
7
+ @implements = { :* => %i[*] }
9
8
  end
10
9
 
11
10
  def implements?(_type, _token) true end
12
11
  def implements!(_type, _token) true end
13
12
  end
14
-
15
13
  end
@@ -59,7 +59,7 @@ module Regexp::Syntax
59
59
  def normalize_group(type, token)
60
60
  case token
61
61
  when :named_ab, :named_sq
62
- [:group, :named]
62
+ %i[group named]
63
63
  else
64
64
  [type, token]
65
65
  end
@@ -68,21 +68,21 @@ module Regexp::Syntax
68
68
  def normalize_backref(type, token)
69
69
  case token
70
70
  when :name_ref_ab, :name_ref_sq
71
- [:backref, :name_ref]
71
+ %i[backref name_ref]
72
72
  when :name_call_ab, :name_call_sq
73
- [:backref, :name_call]
73
+ %i[backref name_call]
74
74
  when :name_recursion_ref_ab, :name_recursion_ref_sq
75
- [:backref, :name_recursion_ref]
75
+ %i[backref name_recursion_ref]
76
76
  when :number_ref_ab, :number_ref_sq
77
- [:backref, :number_ref]
77
+ %i[backref number_ref]
78
78
  when :number_call_ab, :number_call_sq
79
- [:backref, :number_call]
79
+ %i[backref number_call]
80
80
  when :number_rel_ref_ab, :number_rel_ref_sq
81
- [:backref, :number_rel_ref]
81
+ %i[backref number_rel_ref]
82
82
  when :number_rel_call_ab, :number_rel_call_sq
83
- [:backref, :number_rel_call]
83
+ %i[backref number_rel_call]
84
84
  when :number_recursion_ref_ab, :number_recursion_ref_sq
85
- [:backref, :number_recursion_ref]
85
+ %i[backref number_recursion_ref]
86
86
  else
87
87
  [type, token]
88
88
  end
@@ -0,0 +1,15 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Anchor
4
+ Basic = %i[bol eol]
5
+ Extended = Basic + %i[word_boundary nonword_boundary]
6
+ String = %i[bos eos eos_ob_eol]
7
+ MatchStart = %i[match_start]
8
+
9
+ All = Extended + String + MatchStart
10
+ Type = :anchor
11
+ end
12
+
13
+ Map[Anchor::Type] = Anchor::All
14
+ end
15
+ end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Assertion
4
- Lookahead = [:lookahead, :nlookahead]
5
- Lookbehind = [:lookbehind, :nlookbehind]
4
+ Lookahead = %i[lookahead nlookahead]
5
+ Lookbehind = %i[lookbehind nlookbehind]
6
6
 
7
7
  All = Lookahead + Lookbehind
8
8
  Type = :assertion
@@ -1,10 +1,11 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Backreference
4
- Name = [:name_ref]
5
- Number = [:number, :number_ref, :number_rel_ref]
4
+ Plain = %i[number]
5
+ Number = Plain + %i[number_ref number_rel_ref]
6
+ Name = %i[name_ref]
6
7
 
7
- RecursionLevel = [:name_recursion_ref, :number_recursion_ref]
8
+ RecursionLevel = %i[name_recursion_ref number_recursion_ref]
8
9
 
9
10
  All = Name + Number + RecursionLevel
10
11
  Type = :backref
@@ -12,8 +13,8 @@ module Regexp::Syntax
12
13
 
13
14
  # Type is the same as Backreference so keeping it here, for now.
14
15
  module SubexpressionCall
15
- Name = [:name_call]
16
- Number = [:number_call, :number_rel_call]
16
+ Name = %i[name_call]
17
+ Number = %i[number_call number_rel_call]
17
18
 
18
19
  All = Name + Number
19
20
  end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterSet
4
- Basic = [:open, :close, :negate, :range]
5
- Extended = Basic + [:intersection]
4
+ Basic = %i[open close negate range]
5
+ Extended = Basic + %i[intersection]
6
6
 
7
7
  All = Extended
8
8
  Type = :set
@@ -2,10 +2,10 @@ module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterType
4
4
  Basic = []
5
- Extended = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
6
- Hex = [:hex, :nonhex]
5
+ Extended = %i[digit nondigit space nonspace word nonword]
6
+ Hex = %i[hex nonhex]
7
7
 
8
- Clustered = [:linebreak, :xgrapheme]
8
+ Clustered = %i[linebreak xgrapheme]
9
9
 
10
10
  All = Basic + Extended + Hex + Clustered
11
11
  Type = :type
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Conditional
4
- Delimiters = [:open, :close]
4
+ Delimiters = %i[open close]
5
5
 
6
- Condition = [:condition_open, :condition, :condition_close]
7
- Separator = [:separator]
6
+ Condition = %i[condition_open condition condition_close]
7
+ Separator = %i[separator]
8
8
 
9
9
  All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator
10
10
 
@@ -0,0 +1,31 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ # TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
4
+ module Escape
5
+ Basic = %i[backslash literal]
6
+
7
+ Control = %i[control meta_sequence]
8
+
9
+ ASCII = %i[bell backspace escape form_feed newline carriage
10
+ tab vertical_tab]
11
+
12
+ Unicode = %i[codepoint codepoint_list]
13
+
14
+ Meta = %i[dot alternation
15
+ zero_or_one zero_or_more one_or_more
16
+ bol eol
17
+ group_open group_close
18
+ interval_open interval_close
19
+ set_open set_close]
20
+
21
+ Hex = %i[hex]
22
+
23
+ Octal = %i[octal]
24
+
25
+ All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
26
+ Type = :escape
27
+ end
28
+
29
+ Map[Escape::Type] = Escape::All
30
+ end
31
+ end
@@ -1,18 +1,18 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Group
4
- Basic = [:capture, :close]
5
- Extended = Basic + [:options, :options_switch]
4
+ Basic = %i[capture close]
5
+ Extended = Basic + %i[options options_switch]
6
6
 
7
- Named = [:named]
8
- Atomic = [:atomic]
9
- Passive = [:passive]
10
- Comment = [:comment]
7
+ Named = %i[named]
8
+ Atomic = %i[atomic]
9
+ Passive = %i[passive]
10
+ Comment = %i[comment]
11
11
 
12
12
  V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
13
13
  Group::Passive + Group::Comment
14
14
 
15
- V2_4_1 = [:absence]
15
+ V2_4_1 = %i[absence]
16
16
 
17
17
  All = V1_8_6 + V2_4_1
18
18
  Type = :group
@@ -1,7 +1,7 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Keep
4
- Mark = [:mark]
4
+ Mark = %i[mark]
5
5
 
6
6
  All = Mark
7
7
  Type = :keep
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Meta
4
- Basic = [:dot]
5
- Extended = Basic + [:alternation]
4
+ Basic = %i[dot]
5
+ Extended = Basic + %i[alternation]
6
6
 
7
7
  All = Extended
8
8
  Type = :meta
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module PosixClass
4
- Standard = [:alnum, :alpha, :blank, :cntrl, :digit, :graph,
5
- :lower, :print, :punct, :space, :upper, :xdigit]
4
+ Standard = %i[alnum alpha blank cntrl digit graph
5
+ lower print punct space upper xdigit]
6
6
 
7
- Extensions = [:ascii, :word]
7
+ Extensions = %i[ascii word]
8
8
 
9
9
  All = Standard + Extensions
10
10
  Type = :posixclass
@@ -0,0 +1,35 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Quantifier
4
+ Greedy = %i[
5
+ zero_or_one
6
+ zero_or_more
7
+ one_or_more
8
+ ]
9
+
10
+ Reluctant = %i[
11
+ zero_or_one_reluctant
12
+ zero_or_more_reluctant
13
+ one_or_more_reluctant
14
+ ]
15
+
16
+ Possessive = %i[
17
+ zero_or_one_possessive
18
+ zero_or_more_possessive
19
+ one_or_more_possessive
20
+ ]
21
+
22
+ Interval = %i[interval]
23
+ IntervalReluctant = %i[interval_reluctant]
24
+ IntervalPossessive = %i[interval_possessive]
25
+
26
+ IntervalAll = Interval + IntervalReluctant +
27
+ IntervalPossessive
28
+
29
+ All = Greedy + Reluctant + Possessive + IntervalAll
30
+ Type = :quantifier
31
+ end
32
+
33
+ Map[Quantifier::Type] = Quantifier::All
34
+ end
35
+ end