regexp_parser 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +15 -21
  4. data/Rakefile +5 -11
  5. data/lib/regexp_parser/expression/base.rb +123 -0
  6. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  7. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  8. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  9. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  10. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  11. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  12. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  13. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  14. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  15. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  16. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  17. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  18. data/lib/regexp_parser/expression/sequence.rb +0 -1
  19. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  20. data/lib/regexp_parser/expression.rb +6 -130
  21. data/lib/regexp_parser/lexer.rb +7 -5
  22. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  23. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  24. data/lib/regexp_parser/syntax/any.rb +1 -3
  25. data/lib/regexp_parser/syntax/base.rb +9 -9
  26. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  27. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  28. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  29. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  30. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  31. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  32. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  33. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  34. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  35. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  36. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  37. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  38. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  39. data/lib/regexp_parser/syntax/token.rb +45 -0
  40. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  41. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  42. data/lib/regexp_parser/syntax.rb +1 -1
  43. data/lib/regexp_parser/token.rb +9 -20
  44. data/lib/regexp_parser/version.rb +1 -1
  45. data/lib/regexp_parser.rb +0 -2
  46. data/spec/lexer/nesting_spec.rb +2 -2
  47. data/spec/parser/escapes_spec.rb +43 -31
  48. data/spec/parser/properties_spec.rb +6 -4
  49. data/spec/parser/set/ranges_spec.rb +26 -16
  50. data/spec/scanner/escapes_spec.rb +28 -19
  51. data/spec/scanner/sets_spec.rb +9 -9
  52. data/spec/spec_helper.rb +13 -1
  53. data/spec/support/capturing_stderr.rb +9 -0
  54. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  55. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  56. data/spec/syntax/versions/aliases_spec.rb +1 -0
  57. metadata +26 -26
  58. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  59. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  60. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  61. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  62. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  63. data/spec/support/runner.rb +0 -42
  64. data/spec/support/warning_extractor.rb +0 -60
@@ -28,6 +28,7 @@ cari: carian
28
28
  cc: control
29
29
  cf: format
30
30
  cher: cherokee
31
+ chrs: chorasmian
31
32
  ci: case_ignorable
32
33
  cn: unassigned
33
34
  co: private_use
@@ -45,12 +46,17 @@ dep: deprecated
45
46
  deva: devanagari
46
47
  di: default_ignorable_code_point
47
48
  dia: diacritic
49
+ diak: dives_akuru
48
50
  dogr: dogra
49
51
  dsrt: deseret
50
52
  dupl: duployan
53
+ ebase: emoji_modifier_base
54
+ ecomp: emoji_component
51
55
  egyp: egyptian_hieroglyphs
52
56
  elba: elbasan
53
57
  elym: elymaic
58
+ emod: emoji_modifier
59
+ epres: emoji_presentation
54
60
  ethi: ethiopic
55
61
  ext: extender
56
62
  geor: georgian
@@ -89,6 +95,7 @@ kana: katakana
89
95
  khar: kharoshthi
90
96
  khmr: khmer
91
97
  khoj: khojki
98
+ kits: khitan_small_script
92
99
  knda: kannada
93
100
  kthi: kaithi
94
101
  l: letter
@@ -127,7 +134,7 @@ mroo: mro
127
134
  mtei: meetei_mayek
128
135
  mult: multani
129
136
  mymr: myanmar
130
- n: number
137
+ "n": number
131
138
  nand: nandinagari
132
139
  narb: old_north_arabian
133
140
  nbat: nabataean
@@ -226,6 +233,7 @@ xidc: xid_continue
226
233
  xids: xid_start
227
234
  xpeo: old_persian
228
235
  xsux: cuneiform
236
+ yezi: yezidi
229
237
  yiii: yi
230
238
  z: separator
231
239
  zanb: zanabazar_square
@@ -1,15 +1,13 @@
1
1
  module Regexp::Syntax
2
-
3
2
  # A syntax that always returns true, passing all tokens as implemented. This
4
3
  # is useful during development, testing, and should be useful for some types
5
4
  # of transformations as well.
6
5
  class Any < Base
7
6
  def initialize # rubocop:disable Lint/MissingSuper
8
- @implements = { :* => [:*] }
7
+ @implements = { :* => %i[*] }
9
8
  end
10
9
 
11
10
  def implements?(_type, _token) true end
12
11
  def implements!(_type, _token) true end
13
12
  end
14
-
15
13
  end
@@ -59,7 +59,7 @@ module Regexp::Syntax
59
59
  def normalize_group(type, token)
60
60
  case token
61
61
  when :named_ab, :named_sq
62
- [:group, :named]
62
+ %i[group named]
63
63
  else
64
64
  [type, token]
65
65
  end
@@ -68,21 +68,21 @@ module Regexp::Syntax
68
68
  def normalize_backref(type, token)
69
69
  case token
70
70
  when :name_ref_ab, :name_ref_sq
71
- [:backref, :name_ref]
71
+ %i[backref name_ref]
72
72
  when :name_call_ab, :name_call_sq
73
- [:backref, :name_call]
73
+ %i[backref name_call]
74
74
  when :name_recursion_ref_ab, :name_recursion_ref_sq
75
- [:backref, :name_recursion_ref]
75
+ %i[backref name_recursion_ref]
76
76
  when :number_ref_ab, :number_ref_sq
77
- [:backref, :number_ref]
77
+ %i[backref number_ref]
78
78
  when :number_call_ab, :number_call_sq
79
- [:backref, :number_call]
79
+ %i[backref number_call]
80
80
  when :number_rel_ref_ab, :number_rel_ref_sq
81
- [:backref, :number_rel_ref]
81
+ %i[backref number_rel_ref]
82
82
  when :number_rel_call_ab, :number_rel_call_sq
83
- [:backref, :number_rel_call]
83
+ %i[backref number_rel_call]
84
84
  when :number_recursion_ref_ab, :number_recursion_ref_sq
85
- [:backref, :number_recursion_ref]
85
+ %i[backref number_recursion_ref]
86
86
  else
87
87
  [type, token]
88
88
  end
@@ -0,0 +1,15 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Anchor
4
+ Basic = %i[bol eol]
5
+ Extended = Basic + %i[word_boundary nonword_boundary]
6
+ String = %i[bos eos eos_ob_eol]
7
+ MatchStart = %i[match_start]
8
+
9
+ All = Extended + String + MatchStart
10
+ Type = :anchor
11
+ end
12
+
13
+ Map[Anchor::Type] = Anchor::All
14
+ end
15
+ end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Assertion
4
- Lookahead = [:lookahead, :nlookahead]
5
- Lookbehind = [:lookbehind, :nlookbehind]
4
+ Lookahead = %i[lookahead nlookahead]
5
+ Lookbehind = %i[lookbehind nlookbehind]
6
6
 
7
7
  All = Lookahead + Lookbehind
8
8
  Type = :assertion
@@ -1,10 +1,11 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Backreference
4
- Name = [:name_ref]
5
- Number = [:number, :number_ref, :number_rel_ref]
4
+ Plain = %i[number]
5
+ Number = Plain + %i[number_ref number_rel_ref]
6
+ Name = %i[name_ref]
6
7
 
7
- RecursionLevel = [:name_recursion_ref, :number_recursion_ref]
8
+ RecursionLevel = %i[name_recursion_ref number_recursion_ref]
8
9
 
9
10
  All = Name + Number + RecursionLevel
10
11
  Type = :backref
@@ -12,8 +13,8 @@ module Regexp::Syntax
12
13
 
13
14
  # Type is the same as Backreference so keeping it here, for now.
14
15
  module SubexpressionCall
15
- Name = [:name_call]
16
- Number = [:number_call, :number_rel_call]
16
+ Name = %i[name_call]
17
+ Number = %i[number_call number_rel_call]
17
18
 
18
19
  All = Name + Number
19
20
  end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterSet
4
- Basic = [:open, :close, :negate, :range]
5
- Extended = Basic + [:intersection]
4
+ Basic = %i[open close negate range]
5
+ Extended = Basic + %i[intersection]
6
6
 
7
7
  All = Extended
8
8
  Type = :set
@@ -2,10 +2,10 @@ module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterType
4
4
  Basic = []
5
- Extended = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
6
- Hex = [:hex, :nonhex]
5
+ Extended = %i[digit nondigit space nonspace word nonword]
6
+ Hex = %i[hex nonhex]
7
7
 
8
- Clustered = [:linebreak, :xgrapheme]
8
+ Clustered = %i[linebreak xgrapheme]
9
9
 
10
10
  All = Basic + Extended + Hex + Clustered
11
11
  Type = :type
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Conditional
4
- Delimiters = [:open, :close]
4
+ Delimiters = %i[open close]
5
5
 
6
- Condition = [:condition_open, :condition, :condition_close]
7
- Separator = [:separator]
6
+ Condition = %i[condition_open condition condition_close]
7
+ Separator = %i[separator]
8
8
 
9
9
  All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator
10
10
 
@@ -0,0 +1,31 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ # TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
4
+ module Escape
5
+ Basic = %i[backslash literal]
6
+
7
+ Control = %i[control meta_sequence]
8
+
9
+ ASCII = %i[bell backspace escape form_feed newline carriage
10
+ tab vertical_tab]
11
+
12
+ Unicode = %i[codepoint codepoint_list]
13
+
14
+ Meta = %i[dot alternation
15
+ zero_or_one zero_or_more one_or_more
16
+ bol eol
17
+ group_open group_close
18
+ interval_open interval_close
19
+ set_open set_close]
20
+
21
+ Hex = %i[hex]
22
+
23
+ Octal = %i[octal]
24
+
25
+ All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
26
+ Type = :escape
27
+ end
28
+
29
+ Map[Escape::Type] = Escape::All
30
+ end
31
+ end
@@ -1,18 +1,18 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Group
4
- Basic = [:capture, :close]
5
- Extended = Basic + [:options, :options_switch]
4
+ Basic = %i[capture close]
5
+ Extended = Basic + %i[options options_switch]
6
6
 
7
- Named = [:named]
8
- Atomic = [:atomic]
9
- Passive = [:passive]
10
- Comment = [:comment]
7
+ Named = %i[named]
8
+ Atomic = %i[atomic]
9
+ Passive = %i[passive]
10
+ Comment = %i[comment]
11
11
 
12
12
  V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
13
13
  Group::Passive + Group::Comment
14
14
 
15
- V2_4_1 = [:absence]
15
+ V2_4_1 = %i[absence]
16
16
 
17
17
  All = V1_8_6 + V2_4_1
18
18
  Type = :group
@@ -1,7 +1,7 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Keep
4
- Mark = [:mark]
4
+ Mark = %i[mark]
5
5
 
6
6
  All = Mark
7
7
  Type = :keep
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Meta
4
- Basic = [:dot]
5
- Extended = Basic + [:alternation]
4
+ Basic = %i[dot]
5
+ Extended = Basic + %i[alternation]
6
6
 
7
7
  All = Extended
8
8
  Type = :meta
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module PosixClass
4
- Standard = [:alnum, :alpha, :blank, :cntrl, :digit, :graph,
5
- :lower, :print, :punct, :space, :upper, :xdigit]
4
+ Standard = %i[alnum alpha blank cntrl digit graph
5
+ lower print punct space upper xdigit]
6
6
 
7
- Extensions = [:ascii, :word]
7
+ Extensions = %i[ascii word]
8
8
 
9
9
  All = Standard + Extensions
10
10
  Type = :posixclass
@@ -0,0 +1,35 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Quantifier
4
+ Greedy = %i[
5
+ zero_or_one
6
+ zero_or_more
7
+ one_or_more
8
+ ]
9
+
10
+ Reluctant = %i[
11
+ zero_or_one_reluctant
12
+ zero_or_more_reluctant
13
+ one_or_more_reluctant
14
+ ]
15
+
16
+ Possessive = %i[
17
+ zero_or_one_possessive
18
+ zero_or_more_possessive
19
+ one_or_more_possessive
20
+ ]
21
+
22
+ Interval = %i[interval]
23
+ IntervalReluctant = %i[interval_reluctant]
24
+ IntervalPossessive = %i[interval_possessive]
25
+
26
+ IntervalAll = Interval + IntervalReluctant +
27
+ IntervalPossessive
28
+
29
+ All = Greedy + Reluctant + Possessive + IntervalAll
30
+ Type = :quantifier
31
+ end
32
+
33
+ Map[Quantifier::Type] = Quantifier::All
34
+ end
35
+ end