regexp_parser 2.8.2 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -4
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/classes/character_set.rb +1 -4
- data/lib/regexp_parser/expression/classes/posix_class.rb +0 -4
- data/lib/regexp_parser/expression/classes/unicode_property.rb +6 -9
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression.rb +1 -0
- data/lib/regexp_parser/parser.rb +1 -0
- data/lib/regexp_parser/scanner/properties/long.csv +18 -0
- data/lib/regexp_parser/scanner/properties/short.csv +1 -0
- data/lib/regexp_parser/scanner/scanner.rl +6 -5
- data/lib/regexp_parser/scanner.rb +6 -5
- data/lib/regexp_parser/syntax/token/unicode_property.rb +18 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8cc1826647cde51d6d1b5a5a58fb005efd2a38a85fa0e817616591ee2fad7862
|
4
|
+
data.tar.gz: 572a6203741b9970bcedc1ace243ea0b9c300ca60b71ac263036eb0f4222dd50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3521fe6dab4be0c0db3c37f3f8d196fc754ff72937336a73ef5547a15ae4f2d366aa28e73d6e5756920d610b943ee51cb2db8e51e53ccb19c1c235a8c45da708
|
7
|
+
data.tar.gz: d05b7babb79c118bdc36ae168d8199ee3500b0cff33cb00ed46d51a4a88725130e931c588146a3f989dd87778b1f39684b2c8a5541c9ac8f91427fc31b1ec97a
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem 'leto', '~> 2.
|
7
|
-
gem 'rake', '~> 13.
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
12
|
gem 'gouteur', '~> 1.1'
|
13
|
-
gem 'rubocop', '~> 1.
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
14
|
end
|
15
15
|
end
|
data/LICENSE
CHANGED
data/lib/regexp_parser/expression/classes/character_set.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
data/lib/regexp_parser/expression/classes/unicode_property.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module UnicodeProperty
|
3
3
|
class Base < Regexp::Expression::Base
|
4
|
-
def negative?
|
5
|
-
type == :nonproperty
|
6
|
-
end
|
7
|
-
|
8
4
|
def name
|
9
5
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
10
6
|
end
|
@@ -109,11 +105,12 @@ module Regexp::Expression
|
|
109
105
|
class Unassigned < Codepoint::Base; end
|
110
106
|
end
|
111
107
|
|
112
|
-
class Age
|
113
|
-
class
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
117
114
|
end
|
118
115
|
|
119
116
|
# alias for symmetry between token symbol and Expression class name
|
data/lib/regexp_parser/expression/methods/negative.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def negative?
|
4
|
+
false
|
5
|
+
end
|
6
|
+
|
7
|
+
# not an alias so as to respect overrides of #negative?
|
8
|
+
def negated?
|
9
|
+
negative?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
14
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
15
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
16
|
+
CharacterSet.class_eval { def negative?; negative end }
|
17
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
18
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
19
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
20
|
+
end
|
data/lib/regexp_parser/expression.rb
CHANGED
@@ -28,6 +28,7 @@ require 'regexp_parser/expression/methods/construct'
|
|
28
28
|
require 'regexp_parser/expression/methods/human_name'
|
29
29
|
require 'regexp_parser/expression/methods/match'
|
30
30
|
require 'regexp_parser/expression/methods/match_length'
|
31
|
+
require 'regexp_parser/expression/methods/negative'
|
31
32
|
require 'regexp_parser/expression/methods/options'
|
32
33
|
require 'regexp_parser/expression/methods/parts'
|
33
34
|
require 'regexp_parser/expression/methods/printing'
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -467,6 +467,7 @@ class Regexp::Parser
|
|
467
467
|
when *UPTokens::Age; node << UP::Age.new(token, active_opts)
|
468
468
|
when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
|
469
469
|
when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
|
470
|
+
when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
|
470
471
|
when *UPTokens::Script; node << UP::Script.new(token, active_opts)
|
471
472
|
when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
|
472
473
|
|
data/lib/regexp_parser/scanner/properties/long.csv
CHANGED
@@ -8,6 +8,7 @@ age=12.1,age=12.1
|
|
8
8
|
age=13.0,age=13.0
|
9
9
|
age=14.0,age=14.0
|
10
10
|
age=15.0,age=15.0
|
11
|
+
age=15.1,age=15.1
|
11
12
|
age=2.0,age=2.0
|
12
13
|
age=2.1,age=2.1
|
13
14
|
age=3.0,age=3.0
|
@@ -108,6 +109,19 @@ gothic,gothic
|
|
108
109
|
grantha,grantha
|
109
110
|
graph,graph
|
110
111
|
graphemebase,grapheme_base
|
112
|
+
graphemeclusterbreak=control,grapheme_cluster_break=control
|
113
|
+
graphemeclusterbreak=cr,grapheme_cluster_break=cr
|
114
|
+
graphemeclusterbreak=extend,grapheme_cluster_break=extend
|
115
|
+
graphemeclusterbreak=l,grapheme_cluster_break=l
|
116
|
+
graphemeclusterbreak=lf,grapheme_cluster_break=lf
|
117
|
+
graphemeclusterbreak=lv,grapheme_cluster_break=lv
|
118
|
+
graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
|
119
|
+
graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
|
120
|
+
graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
|
121
|
+
graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
|
122
|
+
graphemeclusterbreak=t,grapheme_cluster_break=t
|
123
|
+
graphemeclusterbreak=v,grapheme_cluster_break=v
|
124
|
+
graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
|
111
125
|
graphemeextend,grapheme_extend
|
112
126
|
graphemelink,grapheme_link
|
113
127
|
greek,greek
|
@@ -123,11 +137,14 @@ hebrew,hebrew
|
|
123
137
|
hexdigit,hex_digit
|
124
138
|
hiragana,hiragana
|
125
139
|
hyphen,hyphen
|
140
|
+
idcompatmathcontinue,id_compat_math_continue
|
141
|
+
idcompatmathstart,id_compat_math_start
|
126
142
|
idcontinue,id_continue
|
127
143
|
ideographic,ideographic
|
128
144
|
idsbinaryoperator,ids_binary_operator
|
129
145
|
idstart,id_start
|
130
146
|
idstrinaryoperator,ids_trinary_operator
|
147
|
+
idsunaryoperator,ids_unary_operator
|
131
148
|
imperialaramaic,imperial_aramaic
|
132
149
|
inadlam,in_adlam
|
133
150
|
inaegeannumbers,in_aegean_numbers
|
@@ -190,6 +207,7 @@ incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
|
|
190
207
|
incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
191
208
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
192
209
|
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
210
|
+
incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
|
193
211
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
194
212
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
195
213
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
data/lib/regexp_parser/scanner/scanner.rl
CHANGED
@@ -364,6 +364,7 @@
|
|
364
364
|
conditional_expression := |*
|
365
365
|
group_lookup . ')' {
|
366
366
|
text = copy(data, ts, te-1)
|
367
|
+
text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
|
367
368
|
emit(:conditional, :condition, text)
|
368
369
|
emit(:conditional, :condition_close, ')')
|
369
370
|
};
|
@@ -541,13 +542,13 @@
|
|
541
542
|
case text = copy(data, ts, te)
|
542
543
|
when /^\\k(.)[^0-9\-][^+\-]*['>]$/
|
543
544
|
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
|
544
|
-
when /^\\k(.)[1-9]\d*['>]$/
|
545
|
+
when /^\\k(.)0*[1-9]\d*['>]$/
|
545
546
|
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
|
546
|
-
when /^\\k(.)-[1-9]\d*['>]$/
|
547
|
+
when /^\\k(.)-0*[1-9]\d*['>]$/
|
547
548
|
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
|
548
549
|
when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
|
549
550
|
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
|
550
|
-
when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
|
551
|
+
when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
|
551
552
|
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
|
552
553
|
else
|
553
554
|
raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
|
@@ -560,9 +561,9 @@
|
|
560
561
|
case text = copy(data, ts, te)
|
561
562
|
when /^\\g(.)[^0-9+\-].*['>]$/
|
562
563
|
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
|
563
|
-
when /^\\g(.)\d
|
564
|
+
when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
|
564
565
|
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
|
565
|
-
when /^\\g(.)[+-]\d
|
566
|
+
when /^\\g(.)[+-]0*[1-9]\d*/
|
566
567
|
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
|
567
568
|
else
|
568
569
|
raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
|
data/lib/regexp_parser/scanner.rb
CHANGED
@@ -1570,6 +1570,7 @@ end
|
|
1570
1570
|
te = p+1
|
1571
1571
|
begin
|
1572
1572
|
text = copy(data, ts, te-1)
|
1573
|
+
text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
|
1573
1574
|
emit(:conditional, :condition, text)
|
1574
1575
|
emit(:conditional, :condition_close, ')')
|
1575
1576
|
end
|
@@ -1750,13 +1751,13 @@ te = p+1
|
|
1750
1751
|
case text = copy(data, ts, te)
|
1751
1752
|
when /^\\k(.)[^0-9\-][^+\-]*['>]$/
|
1752
1753
|
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
|
1753
|
-
when /^\\k(.)[1-9]\d*['>]$/
|
1754
|
+
when /^\\k(.)0*[1-9]\d*['>]$/
|
1754
1755
|
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
|
1755
|
-
when /^\\k(.)-[1-9]\d*['>]$/
|
1756
|
+
when /^\\k(.)-0*[1-9]\d*['>]$/
|
1756
1757
|
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
|
1757
1758
|
when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
|
1758
1759
|
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
|
1759
|
-
when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
|
1760
|
+
when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
|
1760
1761
|
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
|
1761
1762
|
else
|
1762
1763
|
raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
|
@@ -1770,9 +1771,9 @@ te = p+1
|
|
1770
1771
|
case text = copy(data, ts, te)
|
1771
1772
|
when /^\\g(.)[^0-9+\-].*['>]$/
|
1772
1773
|
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
|
1773
|
-
when /^\\g(.)\d
|
1774
|
+
when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
|
1774
1775
|
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
|
1775
|
-
when /^\\g(.)[+-]\d
|
1776
|
+
when /^\\g(.)[+-]0*[1-9]\d*/
|
1776
1777
|
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
|
1777
1778
|
else
|
1778
1779
|
raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
|
data/lib/regexp_parser/syntax/token/unicode_property.rb
CHANGED
@@ -703,6 +703,24 @@ module Regexp::Syntax
|
|
703
703
|
extended_pictographic
|
704
704
|
]
|
705
705
|
|
706
|
+
Enumerated_V2_4_0 = %i[
|
707
|
+
grapheme_cluster_break=control
|
708
|
+
grapheme_cluster_break=cr
|
709
|
+
grapheme_cluster_break=extend
|
710
|
+
grapheme_cluster_break=l
|
711
|
+
grapheme_cluster_break=lf
|
712
|
+
grapheme_cluster_break=lv
|
713
|
+
grapheme_cluster_break=lvt
|
714
|
+
grapheme_cluster_break=prepend
|
715
|
+
grapheme_cluster_break=regional_indicator
|
716
|
+
grapheme_cluster_break=spacingmark
|
717
|
+
grapheme_cluster_break=t
|
718
|
+
grapheme_cluster_break=v
|
719
|
+
grapheme_cluster_break=zwj
|
720
|
+
]
|
721
|
+
|
722
|
+
Enumerated = all[:Enumerated_V]
|
723
|
+
|
706
724
|
Emoji = all[:Emoji_V]
|
707
725
|
|
708
726
|
V1_9_0 = Category::All + POSIX + all[:V1_9_0]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2024-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
15
15
|
email:
|
@@ -46,6 +46,7 @@ files:
|
|
46
46
|
- lib/regexp_parser/expression/methods/human_name.rb
|
47
47
|
- lib/regexp_parser/expression/methods/match.rb
|
48
48
|
- lib/regexp_parser/expression/methods/match_length.rb
|
49
|
+
- lib/regexp_parser/expression/methods/negative.rb
|
49
50
|
- lib/regexp_parser/expression/methods/options.rb
|
50
51
|
- lib/regexp_parser/expression/methods/parts.rb
|
51
52
|
- lib/regexp_parser/expression/methods/printing.rb
|