regexp_parser 2.8.2 → 2.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -4
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/classes/character_set.rb +1 -4
- data/lib/regexp_parser/expression/classes/posix_class.rb +0 -4
- data/lib/regexp_parser/expression/classes/unicode_property.rb +6 -9
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression.rb +1 -0
- data/lib/regexp_parser/parser.rb +1 -0
- data/lib/regexp_parser/scanner/properties/long.csv +18 -0
- data/lib/regexp_parser/scanner/properties/short.csv +1 -0
- data/lib/regexp_parser/scanner/scanner.rl +6 -5
- data/lib/regexp_parser/scanner.rb +6 -5
- data/lib/regexp_parser/syntax/token/unicode_property.rb +18 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8cc1826647cde51d6d1b5a5a58fb005efd2a38a85fa0e817616591ee2fad7862
|
4
|
+
data.tar.gz: 572a6203741b9970bcedc1ace243ea0b9c300ca60b71ac263036eb0f4222dd50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3521fe6dab4be0c0db3c37f3f8d196fc754ff72937336a73ef5547a15ae4f2d366aa28e73d6e5756920d610b943ee51cb2db8e51e53ccb19c1c235a8c45da708
|
7
|
+
data.tar.gz: d05b7babb79c118bdc36ae168d8199ee3500b0cff33cb00ed46d51a4a88725130e931c588146a3f989dd87778b1f39684b2c8a5541c9ac8f91427fc31b1ec97a
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem 'leto', '~> 2.
|
7
|
-
gem 'rake', '~> 13.
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
12
|
gem 'gouteur', '~> 1.1'
|
13
|
-
gem 'rubocop', '~> 1.
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
14
|
end
|
15
15
|
end
|
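data/LICENSE
CHANGED
[The LICENSE hunk (+1 -1) was not captured in this extract.]
data/lib/regexp_parser/expression/classes/character_set.rb
CHANGED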
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
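data/lib/regexp_parser/expression/classes/posix_class.rb
CHANGED
[The posix_class.rb hunk (+0 -4) was not captured in this extract.]
data/lib/regexp_parser/expression/classes/unicode_property.rb
CHANGED
@@ -1,10 +1,6 @@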
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module UnicodeProperty
|
3
3
|
class Base < Regexp::Expression::Base
|
4
|
-
def negative?
|
5
|
-
type == :nonproperty
|
6
|
-
end
|
7
|
-
|
8
4
|
def name
|
9
5
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
10
6
|
end
|
@@ -109,11 +105,12 @@ module Regexp::Expression
|
|
109
105
|
class Unassigned < Codepoint::Base; end
|
110
106
|
end
|
111
107
|
|
112
|
-
class Age
|
113
|
-
class
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
117
114
|
end
|
118
115
|
|
119
116
|
# alias for symmetry between token symbol and Expression class name
|
data/lib/regexp_parser/expression/methods/negative.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def negative?
|
4
|
+
false
|
5
|
+
end
|
6
|
+
|
7
|
+
# not an alias so as to respect overrides of #negative?
|
8
|
+
def negated?
|
9
|
+
negative?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
14
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
15
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
16
|
+
CharacterSet.class_eval { def negative?; negative end }
|
17
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
18
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
19
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
20
|
+
end
|
data/lib/regexp_parser/expression.rb
CHANGED
@@ -28,6 +28,7 @@ require 'regexp_parser/expression/methods/construct'
|
|
28
28
|
require 'regexp_parser/expression/methods/human_name'
|
29
29
|
require 'regexp_parser/expression/methods/match'
|
30
30
|
require 'regexp_parser/expression/methods/match_length'
|
31
|
+
require 'regexp_parser/expression/methods/negative'
|
31
32
|
require 'regexp_parser/expression/methods/options'
|
32
33
|
require 'regexp_parser/expression/methods/parts'
|
33
34
|
require 'regexp_parser/expression/methods/printing'
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -467,6 +467,7 @@ class Regexp::Parser
|
|
467
467
|
when *UPTokens::Age; node << UP::Age.new(token, active_opts)
|
468
468
|
when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
|
469
469
|
when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
|
470
|
+
when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
|
470
471
|
when *UPTokens::Script; node << UP::Script.new(token, active_opts)
|
471
472
|
when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
|
472
473
|
|
data/lib/regexp_parser/scanner/properties/long.csv
CHANGED
@@ -8,6 +8,7 @@ age=12.1,age=12.1
|
|
8
8
|
age=13.0,age=13.0
|
9
9
|
age=14.0,age=14.0
|
10
10
|
age=15.0,age=15.0
|
11
|
+
age=15.1,age=15.1
|
11
12
|
age=2.0,age=2.0
|
12
13
|
age=2.1,age=2.1
|
13
14
|
age=3.0,age=3.0
|
@@ -108,6 +109,19 @@ gothic,gothic
|
|
108
109
|
grantha,grantha
|
109
110
|
graph,graph
|
110
111
|
graphemebase,grapheme_base
|
112
|
+
graphemeclusterbreak=control,grapheme_cluster_break=control
|
113
|
+
graphemeclusterbreak=cr,grapheme_cluster_break=cr
|
114
|
+
graphemeclusterbreak=extend,grapheme_cluster_break=extend
|
115
|
+
graphemeclusterbreak=l,grapheme_cluster_break=l
|
116
|
+
graphemeclusterbreak=lf,grapheme_cluster_break=lf
|
117
|
+
graphemeclusterbreak=lv,grapheme_cluster_break=lv
|
118
|
+
graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
|
119
|
+
graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
|
120
|
+
graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
|
121
|
+
graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
|
122
|
+
graphemeclusterbreak=t,grapheme_cluster_break=t
|
123
|
+
graphemeclusterbreak=v,grapheme_cluster_break=v
|
124
|
+
graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
|
111
125
|
graphemeextend,grapheme_extend
|
112
126
|
graphemelink,grapheme_link
|
113
127
|
greek,greek
|
@@ -123,11 +137,14 @@ hebrew,hebrew
|
|
123
137
|
hexdigit,hex_digit
|
124
138
|
hiragana,hiragana
|
125
139
|
hyphen,hyphen
|
140
|
+
idcompatmathcontinue,id_compat_math_continue
|
141
|
+
idcompatmathstart,id_compat_math_start
|
126
142
|
idcontinue,id_continue
|
127
143
|
ideographic,ideographic
|
128
144
|
idsbinaryoperator,ids_binary_operator
|
129
145
|
idstart,id_start
|
130
146
|
idstrinaryoperator,ids_trinary_operator
|
147
|
+
idsunaryoperator,ids_unary_operator
|
131
148
|
imperialaramaic,imperial_aramaic
|
132
149
|
inadlam,in_adlam
|
133
150
|
inaegeannumbers,in_aegean_numbers
|
@@ -190,6 +207,7 @@ incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
|
|
190
207
|
incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
191
208
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
192
209
|
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
210
|
+
incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
|
193
211
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
194
212
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
195
213
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
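data/lib/regexp_parser/scanner/properties/short.csv
CHANGED
[The short.csv hunk (+1 -0) was not captured in this extract.]
data/lib/regexp_parser/scanner/scanner.rl
CHANGED
@@ -364,6 +364,7 @@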
|
|
364
364
|
conditional_expression := |*
|
365
365
|
group_lookup . ')' {
|
366
366
|
text = copy(data, ts, te-1)
|
367
|
+
text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
|
367
368
|
emit(:conditional, :condition, text)
|
368
369
|
emit(:conditional, :condition_close, ')')
|
369
370
|
};
|
@@ -541,13 +542,13 @@
|
|
541
542
|
case text = copy(data, ts, te)
|
542
543
|
when /^\\k(.)[^0-9\-][^+\-]*['>]$/
|
543
544
|
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
|
544
|
-
when /^\\k(.)[1-9]\d*['>]$/
|
545
|
+
when /^\\k(.)0*[1-9]\d*['>]$/
|
545
546
|
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
|
546
|
-
when /^\\k(.)-[1-9]\d*['>]$/
|
547
|
+
when /^\\k(.)-0*[1-9]\d*['>]$/
|
547
548
|
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
|
548
549
|
when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
|
549
550
|
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
|
550
|
-
when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
|
551
|
+
when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
|
551
552
|
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
|
552
553
|
else
|
553
554
|
raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
|
@@ -560,9 +561,9 @@
|
|
560
561
|
case text = copy(data, ts, te)
|
561
562
|
when /^\\g(.)[^0-9+\-].*['>]$/
|
562
563
|
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
|
563
|
-
when /^\\g(.)\d
|
564
|
+
when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
|
564
565
|
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
|
565
|
-
when /^\\g(.)[+-]\d
|
566
|
+
when /^\\g(.)[+-]0*[1-9]\d*/
|
566
567
|
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
|
567
568
|
else
|
568
569
|
raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
|
data/lib/regexp_parser/scanner.rb
CHANGED
@@ -1570,6 +1570,7 @@ end
|
|
1570
1570
|
te = p+1
|
1571
1571
|
begin
|
1572
1572
|
text = copy(data, ts, te-1)
|
1573
|
+
text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
|
1573
1574
|
emit(:conditional, :condition, text)
|
1574
1575
|
emit(:conditional, :condition_close, ')')
|
1575
1576
|
end
|
@@ -1750,13 +1751,13 @@ te = p+1
|
|
1750
1751
|
case text = copy(data, ts, te)
|
1751
1752
|
when /^\\k(.)[^0-9\-][^+\-]*['>]$/
|
1752
1753
|
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
|
1753
|
-
when /^\\k(.)[1-9]\d*['>]$/
|
1754
|
+
when /^\\k(.)0*[1-9]\d*['>]$/
|
1754
1755
|
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
|
1755
|
-
when /^\\k(.)-[1-9]\d*['>]$/
|
1756
|
+
when /^\\k(.)-0*[1-9]\d*['>]$/
|
1756
1757
|
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
|
1757
1758
|
when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
|
1758
1759
|
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
|
1759
|
-
when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
|
1760
|
+
when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
|
1760
1761
|
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
|
1761
1762
|
else
|
1762
1763
|
raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
|
@@ -1770,9 +1771,9 @@ te = p+1
|
|
1770
1771
|
case text = copy(data, ts, te)
|
1771
1772
|
when /^\\g(.)[^0-9+\-].*['>]$/
|
1772
1773
|
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
|
1773
|
-
when /^\\g(.)\d
|
1774
|
+
when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
|
1774
1775
|
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
|
1775
|
-
when /^\\g(.)[+-]\d
|
1776
|
+
when /^\\g(.)[+-]0*[1-9]\d*/
|
1776
1777
|
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
|
1777
1778
|
else
|
1778
1779
|
raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
|
data/lib/regexp_parser/syntax/token/unicode_property.rb
CHANGED
@@ -703,6 +703,24 @@ module Regexp::Syntax
|
|
703
703
|
extended_pictographic
|
704
704
|
]
|
705
705
|
|
706
|
+
Enumerated_V2_4_0 = %i[
|
707
|
+
grapheme_cluster_break=control
|
708
|
+
grapheme_cluster_break=cr
|
709
|
+
grapheme_cluster_break=extend
|
710
|
+
grapheme_cluster_break=l
|
711
|
+
grapheme_cluster_break=lf
|
712
|
+
grapheme_cluster_break=lv
|
713
|
+
grapheme_cluster_break=lvt
|
714
|
+
grapheme_cluster_break=prepend
|
715
|
+
grapheme_cluster_break=regional_indicator
|
716
|
+
grapheme_cluster_break=spacingmark
|
717
|
+
grapheme_cluster_break=t
|
718
|
+
grapheme_cluster_break=v
|
719
|
+
grapheme_cluster_break=zwj
|
720
|
+
]
|
721
|
+
|
722
|
+
Enumerated = all[:Enumerated_V]
|
723
|
+
|
706
724
|
Emoji = all[:Emoji_V]
|
707
725
|
|
708
726
|
V1_9_0 = Category::All + POSIX + all[:V1_9_0]
|
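data/lib/regexp_parser/version.rb
CHANGED
[The version.rb hunk (+1 -1) was not captured in this extract.]
metadata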
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.8.2
|
4
|
+
version: 2.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2024-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
15
15
|
email:
|
@@ -46,6 +46,7 @@ files:
|
|
46
46
|
- lib/regexp_parser/expression/methods/human_name.rb
|
47
47
|
- lib/regexp_parser/expression/methods/match.rb
|
48
48
|
- lib/regexp_parser/expression/methods/match_length.rb
|
49
|
+
- lib/regexp_parser/expression/methods/negative.rb
|
49
50
|
- lib/regexp_parser/expression/methods/options.rb
|
50
51
|
- lib/regexp_parser/expression/methods/parts.rb
|
51
52
|
- lib/regexp_parser/expression/methods/printing.rb
|