regexp_parser 2.8.3 → 2.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -4
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/classes/character_set.rb +1 -4
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +0 -4
- data/lib/regexp_parser/expression/classes/unicode_property.rb +6 -9
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression.rb +34 -33
- data/lib/regexp_parser/parser.rb +6 -5
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +1 -1
- data/lib/regexp_parser/scanner/properties/long.csv +18 -0
- data/lib/regexp_parser/scanner/properties/short.csv +1 -0
- data/lib/regexp_parser/scanner/scanner.rl +3 -3
- data/lib/regexp_parser/scanner.rb +3 -3
- data/lib/regexp_parser/syntax/token/unicode_property.rb +18 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +2 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c88d5bc178e9bf95a8a008d9d5e9d8cf1b4a8bb0d65310901a995daa448a28f4
|
4
|
+
data.tar.gz: 47c1ed4782981f5cc2a0bb7bd8f402e360cd60ebeba33615df0c94dd3842b48c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dc1bf229c259b762ea38f459f70a9a04e5ee08207fbae04bdf9045f9f2b1c0f0b6a716a3e08fda55ca0b769ef55f480f7f0e19f3412175fdc7a475362889ab3
|
7
|
+
data.tar.gz: 5de692c1cce8f2436936752d0cf6c5ea51d84bb9c63110dcc49621a476b47800300911952f4d4a687c81f151886bc5570b14af559d74b5196b63e13c684ab7c5
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem 'leto', '~> 2.
|
7
|
-
gem 'rake', '~> 13.
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
12
|
gem 'gouteur', '~> 1.1'
|
13
|
-
gem 'rubocop', '~> 1.
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
14
|
end
|
15
15
|
end
|
data/LICENSE
CHANGED
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Keep
|
3
|
-
#
|
3
|
+
# TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
|
4
4
|
# that contains all expressions to its left.
|
5
5
|
class Mark < Regexp::Expression::Base; end
|
6
6
|
end
|
@@ -1,10 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module UnicodeProperty
|
3
3
|
class Base < Regexp::Expression::Base
|
4
|
-
def negative?
|
5
|
-
type == :nonproperty
|
6
|
-
end
|
7
|
-
|
8
4
|
def name
|
9
5
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
10
6
|
end
|
@@ -109,11 +105,12 @@ module Regexp::Expression
|
|
109
105
|
class Unassigned < Codepoint::Base; end
|
110
106
|
end
|
111
107
|
|
112
|
-
class Age
|
113
|
-
class
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
117
114
|
end
|
118
115
|
|
119
116
|
# alias for symmetry between token symbol and Expression class name
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def negative?
|
4
|
+
false
|
5
|
+
end
|
6
|
+
|
7
|
+
# not an alias so as to respect overrides of #negative?
|
8
|
+
def negated?
|
9
|
+
negative?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
14
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
15
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
16
|
+
CharacterSet.class_eval { def negative?; negative end }
|
17
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
18
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
19
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
20
|
+
end
|
@@ -1,36 +1,37 @@
|
|
1
|
-
|
1
|
+
require_relative 'error'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
require_relative 'expression/shared'
|
4
|
+
require_relative 'expression/base'
|
5
|
+
require_relative 'expression/quantifier'
|
6
|
+
require_relative 'expression/subexpression'
|
7
|
+
require_relative 'expression/sequence'
|
8
|
+
require_relative 'expression/sequence_operation'
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
10
|
+
require_relative 'expression/classes/alternation'
|
11
|
+
require_relative 'expression/classes/anchor'
|
12
|
+
require_relative 'expression/classes/backreference'
|
13
|
+
require_relative 'expression/classes/character_set'
|
14
|
+
require_relative 'expression/classes/character_set/intersection'
|
15
|
+
require_relative 'expression/classes/character_set/range'
|
16
|
+
require_relative 'expression/classes/character_type'
|
17
|
+
require_relative 'expression/classes/conditional'
|
18
|
+
require_relative 'expression/classes/escape_sequence'
|
19
|
+
require_relative 'expression/classes/free_space'
|
20
|
+
require_relative 'expression/classes/group'
|
21
|
+
require_relative 'expression/classes/keep'
|
22
|
+
require_relative 'expression/classes/literal'
|
23
|
+
require_relative 'expression/classes/posix_class'
|
24
|
+
require_relative 'expression/classes/root'
|
25
|
+
require_relative 'expression/classes/unicode_property'
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
27
|
+
require_relative 'expression/methods/construct'
|
28
|
+
require_relative 'expression/methods/human_name'
|
29
|
+
require_relative 'expression/methods/match'
|
30
|
+
require_relative 'expression/methods/match_length'
|
31
|
+
require_relative 'expression/methods/negative'
|
32
|
+
require_relative 'expression/methods/options'
|
33
|
+
require_relative 'expression/methods/parts'
|
34
|
+
require_relative 'expression/methods/printing'
|
35
|
+
require_relative 'expression/methods/strfregexp'
|
36
|
+
require_relative 'expression/methods/tests'
|
37
|
+
require_relative 'expression/methods/traverse'
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative 'error'
|
2
|
+
require_relative 'expression'
|
3
3
|
|
4
4
|
class Regexp::Parser
|
5
5
|
include Regexp::Expression
|
@@ -467,6 +467,7 @@ class Regexp::Parser
|
|
467
467
|
when *UPTokens::Age; node << UP::Age.new(token, active_opts)
|
468
468
|
when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
|
469
469
|
when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
|
470
|
+
when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
|
470
471
|
when *UPTokens::Script; node << UP::Script.new(token, active_opts)
|
471
472
|
when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
|
472
473
|
|
@@ -574,18 +575,18 @@ class Regexp::Parser
|
|
574
575
|
options_stack.last
|
575
576
|
end
|
576
577
|
|
577
|
-
# Assigns referenced expressions to
|
578
|
+
# Assigns referenced expressions to referring expressions, e.g. if there is
|
578
579
|
# an instance of Backreference::Number, its #referenced_expression is set to
|
579
580
|
# the instance of Group::Capture that it refers to via its number.
|
580
581
|
def assign_referenced_expressions
|
581
|
-
# find all
|
582
|
+
# find all referenceable and referring expressions
|
582
583
|
targets = { 0 => root }
|
583
584
|
referrers = []
|
584
585
|
root.each_expression do |exp|
|
585
586
|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
|
586
587
|
referrers << exp if exp.referential?
|
587
588
|
end
|
588
|
-
# assign reference expression to
|
589
|
+
# assign reference expression to referring expressions
|
589
590
|
# (in a second iteration because there might be forward references)
|
590
591
|
referrers.each do |exp|
|
591
592
|
exp.referenced_expression = targets[exp.reference] ||
|
@@ -8,6 +8,7 @@ age=12.1,age=12.1
|
|
8
8
|
age=13.0,age=13.0
|
9
9
|
age=14.0,age=14.0
|
10
10
|
age=15.0,age=15.0
|
11
|
+
age=15.1,age=15.1
|
11
12
|
age=2.0,age=2.0
|
12
13
|
age=2.1,age=2.1
|
13
14
|
age=3.0,age=3.0
|
@@ -108,6 +109,19 @@ gothic,gothic
|
|
108
109
|
grantha,grantha
|
109
110
|
graph,graph
|
110
111
|
graphemebase,grapheme_base
|
112
|
+
graphemeclusterbreak=control,grapheme_cluster_break=control
|
113
|
+
graphemeclusterbreak=cr,grapheme_cluster_break=cr
|
114
|
+
graphemeclusterbreak=extend,grapheme_cluster_break=extend
|
115
|
+
graphemeclusterbreak=l,grapheme_cluster_break=l
|
116
|
+
graphemeclusterbreak=lf,grapheme_cluster_break=lf
|
117
|
+
graphemeclusterbreak=lv,grapheme_cluster_break=lv
|
118
|
+
graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
|
119
|
+
graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
|
120
|
+
graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
|
121
|
+
graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
|
122
|
+
graphemeclusterbreak=t,grapheme_cluster_break=t
|
123
|
+
graphemeclusterbreak=v,grapheme_cluster_break=v
|
124
|
+
graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
|
111
125
|
graphemeextend,grapheme_extend
|
112
126
|
graphemelink,grapheme_link
|
113
127
|
greek,greek
|
@@ -123,11 +137,14 @@ hebrew,hebrew
|
|
123
137
|
hexdigit,hex_digit
|
124
138
|
hiragana,hiragana
|
125
139
|
hyphen,hyphen
|
140
|
+
idcompatmathcontinue,id_compat_math_continue
|
141
|
+
idcompatmathstart,id_compat_math_start
|
126
142
|
idcontinue,id_continue
|
127
143
|
ideographic,ideographic
|
128
144
|
idsbinaryoperator,ids_binary_operator
|
129
145
|
idstart,id_start
|
130
146
|
idstrinaryoperator,ids_trinary_operator
|
147
|
+
idsunaryoperator,ids_unary_operator
|
131
148
|
imperialaramaic,imperial_aramaic
|
132
149
|
inadlam,in_adlam
|
133
150
|
inaegeannumbers,in_aegean_numbers
|
@@ -190,6 +207,7 @@ incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
|
|
190
207
|
incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
191
208
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
192
209
|
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
210
|
+
incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
|
193
211
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
194
212
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
195
213
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
@@ -640,9 +640,9 @@
|
|
640
640
|
*|;
|
641
641
|
}%%
|
642
642
|
|
643
|
-
|
644
|
-
|
645
|
-
|
643
|
+
require_relative 'scanner/errors/scanner_error'
|
644
|
+
require_relative 'scanner/errors/premature_end_error'
|
645
|
+
require_relative 'scanner/errors/validation_error'
|
646
646
|
|
647
647
|
class Regexp::Scanner
|
648
648
|
# Scans the given regular expression text, or Regexp object and collects the
|
@@ -6,9 +6,9 @@
|
|
6
6
|
# by running `bundle exec rake ragel:rb`
|
7
7
|
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
require_relative 'scanner/errors/scanner_error'
|
10
|
+
require_relative 'scanner/errors/premature_end_error'
|
11
|
+
require_relative 'scanner/errors/validation_error'
|
12
12
|
|
13
13
|
class Regexp::Scanner
|
14
14
|
# Scans the given regular expression text, or Regexp object and collects the
|
@@ -703,6 +703,24 @@ module Regexp::Syntax
|
|
703
703
|
extended_pictographic
|
704
704
|
]
|
705
705
|
|
706
|
+
Enumerated_V2_4_0 = %i[
|
707
|
+
grapheme_cluster_break=control
|
708
|
+
grapheme_cluster_break=cr
|
709
|
+
grapheme_cluster_break=extend
|
710
|
+
grapheme_cluster_break=l
|
711
|
+
grapheme_cluster_break=lf
|
712
|
+
grapheme_cluster_break=lv
|
713
|
+
grapheme_cluster_break=lvt
|
714
|
+
grapheme_cluster_break=prepend
|
715
|
+
grapheme_cluster_break=regional_indicator
|
716
|
+
grapheme_cluster_break=spacingmark
|
717
|
+
grapheme_cluster_break=t
|
718
|
+
grapheme_cluster_break=v
|
719
|
+
grapheme_cluster_break=zwj
|
720
|
+
]
|
721
|
+
|
722
|
+
Enumerated = all[:Enumerated_V]
|
723
|
+
|
706
724
|
Emoji = all[:Emoji_V]
|
707
725
|
|
708
726
|
V1_9_0 = Category::All + POSIX + all[:V1_9_0]
|
@@ -20,19 +20,19 @@ end
|
|
20
20
|
|
21
21
|
|
22
22
|
# Load all the token files, they will populate the Map constant.
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
23
|
+
require_relative 'token/anchor'
|
24
|
+
require_relative 'token/assertion'
|
25
|
+
require_relative 'token/backreference'
|
26
|
+
require_relative 'token/posix_class'
|
27
|
+
require_relative 'token/character_set'
|
28
|
+
require_relative 'token/character_type'
|
29
|
+
require_relative 'token/conditional'
|
30
|
+
require_relative 'token/escape'
|
31
|
+
require_relative 'token/group'
|
32
|
+
require_relative 'token/keep'
|
33
|
+
require_relative 'token/meta'
|
34
|
+
require_relative 'token/quantifier'
|
35
|
+
require_relative 'token/unicode_property'
|
36
36
|
|
37
37
|
|
38
38
|
# After loading all the tokens the map is full. Extract all tokens and types
|
@@ -3,6 +3,6 @@
|
|
3
3
|
#
|
4
4
|
# Aliases for the latest patch version are provided as 'ruby/n.n',
|
5
5
|
# e.g. 'ruby/1.9' refers to Ruby v1.9.3.
|
6
|
-
Dir[File.expand_path('../versions/*.rb', __FILE__)].sort.each { |f|
|
6
|
+
Dir[File.expand_path('../versions/*.rb', __FILE__)].sort.each { |f| require_relative f }
|
7
7
|
|
8
8
|
Regexp::Syntax::CURRENT = Regexp::Syntax.for("ruby/#{RUBY_VERSION}")
|
data/lib/regexp_parser/syntax.rb
CHANGED
data/lib/regexp_parser.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
require_relative 'regexp_parser/version'
|
2
|
+
require_relative 'regexp_parser/token'
|
3
|
+
require_relative 'regexp_parser/scanner'
|
4
|
+
require_relative 'regexp_parser/syntax'
|
5
|
+
require_relative 'regexp_parser/lexer'
|
6
|
+
require_relative 'regexp_parser/parser'
|
data/regexp_parser.gemspec
CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.metadata['source_code_uri'] = spec.homepage
|
17
17
|
spec.metadata['wiki_uri'] = "#{spec.homepage}/wiki"
|
18
18
|
|
19
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
20
|
+
|
19
21
|
spec.authors = ['Ammar Ali', 'Janosch Müller']
|
20
22
|
spec.email = ['ammarabuali@gmail.com', 'janosch84@gmail.com']
|
21
23
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.9.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2024-05-15 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
15
15
|
email:
|
@@ -46,6 +46,7 @@ files:
|
|
46
46
|
- lib/regexp_parser/expression/methods/human_name.rb
|
47
47
|
- lib/regexp_parser/expression/methods/match.rb
|
48
48
|
- lib/regexp_parser/expression/methods/match_length.rb
|
49
|
+
- lib/regexp_parser/expression/methods/negative.rb
|
49
50
|
- lib/regexp_parser/expression/methods/options.rb
|
50
51
|
- lib/regexp_parser/expression/methods/parts.rb
|
51
52
|
- lib/regexp_parser/expression/methods/printing.rb
|
@@ -114,6 +115,7 @@ metadata:
|
|
114
115
|
homepage_uri: https://github.com/ammar/regexp_parser
|
115
116
|
source_code_uri: https://github.com/ammar/regexp_parser
|
116
117
|
wiki_uri: https://github.com/ammar/regexp_parser/wiki
|
118
|
+
rubygems_mfa_required: 'true'
|
117
119
|
post_install_message:
|
118
120
|
rdoc_options: []
|
119
121
|
require_paths:
|
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
131
|
- !ruby/object:Gem::Version
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
|
-
rubygems_version: 3.5.
|
134
|
+
rubygems_version: 3.5.3
|
133
135
|
signing_key:
|
134
136
|
specification_version: 4
|
135
137
|
summary: Scanner, lexer, parser for ruby's regular expressions
|