regexp_parser 2.8.3 → 2.9.2
This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/Gemfile +4 -4
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/classes/character_set.rb +1 -4
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +0 -4
- data/lib/regexp_parser/expression/classes/unicode_property.rb +6 -9
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression.rb +34 -33
- data/lib/regexp_parser/parser.rb +6 -5
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +1 -1
- data/lib/regexp_parser/scanner/properties/long.csv +18 -0
- data/lib/regexp_parser/scanner/properties/short.csv +1 -0
- data/lib/regexp_parser/scanner/scanner.rl +3 -3
- data/lib/regexp_parser/scanner.rb +3 -3
- data/lib/regexp_parser/syntax/token/unicode_property.rb +18 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +2 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c88d5bc178e9bf95a8a008d9d5e9d8cf1b4a8bb0d65310901a995daa448a28f4
|
4
|
+
data.tar.gz: 47c1ed4782981f5cc2a0bb7bd8f402e360cd60ebeba33615df0c94dd3842b48c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dc1bf229c259b762ea38f459f70a9a04e5ee08207fbae04bdf9045f9f2b1c0f0b6a716a3e08fda55ca0b769ef55f480f7f0e19f3412175fdc7a475362889ab3
|
7
|
+
data.tar.gz: 5de692c1cce8f2436936752d0cf6c5ea51d84bb9c63110dcc49621a476b47800300911952f4d4a687c81f151886bc5570b14af559d74b5196b63e13c684ab7c5
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem 'leto', '~> 2.
|
7
|
-
gem 'rake', '~> 13.
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
12
|
gem 'gouteur', '~> 1.1'
|
13
|
-
gem 'rubocop', '~> 1.
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
14
|
end
|
15
15
|
end
|
data/LICENSE
CHANGED
data/lib/regexp_parser/expression/classes/character_set.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
data/lib/regexp_parser/expression/classes/keep.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Keep
|
3
|
-
#
|
3
|
+
# TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
|
4
4
|
# that contains all expressions to its left.
|
5
5
|
class Mark < Regexp::Expression::Base; end
|
6
6
|
end
|
data/lib/regexp_parser/expression/classes/unicode_property.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module UnicodeProperty
|
3
3
|
class Base < Regexp::Expression::Base
|
4
|
-
def negative?
|
5
|
-
type == :nonproperty
|
6
|
-
end
|
7
|
-
|
8
4
|
def name
|
9
5
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
10
6
|
end
|
@@ -109,11 +105,12 @@ module Regexp::Expression
|
|
109
105
|
class Unassigned < Codepoint::Base; end
|
110
106
|
end
|
111
107
|
|
112
|
-
class Age
|
113
|
-
class
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
117
114
|
end
|
118
115
|
|
119
116
|
# alias for symmetry between token symbol and Expression class name
|
data/lib/regexp_parser/expression/methods/negative.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def negative?
|
4
|
+
false
|
5
|
+
end
|
6
|
+
|
7
|
+
# not an alias so as to respect overrides of #negative?
|
8
|
+
def negated?
|
9
|
+
negative?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
14
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
15
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
16
|
+
CharacterSet.class_eval { def negative?; negative end }
|
17
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
18
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
19
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
20
|
+
end
|
data/lib/regexp_parser/expression.rb
CHANGED
@@ -1,36 +1,37 @@
|
|
1
|
-
|
1
|
+
require_relative 'error'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
require_relative 'expression/shared'
|
4
|
+
require_relative 'expression/base'
|
5
|
+
require_relative 'expression/quantifier'
|
6
|
+
require_relative 'expression/subexpression'
|
7
|
+
require_relative 'expression/sequence'
|
8
|
+
require_relative 'expression/sequence_operation'
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
10
|
+
require_relative 'expression/classes/alternation'
|
11
|
+
require_relative 'expression/classes/anchor'
|
12
|
+
require_relative 'expression/classes/backreference'
|
13
|
+
require_relative 'expression/classes/character_set'
|
14
|
+
require_relative 'expression/classes/character_set/intersection'
|
15
|
+
require_relative 'expression/classes/character_set/range'
|
16
|
+
require_relative 'expression/classes/character_type'
|
17
|
+
require_relative 'expression/classes/conditional'
|
18
|
+
require_relative 'expression/classes/escape_sequence'
|
19
|
+
require_relative 'expression/classes/free_space'
|
20
|
+
require_relative 'expression/classes/group'
|
21
|
+
require_relative 'expression/classes/keep'
|
22
|
+
require_relative 'expression/classes/literal'
|
23
|
+
require_relative 'expression/classes/posix_class'
|
24
|
+
require_relative 'expression/classes/root'
|
25
|
+
require_relative 'expression/classes/unicode_property'
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
27
|
+
require_relative 'expression/methods/construct'
|
28
|
+
require_relative 'expression/methods/human_name'
|
29
|
+
require_relative 'expression/methods/match'
|
30
|
+
require_relative 'expression/methods/match_length'
|
31
|
+
require_relative 'expression/methods/negative'
|
32
|
+
require_relative 'expression/methods/options'
|
33
|
+
require_relative 'expression/methods/parts'
|
34
|
+
require_relative 'expression/methods/printing'
|
35
|
+
require_relative 'expression/methods/strfregexp'
|
36
|
+
require_relative 'expression/methods/tests'
|
37
|
+
require_relative 'expression/methods/traverse'
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative 'error'
|
2
|
+
require_relative 'expression'
|
3
3
|
|
4
4
|
class Regexp::Parser
|
5
5
|
include Regexp::Expression
|
@@ -467,6 +467,7 @@ class Regexp::Parser
|
|
467
467
|
when *UPTokens::Age; node << UP::Age.new(token, active_opts)
|
468
468
|
when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
|
469
469
|
when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
|
470
|
+
when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
|
470
471
|
when *UPTokens::Script; node << UP::Script.new(token, active_opts)
|
471
472
|
when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
|
472
473
|
|
@@ -574,18 +575,18 @@ class Regexp::Parser
|
|
574
575
|
options_stack.last
|
575
576
|
end
|
576
577
|
|
577
|
-
# Assigns referenced expressions to
|
578
|
+
# Assigns referenced expressions to referring expressions, e.g. if there is
|
578
579
|
# an instance of Backreference::Number, its #referenced_expression is set to
|
579
580
|
# the instance of Group::Capture that it refers to via its number.
|
580
581
|
def assign_referenced_expressions
|
581
|
-
# find all
|
582
|
+
# find all referenceable and referring expressions
|
582
583
|
targets = { 0 => root }
|
583
584
|
referrers = []
|
584
585
|
root.each_expression do |exp|
|
585
586
|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
|
586
587
|
referrers << exp if exp.referential?
|
587
588
|
end
|
588
|
-
# assign reference expression to
|
589
|
+
# assign reference expression to referring expressions
|
589
590
|
# (in a second iteration because there might be forward references)
|
590
591
|
referrers.each do |exp|
|
591
592
|
exp.referenced_expression = targets[exp.reference] ||
|
data/lib/regexp_parser/scanner/properties/long.csv
CHANGED
@@ -8,6 +8,7 @@ age=12.1,age=12.1
|
|
8
8
|
age=13.0,age=13.0
|
9
9
|
age=14.0,age=14.0
|
10
10
|
age=15.0,age=15.0
|
11
|
+
age=15.1,age=15.1
|
11
12
|
age=2.0,age=2.0
|
12
13
|
age=2.1,age=2.1
|
13
14
|
age=3.0,age=3.0
|
@@ -108,6 +109,19 @@ gothic,gothic
|
|
108
109
|
grantha,grantha
|
109
110
|
graph,graph
|
110
111
|
graphemebase,grapheme_base
|
112
|
+
graphemeclusterbreak=control,grapheme_cluster_break=control
|
113
|
+
graphemeclusterbreak=cr,grapheme_cluster_break=cr
|
114
|
+
graphemeclusterbreak=extend,grapheme_cluster_break=extend
|
115
|
+
graphemeclusterbreak=l,grapheme_cluster_break=l
|
116
|
+
graphemeclusterbreak=lf,grapheme_cluster_break=lf
|
117
|
+
graphemeclusterbreak=lv,grapheme_cluster_break=lv
|
118
|
+
graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
|
119
|
+
graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
|
120
|
+
graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
|
121
|
+
graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
|
122
|
+
graphemeclusterbreak=t,grapheme_cluster_break=t
|
123
|
+
graphemeclusterbreak=v,grapheme_cluster_break=v
|
124
|
+
graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
|
111
125
|
graphemeextend,grapheme_extend
|
112
126
|
graphemelink,grapheme_link
|
113
127
|
greek,greek
|
@@ -123,11 +137,14 @@ hebrew,hebrew
|
|
123
137
|
hexdigit,hex_digit
|
124
138
|
hiragana,hiragana
|
125
139
|
hyphen,hyphen
|
140
|
+
idcompatmathcontinue,id_compat_math_continue
|
141
|
+
idcompatmathstart,id_compat_math_start
|
126
142
|
idcontinue,id_continue
|
127
143
|
ideographic,ideographic
|
128
144
|
idsbinaryoperator,ids_binary_operator
|
129
145
|
idstart,id_start
|
130
146
|
idstrinaryoperator,ids_trinary_operator
|
147
|
+
idsunaryoperator,ids_unary_operator
|
131
148
|
imperialaramaic,imperial_aramaic
|
132
149
|
inadlam,in_adlam
|
133
150
|
inaegeannumbers,in_aegean_numbers
|
@@ -190,6 +207,7 @@ incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
|
|
190
207
|
incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
191
208
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
192
209
|
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
210
|
+
incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
|
193
211
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
194
212
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
195
213
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
data/lib/regexp_parser/scanner/scanner.rl
CHANGED
@@ -640,9 +640,9 @@
|
|
640
640
|
*|;
|
641
641
|
}%%
|
642
642
|
|
643
|
-
|
644
|
-
|
645
|
-
|
643
|
+
require_relative 'scanner/errors/scanner_error'
|
644
|
+
require_relative 'scanner/errors/premature_end_error'
|
645
|
+
require_relative 'scanner/errors/validation_error'
|
646
646
|
|
647
647
|
class Regexp::Scanner
|
648
648
|
# Scans the given regular expression text, or Regexp object and collects the
|
data/lib/regexp_parser/scanner.rb
CHANGED
@@ -6,9 +6,9 @@
|
|
6
6
|
# by running `bundle exec rake ragel:rb`
|
7
7
|
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
require_relative 'scanner/errors/scanner_error'
|
10
|
+
require_relative 'scanner/errors/premature_end_error'
|
11
|
+
require_relative 'scanner/errors/validation_error'
|
12
12
|
|
13
13
|
class Regexp::Scanner
|
14
14
|
# Scans the given regular expression text, or Regexp object and collects the
|
data/lib/regexp_parser/syntax/token/unicode_property.rb
CHANGED
@@ -703,6 +703,24 @@ module Regexp::Syntax
|
|
703
703
|
extended_pictographic
|
704
704
|
]
|
705
705
|
|
706
|
+
Enumerated_V2_4_0 = %i[
|
707
|
+
grapheme_cluster_break=control
|
708
|
+
grapheme_cluster_break=cr
|
709
|
+
grapheme_cluster_break=extend
|
710
|
+
grapheme_cluster_break=l
|
711
|
+
grapheme_cluster_break=lf
|
712
|
+
grapheme_cluster_break=lv
|
713
|
+
grapheme_cluster_break=lvt
|
714
|
+
grapheme_cluster_break=prepend
|
715
|
+
grapheme_cluster_break=regional_indicator
|
716
|
+
grapheme_cluster_break=spacingmark
|
717
|
+
grapheme_cluster_break=t
|
718
|
+
grapheme_cluster_break=v
|
719
|
+
grapheme_cluster_break=zwj
|
720
|
+
]
|
721
|
+
|
722
|
+
Enumerated = all[:Enumerated_V]
|
723
|
+
|
706
724
|
Emoji = all[:Emoji_V]
|
707
725
|
|
708
726
|
V1_9_0 = Category::All + POSIX + all[:V1_9_0]
|
data/lib/regexp_parser/syntax/token.rb
CHANGED
@@ -20,19 +20,19 @@ end
|
|
20
20
|
|
21
21
|
|
22
22
|
# Load all the token files, they will populate the Map constant.
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
23
|
+
require_relative 'token/anchor'
|
24
|
+
require_relative 'token/assertion'
|
25
|
+
require_relative 'token/backreference'
|
26
|
+
require_relative 'token/posix_class'
|
27
|
+
require_relative 'token/character_set'
|
28
|
+
require_relative 'token/character_type'
|
29
|
+
require_relative 'token/conditional'
|
30
|
+
require_relative 'token/escape'
|
31
|
+
require_relative 'token/group'
|
32
|
+
require_relative 'token/keep'
|
33
|
+
require_relative 'token/meta'
|
34
|
+
require_relative 'token/quantifier'
|
35
|
+
require_relative 'token/unicode_property'
|
36
36
|
|
37
37
|
|
38
38
|
# After loading all the tokens the map is full. Extract all tokens and types
|
data/lib/regexp_parser/syntax/versions.rb
CHANGED
@@ -3,6 +3,6 @@
|
|
3
3
|
#
|
4
4
|
# Aliases for the latest patch version are provided as 'ruby/n.n',
|
5
5
|
# e.g. 'ruby/1.9' refers to Ruby v1.9.3.
|
6
|
-
Dir[File.expand_path('../versions/*.rb', __FILE__)].sort.each { |f|
|
6
|
+
Dir[File.expand_path('../versions/*.rb', __FILE__)].sort.each { |f| require_relative f }
|
7
7
|
|
8
8
|
Regexp::Syntax::CURRENT = Regexp::Syntax.for("ruby/#{RUBY_VERSION}")
|
data/lib/regexp_parser/syntax.rb
CHANGED
data/lib/regexp_parser.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
require_relative 'regexp_parser/version'
|
2
|
+
require_relative 'regexp_parser/token'
|
3
|
+
require_relative 'regexp_parser/scanner'
|
4
|
+
require_relative 'regexp_parser/syntax'
|
5
|
+
require_relative 'regexp_parser/lexer'
|
6
|
+
require_relative 'regexp_parser/parser'
|
data/regexp_parser.gemspec
CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.metadata['source_code_uri'] = spec.homepage
|
17
17
|
spec.metadata['wiki_uri'] = "#{spec.homepage}/wiki"
|
18
18
|
|
19
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
20
|
+
|
19
21
|
spec.authors = ['Ammar Ali', 'Janosch Müller']
|
20
22
|
spec.email = ['ammarabuali@gmail.com', 'janosch84@gmail.com']
|
21
23
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.9.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2024-05-15 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
15
15
|
email:
|
@@ -46,6 +46,7 @@ files:
|
|
46
46
|
- lib/regexp_parser/expression/methods/human_name.rb
|
47
47
|
- lib/regexp_parser/expression/methods/match.rb
|
48
48
|
- lib/regexp_parser/expression/methods/match_length.rb
|
49
|
+
- lib/regexp_parser/expression/methods/negative.rb
|
49
50
|
- lib/regexp_parser/expression/methods/options.rb
|
50
51
|
- lib/regexp_parser/expression/methods/parts.rb
|
51
52
|
- lib/regexp_parser/expression/methods/printing.rb
|
@@ -114,6 +115,7 @@ metadata:
|
|
114
115
|
homepage_uri: https://github.com/ammar/regexp_parser
|
115
116
|
source_code_uri: https://github.com/ammar/regexp_parser
|
116
117
|
wiki_uri: https://github.com/ammar/regexp_parser/wiki
|
118
|
+
rubygems_mfa_required: 'true'
|
117
119
|
post_install_message:
|
118
120
|
rdoc_options: []
|
119
121
|
require_paths:
|
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
131
|
- !ruby/object:Gem::Version
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
|
-
rubygems_version: 3.5.
|
134
|
+
rubygems_version: 3.5.3
|
133
135
|
signing_key:
|
134
136
|
specification_version: 4
|
135
137
|
summary: Scanner, lexer, parser for ruby's regular expressions
|