regexp_parser 2.6.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +5 -5
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +5 -10
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -20
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +21 -91
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
- data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +5 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +68 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/referenced_expressions.rb +28 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -10
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -20
- data/lib/regexp_parser/expression/subexpression.rb +20 -15
- data/lib/regexp_parser/expression.rb +37 -31
- data/lib/regexp_parser/lexer.rb +76 -36
- data/lib/regexp_parser/parser.rb +107 -103
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +29 -0
- data/lib/regexp_parser/scanner/properties/short.csv +3 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +101 -172
- data/lib/regexp_parser/scanner.rb +1171 -1365
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +5 -5
- metadata +17 -8
- data/CHANGELOG.md +0 -601
- data/README.md +0 -503
@@ -1,6 +1,5 @@
|
|
1
1
|
module Regexp::Syntax
|
2
2
|
module Token
|
3
|
-
# TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
|
4
3
|
module Escape
|
5
4
|
Basic = %i[backslash literal]
|
6
5
|
|
@@ -27,5 +26,8 @@ module Regexp::Syntax
|
|
27
26
|
end
|
28
27
|
|
29
28
|
Map[Escape::Type] = Escape::All
|
29
|
+
|
30
|
+
# alias for symmetry between Token::* and Expression::*
|
31
|
+
EscapeSequence = Escape
|
30
32
|
end
|
31
33
|
end
|
@@ -1,13 +1,20 @@
|
|
1
1
|
module Regexp::Syntax
|
2
2
|
module Token
|
3
3
|
module Meta
|
4
|
-
Basic
|
5
|
-
|
4
|
+
Basic = %i[dot]
|
5
|
+
Alternation = %i[alternation]
|
6
|
+
Extended = Basic + Alternation
|
6
7
|
|
7
8
|
All = Extended
|
8
9
|
Type = :meta
|
9
10
|
end
|
10
11
|
|
11
12
|
Map[Meta::Type] = Meta::All
|
13
|
+
|
14
|
+
# alias for symmetry between Token::* and Expression::*
|
15
|
+
module Alternation
|
16
|
+
All = Meta::Alternation
|
17
|
+
Type = Meta::Type
|
18
|
+
end
|
12
19
|
end
|
13
20
|
end
|
@@ -59,7 +59,7 @@ module Regexp::Syntax
|
|
59
59
|
|
60
60
|
Age_V3_1_0 = %i[age=13.0]
|
61
61
|
|
62
|
-
Age_V3_2_0 = %i[age=14.0]
|
62
|
+
Age_V3_2_0 = %i[age=14.0 age=15.0]
|
63
63
|
|
64
64
|
Age = all[:Age_V]
|
65
65
|
|
@@ -321,6 +321,8 @@ module Regexp::Syntax
|
|
321
321
|
|
322
322
|
Script_V3_2_0 = %i[
|
323
323
|
cypro_minoan
|
324
|
+
kawi
|
325
|
+
nag_mundari
|
324
326
|
old_uyghur
|
325
327
|
tangsa
|
326
328
|
toto
|
@@ -667,11 +669,18 @@ module Regexp::Syntax
|
|
667
669
|
|
668
670
|
UnicodeBlock_V3_2_0 = %i[
|
669
671
|
in_arabic_extended_b
|
672
|
+
in_arabic_extended_c
|
673
|
+
in_cjk_unified_ideographs_extension_h
|
670
674
|
in_cypro_minoan
|
675
|
+
in_cyrillic_extended_d
|
676
|
+
in_devanagari_extended_a
|
671
677
|
in_ethiopic_extended_b
|
678
|
+
in_kaktovik_numerals
|
672
679
|
in_kana_extended_b
|
680
|
+
in_kawi
|
673
681
|
in_latin_extended_f
|
674
682
|
in_latin_extended_g
|
683
|
+
in_nag_mundari
|
675
684
|
in_old_uyghur
|
676
685
|
in_tangsa
|
677
686
|
in_toto
|
@@ -690,6 +699,28 @@ module Regexp::Syntax
|
|
690
699
|
emoji_presentation
|
691
700
|
]
|
692
701
|
|
702
|
+
Emoji_V2_6_0 = %i[
|
703
|
+
extended_pictographic
|
704
|
+
]
|
705
|
+
|
706
|
+
Enumerated_V2_4_0 = %i[
|
707
|
+
grapheme_cluster_break=control
|
708
|
+
grapheme_cluster_break=cr
|
709
|
+
grapheme_cluster_break=extend
|
710
|
+
grapheme_cluster_break=l
|
711
|
+
grapheme_cluster_break=lf
|
712
|
+
grapheme_cluster_break=lv
|
713
|
+
grapheme_cluster_break=lvt
|
714
|
+
grapheme_cluster_break=prepend
|
715
|
+
grapheme_cluster_break=regional_indicator
|
716
|
+
grapheme_cluster_break=spacingmark
|
717
|
+
grapheme_cluster_break=t
|
718
|
+
grapheme_cluster_break=v
|
719
|
+
grapheme_cluster_break=zwj
|
720
|
+
]
|
721
|
+
|
722
|
+
Enumerated = all[:Enumerated_V]
|
723
|
+
|
693
724
|
Emoji = all[:Emoji_V]
|
694
725
|
|
695
726
|
V1_9_0 = Category::All + POSIX + all[:V1_9_0]
|
@@ -713,5 +744,8 @@ module Regexp::Syntax
|
|
713
744
|
|
714
745
|
Map[UnicodeProperty::Type] = UnicodeProperty::All
|
715
746
|
Map[UnicodeProperty::NonType] = UnicodeProperty::All
|
747
|
+
|
748
|
+
# alias for symmetry between token symbol and Token module name
|
749
|
+
Property = UnicodeProperty
|
716
750
|
end
|
717
751
|
end
|
@@ -20,19 +20,19 @@ end
|
|
20
20
|
|
21
21
|
|
22
22
|
# Load all the token files, they will populate the Map constant.
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
23
|
+
require_relative 'token/anchor'
|
24
|
+
require_relative 'token/assertion'
|
25
|
+
require_relative 'token/backreference'
|
26
|
+
require_relative 'token/posix_class'
|
27
|
+
require_relative 'token/character_set'
|
28
|
+
require_relative 'token/character_type'
|
29
|
+
require_relative 'token/conditional'
|
30
|
+
require_relative 'token/escape'
|
31
|
+
require_relative 'token/group'
|
32
|
+
require_relative 'token/keep'
|
33
|
+
require_relative 'token/meta'
|
34
|
+
require_relative 'token/quantifier'
|
35
|
+
require_relative 'token/unicode_property'
|
36
36
|
|
37
37
|
|
38
38
|
# After loading all the tokens the map is full. Extract all tokens and types
|
@@ -37,7 +37,6 @@ module Regexp::Syntax
|
|
37
37
|
return Regexp::Syntax::Any if ['*', 'any'].include?(version.to_s)
|
38
38
|
|
39
39
|
version =~ VERSION_REGEXP || raise(InvalidVersionNameError, version)
|
40
|
-
warn_if_future_version(version)
|
41
40
|
version_const_name = "V#{version.to_s.scan(/\d+/).join('_')}"
|
42
41
|
const_get(version_const_name) || raise(UnknownSyntaxNameError, version)
|
43
42
|
end
|
@@ -63,11 +62,4 @@ module Regexp::Syntax
|
|
63
62
|
# add .99 to treat versions without a patch value as latest patch version
|
64
63
|
Gem::Version.new((name.to_s.scan(/\d+/) << 99).join('.'))
|
65
64
|
end
|
66
|
-
|
67
|
-
def warn_if_future_version(const_name)
|
68
|
-
return if comparable(const_name) < comparable('4.0.0')
|
69
|
-
|
70
|
-
warn('This library has only been tested up to Ruby 3.x, '\
|
71
|
-
"but you are running with #{const_name}")
|
72
|
-
end
|
73
65
|
end
|
@@ -3,4 +3,6 @@
|
|
3
3
|
#
|
4
4
|
# Aliases for the latest patch version are provided as 'ruby/n.n',
|
5
5
|
# e.g. 'ruby/1.9' refers to Ruby v1.9.3.
|
6
|
-
Dir[File.expand_path('../versions/*.rb', __FILE__)].sort.each { |f|
|
6
|
+
Dir[File.expand_path('../versions/*.rb', __FILE__)].sort.each { |f| require_relative f }
|
7
|
+
|
8
|
+
Regexp::Syntax::CURRENT = Regexp::Syntax.for("ruby/#{RUBY_VERSION}")
|
data/lib/regexp_parser/syntax.rb
CHANGED
data/lib/regexp_parser.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
require_relative 'regexp_parser/version'
|
2
|
+
require_relative 'regexp_parser/token'
|
3
|
+
require_relative 'regexp_parser/scanner'
|
4
|
+
require_relative 'regexp_parser/syntax'
|
5
|
+
require_relative 'regexp_parser/lexer'
|
6
|
+
require_relative 'regexp_parser/parser'
|
data/regexp_parser.gemspec
CHANGED
@@ -16,17 +16,17 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.metadata['source_code_uri'] = spec.homepage
|
17
17
|
spec.metadata['wiki_uri'] = "#{spec.homepage}/wiki"
|
18
18
|
|
19
|
-
spec.
|
20
|
-
|
19
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
20
|
+
|
21
|
+
spec.authors = ['Ammar Ali', 'Janosch Müller']
|
22
|
+
spec.email = ['ammarabuali@gmail.com', 'janosch84@gmail.com']
|
21
23
|
|
22
24
|
spec.license = 'MIT'
|
23
25
|
|
24
26
|
spec.require_paths = ['lib']
|
25
27
|
|
26
28
|
spec.files = Dir.glob('lib/**/*.{csv,rb,rl}') +
|
27
|
-
%w[Gemfile Rakefile LICENSE
|
28
|
-
|
29
|
-
spec.rdoc_options = ["--inline-source", "--charset=UTF-8"]
|
29
|
+
%w[Gemfile Rakefile LICENSE regexp_parser.gemspec]
|
30
30
|
|
31
31
|
spec.platform = Gem::Platform::RUBY
|
32
32
|
|
metadata
CHANGED
@@ -1,26 +1,26 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
|
+
- Janosch Müller
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2024-12-25 00:00:00.000000000 Z
|
12
13
|
dependencies: []
|
13
14
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
15
|
email:
|
15
16
|
- ammarabuali@gmail.com
|
17
|
+
- janosch84@gmail.com
|
16
18
|
executables: []
|
17
19
|
extensions: []
|
18
20
|
extra_rdoc_files: []
|
19
21
|
files:
|
20
|
-
- CHANGELOG.md
|
21
22
|
- Gemfile
|
22
23
|
- LICENSE
|
23
|
-
- README.md
|
24
24
|
- Rakefile
|
25
25
|
- lib/regexp_parser.rb
|
26
26
|
- lib/regexp_parser/error.rb
|
@@ -43,10 +43,16 @@ files:
|
|
43
43
|
- lib/regexp_parser/expression/classes/root.rb
|
44
44
|
- lib/regexp_parser/expression/classes/unicode_property.rb
|
45
45
|
- lib/regexp_parser/expression/methods/construct.rb
|
46
|
+
- lib/regexp_parser/expression/methods/escape_sequence_char.rb
|
47
|
+
- lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb
|
46
48
|
- lib/regexp_parser/expression/methods/human_name.rb
|
47
49
|
- lib/regexp_parser/expression/methods/match.rb
|
48
50
|
- lib/regexp_parser/expression/methods/match_length.rb
|
51
|
+
- lib/regexp_parser/expression/methods/negative.rb
|
49
52
|
- lib/regexp_parser/expression/methods/options.rb
|
53
|
+
- lib/regexp_parser/expression/methods/parts.rb
|
54
|
+
- lib/regexp_parser/expression/methods/printing.rb
|
55
|
+
- lib/regexp_parser/expression/methods/referenced_expressions.rb
|
50
56
|
- lib/regexp_parser/expression/methods/strfregexp.rb
|
51
57
|
- lib/regexp_parser/expression/methods/tests.rb
|
52
58
|
- lib/regexp_parser/expression/methods/traverse.rb
|
@@ -59,6 +65,9 @@ files:
|
|
59
65
|
- lib/regexp_parser/parser.rb
|
60
66
|
- lib/regexp_parser/scanner.rb
|
61
67
|
- lib/regexp_parser/scanner/char_type.rl
|
68
|
+
- lib/regexp_parser/scanner/errors/premature_end_error.rb
|
69
|
+
- lib/regexp_parser/scanner/errors/scanner_error.rb
|
70
|
+
- lib/regexp_parser/scanner/errors/validation_error.rb
|
62
71
|
- lib/regexp_parser/scanner/properties/long.csv
|
63
72
|
- lib/regexp_parser/scanner/properties/short.csv
|
64
73
|
- lib/regexp_parser/scanner/property.rl
|
@@ -80,6 +89,7 @@ files:
|
|
80
89
|
- lib/regexp_parser/syntax/token/posix_class.rb
|
81
90
|
- lib/regexp_parser/syntax/token/quantifier.rb
|
82
91
|
- lib/regexp_parser/syntax/token/unicode_property.rb
|
92
|
+
- lib/regexp_parser/syntax/token/virtual.rb
|
83
93
|
- lib/regexp_parser/syntax/version_lookup.rb
|
84
94
|
- lib/regexp_parser/syntax/versions.rb
|
85
95
|
- lib/regexp_parser/syntax/versions/1.8.6.rb
|
@@ -108,10 +118,9 @@ metadata:
|
|
108
118
|
homepage_uri: https://github.com/ammar/regexp_parser
|
109
119
|
source_code_uri: https://github.com/ammar/regexp_parser
|
110
120
|
wiki_uri: https://github.com/ammar/regexp_parser/wiki
|
121
|
+
rubygems_mfa_required: 'true'
|
111
122
|
post_install_message:
|
112
|
-
rdoc_options:
|
113
|
-
- "--inline-source"
|
114
|
-
- "--charset=UTF-8"
|
123
|
+
rdoc_options: []
|
115
124
|
require_paths:
|
116
125
|
- lib
|
117
126
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -125,7 +134,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
125
134
|
- !ruby/object:Gem::Version
|
126
135
|
version: '0'
|
127
136
|
requirements: []
|
128
|
-
rubygems_version: 3.
|
137
|
+
rubygems_version: 3.4.13
|
129
138
|
signing_key:
|
130
139
|
specification_version: 4
|
131
140
|
summary: Scanner, lexer, parser for ruby's regular expressions
|