regexp_parser 2.4.0 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/README.md +3 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +1 -0
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/root.rb +3 -5
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +6 -5
- data/lib/regexp_parser/expression/sequence.rb +7 -21
- data/lib/regexp_parser/expression/shared.rb +4 -0
- data/lib/regexp_parser/expression.rb +3 -2
- data/lib/regexp_parser/parser.rb +10 -13
- data/lib/regexp_parser/syntax/token/escape.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
|
4
|
+
data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
|
7
|
+
data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
### Added
|
4
|
+
|
5
|
+
- `Regexp::Expression::Base.construct` and `.token_class` methods
|
6
|
+
|
3
7
|
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
8
|
|
5
9
|
### Fixed
|
@@ -36,10 +40,12 @@
|
|
36
40
|
|
37
41
|
It will no longer be supported in regexp_parser v3.0.0.
|
38
42
|
|
39
|
-
Please pass a Regexp::Token instead, e.g. replace `
|
40
|
-
with `::Regexp::Token.new(:quantifier,
|
43
|
+
Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
|
44
|
+
with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
|
41
45
|
will be derived automatically.
|
42
46
|
|
47
|
+
Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
|
48
|
+
|
43
49
|
This is consistent with how Expression::Base instances are created.
|
44
50
|
|
45
51
|
|
data/README.md
CHANGED
@@ -447,12 +447,14 @@ Projects using regexp_parser.
|
|
447
447
|
|
448
448
|
- [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
|
449
449
|
|
450
|
-
- [js_regex](https://github.com/
|
450
|
+
- [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
|
451
451
|
|
452
452
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
453
453
|
|
454
454
|
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
455
455
|
|
456
|
+
- [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
|
457
|
+
|
456
458
|
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
|
457
459
|
|
458
460
|
- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
|
File without changes
|
@@ -1,11 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Root < Regexp::Expression::Subexpression
|
3
3
|
def self.build(options = {})
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
def self.build_token
|
8
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
6
|
+
construct(options: options)
|
9
7
|
end
|
10
8
|
end
|
11
9
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
5
|
+
def construct(params = {})
|
6
|
+
attrs = construct_defaults.merge(params)
|
7
|
+
options = attrs.delete(:options)
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
9
|
+
token = Regexp::Token.new(*token_args)
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
11
|
+
|
12
|
+
new(token, options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def construct_defaults
|
16
|
+
if self == Root
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
18
|
+
elsif self < Sequence
|
19
|
+
{ type: :expression, token: :sequence }
|
20
|
+
else
|
21
|
+
{ type: token_class::Type }
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
23
|
+
end
|
24
|
+
|
25
|
+
def token_class
|
26
|
+
if self == Root || self < Sequence
|
27
|
+
nil # no token class because these objects are Parser-generated
|
28
|
+
# TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
|
29
|
+
elsif self == Alternation || self == CharacterType::Any
|
30
|
+
Regexp::Syntax::Token::Meta
|
31
|
+
elsif self <= EscapeSequence::Base
|
32
|
+
Regexp::Syntax::Token::Escape
|
33
|
+
else
|
34
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def token_class
|
40
|
+
self.class.token_class
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -112,7 +112,7 @@ module Regexp::Expression
|
|
112
112
|
end
|
113
113
|
|
114
114
|
def inner_match_length
|
115
|
-
dummy = Regexp::Expression::Root.build
|
115
|
+
dummy = Regexp::Expression::Root.construct
|
116
116
|
dummy.expressions = expressions.map(&:clone)
|
117
117
|
dummy.quantifier = quantifier && quantifier.clone
|
118
118
|
dummy.match_length
|
@@ -14,7 +14,7 @@ module Regexp::Expression
|
|
14
14
|
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
15
15
|
|
16
16
|
init_from_token_and_options(*args)
|
17
|
-
@mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
|
17
|
+
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
18
|
@min, @max = minmax
|
19
19
|
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
20
|
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
@@ -44,10 +44,11 @@ module Regexp::Expression
|
|
44
44
|
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
45
45
|
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
46
|
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
|
-
"Please pass a Regexp::Token instead, e.g. replace `
|
48
|
-
"with `::Regexp::Token.new(:quantifier,
|
49
|
-
"will be derived automatically
|
50
|
-
"
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
48
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
49
|
+
"will be derived automatically.\n"\
|
50
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
51
|
+
"This is consistent with how Expression::Base instances are created. "
|
51
52
|
@token = token
|
52
53
|
@text = text
|
53
54
|
@min = min
|
@@ -7,31 +7,17 @@ module Regexp::Expression
|
|
7
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
8
8
|
class Sequence < Regexp::Expression::Subexpression
|
9
9
|
class << self
|
10
|
-
def add_to(
|
11
|
-
sequence =
|
12
|
-
|
13
|
-
|
14
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
15
|
)
|
16
|
-
sequence.nesting_level =
|
16
|
+
sequence.nesting_level = exp.nesting_level + 1
|
17
17
|
sequence.options = active_opts
|
18
|
-
|
18
|
+
exp.expressions << sequence
|
19
19
|
sequence
|
20
20
|
end
|
21
|
-
|
22
|
-
def at_levels(level, set_level, conditional_level)
|
23
|
-
token = Regexp::Token.new(
|
24
|
-
:expression,
|
25
|
-
:sequence,
|
26
|
-
'',
|
27
|
-
nil, # ts
|
28
|
-
nil, # te
|
29
|
-
level,
|
30
|
-
set_level,
|
31
|
-
conditional_level
|
32
|
-
)
|
33
|
-
new(token)
|
34
|
-
end
|
35
21
|
end
|
36
22
|
|
37
23
|
def starts_at
|
@@ -1,7 +1,11 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Shared
|
3
|
+
module ClassMethods; end # filled in ./methods/*.rb
|
4
|
+
|
3
5
|
def self.included(mod)
|
4
6
|
mod.class_eval do
|
7
|
+
extend Shared::ClassMethods
|
8
|
+
|
5
9
|
attr_accessor :type, :token, :text, :ts, :te,
|
6
10
|
:level, :set_level, :conditional_level,
|
7
11
|
:options, :quantifier
|
@@ -13,6 +13,7 @@ require 'regexp_parser/expression/classes/backreference'
|
|
13
13
|
require 'regexp_parser/expression/classes/character_set'
|
14
14
|
require 'regexp_parser/expression/classes/character_set/intersection'
|
15
15
|
require 'regexp_parser/expression/classes/character_set/range'
|
16
|
+
require 'regexp_parser/expression/classes/character_type'
|
16
17
|
require 'regexp_parser/expression/classes/conditional'
|
17
18
|
require 'regexp_parser/expression/classes/escape_sequence'
|
18
19
|
require 'regexp_parser/expression/classes/free_space'
|
@@ -20,10 +21,10 @@ require 'regexp_parser/expression/classes/group'
|
|
20
21
|
require 'regexp_parser/expression/classes/keep'
|
21
22
|
require 'regexp_parser/expression/classes/literal'
|
22
23
|
require 'regexp_parser/expression/classes/posix_class'
|
23
|
-
require 'regexp_parser/expression/classes/property'
|
24
24
|
require 'regexp_parser/expression/classes/root'
|
25
|
-
require 'regexp_parser/expression/classes/type'
|
25
|
+
require 'regexp_parser/expression/classes/unicode_property'
|
26
26
|
|
27
|
+
require 'regexp_parser/expression/methods/construct'
|
27
28
|
require 'regexp_parser/expression/methods/match'
|
28
29
|
require 'regexp_parser/expression/methods/match_length'
|
29
30
|
require 'regexp_parser/expression/methods/options'
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -23,7 +23,7 @@ class Regexp::Parser
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
26
|
-
root = Root.build(extract_options(input, options))
|
26
|
+
root = Root.construct(options: extract_options(input, options))
|
27
27
|
|
28
28
|
self.root = root
|
29
29
|
self.node = root
|
@@ -200,11 +200,11 @@ class Regexp::Parser
|
|
200
200
|
end
|
201
201
|
|
202
202
|
def captured_group_count_at_level
|
203
|
-
captured_group_counts[node.level]
|
203
|
+
captured_group_counts[node]
|
204
204
|
end
|
205
205
|
|
206
206
|
def count_captured_group
|
207
|
-
captured_group_counts[node.level] += 1
|
207
|
+
captured_group_counts[node] += 1
|
208
208
|
end
|
209
209
|
|
210
210
|
def close_group
|
@@ -475,17 +475,14 @@ class Regexp::Parser
|
|
475
475
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
476
476
|
# rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
|
477
477
|
if target_node.quantified?
|
478
|
-
|
479
|
-
:
|
480
|
-
:
|
481
|
-
|
482
|
-
target_node.
|
483
|
-
|
484
|
-
|
485
|
-
target_node.set_level,
|
486
|
-
target_node.conditional_level
|
478
|
+
new_group = Group::Passive.construct(
|
479
|
+
token: :passive,
|
480
|
+
ts: target_node.ts,
|
481
|
+
level: target_node.level,
|
482
|
+
set_level: target_node.set_level,
|
483
|
+
conditional_level: target_node.conditional_level,
|
484
|
+
options: active_opts,
|
487
485
|
)
|
488
|
-
new_group = Group::Passive.new(new_token, active_opts)
|
489
486
|
new_group.implicit = true
|
490
487
|
new_group << target_node
|
491
488
|
increase_group_level(target_node)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.0
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-09 00:00:00.000000000 Z
|
11
|
+
date: 2022-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -32,6 +32,7 @@ files:
|
|
32
32
|
- lib/regexp_parser/expression/classes/character_set.rb
|
33
33
|
- lib/regexp_parser/expression/classes/character_set/intersection.rb
|
34
34
|
- lib/regexp_parser/expression/classes/character_set/range.rb
|
35
|
+
- lib/regexp_parser/expression/classes/character_type.rb
|
35
36
|
- lib/regexp_parser/expression/classes/conditional.rb
|
36
37
|
- lib/regexp_parser/expression/classes/escape_sequence.rb
|
37
38
|
- lib/regexp_parser/expression/classes/free_space.rb
|
@@ -39,9 +40,9 @@ files:
|
|
39
40
|
- lib/regexp_parser/expression/classes/keep.rb
|
40
41
|
- lib/regexp_parser/expression/classes/literal.rb
|
41
42
|
- lib/regexp_parser/expression/classes/posix_class.rb
|
42
|
-
- lib/regexp_parser/expression/classes/property.rb
|
43
43
|
- lib/regexp_parser/expression/classes/root.rb
|
44
|
-
- lib/regexp_parser/expression/classes/type.rb
|
44
|
+
- lib/regexp_parser/expression/classes/unicode_property.rb
|
45
|
+
- lib/regexp_parser/expression/methods/construct.rb
|
45
46
|
- lib/regexp_parser/expression/methods/match.rb
|
46
47
|
- lib/regexp_parser/expression/methods/match_length.rb
|
47
48
|
- lib/regexp_parser/expression/methods/options.rb
|