regexp_parser 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/README.md +3 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +1 -0
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/root.rb +3 -5
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +6 -5
- data/lib/regexp_parser/expression/sequence.rb +7 -21
- data/lib/regexp_parser/expression/shared.rb +4 -0
- data/lib/regexp_parser/expression.rb +3 -2
- data/lib/regexp_parser/parser.rb +10 -13
- data/lib/regexp_parser/syntax/token/escape.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
|
4
|
+
data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
|
7
|
+
data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
### Added
|
4
|
+
|
5
|
+
- `Regexp::Expression::Base.construct` and `.token_class` methods
|
6
|
+
|
3
7
|
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
8
|
|
5
9
|
### Fixed
|
@@ -36,10 +40,12 @@
|
|
36
40
|
|
37
41
|
It will no longer be supported in regexp_parser v3.0.0.
|
38
42
|
|
39
|
-
Please pass a Regexp::Token instead, e.g. replace `
|
40
|
-
with `::Regexp::Token.new(:quantifier,
|
43
|
+
Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
|
44
|
+
with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
|
41
45
|
will be derived automatically.
|
42
46
|
|
47
|
+
Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
|
48
|
+
|
43
49
|
This is consistent with how Expression::Base instances are created.
|
44
50
|
|
45
51
|
|
data/README.md
CHANGED
@@ -447,12 +447,14 @@ Projects using regexp_parser.
|
|
447
447
|
|
448
448
|
- [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
|
449
449
|
|
450
|
-
- [js_regex](https://github.com/
|
450
|
+
- [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
|
451
451
|
|
452
452
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
453
453
|
|
454
454
|
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
455
455
|
|
456
|
+
- [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
|
457
|
+
|
456
458
|
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
|
457
459
|
|
458
460
|
- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
|
File without changes
|
@@ -1,11 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Root < Regexp::Expression::Subexpression
|
3
3
|
def self.build(options = {})
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
def self.build_token
|
8
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
6
|
+
construct(options: options)
|
9
7
|
end
|
10
8
|
end
|
11
9
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
5
|
+
def construct(params = {})
|
6
|
+
attrs = construct_defaults.merge(params)
|
7
|
+
options = attrs.delete(:options)
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
9
|
+
token = Regexp::Token.new(*token_args)
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
11
|
+
|
12
|
+
new(token, options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def construct_defaults
|
16
|
+
if self == Root
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
18
|
+
elsif self < Sequence
|
19
|
+
{ type: :expression, token: :sequence }
|
20
|
+
else
|
21
|
+
{ type: token_class::Type }
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
23
|
+
end
|
24
|
+
|
25
|
+
def token_class
|
26
|
+
if self == Root || self < Sequence
|
27
|
+
nil # no token class because these objects are Parser-generated
|
28
|
+
# TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
|
29
|
+
elsif self == Alternation || self == CharacterType::Any
|
30
|
+
Regexp::Syntax::Token::Meta
|
31
|
+
elsif self <= EscapeSequence::Base
|
32
|
+
Regexp::Syntax::Token::Escape
|
33
|
+
else
|
34
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def token_class
|
40
|
+
self.class.token_class
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -112,7 +112,7 @@ module Regexp::Expression
|
|
112
112
|
end
|
113
113
|
|
114
114
|
def inner_match_length
|
115
|
-
dummy = Regexp::Expression::Root.
|
115
|
+
dummy = Regexp::Expression::Root.construct
|
116
116
|
dummy.expressions = expressions.map(&:clone)
|
117
117
|
dummy.quantifier = quantifier && quantifier.clone
|
118
118
|
dummy.match_length
|
@@ -14,7 +14,7 @@ module Regexp::Expression
|
|
14
14
|
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
15
15
|
|
16
16
|
init_from_token_and_options(*args)
|
17
|
-
@mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
|
17
|
+
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
18
|
@min, @max = minmax
|
19
19
|
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
20
|
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
@@ -44,10 +44,11 @@ module Regexp::Expression
|
|
44
44
|
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
45
45
|
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
46
|
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
|
-
"Please pass a Regexp::Token instead, e.g. replace `
|
48
|
-
"with `::Regexp::Token.new(:quantifier,
|
49
|
-
"will be derived automatically
|
50
|
-
"
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
48
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
49
|
+
"will be derived automatically.\n"\
|
50
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
51
|
+
"This is consistent with how Expression::Base instances are created. "
|
51
52
|
@token = token
|
52
53
|
@text = text
|
53
54
|
@min = min
|
@@ -7,31 +7,17 @@ module Regexp::Expression
|
|
7
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
8
8
|
class Sequence < Regexp::Expression::Subexpression
|
9
9
|
class << self
|
10
|
-
def add_to(
|
11
|
-
sequence =
|
12
|
-
|
13
|
-
|
14
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
15
|
)
|
16
|
-
sequence.nesting_level =
|
16
|
+
sequence.nesting_level = exp.nesting_level + 1
|
17
17
|
sequence.options = active_opts
|
18
|
-
|
18
|
+
exp.expressions << sequence
|
19
19
|
sequence
|
20
20
|
end
|
21
|
-
|
22
|
-
def at_levels(level, set_level, conditional_level)
|
23
|
-
token = Regexp::Token.new(
|
24
|
-
:expression,
|
25
|
-
:sequence,
|
26
|
-
'',
|
27
|
-
nil, # ts
|
28
|
-
nil, # te
|
29
|
-
level,
|
30
|
-
set_level,
|
31
|
-
conditional_level
|
32
|
-
)
|
33
|
-
new(token)
|
34
|
-
end
|
35
21
|
end
|
36
22
|
|
37
23
|
def starts_at
|
@@ -1,7 +1,11 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Shared
|
3
|
+
module ClassMethods; end # filled in ./methods/*.rb
|
4
|
+
|
3
5
|
def self.included(mod)
|
4
6
|
mod.class_eval do
|
7
|
+
extend Shared::ClassMethods
|
8
|
+
|
5
9
|
attr_accessor :type, :token, :text, :ts, :te,
|
6
10
|
:level, :set_level, :conditional_level,
|
7
11
|
:options, :quantifier
|
@@ -13,6 +13,7 @@ require 'regexp_parser/expression/classes/backreference'
|
|
13
13
|
require 'regexp_parser/expression/classes/character_set'
|
14
14
|
require 'regexp_parser/expression/classes/character_set/intersection'
|
15
15
|
require 'regexp_parser/expression/classes/character_set/range'
|
16
|
+
require 'regexp_parser/expression/classes/character_type'
|
16
17
|
require 'regexp_parser/expression/classes/conditional'
|
17
18
|
require 'regexp_parser/expression/classes/escape_sequence'
|
18
19
|
require 'regexp_parser/expression/classes/free_space'
|
@@ -20,10 +21,10 @@ require 'regexp_parser/expression/classes/group'
|
|
20
21
|
require 'regexp_parser/expression/classes/keep'
|
21
22
|
require 'regexp_parser/expression/classes/literal'
|
22
23
|
require 'regexp_parser/expression/classes/posix_class'
|
23
|
-
require 'regexp_parser/expression/classes/property'
|
24
24
|
require 'regexp_parser/expression/classes/root'
|
25
|
-
require 'regexp_parser/expression/classes/
|
25
|
+
require 'regexp_parser/expression/classes/unicode_property'
|
26
26
|
|
27
|
+
require 'regexp_parser/expression/methods/construct'
|
27
28
|
require 'regexp_parser/expression/methods/match'
|
28
29
|
require 'regexp_parser/expression/methods/match_length'
|
29
30
|
require 'regexp_parser/expression/methods/options'
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -23,7 +23,7 @@ class Regexp::Parser
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
26
|
-
root = Root.
|
26
|
+
root = Root.construct(options: extract_options(input, options))
|
27
27
|
|
28
28
|
self.root = root
|
29
29
|
self.node = root
|
@@ -200,11 +200,11 @@ class Regexp::Parser
|
|
200
200
|
end
|
201
201
|
|
202
202
|
def captured_group_count_at_level
|
203
|
-
captured_group_counts[node
|
203
|
+
captured_group_counts[node]
|
204
204
|
end
|
205
205
|
|
206
206
|
def count_captured_group
|
207
|
-
captured_group_counts[node
|
207
|
+
captured_group_counts[node] += 1
|
208
208
|
end
|
209
209
|
|
210
210
|
def close_group
|
@@ -475,17 +475,14 @@ class Regexp::Parser
|
|
475
475
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
476
476
|
# rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
|
477
477
|
if target_node.quantified?
|
478
|
-
|
479
|
-
:
|
480
|
-
:
|
481
|
-
|
482
|
-
target_node.
|
483
|
-
|
484
|
-
|
485
|
-
target_node.set_level,
|
486
|
-
target_node.conditional_level
|
478
|
+
new_group = Group::Passive.construct(
|
479
|
+
token: :passive,
|
480
|
+
ts: target_node.ts,
|
481
|
+
level: target_node.level,
|
482
|
+
set_level: target_node.set_level,
|
483
|
+
conditional_level: target_node.conditional_level,
|
484
|
+
options: active_opts,
|
487
485
|
)
|
488
|
-
new_group = Group::Passive.new(new_token, active_opts)
|
489
486
|
new_group.implicit = true
|
490
487
|
new_group << target_node
|
491
488
|
increase_group_level(target_node)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -32,6 +32,7 @@ files:
|
|
32
32
|
- lib/regexp_parser/expression/classes/character_set.rb
|
33
33
|
- lib/regexp_parser/expression/classes/character_set/intersection.rb
|
34
34
|
- lib/regexp_parser/expression/classes/character_set/range.rb
|
35
|
+
- lib/regexp_parser/expression/classes/character_type.rb
|
35
36
|
- lib/regexp_parser/expression/classes/conditional.rb
|
36
37
|
- lib/regexp_parser/expression/classes/escape_sequence.rb
|
37
38
|
- lib/regexp_parser/expression/classes/free_space.rb
|
@@ -39,9 +40,9 @@ files:
|
|
39
40
|
- lib/regexp_parser/expression/classes/keep.rb
|
40
41
|
- lib/regexp_parser/expression/classes/literal.rb
|
41
42
|
- lib/regexp_parser/expression/classes/posix_class.rb
|
42
|
-
- lib/regexp_parser/expression/classes/property.rb
|
43
43
|
- lib/regexp_parser/expression/classes/root.rb
|
44
|
-
- lib/regexp_parser/expression/classes/
|
44
|
+
- lib/regexp_parser/expression/classes/unicode_property.rb
|
45
|
+
- lib/regexp_parser/expression/methods/construct.rb
|
45
46
|
- lib/regexp_parser/expression/methods/match.rb
|
46
47
|
- lib/regexp_parser/expression/methods/match_length.rb
|
47
48
|
- lib/regexp_parser/expression/methods/options.rb
|