regexp_parser 2.4.0 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b84a4bb274f31b8608c7dc9d55ff6f1b8d92d0d147976f38079ae7701a6debe
4
- data.tar.gz: 41db5f094d0beafade30a1fac2707cbc827831e818c485ad35d7173f18c6a91a
3
+ metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
4
+ data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
5
5
  SHA512:
6
- metadata.gz: 5dcde6135ac42db609402e47e04ee3be1da8854de286d2baad15dafee04d451814fd7a3bae7adc5440a1fced811e242b69f5fd14bcfc4f3bd5091f86769d56be
7
- data.tar.gz: 2660d0fb28a972a1de53b71b16f8591e573d4214724b5eea8a452549598ff5d0fc5b731149e8332f65bce01c812f4d0d72135bba7e3016064d9f05202a8b5580
6
+ metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
7
+ data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ### Added
4
+
5
+ - `Regexp::Expression::Base.construct` and `.token_class` methods
6
+
3
7
  ## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
4
8
 
5
9
  ### Fixed
@@ -36,10 +40,12 @@
36
40
 
37
41
  It will no longer be supported in regexp_parser v3.0.0.
38
42
 
39
- Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode`
40
- with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode
43
+ Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
44
+ with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
41
45
  will be derived automatically.
42
46
 
47
+ Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
48
+
43
49
  This is consistent with how Expression::Base instances are created.
44
50
 
45
51
 
data/README.md CHANGED
@@ -447,12 +447,14 @@ Projects using regexp_parser.
447
447
 
448
448
  - [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
449
449
 
450
- - [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
450
+ - [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
451
451
 
452
452
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
453
453
 
454
454
  - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
455
455
 
456
+ - [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
457
+
456
458
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
457
459
 
458
460
  - [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify name with token :backref, one way or the other, in v3.0.0
2
3
  module Backreference
3
4
  class Base < Regexp::Expression::Base
4
5
  attr_accessor :referenced_expression
@@ -1,5 +1,7 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
+ # that contains all expressions to its left.
3
5
  class Mark < Regexp::Expression::Base; end
4
6
  end
5
7
  end
@@ -1,11 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class Root < Regexp::Expression::Subexpression
3
3
  def self.build(options = {})
4
- new(build_token, options)
5
- end
6
-
7
- def self.build_token
8
- Regexp::Token.new(:expression, :root, '', 0)
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
9
7
  end
10
8
  end
11
9
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify name with token :property, one way or the other, in v3.0.0
2
3
  module UnicodeProperty
3
4
  class Base < Regexp::Expression::Base
4
5
  def negative?
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
+ elsif self == Alternation || self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ elsif self <= EscapeSequence::Base
32
+ Regexp::Syntax::Token::Escape
33
+ else
34
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
35
+ end
36
+ end
37
+ end
38
+
39
+ def token_class
40
+ self.class.token_class
41
+ end
42
+ end
43
+ end
@@ -112,7 +112,7 @@ module Regexp::Expression
112
112
  end
113
113
 
114
114
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
115
+ dummy = Regexp::Expression::Root.construct
116
116
  dummy.expressions = expressions.map(&:clone)
117
117
  dummy.quantifier = quantifier && quantifier.clone
118
118
  dummy.match_length
@@ -14,7 +14,7 @@ module Regexp::Expression
14
14
  deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
15
15
 
16
16
  init_from_token_and_options(*args)
17
- @mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
17
+ @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
18
18
  @min, @max = minmax
19
19
  # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
20
  self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
@@ -44,10 +44,11 @@ module Regexp::Expression
44
44
  def deprecated_old_init(token, text, min, max, mode = :greedy)
45
45
  warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
46
  "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
- "Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode` "\
48
- "with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode "\
49
- "will be derived automatically. \nThis is consistent with how Expression::Base "\
50
- "instances are created."
47
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
49
+ "will be derived automatically.\n"\
50
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
51
+ "This is consistent with how Expression::Base instances are created. "
51
52
  @token = token
52
53
  @text = text
53
54
  @min = min
@@ -7,31 +7,17 @@ module Regexp::Expression
7
7
  # branches, and CharacterSet::Intersection intersected sequences.
8
8
  class Sequence < Regexp::Expression::Subexpression
9
9
  class << self
10
- def add_to(subexpression, params = {}, active_opts = {})
11
- sequence = at_levels(
12
- subexpression.level,
13
- subexpression.set_level,
14
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
15
15
  )
16
- sequence.nesting_level = subexpression.nesting_level + 1
16
+ sequence.nesting_level = exp.nesting_level + 1
17
17
  sequence.options = active_opts
18
- subexpression.expressions << sequence
18
+ exp.expressions << sequence
19
19
  sequence
20
20
  end
21
-
22
- def at_levels(level, set_level, conditional_level)
23
- token = Regexp::Token.new(
24
- :expression,
25
- :sequence,
26
- '',
27
- nil, # ts
28
- nil, # te
29
- level,
30
- set_level,
31
- conditional_level
32
- )
33
- new(token)
34
- end
35
21
  end
36
22
 
37
23
  def starts_at
@@ -1,7 +1,11 @@
1
1
  module Regexp::Expression
2
2
  module Shared
3
+ module ClassMethods; end # filled in ./methods/*.rb
4
+
3
5
  def self.included(mod)
4
6
  mod.class_eval do
7
+ extend Shared::ClassMethods
8
+
5
9
  attr_accessor :type, :token, :text, :ts, :te,
6
10
  :level, :set_level, :conditional_level,
7
11
  :options, :quantifier
@@ -13,6 +13,7 @@ require 'regexp_parser/expression/classes/backreference'
13
13
  require 'regexp_parser/expression/classes/character_set'
14
14
  require 'regexp_parser/expression/classes/character_set/intersection'
15
15
  require 'regexp_parser/expression/classes/character_set/range'
16
+ require 'regexp_parser/expression/classes/character_type'
16
17
  require 'regexp_parser/expression/classes/conditional'
17
18
  require 'regexp_parser/expression/classes/escape_sequence'
18
19
  require 'regexp_parser/expression/classes/free_space'
@@ -20,10 +21,10 @@ require 'regexp_parser/expression/classes/group'
20
21
  require 'regexp_parser/expression/classes/keep'
21
22
  require 'regexp_parser/expression/classes/literal'
22
23
  require 'regexp_parser/expression/classes/posix_class'
23
- require 'regexp_parser/expression/classes/property'
24
24
  require 'regexp_parser/expression/classes/root'
25
- require 'regexp_parser/expression/classes/type'
25
+ require 'regexp_parser/expression/classes/unicode_property'
26
26
 
27
+ require 'regexp_parser/expression/methods/construct'
27
28
  require 'regexp_parser/expression/methods/match'
28
29
  require 'regexp_parser/expression/methods/match_length'
29
30
  require 'regexp_parser/expression/methods/options'
@@ -23,7 +23,7 @@ class Regexp::Parser
23
23
  end
24
24
 
25
25
  def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
26
- root = Root.build(extract_options(input, options))
26
+ root = Root.construct(options: extract_options(input, options))
27
27
 
28
28
  self.root = root
29
29
  self.node = root
@@ -200,11 +200,11 @@ class Regexp::Parser
200
200
  end
201
201
 
202
202
  def captured_group_count_at_level
203
- captured_group_counts[node.level]
203
+ captured_group_counts[node]
204
204
  end
205
205
 
206
206
  def count_captured_group
207
- captured_group_counts[node.level] += 1
207
+ captured_group_counts[node] += 1
208
208
  end
209
209
 
210
210
  def close_group
@@ -475,17 +475,14 @@ class Regexp::Parser
475
475
  # description of the problem: https://github.com/ammar/regexp_parser/issues/3
476
476
  # rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
477
477
  if target_node.quantified?
478
- new_token = Regexp::Token.new(
479
- :group,
480
- :passive,
481
- '', # text (none because this group is implicit)
482
- target_node.ts,
483
- nil, # te (unused)
484
- target_node.level,
485
- target_node.set_level,
486
- target_node.conditional_level
478
+ new_group = Group::Passive.construct(
479
+ token: :passive,
480
+ ts: target_node.ts,
481
+ level: target_node.level,
482
+ set_level: target_node.set_level,
483
+ conditional_level: target_node.conditional_level,
484
+ options: active_opts,
487
485
  )
488
- new_group = Group::Passive.new(new_token, active_opts)
489
486
  new_group.implicit = true
490
487
  new_group << target_node
491
488
  increase_group_level(target_node)
@@ -1,6 +1,6 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
- # TODO: unify naming with RE::EscapeSequence, on way or the other, in v3.0.0
3
+ # TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
4
4
  module Escape
5
5
  Basic = %i[backslash literal]
6
6
 
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.4.0'
3
+ VERSION = '2.5.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-09 00:00:00.000000000 Z
11
+ date: 2022-05-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
14
14
  email:
@@ -32,6 +32,7 @@ files:
32
32
  - lib/regexp_parser/expression/classes/character_set.rb
33
33
  - lib/regexp_parser/expression/classes/character_set/intersection.rb
34
34
  - lib/regexp_parser/expression/classes/character_set/range.rb
35
+ - lib/regexp_parser/expression/classes/character_type.rb
35
36
  - lib/regexp_parser/expression/classes/conditional.rb
36
37
  - lib/regexp_parser/expression/classes/escape_sequence.rb
37
38
  - lib/regexp_parser/expression/classes/free_space.rb
@@ -39,9 +40,9 @@ files:
39
40
  - lib/regexp_parser/expression/classes/keep.rb
40
41
  - lib/regexp_parser/expression/classes/literal.rb
41
42
  - lib/regexp_parser/expression/classes/posix_class.rb
42
- - lib/regexp_parser/expression/classes/property.rb
43
43
  - lib/regexp_parser/expression/classes/root.rb
44
- - lib/regexp_parser/expression/classes/type.rb
44
+ - lib/regexp_parser/expression/classes/unicode_property.rb
45
+ - lib/regexp_parser/expression/methods/construct.rb
45
46
  - lib/regexp_parser/expression/methods/match.rb
46
47
  - lib/regexp_parser/expression/methods/match_length.rb
47
48
  - lib/regexp_parser/expression/methods/options.rb