regexp_parser 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b84a4bb274f31b8608c7dc9d55ff6f1b8d92d0d147976f38079ae7701a6debe
4
- data.tar.gz: 41db5f094d0beafade30a1fac2707cbc827831e818c485ad35d7173f18c6a91a
3
+ metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
4
+ data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
5
5
  SHA512:
6
- metadata.gz: 5dcde6135ac42db609402e47e04ee3be1da8854de286d2baad15dafee04d451814fd7a3bae7adc5440a1fced811e242b69f5fd14bcfc4f3bd5091f86769d56be
7
- data.tar.gz: 2660d0fb28a972a1de53b71b16f8591e573d4214724b5eea8a452549598ff5d0fc5b731149e8332f65bce01c812f4d0d72135bba7e3016064d9f05202a8b5580
6
+ metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
7
+ data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ### Added
4
+
5
+ - `Regexp::Expression::Base.construct` and `.token_class` methods
6
+
3
7
  ## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
4
8
 
5
9
  ### Fixed
@@ -36,10 +40,12 @@
36
40
 
37
41
  It will no longer be supported in regexp_parser v3.0.0.
38
42
 
39
- Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode`
40
- with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode
43
+ Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
44
+ with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
41
45
  will be derived automatically.
42
46
 
47
+ Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
48
+
43
49
  This is consistent with how Expression::Base instances are created.
44
50
 
45
51
 
data/README.md CHANGED
@@ -447,12 +447,14 @@ Projects using regexp_parser.
447
447
 
448
448
  - [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
449
449
 
450
- - [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
450
+ - [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
451
451
 
452
452
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
453
453
 
454
454
  - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
455
455
 
456
+ - [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
457
+
456
458
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
457
459
 
458
460
  - [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify name with token :backref, one way or the other, in v3.0.0
2
3
  module Backreference
3
4
  class Base < Regexp::Expression::Base
4
5
  attr_accessor :referenced_expression
@@ -1,5 +1,7 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
+ # that contains all expressions to its left.
3
5
  class Mark < Regexp::Expression::Base; end
4
6
  end
5
7
  end
@@ -1,11 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class Root < Regexp::Expression::Subexpression
3
3
  def self.build(options = {})
4
- new(build_token, options)
5
- end
6
-
7
- def self.build_token
8
- Regexp::Token.new(:expression, :root, '', 0)
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
9
7
  end
10
8
  end
11
9
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify name with token :property, one way or the other, in v3.0.0
2
3
  module UnicodeProperty
3
4
  class Base < Regexp::Expression::Base
4
5
  def negative?
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
+ elsif self == Alternation || self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ elsif self <= EscapeSequence::Base
32
+ Regexp::Syntax::Token::Escape
33
+ else
34
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
35
+ end
36
+ end
37
+ end
38
+
39
+ def token_class
40
+ self.class.token_class
41
+ end
42
+ end
43
+ end
@@ -112,7 +112,7 @@ module Regexp::Expression
112
112
  end
113
113
 
114
114
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
115
+ dummy = Regexp::Expression::Root.construct
116
116
  dummy.expressions = expressions.map(&:clone)
117
117
  dummy.quantifier = quantifier && quantifier.clone
118
118
  dummy.match_length
@@ -14,7 +14,7 @@ module Regexp::Expression
14
14
  deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
15
15
 
16
16
  init_from_token_and_options(*args)
17
- @mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
17
+ @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
18
18
  @min, @max = minmax
19
19
  # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
20
  self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
@@ -44,10 +44,11 @@ module Regexp::Expression
44
44
  def deprecated_old_init(token, text, min, max, mode = :greedy)
45
45
  warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
46
  "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
- "Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode` "\
48
- "with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode "\
49
- "will be derived automatically. \nThis is consistent with how Expression::Base "\
50
- "instances are created."
47
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
49
+ "will be derived automatically.\n"\
50
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
51
+ "This is consistent with how Expression::Base instances are created. "
51
52
  @token = token
52
53
  @text = text
53
54
  @min = min
@@ -7,31 +7,17 @@ module Regexp::Expression
7
7
  # branches, and CharacterSet::Intersection intersected sequences.
8
8
  class Sequence < Regexp::Expression::Subexpression
9
9
  class << self
10
- def add_to(subexpression, params = {}, active_opts = {})
11
- sequence = at_levels(
12
- subexpression.level,
13
- subexpression.set_level,
14
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
15
15
  )
16
- sequence.nesting_level = subexpression.nesting_level + 1
16
+ sequence.nesting_level = exp.nesting_level + 1
17
17
  sequence.options = active_opts
18
- subexpression.expressions << sequence
18
+ exp.expressions << sequence
19
19
  sequence
20
20
  end
21
-
22
- def at_levels(level, set_level, conditional_level)
23
- token = Regexp::Token.new(
24
- :expression,
25
- :sequence,
26
- '',
27
- nil, # ts
28
- nil, # te
29
- level,
30
- set_level,
31
- conditional_level
32
- )
33
- new(token)
34
- end
35
21
  end
36
22
 
37
23
  def starts_at
@@ -1,7 +1,11 @@
1
1
  module Regexp::Expression
2
2
  module Shared
3
+ module ClassMethods; end # filled in ./methods/*.rb
4
+
3
5
  def self.included(mod)
4
6
  mod.class_eval do
7
+ extend Shared::ClassMethods
8
+
5
9
  attr_accessor :type, :token, :text, :ts, :te,
6
10
  :level, :set_level, :conditional_level,
7
11
  :options, :quantifier
@@ -13,6 +13,7 @@ require 'regexp_parser/expression/classes/backreference'
13
13
  require 'regexp_parser/expression/classes/character_set'
14
14
  require 'regexp_parser/expression/classes/character_set/intersection'
15
15
  require 'regexp_parser/expression/classes/character_set/range'
16
+ require 'regexp_parser/expression/classes/character_type'
16
17
  require 'regexp_parser/expression/classes/conditional'
17
18
  require 'regexp_parser/expression/classes/escape_sequence'
18
19
  require 'regexp_parser/expression/classes/free_space'
@@ -20,10 +21,10 @@ require 'regexp_parser/expression/classes/group'
20
21
  require 'regexp_parser/expression/classes/keep'
21
22
  require 'regexp_parser/expression/classes/literal'
22
23
  require 'regexp_parser/expression/classes/posix_class'
23
- require 'regexp_parser/expression/classes/property'
24
24
  require 'regexp_parser/expression/classes/root'
25
- require 'regexp_parser/expression/classes/type'
25
+ require 'regexp_parser/expression/classes/unicode_property'
26
26
 
27
+ require 'regexp_parser/expression/methods/construct'
27
28
  require 'regexp_parser/expression/methods/match'
28
29
  require 'regexp_parser/expression/methods/match_length'
29
30
  require 'regexp_parser/expression/methods/options'
@@ -23,7 +23,7 @@ class Regexp::Parser
23
23
  end
24
24
 
25
25
  def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
26
- root = Root.build(extract_options(input, options))
26
+ root = Root.construct(options: extract_options(input, options))
27
27
 
28
28
  self.root = root
29
29
  self.node = root
@@ -200,11 +200,11 @@ class Regexp::Parser
200
200
  end
201
201
 
202
202
  def captured_group_count_at_level
203
- captured_group_counts[node.level]
203
+ captured_group_counts[node]
204
204
  end
205
205
 
206
206
  def count_captured_group
207
- captured_group_counts[node.level] += 1
207
+ captured_group_counts[node] += 1
208
208
  end
209
209
 
210
210
  def close_group
@@ -475,17 +475,14 @@ class Regexp::Parser
475
475
  # description of the problem: https://github.com/ammar/regexp_parser/issues/3
476
476
  # rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
477
477
  if target_node.quantified?
478
- new_token = Regexp::Token.new(
479
- :group,
480
- :passive,
481
- '', # text (none because this group is implicit)
482
- target_node.ts,
483
- nil, # te (unused)
484
- target_node.level,
485
- target_node.set_level,
486
- target_node.conditional_level
478
+ new_group = Group::Passive.construct(
479
+ token: :passive,
480
+ ts: target_node.ts,
481
+ level: target_node.level,
482
+ set_level: target_node.set_level,
483
+ conditional_level: target_node.conditional_level,
484
+ options: active_opts,
487
485
  )
488
- new_group = Group::Passive.new(new_token, active_opts)
489
486
  new_group.implicit = true
490
487
  new_group << target_node
491
488
  increase_group_level(target_node)
@@ -1,6 +1,6 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
- # TODO: unify naming with RE::EscapeSequence, on way or the other, in v3.0.0
3
+ # TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
4
4
  module Escape
5
5
  Basic = %i[backslash literal]
6
6
 
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.4.0'
3
+ VERSION = '2.5.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-09 00:00:00.000000000 Z
11
+ date: 2022-05-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
14
14
  email:
@@ -32,6 +32,7 @@ files:
32
32
  - lib/regexp_parser/expression/classes/character_set.rb
33
33
  - lib/regexp_parser/expression/classes/character_set/intersection.rb
34
34
  - lib/regexp_parser/expression/classes/character_set/range.rb
35
+ - lib/regexp_parser/expression/classes/character_type.rb
35
36
  - lib/regexp_parser/expression/classes/conditional.rb
36
37
  - lib/regexp_parser/expression/classes/escape_sequence.rb
37
38
  - lib/regexp_parser/expression/classes/free_space.rb
@@ -39,9 +40,9 @@ files:
39
40
  - lib/regexp_parser/expression/classes/keep.rb
40
41
  - lib/regexp_parser/expression/classes/literal.rb
41
42
  - lib/regexp_parser/expression/classes/posix_class.rb
42
- - lib/regexp_parser/expression/classes/property.rb
43
43
  - lib/regexp_parser/expression/classes/root.rb
44
- - lib/regexp_parser/expression/classes/type.rb
44
+ - lib/regexp_parser/expression/classes/unicode_property.rb
45
+ - lib/regexp_parser/expression/methods/construct.rb
45
46
  - lib/regexp_parser/expression/methods/match.rb
46
47
  - lib/regexp_parser/expression/methods/match_length.rb
47
48
  - lib/regexp_parser/expression/methods/options.rb