regexp_parser 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +15 -21
  4. data/Rakefile +5 -11
  5. data/lib/regexp_parser/expression/base.rb +123 -0
  6. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  7. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  8. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  9. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  10. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  11. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  12. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  13. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  14. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  15. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  16. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  17. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  18. data/lib/regexp_parser/expression/sequence.rb +0 -1
  19. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  20. data/lib/regexp_parser/expression.rb +6 -130
  21. data/lib/regexp_parser/lexer.rb +7 -5
  22. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  23. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  24. data/lib/regexp_parser/syntax/any.rb +1 -3
  25. data/lib/regexp_parser/syntax/base.rb +9 -9
  26. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  27. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  28. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  29. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  30. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  31. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  32. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  33. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  34. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  35. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  36. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  37. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  38. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  39. data/lib/regexp_parser/syntax/token.rb +45 -0
  40. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  41. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  42. data/lib/regexp_parser/syntax.rb +1 -1
  43. data/lib/regexp_parser/token.rb +9 -20
  44. data/lib/regexp_parser/version.rb +1 -1
  45. data/lib/regexp_parser.rb +0 -2
  46. data/spec/lexer/nesting_spec.rb +2 -2
  47. data/spec/parser/escapes_spec.rb +43 -31
  48. data/spec/parser/properties_spec.rb +6 -4
  49. data/spec/parser/set/ranges_spec.rb +26 -16
  50. data/spec/scanner/escapes_spec.rb +28 -19
  51. data/spec/scanner/sets_spec.rb +9 -9
  52. data/spec/spec_helper.rb +13 -1
  53. data/spec/support/capturing_stderr.rb +9 -0
  54. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  55. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  56. data/spec/syntax/versions/aliases_spec.rb +1 -0
  57. metadata +26 -26
  58. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  59. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  60. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  61. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  62. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  63. data/spec/support/runner.rb +0 -42
  64. data/spec/support/warning_extractor.rb +0 -60
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
4
- data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
3
+ metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
4
+ data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
5
5
  SHA512:
6
- metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
7
- data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
6
+ metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
7
+ data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Added
6
+
7
+ - Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
8
+
3
9
  ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
4
10
 
5
11
  ### Fixed
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # Regexp::Parser
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
3
+ [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
4
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
5
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
6
+ [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
4
7
 
5
8
  A Ruby gem for tokenizing, parsing, and transforming regular expressions.
6
9
 
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
357
360
  |   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | ✓ |
358
361
  |   _**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | ✓ |
359
362
  | **String Escapes** | | ⋱ |
360
- |   _**Control**_ | `\C-C`, `\cD` | ✓ |
363
+ |   _**Control** \[1\]_ | `\C-C`, `\cD` | ✓ |
361
364
  |   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
362
- |   _**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
365
+ |   _**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
363
366
  |   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
364
367
  |   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
365
- | **Unicode Properties** | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | &#x22f1; |
368
+ | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
366
369
  | &emsp;&nbsp;_**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | &#x2713; |
367
370
  | &emsp;&nbsp;_**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | &#x2713; |
368
371
  | &emsp;&nbsp;_**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | &#x2713; |
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
371
374
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
372
375
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
373
376
 
377
+ **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
378
+ https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
379
+ scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
380
+
374
381
  ##### Inapplicable Features
375
382
 
376
383
  Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
384
391
  - **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
385
392
  - **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
386
393
 
387
-
388
394
  See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
389
395
 
390
396
  _**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
392
398
 
393
399
 
394
400
  ## Testing
395
- To run the tests simply run rake from the root directory, as 'test' is the default task.
396
-
397
- It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
398
-
399
- The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
400
-
401
- ```
402
- bin/test
403
- ```
404
-
405
- You can run a specific test like so:
401
+ To run the tests simply run rake from the root directory.
406
402
 
407
- ```
408
- bin/test spec/scanner/properties_spec.rb
409
- ```
403
+ The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
410
404
 
411
- Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
405
+ Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
412
406
 
413
407
  ```
414
- rake ragel:rb && bin/test spec/scanner/properties_spec.rb
408
+ rake ragel:rb && rspec spec/scanner/properties_spec.rb
415
409
  ```
416
410
 
417
411
  ## Building
data/Rakefile CHANGED
@@ -1,26 +1,22 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
5
-
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
6
+ require 'rspec/core/rake_task'
9
7
 
10
8
  RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
11
9
  RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
12
10
  RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
11
 
14
-
15
12
  Bundler::GemHelper.install_tasks
16
13
 
14
+ RSpec::Core::RakeTask.new(:spec)
17
15
 
18
16
  task :default => [:'test:full']
19
17
 
20
18
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
19
+ task full: [:'ragel:rb', :spec]
24
20
  end
25
21
 
26
22
  namespace :ragel do
@@ -49,13 +45,11 @@ namespace :ragel do
49
45
  end
50
46
  end
51
47
 
52
-
53
48
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
49
  # latest scanner code is generated and included in the build.
55
50
  desc "Runs ragel:rb before building the gem"
56
51
  task :build => ['ragel:rb']
57
52
 
58
-
59
53
  namespace :props do
60
54
  desc 'Write new property value hashes for the properties scanner'
61
55
  task :update do
@@ -0,0 +1,123 @@
1
+ module Regexp::Expression
2
+ class Base
3
+ attr_accessor :type, :token
4
+ attr_accessor :text, :ts
5
+ attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
+
7
+ attr_accessor :quantifier
8
+ attr_accessor :options
9
+
10
+ def initialize(token, options = {})
11
+ self.type = token.type
12
+ self.token = token.token
13
+ self.text = token.text
14
+ self.ts = token.ts
15
+ self.level = token.level
16
+ self.set_level = token.set_level
17
+ self.conditional_level = token.conditional_level
18
+ self.nesting_level = 0
19
+ self.quantifier = nil
20
+ self.options = options
21
+ end
22
+
23
+ def initialize_copy(orig)
24
+ self.text = (orig.text ? orig.text.dup : nil)
25
+ self.options = (orig.options ? orig.options.dup : nil)
26
+ self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
27
+ super
28
+ end
29
+
30
+ def to_re(format = :full)
31
+ ::Regexp.new(to_s(format))
32
+ end
33
+
34
+ alias :starts_at :ts
35
+
36
+ def base_length
37
+ to_s(:base).length
38
+ end
39
+
40
+ def full_length
41
+ to_s.length
42
+ end
43
+
44
+ def offset
45
+ [starts_at, full_length]
46
+ end
47
+
48
+ def coded_offset
49
+ '@%d+%d' % offset
50
+ end
51
+
52
+ def to_s(format = :full)
53
+ "#{text}#{quantifier_affix(format)}"
54
+ end
55
+
56
+ def quantifier_affix(expression_format)
57
+ quantifier.to_s if quantified? && expression_format != :base
58
+ end
59
+
60
+ def terminal?
61
+ !respond_to?(:expressions)
62
+ end
63
+
64
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
+ self.quantifier = Quantifier.new(token, text, min, max, mode)
66
+ end
67
+
68
+ def unquantified_clone
69
+ clone.tap { |exp| exp.quantifier = nil }
70
+ end
71
+
72
+ def quantified?
73
+ !quantifier.nil?
74
+ end
75
+
76
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
+ def quantity
78
+ return [nil,nil] unless quantified?
79
+ [quantifier.min, quantifier.max]
80
+ end
81
+
82
+ def repetitions
83
+ return 1..1 unless quantified?
84
+ min = quantifier.min
85
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
86
+ range = min..max
87
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
88
+ if RUBY_VERSION.to_f < 2.7
89
+ range.define_singleton_method(:minmax) { [min, max] }
90
+ end
91
+ range
92
+ end
93
+
94
+ def greedy?
95
+ quantified? and quantifier.greedy?
96
+ end
97
+
98
+ def reluctant?
99
+ quantified? and quantifier.reluctant?
100
+ end
101
+ alias :lazy? :reluctant?
102
+
103
+ def possessive?
104
+ quantified? and quantifier.possessive?
105
+ end
106
+
107
+ def attributes
108
+ {
109
+ type: type,
110
+ token: token,
111
+ text: to_s(:base),
112
+ starts_at: ts,
113
+ length: full_length,
114
+ level: level,
115
+ set_level: set_level,
116
+ conditional_level: conditional_level,
117
+ options: options,
118
+ quantifier: quantified? ? quantifier.to_h : nil,
119
+ }
120
+ end
121
+ alias :to_h :attributes
122
+ end
123
+ end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
2
3
  module EscapeSequence
3
4
  class Base < Regexp::Expression::Base
4
5
  require 'yaml'
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
3
  def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
5
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
@@ -13,5 +12,4 @@ module Regexp::Expression
13
12
  text << exp.text
14
13
  end
15
14
  end
16
-
17
15
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -116,5 +115,4 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
-
120
118
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
4
  new(build_token, options)
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
3
+ MODES = %i[greedy possessive reluctant]
4
4
 
5
5
  attr_reader :token, :text, :min, :max, :mode
6
6
 
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression by how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
@@ -1,130 +1,6 @@
1
1
  require 'regexp_parser/error'
2
2
 
3
- module Regexp::Expression
4
- class Base
5
- attr_accessor :type, :token
6
- attr_accessor :text, :ts
7
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
8
-
9
- attr_accessor :quantifier
10
- attr_accessor :options
11
-
12
- def initialize(token, options = {})
13
- self.type = token.type
14
- self.token = token.token
15
- self.text = token.text
16
- self.ts = token.ts
17
- self.level = token.level
18
- self.set_level = token.set_level
19
- self.conditional_level = token.conditional_level
20
- self.nesting_level = 0
21
- self.quantifier = nil
22
- self.options = options
23
- end
24
-
25
- def initialize_copy(orig)
26
- self.text = (orig.text ? orig.text.dup : nil)
27
- self.options = (orig.options ? orig.options.dup : nil)
28
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
29
- super
30
- end
31
-
32
- def to_re(format = :full)
33
- ::Regexp.new(to_s(format))
34
- end
35
-
36
- alias :starts_at :ts
37
-
38
- def base_length
39
- to_s(:base).length
40
- end
41
-
42
- def full_length
43
- to_s.length
44
- end
45
-
46
- def offset
47
- [starts_at, full_length]
48
- end
49
-
50
- def coded_offset
51
- '@%d+%d' % offset
52
- end
53
-
54
- def to_s(format = :full)
55
- "#{text}#{quantifier_affix(format)}"
56
- end
57
-
58
- def quantifier_affix(expression_format)
59
- quantifier.to_s if quantified? && expression_format != :base
60
- end
61
-
62
- def terminal?
63
- !respond_to?(:expressions)
64
- end
65
-
66
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
67
- self.quantifier = Quantifier.new(token, text, min, max, mode)
68
- end
69
-
70
- def unquantified_clone
71
- clone.tap { |exp| exp.quantifier = nil }
72
- end
73
-
74
- def quantified?
75
- !quantifier.nil?
76
- end
77
-
78
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
79
- def quantity
80
- return [nil,nil] unless quantified?
81
- [quantifier.min, quantifier.max]
82
- end
83
-
84
- def repetitions
85
- return 1..1 unless quantified?
86
- min = quantifier.min
87
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
88
- range = min..max
89
- # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
90
- if RUBY_VERSION.to_f < 2.7
91
- range.define_singleton_method(:minmax) { [min, max] }
92
- end
93
- range
94
- end
95
-
96
- def greedy?
97
- quantified? and quantifier.greedy?
98
- end
99
-
100
- def reluctant?
101
- quantified? and quantifier.reluctant?
102
- end
103
- alias :lazy? :reluctant?
104
-
105
- def possessive?
106
- quantified? and quantifier.possessive?
107
- end
108
-
109
- def attributes
110
- {
111
- type: type,
112
- token: token,
113
- text: to_s(:base),
114
- starts_at: ts,
115
- length: full_length,
116
- level: level,
117
- set_level: set_level,
118
- conditional_level: conditional_level,
119
- options: options,
120
- quantifier: quantified? ? quantifier.to_h : nil,
121
- }
122
- end
123
- alias :to_h :attributes
124
- end
125
-
126
- end # module Regexp::Expression
127
-
3
+ require 'regexp_parser/expression/base'
128
4
  require 'regexp_parser/expression/quantifier'
129
5
  require 'regexp_parser/expression/subexpression'
130
6
  require 'regexp_parser/expression/sequence'
@@ -132,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
132
8
 
133
9
  require 'regexp_parser/expression/classes/alternation'
134
10
  require 'regexp_parser/expression/classes/anchor'
135
- require 'regexp_parser/expression/classes/backref'
11
+ require 'regexp_parser/expression/classes/backreference'
12
+ require 'regexp_parser/expression/classes/character_set'
13
+ require 'regexp_parser/expression/classes/character_set/intersection'
14
+ require 'regexp_parser/expression/classes/character_set/range'
136
15
  require 'regexp_parser/expression/classes/conditional'
137
- require 'regexp_parser/expression/classes/escape'
16
+ require 'regexp_parser/expression/classes/escape_sequence'
138
17
  require 'regexp_parser/expression/classes/free_space'
139
18
  require 'regexp_parser/expression/classes/group'
140
19
  require 'regexp_parser/expression/classes/keep'
@@ -142,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
142
21
  require 'regexp_parser/expression/classes/posix_class'
143
22
  require 'regexp_parser/expression/classes/property'
144
23
  require 'regexp_parser/expression/classes/root'
145
- require 'regexp_parser/expression/classes/set'
146
- require 'regexp_parser/expression/classes/set/intersection'
147
- require 'regexp_parser/expression/classes/set/range'
148
24
  require 'regexp_parser/expression/classes/type'
149
25
 
150
26
  require 'regexp_parser/expression/methods/match'
@@ -4,12 +4,14 @@
4
4
  # given syntax flavor.
5
5
  class Regexp::Lexer
6
6
 
7
- OPENING_TOKENS = [
8
- :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
9
- :atomic, :options, :options_switch, :named, :absence
7
+ OPENING_TOKENS = %i[
8
+ capture passive lookahead nlookahead lookbehind nlookbehind
9
+ atomic options options_switch named absence
10
10
  ].freeze
11
11
 
12
- CLOSING_TOKENS = [:close].freeze
12
+ CLOSING_TOKENS = %i[close].freeze
13
+
14
+ CONDITION_TOKENS = %i[condition condition_close].freeze
13
15
 
14
16
  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
15
17
  new.lex(input, syntax, options: options, &block)
@@ -40,7 +42,7 @@ class Regexp::Lexer
40
42
  nesting, set_nesting, conditional_nesting)
41
43
 
42
44
  current = merge_condition(current) if type == :conditional and
43
- [:condition, :condition_close].include?(token)
45
+ CONDITION_TOKENS.include?(token)
44
46
 
45
47
  last.next = current if last
46
48
  current.previous = last if last
@@ -8,6 +8,7 @@ age=10.0: age=10.0
8
8
  age=11.0: age=11.0
9
9
  age=12.0: age=12.0
10
10
  age=12.1: age=12.1
11
+ age=13.0: age=13.0
11
12
  age=2.0: age=2.0
12
13
  age=2.1: age=2.1
13
14
  age=3.0: age=3.0
@@ -64,6 +65,7 @@ changeswhenlowercased: changes_when_lowercased
64
65
  changeswhentitlecased: changes_when_titlecased
65
66
  changeswhenuppercased: changes_when_uppercased
66
67
  cherokee: cherokee
68
+ chorasmian: chorasmian
67
69
  closepunctuation: close_punctuation
68
70
  cntrl: cntrl
69
71
  common: common
@@ -83,6 +85,7 @@ deseret: deseret
83
85
  devanagari: devanagari
84
86
  diacritic: diacritic
85
87
  digit: digit
88
+ divesakuru: dives_akuru
86
89
  dogra: dogra
87
90
  duployan: duployan
88
91
  egyptianhieroglyphs: egyptian_hieroglyphs
@@ -167,6 +170,7 @@ incham: in_cham
167
170
  incherokee: in_cherokee
168
171
  incherokeesupplement: in_cherokee_supplement
169
172
  inchesssymbols: in_chess_symbols
173
+ inchorasmian: in_chorasmian
170
174
  incjkcompatibility: in_cjk_compatibility
171
175
  incjkcompatibilityforms: in_cjk_compatibility_forms
172
176
  incjkcompatibilityideographs: in_cjk_compatibility_ideographs
@@ -181,6 +185,7 @@ incjkunifiedideographsextensionc: in_cjk_unified_ideographs_extension_c
181
185
  incjkunifiedideographsextensiond: in_cjk_unified_ideographs_extension_d
182
186
  incjkunifiedideographsextensione: in_cjk_unified_ideographs_extension_e
183
187
  incjkunifiedideographsextensionf: in_cjk_unified_ideographs_extension_f
188
+ incjkunifiedideographsextensiong: in_cjk_unified_ideographs_extension_g
184
189
  incombiningdiacriticalmarks: in_combining_diacritical_marks
185
190
  incombiningdiacriticalmarksextended: in_combining_diacritical_marks_extended
186
191
  incombiningdiacriticalmarksforsymbols: in_combining_diacritical_marks_for_symbols
@@ -204,6 +209,7 @@ indeseret: in_deseret
204
209
  indevanagari: in_devanagari
205
210
  indevanagariextended: in_devanagari_extended
206
211
  indingbats: in_dingbats
212
+ indivesakuru: in_dives_akuru
207
213
  indogra: in_dogra
208
214
  indominotiles: in_domino_tiles
209
215
  induployan: in_duployan
@@ -269,6 +275,7 @@ inkatakana: in_katakana
269
275
  inkatakanaphoneticextensions: in_katakana_phonetic_extensions
270
276
  inkayahli: in_kayah_li
271
277
  inkharoshthi: in_kharoshthi
278
+ inkhitansmallscript: in_khitan_small_script
272
279
  inkhmer: in_khmer
273
280
  inkhmersymbols: in_khmer_symbols
274
281
  inkhojki: in_khojki
@@ -288,6 +295,7 @@ inlineara: in_linear_a
288
295
  inlinearbideograms: in_linear_b_ideograms
289
296
  inlinearbsyllabary: in_linear_b_syllabary
290
297
  inlisu: in_lisu
298
+ inlisusupplement: in_lisu_supplement
291
299
  inlowsurrogates: in_low_surrogates
292
300
  inlycian: in_lycian
293
301
  inlydian: in_lydian
@@ -395,6 +403,7 @@ insupplementaryprivateuseareab: in_supplementary_private_use_area_b
395
403
  insuttonsignwriting: in_sutton_signwriting
396
404
  insylotinagri: in_syloti_nagri
397
405
  insymbolsandpictographsextendeda: in_symbols_and_pictographs_extended_a
406
+ insymbolsforlegacycomputing: in_symbols_for_legacy_computing
398
407
  insyriac: in_syriac
399
408
  insyriacsupplement: in_syriac_supplement
400
409
  intagalog: in_tagalog
@@ -409,6 +418,7 @@ intamil: in_tamil
409
418
  intamilsupplement: in_tamil_supplement
410
419
  intangut: in_tangut
411
420
  intangutcomponents: in_tangut_components
421
+ intangutsupplement: in_tangut_supplement
412
422
  intelugu: in_telugu
413
423
  inthaana: in_thaana
414
424
  inthai: in_thai
@@ -426,6 +436,7 @@ invedicextensions: in_vedic_extensions
426
436
  inverticalforms: in_vertical_forms
427
437
  inwancho: in_wancho
428
438
  inwarangciti: in_warang_citi
439
+ inyezidi: in_yezidi
429
440
  inyijinghexagramsymbols: in_yijing_hexagram_symbols
430
441
  inyiradicals: in_yi_radicals
431
442
  inyisyllables: in_yi_syllables
@@ -437,6 +448,7 @@ kannada: kannada
437
448
  katakana: katakana
438
449
  kayahli: kayah_li
439
450
  kharoshthi: kharoshthi
451
+ khitansmallscript: khitan_small_script
440
452
  khmer: khmer
441
453
  khojki: khojki
442
454
  khudawadi: khudawadi
@@ -590,5 +602,6 @@ xdigit: xdigit
590
602
  xidcontinue: xid_continue
591
603
  xidstart: xid_start
592
604
  xposixpunct: xposixpunct
605
+ yezidi: yezidi
593
606
  yi: yi
594
607
  zanabazarsquare: zanabazar_square