regexp_parser 2.1.1 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (129) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -1
  3. data/LICENSE +1 -1
  4. data/README.md +17 -23
  5. data/Rakefile +10 -19
  6. data/lib/regexp_parser/expression/base.rb +123 -0
  7. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  8. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  9. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  12. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  13. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  14. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  15. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  16. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  17. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  18. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  20. data/lib/regexp_parser/expression/sequence.rb +0 -1
  21. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  22. data/lib/regexp_parser/expression.rb +6 -130
  23. data/lib/regexp_parser/lexer.rb +7 -5
  24. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  25. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  26. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  27. data/lib/regexp_parser/scanner.rb +126 -124
  28. data/lib/regexp_parser/syntax/any.rb +1 -3
  29. data/lib/regexp_parser/syntax/base.rb +12 -14
  30. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  31. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  32. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  33. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  34. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  35. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  36. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  37. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  38. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  39. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  40. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  41. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  42. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  43. data/lib/regexp_parser/syntax/token.rb +45 -0
  44. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  45. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  46. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  47. data/lib/regexp_parser/syntax.rb +1 -1
  48. data/lib/regexp_parser/token.rb +9 -20
  49. data/lib/regexp_parser/version.rb +1 -1
  50. data/lib/regexp_parser.rb +0 -2
  51. data/regexp_parser.gemspec +20 -22
  52. metadata +32 -164
  53. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  54. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  55. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  56. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  57. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  58. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  59. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  60. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  61. data/spec/expression/base_spec.rb +0 -104
  62. data/spec/expression/clone_spec.rb +0 -152
  63. data/spec/expression/conditional_spec.rb +0 -89
  64. data/spec/expression/free_space_spec.rb +0 -27
  65. data/spec/expression/methods/match_length_spec.rb +0 -161
  66. data/spec/expression/methods/match_spec.rb +0 -25
  67. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  68. data/spec/expression/methods/tests_spec.rb +0 -99
  69. data/spec/expression/methods/traverse_spec.rb +0 -161
  70. data/spec/expression/options_spec.rb +0 -128
  71. data/spec/expression/subexpression_spec.rb +0 -50
  72. data/spec/expression/to_h_spec.rb +0 -26
  73. data/spec/expression/to_s_spec.rb +0 -108
  74. data/spec/lexer/all_spec.rb +0 -22
  75. data/spec/lexer/conditionals_spec.rb +0 -53
  76. data/spec/lexer/delimiters_spec.rb +0 -68
  77. data/spec/lexer/escapes_spec.rb +0 -14
  78. data/spec/lexer/keep_spec.rb +0 -10
  79. data/spec/lexer/literals_spec.rb +0 -64
  80. data/spec/lexer/nesting_spec.rb +0 -99
  81. data/spec/lexer/refcalls_spec.rb +0 -60
  82. data/spec/parser/all_spec.rb +0 -43
  83. data/spec/parser/alternation_spec.rb +0 -88
  84. data/spec/parser/anchors_spec.rb +0 -17
  85. data/spec/parser/conditionals_spec.rb +0 -179
  86. data/spec/parser/errors_spec.rb +0 -30
  87. data/spec/parser/escapes_spec.rb +0 -121
  88. data/spec/parser/free_space_spec.rb +0 -130
  89. data/spec/parser/groups_spec.rb +0 -108
  90. data/spec/parser/keep_spec.rb +0 -6
  91. data/spec/parser/options_spec.rb +0 -28
  92. data/spec/parser/posix_classes_spec.rb +0 -8
  93. data/spec/parser/properties_spec.rb +0 -115
  94. data/spec/parser/quantifiers_spec.rb +0 -68
  95. data/spec/parser/refcalls_spec.rb +0 -117
  96. data/spec/parser/set/intersections_spec.rb +0 -127
  97. data/spec/parser/set/ranges_spec.rb +0 -111
  98. data/spec/parser/sets_spec.rb +0 -178
  99. data/spec/parser/types_spec.rb +0 -18
  100. data/spec/scanner/all_spec.rb +0 -18
  101. data/spec/scanner/anchors_spec.rb +0 -21
  102. data/spec/scanner/conditionals_spec.rb +0 -128
  103. data/spec/scanner/delimiters_spec.rb +0 -52
  104. data/spec/scanner/errors_spec.rb +0 -67
  105. data/spec/scanner/escapes_spec.rb +0 -64
  106. data/spec/scanner/free_space_spec.rb +0 -165
  107. data/spec/scanner/groups_spec.rb +0 -61
  108. data/spec/scanner/keep_spec.rb +0 -10
  109. data/spec/scanner/literals_spec.rb +0 -39
  110. data/spec/scanner/meta_spec.rb +0 -18
  111. data/spec/scanner/options_spec.rb +0 -36
  112. data/spec/scanner/properties_spec.rb +0 -64
  113. data/spec/scanner/quantifiers_spec.rb +0 -25
  114. data/spec/scanner/refcalls_spec.rb +0 -55
  115. data/spec/scanner/sets_spec.rb +0 -151
  116. data/spec/scanner/types_spec.rb +0 -14
  117. data/spec/spec_helper.rb +0 -16
  118. data/spec/support/runner.rb +0 -42
  119. data/spec/support/shared_examples.rb +0 -77
  120. data/spec/support/warning_extractor.rb +0 -60
  121. data/spec/syntax/syntax_spec.rb +0 -48
  122. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  123. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  124. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  125. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  126. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  127. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  128. data/spec/syntax/versions/aliases_spec.rb +0 -37
  129. data/spec/token/token_spec.rb +0 -85
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
4
- data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
3
+ metadata.gz: 381a794200168f95ff6329cc8a01330d21a05e02b75e0b06dcc6bd8f763c111d
4
+ data.tar.gz: bd7617cb3763e6d759c8e1364aed037ae2fff85af3cf28823476cadd14ff080e
5
5
  SHA512:
6
- metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
7
- data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
6
+ metadata.gz: 0a039012013e9b57329fd685aaf29386d8b848071e514f59df0acc3437a1dae5c76b6bf94158cc3deece08f3a1fec9437ac84590d97f8590d8dcee1e0dc6c726
7
+ data.tar.gz: 4d67da41fbef9b9336ccfd02e3a742286bf4ef96d469c8aa2bbb9a6a55ed4aa6027a28b10ba6c9993b15937e3fe51a349632bcf5808f6237cf77a1d29ceb74f2
data/CHANGELOG.md CHANGED
@@ -1,4 +1,22 @@
1
- ## [Unreleased]
1
+ ## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
2
+
3
+ ### Fixed
4
+
5
+ - fixed Syntax version of absence groups (`(?~...)`)
6
+ - the lexer accepted them for any Ruby version
7
+ - now they are only recognized for Ruby >= 2.4.1 in which they were introduced
8
+ - reduced gem size by excluding specs from package
9
+ - removed deprecated `test_files` gemspec setting
10
+ - no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
11
+ - no longer depend on `set`
12
+ - `set` was removed from the stdlib and made a standalone gem as of Ruby 3
13
+ - this made it a hidden/undeclared dependency of `regexp_parser`
14
+
15
+ ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
16
+
17
+ ### Added
18
+
19
+ - added support for 13 new unicode properties introduced in Ruby 3.1.0
2
20
 
3
21
  ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
4
22
 
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2022, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # Regexp::Parser
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
3
+ [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
4
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
5
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
6
+ [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
4
7
 
5
8
  A Ruby gem for tokenizing, parsing, and transforming regular expressions.
6
9
 
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
357
360
  | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
358
361
  | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
359
362
  | **String Escapes** | | &#x22f1; |
360
- | &emsp;&nbsp;_**Control**_ | `\C-C`, `\cD` | &#x2713; |
363
+ | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
361
364
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
362
- | &emsp;&nbsp;_**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
365
+ | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
363
366
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
364
367
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
365
- | **Unicode Properties** | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | &#x22f1; |
368
+ | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
366
369
  | &emsp;&nbsp;_**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | &#x2713; |
367
370
  | &emsp;&nbsp;_**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | &#x2713; |
368
371
  | &emsp;&nbsp;_**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | &#x2713; |
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
371
374
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
372
375
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
373
376
 
377
+ **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
378
+ https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
379
+ scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
380
+
374
381
  ##### Inapplicable Features
375
382
 
376
383
  Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
384
391
  - **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
385
392
  - **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
386
393
 
387
-
388
394
  See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
389
395
 
390
396
  _**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
392
398
 
393
399
 
394
400
  ## Testing
395
- To run the tests simply run rake from the root directory, as 'test' is the default task.
396
-
397
- It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
398
-
399
- The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
400
-
401
- ```
402
- bin/test
403
- ```
404
-
405
- You can run a specific test like so:
401
+ To run the tests simply run rake from the root directory.
406
402
 
407
- ```
408
- bin/test spec/scanner/properties_spec.rb
409
- ```
403
+ The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
410
404
 
411
- Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
405
+ Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
412
406
 
413
407
  ```
414
- rake ragel:rb && bin/test spec/scanner/properties_spec.rb
408
+ rake ragel:rb && rspec spec/scanner/properties_spec.rb
415
409
  ```
416
410
 
417
411
  ## Building
@@ -443,7 +437,7 @@ Projects using regexp_parser.
443
437
 
444
438
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
445
439
 
446
- - [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
440
+ - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
447
441
 
448
442
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
449
443
 
@@ -476,4 +470,4 @@ Documentation and books used while working on this project.
476
470
 
477
471
  ---
478
472
  ##### Copyright
479
- _Copyright (c) 2010-2020 Ammar Ali. See LICENSE file for details._
473
+ _Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
data/Rakefile CHANGED
@@ -1,26 +1,22 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
5
-
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
6
+ require 'rspec/core/rake_task'
9
7
 
10
8
  RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
11
9
  RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
12
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
-
10
+ RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
14
11
 
15
12
  Bundler::GemHelper.install_tasks
16
13
 
14
+ RSpec::Core::RakeTask.new(:spec)
17
15
 
18
16
  task :default => [:'test:full']
19
17
 
20
18
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
19
+ task full: [:'ragel:rb', :spec]
24
20
  end
25
21
 
26
22
  namespace :ragel do
@@ -49,13 +45,11 @@ namespace :ragel do
49
45
  end
50
46
  end
51
47
 
52
-
53
48
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
49
  # latest scanner code is generated and included in the build.
55
50
  desc "Runs ragel:rb before building the gem"
56
51
  task :build => ['ragel:rb']
57
52
 
58
-
59
53
  namespace :props do
60
54
  desc 'Write new property value hashes for the properties scanner'
61
55
  task :update do
@@ -63,13 +57,10 @@ namespace :props do
63
57
  RegexpPropertyValues.update
64
58
  dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
65
59
 
66
- require 'psych'
67
60
  write_hash_to_file = ->(hash, path) do
68
61
  File.open(path, 'w') do |f|
69
- f.puts '#',
70
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
71
- '#',
72
- hash.sort.to_h.to_yaml
62
+ f.puts "# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT",
63
+ *hash.sort.map { |pair| pair.join(',') }
73
64
  end
74
65
  puts "Wrote #{hash.count} aliases to `#{path}`"
75
66
  end
@@ -77,11 +68,11 @@ namespace :props do
77
68
  long_names_to_tokens = RegexpPropertyValues.all.map do |val|
78
69
  [val.identifier, val.full_name.downcase]
79
70
  end
80
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
71
+ write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
81
72
 
82
73
  short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
83
74
  [k.identifier, v.full_name.downcase]
84
75
  end
85
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
76
+ write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
86
77
  end
87
78
  end
@@ -0,0 +1,123 @@
1
+ module Regexp::Expression
2
+ class Base
3
+ attr_accessor :type, :token
4
+ attr_accessor :text, :ts
5
+ attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
+
7
+ attr_accessor :quantifier
8
+ attr_accessor :options
9
+
10
+ def initialize(token, options = {})
11
+ self.type = token.type
12
+ self.token = token.token
13
+ self.text = token.text
14
+ self.ts = token.ts
15
+ self.level = token.level
16
+ self.set_level = token.set_level
17
+ self.conditional_level = token.conditional_level
18
+ self.nesting_level = 0
19
+ self.quantifier = nil
20
+ self.options = options
21
+ end
22
+
23
+ def initialize_copy(orig)
24
+ self.text = (orig.text ? orig.text.dup : nil)
25
+ self.options = (orig.options ? orig.options.dup : nil)
26
+ self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
27
+ super
28
+ end
29
+
30
+ def to_re(format = :full)
31
+ ::Regexp.new(to_s(format))
32
+ end
33
+
34
+ alias :starts_at :ts
35
+
36
+ def base_length
37
+ to_s(:base).length
38
+ end
39
+
40
+ def full_length
41
+ to_s.length
42
+ end
43
+
44
+ def offset
45
+ [starts_at, full_length]
46
+ end
47
+
48
+ def coded_offset
49
+ '@%d+%d' % offset
50
+ end
51
+
52
+ def to_s(format = :full)
53
+ "#{text}#{quantifier_affix(format)}"
54
+ end
55
+
56
+ def quantifier_affix(expression_format)
57
+ quantifier.to_s if quantified? && expression_format != :base
58
+ end
59
+
60
+ def terminal?
61
+ !respond_to?(:expressions)
62
+ end
63
+
64
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
+ self.quantifier = Quantifier.new(token, text, min, max, mode)
66
+ end
67
+
68
+ def unquantified_clone
69
+ clone.tap { |exp| exp.quantifier = nil }
70
+ end
71
+
72
+ def quantified?
73
+ !quantifier.nil?
74
+ end
75
+
76
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
+ def quantity
78
+ return [nil,nil] unless quantified?
79
+ [quantifier.min, quantifier.max]
80
+ end
81
+
82
+ def repetitions
83
+ return 1..1 unless quantified?
84
+ min = quantifier.min
85
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
86
+ range = min..max
87
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
88
+ if RUBY_VERSION.to_f < 2.7
89
+ range.define_singleton_method(:minmax) { [min, max] }
90
+ end
91
+ range
92
+ end
93
+
94
+ def greedy?
95
+ quantified? and quantifier.greedy?
96
+ end
97
+
98
+ def reluctant?
99
+ quantified? and quantifier.reluctant?
100
+ end
101
+ alias :lazy? :reluctant?
102
+
103
+ def possessive?
104
+ quantified? and quantifier.possessive?
105
+ end
106
+
107
+ def attributes
108
+ {
109
+ type: type,
110
+ token: token,
111
+ text: to_s(:base),
112
+ starts_at: ts,
113
+ length: full_length,
114
+ level: level,
115
+ set_level: set_level,
116
+ conditional_level: conditional_level,
117
+ options: options,
118
+ quantifier: quantified? ? quantifier.to_h : nil,
119
+ }
120
+ end
121
+ alias :to_h :attributes
122
+ end
123
+ end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -1,16 +1,22 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
2
3
  module EscapeSequence
3
4
  class Base < Regexp::Expression::Base
4
- require 'yaml'
5
-
6
- def char
7
- # poor man's unescape without using eval
8
- YAML.load(%Q(---\n"#{text}"\n))
9
- end
10
-
11
5
  def codepoint
12
6
  char.ord
13
7
  end
8
+
9
+ if ''.respond_to?(:undump)
10
+ def char
11
+ %("#{text}").undump
12
+ end
13
+ else
14
+ # poor man's unescape without using eval
15
+ require 'yaml'
16
+ def char
17
+ YAML.load(%Q(---\n"#{text}"\n))
18
+ end
19
+ end
14
20
  end
15
21
 
16
22
  class Literal < EscapeSequence::Base
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
3
  def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
5
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
@@ -13,5 +12,4 @@ module Regexp::Expression
13
12
  text << exp.text
14
13
  end
15
14
  end
16
-
17
15
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -116,5 +115,4 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
-
120
118
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
4
  new(build_token, options)
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
3
+ MODES = %i[greedy possessive reluctant]
4
4
 
5
5
  attr_reader :token, :text, :min, :max, :mode
6
6
 
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression by how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
@@ -1,130 +1,6 @@
1
1
  require 'regexp_parser/error'
2
2
 
3
- module Regexp::Expression
4
- class Base
5
- attr_accessor :type, :token
6
- attr_accessor :text, :ts
7
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
8
-
9
- attr_accessor :quantifier
10
- attr_accessor :options
11
-
12
- def initialize(token, options = {})
13
- self.type = token.type
14
- self.token = token.token
15
- self.text = token.text
16
- self.ts = token.ts
17
- self.level = token.level
18
- self.set_level = token.set_level
19
- self.conditional_level = token.conditional_level
20
- self.nesting_level = 0
21
- self.quantifier = nil
22
- self.options = options
23
- end
24
-
25
- def initialize_copy(orig)
26
- self.text = (orig.text ? orig.text.dup : nil)
27
- self.options = (orig.options ? orig.options.dup : nil)
28
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
29
- super
30
- end
31
-
32
- def to_re(format = :full)
33
- ::Regexp.new(to_s(format))
34
- end
35
-
36
- alias :starts_at :ts
37
-
38
- def base_length
39
- to_s(:base).length
40
- end
41
-
42
- def full_length
43
- to_s.length
44
- end
45
-
46
- def offset
47
- [starts_at, full_length]
48
- end
49
-
50
- def coded_offset
51
- '@%d+%d' % offset
52
- end
53
-
54
- def to_s(format = :full)
55
- "#{text}#{quantifier_affix(format)}"
56
- end
57
-
58
- def quantifier_affix(expression_format)
59
- quantifier.to_s if quantified? && expression_format != :base
60
- end
61
-
62
- def terminal?
63
- !respond_to?(:expressions)
64
- end
65
-
66
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
67
- self.quantifier = Quantifier.new(token, text, min, max, mode)
68
- end
69
-
70
- def unquantified_clone
71
- clone.tap { |exp| exp.quantifier = nil }
72
- end
73
-
74
- def quantified?
75
- !quantifier.nil?
76
- end
77
-
78
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
79
- def quantity
80
- return [nil,nil] unless quantified?
81
- [quantifier.min, quantifier.max]
82
- end
83
-
84
- def repetitions
85
- return 1..1 unless quantified?
86
- min = quantifier.min
87
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
88
- range = min..max
89
- # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
90
- if RUBY_VERSION.to_f < 2.7
91
- range.define_singleton_method(:minmax) { [min, max] }
92
- end
93
- range
94
- end
95
-
96
- def greedy?
97
- quantified? and quantifier.greedy?
98
- end
99
-
100
- def reluctant?
101
- quantified? and quantifier.reluctant?
102
- end
103
- alias :lazy? :reluctant?
104
-
105
- def possessive?
106
- quantified? and quantifier.possessive?
107
- end
108
-
109
- def attributes
110
- {
111
- type: type,
112
- token: token,
113
- text: to_s(:base),
114
- starts_at: ts,
115
- length: full_length,
116
- level: level,
117
- set_level: set_level,
118
- conditional_level: conditional_level,
119
- options: options,
120
- quantifier: quantified? ? quantifier.to_h : nil,
121
- }
122
- end
123
- alias :to_h :attributes
124
- end
125
-
126
- end # module Regexp::Expression
127
-
3
+ require 'regexp_parser/expression/base'
128
4
  require 'regexp_parser/expression/quantifier'
129
5
  require 'regexp_parser/expression/subexpression'
130
6
  require 'regexp_parser/expression/sequence'
@@ -132,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
132
8
 
133
9
  require 'regexp_parser/expression/classes/alternation'
134
10
  require 'regexp_parser/expression/classes/anchor'
135
- require 'regexp_parser/expression/classes/backref'
11
+ require 'regexp_parser/expression/classes/backreference'
12
+ require 'regexp_parser/expression/classes/character_set'
13
+ require 'regexp_parser/expression/classes/character_set/intersection'
14
+ require 'regexp_parser/expression/classes/character_set/range'
136
15
  require 'regexp_parser/expression/classes/conditional'
137
- require 'regexp_parser/expression/classes/escape'
16
+ require 'regexp_parser/expression/classes/escape_sequence'
138
17
  require 'regexp_parser/expression/classes/free_space'
139
18
  require 'regexp_parser/expression/classes/group'
140
19
  require 'regexp_parser/expression/classes/keep'
@@ -142,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
142
21
  require 'regexp_parser/expression/classes/posix_class'
143
22
  require 'regexp_parser/expression/classes/property'
144
23
  require 'regexp_parser/expression/classes/root'
145
- require 'regexp_parser/expression/classes/set'
146
- require 'regexp_parser/expression/classes/set/intersection'
147
- require 'regexp_parser/expression/classes/set/range'
148
24
  require 'regexp_parser/expression/classes/type'
149
25
 
150
26
  require 'regexp_parser/expression/methods/match'
@@ -4,12 +4,14 @@
4
4
  # given syntax flavor.
5
5
  class Regexp::Lexer
6
6
 
7
- OPENING_TOKENS = [
8
- :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
9
- :atomic, :options, :options_switch, :named, :absence
7
+ OPENING_TOKENS = %i[
8
+ capture passive lookahead nlookahead lookbehind nlookbehind
9
+ atomic options options_switch named absence
10
10
  ].freeze
11
11
 
12
- CLOSING_TOKENS = [:close].freeze
12
+ CLOSING_TOKENS = %i[close].freeze
13
+
14
+ CONDITION_TOKENS = %i[condition condition_close].freeze
13
15
 
14
16
  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
15
17
  new.lex(input, syntax, options: options, &block)
@@ -40,7 +42,7 @@ class Regexp::Lexer
40
42
  nesting, set_nesting, conditional_nesting)
41
43
 
42
44
  current = merge_condition(current) if type == :conditional and
43
- [:condition, :condition_close].include?(token)
45
+ CONDITION_TOKENS.include?(token)
44
46
 
45
47
  last.next = current if last
46
48
  current.previous = last if last