regexp_parser 2.1.1 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -1
  3. data/LICENSE +1 -1
  4. data/README.md +17 -23
  5. data/Rakefile +10 -19
  6. data/lib/regexp_parser/expression/base.rb +123 -0
  7. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  8. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  9. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  12. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  13. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  14. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  15. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  16. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  17. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  18. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  20. data/lib/regexp_parser/expression/sequence.rb +0 -1
  21. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  22. data/lib/regexp_parser/expression.rb +6 -130
  23. data/lib/regexp_parser/lexer.rb +7 -5
  24. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  25. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  26. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  27. data/lib/regexp_parser/scanner.rb +126 -124
  28. data/lib/regexp_parser/syntax/any.rb +1 -3
  29. data/lib/regexp_parser/syntax/base.rb +12 -14
  30. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  31. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  32. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  33. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  34. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  35. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  36. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  37. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  38. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  39. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  40. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  41. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  42. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  43. data/lib/regexp_parser/syntax/token.rb +45 -0
  44. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  45. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  46. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  47. data/lib/regexp_parser/syntax.rb +1 -1
  48. data/lib/regexp_parser/token.rb +9 -20
  49. data/lib/regexp_parser/version.rb +1 -1
  50. data/lib/regexp_parser.rb +0 -2
  51. data/regexp_parser.gemspec +20 -22
  52. metadata +32 -164
  53. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  54. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  55. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  56. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  57. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  58. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  59. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  60. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  61. data/spec/expression/base_spec.rb +0 -104
  62. data/spec/expression/clone_spec.rb +0 -152
  63. data/spec/expression/conditional_spec.rb +0 -89
  64. data/spec/expression/free_space_spec.rb +0 -27
  65. data/spec/expression/methods/match_length_spec.rb +0 -161
  66. data/spec/expression/methods/match_spec.rb +0 -25
  67. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  68. data/spec/expression/methods/tests_spec.rb +0 -99
  69. data/spec/expression/methods/traverse_spec.rb +0 -161
  70. data/spec/expression/options_spec.rb +0 -128
  71. data/spec/expression/subexpression_spec.rb +0 -50
  72. data/spec/expression/to_h_spec.rb +0 -26
  73. data/spec/expression/to_s_spec.rb +0 -108
  74. data/spec/lexer/all_spec.rb +0 -22
  75. data/spec/lexer/conditionals_spec.rb +0 -53
  76. data/spec/lexer/delimiters_spec.rb +0 -68
  77. data/spec/lexer/escapes_spec.rb +0 -14
  78. data/spec/lexer/keep_spec.rb +0 -10
  79. data/spec/lexer/literals_spec.rb +0 -64
  80. data/spec/lexer/nesting_spec.rb +0 -99
  81. data/spec/lexer/refcalls_spec.rb +0 -60
  82. data/spec/parser/all_spec.rb +0 -43
  83. data/spec/parser/alternation_spec.rb +0 -88
  84. data/spec/parser/anchors_spec.rb +0 -17
  85. data/spec/parser/conditionals_spec.rb +0 -179
  86. data/spec/parser/errors_spec.rb +0 -30
  87. data/spec/parser/escapes_spec.rb +0 -121
  88. data/spec/parser/free_space_spec.rb +0 -130
  89. data/spec/parser/groups_spec.rb +0 -108
  90. data/spec/parser/keep_spec.rb +0 -6
  91. data/spec/parser/options_spec.rb +0 -28
  92. data/spec/parser/posix_classes_spec.rb +0 -8
  93. data/spec/parser/properties_spec.rb +0 -115
  94. data/spec/parser/quantifiers_spec.rb +0 -68
  95. data/spec/parser/refcalls_spec.rb +0 -117
  96. data/spec/parser/set/intersections_spec.rb +0 -127
  97. data/spec/parser/set/ranges_spec.rb +0 -111
  98. data/spec/parser/sets_spec.rb +0 -178
  99. data/spec/parser/types_spec.rb +0 -18
  100. data/spec/scanner/all_spec.rb +0 -18
  101. data/spec/scanner/anchors_spec.rb +0 -21
  102. data/spec/scanner/conditionals_spec.rb +0 -128
  103. data/spec/scanner/delimiters_spec.rb +0 -52
  104. data/spec/scanner/errors_spec.rb +0 -67
  105. data/spec/scanner/escapes_spec.rb +0 -64
  106. data/spec/scanner/free_space_spec.rb +0 -165
  107. data/spec/scanner/groups_spec.rb +0 -61
  108. data/spec/scanner/keep_spec.rb +0 -10
  109. data/spec/scanner/literals_spec.rb +0 -39
  110. data/spec/scanner/meta_spec.rb +0 -18
  111. data/spec/scanner/options_spec.rb +0 -36
  112. data/spec/scanner/properties_spec.rb +0 -64
  113. data/spec/scanner/quantifiers_spec.rb +0 -25
  114. data/spec/scanner/refcalls_spec.rb +0 -55
  115. data/spec/scanner/sets_spec.rb +0 -151
  116. data/spec/scanner/types_spec.rb +0 -14
  117. data/spec/spec_helper.rb +0 -16
  118. data/spec/support/runner.rb +0 -42
  119. data/spec/support/shared_examples.rb +0 -77
  120. data/spec/support/warning_extractor.rb +0 -60
  121. data/spec/syntax/syntax_spec.rb +0 -48
  122. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  123. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  124. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  125. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  126. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  127. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  128. data/spec/syntax/versions/aliases_spec.rb +0 -37
  129. data/spec/token/token_spec.rb +0 -85
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
4
- data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
3
+ metadata.gz: 381a794200168f95ff6329cc8a01330d21a05e02b75e0b06dcc6bd8f763c111d
4
+ data.tar.gz: bd7617cb3763e6d759c8e1364aed037ae2fff85af3cf28823476cadd14ff080e
5
5
  SHA512:
6
- metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
7
- data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
6
+ metadata.gz: 0a039012013e9b57329fd685aaf29386d8b848071e514f59df0acc3437a1dae5c76b6bf94158cc3deece08f3a1fec9437ac84590d97f8590d8dcee1e0dc6c726
7
+ data.tar.gz: 4d67da41fbef9b9336ccfd02e3a742286bf4ef96d469c8aa2bbb9a6a55ed4aa6027a28b10ba6c9993b15937e3fe51a349632bcf5808f6237cf77a1d29ceb74f2
data/CHANGELOG.md CHANGED
@@ -1,4 +1,22 @@
1
- ## [Unreleased]
1
+ ## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
2
+
3
+ ### Fixed
4
+
5
+ - fixed Syntax version of absence groups (`(?~...)`)
6
+ - the lexer accepted them for any Ruby version
7
+ - now they are only recognized for Ruby >= 2.4.1 in which they were introduced
8
+ - reduced gem size by excluding specs from package
9
+ - removed deprecated `test_files` gemspec setting
10
+ - no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
11
+ - no longer depend on `set`
12
+ - `set` was removed from the stdlib and made a standalone gem as of Ruby 3
13
+ - this made it a hidden/undeclared dependency of `regexp_parser`
14
+
15
+ ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
16
+
17
+ ### Added
18
+
19
+ - added support for 13 new unicode properties introduced in Ruby 3.1.0
2
20
 
3
21
  ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
4
22
 
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2022, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # Regexp::Parser
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
3
+ [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
4
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
5
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
6
+ [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
4
7
 
5
8
  A Ruby gem for tokenizing, parsing, and transforming regular expressions.
6
9
 
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
357
360
  | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
358
361
  | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
359
362
  | **String Escapes** | | &#x22f1; |
360
- | &emsp;&nbsp;_**Control**_ | `\C-C`, `\cD` | &#x2713; |
363
+ | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
361
364
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
362
- | &emsp;&nbsp;_**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
365
+ | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
363
366
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
364
367
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
365
- | **Unicode Properties** | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | &#x22f1; |
368
+ | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
366
369
  | &emsp;&nbsp;_**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | &#x2713; |
367
370
  | &emsp;&nbsp;_**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | &#x2713; |
368
371
  | &emsp;&nbsp;_**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | &#x2713; |
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
371
374
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
372
375
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
373
376
 
377
+ **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
378
+ https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
379
+ scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
380
+
374
381
  ##### Inapplicable Features
375
382
 
376
383
  Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
384
391
  - **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
385
392
  - **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
386
393
 
387
-
388
394
  See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
389
395
 
390
396
  _**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
392
398
 
393
399
 
394
400
  ## Testing
395
- To run the tests simply run rake from the root directory, as 'test' is the default task.
396
-
397
- It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
398
-
399
- The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
400
-
401
- ```
402
- bin/test
403
- ```
404
-
405
- You can run a specific test like so:
401
+ To run the tests simply run rake from the root directory.
406
402
 
407
- ```
408
- bin/test spec/scanner/properties_spec.rb
409
- ```
403
+ The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
410
404
 
411
- Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
405
+ Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
412
406
 
413
407
  ```
414
- rake ragel:rb && bin/test spec/scanner/properties_spec.rb
408
+ rake ragel:rb && rspec spec/scanner/properties_spec.rb
415
409
  ```
416
410
 
417
411
  ## Building
@@ -443,7 +437,7 @@ Projects using regexp_parser.
443
437
 
444
438
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
445
439
 
446
- - [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
440
+ - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
447
441
 
448
442
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
449
443
 
@@ -476,4 +470,4 @@ Documentation and books used while working on this project.
476
470
 
477
471
  ---
478
472
  ##### Copyright
479
- _Copyright (c) 2010-2020 Ammar Ali. See LICENSE file for details._
473
+ _Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
data/Rakefile CHANGED
@@ -1,26 +1,22 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
5
-
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
6
+ require 'rspec/core/rake_task'
9
7
 
10
8
  RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
11
9
  RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
12
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
-
10
+ RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
14
11
 
15
12
  Bundler::GemHelper.install_tasks
16
13
 
14
+ RSpec::Core::RakeTask.new(:spec)
17
15
 
18
16
  task :default => [:'test:full']
19
17
 
20
18
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
19
+ task full: [:'ragel:rb', :spec]
24
20
  end
25
21
 
26
22
  namespace :ragel do
@@ -49,13 +45,11 @@ namespace :ragel do
49
45
  end
50
46
  end
51
47
 
52
-
53
48
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
49
  # latest scanner code is generated and included in the build.
55
50
  desc "Runs ragel:rb before building the gem"
56
51
  task :build => ['ragel:rb']
57
52
 
58
-
59
53
  namespace :props do
60
54
  desc 'Write new property value hashes for the properties scanner'
61
55
  task :update do
@@ -63,13 +57,10 @@ namespace :props do
63
57
  RegexpPropertyValues.update
64
58
  dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
65
59
 
66
- require 'psych'
67
60
  write_hash_to_file = ->(hash, path) do
68
61
  File.open(path, 'w') do |f|
69
- f.puts '#',
70
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
71
- '#',
72
- hash.sort.to_h.to_yaml
62
+ f.puts "# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT",
63
+ *hash.sort.map { |pair| pair.join(',') }
73
64
  end
74
65
  puts "Wrote #{hash.count} aliases to `#{path}`"
75
66
  end
@@ -77,11 +68,11 @@ namespace :props do
77
68
  long_names_to_tokens = RegexpPropertyValues.all.map do |val|
78
69
  [val.identifier, val.full_name.downcase]
79
70
  end
80
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
71
+ write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
81
72
 
82
73
  short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
83
74
  [k.identifier, v.full_name.downcase]
84
75
  end
85
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
76
+ write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
86
77
  end
87
78
  end
@@ -0,0 +1,123 @@
1
+ module Regexp::Expression
2
+ class Base
3
+ attr_accessor :type, :token
4
+ attr_accessor :text, :ts
5
+ attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
+
7
+ attr_accessor :quantifier
8
+ attr_accessor :options
9
+
10
+ def initialize(token, options = {})
11
+ self.type = token.type
12
+ self.token = token.token
13
+ self.text = token.text
14
+ self.ts = token.ts
15
+ self.level = token.level
16
+ self.set_level = token.set_level
17
+ self.conditional_level = token.conditional_level
18
+ self.nesting_level = 0
19
+ self.quantifier = nil
20
+ self.options = options
21
+ end
22
+
23
+ def initialize_copy(orig)
24
+ self.text = (orig.text ? orig.text.dup : nil)
25
+ self.options = (orig.options ? orig.options.dup : nil)
26
+ self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
27
+ super
28
+ end
29
+
30
+ def to_re(format = :full)
31
+ ::Regexp.new(to_s(format))
32
+ end
33
+
34
+ alias :starts_at :ts
35
+
36
+ def base_length
37
+ to_s(:base).length
38
+ end
39
+
40
+ def full_length
41
+ to_s.length
42
+ end
43
+
44
+ def offset
45
+ [starts_at, full_length]
46
+ end
47
+
48
+ def coded_offset
49
+ '@%d+%d' % offset
50
+ end
51
+
52
+ def to_s(format = :full)
53
+ "#{text}#{quantifier_affix(format)}"
54
+ end
55
+
56
+ def quantifier_affix(expression_format)
57
+ quantifier.to_s if quantified? && expression_format != :base
58
+ end
59
+
60
+ def terminal?
61
+ !respond_to?(:expressions)
62
+ end
63
+
64
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
+ self.quantifier = Quantifier.new(token, text, min, max, mode)
66
+ end
67
+
68
+ def unquantified_clone
69
+ clone.tap { |exp| exp.quantifier = nil }
70
+ end
71
+
72
+ def quantified?
73
+ !quantifier.nil?
74
+ end
75
+
76
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
+ def quantity
78
+ return [nil,nil] unless quantified?
79
+ [quantifier.min, quantifier.max]
80
+ end
81
+
82
+ def repetitions
83
+ return 1..1 unless quantified?
84
+ min = quantifier.min
85
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
86
+ range = min..max
87
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
88
+ if RUBY_VERSION.to_f < 2.7
89
+ range.define_singleton_method(:minmax) { [min, max] }
90
+ end
91
+ range
92
+ end
93
+
94
+ def greedy?
95
+ quantified? and quantifier.greedy?
96
+ end
97
+
98
+ def reluctant?
99
+ quantified? and quantifier.reluctant?
100
+ end
101
+ alias :lazy? :reluctant?
102
+
103
+ def possessive?
104
+ quantified? and quantifier.possessive?
105
+ end
106
+
107
+ def attributes
108
+ {
109
+ type: type,
110
+ token: token,
111
+ text: to_s(:base),
112
+ starts_at: ts,
113
+ length: full_length,
114
+ level: level,
115
+ set_level: set_level,
116
+ conditional_level: conditional_level,
117
+ options: options,
118
+ quantifier: quantified? ? quantifier.to_h : nil,
119
+ }
120
+ end
121
+ alias :to_h :attributes
122
+ end
123
+ end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -1,16 +1,22 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
2
3
  module EscapeSequence
3
4
  class Base < Regexp::Expression::Base
4
- require 'yaml'
5
-
6
- def char
7
- # poor man's unescape without using eval
8
- YAML.load(%Q(---\n"#{text}"\n))
9
- end
10
-
11
5
  def codepoint
12
6
  char.ord
13
7
  end
8
+
9
+ if ''.respond_to?(:undump)
10
+ def char
11
+ %("#{text}").undump
12
+ end
13
+ else
14
+ # poor man's unescape without using eval
15
+ require 'yaml'
16
+ def char
17
+ YAML.load(%Q(---\n"#{text}"\n))
18
+ end
19
+ end
14
20
  end
15
21
 
16
22
  class Literal < EscapeSequence::Base
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
3
  def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
5
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
@@ -13,5 +12,4 @@ module Regexp::Expression
13
12
  text << exp.text
14
13
  end
15
14
  end
16
-
17
15
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -116,5 +115,4 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
-
120
118
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
4
  new(build_token, options)
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
3
+ MODES = %i[greedy possessive reluctant]
4
4
 
5
5
  attr_reader :token, :text, :min, :max, :mode
6
6
 
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression in how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
@@ -1,130 +1,6 @@
1
1
  require 'regexp_parser/error'
2
2
 
3
- module Regexp::Expression
4
- class Base
5
- attr_accessor :type, :token
6
- attr_accessor :text, :ts
7
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
8
-
9
- attr_accessor :quantifier
10
- attr_accessor :options
11
-
12
- def initialize(token, options = {})
13
- self.type = token.type
14
- self.token = token.token
15
- self.text = token.text
16
- self.ts = token.ts
17
- self.level = token.level
18
- self.set_level = token.set_level
19
- self.conditional_level = token.conditional_level
20
- self.nesting_level = 0
21
- self.quantifier = nil
22
- self.options = options
23
- end
24
-
25
- def initialize_copy(orig)
26
- self.text = (orig.text ? orig.text.dup : nil)
27
- self.options = (orig.options ? orig.options.dup : nil)
28
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
29
- super
30
- end
31
-
32
- def to_re(format = :full)
33
- ::Regexp.new(to_s(format))
34
- end
35
-
36
- alias :starts_at :ts
37
-
38
- def base_length
39
- to_s(:base).length
40
- end
41
-
42
- def full_length
43
- to_s.length
44
- end
45
-
46
- def offset
47
- [starts_at, full_length]
48
- end
49
-
50
- def coded_offset
51
- '@%d+%d' % offset
52
- end
53
-
54
- def to_s(format = :full)
55
- "#{text}#{quantifier_affix(format)}"
56
- end
57
-
58
- def quantifier_affix(expression_format)
59
- quantifier.to_s if quantified? && expression_format != :base
60
- end
61
-
62
- def terminal?
63
- !respond_to?(:expressions)
64
- end
65
-
66
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
67
- self.quantifier = Quantifier.new(token, text, min, max, mode)
68
- end
69
-
70
- def unquantified_clone
71
- clone.tap { |exp| exp.quantifier = nil }
72
- end
73
-
74
- def quantified?
75
- !quantifier.nil?
76
- end
77
-
78
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
79
- def quantity
80
- return [nil,nil] unless quantified?
81
- [quantifier.min, quantifier.max]
82
- end
83
-
84
- def repetitions
85
- return 1..1 unless quantified?
86
- min = quantifier.min
87
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
88
- range = min..max
89
- # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
90
- if RUBY_VERSION.to_f < 2.7
91
- range.define_singleton_method(:minmax) { [min, max] }
92
- end
93
- range
94
- end
95
-
96
- def greedy?
97
- quantified? and quantifier.greedy?
98
- end
99
-
100
- def reluctant?
101
- quantified? and quantifier.reluctant?
102
- end
103
- alias :lazy? :reluctant?
104
-
105
- def possessive?
106
- quantified? and quantifier.possessive?
107
- end
108
-
109
- def attributes
110
- {
111
- type: type,
112
- token: token,
113
- text: to_s(:base),
114
- starts_at: ts,
115
- length: full_length,
116
- level: level,
117
- set_level: set_level,
118
- conditional_level: conditional_level,
119
- options: options,
120
- quantifier: quantified? ? quantifier.to_h : nil,
121
- }
122
- end
123
- alias :to_h :attributes
124
- end
125
-
126
- end # module Regexp::Expression
127
-
3
+ require 'regexp_parser/expression/base'
128
4
  require 'regexp_parser/expression/quantifier'
129
5
  require 'regexp_parser/expression/subexpression'
130
6
  require 'regexp_parser/expression/sequence'
@@ -132,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
132
8
 
133
9
  require 'regexp_parser/expression/classes/alternation'
134
10
  require 'regexp_parser/expression/classes/anchor'
135
- require 'regexp_parser/expression/classes/backref'
11
+ require 'regexp_parser/expression/classes/backreference'
12
+ require 'regexp_parser/expression/classes/character_set'
13
+ require 'regexp_parser/expression/classes/character_set/intersection'
14
+ require 'regexp_parser/expression/classes/character_set/range'
136
15
  require 'regexp_parser/expression/classes/conditional'
137
- require 'regexp_parser/expression/classes/escape'
16
+ require 'regexp_parser/expression/classes/escape_sequence'
138
17
  require 'regexp_parser/expression/classes/free_space'
139
18
  require 'regexp_parser/expression/classes/group'
140
19
  require 'regexp_parser/expression/classes/keep'
@@ -142,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
142
21
  require 'regexp_parser/expression/classes/posix_class'
143
22
  require 'regexp_parser/expression/classes/property'
144
23
  require 'regexp_parser/expression/classes/root'
145
- require 'regexp_parser/expression/classes/set'
146
- require 'regexp_parser/expression/classes/set/intersection'
147
- require 'regexp_parser/expression/classes/set/range'
148
24
  require 'regexp_parser/expression/classes/type'
149
25
 
150
26
  require 'regexp_parser/expression/methods/match'
@@ -4,12 +4,14 @@
4
4
  # given syntax flavor.
5
5
  class Regexp::Lexer
6
6
 
7
- OPENING_TOKENS = [
8
- :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
9
- :atomic, :options, :options_switch, :named, :absence
7
+ OPENING_TOKENS = %i[
8
+ capture passive lookahead nlookahead lookbehind nlookbehind
9
+ atomic options options_switch named absence
10
10
  ].freeze
11
11
 
12
- CLOSING_TOKENS = [:close].freeze
12
+ CLOSING_TOKENS = %i[close].freeze
13
+
14
+ CONDITION_TOKENS = %i[condition condition_close].freeze
13
15
 
14
16
  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
15
17
  new.lex(input, syntax, options: options, &block)
@@ -40,7 +42,7 @@ class Regexp::Lexer
40
42
  nesting, set_nesting, conditional_nesting)
41
43
 
42
44
  current = merge_condition(current) if type == :conditional and
43
- [:condition, :condition_close].include?(token)
45
+ CONDITION_TOKENS.include?(token)
44
46
 
45
47
  last.next = current if last
46
48
  current.previous = last if last