regexp_parser 2.1.1 → 2.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +8 -5
  3. data/LICENSE +1 -1
  4. data/Rakefile +10 -72
  5. data/lib/regexp_parser/error.rb +3 -1
  6. data/lib/regexp_parser/expression/base.rb +78 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +3 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +2 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +8 -10
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +2 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -7
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +6 -8
  13. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +2 -2
  14. data/lib/regexp_parser/expression/classes/conditional.rb +4 -20
  15. data/lib/regexp_parser/expression/classes/escape_sequence.rb +33 -0
  16. data/lib/regexp_parser/expression/classes/free_space.rb +6 -4
  17. data/lib/regexp_parser/expression/classes/group.rb +12 -22
  18. data/lib/regexp_parser/expression/classes/keep.rb +4 -0
  19. data/lib/regexp_parser/expression/classes/literal.rb +3 -5
  20. data/lib/regexp_parser/expression/classes/posix_class.rb +7 -5
  21. data/lib/regexp_parser/expression/classes/root.rb +5 -6
  22. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +12 -11
  23. data/lib/regexp_parser/expression/methods/construct.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +7 -0
  25. data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +76 -0
  26. data/lib/regexp_parser/expression/methods/human_name.rb +45 -0
  27. data/lib/regexp_parser/expression/methods/match.rb +2 -0
  28. data/lib/regexp_parser/expression/methods/match_length.rb +11 -5
  29. data/lib/regexp_parser/expression/methods/negative.rb +22 -0
  30. data/lib/regexp_parser/expression/methods/options.rb +2 -0
  31. data/lib/regexp_parser/expression/methods/parts.rb +25 -0
  32. data/lib/regexp_parser/expression/methods/printing.rb +28 -0
  33. data/lib/regexp_parser/expression/methods/referenced_expressions.rb +30 -0
  34. data/lib/regexp_parser/expression/methods/strfregexp.rb +3 -1
  35. data/lib/regexp_parser/expression/methods/tests.rb +49 -1
  36. data/lib/regexp_parser/expression/methods/traverse.rb +37 -19
  37. data/lib/regexp_parser/expression/quantifier.rb +57 -24
  38. data/lib/regexp_parser/expression/sequence.rb +13 -31
  39. data/lib/regexp_parser/expression/sequence_operation.rb +6 -9
  40. data/lib/regexp_parser/expression/shared.rb +114 -0
  41. data/lib/regexp_parser/expression/subexpression.rb +28 -18
  42. data/lib/regexp_parser/expression.rb +42 -155
  43. data/lib/regexp_parser/lexer.rb +83 -39
  44. data/lib/regexp_parser/parser.rb +142 -174
  45. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +10 -0
  46. data/lib/regexp_parser/scanner/errors/scanner_error.rb +8 -0
  47. data/lib/regexp_parser/scanner/errors/validation_error.rb +65 -0
  48. data/lib/regexp_parser/scanner/properties/long.csv +670 -0
  49. data/lib/regexp_parser/scanner/properties/short.csv +257 -0
  50. data/lib/regexp_parser/scanner/property.rl +2 -2
  51. data/lib/regexp_parser/scanner/scanner.rl +167 -189
  52. data/lib/regexp_parser/scanner.rb +1419 -1549
  53. data/lib/regexp_parser/syntax/any.rb +4 -7
  54. data/lib/regexp_parser/syntax/base.rb +92 -65
  55. data/lib/regexp_parser/syntax/token/anchor.rb +17 -0
  56. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +4 -2
  57. data/lib/regexp_parser/syntax/token/backreference.rb +35 -0
  58. data/lib/regexp_parser/syntax/token/character_set.rb +18 -0
  59. data/lib/regexp_parser/syntax/token/character_type.rb +18 -0
  60. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +5 -3
  61. data/lib/regexp_parser/syntax/token/escape.rb +35 -0
  62. data/lib/regexp_parser/syntax/token/group.rb +25 -0
  63. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +3 -1
  64. data/lib/regexp_parser/syntax/token/meta.rb +22 -0
  65. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +5 -3
  66. data/lib/regexp_parser/syntax/token/quantifier.rb +37 -0
  67. data/lib/regexp_parser/syntax/token/unicode_property.rb +766 -0
  68. data/lib/regexp_parser/syntax/token/virtual.rb +13 -0
  69. data/lib/regexp_parser/syntax/token.rb +47 -0
  70. data/lib/regexp_parser/syntax/version_lookup.rb +21 -36
  71. data/lib/regexp_parser/syntax/versions/1.8.6.rb +14 -19
  72. data/lib/regexp_parser/syntax/versions/1.9.1.rb +11 -16
  73. data/lib/regexp_parser/syntax/versions/1.9.3.rb +4 -9
  74. data/lib/regexp_parser/syntax/versions/2.0.0.rb +9 -14
  75. data/lib/regexp_parser/syntax/versions/2.2.0.rb +4 -8
  76. data/lib/regexp_parser/syntax/versions/2.3.0.rb +4 -8
  77. data/lib/regexp_parser/syntax/versions/2.4.0.rb +4 -8
  78. data/lib/regexp_parser/syntax/versions/2.4.1.rb +3 -7
  79. data/lib/regexp_parser/syntax/versions/2.5.0.rb +4 -8
  80. data/lib/regexp_parser/syntax/versions/2.6.0.rb +4 -8
  81. data/lib/regexp_parser/syntax/versions/2.6.2.rb +4 -8
  82. data/lib/regexp_parser/syntax/versions/2.6.3.rb +4 -8
  83. data/lib/regexp_parser/syntax/versions/3.1.0.rb +6 -0
  84. data/lib/regexp_parser/syntax/versions/3.2.0.rb +6 -0
  85. data/lib/regexp_parser/syntax/versions/3.5.0.rb +4 -0
  86. data/lib/regexp_parser/syntax/versions.rb +6 -2
  87. data/lib/regexp_parser/syntax.rb +4 -2
  88. data/lib/regexp_parser/token.rb +11 -20
  89. data/lib/regexp_parser/version.rb +3 -1
  90. data/lib/regexp_parser.rb +7 -7
  91. data/regexp_parser.gemspec +22 -22
  92. metadata +53 -174
  93. data/CHANGELOG.md +0 -494
  94. data/README.md +0 -479
  95. data/lib/regexp_parser/expression/classes/escape.rb +0 -94
  96. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  97. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  98. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  99. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  100. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  101. data/lib/regexp_parser/syntax/tokens/character_type.rb +0 -16
  102. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  103. data/lib/regexp_parser/syntax/tokens/group.rb +0 -23
  104. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  105. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  106. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  107. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  108. data/spec/expression/base_spec.rb +0 -104
  109. data/spec/expression/clone_spec.rb +0 -152
  110. data/spec/expression/conditional_spec.rb +0 -89
  111. data/spec/expression/free_space_spec.rb +0 -27
  112. data/spec/expression/methods/match_length_spec.rb +0 -161
  113. data/spec/expression/methods/match_spec.rb +0 -25
  114. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  115. data/spec/expression/methods/tests_spec.rb +0 -99
  116. data/spec/expression/methods/traverse_spec.rb +0 -161
  117. data/spec/expression/options_spec.rb +0 -128
  118. data/spec/expression/subexpression_spec.rb +0 -50
  119. data/spec/expression/to_h_spec.rb +0 -26
  120. data/spec/expression/to_s_spec.rb +0 -108
  121. data/spec/lexer/all_spec.rb +0 -22
  122. data/spec/lexer/conditionals_spec.rb +0 -53
  123. data/spec/lexer/delimiters_spec.rb +0 -68
  124. data/spec/lexer/escapes_spec.rb +0 -14
  125. data/spec/lexer/keep_spec.rb +0 -10
  126. data/spec/lexer/literals_spec.rb +0 -64
  127. data/spec/lexer/nesting_spec.rb +0 -99
  128. data/spec/lexer/refcalls_spec.rb +0 -60
  129. data/spec/parser/all_spec.rb +0 -43
  130. data/spec/parser/alternation_spec.rb +0 -88
  131. data/spec/parser/anchors_spec.rb +0 -17
  132. data/spec/parser/conditionals_spec.rb +0 -179
  133. data/spec/parser/errors_spec.rb +0 -30
  134. data/spec/parser/escapes_spec.rb +0 -121
  135. data/spec/parser/free_space_spec.rb +0 -130
  136. data/spec/parser/groups_spec.rb +0 -108
  137. data/spec/parser/keep_spec.rb +0 -6
  138. data/spec/parser/options_spec.rb +0 -28
  139. data/spec/parser/posix_classes_spec.rb +0 -8
  140. data/spec/parser/properties_spec.rb +0 -115
  141. data/spec/parser/quantifiers_spec.rb +0 -68
  142. data/spec/parser/refcalls_spec.rb +0 -117
  143. data/spec/parser/set/intersections_spec.rb +0 -127
  144. data/spec/parser/set/ranges_spec.rb +0 -111
  145. data/spec/parser/sets_spec.rb +0 -178
  146. data/spec/parser/types_spec.rb +0 -18
  147. data/spec/scanner/all_spec.rb +0 -18
  148. data/spec/scanner/anchors_spec.rb +0 -21
  149. data/spec/scanner/conditionals_spec.rb +0 -128
  150. data/spec/scanner/delimiters_spec.rb +0 -52
  151. data/spec/scanner/errors_spec.rb +0 -67
  152. data/spec/scanner/escapes_spec.rb +0 -64
  153. data/spec/scanner/free_space_spec.rb +0 -165
  154. data/spec/scanner/groups_spec.rb +0 -61
  155. data/spec/scanner/keep_spec.rb +0 -10
  156. data/spec/scanner/literals_spec.rb +0 -39
  157. data/spec/scanner/meta_spec.rb +0 -18
  158. data/spec/scanner/options_spec.rb +0 -36
  159. data/spec/scanner/properties_spec.rb +0 -64
  160. data/spec/scanner/quantifiers_spec.rb +0 -25
  161. data/spec/scanner/refcalls_spec.rb +0 -55
  162. data/spec/scanner/sets_spec.rb +0 -151
  163. data/spec/scanner/types_spec.rb +0 -14
  164. data/spec/spec_helper.rb +0 -16
  165. data/spec/support/runner.rb +0 -42
  166. data/spec/support/shared_examples.rb +0 -77
  167. data/spec/support/warning_extractor.rb +0 -60
  168. data/spec/syntax/syntax_spec.rb +0 -48
  169. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  170. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  171. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  172. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  173. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  174. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  175. data/spec/syntax/versions/aliases_spec.rb +0 -37
  176. data/spec/token/token_spec.rb +0 -85
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
4
- data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
3
+ metadata.gz: ba0845a7ebcd158dc60281b731adb0d597b71028a734209a9cf6e850986c03b4
4
+ data.tar.gz: '078369f6bdbf716aff8f435a318e3f1a8e83593951ee7b21c94bbcd597213d54'
5
5
  SHA512:
6
- metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
7
- data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
6
+ metadata.gz: e4539f7196c10d233aca76dc0da3fc8ae8df48b11afd3cc8c7548eedf5893a1202ba06f5fa841444b8afc7d4b0178b6cfb2f16db5e4d05401c64ba26fb05d1de
7
+ data.tar.gz: 801716036ad9a094641094077a8f1695d82cda38020369fb7385a9a7c34d7df0fc90c1629865072d22921fdcfa02a11f70c504220be2bd8df699a10d6d787647
data/Gemfile CHANGED
@@ -1,14 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  gemspec
4
6
 
5
7
  group :development, :test do
6
- gem 'ice_nine', '~> 0.11.2'
7
- gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.0'
8
+ gem 'leto', '~> 2.1'
9
+ gem 'rake', '~> 13.1'
10
+ gem 'regexp_property_values', '~> 1.5'
9
11
  gem 'rspec', '~> 3.10'
10
12
  if RUBY_VERSION.to_f >= 2.7
11
- gem 'gouteur'
12
- gem 'rubocop', '~> 1.7'
13
+ gem 'benchmark-ips', '~> 2.1'
14
+ gem 'gouteur', '~> 1.1'
15
+ gem 'rubocop', '>= 1.80.2'
13
16
  end
14
17
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2025, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/Rakefile CHANGED
@@ -1,87 +1,25 @@
1
- require 'rubygems'
2
-
3
- require 'rake'
4
- require 'rake/testtask'
1
+ # frozen_string_literal: true
5
2
 
6
3
  require 'bundler'
4
+ require 'rubygems'
7
5
  require 'rubygems/package_task'
6
+ require 'rake'
7
+ require 'rake/testtask'
8
+ require 'rspec/core/rake_task'
8
9
 
9
-
10
- RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
11
- RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
12
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
-
10
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
14
11
 
15
12
  Bundler::GemHelper.install_tasks
16
13
 
14
+ RSpec::Core::RakeTask.new(:spec)
17
15
 
18
16
  task :default => [:'test:full']
19
17
 
20
18
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
24
- end
25
-
26
- namespace :ragel do
27
- desc "Process the ragel source files and output ruby code"
28
- task :rb do
29
- RAGEL_SOURCE_FILES.each do |source_file|
30
- output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
31
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
32
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
33
-
34
- contents = File.read(output_file)
35
-
36
- File.open(output_file, 'r+') do |file|
37
- contents = "# -*- warn-indent:false; -*-\n" + contents
38
-
39
- file.write(contents)
40
- end
41
- end
42
- end
43
-
44
- desc "Delete the ragel generated source file(s)"
45
- task :clean do
46
- RAGEL_SOURCE_FILES.each do |file|
47
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
48
- end
49
- end
19
+ task full: [:ragel, :spec]
50
20
  end
51
21
 
52
-
53
22
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
23
  # latest scanner code is generated and included in the build.
55
- desc "Runs ragel:rb before building the gem"
56
- task :build => ['ragel:rb']
57
-
58
-
59
- namespace :props do
60
- desc 'Write new property value hashes for the properties scanner'
61
- task :update do
62
- require 'regexp_property_values'
63
- RegexpPropertyValues.update
64
- dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
65
-
66
- require 'psych'
67
- write_hash_to_file = ->(hash, path) do
68
- File.open(path, 'w') do |f|
69
- f.puts '#',
70
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
71
- '#',
72
- hash.sort.to_h.to_yaml
73
- end
74
- puts "Wrote #{hash.count} aliases to `#{path}`"
75
- end
76
-
77
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
78
- [val.identifier, val.full_name.downcase]
79
- end
80
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
81
-
82
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
83
- [k.identifier, v.full_name.downcase]
84
- end
85
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
86
- end
87
- end
24
+ desc "Runs ragel before building the gem"
25
+ task build: :ragel
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Regexp::Parser
2
- # base class for all gem-specific errors (inherited but never raised itself)
4
+ # base class for all gem-specific errors
3
5
  class Error < StandardError; end
4
6
  end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Regexp::Expression
4
+ class Base
5
+ include Regexp::Expression::Shared
6
+
7
+ def initialize(token, options = {})
8
+ init_from_token_and_options(token, options)
9
+ end
10
+
11
+ def to_re(format = :full)
12
+ if set_level > 0
13
+ warn "Calling #to_re on character set members is deprecated - "\
14
+ "their behavior might not be equivalent outside of the set."
15
+ end
16
+ ::Regexp.new(to_s(format))
17
+ end
18
+
19
+ def quantify(*args)
20
+ self.quantifier = Quantifier.new(*args)
21
+ end
22
+
23
+ def unquantified_clone
24
+ clone.tap { |exp| exp.quantifier = nil }
25
+ end
26
+
27
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
28
+ def quantity
29
+ return [nil,nil] unless quantified?
30
+ [quantifier.min, quantifier.max]
31
+ end
32
+
33
+ def repetitions
34
+ @repetitions ||=
35
+ if quantified?
36
+ min = quantifier.min
37
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
38
+ range = min..max
39
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
40
+ if RUBY_VERSION.to_f < 2.7
41
+ range.define_singleton_method(:minmax) { [min, max] }
42
+ end
43
+ range
44
+ else
45
+ 1..1
46
+ end
47
+ end
48
+
49
+ def greedy?
50
+ quantified? and quantifier.greedy?
51
+ end
52
+
53
+ def reluctant?
54
+ quantified? and quantifier.reluctant?
55
+ end
56
+ alias :lazy? :reluctant?
57
+
58
+ def possessive?
59
+ quantified? and quantifier.possessive?
60
+ end
61
+
62
+ def to_h
63
+ {
64
+ type: type,
65
+ token: token,
66
+ text: to_s(:base),
67
+ starts_at: ts,
68
+ length: full_length,
69
+ level: level,
70
+ set_level: set_level,
71
+ conditional_level: conditional_level,
72
+ options: options,
73
+ quantifier: quantified? ? quantifier.to_h : nil,
74
+ }
75
+ end
76
+ alias :attributes :to_h
77
+ end
78
+ end
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
4
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
5
  class Alternative < Regexp::Expression::Sequence; end
4
6
 
5
7
  class Alternation < Regexp::Expression::SequenceOperation
@@ -1,5 +1,6 @@
1
- module Regexp::Expression
1
+ # frozen_string_literal: true
2
2
 
3
+ module Regexp::Expression
3
4
  module Anchor
4
5
  class Base < Regexp::Expression::Base; end
5
6
 
@@ -22,5 +23,4 @@ module Regexp::Expression
22
23
  EOS = EndOfString
23
24
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
25
  end
25
-
26
26
  end
@@ -1,20 +1,15 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
4
  module Backreference
3
- class Base < Regexp::Expression::Base
4
- attr_accessor :referenced_expression
5
-
6
- def initialize_copy(orig)
7
- self.referenced_expression = orig.referenced_expression.dup
8
- super
9
- end
10
- end
5
+ class Base < Regexp::Expression::Base; end
11
6
 
12
7
  class Number < Backreference::Base
13
8
  attr_reader :number
14
9
  alias reference number
15
10
 
16
11
  def initialize(token, options = {})
17
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
12
+ @number = token.text[/-?\d+/].to_i
18
13
  super
19
14
  end
20
15
  end
@@ -38,7 +33,7 @@ module Regexp::Expression
38
33
  class NameCall < Backreference::Name; end
39
34
  class NumberCallRelative < Backreference::NumberRelative; end
40
35
 
41
- class NumberRecursionLevel < Backreference::Number
36
+ class NumberRecursionLevel < Backreference::NumberRelative
42
37
  attr_reader :recursion_level
43
38
 
44
39
  def initialize(token, options = {})
@@ -57,4 +52,7 @@ module Regexp::Expression
57
52
  end
58
53
  end
59
54
  end
55
+
56
+ # alias for symmetry between token symbol and Expression class name
57
+ Backref = Backreference
60
58
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
4
  class CharacterSet < Regexp::Expression::Subexpression
3
5
  class IntersectedSequence < Regexp::Expression::Sequence; end
@@ -1,10 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
4
  class CharacterSet < Regexp::Expression::Subexpression
3
5
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
6
+ def ts
7
+ (head = expressions.first) ? head.ts : @ts
6
8
  end
7
- alias :ts :starts_at
8
9
 
9
10
  def <<(exp)
10
11
  complete? and raise Regexp::Parser::Error,
@@ -15,10 +16,6 @@ module Regexp::Expression
15
16
  def complete?
16
17
  count == 2
17
18
  end
18
-
19
- def to_s(_format = :full)
20
- expressions.join(text)
21
- end
22
19
  end
23
20
  end
24
21
  end
@@ -1,10 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
4
  class CharacterSet < Regexp::Expression::Subexpression
3
5
  attr_accessor :closed, :negative
4
-
5
- alias :negative? :negative
6
- alias :negated? :negative
7
- alias :closed? :closed
6
+ alias :closed? :closed
8
7
 
9
8
  def initialize(token, options = {})
10
9
  self.negative = false
@@ -19,9 +18,8 @@ module Regexp::Expression
19
18
  def close
20
19
  self.closed = true
21
20
  end
22
-
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
25
- end
26
21
  end
22
+
23
+ # alias for symmetry between token symbol and Expression class name
24
+ Set = CharacterSet
27
25
  end # module Regexp::Expression
@@ -1,5 +1,6 @@
1
- module Regexp::Expression
1
+ # frozen_string_literal: true
2
2
 
3
+ module Regexp::Expression
3
4
  module CharacterType
4
5
  class Base < Regexp::Expression::Base; end
5
6
 
@@ -15,5 +16,4 @@ module Regexp::Expression
15
16
  class Linebreak < CharacterType::Base; end
16
17
  class ExtendedGrapheme < CharacterType::Base; end
17
18
  end
18
-
19
19
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
4
  module Conditional
3
5
  class TooManyBranches < Regexp::Parser::Error
@@ -7,33 +9,24 @@ module Regexp::Expression
7
9
  end
8
10
 
9
11
  class Condition < Regexp::Expression::Base
10
- attr_accessor :referenced_expression
11
-
12
12
  # Name or number of the referenced capturing group that determines state.
13
13
  # Returns a String if reference is by name, Integer if by number.
14
14
  def reference
15
15
  ref = text.tr("'<>()", "")
16
16
  ref =~ /\D/ ? ref : Integer(ref)
17
17
  end
18
-
19
- def initialize_copy(orig)
20
- self.referenced_expression = orig.referenced_expression.dup
21
- super
22
- end
23
18
  end
24
19
 
25
20
  class Branch < Regexp::Expression::Sequence; end
26
21
 
27
22
  class Expression < Regexp::Expression::Subexpression
28
- attr_accessor :referenced_expression
29
-
30
23
  def <<(exp)
31
24
  expressions.last << exp
32
25
  end
33
26
 
34
- def add_sequence(active_opts = {})
27
+ def add_sequence(active_opts = {}, params = { ts: 0 })
35
28
  raise TooManyBranches.new if branches.length == 2
36
- params = { conditional_level: conditional_level + 1 }
29
+ params = params.merge({ conditional_level: conditional_level + 1 })
37
30
  Branch.add_to(self, params, active_opts)
38
31
  end
39
32
  alias :branch :add_sequence
@@ -54,15 +47,6 @@ module Regexp::Expression
54
47
  def reference
55
48
  condition.reference
56
49
  end
57
-
58
- def to_s(format = :full)
59
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
60
- end
61
-
62
- def initialize_copy(orig)
63
- self.referenced_expression = orig.referenced_expression.dup
64
- super
65
- end
66
50
  end
67
51
  end
68
52
  end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Regexp::Expression
4
+ module EscapeSequence
5
+ Base = Class.new(Regexp::Expression::Base)
6
+
7
+ AsciiEscape = Class.new(Base) # \e
8
+ Backspace = Class.new(Base) # \b
9
+ Bell = Class.new(Base) # \a
10
+ FormFeed = Class.new(Base) # \f
11
+ Newline = Class.new(Base) # \n
12
+ Return = Class.new(Base) # \r
13
+ Tab = Class.new(Base) # \t
14
+ VerticalTab = Class.new(Base) # \v
15
+
16
+ Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
17
+
18
+ Octal = Class.new(Base) # e.g. \012
19
+ Hex = Class.new(Base) # e.g. \x0A
20
+ Codepoint = Class.new(Base) # e.g. \u000A
21
+
22
+ CodepointList = Class.new(Base) # e.g. \u{A B}
23
+ UTF8Hex = Class.new(Base) # e.g. \xE2\x82\xAC
24
+
25
+ AbstractMetaControlSequence = Class.new(Base)
26
+ Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
27
+ Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
28
+ MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
29
+ end
30
+
31
+ # alias for symmetry between Token::* and Expression::*
32
+ Escape = EscapeSequence
33
+ end
@@ -1,17 +1,19 @@
1
- module Regexp::Expression
1
+ # frozen_string_literal: true
2
2
 
3
+ module Regexp::Expression
3
4
  class FreeSpace < Regexp::Expression::Base
4
- def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
5
+ def quantify(*_args)
5
6
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
6
7
  end
7
8
  end
8
9
 
9
- class Comment < Regexp::Expression::FreeSpace; end
10
+ class Comment < Regexp::Expression::FreeSpace
11
+ end
10
12
 
11
13
  class WhiteSpace < Regexp::Expression::FreeSpace
12
14
  def merge(exp)
15
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
13
16
  text << exp.text
14
17
  end
15
18
  end
16
-
17
19
  end
@@ -1,13 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
4
  module Group
3
5
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
6
- end
7
-
8
- def capturing?; false end
9
-
10
- def comment?; false end
11
6
  end
12
7
 
13
8
  class Passive < Group::Base
@@ -18,14 +13,6 @@ module Regexp::Expression
18
13
  super
19
14
  end
20
15
 
21
- def to_s(format = :full)
22
- if implicit?
23
- "#{expressions.join}#{quantifier_affix(format)}"
24
- else
25
- super
26
- end
27
- end
28
-
29
16
  def implicit?
30
17
  @implicit
31
18
  end
@@ -33,6 +20,8 @@ module Regexp::Expression
33
20
 
34
21
  class Absence < Group::Base; end
35
22
  class Atomic < Group::Base; end
23
+ # TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
24
+ # longer inherit from Group because it is effectively a terminal expression.
36
25
  class Options < Group::Base
37
26
  attr_accessor :option_changes
38
27
 
@@ -40,13 +29,19 @@ module Regexp::Expression
40
29
  self.option_changes = orig.option_changes.dup
41
30
  super
42
31
  end
32
+
33
+ def quantify(*args)
34
+ if token == :options_switch
35
+ raise Regexp::Parser::Error, 'Can not quantify an option switch'
36
+ else
37
+ super
38
+ end
39
+ end
43
40
  end
44
41
 
45
42
  class Capture < Group::Base
46
43
  attr_accessor :number, :number_at_level
47
44
  alias identifier number
48
-
49
- def capturing?; true end
50
45
  end
51
46
 
52
47
  class Named < Group::Capture
@@ -65,11 +60,6 @@ module Regexp::Expression
65
60
  end
66
61
 
67
62
  class Comment < Group::Base
68
- def to_s(_format = :full)
69
- text.dup
70
- end
71
-
72
- def comment?; true end
73
63
  end
74
64
  end
75
65
 
@@ -1,5 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
4
  module Keep
5
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
6
+ # that contains all expressions to its left.
3
7
  class Mark < Regexp::Expression::Base; end
4
8
  end
5
9
  end
@@ -1,7 +1,5 @@
1
- module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
1
+ # frozen_string_literal: true
6
2
 
3
+ module Regexp::Expression
4
+ class Literal < Regexp::Expression::Base; end
7
5
  end
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Regexp::Expression
2
4
  class PosixClass < Regexp::Expression::Base
3
- def negative?
4
- type == :nonposixclass
5
- end
6
-
7
5
  def name
8
- token.to_s
6
+ text[/\w+/]
9
7
  end
10
8
  end
9
+
10
+ # alias for symmetry between token symbol and Expression class name
11
+ Posixclass = PosixClass
12
+ Nonposixclass = PosixClass
11
13
  end
@@ -1,12 +1,11 @@
1
- module Regexp::Expression
1
+ # frozen_string_literal: true
2
2
 
3
+ module Regexp::Expression
3
4
  class Root < Regexp::Expression::Subexpression
4
5
  def self.build(options = {})
5
- new(build_token, options)
6
- end
7
-
8
- def self.build_token
9
- Regexp::Token.new(:expression, :root, '', 0)
6
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
7
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
8
+ construct(options: options)
10
9
  end
11
10
  end
12
11
  end
@@ -1,17 +1,14 @@
1
- module Regexp::Expression
1
+ # frozen_string_literal: true
2
2
 
3
+ module Regexp::Expression
3
4
  module UnicodeProperty
4
5
  class Base < Regexp::Expression::Base
5
- def negative?
6
- type == :nonproperty
7
- end
8
-
9
6
  def name
10
7
  text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
8
  end
12
9
 
13
10
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
11
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
12
  end
16
13
  end
17
14
 
@@ -110,11 +107,15 @@ module Regexp::Expression
110
107
  class Unassigned < Codepoint::Base; end
111
108
  end
112
109
 
113
- class Age < UnicodeProperty::Base; end
114
- class Derived < UnicodeProperty::Base; end
115
- class Emoji < UnicodeProperty::Base; end
116
- class Script < UnicodeProperty::Base; end
117
- class Block < UnicodeProperty::Base; end
110
+ class Age < UnicodeProperty::Base; end
111
+ class Block < UnicodeProperty::Base; end
112
+ class Derived < UnicodeProperty::Base; end
113
+ class Emoji < UnicodeProperty::Base; end
114
+ class Enumerated < UnicodeProperty::Base; end
115
+ class Script < UnicodeProperty::Base; end
118
116
  end
119
117
 
118
+ # alias for symmetry between token symbol and Expression class name
119
+ Property = UnicodeProperty
120
+ Nonproperty = UnicodeProperty
120
121
  end # module Regexp::Expression