regexp_parser 1.7.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +9 -3
  3. data/LICENSE +1 -1
  4. data/Rakefile +6 -70
  5. data/lib/regexp_parser/error.rb +4 -0
  6. data/lib/regexp_parser/expression/base.rb +76 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +4 -8
  12. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  15. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  16. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  17. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  21. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +11 -12
  22. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  23. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  25. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  26. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  27. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  28. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  29. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  30. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  31. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  32. data/lib/regexp_parser/expression/sequence.rb +11 -47
  33. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  34. data/lib/regexp_parser/expression/shared.rb +111 -0
  35. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  36. data/lib/regexp_parser/expression.rb +15 -141
  37. data/lib/regexp_parser/lexer.rb +83 -41
  38. data/lib/regexp_parser/parser.rb +372 -429
  39. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  40. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  41. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  42. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  43. data/lib/regexp_parser/scanner/properties/long.csv +651 -0
  44. data/lib/regexp_parser/scanner/properties/short.csv +249 -0
  45. data/lib/regexp_parser/scanner/property.rl +4 -4
  46. data/lib/regexp_parser/scanner/scanner.rl +303 -368
  47. data/lib/regexp_parser/scanner.rb +1423 -1674
  48. data/lib/regexp_parser/syntax/any.rb +2 -7
  49. data/lib/regexp_parser/syntax/base.rb +92 -67
  50. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  51. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  52. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  53. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  54. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  55. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  56. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  57. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  58. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  59. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  60. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  61. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  62. data/lib/regexp_parser/syntax/token/unicode_property.rb +751 -0
  63. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  64. data/lib/regexp_parser/syntax/token.rb +45 -0
  65. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  66. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  67. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  68. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  69. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  70. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  71. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  73. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  74. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  75. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  78. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  79. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  80. data/lib/regexp_parser/syntax/versions.rb +3 -1
  81. data/lib/regexp_parser/syntax.rb +8 -6
  82. data/lib/regexp_parser/token.rb +9 -20
  83. data/lib/regexp_parser/version.rb +1 -1
  84. data/lib/regexp_parser.rb +0 -2
  85. data/regexp_parser.gemspec +19 -23
  86. metadata +53 -171
  87. data/CHANGELOG.md +0 -349
  88. data/README.md +0 -470
  89. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  90. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  91. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  92. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  93. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  94. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  95. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  96. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  97. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  98. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  99. data/spec/expression/base_spec.rb +0 -94
  100. data/spec/expression/clone_spec.rb +0 -120
  101. data/spec/expression/conditional_spec.rb +0 -89
  102. data/spec/expression/free_space_spec.rb +0 -27
  103. data/spec/expression/methods/match_length_spec.rb +0 -161
  104. data/spec/expression/methods/match_spec.rb +0 -25
  105. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  106. data/spec/expression/methods/tests_spec.rb +0 -99
  107. data/spec/expression/methods/traverse_spec.rb +0 -161
  108. data/spec/expression/options_spec.rb +0 -128
  109. data/spec/expression/root_spec.rb +0 -9
  110. data/spec/expression/sequence_spec.rb +0 -9
  111. data/spec/expression/subexpression_spec.rb +0 -50
  112. data/spec/expression/to_h_spec.rb +0 -26
  113. data/spec/expression/to_s_spec.rb +0 -100
  114. data/spec/lexer/all_spec.rb +0 -22
  115. data/spec/lexer/conditionals_spec.rb +0 -53
  116. data/spec/lexer/escapes_spec.rb +0 -14
  117. data/spec/lexer/keep_spec.rb +0 -10
  118. data/spec/lexer/literals_spec.rb +0 -89
  119. data/spec/lexer/nesting_spec.rb +0 -99
  120. data/spec/lexer/refcalls_spec.rb +0 -55
  121. data/spec/parser/all_spec.rb +0 -43
  122. data/spec/parser/alternation_spec.rb +0 -88
  123. data/spec/parser/anchors_spec.rb +0 -17
  124. data/spec/parser/conditionals_spec.rb +0 -179
  125. data/spec/parser/errors_spec.rb +0 -30
  126. data/spec/parser/escapes_spec.rb +0 -121
  127. data/spec/parser/free_space_spec.rb +0 -130
  128. data/spec/parser/groups_spec.rb +0 -108
  129. data/spec/parser/keep_spec.rb +0 -6
  130. data/spec/parser/posix_classes_spec.rb +0 -8
  131. data/spec/parser/properties_spec.rb +0 -115
  132. data/spec/parser/quantifiers_spec.rb +0 -51
  133. data/spec/parser/refcalls_spec.rb +0 -112
  134. data/spec/parser/set/intersections_spec.rb +0 -127
  135. data/spec/parser/set/ranges_spec.rb +0 -111
  136. data/spec/parser/sets_spec.rb +0 -178
  137. data/spec/parser/types_spec.rb +0 -18
  138. data/spec/scanner/all_spec.rb +0 -18
  139. data/spec/scanner/anchors_spec.rb +0 -21
  140. data/spec/scanner/conditionals_spec.rb +0 -128
  141. data/spec/scanner/errors_spec.rb +0 -68
  142. data/spec/scanner/escapes_spec.rb +0 -53
  143. data/spec/scanner/free_space_spec.rb +0 -133
  144. data/spec/scanner/groups_spec.rb +0 -52
  145. data/spec/scanner/keep_spec.rb +0 -10
  146. data/spec/scanner/literals_spec.rb +0 -49
  147. data/spec/scanner/meta_spec.rb +0 -18
  148. data/spec/scanner/properties_spec.rb +0 -64
  149. data/spec/scanner/quantifiers_spec.rb +0 -20
  150. data/spec/scanner/refcalls_spec.rb +0 -36
  151. data/spec/scanner/sets_spec.rb +0 -102
  152. data/spec/scanner/types_spec.rb +0 -14
  153. data/spec/spec_helper.rb +0 -15
  154. data/spec/support/runner.rb +0 -42
  155. data/spec/support/shared_examples.rb +0 -77
  156. data/spec/support/warning_extractor.rb +0 -60
  157. data/spec/syntax/syntax_spec.rb +0 -48
  158. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  159. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  160. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  161. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  162. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  163. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  164. data/spec/syntax/versions/aliases_spec.rb +0 -37
  165. data/spec/token/token_spec.rb +0 -85
  166. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d7b93dde993f6fe427ff43755738bf7de50f8613cf6e8097c9d791646d803e4c
4
- data.tar.gz: 993a88720a4ee1d8a34f4c95e167089adc6455289bfeb356de8c028a9bbee63d
3
+ metadata.gz: 8cc1826647cde51d6d1b5a5a58fb005efd2a38a85fa0e817616591ee2fad7862
4
+ data.tar.gz: 572a6203741b9970bcedc1ace243ea0b9c300ca60b71ac263036eb0f4222dd50
5
5
  SHA512:
6
- metadata.gz: 0bf5c142591b2d5a65023c53f76a64a13106074050042d24614963cc14dabda197ea9140fccd93f26ad06885293369b076bb5e9198967a6e3762654df8033455
7
- data.tar.gz: 1311b3dfa90633ef456edc12abf6ace2d7311c7be8450f3768a436f9c8491c3a87987f3d6ac24c6966b6f4de5363e0f6f874bfe9e1b038a6cf5d9c043553b58e
6
+ metadata.gz: 3521fe6dab4be0c0db3c37f3f8d196fc754ff72937336a73ef5547a15ae4f2d366aa28e73d6e5756920d610b943ee51cb2db8e51e53ccb19c1c235a8c45da708
7
+ data.tar.gz: d05b7babb79c118bdc36ae168d8199ee3500b0cff33cb00ed46d51a4a88725130e931c588146a3f989dd87778b1f39684b2c8a5541c9ac8f91427fc31b1ec97a
data/Gemfile CHANGED
@@ -3,7 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'rake', '~> 13.0'
7
- gem 'regexp_property_values', '~> 1.0'
8
- gem 'rspec', '~> 3.8'
6
+ gem 'leto', '~> 2.1'
7
+ gem 'rake', '~> 13.1'
8
+ gem 'regexp_property_values', '~> 1.5'
9
+ gem 'rspec', '~> 3.10'
10
+ if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
12
+ gem 'gouteur', '~> 1.1'
13
+ gem 'rubocop', '~> 1.59'
14
+ end
9
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2024, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/Rakefile CHANGED
@@ -1,87 +1,23 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
6
+ require 'rspec/core/rake_task'
5
7
 
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
9
-
10
- RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
11
- RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
12
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
-
8
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
14
9
 
15
10
  Bundler::GemHelper.install_tasks
16
11
 
12
+ RSpec::Core::RakeTask.new(:spec)
17
13
 
18
14
  task :default => [:'test:full']
19
15
 
20
16
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
17
+ task full: [:'ragel:rb', :spec]
24
18
  end
25
19
 
26
- namespace :ragel do
27
- desc "Process the ragel source files and output ruby code"
28
- task :rb do |t|
29
- RAGEL_SOURCE_FILES.each do |file|
30
- output_file = "#{RAGEL_OUTPUT_DIR}/#{file}.rb"
31
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
32
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{file}.rl -o #{output_file}"
33
-
34
- contents = File.read(output_file)
35
-
36
- File.open(output_file, 'r+') do |file|
37
- contents = "# -*- warn-indent:false; -*-\n" + contents
38
-
39
- file.write(contents)
40
- end
41
- end
42
- end
43
-
44
- desc "Delete the ragel generated source file(s)"
45
- task :clean do |t|
46
- RAGEL_SOURCE_FILES.each do |file|
47
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
48
- end
49
- end
50
- end
51
-
52
-
53
20
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
21
  # latest scanner code is generated and included in the build.
55
22
  desc "Runs ragel:rb before building the gem"
56
23
  task :build => ['ragel:rb']
57
-
58
-
59
- namespace :props do
60
- desc 'Write new property value hashes for the properties scanner'
61
- task :update do
62
- require 'regexp_property_values'
63
- RegexpPropertyValues.update
64
- dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
65
-
66
- require 'psych'
67
- write_hash_to_file = ->(hash, path) do
68
- File.open(path, 'w') do |f|
69
- f.puts '#',
70
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
71
- '#',
72
- hash.sort.to_h.to_yaml
73
- end
74
- puts "Wrote #{hash.count} aliases to `#{path}`"
75
- end
76
-
77
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
78
- [val.identifier, val.full_name.downcase]
79
- end
80
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
81
-
82
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
83
- [k.identifier, v.full_name.downcase]
84
- end
85
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
86
- end
87
- end
@@ -0,0 +1,4 @@
1
+ class Regexp::Parser
2
+ # base class for all gem-specific errors
3
+ class Error < StandardError; end
4
+ end
@@ -0,0 +1,76 @@
1
+ module Regexp::Expression
2
+ class Base
3
+ include Regexp::Expression::Shared
4
+
5
+ def initialize(token, options = {})
6
+ init_from_token_and_options(token, options)
7
+ end
8
+
9
+ def to_re(format = :full)
10
+ if set_level > 0
11
+ warn "Calling #to_re on character set members is deprecated - "\
12
+ "their behavior might not be equivalent outside of the set."
13
+ end
14
+ ::Regexp.new(to_s(format))
15
+ end
16
+
17
+ def quantify(*args)
18
+ self.quantifier = Quantifier.new(*args)
19
+ end
20
+
21
+ def unquantified_clone
22
+ clone.tap { |exp| exp.quantifier = nil }
23
+ end
24
+
25
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
26
+ def quantity
27
+ return [nil,nil] unless quantified?
28
+ [quantifier.min, quantifier.max]
29
+ end
30
+
31
+ def repetitions
32
+ @repetitions ||=
33
+ if quantified?
34
+ min = quantifier.min
35
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
36
+ range = min..max
37
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
38
+ if RUBY_VERSION.to_f < 2.7
39
+ range.define_singleton_method(:minmax) { [min, max] }
40
+ end
41
+ range
42
+ else
43
+ 1..1
44
+ end
45
+ end
46
+
47
+ def greedy?
48
+ quantified? and quantifier.greedy?
49
+ end
50
+
51
+ def reluctant?
52
+ quantified? and quantifier.reluctant?
53
+ end
54
+ alias :lazy? :reluctant?
55
+
56
+ def possessive?
57
+ quantified? and quantifier.possessive?
58
+ end
59
+
60
+ def to_h
61
+ {
62
+ type: type,
63
+ token: token,
64
+ text: to_s(:base),
65
+ starts_at: ts,
66
+ length: full_length,
67
+ level: level,
68
+ set_level: set_level,
69
+ conditional_level: conditional_level,
70
+ options: options,
71
+ quantifier: quantified? ? quantifier.to_h : nil,
72
+ }
73
+ end
74
+ alias :attributes :to_h
75
+ end
76
+ end
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -2,6 +2,23 @@ module Regexp::Expression
2
2
  module Backreference
3
3
  class Base < Regexp::Expression::Base
4
4
  attr_accessor :referenced_expression
5
+
6
+ def initialize_copy(orig)
7
+ exp_id = [self.class, self.starts_at]
8
+
9
+ # prevent infinite recursion for recursive subexp calls
10
+ copied = @@copied ||= {}
11
+ self.referenced_expression =
12
+ if copied[exp_id]
13
+ orig.referenced_expression
14
+ else
15
+ copied[exp_id] = true
16
+ orig.referenced_expression.dup
17
+ end
18
+ copied.clear
19
+
20
+ super
21
+ end
5
22
  end
6
23
 
7
24
  class Number < Backreference::Base
@@ -9,7 +26,7 @@ module Regexp::Expression
9
26
  alias reference number
10
27
 
11
28
  def initialize(token, options = {})
12
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
29
+ @number = token.text[/-?\d+/].to_i
13
30
  super
14
31
  end
15
32
  end
@@ -33,7 +50,7 @@ module Regexp::Expression
33
50
  class NameCall < Backreference::Name; end
34
51
  class NumberCallRelative < Backreference::NumberRelative; end
35
52
 
36
- class NumberRecursionLevel < Backreference::Number
53
+ class NumberRecursionLevel < Backreference::NumberRelative
37
54
  attr_reader :recursion_level
38
55
 
39
56
  def initialize(token, options = {})
@@ -52,4 +69,7 @@ module Regexp::Expression
52
69
  end
53
70
  end
54
71
  end
72
+
73
+ # alias for symmetry between token symbol and Expression class name
74
+ Backref = Backreference
55
75
  end
@@ -1,23 +1,19 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
- complete? && raise("Can't add more than 2 expressions to a Range")
9
+ complete? and raise Regexp::Parser::Error,
10
+ "Can't add more than 2 expressions to a Range"
11
11
  super
12
12
  end
13
13
 
14
14
  def complete?
15
15
  count == 2
16
16
  end
17
-
18
- def to_s(_format = :full)
19
- expressions.join(text)
20
- end
21
17
  end
22
18
  end
23
19
  end
@@ -1,10 +1,7 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  attr_accessor :closed, :negative
4
-
5
- alias :negative? :negative
6
- alias :negated? :negative
7
- alias :closed? :closed
4
+ alias :closed? :closed
8
5
 
9
6
  def initialize(token, options = {})
10
7
  self.negative = false
@@ -19,9 +16,8 @@ module Regexp::Expression
19
16
  def close
20
17
  self.closed = true
21
18
  end
22
-
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
25
- end
26
19
  end
20
+
21
+ # alias for symmetry between token symbol and Expression class name
22
+ Set = CharacterSet
27
23
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Conditional
3
- class TooManyBranches < StandardError
3
+ class TooManyBranches < Regexp::Parser::Error
4
4
  def initialize
5
5
  super('The conditional expression has more than 2 branches')
6
6
  end
@@ -15,6 +15,11 @@ module Regexp::Expression
15
15
  ref = text.tr("'<>()", "")
16
16
  ref =~ /\D/ ? ref : Integer(ref)
17
17
  end
18
+
19
+ def initialize_copy(orig)
20
+ self.referenced_expression = orig.referenced_expression.dup
21
+ super
22
+ end
18
23
  end
19
24
 
20
25
  class Branch < Regexp::Expression::Sequence; end
@@ -26,9 +31,9 @@ module Regexp::Expression
26
31
  expressions.last << exp
27
32
  end
28
33
 
29
- def add_sequence(active_opts = {})
34
+ def add_sequence(active_opts = {}, params = { ts: 0 })
30
35
  raise TooManyBranches.new if branches.length == 2
31
- params = { conditional_level: conditional_level + 1 }
36
+ params = params.merge({ conditional_level: conditional_level + 1 })
32
37
  Branch.add_to(self, params, active_opts)
33
38
  end
34
39
  alias :branch :add_sequence
@@ -50,8 +55,9 @@ module Regexp::Expression
50
55
  condition.reference
51
56
  end
52
57
 
53
- def to_s(format = :full)
54
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
58
+ def initialize_copy(orig)
59
+ self.referenced_expression = orig.referenced_expression.dup
60
+ super
55
61
  end
56
62
  end
57
63
  end
@@ -1,16 +1,21 @@
1
1
  module Regexp::Expression
2
2
  module EscapeSequence
3
3
  class Base < Regexp::Expression::Base
4
- require 'yaml'
5
-
6
- def char
7
- # poor man's unescape without using eval
8
- YAML.load(%Q(---\n"#{text}"\n))
9
- end
10
-
11
4
  def codepoint
12
5
  char.ord
13
6
  end
7
+
8
+ if ''.respond_to?(:undump)
9
+ def char
10
+ %("#{text}").undump
11
+ end
12
+ else
13
+ # poor man's unescape without using eval
14
+ require 'yaml'
15
+ def char
16
+ YAML.load(%Q(---\n"#{text}"\n))
17
+ end
18
+ end
14
19
  end
15
20
 
16
21
  class Literal < EscapeSequence::Base
@@ -91,4 +96,7 @@ module Regexp::Expression
91
96
  end
92
97
  end
93
98
  end
99
+
100
+ # alias for symmetry between Token::* and Expression::*
101
+ Escape = EscapeSequence
94
102
  end
@@ -1,17 +1,17 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
5
- raise "Can not quantify a free space object"
3
+ def quantify(*_args)
4
+ raise Regexp::Parser::Error, 'Can not quantify a free space object'
6
5
  end
7
6
  end
8
7
 
9
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
10
10
 
11
11
  class WhiteSpace < Regexp::Expression::FreeSpace
12
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
13
14
  text << exp.text
14
15
  end
15
16
  end
16
-
17
17
  end
@@ -1,27 +1,45 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
6
- end
4
+ end
7
5
 
8
- def capturing?; false end
6
+ class Passive < Group::Base
7
+ attr_writer :implicit
8
+
9
+ def initialize(*)
10
+ @implicit = false
11
+ super
12
+ end
9
13
 
10
- def comment?; false end
14
+ def implicit?
15
+ @implicit
16
+ end
11
17
  end
12
18
 
13
- class Atomic < Group::Base; end
14
- class Passive < Group::Base; end
15
19
  class Absence < Group::Base; end
20
+ class Atomic < Group::Base; end
21
+ # TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
22
+ # longer inherit from Group because it is effectively a terminal expression.
16
23
  class Options < Group::Base
17
24
  attr_accessor :option_changes
25
+
26
+ def initialize_copy(orig)
27
+ self.option_changes = orig.option_changes.dup
28
+ super
29
+ end
30
+
31
+ def quantify(*args)
32
+ if token == :options_switch
33
+ raise Regexp::Parser::Error, 'Can not quantify an option switch'
34
+ else
35
+ super
36
+ end
37
+ end
18
38
  end
19
39
 
20
40
  class Capture < Group::Base
21
41
  attr_accessor :number, :number_at_level
22
42
  alias identifier number
23
-
24
- def capturing?; true end
25
43
  end
26
44
 
27
45
  class Named < Group::Capture
@@ -33,18 +51,13 @@ module Regexp::Expression
33
51
  super
34
52
  end
35
53
 
36
- def initialize_clone(orig)
54
+ def initialize_copy(orig)
37
55
  @name = orig.name.dup
38
56
  super
39
57
  end
40
58
  end
41
59
 
42
60
  class Comment < Group::Base
43
- def to_s(_format = :full)
44
- text.dup
45
- end
46
-
47
- def comment?; true end
48
61
  end
49
62
  end
50
63
 
@@ -1,5 +1,7 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
+ # that contains all expressions to its left.
3
5
  class Mark < Regexp::Expression::Base; end
4
6
  end
5
7
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,11 +1,11 @@
1
1
  module Regexp::Expression
2
2
  class PosixClass < Regexp::Expression::Base
3
- def negative?
4
- type == :nonposixclass
5
- end
6
-
7
3
  def name
8
- token.to_s
4
+ text[/\w+/]
9
5
  end
10
6
  end
7
+
8
+ # aliases for symmetry between token symbols and Expression class names
9
+ Posixclass = PosixClass
10
+ Nonposixclass = PosixClass
11
11
  end
@@ -1,24 +1,9 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
- # TODO: this override is here for backwards compatibility, remove in 2.0.0
5
- def initialize(*args)
6
- unless args.first.is_a?(Regexp::Token)
7
- warn('WARNING: Root.new without a Token argument is deprecated and '\
8
- 'will be removed in 2.0.0. Use Root.build for the old behavior.')
9
- return super(self.class.build_token, *args)
10
- end
11
- super
12
- end
13
-
14
- class << self
15
- def build(options = {})
16
- new(build_token, options)
17
- end
18
-
19
- def build_token
20
- Regexp::Token.new(:expression, :root, '', 0)
21
- end
3
+ def self.build(options = {})
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
22
7
  end
23
8
  end
24
9
  end
@@ -1,17 +1,12 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
- def negative?
6
- type == :nonproperty
7
- end
8
-
9
4
  def name
10
- text =~ /\A\\[pP]\{([^}]+)\}\z/; $1
5
+ text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
6
  end
12
7
 
13
8
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
9
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
10
  end
16
11
  end
17
12
 
@@ -110,11 +105,15 @@ module Regexp::Expression
110
105
  class Unassigned < Codepoint::Base; end
111
106
  end
112
107
 
113
- class Age < UnicodeProperty::Base; end
114
- class Derived < UnicodeProperty::Base; end
115
- class Emoji < UnicodeProperty::Base; end
116
- class Script < UnicodeProperty::Base; end
117
- class Block < UnicodeProperty::Base; end
108
+ class Age < UnicodeProperty::Base; end
109
+ class Block < UnicodeProperty::Base; end
110
+ class Derived < UnicodeProperty::Base; end
111
+ class Emoji < UnicodeProperty::Base; end
112
+ class Enumerated < UnicodeProperty::Base; end
113
+ class Script < UnicodeProperty::Base; end
118
114
  end
119
115
 
116
+ # aliases for symmetry between token symbols and Expression class names
117
+ Property = UnicodeProperty
118
+ Nonproperty = UnicodeProperty
120
119
  end # module Regexp::Expression
@@ -0,0 +1,41 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ else
32
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
33
+ end
34
+ end
35
+ end
36
+
37
+ def token_class
38
+ self.class.token_class
39
+ end
40
+ end
41
+ end