regexp_parser 1.7.0 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +8 -2
  3. data/LICENSE +1 -1
  4. data/Rakefile +6 -70
  5. data/lib/regexp_parser/error.rb +4 -0
  6. data/lib/regexp_parser/expression/base.rb +76 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
  12. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  15. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  16. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  17. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  21. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
  22. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  23. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  25. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  26. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  27. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  28. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  29. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  30. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  31. data/lib/regexp_parser/expression/sequence.rb +11 -47
  32. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  33. data/lib/regexp_parser/expression/shared.rb +111 -0
  34. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  35. data/lib/regexp_parser/expression.rb +14 -141
  36. data/lib/regexp_parser/lexer.rb +83 -41
  37. data/lib/regexp_parser/parser.rb +371 -429
  38. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  39. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  40. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  41. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  42. data/lib/regexp_parser/scanner/properties/long.csv +633 -0
  43. data/lib/regexp_parser/scanner/properties/short.csv +248 -0
  44. data/lib/regexp_parser/scanner/property.rl +4 -4
  45. data/lib/regexp_parser/scanner/scanner.rl +303 -368
  46. data/lib/regexp_parser/scanner.rb +1423 -1674
  47. data/lib/regexp_parser/syntax/any.rb +2 -7
  48. data/lib/regexp_parser/syntax/base.rb +92 -67
  49. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  50. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  51. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  52. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  53. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  54. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  55. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  56. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  57. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  58. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  59. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  60. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  61. data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
  62. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  63. data/lib/regexp_parser/syntax/token.rb +45 -0
  64. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  65. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  66. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  67. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  68. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  69. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  70. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  71. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  73. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  74. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  75. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  78. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  79. data/lib/regexp_parser/syntax/versions.rb +3 -1
  80. data/lib/regexp_parser/syntax.rb +8 -6
  81. data/lib/regexp_parser/token.rb +9 -20
  82. data/lib/regexp_parser/version.rb +1 -1
  83. data/lib/regexp_parser.rb +0 -2
  84. data/regexp_parser.gemspec +19 -23
  85. metadata +52 -171
  86. data/CHANGELOG.md +0 -349
  87. data/README.md +0 -470
  88. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  89. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  90. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  91. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  92. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  93. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  94. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  95. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  96. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  97. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  98. data/spec/expression/base_spec.rb +0 -94
  99. data/spec/expression/clone_spec.rb +0 -120
  100. data/spec/expression/conditional_spec.rb +0 -89
  101. data/spec/expression/free_space_spec.rb +0 -27
  102. data/spec/expression/methods/match_length_spec.rb +0 -161
  103. data/spec/expression/methods/match_spec.rb +0 -25
  104. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  105. data/spec/expression/methods/tests_spec.rb +0 -99
  106. data/spec/expression/methods/traverse_spec.rb +0 -161
  107. data/spec/expression/options_spec.rb +0 -128
  108. data/spec/expression/root_spec.rb +0 -9
  109. data/spec/expression/sequence_spec.rb +0 -9
  110. data/spec/expression/subexpression_spec.rb +0 -50
  111. data/spec/expression/to_h_spec.rb +0 -26
  112. data/spec/expression/to_s_spec.rb +0 -100
  113. data/spec/lexer/all_spec.rb +0 -22
  114. data/spec/lexer/conditionals_spec.rb +0 -53
  115. data/spec/lexer/escapes_spec.rb +0 -14
  116. data/spec/lexer/keep_spec.rb +0 -10
  117. data/spec/lexer/literals_spec.rb +0 -89
  118. data/spec/lexer/nesting_spec.rb +0 -99
  119. data/spec/lexer/refcalls_spec.rb +0 -55
  120. data/spec/parser/all_spec.rb +0 -43
  121. data/spec/parser/alternation_spec.rb +0 -88
  122. data/spec/parser/anchors_spec.rb +0 -17
  123. data/spec/parser/conditionals_spec.rb +0 -179
  124. data/spec/parser/errors_spec.rb +0 -30
  125. data/spec/parser/escapes_spec.rb +0 -121
  126. data/spec/parser/free_space_spec.rb +0 -130
  127. data/spec/parser/groups_spec.rb +0 -108
  128. data/spec/parser/keep_spec.rb +0 -6
  129. data/spec/parser/posix_classes_spec.rb +0 -8
  130. data/spec/parser/properties_spec.rb +0 -115
  131. data/spec/parser/quantifiers_spec.rb +0 -51
  132. data/spec/parser/refcalls_spec.rb +0 -112
  133. data/spec/parser/set/intersections_spec.rb +0 -127
  134. data/spec/parser/set/ranges_spec.rb +0 -111
  135. data/spec/parser/sets_spec.rb +0 -178
  136. data/spec/parser/types_spec.rb +0 -18
  137. data/spec/scanner/all_spec.rb +0 -18
  138. data/spec/scanner/anchors_spec.rb +0 -21
  139. data/spec/scanner/conditionals_spec.rb +0 -128
  140. data/spec/scanner/errors_spec.rb +0 -68
  141. data/spec/scanner/escapes_spec.rb +0 -53
  142. data/spec/scanner/free_space_spec.rb +0 -133
  143. data/spec/scanner/groups_spec.rb +0 -52
  144. data/spec/scanner/keep_spec.rb +0 -10
  145. data/spec/scanner/literals_spec.rb +0 -49
  146. data/spec/scanner/meta_spec.rb +0 -18
  147. data/spec/scanner/properties_spec.rb +0 -64
  148. data/spec/scanner/quantifiers_spec.rb +0 -20
  149. data/spec/scanner/refcalls_spec.rb +0 -36
  150. data/spec/scanner/sets_spec.rb +0 -102
  151. data/spec/scanner/types_spec.rb +0 -14
  152. data/spec/spec_helper.rb +0 -15
  153. data/spec/support/runner.rb +0 -42
  154. data/spec/support/shared_examples.rb +0 -77
  155. data/spec/support/warning_extractor.rb +0 -60
  156. data/spec/syntax/syntax_spec.rb +0 -48
  157. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  158. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  159. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  160. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  161. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  162. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  163. data/spec/syntax/versions/aliases_spec.rb +0 -37
  164. data/spec/token/token_spec.rb +0 -85
  165. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d7b93dde993f6fe427ff43755738bf7de50f8613cf6e8097c9d791646d803e4c
4
- data.tar.gz: 993a88720a4ee1d8a34f4c95e167089adc6455289bfeb356de8c028a9bbee63d
3
+ metadata.gz: 53bc2105c4601ec650c24172f48b4dfa4ffa356f84f4de2a58cca4429cff45a4
4
+ data.tar.gz: 3cb580ee3db70e9490b350722fcd63b77640cc1cbaed4c7555b192f9ddcc341b
5
5
  SHA512:
6
- metadata.gz: 0bf5c142591b2d5a65023c53f76a64a13106074050042d24614963cc14dabda197ea9140fccd93f26ad06885293369b076bb5e9198967a6e3762654df8033455
7
- data.tar.gz: 1311b3dfa90633ef456edc12abf6ace2d7311c7be8450f3768a436f9c8491c3a87987f3d6ac24c6966b6f4de5363e0f6f874bfe9e1b038a6cf5d9c043553b58e
6
+ metadata.gz: fcdf19c19bc62b11a3c8a22a75c54ef996635dfc3cbabf7512eee49090c0215b21c751a275923561a0be33bb0070e04ce57f49a5bc95bedb386afd763ba6d7ba
7
+ data.tar.gz: 5744e2e8baa4ecf52d87718a821c4ba69e8d2d1ced6deffca3224cf415a72714726f8465d50c0f504421fed5992c94519c3c8e0a0e7f79e24037a9fad5386347
data/Gemfile CHANGED
@@ -3,7 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
+ gem 'leto', '~> 2.0'
6
7
  gem 'rake', '~> 13.0'
7
- gem 'regexp_property_values', '~> 1.0'
8
- gem 'rspec', '~> 3.8'
8
+ gem 'regexp_property_values', '~> 1.4'
9
+ gem 'rspec', '~> 3.10'
10
+ if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
12
+ gem 'gouteur', '~> 1.1'
13
+ gem 'rubocop', '~> 1.7'
14
+ end
9
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2023, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/Rakefile CHANGED
@@ -1,87 +1,23 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
6
+ require 'rspec/core/rake_task'
5
7
 
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
9
-
10
- RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
11
- RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
12
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
-
8
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
14
9
 
15
10
  Bundler::GemHelper.install_tasks
16
11
 
12
+ RSpec::Core::RakeTask.new(:spec)
17
13
 
18
14
  task :default => [:'test:full']
19
15
 
20
16
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
17
+ task full: [:'ragel:rb', :spec]
24
18
  end
25
19
 
26
- namespace :ragel do
27
- desc "Process the ragel source files and output ruby code"
28
- task :rb do |t|
29
- RAGEL_SOURCE_FILES.each do |file|
30
- output_file = "#{RAGEL_OUTPUT_DIR}/#{file}.rb"
31
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
32
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{file}.rl -o #{output_file}"
33
-
34
- contents = File.read(output_file)
35
-
36
- File.open(output_file, 'r+') do |file|
37
- contents = "# -*- warn-indent:false; -*-\n" + contents
38
-
39
- file.write(contents)
40
- end
41
- end
42
- end
43
-
44
- desc "Delete the ragel generated source file(s)"
45
- task :clean do |t|
46
- RAGEL_SOURCE_FILES.each do |file|
47
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
48
- end
49
- end
50
- end
51
-
52
-
53
20
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
21
  # latest scanner code is generated and included in the build.
55
22
  desc "Runs ragel:rb before building the gem"
56
23
  task :build => ['ragel:rb']
57
-
58
-
59
- namespace :props do
60
- desc 'Write new property value hashes for the properties scanner'
61
- task :update do
62
- require 'regexp_property_values'
63
- RegexpPropertyValues.update
64
- dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
65
-
66
- require 'psych'
67
- write_hash_to_file = ->(hash, path) do
68
- File.open(path, 'w') do |f|
69
- f.puts '#',
70
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
71
- '#',
72
- hash.sort.to_h.to_yaml
73
- end
74
- puts "Wrote #{hash.count} aliases to `#{path}`"
75
- end
76
-
77
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
78
- [val.identifier, val.full_name.downcase]
79
- end
80
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
81
-
82
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
83
- [k.identifier, v.full_name.downcase]
84
- end
85
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
86
- end
87
- end
@@ -0,0 +1,4 @@
1
class Regexp::Parser
  # Base class for all gem-specific errors. Rescuing this class catches
  # every error type that derives from it.
  class Error < StandardError
  end
end
@@ -0,0 +1,76 @@
1
module Regexp::Expression
  # Superclass for expression nodes. Several helpers used below
  # (`quantifier`, `quantified?`, `to_s`, `set_level`, `full_length`, ...)
  # are not defined in this class; presumably they are supplied by the
  # included `Shared` module - confirm there.
  class Base
    include Regexp::Expression::Shared

    # Sets the expression up from a token and an options hash by
    # delegating to `init_from_token_and_options`.
    def initialize(token, options = {})
      init_from_token_and_options(token, options)
    end

    # Compiles the string form of this expression (in the given format)
    # into a ::Regexp. Warns first when the expression has a set_level
    # above zero, i.e. when it is a character set member.
    def to_re(format = :full)
      inside_set = set_level > 0
      if inside_set
        warn "Calling #to_re on character set members is deprecated - "\
             "their behavior might not be equivalent outside of the set."
      end

      source = to_s(format)
      ::Regexp.new(source)
    end

    # Builds a Quantifier from the given arguments, assigns it to this
    # expression and returns it.
    def quantify(*args)
      new_quantifier = Quantifier.new(*args)
      self.quantifier = new_quantifier
    end

    # Returns a clone of this expression with its quantifier set to nil.
    def unquantified_clone
      copy = clone
      copy.quantifier = nil
      copy
    end

    # Deprecated. Prefer `#repetitions` which has a more uniform interface.
    #
    # Returns [min, max] of the quantifier, or [nil, nil] if unquantified.
    def quantity
      quantified? ? [quantifier.min, quantifier.max] : [nil, nil]
    end

    # Returns the allowed repetition count as a Range and memoizes it.
    # Unquantified expressions yield 1..1; a negative quantifier max is
    # mapped to Float::INFINITY.
    def repetitions
      @repetitions ||= begin
        if quantified?
          lower = quantifier.min
          upper = quantifier.max
          upper = Float::INFINITY if upper < 0
          span = lower..upper
          # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
          if RUBY_VERSION.to_f < 2.7
            span.define_singleton_method(:minmax) { [lower, upper] }
          end
          span
        else
          1..1
        end
      end
    end

    # Truthy only for a quantified expression whose quantifier is greedy.
    def greedy?
      quantified? && quantifier.greedy?
    end

    # Truthy only for a quantified expression whose quantifier is reluctant.
    def reluctant?
      quantified? && quantifier.reluctant?
    end
    alias_method :lazy?, :reluctant?

    # Truthy only for a quantified expression whose quantifier is possessive.
    def possessive?
      quantified? && quantifier.possessive?
    end

    # Returns the expression's attributes as a Hash. The :quantifier entry
    # is the quantifier's own hash, or nil when there is no quantifier.
    def to_h
      {
        type:              type,
        token:             token,
        text:              to_s(:base),
        starts_at:         ts,
        length:            full_length,
        level:             level,
        set_level:         set_level,
        conditional_level: conditional_level,
        options:           options,
        quantifier:        (quantifier.to_h if quantified?),
      }
    end
    alias_method :attributes, :to_h
  end
end
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -2,6 +2,23 @@ module Regexp::Expression
2
2
  module Backreference
3
3
  class Base < Regexp::Expression::Base
4
4
  attr_accessor :referenced_expression
5
+
6
+ def initialize_copy(orig)
7
+ exp_id = [self.class, self.starts_at]
8
+
9
+ # prevent infinite recursion for recursive subexp calls
10
+ copied = @@copied ||= {}
11
+ self.referenced_expression =
12
+ if copied[exp_id]
13
+ orig.referenced_expression
14
+ else
15
+ copied[exp_id] = true
16
+ orig.referenced_expression.dup
17
+ end
18
+ copied.clear
19
+
20
+ super
21
+ end
5
22
  end
6
23
 
7
24
  class Number < Backreference::Base
@@ -9,7 +26,7 @@ module Regexp::Expression
9
26
  alias reference number
10
27
 
11
28
  def initialize(token, options = {})
12
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
29
+ @number = token.text[/-?\d+/].to_i
13
30
  super
14
31
  end
15
32
  end
@@ -33,7 +50,7 @@ module Regexp::Expression
33
50
  class NameCall < Backreference::Name; end
34
51
  class NumberCallRelative < Backreference::NumberRelative; end
35
52
 
36
- class NumberRecursionLevel < Backreference::Number
53
+ class NumberRecursionLevel < Backreference::NumberRelative
37
54
  attr_reader :recursion_level
38
55
 
39
56
  def initialize(token, options = {})
@@ -52,4 +69,7 @@ module Regexp::Expression
52
69
  end
53
70
  end
54
71
  end
72
+
73
+ # alias for symmetry between token symbol and Expression class name
74
+ Backref = Backreference
55
75
  end
@@ -1,23 +1,19 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
- complete? && raise("Can't add more than 2 expressions to a Range")
9
+ complete? and raise Regexp::Parser::Error,
10
+ "Can't add more than 2 expressions to a Range"
11
11
  super
12
12
  end
13
13
 
14
14
  def complete?
15
15
  count == 2
16
16
  end
17
-
18
- def to_s(_format = :full)
19
- expressions.join(text)
20
- end
21
17
  end
22
18
  end
23
19
  end
@@ -19,9 +19,8 @@ module Regexp::Expression
19
19
  def close
20
20
  self.closed = true
21
21
  end
22
-
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
25
- end
26
22
  end
23
+
24
+ # alias for symmetry between token symbol and Expression class name
25
+ Set = CharacterSet
27
26
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Conditional
3
- class TooManyBranches < StandardError
3
+ class TooManyBranches < Regexp::Parser::Error
4
4
  def initialize
5
5
  super('The conditional expression has more than 2 branches')
6
6
  end
@@ -15,6 +15,11 @@ module Regexp::Expression
15
15
  ref = text.tr("'<>()", "")
16
16
  ref =~ /\D/ ? ref : Integer(ref)
17
17
  end
18
+
19
+ def initialize_copy(orig)
20
+ self.referenced_expression = orig.referenced_expression.dup
21
+ super
22
+ end
18
23
  end
19
24
 
20
25
  class Branch < Regexp::Expression::Sequence; end
@@ -26,9 +31,9 @@ module Regexp::Expression
26
31
  expressions.last << exp
27
32
  end
28
33
 
29
- def add_sequence(active_opts = {})
34
+ def add_sequence(active_opts = {}, params = { ts: 0 })
30
35
  raise TooManyBranches.new if branches.length == 2
31
- params = { conditional_level: conditional_level + 1 }
36
+ params = params.merge({ conditional_level: conditional_level + 1 })
32
37
  Branch.add_to(self, params, active_opts)
33
38
  end
34
39
  alias :branch :add_sequence
@@ -50,8 +55,9 @@ module Regexp::Expression
50
55
  condition.reference
51
56
  end
52
57
 
53
- def to_s(format = :full)
54
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
58
+ def initialize_copy(orig)
59
+ self.referenced_expression = orig.referenced_expression.dup
60
+ super
55
61
  end
56
62
  end
57
63
  end
@@ -1,16 +1,21 @@
1
1
  module Regexp::Expression
2
2
  module EscapeSequence
3
3
  class Base < Regexp::Expression::Base
4
- require 'yaml'
5
-
6
- def char
7
- # poor man's unescape without using eval
8
- YAML.load(%Q(---\n"#{text}"\n))
9
- end
10
-
11
4
  def codepoint
12
5
  char.ord
13
6
  end
7
+
8
+ if ''.respond_to?(:undump)
9
+ def char
10
+ %("#{text}").undump
11
+ end
12
+ else
13
+ # poor man's unescape without using eval
14
+ require 'yaml'
15
+ def char
16
+ YAML.load(%Q(---\n"#{text}"\n))
17
+ end
18
+ end
14
19
  end
15
20
 
16
21
  class Literal < EscapeSequence::Base
@@ -91,4 +96,7 @@ module Regexp::Expression
91
96
  end
92
97
  end
93
98
  end
99
+
100
+ # alias for symmetry between Token::* and Expression::*
101
+ Escape = EscapeSequence
94
102
  end
@@ -1,17 +1,17 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
5
- raise "Can not quantify a free space object"
3
+ def quantify(*_args)
4
+ raise Regexp::Parser::Error, 'Can not quantify a free space object'
6
5
  end
7
6
  end
8
7
 
9
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
10
10
 
11
11
  class WhiteSpace < Regexp::Expression::FreeSpace
12
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
13
14
  text << exp.text
14
15
  end
15
16
  end
16
-
17
17
  end
@@ -1,27 +1,45 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
6
- end
4
+ end
7
5
 
8
- def capturing?; false end
6
+ class Passive < Group::Base
7
+ attr_writer :implicit
8
+
9
+ def initialize(*)
10
+ @implicit = false
11
+ super
12
+ end
9
13
 
10
- def comment?; false end
14
+ def implicit?
15
+ @implicit
16
+ end
11
17
  end
12
18
 
13
- class Atomic < Group::Base; end
14
- class Passive < Group::Base; end
15
19
  class Absence < Group::Base; end
20
+ class Atomic < Group::Base; end
21
+ # TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
22
+ # longer inherit from Group because it is effectively a terminal expression.
16
23
  class Options < Group::Base
17
24
  attr_accessor :option_changes
25
+
26
+ def initialize_copy(orig)
27
+ self.option_changes = orig.option_changes.dup
28
+ super
29
+ end
30
+
31
+ def quantify(*args)
32
+ if token == :options_switch
33
+ raise Regexp::Parser::Error, 'Can not quantify an option switch'
34
+ else
35
+ super
36
+ end
37
+ end
18
38
  end
19
39
 
20
40
  class Capture < Group::Base
21
41
  attr_accessor :number, :number_at_level
22
42
  alias identifier number
23
-
24
- def capturing?; true end
25
43
  end
26
44
 
27
45
  class Named < Group::Capture
@@ -33,18 +51,13 @@ module Regexp::Expression
33
51
  super
34
52
  end
35
53
 
36
- def initialize_clone(orig)
54
+ def initialize_copy(orig)
37
55
  @name = orig.name.dup
38
56
  super
39
57
  end
40
58
  end
41
59
 
42
60
  class Comment < Group::Base
43
- def to_s(_format = :full)
44
- text.dup
45
- end
46
-
47
- def comment?; true end
48
61
  end
49
62
  end
50
63
 
@@ -1,5 +1,7 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
+ # that contains all expressions to its left.
3
5
  class Mark < Regexp::Expression::Base; end
4
6
  end
5
7
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -5,7 +5,11 @@ module Regexp::Expression
5
5
  end
6
6
 
7
7
  def name
8
- token.to_s
8
+ text[/\w+/]
9
9
  end
10
10
  end
11
+
12
+ # alias for symmetry between token symbol and Expression class name
13
+ Posixclass = PosixClass
14
+ Nonposixclass = PosixClass
11
15
  end
@@ -1,24 +1,9 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
- # TODO: this override is here for backwards compatibility, remove in 2.0.0
5
- def initialize(*args)
6
- unless args.first.is_a?(Regexp::Token)
7
- warn('WARNING: Root.new without a Token argument is deprecated and '\
8
- 'will be removed in 2.0.0. Use Root.build for the old behavior.')
9
- return super(self.class.build_token, *args)
10
- end
11
- super
12
- end
13
-
14
- class << self
15
- def build(options = {})
16
- new(build_token, options)
17
- end
18
-
19
- def build_token
20
- Regexp::Token.new(:expression, :root, '', 0)
21
- end
3
# Deprecated way to create an instance from an options hash; emits a
# deprecation warning and forwards to `construct(options: options)`.
#
# The warning interpolates the receiver itself. Interpolating `self.class`
# inside a class method would always print "Class", not the class that
# `build` was called on.
def self.build(options = {})
  warn "`#{self}.build(options)` is deprecated and will raise in "\
       "regexp_parser v3.0.0. Please use `.construct(options: options)`."
  construct(options: options)
end
23
8
  end
24
9
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -7,11 +6,11 @@ module Regexp::Expression
7
6
  end
8
7
 
9
8
  def name
10
- text =~ /\A\\[pP]\{([^}]+)\}\z/; $1
9
+ text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
10
  end
12
11
 
13
12
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
13
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
14
  end
16
15
  end
17
16
 
@@ -117,4 +116,7 @@ module Regexp::Expression
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
118
 
119
+ # alias for symmetry between token symbol and Expression class name
120
+ Property = UnicodeProperty
121
+ Nonproperty = UnicodeProperty
120
122
  end # module Regexp::Expression
@@ -0,0 +1,41 @@
1
module Regexp::Expression
  module Shared
    # Class-level helpers for building expressions from plain attributes
    # and for looking up the syntax token class that belongs to them.
    module ClassMethods
      # Convenience method to init a valid Expression without a Regexp::Token.
      #
      # `params` is merged over `construct_defaults`. The :options entry is
      # handed to `new` separately, the entries named in Regexp::TOKEN_KEYS
      # become the token, and any other leftover key raises ArgumentError.
      def construct(params = {})
        remaining = construct_defaults.merge(params)
        options   = remaining.delete(:options)
        token     = Regexp::Token.new(
          *Regexp::TOKEN_KEYS.map { |key| remaining.delete(key) }
        )
        unless remaining.empty?
          raise ArgumentError, "unsupported attribute(s): #{remaining}"
        end

        new(token, options)
      end

      # Default attributes for `construct`: a class-dependent part (type and,
      # for Root and Sequence subclasses, token) merged with zeroed levels
      # and an empty text.
      def construct_defaults
        class_specific =
          if self == Root
            { type: :expression, token: :root, ts: 0 }
          elsif self < Sequence
            { type: :expression, token: :sequence }
          else
            { type: token_class::Type }
          end

        class_specific.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
      end

      # Returns the Regexp::Syntax::Token module matching this expression
      # class, looked up via the third segment of the class name.
      def token_class
        # no token class because these objects are Parser-generated
        # TODO: synch exp class, token class & type names for this in v3.0.0
        return nil if self == Root || self < Sequence

        # CharacterType::Any maps to Meta rather than to its own namespace.
        return Regexp::Syntax::Token::Meta if self == CharacterType::Any

        namespace = name.split('::')[2]
        Regexp::Syntax::Token.const_get(namespace)
      end
    end

    # Instance-level shortcut to the class-level `token_class`.
    def token_class
      self.class.token_class
    end
  end
end