regexp_parser 2.1.1 → 2.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +6 -5
  3. data/LICENSE +1 -1
  4. data/Rakefile +6 -70
  5. data/lib/regexp_parser/error.rb +1 -1
  6. data/lib/regexp_parser/expression/base.rb +76 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +18 -3
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -7
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +4 -8
  12. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  13. data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  15. data/lib/regexp_parser/expression/classes/free_space.rb +4 -4
  16. data/lib/regexp_parser/expression/classes/group.rb +10 -22
  17. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  20. data/lib/regexp_parser/expression/classes/root.rb +3 -6
  21. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +10 -11
  22. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  23. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +9 -5
  25. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  26. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  27. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  28. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  29. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  30. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  31. data/lib/regexp_parser/expression/quantifier.rb +55 -24
  32. data/lib/regexp_parser/expression/sequence.rb +11 -31
  33. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  34. data/lib/regexp_parser/expression/shared.rb +111 -0
  35. data/lib/regexp_parser/expression/subexpression.rb +26 -18
  36. data/lib/regexp_parser/expression.rb +37 -155
  37. data/lib/regexp_parser/lexer.rb +81 -39
  38. data/lib/regexp_parser/parser.rb +135 -173
  39. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  40. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  41. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  42. data/lib/regexp_parser/scanner/properties/long.csv +651 -0
  43. data/lib/regexp_parser/scanner/properties/short.csv +249 -0
  44. data/lib/regexp_parser/scanner/property.rl +2 -2
  45. data/lib/regexp_parser/scanner/scanner.rl +127 -185
  46. data/lib/regexp_parser/scanner.rb +1185 -1402
  47. data/lib/regexp_parser/syntax/any.rb +2 -7
  48. data/lib/regexp_parser/syntax/base.rb +91 -66
  49. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  50. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  51. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  52. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  53. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  54. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  55. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  56. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  57. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  58. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  59. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  60. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  61. data/lib/regexp_parser/syntax/token/unicode_property.rb +751 -0
  62. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  63. data/lib/regexp_parser/syntax/token.rb +45 -0
  64. data/lib/regexp_parser/syntax/version_lookup.rb +17 -34
  65. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  66. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  67. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  68. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  69. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  70. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  71. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  73. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  74. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  75. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  78. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  79. data/lib/regexp_parser/syntax/versions.rb +4 -2
  80. data/lib/regexp_parser/syntax.rb +2 -2
  81. data/lib/regexp_parser/token.rb +9 -20
  82. data/lib/regexp_parser/version.rb +1 -1
  83. data/lib/regexp_parser.rb +6 -8
  84. data/regexp_parser.gemspec +20 -22
  85. metadata +49 -171
  86. data/CHANGELOG.md +0 -494
  87. data/README.md +0 -479
  88. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  89. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  90. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  91. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  92. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  93. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  94. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  95. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  96. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  97. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  98. data/spec/expression/base_spec.rb +0 -104
  99. data/spec/expression/clone_spec.rb +0 -152
  100. data/spec/expression/conditional_spec.rb +0 -89
  101. data/spec/expression/free_space_spec.rb +0 -27
  102. data/spec/expression/methods/match_length_spec.rb +0 -161
  103. data/spec/expression/methods/match_spec.rb +0 -25
  104. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  105. data/spec/expression/methods/tests_spec.rb +0 -99
  106. data/spec/expression/methods/traverse_spec.rb +0 -161
  107. data/spec/expression/options_spec.rb +0 -128
  108. data/spec/expression/subexpression_spec.rb +0 -50
  109. data/spec/expression/to_h_spec.rb +0 -26
  110. data/spec/expression/to_s_spec.rb +0 -108
  111. data/spec/lexer/all_spec.rb +0 -22
  112. data/spec/lexer/conditionals_spec.rb +0 -53
  113. data/spec/lexer/delimiters_spec.rb +0 -68
  114. data/spec/lexer/escapes_spec.rb +0 -14
  115. data/spec/lexer/keep_spec.rb +0 -10
  116. data/spec/lexer/literals_spec.rb +0 -64
  117. data/spec/lexer/nesting_spec.rb +0 -99
  118. data/spec/lexer/refcalls_spec.rb +0 -60
  119. data/spec/parser/all_spec.rb +0 -43
  120. data/spec/parser/alternation_spec.rb +0 -88
  121. data/spec/parser/anchors_spec.rb +0 -17
  122. data/spec/parser/conditionals_spec.rb +0 -179
  123. data/spec/parser/errors_spec.rb +0 -30
  124. data/spec/parser/escapes_spec.rb +0 -121
  125. data/spec/parser/free_space_spec.rb +0 -130
  126. data/spec/parser/groups_spec.rb +0 -108
  127. data/spec/parser/keep_spec.rb +0 -6
  128. data/spec/parser/options_spec.rb +0 -28
  129. data/spec/parser/posix_classes_spec.rb +0 -8
  130. data/spec/parser/properties_spec.rb +0 -115
  131. data/spec/parser/quantifiers_spec.rb +0 -68
  132. data/spec/parser/refcalls_spec.rb +0 -117
  133. data/spec/parser/set/intersections_spec.rb +0 -127
  134. data/spec/parser/set/ranges_spec.rb +0 -111
  135. data/spec/parser/sets_spec.rb +0 -178
  136. data/spec/parser/types_spec.rb +0 -18
  137. data/spec/scanner/all_spec.rb +0 -18
  138. data/spec/scanner/anchors_spec.rb +0 -21
  139. data/spec/scanner/conditionals_spec.rb +0 -128
  140. data/spec/scanner/delimiters_spec.rb +0 -52
  141. data/spec/scanner/errors_spec.rb +0 -67
  142. data/spec/scanner/escapes_spec.rb +0 -64
  143. data/spec/scanner/free_space_spec.rb +0 -165
  144. data/spec/scanner/groups_spec.rb +0 -61
  145. data/spec/scanner/keep_spec.rb +0 -10
  146. data/spec/scanner/literals_spec.rb +0 -39
  147. data/spec/scanner/meta_spec.rb +0 -18
  148. data/spec/scanner/options_spec.rb +0 -36
  149. data/spec/scanner/properties_spec.rb +0 -64
  150. data/spec/scanner/quantifiers_spec.rb +0 -25
  151. data/spec/scanner/refcalls_spec.rb +0 -55
  152. data/spec/scanner/sets_spec.rb +0 -151
  153. data/spec/scanner/types_spec.rb +0 -14
  154. data/spec/spec_helper.rb +0 -16
  155. data/spec/support/runner.rb +0 -42
  156. data/spec/support/shared_examples.rb +0 -77
  157. data/spec/support/warning_extractor.rb +0 -60
  158. data/spec/syntax/syntax_spec.rb +0 -48
  159. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  160. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  161. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  162. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  163. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  164. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  165. data/spec/syntax/versions/aliases_spec.rb +0 -37
  166. data/spec/token/token_spec.rb +0 -85
  167. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
4
- data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
3
+ metadata.gz: c88d5bc178e9bf95a8a008d9d5e9d8cf1b4a8bb0d65310901a995daa448a28f4
4
+ data.tar.gz: 47c1ed4782981f5cc2a0bb7bd8f402e360cd60ebeba33615df0c94dd3842b48c
5
5
  SHA512:
6
- metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
7
- data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
6
+ metadata.gz: 5dc1bf229c259b762ea38f459f70a9a04e5ee08207fbae04bdf9045f9f2b1c0f0b6a716a3e08fda55ca0b769ef55f480f7f0e19f3412175fdc7a475362889ab3
7
+ data.tar.gz: 5de692c1cce8f2436936752d0cf6c5ea51d84bb9c63110dcc49621a476b47800300911952f4d4a687c81f151886bc5570b14af559d74b5196b63e13c684ab7c5
data/Gemfile CHANGED
@@ -3,12 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'ice_nine', '~> 0.11.2'
7
- gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.0'
6
+ gem 'leto', '~> 2.1'
7
+ gem 'rake', '~> 13.1'
8
+ gem 'regexp_property_values', '~> 1.5'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
- gem 'gouteur'
12
- gem 'rubocop', '~> 1.7'
11
+ gem 'benchmark-ips', '~> 2.1'
12
+ gem 'gouteur', '~> 1.1'
13
+ gem 'rubocop', '~> 1.59'
13
14
  end
14
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2024, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/Rakefile CHANGED
@@ -1,87 +1,23 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
6
+ require 'rspec/core/rake_task'
5
7
 
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
9
-
10
- RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
11
- RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
12
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
-
8
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
14
9
 
15
10
  Bundler::GemHelper.install_tasks
16
11
 
12
+ RSpec::Core::RakeTask.new(:spec)
17
13
 
18
14
  task :default => [:'test:full']
19
15
 
20
16
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
17
+ task full: [:'ragel:rb', :spec]
24
18
  end
25
19
 
26
- namespace :ragel do
27
- desc "Process the ragel source files and output ruby code"
28
- task :rb do
29
- RAGEL_SOURCE_FILES.each do |source_file|
30
- output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
31
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
32
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
33
-
34
- contents = File.read(output_file)
35
-
36
- File.open(output_file, 'r+') do |file|
37
- contents = "# -*- warn-indent:false; -*-\n" + contents
38
-
39
- file.write(contents)
40
- end
41
- end
42
- end
43
-
44
- desc "Delete the ragel generated source file(s)"
45
- task :clean do
46
- RAGEL_SOURCE_FILES.each do |file|
47
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
48
- end
49
- end
50
- end
51
-
52
-
53
20
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
21
  # latest scanner code is generated and included in the build.
55
22
  desc "Runs ragel:rb before building the gem"
56
23
  task :build => ['ragel:rb']
57
-
58
-
59
- namespace :props do
60
- desc 'Write new property value hashes for the properties scanner'
61
- task :update do
62
- require 'regexp_property_values'
63
- RegexpPropertyValues.update
64
- dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
65
-
66
- require 'psych'
67
- write_hash_to_file = ->(hash, path) do
68
- File.open(path, 'w') do |f|
69
- f.puts '#',
70
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
71
- '#',
72
- hash.sort.to_h.to_yaml
73
- end
74
- puts "Wrote #{hash.count} aliases to `#{path}`"
75
- end
76
-
77
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
78
- [val.identifier, val.full_name.downcase]
79
- end
80
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
81
-
82
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
83
- [k.identifier, v.full_name.downcase]
84
- end
85
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
86
- end
87
- end
@@ -1,4 +1,4 @@
1
1
  class Regexp::Parser
2
- # base class for all gem-specific errors (inherited but never raised itself)
2
+ # base class for all gem-specific errors
3
3
  class Error < StandardError; end
4
4
  end
@@ -0,0 +1,76 @@
1
+ module Regexp::Expression
2
+ class Base
3
+ include Regexp::Expression::Shared
4
+
5
+ def initialize(token, options = {})
6
+ init_from_token_and_options(token, options)
7
+ end
8
+
9
+ def to_re(format = :full)
10
+ if set_level > 0
11
+ warn "Calling #to_re on character set members is deprecated - "\
12
+ "their behavior might not be equivalent outside of the set."
13
+ end
14
+ ::Regexp.new(to_s(format))
15
+ end
16
+
17
+ def quantify(*args)
18
+ self.quantifier = Quantifier.new(*args)
19
+ end
20
+
21
+ def unquantified_clone
22
+ clone.tap { |exp| exp.quantifier = nil }
23
+ end
24
+
25
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
26
+ def quantity
27
+ return [nil,nil] unless quantified?
28
+ [quantifier.min, quantifier.max]
29
+ end
30
+
31
+ def repetitions
32
+ @repetitions ||=
33
+ if quantified?
34
+ min = quantifier.min
35
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
36
+ range = min..max
37
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
38
+ if RUBY_VERSION.to_f < 2.7
39
+ range.define_singleton_method(:minmax) { [min, max] }
40
+ end
41
+ range
42
+ else
43
+ 1..1
44
+ end
45
+ end
46
+
47
+ def greedy?
48
+ quantified? and quantifier.greedy?
49
+ end
50
+
51
+ def reluctant?
52
+ quantified? and quantifier.reluctant?
53
+ end
54
+ alias :lazy? :reluctant?
55
+
56
+ def possessive?
57
+ quantified? and quantifier.possessive?
58
+ end
59
+
60
+ def to_h
61
+ {
62
+ type: type,
63
+ token: token,
64
+ text: to_s(:base),
65
+ starts_at: ts,
66
+ length: full_length,
67
+ level: level,
68
+ set_level: set_level,
69
+ conditional_level: conditional_level,
70
+ options: options,
71
+ quantifier: quantified? ? quantifier.to_h : nil,
72
+ }
73
+ end
74
+ alias :attributes :to_h
75
+ end
76
+ end
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -4,7 +4,19 @@ module Regexp::Expression
4
4
  attr_accessor :referenced_expression
5
5
 
6
6
  def initialize_copy(orig)
7
- self.referenced_expression = orig.referenced_expression.dup
7
+ exp_id = [self.class, self.starts_at]
8
+
9
+ # prevent infinite recursion for recursive subexp calls
10
+ copied = @@copied ||= {}
11
+ self.referenced_expression =
12
+ if copied[exp_id]
13
+ orig.referenced_expression
14
+ else
15
+ copied[exp_id] = true
16
+ orig.referenced_expression.dup
17
+ end
18
+ copied.clear
19
+
8
20
  super
9
21
  end
10
22
  end
@@ -14,7 +26,7 @@ module Regexp::Expression
14
26
  alias reference number
15
27
 
16
28
  def initialize(token, options = {})
17
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
29
+ @number = token.text[/-?\d+/].to_i
18
30
  super
19
31
  end
20
32
  end
@@ -38,7 +50,7 @@ module Regexp::Expression
38
50
  class NameCall < Backreference::Name; end
39
51
  class NumberCallRelative < Backreference::NumberRelative; end
40
52
 
41
- class NumberRecursionLevel < Backreference::Number
53
+ class NumberRecursionLevel < Backreference::NumberRelative
42
54
  attr_reader :recursion_level
43
55
 
44
56
  def initialize(token, options = {})
@@ -57,4 +69,7 @@ module Regexp::Expression
57
69
  end
58
70
  end
59
71
  end
72
+
73
+ # alias for symmetry between token symbol and Expression class name
74
+ Backref = Backreference
60
75
  end
@@ -1,10 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
9
  complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
15
14
  def complete?
16
15
  count == 2
17
16
  end
18
-
19
- def to_s(_format = :full)
20
- expressions.join(text)
21
- end
22
17
  end
23
18
  end
24
19
  end
@@ -1,10 +1,7 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  attr_accessor :closed, :negative
4
-
5
- alias :negative? :negative
6
- alias :negated? :negative
7
- alias :closed? :closed
4
+ alias :closed? :closed
8
5
 
9
6
  def initialize(token, options = {})
10
7
  self.negative = false
@@ -19,9 +16,8 @@ module Regexp::Expression
19
16
  def close
20
17
  self.closed = true
21
18
  end
22
-
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
25
- end
26
19
  end
20
+
21
+ # alias for symmetry between token symbol and Expression class name
22
+ Set = CharacterSet
27
23
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -31,9 +31,9 @@ module Regexp::Expression
31
31
  expressions.last << exp
32
32
  end
33
33
 
34
- def add_sequence(active_opts = {})
34
+ def add_sequence(active_opts = {}, params = { ts: 0 })
35
35
  raise TooManyBranches.new if branches.length == 2
36
- params = { conditional_level: conditional_level + 1 }
36
+ params = params.merge({ conditional_level: conditional_level + 1 })
37
37
  Branch.add_to(self, params, active_opts)
38
38
  end
39
39
  alias :branch :add_sequence
@@ -55,10 +55,6 @@ module Regexp::Expression
55
55
  condition.reference
56
56
  end
57
57
 
58
- def to_s(format = :full)
59
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
60
- end
61
-
62
58
  def initialize_copy(orig)
63
59
  self.referenced_expression = orig.referenced_expression.dup
64
60
  super
@@ -1,16 +1,21 @@
1
1
  module Regexp::Expression
2
2
  module EscapeSequence
3
3
  class Base < Regexp::Expression::Base
4
- require 'yaml'
5
-
6
- def char
7
- # poor man's unescape without using eval
8
- YAML.load(%Q(---\n"#{text}"\n))
9
- end
10
-
11
4
  def codepoint
12
5
  char.ord
13
6
  end
7
+
8
+ if ''.respond_to?(:undump)
9
+ def char
10
+ %("#{text}").undump
11
+ end
12
+ else
13
+ # poor man's unescape without using eval
14
+ require 'yaml'
15
+ def char
16
+ YAML.load(%Q(---\n"#{text}"\n))
17
+ end
18
+ end
14
19
  end
15
20
 
16
21
  class Literal < EscapeSequence::Base
@@ -91,4 +96,7 @@ module Regexp::Expression
91
96
  end
92
97
  end
93
98
  end
99
+
100
+ # alias for symmetry between Token::* and Expression::*
101
+ Escape = EscapeSequence
94
102
  end
@@ -1,17 +1,17 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
- def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
3
+ def quantify(*_args)
5
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
6
5
  end
7
6
  end
8
7
 
9
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
10
10
 
11
11
  class WhiteSpace < Regexp::Expression::FreeSpace
12
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
13
14
  text << exp.text
14
15
  end
15
16
  end
16
-
17
17
  end
@@ -1,13 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
6
- end
7
-
8
- def capturing?; false end
9
-
10
- def comment?; false end
11
4
  end
12
5
 
13
6
  class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
18
11
  super
19
12
  end
20
13
 
21
- def to_s(format = :full)
22
- if implicit?
23
- "#{expressions.join}#{quantifier_affix(format)}"
24
- else
25
- super
26
- end
27
- end
28
-
29
14
  def implicit?
30
15
  @implicit
31
16
  end
@@ -33,6 +18,8 @@ module Regexp::Expression
33
18
 
34
19
  class Absence < Group::Base; end
35
20
  class Atomic < Group::Base; end
21
+ # TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
22
+ # longer inherit from Group because it is effectively a terminal expression.
36
23
  class Options < Group::Base
37
24
  attr_accessor :option_changes
38
25
 
@@ -40,13 +27,19 @@ module Regexp::Expression
40
27
  self.option_changes = orig.option_changes.dup
41
28
  super
42
29
  end
30
+
31
+ def quantify(*args)
32
+ if token == :options_switch
33
+ raise Regexp::Parser::Error, 'Can not quantify an option switch'
34
+ else
35
+ super
36
+ end
37
+ end
43
38
  end
44
39
 
45
40
  class Capture < Group::Base
46
41
  attr_accessor :number, :number_at_level
47
42
  alias identifier number
48
-
49
- def capturing?; true end
50
43
  end
51
44
 
52
45
  class Named < Group::Capture
@@ -65,11 +58,6 @@ module Regexp::Expression
65
58
  end
66
59
 
67
60
  class Comment < Group::Base
68
- def to_s(_format = :full)
69
- text.dup
70
- end
71
-
72
- def comment?; true end
73
61
  end
74
62
  end
75
63
 
@@ -1,5 +1,7 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
+ # that contains all expressions to its left.
3
5
  class Mark < Regexp::Expression::Base; end
4
6
  end
5
7
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,11 +1,11 @@
1
1
  module Regexp::Expression
2
2
  class PosixClass < Regexp::Expression::Base
3
- def negative?
4
- type == :nonposixclass
5
- end
6
-
7
3
  def name
8
- token.to_s
4
+ text[/\w+/]
9
5
  end
10
6
  end
7
+
8
+ # alias for symmetry between token symbol and Expression class name
9
+ Posixclass = PosixClass
10
+ Nonposixclass = PosixClass
11
11
  end
@@ -1,12 +1,9 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
- new(build_token, options)
6
- end
7
-
8
- def self.build_token
9
- Regexp::Token.new(:expression, :root, '', 0)
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
10
7
  end
11
8
  end
12
9
  end
@@ -1,17 +1,12 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
- def negative?
6
- type == :nonproperty
7
- end
8
-
9
4
  def name
10
5
  text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
6
  end
12
7
 
13
8
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
9
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
10
  end
16
11
  end
17
12
 
@@ -110,11 +105,15 @@ module Regexp::Expression
110
105
  class Unassigned < Codepoint::Base; end
111
106
  end
112
107
 
113
- class Age < UnicodeProperty::Base; end
114
- class Derived < UnicodeProperty::Base; end
115
- class Emoji < UnicodeProperty::Base; end
116
- class Script < UnicodeProperty::Base; end
117
- class Block < UnicodeProperty::Base; end
108
+ class Age < UnicodeProperty::Base; end
109
+ class Block < UnicodeProperty::Base; end
110
+ class Derived < UnicodeProperty::Base; end
111
+ class Emoji < UnicodeProperty::Base; end
112
+ class Enumerated < UnicodeProperty::Base; end
113
+ class Script < UnicodeProperty::Base; end
118
114
  end
119
115
 
116
+ # alias for symmetry between token symbol and Expression class name
117
+ Property = UnicodeProperty
118
+ Nonproperty = UnicodeProperty
120
119
  end # module Regexp::Expression
@@ -0,0 +1,41 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ else
32
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
33
+ end
34
+ end
35
+ end
36
+
37
+ def token_class
38
+ self.class.token_class
39
+ end
40
+ end
41
+ end