regexp_parser 2.8.2 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5309aa54a7dc49b910246ad7efd9bf1e2f6b19917c41f2c4dd389568c7a1d0c5
4
- data.tar.gz: 0174ec36fcec3ca3696f7e5fbe2eeaa5e9d75b09d4e0891bd841da997101f123
3
+ metadata.gz: 8cc1826647cde51d6d1b5a5a58fb005efd2a38a85fa0e817616591ee2fad7862
4
+ data.tar.gz: 572a6203741b9970bcedc1ace243ea0b9c300ca60b71ac263036eb0f4222dd50
5
5
  SHA512:
6
- metadata.gz: b52cfb89d0cb55c63844fe5f808c8472c2987746e33d71fe2ede2b9759d3f572611679050a0ae407a391c0f2ba88e9049beb579c2ead9c037b711c9a0c62b18b
7
- data.tar.gz: 708e81a6463887e849ed1c560eb36abae556dc7b87c1f3fac30f74b1822a778b7d711c2fbba52ad9114db1ee5ba3a47f3898d17023de97b56799aed891e34c7f
6
+ metadata.gz: 3521fe6dab4be0c0db3c37f3f8d196fc754ff72937336a73ef5547a15ae4f2d366aa28e73d6e5756920d610b943ee51cb2db8e51e53ccb19c1c235a8c45da708
7
+ data.tar.gz: d05b7babb79c118bdc36ae168d8199ee3500b0cff33cb00ed46d51a4a88725130e931c588146a3f989dd87778b1f39684b2c8a5541c9ac8f91427fc31b1ec97a
data/Gemfile CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'leto', '~> 2.0'
7
- gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.4'
6
+ gem 'leto', '~> 2.1'
7
+ gem 'rake', '~> 13.1'
8
+ gem 'regexp_property_values', '~> 1.5'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
11
  gem 'benchmark-ips', '~> 2.1'
12
12
  gem 'gouteur', '~> 1.1'
13
- gem 'rubocop', '~> 1.7'
13
+ gem 'rubocop', '~> 1.59'
14
14
  end
15
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2023, Ammar Ali
1
+ Copyright (c) 2010, 2012-2024, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
@@ -1,10 +1,7 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  attr_accessor :closed, :negative
4
-
5
- alias :negative? :negative
6
- alias :negated? :negative
7
- alias :closed? :closed
4
+ alias :closed? :closed
8
5
 
9
6
  def initialize(token, options = {})
10
7
  self.negative = false
@@ -1,9 +1,5 @@
1
1
  module Regexp::Expression
2
2
  class PosixClass < Regexp::Expression::Base
3
- def negative?
4
- type == :nonposixclass
5
- end
6
-
7
3
  def name
8
4
  text[/\w+/]
9
5
  end
@@ -1,10 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module UnicodeProperty
3
3
  class Base < Regexp::Expression::Base
4
- def negative?
5
- type == :nonproperty
6
- end
7
-
8
4
  def name
9
5
  text[/\A\\[pP]\{([^}]+)\}\z/, 1]
10
6
  end
@@ -109,11 +105,12 @@ module Regexp::Expression
109
105
  class Unassigned < Codepoint::Base; end
110
106
  end
111
107
 
112
- class Age < UnicodeProperty::Base; end
113
- class Derived < UnicodeProperty::Base; end
114
- class Emoji < UnicodeProperty::Base; end
115
- class Script < UnicodeProperty::Base; end
116
- class Block < UnicodeProperty::Base; end
108
+ class Age < UnicodeProperty::Base; end
109
+ class Block < UnicodeProperty::Base; end
110
+ class Derived < UnicodeProperty::Base; end
111
+ class Emoji < UnicodeProperty::Base; end
112
+ class Enumerated < UnicodeProperty::Base; end
113
+ class Script < UnicodeProperty::Base; end
117
114
  end
118
115
 
119
116
  # alias for symmetry between token symbol and Expression class name
@@ -0,0 +1,20 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def negative?
4
+ false
5
+ end
6
+
7
+ # not an alias so as to respect overrides of #negative?
8
+ def negated?
9
+ negative?
10
+ end
11
+ end
12
+
13
+ Anchor::NonWordBoundary.class_eval { def negative?; true end }
14
+ Assertion::NegativeLookahead.class_eval { def negative?; true end }
15
+ Assertion::NegativeLookbehind.class_eval { def negative?; true end }
16
+ CharacterSet.class_eval { def negative?; negative end }
17
+ CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
18
+ PosixClass.class_eval { def negative?; type == :nonposixclass end }
19
+ UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
20
+ end
@@ -28,6 +28,7 @@ require 'regexp_parser/expression/methods/construct'
28
28
  require 'regexp_parser/expression/methods/human_name'
29
29
  require 'regexp_parser/expression/methods/match'
30
30
  require 'regexp_parser/expression/methods/match_length'
31
+ require 'regexp_parser/expression/methods/negative'
31
32
  require 'regexp_parser/expression/methods/options'
32
33
  require 'regexp_parser/expression/methods/parts'
33
34
  require 'regexp_parser/expression/methods/printing'
@@ -467,6 +467,7 @@ class Regexp::Parser
467
467
  when *UPTokens::Age; node << UP::Age.new(token, active_opts)
468
468
  when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
469
469
  when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
470
+ when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
470
471
  when *UPTokens::Script; node << UP::Script.new(token, active_opts)
471
472
  when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
472
473
 
@@ -8,6 +8,7 @@ age=12.1,age=12.1
8
8
  age=13.0,age=13.0
9
9
  age=14.0,age=14.0
10
10
  age=15.0,age=15.0
11
+ age=15.1,age=15.1
11
12
  age=2.0,age=2.0
12
13
  age=2.1,age=2.1
13
14
  age=3.0,age=3.0
@@ -108,6 +109,19 @@ gothic,gothic
108
109
  grantha,grantha
109
110
  graph,graph
110
111
  graphemebase,grapheme_base
112
+ graphemeclusterbreak=control,grapheme_cluster_break=control
113
+ graphemeclusterbreak=cr,grapheme_cluster_break=cr
114
+ graphemeclusterbreak=extend,grapheme_cluster_break=extend
115
+ graphemeclusterbreak=l,grapheme_cluster_break=l
116
+ graphemeclusterbreak=lf,grapheme_cluster_break=lf
117
+ graphemeclusterbreak=lv,grapheme_cluster_break=lv
118
+ graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
119
+ graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
120
+ graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
121
+ graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
122
+ graphemeclusterbreak=t,grapheme_cluster_break=t
123
+ graphemeclusterbreak=v,grapheme_cluster_break=v
124
+ graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
111
125
  graphemeextend,grapheme_extend
112
126
  graphemelink,grapheme_link
113
127
  greek,greek
@@ -123,11 +137,14 @@ hebrew,hebrew
123
137
  hexdigit,hex_digit
124
138
  hiragana,hiragana
125
139
  hyphen,hyphen
140
+ idcompatmathcontinue,id_compat_math_continue
141
+ idcompatmathstart,id_compat_math_start
126
142
  idcontinue,id_continue
127
143
  ideographic,ideographic
128
144
  idsbinaryoperator,ids_binary_operator
129
145
  idstart,id_start
130
146
  idstrinaryoperator,ids_trinary_operator
147
+ idsunaryoperator,ids_unary_operator
131
148
  imperialaramaic,imperial_aramaic
132
149
  inadlam,in_adlam
133
150
  inaegeannumbers,in_aegean_numbers
@@ -190,6 +207,7 @@ incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
190
207
  incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
191
208
  incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
192
209
  incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
210
+ incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
193
211
  incombiningdiacriticalmarks,in_combining_diacritical_marks
194
212
  incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
195
213
  incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
@@ -86,6 +86,7 @@ ideo,ideographic
86
86
  ids,id_start
87
87
  idsb,ids_binary_operator
88
88
  idst,ids_trinary_operator
89
+ idsu,ids_unary_operator
89
90
  ital,old_italic
90
91
  java,javanese
91
92
  joinc,join_control
@@ -364,6 +364,7 @@
364
364
  conditional_expression := |*
365
365
  group_lookup . ')' {
366
366
  text = copy(data, ts, te-1)
367
+ text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
367
368
  emit(:conditional, :condition, text)
368
369
  emit(:conditional, :condition_close, ')')
369
370
  };
@@ -541,13 +542,13 @@
541
542
  case text = copy(data, ts, te)
542
543
  when /^\\k(.)[^0-9\-][^+\-]*['>]$/
543
544
  emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
544
- when /^\\k(.)[1-9]\d*['>]$/
545
+ when /^\\k(.)0*[1-9]\d*['>]$/
545
546
  emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
546
- when /^\\k(.)-[1-9]\d*['>]$/
547
+ when /^\\k(.)-0*[1-9]\d*['>]$/
547
548
  emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
548
549
  when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
549
550
  emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
550
- when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
551
+ when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
551
552
  emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
552
553
  else
553
554
  raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
@@ -560,9 +561,9 @@
560
561
  case text = copy(data, ts, te)
561
562
  when /^\\g(.)[^0-9+\-].*['>]$/
562
563
  emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
563
- when /^\\g(.)\d+['>]$/
564
+ when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
564
565
  emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
565
- when /^\\g(.)[+-]\d+/
566
+ when /^\\g(.)[+-]0*[1-9]\d*/
566
567
  emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
567
568
  else
568
569
  raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
@@ -1570,6 +1570,7 @@ end
1570
1570
  te = p+1
1571
1571
  begin
1572
1572
  text = copy(data, ts, te-1)
1573
+ text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
1573
1574
  emit(:conditional, :condition, text)
1574
1575
  emit(:conditional, :condition_close, ')')
1575
1576
  end
@@ -1750,13 +1751,13 @@ te = p+1
1750
1751
  case text = copy(data, ts, te)
1751
1752
  when /^\\k(.)[^0-9\-][^+\-]*['>]$/
1752
1753
  emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
1753
- when /^\\k(.)[1-9]\d*['>]$/
1754
+ when /^\\k(.)0*[1-9]\d*['>]$/
1754
1755
  emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
1755
- when /^\\k(.)-[1-9]\d*['>]$/
1756
+ when /^\\k(.)-0*[1-9]\d*['>]$/
1756
1757
  emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
1757
1758
  when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
1758
1759
  emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
1759
- when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
1760
+ when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
1760
1761
  emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
1761
1762
  else
1762
1763
  raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
@@ -1770,9 +1771,9 @@ te = p+1
1770
1771
  case text = copy(data, ts, te)
1771
1772
  when /^\\g(.)[^0-9+\-].*['>]$/
1772
1773
  emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
1773
- when /^\\g(.)\d+['>]$/
1774
+ when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
1774
1775
  emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
1775
- when /^\\g(.)[+-]\d+/
1776
+ when /^\\g(.)[+-]0*[1-9]\d*/
1776
1777
  emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
1777
1778
  else
1778
1779
  raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
@@ -703,6 +703,24 @@ module Regexp::Syntax
703
703
  extended_pictographic
704
704
  ]
705
705
 
706
+ Enumerated_V2_4_0 = %i[
707
+ grapheme_cluster_break=control
708
+ grapheme_cluster_break=cr
709
+ grapheme_cluster_break=extend
710
+ grapheme_cluster_break=l
711
+ grapheme_cluster_break=lf
712
+ grapheme_cluster_break=lv
713
+ grapheme_cluster_break=lvt
714
+ grapheme_cluster_break=prepend
715
+ grapheme_cluster_break=regional_indicator
716
+ grapheme_cluster_break=spacingmark
717
+ grapheme_cluster_break=t
718
+ grapheme_cluster_break=v
719
+ grapheme_cluster_break=zwj
720
+ ]
721
+
722
+ Enumerated = all[:Enumerated_V]
723
+
706
724
  Emoji = all[:Emoji_V]
707
725
 
708
726
  V1_9_0 = Category::All + POSIX + all[:V1_9_0]
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.8.2'
3
+ VERSION = '2.9.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.8.2
4
+ version: 2.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2023-10-10 00:00:00.000000000 Z
12
+ date: 2024-01-07 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
15
15
  email:
@@ -46,6 +46,7 @@ files:
46
46
  - lib/regexp_parser/expression/methods/human_name.rb
47
47
  - lib/regexp_parser/expression/methods/match.rb
48
48
  - lib/regexp_parser/expression/methods/match_length.rb
49
+ - lib/regexp_parser/expression/methods/negative.rb
49
50
  - lib/regexp_parser/expression/methods/options.rb
50
51
  - lib/regexp_parser/expression/methods/parts.rb
51
52
  - lib/regexp_parser/expression/methods/printing.rb