regexp_parser 2.6.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +5 -10
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -20
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +21 -91
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +5 -0
  18. data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +68 -0
  19. data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
  20. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  21. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  22. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  23. data/lib/regexp_parser/expression/methods/referenced_expressions.rb +28 -0
  24. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  25. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  26. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  27. data/lib/regexp_parser/expression/sequence.rb +5 -10
  28. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  29. data/lib/regexp_parser/expression/shared.rb +37 -20
  30. data/lib/regexp_parser/expression/subexpression.rb +20 -15
  31. data/lib/regexp_parser/expression.rb +37 -31
  32. data/lib/regexp_parser/lexer.rb +76 -36
  33. data/lib/regexp_parser/parser.rb +107 -103
  34. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  35. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  36. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  37. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  38. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  39. data/lib/regexp_parser/scanner/property.rl +2 -2
  40. data/lib/regexp_parser/scanner/scanner.rl +101 -172
  41. data/lib/regexp_parser/scanner.rb +1171 -1365
  42. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  43. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  44. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  45. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  46. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  47. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  48. data/lib/regexp_parser/syntax/token.rb +13 -13
  49. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  50. data/lib/regexp_parser/syntax/versions.rb +3 -1
  51. data/lib/regexp_parser/syntax.rb +1 -1
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +6 -6
  54. data/regexp_parser.gemspec +5 -5
  55. metadata +17 -8
  56. data/CHANGELOG.md +0 -601
  57. data/README.md +0 -503
@@ -26,5 +26,8 @@ module Regexp::Syntax
26
26
 
27
27
  Map[Backreference::Type] = Backreference::All +
28
28
  SubexpressionCall::All
29
+
30
+ # alias for symmetry between token symbol and Expression class name
31
+ Backref = Backreference
29
32
  end
30
33
  end
@@ -9,5 +9,8 @@ module Regexp::Syntax
9
9
  end
10
10
 
11
11
  Map[CharacterSet::Type] = CharacterSet::All
12
+
13
+ # alias for symmetry between token symbol and Token module name
14
+ Set = CharacterSet
12
15
  end
13
16
  end
@@ -1,6 +1,5 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
- # TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
4
3
  module Escape
5
4
  Basic = %i[backslash literal]
6
5
 
@@ -27,5 +26,8 @@ module Regexp::Syntax
27
26
  end
28
27
 
29
28
  Map[Escape::Type] = Escape::All
29
+
30
+ # alias for symmetry between Token::* and Expression::*
31
+ EscapeSequence = Escape
30
32
  end
31
33
  end
@@ -1,13 +1,20 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Meta
4
- Basic = %i[dot]
5
- Extended = Basic + %i[alternation]
4
+ Basic = %i[dot]
5
+ Alternation = %i[alternation]
6
+ Extended = Basic + Alternation
6
7
 
7
8
  All = Extended
8
9
  Type = :meta
9
10
  end
10
11
 
11
12
  Map[Meta::Type] = Meta::All
13
+
14
+ # alias for symmetry between Token::* and Expression::*
15
+ module Alternation
16
+ All = Meta::Alternation
17
+ Type = Meta::Type
18
+ end
12
19
  end
13
20
  end
@@ -59,7 +59,7 @@ module Regexp::Syntax
59
59
 
60
60
  Age_V3_1_0 = %i[age=13.0]
61
61
 
62
- Age_V3_2_0 = %i[age=14.0]
62
+ Age_V3_2_0 = %i[age=14.0 age=15.0]
63
63
 
64
64
  Age = all[:Age_V]
65
65
 
@@ -321,6 +321,8 @@ module Regexp::Syntax
321
321
 
322
322
  Script_V3_2_0 = %i[
323
323
  cypro_minoan
324
+ kawi
325
+ nag_mundari
324
326
  old_uyghur
325
327
  tangsa
326
328
  toto
@@ -667,11 +669,18 @@ module Regexp::Syntax
667
669
 
668
670
  UnicodeBlock_V3_2_0 = %i[
669
671
  in_arabic_extended_b
672
+ in_arabic_extended_c
673
+ in_cjk_unified_ideographs_extension_h
670
674
  in_cypro_minoan
675
+ in_cyrillic_extended_d
676
+ in_devanagari_extended_a
671
677
  in_ethiopic_extended_b
678
+ in_kaktovik_numerals
672
679
  in_kana_extended_b
680
+ in_kawi
673
681
  in_latin_extended_f
674
682
  in_latin_extended_g
683
+ in_nag_mundari
675
684
  in_old_uyghur
676
685
  in_tangsa
677
686
  in_toto
@@ -690,6 +699,28 @@ module Regexp::Syntax
690
699
  emoji_presentation
691
700
  ]
692
701
 
702
+ Emoji_V2_6_0 = %i[
703
+ extended_pictographic
704
+ ]
705
+
706
+ Enumerated_V2_4_0 = %i[
707
+ grapheme_cluster_break=control
708
+ grapheme_cluster_break=cr
709
+ grapheme_cluster_break=extend
710
+ grapheme_cluster_break=l
711
+ grapheme_cluster_break=lf
712
+ grapheme_cluster_break=lv
713
+ grapheme_cluster_break=lvt
714
+ grapheme_cluster_break=prepend
715
+ grapheme_cluster_break=regional_indicator
716
+ grapheme_cluster_break=spacingmark
717
+ grapheme_cluster_break=t
718
+ grapheme_cluster_break=v
719
+ grapheme_cluster_break=zwj
720
+ ]
721
+
722
+ Enumerated = all[:Enumerated_V]
723
+
693
724
  Emoji = all[:Emoji_V]
694
725
 
695
726
  V1_9_0 = Category::All + POSIX + all[:V1_9_0]
@@ -713,5 +744,8 @@ module Regexp::Syntax
713
744
 
714
745
  Map[UnicodeProperty::Type] = UnicodeProperty::All
715
746
  Map[UnicodeProperty::NonType] = UnicodeProperty::All
747
+
748
+ # alias for symmetry between token symbol and Token module name
749
+ Property = UnicodeProperty
716
750
  end
717
751
  end
@@ -0,0 +1,11 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Virtual
4
+ Root = %i[root]
5
+ Sequence = %i[sequence]
6
+
7
+ All = %i[root sequence]
8
+ Type = :expression
9
+ end
10
+ end
11
+ end
@@ -20,19 +20,19 @@ end
20
20
 
21
21
 
22
22
  # Load all the token files, they will populate the Map constant.
23
- require 'regexp_parser/syntax/token/anchor'
24
- require 'regexp_parser/syntax/token/assertion'
25
- require 'regexp_parser/syntax/token/backreference'
26
- require 'regexp_parser/syntax/token/posix_class'
27
- require 'regexp_parser/syntax/token/character_set'
28
- require 'regexp_parser/syntax/token/character_type'
29
- require 'regexp_parser/syntax/token/conditional'
30
- require 'regexp_parser/syntax/token/escape'
31
- require 'regexp_parser/syntax/token/group'
32
- require 'regexp_parser/syntax/token/keep'
33
- require 'regexp_parser/syntax/token/meta'
34
- require 'regexp_parser/syntax/token/quantifier'
35
- require 'regexp_parser/syntax/token/unicode_property'
23
+ require_relative 'token/anchor'
24
+ require_relative 'token/assertion'
25
+ require_relative 'token/backreference'
26
+ require_relative 'token/posix_class'
27
+ require_relative 'token/character_set'
28
+ require_relative 'token/character_type'
29
+ require_relative 'token/conditional'
30
+ require_relative 'token/escape'
31
+ require_relative 'token/group'
32
+ require_relative 'token/keep'
33
+ require_relative 'token/meta'
34
+ require_relative 'token/quantifier'
35
+ require_relative 'token/unicode_property'
36
36
 
37
37
 
38
38
  # After loading all the tokens the map is full. Extract all tokens and types
@@ -37,7 +37,6 @@ module Regexp::Syntax
37
37
  return Regexp::Syntax::Any if ['*', 'any'].include?(version.to_s)
38
38
 
39
39
  version =~ VERSION_REGEXP || raise(InvalidVersionNameError, version)
40
- warn_if_future_version(version)
41
40
  version_const_name = "V#{version.to_s.scan(/\d+/).join('_')}"
42
41
  const_get(version_const_name) || raise(UnknownSyntaxNameError, version)
43
42
  end
@@ -63,11 +62,4 @@ module Regexp::Syntax
63
62
  # add .99 to treat versions without a patch value as latest patch version
64
63
  Gem::Version.new((name.to_s.scan(/\d+/) << 99).join('.'))
65
64
  end
66
-
67
- def warn_if_future_version(const_name)
68
- return if comparable(const_name) < comparable('4.0.0')
69
-
70
- warn('This library has only been tested up to Ruby 3.x, '\
71
- "but you are running with #{const_name}")
72
- end
73
65
  end
@@ -3,4 +3,6 @@
3
3
  #
4
4
  # Aliases for the latest patch version are provided as 'ruby/n.n',
5
5
  # e.g. 'ruby/1.9' refers to Ruby v1.9.3.
6
- Dir[File.expand_path('../versions/*.rb', __FILE__)].sort.each { |f| require f }
6
+ Dir[File.expand_path('../versions/*.rb', __FILE__)].sort.each { |f| require_relative f }
7
+
8
+ Regexp::Syntax::CURRENT = Regexp::Syntax.for("ruby/#{RUBY_VERSION}")
@@ -1,4 +1,4 @@
1
- require 'regexp_parser/error'
1
+ require_relative 'error'
2
2
 
3
3
  module Regexp::Syntax
4
4
  class SyntaxError < Regexp::Parser::Error; end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.6.0'
3
+ VERSION = '2.10.0'
4
4
  end
5
5
  end
data/lib/regexp_parser.rb CHANGED
@@ -1,6 +1,6 @@
1
- require 'regexp_parser/version'
2
- require 'regexp_parser/token'
3
- require 'regexp_parser/scanner'
4
- require 'regexp_parser/syntax'
5
- require 'regexp_parser/lexer'
6
- require 'regexp_parser/parser'
1
+ require_relative 'regexp_parser/version'
2
+ require_relative 'regexp_parser/token'
3
+ require_relative 'regexp_parser/scanner'
4
+ require_relative 'regexp_parser/syntax'
5
+ require_relative 'regexp_parser/lexer'
6
+ require_relative 'regexp_parser/parser'
@@ -16,17 +16,17 @@ Gem::Specification.new do |spec|
16
16
  spec.metadata['source_code_uri'] = spec.homepage
17
17
  spec.metadata['wiki_uri'] = "#{spec.homepage}/wiki"
18
18
 
19
- spec.authors = ['Ammar Ali']
20
- spec.email = ['ammarabuali@gmail.com']
19
+ spec.metadata['rubygems_mfa_required'] = 'true'
20
+
21
+ spec.authors = ['Ammar Ali', 'Janosch Müller']
22
+ spec.email = ['ammarabuali@gmail.com', 'janosch84@gmail.com']
21
23
 
22
24
  spec.license = 'MIT'
23
25
 
24
26
  spec.require_paths = ['lib']
25
27
 
26
28
  spec.files = Dir.glob('lib/**/*.{csv,rb,rl}') +
27
- %w[Gemfile Rakefile LICENSE README.md CHANGELOG.md regexp_parser.gemspec]
28
-
29
- spec.rdoc_options = ["--inline-source", "--charset=UTF-8"]
29
+ %w[Gemfile Rakefile LICENSE regexp_parser.gemspec]
30
30
 
31
31
  spec.platform = Gem::Platform::RUBY
32
32
 
metadata CHANGED
@@ -1,26 +1,26 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.0
4
+ version: 2.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
8
+ - Janosch Müller
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2022-09-26 00:00:00.000000000 Z
12
+ date: 2024-12-25 00:00:00.000000000 Z
12
13
  dependencies: []
13
14
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
14
15
  email:
15
16
  - ammarabuali@gmail.com
17
+ - janosch84@gmail.com
16
18
  executables: []
17
19
  extensions: []
18
20
  extra_rdoc_files: []
19
21
  files:
20
- - CHANGELOG.md
21
22
  - Gemfile
22
23
  - LICENSE
23
- - README.md
24
24
  - Rakefile
25
25
  - lib/regexp_parser.rb
26
26
  - lib/regexp_parser/error.rb
@@ -43,10 +43,16 @@ files:
43
43
  - lib/regexp_parser/expression/classes/root.rb
44
44
  - lib/regexp_parser/expression/classes/unicode_property.rb
45
45
  - lib/regexp_parser/expression/methods/construct.rb
46
+ - lib/regexp_parser/expression/methods/escape_sequence_char.rb
47
+ - lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb
46
48
  - lib/regexp_parser/expression/methods/human_name.rb
47
49
  - lib/regexp_parser/expression/methods/match.rb
48
50
  - lib/regexp_parser/expression/methods/match_length.rb
51
+ - lib/regexp_parser/expression/methods/negative.rb
49
52
  - lib/regexp_parser/expression/methods/options.rb
53
+ - lib/regexp_parser/expression/methods/parts.rb
54
+ - lib/regexp_parser/expression/methods/printing.rb
55
+ - lib/regexp_parser/expression/methods/referenced_expressions.rb
50
56
  - lib/regexp_parser/expression/methods/strfregexp.rb
51
57
  - lib/regexp_parser/expression/methods/tests.rb
52
58
  - lib/regexp_parser/expression/methods/traverse.rb
@@ -59,6 +65,9 @@ files:
59
65
  - lib/regexp_parser/parser.rb
60
66
  - lib/regexp_parser/scanner.rb
61
67
  - lib/regexp_parser/scanner/char_type.rl
68
+ - lib/regexp_parser/scanner/errors/premature_end_error.rb
69
+ - lib/regexp_parser/scanner/errors/scanner_error.rb
70
+ - lib/regexp_parser/scanner/errors/validation_error.rb
62
71
  - lib/regexp_parser/scanner/properties/long.csv
63
72
  - lib/regexp_parser/scanner/properties/short.csv
64
73
  - lib/regexp_parser/scanner/property.rl
@@ -80,6 +89,7 @@ files:
80
89
  - lib/regexp_parser/syntax/token/posix_class.rb
81
90
  - lib/regexp_parser/syntax/token/quantifier.rb
82
91
  - lib/regexp_parser/syntax/token/unicode_property.rb
92
+ - lib/regexp_parser/syntax/token/virtual.rb
83
93
  - lib/regexp_parser/syntax/version_lookup.rb
84
94
  - lib/regexp_parser/syntax/versions.rb
85
95
  - lib/regexp_parser/syntax/versions/1.8.6.rb
@@ -108,10 +118,9 @@ metadata:
108
118
  homepage_uri: https://github.com/ammar/regexp_parser
109
119
  source_code_uri: https://github.com/ammar/regexp_parser
110
120
  wiki_uri: https://github.com/ammar/regexp_parser/wiki
121
+ rubygems_mfa_required: 'true'
111
122
  post_install_message:
112
- rdoc_options:
113
- - "--inline-source"
114
- - "--charset=UTF-8"
123
+ rdoc_options: []
115
124
  require_paths:
116
125
  - lib
117
126
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -125,7 +134,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
134
  - !ruby/object:Gem::Version
126
135
  version: '0'
127
136
  requirements: []
128
- rubygems_version: 3.3.3
137
+ rubygems_version: 3.4.13
129
138
  signing_key:
130
139
  specification_version: 4
131
140
  summary: Scanner, lexer, parser for ruby's regular expressions