regexp_parser 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -1
  3. data/Gemfile +2 -1
  4. data/LICENSE +1 -1
  5. data/README.md +16 -6
  6. data/Rakefile +1 -59
  7. data/lib/regexp_parser/expression/classes/escape_sequence.rb +12 -7
  8. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  9. data/lib/regexp_parser/lexer.rb +1 -1
  10. data/lib/regexp_parser/scanner/properties/long.csv +622 -0
  11. data/lib/regexp_parser/scanner/properties/short.csv +246 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  13. data/lib/regexp_parser/scanner.rb +126 -124
  14. data/lib/regexp_parser/syntax/any.rb +2 -5
  15. data/lib/regexp_parser/syntax/base.rb +91 -66
  16. data/lib/regexp_parser/syntax/token/backreference.rb +7 -2
  17. data/lib/regexp_parser/syntax/token/quantifier.rb +4 -4
  18. data/lib/regexp_parser/syntax/token/unicode_property.rb +26 -5
  19. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  20. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  21. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  22. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  23. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  24. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  25. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  26. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  27. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  28. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  29. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  30. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  31. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  32. data/lib/regexp_parser/syntax/versions/3.1.0.rb +3 -9
  33. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  34. data/lib/regexp_parser/syntax/versions.rb +1 -1
  35. data/lib/regexp_parser/version.rb +1 -1
  36. data/regexp_parser.gemspec +20 -22
  37. metadata +12 -143
  38. data/lib/regexp_parser/scanner/properties/long.yml +0 -607
  39. data/lib/regexp_parser/scanner/properties/short.yml +0 -245
  40. data/spec/expression/base_spec.rb +0 -104
  41. data/spec/expression/clone_spec.rb +0 -152
  42. data/spec/expression/conditional_spec.rb +0 -89
  43. data/spec/expression/free_space_spec.rb +0 -27
  44. data/spec/expression/methods/match_length_spec.rb +0 -161
  45. data/spec/expression/methods/match_spec.rb +0 -25
  46. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  47. data/spec/expression/methods/tests_spec.rb +0 -99
  48. data/spec/expression/methods/traverse_spec.rb +0 -161
  49. data/spec/expression/options_spec.rb +0 -128
  50. data/spec/expression/subexpression_spec.rb +0 -50
  51. data/spec/expression/to_h_spec.rb +0 -26
  52. data/spec/expression/to_s_spec.rb +0 -108
  53. data/spec/lexer/all_spec.rb +0 -22
  54. data/spec/lexer/conditionals_spec.rb +0 -53
  55. data/spec/lexer/delimiters_spec.rb +0 -68
  56. data/spec/lexer/escapes_spec.rb +0 -14
  57. data/spec/lexer/keep_spec.rb +0 -10
  58. data/spec/lexer/literals_spec.rb +0 -64
  59. data/spec/lexer/nesting_spec.rb +0 -99
  60. data/spec/lexer/refcalls_spec.rb +0 -60
  61. data/spec/parser/all_spec.rb +0 -43
  62. data/spec/parser/alternation_spec.rb +0 -88
  63. data/spec/parser/anchors_spec.rb +0 -17
  64. data/spec/parser/conditionals_spec.rb +0 -179
  65. data/spec/parser/errors_spec.rb +0 -30
  66. data/spec/parser/escapes_spec.rb +0 -133
  67. data/spec/parser/free_space_spec.rb +0 -130
  68. data/spec/parser/groups_spec.rb +0 -108
  69. data/spec/parser/keep_spec.rb +0 -6
  70. data/spec/parser/options_spec.rb +0 -28
  71. data/spec/parser/posix_classes_spec.rb +0 -8
  72. data/spec/parser/properties_spec.rb +0 -117
  73. data/spec/parser/quantifiers_spec.rb +0 -68
  74. data/spec/parser/refcalls_spec.rb +0 -117
  75. data/spec/parser/set/intersections_spec.rb +0 -127
  76. data/spec/parser/set/ranges_spec.rb +0 -121
  77. data/spec/parser/sets_spec.rb +0 -178
  78. data/spec/parser/types_spec.rb +0 -18
  79. data/spec/scanner/all_spec.rb +0 -18
  80. data/spec/scanner/anchors_spec.rb +0 -21
  81. data/spec/scanner/conditionals_spec.rb +0 -128
  82. data/spec/scanner/delimiters_spec.rb +0 -52
  83. data/spec/scanner/errors_spec.rb +0 -67
  84. data/spec/scanner/escapes_spec.rb +0 -73
  85. data/spec/scanner/free_space_spec.rb +0 -165
  86. data/spec/scanner/groups_spec.rb +0 -61
  87. data/spec/scanner/keep_spec.rb +0 -10
  88. data/spec/scanner/literals_spec.rb +0 -39
  89. data/spec/scanner/meta_spec.rb +0 -18
  90. data/spec/scanner/options_spec.rb +0 -36
  91. data/spec/scanner/properties_spec.rb +0 -64
  92. data/spec/scanner/quantifiers_spec.rb +0 -25
  93. data/spec/scanner/refcalls_spec.rb +0 -55
  94. data/spec/scanner/sets_spec.rb +0 -151
  95. data/spec/scanner/types_spec.rb +0 -14
  96. data/spec/spec_helper.rb +0 -28
  97. data/spec/support/capturing_stderr.rb +0 -9
  98. data/spec/support/shared_examples.rb +0 -77
  99. data/spec/syntax/syntax_spec.rb +0 -48
  100. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  101. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  102. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  103. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  104. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  105. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  106. data/spec/syntax/versions/aliases_spec.rb +0 -38
  107. data/spec/token/token_spec.rb +0 -85
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
4
- data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
3
+ metadata.gz: 6dedba6b051b22dd917febd957e35e6800b37af958780e331007de1f59b2b466
4
+ data.tar.gz: 55fa98afa6031a38cac2045f8c592290fad9a4e500597277f3f79be75f1076f3
5
5
  SHA512:
6
- metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
7
- data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
6
+ metadata.gz: 6b2b463f3a28450527691d90bcfc8901b815bd4a32e88bcdee1f95db4588599993766687a4c7a1b785b450c0f0025039caf07bb93aaf1013fa365e4ed16fc040
7
+ data.tar.gz: 8aabccda06bb1f20485610076ad679a0d62e8630f992c55be7158c01f3b8b6dd6eb44a206f71b613c0335db13789d8e21d652973ecfb7c262c25a6d54e35a371
data/CHANGELOG.md CHANGED
@@ -1,10 +1,43 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Fixed
6
+
7
+ - removed five nonexistent unicode properties from `Syntax#features`
8
+ - these were never supported by Ruby but incorrectly accepted by the parser
9
+ - thanks to [Markus Schirp](https://github.com/mbj) for the report
10
+
11
+ ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
12
+
13
+ ### Added
14
+
15
+ - improved parsing performance through `Syntax` refactoring
16
+ - instead of fresh `Syntax` instances, pre-loaded constants are now re-used
17
+ - this approximately doubles the parsing speed for simple regexps
18
+ - added methods to `Syntax` classes to show relative feature sets
19
+ - e.g. `Regexp::Syntax::V3_2_0.added_features`
20
+ - support for new unicode properties of Ruby 3.2 / Unicode 14.0
21
+
22
+ ## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
23
+
24
+ ### Fixed
25
+
26
+ - fixed Syntax version of absence groups (`(?~...)`)
27
+ - the lexer accepted them for any Ruby version
28
+ - now they are only recognized for Ruby >= 2.4.1 in which they were introduced
29
+ - reduced gem size by excluding specs from package
30
+ - removed deprecated `test_files` gemspec setting
31
+ - no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
32
+ - no longer depend on `set`
33
+ - `set` was removed from the stdlib and made a standalone gem as of Ruby 3
34
+ - this made it a hidden/undeclared dependency of `regexp_parser`
35
+
3
36
  ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
4
37
 
5
38
  ### Added
6
39
 
7
- - Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
40
+ - added support for 13 new unicode properties introduced in Ruby 3.1.0
8
41
 
9
42
  ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
10
43
 
data/Gemfile CHANGED
@@ -5,9 +5,10 @@ gemspec
5
5
  group :development, :test do
6
6
  gem 'ice_nine', '~> 0.11.2'
7
7
  gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.0'
8
+ gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
11
12
  gem 'gouteur'
12
13
  gem 'rubocop', '~> 1.7'
13
14
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2022, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/README.md CHANGED
@@ -157,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
157
157
  flavor variations. Syntax only comes into play in the lexer.
158
158
 
159
159
  #### Example
160
- The following instantiates syntax objects for Ruby 2.0, 1.9, 1.8, and
160
+ The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
161
161
  checks a few of their implementation features.
162
162
 
163
163
  ```ruby
164
164
  require 'regexp_parser'
165
165
 
166
- ruby_20 = Regexp::Syntax.new 'ruby/2.0'
166
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0'
167
167
  ruby_20.implements? :quantifier, :zero_or_one # => true
168
168
  ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
169
169
  ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
170
170
  ruby_20.implements? :conditional, :condition # => true
171
171
 
172
- ruby_19 = Regexp::Syntax.new 'ruby/1.9'
172
+ ruby_19 = Regexp::Syntax.for 'ruby/1.9'
173
173
  ruby_19.implements? :quantifier, :zero_or_one # => true
174
174
  ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
175
175
  ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
176
176
  ruby_19.implements? :conditional, :condition # => false
177
177
 
178
- ruby_18 = Regexp::Syntax.new 'ruby/1.8'
178
+ ruby_18 = Regexp::Syntax.for 'ruby/1.8'
179
179
  ruby_18.implements? :quantifier, :zero_or_one # => true
180
180
  ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
181
181
  ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
182
182
  ruby_18.implements? :conditional, :condition # => false
183
183
  ```
184
184
 
185
+ Syntax objects can also be queried about their complete and relative feature sets.
186
+
187
+ ```ruby
188
+ require 'regexp_parser'
189
+
190
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
191
+ ruby_20.added_features # => { conditional: [...], ... }
192
+ ruby_20.removed_features # => { property: [:newline], ... }
193
+ ruby_20.features # => { anchor: [...], ... }
194
+ ```
185
195
 
186
196
  #### Notes
187
197
  * Variations on a token, for example a named group with angle brackets (< and >)
@@ -437,7 +447,7 @@ Projects using regexp_parser.
437
447
 
438
448
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
439
449
 
440
- - [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
450
+ - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
441
451
 
442
452
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
443
453
 
@@ -470,4 +480,4 @@ Documentation and books used while working on this project.
470
480
 
471
481
  ---
472
482
  ##### Copyright
473
- _Copyright (c) 2010-2020 Ammar Ali. See LICENSE file for details._
483
+ _Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
data/Rakefile CHANGED
@@ -5,9 +5,7 @@ require 'rake'
5
5
  require 'rake/testtask'
6
6
  require 'rspec/core/rake_task'
7
7
 
8
- RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
9
- RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
10
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
8
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
11
9
 
12
10
  Bundler::GemHelper.install_tasks
13
11
 
@@ -19,63 +17,7 @@ namespace :test do
19
17
  task full: [:'ragel:rb', :spec]
20
18
  end
21
19
 
22
- namespace :ragel do
23
- desc "Process the ragel source files and output ruby code"
24
- task :rb do
25
- RAGEL_SOURCE_FILES.each do |source_file|
26
- output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
27
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
28
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
29
-
30
- contents = File.read(output_file)
31
-
32
- File.open(output_file, 'r+') do |file|
33
- contents = "# -*- warn-indent:false; -*-\n" + contents
34
-
35
- file.write(contents)
36
- end
37
- end
38
- end
39
-
40
- desc "Delete the ragel generated source file(s)"
41
- task :clean do
42
- RAGEL_SOURCE_FILES.each do |file|
43
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
44
- end
45
- end
46
- end
47
-
48
20
  # Add ragel task as a prerequisite for building the gem to ensure that the
49
21
  # latest scanner code is generated and included in the build.
50
22
  desc "Runs ragel:rb before building the gem"
51
23
  task :build => ['ragel:rb']
52
-
53
- namespace :props do
54
- desc 'Write new property value hashes for the properties scanner'
55
- task :update do
56
- require 'regexp_property_values'
57
- RegexpPropertyValues.update
58
- dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
59
-
60
- require 'psych'
61
- write_hash_to_file = ->(hash, path) do
62
- File.open(path, 'w') do |f|
63
- f.puts '#',
64
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
65
- '#',
66
- hash.sort.to_h.to_yaml
67
- end
68
- puts "Wrote #{hash.count} aliases to `#{path}`"
69
- end
70
-
71
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
72
- [val.identifier, val.full_name.downcase]
73
- end
74
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
75
-
76
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
77
- [k.identifier, v.full_name.downcase]
78
- end
79
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
80
- end
81
- end
data/lib/regexp_parser/expression/classes/escape_sequence.rb CHANGED
@@ -2,16 +2,21 @@ module Regexp::Expression
2
2
  # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
3
3
  module EscapeSequence
4
4
  class Base < Regexp::Expression::Base
5
- require 'yaml'
6
-
7
- def char
8
- # poor man's unescape without using eval
9
- YAML.load(%Q(---\n"#{text}"\n))
10
- end
11
-
12
5
  def codepoint
13
6
  char.ord
14
7
  end
8
+
9
+ if ''.respond_to?(:undump)
10
+ def char
11
+ %("#{text}").undump
12
+ end
13
+ else
14
+ # poor man's unescape without using eval
15
+ require 'yaml'
16
+ def char
17
+ YAML.load(%Q(---\n"#{text}"\n))
18
+ end
19
+ end
15
20
  end
16
21
 
17
22
  class Literal < EscapeSequence::Base
data/lib/regexp_parser/expression/methods/strfregexp.rb CHANGED
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
data/lib/regexp_parser/lexer.rb CHANGED
@@ -18,7 +18,7 @@ class Regexp::Lexer
18
18
  end
19
19
 
20
20
  def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
21
- syntax = Regexp::Syntax.new(syntax)
21
+ syntax = Regexp::Syntax.for(syntax)
22
22
 
23
23
  self.tokens = []
24
24
  self.nesting = 0