regexp_parser 2.2.0 → 2.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -1
  3. data/Gemfile +2 -1
  4. data/LICENSE +1 -1
  5. data/README.md +16 -6
  6. data/Rakefile +1 -59
  7. data/lib/regexp_parser/expression/classes/escape_sequence.rb +12 -7
  8. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  9. data/lib/regexp_parser/lexer.rb +1 -1
  10. data/lib/regexp_parser/scanner/properties/long.csv +622 -0
  11. data/lib/regexp_parser/scanner/properties/short.csv +246 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  13. data/lib/regexp_parser/scanner.rb +126 -124
  14. data/lib/regexp_parser/syntax/any.rb +2 -5
  15. data/lib/regexp_parser/syntax/base.rb +91 -66
  16. data/lib/regexp_parser/syntax/token/backreference.rb +7 -2
  17. data/lib/regexp_parser/syntax/token/quantifier.rb +4 -4
  18. data/lib/regexp_parser/syntax/token/unicode_property.rb +26 -5
  19. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  20. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  21. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  22. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  23. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  24. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  25. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  26. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  27. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  28. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  29. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  30. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  31. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  32. data/lib/regexp_parser/syntax/versions/3.1.0.rb +3 -9
  33. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  34. data/lib/regexp_parser/syntax/versions.rb +1 -1
  35. data/lib/regexp_parser/version.rb +1 -1
  36. data/regexp_parser.gemspec +20 -22
  37. metadata +12 -143
  38. data/lib/regexp_parser/scanner/properties/long.yml +0 -607
  39. data/lib/regexp_parser/scanner/properties/short.yml +0 -245
  40. data/spec/expression/base_spec.rb +0 -104
  41. data/spec/expression/clone_spec.rb +0 -152
  42. data/spec/expression/conditional_spec.rb +0 -89
  43. data/spec/expression/free_space_spec.rb +0 -27
  44. data/spec/expression/methods/match_length_spec.rb +0 -161
  45. data/spec/expression/methods/match_spec.rb +0 -25
  46. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  47. data/spec/expression/methods/tests_spec.rb +0 -99
  48. data/spec/expression/methods/traverse_spec.rb +0 -161
  49. data/spec/expression/options_spec.rb +0 -128
  50. data/spec/expression/subexpression_spec.rb +0 -50
  51. data/spec/expression/to_h_spec.rb +0 -26
  52. data/spec/expression/to_s_spec.rb +0 -108
  53. data/spec/lexer/all_spec.rb +0 -22
  54. data/spec/lexer/conditionals_spec.rb +0 -53
  55. data/spec/lexer/delimiters_spec.rb +0 -68
  56. data/spec/lexer/escapes_spec.rb +0 -14
  57. data/spec/lexer/keep_spec.rb +0 -10
  58. data/spec/lexer/literals_spec.rb +0 -64
  59. data/spec/lexer/nesting_spec.rb +0 -99
  60. data/spec/lexer/refcalls_spec.rb +0 -60
  61. data/spec/parser/all_spec.rb +0 -43
  62. data/spec/parser/alternation_spec.rb +0 -88
  63. data/spec/parser/anchors_spec.rb +0 -17
  64. data/spec/parser/conditionals_spec.rb +0 -179
  65. data/spec/parser/errors_spec.rb +0 -30
  66. data/spec/parser/escapes_spec.rb +0 -133
  67. data/spec/parser/free_space_spec.rb +0 -130
  68. data/spec/parser/groups_spec.rb +0 -108
  69. data/spec/parser/keep_spec.rb +0 -6
  70. data/spec/parser/options_spec.rb +0 -28
  71. data/spec/parser/posix_classes_spec.rb +0 -8
  72. data/spec/parser/properties_spec.rb +0 -117
  73. data/spec/parser/quantifiers_spec.rb +0 -68
  74. data/spec/parser/refcalls_spec.rb +0 -117
  75. data/spec/parser/set/intersections_spec.rb +0 -127
  76. data/spec/parser/set/ranges_spec.rb +0 -121
  77. data/spec/parser/sets_spec.rb +0 -178
  78. data/spec/parser/types_spec.rb +0 -18
  79. data/spec/scanner/all_spec.rb +0 -18
  80. data/spec/scanner/anchors_spec.rb +0 -21
  81. data/spec/scanner/conditionals_spec.rb +0 -128
  82. data/spec/scanner/delimiters_spec.rb +0 -52
  83. data/spec/scanner/errors_spec.rb +0 -67
  84. data/spec/scanner/escapes_spec.rb +0 -73
  85. data/spec/scanner/free_space_spec.rb +0 -165
  86. data/spec/scanner/groups_spec.rb +0 -61
  87. data/spec/scanner/keep_spec.rb +0 -10
  88. data/spec/scanner/literals_spec.rb +0 -39
  89. data/spec/scanner/meta_spec.rb +0 -18
  90. data/spec/scanner/options_spec.rb +0 -36
  91. data/spec/scanner/properties_spec.rb +0 -64
  92. data/spec/scanner/quantifiers_spec.rb +0 -25
  93. data/spec/scanner/refcalls_spec.rb +0 -55
  94. data/spec/scanner/sets_spec.rb +0 -151
  95. data/spec/scanner/types_spec.rb +0 -14
  96. data/spec/spec_helper.rb +0 -28
  97. data/spec/support/capturing_stderr.rb +0 -9
  98. data/spec/support/shared_examples.rb +0 -77
  99. data/spec/syntax/syntax_spec.rb +0 -48
  100. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  101. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  102. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  103. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  104. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  105. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  106. data/spec/syntax/versions/aliases_spec.rb +0 -38
  107. data/spec/token/token_spec.rb +0 -85
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
4
- data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
3
+ metadata.gz: 6dedba6b051b22dd917febd957e35e6800b37af958780e331007de1f59b2b466
4
+ data.tar.gz: 55fa98afa6031a38cac2045f8c592290fad9a4e500597277f3f79be75f1076f3
5
5
  SHA512:
6
- metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
7
- data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
6
+ metadata.gz: 6b2b463f3a28450527691d90bcfc8901b815bd4a32e88bcdee1f95db4588599993766687a4c7a1b785b450c0f0025039caf07bb93aaf1013fa365e4ed16fc040
7
+ data.tar.gz: 8aabccda06bb1f20485610076ad679a0d62e8630f992c55be7158c01f3b8b6dd6eb44a206f71b613c0335db13789d8e21d652973ecfb7c262c25a6d54e35a371
data/CHANGELOG.md CHANGED
@@ -1,10 +1,43 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Fixed
6
+
7
+ - removed five inexistent unicode properties from `Syntax#features`
8
+ - these were never supported by Ruby but incorrectly accepted by the parser
9
+ - thanks to [Markus Schirp](https://github.com/mbj) for the report
10
+
11
+ ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
12
+
13
+ ### Added
14
+
15
+ - improved parsing performance through `Syntax` refactoring
16
+ - instead of fresh `Syntax` instances, pre-loaded constants are now re-used
17
+ - this approximately doubles the parsing speed for simple regexps
18
+ - added methods to `Syntax` classes to show relative feature sets
19
+ - e.g. `Regexp::Syntax::V3_2_0.added_features`
20
+ - support for new unicode properties of Ruby 3.2 / Unicode 14.0
21
+
22
+ ## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
23
+
24
+ ### Fixed
25
+
26
+ - fixed Syntax version of absence groups (`(?~...)`)
27
+ - the lexer accepted them for any Ruby version
28
+ - now they are only recognized for Ruby >= 2.4.1 in which they were introduced
29
+ - reduced gem size by excluding specs from package
30
+ - removed deprecated `test_files` gemspec setting
31
+ - no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
32
+ - no longer depend on `set`
33
+ - `set` was removed from the stdlib and made a standalone gem as of Ruby 3
34
+ - this made it a hidden/undeclared dependency of `regexp_parser`
35
+
3
36
  ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
4
37
 
5
38
  ### Added
6
39
 
7
- - Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
40
+ - added support for 13 new unicode properties introduced in Ruby 3.1.0
8
41
 
9
42
  ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
10
43
 
data/Gemfile CHANGED
@@ -5,9 +5,10 @@ gemspec
5
5
  group :development, :test do
6
6
  gem 'ice_nine', '~> 0.11.2'
7
7
  gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.0'
8
+ gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
11
12
  gem 'gouteur'
12
13
  gem 'rubocop', '~> 1.7'
13
14
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2022, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/README.md CHANGED
@@ -157,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
157
157
  flavor variations. Syntax only comes into play in the lexer.
158
158
 
159
159
  #### Example
160
- The following instantiates syntax objects for Ruby 2.0, 1.9, 1.8, and
160
+ The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
161
161
  checks a few of their implementation features.
162
162
 
163
163
  ```ruby
164
164
  require 'regexp_parser'
165
165
 
166
- ruby_20 = Regexp::Syntax.new 'ruby/2.0'
166
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0'
167
167
  ruby_20.implements? :quantifier, :zero_or_one # => true
168
168
  ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
169
169
  ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
170
170
  ruby_20.implements? :conditional, :condition # => true
171
171
 
172
- ruby_19 = Regexp::Syntax.new 'ruby/1.9'
172
+ ruby_19 = Regexp::Syntax.for 'ruby/1.9'
173
173
  ruby_19.implements? :quantifier, :zero_or_one # => true
174
174
  ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
175
175
  ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
176
176
  ruby_19.implements? :conditional, :condition # => false
177
177
 
178
- ruby_18 = Regexp::Syntax.new 'ruby/1.8'
178
+ ruby_18 = Regexp::Syntax.for 'ruby/1.8'
179
179
  ruby_18.implements? :quantifier, :zero_or_one # => true
180
180
  ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
181
181
  ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
182
182
  ruby_18.implements? :conditional, :condition # => false
183
183
  ```
184
184
 
185
+ Syntax objects can also be queried about their complete and relative feature sets.
186
+
187
+ ```ruby
188
+ require 'regexp_parser'
189
+
190
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
191
+ ruby_20.added_features # => { conditional: [...], ... }
192
+ ruby_20.removed_features # => { property: [:newline], ... }
193
+ ruby_20.features # => { anchor: [...], ... }
194
+ ```
185
195
 
186
196
  #### Notes
187
197
  * Variations on a token, for example a named group with angle brackets (< and >)
@@ -437,7 +447,7 @@ Projects using regexp_parser.
437
447
 
438
448
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
439
449
 
440
- - [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
450
+ - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
441
451
 
442
452
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
443
453
 
@@ -470,4 +480,4 @@ Documentation and books used while working on this project.
470
480
 
471
481
  ---
472
482
  ##### Copyright
473
- _Copyright (c) 2010-2020 Ammar Ali. See LICENSE file for details._
483
+ _Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
data/Rakefile CHANGED
@@ -5,9 +5,7 @@ require 'rake'
5
5
  require 'rake/testtask'
6
6
  require 'rspec/core/rake_task'
7
7
 
8
- RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
9
- RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
10
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
8
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
11
9
 
12
10
  Bundler::GemHelper.install_tasks
13
11
 
@@ -19,63 +17,7 @@ namespace :test do
19
17
  task full: [:'ragel:rb', :spec]
20
18
  end
21
19
 
22
- namespace :ragel do
23
- desc "Process the ragel source files and output ruby code"
24
- task :rb do
25
- RAGEL_SOURCE_FILES.each do |source_file|
26
- output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
27
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
28
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
29
-
30
- contents = File.read(output_file)
31
-
32
- File.open(output_file, 'r+') do |file|
33
- contents = "# -*- warn-indent:false; -*-\n" + contents
34
-
35
- file.write(contents)
36
- end
37
- end
38
- end
39
-
40
- desc "Delete the ragel generated source file(s)"
41
- task :clean do
42
- RAGEL_SOURCE_FILES.each do |file|
43
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
44
- end
45
- end
46
- end
47
-
48
20
  # Add ragel task as a prerequisite for building the gem to ensure that the
49
21
  # latest scanner code is generated and included in the build.
50
22
  desc "Runs ragel:rb before building the gem"
51
23
  task :build => ['ragel:rb']
52
-
53
- namespace :props do
54
- desc 'Write new property value hashes for the properties scanner'
55
- task :update do
56
- require 'regexp_property_values'
57
- RegexpPropertyValues.update
58
- dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
59
-
60
- require 'psych'
61
- write_hash_to_file = ->(hash, path) do
62
- File.open(path, 'w') do |f|
63
- f.puts '#',
64
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
65
- '#',
66
- hash.sort.to_h.to_yaml
67
- end
68
- puts "Wrote #{hash.count} aliases to `#{path}`"
69
- end
70
-
71
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
72
- [val.identifier, val.full_name.downcase]
73
- end
74
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
75
-
76
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
77
- [k.identifier, v.full_name.downcase]
78
- end
79
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
80
- end
81
- end
data/lib/regexp_parser/expression/classes/escape_sequence.rb CHANGED
@@ -2,16 +2,21 @@ module Regexp::Expression
2
2
  # TODO: unify naming with Token::Escape, on way or the other, in v3.0.0
3
3
  module EscapeSequence
4
4
  class Base < Regexp::Expression::Base
5
- require 'yaml'
6
-
7
- def char
8
- # poor man's unescape without using eval
9
- YAML.load(%Q(---\n"#{text}"\n))
10
- end
11
-
12
5
  def codepoint
13
6
  char.ord
14
7
  end
8
+
9
+ if ''.respond_to?(:undump)
10
+ def char
11
+ %("#{text}").undump
12
+ end
13
+ else
14
+ # poor man's unescape without using eval
15
+ require 'yaml'
16
+ def char
17
+ YAML.load(%Q(---\n"#{text}"\n))
18
+ end
19
+ end
15
20
  end
16
21
 
17
22
  class Literal < EscapeSequence::Base
data/lib/regexp_parser/expression/methods/strfregexp.rb CHANGED
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
data/lib/regexp_parser/lexer.rb CHANGED
@@ -18,7 +18,7 @@ class Regexp::Lexer
18
18
  end
19
19
 
20
20
  def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
21
- syntax = Regexp::Syntax.new(syntax)
21
+ syntax = Regexp::Syntax.for(syntax)
22
22
 
23
23
  self.tokens = []
24
24
  self.nesting = 0