regexp_property_values 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bfe93f83643ea9d85f998e3c0cfed5ae3580211a0509a140ef66f99749ea7e83
4
- data.tar.gz: e2c7cbda1f656ba625e84b60fe1ac94dd07bea0199342c033b5d7e35057d3a36
3
+ metadata.gz: 20ea749474673385c6a98a9cfb2a6f4265d84ce688f69d3068ee3ba351ddb18a
4
+ data.tar.gz: 8cc47f0492ac633689a75bbf92a3b50fa7c71c3e5b36dd72be0d2c50f6b375d6
5
5
  SHA512:
6
- metadata.gz: 5b3bb49079c6d969db2c32ef682bd33bb7bc8ca73eded912d27dd3b146735de8e1c239bb2b7657901cce7d5172a3c3b47f0e04d97221b64b83c9e7107b0fc12f
7
- data.tar.gz: 1c871837a405a1b3944a00e3ab256872a3e22b21f66a7237801b3efe4fb7783286127b59a6587c7e99e76e02dc14f27f13e854ceae0b848f3c5fc23e4542b018
6
+ metadata.gz: 6dd88b94bc75382f973bfe7fc402bcf52183dcc93ed05630b02c9f2890bce40bc01ce07a9e98e02b10f592da3394e2faceeb173a7a7b3995f1a0f14f1698d9e6
7
+ data.tar.gz: cfa31e9a424a1dc385bc8719605566ace765fa4fbaf5ab594d7fe09c09028db5e1b2aea5a6eaaef11e579626ff849f89f5c2e88a44e368567295712be487fef9
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ *.bundle
1
2
  *.gem
2
3
  *.iml
3
4
  *.stTheme.cache
data/.travis.yml CHANGED
@@ -1,9 +1,10 @@
1
1
  sudo: false
2
2
  language: ruby
3
3
  rvm:
4
- - 1.9.3
5
- - 2.0.0
6
- - 2.2.0
7
- - 2.5.0
8
- - 2.6.0
9
- before_install: gem install bundler -v 1.16.1
4
+ - 2.0
5
+ - 2.4
6
+ - 2.5
7
+ - 2.6
8
+ before_install:
9
+ - gem update --system
10
+ - gem install bundler
data/README.md CHANGED
@@ -3,9 +3,9 @@
3
3
  [![Gem Version](https://badge.fury.io/rb/regexp_property_values.svg)](http://badge.fury.io/rb/regexp_property_values)
4
4
  [![Build Status](https://travis-ci.org/janosch-x/regexp_property_values.svg?branch=master)](https://travis-ci.org/janosch-x/regexp_property_values)
5
5
 
6
- This microlibrary lets you see which property values are supported by the regular expression engine of the Ruby version you are running.
6
+ This small library lets you see which property values are supported by the regular expression engine of the Ruby version you are running, and reads their codepoint ranges directly from that engine.
7
7
 
8
- That is, it determines all supported values for `\p{value}` expressions.
8
+ That is, it determines all supported values for `\p{value}` expressions and what they match.
9
9
 
10
10
  ## Usage
11
11
 
@@ -17,10 +17,7 @@ require 'regexp_property_values'
17
17
  PV = RegexpPropertyValues
18
18
 
19
19
  PV.all # => ["Alpha", "Blank", "Cntrl", ...]
20
- PV.all.sort # => ["AHex", "ASCII", "Adlam", "Adlm", "Age=1.1", ...]
21
-
22
20
  PV.by_category # => {"POSIX brackets" => ["Alpha", ...], "Special" => ...}
23
-
24
21
  PV.short_and_long_names # => [["M", "Grek", ...], ["Mark", "Greek", ...]]
25
22
  ```
26
23
 
@@ -28,23 +25,27 @@ PV.short_and_long_names # => [["M", "Grek", ...], ["Mark", "Greek", ...]]
28
25
 
29
26
  ```ruby
30
27
  PV.all_for_current_ruby # => ["Alpha", "Blank", "Cntrl", ...]
31
- PV.all_for_current_ruby.include?('Newline') # => false
32
28
 
33
29
  PV.by_category.map { |k, v| [k, v.select(&:supported_by_current_ruby?)] }
34
30
 
35
- PV.short_and_long_names.map { |a| a.select(&:supported_by_current_ruby?) }
31
+ # etc.
36
32
  ```
37
33
 
38
- ##### Utility methods
34
+ ##### Inspect properties
39
35
 
40
36
  ```ruby
41
- PV.supported_by_current_ruby?('alpha') # => true
42
- PV.supported_by_current_ruby?('foobar') # => false
37
+ PV['alpha'].supported_by_current_ruby? # => true
38
+ PV['foobar'].supported_by_current_ruby? # => false
43
39
 
44
- # this one takes a second
45
- PV.matched_characters('AHex') # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
40
+ PV['AHex'].matched_characters # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
41
+ PV['AHex'].matched_codepoints # => [48, 49, 50, ...]
42
+ PV['AHex'].matched_ranges # => [48..57, 65..70, 97..102]
43
+ ```
46
44
 
47
- # this one takes a minute or two
45
+ ##### Utility methods
46
+
47
+ ```ruby
48
+ # This one takes a few seconds (or minutes, without the C extension)
48
49
  PV.alias_hash # => {"M" => "Mark", "Grek" => "Greek", ...}
49
50
 
50
51
  # download the latest list of possible properties
data/Rakefile CHANGED
@@ -4,3 +4,12 @@ require "rspec/core/rake_task"
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
6
  task :default => :spec
7
+
8
+ require 'rake/extensiontask'
9
+
10
+ Rake::ExtensionTask.new('regexp_property_values') do |ext|
11
+ ext.lib_dir = 'lib/regexp_property_values'
12
+ end
13
+
14
+ # recompile before running specs
15
+ task(:spec).enhance([:compile])
@@ -0,0 +1,5 @@
1
+ require 'mkmf'
2
+
3
+ name = 'regexp_property_values'
4
+
5
+ create_makefile("#{name}/#{name}")
@@ -0,0 +1,56 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+ #include "ruby/oniguruma.h" // still in recent rubies f. backwards compatibility
4
+
5
+ static int prop_name_to_ctype(char* name, rb_encoding *enc) {
6
+ UChar *uname;
7
+ int ctype;
8
+
9
+ uname = (UChar*)name;
10
+ ctype = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, uname, uname + strlen(name));
11
+ if (ctype < 0) rb_raise(rb_eArgError, "Unknown property name `%s`", name);
12
+
13
+ return ctype;
14
+ }
15
+
16
+ VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges) {
17
+ unsigned int range_count, i;
18
+ VALUE result, sub_range;
19
+
20
+ range_count = onig_ranges[0];
21
+ result = rb_ary_new2(range_count); // rb_ary_new_capa not avail. in Ruby 2.0
22
+
23
+ for (i = 0; i < range_count; i++) {
24
+ sub_range = rb_range_new(INT2FIX(onig_ranges[(i * 2) + 1]),
25
+ INT2FIX(onig_ranges[(i * 2) + 2]),
26
+ 0);
27
+ rb_ary_store(result, i, sub_range);
28
+ }
29
+
30
+ return result;
31
+ }
32
+
33
+ VALUE rb_prop_ranges(char* name, rb_encoding *enc) {
34
+ int ctype;
35
+ const OnigCodePoint *onig_ranges;
36
+ OnigCodePoint sb_out;
37
+
38
+ ctype = prop_name_to_ctype(name, enc);
39
+ ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &onig_ranges);
40
+ return onig_ranges_to_rb(onig_ranges);
41
+ }
42
+
43
+ VALUE method_matched_ranges(VALUE self, VALUE arg) {
44
+ char *prop_name;
45
+ rb_encoding *enc;
46
+
47
+ prop_name = StringValueCStr(arg);
48
+ enc = rb_enc_get(arg);
49
+ return rb_prop_ranges(prop_name, enc);
50
+ }
51
+
52
+ void Init_regexp_property_values() {
53
+ VALUE module;
54
+ module = rb_define_module("OnigRegexpPropertyHelper");
55
+ rb_define_singleton_method(module, "matched_ranges", method_matched_ranges, 1);
56
+ }
@@ -0,0 +1,53 @@
1
+ module RegexpPropertyValues
2
+ module Extension
3
+ def supported_by_current_ruby?
4
+ !!regexp
5
+ rescue RegexpError, SyntaxError
6
+ false
7
+ end
8
+
9
+ def regexp
10
+ @regexp ||= /\p{#{self}}/u
11
+ end
12
+
13
+ if const_defined?(:OnigRegexpPropertyHelper)
14
+ # C extension loaded
15
+
16
+ def matched_codepoints
17
+ matched_ranges.flat_map(&:to_a)
18
+ end
19
+
20
+ def matched_ranges
21
+ OnigRegexpPropertyHelper.matched_ranges(self)
22
+ end
23
+
24
+ def matched_characters
25
+ matched_codepoints.map { |cp| cp.chr('utf-8') }
26
+ end
27
+ else
28
+ # Ruby fallback - this stuff is slow as hell, and it won't get much faster
29
+
30
+ def matched_codepoints
31
+ matched_characters.map(&:ord)
32
+ end
33
+
34
+ def matched_ranges
35
+ require 'set'
36
+ matched_characters
37
+ .to_set(SortedSet)
38
+ .divide { |i, j| (i - j).abs == 1 }
39
+ .map { |s| a = s.to_a; a.first..a.last }
40
+ end
41
+
42
+ def matched_characters
43
+ regexp.respond_to?(:match?) ||
44
+ regexp.define_singleton_method(:match?) { |str| !!match(str) }
45
+
46
+ @@characters ||= ((0..0xD7FF).to_a + (0xE000..0x10FFFF).to_a)
47
+ .map { |cp_number| [cp_number].pack('U') }
48
+
49
+ @@characters.select { |char| regexp.match?(char) }
50
+ end
51
+ end
52
+ end
53
+ end
@@ -1,3 +1,3 @@
1
1
  module RegexpPropertyValues
2
- VERSION = '0.2.1'
2
+ VERSION = '0.3.0'
3
3
  end
@@ -1,4 +1,9 @@
1
- require 'regexp_property_values/value_extension'
1
+ begin
2
+ require 'regexp_property_values/regexp_property_values'
3
+ rescue LoadError
4
+ warn 'regexp_property_values could not load C extension, using slower Ruby'
5
+ end
6
+ require 'regexp_property_values/extension'
2
7
  require 'regexp_property_values/version'
3
8
 
4
9
  module RegexpPropertyValues
@@ -26,21 +31,20 @@ module RegexpPropertyValues
26
31
  end
27
32
 
28
33
  def by_category
29
- result = File.foreach(file_path).inject({}) do |hash, line|
34
+ result = File.foreach(file_path).each_with_object({}) do |line, hash|
30
35
  if /^\* (?<category>\S.+)/ =~ line
31
36
  @current_category = category
32
37
  hash[@current_category] ||= []
33
38
  elsif /^ {4}(?<value_name>\S.*)/ =~ line
34
- hash[@current_category] << value(value_name)
39
+ hash[@current_category] << value_name.extend(Extension)
35
40
  end
36
- hash
37
41
  end
38
42
  add_oniguruma_properties(result)
39
43
  result
40
44
  end
41
45
 
42
46
  def add_oniguruma_properties(props_by_category)
43
- props_by_category['Special'] << value('Newline')
47
+ props_by_category['Special'] << 'Newline'.extend(Extension)
44
48
  end
45
49
 
46
50
  def alias_hash
@@ -48,11 +52,10 @@ module RegexpPropertyValues
48
52
  return {} if short_names.empty?
49
53
 
50
54
  long_names -= by_category['POSIX brackets']
51
- by_matched_characters.each_value.inject({}) do |hash, props|
52
- next hash if props.count < 2
55
+ by_matched_codepoints.each_value.each_with_object({}) do |props, hash|
56
+ next if props.count < 2
53
57
  long_name = (props & long_names)[0] || fail("no long name for #{props}")
54
58
  (props & short_names).each { |short_name| hash[short_name] = long_name }
55
- hash
56
59
  end
57
60
  end
58
61
 
@@ -60,27 +63,17 @@ module RegexpPropertyValues
60
63
  short_name_categories = ['Major and General Categories',
61
64
  'PropertyAliases',
62
65
  'PropertyValueAliases (Script)']
63
- by_category.inject([[], []]) do |(short, long), (cat_name, props)|
66
+ by_category.each_with_object([[], []]) do |(cat_name, props), (short, long)|
64
67
  (short_name_categories.include?(cat_name) ? short : long).concat(props)
65
- [short, long]
66
68
  end
67
69
  end
68
70
 
69
- def by_matched_characters
70
- puts 'Establishing property characters, this may take a bit ...'
71
- all_for_current_ruby.group_by(&:matched_characters)
72
- end
73
-
74
- def matched_characters(prop)
75
- value(prop).matched_characters
76
- end
77
-
78
- def supported_by_current_ruby?(prop)
79
- value(prop).supported_by_current_ruby?
71
+ def by_matched_codepoints
72
+ puts 'Establishing property codepoints, this may take a bit ...'
73
+ all_for_current_ruby.group_by(&:matched_codepoints)
80
74
  end
81
75
 
82
- def value(prop)
83
- prop.singleton_class.send(:include, ValueExtension)
84
- prop
76
+ def [](prop)
77
+ prop.extend(Extension)
85
78
  end
86
79
  end
@@ -8,11 +8,10 @@ Gem::Specification.new do |s|
8
8
  s.authors = ['Janosch Müller']
9
9
  s.email = ['janosch84@gmail.com']
10
10
 
11
- s.summary = "Lists property values supported by Ruby's regex engine"
12
- s.description = 'This microlibrary lets you see which property values are '\
13
- 'supported by the regular expression engine of the Ruby '\
14
- 'version you are running. That is, it determines all '\
15
- 'supported values for `\p{value}` expressions.'
11
+ s.summary = "Inspect property values supported by Ruby's regex engine"
12
+ s.description = 'This small library lets you see which property values '\
13
+ 'are supported by the regular expression engine of the '\
14
+ 'Ruby version you are running, and what they match.'
16
15
  s.homepage = 'https://github.com/janosch-x/regexp_property_values'
17
16
  s.license = 'MIT'
18
17
 
@@ -21,7 +20,12 @@ Gem::Specification.new do |s|
21
20
  end
22
21
  s.require_paths = ['lib']
23
22
 
23
+ s.extensions = %w[ext/regexp_property_values/extconf.rb]
24
+
25
+ s.required_ruby_version = '>= 2.0.0'
26
+
24
27
  s.add_development_dependency 'bundler', '~> 1.16'
25
28
  s.add_development_dependency 'rake', '~> 10.0'
29
+ s.add_development_dependency 'rake-compiler', '~> 1.0'
26
30
  s.add_development_dependency 'rspec', '~> 3.0'
27
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_property_values
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-13 00:00:00.000000000 Z
11
+ date: 2018-07-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -52,13 +66,14 @@ dependencies:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
68
  version: '3.0'
55
- description: This microlibrary lets you see which property values are supported by
56
- the regular expression engine of the Ruby version you are running. That is, it determines
57
- all supported values for `\p{value}` expressions.
69
+ description: This small library lets you see which property values are supported by
70
+ the regular expression engine of the Ruby version you are running, and what they
71
+ match.
58
72
  email:
59
73
  - janosch84@gmail.com
60
74
  executables: []
61
- extensions: []
75
+ extensions:
76
+ - ext/regexp_property_values/extconf.rb
62
77
  extra_rdoc_files: []
63
78
  files:
64
79
  - ".gitignore"
@@ -70,9 +85,11 @@ files:
70
85
  - Rakefile
71
86
  - bin/console
72
87
  - bin/setup
88
+ - ext/regexp_property_values/extconf.rb
89
+ - ext/regexp_property_values/regexp_property_values.c
73
90
  - lib/UnicodeProps.txt
74
91
  - lib/regexp_property_values.rb
75
- - lib/regexp_property_values/value_extension.rb
92
+ - lib/regexp_property_values/extension.rb
76
93
  - lib/regexp_property_values/version.rb
77
94
  - regexp_property_values.gemspec
78
95
  homepage: https://github.com/janosch-x/regexp_property_values
@@ -87,7 +104,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
87
104
  requirements:
88
105
  - - ">="
89
106
  - !ruby/object:Gem::Version
90
- version: '0'
107
+ version: 2.0.0
91
108
  required_rubygems_version: !ruby/object:Gem::Requirement
92
109
  requirements:
93
110
  - - ">="
@@ -98,5 +115,5 @@ rubyforge_project:
98
115
  rubygems_version: 2.7.6
99
116
  signing_key:
100
117
  specification_version: 4
101
- summary: Lists property values supported by Ruby's regex engine
118
+ summary: Inspect property values supported by Ruby's regex engine
102
119
  test_files: []
@@ -1,20 +0,0 @@
1
- module RegexpPropertyValues
2
- def self.characters
3
- @characters ||= ((0..55_295).to_a + (57_344..1_114_111).to_a)
4
- .map { |cp_number| [cp_number].pack('U') }
5
- end
6
-
7
- module ValueExtension
8
- def supported_by_current_ruby?
9
- begin !!regexp; rescue RegexpError, SyntaxError; false end
10
- end
11
-
12
- def matched_characters
13
- RegexpPropertyValues.characters.select { |char| regexp.match(char) }
14
- end
15
-
16
- def regexp
17
- @regexp ||= /\p{#{self}}/u
18
- end
19
- end
20
- end