regexp_property_values 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +7 -6
- data/README.md +14 -13
- data/Rakefile +9 -0
- data/ext/regexp_property_values/extconf.rb +5 -0
- data/ext/regexp_property_values/regexp_property_values.c +56 -0
- data/lib/regexp_property_values/extension.rb +53 -0
- data/lib/regexp_property_values/version.rb +1 -1
- data/lib/regexp_property_values.rb +17 -24
- data/regexp_property_values.gemspec +9 -5
- metadata +26 -9
- data/lib/regexp_property_values/value_extension.rb +0 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20ea749474673385c6a98a9cfb2a6f4265d84ce688f69d3068ee3ba351ddb18a
|
4
|
+
data.tar.gz: 8cc47f0492ac633689a75bbf92a3b50fa7c71c3e5b36dd72be0d2c50f6b375d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6dd88b94bc75382f973bfe7fc402bcf52183dcc93ed05630b02c9f2890bce40bc01ce07a9e98e02b10f592da3394e2faceeb173a7a7b3995f1a0f14f1698d9e6
|
7
|
+
data.tar.gz: cfa31e9a424a1dc385bc8719605566ace765fa4fbaf5ab594d7fe09c09028db5e1b2aea5a6eaaef11e579626ff849f89f5c2e88a44e368567295712be487fef9
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/regexp_property_values.svg)](http://badge.fury.io/rb/regexp_property_values)
|
4
4
|
[![Build Status](https://travis-ci.org/janosch-x/regexp_property_values.svg?branch=master)](https://travis-ci.org/janosch-x/regexp_property_values)
|
5
5
|
|
6
|
-
This
|
6
|
+
This small library lets you see which property values are supported by the regular expression engine of the Ruby version you are running and directly reads out their codepoint ranges from there.
|
7
7
|
|
8
|
-
That is, it determines all supported values for `\p{value}` expressions.
|
8
|
+
That is, it determines all supported values for `\p{value}` expressions and what they match.
|
9
9
|
|
10
10
|
## Usage
|
11
11
|
|
@@ -17,10 +17,7 @@ require 'regexp_property_values'
|
|
17
17
|
PV = RegexpPropertyValues
|
18
18
|
|
19
19
|
PV.all # => ["Alpha", "Blank", "Cntrl", ...]
|
20
|
-
PV.all.sort # => ["AHex", "ASCII", "Adlam", "Adlm", "Age=1.1", ...]
|
21
|
-
|
22
20
|
PV.by_category # => {"POSIX brackets" => ["Alpha", ...], "Special" => ...}
|
23
|
-
|
24
21
|
PV.short_and_long_names # => [["M", "Grek", ...], ["Mark", "Greek", ...]]
|
25
22
|
```
|
26
23
|
|
@@ -28,23 +25,27 @@ PV.short_and_long_names # => [["M", "Grek", ...], ["Mark", "Greek", ...]]
|
|
28
25
|
|
29
26
|
```ruby
|
30
27
|
PV.all_for_current_ruby # => ["Alpha", "Blank", "Cntrl", ...]
|
31
|
-
PV.all_for_current_ruby.include?('Newline') # => false
|
32
28
|
|
33
29
|
PV.by_category.map { |k, v| [k, v.select(&:supported_by_current_ruby?)] }
|
34
30
|
|
35
|
-
|
31
|
+
# etc.
|
36
32
|
```
|
37
33
|
|
38
|
-
#####
|
34
|
+
##### Inspect properties
|
39
35
|
|
40
36
|
```ruby
|
41
|
-
PV.supported_by_current_ruby?
|
42
|
-
PV.supported_by_current_ruby?
|
37
|
+
PV['alpha'].supported_by_current_ruby? # => true
|
38
|
+
PV['foobar'].supported_by_current_ruby? # => false
|
43
39
|
|
44
|
-
#
|
45
|
-
PV
|
40
|
+
PV['AHex'].matched_characters # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
|
41
|
+
PV['AHex'].matched_codepoints # => [48, 49, 50, ...]
|
42
|
+
PV['AHex'].matched_ranges # => [48..57, 65..70, 97..102]
|
43
|
+
```
|
46
44
|
|
47
|
-
|
45
|
+
##### Utility methods
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
# This one takes a few seconds (or minutes, without the C extension)
|
48
49
|
PV.alias_hash # => {"M" => "Mark", "Grek" => "Greek", ...}
|
49
50
|
|
50
51
|
# download the latest list of possible properties
|
data/Rakefile
CHANGED
@@ -4,3 +4,12 @@ require "rspec/core/rake_task"
|
|
4
4
|
RSpec::Core::RakeTask.new(:spec)
|
5
5
|
|
6
6
|
task :default => :spec
|
7
|
+
|
8
|
+
require 'rake/extensiontask'
|
9
|
+
|
10
|
+
Rake::ExtensionTask.new('regexp_property_values') do |ext|
|
11
|
+
ext.lib_dir = 'lib/regexp_property_values'
|
12
|
+
end
|
13
|
+
|
14
|
+
# recompile before running specs
|
15
|
+
task(:spec).enhance([:compile])
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "ruby/encoding.h"
|
3
|
+
#include "ruby/oniguruma.h" // still in recent rubies f. backwards compatibility
|
4
|
+
|
5
|
+
/*
 * Resolve a property name to a ctype for the given encoding.
 *
 * name: NUL-terminated property name.
 * enc:  encoding whose property table is consulted.
 *
 * Returns the (non-negative) ctype. Raises ArgumentError when the lookup
 * yields a negative value.
 */
static int prop_name_to_ctype(char* name, rb_encoding *enc) {
  UChar *name_start = (UChar*)name;
  UChar *name_end = name_start + strlen(name);
  int found = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, name_start, name_end);

  if (found < 0) {
    rb_raise(rb_eArgError, "Unknown property name `%s`", name);
  }

  return found;
}
|
15
|
+
|
16
|
+
/*
 * Convert a code range table into a Ruby Array of Ranges.
 *
 * The table is read as: element 0 holds the number of ranges, followed by
 * that many (first, last) pairs. Each pair becomes an inclusive Range.
 */
VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges) {
  unsigned int pair_count = onig_ranges[0];
  const OnigCodePoint *pair = onig_ranges + 1;
  VALUE ranges = rb_ary_new2(pair_count); // rb_ary_new_capa not avail. in Ruby 2.0
  unsigned int idx;

  for (idx = 0; idx < pair_count; idx++, pair += 2) {
    // third argument 0 => the range includes its end
    rb_ary_store(ranges, idx,
                 rb_range_new(INT2FIX(pair[0]), INT2FIX(pair[1]), 0));
  }

  return ranges;
}
|
32
|
+
|
33
|
+
/*
 * Look up the codepoint ranges matched by property `name` in encoding `enc`
 * and return them as a Ruby Array of Ranges.
 *
 * Raises ArgumentError if the name is unknown (via prop_name_to_ctype) or
 * if the encoding does not provide a code range table for the ctype.
 */
VALUE rb_prop_ranges(char* name, rb_encoding *enc) {
  int ctype;
  int status;
  const OnigCodePoint *onig_ranges = NULL;
  OnigCodePoint sb_out;

  ctype = prop_name_to_ctype(name, enc);

  // The lookup reports failure through its return value; in that case the
  // output pointer cannot be trusted, so never dereference it unchecked.
  status = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &onig_ranges);
  if (status != 0 || onig_ranges == NULL) {
    rb_raise(rb_eArgError,
             "No codepoint ranges available for property `%s`", name);
  }

  return onig_ranges_to_rb(onig_ranges);
}
|
42
|
+
|
43
|
+
/*
 * Implementation of the `matched_ranges` singleton method.
 *
 * Converts `arg` to a C string, takes the encoding from `arg` itself and
 * delegates to rb_prop_ranges. `self` is unused.
 */
VALUE method_matched_ranges(VALUE self, VALUE arg) {
  char *c_name = StringValueCStr(arg);
  rb_encoding *arg_enc = rb_enc_get(arg);

  return rb_prop_ranges(c_name, arg_enc);
}
|
51
|
+
|
52
|
+
void Init_regexp_property_values() {
|
53
|
+
VALUE module;
|
54
|
+
module = rb_define_module("OnigRegexpPropertyHelper");
|
55
|
+
rb_define_singleton_method(module, "matched_ranges", method_matched_ranges, 1);
|
56
|
+
}
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module RegexpPropertyValues
  # Mixin for strings that name a regexp property (applied with
  # String#extend). Adds inspection of what `\p{<name>}` matches.
  module Extension
    # Returns true if the running Ruby can build a regexp for this property
    # name, false if doing so raises RegexpError or SyntaxError.
    def supported_by_current_ruby?
      !!regexp
    rescue RegexpError, SyntaxError
      false
    end

    # Memoized regexp matching a single character with this property.
    def regexp
      @regexp ||= /\p{#{self}}/u
    end

    if const_defined?(:OnigRegexpPropertyHelper)
      # C extension loaded

      # All matched codepoints as Integers, expanded from the ranges.
      def matched_codepoints
        matched_ranges.flat_map(&:to_a)
      end

      # Matched codepoint ranges as reported by the C helper.
      def matched_ranges
        OnigRegexpPropertyHelper.matched_ranges(self)
      end

      # All matched characters as UTF-8 strings.
      def matched_characters
        matched_codepoints.map { |cp| cp.chr('utf-8') }
      end
    else
      # Ruby fallback - this stuff is slow as hell, and it won't get much faster

      # All matched codepoints as Integers.
      def matched_codepoints
        matched_characters.map(&:ord)
      end

      # Matched codepoints collapsed into inclusive ranges of consecutive
      # values. Works on codepoints (Integers), not characters, because the
      # adjacency check needs arithmetic. Relies on matched_codepoints being
      # in ascending order, which holds because the candidate list in
      # matched_characters is built in ascending order.
      def matched_ranges
        matched_codepoints.each_with_object([]) do |codepoint, ranges|
          previous = ranges.last
          if previous && previous.end + 1 == codepoint
            # extend the current run
            ranges[-1] = previous.begin..codepoint
          else
            # start a new run
            ranges << (codepoint..codepoint)
          end
        end
      end

      # All matched characters, found by testing every candidate codepoint
      # (0..0xD7FF and 0xE000..0x10FFFF) against the regexp.
      def matched_characters
        # Regexp#match? is not available on older Rubies; polyfill it on
        # this regexp instance only.
        regexp.respond_to?(:match?) ||
          regexp.define_singleton_method(:match?) { |str| !!match(str) }

        # The candidate list is shared and built only once.
        @@characters ||= ((0..0xD7FF).to_a + (0xE000..0x10FFFF).to_a)
                         .map { |cp_number| [cp_number].pack('U') }

        @@characters.select { |char| regexp.match?(char) }
      end
    end
  end
end
|
@@ -1,4 +1,9 @@
|
|
1
|
-
|
1
|
+
begin
|
2
|
+
require 'regexp_property_values/regexp_property_values'
|
3
|
+
rescue LoadError
|
4
|
+
warn 'regexp_property_values could not load C extension, using slower Ruby'
|
5
|
+
end
|
6
|
+
require 'regexp_property_values/extension'
|
2
7
|
require 'regexp_property_values/version'
|
3
8
|
|
4
9
|
module RegexpPropertyValues
|
@@ -26,21 +31,20 @@ module RegexpPropertyValues
|
|
26
31
|
end
|
27
32
|
|
28
33
|
def by_category
|
29
|
-
result = File.foreach(file_path).
|
34
|
+
result = File.foreach(file_path).each_with_object({}) do |line, hash|
|
30
35
|
if /^\* (?<category>\S.+)/ =~ line
|
31
36
|
@current_category = category
|
32
37
|
hash[@current_category] ||= []
|
33
38
|
elsif /^ {4}(?<value_name>\S.*)/ =~ line
|
34
|
-
hash[@current_category] <<
|
39
|
+
hash[@current_category] << value_name.extend(Extension)
|
35
40
|
end
|
36
|
-
hash
|
37
41
|
end
|
38
42
|
add_oniguruma_properties(result)
|
39
43
|
result
|
40
44
|
end
|
41
45
|
|
42
46
|
def add_oniguruma_properties(props_by_category)
|
43
|
-
props_by_category['Special'] <<
|
47
|
+
props_by_category['Special'] << 'Newline'.extend(Extension)
|
44
48
|
end
|
45
49
|
|
46
50
|
def alias_hash
|
@@ -48,11 +52,10 @@ module RegexpPropertyValues
|
|
48
52
|
return {} if short_names.empty?
|
49
53
|
|
50
54
|
long_names -= by_category['POSIX brackets']
|
51
|
-
|
52
|
-
next
|
55
|
+
by_matched_codepoints.each_value.each_with_object({}) do |props, hash|
|
56
|
+
next if props.count < 2
|
53
57
|
long_name = (props & long_names)[0] || fail("no long name for #{props}")
|
54
58
|
(props & short_names).each { |short_name| hash[short_name] = long_name }
|
55
|
-
hash
|
56
59
|
end
|
57
60
|
end
|
58
61
|
|
@@ -60,27 +63,17 @@ module RegexpPropertyValues
|
|
60
63
|
short_name_categories = ['Major and General Categories',
|
61
64
|
'PropertyAliases',
|
62
65
|
'PropertyValueAliases (Script)']
|
63
|
-
by_category.
|
66
|
+
by_category.each_with_object([[], []]) do |(cat_name, props), (short, long)|
|
64
67
|
(short_name_categories.include?(cat_name) ? short : long).concat(props)
|
65
|
-
[short, long]
|
66
68
|
end
|
67
69
|
end
|
68
70
|
|
69
|
-
def
|
70
|
-
puts 'Establishing property
|
71
|
-
all_for_current_ruby.group_by(&:
|
72
|
-
end
|
73
|
-
|
74
|
-
def matched_characters(prop)
|
75
|
-
value(prop).matched_characters
|
76
|
-
end
|
77
|
-
|
78
|
-
def supported_by_current_ruby?(prop)
|
79
|
-
value(prop).supported_by_current_ruby?
|
71
|
+
def by_matched_codepoints
|
72
|
+
puts 'Establishing property codepoints, this may take a bit ...'
|
73
|
+
all_for_current_ruby.group_by(&:matched_codepoints)
|
80
74
|
end
|
81
75
|
|
82
|
-
def
|
83
|
-
prop.
|
84
|
-
prop
|
76
|
+
def [](prop)
|
77
|
+
prop.extend(Extension)
|
85
78
|
end
|
86
79
|
end
|
@@ -8,11 +8,10 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.authors = ['Janosch Müller']
|
9
9
|
s.email = ['janosch84@gmail.com']
|
10
10
|
|
11
|
-
s.summary = "
|
12
|
-
s.description = 'This
|
13
|
-
'supported by the regular expression engine of the
|
14
|
-
'version you are running
|
15
|
-
'supported values for `\p{value}` expressions.'
|
11
|
+
s.summary = "Inspect property values supported by Ruby's regex engine"
|
12
|
+
s.description = 'This small library lets you see which property values '\
|
13
|
+
'are supported by the regular expression engine of the '\
|
14
|
+
'Ruby version you are running, and what they match.'
|
16
15
|
s.homepage = 'https://github.com/janosch-x/regexp_property_values'
|
17
16
|
s.license = 'MIT'
|
18
17
|
|
@@ -21,7 +20,12 @@ Gem::Specification.new do |s|
|
|
21
20
|
end
|
22
21
|
s.require_paths = ['lib']
|
23
22
|
|
23
|
+
s.extensions = %w[ext/regexp_property_values/extconf.rb]
|
24
|
+
|
25
|
+
s.required_ruby_version = '>= 2.0.0'
|
26
|
+
|
24
27
|
s.add_development_dependency 'bundler', '~> 1.16'
|
25
28
|
s.add_development_dependency 'rake', '~> 10.0'
|
29
|
+
s.add_development_dependency 'rake-compiler', '~> 1.0'
|
26
30
|
s.add_development_dependency 'rspec', '~> 3.0'
|
27
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_property_values
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rspec
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,13 +66,14 @@ dependencies:
|
|
52
66
|
- - "~>"
|
53
67
|
- !ruby/object:Gem::Version
|
54
68
|
version: '3.0'
|
55
|
-
description: This
|
56
|
-
the regular expression engine of the Ruby version you are running
|
57
|
-
|
69
|
+
description: This small library lets you see which property values are supported by
|
70
|
+
the regular expression engine of the Ruby version you are running, and what they
|
71
|
+
match.
|
58
72
|
email:
|
59
73
|
- janosch84@gmail.com
|
60
74
|
executables: []
|
61
|
-
extensions:
|
75
|
+
extensions:
|
76
|
+
- ext/regexp_property_values/extconf.rb
|
62
77
|
extra_rdoc_files: []
|
63
78
|
files:
|
64
79
|
- ".gitignore"
|
@@ -70,9 +85,11 @@ files:
|
|
70
85
|
- Rakefile
|
71
86
|
- bin/console
|
72
87
|
- bin/setup
|
88
|
+
- ext/regexp_property_values/extconf.rb
|
89
|
+
- ext/regexp_property_values/regexp_property_values.c
|
73
90
|
- lib/UnicodeProps.txt
|
74
91
|
- lib/regexp_property_values.rb
|
75
|
-
- lib/regexp_property_values/
|
92
|
+
- lib/regexp_property_values/extension.rb
|
76
93
|
- lib/regexp_property_values/version.rb
|
77
94
|
- regexp_property_values.gemspec
|
78
95
|
homepage: https://github.com/janosch-x/regexp_property_values
|
@@ -87,7 +104,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
87
104
|
requirements:
|
88
105
|
- - ">="
|
89
106
|
- !ruby/object:Gem::Version
|
90
|
-
version:
|
107
|
+
version: 2.0.0
|
91
108
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
109
|
requirements:
|
93
110
|
- - ">="
|
@@ -98,5 +115,5 @@ rubyforge_project:
|
|
98
115
|
rubygems_version: 2.7.6
|
99
116
|
signing_key:
|
100
117
|
specification_version: 4
|
101
|
-
summary:
|
118
|
+
summary: Inspect property values supported by Ruby's regex engine
|
102
119
|
test_files: []
|
@@ -1,20 +0,0 @@
|
|
1
|
-
module RegexpPropertyValues
|
2
|
-
def self.characters
|
3
|
-
@characters ||= ((0..55_295).to_a + (57_344..1_114_111).to_a)
|
4
|
-
.map { |cp_number| [cp_number].pack('U') }
|
5
|
-
end
|
6
|
-
|
7
|
-
module ValueExtension
|
8
|
-
def supported_by_current_ruby?
|
9
|
-
begin !!regexp; rescue RegexpError, SyntaxError; false end
|
10
|
-
end
|
11
|
-
|
12
|
-
def matched_characters
|
13
|
-
RegexpPropertyValues.characters.select { |char| regexp.match(char) }
|
14
|
-
end
|
15
|
-
|
16
|
-
def regexp
|
17
|
-
@regexp ||= /\p{#{self}}/u
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|