regexp_property_values 0.3.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +0 -3
- data/CHANGELOG.md +19 -0
- data/README.md +8 -14
- data/Rakefile +2 -0
- data/ext/regexp_property_values/regexp_property_values.c +18 -12
- data/lib/aliases +233 -0
- data/lib/regexp_property_values.rb +17 -59
- data/lib/regexp_property_values/updater.rb +133 -0
- data/lib/regexp_property_values/value.rb +14 -0
- data/lib/regexp_property_values/value/ext_adapter.rb +19 -0
- data/lib/regexp_property_values/value/ruby_fallback.rb +21 -0
- data/lib/regexp_property_values/value/shared_methods.rb +63 -0
- data/lib/regexp_property_values/version.rb +1 -1
- data/lib/values +590 -0
- data/regexp_property_values.gemspec +4 -4
- metadata +22 -17
- data/lib/UnicodeProps.txt +0 -828
- data/lib/regexp_property_values/extension.rb +0 -65
@@ -1,65 +0,0 @@
-module RegexpPropertyValues
-  module Extension
-    def supported_by_current_ruby?
-      !!regexp
-    rescue ArgumentError
-      false
-    end
-
-    def regexp
-      @regexp ||= /\p{#{self}}/u
-    rescue RegexpError, SyntaxError
-      raise ArgumentError, "Unknown property name #{self}"
-    end
-
-    if const_defined?(:OnigRegexpPropertyHelper)
-      # C extension loaded
-
-      def matched_codepoints
-        matched_ranges.flat_map(&:to_a)
-      end
-
-      def matched_ranges
-        OnigRegexpPropertyHelper.matched_ranges(self.encode('utf-8'))
-      end
-
-      def matched_characters
-        matched_codepoints.map { |cp| cp.chr('utf-8') }
-      end
-
-      def character_set
-        require 'character_set'
-        CharacterSet.from_ranges(*matched_ranges)
-      end
-    else
-      # Ruby fallback - this stuff is slow as hell, and it wont get much faster
-
-      def matched_codepoints
-        matched_characters.map(&:ord)
-      end
-
-      def matched_ranges
-        require 'set'
-        matched_codepoints
-          .to_set(SortedSet)
-          .divide { |i, j| (i - j).abs == 1 }
-          .map { |s| a = s.to_a; a.first..a.last }
-      end
-
-      def matched_characters
-        regexp.respond_to?(:match?) ||
-          regexp.define_singleton_method(:match?) { |str| !!match(str) }
-
-        @@characters ||= ((0..0xD7FF).to_a + (0xE000..0x10FFFF).to_a)
-          .map { |cp_number| [cp_number].pack('U') }
-
-        @@characters.select { |char| regexp.match?(char) }
-      end
-
-      def character_set
-        require 'character_set'
-        CharacterSet.new(matched_codepoints)
-      end
-    end
-  end
-end