unicode-types 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/data/types.marshal.gz +0 -0
- data/lib/unicode/types.rb +24 -2
- data/lib/unicode/types/constants.rb +1 -1
- data/spec/.unicode_types_spec.rb.swp +0 -0
- data/spec/unicode_types_spec.rb +4 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e9488ebcd84e5ca6243a189ba0424afa3793c565
|
4
|
+
data.tar.gz: 2df94931f98452e6d948f5bf9fe300d752ddc9a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2589173a6bbb02749b23c7f82fa136c7c7380a552095e08aebd4479ad0d07ff0e8aa92bdfa671960103ea359b9b1efa3a59637cf943588dbad2cd6bc102339b5
|
7
|
+
data.tar.gz: af68eea885a8bc4b22e6eff8863c5b9f2c93e888feda6dd352dfe895220ed31e8c27d96757aa6b8f580ae60b1b2f964b66a3d1ea99f3fc3e1fdf6f2f6c41887c
|
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 1.1.2
|
4
|
+
|
5
|
+
* Fix that surrogates were detected in UTF-8 (regression of 1.1.1)
|
6
|
+
* Fix bug in index compression scheme
|
7
|
+
|
3
8
|
### 1.1.1
|
4
9
|
|
5
|
-
* Fix bug that prevented non-UTF-8 encodings from
|
10
|
+
* Fix bug that prevented non-UTF-8 encodings from working
|
6
11
|
|
7
12
|
### 1.1.0
|
8
13
|
|
data/data/types.marshal.gz
CHANGED
Binary file
|
data/lib/unicode/types.rb
CHANGED
@@ -14,8 +14,7 @@ module Unicode
|
|
14
14
|
|
15
15
|
def self.type(char)
|
16
16
|
require_relative 'types/index' unless defined? ::Unicode::Types::INDEX
|
17
|
-
codepoint_depth_offset = char
|
18
|
-
raise(ArgumentError, "Unicode::Types.type must be given a valid char")
|
17
|
+
codepoint_depth_offset = get_codepoint_value(char)
|
19
18
|
index_or_value = INDEX[:TYPES]
|
20
19
|
[0x10000, 0x1000, 0x100, 0x10].each{ |depth|
|
21
20
|
index_or_value = index_or_value[codepoint_depth_offset / depth]
|
@@ -31,5 +30,28 @@ module Unicode
|
|
31
30
|
require_relative 'types/index' unless defined? ::Unicode::Types::INDEX
|
32
31
|
INDEX[:TYPE_NAMES].dup
|
33
32
|
end
|
33
|
+
|
34
|
+
def self.get_codepoint_value(char)
|
35
|
+
ord = nil
|
36
|
+
|
37
|
+
if char.valid_encoding?
|
38
|
+
ord = char.ord
|
39
|
+
elsif char.encoding.name == "UTF-8"
|
40
|
+
begin
|
41
|
+
ord = char.unpack("U*")[0]
|
42
|
+
rescue ArgumentError
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
if ord
|
47
|
+
ord
|
48
|
+
else
|
49
|
+
raise(ArgumentError, "Unicode::Types.type must be given a valid char")
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
class << self
|
54
|
+
private :get_codepoint_value
|
55
|
+
end
|
34
56
|
end
|
35
57
|
end
|
data/lib/unicode/types/constants.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Unicode
|
2
2
|
module Types
|
3
|
-
VERSION = "1.1.1".freeze
|
3
|
+
VERSION = "1.1.2".freeze
|
4
4
|
UNICODE_VERSION = "9.0.0".freeze
|
5
5
|
DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + '/../../../data/').freeze
|
6
6
|
INDEX_FILENAME = (DATA_DIRECTORY + '/types.marshal.gz').freeze
|
Binary file
|
data/spec/unicode_types_spec.rb
CHANGED
@@ -40,6 +40,10 @@ describe Unicode::Types do
|
|
40
40
|
assert_equal "Reserved", Unicode::Types.type("\u{10c50}")
|
41
41
|
assert_equal "Reserved", Unicode::Types.type("\u{c03a6}")
|
42
42
|
end
|
43
|
+
|
44
|
+
it "will work with invalid surrogate values" do
|
45
|
+
assert_equal "Surrogate", Unicode::Types.type("\xED\xA0\x80")
|
46
|
+
end
|
43
47
|
end
|
44
48
|
|
45
49
|
describe ".names" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-types
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "[Unicode 9.0.0] Determine the basic type of codepoints (Graphic, Format,
|
14
14
|
Control, Private-use, Surrogate, Noncharacter, Reserved)"
|
@@ -31,6 +31,7 @@ files:
|
|
31
31
|
- lib/unicode/types/constants.rb
|
32
32
|
- lib/unicode/types/index.rb
|
33
33
|
- lib/unicode/types/string_ext.rb
|
34
|
+
- spec/.unicode_types_spec.rb.swp
|
34
35
|
- spec/unicode_types_spec.rb
|
35
36
|
- unicode-types.gemspec
|
36
37
|
homepage: https://github.com/janlelis/unicode-types
|
@@ -58,4 +59,5 @@ signing_key:
|
|
58
59
|
specification_version: 4
|
59
60
|
summary: Determine the basic type of codepoints.
|
60
61
|
test_files:
|
62
|
+
- spec/.unicode_types_spec.rb.swp
|
61
63
|
- spec/unicode_types_spec.rb
|