unicode-scripts 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +7 -5
- data/CHANGELOG.md +4 -0
- data/MIT-LICENSE.txt +1 -1
- data/README.md +22 -10
- data/data/scripts.marshal.gz +0 -0
- data/lib/unicode/scripts/constants.rb +6 -5
- data/spec/unicode_scripts_spec.rb +8 -0
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8882022a63cd2caef9118238dae75e3ea6f2c306556a458c4fc9d584503aaa8a
|
4
|
+
data.tar.gz: 236d353516d78090e3a45a5076c4f839e9b5ac0f90e644675fb59e7d819d7880
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0c2b41bf6dc20bdd1a5c82c6681b55a409904af186224b64e4fe5f0d6a0dc24f49ca8aa391d4528878265ab19865db4a97b17006e84373ad26acce79504ad9b
|
7
|
+
data.tar.gz: ae6e6ffb1dab3a8e068a8cf7f4c844826cd2f7357f9805be036f1477cef4f392d88ba0b4ac633cdc58e6d7913704d4a44af3874cf4477b21f92f3faee8b52b51
|
data/.travis.yml
CHANGED
@@ -4,18 +4,20 @@ language: ruby
|
|
4
4
|
script: bundle exec ruby spec/unicode_scripts_spec.rb
|
5
5
|
|
6
6
|
rvm:
|
7
|
-
-
|
8
|
-
- 2.5.
|
9
|
-
- 2.4.
|
10
|
-
- 2.3.
|
7
|
+
- 2.6.1
|
8
|
+
- 2.5.3
|
9
|
+
- 2.4.5
|
10
|
+
- 2.3.8
|
11
11
|
- 2.2
|
12
12
|
- 2.1
|
13
13
|
- 2.0
|
14
|
+
- ruby-head
|
14
15
|
- jruby-head
|
15
|
-
- jruby-9.
|
16
|
+
- jruby-9.2.6.0
|
16
17
|
|
17
18
|
matrix:
|
18
19
|
allow_failures:
|
20
|
+
- rvm: 2.3.8
|
19
21
|
- rvm: 2.2
|
20
22
|
- rvm: 2.1
|
21
23
|
- rvm: 2.0
|
data/CHANGELOG.md
CHANGED
data/MIT-LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
# Unicode::Scripts [![[version]](https://badge.fury.io/rb/unicode-scripts.svg)](
|
1
|
+
# Unicode::Scripts [![[version]](https://badge.fury.io/rb/unicode-scripts.svg)](https://badge.fury.io/rb/unicode-scripts) [![[travis]](https://travis-ci.org/janlelis/unicode-scripts.png)](https://travis-ci.org/janlelis/unicode-scripts)
|
2
2
|
|
3
3
|
Retrieve the [Unicode script(s)](https://en.wikipedia.org/wiki/Script_%28Unicode%29) a string belongs to. Can also return the *Script_Extension* property which is defined as characters which are "commonly used with more than one script, but with a limited number of scripts".
|
4
4
|
|
5
|
-
Unicode version: **
|
5
|
+
Unicode version: **12.0.0** (March 2019)
|
6
6
|
|
7
|
-
Supported Rubies: **2.
|
7
|
+
Supported Rubies: **2.6**, **2.5**, **2.4**
|
8
8
|
|
9
|
-
Old Rubies that might still work: **2.2**, **2.1**, **2.0**
|
9
|
+
Old Rubies that might still work: **2.3**, **2.2**, **2.1**, **2.0**
|
10
10
|
|
11
11
|
## Gemfile
|
12
12
|
|
@@ -29,21 +29,25 @@ Unicode::Scripts.script("ᴦ") # => "Greek"
|
|
29
29
|
|
30
30
|
# Script_Extension property
|
31
31
|
Unicode::Scripts.script_extensions("॥")
|
32
|
-
# => ["Bengali", "Devanagari", "Dogra", "Grantha", "Gujarati",
|
33
|
-
"
|
34
|
-
"
|
35
|
-
"Takri", "Tamil", "Telugu", "Tirhuta"]
|
32
|
+
# => ["Bengali", "Devanagari", "Dogra", "Grantha", "Gujarati","Gunjala_Gondi", "Gurmukhi", "Kannada",
|
33
|
+
"Khudawadi", "Limbu", "Mahajani", "Malayalam", "Masaram_Gondi", "Nandinagari", "Oriya", "Sinhala",
|
34
|
+
"Syloti_Nagri", "Takri", "Tamil", "Telugu", "Tirhuta"]
|
36
35
|
```
|
37
36
|
|
37
|
+
|
38
|
+
|
39
|
+
|
38
40
|
## Hints
|
39
41
|
### Regex Matching
|
40
42
|
|
41
|
-
If you have a string and want to match a substring/character from a specific Unicode script, you actually won't need this gem. Instead, you can use the [Regexp Unicode Property Syntax `\p{}`](http://ruby-doc.org/core
|
43
|
+
If you have a string and want to match a substring/character from a specific Unicode script, you actually won't need this gem. Instead, you can use the [Regexp Unicode Property Syntax `\p{}`](http://ruby-doc.org/core/Regexp.html#class-Regexp-label-Character+Properties):
|
42
44
|
|
43
45
|
```ruby
|
44
46
|
"Coptic letter: ⲁ".scan(/\p{Coptic}/) # => ["ⲁ"]
|
45
47
|
```
|
46
48
|
|
49
|
+
See [Idiosyncratic Ruby: Proper Unicoding](https://idiosyncratic-ruby.com/41-proper-unicoding.html) for more info.
|
50
|
+
|
47
51
|
### Script Names
|
48
52
|
|
49
53
|
You can extract all script names from the gem like this:
|
@@ -88,6 +92,7 @@ Dogra
|
|
88
92
|
Duployan
|
89
93
|
Egyptian_Hieroglyphs
|
90
94
|
Elbasan
|
95
|
+
Elymaic
|
91
96
|
Ethiopic
|
92
97
|
Georgian
|
93
98
|
Glagolitic
|
@@ -146,10 +151,12 @@ Mro
|
|
146
151
|
Multani
|
147
152
|
Myanmar
|
148
153
|
Nabataean
|
154
|
+
Nandinagari
|
149
155
|
New_Tai_Lue
|
150
156
|
Newa
|
151
157
|
Nko
|
152
158
|
Nushu
|
159
|
+
Nyiakeng_Puachue_Hmong
|
153
160
|
Ogham
|
154
161
|
Ol_Chiki
|
155
162
|
Old_Hungarian
|
@@ -201,6 +208,7 @@ Tirhuta
|
|
201
208
|
Ugaritic
|
202
209
|
Unknown
|
203
210
|
Vai
|
211
|
+
Wancho
|
204
212
|
Warang_Citi
|
205
213
|
Yi
|
206
214
|
Zanabazar_Square
|
@@ -248,6 +256,7 @@ Dsrt
|
|
248
256
|
Dupl
|
249
257
|
Egyp
|
250
258
|
Elba
|
259
|
+
Elym
|
251
260
|
Ethi
|
252
261
|
Geor
|
253
262
|
Glag
|
@@ -266,6 +275,7 @@ Hebr
|
|
266
275
|
Hira
|
267
276
|
Hluw
|
268
277
|
Hmng
|
278
|
+
Hmnp
|
269
279
|
Hrkt
|
270
280
|
Hung
|
271
281
|
Ital
|
@@ -303,6 +313,7 @@ Mroo
|
|
303
313
|
Mtei
|
304
314
|
Mult
|
305
315
|
Mymr
|
316
|
+
Nand
|
306
317
|
Narb
|
307
318
|
Nbat
|
308
319
|
Newa
|
@@ -361,6 +372,7 @@ Tirh
|
|
361
372
|
Ugar
|
362
373
|
Vaii
|
363
374
|
Wara
|
375
|
+
Wcho
|
364
376
|
Xpeo
|
365
377
|
Xsux
|
366
378
|
Yiii
|
@@ -374,5 +386,5 @@ See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related
|
|
374
386
|
|
375
387
|
## MIT License
|
376
388
|
|
377
|
-
- Copyright (C) 2016-
|
389
|
+
- Copyright (C) 2016-2019 Jan Lelis <http://janlelis.com>. Released under the MIT license.
|
378
390
|
- Unicode data: http://www.unicode.org/copyright.html#Exhibit1
|
data/data/scripts.marshal.gz
CHANGED
Binary file
|
data/lib/unicode/scripts/constants.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Unicode
|
2
4
|
module Scripts
|
3
|
-
VERSION = "1.
|
4
|
-
UNICODE_VERSION = "
|
5
|
-
DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) +
|
6
|
-
INDEX_FILENAME = (DATA_DIRECTORY +
|
5
|
+
VERSION = "1.4.0"
|
6
|
+
UNICODE_VERSION = "12.0.0"
|
7
|
+
DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/").freeze
|
8
|
+
INDEX_FILENAME = (DATA_DIRECTORY + "/scripts.marshal.gz").freeze
|
7
9
|
end
|
8
10
|
end
|
9
|
-
|
data/spec/unicode_scripts_spec.rb
CHANGED
@@ -51,14 +51,18 @@ describe Unicode::Scripts do
|
|
51
51
|
assert_equal [
|
52
52
|
"Bengali",
|
53
53
|
"Devanagari",
|
54
|
+
"Dogra",
|
54
55
|
"Grantha",
|
55
56
|
"Gujarati",
|
57
|
+
"Gunjala_Gondi",
|
56
58
|
"Gurmukhi",
|
57
59
|
"Kannada",
|
58
60
|
"Khudawadi",
|
59
61
|
"Limbu",
|
60
62
|
"Mahajani",
|
61
63
|
"Malayalam",
|
64
|
+
"Masaram_Gondi",
|
65
|
+
"Nandinagari",
|
62
66
|
"Oriya",
|
63
67
|
"Sinhala",
|
64
68
|
"Syloti_Nagri",
|
@@ -73,6 +77,9 @@ describe Unicode::Scripts do
|
|
73
77
|
assert_equal [
|
74
78
|
"Beng",
|
75
79
|
"Deva",
|
80
|
+
"Dogr",
|
81
|
+
"Gong",
|
82
|
+
"Gonm",
|
76
83
|
"Gran",
|
77
84
|
"Gujr",
|
78
85
|
"Guru",
|
@@ -80,6 +87,7 @@ describe Unicode::Scripts do
|
|
80
87
|
"Limb",
|
81
88
|
"Mahj",
|
82
89
|
"Mlym",
|
90
|
+
"Nand",
|
83
91
|
"Orya",
|
84
92
|
"Sind",
|
85
93
|
"Sinh",
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-scripts
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: "[Unicode
|
13
|
+
description: "[Unicode 12.0.0] Retrieve the Unicode script(s) a string belongs to.
|
14
14
|
Can also return the Script_Extension property which is defined as characters which
|
15
15
|
are 'commonly used with more than one script, but with a limited number of scripts'. "
|
16
16
|
email:
|
@@ -54,8 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
56
|
requirements: []
|
57
|
-
|
58
|
-
rubygems_version: 2.7.6
|
57
|
+
rubygems_version: 3.0.1
|
59
58
|
signing_key:
|
60
59
|
specification_version: 4
|
61
60
|
summary: Which script(s) does a Unicode string belong to?
|