unicoder 0.1.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicoder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-13 00:00:00.000000000 Z
11
+ date: 2024-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rationalist
@@ -24,9 +24,37 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '2.0'
27
- description: Generate specialized indexes for Unicode data lookup
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubyzip
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: oga
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.9'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.9'
55
+ description: Generates specialized indexes for Unicode data lookup
28
56
  email:
29
- - mail@janlelis.de
57
+ - hi@ruby.consulting
30
58
  executables:
31
59
  - unicoder
32
60
  extensions: []
@@ -37,57 +65,53 @@ files:
37
65
  - CHANGELOG.md
38
66
  - CODE_OF_CONDUCT.md
39
67
  - Gemfile
68
+ - Gemfile.lock
40
69
  - MIT-LICENSE.txt
41
70
  - README.md
42
71
  - Rakefile
43
72
  - bin/unicoder
44
- - data/.keep
45
- - data/unicode/8.0.0/ucd/Blocks.txt
46
- - data/unicode/8.0.0/ucd/EastAsianWidth.txt
47
- - data/unicode/8.0.0/ucd/NameAliases.txt
48
- - data/unicode/8.0.0/ucd/PropertyValueAliases.txt
49
- - data/unicode/8.0.0/ucd/ScriptExtensions.txt
50
- - data/unicode/8.0.0/ucd/Scripts.txt
51
- - data/unicode/8.0.0/ucd/UnicodeData.txt
52
- - data/unicode/8.0.0/ucd/extracted/DerivedGeneralCategory.txt
53
- - data/unicode/security/8.0.0/confusables.txt
54
73
  - lib/unicoder.rb
55
74
  - lib/unicoder/builder.rb
56
75
  - lib/unicoder/builders/blocks.rb
57
76
  - lib/unicoder/builders/categories.rb
58
77
  - lib/unicoder/builders/confusable.rb
59
78
  - lib/unicoder/builders/display_width.rb
79
+ - lib/unicoder/builders/emoji.rb
80
+ - lib/unicoder/builders/name.rb
81
+ - lib/unicoder/builders/numeric_value.rb
60
82
  - lib/unicoder/builders/scripts.rb
83
+ - lib/unicoder/builders/sequence_name.rb
84
+ - lib/unicoder/builders/types.rb
61
85
  - lib/unicoder/constants.rb
62
86
  - lib/unicoder/downloader.rb
63
87
  - lib/unicoder/multi_dimensional_array_builder.rb
88
+ - lib/unicoder/replace_common_words.rb
64
89
  - lib/unicoder/tasks.rake
65
- - spec/unicoder_spec.rb
66
90
  - unicoder.gemspec
67
91
  homepage: https://github.com/janlelis/unicoder
68
92
  licenses:
69
93
  - MIT
70
94
  metadata: {}
71
- post_install_message:
95
+ post_install_message:
72
96
  rdoc_options: []
73
97
  require_paths:
74
98
  - lib
75
99
  required_ruby_version: !ruby/object:Gem::Requirement
76
100
  requirements:
77
- - - "~>"
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '3.0'
104
+ - - "<"
78
105
  - !ruby/object:Gem::Version
79
- version: '2.0'
106
+ version: '4.0'
80
107
  required_rubygems_version: !ruby/object:Gem::Requirement
81
108
  requirements:
82
109
  - - ">="
83
110
  - !ruby/object:Gem::Version
84
111
  version: '0'
85
112
  requirements: []
86
- rubyforge_project:
87
- rubygems_version: 2.6.3
88
- signing_key:
113
+ rubygems_version: 3.5.21
114
+ signing_key:
89
115
  specification_version: 4
90
- summary: Create specialized indexes for Unicode data lookup
91
- test_files:
92
- - spec/unicoder_spec.rb
93
- has_rdoc:
116
+ summary: Creates specialized indexes for Unicode data lookup
117
+ test_files: []
data/data/.keep DELETED
File without changes
@@ -1,298 +0,0 @@
1
- # Blocks-8.0.0.txt
2
- # Date: 2014-11-10, 23:04:00 GMT [KW]
3
- #
4
- # Unicode Character Database
5
- # Copyright (c) 1991-2014 Unicode, Inc.
6
- # For terms of use, see http://www.unicode.org/terms_of_use.html
7
- # For documentation, see http://www.unicode.org/reports/tr44/
8
- #
9
- # Format:
10
- # Start Code..End Code; Block Name
11
-
12
- # ================================================
13
-
14
- # Note: When comparing block names, casing, whitespace, hyphens,
15
- # and underbars are ignored.
16
- # For example, "Latin Extended-A" and "latin extended a" are equivalent.
17
- # For more information on the comparison of property values,
18
- # see UAX #44: http://www.unicode.org/reports/tr44/
19
- #
20
- # All block ranges start with a value where (cp MOD 16) = 0,
21
- # and end with a value where (cp MOD 16) = 15. In other words,
22
- # the last hexadecimal digit of the start of range is ...0
23
- # and the last hexadecimal digit of the end of range is ...F.
24
- # This constraint on block ranges guarantees that allocations
25
- # are done in terms of whole columns, and that code chart display
26
- # never involves splitting columns in the charts.
27
- #
28
- # All code points not explicitly listed for Block
29
- # have the value No_Block.
30
-
31
- # Property: Block
32
- #
33
- # @missing: 0000..10FFFF; No_Block
34
-
35
- 0000..007F; Basic Latin
36
- 0080..00FF; Latin-1 Supplement
37
- 0100..017F; Latin Extended-A
38
- 0180..024F; Latin Extended-B
39
- 0250..02AF; IPA Extensions
40
- 02B0..02FF; Spacing Modifier Letters
41
- 0300..036F; Combining Diacritical Marks
42
- 0370..03FF; Greek and Coptic
43
- 0400..04FF; Cyrillic
44
- 0500..052F; Cyrillic Supplement
45
- 0530..058F; Armenian
46
- 0590..05FF; Hebrew
47
- 0600..06FF; Arabic
48
- 0700..074F; Syriac
49
- 0750..077F; Arabic Supplement
50
- 0780..07BF; Thaana
51
- 07C0..07FF; NKo
52
- 0800..083F; Samaritan
53
- 0840..085F; Mandaic
54
- 08A0..08FF; Arabic Extended-A
55
- 0900..097F; Devanagari
56
- 0980..09FF; Bengali
57
- 0A00..0A7F; Gurmukhi
58
- 0A80..0AFF; Gujarati
59
- 0B00..0B7F; Oriya
60
- 0B80..0BFF; Tamil
61
- 0C00..0C7F; Telugu
62
- 0C80..0CFF; Kannada
63
- 0D00..0D7F; Malayalam
64
- 0D80..0DFF; Sinhala
65
- 0E00..0E7F; Thai
66
- 0E80..0EFF; Lao
67
- 0F00..0FFF; Tibetan
68
- 1000..109F; Myanmar
69
- 10A0..10FF; Georgian
70
- 1100..11FF; Hangul Jamo
71
- 1200..137F; Ethiopic
72
- 1380..139F; Ethiopic Supplement
73
- 13A0..13FF; Cherokee
74
- 1400..167F; Unified Canadian Aboriginal Syllabics
75
- 1680..169F; Ogham
76
- 16A0..16FF; Runic
77
- 1700..171F; Tagalog
78
- 1720..173F; Hanunoo
79
- 1740..175F; Buhid
80
- 1760..177F; Tagbanwa
81
- 1780..17FF; Khmer
82
- 1800..18AF; Mongolian
83
- 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
84
- 1900..194F; Limbu
85
- 1950..197F; Tai Le
86
- 1980..19DF; New Tai Lue
87
- 19E0..19FF; Khmer Symbols
88
- 1A00..1A1F; Buginese
89
- 1A20..1AAF; Tai Tham
90
- 1AB0..1AFF; Combining Diacritical Marks Extended
91
- 1B00..1B7F; Balinese
92
- 1B80..1BBF; Sundanese
93
- 1BC0..1BFF; Batak
94
- 1C00..1C4F; Lepcha
95
- 1C50..1C7F; Ol Chiki
96
- 1CC0..1CCF; Sundanese Supplement
97
- 1CD0..1CFF; Vedic Extensions
98
- 1D00..1D7F; Phonetic Extensions
99
- 1D80..1DBF; Phonetic Extensions Supplement
100
- 1DC0..1DFF; Combining Diacritical Marks Supplement
101
- 1E00..1EFF; Latin Extended Additional
102
- 1F00..1FFF; Greek Extended
103
- 2000..206F; General Punctuation
104
- 2070..209F; Superscripts and Subscripts
105
- 20A0..20CF; Currency Symbols
106
- 20D0..20FF; Combining Diacritical Marks for Symbols
107
- 2100..214F; Letterlike Symbols
108
- 2150..218F; Number Forms
109
- 2190..21FF; Arrows
110
- 2200..22FF; Mathematical Operators
111
- 2300..23FF; Miscellaneous Technical
112
- 2400..243F; Control Pictures
113
- 2440..245F; Optical Character Recognition
114
- 2460..24FF; Enclosed Alphanumerics
115
- 2500..257F; Box Drawing
116
- 2580..259F; Block Elements
117
- 25A0..25FF; Geometric Shapes
118
- 2600..26FF; Miscellaneous Symbols
119
- 2700..27BF; Dingbats
120
- 27C0..27EF; Miscellaneous Mathematical Symbols-A
121
- 27F0..27FF; Supplemental Arrows-A
122
- 2800..28FF; Braille Patterns
123
- 2900..297F; Supplemental Arrows-B
124
- 2980..29FF; Miscellaneous Mathematical Symbols-B
125
- 2A00..2AFF; Supplemental Mathematical Operators
126
- 2B00..2BFF; Miscellaneous Symbols and Arrows
127
- 2C00..2C5F; Glagolitic
128
- 2C60..2C7F; Latin Extended-C
129
- 2C80..2CFF; Coptic
130
- 2D00..2D2F; Georgian Supplement
131
- 2D30..2D7F; Tifinagh
132
- 2D80..2DDF; Ethiopic Extended
133
- 2DE0..2DFF; Cyrillic Extended-A
134
- 2E00..2E7F; Supplemental Punctuation
135
- 2E80..2EFF; CJK Radicals Supplement
136
- 2F00..2FDF; Kangxi Radicals
137
- 2FF0..2FFF; Ideographic Description Characters
138
- 3000..303F; CJK Symbols and Punctuation
139
- 3040..309F; Hiragana
140
- 30A0..30FF; Katakana
141
- 3100..312F; Bopomofo
142
- 3130..318F; Hangul Compatibility Jamo
143
- 3190..319F; Kanbun
144
- 31A0..31BF; Bopomofo Extended
145
- 31C0..31EF; CJK Strokes
146
- 31F0..31FF; Katakana Phonetic Extensions
147
- 3200..32FF; Enclosed CJK Letters and Months
148
- 3300..33FF; CJK Compatibility
149
- 3400..4DBF; CJK Unified Ideographs Extension A
150
- 4DC0..4DFF; Yijing Hexagram Symbols
151
- 4E00..9FFF; CJK Unified Ideographs
152
- A000..A48F; Yi Syllables
153
- A490..A4CF; Yi Radicals
154
- A4D0..A4FF; Lisu
155
- A500..A63F; Vai
156
- A640..A69F; Cyrillic Extended-B
157
- A6A0..A6FF; Bamum
158
- A700..A71F; Modifier Tone Letters
159
- A720..A7FF; Latin Extended-D
160
- A800..A82F; Syloti Nagri
161
- A830..A83F; Common Indic Number Forms
162
- A840..A87F; Phags-pa
163
- A880..A8DF; Saurashtra
164
- A8E0..A8FF; Devanagari Extended
165
- A900..A92F; Kayah Li
166
- A930..A95F; Rejang
167
- A960..A97F; Hangul Jamo Extended-A
168
- A980..A9DF; Javanese
169
- A9E0..A9FF; Myanmar Extended-B
170
- AA00..AA5F; Cham
171
- AA60..AA7F; Myanmar Extended-A
172
- AA80..AADF; Tai Viet
173
- AAE0..AAFF; Meetei Mayek Extensions
174
- AB00..AB2F; Ethiopic Extended-A
175
- AB30..AB6F; Latin Extended-E
176
- AB70..ABBF; Cherokee Supplement
177
- ABC0..ABFF; Meetei Mayek
178
- AC00..D7AF; Hangul Syllables
179
- D7B0..D7FF; Hangul Jamo Extended-B
180
- D800..DB7F; High Surrogates
181
- DB80..DBFF; High Private Use Surrogates
182
- DC00..DFFF; Low Surrogates
183
- E000..F8FF; Private Use Area
184
- F900..FAFF; CJK Compatibility Ideographs
185
- FB00..FB4F; Alphabetic Presentation Forms
186
- FB50..FDFF; Arabic Presentation Forms-A
187
- FE00..FE0F; Variation Selectors
188
- FE10..FE1F; Vertical Forms
189
- FE20..FE2F; Combining Half Marks
190
- FE30..FE4F; CJK Compatibility Forms
191
- FE50..FE6F; Small Form Variants
192
- FE70..FEFF; Arabic Presentation Forms-B
193
- FF00..FFEF; Halfwidth and Fullwidth Forms
194
- FFF0..FFFF; Specials
195
- 10000..1007F; Linear B Syllabary
196
- 10080..100FF; Linear B Ideograms
197
- 10100..1013F; Aegean Numbers
198
- 10140..1018F; Ancient Greek Numbers
199
- 10190..101CF; Ancient Symbols
200
- 101D0..101FF; Phaistos Disc
201
- 10280..1029F; Lycian
202
- 102A0..102DF; Carian
203
- 102E0..102FF; Coptic Epact Numbers
204
- 10300..1032F; Old Italic
205
- 10330..1034F; Gothic
206
- 10350..1037F; Old Permic
207
- 10380..1039F; Ugaritic
208
- 103A0..103DF; Old Persian
209
- 10400..1044F; Deseret
210
- 10450..1047F; Shavian
211
- 10480..104AF; Osmanya
212
- 10500..1052F; Elbasan
213
- 10530..1056F; Caucasian Albanian
214
- 10600..1077F; Linear A
215
- 10800..1083F; Cypriot Syllabary
216
- 10840..1085F; Imperial Aramaic
217
- 10860..1087F; Palmyrene
218
- 10880..108AF; Nabataean
219
- 108E0..108FF; Hatran
220
- 10900..1091F; Phoenician
221
- 10920..1093F; Lydian
222
- 10980..1099F; Meroitic Hieroglyphs
223
- 109A0..109FF; Meroitic Cursive
224
- 10A00..10A5F; Kharoshthi
225
- 10A60..10A7F; Old South Arabian
226
- 10A80..10A9F; Old North Arabian
227
- 10AC0..10AFF; Manichaean
228
- 10B00..10B3F; Avestan
229
- 10B40..10B5F; Inscriptional Parthian
230
- 10B60..10B7F; Inscriptional Pahlavi
231
- 10B80..10BAF; Psalter Pahlavi
232
- 10C00..10C4F; Old Turkic
233
- 10C80..10CFF; Old Hungarian
234
- 10E60..10E7F; Rumi Numeral Symbols
235
- 11000..1107F; Brahmi
236
- 11080..110CF; Kaithi
237
- 110D0..110FF; Sora Sompeng
238
- 11100..1114F; Chakma
239
- 11150..1117F; Mahajani
240
- 11180..111DF; Sharada
241
- 111E0..111FF; Sinhala Archaic Numbers
242
- 11200..1124F; Khojki
243
- 11280..112AF; Multani
244
- 112B0..112FF; Khudawadi
245
- 11300..1137F; Grantha
246
- 11480..114DF; Tirhuta
247
- 11580..115FF; Siddham
248
- 11600..1165F; Modi
249
- 11680..116CF; Takri
250
- 11700..1173F; Ahom
251
- 118A0..118FF; Warang Citi
252
- 11AC0..11AFF; Pau Cin Hau
253
- 12000..123FF; Cuneiform
254
- 12400..1247F; Cuneiform Numbers and Punctuation
255
- 12480..1254F; Early Dynastic Cuneiform
256
- 13000..1342F; Egyptian Hieroglyphs
257
- 14400..1467F; Anatolian Hieroglyphs
258
- 16800..16A3F; Bamum Supplement
259
- 16A40..16A6F; Mro
260
- 16AD0..16AFF; Bassa Vah
261
- 16B00..16B8F; Pahawh Hmong
262
- 16F00..16F9F; Miao
263
- 1B000..1B0FF; Kana Supplement
264
- 1BC00..1BC9F; Duployan
265
- 1BCA0..1BCAF; Shorthand Format Controls
266
- 1D000..1D0FF; Byzantine Musical Symbols
267
- 1D100..1D1FF; Musical Symbols
268
- 1D200..1D24F; Ancient Greek Musical Notation
269
- 1D300..1D35F; Tai Xuan Jing Symbols
270
- 1D360..1D37F; Counting Rod Numerals
271
- 1D400..1D7FF; Mathematical Alphanumeric Symbols
272
- 1D800..1DAAF; Sutton SignWriting
273
- 1E800..1E8DF; Mende Kikakui
274
- 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
275
- 1F000..1F02F; Mahjong Tiles
276
- 1F030..1F09F; Domino Tiles
277
- 1F0A0..1F0FF; Playing Cards
278
- 1F100..1F1FF; Enclosed Alphanumeric Supplement
279
- 1F200..1F2FF; Enclosed Ideographic Supplement
280
- 1F300..1F5FF; Miscellaneous Symbols and Pictographs
281
- 1F600..1F64F; Emoticons
282
- 1F650..1F67F; Ornamental Dingbats
283
- 1F680..1F6FF; Transport and Map Symbols
284
- 1F700..1F77F; Alchemical Symbols
285
- 1F780..1F7FF; Geometric Shapes Extended
286
- 1F800..1F8FF; Supplemental Arrows-C
287
- 1F900..1F9FF; Supplemental Symbols and Pictographs
288
- 20000..2A6DF; CJK Unified Ideographs Extension B
289
- 2A700..2B73F; CJK Unified Ideographs Extension C
290
- 2B740..2B81F; CJK Unified Ideographs Extension D
291
- 2B820..2CEAF; CJK Unified Ideographs Extension E
292
- 2F800..2FA1F; CJK Compatibility Ideographs Supplement
293
- E0000..E007F; Tags
294
- E0100..E01EF; Variation Selectors Supplement
295
- F0000..FFFFF; Supplementary Private Use Area-A
296
- 100000..10FFFF; Supplementary Private Use Area-B
297
-
298
- # EOF