unicoder 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +6 -1
- data/.travis.yml +13 -13
- data/CHANGELOG.md +19 -1
- data/Gemfile +2 -0
- data/Gemfile.lock +99 -0
- data/MIT-LICENSE.txt +1 -1
- data/README.md +35 -5
- data/bin/unicoder +1 -1
- data/lib/unicoder/builder.rb +77 -15
- data/lib/unicoder/builders/categories.rb +7 -12
- data/lib/unicoder/builders/display_width.rb +28 -7
- data/lib/unicoder/builders/emoji.rb +97 -0
- data/lib/unicoder/builders/name.rb +75 -0
- data/lib/unicoder/builders/numeric_value.rb +30 -0
- data/lib/unicoder/builders/sequence_name.rb +72 -0
- data/lib/unicoder/builders/types.rb +83 -0
- data/lib/unicoder/constants.rb +81 -16
- data/lib/unicoder/downloader.rb +54 -8
- data/lib/unicoder/multi_dimensional_array_builder.rb +24 -2
- data/unicoder.gemspec +7 -5
- metadata +48 -25
- data/data/.keep +0 -0
- data/data/unicode/8.0.0/ucd/Blocks.txt +0 -298
- data/data/unicode/8.0.0/ucd/EastAsianWidth.txt +0 -2174
- data/data/unicode/8.0.0/ucd/NameAliases.txt +0 -554
- data/data/unicode/8.0.0/ucd/PropertyValueAliases.txt +0 -1420
- data/data/unicode/8.0.0/ucd/ScriptExtensions.txt +0 -454
- data/data/unicode/8.0.0/ucd/Scripts.txt +0 -2539
- data/data/unicode/8.0.0/ucd/UnicodeData.txt +0 -29215
- data/data/unicode/8.0.0/ucd/extracted/DerivedGeneralCategory.txt +0 -3789
- data/data/unicode/security/8.0.0/confusables.txt +0 -9274
- data/spec/unicoder_spec.rb +0 -9
@@ -0,0 +1,30 @@
|
|
1
|
+
module Unicoder
  module Builder
    # Builder that collects the numeric value of codepoints into a
    # codepoint => number table stored under :NUMBERS.
    class NumericValue
      include Builder

      # Resets the index to a single, empty :NUMBERS table.
      def initialize_index
        @index = { NUMBERS: {} }
      end

      # Reads numeric values from two sources and assigns them to :NUMBERS:
      #
      # * :unicode_data - the field following the first eight
      #   semicolon-terminated fields. Values containing a slash are stored as
      #   Rational, or as a plain String when the builder option matches
      #   /stringfractions/; every other non-empty value is stored as Integer.
      # * :unihan_numeric_values - lines of the form "U+XXXX <tag> <value>",
      #   always stored as Integer.
      def parse!
        parse_file :unicode_data, :line, regex: /^(?<codepoint>.+?);(.*?;){7}(?<value>.*?);.*$/ do |line|
          raw = line["value"]
          next if raw.empty?

          number =
            if raw =~ %r</>
              # Fractions: keep the textual form only when explicitly requested
              option =~ /stringfractions/ ? "#{raw}" : raw.to_r
            else
              raw.to_i
            end

          assign :NUMBERS, line["codepoint"].to_i(16), number
        end

        parse_file :unihan_numeric_values, :line, regex: /^U\+(?<codepoint>\S+)\s+\S+\s+(?<value>\S+)$/ do |line|
          codepoint = line["codepoint"].to_i(16)
          assign :NUMBERS, codepoint, line["value"].to_i
        end
      end
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module Unicoder
  module Builder
    # Builder that collects names of codepoint sequences (named sequences,
    # standardized variants, IVD sequences and emoji sequences) into a
    # sequence => name table stored under :SEQUENCES.
    class SequenceName
      include Builder

      # Resets the index to a single, empty :SEQUENCES table.
      def initialize_index
        @index = {
          SEQUENCES: {},
        }
      end

      # Stores +value+ as the name of the sequence +codepoints+.
      #
      # codepoints - Array of Integer codepoints making up the sequence.
      # value      - String name to store.
      # idx        - Hash to write to (defaults to the :SEQUENCES table).
      # combine    - when the sequence already has a name: true appends
      #              " / <value>" to the stored name, false keeps the
      #              existing name and ignores the new one.
      #
      # The hash key is the codepoint Array itself, or the sequence packed
      # into a UTF-8 String when the builder option matches /charkeys/.
      def assign_codepoint(codepoints, value, idx = @index[:SEQUENCES], combine: false)
        key = option =~ /charkeys/ ? codepoints.pack("U*") : codepoints

        # The existence check has to use the same key that is written below,
        # otherwise duplicates are never detected in charkeys mode.
        if !idx.key?(key)
          idx[key] = value
        elsif combine
          idx[key] << " / #{value}"
        end
      end

      # Parses all sequence data files, in order. Because existing entries
      # win unless combine is requested, earlier files take precedence.
      def parse!
        %i[named_sequences named_sequences_prov].each do |file|
          parse_file file, :line, regex: /^(?!#)(?<name>.+?);(?<codepoints>.+?)$/ do |line|
            assign_codepoint hex_codepoints(line["codepoints"]), line["name"]
          end
        end

        # Variants carrying a trailing "# NAME" comment: "<name> (<variant>) [<context>]"
        parse_file :standardized_variants, :line, regex: /^(?<codepoints>.+?);\s*(?<variant>.+?)\s*;\s*(?<context>.*?)\s*# (?<name>.+)$/ do |line|
          name = "#{line["name"].strip} (#{line["variant"]})"
          name << " [#{line["context"]}]" if line["context"] && !line["context"].empty?
          assign_codepoint hex_codepoints(line["codepoints"]), name, combine: true
        end

        # Second pass over the same file for lines ending right after the
        # description field; only fills sequences not named by the pass above.
        parse_file :standardized_variants, :line, regex: /^(?<codepoints>.+?); (?<name>.+?)\s*;$/ do |line|
          assign_codepoint hex_codepoints(line["codepoints"]), line["name"]
        end

        parse_file :ivd_sequences, :line, regex: /^(?<codepoints>.+?);.*?; (?<name>.+?)$/ do |line|
          assign_codepoint hex_codepoints(line["codepoints"]), line["name"], combine: true
        end

        parse_file :emoji_variation_sequences, :line, regex: /^(?<codepoints>.+?)\s*;\s*(?<variant>.+?)\s*;\s*# \(.*\)\s*(?<name>.+?)\s*$/ do |line|
          name = "#{line["name"].strip} (#{line["variant"]})"
          assign_codepoint hex_codepoints(line["codepoints"]), name
        end

        parse_file :emoji_sequences, :line, regex: /^(?<codepoints>.+?)\s*;\s*(?<type>.+?)\s*; (?<name>.+?)\s*#/ do |line|
          next if line["type"] == "Basic_Emoji"
          # Names may contain \x{HEX} escapes; expand them before upcasing
          name = line["name"].gsub(/\\x{(\h+)}/){ [$1.to_i(16)].pack("U") }.upcase
          assign_codepoint hex_codepoints(line["codepoints"]), name
        end

        parse_file :emoji_zwj_sequences, :line, regex: /^(?<codepoints>.+?)\s*;.*?; (?<name>.+?)\s*#/ do |line|
          name = line["name"].gsub(/\\x{(\h+)}/){ [$1.to_i(16)].pack("U") }.upcase
          assign_codepoint hex_codepoints(line["codepoints"]), name
        end
      end

      private

      # Converts a whitespace-separated list of hex codepoints ("0041 0301")
      # into an Array of Integers.
      def hex_codepoints(field)
        field.split.map{ |cp| cp.to_i(16) }
      end
    end
  end
end
|
72
|
+
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Unicoder
  module Builder
    # Builder that classifies every codepoint listed in the general category
    # data into a small numeric type, stored in the :TYPES structure.
    class Types
      include Builder
      include MultiDimensionalArrayBuilder

      # U+FDD0..U+FDEF plus the last two codepoints (xFFFE, xFFFF) of each of
      # the 17 planes.
      NONCHARACTERS = [
        *0xFDD0..0xFDEF,
        *(0x0..0x10).flat_map{ |plane| [(plane << 16) | 0xFFFE, (plane << 16) | 0xFFFF] },
      ]

      # Sets up the empty :TYPES structure together with the list of type
      # names and the offsets of the nested index levels.
      def initialize_index
        @index = {
          TYPES: [],
          TYPE_NAMES: %w[Graphic Format Control Private-use Surrogate Noncharacter Reserved],
          OFFSETS: [0x10000, 0x1000, 0x100, 0x10],
        }
      end

      # Reads "<from>[..<to>] ; <category>" lines and assigns a type number to
      # every codepoint covered. The numbers line up with the positions in
      # TYPE_NAMES (1 = Format ... 6 = Reserved); codepoints of any category
      # not listed below are assigned nil. Afterwards the :TYPES structure is
      # compressed four times.
      def parse!
        parse_file :general_categories, :line, regex: /^(?<from>[^. ]+)(?:..(?<to>\S+))?\s*; (?<category>\S+).*$/ do |line|
          codepoints =
            if line["to"]
              (line["from"].to_i(16)..line["to"].to_i(16))
            else
              [line["from"].to_i(16)]
            end

          codepoints.each do |codepoint|
            type =
              case line["category"]
              when "Cf", "Zl", "Zp" then 1
              when "Cc"             then 2
              when "Co"             then 3
              when "Cs"             then 4
              when "Cn"
                # Unassigned: either a permanent noncharacter or merely reserved
                NONCHARACTERS.include?(codepoint) ? 5 : 6
              end

            assign :TYPES, codepoint, type
          end
        end

        4.times{ compress! @index[:TYPES] }
      end
    end
  end
end
|
data/lib/unicoder/constants.rb
CHANGED
@@ -1,29 +1,94 @@
|
|
1
|
-
|
2
|
-
VERSION = "0.1.0".freeze
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
module Unicoder
  VERSION = "1.0.0"

  # Supported Unicode versions, newest first.
  UNICODE_VERSIONS = %w[
    16.0.0
    15.1.0
    15.0.0
    14.0.0
    13.0.0
    12.1.0
    12.0.0
    11.0.0
    10.0.0
    9.0.0
    8.0.0
    7.0.0
    6.3.0
  ].freeze

  CURRENT_UNICODE_VERSION = UNICODE_VERSIONS.first

  # Supported Emoji versions, newest first.
  EMOJI_VERSIONS = %w[
    16.0
    15.1
    15.0
    14.0
    13.1
    13.0
    12.1
    12.0
    11.0
    5.0
    4.0
    3.0
    2.0
  ].freeze

  # Emoji version => Unicode version substituted for the
  # EMOJI_RELATED_VERSION placeholder in UNICODE_FILES paths.
  EMOJI_RELATED_UNICODE_VERSIONS = {
    "16.0" => "16.0.0",
    "15.1" => "15.1.0",
    "15.0" => "15.0.0",
    "14.0" => "14.0.0",
    "13.1" => "13.0.0",
    "13.0" => "13.0.0",
    "12.1" => "12.1.0",
    "12.0" => "12.0.0",
    "11.0" => "11.0.0",
    "5.0" => "10.0.0",
    "4.0" => "9.0.0",
    "3.0" => "9.0.0",
    "2.0" => "8.0.0",
  }.freeze

  CURRENT_EMOJI_VERSION = EMOJI_VERSIONS.first

  IVD_VERSION = "2022-09-13"

  CLDR_VERSION = "45"

  # Base location that relative UNICODE_FILES paths are appended to.
  UNICODE_DATA_ENDPOINT = "ftp://ftp.unicode.org/Public"

  LOCAL_DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../data/unicode").freeze

  # File identifier => location of the data file.
  #
  # Values starting with "/" are paths relative to UNICODE_DATA_ENDPOINT,
  # everything else is a full URL. The tokens UNICODE_VERSION, EMOJI_VERSION
  # and EMOJI_RELATED_VERSION are placeholders that get replaced with concrete
  # versions when a file is fetched. A ".zip/" path segment addresses a single
  # file inside a zip archive.
  #
  # Frozen like the other lookup tables in this module, so the shared table
  # cannot be modified by accident at runtime.
  UNICODE_FILES = {
    east_asian_width: "/UNICODE_VERSION/ucd/EastAsianWidth.txt",
    unicode_data: "/UNICODE_VERSION/ucd/UnicodeData.txt",
    name_aliases: "/UNICODE_VERSION/ucd/NameAliases.txt",
    confusables: "/security/UNICODE_VERSION/confusables.txt",
    blocks: "/UNICODE_VERSION/ucd/Blocks.txt",
    scripts: "/UNICODE_VERSION/ucd/Scripts.txt",
    script_extensions: "/UNICODE_VERSION/ucd/ScriptExtensions.txt",
    property_value_aliases: "/UNICODE_VERSION/ucd/PropertyValueAliases.txt",
    general_categories: "/UNICODE_VERSION/ucd/extracted/DerivedGeneralCategory.txt",
    unihan_numeric_values: "/UNICODE_VERSION/ucd/Unihan.zip/Unihan_NumericValues.txt",
    jamo: "/UNICODE_VERSION/ucd/Jamo.txt",
    named_sequences: "/UNICODE_VERSION/ucd/NamedSequences.txt",
    named_sequences_prov: "/UNICODE_VERSION/ucd/NamedSequencesProv.txt",
    standardized_variants: "/UNICODE_VERSION/ucd/StandardizedVariants.txt",
    ivd_sequences: "https://www.unicode.org/ivd/data/#{IVD_VERSION}/IVD_Sequences.txt",
    # emoji_data: "/EMOJI_VERSION/ucd/emoji/",
    emoji_data: "/EMOJI_RELATED_VERSION/ucd/emoji/emoji-data.txt",
    emoji_sequences: "/emoji/EMOJI_VERSION/emoji-sequences.txt",
    # emoji_variation_sequences: "/emoji/EMOJI_VERSION/emoji-variation-sequences.txt",
    emoji_variation_sequences: "/EMOJI_RELATED_VERSION/ucd/emoji/emoji-variation-sequences.txt",
    emoji_zwj_sequences: "/emoji/EMOJI_VERSION/emoji-zwj-sequences.txt",
    emoji_test: "/emoji/EMOJI_VERSION/emoji-test.txt",
    # valid_subdivisions: "https://www.unicode.org/repos/cldr/tags/release-#{CLDR_VERSION}/common/validity/subdivision.xml",
    valid_subdivisions: "https://raw.githubusercontent.com/unicode-org/cldr/release-#{CLDR_VERSION}/common/validity/subdivision.xml",
  }.freeze
end
|
29
94
|
|
data/lib/unicoder/downloader.rb
CHANGED
@@ -1,28 +1,74 @@
|
|
1
1
|
require "open-uri"
|
2
2
|
require "fileutils"
|
3
|
+
require "zip"
|
3
4
|
|
4
5
|
module Unicoder
  # Downloads Unicode data files into the local data directory.
  module Downloader
    # Matches locations that address a file inside a zip archive:
    # "<outer_path>.zip/<inner_path>".
    ZIP_MEMBER_PATTERN = %r[\A(?<outer_path>.*)\.zip/(?<inner_path>.*)\z]

    # Fetches one data file, unless it is already present locally.
    #
    # identifier            - key of UNICODE_FILES; if it is not a known key,
    #                         +filename+ is used instead.
    # unicode_version       - replaces the UNICODE_VERSION placeholder.
    # emoji_version         - replaces the EMOJI_VERSION placeholder and selects
    #                         the replacement for EMOJI_RELATED_VERSION.
    # destination_directory - directory the default destination is built from.
    # destination           - explicit local path (overrides the default).
    # filename              - path relative to UNICODE_DATA_ENDPOINT, or a full
    #                         http(s)/ftp URL.
    #
    # A location containing ".zip/" downloads the archive and extracts the
    # addressed file next to it. Any StandardError is reported on $stderr
    # instead of being raised.
    def self.fetch(identifier,
      unicode_version: CURRENT_UNICODE_VERSION,
      emoji_version: CURRENT_EMOJI_VERSION,
      destination_directory: LOCAL_DATA_DIRECTORY,
      destination: nil,
      filename: nil
    )
      filename = UNICODE_FILES[identifier.to_sym] || filename
      raise ArgumentError, "No valid file identifier or filename given" if !filename

      # Work on a copy, the entries of UNICODE_FILES must stay untouched
      filename = filename.dup
      filename.sub! 'UNICODE_VERSION', unicode_version
      filename.sub! 'EMOJI_VERSION', emoji_version

      # Only look up the related Unicode version when the path needs it, so an
      # Emoji version without a mapping does not break unrelated downloads
      if filename.include?('EMOJI_RELATED_VERSION')
        related_version = EMOJI_RELATED_UNICODE_VERSIONS[emoji_version]
        raise ArgumentError, "No Unicode version known for Emoji version #{emoji_version}" if !related_version
        filename.sub! 'EMOJI_RELATED_VERSION', related_version
      end

      if filename =~ /\A(https?|ftp):\/\//
        source = filename
        # Strip "scheme:/" so the host becomes the first local directory
        destination ||= destination_directory + filename.sub(/\A(https?|ftp):\//, "")
      else
        source = UNICODE_DATA_ENDPOINT + filename
        destination ||= destination_directory + filename
      end

      puts "GET #{source} => #{destination}"

      zip_source = ZIP_MEMBER_PATTERN.match(source)
      if zip_source
        # Download the whole archive, then extract the requested member
        zip_destination = ZIP_MEMBER_PATTERN.match(destination)
        raise "unicoder bug" unless zip_destination

        source = zip_source[:outer_path] + ".zip"
        inner_zip_filename = zip_source[:inner_path]
        destination = zip_destination[:outer_path] + ".zip"
        destination_files = zip_destination[:outer_path]
      end

      if File.exist?(destination)
        puts "Skipping download of #{source} (already exists)"
      else
        URI.open(source){ |f|
          FileUtils.mkdir_p(File.dirname(destination))
          # Binary write: the payload may be a zip archive
          File.binwrite(destination, f.read)
        }
      end

      unzip(destination, [inner_zip_filename], destination_files) if zip_source
    rescue => e
      $stderr.puts "#{e.class}: #{e.message}"
    end

    # Extracts the entries of +archive+ whose names are listed in +files+
    # into +destination_dir+. Entries that were already extracted are left
    # alone, which keeps repeated fetches from failing on existing files.
    def self.unzip(archive, files, destination_dir)
      Zip::File.open(archive) do |zip|
        zip.each do |file_in_zip|
          next unless files.include?(file_in_zip.name)

          target = File.join(destination_dir, file_in_zip.name)
          if File.exist?(target)
            puts "Skipping extraction of #{file_in_zip.name} (already exists)"
            next
          end

          FileUtils.mkdir_p(File.dirname(target))
          puts "Extract #{file_in_zip.name}"
          file_in_zip.extract(target)
        end
      end
    end
  end
end
|
@@ -59,6 +59,28 @@ module Unicoder
|
|
59
59
|
end
|
60
60
|
}
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
|
+
# Removes trailing nil entries from the nested Arrays inside +index+, in place.
#
# Every element of +index+ that is an Array is trimmed, as are Arrays nested
# up to three further levels below it (four levels in total). Elements that
# are not Arrays are skipped, and the +index+ container itself is not trimmed.
#
# index - the structure to clean up (defaults to @index).
#
# Returns +index+.
def remove_trailing_nils!(index = @index)
  trim = lambda do |node, levels|
    next unless node.is_a?(Array)

    # Stop at an empty array: [][-1] is nil as well, so checking the last
    # element alone would never terminate for empty or all-nil arrays
    node.pop while !node.empty? && node.last.nil?

    node.each{ |child| trim.call(child, levels - 1) } if levels > 1
  end

  index.each{ |plane| trim.call(plane, 4) }
end
|
63
85
|
end
|
64
|
-
end
|
86
|
+
end
|
data/unicoder.gemspec
CHANGED
@@ -5,18 +5,20 @@ require File.dirname(__FILE__) + "/lib/unicoder/constants"
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
6
|
gem.name = "unicoder"
|
7
7
|
gem.version = Unicoder::VERSION
|
8
|
-
gem.summary = "
|
9
|
-
gem.description = "
|
8
|
+
gem.summary = "Creates specialized indexes for Unicode data lookup"
|
9
|
+
gem.description = "Generates specialized indexes for Unicode data lookup"
|
10
10
|
gem.authors = ["Jan Lelis"]
|
11
|
-
gem.email = ["
|
11
|
+
gem.email = ["hi@ruby.consulting"]
|
12
12
|
gem.homepage = "https://github.com/janlelis/unicoder"
|
13
13
|
gem.license = "MIT"
|
14
14
|
|
15
|
-
gem.files = Dir["{**/}{.*,*}"].select{ |path| File.file?(path) && path !~ /^pkg/ }
|
15
|
+
gem.files = Dir["{**/}{.*,*}"].select{ |path| File.file?(path) && path !~ /^(pkg|data)/ && path !~ /(marshal|mjs|json)(.gz)?$/ }
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
20
|
-
gem.required_ruby_version = "
|
20
|
+
gem.required_ruby_version = ">= 2.0", "< 4.0"
|
21
21
|
gem.add_dependency "rationalist", "~> 2.0"
|
22
|
+
gem.add_dependency "rubyzip", "~> 1.2"
|
23
|
+
gem.add_dependency "oga", "~> 2.9"
|
22
24
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicoder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-10-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rationalist
|
@@ -24,9 +24,37 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.0'
|
27
|
-
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rubyzip
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.2'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: oga
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.9'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.9'
|
55
|
+
description: Generates specialized indexes for Unicode data lookup
|
28
56
|
email:
|
29
|
-
-
|
57
|
+
- hi@ruby.consulting
|
30
58
|
executables:
|
31
59
|
- unicoder
|
32
60
|
extensions: []
|
@@ -37,57 +65,52 @@ files:
|
|
37
65
|
- CHANGELOG.md
|
38
66
|
- CODE_OF_CONDUCT.md
|
39
67
|
- Gemfile
|
68
|
+
- Gemfile.lock
|
40
69
|
- MIT-LICENSE.txt
|
41
70
|
- README.md
|
42
71
|
- Rakefile
|
43
72
|
- bin/unicoder
|
44
|
-
- data/.keep
|
45
|
-
- data/unicode/8.0.0/ucd/Blocks.txt
|
46
|
-
- data/unicode/8.0.0/ucd/EastAsianWidth.txt
|
47
|
-
- data/unicode/8.0.0/ucd/NameAliases.txt
|
48
|
-
- data/unicode/8.0.0/ucd/PropertyValueAliases.txt
|
49
|
-
- data/unicode/8.0.0/ucd/ScriptExtensions.txt
|
50
|
-
- data/unicode/8.0.0/ucd/Scripts.txt
|
51
|
-
- data/unicode/8.0.0/ucd/UnicodeData.txt
|
52
|
-
- data/unicode/8.0.0/ucd/extracted/DerivedGeneralCategory.txt
|
53
|
-
- data/unicode/security/8.0.0/confusables.txt
|
54
73
|
- lib/unicoder.rb
|
55
74
|
- lib/unicoder/builder.rb
|
56
75
|
- lib/unicoder/builders/blocks.rb
|
57
76
|
- lib/unicoder/builders/categories.rb
|
58
77
|
- lib/unicoder/builders/confusable.rb
|
59
78
|
- lib/unicoder/builders/display_width.rb
|
79
|
+
- lib/unicoder/builders/emoji.rb
|
80
|
+
- lib/unicoder/builders/name.rb
|
81
|
+
- lib/unicoder/builders/numeric_value.rb
|
60
82
|
- lib/unicoder/builders/scripts.rb
|
83
|
+
- lib/unicoder/builders/sequence_name.rb
|
84
|
+
- lib/unicoder/builders/types.rb
|
61
85
|
- lib/unicoder/constants.rb
|
62
86
|
- lib/unicoder/downloader.rb
|
63
87
|
- lib/unicoder/multi_dimensional_array_builder.rb
|
64
88
|
- lib/unicoder/tasks.rake
|
65
|
-
- spec/unicoder_spec.rb
|
66
89
|
- unicoder.gemspec
|
67
90
|
homepage: https://github.com/janlelis/unicoder
|
68
91
|
licenses:
|
69
92
|
- MIT
|
70
93
|
metadata: {}
|
71
|
-
post_install_message:
|
94
|
+
post_install_message:
|
72
95
|
rdoc_options: []
|
73
96
|
require_paths:
|
74
97
|
- lib
|
75
98
|
required_ruby_version: !ruby/object:Gem::Requirement
|
76
99
|
requirements:
|
77
|
-
- - "
|
100
|
+
- - ">="
|
78
101
|
- !ruby/object:Gem::Version
|
79
102
|
version: '2.0'
|
103
|
+
- - "<"
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '4.0'
|
80
106
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
107
|
requirements:
|
82
108
|
- - ">="
|
83
109
|
- !ruby/object:Gem::Version
|
84
110
|
version: '0'
|
85
111
|
requirements: []
|
86
|
-
|
87
|
-
|
88
|
-
signing_key:
|
112
|
+
rubygems_version: 3.5.21
|
113
|
+
signing_key:
|
89
114
|
specification_version: 4
|
90
|
-
summary:
|
91
|
-
test_files:
|
92
|
-
- spec/unicoder_spec.rb
|
93
|
-
has_rdoc:
|
115
|
+
summary: Creates specialized indexes for Unicode data lookup
|
116
|
+
test_files: []
|
data/data/.keep
DELETED
File without changes
|