uniprop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
# Downloader settings, keyed by Unicode version name.
# The :default entry holds the baseline values.
DOWNLOADER_SETTINGS = {
  default: {
    # Cache directory: "UCD" next to this settings file.
    cache_path: Pathname.new(__dir__).join("UCD"),
    excluded_extensions: %w[zip gz Z pdf ps gif jpg C html],
    excluded_directories: %w[
      MAPPINGS PROGRAMS UCA cldr idna math reconstructed security vertical zipped charts ucdxml
    ],
    excluded_files: %w[
      Index
      CJKXREF
      StandardizedVariants
      TangutSources
      NushuSources
      USourceData
      NamedSequencesProv
      ReadMe
    ] + %w[
      NormalizationCorrections
      NamedSequences
      CJKRadicals
      NamesList
      emoji-variation-sequences
      EmojiSources
    ], # the second list holds files that do not contain any property
    included_files: %w[Unihan],
    unicode_beta: false,
  },

  # Settings can be overridden per version if needed, for example:
  # "15.0.0": {
  #   unicode_beta: true,
  # },
}
35
+
36
# Per-version information about the data files and how their lines are split.
# Each format hash carries a :strip and a :split string; note that "\s" inside
# a double-quoted Ruby string is a single space character.
FILES_INFORMATION = {
  default: {
    property_aliases_file_name: "PropertyAliases",
    property_value_aliases_file_name: "PropertyValueAliases",

    # Format used for files without a dedicated entry in :file_formats.
    default_file_format: { strip: "\s", split: ";" },

    # Formats for individual files, identified by :file_name.
    file_formats: [
      { file_name: "NushuSources", strip: "", split: "\s" },
    ],

    unihan_file_format: { strip: "", split: "\s" },
  },

  "15.0.0": {
    file_formats: [
      { file_name: "NushuSources", strip: "", split: "\s" },
    ],
  },
}
71
+
72
# Per-version information about individual properties.
# :miscellaneous_formats pairs a property name with a format type; entries of
# type "Unique" additionally carry a :unique_threshold.
PROPERTIES_INFORMATION = {
  default: {
    miscellaneous_formats: [
      { property_name: "Bidi_Mirroring_Glyph", format_type: "String" },
      { property_name: "Bidi_Paired_Bracket", format_type: "String" },
      { property_name: "Equivalent_Unified_Ideograph", format_type: "String" },
      { property_name: "Jamo_Short_Name", format_type: "Jamo_Short_Name" },
      { property_name: "Name", format_type: "Unique", unique_threshold: 0.9 },
      { property_name: "Name_Alias", format_type: "Unique", unique_threshold: 0.9 },
      { property_name: "Script_Extensions", format_type: "Script_Extensions" },
      { property_name: "Unicode_1_Name", format_type: "text" },
      { property_name: "ISO_Comment", format_type: "text" },
    ],
  },

  # Example of a per-version entry:
  # "15.0.0": {
  #   miscellaneous_formats: [
  #   ]
  # }
}
@@ -0,0 +1,31 @@
1
module UniProp
  # Matches either a code point range ("0041..005A") or a single code point
  # ("0041"), each written with 4 to 6 uppercase hexadecimal digits.
  RE_CODEPOINT = /[0-9A-F]{4,6}\.\.[0-9A-F]{4,6}|[0-9A-F]{4,6}/

  # Lowest and highest code point values, and the range spanning them.
  MIN_CODEPOINT = 0
  MAX_CODEPOINT = 0x10FFFF
  CODEPOINT_RANGE = (MIN_CODEPOINT..MAX_CODEPOINT)

  # Records where a property is described.
  # Members, in constructor order:
  #   propfile [PropFile]
  #   range    [Range<Integer>]
  #   block    [Integer]
  #   columns  (the original notes document this as an Integer column)
  Position = Struct.new(:propfile, :range, :block, :columns)

  # Holds the result of parsing one line of a "missing" comment.
  # Members, in constructor order:
  #   codepoint_range [Range<Integer>]
  #   property        [Property]
  #   missing_value   [String]
  MissingDef = Struct.new(:codepoint_range, :property, :missing_value)

  # One block as written in the metadata, kept as raw Strings.
  # Mainly used when the metadata is re-created.
  # Members, in constructor order:
  #   content [Array<String>, Array<Array<String>>]
  #   range   [String]
  RawBlock = Struct.new(:content, :range)

  # One block as written in the metadata, held as parsed objects.
  # Mainly used when the metadata is consumed.
  # Members, in constructor order:
  #   content [Array<Property?>, Array<Array<Property?>>]
  #   range   [Range<Integer>]
  Block = Struct.new(:content, :range)
end
@@ -0,0 +1,262 @@
1
+ require 'fileutils'
2
+ require 'open-uri'
3
+ require 'pathname'
4
# Pick the scheme used for downloads: 'https' when net/https can be loaded,
# 'http' otherwise. The local variable `https` is read again further down.
begin
  require 'net/https'
rescue LoadError
  https = 'http'
else
  https = 'https'

  # open-uri of ruby 2.2.0 accepts an array of PEMs as ssl_ca_cert, but old
  # versions do not. so, patching OpenSSL::X509::Store#add_file instead.
  class OpenSSL::X509::Store
    alias_method :orig_add_file, :add_file

    # Accepts one entry or an array of entries; directories are registered
    # with add_path, everything else with the original add_file.
    def add_file(pems)
      Array(pems).each do |pem|
        File.directory?(pem) ? add_path(pem) : orig_add_file(pem)
      end
    end
  end

  # since open-uri internally checks ssl_ca_cert using File.directory?,
  # allow to accept an array.
  class << File
    alias_method :orig_directory?, :directory?

    # An Array is never a directory; anything else goes to the original check.
    def directory?(files)
      return false if files.is_a?(Array)

      orig_directory?(files)
    end
  end
end
34
+
35
# Downloads files over HTTP(S) and keeps previously fetched copies in a cache
# directory. Everything is exposed as class methods; the scheme to use is set
# once through Downloader.https=.
class Downloader
  # @param https [String] 'https' or 'http'
  def self.https=(https)
    @@https = https
  end

  # @return [Boolean] true when the configured scheme is 'https'
  def self.https?
    @@https == 'https'
  end

  # @return [String] the configured scheme
  def self.https
    @@https
  end

  # @param data [String] downloaded content
  # @return [Integer] 0755 when the content starts with a shebang, else 0644
  def self.mode_for(data)
    /\A#!/ =~ data ? 0755 : 0644
  end

  # Builds the request headers for a download.
  # @param file [String, Pathname] local file whose mtime is used when +since+ is true
  # @param since [true, Time, String, nil, false] source of the If-Modified-Since
  #   header: true uses the mtime of +file+ (no header if it cannot be read),
  #   a Time is formatted as an HTTP date, any other truthy value is sent
  #   as given, nil/false sends no header
  # @return [Hash{String => String}]
  def self.http_options(file, since)
    options = {}
    if since
      case since
      when true
        since = (File.mtime(file).httpdate rescue nil)
      when Time
        since = since.httpdate
      end
      options['If-Modified-Since'] = since if since
    end
    options['Accept-Encoding'] = 'identity' # to disable Net::HTTP::GenericRequest#decode_content
    options
  end

  # Parses an HTTP date, tolerating one known non-standard form.
  # @param date [String]
  # @return [Time]
  # @raise [ArgumentError] when the date matches neither form
  def self.httpdate(date)
    Time.httpdate(date)
  rescue ArgumentError => e
    # Some hosts (e.g., zlib.net) return similar to RFC 850 but 4
    # digit year, sometimes.
    m = /\A\s*
         (?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day,\x20
         (\d\d)-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d{4})\x20
         (\d\d):(\d\d):(\d\d)\x20
         GMT
         \s*\z/ix.match(date)
    raise unless m # re-raises the ArgumentError from Time.httpdate

    warn e.message
    Time.utc(m[3], m[2], m[1], m[4], m[5], m[6])
  end

  # Downloads +url+ to a local file, going through the cache when possible.
  #
  # @param url [String, URI] source location
  # @param name [String, nil] destination file name; when nil the basename of
  #   the URL path is used and the file lives in the cache directory
  # @param dir [String, nil] directory for the destination file
  # @param since [true, Time, String, nil, false] nil returns an already
  #   existing file without any request; other values are passed on to
  #   http_options (nil is sent as true once a request is made)
  # @param options [Hash] :dryrun, :unicode_beta, :cache_save and :cache_dir
  #   are consumed here; the rest is handed to the URL read call
  # @return [String, nil] path of the local file; nil for a dry run
  # @raise [RuntimeError] wrapping any StandardError raised on the way
  def self.download(url, name, dir = nil, since = true, options = {})
    options = options.dup
    url = URI(url)
    dryrun = options.delete(:dryrun)
    options.delete(:unicode_beta) # just to be on the safe side for gems and gcc

    if name
      file = Pathname.new(under(dir, name))
    else
      name = File.basename(url.path)
    end
    cache_save = options.delete(:cache_save) { ENV["CACHE_SAVE"] != "no" }
    cache = cache_file(url, name, options.delete(:cache_dir))
    file ||= cache
    if since.nil? && file.exist?
      if $VERBOSE
        $stdout.puts "#{file} already exists"
        $stdout.flush
      end
      save_cache(cache, file, name) if cache_save
      return file.to_path
    end
    if dryrun
      puts "Download #{url} into #{file}"
      return
    end
    return file.to_path if link_cache(cache, file, name, $VERBOSE)

    if !https? && URI::HTTPS === url
      warn "*** using http instead of https ***"
      url.scheme = 'http'
      url = URI(url.to_s)
    end
    if $VERBOSE
      $stdout.print "downloading #{name} ... "
      $stdout.flush
    end
    mtime = nil
    options = options.merge(http_options(file, since.nil? ? true : since))
    begin
      data = with_retry(10) do
        body = url.read(options)
        last_modified = body.meta["last-modified"]
        # Use the tolerant parser so a 4-digit-year RFC 850 date does not
        # trigger ArgumentError retries.
        mtime = last_modified && httpdate(last_modified)
        body
      end
    rescue OpenURI::HTTPError => http_error
      if http_error.message =~ /^304 / # 304 Not Modified
        if $VERBOSE
          $stdout.puts "not modified"
          $stdout.flush
        end
        return file.to_path
      end
      raise
    rescue Timeout::Error
      if since.nil? && file.exist?
        puts "Request for #{url} timed out, using old version."
        return file.to_path
      end
      raise
    rescue SocketError
      if since.nil? && file.exist?
        puts "No network connection, unable to download #{url}, using old version."
        return file.to_path
      end
      raise
    end
    # Write into the cache first when caching is on and no cached copy exists.
    dest = (cache_save && cache && !cache.exist? ? cache : file)
    dest.parent.mkpath
    dest.open("wb", 0600) do |f|
      data.scrub!('?') # guard against "invalid byte sequence in UTF-8"
      f.write(data)
      f.chmod(mode_for(data))
    end
    dest.utime(mtime, mtime) if mtime
    if $VERBOSE
      $stdout.puts "done"
      $stdout.flush
    end
    if dest.eql?(cache)
      link_cache(cache, file, name)
    elsif cache_save
      save_cache(cache, file, name)
    end
    return file.to_path
  rescue => e
    raise "failed to download #{name}\n#{e.class}: #{e.message}: #{url}"
  end

  # @return [String] basename of +name+ joined to +dir+, or +name+ itself when +dir+ is nil
  def self.under(dir, name)
    dir ? File.join(dir, File.basename(name)) : name
  end

  # Location of the cached copy for a download.
  # @param cache_dir [String, false, nil] false disables caching; nil falls
  #   back to ENV['CACHE_DIR'] and then to ".downloaded-cache"
  # @return [Pathname, nil] nil when caching is disabled
  def self.cache_file(url, name, cache_dir = nil)
    case cache_dir
    when false
      return nil
    when nil
      cache_dir = ENV['CACHE_DIR']
      cache_dir = ".downloaded-cache" if !cache_dir || cache_dir.empty?
    end
    Pathname.new(cache_dir) + (name || File.basename(URI(url).path))
  end

  # Makes +file+ refer to the cached copy, trying a symbolic link first and a
  # hard link second.
  # @return [Boolean, nil] true when +file+ now refers to the cache (or is the
  #   cache itself), false when there is no cached copy, nil when neither
  #   kind of link could be created
  def self.link_cache(cache, file, name, verbose = false)
    return false unless cache && cache.exist?
    return true if cache.eql?(file)

    # Symlinks are skipped on cygwin unless native strict symlinks are enabled.
    if /cygwin/ !~ RUBY_PLATFORM || /winsymlink:nativestrict/ =~ ENV['CYGWIN']
      begin
        file.make_symlink(cache.relative_path_from(file.parent))
      rescue SystemCallError
      else
        if verbose
          $stdout.puts "made symlink #{name} to #{cache}"
          $stdout.flush
        end
        return true
      end
    end
    begin
      file.make_link(cache)
    rescue SystemCallError
    else
      if verbose
        $stdout.puts "made link #{name} to #{cache}"
        $stdout.flush
      end
      return true
    end
  end

  # Moves +file+ into the cache (or drops it when the cached copy is newer)
  # and links it back from the cache.
  def self.save_cache(cache, file, name)
    # Nothing to do without a cache location, or when file already is the cache.
    return if !cache || cache.eql?(file)

    begin
      st = cache.stat
    rescue
      # No cached copy yet: move the file into the cache.
      begin
        file.rename(cache)
      rescue
        return
      end
    else
      return unless st.mtime > file.lstat.mtime

      file.unlink
    end
    link_cache(cache, file, name)
  end

  # Runs the block, retrying on the listed errors up to +max_times+ times and
  # sleeping times ** 2 seconds before each retry. HTTP errors are retried
  # only for status 500, 502 and 503.
  def self.with_retry(max_times, &block)
    times = 0
    begin
      block.call
    rescue Errno::ETIMEDOUT, SocketError, OpenURI::HTTPError, Net::ReadTimeout, Net::OpenTimeout, ArgumentError => e
      raise if e.is_a?(OpenURI::HTTPError) && e.message !~ /^50[023] / # retry only 500, 502, 503 for http error

      times += 1
      raise unless times <= max_times

      $stderr.puts "retrying #{e.class} (#{e.message}) after #{times ** 2} seconds..."
      sleep(times ** 2)
      retry
    end
  end
  private_class_method :with_retry
end
261
+
262
+ Downloader.https = https.freeze
@@ -0,0 +1,53 @@
1
module UniProp
  # NOTE(review): these are defined as instance methods, while the
  # const_missing hook in this file calls UniProp::prop_data on the module
  # itself; presumably the module is made to extend itself elsewhere - confirm.

  # Returns the shared PropData, building it on first use from the
  # settings.rb and metadata.json files under ../resources.
  # @return [PropData]
  def prop_data
    return @@prop_data if defined?(@@prop_data) && @@prop_data

    here = Pathname.new(__dir__)
    @@prop_data = PropData.new(
      here / "../resources/settings.rb",
      here / "../resources/metadata.json"
    )
  end

  # @param [String] version_name
  # @return [VersionManager]
  def version(version_name)
    prop_data.version_manager(version_name)
  end

  # @return [UnicodeManager]
  def unicode_manager
    prop_data.unicode_manager
  end

  # Returns the name of the latest version.
  # @param [Boolean] update_metadata when true, the version names are fetched
  #   and the metadata is updated
  # @return [String]
  def latest_version(update_metadata: false)
    names = prop_data.metadata.version_names(
      update_metadata: update_metadata,
      confirm: update_metadata
    )

    names.sort_by { |name| Version.name_to_weight(name) }.last
  end
end
29
+
30
# Resolves otherwise undefined constants of the form V15, V15_0, V15_0_0 or
# V4_0_Update1 to a version manager, and constants whose name contains
# UNICODE to the unicode manager. Any other name is passed to the original
# const_missing.
class Module
  # Alias the original hook only once. Aliasing again when this file is
  # loaded a second time would make const_missing_orig point at the override
  # below, so every unmatched constant would recurse without end.
  unless method_defined?(:const_missing_orig) || private_method_defined?(:const_missing_orig)
    alias_method :const_missing_orig, :const_missing
  end

  # @param const [Symbol] name of the missing constant
  # @return [Object] the object returned by UniProp::prop_data for a matching name
  # @raise [NameError] from the original hook when the name matches no pattern
  def const_missing(const, *args, &block)
    case const.to_s
    when /\AV([\d_]+)\z/
      # VersionManager: A_B_C -> A.B.C, missing minor/tiny parts default to 0
      version_nums = Regexp.last_match(1).split(/_/)
      version_nums[1] ||= "0"
      version_nums[2] ||= "0"
      UniProp::prop_data.version_manager(version_nums.join("."))
    when /\AV(\d+)_(\d+)_Update(\d+)\z/
      # VersionManager: A_B_UpdateC -> A.B-UpdateC
      major, minor, update = Regexp.last_match.captures
      UniProp::prop_data.version_manager("#{major}.#{minor}-Update#{update}")
    when /UNICODE/
      # UnicodeManager
      # NOTE(review): this pattern is unanchored, so any missing constant
      # containing "UNICODE" resolves here instead of raising NameError;
      # confirm whether only the exact name is intended.
      UniProp::prop_data.unicode_manager
    else
      const_missing_orig(const, *args, &block)
    end
  end
end
@@ -0,0 +1,40 @@
1
module UniProp
  # A Version that assumes complete metadata already exists: it downloads the
  # required files into the cache and otherwise behaves like Version.
  class EfficientVersion < Version
    # Builds (once) the list of files: one PropFile per name listed in the
    # version metadata, followed by the two alias files.
    # @return [Array<PropFile>]
    def files
      return @files if @files

      @files = version_metadata.propfile_names.map { |name| create_propfile(name) }

      # PropertyAliases and PropertyValueAliases are not listed in the metadata.
      @files << property_aliases_file << property_value_aliases_file
    end

    # @return [Array<String>?]
    def unihan_file_names
      version_metadata.unihan_file_names
    end

    # @return [Array<Property>]
    def unihan_properties
      version_metadata.unihan_properties
    end

    # Same as the inherited lookup, with confirmation turned off.
    # @return [PropFile]
    def find_file(propfile)
      super(propfile, confirm: false)
    end

    # @param [String] filename file name given as a basename prefix
    # @return [PropFile] a PropFile::UnihanFile for Unihan files, a plain PropFile otherwise
    def create_propfile(filename)
      unihan = UniPropUtils::FileManager.unihan_file?(filename, unihan_file_names)
      propfile_class = unihan ? PropFile::UnihanFile : PropFile
      propfile_class.new(filename, self)
    end
  end
end
@@ -0,0 +1,31 @@
1
module UniProp
  # Errors that carry no predefined message.
  VersionNotMatchedError = Class.new(StandardError)
  ParseError = Class.new(StandardError)
  FileNotFoundError = Class.new(StandardError)
  MetaDataNotFoundError = Class.new(StandardError)
  MetaDataParseError = Class.new(StandardError)
  PropDataDifferentError = Class.new(StandardError)
  VersionMetaDataNotExistsError = Class.new(StandardError)
  PropertyValueNotFoundError = Class.new(StandardError)
  VersionDifferentError = Class.new(StandardError)

  # Raised with a message naming the property that was searched for.
  class PropertyNotFoundError < StandardError
    def initialize(searched_property)
      message = "property not found. (searched property: #{searched_property})"
      super(message)
    end
  end

  # Raised with a message naming the unknown property value type.
  class PropertyValueTypeNotExistsError < StandardError
    def initialize(type)
      message = "#{type} does not exist as property value type."
      super(message)
    end
  end

  # Raised with a message naming the file path that is already present.
  class FileExistsError < StandardError
    def initialize(file_path)
      message = "#{file_path} is already exists. Please delete the file and run again."
      super(message)
    end
  end

  # Raised with a message naming the version whose metadata is already present.
  class MetaDataExistsError < StandardError
    def initialize(version_name)
      message = "Metadata for #{version_name} is already exists. Please delete the data and run again."
      super(message)
    end
  end
end
@@ -0,0 +1,122 @@
1
+ # :nocov:
2
# Short #inspect implementations for the UniProp classes. Each one prints the
# class name plus, where available, one identifying attribute.
module UniProp
  class PropFile
    # @private
    def inspect
      label = self.class.name
      "#<#{label} #{basename_prefix}>"
    end
  end

  class Version
    # @private
    def inspect
      label = self.class.name
      "#<#{label} #{major}.#{minor}.#{tiny}>"
    end
  end

  class EfficientVersion < Version
    # @private
    def inspect
      label = self.class
      "#<#{label} #{version_metadata.version.version_name}>"
    end
  end

  class PropFileValueGroup
    # @private
    def inspect
      label = self.class.name
      "#<#{label} #{propfile.basename_prefix}>"
    end
  end

  class ActualPropertyValueGroup
    # @private
    def inspect
      # Longest alias of every property, comma separated.
      property_names = properties.map { |property| property.longest_alias }.join(', ')
      "#<#{self.class.name} #{propfile.basename_prefix} (#{property_names})>"
    end
  end

  class RevisingHintGenerator
    # @private
    def inspect
      label = self.class
      "#<#{label} #{recreator.old_version.version_name},#{recreator.new_version.version_name}>"
    end
  end

  class MetaData
    # @private
    def inspect
      "#<#{self.class.name}>"
    end
  end

  class VersionMetaData
    # @private
    def inspect
      label = self.class.name
      "#<#{label} #{version.major}.#{version.minor}.#{version.tiny}>"
    end
  end

  class PropFileMetaData
    # @private
    def inspect
      label = self.class.name
      "#<#{label} #{propfile.basename_prefix}>"
    end
  end

  class VersionMetaDataValidator
    # @private
    def inspect
      label = self.class.name
      "#<#{label} #{version_metadata.version.major}.#{version_metadata.version.minor}.#{version_metadata.version.tiny}>"
    end
  end

  class PropData
    # @private
    def inspect
      "#<#{self.class.name}>"
    end
  end

  module Alias
    # @private
    def inspect
      label = self.class.name
      "#<#{label} #{longest_alias}>"
    end
  end

  class BasePropertyValueGroup
    # @private
    def inspect
      "#<#{self.class.name}>"
    end
  end

  class PropertyMetaData
    # @private
    def inspect
      "#<#{self.class.name}>"
    end
  end

  class VersionPropertyMetaData
    # @private
    def inspect
      "#<#{self.class.name}>"
    end
  end

  class UnicodeManager
    def inspect
      "#<#{self.class.name}>"
    end
  end

  class VersionManager
    def inspect
      label = self.class.name
      "#<#{label} #{version.version_name}>"
    end
  end
end
122
+ # :nocov: