uniprop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +31 -0
- data/README.md +29 -0
- data/Rakefile +12 -0
- data/lib/resources/metadata.json +19899 -0
- data/lib/resources/settings.rb +120 -0
- data/lib/uniprop/consts.rb +31 -0
- data/lib/uniprop/downloader.rb +262 -0
- data/lib/uniprop/dsl.rb +53 -0
- data/lib/uniprop/efficient_elements.rb +40 -0
- data/lib/uniprop/errors.rb +31 -0
- data/lib/uniprop/inspects.rb +122 -0
- data/lib/uniprop/metadata_generator.rb +403 -0
- data/lib/uniprop/metadata_processor.rb +673 -0
- data/lib/uniprop/metadata_validator.rb +282 -0
- data/lib/uniprop/propdata.rb +293 -0
- data/lib/uniprop/unicode_elements.rb +998 -0
- data/lib/uniprop/unicode_manager.rb +277 -0
- data/lib/uniprop/unihanprop.rb +91 -0
- data/lib/uniprop/uniinteger.rb +16 -0
- data/lib/uniprop/unistring.rb +34 -0
- data/lib/uniprop/utils.rb +542 -0
- data/lib/uniprop/value_group.rb +276 -0
- data/lib/uniprop/version.rb +5 -0
- data/lib/uniprop.rb +29 -0
- data/sig/uniprop.rbs +4 -0
- data/uniprop.gemspec +39 -0
- metadata +75 -0
@@ -0,0 +1,276 @@
|
|
1
|
+
module UniProp
|
2
|
+
# Mixin that stores and queries the relation between codepoints and values.
#
# The mutating helpers are private: an including class is expected to load
# all of its values while it is being constructed and to be read-only
# afterwards. The memoized views are nevertheless invalidated on every add,
# so a read that happens between two adds never returns stale data.
module ValueGroup
  # Looks up the value(s) recorded for a codepoint.
  #
  # @param [String/Integer] codepoint hexadecimal String naming the codepoint,
  #   or the same codepoint as an Integer
  # @return [String/Array<String>, nil] a String when one value was added for
  #   the codepoint, an Array<String> when several were; nil when nothing is
  #   recorded or when a String argument is rejected by
  #   UniPropUtils::TypeJudgementer.validate_codepoint
  def values_of(codepoint)
    if codepoint.is_a?(String)
      return unless UniPropUtils::TypeJudgementer.validate_codepoint(codepoint)

      codepoint = codepoint.hex
    end

    codepoint_to_values[codepoint]
  end

  # Lists every codepoint that has at least one value stored in this object.
  #
  # @return [Array<Integer>]
  def codepoints
    @codepoints ||= codepoint_to_values.keys
  end

  # Inverse view of the mapping: stored value (String or Array<String>) to the
  # codepoints that carry exactly that value.
  #
  # @return [Hash<Object,Array<Integer>>]
  # @note To find codepoints that carry several values, one of which is a
  #   given property value, use the *_including_codepoints lookups instead.
  def values_to_codepoints
    @values_to_codepoints ||= codepoints.group_by { |cp| codepoint_to_values[cp] }
  end

  private

  # Backing store for the mapping.
  #
  # @return [Hash<Integer,String/Array<String>>]
  # @note Keys are stored as Integer rather than String for ease of use.
  #   Values are read far more often than they are added, so the
  #   String -> Integer conversion is paid once, at insertion time.
  def codepoint_to_values
    @codepoint_to_values ||= {}
  end

  # Records one value for one codepoint. A codepoint with a single value
  # keeps it as a bare String; from the second value on it is kept as an
  # Array<String>.
  #
  # @param [Integer] codepoint the codepoint as a (decimal) Integer
  # @param [String] value
  def add_single_value(codepoint, value)
    existing = codepoint_to_values[codepoint]

    if existing.is_a?(Array)
      existing << value
    elsif existing
      # Second value for this codepoint: promote the bare value to an Array.
      codepoint_to_values[codepoint] = [existing, value]
    else
      codepoint_to_values[codepoint] = value
    end

    # Both derived views are memoized; drop them so the next read rebuilds
    # them from the updated store instead of returning stale results.
    @codepoints = nil
    @values_to_codepoints = nil
  end

  # Records one value for a codepoint or a codepoint range. Any "U+" prefix
  # is stripped from both arguments first; input whose codepoint part fails
  # validation is ignored.
  #
  # @param [String] codepoint String of the form nnnn or nnnn..nnnn
  # @param [String] value
  # @note This is private because objects using this module are meant to be
  #   instantiated and fully populated in a single step.
  def add_value(codepoint, value)
    codepoint = codepoint.gsub(/U\+/, '')
    value = value.gsub(/U\+/, '')
    return unless UniPropUtils::TypeJudgementer.validate_codepoint(codepoint)

    target = UniPropUtils::CodepointConverter.str_to_int(codepoint)
    if target.is_a?(Range)
      target.each { |cp| add_single_value(cp, value) }
    else
      add_single_value(target, value)
    end
  end

  # Records several values at once.
  #
  # @param [String] codepoint String of the form nnnn or nnnn..nnnn
  # @param [Array<String>/String] values add_value is called for each element
  #   of an Array, or once for a String; any other argument is ignored
  def add_values(codepoint, values)
    case values
    when Array then values.each { |value| add_value(codepoint, value) }
    when String then add_value(codepoint, values)
    end
  end
end
|
81
|
+
|
82
|
+
# Value group that is tied to one or more Property objects and can search
# its codepoints by property value.
class BasePropertyValueGroup
  include ValueGroup

  # Returns the codepoints that carry +value+ among their property values.
  #
  # A codepoint's stored value is either a single String or an Array<String>;
  # both shapes are compared element-wise for equality. (Calling #include?
  # directly on a single String would be a substring test, so "Han" would
  # also hit "Hangul".)
  #
  # @note Property value aliases are NOT considered; this is a plain string
  #   comparison.
  # @param [String] value
  # @return [Array<Integer>]
  def string_value_including_codepoints(value)
    values_to_codepoints.each_with_object([]) do |(values, codepoints), result|
      result.concat(codepoints) if Array(values).include?(value)
    end
  end

  # Returns the codepoints that carry +value+ among their property values,
  # taking property value aliases into account.
  #
  # @param [String] value
  # @return [Array<Integer>]
  def value_including_codepoints(value)
    # Collect, from every associated property that knows +value+, the
    # property value object it resolves to.
    property_values = properties.each_with_object([]) do |property, found|
      found << property.find_property_value(value) if property.has_property_value?(value)
    end

    # No property recognises the value: fall back to a plain string search.
    return string_value_including_codepoints(value) if property_values.empty?

    # Search once per alias of every matching property value and take the
    # union of the hits (first occurrence order, no duplicates).
    property_values
      .flat_map(&:uncanonicaled_aliases)
      .flat_map { |name| string_value_including_codepoints(name) }
      .uniq
  end

  # Properties associated with this group.
  #
  # @return [Array<Property>]
  def properties
    @properties ||= []
  end

  # @param [Property] property
  # @return [Boolean] whether +property+ is associated with this group
  def has_property?(property)
    properties.include?(property)
  end

  private

  # Associates +property+ with this group; anything that is not a Property
  # is silently ignored.
  #
  # @param [Property] property
  def add_property(property)
    properties << property if property.is_a?(Property)
  end
end
|
140
|
+
|
141
|
+
# Codepoint/value relation for specific properties within one block of a
# PropFile.
class PropertyValueGroup < BasePropertyValueGroup
  # Builds the group and immediately loads every value of the block.
  #
  # @param [PropFile] propfile
  # @param [Array<Property>] props properties contained in the block
  #   (a single Property is accepted as well)
  # @param [Integer] block_no number of the block, counting the first as 0
  def initialize(propfile, props, block_no)
    @propfile = propfile
    @propfile_metadata = @propfile.version.version_metadata.find_propfile_metadata(@propfile)

    if props.instance_of?(Array)
      props.each { |prop| add_property(prop) }
    elsif props.instance_of?(Property)
      add_property(props)
    end

    line_range = @propfile_metadata.blocks[block_no].range
    raw_content = @propfile_metadata.raw_blocks[block_no].content
    codepoint_column = @propfile_metadata.codepoint_column_nos[block_no]

    # Gather the value columns of every registered property for this block.
    value_columns = properties.each_with_object([]) do |property, columns|
      columns.concat(@propfile_metadata.property_column_nos(property)[block_no])
    end

    block_lines = propfile.shaped_lines[line_range].to_a
    add_block_values(block_lines, raw_content, codepoint_column, value_columns.uniq.sort)
  end

  private

  # Decides which loader method add_block_values should dispatch to.
  #
  # @param [Array<Object>] raw_content value of the metadata's raw_content entry
  # @param [Integer] codepoint_column_no column holding the codepoint
  # @param [Array<Integer>] value_column_nos columns holding property values
  # @return [Symbol] name of the loader method
  # @note Both column arguments count the first column as 0.
  def value_add_method(raw_content, codepoint_column_no, value_column_nos)
    # Selection by the shape of the block.
    selected_columns = raw_content.values_at(*value_column_nos)
    return :adv_composition_exclusion if [["codepoint", "Composition_Exclusion"]] == selected_columns

    # Selection by the type of the (single) property.
    if properties.size == 1
      case properties[0].property_value_type
      when :binary then return :adv_binary_property
      end
    end

    # Nothing special applies: use the default loader.
    :default_add_block_values
  end

  # Loads every value of the block, picking the loader that matches the
  # block's layout.
  #
  # @param [Array<Array<String>>] shaped_lines the slice of
  #   PropFile#shaped_lines that belongs to the block
  # @param [Array<Object>] raw_content
  # @param [Integer] codepoint_column_no
  # @param [Array<Integer>] value_column_nos
  def add_block_values(shaped_lines, raw_content, codepoint_column_no, value_column_nos)
    loader = value_add_method(raw_content, codepoint_column_no, value_column_nos)
    send(loader, shaped_lines, codepoint_column_no, value_column_nos)
  end

  # Default loader: stores the value columns of each line as they are.
  # Lines with a missing codepoint or a missing value column are skipped.
  def default_add_block_values(shaped_lines, codepoint_column_no, value_column_nos)
    shaped_lines.each do |line|
      codepoint = line[codepoint_column_no]
      values = line.values_at(*value_column_nos)
      next unless codepoint && values.all?

      add_values(codepoint, values)
    end
  end

  # Loader for CompositionExclusions.txt. That file lists only the codepoints
  # whose binary property Composition_Exclusion is True, so every codepoint
  # written there is stored as "True" (all others are later reported as the
  # missing value "False").
  def adv_composition_exclusion(shaped_lines, codepoint_column_no, value_column_nos)
    shaped_lines.each do |line|
      add_value(line[0], "True") if line.size == 1
    end
  end

  # Loader for binary properties. Data files spell out the property name for
  # a binary property, so "True" is stored rather than the text in the file.
  def adv_binary_property(shaped_lines, codepoint_column_no, value_column_nos)
    shaped_lines.each do |line|
      codepoint = line[codepoint_column_no]
      next unless codepoint

      add_value(codepoint, "True")
    end
  end
end
|
240
|
+
|
241
|
+
# Codepoint/value relation for a single Unihan property.
class UnihanValueGroup < BasePropertyValueGroup
  # Registers the property and loads every line's values.
  #
  # @param [Property] property
  # @param [Array<Array<String>>] shaped_lines the Unihan shaped_lines that
  #   concern +property+; element 0 of a line is used as the codepoint and
  #   elements 2 onwards as its values
  def initialize(property, shaped_lines)
    add_property(property)

    shaped_lines.each do |line|
      codepoint = line[0]
      values = line[2..-1]
      add_values(codepoint, values)
    end
  end
end
|
249
|
+
|
250
|
+
# Codepoint/value relation for one whole block of a PropFile: every column
# other than the codepoint column is stored as a value.
class BlockValueGroup
  include ValueGroup

  attr_reader :propfile

  # Builds the group and immediately loads every line of the block.
  #
  # @param [PropFile] propfile
  # @param [Integer] block_no number of the block, counting the first as 0
  def initialize(propfile, block_no)
    @propfile = propfile
    propfile_metadata = @propfile.version.version_metadata.find_propfile_metadata(@propfile)

    block_range = propfile_metadata.blocks[block_no].range
    codepoint_col_no = propfile_metadata.codepoint_column_nos[block_no]

    # Slicing with a range that starts past the last line yields nil; #to_a
    # turns that into an empty block instead of a NoMethodError, matching
    # how PropertyValueGroup reads its block.
    @propfile.shaped_lines[block_range].to_a.each do |shaped_line|
      # Work on a copy so the PropFile's own line is left untouched.
      values = shaped_line.dup
      codepoint = values.delete_at(codepoint_col_no)

      add_values(codepoint, values) if codepoint
    end
  end
end
|
276
|
+
end
|
data/lib/uniprop.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'json'
|
5
|
+
require 'zip'
|
6
|
+
|
7
|
+
require_relative "uniprop/version"
|
8
|
+
require_relative "uniprop/inspects"
|
9
|
+
require_relative "uniprop/downloader"
|
10
|
+
require_relative "uniprop/propdata"
|
11
|
+
require_relative "uniprop/unicode_elements"
|
12
|
+
require_relative "uniprop/efficient_elements"
|
13
|
+
require_relative "uniprop/utils"
|
14
|
+
require_relative "uniprop/metadata_processor"
|
15
|
+
require_relative "uniprop/value_group"
|
16
|
+
require_relative "uniprop/metadata_generator"
|
17
|
+
require_relative "uniprop/metadata_validator"
|
18
|
+
require_relative "uniprop/unicode_manager"
|
19
|
+
require_relative "uniprop/unihanprop"
|
20
|
+
require_relative "uniprop/errors"
|
21
|
+
require_relative "uniprop/dsl"
|
22
|
+
require_relative "uniprop/unistring"
|
23
|
+
require_relative "uniprop/uniinteger"
|
24
|
+
require_relative "uniprop/consts"
|
25
|
+
|
26
|
+
# Namespace declared by the gem's entry file.
# NOTE(review): value_group.rb defines its classes under `UniProp` (capital P)
# while this module is spelled `Uniprop` - confirm the two namespaces are
# intentionally distinct.
module Uniprop
  # Generic error class of the gem; presumably the base for gem-specific
  # errors (errors.rb is not visible here - verify).
  class Error < StandardError
  end
end
|
data/sig/uniprop.rbs
ADDED
data/uniprop.gemspec
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/uniprop/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
  spec.name = "uniprop"
  spec.version = Uniprop::VERSION
  spec.authors = ["psychoidx"]
  spec.email = ["psychoidx99@gmail.com"]

  spec.summary = "A Library for the Management and Analysis of Unicode Properties"
  spec.homepage = "https://github.com/PsychoidX/UniProp"
  # The library uses numbered block parameters (`_1`), which were introduced
  # in Ruby 2.7, so 2.6 cannot load it.
  spec.required_ruby_version = ">= 2.7.0"

  spec.license = 'MIT'

  spec.metadata["allowed_push_host"] = "https://rubygems.org"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = "https://github.com/PsychoidX/UniProp/blob/master/CHANGELOG.md"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies: lib/uniprop.rb does `require 'nokogiri'` and
  # `require 'zip'`, neither of which is part of the standard library, so
  # they must be installed together with the gem.
  # NOTE(review): `zip` is presumably provided by the rubyzip gem, and the
  # version bounds below are conservative guesses - align them with the
  # versions pinned in Gemfile.lock.
  spec.add_dependency "nokogiri", "~> 1.0"
  spec.add_dependency "rubyzip", ">= 1.0"

  # For more information and examples about making a new gem, check out our
  # guide at: https://bundler.io/guides/creating_gem.html
end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: uniprop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- psychoidx
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-03-10 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email:
|
15
|
+
- psychoidx99@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- CHANGELOG.md
|
21
|
+
- Gemfile
|
22
|
+
- Gemfile.lock
|
23
|
+
- README.md
|
24
|
+
- Rakefile
|
25
|
+
- lib/resources/metadata.json
|
26
|
+
- lib/resources/settings.rb
|
27
|
+
- lib/uniprop.rb
|
28
|
+
- lib/uniprop/consts.rb
|
29
|
+
- lib/uniprop/downloader.rb
|
30
|
+
- lib/uniprop/dsl.rb
|
31
|
+
- lib/uniprop/efficient_elements.rb
|
32
|
+
- lib/uniprop/errors.rb
|
33
|
+
- lib/uniprop/inspects.rb
|
34
|
+
- lib/uniprop/metadata_generator.rb
|
35
|
+
- lib/uniprop/metadata_processor.rb
|
36
|
+
- lib/uniprop/metadata_validator.rb
|
37
|
+
- lib/uniprop/propdata.rb
|
38
|
+
- lib/uniprop/unicode_elements.rb
|
39
|
+
- lib/uniprop/unicode_manager.rb
|
40
|
+
- lib/uniprop/unihanprop.rb
|
41
|
+
- lib/uniprop/uniinteger.rb
|
42
|
+
- lib/uniprop/unistring.rb
|
43
|
+
- lib/uniprop/utils.rb
|
44
|
+
- lib/uniprop/value_group.rb
|
45
|
+
- lib/uniprop/version.rb
|
46
|
+
- sig/uniprop.rbs
|
47
|
+
- uniprop.gemspec
|
48
|
+
homepage: https://github.com/PsychoidX/UniProp
|
49
|
+
licenses:
|
50
|
+
- MIT
|
51
|
+
metadata:
|
52
|
+
allowed_push_host: https://rubygems.org
|
53
|
+
homepage_uri: https://github.com/PsychoidX/UniProp
|
54
|
+
source_code_uri: https://github.com/PsychoidX/UniProp
|
55
|
+
changelog_uri: https://github.com/PsychoidX/UniProp/blob/master/CHANGELOG.md
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 2.6.0
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubygems_version: 3.3.7
|
72
|
+
signing_key:
|
73
|
+
specification_version: 4
|
74
|
+
summary: A Library for the Management and Analysis of Unicode Properties
|
75
|
+
test_files: []
|