uniprop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,276 @@
1
+ module UniProp
2
# Mixin for objects that manage a set of codepoint-to-value associations.
#
# Associations live in a private Hash keyed by Integer codepoints. A codepoint
# with exactly one value maps to a String; a codepoint with two or more values
# maps to an Array<String>.
module ValueGroup
  # Looks up the value(s) stored for a codepoint.
  #
  # @param codepoint [String, Integer] hexadecimal String form of the codepoint,
  #   or the codepoint as an Integer
  # @return [String, Array<String>, nil] the stored value(s); nil when nothing is
  #   stored, or when a String argument is rejected by
  #   UniPropUtils::TypeJudgementer.validate_codepoint
  def values_of(codepoint)
    if codepoint.is_a?(String)
      return unless UniPropUtils::TypeJudgementer.validate_codepoint(codepoint)

      codepoint = codepoint.hex
    end

    codepoint_to_values[codepoint]
  end

  # Lists every codepoint that currently has at least one stored value.
  #
  # @return [Array<Integer>]
  def codepoints
    @codepoints ||= codepoint_to_values.keys
  end

  # Groups the stored codepoints by their exact stored value (a String, or an
  # Array<String> for multi-valued codepoints).
  #
  # @return [Hash<Object,Array<Integer>>]
  # @note To find codepoints whose multi-valued entry merely contains a given
  #   property value, use the *_including_codepoints lookups of the including class.
  def values_to_codepoints
    @values_to_codepoints ||= codepoints.group_by { |cp| codepoint_to_values[cp] }
  end

  private

  # Backing store for the associations.
  #
  # @return [Hash<Integer,String/Array<String>>]
  # @note Keys are Integers rather than Strings: values are read far more often
  #   than they are added, so the String -> Integer conversion is paid once, at
  #   insertion time.
  def codepoint_to_values
    @codepoint_to_values ||= {}
  end

  # Stores one value for one codepoint. The first value is stored as a bare
  # String; from the second value on, the entry is kept as an Array<String>.
  #
  # @param codepoint [Integer] the codepoint as an Integer
  # @param value [String]
  def add_single_value(codepoint, value)
    existing = codepoint_to_values[codepoint]

    if existing.nil?
      codepoint_to_values[codepoint] = value
    elsif existing.is_a?(String)
      codepoint_to_values[codepoint] = [existing, value]
    else
      existing << value
    end

    # The memoized views are derived from codepoint_to_values; drop them so a
    # read that happened before this insertion cannot leave stale results behind.
    @codepoints = nil
    @values_to_codepoints = nil
  end

  # Adds one value for a codepoint or a codepoint range. A "U+" prefix is
  # removed from both arguments. Input rejected by
  # UniPropUtils::TypeJudgementer.validate_codepoint is silently ignored.
  #
  # @param codepoint [String] String of the form nnnn or nnnn..nnnn
  # @param value [String]
  # @note Objects using this module are expected to load all of their values
  #   while they are being instantiated, which is why the adders are private.
  def add_value(codepoint, value)
    codepoint = codepoint.gsub(/U\+/, '')
    value = value.gsub(/U\+/, '')
    return unless UniPropUtils::TypeJudgementer.validate_codepoint(codepoint)

    converted = UniPropUtils::CodepointConverter.str_to_int(codepoint)

    # str_to_int yields a Range for the nnnn..nnnn form; register every member.
    if converted.is_a?(Range)
      converted.each { |single| add_single_value(single, value) }
    else
      add_single_value(converted, value)
    end
  end

  # Adds several values at once.
  #
  # @param codepoint [String] String of the form nnnn or nnnn..nnnn
  # @param values [Array<String>, String] add_value is called for each element
  #   of an Array, or once for a String; any other object is ignored
  def add_values(codepoint, values)
    case values
    when Array then values.each { |value| add_value(codepoint, value) }
    when String then add_value(codepoint, values)
    end
  end
end
81
+
82
# ValueGroup that additionally knows which Property objects its values belong to.
class BasePropertyValueGroup
  include ValueGroup

  # Returns the codepoints that have +value+ as (one of) their property value(s).
  #
  # @note Property value aliases are not considered; +value+ must equal a stored
  #   value exactly.
  # @param value [String]
  # @return [Array<Integer>]
  def string_value_including_codepoints(value)
    values_to_codepoints.each_with_object([]) do |(stored, cps), found|
      # A single-valued entry is a bare String. Compare it with == because
      # String#include? is a substring test ("Lu".include?("L") is true).
      matched = stored.is_a?(Array) ? stored.include?(value) : stored == value
      found.concat(cps) if matched
    end
  end

  # Returns the codepoints that have +value+ as (one of) their property value(s),
  # taking property value aliases into account.
  #
  # @param value [String]
  # @return [Array<Integer>]
  def value_including_codepoints(value)
    # Collect, from every registered property, the property value object that
    # +value+ names (relevant for enumerated properties).
    matched_values = properties.filter_map do |property|
      property.find_property_value(value) if property.has_property_value?(value)
    end

    return string_value_including_codepoints(value) if matched_values.empty?

    # Look up every alias of every matched property value and take the union.
    found = matched_values.each_with_object([]) do |property_value, acc|
      property_value.uncanonicaled_aliases.each do |alias_name|
        acc.concat(string_value_including_codepoints(alias_name))
      end
    end
    found.uniq
  end

  # Properties registered with this group.
  #
  # @return [Array<Property>]
  def properties
    @properties ||= []
  end

  # @param property [Property]
  # @return [Boolean] whether +property+ has been registered with this group
  def has_property?(property)
    properties.include?(property)
  end

  private

  # Registers a property; objects whose class is not exactly Property are ignored.
  #
  # @param property [Property]
  def add_property(property)
    properties << property if property.instance_of?(Property)
  end
end
140
+
141
# Value group holding the codepoint/value associations of specific properties
# within one block of a property file.
class PropertyValueGroup < BasePropertyValueGroup
  # Builds the group and loads every value of the selected block.
  #
  # @param propfile [PropFile]
  # @param props [Array<Property>, Property] properties contained in the block
  # @param block_no [Integer] block index, counting the first block as 0
  def initialize(propfile, props, block_no)
    @propfile = propfile
    @propfile_metadata = @propfile.version.version_metadata.find_propfile_metadata(@propfile)

    if props.instance_of?(Array)
      props.each { |prop| add_property(prop) }
    elsif props.instance_of?(Property)
      add_property(props)
    end

    range_of_block = @propfile_metadata.blocks[block_no].range
    raw_content = @propfile_metadata.raw_blocks[block_no].content
    codepoint_column = @propfile_metadata.codepoint_column_nos[block_no]

    # Gather the value columns of every registered property for this block.
    value_columns = properties.each_with_object([]) do |prop, columns|
      columns.concat(@propfile_metadata.property_column_nos(prop)[block_no])
    end

    block_lines = propfile.shaped_lines[range_of_block].to_a
    add_block_values(block_lines, raw_content, codepoint_column, value_columns.uniq.sort)
  end

  private

  # Decides which loader method add_block_values should dispatch to.
  #
  # @param raw_content [Array<Object>] value of the metadata's raw_content entry
  # @param codepoint_column_no [Integer] column holding the codepoint
  # @param value_column_nos [Array<Integer>] columns holding property values
  # @return [Symbol] name of the loader method
  # @note Both column arguments count the first column as 0.
  def value_add_method(raw_content, codepoint_column_no, value_column_nos)
    # Selection by the shape of the block.
    composition_exclusion_shape = [["codepoint", "Composition_Exclusion"]]
    selected_columns = raw_content.values_at(*value_column_nos)
    return :adv_composition_exclusion if composition_exclusion_shape == selected_columns

    # Selection by the value type of the (single) property.
    if properties.size == 1
      return :adv_binary_property if :binary == properties[0].property_value_type
    end

    # No special handling applies: use the default loader.
    :default_add_block_values
  end

  # Loads every value of the block, using the loader that fits the block's layout.
  #
  # @param shaped_lines [Array<Array<String>>] the slice of PropFile#shaped_lines to load
  # @param raw_content [Array<Object>]
  # @param codepoint_column_no [Integer]
  # @param value_column_nos [Array<Integer>]
  def add_block_values(shaped_lines, raw_content, codepoint_column_no, value_column_nos)
    loader = value_add_method(raw_content, codepoint_column_no, value_column_nos)
    send(loader, shaped_lines, codepoint_column_no, value_column_nos)
  end

  # Default loader: stores the selected value columns of each line under the
  # line's codepoint.
  def default_add_block_values(shaped_lines, codepoint_column_no, value_column_nos)
    shaped_lines.each do |fields|
      target = fields[codepoint_column_no]
      picked = fields.values_at(*value_column_nos)

      # Skip the line unless the codepoint and every selected value are present.
      next unless target && picked.all?

      add_values(target, picked)
    end
  end

  # Loader for the Composition_Exclusion block shape.
  #
  # CompositionExclusions.txt lists only the codepoints whose binary property
  # Composition_Exclusion is True, so every listed codepoint gets the value
  # "True" (all others are obtained as the missing value "False").
  def adv_composition_exclusion(shaped_lines, codepoint_column_no, value_column_nos)
    shaped_lines.each do |fields|
      add_value(fields[0], "True") if fields.size == 1
    end
  end

  # Loader for a single binary property.
  #
  # Data files spell out the property name for binary properties, so "True" is
  # stored instead of the text found in the file.
  def adv_binary_property(shaped_lines, codepoint_column_no, value_column_nos)
    shaped_lines.each do |fields|
      target = fields[codepoint_column_no]
      next unless target

      add_value(target, "True")
    end
  end
end
240
+
241
# Value group for a single Unihan property.
class UnihanValueGroup < BasePropertyValueGroup
  # Registers the property and loads its values from the given lines.
  #
  # @param property [Property]
  # @param shaped_lines [Array<Array<String>>] the Unihan shaped_lines that concern +property+
  def initialize(property, shaped_lines)
    add_property(property)

    # Element 0 of each line is used as the codepoint; elements from index 2
    # onward are used as its values.
    shaped_lines.each do |fields|
      add_values(fields[0], fields[2..])
    end
  end
end
249
+
250
# Value group holding the codepoint/value associations of one whole block of a
# property file, without reference to individual properties.
class BlockValueGroup
  include ValueGroup

  attr_reader :propfile

  # Builds the group and loads every line of the selected block.
  #
  # @param propfile [PropFile]
  # @param block_no [Integer] block index, counting the first block as 0
  def initialize(propfile, block_no)
    @propfile = propfile
    propfile_metadata = @propfile.version.version_metadata.find_propfile_metadata(@propfile)

    block_range = propfile_metadata.blocks[block_no].range
    codepoint_col_no = propfile_metadata.codepoint_column_nos[block_no]

    # to_a turns a nil slice (e.g. a range that starts past the end of an Array)
    # into an empty list, the same guard PropertyValueGroup applies to this slice.
    @propfile.shaped_lines[block_range].to_a.each do |shaped_line|
      # Work on a copy so removing the codepoint column leaves the propfile's
      # own line untouched; what remains after the removal are the values.
      values = shaped_line.dup
      codepoint = values.delete_at(codepoint_col_no)

      add_values(codepoint, values) if codepoint
    end
  end
end
276
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Uniprop
  # Release version of the gem; read by uniprop.gemspec as Uniprop::VERSION.
  VERSION = "0.1.0"
end
data/lib/uniprop.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'set'
2
+ require 'open-uri'
3
+ require 'nokogiri'
4
+ require 'json'
5
+ require 'zip'
6
+
7
+ require_relative "uniprop/version"
8
+ require_relative "uniprop/inspects"
9
+ require_relative "uniprop/downloader"
10
+ require_relative "uniprop/propdata"
11
+ require_relative "uniprop/unicode_elements"
12
+ require_relative "uniprop/efficient_elements"
13
+ require_relative "uniprop/utils"
14
+ require_relative "uniprop/metadata_processor"
15
+ require_relative "uniprop/value_group"
16
+ require_relative "uniprop/metadata_generator"
17
+ require_relative "uniprop/metadata_validator"
18
+ require_relative "uniprop/unicode_manager"
19
+ require_relative "uniprop/unihanprop"
20
+ require_relative "uniprop/errors"
21
+ require_relative "uniprop/dsl"
22
+ require_relative "uniprop/unistring"
23
+ require_relative "uniprop/uniinteger"
24
+ require_relative "uniprop/consts"
25
+
26
module Uniprop
  # Generic error class of the gem; a direct subclass of StandardError.
  Error = Class.new(StandardError)
end
data/sig/uniprop.rbs ADDED
@@ -0,0 +1,4 @@
1
module Uniprop
  # Type of the gem's version constant (Uniprop::VERSION is a String literal).
  VERSION: String
end
data/uniprop.gemspec ADDED
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/uniprop/version"
4
+
5
# Gem specification for uniprop.
Gem::Specification.new do |spec|
  spec.name = "uniprop"
  spec.version = Uniprop::VERSION
  spec.authors = ["psychoidx"]
  spec.email = ["psychoidx99@gmail.com"]

  spec.summary = "A Library for the Management and Analysis of Unicode Properties"
  spec.homepage = "https://github.com/PsychoidX/UniProp"
  # The library code uses numbered block parameters (`_1`), which exist only
  # from Ruby 2.7 on, so 2.6 cannot load it.
  spec.required_ruby_version = ">= 2.7.0"

  spec.license = 'MIT'

  spec.metadata["allowed_push_host"] = "https://rubygems.org"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = "https://github.com/PsychoidX/UniProp/blob/master/CHANGELOG.md"

  # Files shipped with the gem: everything tracked by git, minus this gemspec,
  # executables under bin/, test directories, and CI/VCS configuration.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies: lib/uniprop.rb requires both 'nokogiri' and 'zip',
  # neither of which is part of the standard library.
  spec.add_dependency "nokogiri"
  # NOTE(review): `require 'zip'` is presumably served by the rubyzip gem -
  # confirm, and add version constraints for both gems.
  spec.add_dependency "rubyzip"
end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uniprop
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - psychoidx
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-03-10 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - psychoidx99@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - CHANGELOG.md
21
+ - Gemfile
22
+ - Gemfile.lock
23
+ - README.md
24
+ - Rakefile
25
+ - lib/resources/metadata.json
26
+ - lib/resources/settings.rb
27
+ - lib/uniprop.rb
28
+ - lib/uniprop/consts.rb
29
+ - lib/uniprop/downloader.rb
30
+ - lib/uniprop/dsl.rb
31
+ - lib/uniprop/efficient_elements.rb
32
+ - lib/uniprop/errors.rb
33
+ - lib/uniprop/inspects.rb
34
+ - lib/uniprop/metadata_generator.rb
35
+ - lib/uniprop/metadata_processor.rb
36
+ - lib/uniprop/metadata_validator.rb
37
+ - lib/uniprop/propdata.rb
38
+ - lib/uniprop/unicode_elements.rb
39
+ - lib/uniprop/unicode_manager.rb
40
+ - lib/uniprop/unihanprop.rb
41
+ - lib/uniprop/uniinteger.rb
42
+ - lib/uniprop/unistring.rb
43
+ - lib/uniprop/utils.rb
44
+ - lib/uniprop/value_group.rb
45
+ - lib/uniprop/version.rb
46
+ - sig/uniprop.rbs
47
+ - uniprop.gemspec
48
+ homepage: https://github.com/PsychoidX/UniProp
49
+ licenses:
50
+ - MIT
51
+ metadata:
52
+ allowed_push_host: https://rubygems.org
53
+ homepage_uri: https://github.com/PsychoidX/UniProp
54
+ source_code_uri: https://github.com/PsychoidX/UniProp
55
+ changelog_uri: https://github.com/PsychoidX/UniProp/blob/master/CHANGELOG.md
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 2.6.0
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubygems_version: 3.3.7
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: A Library for the Management and Analysis of Unicode Properties
75
+ test_files: []