yosina 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +36 -0
  3. data/Gemfile +6 -0
  4. data/README.ja.md +229 -0
  5. data/README.md +229 -0
  6. data/Rakefile +30 -0
  7. data/codegen/dataset.rb +215 -0
  8. data/codegen/emitters/circled_or_squared_transliterator_data.rb +30 -0
  9. data/codegen/emitters/combined_transliterator_data.rb +28 -0
  10. data/codegen/emitters/hyphens_transliterator_data.rb +48 -0
  11. data/codegen/emitters/ivs_svs_base_transliterator_data.rb +121 -0
  12. data/codegen/emitters/simple_transliterator.rb +76 -0
  13. data/codegen/emitters/utils.rb +45 -0
  14. data/codegen/emitters.rb +8 -0
  15. data/codegen/main.rb +109 -0
  16. data/lib/yosina/char.rb +65 -0
  17. data/lib/yosina/chars.rb +152 -0
  18. data/lib/yosina/recipes.rb +359 -0
  19. data/lib/yosina/transliterator.rb +49 -0
  20. data/lib/yosina/transliterators/circled_or_squared.rb +67 -0
  21. data/lib/yosina/transliterators/circled_or_squared_data.rb +469 -0
  22. data/lib/yosina/transliterators/combined.rb +52 -0
  23. data/lib/yosina/transliterators/combined_data.rb +495 -0
  24. data/lib/yosina/transliterators/hira_kata.rb +106 -0
  25. data/lib/yosina/transliterators/hira_kata_composition.rb +103 -0
  26. data/lib/yosina/transliterators/hira_kata_table.rb +116 -0
  27. data/lib/yosina/transliterators/hyphens.rb +83 -0
  28. data/lib/yosina/transliterators/hyphens_data.rb +60 -0
  29. data/lib/yosina/transliterators/ideographic_annotations.rb +73 -0
  30. data/lib/yosina/transliterators/ivs_svs_base.rb +169 -0
  31. data/lib/yosina/transliterators/ivs_svs_base_data.rb +0 -0
  32. data/lib/yosina/transliterators/japanese_iteration_marks.rb +261 -0
  33. data/lib/yosina/transliterators/jisx0201_and_alike.rb +451 -0
  34. data/lib/yosina/transliterators/kanji_old_new.rb +1137 -0
  35. data/lib/yosina/transliterators/mathematical_alphanumerics.rb +799 -0
  36. data/lib/yosina/transliterators/prolonged_sound_marks.rb +206 -0
  37. data/lib/yosina/transliterators/radicals.rb +361 -0
  38. data/lib/yosina/transliterators/spaces.rb +79 -0
  39. data/lib/yosina/transliterators.rb +57 -0
  40. data/lib/yosina/version.rb +5 -0
  41. data/lib/yosina.rb +62 -0
  42. data/yosina.gemspec +41 -0
  43. metadata +159 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 79e70c5202973c9c7c6e2c5b5de4538422eb9323071dbaabc6563be241129a44
4
+ data.tar.gz: bf615675a3b77c5b330ac5da9132e2df90d92911d3763ae8c3bcc612aa894289
5
+ SHA512:
6
+ metadata.gz: 9acbf40e48148bd8cd6db88b02df70364a840c5b8a283df61256e838db9990bff8b73b1a30e95e892c20cbe5542a8d21d870284a7779cbc833043fd3aef612b7
7
+ data.tar.gz: 64e0f480547d54d14318f677ac459905209b6365096623cfe849a21740a1150a3c263c950b37f62dd30b82dce55b1a2e89e2c5def130736a1ac4b44d1c21acc4
data/.rubocop.yml ADDED
@@ -0,0 +1,36 @@
1
+ plugins:
2
+ # - rubocop-rake
3
+ - rubocop-minitest
4
+ AllCops:
5
+ Include:
6
+ - lib/**/*.rb
7
+ - codegen/**/*.rb
8
+ - Gemfile
9
+ - "*.gemspec"
10
+ Exclude:
11
+ - lib/yosina/transliterators/hyphens_data.rb
12
+ - lib/yosina/transliterators/ivs_svs_base_data.rb
13
+ - lib/yosina/transliterators/combined_data.rb
14
+ - lib/yosina/transliterators/circled_or_squared_data.rb
15
+ - vendor/**/*
16
+ Naming/VariableNumber:
17
+ Exclude:
18
+ - codegen/**/*.rb
19
+ Metrics/ModuleLength:
20
+ Enabled: false
21
+ Metrics/MethodLength:
22
+ Enabled: false
23
+ Metrics/BlockLength:
24
+ Enabled: false
25
+ Metrics/ClassLength:
26
+ Max: 200
27
+ Metrics/AbcSize:
28
+ Enabled: false
29
+ Metrics/CyclomaticComplexity:
30
+ Enabled: false
31
+ Metrics/PerceivedComplexity:
32
+ Enabled: false
33
+ Metrics/BlockNesting:
34
+ Enabled: false
35
+ Style/WordArray:
36
+ Enabled: false
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in yosina.gemspec
6
+ gemspec
data/README.ja.md ADDED
@@ -0,0 +1,229 @@
1
+ # Yosina Ruby
2
+
3
+ Yosina日本語テキスト翻字ライブラリのRubyポート。
4
+
5
+ ## 概要
6
+
7
+ Yosinaは、日本語テキスト処理でよく必要とされる様々なテキスト正規化および変換機能を提供する日本語テキスト翻字ライブラリです。
8
+
9
+ ## 使用方法
10
+
11
+ ```ruby
12
+ require 'yosina'
13
+
14
+ # レシピを使用してトランスリテレータを作成
15
+ recipe = Yosina::TransliterationRecipe.new(
16
+ replace_spaces: true,
17
+ kanji_old_new: true,
18
+ replace_circled_or_squared_characters: true,
19
+ replace_combined_characters: true,
20
+ hira_kata: "hira-to-kata", # ひらがなをカタカナに変換
21
+ replace_japanese_iteration_marks: true, # 繰り返し記号を展開
22
+ to_fullwidth: true
23
+ )
24
+
25
+ transliterator = Yosina.make_transliterator(recipe)
26
+
27
+ # 様々な特殊文字でテキストを翻字
28
+ input = "①②③\u3000ⒶⒷⒸ\u3000㍿㍑㌠㋿" # 丸囲み数字、文字、表意文字空白、結合文字
29
+ result = transliterator.call(input)
30
+ puts result # "(1)(2)(3) (A)(B)(C) 株式会社リットルサンチーム令和"
31
+
32
+ # 旧字体を新字体に変換
33
+ old_kanji = "舊字體"
34
+ result = transliterator.call(old_kanji)
35
+ puts result # "旧字体"
36
+
37
+ # 半角カタカナを全角に変換
38
+ half_width = "ﾃｽﾄﾓｼﾞﾚﾂ"
39
+ result = transliterator.call(half_width)
40
+ puts result # "テストモジレツ"
41
+
42
+ # ひらがなからカタカナへの変換と繰り返し記号のデモ
43
+ mixed_text = "学問のすゝめ"
44
+ result = transliterator.call(mixed_text)
45
+ puts result # "学問ノススメ"
46
+ ```
47
+
48
+ ### 設定を使用した高度な使用方法
49
+
50
+ ```ruby
51
+ require 'yosina'
52
+
53
+ # 特定の設定でトランスリテレータを作成
54
+ configs = [
55
+ Yosina::TransliteratorConfig.new('spaces'),
56
+ Yosina::TransliteratorConfig.new('kanji-old-new'),
57
+ Yosina::TransliteratorConfig.new('radicals'),
58
+ Yosina::TransliteratorConfig.new('circled-or-squared'),
59
+ Yosina::TransliteratorConfig.new('combined'),
60
+ Yosina::TransliteratorConfig.new('hira-kata', { mode: 'kata-to-hira' }), # カタカナをひらがなに変換
61
+ Yosina::TransliteratorConfig.new('japanese-iteration-marks') # 々、ゝゞ、ヽヾなどの繰り返し記号を展開
62
+ ]
63
+
64
+ transliterator = Yosina.make_transliterator(configs)
65
+
66
+ # 新しい変換を含む様々な変換の例
67
+ input_text = "カタカナでの時々の佐々木さん"
68
+ result = transliterator.call(input_text)
69
+ puts result # "かたかなでの時時の佐佐木さん"
70
+ ```
71
+
72
+ ### 文字列名を使用
73
+
74
+ ```ruby
75
+ require 'yosina'
76
+
77
+ # 文字列名での簡略化された設定
78
+ configs = ['spaces', 'kanji-old-new', 'radicals']
79
+
80
+ transliterator = Yosina.make_transliterator(configs)
81
+ result = transliterator.call("日本語のテキスト")
82
+ puts result
83
+ ```
84
+
85
+ ### 個別のトランスリテレータを使用
86
+
87
+ ```ruby
88
+ require 'yosina'
89
+
90
+ # 丸囲み・角囲みトランスリテレータを作成
91
+ circled_factory = Yosina::Transliterators::CircledOrSquared
92
+ circled_transliterator = circled_factory.call
93
+
94
+ chars = Yosina::Chars.build_char_array("①②③ⒶⒷⒸ")
95
+ result_chars = circled_transliterator.call(chars)
96
+ output = Yosina::Chars.from_chars(result_chars)
97
+ puts output # "(1)(2)(3)(A)(B)(C)"
98
+
99
+ # 結合トランスリテレータを作成
100
+ combined_factory = Yosina::Transliterators::Combined
101
+ combined_transliterator = combined_factory.call
102
+
103
+ chars2 = Yosina::Chars.build_char_array("㍿㍑㌠㋿") # 結合文字
104
+ result_chars2 = combined_transliterator.call(chars2)
105
+ output2 = Yosina::Chars.from_chars(result_chars2)
106
+ puts output2 # "株式会社リットルサンチーム令和"
107
+ ```
108
+
109
+ ## インストール
110
+
111
+ アプリケーションのGemfileに以下の行を追加:
112
+
113
+ ```ruby
114
+ gem 'yosina'
115
+ ```
116
+
117
+ その後、実行:
118
+
119
+ $ bundle install
120
+
121
+ または自分でインストール:
122
+
123
+ $ gem install yosina
124
+
125
+ ## 利用可能なトランスリテレータ
126
+
127
+ ### 1. **丸囲み・角囲み文字** (`circled-or-squared`)
128
+ 丸囲みや角囲みの文字を通常の文字に変換します。
129
+ - オプション: `templates` (カスタムレンダリング)、`includeEmojis` (絵文字を含める)
130
+ - 例: `①②③` → `(1)(2)(3)`、`㊙㊗` → `(秘)(祝)`
131
+
132
+ ### 2. **結合文字** (`combined`)
133
+ 結合文字を個別の文字シーケンスに展開します。
134
+ - 例: `㍻` (平成) → `平成`、`㈱` → `(株)`
135
+
136
+ ### 3. **ひらがな・カタカナ合成** (`hira-kata-composition`)
137
+ 分解されたひらがなとカタカナを合成された等価文字に結合します。
138
+ - オプション: `composeNonCombiningMarks` (非結合マークを合成)
139
+ - 例: `か + ゙` → `が`、`ヘ + ゜` → `ペ`
140
+
141
+ ### 4. **ひらがな・カタカナ** (`hira-kata`)
142
+ ひらがなとカタカナの間で双方向に変換します。
143
+ - オプション: `mode` ("hira-to-kata" または "kata-to-hira")
144
+ - 例: `ひらがな` → `ヒラガナ` (hira-to-kata)
145
+
146
+ ### 5. **ハイフン** (`hyphens`)
147
+ 様々なダッシュ・ハイフン記号を日本語で一般的に使用されるものに置き換えます。
148
+ - オプション: `precedence` (マッピング優先順位)
149
+ - 利用可能なマッピング: "ascii"、"jisx0201"、"jisx0208_90"、"jisx0208_90_windows"、"jisx0208_verbatim"
150
+ - 例: `2019—2020` (emダッシュ) → `2019-2020`
151
+
152
+ ### 6. **表意文字注釈** (`ideographic-annotations`)
153
+ 伝統的な中国語から日本語への翻訳で使用される表意文字注釈を置き換えます。
154
+ - 例: `㆖㆘` → `上下`
155
+
156
+ ### 7. **IVS-SVSベース** (`ivs-svs-base`)
157
+ 表意文字異体字セレクタ(IVS)と標準化異体字セレクタ(SVS)を処理します。
158
+ - オプション: `charset`、`mode` ("ivs-or-svs" または "base")、`preferSVS`、`dropSelectorsAltogether`
159
+ - 例: `葛󠄀` (葛 + IVS) → `葛`
160
+
161
+ ### 8. **日本語繰り返し記号** (`japanese-iteration-marks`)
162
+ 繰り返し記号を前の文字を繰り返すことで展開します。
163
+ - 例: `時々` → `時時`、`いすゞ` → `いすず`
164
+
165
+ ### 9. **JIS X 0201および類似** (`jisx0201-and-alike`)
166
+ 半角・全角文字変換を処理します。
167
+ - オプション: `fullwidthToHalfwidth`、`convertGL` (英数字/記号)、`convertGR` (カタカナ)、`u005cAsYenSign`
168
+ - 例: `ABC123` → `ＡＢＣ１２３`、`ｶﾀｶﾅ` → `カタカナ`
169
+
170
+ ### 10. **旧字体・新字体** (`kanji-old-new`)
171
+ 旧字体の漢字を新字体に変換します。
172
+ - 例: `舊字體の變換` → `旧字体の変換`
173
+
174
+ ### 11. **数学英数記号** (`mathematical-alphanumerics`)
175
+ 数学英数記号を通常のASCIIに正規化します。
176
+ - 例: `𝐀𝐁𝐂` (数学太字) → `ABC`
177
+
178
+ ### 12. **長音記号** (`prolonged-sound-marks`)
179
+ ハイフンと長音記号の間の文脈的な変換を処理します。
180
+ - オプション: `skipAlreadyTransliteratedChars`、`allowProlongedHatsuon`、`allowProlongedSokuon`、`replaceProlongedMarksFollowingAlnums`
181
+ - 例: `イ−ハト−ヴォ` (ハイフン付き) → `イーハトーヴォ` (長音記号)
182
+
183
+ ### 13. **部首** (`radicals`)
184
+ CJK部首文字を対応する表意文字に変換します。
185
+ - 例: `⾔⾨⾷` (康熙部首) → `言門食`
186
+
187
+ ### 14. **空白** (`spaces`)
188
+ 様々なUnicode空白文字を標準ASCII空白に正規化します。
189
+ - 例: `A　B` (表意文字空白、U+3000) → `A B`
190
+
191
+ ## 開発
192
+
193
+ リポジトリをチェックアウトした後、`bundle install`を実行して依存関係をインストールします。
194
+
195
+ ### コード生成
196
+
197
+ 一部のトランスリテレータはデータファイルから生成されます:
198
+
199
+ ```bash
200
+ rake codegen
201
+ ```
202
+
203
+ これにより、`../data`ディレクトリのJSONデータファイルからトランスリテレータが生成されます。
204
+
205
+ ### テスト
206
+
207
+ テストスイートを実行:
208
+
209
+ ```bash
210
+ rake test
211
+ ```
212
+
213
+ または特定のテストを実行:
214
+
215
+ ```bash
216
+ ruby test/test_basic.rb
217
+ ```
218
+
219
+ ## 要件
220
+
221
+ - Ruby 3.0以降のバージョン
222
+
223
+ ## 貢献
224
+
225
+ バグレポートとプルリクエストは、GitHubのhttps://github.com/yosina-lib/yosinaで歓迎します。
226
+
227
+ ## ライセンス
228
+
229
+ gemは[MITライセンス](https://opensource.org/licenses/MIT)の条件の下でオープンソースとして利用可能です。
data/README.md ADDED
@@ -0,0 +1,229 @@
1
+ # Yosina Ruby
2
+
3
+ A Ruby port of the Yosina Japanese text transliteration library.
4
+
5
+ ## Overview
6
+
7
+ Yosina is a library for Japanese text transliteration that provides various text normalization and conversion features commonly needed when processing Japanese text.
8
+
9
+ ## Usage
10
+
11
+ ```ruby
12
+ require 'yosina'
13
+
14
+ # Create a transliterator using a recipe
15
+ recipe = Yosina::TransliterationRecipe.new(
16
+ replace_spaces: true,
17
+ kanji_old_new: true,
18
+ replace_circled_or_squared_characters: true,
19
+ replace_combined_characters: true,
20
+ hira_kata: "hira-to-kata", # Convert hiragana to katakana
21
+ replace_japanese_iteration_marks: true, # Expand iteration marks
22
+ to_fullwidth: true
23
+ )
24
+
25
+ transliterator = Yosina.make_transliterator(recipe)
26
+
27
+ # Use it to transliterate text with various special characters
28
+ input = "①②③\u3000ⒶⒷⒸ\u3000㍿㍑㌠㋿" # circled numbers, letters, ideographic space, combined characters
29
+ result = transliterator.call(input)
30
+ puts result # "(1)(2)(3) (A)(B)(C) 株式会社リットルサンチーム令和"
31
+
32
+ # Convert old kanji to new
33
+ old_kanji = "舊字體"
34
+ result = transliterator.call(old_kanji)
35
+ puts result # "旧字体"
36
+
37
+ # Convert half-width katakana to full-width
38
+ half_width = "ﾃｽﾄﾓｼﾞﾚﾂ"
39
+ result = transliterator.call(half_width)
40
+ puts result # "テストモジレツ"
41
+
42
+ # Demonstrate hiragana to katakana conversion with iteration marks
43
+ mixed_text = "学問のすゝめ"
44
+ result = transliterator.call(mixed_text)
45
+ puts result # "学問ノススメ"
46
+ ```
47
+
48
+ ### Advanced Usage with Configs
49
+
50
+ ```ruby
51
+ require 'yosina'
52
+
53
+ # Create transliterator with specific configurations
54
+ configs = [
55
+ Yosina::TransliteratorConfig.new('spaces'),
56
+ Yosina::TransliteratorConfig.new('kanji-old-new'),
57
+ Yosina::TransliteratorConfig.new('radicals'),
58
+ Yosina::TransliteratorConfig.new('circled-or-squared'),
59
+ Yosina::TransliteratorConfig.new('combined'),
60
+ Yosina::TransliteratorConfig.new('hira-kata', { mode: 'kata-to-hira' }), # Convert katakana to hiragana
61
+ Yosina::TransliteratorConfig.new('japanese-iteration-marks') # Expand iteration marks like 々, ゝゞ, ヽヾ
62
+ ]
63
+
64
+ transliterator = Yosina.make_transliterator(configs)
65
+
66
+ # Example with various transformations including the new ones
67
+ input_text = "カタカナでの時々の佐々木さん"
68
+ result = transliterator.call(input_text)
69
+ puts result # "かたかなでの時時の佐佐木さん"
70
+ ```
71
+
72
+ ### Using String Names
73
+
74
+ ```ruby
75
+ require 'yosina'
76
+
77
+ # Simplified configuration with string names
78
+ configs = ['spaces', 'kanji-old-new', 'radicals']
79
+
80
+ transliterator = Yosina.make_transliterator(configs)
81
+ result = transliterator.call("some japanese text")
82
+ puts result
83
+ ```
84
+
85
+ ### Using Individual Transliterators
86
+
87
+ ```ruby
88
+ require 'yosina'
89
+
90
+ # Create a circled-or-squared transliterator
91
+ circled_factory = Yosina::Transliterators::CircledOrSquared
92
+ circled_transliterator = circled_factory.call
93
+
94
+ chars = Yosina::Chars.build_char_array("①②③ⒶⒷⒸ")
95
+ result_chars = circled_transliterator.call(chars)
96
+ output = Yosina::Chars.from_chars(result_chars)
97
+ puts output # "(1)(2)(3)(A)(B)(C)"
98
+
99
+ # Create a combined transliterator
100
+ combined_factory = Yosina::Transliterators::Combined
101
+ combined_transliterator = combined_factory.call
102
+
103
+ chars2 = Yosina::Chars.build_char_array("㍿㍑㌠㋿") # combined characters
104
+ result_chars2 = combined_transliterator.call(chars2)
105
+ output2 = Yosina::Chars.from_chars(result_chars2)
106
+ puts output2 # "株式会社リットルサンチーム令和"
107
+ ```
108
+
109
+ ## Requirements
110
+
111
+ - Ruby 3.0 or later
112
+
113
+ ## Installation
114
+
115
+ Add this line to your application's Gemfile:
116
+
117
+ ```ruby
118
+ gem 'yosina'
119
+ ```
120
+
121
+ And then execute:
122
+
123
+ $ bundle install
124
+
125
+ Or install it yourself as:
126
+
127
+ $ gem install yosina
128
+
129
+ ## Available Transliterators
130
+
131
+ ### 1. **Circled or Squared** (`circled-or-squared`)
132
+ Converts circled or squared characters to their plain equivalents.
133
+ - Options: `templates` (custom rendering), `includeEmojis` (include emoji characters)
134
+ - Example: `①②③` → `(1)(2)(3)`, `㊙㊗` → `(秘)(祝)`
135
+
136
+ ### 2. **Combined** (`combined`)
137
+ Expands combined characters into their individual character sequences.
138
+ - Example: `㍻` (Heisei era) → `平成`, `㈱` → `(株)`
139
+
140
+ ### 3. **Hiragana-Katakana Composition** (`hira-kata-composition`)
141
+ Combines decomposed hiraganas and katakanas into composed equivalents.
142
+ - Options: `composeNonCombiningMarks` (compose non-combining marks)
143
+ - Example: `か + ゙` → `が`, `ヘ + ゜` → `ペ`
144
+
145
+ ### 4. **Hiragana-Katakana** (`hira-kata`)
146
+ Converts between hiragana and katakana scripts bidirectionally.
147
+ - Options: `mode` ("hira-to-kata" or "kata-to-hira")
148
+ - Example: `ひらがな` → `ヒラガナ` (hira-to-kata)
149
+
150
+ ### 5. **Hyphens** (`hyphens`)
151
+ Replaces various dash/hyphen symbols with common ones used in Japanese.
152
+ - Options: `precedence` (mapping priority order)
153
+ - Available mappings: "ascii", "jisx0201", "jisx0208_90", "jisx0208_90_windows", "jisx0208_verbatim"
154
+ - Example: `2019—2020` (em dash) → `2019-2020`
155
+
156
+ ### 6. **Ideographic Annotations** (`ideographic-annotations`)
157
+ Replaces ideographic annotations used in traditional Chinese-to-Japanese translation.
158
+ - Example: `㆖㆘` → `上下`
159
+
160
+ ### 7. **IVS-SVS Base** (`ivs-svs-base`)
161
+ Handles Ideographic and Standardized Variation Selectors.
162
+ - Options: `charset`, `mode` ("ivs-or-svs" or "base"), `preferSVS`, `dropSelectorsAltogether`
163
+ - Example: `葛󠄀` (葛 + IVS) → `葛`
164
+
165
+ ### 8. **Japanese Iteration Marks** (`japanese-iteration-marks`)
166
+ Expands iteration marks by repeating the preceding character.
167
+ - Example: `時々` → `時時`, `いすゞ` → `いすず`
168
+
169
+ ### 9. **JIS X 0201 and Alike** (`jisx0201-and-alike`)
170
+ Handles half-width/full-width character conversion.
171
+ - Options: `fullwidthToHalfwidth`, `convertGL` (alphanumerics/symbols), `convertGR` (katakana), `u005cAsYenSign`
172
+ - Example: `ABC123` → `ＡＢＣ１２３`, `ｶﾀｶﾅ` → `カタカナ`
173
+
174
+ ### 10. **Kanji Old-New** (`kanji-old-new`)
175
+ Converts old-style kanji (旧字体) to modern forms (新字体).
176
+ - Example: `舊字體の變換` → `旧字体の変換`
177
+
178
+ ### 11. **Mathematical Alphanumerics** (`mathematical-alphanumerics`)
179
+ Normalizes mathematical alphanumeric symbols to plain ASCII.
180
+ - Example: `𝐀𝐁𝐂` (mathematical bold) → `ABC`
181
+
182
+ ### 12. **Prolonged Sound Marks** (`prolonged-sound-marks`)
183
+ Handles contextual conversion between hyphens and prolonged sound marks.
184
+ - Options: `skipAlreadyTransliteratedChars`, `allowProlongedHatsuon`, `allowProlongedSokuon`, `replaceProlongedMarksFollowingAlnums`
185
+ - Example: `イ−ハト−ヴォ` (with hyphen) → `イーハトーヴォ` (prolonged mark)
186
+
187
+ ### 13. **Radicals** (`radicals`)
188
+ Converts CJK radical characters to their corresponding ideographs.
189
+ - Example: `⾔⾨⾷` (Kangxi radicals) → `言門食`
190
+
191
+ ### 14. **Spaces** (`spaces`)
192
+ Normalizes various Unicode space characters to standard ASCII space.
193
+ - Example: `A　B` (ideographic space, U+3000) → `A B`
194
+
195
+ ## Development
196
+
197
+ After checking out the repo, run `bundle install` to install dependencies.
198
+
199
+ ### Code Generation
200
+
201
+ Some transliterators are generated from data files:
202
+
203
+ ```bash
204
+ rake codegen
205
+ ```
206
+
207
+ This generates transliterators from the JSON data files in the `../data` directory.
208
+
209
+ ### Testing
210
+
211
+ Run the test suite with:
212
+
213
+ ```bash
214
+ rake test
215
+ ```
216
+
217
+ Or run specific tests:
218
+
219
+ ```bash
220
+ ruby test/test_basic.rb
221
+ ```
222
+
223
+ ## Contributing
224
+
225
+ Bug reports and pull requests are welcome on GitHub at https://github.com/yosina-lib/yosina.
226
+
227
+ ## License
228
+
229
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rake/testtask'
5
+ require 'rdoc/task'
6
+ require 'rubocop/rake_task'
7
+ require 'yard'
8
+
9
+ RuboCop::RakeTask.new
10
+
11
+ Rake::TestTask.new(:test) do |t|
12
+ t.libs << 'test'
13
+ t.libs << 'lib'
14
+ t.test_files = FileList['test/**/test_*.rb']
15
+ end
16
+
17
+ RDoc::Task.new do |rd|
18
+ rd.main = 'README.md'
19
+ rd.rdoc_files.include('README.md', 'lib/**/*.rb')
20
+ end
21
+
22
+ YARD::Rake::YardocTask.new do |t|
23
+ t.files = ['lib/**/*.rb']
24
+ end
25
+
26
+ task :codegen do
27
+ ruby 'codegen/main.rb'
28
+ end
29
+
30
+ task default: :test