@yosina-lib/yosina 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/README.ja.md +177 -0
  2. package/README.md +177 -0
  3. package/dist/cjs/chars.d.ts +3 -0
  4. package/dist/cjs/chars.js +42 -0
  5. package/dist/cjs/chars.js.map +1 -0
  6. package/dist/cjs/index.d.ts +35 -0
  7. package/dist/cjs/index.js +48 -0
  8. package/dist/cjs/index.js.map +1 -0
  9. package/dist/cjs/intrinsics.d.ts +4 -0
  10. package/dist/cjs/intrinsics.js +33 -0
  11. package/dist/cjs/intrinsics.js.map +1 -0
  12. package/dist/cjs/recipes.d.ts +176 -0
  13. package/dist/cjs/recipes.js +155 -0
  14. package/dist/cjs/recipes.js.map +1 -0
  15. package/dist/cjs/transliterators/circled-or-squared.d.ts +27 -0
  16. package/dist/cjs/transliterators/circled-or-squared.js +491 -0
  17. package/dist/cjs/transliterators/circled-or-squared.js.map +1 -0
  18. package/dist/cjs/transliterators/combined.d.ts +15 -0
  19. package/dist/cjs/transliterators/combined.js +510 -0
  20. package/dist/cjs/transliterators/combined.js.map +1 -0
  21. package/dist/cjs/transliterators/hira-kata-composition.d.ts +11 -0
  22. package/dist/cjs/transliterators/hira-kata-composition.js +46 -0
  23. package/dist/cjs/transliterators/hira-kata-composition.js.map +1 -0
  24. package/dist/cjs/transliterators/hira-kata-table.d.ts +4 -0
  25. package/dist/cjs/transliterators/hira-kata-table.js +80 -0
  26. package/dist/cjs/transliterators/hira-kata-table.js.map +1 -0
  27. package/dist/cjs/transliterators/hira-kata.d.ts +6 -0
  28. package/dist/cjs/transliterators/hira-kata.js +67 -0
  29. package/dist/cjs/transliterators/hira-kata.js.map +1 -0
  30. package/dist/cjs/transliterators/hyphens.d.ts +35 -0
  31. package/dist/cjs/transliterators/hyphens.js +329 -0
  32. package/dist/cjs/transliterators/hyphens.js.map +1 -0
  33. package/dist/cjs/transliterators/ideographic-annotations.d.ts +15 -0
  34. package/dist/cjs/transliterators/ideographic-annotations.js +39 -0
  35. package/dist/cjs/transliterators/ideographic-annotations.js.map +1 -0
  36. package/dist/cjs/transliterators/index.d.ts +19 -0
  37. package/dist/cjs/transliterators/index.js +67 -0
  38. package/dist/cjs/transliterators/index.js.map +1 -0
  39. package/dist/cjs/transliterators/ivs-svs-base.d.ts +34 -0
  40. package/dist/cjs/transliterators/ivs-svs-base.js +567 -0
  41. package/dist/cjs/transliterators/ivs-svs-base.js.map +1 -0
  42. package/dist/cjs/transliterators/japanese-iteration-marks.d.ts +4 -0
  43. package/dist/cjs/transliterators/japanese-iteration-marks.js +206 -0
  44. package/dist/cjs/transliterators/japanese-iteration-marks.js.map +1 -0
  45. package/dist/cjs/transliterators/jisx0201-and-alike.d.ts +71 -0
  46. package/dist/cjs/transliterators/jisx0201-and-alike.js +349 -0
  47. package/dist/cjs/transliterators/jisx0201-and-alike.js.map +1 -0
  48. package/dist/cjs/transliterators/kanji-old-new.d.ts +15 -0
  49. package/dist/cjs/transliterators/kanji-old-new.js +1103 -0
  50. package/dist/cjs/transliterators/kanji-old-new.js.map +1 -0
  51. package/dist/cjs/transliterators/mathematical-alphanumerics.d.ts +15 -0
  52. package/dist/cjs/transliterators/mathematical-alphanumerics.js +765 -0
  53. package/dist/cjs/transliterators/mathematical-alphanumerics.js.map +1 -0
  54. package/dist/cjs/transliterators/prolonged-sound-marks.d.ts +13 -0
  55. package/dist/cjs/transliterators/prolonged-sound-marks.js +147 -0
  56. package/dist/cjs/transliterators/prolonged-sound-marks.js.map +1 -0
  57. package/dist/cjs/transliterators/radicals.d.ts +15 -0
  58. package/dist/cjs/transliterators/radicals.js +327 -0
  59. package/dist/cjs/transliterators/radicals.js.map +1 -0
  60. package/dist/cjs/transliterators/spaces.d.ts +15 -0
  61. package/dist/cjs/transliterators/spaces.js +45 -0
  62. package/dist/cjs/transliterators/spaces.js.map +1 -0
  63. package/dist/cjs/types.d.ts +7 -0
  64. package/dist/cjs/types.js +3 -0
  65. package/dist/cjs/types.js.map +1 -0
  66. package/dist/esm/chars.d.ts +3 -0
  67. package/dist/esm/chars.js +37 -0
  68. package/dist/esm/chars.js.map +1 -0
  69. package/dist/esm/index.d.ts +35 -0
  70. package/dist/esm/index.js +40 -0
  71. package/dist/esm/index.js.map +1 -0
  72. package/dist/esm/intrinsics.d.ts +4 -0
  73. package/dist/esm/intrinsics.js +28 -0
  74. package/dist/esm/intrinsics.js.map +1 -0
  75. package/dist/esm/package.json +1 -0
  76. package/dist/esm/recipes.d.ts +176 -0
  77. package/dist/esm/recipes.js +151 -0
  78. package/dist/esm/recipes.js.map +1 -0
  79. package/dist/esm/transliterators/circled-or-squared.d.ts +27 -0
  80. package/dist/esm/transliterators/circled-or-squared.js +489 -0
  81. package/dist/esm/transliterators/circled-or-squared.js.map +1 -0
  82. package/dist/esm/transliterators/combined.d.ts +15 -0
  83. package/dist/esm/transliterators/combined.js +508 -0
  84. package/dist/esm/transliterators/combined.js.map +1 -0
  85. package/dist/esm/transliterators/hira-kata-composition.d.ts +11 -0
  86. package/dist/esm/transliterators/hira-kata-composition.js +44 -0
  87. package/dist/esm/transliterators/hira-kata-composition.js.map +1 -0
  88. package/dist/esm/transliterators/hira-kata-table.d.ts +4 -0
  89. package/dist/esm/transliterators/hira-kata-table.js +77 -0
  90. package/dist/esm/transliterators/hira-kata-table.js.map +1 -0
  91. package/dist/esm/transliterators/hira-kata.d.ts +6 -0
  92. package/dist/esm/transliterators/hira-kata.js +65 -0
  93. package/dist/esm/transliterators/hira-kata.js.map +1 -0
  94. package/dist/esm/transliterators/hyphens.d.ts +35 -0
  95. package/dist/esm/transliterators/hyphens.js +326 -0
  96. package/dist/esm/transliterators/hyphens.js.map +1 -0
  97. package/dist/esm/transliterators/ideographic-annotations.d.ts +15 -0
  98. package/dist/esm/transliterators/ideographic-annotations.js +37 -0
  99. package/dist/esm/transliterators/ideographic-annotations.js.map +1 -0
  100. package/dist/esm/transliterators/index.d.ts +19 -0
  101. package/dist/esm/transliterators/index.js +30 -0
  102. package/dist/esm/transliterators/index.js.map +1 -0
  103. package/dist/esm/transliterators/ivs-svs-base.d.ts +34 -0
  104. package/dist/esm/transliterators/ivs-svs-base.js +564 -0
  105. package/dist/esm/transliterators/ivs-svs-base.js.map +1 -0
  106. package/dist/esm/transliterators/japanese-iteration-marks.d.ts +4 -0
  107. package/dist/esm/transliterators/japanese-iteration-marks.js +204 -0
  108. package/dist/esm/transliterators/japanese-iteration-marks.js.map +1 -0
  109. package/dist/esm/transliterators/jisx0201-and-alike.d.ts +71 -0
  110. package/dist/esm/transliterators/jisx0201-and-alike.js +347 -0
  111. package/dist/esm/transliterators/jisx0201-and-alike.js.map +1 -0
  112. package/dist/esm/transliterators/kanji-old-new.d.ts +15 -0
  113. package/dist/esm/transliterators/kanji-old-new.js +1101 -0
  114. package/dist/esm/transliterators/kanji-old-new.js.map +1 -0
  115. package/dist/esm/transliterators/mathematical-alphanumerics.d.ts +15 -0
  116. package/dist/esm/transliterators/mathematical-alphanumerics.js +763 -0
  117. package/dist/esm/transliterators/mathematical-alphanumerics.js.map +1 -0
  118. package/dist/esm/transliterators/prolonged-sound-marks.d.ts +13 -0
  119. package/dist/esm/transliterators/prolonged-sound-marks.js +145 -0
  120. package/dist/esm/transliterators/prolonged-sound-marks.js.map +1 -0
  121. package/dist/esm/transliterators/radicals.d.ts +15 -0
  122. package/dist/esm/transliterators/radicals.js +325 -0
  123. package/dist/esm/transliterators/radicals.js.map +1 -0
  124. package/dist/esm/transliterators/spaces.d.ts +15 -0
  125. package/dist/esm/transliterators/spaces.js +43 -0
  126. package/dist/esm/transliterators/spaces.js.map +1 -0
  127. package/dist/esm/types.d.ts +7 -0
  128. package/dist/esm/types.js +2 -0
  129. package/dist/esm/types.js.map +1 -0
  130. package/package.json +74 -0
package/README.ja.md ADDED
@@ -0,0 +1,177 @@
1
+ # Yosina JavaScript
2
+
3
+ Yosina日本語テキスト翻字ライブラリのTypeScriptポート。
4
+
5
+ ## 概要
6
+
7
+ Yosinaは、日本語テキスト処理でよく必要とされる様々なテキスト正規化および変換機能を提供する日本語テキスト翻字ライブラリです。
8
+
9
+ ## 使用方法
10
+
11
+ ```typescript
12
+ import { makeTransliterator, TransliterationRecipe } from '@yosina-lib/yosina';
13
+
14
+ // 希望する変換でレシピを作成
15
+ const recipe: TransliterationRecipe = {
16
+ kanjiOldNew: true,
17
+ replaceSpaces: true,
18
+ replaceSuspiciousHyphensToProlongedSoundMarks: true,
19
+ replaceCircledOrSquaredCharacters: true,
20
+ replaceCombinedCharacters: true,
21
+ hiraKata: "hira-to-kata", // ひらがなをカタカナに変換
22
+ replaceJapaneseIterationMarks: true, // 繰り返し記号を展開
23
+ toFullwidth: true,
24
+ };
25
+
26
+ // トランスリテレータを作成
27
+ const transliterator = await makeTransliterator(recipe);
28
+
29
+ // 様々な特殊文字で使用
30
+ const input = '①②③ ⒶⒷⒸ ㍿㍑㌠㋿'; // 丸囲み数字、文字、空白、結合文字
31
+ const result = transliterator(input);
32
+ console.log(result); // "(1)(2)(3) (A)(B)(C) 株式会社リットルサンチーム令和"
33
+
34
+ // 旧字体を新字体に変換
35
+ const oldKanji = '舊字體';
36
+ const kanjiResult = transliterator(oldKanji);
37
+ console.log(kanjiResult); // "旧字体"
38
+
39
+ // 半角カタカナを全角に変換
40
+ const halfWidth = 'ﾃｽﾄﾓｼﾞﾚﾂ';
41
+ const fullWidthResult = transliterator(halfWidth);
42
+ console.log(fullWidthResult); // "テストモジレツ"
43
+
44
+ // ひらがなからカタカナへの変換と繰り返し記号のデモ
45
+ const mixedText = '学問のすゝめ';
46
+ const convertedResult = transliterator(mixedText);
47
+ console.log(convertedResult); // "学問ノススメ"
48
+ ```
49
+
50
+ ### 直接設定を使用
51
+
52
+ ```typescript
53
+ import { makeTransliterator } from '@yosina-lib/yosina';
54
+
55
+ // 直接トランスリテレータ設定で構成
56
+ const configs = [
57
+ ["kanji-old-new", {}],
58
+ ["spaces", {}],
59
+ ["prolonged-sound-marks", { replaceProlongedMarksFollowingAlnums: true }],
60
+ ["circled-or-squared", {}],
61
+ ["combined", {}],
62
+ ["hira-kata", { mode: "kata-to-hira" }], // カタカナをひらがなに変換
63
+ ["japanese-iteration-marks", {}], // 々、ゝゞ、ヽヾなどの繰り返し記号を展開
64
+ ];
65
+
66
+ const transliterator = await makeTransliterator(configs);
67
+
68
+ // 新しい変換を含む様々な変換の例
69
+ const input = "カタカナでの時々の佐々木さん";
70
+ const result = transliterator(input);
71
+ console.log(result); // "かたかなでの時時の佐佐木さん"
72
+ ```
73
+
74
+ ## インストール
75
+
76
+ ```bash
77
+ npm install @yosina-lib/yosina
78
+ ```
79
+
80
+ ## 利用可能なトランスリテレータ
81
+
82
+ ### 1. **丸囲み・角囲み文字** (`circled-or-squared`)
83
+ 丸囲みや角囲みの文字を通常の文字に変換します。
84
+ - オプション: `templates` (カスタムレンダリング)、`includeEmojis` (絵文字を含める)
85
+ - 例: `①②③` → `(1)(2)(3)`、`㊙㊗` → `(秘)(祝)`
86
+
87
+ ### 2. **結合文字** (`combined`)
88
+ 結合文字を個別の文字シーケンスに展開します。
89
+ - 例: `㍻` (平成) → `平成`、`㈱` → `(株)`
90
+
91
+ ### 3. **ひらがな・カタカナ合成** (`hira-kata-composition`)
92
+ 分解されたひらがなとカタカナを合成された等価文字に結合します。
93
+ - オプション: `composeNonCombiningMarks` (非結合マークを合成)
94
+ - 例: `か + ゙` → `が`、`ヘ + ゜` → `ペ`
95
+
96
+ ### 4. **ひらがな・カタカナ** (`hira-kata`)
97
+ ひらがなとカタカナの間で双方向に変換します。
98
+ - オプション: `mode` ("hira-to-kata" または "kata-to-hira")
99
+ - 例: `ひらがな` → `ヒラガナ` (hira-to-kata)
100
+
101
+ ### 5. **ハイフン** (`hyphens`)
102
+ 様々なダッシュ・ハイフン記号を日本語で一般的に使用されるものに置き換えます。
103
+ - オプション: `precedence` (マッピング優先順位)
104
+ - 利用可能なマッピング: "ascii"、"jisx0201"、"jisx0208_90"、"jisx0208_90_windows"、"jisx0208_verbatim"
105
+ - 例: `2019—2020` (emダッシュ) → `2019-2020`
106
+
107
+ ### 6. **表意文字注釈** (`ideographic-annotations`)
108
+ 伝統的な中国語から日本語への翻訳で使用される表意文字注釈を置き換えます。
109
+ - 例: `㆖㆘` → `上下`
110
+
111
+ ### 7. **IVS-SVSベース** (`ivs-svs-base`)
112
+ 表意文字異体字セレクタ(IVS)と標準化異体字セレクタ(SVS)を処理します。
113
+ - オプション: `charset`、`mode` ("ivs-or-svs" または "base")、`preferSVS`、`dropSelectorsAltogether`
114
+ - 例: `葛󠄀` (葛 + IVS) → `葛`
115
+
116
+ ### 8. **日本語繰り返し記号** (`japanese-iteration-marks`)
117
+ 繰り返し記号を前の文字を繰り返すことで展開します。
118
+ - 例: `時々` → `時時`、`いすゞ` → `いすず`
119
+
120
+ ### 9. **JIS X 0201および類似** (`jisx0201-and-alike`)
121
+ 半角・全角文字変換を処理します。
122
+ - オプション: `fullwidthToHalfwidth`、`convertGL` (英数字/記号)、`convertGR` (カタカナ)、`u005cAsYenSign`
123
+ - 例: `ABC123` → `ＡＢＣ１２３`、`ｶﾀｶﾅ` → `カタカナ`
124
+
125
+ ### 10. **旧字体・新字体** (`kanji-old-new`)
126
+ 旧字体の漢字を新字体に変換します。
127
+ - 例: `舊字體の變換` → `旧字体の変換`
128
+
129
+ ### 11. **数学英数記号** (`mathematical-alphanumerics`)
130
+ 数学英数記号を通常のASCIIに正規化します。
131
+ - 例: `𝐀𝐁𝐂` (数学太字) → `ABC`
132
+
133
+ ### 12. **長音記号** (`prolonged-sound-marks`)
134
+ ハイフンと長音記号の間の文脈的な変換を処理します。
135
+ - オプション: `skipAlreadyTransliteratedChars`、`allowProlongedHatsuon`、`allowProlongedSokuon`、`replaceProlongedMarksFollowingAlnums`
136
+ - 例: `イ−ハト−ヴォ` (ハイフン付き) → `イーハトーヴォ` (長音記号)
137
+
138
+ ### 13. **部首** (`radicals`)
139
+ CJK部首文字を対応する表意文字に変換します。
140
+ - 例: `⾔⾨⾷` (康熙部首) → `言門食`
141
+
142
+ ### 14. **空白** (`spaces`)
143
+ 様々なUnicode空白文字を標準ASCII空白に正規化します。
144
+ - 例: `A　B` (表意文字空白) → `A B`
145
+
146
+ ## 開発
147
+
148
+ このプロジェクトはTypeScriptを使用したNode.jsと、フォーマッティング/リンティングにBiomeを使用しています。
149
+
150
+ ```bash
151
+ # 依存関係をインストール
152
+ npm install
153
+
154
+ # テストを実行
155
+ npm test
156
+
157
+ # リンティングを実行
158
+ npm run lint
159
+
160
+ # フォーマッティングを実行
161
+ npm run format
162
+
163
+ # ライブラリをビルド
164
+ npm run build
165
+
166
+ # ドキュメントを生成
167
+ npm run docs:build
168
+ ```
169
+
170
+ ## 要件
171
+
172
+ - Node.js 18+
173
+ - TypeScript 5.0+
174
+
175
+ ## ライセンス
176
+
177
+ MIT
package/README.md ADDED
@@ -0,0 +1,177 @@
1
+ # Yosina JavaScript
2
+
3
+ A TypeScript port of the Yosina Japanese text transliteration library.
4
+
5
+ ## Overview
6
+
7
+ Yosina is a library for Japanese text transliteration that provides various text normalization and conversion features commonly needed when processing Japanese text.
8
+
9
+ ## Usage
10
+
11
+ ```typescript
12
+ import { makeTransliterator, TransliterationRecipe } from '@yosina-lib/yosina';
13
+
14
+ // Create a recipe with desired transformations
15
+ const recipe: TransliterationRecipe = {
16
+ kanjiOldNew: true,
17
+ replaceSpaces: true,
18
+ replaceSuspiciousHyphensToProlongedSoundMarks: true,
19
+ replaceCircledOrSquaredCharacters: true,
20
+ replaceCombinedCharacters: true,
21
+ hiraKata: "hira-to-kata", // Convert hiragana to katakana
22
+ replaceJapaneseIterationMarks: true, // Expand iteration marks
23
+ toFullwidth: true,
24
+ };
25
+
26
+ // Create the transliterator
27
+ const transliterator = await makeTransliterator(recipe);
28
+
29
+ // Use it with various special characters
30
+ const input = '①②③ ⒶⒷⒸ ㍿㍑㌠㋿'; // circled numbers, letters, space, combined characters
31
+ const result = transliterator(input);
32
+ console.log(result); // "(1)(2)(3) (A)(B)(C) 株式会社リットルサンチーム令和"
33
+
34
+ // Convert old kanji to new
35
+ const oldKanji = '舊字體';
36
+ const kanjiResult = transliterator(oldKanji);
37
+ console.log(kanjiResult); // "旧字体"
38
+
39
+ // Convert half-width katakana to full-width
40
+ const halfWidth = 'ﾃｽﾄﾓｼﾞﾚﾂ';
41
+ const fullWidthResult = transliterator(halfWidth);
42
+ console.log(fullWidthResult); // "テストモジレツ"
43
+
44
+ // Demonstrate hiragana to katakana conversion with iteration marks
45
+ const mixedText = '学問のすゝめ';
46
+ const convertedResult = transliterator(mixedText);
47
+ console.log(convertedResult); // "学問ノススメ"
48
+ ```
49
+
50
+ ### Using Direct Configuration
51
+
52
+ ```typescript
53
+ import { makeTransliterator } from '@yosina-lib/yosina';
54
+
55
+ // Configure with direct transliterator configs
56
+ const configs = [
57
+ ["kanji-old-new", {}],
58
+ ["spaces", {}],
59
+ ["prolonged-sound-marks", { replaceProlongedMarksFollowingAlnums: true }],
60
+ ["circled-or-squared", {}],
61
+ ["combined", {}],
62
+ ["hira-kata", { mode: "kata-to-hira" }], // Convert katakana to hiragana
63
+ ["japanese-iteration-marks", {}], // Expand iteration marks like 々, ゝゞ, ヽヾ
64
+ ];
65
+
66
+ const transliterator = await makeTransliterator(configs);
67
+
68
+ // Example with various transformations including the new ones
69
+ const input = "カタカナでの時々の佐々木さん";
70
+ const result = transliterator(input);
71
+ console.log(result); // "かたかなでの時時の佐佐木さん"
72
+ ```
73
+
74
+ ## Available Transliterators
75
+
76
+ ### 1. **Circled or Squared** (`circled-or-squared`)
77
+ Converts circled or squared characters to their plain equivalents.
78
+ - Options: `templates` (custom rendering), `includeEmojis` (include emoji characters)
79
+ - Example: `①②③` → `(1)(2)(3)`, `㊙㊗` → `(秘)(祝)`
80
+
81
+ ### 2. **Combined** (`combined`)
82
+ Expands combined characters into their individual character sequences.
83
+ - Example: `㍻` (Heisei era) → `平成`, `㈱` → `(株)`
84
+
85
+ ### 3. **Hiragana-Katakana Composition** (`hira-kata-composition`)
86
+ Combines decomposed hiragana and katakana into their composed equivalents.
87
+ - Options: `composeNonCombiningMarks` (compose non-combining marks)
88
+ - Example: `か + ゙` → `が`, `ヘ + ゜` → `ペ`
89
+
90
+ ### 4. **Hiragana-Katakana** (`hira-kata`)
91
+ Converts between hiragana and katakana scripts bidirectionally.
92
+ - Options: `mode` ("hira-to-kata" or "kata-to-hira")
93
+ - Example: `ひらがな` → `ヒラガナ` (hira-to-kata)
94
+
95
+ ### 5. **Hyphens** (`hyphens`)
96
+ Replaces various dash/hyphen symbols with common ones used in Japanese.
97
+ - Options: `precedence` (mapping priority order)
98
+ - Available mappings: "ascii", "jisx0201", "jisx0208_90", "jisx0208_90_windows", "jisx0208_verbatim"
99
+ - Example: `2019—2020` (em dash) → `2019-2020`
100
+
101
+ ### 6. **Ideographic Annotations** (`ideographic-annotations`)
102
+ Replaces ideographic annotations used in traditional Chinese-to-Japanese translation.
103
+ - Example: `㆖㆘` → `上下`
104
+
105
+ ### 7. **IVS-SVS Base** (`ivs-svs-base`)
106
+ Handles Ideographic and Standardized Variation Selectors.
107
+ - Options: `charset`, `mode` ("ivs-or-svs" or "base"), `preferSVS`, `dropSelectorsAltogether`
108
+ - Example: `葛󠄀` (葛 + IVS) → `葛`
109
+
110
+ ### 8. **Japanese Iteration Marks** (`japanese-iteration-marks`)
111
+ Expands iteration marks by repeating the preceding character.
112
+ - Example: `時々` → `時時`, `いすゞ` → `いすず`
113
+
114
+ ### 9. **JIS X 0201 and Alike** (`jisx0201-and-alike`)
115
+ Handles half-width/full-width character conversion.
116
+ - Options: `fullwidthToHalfwidth`, `convertGL` (alphanumerics/symbols), `convertGR` (katakana), `u005cAsYenSign`
117
+ - Example: `ABC123` → `ＡＢＣ１２３`, `ｶﾀｶﾅ` → `カタカナ`
118
+
119
+ ### 10. **Kanji Old-New** (`kanji-old-new`)
120
+ Converts old-style kanji (旧字体) to modern forms (新字体).
121
+ - Example: `舊字體の變換` → `旧字体の変換`
122
+
123
+ ### 11. **Mathematical Alphanumerics** (`mathematical-alphanumerics`)
124
+ Normalizes mathematical alphanumeric symbols to plain ASCII.
125
+ - Example: `𝐀𝐁𝐂` (mathematical bold) → `ABC`
126
+
127
+ ### 12. **Prolonged Sound Marks** (`prolonged-sound-marks`)
128
+ Handles contextual conversion between hyphens and prolonged sound marks.
129
+ - Options: `skipAlreadyTransliteratedChars`, `allowProlongedHatsuon`, `allowProlongedSokuon`, `replaceProlongedMarksFollowingAlnums`
130
+ - Example: `イ−ハト−ヴォ` (with hyphen) → `イーハトーヴォ` (prolonged mark)
131
+
132
+ ### 13. **Radicals** (`radicals`)
133
+ Converts CJK radical characters to their corresponding ideographs.
134
+ - Example: `⾔⾨⾷` (Kangxi radicals) → `言門食`
135
+
136
+ ### 14. **Spaces** (`spaces`)
137
+ Normalizes various Unicode space characters to standard ASCII space.
138
+ - Example: `A　B` (ideographic space) → `A B`
139
+
140
+ ## Installation
141
+
142
+ ```bash
143
+ npm install @yosina-lib/yosina
144
+ ```
145
+
146
+ ## Development
147
+
148
+ This project uses Node.js with TypeScript and Biome for formatting/linting.
149
+
150
+ ```bash
151
+ # Install dependencies
152
+ npm install
153
+
154
+ # Run tests
155
+ npm test
156
+
157
+ # Run linting
158
+ npm run lint
159
+
160
+ # Run formatting
161
+ npm run format
162
+
163
+ # Build the library
164
+ npm run build
165
+
166
+ # Generate documentation
167
+ npm run docs:build
168
+ ```
169
+
170
+ ## Requirements
171
+
172
+ - Node.js 18+
173
+ - TypeScript 5.0+
174
+
175
+ ## License
176
+
177
+ MIT
@@ -0,0 +1,3 @@
1
+ import type { Char } from "./types.js";
2
+ export declare const buildCharArray: (text: string) => Char[];
3
+ export declare const fromChars: (chars: Iterable<Char>) => string;
@@ -0,0 +1,42 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.fromChars = exports.buildCharArray = void 0;
4
/**
 * Splits a string into an array of `Char` records, one per code point.
 *
 * A code point in the ranges U+FE00–U+FE0F or U+E0100–U+E01EF (variation
 * selectors) is glued onto the code point that precedes it, so the pair is
 * emitted as a single record. Each record carries the UTF-16 offset at which
 * it starts and an undefined `source`. The returned array always ends with a
 * sentinel record whose `c` is the empty string.
 *
 * @param text The string to split.
 * @returns The `Char` records followed by the sentinel.
 */
const buildCharArray = (text) => {
    const chars = [];
    let position = 0;
    // Appends one record and advances the running UTF-16 offset past it.
    const emit = (s) => {
        chars.push({ c: s, offset: position, source: undefined });
        position += s.length;
    };
    const isVariationSelector = (cp) => (cp >= 0xfe00 && cp <= 0xfe0f) || (cp >= 0xe0100 && cp <= 0xe01ef);
    // The most recent code point, held back until we know whether a
    // variation selector follows it.
    let held;
    for (const piece of text) {
        const cp = piece.codePointAt(0);
        if (held === undefined) {
            // Nothing is waiting (start of input, or right after a combined
            // pair): even a selector is simply held as an ordinary character.
            held = piece;
        }
        else if (isVariationSelector(cp)) {
            emit(held + piece);
            held = undefined;
        }
        else {
            emit(held);
            held = piece;
        }
    }
    if (held !== undefined) {
        emit(held);
    }
    // add sentinel
    chars.push({ c: "", offset: position, source: undefined });
    return chars;
};
33
+ exports.buildCharArray = buildCharArray;
34
/**
 * Concatenates the `c` field of every record in `chars`, in iteration order.
 *
 * @param chars An iterable of `Char` records.
 * @returns The joined string (empty when the iterable yields nothing).
 */
const fromChars = (chars) => [...chars].reduce((joined, ch) => joined + ch.c, "");
41
+ exports.fromChars = fromChars;
42
+ //# sourceMappingURL=chars.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chars.js","sourceRoot":"","sources":["../../src/chars.ts"],"names":[],"mappings":";;;AAEO,MAAM,cAAc,GAAG,CAAC,IAAY,EAAU,EAAE;IACrD,MAAM,MAAM,GAAW,EAAE,CAAC;IAC1B,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,EAAsB,CAAC;IAC3B,IAAI,GAAuB,CAAC;IAC5B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,EAAE,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,CAAW,CAAC;QACtC,IAAI,EAAE,KAAK,SAAS,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YAC1C,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,OAAO,IAAI,EAAE,IAAI,OAAO,CAAC,EAAE,CAAC;gBACvE,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;gBAClB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;gBAClD,MAAM,IAAI,EAAE,CAAC,MAAM,CAAC;gBACpB,EAAE,GAAG,GAAG,GAAG,SAAS,CAAC;gBACrB,SAAS;YACX,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YAClD,MAAM,IAAI,EAAE,CAAC,MAAM,CAAC;QACtB,CAAC;QACD,EAAE,GAAG,CAAC,CAAC;QACP,GAAG,GAAG,EAAE,CAAC;IACX,CAAC;IACD,IAAI,EAAE,KAAK,SAAS,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;QAClD,MAAM,IAAI,EAAE,CAAC,MAAM,CAAC;IACtB,CAAC;IACD,eAAe;IACf,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;IAC1D,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AA5BW,QAAA,cAAc,kBA4BzB;AAEK,MAAM,SAAS,GAAG,CAAC,KAAqB,EAAE,EAAE;IACjD,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC;IAChB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AANW,QAAA,SAAS,aAMpB"}
@@ -0,0 +1,35 @@
1
+ import type { TransliterationRecipe } from "./recipes.js";
2
+ import type { TransliteratorConfig } from "./transliterators/index.js";
3
+ export { buildCharArray, fromChars } from "./chars.js";
4
+ export { makeChainedTransliterator } from "./intrinsics.js";
5
+ export type { TransliterationRecipe } from "./recipes.js";
6
+ export { buildTransliteratorConfigsFromRecipe } from "./recipes.js";
7
+ export type { TransliteratorConfig } from "./transliterators/index.js";
8
+ /**
9
+ * Frontend convenience function to create a string-to-string transliterator from a recipe or a list of configs.
10
+ *
11
+ * ```typescript
12
+ * export const makeTransliterator = async (configsOrRecipe: (TransliteratorConfig | string)[] | TransliterationRecipe) => {
13
+ * const tl = await makeChainedTransliterator(
14
+ * Array.isArray(configsOrRecipe) ? configsOrRecipe : buildTransliteratorConfigsFromRecipe(configsOrRecipe),
15
+ * );
16
+ * return (in_: string) => {
17
+ * return fromChars(tl(buildCharArray(in_)));
18
+ * };
19
+ * };
20
+ * ```
21
+ *
22
+ * The low-level functions used here are:
23
+ *
24
+ * - {@link makeChainedTransliterator}
25
+ * - {@link fromChars}
26
+ * - {@link buildCharArray}
27
+ * - {@link buildTransliteratorConfigsFromRecipe}
28
+ *
29
+ * Please refer to the description of {@link buildTransliteratorConfigsFromRecipe} for the preferences that can be specified in a recipe. Also,
30
+ * refer to the description of {@link makeChainedTransliterator} and {@link TransliteratorConfig} for the details of the configuration objects.
31
+ *
32
+ * @param configsOrRecipe A recipe or a list of `TransliteratorConfig`s.
33
+ * @returns A transliterator function that takes a string and returns a string.
34
+ */
35
+ export declare const makeTransliterator: (configsOrRecipe: (TransliteratorConfig | string)[] | TransliterationRecipe) => Promise<(in_: string) => string>;
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.makeTransliterator = exports.buildTransliteratorConfigsFromRecipe = exports.makeChainedTransliterator = exports.fromChars = exports.buildCharArray = void 0;
4
+ const chars_js_1 = require("./chars.js");
5
+ const intrinsics_js_1 = require("./intrinsics.js");
6
+ const recipes_js_1 = require("./recipes.js");
7
+ var chars_js_2 = require("./chars.js");
8
+ Object.defineProperty(exports, "buildCharArray", { enumerable: true, get: function () { return chars_js_2.buildCharArray; } });
9
+ Object.defineProperty(exports, "fromChars", { enumerable: true, get: function () { return chars_js_2.fromChars; } });
10
+ var intrinsics_js_2 = require("./intrinsics.js");
11
+ Object.defineProperty(exports, "makeChainedTransliterator", { enumerable: true, get: function () { return intrinsics_js_2.makeChainedTransliterator; } });
12
+ var recipes_js_2 = require("./recipes.js");
13
+ Object.defineProperty(exports, "buildTransliteratorConfigsFromRecipe", { enumerable: true, get: function () { return recipes_js_2.buildTransliteratorConfigsFromRecipe; } });
14
+ /**
15
+ * Frontend convenience function to create a string-to-string transliterator from a recipe or a list of configs.
16
+ *
17
+ * ```typescript
18
+ * export const makeTransliterator = async (configsOrRecipe: (TransliteratorConfig | string)[] | TransliterationRecipe) => {
19
+ * const tl = await makeChainedTransliterator(
20
+ * Array.isArray(configsOrRecipe) ? configsOrRecipe : buildTransliteratorConfigsFromRecipe(configsOrRecipe),
21
+ * );
22
+ * return (in_: string) => {
23
+ * return fromChars(tl(buildCharArray(in_)));
24
+ * };
25
+ * };
26
+ * ```
27
+ *
28
+ * The low-level functions used here are:
29
+ *
30
+ * - {@link makeChainedTransliterator}
31
+ * - {@link fromChars}
32
+ * - {@link buildCharArray}
33
+ * - {@link buildTransliteratorConfigsFromRecipe}
34
+ *
35
+ * Please refer to the description of {@link buildTransliteratorConfigsFromRecipe} for the preferences that can be specified in a recipe. Also,
36
+ * refer to the description of {@link makeChainedTransliterator} and {@link TransliteratorConfig} for the details of the configuration objects.
37
+ *
38
+ * @param configsOrRecipe A recipe or a list of `TransliteratorConfig`s.
39
+ * @returns A transliterator function that takes a string and returns a string.
40
+ */
41
+ const makeTransliterator = async (configsOrRecipe) => {
42
+ const tl = await (0, intrinsics_js_1.makeChainedTransliterator)(Array.isArray(configsOrRecipe) ? configsOrRecipe : (0, recipes_js_1.buildTransliteratorConfigsFromRecipe)(configsOrRecipe));
43
+ return (in_) => {
44
+ return (0, chars_js_1.fromChars)(tl((0, chars_js_1.buildCharArray)(in_)));
45
+ };
46
+ };
47
+ exports.makeTransliterator = makeTransliterator;
48
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;AAAA,yCAAuD;AACvD,mDAA4D;AAE5D,6CAAoE;AAGpE,uCAAuD;AAA9C,0GAAA,cAAc,OAAA;AAAE,qGAAA,SAAS,OAAA;AAClC,iDAA4D;AAAnD,0HAAA,yBAAyB,OAAA;AAElC,2CAAoE;AAA3D,kIAAA,oCAAoC,OAAA;AAG7C;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACI,MAAM,kBAAkB,GAAG,KAAK,EACrC,eAA0E,EAC1E,EAAE;IACF,MAAM,EAAE,GAAG,MAAM,IAAA,yCAAyB,EACxC,KAAK,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,IAAA,iDAAoC,EAAC,eAAe,CAAC,CACzG,CAAC;IACF,OAAO,CAAC,GAAW,EAAE,EAAE;QACrB,OAAO,IAAA,oBAAS,EAAC,EAAE,CAAC,IAAA,yBAAc,EAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC;AACJ,CAAC,CAAC;AATW,QAAA,kBAAkB,sBAS7B"}
@@ -0,0 +1,4 @@
1
+ import type { TransliteratorConfig } from "./transliterators/index.js";
2
+ import type { Char, Transliterator } from "./types.js";
3
+ export declare const makeChainedTransliterator: (transliteratorConfigs: (TransliteratorConfig | string)[]) => Promise<Transliterator>;
4
+ export declare const isTransliterated: (c: Char) => boolean;
@@ -0,0 +1,33 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.isTransliterated = exports.makeChainedTransliterator = void 0;
4
+ const index_js_1 = require("./transliterators/index.js");
5
/**
 * Builds a single transliterator that applies the configured transliterators
 * one after another, in the order given.
 *
 * Each entry is either a bare transliterator name, whose factory is called
 * with an empty options object, or a `[name, options]` pair. Factories are
 * looked up with `getTransliteratorFactory`, one entry at a time.
 *
 * @param transliteratorConfigs The names or `[name, options]` pairs to chain.
 * @returns A promise for the composed transliterator. When only one entry is
 *   given, this is the function produced by that entry's factory itself.
 * @throws {Error} If `transliteratorConfigs` yields no entries.
 */
const makeChainedTransliterator = async (transliteratorConfigs) => {
    let chain;
    for (const entry of transliteratorConfigs) {
        const isBareName = typeof entry === "string";
        const factory = await (0, index_js_1.getTransliteratorFactory)(isBareName ? entry : entry[0]);
        const stage = factory(isBareName ? {} : entry[1]);
        if (chain === undefined) {
            chain = stage;
        }
        else {
            // Freeze the chain built so far; `chain` is reassigned on later iterations.
            const upstream = chain;
            chain = (in_) => stage(upstream(in_));
        }
    }
    if (chain === undefined) {
        throw new Error("at least one transliterator must be specified");
    }
    return chain;
};
18
+ exports.makeChainedTransliterator = makeChainedTransliterator;
19
/**
 * Reports whether a character differs from any record in its `source` chain.
 *
 * Walks from `c` through successive `source` links, comparing the `c` field of
 * each record with that of the record it was derived from.
 *
 * @param c The character record to inspect.
 * @returns `true` as soon as two adjacent records in the chain have different
 *   `c` values; `false` if the chain ends (an undefined `source`) without any
 *   difference.
 */
const isTransliterated = (c) => {
    let current = c;
    let origin = current.source;
    while (origin !== undefined) {
        if (origin.c !== current.c) {
            return true;
        }
        current = origin;
        origin = current.source;
    }
    return false;
};
32
+ exports.isTransliterated = isTransliterated;
33
+ //# sourceMappingURL=intrinsics.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"intrinsics.js","sourceRoot":"","sources":["../../src/intrinsics.ts"],"names":[],"mappings":";;;AACA,yDAAsE;AAG/D,MAAM,yBAAyB,GAAG,KAAK,EAC5C,qBAAwD,EAC/B,EAAE;IAC3B,IAAI,MAAkC,CAAC;IACvC,KAAK,MAAM,MAAM,IAAI,qBAAqB,EAAE,CAAC;QAC3C,MAAM,CAAC,GACL,OAAO,MAAM,KAAK,QAAQ;YACxB,CAAC,CAAC,CAAC,MAAM,IAAA,mCAAwB,EAAC,MAAkC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC1E,CAAC,CAAC,CAAC,MAAM,IAAA,mCAAwB,EAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7D,MAAM,GAAG,CAAC,CAAC,EAA8B,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,GAAmB,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACtH,CAAC;IACD,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;IACnE,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAfW,QAAA,yBAAyB,6BAepC;AAEK,MAAM,gBAAgB,GAAG,CAAC,CAAO,EAAE,EAAE;IAC1C,SAAS,CAAC;QACR,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;QACnB,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;YACpB,MAAM;QACR,CAAC;QACD,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YAChB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,CAAC,GAAG,CAAC,CAAC;IACR,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC,CAAC;AAZW,QAAA,gBAAgB,oBAY3B"}