mojix 0.0.2 → 1.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,21 +1,20 @@
1
1
  # Mojix
2
2
 
3
- [![ESDoc coverage badge](https://natade-jp.github.io/mojix/docs/badge.svg)](https://natade-jp.github.io/mojix/docs/)
3
+ [![ESDoc coverage badge](https://natade-jp.github.io/mojix/badge.svg)](https://natade-jp.github.io/mojix/)
4
4
  ![MIT License](https://img.shields.io/badge/license-MIT-blue.svg?style=flat)
5
5
 
6
- > ⚠️ **Work in progress**
7
6
  > Mojix is the successor of **MojiJS**.
8
7
  > This project was renamed to avoid confusion with other libraries named "`moji`" or "`moji.js`".
9
8
 
10
9
  ## What
11
10
 
12
11
  - 日本語・Unicode 文字データを解析および変換するライブラリです。
12
+ - [詳細な API を公開しています。](https://natade-jp.github.io/mojix/)
13
+ - [動作例](https://natade-jp.github.io/mojix/examples/) (コンソール及び[ソースコード](https://natade-jp.github.io/mojix/examples/main.js)を確認してみてください。)
14
+ - [npm](https://www.npmjs.com/package/mojix)
13
15
  - MojiJS の後継ライブラリとして開発されています。
14
- - API や機能は基本的に MojiJS を引き継いでいます。
15
-
16
- > ⚠️ 注意
17
- > Mojix では ECMAScript 3 のサポートを終了しました。
18
- > モダンな JavaScript 実行環境(ES2015 以降)を前提としています。
16
+ - API や機能は基本的に [MojiJS](https://github.com/natade-jp/MojiJS) を引き継いでいます。
17
+ - Mojix では ECMAScript 3 をサポートしていません。JScript 実行環境が必要な場合は、[MojiJS](https://github.com/natade-jp/MojiJS) をご利用ください。
19
18
 
20
19
  ### 主な機能
21
20
 
@@ -33,15 +32,45 @@
33
32
  npm install mojix
34
33
  ```
35
34
 
36
- 現在準備中のため、API や挙動は変更される可能性があります。
35
+ ### ESM
36
+
37
+ - `sample.js`
38
+
39
+ ```javascript
40
+ import Mojix from "mojix";
41
+ console.log(Mojix);
42
+ ```
43
+
44
+ ### CommonJS
45
+
46
+ - `sample.cjs`
47
+
48
+ ```javascript
49
+ const Mojix = require("mojix");
50
+ console.log(Mojix);
51
+ ```
52
+
53
+ ### Browser(umd)
54
+
55
+ - `sample.html`
56
+
57
+ After loading the script, `Mojix` will be available on `globalThis`.
58
+
59
+ ```html
60
+ <script src=".../umd/mojix.min.js" charset="utf-8"></script>
61
+ <script>
62
+ /** @typedef {typeof import('.../types/mojix').default} MojixClass */
63
+ /** @type {MojixClass} */
64
+ const Mojix = /** @type {any} */ (globalThis).Mojix;
65
+ console.log(Mojix);
66
+ </script>
67
+ ```
37
68
 
38
69
  ## Sample
39
70
 
40
71
  ### エンコード
41
72
 
42
73
  ```javascript
43
- import * as Mojix from "mojix";
44
-
45
74
  console.log(Mojix.encode("圡①靁謹𪘂麵", "shift_jis-2004"));
46
75
  // -> [ 136, 98, 135, 64, 251, 154, 238, 174, 252, 238, 239, 238 ]
47
76
 
@@ -52,8 +81,6 @@ console.log(Mojix.decode([0x61, 0xE3, 0x81, 0x82], "utf-8"));
52
81
  ### 日本語の変換
53
82
 
54
83
  ```javascript
55
- import * as Mojix from "mojix";
56
-
57
84
  console.log(Mojix.toHiragana("カキクケコ"));
58
85
  // -> かきくけこ
59
86
  ```
@@ -61,27 +88,23 @@ console.log(Mojix.toHiragana("カキクケコ"));
61
88
  ### 面区点
62
89
 
63
90
  ```javascript
64
- import * as Mojix from "mojix";
65
-
66
91
  const data1 = Mojix.getMojiData(Mojix.codePointAt("髙"));
67
92
  console.log("区点:" + data1.encode.kuten.text + ", 漢字水準:" + data1.type.kanji_suijun);
68
- -> 区点:118-94, 漢字水準:0
93
+ // -> 区点:118-94, 漢字水準:0
69
94
  // ※髙は JIS X 0208 に登録されていないので、漢字水準は表示不可
70
95
 
71
96
  const data2 = Mojix.getMojiData(Mojix.codePointAt("圡"));
72
97
  console.log("面区点:" + data2.encode.menkuten.text + ", 漢字水準:" + data2.type.kanji_suijun);
73
- -> 面区点:1-15-35, 漢字水準:3
98
+ // -> 面区点:1-15-35, 漢字水準:3
74
99
 
75
100
  const data3 = Mojix.getMojiData(Mojix.codePointAt("唁"));
76
101
  console.log("面区点:" + data3.encode.menkuten.text + ", 漢字水準:" + data3.type.kanji_suijun);
77
- -> 面区点:2-3-93, 漢字水準:4
102
+ // -> 面区点:2-3-93, 漢字水準:4
78
103
  ```
79
104
 
80
105
  ### 自然順ソート
81
106
 
82
107
  ```javascript
83
- import * as Mojix from "mojix";
84
-
85
108
  console.log(["3", "02", "あ", "イ", "う", "1"].sort(Mojix.compareToForNatural));
86
109
  // -> [ '1', '02', '3', 'あ', 'イ', 'う' ]
87
110
  ```
@@ -175,35 +175,35 @@ class Unicode {
175
175
  "Khmer", "Mongolian", "Unified Canadian Aboriginal Syllabics Extended", "Limbu", "Tai Le", "New Tai Lue", "Khmer Symbols", "Buginese",
176
176
  "Tai Tham", "Combining Diacritical Marks Extended", "Balinese", "Sundanese", "Batak", "Lepcha", "Ol Chiki", "Cyrillic Extended-C",
177
177
  "Georgian Extended", "Sundanese Supplement", "Vedic Extensions", "Phonetic Extensions", "Phonetic Extensions Supplement", "Combining Diacritical Marks Supplement", "Latin Extended Additional", "Greek Extended",
178
- "General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "number Forms", "Arrows", "Mathematical Operators",
178
+ "General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "Number Forms", "Arrows", "Mathematical Operators",
179
179
  "Miscellaneous Technical", "Control Pictures", "Optical Character Recognition", "Enclosed Alphanumerics", "Box Drawing", "Block Elements", "Geometric Shapes", "Miscellaneous Symbols",
180
180
  "Dingbats", "Miscellaneous Mathematical Symbols-A", "Supplemental Arrows-A", "Braille Patterns", "Supplemental Arrows-B", "Miscellaneous Mathematical Symbols-B", "Supplemental Mathematical Operators", "Miscellaneous Symbols and Arrows",
181
181
  "Glagolitic", "Latin Extended-C", "Coptic", "Georgian Supplement", "Tifinagh", "Ethiopic Extended", "Cyrillic Extended-A", "Supplemental Punctuation",
182
182
  "CJK Radicals Supplement", "Kangxi Radicals", "Ideographic Description Characters", "CJK Symbols and Punctuation", "Hiragana", "Katakana", "Bopomofo", "Hangul Compatibility Jamo",
183
183
  "Kanbun", "Bopomofo Extended", "CJK Strokes", "Katakana Phonetic Extensions", "Enclosed CJK Letters and Months", "CJK Compatibility", "CJK Unified Ideographs Extension A", "Yijing Hexagram Symbols",
184
184
  "CJK Unified Ideographs", "Yi Syllables", "Yi Radicals", "Lisu", "Vai", "Cyrillic Extended-B", "Bamum", "Modifier Tone Letters",
185
- "Latin Extended-D", "Syloti Nagri", "Common Indic number Forms", "Phags-pa", "Saurashtra", "Devanagari Extended", "Kayah Li", "Rejang",
185
+ "Latin Extended-D", "Syloti Nagri", "Common Indic Number Forms", "Phags-pa", "Saurashtra", "Devanagari Extended", "Kayah Li", "Rejang",
186
186
  "Hangul Jamo Extended-A", "Javanese", "Myanmar Extended-B", "Cham", "Myanmar Extended-A", "Tai Viet", "Meetei Mayek Extensions", "Ethiopic Extended-A",
187
187
  "Latin Extended-E", "Cherokee Supplement", "Meetei Mayek", "Hangul Syllables", "Hangul Jamo Extended-B", "High Surrogates", "High Private Use Surrogates", "Low Surrogates",
188
188
  "Private Use Area", "CJK Compatibility Ideographs", "Alphabetic Presentation Forms", "Arabic Presentation Forms-A", "Variation Selectors", "Vertical Forms", "Combining Half Marks", "CJK Compatibility Forms",
189
- "Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean numbers", "Ancient Greek numbers",
190
- "Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact numbers", "Old Italic", "Gothic", "Old Permic",
189
+ "Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean Numbers", "Ancient Greek Numbers",
190
+ "Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact Numbers", "Old Italic", "Gothic", "Old Permic",
191
191
  "Ugaritic", "Old Persian", "Deseret", "Shavian", "Osmanya", "Osage", "Elbasan", "Caucasian Albanian",
192
192
  "Vithkuqi", "Linear A", "Latin Extended-F", "Cypriot Syllabary", "Imperial Aramaic", "Palmyrene", "Nabataean", "Hatran",
193
193
  "Phoenician", "Lydian", "Meroitic Hieroglyphs", "Meroitic Cursive", "Kharoshthi", "Old South Arabian", "Old North Arabian", "Manichaean",
194
194
  "Avestan", "Inscriptional Parthian", "Inscriptional Pahlavi", "Psalter Pahlavi", "Old Turkic", "Old Hungarian", "Hanifi Rohingya", "Rumi Numeral Symbols",
195
195
  "Yezidi", "Arabic Extended-C", "Old Sogdian", "Sogdian", "Old Uyghur", "Chorasmian", "Elymaic", "Brahmi",
196
- "Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic numbers", "Khojki", "Multani",
196
+ "Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic Numbers", "Khojki", "Multani",
197
197
  "Khudawadi", "Grantha", "Newa", "Tirhuta", "Siddham", "Modi", "Mongolian Supplement", "Takri",
198
198
  "Ahom", "Dogra", "Warang Citi", "Dives Akuru", "Nandinagari", "Zanabazar Square", "Soyombo", "Unified Canadian Aboriginal Syllabics Extended-A",
199
199
  "Pau Cin Hau", "Devanagari Extended-A", "Bhaiksuki", "Marchen", "Masaram Gondi", "Gunjala Gondi", "Makasar", "Kawi",
200
- "Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform numbers and Punctuation", "Early Dynastic Cuneiform", "Cypro-Minoan", "Egyptian Hieroglyphs", "Egyptian Hieroglyph Format Controls",
200
+ "Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform Numbers and Punctuation", "Early Dynastic Cuneiform", "Cypro-Minoan", "Egyptian Hieroglyphs", "Egyptian Hieroglyph Format Controls",
201
201
  "Anatolian Hieroglyphs", "Bamum Supplement", "Mro", "Tangsa", "Bassa Vah", "Pahawh Hmong", "Medefaidrin", "Miao",
202
202
  "Ideographic Symbols and Punctuation", "Tangut", "Tangut Components", "Khitan Small Script", "Tangut Supplement", "Kana Extended-B", "Kana Supplement", "Kana Extended-A",
203
203
  "Small Kana Extension", "Nushu", "Duployan", "Shorthand Format Controls", "Znamenny Musical Notation", "Byzantine Musical Symbols", "Musical Symbols", "Ancient Greek Musical Notation",
204
204
  "Kaktovik Numerals", "Mayan Numerals", "Tai Xuan Jing Symbols", "Counting Rod Numerals", "Mathematical Alphanumeric Symbols", "Sutton SignWriting", "Latin Extended-G", "Glagolitic Supplement",
205
205
  "Cyrillic Extended-D", "Nyiakeng Puachue Hmong", "Toto", "Wancho", "Nag Mundari", "Ethiopic Extended-B", "Mende Kikakui", "Adlam",
206
- "Indic Siyaq numbers", "Ottoman Siyaq numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
206
+ "Indic Siyaq Numbers", "Ottoman Siyaq Numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
207
207
  "Miscellaneous Symbols and Pictographs", "Emoticons", "Ornamental Dingbats", "Transport and Map Symbols", "Alchemical Symbols", "Geometric Shapes Extended", "Supplemental Arrows-C", "Supplemental Symbols and Pictographs",
208
208
  "Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F", "CJK Unified Ideographs Extension I",
209
209
  "CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "CJK Unified Ideographs Extension J", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
@@ -2003,7 +2003,7 @@ class CP932 {
2003
2003
  * 指定した文字から Windows-31J 上の区点番号に変換
2004
2004
  * - 2文字以上を指定した場合は、1文字目のみを変換する
2005
2005
  * @param {string} text - 変換したいテキスト
2006
- * @returns {import('./SJIS.js').MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
2006
+ * @returns {MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
2007
2007
  */
2008
2008
  static toKuTen(text) {
2009
2009
  if (text.length === 0) {
@@ -2015,7 +2015,7 @@ class CP932 {
2015
2015
 
2016
2016
  /**
2017
2017
  * Windows-31J 上の区点番号から文字列に変換
2018
- * @param {import('./SJIS.js').MenKuTen|string} kuten - 区点番号
2018
+ * @param {MenKuTen|string} kuten - 区点番号
2019
2019
  * @returns {string} 変換後のテキスト
2020
2020
  */
2021
2021
  static fromKuTen(kuten) {
@@ -2528,7 +2528,7 @@ class SJIS2004 {
2528
2528
  * 指定した文字から Shift_JIS-2004 上の面区点番号に変換
2529
2529
  * - 2文字以上を指定した場合は、1文字目のみを変換する
2530
2530
  * @param {string} text - 変換したいテキスト
2531
- * @returns {import('./SJIS.js').MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
2531
+ * @returns {MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
2532
2532
  */
2533
2533
  static toMenKuTen(text) {
2534
2534
  if (text.length === 0) {
@@ -2540,7 +2540,7 @@ class SJIS2004 {
2540
2540
 
2541
2541
  /**
2542
2542
  * Shift_JIS-2004 上の面区点番号から文字列に変換
2543
- * @param {import('./SJIS.js').MenKuTen|string} menkuten - 面区点番号
2543
+ * @param {MenKuTen|string} menkuten - 面区点番号
2544
2544
  * @returns {string} 変換後のテキスト
2545
2545
  */
2546
2546
  static fromMenKuTen(menkuten) {
@@ -4262,23 +4262,28 @@ class Japanese {
4262
4262
  return "";
4263
4263
  }
4264
4264
  for (let i = 0; i < moji_array.length; i++) {
4265
+ // 文字データ
4266
+ const moji = moji_array[i];
4265
4267
  // 1文字目の横幅を取得
4266
- const ch = moji_array[i][0];
4268
+ const cp = moji[0];
4269
+ // ASCII文字, 半角カタカナ, Regional Indicator(単体)
4267
4270
  // prettier-ignore
4268
- const ch_size = ch < 0x80 || (0xFF61 <= ch && ch < 0xFFA0) ? 1 : 2;
4271
+ const cp_size = cp < 0x80
4272
+ || (0xFF61 <= cp && cp < 0xFFA0)
4273
+ || (moji.length === 1 && Unicode.isRegionalIndicatorFromCodePoint(cp)) ? 1 : 2;
4269
4274
  if (position >= offset) {
4270
4275
  is_target = true;
4271
- if (cut_size >= ch_size) {
4272
- output.push(moji_array[i]);
4276
+ if (cut_size >= cp_size) {
4277
+ output.push(moji);
4273
4278
  } else {
4274
4279
  output.push(SPACE);
4275
4280
  }
4276
- cut_size -= ch_size;
4281
+ cut_size -= cp_size;
4277
4282
  if (cut_size <= 0) {
4278
4283
  break;
4279
4284
  }
4280
4285
  }
4281
- position += ch_size;
4286
+ position += cp_size;
4282
4287
  // 2バイト文字の途中をoffset指定していた場合になる。
4283
4288
  if (position - 1 >= offset && !is_target) {
4284
4289
  cut_size--;
@@ -4684,8 +4689,8 @@ class MojiAnalizerTools {
4684
4689
  /**
4685
4690
  * 文字のエンコード情報
4686
4691
  * @typedef {Object} MojiEncodeData
4687
- * @property {import('../encode/SJIS.js').MenKuTen} kuten 区点 コード
4688
- * @property {import('../encode/SJIS.js').MenKuTen} menkuten 面区点 コード
4692
+ * @property {MenKuTen} kuten 区点 コード
4693
+ * @property {MenKuTen} menkuten 面区点 コード
4689
4694
  * @property {number} cp932_code CP932(Windows-31J) コード
4690
4695
  * @property {number} sjis2004_code Shift_JIS-2004 コード
4691
4696
  * @property {number[]} utf8_array UTF-8 配列
@@ -5447,7 +5452,7 @@ class Mojix {
5447
5452
  * 指定した文字から Windows-31J 上の区点番号に変換
5448
5453
  * - 2文字以上を指定した場合は、1文字目のみを変換する
5449
5454
  * @param {string} text - 変換したいテキスト
5450
- * @returns {import('./encode/SJIS.js').MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
5455
+ * @returns {MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
5451
5456
  */
5452
5457
  static toKuTen(text) {
5453
5458
  return CP932.toKuTen(text);
@@ -5455,7 +5460,7 @@ class Mojix {
5455
5460
 
5456
5461
  /**
5457
5462
  * Windows-31J 上の区点番号から文字列に変換
5458
- * @param {import('./encode/SJIS.js').MenKuTen|string} kuten - 区点番号
5463
+ * @param {MenKuTen|string} kuten - 区点番号
5459
5464
  * @returns {string} 変換後のテキスト
5460
5465
  */
5461
5466
  static fromKuTen(kuten) {
@@ -5466,7 +5471,7 @@ class Mojix {
5466
5471
  * 指定した文字から Shift_JIS-2004 上の面区点番号に変換
5467
5472
  * - 2文字以上を指定した場合は、1文字目のみを変換する
5468
5473
  * @param {string} text - 変換したいテキスト
5469
- * @returns {import('./encode/SJIS.js').MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
5474
+ * @returns {MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
5470
5475
  */
5471
5476
  static toMenKuTen(text) {
5472
5477
  return SJIS2004.toMenKuTen(text);
@@ -5474,7 +5479,7 @@ class Mojix {
5474
5479
 
5475
5480
  /**
5476
5481
  * Shift_JIS-2004 上の面区点番号から文字列に変換
5477
- * @param {import('./encode/SJIS.js').MenKuTen|string} menkuten - 面区点番号
5482
+ * @param {MenKuTen|string} menkuten - 面区点番号
5478
5483
  * @returns {string} 変換後のテキスト
5479
5484
  */
5480
5485
  static fromMenKuTen(menkuten) {
@@ -5654,7 +5659,7 @@ class Mojix {
5654
5659
  /**
5655
5660
  * 指定した1つのUTF-32 コードポイントに関して、解析を行い情報を返します
5656
5661
  * @param {number} unicode_codepoint - UTF-32 のコードポイント
5657
- * @returns {import('./tools/MojiAnalyzer.js').MojiData} 文字の情報がつまったオブジェクト
5662
+ * @returns {MojiData} 文字の情報がつまったオブジェクト
5658
5663
  */
5659
5664
  static getMojiData(unicode_codepoint) {
5660
5665
  return MojiAnalyzer.getMojiData(unicode_codepoint);
package/dist/esm/mojix.js CHANGED
@@ -173,35 +173,35 @@ class Unicode {
173
173
  "Khmer", "Mongolian", "Unified Canadian Aboriginal Syllabics Extended", "Limbu", "Tai Le", "New Tai Lue", "Khmer Symbols", "Buginese",
174
174
  "Tai Tham", "Combining Diacritical Marks Extended", "Balinese", "Sundanese", "Batak", "Lepcha", "Ol Chiki", "Cyrillic Extended-C",
175
175
  "Georgian Extended", "Sundanese Supplement", "Vedic Extensions", "Phonetic Extensions", "Phonetic Extensions Supplement", "Combining Diacritical Marks Supplement", "Latin Extended Additional", "Greek Extended",
176
- "General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "number Forms", "Arrows", "Mathematical Operators",
176
+ "General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "Number Forms", "Arrows", "Mathematical Operators",
177
177
  "Miscellaneous Technical", "Control Pictures", "Optical Character Recognition", "Enclosed Alphanumerics", "Box Drawing", "Block Elements", "Geometric Shapes", "Miscellaneous Symbols",
178
178
  "Dingbats", "Miscellaneous Mathematical Symbols-A", "Supplemental Arrows-A", "Braille Patterns", "Supplemental Arrows-B", "Miscellaneous Mathematical Symbols-B", "Supplemental Mathematical Operators", "Miscellaneous Symbols and Arrows",
179
179
  "Glagolitic", "Latin Extended-C", "Coptic", "Georgian Supplement", "Tifinagh", "Ethiopic Extended", "Cyrillic Extended-A", "Supplemental Punctuation",
180
180
  "CJK Radicals Supplement", "Kangxi Radicals", "Ideographic Description Characters", "CJK Symbols and Punctuation", "Hiragana", "Katakana", "Bopomofo", "Hangul Compatibility Jamo",
181
181
  "Kanbun", "Bopomofo Extended", "CJK Strokes", "Katakana Phonetic Extensions", "Enclosed CJK Letters and Months", "CJK Compatibility", "CJK Unified Ideographs Extension A", "Yijing Hexagram Symbols",
182
182
  "CJK Unified Ideographs", "Yi Syllables", "Yi Radicals", "Lisu", "Vai", "Cyrillic Extended-B", "Bamum", "Modifier Tone Letters",
183
- "Latin Extended-D", "Syloti Nagri", "Common Indic number Forms", "Phags-pa", "Saurashtra", "Devanagari Extended", "Kayah Li", "Rejang",
183
+ "Latin Extended-D", "Syloti Nagri", "Common Indic Number Forms", "Phags-pa", "Saurashtra", "Devanagari Extended", "Kayah Li", "Rejang",
184
184
  "Hangul Jamo Extended-A", "Javanese", "Myanmar Extended-B", "Cham", "Myanmar Extended-A", "Tai Viet", "Meetei Mayek Extensions", "Ethiopic Extended-A",
185
185
  "Latin Extended-E", "Cherokee Supplement", "Meetei Mayek", "Hangul Syllables", "Hangul Jamo Extended-B", "High Surrogates", "High Private Use Surrogates", "Low Surrogates",
186
186
  "Private Use Area", "CJK Compatibility Ideographs", "Alphabetic Presentation Forms", "Arabic Presentation Forms-A", "Variation Selectors", "Vertical Forms", "Combining Half Marks", "CJK Compatibility Forms",
187
- "Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean numbers", "Ancient Greek numbers",
188
- "Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact numbers", "Old Italic", "Gothic", "Old Permic",
187
+ "Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean Numbers", "Ancient Greek Numbers",
188
+ "Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact Numbers", "Old Italic", "Gothic", "Old Permic",
189
189
  "Ugaritic", "Old Persian", "Deseret", "Shavian", "Osmanya", "Osage", "Elbasan", "Caucasian Albanian",
190
190
  "Vithkuqi", "Linear A", "Latin Extended-F", "Cypriot Syllabary", "Imperial Aramaic", "Palmyrene", "Nabataean", "Hatran",
191
191
  "Phoenician", "Lydian", "Meroitic Hieroglyphs", "Meroitic Cursive", "Kharoshthi", "Old South Arabian", "Old North Arabian", "Manichaean",
192
192
  "Avestan", "Inscriptional Parthian", "Inscriptional Pahlavi", "Psalter Pahlavi", "Old Turkic", "Old Hungarian", "Hanifi Rohingya", "Rumi Numeral Symbols",
193
193
  "Yezidi", "Arabic Extended-C", "Old Sogdian", "Sogdian", "Old Uyghur", "Chorasmian", "Elymaic", "Brahmi",
194
- "Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic numbers", "Khojki", "Multani",
194
+ "Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic Numbers", "Khojki", "Multani",
195
195
  "Khudawadi", "Grantha", "Newa", "Tirhuta", "Siddham", "Modi", "Mongolian Supplement", "Takri",
196
196
  "Ahom", "Dogra", "Warang Citi", "Dives Akuru", "Nandinagari", "Zanabazar Square", "Soyombo", "Unified Canadian Aboriginal Syllabics Extended-A",
197
197
  "Pau Cin Hau", "Devanagari Extended-A", "Bhaiksuki", "Marchen", "Masaram Gondi", "Gunjala Gondi", "Makasar", "Kawi",
198
- "Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform numbers and Punctuation", "Early Dynastic Cuneiform", "Cypro-Minoan", "Egyptian Hieroglyphs", "Egyptian Hieroglyph Format Controls",
198
+ "Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform Numbers and Punctuation", "Early Dynastic Cuneiform", "Cypro-Minoan", "Egyptian Hieroglyphs", "Egyptian Hieroglyph Format Controls",
199
199
  "Anatolian Hieroglyphs", "Bamum Supplement", "Mro", "Tangsa", "Bassa Vah", "Pahawh Hmong", "Medefaidrin", "Miao",
200
200
  "Ideographic Symbols and Punctuation", "Tangut", "Tangut Components", "Khitan Small Script", "Tangut Supplement", "Kana Extended-B", "Kana Supplement", "Kana Extended-A",
201
201
  "Small Kana Extension", "Nushu", "Duployan", "Shorthand Format Controls", "Znamenny Musical Notation", "Byzantine Musical Symbols", "Musical Symbols", "Ancient Greek Musical Notation",
202
202
  "Kaktovik Numerals", "Mayan Numerals", "Tai Xuan Jing Symbols", "Counting Rod Numerals", "Mathematical Alphanumeric Symbols", "Sutton SignWriting", "Latin Extended-G", "Glagolitic Supplement",
203
203
  "Cyrillic Extended-D", "Nyiakeng Puachue Hmong", "Toto", "Wancho", "Nag Mundari", "Ethiopic Extended-B", "Mende Kikakui", "Adlam",
204
- "Indic Siyaq numbers", "Ottoman Siyaq numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
204
+ "Indic Siyaq Numbers", "Ottoman Siyaq Numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
205
205
  "Miscellaneous Symbols and Pictographs", "Emoticons", "Ornamental Dingbats", "Transport and Map Symbols", "Alchemical Symbols", "Geometric Shapes Extended", "Supplemental Arrows-C", "Supplemental Symbols and Pictographs",
206
206
  "Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F", "CJK Unified Ideographs Extension I",
207
207
  "CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "CJK Unified Ideographs Extension J", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
@@ -2001,7 +2001,7 @@ class CP932 {
2001
2001
  * 指定した文字から Windows-31J 上の区点番号に変換
2002
2002
  * - 2文字以上を指定した場合は、1文字目のみを変換する
2003
2003
  * @param {string} text - 変換したいテキスト
2004
- * @returns {import('./SJIS.js').MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
2004
+ * @returns {MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
2005
2005
  */
2006
2006
  static toKuTen(text) {
2007
2007
  if (text.length === 0) {
@@ -2013,7 +2013,7 @@ class CP932 {
2013
2013
 
2014
2014
  /**
2015
2015
  * Windows-31J 上の区点番号から文字列に変換
2016
- * @param {import('./SJIS.js').MenKuTen|string} kuten - 区点番号
2016
+ * @param {MenKuTen|string} kuten - 区点番号
2017
2017
  * @returns {string} 変換後のテキスト
2018
2018
  */
2019
2019
  static fromKuTen(kuten) {
@@ -2526,7 +2526,7 @@ class SJIS2004 {
2526
2526
  * 指定した文字から Shift_JIS-2004 上の面区点番号に変換
2527
2527
  * - 2文字以上を指定した場合は、1文字目のみを変換する
2528
2528
  * @param {string} text - 変換したいテキスト
2529
- * @returns {import('./SJIS.js').MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
2529
+ * @returns {MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
2530
2530
  */
2531
2531
  static toMenKuTen(text) {
2532
2532
  if (text.length === 0) {
@@ -2538,7 +2538,7 @@ class SJIS2004 {
2538
2538
 
2539
2539
  /**
2540
2540
  * Shift_JIS-2004 上の面区点番号から文字列に変換
2541
- * @param {import('./SJIS.js').MenKuTen|string} menkuten - 面区点番号
2541
+ * @param {MenKuTen|string} menkuten - 面区点番号
2542
2542
  * @returns {string} 変換後のテキスト
2543
2543
  */
2544
2544
  static fromMenKuTen(menkuten) {
@@ -4260,23 +4260,28 @@ class Japanese {
4260
4260
  return "";
4261
4261
  }
4262
4262
  for (let i = 0; i < moji_array.length; i++) {
4263
+ // 文字データ
4264
+ const moji = moji_array[i];
4263
4265
  // 1文字目の横幅を取得
4264
- const ch = moji_array[i][0];
4266
+ const cp = moji[0];
4267
+ // ASCII文字, 半角カタカナ, Regional Indicator(単体)
4265
4268
  // prettier-ignore
4266
- const ch_size = ch < 0x80 || (0xFF61 <= ch && ch < 0xFFA0) ? 1 : 2;
4269
+ const cp_size = cp < 0x80
4270
+ || (0xFF61 <= cp && cp < 0xFFA0)
4271
+ || (moji.length === 1 && Unicode.isRegionalIndicatorFromCodePoint(cp)) ? 1 : 2;
4267
4272
  if (position >= offset) {
4268
4273
  is_target = true;
4269
- if (cut_size >= ch_size) {
4270
- output.push(moji_array[i]);
4274
+ if (cut_size >= cp_size) {
4275
+ output.push(moji);
4271
4276
  } else {
4272
4277
  output.push(SPACE);
4273
4278
  }
4274
- cut_size -= ch_size;
4279
+ cut_size -= cp_size;
4275
4280
  if (cut_size <= 0) {
4276
4281
  break;
4277
4282
  }
4278
4283
  }
4279
- position += ch_size;
4284
+ position += cp_size;
4280
4285
  // 2バイト文字の途中をoffset指定していた場合になる。
4281
4286
  if (position - 1 >= offset && !is_target) {
4282
4287
  cut_size--;
@@ -4682,8 +4687,8 @@ class MojiAnalizerTools {
4682
4687
  /**
4683
4688
  * 文字のエンコード情報
4684
4689
  * @typedef {Object} MojiEncodeData
4685
- * @property {import('../encode/SJIS.js').MenKuTen} kuten 区点 コード
4686
- * @property {import('../encode/SJIS.js').MenKuTen} menkuten 面区点 コード
4690
+ * @property {MenKuTen} kuten 区点 コード
4691
+ * @property {MenKuTen} menkuten 面区点 コード
4687
4692
  * @property {number} cp932_code CP932(Windows-31J) コード
4688
4693
  * @property {number} sjis2004_code Shift_JIS-2004 コード
4689
4694
  * @property {number[]} utf8_array UTF-8 配列
@@ -5445,7 +5450,7 @@ class Mojix {
5445
5450
  * 指定した文字から Windows-31J 上の区点番号に変換
5446
5451
  * - 2文字以上を指定した場合は、1文字目のみを変換する
5447
5452
  * @param {string} text - 変換したいテキスト
5448
- * @returns {import('./encode/SJIS.js').MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
5453
+ * @returns {MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
5449
5454
  */
5450
5455
  static toKuTen(text) {
5451
5456
  return CP932.toKuTen(text);
@@ -5453,7 +5458,7 @@ class Mojix {
5453
5458
 
5454
5459
  /**
5455
5460
  * Windows-31J 上の区点番号から文字列に変換
5456
- * @param {import('./encode/SJIS.js').MenKuTen|string} kuten - 区点番号
5461
+ * @param {MenKuTen|string} kuten - 区点番号
5457
5462
  * @returns {string} 変換後のテキスト
5458
5463
  */
5459
5464
  static fromKuTen(kuten) {
@@ -5464,7 +5469,7 @@ class Mojix {
5464
5469
  * 指定した文字から Shift_JIS-2004 上の面区点番号に変換
5465
5470
  * - 2文字以上を指定した場合は、1文字目のみを変換する
5466
5471
  * @param {string} text - 変換したいテキスト
5467
- * @returns {import('./encode/SJIS.js').MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
5472
+ * @returns {MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
5468
5473
  */
5469
5474
  static toMenKuTen(text) {
5470
5475
  return SJIS2004.toMenKuTen(text);
@@ -5472,7 +5477,7 @@ class Mojix {
5472
5477
 
5473
5478
  /**
5474
5479
  * Shift_JIS-2004 上の面区点番号から文字列に変換
5475
- * @param {import('./encode/SJIS.js').MenKuTen|string} menkuten - 面区点番号
5480
+ * @param {MenKuTen|string} menkuten - 面区点番号
5476
5481
  * @returns {string} 変換後のテキスト
5477
5482
  */
5478
5483
  static fromMenKuTen(menkuten) {
@@ -5652,7 +5657,7 @@ class Mojix {
5652
5657
  /**
5653
5658
  * 指定した1つのUTF-32 コードポイントに関して、解析を行い情報を返します
5654
5659
  * @param {number} unicode_codepoint - UTF-32 のコードポイント
5655
- * @returns {import('./tools/MojiAnalyzer.js').MojiData} 文字の情報がつまったオブジェクト
5660
+ * @returns {MojiData} 文字の情報がつまったオブジェクト
5656
5661
  */
5657
5662
  static getMojiData(unicode_codepoint) {
5658
5663
  return MojiAnalyzer.getMojiData(unicode_codepoint);