mojix 0.0.2 → 1.0.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +42 -19
- package/dist/cjs/{mojix.js → mojix.cjs} +29 -24
- package/dist/esm/mojix.js +29 -24
- package/dist/umd/mojix.js +30 -25
- package/dist/umd/mojix.min.js +1 -1
- package/package.json +26 -5
package/README.md
CHANGED
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
# Mojix
|
|
2
2
|
|
|
3
|
-
[](https://natade-jp.github.io/mojix/)
|
|
4
4
|

|
|
5
5
|
|
|
6
|
-
> ⚠️ **Work in progress**
|
|
7
6
|
> Mojix is the successor of **MojiJS**.
|
|
8
7
|
> This project was renamed to avoid confusion with other libraries named "`moji`" or "`moji.js`".
|
|
9
8
|
|
|
10
9
|
## What
|
|
11
10
|
|
|
12
11
|
- 日本語・Unicode 文字データを解析および変換するライブラリです。
|
|
12
|
+
- [詳細な API を公開しています。](https://natade-jp.github.io/mojix/)
|
|
13
|
+
- [動作例](https://natade-jp.github.io/mojix/examples/) (コンソール及び[ソースコード](https://natade-jp.github.io/mojix/examples/main.js)を確認してみてください。)
|
|
14
|
+
- [npm](https://www.npmjs.com/package/mojix)
|
|
13
15
|
- MojiJS の後継ライブラリとして開発されています。
|
|
14
|
-
- API や機能は基本的に MojiJS を引き継いでいます。
|
|
15
|
-
|
|
16
|
-
> ⚠️ 注意
|
|
17
|
-
> Mojix では ECMAScript 3 のサポートを終了しました。
|
|
18
|
-
> モダンな JavaScript 実行環境(ES2015 以降)を前提としています。
|
|
16
|
+
- API や機能は基本的に [MojiJS](https://github.com/natade-jp/MojiJS) を引き継いでいます。
|
|
17
|
+
- Mojix では ECMAScript 3 をサポートしていません。JScript 実行環境が必要な場合は、[MojiJS](https://github.com/natade-jp/MojiJS) をご利用ください。
|
|
19
18
|
|
|
20
19
|
### 主な機能
|
|
21
20
|
|
|
@@ -33,15 +32,45 @@
|
|
|
33
32
|
npm install mojix
|
|
34
33
|
```
|
|
35
34
|
|
|
36
|
-
|
|
35
|
+
### ESM
|
|
36
|
+
|
|
37
|
+
- `sample.js`
|
|
38
|
+
|
|
39
|
+
```javascript
|
|
40
|
+
import Mojix from "mojix";
|
|
41
|
+
console.log(Mojix);
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### CommonJS
|
|
45
|
+
|
|
46
|
+
- `sample.cjs`
|
|
47
|
+
|
|
48
|
+
```javascript
|
|
49
|
+
const Mojix = require("mojix");
|
|
50
|
+
console.log(Mojix);
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Browser(umd)
|
|
54
|
+
|
|
55
|
+
- `sample.html`
|
|
56
|
+
|
|
57
|
+
After loading the script, `Mojix` will be available on `globalThis`.
|
|
58
|
+
|
|
59
|
+
```html
|
|
60
|
+
<script src=".../umd/mojix.min.js" charset="utf-8"></script>
|
|
61
|
+
<script>
|
|
62
|
+
/** @typedef {typeof import('.../types/mojix').default} MojixClass */
|
|
63
|
+
/** @type {MojixClass} */
|
|
64
|
+
const Mojix = /** @type {any} */ (globalThis).Mojix;
|
|
65
|
+
console.log(Mojix);
|
|
66
|
+
</script>
|
|
67
|
+
```
|
|
37
68
|
|
|
38
69
|
## Sample
|
|
39
70
|
|
|
40
71
|
### エンコード
|
|
41
72
|
|
|
42
73
|
```javascript
|
|
43
|
-
import * as Mojix from "mojix";
|
|
44
|
-
|
|
45
74
|
console.log(Mojix.encode("圡①靁謹𪘂麵", "shift_jis-2004"));
|
|
46
75
|
// -> [ 136, 98, 135, 64, 251, 154, 238, 174, 252, 238, 239, 238 ]
|
|
47
76
|
|
|
@@ -52,8 +81,6 @@ console.log(Mojix.decode([0x61, 0xE3, 0x81, 0x82], "utf-8"));
|
|
|
52
81
|
### 日本語の変換
|
|
53
82
|
|
|
54
83
|
```javascript
|
|
55
|
-
import * as Mojix from "mojix";
|
|
56
|
-
|
|
57
84
|
console.log(Mojix.toHiragana("カキクケコ"));
|
|
58
85
|
// -> かきくけこ
|
|
59
86
|
```
|
|
@@ -61,27 +88,23 @@ console.log(Mojix.toHiragana("カキクケコ"));
|
|
|
61
88
|
### 面区点
|
|
62
89
|
|
|
63
90
|
```javascript
|
|
64
|
-
import * as Mojix from "mojix";
|
|
65
|
-
|
|
66
91
|
const data1 = Mojix.getMojiData(Mojix.codePointAt("髙"));
|
|
67
92
|
console.log("区点:" + data1.encode.kuten.text + ", 漢字水準:" + data1.type.kanji_suijun);
|
|
68
|
-
-> 区点:118-94, 漢字水準:0
|
|
93
|
+
// -> 区点:118-94, 漢字水準:0
|
|
69
94
|
// ※髙は JIS X 0208 に登録されていないので、漢字水準は表示不可
|
|
70
95
|
|
|
71
96
|
const data2 = Mojix.getMojiData(Mojix.codePointAt("圡"));
|
|
72
97
|
console.log("面区点:" + data2.encode.menkuten.text + ", 漢字水準:" + data2.type.kanji_suijun);
|
|
73
|
-
-> 面区点:1-15-35, 漢字水準:3
|
|
98
|
+
// -> 面区点:1-15-35, 漢字水準:3
|
|
74
99
|
|
|
75
100
|
const data3 = Mojix.getMojiData(Mojix.codePointAt("唁"));
|
|
76
101
|
console.log("面区点:" + data3.encode.menkuten.text + ", 漢字水準:" + data3.type.kanji_suijun);
|
|
77
|
-
-> 面区点:2-3-93, 漢字水準:4
|
|
102
|
+
// -> 面区点:2-3-93, 漢字水準:4
|
|
78
103
|
```
|
|
79
104
|
|
|
80
105
|
### 自然順ソート
|
|
81
106
|
|
|
82
107
|
```javascript
|
|
83
|
-
import * as Mojix from "mojix";
|
|
84
|
-
|
|
85
108
|
console.log(["3", "02", "あ", "イ", "う", "1"].sort(Mojix.compareToForNatural));
|
|
86
109
|
// -> [ '1', '02', '3', 'あ', 'イ', 'う' ]
|
|
87
110
|
```
|
|
@@ -175,35 +175,35 @@ class Unicode {
|
|
|
175
175
|
"Khmer", "Mongolian", "Unified Canadian Aboriginal Syllabics Extended", "Limbu", "Tai Le", "New Tai Lue", "Khmer Symbols", "Buginese",
|
|
176
176
|
"Tai Tham", "Combining Diacritical Marks Extended", "Balinese", "Sundanese", "Batak", "Lepcha", "Ol Chiki", "Cyrillic Extended-C",
|
|
177
177
|
"Georgian Extended", "Sundanese Supplement", "Vedic Extensions", "Phonetic Extensions", "Phonetic Extensions Supplement", "Combining Diacritical Marks Supplement", "Latin Extended Additional", "Greek Extended",
|
|
178
|
-
"General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "
|
|
178
|
+
"General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "Number Forms", "Arrows", "Mathematical Operators",
|
|
179
179
|
"Miscellaneous Technical", "Control Pictures", "Optical Character Recognition", "Enclosed Alphanumerics", "Box Drawing", "Block Elements", "Geometric Shapes", "Miscellaneous Symbols",
|
|
180
180
|
"Dingbats", "Miscellaneous Mathematical Symbols-A", "Supplemental Arrows-A", "Braille Patterns", "Supplemental Arrows-B", "Miscellaneous Mathematical Symbols-B", "Supplemental Mathematical Operators", "Miscellaneous Symbols and Arrows",
|
|
181
181
|
"Glagolitic", "Latin Extended-C", "Coptic", "Georgian Supplement", "Tifinagh", "Ethiopic Extended", "Cyrillic Extended-A", "Supplemental Punctuation",
|
|
182
182
|
"CJK Radicals Supplement", "Kangxi Radicals", "Ideographic Description Characters", "CJK Symbols and Punctuation", "Hiragana", "Katakana", "Bopomofo", "Hangul Compatibility Jamo",
|
|
183
183
|
"Kanbun", "Bopomofo Extended", "CJK Strokes", "Katakana Phonetic Extensions", "Enclosed CJK Letters and Months", "CJK Compatibility", "CJK Unified Ideographs Extension A", "Yijing Hexagram Symbols",
|
|
184
184
|
"CJK Unified Ideographs", "Yi Syllables", "Yi Radicals", "Lisu", "Vai", "Cyrillic Extended-B", "Bamum", "Modifier Tone Letters",
|
|
185
|
-
"Latin Extended-D", "Syloti Nagri", "Common Indic
|
|
185
|
+
"Latin Extended-D", "Syloti Nagri", "Common Indic Number Forms", "Phags-pa", "Saurashtra", "Devanagari Extended", "Kayah Li", "Rejang",
|
|
186
186
|
"Hangul Jamo Extended-A", "Javanese", "Myanmar Extended-B", "Cham", "Myanmar Extended-A", "Tai Viet", "Meetei Mayek Extensions", "Ethiopic Extended-A",
|
|
187
187
|
"Latin Extended-E", "Cherokee Supplement", "Meetei Mayek", "Hangul Syllables", "Hangul Jamo Extended-B", "High Surrogates", "High Private Use Surrogates", "Low Surrogates",
|
|
188
188
|
"Private Use Area", "CJK Compatibility Ideographs", "Alphabetic Presentation Forms", "Arabic Presentation Forms-A", "Variation Selectors", "Vertical Forms", "Combining Half Marks", "CJK Compatibility Forms",
|
|
189
|
-
"Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean
|
|
190
|
-
"Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact
|
|
189
|
+
"Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean Numbers", "Ancient Greek Numbers",
|
|
190
|
+
"Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact Numbers", "Old Italic", "Gothic", "Old Permic",
|
|
191
191
|
"Ugaritic", "Old Persian", "Deseret", "Shavian", "Osmanya", "Osage", "Elbasan", "Caucasian Albanian",
|
|
192
192
|
"Vithkuqi", "Linear A", "Latin Extended-F", "Cypriot Syllabary", "Imperial Aramaic", "Palmyrene", "Nabataean", "Hatran",
|
|
193
193
|
"Phoenician", "Lydian", "Meroitic Hieroglyphs", "Meroitic Cursive", "Kharoshthi", "Old South Arabian", "Old North Arabian", "Manichaean",
|
|
194
194
|
"Avestan", "Inscriptional Parthian", "Inscriptional Pahlavi", "Psalter Pahlavi", "Old Turkic", "Old Hungarian", "Hanifi Rohingya", "Rumi Numeral Symbols",
|
|
195
195
|
"Yezidi", "Arabic Extended-C", "Old Sogdian", "Sogdian", "Old Uyghur", "Chorasmian", "Elymaic", "Brahmi",
|
|
196
|
-
"Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic
|
|
196
|
+
"Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic Numbers", "Khojki", "Multani",
|
|
197
197
|
"Khudawadi", "Grantha", "Newa", "Tirhuta", "Siddham", "Modi", "Mongolian Supplement", "Takri",
|
|
198
198
|
"Ahom", "Dogra", "Warang Citi", "Dives Akuru", "Nandinagari", "Zanabazar Square", "Soyombo", "Unified Canadian Aboriginal Syllabics Extended-A",
|
|
199
199
|
"Pau Cin Hau", "Devanagari Extended-A", "Bhaiksuki", "Marchen", "Masaram Gondi", "Gunjala Gondi", "Makasar", "Kawi",
|
|
200
|
-
"Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform
|
|
200
|
+
"Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform Numbers and Punctuation", "Early Dynastic Cuneiform", "Cypro-Minoan", "Egyptian Hieroglyphs", "Egyptian Hieroglyph Format Controls",
|
|
201
201
|
"Anatolian Hieroglyphs", "Bamum Supplement", "Mro", "Tangsa", "Bassa Vah", "Pahawh Hmong", "Medefaidrin", "Miao",
|
|
202
202
|
"Ideographic Symbols and Punctuation", "Tangut", "Tangut Components", "Khitan Small Script", "Tangut Supplement", "Kana Extended-B", "Kana Supplement", "Kana Extended-A",
|
|
203
203
|
"Small Kana Extension", "Nushu", "Duployan", "Shorthand Format Controls", "Znamenny Musical Notation", "Byzantine Musical Symbols", "Musical Symbols", "Ancient Greek Musical Notation",
|
|
204
204
|
"Kaktovik Numerals", "Mayan Numerals", "Tai Xuan Jing Symbols", "Counting Rod Numerals", "Mathematical Alphanumeric Symbols", "Sutton SignWriting", "Latin Extended-G", "Glagolitic Supplement",
|
|
205
205
|
"Cyrillic Extended-D", "Nyiakeng Puachue Hmong", "Toto", "Wancho", "Nag Mundari", "Ethiopic Extended-B", "Mende Kikakui", "Adlam",
|
|
206
|
-
"Indic Siyaq
|
|
206
|
+
"Indic Siyaq Numbers", "Ottoman Siyaq Numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
|
|
207
207
|
"Miscellaneous Symbols and Pictographs", "Emoticons", "Ornamental Dingbats", "Transport and Map Symbols", "Alchemical Symbols", "Geometric Shapes Extended", "Supplemental Arrows-C", "Supplemental Symbols and Pictographs",
|
|
208
208
|
"Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F", "CJK Unified Ideographs Extension I",
|
|
209
209
|
"CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "CJK Unified Ideographs Extension J", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
|
|
@@ -2003,7 +2003,7 @@ class CP932 {
|
|
|
2003
2003
|
* 指定した文字から Windows-31J 上の区点番号に変換
|
|
2004
2004
|
* - 2文字以上を指定した場合は、1文字目のみを変換する
|
|
2005
2005
|
* @param {string} text - 変換したいテキスト
|
|
2006
|
-
* @returns {
|
|
2006
|
+
* @returns {MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
|
|
2007
2007
|
*/
|
|
2008
2008
|
static toKuTen(text) {
|
|
2009
2009
|
if (text.length === 0) {
|
|
@@ -2015,7 +2015,7 @@ class CP932 {
|
|
|
2015
2015
|
|
|
2016
2016
|
/**
|
|
2017
2017
|
* Windows-31J 上の区点番号から文字列に変換
|
|
2018
|
-
* @param {
|
|
2018
|
+
* @param {MenKuTen|string} kuten - 区点番号
|
|
2019
2019
|
* @returns {string} 変換後のテキスト
|
|
2020
2020
|
*/
|
|
2021
2021
|
static fromKuTen(kuten) {
|
|
@@ -2528,7 +2528,7 @@ class SJIS2004 {
|
|
|
2528
2528
|
* 指定した文字から Shift_JIS-2004 上の面区点番号に変換
|
|
2529
2529
|
* - 2文字以上を指定した場合は、1文字目のみを変換する
|
|
2530
2530
|
* @param {string} text - 変換したいテキスト
|
|
2531
|
-
* @returns {
|
|
2531
|
+
* @returns {MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
|
|
2532
2532
|
*/
|
|
2533
2533
|
static toMenKuTen(text) {
|
|
2534
2534
|
if (text.length === 0) {
|
|
@@ -2540,7 +2540,7 @@ class SJIS2004 {
|
|
|
2540
2540
|
|
|
2541
2541
|
/**
|
|
2542
2542
|
* Shift_JIS-2004 上の面区点番号から文字列に変換
|
|
2543
|
-
* @param {
|
|
2543
|
+
* @param {MenKuTen|string} menkuten - 面区点番号
|
|
2544
2544
|
* @returns {string} 変換後のテキスト
|
|
2545
2545
|
*/
|
|
2546
2546
|
static fromMenKuTen(menkuten) {
|
|
@@ -4262,23 +4262,28 @@ class Japanese {
|
|
|
4262
4262
|
return "";
|
|
4263
4263
|
}
|
|
4264
4264
|
for (let i = 0; i < moji_array.length; i++) {
|
|
4265
|
+
// 文字データ
|
|
4266
|
+
const moji = moji_array[i];
|
|
4265
4267
|
// 1文字目の横幅を取得
|
|
4266
|
-
const
|
|
4268
|
+
const cp = moji[0];
|
|
4269
|
+
// ASCII文字, 半角カタカナ, Regional Indicator(単体)
|
|
4267
4270
|
// prettier-ignore
|
|
4268
|
-
const
|
|
4271
|
+
const cp_size = cp < 0x80
|
|
4272
|
+
|| (0xFF61 <= cp && cp < 0xFFA0)
|
|
4273
|
+
|| (moji.length === 1 && Unicode.isRegionalIndicatorFromCodePoint(cp)) ? 1 : 2;
|
|
4269
4274
|
if (position >= offset) {
|
|
4270
4275
|
is_target = true;
|
|
4271
|
-
if (cut_size >=
|
|
4272
|
-
output.push(
|
|
4276
|
+
if (cut_size >= cp_size) {
|
|
4277
|
+
output.push(moji);
|
|
4273
4278
|
} else {
|
|
4274
4279
|
output.push(SPACE);
|
|
4275
4280
|
}
|
|
4276
|
-
cut_size -=
|
|
4281
|
+
cut_size -= cp_size;
|
|
4277
4282
|
if (cut_size <= 0) {
|
|
4278
4283
|
break;
|
|
4279
4284
|
}
|
|
4280
4285
|
}
|
|
4281
|
-
position +=
|
|
4286
|
+
position += cp_size;
|
|
4282
4287
|
// 2バイト文字の途中をoffset指定していた場合になる。
|
|
4283
4288
|
if (position - 1 >= offset && !is_target) {
|
|
4284
4289
|
cut_size--;
|
|
@@ -4684,8 +4689,8 @@ class MojiAnalizerTools {
|
|
|
4684
4689
|
/**
|
|
4685
4690
|
* 文字のエンコード情報
|
|
4686
4691
|
* @typedef {Object} MojiEncodeData
|
|
4687
|
-
* @property {
|
|
4688
|
-
* @property {
|
|
4692
|
+
* @property {MenKuTen} kuten 区点 コード
|
|
4693
|
+
* @property {MenKuTen} menkuten 面区点 コード
|
|
4689
4694
|
* @property {number} cp932_code CP932(Windows-31J) コード
|
|
4690
4695
|
* @property {number} sjis2004_code Shift_JIS-2004 コード
|
|
4691
4696
|
* @property {number[]} utf8_array UTF-8 配列
|
|
@@ -5447,7 +5452,7 @@ class Mojix {
|
|
|
5447
5452
|
* 指定した文字から Windows-31J 上の区点番号に変換
|
|
5448
5453
|
* - 2文字以上を指定した場合は、1文字目のみを変換する
|
|
5449
5454
|
* @param {string} text - 変換したいテキスト
|
|
5450
|
-
* @returns {
|
|
5455
|
+
* @returns {MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
|
|
5451
5456
|
*/
|
|
5452
5457
|
static toKuTen(text) {
|
|
5453
5458
|
return CP932.toKuTen(text);
|
|
@@ -5455,7 +5460,7 @@ class Mojix {
|
|
|
5455
5460
|
|
|
5456
5461
|
/**
|
|
5457
5462
|
* Windows-31J 上の区点番号から文字列に変換
|
|
5458
|
-
* @param {
|
|
5463
|
+
* @param {MenKuTen|string} kuten - 区点番号
|
|
5459
5464
|
* @returns {string} 変換後のテキスト
|
|
5460
5465
|
*/
|
|
5461
5466
|
static fromKuTen(kuten) {
|
|
@@ -5466,7 +5471,7 @@ class Mojix {
|
|
|
5466
5471
|
* 指定した文字から Shift_JIS-2004 上の面区点番号に変換
|
|
5467
5472
|
* - 2文字以上を指定した場合は、1文字目のみを変換する
|
|
5468
5473
|
* @param {string} text - 変換したいテキスト
|
|
5469
|
-
* @returns {
|
|
5474
|
+
* @returns {MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
|
|
5470
5475
|
*/
|
|
5471
5476
|
static toMenKuTen(text) {
|
|
5472
5477
|
return SJIS2004.toMenKuTen(text);
|
|
@@ -5474,7 +5479,7 @@ class Mojix {
|
|
|
5474
5479
|
|
|
5475
5480
|
/**
|
|
5476
5481
|
* Shift_JIS-2004 上の面区点番号から文字列に変換
|
|
5477
|
-
* @param {
|
|
5482
|
+
* @param {MenKuTen|string} menkuten - 面区点番号
|
|
5478
5483
|
* @returns {string} 変換後のテキスト
|
|
5479
5484
|
*/
|
|
5480
5485
|
static fromMenKuTen(menkuten) {
|
|
@@ -5654,7 +5659,7 @@ class Mojix {
|
|
|
5654
5659
|
/**
|
|
5655
5660
|
* 指定した1つのUTF-32 コードポイントに関して、解析を行い情報を返します
|
|
5656
5661
|
* @param {number} unicode_codepoint - UTF-32 のコードポイント
|
|
5657
|
-
* @returns {
|
|
5662
|
+
* @returns {MojiData} 文字の情報がつまったオブジェクト
|
|
5658
5663
|
*/
|
|
5659
5664
|
static getMojiData(unicode_codepoint) {
|
|
5660
5665
|
return MojiAnalyzer.getMojiData(unicode_codepoint);
|
package/dist/esm/mojix.js
CHANGED
|
@@ -173,35 +173,35 @@ class Unicode {
|
|
|
173
173
|
"Khmer", "Mongolian", "Unified Canadian Aboriginal Syllabics Extended", "Limbu", "Tai Le", "New Tai Lue", "Khmer Symbols", "Buginese",
|
|
174
174
|
"Tai Tham", "Combining Diacritical Marks Extended", "Balinese", "Sundanese", "Batak", "Lepcha", "Ol Chiki", "Cyrillic Extended-C",
|
|
175
175
|
"Georgian Extended", "Sundanese Supplement", "Vedic Extensions", "Phonetic Extensions", "Phonetic Extensions Supplement", "Combining Diacritical Marks Supplement", "Latin Extended Additional", "Greek Extended",
|
|
176
|
-
"General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "
|
|
176
|
+
"General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "Number Forms", "Arrows", "Mathematical Operators",
|
|
177
177
|
"Miscellaneous Technical", "Control Pictures", "Optical Character Recognition", "Enclosed Alphanumerics", "Box Drawing", "Block Elements", "Geometric Shapes", "Miscellaneous Symbols",
|
|
178
178
|
"Dingbats", "Miscellaneous Mathematical Symbols-A", "Supplemental Arrows-A", "Braille Patterns", "Supplemental Arrows-B", "Miscellaneous Mathematical Symbols-B", "Supplemental Mathematical Operators", "Miscellaneous Symbols and Arrows",
|
|
179
179
|
"Glagolitic", "Latin Extended-C", "Coptic", "Georgian Supplement", "Tifinagh", "Ethiopic Extended", "Cyrillic Extended-A", "Supplemental Punctuation",
|
|
180
180
|
"CJK Radicals Supplement", "Kangxi Radicals", "Ideographic Description Characters", "CJK Symbols and Punctuation", "Hiragana", "Katakana", "Bopomofo", "Hangul Compatibility Jamo",
|
|
181
181
|
"Kanbun", "Bopomofo Extended", "CJK Strokes", "Katakana Phonetic Extensions", "Enclosed CJK Letters and Months", "CJK Compatibility", "CJK Unified Ideographs Extension A", "Yijing Hexagram Symbols",
|
|
182
182
|
"CJK Unified Ideographs", "Yi Syllables", "Yi Radicals", "Lisu", "Vai", "Cyrillic Extended-B", "Bamum", "Modifier Tone Letters",
|
|
183
|
-
"Latin Extended-D", "Syloti Nagri", "Common Indic
|
|
183
|
+
"Latin Extended-D", "Syloti Nagri", "Common Indic Number Forms", "Phags-pa", "Saurashtra", "Devanagari Extended", "Kayah Li", "Rejang",
|
|
184
184
|
"Hangul Jamo Extended-A", "Javanese", "Myanmar Extended-B", "Cham", "Myanmar Extended-A", "Tai Viet", "Meetei Mayek Extensions", "Ethiopic Extended-A",
|
|
185
185
|
"Latin Extended-E", "Cherokee Supplement", "Meetei Mayek", "Hangul Syllables", "Hangul Jamo Extended-B", "High Surrogates", "High Private Use Surrogates", "Low Surrogates",
|
|
186
186
|
"Private Use Area", "CJK Compatibility Ideographs", "Alphabetic Presentation Forms", "Arabic Presentation Forms-A", "Variation Selectors", "Vertical Forms", "Combining Half Marks", "CJK Compatibility Forms",
|
|
187
|
-
"Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean
|
|
188
|
-
"Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact
|
|
187
|
+
"Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean Numbers", "Ancient Greek Numbers",
|
|
188
|
+
"Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact Numbers", "Old Italic", "Gothic", "Old Permic",
|
|
189
189
|
"Ugaritic", "Old Persian", "Deseret", "Shavian", "Osmanya", "Osage", "Elbasan", "Caucasian Albanian",
|
|
190
190
|
"Vithkuqi", "Linear A", "Latin Extended-F", "Cypriot Syllabary", "Imperial Aramaic", "Palmyrene", "Nabataean", "Hatran",
|
|
191
191
|
"Phoenician", "Lydian", "Meroitic Hieroglyphs", "Meroitic Cursive", "Kharoshthi", "Old South Arabian", "Old North Arabian", "Manichaean",
|
|
192
192
|
"Avestan", "Inscriptional Parthian", "Inscriptional Pahlavi", "Psalter Pahlavi", "Old Turkic", "Old Hungarian", "Hanifi Rohingya", "Rumi Numeral Symbols",
|
|
193
193
|
"Yezidi", "Arabic Extended-C", "Old Sogdian", "Sogdian", "Old Uyghur", "Chorasmian", "Elymaic", "Brahmi",
|
|
194
|
-
"Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic
|
|
194
|
+
"Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic Numbers", "Khojki", "Multani",
|
|
195
195
|
"Khudawadi", "Grantha", "Newa", "Tirhuta", "Siddham", "Modi", "Mongolian Supplement", "Takri",
|
|
196
196
|
"Ahom", "Dogra", "Warang Citi", "Dives Akuru", "Nandinagari", "Zanabazar Square", "Soyombo", "Unified Canadian Aboriginal Syllabics Extended-A",
|
|
197
197
|
"Pau Cin Hau", "Devanagari Extended-A", "Bhaiksuki", "Marchen", "Masaram Gondi", "Gunjala Gondi", "Makasar", "Kawi",
|
|
198
|
-
"Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform
|
|
198
|
+
"Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform Numbers and Punctuation", "Early Dynastic Cuneiform", "Cypro-Minoan", "Egyptian Hieroglyphs", "Egyptian Hieroglyph Format Controls",
|
|
199
199
|
"Anatolian Hieroglyphs", "Bamum Supplement", "Mro", "Tangsa", "Bassa Vah", "Pahawh Hmong", "Medefaidrin", "Miao",
|
|
200
200
|
"Ideographic Symbols and Punctuation", "Tangut", "Tangut Components", "Khitan Small Script", "Tangut Supplement", "Kana Extended-B", "Kana Supplement", "Kana Extended-A",
|
|
201
201
|
"Small Kana Extension", "Nushu", "Duployan", "Shorthand Format Controls", "Znamenny Musical Notation", "Byzantine Musical Symbols", "Musical Symbols", "Ancient Greek Musical Notation",
|
|
202
202
|
"Kaktovik Numerals", "Mayan Numerals", "Tai Xuan Jing Symbols", "Counting Rod Numerals", "Mathematical Alphanumeric Symbols", "Sutton SignWriting", "Latin Extended-G", "Glagolitic Supplement",
|
|
203
203
|
"Cyrillic Extended-D", "Nyiakeng Puachue Hmong", "Toto", "Wancho", "Nag Mundari", "Ethiopic Extended-B", "Mende Kikakui", "Adlam",
|
|
204
|
-
"Indic Siyaq
|
|
204
|
+
"Indic Siyaq Numbers", "Ottoman Siyaq Numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
|
|
205
205
|
"Miscellaneous Symbols and Pictographs", "Emoticons", "Ornamental Dingbats", "Transport and Map Symbols", "Alchemical Symbols", "Geometric Shapes Extended", "Supplemental Arrows-C", "Supplemental Symbols and Pictographs",
|
|
206
206
|
"Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F", "CJK Unified Ideographs Extension I",
|
|
207
207
|
"CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "CJK Unified Ideographs Extension J", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
|
|
@@ -2001,7 +2001,7 @@ class CP932 {
|
|
|
2001
2001
|
* 指定した文字から Windows-31J 上の区点番号に変換
|
|
2002
2002
|
* - 2文字以上を指定した場合は、1文字目のみを変換する
|
|
2003
2003
|
* @param {string} text - 変換したいテキスト
|
|
2004
|
-
* @returns {
|
|
2004
|
+
* @returns {MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
|
|
2005
2005
|
*/
|
|
2006
2006
|
static toKuTen(text) {
|
|
2007
2007
|
if (text.length === 0) {
|
|
@@ -2013,7 +2013,7 @@ class CP932 {
|
|
|
2013
2013
|
|
|
2014
2014
|
/**
|
|
2015
2015
|
* Windows-31J 上の区点番号から文字列に変換
|
|
2016
|
-
* @param {
|
|
2016
|
+
* @param {MenKuTen|string} kuten - 区点番号
|
|
2017
2017
|
* @returns {string} 変換後のテキスト
|
|
2018
2018
|
*/
|
|
2019
2019
|
static fromKuTen(kuten) {
|
|
@@ -2526,7 +2526,7 @@ class SJIS2004 {
|
|
|
2526
2526
|
* 指定した文字から Shift_JIS-2004 上の面区点番号に変換
|
|
2527
2527
|
* - 2文字以上を指定した場合は、1文字目のみを変換する
|
|
2528
2528
|
* @param {string} text - 変換したいテキスト
|
|
2529
|
-
* @returns {
|
|
2529
|
+
* @returns {MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
|
|
2530
2530
|
*/
|
|
2531
2531
|
static toMenKuTen(text) {
|
|
2532
2532
|
if (text.length === 0) {
|
|
@@ -2538,7 +2538,7 @@ class SJIS2004 {
|
|
|
2538
2538
|
|
|
2539
2539
|
/**
|
|
2540
2540
|
* Shift_JIS-2004 上の面区点番号から文字列に変換
|
|
2541
|
-
* @param {
|
|
2541
|
+
* @param {MenKuTen|string} menkuten - 面区点番号
|
|
2542
2542
|
* @returns {string} 変換後のテキスト
|
|
2543
2543
|
*/
|
|
2544
2544
|
static fromMenKuTen(menkuten) {
|
|
@@ -4260,23 +4260,28 @@ class Japanese {
|
|
|
4260
4260
|
return "";
|
|
4261
4261
|
}
|
|
4262
4262
|
for (let i = 0; i < moji_array.length; i++) {
|
|
4263
|
+
// 文字データ
|
|
4264
|
+
const moji = moji_array[i];
|
|
4263
4265
|
// 1文字目の横幅を取得
|
|
4264
|
-
const
|
|
4266
|
+
const cp = moji[0];
|
|
4267
|
+
// ASCII文字, 半角カタカナ, Regional Indicator(単体)
|
|
4265
4268
|
// prettier-ignore
|
|
4266
|
-
const
|
|
4269
|
+
const cp_size = cp < 0x80
|
|
4270
|
+
|| (0xFF61 <= cp && cp < 0xFFA0)
|
|
4271
|
+
|| (moji.length === 1 && Unicode.isRegionalIndicatorFromCodePoint(cp)) ? 1 : 2;
|
|
4267
4272
|
if (position >= offset) {
|
|
4268
4273
|
is_target = true;
|
|
4269
|
-
if (cut_size >=
|
|
4270
|
-
output.push(
|
|
4274
|
+
if (cut_size >= cp_size) {
|
|
4275
|
+
output.push(moji);
|
|
4271
4276
|
} else {
|
|
4272
4277
|
output.push(SPACE);
|
|
4273
4278
|
}
|
|
4274
|
-
cut_size -=
|
|
4279
|
+
cut_size -= cp_size;
|
|
4275
4280
|
if (cut_size <= 0) {
|
|
4276
4281
|
break;
|
|
4277
4282
|
}
|
|
4278
4283
|
}
|
|
4279
|
-
position +=
|
|
4284
|
+
position += cp_size;
|
|
4280
4285
|
// 2バイト文字の途中をoffset指定していた場合になる。
|
|
4281
4286
|
if (position - 1 >= offset && !is_target) {
|
|
4282
4287
|
cut_size--;
|
|
@@ -4682,8 +4687,8 @@ class MojiAnalizerTools {
|
|
|
4682
4687
|
/**
|
|
4683
4688
|
* 文字のエンコード情報
|
|
4684
4689
|
* @typedef {Object} MojiEncodeData
|
|
4685
|
-
* @property {
|
|
4686
|
-
* @property {
|
|
4690
|
+
* @property {MenKuTen} kuten 区点 コード
|
|
4691
|
+
* @property {MenKuTen} menkuten 面区点 コード
|
|
4687
4692
|
* @property {number} cp932_code CP932(Windows-31J) コード
|
|
4688
4693
|
* @property {number} sjis2004_code Shift_JIS-2004 コード
|
|
4689
4694
|
* @property {number[]} utf8_array UTF-8 配列
|
|
@@ -5445,7 +5450,7 @@ class Mojix {
|
|
|
5445
5450
|
* 指定した文字から Windows-31J 上の区点番号に変換
|
|
5446
5451
|
* - 2文字以上を指定した場合は、1文字目のみを変換する
|
|
5447
5452
|
* @param {string} text - 変換したいテキスト
|
|
5448
|
-
* @returns {
|
|
5453
|
+
* @returns {MenKuTen} 区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
|
|
5449
5454
|
*/
|
|
5450
5455
|
static toKuTen(text) {
|
|
5451
5456
|
return CP932.toKuTen(text);
|
|
@@ -5453,7 +5458,7 @@ class Mojix {
|
|
|
5453
5458
|
|
|
5454
5459
|
/**
|
|
5455
5460
|
* Windows-31J 上の区点番号から文字列に変換
|
|
5456
|
-
* @param {
|
|
5461
|
+
* @param {MenKuTen|string} kuten - 区点番号
|
|
5457
5462
|
* @returns {string} 変換後のテキスト
|
|
5458
5463
|
*/
|
|
5459
5464
|
static fromKuTen(kuten) {
|
|
@@ -5464,7 +5469,7 @@ class Mojix {
|
|
|
5464
5469
|
* 指定した文字から Shift_JIS-2004 上の面区点番号に変換
|
|
5465
5470
|
* - 2文字以上を指定した場合は、1文字目のみを変換する
|
|
5466
5471
|
* @param {string} text - 変換したいテキスト
|
|
5467
|
-
* @returns {
|
|
5472
|
+
* @returns {MenKuTen} 面区点番号(存在しない場合(1バイトのJISコードなど)はnullを返す)
|
|
5468
5473
|
*/
|
|
5469
5474
|
static toMenKuTen(text) {
|
|
5470
5475
|
return SJIS2004.toMenKuTen(text);
|
|
@@ -5472,7 +5477,7 @@ class Mojix {
|
|
|
5472
5477
|
|
|
5473
5478
|
/**
|
|
5474
5479
|
* Shift_JIS-2004 上の面区点番号から文字列に変換
|
|
5475
|
-
* @param {
|
|
5480
|
+
* @param {MenKuTen|string} menkuten - 面区点番号
|
|
5476
5481
|
* @returns {string} 変換後のテキスト
|
|
5477
5482
|
*/
|
|
5478
5483
|
static fromMenKuTen(menkuten) {
|
|
@@ -5652,7 +5657,7 @@ class Mojix {
|
|
|
5652
5657
|
/**
|
|
5653
5658
|
* 指定した1つのUTF-32 コードポイントに関して、解析を行い情報を返します
|
|
5654
5659
|
* @param {number} unicode_codepoint - UTF-32 のコードポイント
|
|
5655
|
-
* @returns {
|
|
5660
|
+
* @returns {MojiData} 文字の情報がつまったオブジェクト
|
|
5656
5661
|
*/
|
|
5657
5662
|
static getMojiData(unicode_codepoint) {
|
|
5658
5663
|
return MojiAnalyzer.getMojiData(unicode_codepoint);
|