mojijs 4.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HISTORY.md +20 -0
- package/README.md +23 -15
- package/build/CommonJS/index.js +145 -32
- package/build/esm/index.js +144 -31
- package/build/index.d.ts +2 -0
- package/build/mojijs.esm.min.js +1 -9
- package/build/mojijs.umd.min.js +1 -1
- package/build/mojijs.wsh.js +0 -0
- package/package.json +1 -1
package/HISTORY.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# History
|
|
2
2
|
|
|
3
|
+
## v5.0.0
|
|
4
|
+
|
|
5
|
+
### 機能改善
|
|
6
|
+
|
|
7
|
+
- 異体字セレクタの判定に、注釈機能を追加
|
|
8
|
+
- 絵文字の判定を強化
|
|
9
|
+
- 記号の判定を追加
|
|
10
|
+
- Unicodeの制御文字を追加
|
|
11
|
+
- CJK Unified Ideographs Extension I (2EBF0–2EE5F)
|
|
12
|
+
- CJK Unified Ideographs Extension J (323B0–3347F)
|
|
13
|
+
|
|
14
|
+
### 変更
|
|
15
|
+
|
|
16
|
+
- travisが動作しないので除去
|
|
17
|
+
|
|
18
|
+
### 不具合修正
|
|
19
|
+
|
|
20
|
+
- 結合していない文字も結合文字と判定する場合があるのを修正
|
|
21
|
+
- getVariationSelectorsNumberFromCodePoint での戻り値で意図しない文字列を返す問題を修正
|
|
22
|
+
|
|
3
23
|
## v4.0.0
|
|
4
24
|
|
|
5
25
|
### 機能改善
|
package/README.md
CHANGED
|
@@ -1,28 +1,32 @@
|
|
|
1
|
-
# MojiJS
|
|
2
|
-
|
|
1
|
+
# MojiJS
|
|
2
|
+
|
|
3
3
|
[](https://natade-jp.github.io/MojiJS/docs/)
|
|
4
4
|

|
|
5
5
|
|
|
6
|
-
## What
|
|
7
|
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
- [
|
|
6
|
+
## What
|
|
7
|
+
|
|
8
|
+
- 日本語の文字データを解析及び、変換するライブラリです。
|
|
9
|
+
- [詳細な API を公開しています。](https://natade-jp.github.io/MojiJS/docs/)
|
|
10
|
+
- [動作例](https://natade-jp.github.io/MojiJS/html/examples/demos/Text/) (コンソール及び[ソースコード](https://natade-jp.github.io/MojiJS/html/examples/demos/Text/main.mjs)を確認してみてください。)
|
|
11
|
+
- [npm](https://www.npmjs.com/package/mojijs)
|
|
11
12
|
|
|
12
13
|
以下のことが行えます
|
|
13
|
-
- エンコード(UTF-8 / UTF-16 / UTF-32 / Shift_JIS / Shift_JIS-2004 / EUC-JP / EUC-JIS-2004 )
|
|
14
|
-
- 日本語の変換 (ひらがな, カタカナ, 半角, 全角, ローマ字 など))
|
|
15
|
-
- 漢字の判定 (常用漢字, 人名用漢字, 面区点, 漢字水準 など)
|
|
16
|
-
- 自然順ソート
|
|
17
14
|
|
|
18
|
-
|
|
15
|
+
- エンコード(UTF-8 / UTF-16 / UTF-32 / Shift_JIS / Shift_JIS-2004 / EUC-JP / EUC-JIS-2004 )
|
|
16
|
+
- 日本語の変換 (ひらがな, カタカナ, 半角, 全角, ローマ字 など)
|
|
17
|
+
- 漢字の判定 (常用漢字, 人名用漢字, 面区点, 漢字水準 など)
|
|
18
|
+
- 自然順ソート
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
|
|
19
22
|
```
|
|
20
23
|
npm install --save-dev mojijs
|
|
21
24
|
```
|
|
22
25
|
|
|
23
|
-
## Sample
|
|
26
|
+
## Sample
|
|
24
27
|
|
|
25
28
|
### エンコード
|
|
29
|
+
|
|
26
30
|
```javascript
|
|
27
31
|
const MojiJS = require("mojijs");
|
|
28
32
|
|
|
@@ -34,6 +38,7 @@ console.log(MojiJS.decode([0x61, 0xE3, 0x81, 0x82], "utf-8"));
|
|
|
34
38
|
```
|
|
35
39
|
|
|
36
40
|
### 日本語の変換
|
|
41
|
+
|
|
37
42
|
```javascript
|
|
38
43
|
const MojiJS = require("mojijs");
|
|
39
44
|
|
|
@@ -42,6 +47,7 @@ console.log(MojiJS.toHiragana("カキクケコ"));
|
|
|
42
47
|
```
|
|
43
48
|
|
|
44
49
|
### 面区点
|
|
50
|
+
|
|
45
51
|
```javascript
|
|
46
52
|
const MojiJS = require("mojijs");
|
|
47
53
|
|
|
@@ -61,6 +67,7 @@ console.log("面区点:" + data3.encode.menkuten.text + ", 漢字水準:" +
|
|
|
61
67
|
```
|
|
62
68
|
|
|
63
69
|
### 自然順ソート
|
|
70
|
+
|
|
64
71
|
```javascript
|
|
65
72
|
const MojiJS = require("mojijs");
|
|
66
73
|
|
|
@@ -68,5 +75,6 @@ console.log(["3", "02", "あ", "イ", "う", "1"].sort(MojiJS.compareToForNatu
|
|
|
68
75
|
-> [ '1', '02', '3', 'あ', 'イ', 'う' ]
|
|
69
76
|
```
|
|
70
77
|
|
|
71
|
-
## Author
|
|
72
|
-
|
|
78
|
+
## Author
|
|
79
|
+
|
|
80
|
+
- [natade-jp](https://github.com/natade-jp/)
|
package/build/CommonJS/index.js
CHANGED
|
@@ -591,20 +591,28 @@ class Unicode {
|
|
|
591
591
|
* @returns {boolean} 確認結果
|
|
592
592
|
*/
|
|
593
593
|
static isCombiningMarkFromCodePoint(codepoint) {
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
((
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
594
|
+
try {
|
|
595
|
+
new RegExp("\\p{Mark}", "u");
|
|
596
|
+
return /\p{Mark}/u.test(String.fromCodePoint(codepoint));
|
|
597
|
+
} catch (e) {
|
|
598
|
+
// フォールバック処理
|
|
599
|
+
return (
|
|
600
|
+
// Combining Diacritical Marks
|
|
601
|
+
((0x0300 <= codepoint) && (codepoint <= 0x036F)) ||
|
|
602
|
+
// Combining Diacritical Marks Extended
|
|
603
|
+
((0x1AB0 <= codepoint) && (codepoint <= 0x1AFF)) ||
|
|
604
|
+
// Combining Diacritical Marks Supplement
|
|
605
|
+
((0x1DC0 <= codepoint) && (codepoint <= 0x1DFF)) ||
|
|
606
|
+
// Combining Diacritical Marks for Symbols
|
|
607
|
+
((0x20D0 <= codepoint) && (codepoint <= 0x20FF)) ||
|
|
608
|
+
// 日本語に含まれる2種類の文字
|
|
609
|
+
// COMBINING VOICED SOUND MARK
|
|
610
|
+
// COMBINING SEMI-VOICED SOUND MARK
|
|
611
|
+
((0x3099 <= codepoint) && (codepoint <= 0x309A)) ||
|
|
612
|
+
// Combining Half Marks
|
|
613
|
+
((0xFE20 <= codepoint) && (codepoint <= 0xFE2F))
|
|
614
|
+
);
|
|
615
|
+
}
|
|
608
616
|
}
|
|
609
617
|
|
|
610
618
|
|
|
@@ -3997,15 +4005,111 @@ class MOJI_CHAR_MAP {
|
|
|
3997
4005
|
// 制御文字、VSは多いため含めていない
|
|
3998
4006
|
|
|
3999
4007
|
control_charcter_map = {
|
|
4000
|
-
|
|
4001
|
-
|
|
4002
|
-
|
|
4003
|
-
|
|
4004
|
-
|
|
4005
|
-
|
|
4006
|
-
|
|
4007
|
-
|
|
4008
|
-
|
|
4008
|
+
// --- C0 control characters (ASCII 0x00–0x1F) ---
|
|
4009
|
+
0: "NUL", // Null
|
|
4010
|
+
1: "SOH", // Start of Heading
|
|
4011
|
+
2: "STX", // Start of Text
|
|
4012
|
+
3: "ETX", // End of Text
|
|
4013
|
+
4: "EOT", // End of Transmission
|
|
4014
|
+
5: "ENQ", // Enquiry
|
|
4015
|
+
6: "ACK", // Acknowledge
|
|
4016
|
+
7: "BEL", // Bell (beep)
|
|
4017
|
+
|
|
4018
|
+
8: "BS", // Backspace
|
|
4019
|
+
9: "HT", // Horizontal Tab
|
|
4020
|
+
10: "LF", // Line Feed
|
|
4021
|
+
11: "VT", // Vertical Tab
|
|
4022
|
+
12: "FF", // Form Feed
|
|
4023
|
+
13: "CR", // Carriage Return
|
|
4024
|
+
14: "SO", // Shift Out
|
|
4025
|
+
15: "SI", // Shift In
|
|
4026
|
+
|
|
4027
|
+
16: "DLE", // Data Link Escape
|
|
4028
|
+
17: "DC1", // Device Control 1 (XON)
|
|
4029
|
+
18: "DC2", // Device Control 2
|
|
4030
|
+
19: "DC3", // Device Control 3 (XOFF)
|
|
4031
|
+
20: "DC4", // Device Control 4
|
|
4032
|
+
21: "NAK", // Negative Acknowledge
|
|
4033
|
+
22: "SYN", // Synchronous Idle
|
|
4034
|
+
23: "ETB", // End of Transmission Block
|
|
4035
|
+
|
|
4036
|
+
24: "CAN", // Cancel
|
|
4037
|
+
25: "EM", // End of Medium
|
|
4038
|
+
26: "SUB", // Substitute
|
|
4039
|
+
27: "ESC", // Escape
|
|
4040
|
+
28: "FS", // File Separator
|
|
4041
|
+
29: "GS", // Group Separator
|
|
4042
|
+
30: "RS", // Record Separator
|
|
4043
|
+
31: "US", // Unit Separator
|
|
4044
|
+
|
|
4045
|
+
// --- DEL ---
|
|
4046
|
+
127: "DEL", // Delete
|
|
4047
|
+
|
|
4048
|
+
// --- C1 control characters (ISO/IEC 6429, 0x80–0x9F) ---
|
|
4049
|
+
128: "PAD", // Padding Character
|
|
4050
|
+
129: "HOP", // High Octet Preset
|
|
4051
|
+
130: "BPH", // Break Permitted Here
|
|
4052
|
+
131: "NBH", // No Break Here
|
|
4053
|
+
132: "IND", // Index
|
|
4054
|
+
133: "NEL", // Next Line
|
|
4055
|
+
134: "SSA", // Start of Selected Area
|
|
4056
|
+
135: "ESA", // End of Selected Area
|
|
4057
|
+
136: "HTS", // Horizontal Tab Set
|
|
4058
|
+
137: "HTJ", // Horizontal Tab with Justification
|
|
4059
|
+
138: "VTS", // Vertical Tab Set
|
|
4060
|
+
139: "PLD", // Partial Line Down
|
|
4061
|
+
140: "PLU", // Partial Line Up
|
|
4062
|
+
141: "RI", // Reverse Index
|
|
4063
|
+
142: "SS2", // Single Shift 2
|
|
4064
|
+
143: "SS3", // Single Shift 3
|
|
4065
|
+
144: "DCS", // Device Control String
|
|
4066
|
+
145: "PU1", // Private Use 1
|
|
4067
|
+
146: "PU2", // Private Use 2
|
|
4068
|
+
147: "STS", // Set Transmit State
|
|
4069
|
+
148: "CCH", // Cancel Character
|
|
4070
|
+
149: "MW", // Message Waiting
|
|
4071
|
+
150: "SPA", // Start of Protected Area
|
|
4072
|
+
151: "EPA", // End of Protected Area
|
|
4073
|
+
152: "SOS", // Start of String
|
|
4074
|
+
153: "SGCI",// Single Graphic Character Introducer
|
|
4075
|
+
154: "SCI", // Single Character Introducer
|
|
4076
|
+
155: "CSI", // Control Sequence Introducer
|
|
4077
|
+
156: "ST", // String Terminator
|
|
4078
|
+
157: "OSC", // Operating System Command
|
|
4079
|
+
158: "PM", // Privacy Message
|
|
4080
|
+
159: "APC", // Application Program Command
|
|
4081
|
+
|
|
4082
|
+
// --- Unicode の文字だが制御的に扱われる文字 ---
|
|
4083
|
+
160: "NBSP", // No-Break Space(表示は空白だが改行不可)
|
|
4084
|
+
173: "SHY", // Soft Hyphen(通常は表示されない)
|
|
4085
|
+
|
|
4086
|
+
// --- Unicode Interlinear Annotation ---
|
|
4087
|
+
65529: "IAA", // Interlinear Annotation Anchor
|
|
4088
|
+
65530: "IAS", // Interlinear Annotation Separator
|
|
4089
|
+
65531: "IAT", // Interlinear Annotation Terminator
|
|
4090
|
+
|
|
4091
|
+
// Zero Width / Joiner 系(Cf)
|
|
4092
|
+
0x200B: "ZWSP", // ZERO WIDTH SPACE
|
|
4093
|
+
0x200C: "ZWNJ", // ZERO WIDTH NON-JOINER
|
|
4094
|
+
0x200D: "ZWJ", // ZERO WIDTH JOINER
|
|
4095
|
+
0x2060: "WJ", // WORD JOINER
|
|
4096
|
+
0xFEFF: "BOM", // BYTE ORDER MARK / ZERO WIDTH NO-BREAK SPACE
|
|
4097
|
+
|
|
4098
|
+
// 双方向(BiDi)制御文字
|
|
4099
|
+
0x202A: "LRE", // LEFT-TO-RIGHT EMBEDDING
|
|
4100
|
+
0x202B: "RLE", // RIGHT-TO-LEFT EMBEDDING
|
|
4101
|
+
0x202C: "PDF", // POP DIRECTIONAL FORMATTING
|
|
4102
|
+
0x202D: "LRO", // LEFT-TO-RIGHT OVERRIDE
|
|
4103
|
+
0x202E: "RLO", // RIGHT-TO-LEFT OVERRIDE
|
|
4104
|
+
|
|
4105
|
+
0x2066: "LRI", // LEFT-TO-RIGHT ISOLATE
|
|
4106
|
+
0x2067: "RLI", // RIGHT-TO-LEFT ISOLATE
|
|
4107
|
+
0x2068: "FSI", // FIRST STRONG ISOLATE
|
|
4108
|
+
0x2069: "PDI" , // POP DIRECTIONAL ISOLATE
|
|
4109
|
+
|
|
4110
|
+
// Unicode Noncharacter(検証・防御用途)
|
|
4111
|
+
0xFFFE: "NONCHAR_FFFE",
|
|
4112
|
+
0xFFFF: "NONCHAR_FFFF"
|
|
4009
4113
|
};
|
|
4010
4114
|
|
|
4011
4115
|
const unicode_blockname_array = [
|
|
@@ -4048,8 +4152,8 @@ class MOJI_CHAR_MAP {
|
|
|
4048
4152
|
"Cyrillic Extended-D", "Nyiakeng Puachue Hmong", "Toto", "Wancho", "Nag Mundari", "Ethiopic Extended-B", "Mende Kikakui", "Adlam",
|
|
4049
4153
|
"Indic Siyaq Numbers", "Ottoman Siyaq Numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
|
|
4050
4154
|
"Miscellaneous Symbols and Pictographs", "Emoticons", "Ornamental Dingbats", "Transport and Map Symbols", "Alchemical Symbols", "Geometric Shapes Extended", "Supplemental Arrows-C", "Supplemental Symbols and Pictographs",
|
|
4051
|
-
"Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F",
|
|
4052
|
-
"CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
|
|
4155
|
+
"Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F", "CJK Unified Ideographs Extension I",
|
|
4156
|
+
"CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "CJK Unified Ideographs Extension J", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
|
|
4053
4157
|
];
|
|
4054
4158
|
|
|
4055
4159
|
const unicode_blockaddress_array = [
|
|
@@ -4072,8 +4176,8 @@ class MOJI_CHAR_MAP {
|
|
|
4072
4176
|
0x1467F, 0x16A3F, 0x16A6F, 0x16ACF, 0x16AFF, 0x16B8F, 0x16E9F, 0x16F9F, 0x16FFF, 0x187FF, 0x18AFF, 0x18CFF, 0x18D7F, 0x1AFFF, 0x1B0FF, 0x1B12F,
|
|
4073
4177
|
0x1B16F, 0x1B2FF, 0x1BC9F, 0x1BCAF, 0x1CFCF, 0x1D0FF, 0x1D1FF, 0x1D24F, 0x1D2DF, 0x1D2FF, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1DAAF, 0x1DFFF, 0x1E02F,
|
|
4074
4178
|
0x1E08F, 0x1E14F, 0x1E2BF, 0x1E2FF, 0x1E4FF, 0x1E7FF, 0x1E8DF, 0x1E95F, 0x1ECBF, 0x1ED4F, 0x1EEFF, 0x1F02F, 0x1F09F, 0x1F0FF, 0x1F1FF, 0x1F2FF,
|
|
4075
|
-
0x1F5FF, 0x1F64F, 0x1F67F, 0x1F6FF, 0x1F77F, 0x1F7FF, 0x1F8FF, 0x1F9FF, 0x1FA6F, 0x1FAFF, 0x1FBFF, 0x2A6DF, 0x2B73F, 0x2B81F, 0x2CEAF, 0x2EBEF,
|
|
4076
|
-
0x2FA1F, 0x3134F, 0x323AF, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
|
|
4179
|
+
0x1F5FF, 0x1F64F, 0x1F67F, 0x1F6FF, 0x1F77F, 0x1F7FF, 0x1F8FF, 0x1F9FF, 0x1FA6F, 0x1FAFF, 0x1FBFF, 0x2A6DF, 0x2B73F, 0x2B81F, 0x2CEAF, 0x2EBEF, 0x2EE5F,
|
|
4180
|
+
0x2FA1F, 0x3134F, 0x323AF, 0x3347F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
|
|
4077
4181
|
];
|
|
4078
4182
|
|
|
4079
4183
|
to_block_name_from_unicode = function(unicode_codepoint) {
|
|
@@ -4177,20 +4281,25 @@ class MojiAnalizerTools {
|
|
|
4177
4281
|
/**
|
|
4178
4282
|
* コードポイントから異体字セレクタの判定
|
|
4179
4283
|
* @param {Number} codepoint - コードポイント
|
|
4284
|
+
* @param {boolean} [annotate = false] - 注釈をつけるか否か
|
|
4180
4285
|
* @returns {string|null} 確認結果(異体字セレクタではない場合はNULLを返す)
|
|
4181
4286
|
*/
|
|
4182
|
-
static getVariationSelectorsNumberFromCodePoint(codepoint) {
|
|
4287
|
+
static getVariationSelectorsNumberFromCodePoint(codepoint, annotate) {
|
|
4183
4288
|
// モンゴル自由字形選択子 U+180B〜U+180D (3個)
|
|
4184
4289
|
if((0x180B <= codepoint) && (codepoint <= 0x180D)) {
|
|
4185
4290
|
return "FVS" + ((codepoint - 0x180B) + 1);
|
|
4186
4291
|
}
|
|
4187
4292
|
// SVSで利用される異体字セレクタ U+FE00〜U+FE0F (VS1~VS16) (16個)
|
|
4188
4293
|
if((0xFE00 <= codepoint) && (codepoint <= 0xFE0F)) {
|
|
4189
|
-
|
|
4294
|
+
const n = (codepoint - 0xFE00) + 1;
|
|
4295
|
+
if (!annotate) return "VS" + n;
|
|
4296
|
+
if (codepoint === 0xFE0E) return "VS15 (text)";
|
|
4297
|
+
if (codepoint === 0xFE0F) return "VS16 (emoji)";
|
|
4298
|
+
return "VS" + n;
|
|
4190
4299
|
}
|
|
4191
4300
|
// IVSで利用される異体字セレクタ U+E0100〜U+E01EF (VS17~VS256) (240個)
|
|
4192
4301
|
else if((0xE0100 <= codepoint) && (codepoint <= 0xE01EF)) {
|
|
4193
|
-
return "VS" + (codepoint - 0xE0100) + 17;
|
|
4302
|
+
return "VS" + ((codepoint - 0xE0100) + 17);
|
|
4194
4303
|
}
|
|
4195
4304
|
return null;
|
|
4196
4305
|
}
|
|
@@ -4342,6 +4451,7 @@ class MojiAnalizerTools {
|
|
|
4342
4451
|
* @property {boolean} is_halfwidth_katakana 半角カタカナ
|
|
4343
4452
|
* @property {boolean} is_emoji 絵文字
|
|
4344
4453
|
* @property {boolean} is_emoticons 顔文字
|
|
4454
|
+
* @property {boolean} is_symbol_base 記号(VS16 が付くと絵文字化)
|
|
4345
4455
|
* @property {boolean} is_gaiji 外字
|
|
4346
4456
|
* @property {boolean} is_combining_mark 結合文字
|
|
4347
4457
|
* @property {boolean} is_variation_selector 異体字セレクタ
|
|
@@ -4412,6 +4522,7 @@ class MojiAnalyzer {
|
|
|
4412
4522
|
is_halfwidth_katakana : false,
|
|
4413
4523
|
is_emoji : false,
|
|
4414
4524
|
is_emoticons : false,
|
|
4525
|
+
is_symbol_base : false,
|
|
4415
4526
|
is_gaiji : false,
|
|
4416
4527
|
is_combining_mark : false,
|
|
4417
4528
|
is_variation_selector : false
|
|
@@ -4536,9 +4647,11 @@ class MojiAnalyzer {
|
|
|
4536
4647
|
type.is_fullwidth_ascii = /[\u3000\uFF01-\uFF5E]/.test(data.character);
|
|
4537
4648
|
type.is_halfwidth_katakana = /[\uFF61-\uFF9F]/.test(data.character);
|
|
4538
4649
|
// 絵文字
|
|
4539
|
-
type.is_emoji = /Pictographs/.test(type.blockname);
|
|
4650
|
+
type.is_emoji = /Pictographs|Transport and Map Symbols/.test(type.blockname);
|
|
4540
4651
|
// 顔文字
|
|
4541
4652
|
type.is_emoticons = /Emoticons/.test(type.blockname);
|
|
4653
|
+
// 記号(VS16 が付くと絵文字化)
|
|
4654
|
+
type.is_symbol_base = /Dingbats|Miscellaneous Symbols/.test(type.blockname);
|
|
4542
4655
|
// 外字
|
|
4543
4656
|
type.is_gaiji = /Private Use Area/.test(type.blockname);
|
|
4544
4657
|
// 結合文字
|
|
@@ -5270,4 +5383,4 @@ class MojiJS {
|
|
|
5270
5383
|
|
|
5271
5384
|
}
|
|
5272
5385
|
|
|
5273
|
-
|
|
5386
|
+
export default MojiJS;
|
package/build/esm/index.js
CHANGED
|
@@ -591,20 +591,28 @@ class Unicode {
|
|
|
591
591
|
* @returns {boolean} 確認結果
|
|
592
592
|
*/
|
|
593
593
|
static isCombiningMarkFromCodePoint(codepoint) {
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
((
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
594
|
+
try {
|
|
595
|
+
new RegExp("\\p{Mark}", "u");
|
|
596
|
+
return /\p{Mark}/u.test(String.fromCodePoint(codepoint));
|
|
597
|
+
} catch (e) {
|
|
598
|
+
// フォールバック処理
|
|
599
|
+
return (
|
|
600
|
+
// Combining Diacritical Marks
|
|
601
|
+
((0x0300 <= codepoint) && (codepoint <= 0x036F)) ||
|
|
602
|
+
// Combining Diacritical Marks Extended
|
|
603
|
+
((0x1AB0 <= codepoint) && (codepoint <= 0x1AFF)) ||
|
|
604
|
+
// Combining Diacritical Marks Supplement
|
|
605
|
+
((0x1DC0 <= codepoint) && (codepoint <= 0x1DFF)) ||
|
|
606
|
+
// Combining Diacritical Marks for Symbols
|
|
607
|
+
((0x20D0 <= codepoint) && (codepoint <= 0x20FF)) ||
|
|
608
|
+
// 日本語に含まれる2種類の文字
|
|
609
|
+
// COMBINING VOICED SOUND MARK
|
|
610
|
+
// COMBINING SEMI-VOICED SOUND MARK
|
|
611
|
+
((0x3099 <= codepoint) && (codepoint <= 0x309A)) ||
|
|
612
|
+
// Combining Half Marks
|
|
613
|
+
((0xFE20 <= codepoint) && (codepoint <= 0xFE2F))
|
|
614
|
+
);
|
|
615
|
+
}
|
|
608
616
|
}
|
|
609
617
|
|
|
610
618
|
|
|
@@ -3997,15 +4005,111 @@ class MOJI_CHAR_MAP {
|
|
|
3997
4005
|
// 制御文字、VSは多いため含めていない
|
|
3998
4006
|
|
|
3999
4007
|
control_charcter_map = {
|
|
4000
|
-
|
|
4001
|
-
|
|
4002
|
-
|
|
4003
|
-
|
|
4004
|
-
|
|
4005
|
-
|
|
4006
|
-
|
|
4007
|
-
|
|
4008
|
-
|
|
4008
|
+
// --- C0 control characters (ASCII 0x00–0x1F) ---
|
|
4009
|
+
0: "NUL", // Null
|
|
4010
|
+
1: "SOH", // Start of Heading
|
|
4011
|
+
2: "STX", // Start of Text
|
|
4012
|
+
3: "ETX", // End of Text
|
|
4013
|
+
4: "EOT", // End of Transmission
|
|
4014
|
+
5: "ENQ", // Enquiry
|
|
4015
|
+
6: "ACK", // Acknowledge
|
|
4016
|
+
7: "BEL", // Bell (beep)
|
|
4017
|
+
|
|
4018
|
+
8: "BS", // Backspace
|
|
4019
|
+
9: "HT", // Horizontal Tab
|
|
4020
|
+
10: "LF", // Line Feed
|
|
4021
|
+
11: "VT", // Vertical Tab
|
|
4022
|
+
12: "FF", // Form Feed
|
|
4023
|
+
13: "CR", // Carriage Return
|
|
4024
|
+
14: "SO", // Shift Out
|
|
4025
|
+
15: "SI", // Shift In
|
|
4026
|
+
|
|
4027
|
+
16: "DLE", // Data Link Escape
|
|
4028
|
+
17: "DC1", // Device Control 1 (XON)
|
|
4029
|
+
18: "DC2", // Device Control 2
|
|
4030
|
+
19: "DC3", // Device Control 3 (XOFF)
|
|
4031
|
+
20: "DC4", // Device Control 4
|
|
4032
|
+
21: "NAK", // Negative Acknowledge
|
|
4033
|
+
22: "SYN", // Synchronous Idle
|
|
4034
|
+
23: "ETB", // End of Transmission Block
|
|
4035
|
+
|
|
4036
|
+
24: "CAN", // Cancel
|
|
4037
|
+
25: "EM", // End of Medium
|
|
4038
|
+
26: "SUB", // Substitute
|
|
4039
|
+
27: "ESC", // Escape
|
|
4040
|
+
28: "FS", // File Separator
|
|
4041
|
+
29: "GS", // Group Separator
|
|
4042
|
+
30: "RS", // Record Separator
|
|
4043
|
+
31: "US", // Unit Separator
|
|
4044
|
+
|
|
4045
|
+
// --- DEL ---
|
|
4046
|
+
127: "DEL", // Delete
|
|
4047
|
+
|
|
4048
|
+
// --- C1 control characters (ISO/IEC 6429, 0x80–0x9F) ---
|
|
4049
|
+
128: "PAD", // Padding Character
|
|
4050
|
+
129: "HOP", // High Octet Preset
|
|
4051
|
+
130: "BPH", // Break Permitted Here
|
|
4052
|
+
131: "NBH", // No Break Here
|
|
4053
|
+
132: "IND", // Index
|
|
4054
|
+
133: "NEL", // Next Line
|
|
4055
|
+
134: "SSA", // Start of Selected Area
|
|
4056
|
+
135: "ESA", // End of Selected Area
|
|
4057
|
+
136: "HTS", // Horizontal Tab Set
|
|
4058
|
+
137: "HTJ", // Horizontal Tab with Justification
|
|
4059
|
+
138: "VTS", // Vertical Tab Set
|
|
4060
|
+
139: "PLD", // Partial Line Down
|
|
4061
|
+
140: "PLU", // Partial Line Up
|
|
4062
|
+
141: "RI", // Reverse Index
|
|
4063
|
+
142: "SS2", // Single Shift 2
|
|
4064
|
+
143: "SS3", // Single Shift 3
|
|
4065
|
+
144: "DCS", // Device Control String
|
|
4066
|
+
145: "PU1", // Private Use 1
|
|
4067
|
+
146: "PU2", // Private Use 2
|
|
4068
|
+
147: "STS", // Set Transmit State
|
|
4069
|
+
148: "CCH", // Cancel Character
|
|
4070
|
+
149: "MW", // Message Waiting
|
|
4071
|
+
150: "SPA", // Start of Protected Area
|
|
4072
|
+
151: "EPA", // End of Protected Area
|
|
4073
|
+
152: "SOS", // Start of String
|
|
4074
|
+
153: "SGCI",// Single Graphic Character Introducer
|
|
4075
|
+
154: "SCI", // Single Character Introducer
|
|
4076
|
+
155: "CSI", // Control Sequence Introducer
|
|
4077
|
+
156: "ST", // String Terminator
|
|
4078
|
+
157: "OSC", // Operating System Command
|
|
4079
|
+
158: "PM", // Privacy Message
|
|
4080
|
+
159: "APC", // Application Program Command
|
|
4081
|
+
|
|
4082
|
+
// --- Unicode の文字だが制御的に扱われる文字 ---
|
|
4083
|
+
160: "NBSP", // No-Break Space(表示は空白だが改行不可)
|
|
4084
|
+
173: "SHY", // Soft Hyphen(通常は表示されない)
|
|
4085
|
+
|
|
4086
|
+
// --- Unicode Interlinear Annotation ---
|
|
4087
|
+
65529: "IAA", // Interlinear Annotation Anchor
|
|
4088
|
+
65530: "IAS", // Interlinear Annotation Separator
|
|
4089
|
+
65531: "IAT", // Interlinear Annotation Terminator
|
|
4090
|
+
|
|
4091
|
+
// Zero Width / Joiner 系(Cf)
|
|
4092
|
+
0x200B: "ZWSP", // ZERO WIDTH SPACE
|
|
4093
|
+
0x200C: "ZWNJ", // ZERO WIDTH NON-JOINER
|
|
4094
|
+
0x200D: "ZWJ", // ZERO WIDTH JOINER
|
|
4095
|
+
0x2060: "WJ", // WORD JOINER
|
|
4096
|
+
0xFEFF: "BOM", // BYTE ORDER MARK / ZERO WIDTH NO-BREAK SPACE
|
|
4097
|
+
|
|
4098
|
+
// 双方向(BiDi)制御文字
|
|
4099
|
+
0x202A: "LRE", // LEFT-TO-RIGHT EMBEDDING
|
|
4100
|
+
0x202B: "RLE", // RIGHT-TO-LEFT EMBEDDING
|
|
4101
|
+
0x202C: "PDF", // POP DIRECTIONAL FORMATTING
|
|
4102
|
+
0x202D: "LRO", // LEFT-TO-RIGHT OVERRIDE
|
|
4103
|
+
0x202E: "RLO", // RIGHT-TO-LEFT OVERRIDE
|
|
4104
|
+
|
|
4105
|
+
0x2066: "LRI", // LEFT-TO-RIGHT ISOLATE
|
|
4106
|
+
0x2067: "RLI", // RIGHT-TO-LEFT ISOLATE
|
|
4107
|
+
0x2068: "FSI", // FIRST STRONG ISOLATE
|
|
4108
|
+
0x2069: "PDI" , // POP DIRECTIONAL ISOLATE
|
|
4109
|
+
|
|
4110
|
+
// Unicode Noncharacter(検証・防御用途)
|
|
4111
|
+
0xFFFE: "NONCHAR_FFFE",
|
|
4112
|
+
0xFFFF: "NONCHAR_FFFF"
|
|
4009
4113
|
};
|
|
4010
4114
|
|
|
4011
4115
|
const unicode_blockname_array = [
|
|
@@ -4048,8 +4152,8 @@ class MOJI_CHAR_MAP {
|
|
|
4048
4152
|
"Cyrillic Extended-D", "Nyiakeng Puachue Hmong", "Toto", "Wancho", "Nag Mundari", "Ethiopic Extended-B", "Mende Kikakui", "Adlam",
|
|
4049
4153
|
"Indic Siyaq Numbers", "Ottoman Siyaq Numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
|
|
4050
4154
|
"Miscellaneous Symbols and Pictographs", "Emoticons", "Ornamental Dingbats", "Transport and Map Symbols", "Alchemical Symbols", "Geometric Shapes Extended", "Supplemental Arrows-C", "Supplemental Symbols and Pictographs",
|
|
4051
|
-
"Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F",
|
|
4052
|
-
"CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
|
|
4155
|
+
"Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F", "CJK Unified Ideographs Extension I",
|
|
4156
|
+
"CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "CJK Unified Ideographs Extension J", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
|
|
4053
4157
|
];
|
|
4054
4158
|
|
|
4055
4159
|
const unicode_blockaddress_array = [
|
|
@@ -4072,8 +4176,8 @@ class MOJI_CHAR_MAP {
|
|
|
4072
4176
|
0x1467F, 0x16A3F, 0x16A6F, 0x16ACF, 0x16AFF, 0x16B8F, 0x16E9F, 0x16F9F, 0x16FFF, 0x187FF, 0x18AFF, 0x18CFF, 0x18D7F, 0x1AFFF, 0x1B0FF, 0x1B12F,
|
|
4073
4177
|
0x1B16F, 0x1B2FF, 0x1BC9F, 0x1BCAF, 0x1CFCF, 0x1D0FF, 0x1D1FF, 0x1D24F, 0x1D2DF, 0x1D2FF, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1DAAF, 0x1DFFF, 0x1E02F,
|
|
4074
4178
|
0x1E08F, 0x1E14F, 0x1E2BF, 0x1E2FF, 0x1E4FF, 0x1E7FF, 0x1E8DF, 0x1E95F, 0x1ECBF, 0x1ED4F, 0x1EEFF, 0x1F02F, 0x1F09F, 0x1F0FF, 0x1F1FF, 0x1F2FF,
|
|
4075
|
-
0x1F5FF, 0x1F64F, 0x1F67F, 0x1F6FF, 0x1F77F, 0x1F7FF, 0x1F8FF, 0x1F9FF, 0x1FA6F, 0x1FAFF, 0x1FBFF, 0x2A6DF, 0x2B73F, 0x2B81F, 0x2CEAF, 0x2EBEF,
|
|
4076
|
-
0x2FA1F, 0x3134F, 0x323AF, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
|
|
4179
|
+
0x1F5FF, 0x1F64F, 0x1F67F, 0x1F6FF, 0x1F77F, 0x1F7FF, 0x1F8FF, 0x1F9FF, 0x1FA6F, 0x1FAFF, 0x1FBFF, 0x2A6DF, 0x2B73F, 0x2B81F, 0x2CEAF, 0x2EBEF, 0x2EE5F,
|
|
4180
|
+
0x2FA1F, 0x3134F, 0x323AF, 0x3347F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
|
|
4077
4181
|
];
|
|
4078
4182
|
|
|
4079
4183
|
to_block_name_from_unicode = function(unicode_codepoint) {
|
|
@@ -4177,20 +4281,25 @@ class MojiAnalizerTools {
|
|
|
4177
4281
|
/**
|
|
4178
4282
|
* コードポイントから異体字セレクタの判定
|
|
4179
4283
|
* @param {Number} codepoint - コードポイント
|
|
4284
|
+
* @param {boolean} [annotate = false] - 注釈をつけるか否か
|
|
4180
4285
|
* @returns {string|null} 確認結果(異体字セレクタではない場合はNULLを返す)
|
|
4181
4286
|
*/
|
|
4182
|
-
static getVariationSelectorsNumberFromCodePoint(codepoint) {
|
|
4287
|
+
static getVariationSelectorsNumberFromCodePoint(codepoint, annotate) {
|
|
4183
4288
|
// モンゴル自由字形選択子 U+180B〜U+180D (3個)
|
|
4184
4289
|
if((0x180B <= codepoint) && (codepoint <= 0x180D)) {
|
|
4185
4290
|
return "FVS" + ((codepoint - 0x180B) + 1);
|
|
4186
4291
|
}
|
|
4187
4292
|
// SVSで利用される異体字セレクタ U+FE00〜U+FE0F (VS1~VS16) (16個)
|
|
4188
4293
|
if((0xFE00 <= codepoint) && (codepoint <= 0xFE0F)) {
|
|
4189
|
-
|
|
4294
|
+
const n = (codepoint - 0xFE00) + 1;
|
|
4295
|
+
if (!annotate) return "VS" + n;
|
|
4296
|
+
if (codepoint === 0xFE0E) return "VS15 (text)";
|
|
4297
|
+
if (codepoint === 0xFE0F) return "VS16 (emoji)";
|
|
4298
|
+
return "VS" + n;
|
|
4190
4299
|
}
|
|
4191
4300
|
// IVSで利用される異体字セレクタ U+E0100〜U+E01EF (VS17~VS256) (240個)
|
|
4192
4301
|
else if((0xE0100 <= codepoint) && (codepoint <= 0xE01EF)) {
|
|
4193
|
-
return "VS" + (codepoint - 0xE0100) + 17;
|
|
4302
|
+
return "VS" + ((codepoint - 0xE0100) + 17);
|
|
4194
4303
|
}
|
|
4195
4304
|
return null;
|
|
4196
4305
|
}
|
|
@@ -4342,6 +4451,7 @@ class MojiAnalizerTools {
|
|
|
4342
4451
|
* @property {boolean} is_halfwidth_katakana 半角カタカナ
|
|
4343
4452
|
* @property {boolean} is_emoji 絵文字
|
|
4344
4453
|
* @property {boolean} is_emoticons 顔文字
|
|
4454
|
+
* @property {boolean} is_symbol_base 記号(VS16 が付くと絵文字化)
|
|
4345
4455
|
* @property {boolean} is_gaiji 外字
|
|
4346
4456
|
* @property {boolean} is_combining_mark 結合文字
|
|
4347
4457
|
* @property {boolean} is_variation_selector 異体字セレクタ
|
|
@@ -4412,6 +4522,7 @@ class MojiAnalyzer {
|
|
|
4412
4522
|
is_halfwidth_katakana : false,
|
|
4413
4523
|
is_emoji : false,
|
|
4414
4524
|
is_emoticons : false,
|
|
4525
|
+
is_symbol_base : false,
|
|
4415
4526
|
is_gaiji : false,
|
|
4416
4527
|
is_combining_mark : false,
|
|
4417
4528
|
is_variation_selector : false
|
|
@@ -4536,9 +4647,11 @@ class MojiAnalyzer {
|
|
|
4536
4647
|
type.is_fullwidth_ascii = /[\u3000\uFF01-\uFF5E]/.test(data.character);
|
|
4537
4648
|
type.is_halfwidth_katakana = /[\uFF61-\uFF9F]/.test(data.character);
|
|
4538
4649
|
// 絵文字
|
|
4539
|
-
type.is_emoji = /Pictographs/.test(type.blockname);
|
|
4650
|
+
type.is_emoji = /Pictographs|Transport and Map Symbols/.test(type.blockname);
|
|
4540
4651
|
// 顔文字
|
|
4541
4652
|
type.is_emoticons = /Emoticons/.test(type.blockname);
|
|
4653
|
+
// 記号(VS16 が付くと絵文字化)
|
|
4654
|
+
type.is_symbol_base = /Dingbats|Miscellaneous Symbols/.test(type.blockname);
|
|
4542
4655
|
// 外字
|
|
4543
4656
|
type.is_gaiji = /Private Use Area/.test(type.blockname);
|
|
4544
4657
|
// 結合文字
|
package/build/index.d.ts
CHANGED
|
@@ -346,6 +346,7 @@ declare type _MojiEncodeData_ = {
|
|
|
346
346
|
* @property {boolean} is_halfwidth_katakana 半角カタカナ
|
|
347
347
|
* @property {boolean} is_emoji 絵文字
|
|
348
348
|
* @property {boolean} is_emoticons 顔文字
|
|
349
|
+
* @property {boolean} is_symbol_base 記号(VS16 が付くと絵文字化)
|
|
349
350
|
* @property {boolean} is_gaiji 外字
|
|
350
351
|
* @property {boolean} is_combining_mark 結合文字
|
|
351
352
|
* @property {boolean} is_variation_selector 異体字セレクタ
|
|
@@ -371,6 +372,7 @@ declare type _MojiTypeData_ = {
|
|
|
371
372
|
is_halfwidth_katakana: boolean;
|
|
372
373
|
is_emoji: boolean;
|
|
373
374
|
is_emoticons: boolean;
|
|
375
|
+
is_symbol_base: boolean;
|
|
374
376
|
is_gaiji: boolean;
|
|
375
377
|
is_combining_mark: boolean;
|
|
376
378
|
is_variation_selector: boolean;
|