micromark-extension-cjk-friendly-util 2.0.0-rc.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/categoryUtil.d.ts +8 -1
- package/dist/categoryUtil.js +6 -1
- package/dist/characterWithNonBmp.d.ts +2 -1
- package/dist/characterWithNonBmp.js +6 -1
- package/dist/classifyCharacter.d.ts +15 -2
- package/dist/classifyCharacter.js +31 -5
- package/dist/index.d.ts +3 -3
- package/dist/index.js +62 -2
- package/package.json +1 -1
package/dist/categoryUtil.d.ts
CHANGED
|
@@ -31,6 +31,13 @@ declare function isCjk(category: Category): boolean;
|
|
|
31
31
|
* @returns `true` if the code point represents an IVS
|
|
32
32
|
*/
|
|
33
33
|
declare function isIvs(category: Category): boolean;
|
|
34
|
+
/**
|
|
35
|
+
* `true` if {@link isCjk} or {@link isIvs}.
|
|
36
|
+
*
|
|
37
|
+
* @param category the return value of {@link classifyCharacter}.
|
|
38
|
+
* @returns `true` if the code point represents a CJK character or an IVS
|
|
39
|
+
*/
|
|
40
|
+
declare function isCjkOrIvs(category: Category): boolean;
|
|
34
41
|
/**
|
|
35
42
|
* `true` if the code point represents a [Non-emoji General-use Variation Selector](https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md#non-emoji-general-use-variation-selector).
|
|
36
43
|
*
|
|
@@ -46,4 +53,4 @@ declare function isNonEmojiGeneralUseVS(category: Category): boolean;
|
|
|
46
53
|
*/
|
|
47
54
|
declare function isSpaceOrPunctuation(category: Category): boolean;
|
|
48
55
|
|
|
49
|
-
export { isCjk, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace };
|
|
56
|
+
export { isCjk, isCjkOrIvs, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace };
|
package/dist/categoryUtil.js
CHANGED
|
@@ -12,7 +12,8 @@ var constantsEx;
|
|
|
12
12
|
constantsEx2.ivs = 8192;
|
|
13
13
|
constantsEx2.cjkOrIvs = 12288;
|
|
14
14
|
constantsEx2.nonEmojiGeneralUseVS = 16384;
|
|
15
|
-
constantsEx2.variationSelector =
|
|
15
|
+
constantsEx2.variationSelector = 24576;
|
|
16
|
+
constantsEx2.ivsToCjkRightShift = 1;
|
|
16
17
|
})(constantsEx || (constantsEx = {}));
|
|
17
18
|
|
|
18
19
|
// src/categoryUtil.ts
|
|
@@ -28,6 +29,9 @@ function isCjk(category) {
|
|
|
28
29
|
function isIvs(category) {
|
|
29
30
|
return category === constantsEx.ivs;
|
|
30
31
|
}
|
|
32
|
+
function isCjkOrIvs(category) {
|
|
33
|
+
return Boolean(category & constantsEx.cjkOrIvs);
|
|
34
|
+
}
|
|
31
35
|
function isNonEmojiGeneralUseVS(category) {
|
|
32
36
|
return category === constantsEx.nonEmojiGeneralUseVS;
|
|
33
37
|
}
|
|
@@ -36,6 +40,7 @@ function isSpaceOrPunctuation(category) {
|
|
|
36
40
|
}
|
|
37
41
|
export {
|
|
38
42
|
isCjk,
|
|
43
|
+
isCjkOrIvs,
|
|
39
44
|
isIvs,
|
|
40
45
|
isNonCjkPunctuation,
|
|
41
46
|
isNonEmojiGeneralUseVS,
|
package/dist/characterWithNonBmp.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ import { Code } from 'micromark-util-types';
|
|
|
7
7
|
* @returns `true` if `uc` is CJK, `null` if IVS, or `false` if neither
|
|
8
8
|
*/
|
|
9
9
|
declare function cjkOrIvs(uc: Code): boolean | null;
|
|
10
|
+
declare function isCjkAmbiguousPunctuation(main: Code, vs: Code): boolean;
|
|
10
11
|
/**
|
|
11
12
|
* Check whether the character code represents Non-emoji General-use Variation Selector (U+FE00-U+FE0E).
|
|
12
13
|
*/
|
|
@@ -53,4 +54,4 @@ declare const unicodePunctuation: (code: Code) => boolean;
|
|
|
53
54
|
*/
|
|
54
55
|
declare const unicodeWhitespace: (code: Code) => boolean;
|
|
55
56
|
|
|
56
|
-
export { cjkOrIvs, nonEmojiGeneralUseVS, unicodePunctuation, unicodeWhitespace };
|
|
57
|
+
export { cjkOrIvs, isCjkAmbiguousPunctuation, nonEmojiGeneralUseVS, unicodePunctuation, unicodeWhitespace };
|
package/dist/characterWithNonBmp.js
CHANGED
|
@@ -18,7 +18,7 @@ var isEmoji = function(uc) {
|
|
|
18
18
|
fn: null
|
|
19
19
|
});
|
|
20
20
|
function cjkOrIvs(uc) {
|
|
21
|
-
if (!uc || uc <
|
|
21
|
+
if (!uc || uc < 4352) {
|
|
22
22
|
return false;
|
|
23
23
|
}
|
|
24
24
|
const eaw = eastAsianWidthType(uc);
|
|
@@ -37,6 +37,10 @@ function cjkOrIvs(uc) {
|
|
|
37
37
|
return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
|
|
38
38
|
}
|
|
39
39
|
}
|
|
40
|
+
function isCjkAmbiguousPunctuation(main, vs) {
|
|
41
|
+
if (vs !== 65025 || !main || main < 8216) return false;
|
|
42
|
+
return main === 8216 || main === 8217 || main === 8220 || main === 8221;
|
|
43
|
+
}
|
|
40
44
|
function nonEmojiGeneralUseVS(code) {
|
|
41
45
|
return code !== null && code >= 65024 && code <= 65038;
|
|
42
46
|
}
|
|
@@ -50,6 +54,7 @@ function regexCheck(regex) {
|
|
|
50
54
|
}
|
|
51
55
|
export {
|
|
52
56
|
cjkOrIvs,
|
|
57
|
+
isCjkAmbiguousPunctuation,
|
|
53
58
|
nonEmojiGeneralUseVS,
|
|
54
59
|
unicodePunctuation,
|
|
55
60
|
unicodeWhitespace
|
package/dist/classifyCharacter.d.ts
CHANGED
|
@@ -8,7 +8,8 @@ declare namespace constantsEx {
|
|
|
8
8
|
const ivs: 8192;
|
|
9
9
|
const cjkOrIvs: 12288;
|
|
10
10
|
const nonEmojiGeneralUseVS: 16384;
|
|
11
|
-
const variationSelector:
|
|
11
|
+
const variationSelector: 24576;
|
|
12
|
+
const ivsToCjkRightShift: 1;
|
|
12
13
|
}
|
|
13
14
|
/**
|
|
14
15
|
* Classify whether a code represents whitespace, punctuation, or something
|
|
@@ -25,5 +26,17 @@ declare namespace constantsEx {
|
|
|
25
26
|
* Group.
|
|
26
27
|
*/
|
|
27
28
|
declare function classifyCharacter(code: Code): typeof constants.characterGroupWhitespace | typeof constants.characterGroupPunctuation | typeof constantsEx.cjk | typeof constantsEx.cjkPunctuation | typeof constantsEx.ivs | typeof constantsEx.nonEmojiGeneralUseVS | 0;
|
|
29
|
+
/**
|
|
30
|
+
* Classify whether a code represents whitespace, punctuation, or something else.
|
|
31
|
+
*
|
|
32
|
+
* Recognizes general-use variation selectors. Use this instead of {@linkcode classifyCharacter} for the preceding character.
|
|
33
|
+
*
|
|
34
|
+
* @param before result of {@linkcode classifyCharacter} of the preceding character.
|
|
35
|
+
* @param get2Previous a function that returns the code point of the character before the preceding character. Use a lambda or {@linkcode Function.prototype.bind}.
|
|
36
|
+
* @param previous code point of the preceding character
|
|
37
|
+
* @returns
|
|
38
|
+
* Group of the main code point of the preceding character. Use `isCjkOrIvs` to check whether it is CJK
|
|
39
|
+
*/
|
|
40
|
+
declare function classifyPrecedingCharacter(before: ReturnType<typeof classifyCharacter>, get2Previous: () => Code, previous: Code): ReturnType<typeof classifyCharacter>;
|
|
28
41
|
|
|
29
|
-
export { classifyCharacter, constantsEx };
|
|
42
|
+
export { classifyCharacter, classifyPrecedingCharacter, constantsEx };
|
package/dist/classifyCharacter.js
CHANGED
|
@@ -1,6 +1,15 @@
|
|
|
1
1
|
// src/classifyCharacter.ts
|
|
2
2
|
import { markdownLineEndingOrSpace } from "micromark-util-character";
|
|
3
|
-
import { constants, codes } from "micromark-util-symbol";
|
|
3
|
+
import { constants as constants2, codes } from "micromark-util-symbol";
|
|
4
|
+
|
|
5
|
+
// src/categoryUtil.ts
|
|
6
|
+
import { constants } from "micromark-util-symbol";
|
|
7
|
+
function isUnicodeWhitespace(category) {
|
|
8
|
+
return Boolean(category & constants.characterGroupWhitespace);
|
|
9
|
+
}
|
|
10
|
+
function isNonEmojiGeneralUseVS(category) {
|
|
11
|
+
return category === constantsEx.nonEmojiGeneralUseVS;
|
|
12
|
+
}
|
|
4
13
|
|
|
5
14
|
// src/characterWithNonBmp.ts
|
|
6
15
|
import { eastAsianWidthType } from "get-east-asian-width";
|
|
@@ -22,7 +31,7 @@ var isEmoji = function(uc) {
|
|
|
22
31
|
fn: null
|
|
23
32
|
});
|
|
24
33
|
function cjkOrIvs(uc) {
|
|
25
|
-
if (!uc || uc <
|
|
34
|
+
if (!uc || uc < 4352) {
|
|
26
35
|
return false;
|
|
27
36
|
}
|
|
28
37
|
const eaw = eastAsianWidthType(uc);
|
|
@@ -41,6 +50,10 @@ function cjkOrIvs(uc) {
|
|
|
41
50
|
return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
|
|
42
51
|
}
|
|
43
52
|
}
|
|
53
|
+
function isCjkAmbiguousPunctuation(main, vs) {
|
|
54
|
+
if (vs !== 65025 || !main || main < 8216) return false;
|
|
55
|
+
return main === 8216 || main === 8217 || main === 8220 || main === 8221;
|
|
56
|
+
}
|
|
44
57
|
function nonEmojiGeneralUseVS(code) {
|
|
45
58
|
return code !== null && code >= 65024 && code <= 65038;
|
|
46
59
|
}
|
|
@@ -62,11 +75,12 @@ var constantsEx;
|
|
|
62
75
|
constantsEx2.ivs = 8192;
|
|
63
76
|
constantsEx2.cjkOrIvs = 12288;
|
|
64
77
|
constantsEx2.nonEmojiGeneralUseVS = 16384;
|
|
65
|
-
constantsEx2.variationSelector =
|
|
78
|
+
constantsEx2.variationSelector = 24576;
|
|
79
|
+
constantsEx2.ivsToCjkRightShift = 1;
|
|
66
80
|
})(constantsEx || (constantsEx = {}));
|
|
67
81
|
function classifyCharacter(code) {
|
|
68
82
|
if (code === codes.eof || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
|
|
69
|
-
return
|
|
83
|
+
return constants2.characterGroupWhitespace;
|
|
70
84
|
}
|
|
71
85
|
let value = 0;
|
|
72
86
|
if (code >= 4352) {
|
|
@@ -82,11 +96,23 @@ function classifyCharacter(code) {
|
|
|
82
96
|
}
|
|
83
97
|
}
|
|
84
98
|
if (unicodePunctuation(code)) {
|
|
85
|
-
value |=
|
|
99
|
+
value |= constants2.characterGroupPunctuation;
|
|
86
100
|
}
|
|
87
101
|
return value;
|
|
88
102
|
}
|
|
103
|
+
function classifyPrecedingCharacter(before, get2Previous, previous) {
|
|
104
|
+
if (!isNonEmojiGeneralUseVS(before)) {
|
|
105
|
+
return before;
|
|
106
|
+
}
|
|
107
|
+
const twoPrevious = get2Previous();
|
|
108
|
+
const twoBefore = classifyCharacter(twoPrevious);
|
|
109
|
+
return !twoPrevious || isUnicodeWhitespace(twoBefore) ? before : isCjkAmbiguousPunctuation(twoPrevious, previous) ? constantsEx.cjkPunctuation : stripIvs(twoBefore);
|
|
110
|
+
}
|
|
111
|
+
function stripIvs(twoBefore) {
|
|
112
|
+
return twoBefore & ~constantsEx.ivs;
|
|
113
|
+
}
|
|
89
114
|
export {
|
|
90
115
|
classifyCharacter,
|
|
116
|
+
classifyPrecedingCharacter,
|
|
91
117
|
constantsEx
|
|
92
118
|
};
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
export { isCjk, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace } from './categoryUtil.js';
|
|
2
|
-
export { classifyCharacter, constantsEx } from './classifyCharacter.js';
|
|
3
|
-
export { isCodeHighSurrogate, isCodeLowSurrogate, tryGetCodeTwoBefore, tryGetGenuineNextCode, tryGetGenuinePreviousCode } from './codeUtil.js';
|
|
1
|
+
export { isCjk, isCjkOrIvs, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace } from './categoryUtil.js';
|
|
2
|
+
export { classifyCharacter, classifyPrecedingCharacter, constantsEx } from './classifyCharacter.js';
|
|
3
|
+
export { TwoPreviousCode, isCodeHighSurrogate, isCodeLowSurrogate, tryGetCodeTwoBefore, tryGetGenuineNextCode, tryGetGenuinePreviousCode } from './codeUtil.js';
|
|
4
4
|
import 'micromark-util-symbol';
|
|
5
5
|
import 'micromark-util-types';
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
|
|
3
|
+
var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
|
|
4
|
+
|
|
1
5
|
// src/categoryUtil.ts
|
|
2
6
|
import { constants as constants2 } from "micromark-util-symbol";
|
|
3
7
|
|
|
@@ -25,7 +29,7 @@ var isEmoji = function(uc) {
|
|
|
25
29
|
fn: null
|
|
26
30
|
});
|
|
27
31
|
function cjkOrIvs(uc) {
|
|
28
|
-
if (!uc || uc <
|
|
32
|
+
if (!uc || uc < 4352) {
|
|
29
33
|
return false;
|
|
30
34
|
}
|
|
31
35
|
const eaw = eastAsianWidthType(uc);
|
|
@@ -44,6 +48,10 @@ function cjkOrIvs(uc) {
|
|
|
44
48
|
return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
|
|
45
49
|
}
|
|
46
50
|
}
|
|
51
|
+
function isCjkAmbiguousPunctuation(main, vs) {
|
|
52
|
+
if (vs !== 65025 || !main || main < 8216) return false;
|
|
53
|
+
return main === 8216 || main === 8217 || main === 8220 || main === 8221;
|
|
54
|
+
}
|
|
47
55
|
function nonEmojiGeneralUseVS(code) {
|
|
48
56
|
return code !== null && code >= 65024 && code <= 65038;
|
|
49
57
|
}
|
|
@@ -65,7 +73,8 @@ var constantsEx;
|
|
|
65
73
|
constantsEx2.ivs = 8192;
|
|
66
74
|
constantsEx2.cjkOrIvs = 12288;
|
|
67
75
|
constantsEx2.nonEmojiGeneralUseVS = 16384;
|
|
68
|
-
constantsEx2.variationSelector =
|
|
76
|
+
constantsEx2.variationSelector = 24576;
|
|
77
|
+
constantsEx2.ivsToCjkRightShift = 1;
|
|
69
78
|
})(constantsEx || (constantsEx = {}));
|
|
70
79
|
function classifyCharacter(code) {
|
|
71
80
|
if (code === codes.eof || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
|
|
@@ -89,6 +98,17 @@ function classifyCharacter(code) {
|
|
|
89
98
|
}
|
|
90
99
|
return value;
|
|
91
100
|
}
|
|
101
|
+
function classifyPrecedingCharacter(before, get2Previous, previous) {
|
|
102
|
+
if (!isNonEmojiGeneralUseVS(before)) {
|
|
103
|
+
return before;
|
|
104
|
+
}
|
|
105
|
+
const twoPrevious = get2Previous();
|
|
106
|
+
const twoBefore = classifyCharacter(twoPrevious);
|
|
107
|
+
return !twoPrevious || isUnicodeWhitespace(twoBefore) ? before : isCjkAmbiguousPunctuation(twoPrevious, previous) ? constantsEx.cjkPunctuation : stripIvs(twoBefore);
|
|
108
|
+
}
|
|
109
|
+
function stripIvs(twoBefore) {
|
|
110
|
+
return twoBefore & ~constantsEx.ivs;
|
|
111
|
+
}
|
|
92
112
|
|
|
93
113
|
// src/categoryUtil.ts
|
|
94
114
|
function isUnicodeWhitespace(category) {
|
|
@@ -103,6 +123,9 @@ function isCjk(category) {
|
|
|
103
123
|
function isIvs(category) {
|
|
104
124
|
return category === constantsEx.ivs;
|
|
105
125
|
}
|
|
126
|
+
function isCjkOrIvs(category) {
|
|
127
|
+
return Boolean(category & constantsEx.cjkOrIvs);
|
|
128
|
+
}
|
|
106
129
|
function isNonEmojiGeneralUseVS(category) {
|
|
107
130
|
return category === constantsEx.nonEmojiGeneralUseVS;
|
|
108
131
|
}
|
|
@@ -161,6 +184,40 @@ function tryGetCodeTwoBefore(previousCode, nowPoint, sliceSerialize) {
|
|
|
161
184
|
}
|
|
162
185
|
return twoPreviousLast;
|
|
163
186
|
}
|
|
187
|
+
var TwoPreviousCode = class {
|
|
188
|
+
/**
|
|
189
|
+
* @see {@link tryGetCodeTwoBefore}
|
|
190
|
+
*
|
|
191
|
+
* @param previousCode a previous code point. Should be greater than 65,535 if it represents a [Supplementary Character](https://www.unicode.org/glossary/#supplementary_character).
|
|
192
|
+
* @param nowPoint `this.now()` (`this` = `TokenizeContext`)
|
|
193
|
+
* @param sliceSerialize `this.sliceSerialize` (`this` = `TokenizeContext`)
|
|
194
|
+
*/
|
|
195
|
+
constructor(previousCode, nowPoint, sliceSerialize) {
|
|
196
|
+
this.previousCode = previousCode;
|
|
197
|
+
this.nowPoint = nowPoint;
|
|
198
|
+
this.sliceSerialize = sliceSerialize;
|
|
199
|
+
__publicField(this, "cachedValue");
|
|
200
|
+
}
|
|
201
|
+
/**
|
|
202
|
+
* Returns the return value of {@link tryGetCodeTwoBefore}.
|
|
203
|
+
*
|
|
204
|
+
* If the value has not been computed yet, it will be computed and cached.
|
|
205
|
+
*
|
|
206
|
+
* @see {@link tryGetCodeTwoBefore}
|
|
207
|
+
*
|
|
208
|
+
* @returns a value greater than 65,535 if the code point two positions before represents a [Supplementary Character](https://www.unicode.org/glossary/#supplementary_character), a value less than 65,536 for a [BMP Character](https://www.unicode.org/glossary/#bmp_character), or `null` if not found
|
|
209
|
+
*/
|
|
210
|
+
value() {
|
|
211
|
+
if (this.cachedValue === void 0) {
|
|
212
|
+
this.cachedValue = tryGetCodeTwoBefore(
|
|
213
|
+
this.previousCode,
|
|
214
|
+
this.nowPoint,
|
|
215
|
+
this.sliceSerialize
|
|
216
|
+
);
|
|
217
|
+
}
|
|
218
|
+
return this.cachedValue;
|
|
219
|
+
}
|
|
220
|
+
};
|
|
164
221
|
function tryGetGenuineNextCode(code, nowPoint, sliceSerialize) {
|
|
165
222
|
const nextCandidate = sliceSerialize({
|
|
166
223
|
start: nowPoint,
|
|
@@ -169,9 +226,12 @@ function tryGetGenuineNextCode(code, nowPoint, sliceSerialize) {
|
|
|
169
226
|
return nextCandidate && nextCandidate >= 65536 ? nextCandidate : code;
|
|
170
227
|
}
|
|
171
228
|
export {
|
|
229
|
+
TwoPreviousCode,
|
|
172
230
|
classifyCharacter,
|
|
231
|
+
classifyPrecedingCharacter,
|
|
173
232
|
constantsEx,
|
|
174
233
|
isCjk,
|
|
234
|
+
isCjkOrIvs,
|
|
175
235
|
isCodeHighSurrogate,
|
|
176
236
|
isCodeLowSurrogate,
|
|
177
237
|
isIvs,
|