micromark-extension-cjk-friendly-util 1.1.0 → 2.0.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,12 +32,19 @@ declare function isCjk(category: Category): boolean;
32
32
  */
33
33
  declare function isIvs(category: Category): boolean;
34
34
  /**
35
- * `true` if the code point represents a [Standard Variation Selector that can follow CJK](https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md#svs-that-can-follow-cjk).
35
+ * `true` if {@link isCjk} or {@link isIvs}.
36
+ *
37
+ * @param category the return value of {@link classifyCharacter}.
38
+ * @returns `true` if the code point represents a CJK or IVS
39
+ */
40
+ declare function isCjkOrIvs(category: Category): boolean;
41
+ /**
42
+ * `true` if the code point represents a [Non-emoji General-use Variation Selector](https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md#non-emoji-general-use-variation-selector).
36
43
  *
37
44
  * @param category the return value of `classifyCharacter`.
38
- * @returns `true` if the code point represents an Standard Variation Selector that can follow CJK
45
+ * @returns `true` if the code point represents a Non-emoji General-use Variation Selector
39
46
  */
40
- declare function isSvsFollowingCjk(category: Category): boolean;
47
+ declare function isNonEmojiGeneralUseVS(category: Category): boolean;
41
48
  /**
42
49
  * `true` if the code point represents a [Unicode whitespace character](https://spec.commonmark.org/0.31.2/#unicode-whitespace-character) or a [Unicode punctuation character](https://spec.commonmark.org/0.31.2/#unicode-punctuation-character).
43
50
  *
@@ -46,4 +53,4 @@ declare function isSvsFollowingCjk(category: Category): boolean;
46
53
  */
47
54
  declare function isSpaceOrPunctuation(category: Category): boolean;
48
55
 
49
- export { isCjk, isIvs, isNonCjkPunctuation, isSpaceOrPunctuation, isSvsFollowingCjk, isUnicodeWhitespace };
56
+ export { isCjk, isCjkOrIvs, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace };
@@ -11,8 +11,9 @@ var constantsEx;
11
11
  constantsEx2.cjkPunctuation = 4098;
12
12
  constantsEx2.ivs = 8192;
13
13
  constantsEx2.cjkOrIvs = 12288;
14
- constantsEx2.svsFollowingCjk = 16384;
15
- constantsEx2.variationSelector = 28672;
14
+ constantsEx2.nonEmojiGeneralUseVS = 16384;
15
+ constantsEx2.variationSelector = 24576;
16
+ constantsEx2.ivsToCjkRightShift = 1;
16
17
  })(constantsEx || (constantsEx = {}));
17
18
 
18
19
  // src/categoryUtil.ts
@@ -28,17 +29,21 @@ function isCjk(category) {
28
29
  function isIvs(category) {
29
30
  return category === constantsEx.ivs;
30
31
  }
31
- function isSvsFollowingCjk(category) {
32
- return category === constantsEx.svsFollowingCjk;
32
+ function isCjkOrIvs(category) {
33
+ return Boolean(category & constantsEx.cjkOrIvs);
34
+ }
35
+ function isNonEmojiGeneralUseVS(category) {
36
+ return category === constantsEx.nonEmojiGeneralUseVS;
33
37
  }
34
38
  function isSpaceOrPunctuation(category) {
35
39
  return Boolean(category & constantsEx.spaceOrPunctuation);
36
40
  }
37
41
  export {
38
42
  isCjk,
43
+ isCjkOrIvs,
39
44
  isIvs,
40
45
  isNonCjkPunctuation,
46
+ isNonEmojiGeneralUseVS,
41
47
  isSpaceOrPunctuation,
42
- isSvsFollowingCjk,
43
48
  isUnicodeWhitespace
44
49
  };
@@ -7,12 +7,11 @@ import { Code } from 'micromark-util-types';
7
7
  * @returns `true` if `uc` is CJK, `null` if IVS, or `false` if neither
8
8
  */
9
9
  declare function cjkOrIvs(uc: Code): boolean | null;
10
+ declare function isCjkAmbiguousPunctuation(main: Code, vs: Code): boolean;
10
11
  /**
11
- * Check whether the character code represents Standard Variation Sequence that can follow an ideographic character.
12
- *
13
- * U+FE0E is used for some CJK symbols (e.g. U+3299) that can also be
12
+ * Check whether the character code represents a Non-emoji General-use Variation Selector (U+FE00-U+FE0E).
14
13
  */
15
- declare const svsFollowingCjk: (code: Code) => boolean;
14
+ declare function nonEmojiGeneralUseVS(code: Code): boolean;
16
15
  /**
17
16
  * Check whether the character code represents Unicode punctuation.
18
17
  *
@@ -55,4 +54,4 @@ declare const unicodePunctuation: (code: Code) => boolean;
55
54
  */
56
55
  declare const unicodeWhitespace: (code: Code) => boolean;
57
56
 
58
- export { cjkOrIvs, svsFollowingCjk, unicodePunctuation, unicodeWhitespace };
57
+ export { cjkOrIvs, isCjkAmbiguousPunctuation, nonEmojiGeneralUseVS, unicodePunctuation, unicodeWhitespace };
@@ -18,7 +18,7 @@ var isEmoji = function(uc) {
18
18
  fn: null
19
19
  });
20
20
  function cjkOrIvs(uc) {
21
- if (!uc || uc < 0) {
21
+ if (!uc || uc < 4352) {
22
22
  return false;
23
23
  }
24
24
  const eaw = eastAsianWidthType(uc);
@@ -37,7 +37,13 @@ function cjkOrIvs(uc) {
37
37
  return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
38
38
  }
39
39
  }
40
- var svsFollowingCjk = regexCheck(/[\uFE00-\uFE02\uFE0E]/u);
40
+ function isCjkAmbiguousPunctuation(main, vs) {
41
+ if (vs !== 65025 || !main || main < 8216) return false;
42
+ return main === 8216 || main === 8217 || main === 8220 || main === 8221;
43
+ }
44
+ function nonEmojiGeneralUseVS(code) {
45
+ return code !== null && code >= 65024 && code <= 65038;
46
+ }
41
47
  var unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
42
48
  var unicodeWhitespace = regexCheck(/\s/);
43
49
  function regexCheck(regex) {
@@ -48,7 +54,8 @@ function regexCheck(regex) {
48
54
  }
49
55
  export {
50
56
  cjkOrIvs,
51
- svsFollowingCjk,
57
+ isCjkAmbiguousPunctuation,
58
+ nonEmojiGeneralUseVS,
52
59
  unicodePunctuation,
53
60
  unicodeWhitespace
54
61
  };
@@ -7,8 +7,9 @@ declare namespace constantsEx {
7
7
  const cjkPunctuation: 4098;
8
8
  const ivs: 8192;
9
9
  const cjkOrIvs: 12288;
10
- const svsFollowingCjk: 16384;
11
- const variationSelector: 28672;
10
+ const nonEmojiGeneralUseVS: 16384;
11
+ const variationSelector: 24576;
12
+ const ivsToCjkRightShift: 1;
12
13
  }
13
14
  /**
14
15
  * Classify whether a code represents whitespace, punctuation, or something
@@ -24,6 +25,18 @@ declare namespace constantsEx {
24
25
  * @returns
25
26
  * Group.
26
27
  */
27
- declare function classifyCharacter(code: Code): typeof constants.characterGroupWhitespace | typeof constants.characterGroupPunctuation | typeof constantsEx.cjk | typeof constantsEx.cjkPunctuation | typeof constantsEx.ivs | typeof constantsEx.svsFollowingCjk | 0;
28
+ declare function classifyCharacter(code: Code): typeof constants.characterGroupWhitespace | typeof constants.characterGroupPunctuation | typeof constantsEx.cjk | typeof constantsEx.cjkPunctuation | typeof constantsEx.ivs | typeof constantsEx.nonEmojiGeneralUseVS | 0;
29
+ /**
30
+ * Classify whether a code represents whitespace, punctuation, or something else.
31
+ *
32
+ * Recognizes general-use variation selectors. Use this instead of {@linkcode classifyCharacter} for the previous character.
33
+ *
34
+ * @param before result of {@linkcode classifyCharacter} of the preceding character.
35
+ * @param get2Previous a function that returns the code point of the character before the preceding character. Use lambda or {@linkcode Function.prototype.bind}.
36
+ * @param previous code point of the preceding character
37
+ * @returns
38
+ * Group of the main code point of the preceding character. Use `isCjkOrIvs` to check whether it is CJK
39
+ */
40
+ declare function classifyPrecedingCharacter(before: ReturnType<typeof classifyCharacter>, get2Previous: () => Code, previous: Code): ReturnType<typeof classifyCharacter>;
28
41
 
29
- export { classifyCharacter, constantsEx };
42
+ export { classifyCharacter, classifyPrecedingCharacter, constantsEx };
@@ -1,6 +1,15 @@
1
1
  // src/classifyCharacter.ts
2
2
  import { markdownLineEndingOrSpace } from "micromark-util-character";
3
- import { constants, codes } from "micromark-util-symbol";
3
+ import { constants as constants2, codes } from "micromark-util-symbol";
4
+
5
+ // src/categoryUtil.ts
6
+ import { constants } from "micromark-util-symbol";
7
+ function isUnicodeWhitespace(category) {
8
+ return Boolean(category & constants.characterGroupWhitespace);
9
+ }
10
+ function isNonEmojiGeneralUseVS(category) {
11
+ return category === constantsEx.nonEmojiGeneralUseVS;
12
+ }
4
13
 
5
14
  // src/characterWithNonBmp.ts
6
15
  import { eastAsianWidthType } from "get-east-asian-width";
@@ -22,7 +31,7 @@ var isEmoji = function(uc) {
22
31
  fn: null
23
32
  });
24
33
  function cjkOrIvs(uc) {
25
- if (!uc || uc < 0) {
34
+ if (!uc || uc < 4352) {
26
35
  return false;
27
36
  }
28
37
  const eaw = eastAsianWidthType(uc);
@@ -41,7 +50,13 @@ function cjkOrIvs(uc) {
41
50
  return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
42
51
  }
43
52
  }
44
- var svsFollowingCjk = regexCheck(/[\uFE00-\uFE02\uFE0E]/u);
53
+ function isCjkAmbiguousPunctuation(main, vs) {
54
+ if (vs !== 65025 || !main || main < 8216) return false;
55
+ return main === 8216 || main === 8217 || main === 8220 || main === 8221;
56
+ }
57
+ function nonEmojiGeneralUseVS(code) {
58
+ return code !== null && code >= 65024 && code <= 65038;
59
+ }
45
60
  var unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
46
61
  var unicodeWhitespace = regexCheck(/\s/);
47
62
  function regexCheck(regex) {
@@ -59,17 +74,18 @@ var constantsEx;
59
74
  constantsEx2.cjkPunctuation = 4098;
60
75
  constantsEx2.ivs = 8192;
61
76
  constantsEx2.cjkOrIvs = 12288;
62
- constantsEx2.svsFollowingCjk = 16384;
63
- constantsEx2.variationSelector = 28672;
77
+ constantsEx2.nonEmojiGeneralUseVS = 16384;
78
+ constantsEx2.variationSelector = 24576;
79
+ constantsEx2.ivsToCjkRightShift = 1;
64
80
  })(constantsEx || (constantsEx = {}));
65
81
  function classifyCharacter(code) {
66
82
  if (code === codes.eof || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
67
- return constants.characterGroupWhitespace;
83
+ return constants2.characterGroupWhitespace;
68
84
  }
69
85
  let value = 0;
70
86
  if (code >= 4352) {
71
- if (svsFollowingCjk(code)) {
72
- return constantsEx.svsFollowingCjk;
87
+ if (nonEmojiGeneralUseVS(code)) {
88
+ return constantsEx.nonEmojiGeneralUseVS;
73
89
  }
74
90
  switch (cjkOrIvs(code)) {
75
91
  case null:
@@ -80,11 +96,23 @@ function classifyCharacter(code) {
80
96
  }
81
97
  }
82
98
  if (unicodePunctuation(code)) {
83
- value |= constants.characterGroupPunctuation;
99
+ value |= constants2.characterGroupPunctuation;
84
100
  }
85
101
  return value;
86
102
  }
103
+ function classifyPrecedingCharacter(before, get2Previous, previous) {
104
+ if (!isNonEmojiGeneralUseVS(before)) {
105
+ return before;
106
+ }
107
+ const twoPrevious = get2Previous();
108
+ const twoBefore = classifyCharacter(twoPrevious);
109
+ return !twoPrevious || isUnicodeWhitespace(twoBefore) ? before : isCjkAmbiguousPunctuation(twoPrevious, previous) ? constantsEx.cjkPunctuation : stripIvs(twoBefore);
110
+ }
111
+ function stripIvs(twoBefore) {
112
+ return twoBefore & ~constantsEx.ivs;
113
+ }
87
114
  export {
88
115
  classifyCharacter,
116
+ classifyPrecedingCharacter,
89
117
  constantsEx
90
118
  };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- export { isCjk, isIvs, isNonCjkPunctuation, isSpaceOrPunctuation, isSvsFollowingCjk, isUnicodeWhitespace } from './categoryUtil.js';
2
- export { classifyCharacter, constantsEx } from './classifyCharacter.js';
3
- export { isCodeHighSurrogate, isCodeLowSurrogate, tryGetCodeTwoBefore, tryGetGenuineNextCode, tryGetGenuinePreviousCode } from './codeUtil.js';
1
+ export { isCjk, isCjkOrIvs, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace } from './categoryUtil.js';
2
+ export { classifyCharacter, classifyPrecedingCharacter, constantsEx } from './classifyCharacter.js';
3
+ export { TwoPreviousCode, isCodeHighSurrogate, isCodeLowSurrogate, tryGetCodeTwoBefore, tryGetGenuineNextCode, tryGetGenuinePreviousCode } from './codeUtil.js';
4
4
  import 'micromark-util-symbol';
5
5
  import 'micromark-util-types';
package/dist/index.js CHANGED
@@ -1,3 +1,7 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
3
+ var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
4
+
1
5
  // src/categoryUtil.ts
2
6
  import { constants as constants2 } from "micromark-util-symbol";
3
7
 
@@ -25,7 +29,7 @@ var isEmoji = function(uc) {
25
29
  fn: null
26
30
  });
27
31
  function cjkOrIvs(uc) {
28
- if (!uc || uc < 0) {
32
+ if (!uc || uc < 4352) {
29
33
  return false;
30
34
  }
31
35
  const eaw = eastAsianWidthType(uc);
@@ -44,7 +48,13 @@ function cjkOrIvs(uc) {
44
48
  return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
45
49
  }
46
50
  }
47
- var svsFollowingCjk = regexCheck(/[\uFE00-\uFE02\uFE0E]/u);
51
+ function isCjkAmbiguousPunctuation(main, vs) {
52
+ if (vs !== 65025 || !main || main < 8216) return false;
53
+ return main === 8216 || main === 8217 || main === 8220 || main === 8221;
54
+ }
55
+ function nonEmojiGeneralUseVS(code) {
56
+ return code !== null && code >= 65024 && code <= 65038;
57
+ }
48
58
  var unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
49
59
  var unicodeWhitespace = regexCheck(/\s/);
50
60
  function regexCheck(regex) {
@@ -62,8 +72,9 @@ var constantsEx;
62
72
  constantsEx2.cjkPunctuation = 4098;
63
73
  constantsEx2.ivs = 8192;
64
74
  constantsEx2.cjkOrIvs = 12288;
65
- constantsEx2.svsFollowingCjk = 16384;
66
- constantsEx2.variationSelector = 28672;
75
+ constantsEx2.nonEmojiGeneralUseVS = 16384;
76
+ constantsEx2.variationSelector = 24576;
77
+ constantsEx2.ivsToCjkRightShift = 1;
67
78
  })(constantsEx || (constantsEx = {}));
68
79
  function classifyCharacter(code) {
69
80
  if (code === codes.eof || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
@@ -71,8 +82,8 @@ function classifyCharacter(code) {
71
82
  }
72
83
  let value = 0;
73
84
  if (code >= 4352) {
74
- if (svsFollowingCjk(code)) {
75
- return constantsEx.svsFollowingCjk;
85
+ if (nonEmojiGeneralUseVS(code)) {
86
+ return constantsEx.nonEmojiGeneralUseVS;
76
87
  }
77
88
  switch (cjkOrIvs(code)) {
78
89
  case null:
@@ -87,6 +98,17 @@ function classifyCharacter(code) {
87
98
  }
88
99
  return value;
89
100
  }
101
+ function classifyPrecedingCharacter(before, get2Previous, previous) {
102
+ if (!isNonEmojiGeneralUseVS(before)) {
103
+ return before;
104
+ }
105
+ const twoPrevious = get2Previous();
106
+ const twoBefore = classifyCharacter(twoPrevious);
107
+ return !twoPrevious || isUnicodeWhitespace(twoBefore) ? before : isCjkAmbiguousPunctuation(twoPrevious, previous) ? constantsEx.cjkPunctuation : stripIvs(twoBefore);
108
+ }
109
+ function stripIvs(twoBefore) {
110
+ return twoBefore & ~constantsEx.ivs;
111
+ }
90
112
 
91
113
  // src/categoryUtil.ts
92
114
  function isUnicodeWhitespace(category) {
@@ -101,8 +123,11 @@ function isCjk(category) {
101
123
  function isIvs(category) {
102
124
  return category === constantsEx.ivs;
103
125
  }
104
- function isSvsFollowingCjk(category) {
105
- return category === constantsEx.svsFollowingCjk;
126
+ function isCjkOrIvs(category) {
127
+ return Boolean(category & constantsEx.cjkOrIvs);
128
+ }
129
+ function isNonEmojiGeneralUseVS(category) {
130
+ return category === constantsEx.nonEmojiGeneralUseVS;
106
131
  }
107
132
  function isSpaceOrPunctuation(category) {
108
133
  return Boolean(category & constantsEx.spaceOrPunctuation);
@@ -159,6 +184,40 @@ function tryGetCodeTwoBefore(previousCode, nowPoint, sliceSerialize) {
159
184
  }
160
185
  return twoPreviousLast;
161
186
  }
187
+ var TwoPreviousCode = class {
188
+ /**
189
+ * @see {@link tryGetCodeTwoBefore}
190
+ *
191
+ * @param previousCode a previous code point. Should be greater than 65,535 if it represents a [Supplementary Character](https://www.unicode.org/glossary/#supplementary_character).
192
+ * @param nowPoint `this.now()` (`this` = `TokenizeContext`)
193
+ * @param sliceSerialize `this.sliceSerialize` (`this` = `TokenizeContext`)
194
+ */
195
+ constructor(previousCode, nowPoint, sliceSerialize) {
196
+ this.previousCode = previousCode;
197
+ this.nowPoint = nowPoint;
198
+ this.sliceSerialize = sliceSerialize;
199
+ __publicField(this, "cachedValue");
200
+ }
201
+ /**
202
+ * Returns the return value of {@link tryGetCodeTwoBefore}.
203
+ *
204
+ * If the value has not been computed yet, it will be computed and cached.
205
+ *
206
+ * @see {@link tryGetCodeTwoBefore}
207
+ *
208
+ * @returns a value greater than 65,535 if the code point two positions before represents a [Supplementary Character](https://www.unicode.org/glossary/#supplementary_character), a value less than 65,536 for a [BMP Character](https://www.unicode.org/glossary/#bmp_character), or `null` if not found
209
+ */
210
+ value() {
211
+ if (this.cachedValue === void 0) {
212
+ this.cachedValue = tryGetCodeTwoBefore(
213
+ this.previousCode,
214
+ this.nowPoint,
215
+ this.sliceSerialize
216
+ );
217
+ }
218
+ return this.cachedValue;
219
+ }
220
+ };
162
221
  function tryGetGenuineNextCode(code, nowPoint, sliceSerialize) {
163
222
  const nextCandidate = sliceSerialize({
164
223
  start: nowPoint,
@@ -167,15 +226,18 @@ function tryGetGenuineNextCode(code, nowPoint, sliceSerialize) {
167
226
  return nextCandidate && nextCandidate >= 65536 ? nextCandidate : code;
168
227
  }
169
228
  export {
229
+ TwoPreviousCode,
170
230
  classifyCharacter,
231
+ classifyPrecedingCharacter,
171
232
  constantsEx,
172
233
  isCjk,
234
+ isCjkOrIvs,
173
235
  isCodeHighSurrogate,
174
236
  isCodeLowSurrogate,
175
237
  isIvs,
176
238
  isNonCjkPunctuation,
239
+ isNonEmojiGeneralUseVS,
177
240
  isSpaceOrPunctuation,
178
- isSvsFollowingCjk,
179
241
  isUnicodeWhitespace,
180
242
  tryGetCodeTwoBefore,
181
243
  tryGetGenuineNextCode,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "micromark-extension-cjk-friendly-util",
3
- "version": "1.1.0",
3
+ "version": "2.0.0-rc.2",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {