micromark-extension-cjk-friendly-util 2.0.0-rc.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,13 @@ declare function isCjk(category: Category): boolean;
31
31
  * @returns `true` if the code point represents an IVS
32
32
  */
33
33
  declare function isIvs(category: Category): boolean;
34
+ /**
35
+ * `true` if {@link isCjk} or {@link isIvs}.
36
+ *
37
+ * @param category the return value of {@link classifyCharacter}.
38
+ * @returns `true` if the code point represents a CJK character or an IVS
39
+ */
40
+ declare function isCjkOrIvs(category: Category): boolean;
34
41
  /**
35
42
  * `true` if the code point represents a [Non-emoji General-use Variation Selector](https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md#non-emoji-general-use-variation-selector).
36
43
  *
@@ -46,4 +53,4 @@ declare function isNonEmojiGeneralUseVS(category: Category): boolean;
46
53
  */
47
54
  declare function isSpaceOrPunctuation(category: Category): boolean;
48
55
 
49
- export { isCjk, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace };
56
+ export { isCjk, isCjkOrIvs, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace };
@@ -12,7 +12,8 @@ var constantsEx;
12
12
  constantsEx2.ivs = 8192;
13
13
  constantsEx2.cjkOrIvs = 12288;
14
14
  constantsEx2.nonEmojiGeneralUseVS = 16384;
15
- constantsEx2.variationSelector = 28672;
15
+ constantsEx2.variationSelector = 24576;
16
+ constantsEx2.ivsToCjkRightShift = 1;
16
17
  })(constantsEx || (constantsEx = {}));
17
18
 
18
19
  // src/categoryUtil.ts
@@ -28,6 +29,9 @@ function isCjk(category) {
28
29
  function isIvs(category) {
29
30
  return category === constantsEx.ivs;
30
31
  }
32
+ function isCjkOrIvs(category) {
33
+ return Boolean(category & constantsEx.cjkOrIvs);
34
+ }
31
35
  function isNonEmojiGeneralUseVS(category) {
32
36
  return category === constantsEx.nonEmojiGeneralUseVS;
33
37
  }
@@ -36,6 +40,7 @@ function isSpaceOrPunctuation(category) {
36
40
  }
37
41
  export {
38
42
  isCjk,
43
+ isCjkOrIvs,
39
44
  isIvs,
40
45
  isNonCjkPunctuation,
41
46
  isNonEmojiGeneralUseVS,
@@ -7,6 +7,7 @@ import { Code } from 'micromark-util-types';
7
7
  * @returns `true` if `uc` is CJK, `null` if IVS, or `false` if neither
8
8
  */
9
9
  declare function cjkOrIvs(uc: Code): boolean | null;
10
+ declare function isCjkAmbiguousPunctuation(main: Code, vs: Code): boolean;
10
11
  /**
11
12
  * Check whether the character code represents Non-emoji General-use Variation Selector (U+FE00-U+FE0E).
12
13
  */
@@ -53,4 +54,4 @@ declare const unicodePunctuation: (code: Code) => boolean;
53
54
  */
54
55
  declare const unicodeWhitespace: (code: Code) => boolean;
55
56
 
56
- export { cjkOrIvs, nonEmojiGeneralUseVS, unicodePunctuation, unicodeWhitespace };
57
+ export { cjkOrIvs, isCjkAmbiguousPunctuation, nonEmojiGeneralUseVS, unicodePunctuation, unicodeWhitespace };
@@ -18,7 +18,7 @@ var isEmoji = function(uc) {
18
18
  fn: null
19
19
  });
20
20
  function cjkOrIvs(uc) {
21
- if (!uc || uc < 0) {
21
+ if (!uc || uc < 4352) {
22
22
  return false;
23
23
  }
24
24
  const eaw = eastAsianWidthType(uc);
@@ -37,6 +37,10 @@ function cjkOrIvs(uc) {
37
37
  return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
38
38
  }
39
39
  }
40
+ function isCjkAmbiguousPunctuation(main, vs) {
41
+ if (vs !== 65025 || !main || main < 8216) return false;
42
+ return main === 8216 || main === 8217 || main === 8220 || main === 8221;
43
+ }
40
44
  function nonEmojiGeneralUseVS(code) {
41
45
  return code !== null && code >= 65024 && code <= 65038;
42
46
  }
@@ -50,6 +54,7 @@ function regexCheck(regex) {
50
54
  }
51
55
  export {
52
56
  cjkOrIvs,
57
+ isCjkAmbiguousPunctuation,
53
58
  nonEmojiGeneralUseVS,
54
59
  unicodePunctuation,
55
60
  unicodeWhitespace
@@ -8,7 +8,8 @@ declare namespace constantsEx {
8
8
  const ivs: 8192;
9
9
  const cjkOrIvs: 12288;
10
10
  const nonEmojiGeneralUseVS: 16384;
11
- const variationSelector: 28672;
11
+ const variationSelector: 24576;
12
+ const ivsToCjkRightShift: 1;
12
13
  }
13
14
  /**
14
15
  * Classify whether a code represents whitespace, punctuation, or something
@@ -25,5 +26,17 @@ declare namespace constantsEx {
25
26
  * Group.
26
27
  */
27
28
  declare function classifyCharacter(code: Code): typeof constants.characterGroupWhitespace | typeof constants.characterGroupPunctuation | typeof constantsEx.cjk | typeof constantsEx.cjkPunctuation | typeof constantsEx.ivs | typeof constantsEx.nonEmojiGeneralUseVS | 0;
29
+ /**
30
+ * Classify whether a code represents whitespace, punctuation, or something else.
31
+ *
32
+ * Recognizes general-use variation selectors. Use this instead of {@linkcode classifyCharacter} for the preceding character.
33
+ *
34
+ * @param before result of {@linkcode classifyCharacter} of the preceding character.
35
+ * @param get2Previous a function that returns the code point of the character before the preceding character. Use lambda or {@linkcode Function.prototype.bind}.
36
+ * @param previous code point of the preceding character
37
+ * @returns
38
+ * Group of the main code point of the preceding character. Use `isCjkOrIvs` to check whether it is CJK
39
+ */
40
+ declare function classifyPrecedingCharacter(before: ReturnType<typeof classifyCharacter>, get2Previous: () => Code, previous: Code): ReturnType<typeof classifyCharacter>;
28
41
 
29
- export { classifyCharacter, constantsEx };
42
+ export { classifyCharacter, classifyPrecedingCharacter, constantsEx };
@@ -1,6 +1,15 @@
1
1
  // src/classifyCharacter.ts
2
2
  import { markdownLineEndingOrSpace } from "micromark-util-character";
3
- import { constants, codes } from "micromark-util-symbol";
3
+ import { constants as constants2, codes } from "micromark-util-symbol";
4
+
5
+ // src/categoryUtil.ts
6
+ import { constants } from "micromark-util-symbol";
7
+ function isUnicodeWhitespace(category) {
8
+ return Boolean(category & constants.characterGroupWhitespace);
9
+ }
10
+ function isNonEmojiGeneralUseVS(category) {
11
+ return category === constantsEx.nonEmojiGeneralUseVS;
12
+ }
4
13
 
5
14
  // src/characterWithNonBmp.ts
6
15
  import { eastAsianWidthType } from "get-east-asian-width";
@@ -22,7 +31,7 @@ var isEmoji = function(uc) {
22
31
  fn: null
23
32
  });
24
33
  function cjkOrIvs(uc) {
25
- if (!uc || uc < 0) {
34
+ if (!uc || uc < 4352) {
26
35
  return false;
27
36
  }
28
37
  const eaw = eastAsianWidthType(uc);
@@ -41,6 +50,10 @@ function cjkOrIvs(uc) {
41
50
  return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
42
51
  }
43
52
  }
53
+ function isCjkAmbiguousPunctuation(main, vs) {
54
+ if (vs !== 65025 || !main || main < 8216) return false;
55
+ return main === 8216 || main === 8217 || main === 8220 || main === 8221;
56
+ }
44
57
  function nonEmojiGeneralUseVS(code) {
45
58
  return code !== null && code >= 65024 && code <= 65038;
46
59
  }
@@ -62,11 +75,12 @@ var constantsEx;
62
75
  constantsEx2.ivs = 8192;
63
76
  constantsEx2.cjkOrIvs = 12288;
64
77
  constantsEx2.nonEmojiGeneralUseVS = 16384;
65
- constantsEx2.variationSelector = 28672;
78
+ constantsEx2.variationSelector = 24576;
79
+ constantsEx2.ivsToCjkRightShift = 1;
66
80
  })(constantsEx || (constantsEx = {}));
67
81
  function classifyCharacter(code) {
68
82
  if (code === codes.eof || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
69
- return constants.characterGroupWhitespace;
83
+ return constants2.characterGroupWhitespace;
70
84
  }
71
85
  let value = 0;
72
86
  if (code >= 4352) {
@@ -82,11 +96,23 @@ function classifyCharacter(code) {
82
96
  }
83
97
  }
84
98
  if (unicodePunctuation(code)) {
85
- value |= constants.characterGroupPunctuation;
99
+ value |= constants2.characterGroupPunctuation;
86
100
  }
87
101
  return value;
88
102
  }
103
+ function classifyPrecedingCharacter(before, get2Previous, previous) {
104
+ if (!isNonEmojiGeneralUseVS(before)) {
105
+ return before;
106
+ }
107
+ const twoPrevious = get2Previous();
108
+ const twoBefore = classifyCharacter(twoPrevious);
109
+ return !twoPrevious || isUnicodeWhitespace(twoBefore) ? before : isCjkAmbiguousPunctuation(twoPrevious, previous) ? constantsEx.cjkPunctuation : stripIvs(twoBefore);
110
+ }
111
+ function stripIvs(twoBefore) {
112
+ return twoBefore & ~constantsEx.ivs;
113
+ }
89
114
  export {
90
115
  classifyCharacter,
116
+ classifyPrecedingCharacter,
91
117
  constantsEx
92
118
  };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- export { isCjk, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace } from './categoryUtil.js';
2
- export { classifyCharacter, constantsEx } from './classifyCharacter.js';
3
- export { isCodeHighSurrogate, isCodeLowSurrogate, tryGetCodeTwoBefore, tryGetGenuineNextCode, tryGetGenuinePreviousCode } from './codeUtil.js';
1
+ export { isCjk, isCjkOrIvs, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace } from './categoryUtil.js';
2
+ export { classifyCharacter, classifyPrecedingCharacter, constantsEx } from './classifyCharacter.js';
3
+ export { TwoPreviousCode, isCodeHighSurrogate, isCodeLowSurrogate, tryGetCodeTwoBefore, tryGetGenuineNextCode, tryGetGenuinePreviousCode } from './codeUtil.js';
4
4
  import 'micromark-util-symbol';
5
5
  import 'micromark-util-types';
package/dist/index.js CHANGED
@@ -1,3 +1,7 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
3
+ var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
4
+
1
5
  // src/categoryUtil.ts
2
6
  import { constants as constants2 } from "micromark-util-symbol";
3
7
 
@@ -25,7 +29,7 @@ var isEmoji = function(uc) {
25
29
  fn: null
26
30
  });
27
31
  function cjkOrIvs(uc) {
28
- if (!uc || uc < 0) {
32
+ if (!uc || uc < 4352) {
29
33
  return false;
30
34
  }
31
35
  const eaw = eastAsianWidthType(uc);
@@ -44,6 +48,10 @@ function cjkOrIvs(uc) {
44
48
  return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
45
49
  }
46
50
  }
51
+ function isCjkAmbiguousPunctuation(main, vs) {
52
+ if (vs !== 65025 || !main || main < 8216) return false;
53
+ return main === 8216 || main === 8217 || main === 8220 || main === 8221;
54
+ }
47
55
  function nonEmojiGeneralUseVS(code) {
48
56
  return code !== null && code >= 65024 && code <= 65038;
49
57
  }
@@ -65,7 +73,8 @@ var constantsEx;
65
73
  constantsEx2.ivs = 8192;
66
74
  constantsEx2.cjkOrIvs = 12288;
67
75
  constantsEx2.nonEmojiGeneralUseVS = 16384;
68
- constantsEx2.variationSelector = 28672;
76
+ constantsEx2.variationSelector = 24576;
77
+ constantsEx2.ivsToCjkRightShift = 1;
69
78
  })(constantsEx || (constantsEx = {}));
70
79
  function classifyCharacter(code) {
71
80
  if (code === codes.eof || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
@@ -89,6 +98,17 @@ function classifyCharacter(code) {
89
98
  }
90
99
  return value;
91
100
  }
101
+ function classifyPrecedingCharacter(before, get2Previous, previous) {
102
+ if (!isNonEmojiGeneralUseVS(before)) {
103
+ return before;
104
+ }
105
+ const twoPrevious = get2Previous();
106
+ const twoBefore = classifyCharacter(twoPrevious);
107
+ return !twoPrevious || isUnicodeWhitespace(twoBefore) ? before : isCjkAmbiguousPunctuation(twoPrevious, previous) ? constantsEx.cjkPunctuation : stripIvs(twoBefore);
108
+ }
109
+ function stripIvs(twoBefore) {
110
+ return twoBefore & ~constantsEx.ivs;
111
+ }
92
112
 
93
113
  // src/categoryUtil.ts
94
114
  function isUnicodeWhitespace(category) {
@@ -103,6 +123,9 @@ function isCjk(category) {
103
123
  function isIvs(category) {
104
124
  return category === constantsEx.ivs;
105
125
  }
126
+ function isCjkOrIvs(category) {
127
+ return Boolean(category & constantsEx.cjkOrIvs);
128
+ }
106
129
  function isNonEmojiGeneralUseVS(category) {
107
130
  return category === constantsEx.nonEmojiGeneralUseVS;
108
131
  }
@@ -161,6 +184,40 @@ function tryGetCodeTwoBefore(previousCode, nowPoint, sliceSerialize) {
161
184
  }
162
185
  return twoPreviousLast;
163
186
  }
187
+ var TwoPreviousCode = class {
188
+ /**
189
+ * @see {@link tryGetCodeTwoBefore}
190
+ *
191
+ * @param previousCode a previous code point. Should be greater than 65,535 if it represents a [Supplementary Character](https://www.unicode.org/glossary/#supplementary_character).
192
+ * @param nowPoint `this.now()` (`this` = `TokenizeContext`)
193
+ * @param sliceSerialize `this.sliceSerialize` (`this` = `TokenizeContext`)
194
+ */
195
+ constructor(previousCode, nowPoint, sliceSerialize) {
196
+ this.previousCode = previousCode;
197
+ this.nowPoint = nowPoint;
198
+ this.sliceSerialize = sliceSerialize;
199
+ __publicField(this, "cachedValue");
200
+ }
201
+ /**
202
+ * Returns the return value of {@link tryGetCodeTwoBefore}.
203
+ *
204
+ * If the value has not been computed yet, it will be computed and cached.
205
+ *
206
+ * @see {@link tryGetCodeTwoBefore}
207
+ *
208
+ * @returns a value greater than 65,535 if the code point two positions before represents a [Supplementary Character](https://www.unicode.org/glossary/#supplementary_character), a value less than 65,536 for a [BMP Character](https://www.unicode.org/glossary/#bmp_character), or `null` if not found
209
+ */
210
+ value() {
211
+ if (this.cachedValue === void 0) {
212
+ this.cachedValue = tryGetCodeTwoBefore(
213
+ this.previousCode,
214
+ this.nowPoint,
215
+ this.sliceSerialize
216
+ );
217
+ }
218
+ return this.cachedValue;
219
+ }
220
+ };
164
221
  function tryGetGenuineNextCode(code, nowPoint, sliceSerialize) {
165
222
  const nextCandidate = sliceSerialize({
166
223
  start: nowPoint,
@@ -169,9 +226,12 @@ function tryGetGenuineNextCode(code, nowPoint, sliceSerialize) {
169
226
  return nextCandidate && nextCandidate >= 65536 ? nextCandidate : code;
170
227
  }
171
228
  export {
229
+ TwoPreviousCode,
172
230
  classifyCharacter,
231
+ classifyPrecedingCharacter,
173
232
  constantsEx,
174
233
  isCjk,
234
+ isCjkOrIvs,
175
235
  isCodeHighSurrogate,
176
236
  isCodeLowSurrogate,
177
237
  isIvs,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "micromark-extension-cjk-friendly-util",
3
- "version": "2.0.0-rc.1",
3
+ "version": "2.0.0",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {