npm - micromark-extension-cjk-friendly-util - Versions diffs - 1.1.0 → 2.0.0-rc.2 - Mend

micromark-extension-cjk-friendly-util 1.1.0 → 2.0.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/categoryUtil.d.ts +11 -4
package/dist/categoryUtil.js +10 -5
package/dist/characterWithNonBmp.d.ts +4 -5
package/dist/characterWithNonBmp.js +10 -3
package/dist/classifyCharacter.d.ts +17 -4
package/dist/classifyCharacter.js +37 -9
package/dist/index.d.ts +3 -3
package/dist/index.js +71 -9
package/package.json +1 -1

package/dist/categoryUtil.d.ts CHANGED Viewed

@@ -32,12 +32,19 @@ declare function isCjk(category: Category): boolean;
  */
 declare function isIvs(category: Category): boolean;
 /**
- * `true` if the code point represents a [Standard Variation Selector that can follow CJK](https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md#svs-that-can-follow-cjk).
+ * `true` if {@link isCjk} or {@link isIvs}.
+ *
+ * @param category the return value of {@link classifyCharacter}.
+ * @returns `true` if the code point represents a CJK or IVS
+ */
+declare function isCjkOrIvs(category: Category): boolean;
+/**
+ * `true` if the code point represents a [Non-emoji General-use Variation Selector](https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md#non-emoji-general-use-variation-selector).
  *
  * @param category the return value of `classifyCharacter`.
- * @returns `true` if the code point represents an Standard Variation Selector that can follow CJK
+ * @returns `true` if the code point represents a Non-emoji General-use Variation Selector
  */
-declare function isSvsFollowingCjk(category: Category): boolean;
+declare function isNonEmojiGeneralUseVS(category: Category): boolean;
 /**
  * `true` if the code point represents a [Unicode whitespace character](https://spec.commonmark.org/0.31.2/#unicode-whitespace-character) or a [Unicode punctuation character](https://spec.commonmark.org/0.31.2/#unicode-punctuation-character).
  *
@@ -46,4 +53,4 @@ declare function isSvsFollowingCjk(category: Category): boolean;
  */
 declare function isSpaceOrPunctuation(category: Category): boolean;
-export { isCjk, isIvs, isNonCjkPunctuation, isSpaceOrPunctuation, isSvsFollowingCjk, isUnicodeWhitespace };
+export { isCjk, isCjkOrIvs, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace };

package/dist/categoryUtil.js CHANGED Viewed

@@ -11,8 +11,9 @@ var constantsEx;
   constantsEx2.cjkPunctuation = 4098;
   constantsEx2.ivs = 8192;
   constantsEx2.cjkOrIvs = 12288;
-  constantsEx2.svsFollowingCjk = 16384;
-  constantsEx2.variationSelector = 28672;
+  constantsEx2.nonEmojiGeneralUseVS = 16384;
+  constantsEx2.variationSelector = 24576;
+  constantsEx2.ivsToCjkRightShift = 1;
 })(constantsEx || (constantsEx = {}));
 // src/categoryUtil.ts
@@ -28,17 +29,21 @@ function isCjk(category) {
 function isIvs(category) {
   return category === constantsEx.ivs;
 }
-function isSvsFollowingCjk(category) {
-  return category === constantsEx.svsFollowingCjk;
+function isCjkOrIvs(category) {
+  return Boolean(category & constantsEx.cjkOrIvs);
+}
+function isNonEmojiGeneralUseVS(category) {
+  return category === constantsEx.nonEmojiGeneralUseVS;
 }
 function isSpaceOrPunctuation(category) {
   return Boolean(category & constantsEx.spaceOrPunctuation);
 }
 export {
   isCjk,
+  isCjkOrIvs,
   isIvs,
   isNonCjkPunctuation,
+  isNonEmojiGeneralUseVS,
   isSpaceOrPunctuation,
-  isSvsFollowingCjk,
   isUnicodeWhitespace
 };

package/dist/characterWithNonBmp.d.ts CHANGED Viewed

@@ -7,12 +7,11 @@ import { Code } from 'micromark-util-types';
  * @returns `true` if `uc` is CJK, `null` if IVS, or `false` if neither
  */
 declare function cjkOrIvs(uc: Code): boolean | null;
+declare function isCjkAmbiguousPunctuation(main: Code, vs: Code): boolean;
 /**
- * Check whether the character code represents Standard Variation Sequence that can follow an ideographic character.
- *
- * U+FE0E is used for some CJK symbols (e.g. U+3299) that can also be
+ * Check whether the character code represents a Non-emoji General-use Variation Selector (U+FE00-U+FE0E).
  */
-declare const svsFollowingCjk: (code: Code) => boolean;
+declare function nonEmojiGeneralUseVS(code: Code): boolean;
 /**
  * Check whether the character code represents Unicode punctuation.
  *
@@ -55,4 +54,4 @@ declare const unicodePunctuation: (code: Code) => boolean;
  */
 declare const unicodeWhitespace: (code: Code) => boolean;
-export { cjkOrIvs, svsFollowingCjk, unicodePunctuation, unicodeWhitespace };
+export { cjkOrIvs, isCjkAmbiguousPunctuation, nonEmojiGeneralUseVS, unicodePunctuation, unicodeWhitespace };

package/dist/characterWithNonBmp.js CHANGED Viewed

@@ -18,7 +18,7 @@ var isEmoji = function(uc) {
   fn: null
 });
 function cjkOrIvs(uc) {
-  if (!uc || uc < 0) {
+  if (!uc || uc < 4352) {
     return false;
   }
   const eaw = eastAsianWidthType(uc);
@@ -37,7 +37,13 @@ function cjkOrIvs(uc) {
       return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
   }
 }
-var svsFollowingCjk = regexCheck(/[\uFE00-\uFE02\uFE0E]/u);
+function isCjkAmbiguousPunctuation(main, vs) {
+  if (vs !== 65025 || !main || main < 8216) return false;
+  return main === 8216 || main === 8217 || main === 8220 || main === 8221;
+}
+function nonEmojiGeneralUseVS(code) {
+  return code !== null && code >= 65024 && code <= 65038;
+}
 var unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
 var unicodeWhitespace = regexCheck(/\s/);
 function regexCheck(regex) {
@@ -48,7 +54,8 @@ function regexCheck(regex) {
 }
 export {
   cjkOrIvs,
-  svsFollowingCjk,
+  isCjkAmbiguousPunctuation,
+  nonEmojiGeneralUseVS,
   unicodePunctuation,
   unicodeWhitespace
 };

package/dist/classifyCharacter.d.ts CHANGED Viewed

@@ -7,8 +7,9 @@ declare namespace constantsEx {
     const cjkPunctuation: 4098;
     const ivs: 8192;
     const cjkOrIvs: 12288;
-    const svsFollowingCjk: 16384;
-    const variationSelector: 28672;
+    const nonEmojiGeneralUseVS: 16384;
+    const variationSelector: 24576;
+    const ivsToCjkRightShift: 1;
 }
 /**
  * Classify whether a code represents whitespace, punctuation, or something
@@ -24,6 +25,18 @@ declare namespace constantsEx {
  * @returns
  *   Group.
  */
-declare function classifyCharacter(code: Code): typeof constants.characterGroupWhitespace | typeof constants.characterGroupPunctuation | typeof constantsEx.cjk | typeof constantsEx.cjkPunctuation | typeof constantsEx.ivs | typeof constantsEx.svsFollowingCjk | 0;
+declare function classifyCharacter(code: Code): typeof constants.characterGroupWhitespace | typeof constants.characterGroupPunctuation | typeof constantsEx.cjk | typeof constantsEx.cjkPunctuation | typeof constantsEx.ivs | typeof constantsEx.nonEmojiGeneralUseVS | 0;
+/**
+ * Classify whether a code represents whitespace, punctuation, or something else.
+ *
+ * Recognizes general-use variation selectors. Use this instead of {@linkcode classifyCharacter} for the preceding character.
+ *
+ * @param before result of {@linkcode classifyCharacter} of the preceding character.
+ * @param get2Previous a function that returns the code point of the character before the preceding character. Use lambda or {@linkcode Function.prototype.bind}.
+ * @param previous code point of the preceding character
+ * @returns
+ *   Group of the main code point of the preceding character. Use `isCjkOrIvs` to check whether it is CJK
+ */
+declare function classifyPrecedingCharacter(before: ReturnType<typeof classifyCharacter>, get2Previous: () => Code, previous: Code): ReturnType<typeof classifyCharacter>;
-export { classifyCharacter, constantsEx };
+export { classifyCharacter, classifyPrecedingCharacter, constantsEx };

package/dist/classifyCharacter.js CHANGED Viewed

@@ -1,6 +1,15 @@
 // src/classifyCharacter.ts
 import { markdownLineEndingOrSpace } from "micromark-util-character";
-import { constants, codes } from "micromark-util-symbol";
+import { constants as constants2, codes } from "micromark-util-symbol";
+// src/categoryUtil.ts
+import { constants } from "micromark-util-symbol";
+function isUnicodeWhitespace(category) {
+  return Boolean(category & constants.characterGroupWhitespace);
+}
+function isNonEmojiGeneralUseVS(category) {
+  return category === constantsEx.nonEmojiGeneralUseVS;
+}
 // src/characterWithNonBmp.ts
 import { eastAsianWidthType } from "get-east-asian-width";
@@ -22,7 +31,7 @@ var isEmoji = function(uc) {
   fn: null
 });
 function cjkOrIvs(uc) {
-  if (!uc || uc < 0) {
+  if (!uc || uc < 4352) {
     return false;
   }
   const eaw = eastAsianWidthType(uc);
@@ -41,7 +50,13 @@ function cjkOrIvs(uc) {
       return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
   }
 }
-var svsFollowingCjk = regexCheck(/[\uFE00-\uFE02\uFE0E]/u);
+function isCjkAmbiguousPunctuation(main, vs) {
+  if (vs !== 65025 || !main || main < 8216) return false;
+  return main === 8216 || main === 8217 || main === 8220 || main === 8221;
+}
+function nonEmojiGeneralUseVS(code) {
+  return code !== null && code >= 65024 && code <= 65038;
+}
 var unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
 var unicodeWhitespace = regexCheck(/\s/);
 function regexCheck(regex) {
@@ -59,17 +74,18 @@ var constantsEx;
   constantsEx2.cjkPunctuation = 4098;
   constantsEx2.ivs = 8192;
   constantsEx2.cjkOrIvs = 12288;
-  constantsEx2.svsFollowingCjk = 16384;
-  constantsEx2.variationSelector = 28672;
+  constantsEx2.nonEmojiGeneralUseVS = 16384;
+  constantsEx2.variationSelector = 24576;
+  constantsEx2.ivsToCjkRightShift = 1;
 })(constantsEx || (constantsEx = {}));
 function classifyCharacter(code) {
   if (code === codes.eof || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
-    return constants.characterGroupWhitespace;
+    return constants2.characterGroupWhitespace;
   }
   let value = 0;
   if (code >= 4352) {
-    if (svsFollowingCjk(code)) {
-      return constantsEx.svsFollowingCjk;
+    if (nonEmojiGeneralUseVS(code)) {
+      return constantsEx.nonEmojiGeneralUseVS;
     }
     switch (cjkOrIvs(code)) {
       case null:
@@ -80,11 +96,23 @@ function classifyCharacter(code) {
     }
   }
   if (unicodePunctuation(code)) {
-    value |= constants.characterGroupPunctuation;
+    value |= constants2.characterGroupPunctuation;
   }
   return value;
 }
+function classifyPrecedingCharacter(before, get2Previous, previous) {
+  if (!isNonEmojiGeneralUseVS(before)) {
+    return before;
+  }
+  const twoPrevious = get2Previous();
+  const twoBefore = classifyCharacter(twoPrevious);
+  return !twoPrevious || isUnicodeWhitespace(twoBefore) ? before : isCjkAmbiguousPunctuation(twoPrevious, previous) ? constantsEx.cjkPunctuation : stripIvs(twoBefore);
+}
+function stripIvs(twoBefore) {
+  return twoBefore & ~constantsEx.ivs;
+}
 export {
   classifyCharacter,
+  classifyPrecedingCharacter,
   constantsEx
 };

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-export { isCjk, isIvs, isNonCjkPunctuation, isSpaceOrPunctuation, isSvsFollowingCjk, isUnicodeWhitespace } from './categoryUtil.js';
-export { classifyCharacter, constantsEx } from './classifyCharacter.js';
-export { isCodeHighSurrogate, isCodeLowSurrogate, tryGetCodeTwoBefore, tryGetGenuineNextCode, tryGetGenuinePreviousCode } from './codeUtil.js';
+export { isCjk, isCjkOrIvs, isIvs, isNonCjkPunctuation, isNonEmojiGeneralUseVS, isSpaceOrPunctuation, isUnicodeWhitespace } from './categoryUtil.js';
+export { classifyCharacter, classifyPrecedingCharacter, constantsEx } from './classifyCharacter.js';
+export { TwoPreviousCode, isCodeHighSurrogate, isCodeLowSurrogate, tryGetCodeTwoBefore, tryGetGenuineNextCode, tryGetGenuinePreviousCode } from './codeUtil.js';
 import 'micromark-util-symbol';
 import 'micromark-util-types';

package/dist/index.js CHANGED Viewed

@@ -1,3 +1,7 @@
+var __defProp = Object.defineProperty;
+var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
+var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
 // src/categoryUtil.ts
 import { constants as constants2 } from "micromark-util-symbol";
@@ -25,7 +29,7 @@ var isEmoji = function(uc) {
   fn: null
 });
 function cjkOrIvs(uc) {
-  if (!uc || uc < 0) {
+  if (!uc || uc < 4352) {
     return false;
   }
   const eaw = eastAsianWidthType(uc);
@@ -44,7 +48,13 @@ function cjkOrIvs(uc) {
       return /^\p{sc=Hangul}/u.test(String.fromCodePoint(uc));
   }
 }
-var svsFollowingCjk = regexCheck(/[\uFE00-\uFE02\uFE0E]/u);
+function isCjkAmbiguousPunctuation(main, vs) {
+  if (vs !== 65025 || !main || main < 8216) return false;
+  return main === 8216 || main === 8217 || main === 8220 || main === 8221;
+}
+function nonEmojiGeneralUseVS(code) {
+  return code !== null && code >= 65024 && code <= 65038;
+}
 var unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
 var unicodeWhitespace = regexCheck(/\s/);
 function regexCheck(regex) {
@@ -62,8 +72,9 @@ var constantsEx;
   constantsEx2.cjkPunctuation = 4098;
   constantsEx2.ivs = 8192;
   constantsEx2.cjkOrIvs = 12288;
-  constantsEx2.svsFollowingCjk = 16384;
-  constantsEx2.variationSelector = 28672;
+  constantsEx2.nonEmojiGeneralUseVS = 16384;
+  constantsEx2.variationSelector = 24576;
+  constantsEx2.ivsToCjkRightShift = 1;
 })(constantsEx || (constantsEx = {}));
 function classifyCharacter(code) {
   if (code === codes.eof || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
@@ -71,8 +82,8 @@ function classifyCharacter(code) {
   }
   let value = 0;
   if (code >= 4352) {
-    if (svsFollowingCjk(code)) {
-      return constantsEx.svsFollowingCjk;
+    if (nonEmojiGeneralUseVS(code)) {
+      return constantsEx.nonEmojiGeneralUseVS;
     }
     switch (cjkOrIvs(code)) {
       case null:
@@ -87,6 +98,17 @@ function classifyCharacter(code) {
   }
   return value;
 }
+function classifyPrecedingCharacter(before, get2Previous, previous) {
+  if (!isNonEmojiGeneralUseVS(before)) {
+    return before;
+  }
+  const twoPrevious = get2Previous();
+  const twoBefore = classifyCharacter(twoPrevious);
+  return !twoPrevious || isUnicodeWhitespace(twoBefore) ? before : isCjkAmbiguousPunctuation(twoPrevious, previous) ? constantsEx.cjkPunctuation : stripIvs(twoBefore);
+}
+function stripIvs(twoBefore) {
+  return twoBefore & ~constantsEx.ivs;
+}
 // src/categoryUtil.ts
 function isUnicodeWhitespace(category) {
@@ -101,8 +123,11 @@ function isCjk(category) {
 function isIvs(category) {
   return category === constantsEx.ivs;
 }
-function isSvsFollowingCjk(category) {
-  return category === constantsEx.svsFollowingCjk;
+function isCjkOrIvs(category) {
+  return Boolean(category & constantsEx.cjkOrIvs);
+}
+function isNonEmojiGeneralUseVS(category) {
+  return category === constantsEx.nonEmojiGeneralUseVS;
 }
 function isSpaceOrPunctuation(category) {
   return Boolean(category & constantsEx.spaceOrPunctuation);
@@ -159,6 +184,40 @@ function tryGetCodeTwoBefore(previousCode, nowPoint, sliceSerialize) {
   }
   return twoPreviousLast;
 }
+var TwoPreviousCode = class {
+  /**
+   * @see {@link tryGetCodeTwoBefore}
+   *
+   * @param previousCode a previous code point. Should be greater than 65,535 if it represents a [Supplementary Character](https://www.unicode.org/glossary/#supplementary_character).
+   * @param nowPoint `this.now()` (`this` = `TokenizeContext`)
+   * @param sliceSerialize `this.sliceSerialize` (`this` = `TokenizeContext`)
+   */
+  constructor(previousCode, nowPoint, sliceSerialize) {
+    this.previousCode = previousCode;
+    this.nowPoint = nowPoint;
+    this.sliceSerialize = sliceSerialize;
+    __publicField(this, "cachedValue");
+  }
+  /**
+   * Returns the return value of {@link tryGetCodeTwoBefore}.
+   *
+   * If the value has not been computed yet, it will be computed and cached.
+   *
+   * @see {@link tryGetCodeTwoBefore}
+   *
+   * @returns a value greater than 65,535 if the code point two positions before represents a [Supplementary Character](https://www.unicode.org/glossary/#supplementary_character), a value less than 65,536 for a [BMP Character](https://www.unicode.org/glossary/#bmp_character), or `null` if not found
+   */
+  value() {
+    if (this.cachedValue === void 0) {
+      this.cachedValue = tryGetCodeTwoBefore(
+        this.previousCode,
+        this.nowPoint,
+        this.sliceSerialize
+      );
+    }
+    return this.cachedValue;
+  }
+};
 function tryGetGenuineNextCode(code, nowPoint, sliceSerialize) {
   const nextCandidate = sliceSerialize({
     start: nowPoint,
@@ -167,15 +226,18 @@ function tryGetGenuineNextCode(code, nowPoint, sliceSerialize) {
   return nextCandidate && nextCandidate >= 65536 ? nextCandidate : code;
 }
 export {
+  TwoPreviousCode,
   classifyCharacter,
+  classifyPrecedingCharacter,
   constantsEx,
   isCjk,
+  isCjkOrIvs,
   isCodeHighSurrogate,
   isCodeLowSurrogate,
   isIvs,
   isNonCjkPunctuation,
+  isNonEmojiGeneralUseVS,
   isSpaceOrPunctuation,
-  isSvsFollowingCjk,
   isUnicodeWhitespace,
   tryGetCodeTwoBefore,
   tryGetGenuineNextCode,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "micromark-extension-cjk-friendly-util",
-  "version": "1.1.0",
+  "version": "2.0.0-rc.2",
   "type": "module",
   "exports": {
     ".": {