npm - onchain-lexical-markdown - Versions diffs - 0.0.1 - Mend

onchain-lexical-markdown 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/OnchainLexicalMarkdown.js +11 -0
package/README.md +96 -0
package/flow/OnchainLexicalMarkdown.js.flow +130 -0
package/package.json +45 -0
package/src/MarkdownExport.ts +362 -0
package/src/MarkdownImport.ts +343 -0
package/src/MarkdownShortcuts.ts +529 -0
package/src/MarkdownTransformers.ts +631 -0
package/src/__tests__/unit/LexicalMarkdown.test.ts +891 -0
package/src/fromMarkdownString.ts +39 -0
package/src/importTextFormatTransformer.ts +137 -0
package/src/importTextMatchTransformer.ts +108 -0
package/src/importTextTransformers.ts +142 -0
package/src/index.ts +81 -0
package/src/toMarkdownString.ts +25 -0
package/src/transformer/const.ts +12 -0
package/src/transformer/hr.ts +37 -0
package/src/transformer/index.ts +97 -0
package/src/transformer/instance.ts +73 -0
package/src/transformer/levelBasedControl.ts +95 -0
package/src/transformer/table.ts +182 -0
package/src/transformer/utils.ts +21 -0
package/src/utils.ts +462 -0

package/src/MarkdownTransformers.ts ADDED Viewed

@@ -0,0 +1,631 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+import type {ListType} from '@lexical/list';
+import type {HeadingTagType} from '@lexical/rich-text';
+import {$isCodeNode, CodeNode} from '@lexical/code';
+import {$createLinkNode, $isLinkNode, LinkNode} from '@lexical/link';
+import {
+  $isListItemNode,
+  $isListNode,
+  ListItemNode,
+  ListNode,
+} from '@lexical/list';
+import {
+  $isHeadingNode,
+  $isQuoteNode,
+  HeadingNode,
+  QuoteNode,
+} from '@lexical/rich-text';
+import {
+  $createLineBreakNode,
+  $createTextNode,
+  ElementNode,
+  Klass,
+  LexicalNode,
+  TextFormatType,
+  TextNode,
+} from 'lexical';
+import {
+  $createInstanceCodeNode,
+  $createInstanceHeadingNode,
+  $createInstanceListItemNode,
+  $createInstanceListNode,
+  $createInstanceQuoteNode,
+} from 'onchain-lexical-instance';
+export type Transformer =
+  | ElementTransformer
+  | MultilineElementTransformer
+  | TextFormatTransformer
+  | TextMatchTransformer;
+export type ElementTransformer = {
+  dependencies: Array<Klass<LexicalNode>>;
+  /**
+   * `export` is called when the `$convertToMarkdownString` is called to convert the editor state into markdown.
+   *
+   * @return return null to cancel the export, even though the regex matched. Lexical will then search for the next transformer.
+   */
+  export: (
+    node: LexicalNode,
+    // eslint-disable-next-line no-shadow
+    traverseChildren: (node: ElementNode) => string,
+  ) => string | null;
+  regExp: RegExp;
+  /**
+   * `replace` is called when markdown is imported or typed in the editor
+   *
+   * @return return false to cancel the transform, even though the regex matched. Lexical will then search for the next transformer.
+   */
+  replace: (
+    parentNode: ElementNode,
+    children: Array<LexicalNode>,
+    match: Array<string>,
+    /**
+     * Whether the match is from an import operation (e.g. through `$convertFromMarkdownString`) or not (e.g. through typing in the editor).
+     */
+    isImport: boolean,
+  ) => boolean | void;
+  type: 'element';
+};
+export type MultilineElementTransformer = {
+  /**
+   * Use this function to manually handle the import process, once the `regExpStart` has matched successfully.
+   * Without providing this function, the default behavior is to match until `regExpEnd` is found, or until the end of the document if `regExpEnd.optional` is true.
+   *
+   * @returns a tuple or null. The first element of the returned tuple is a boolean indicating if a multiline element was imported. The second element is the index of the last line that was processed. If null is returned, the next multilineElementTransformer will be tried. If undefined is returned, the default behavior will be used.
+   */
+  handleImportAfterStartMatch?: (args: {
+    lines: Array<string>;
+    rootNode: ElementNode;
+    startLineIndex: number;
+    startMatch: RegExpMatchArray;
+    transformer: MultilineElementTransformer;
+  }) => [boolean, number] | null | undefined;
+  dependencies: Array<Klass<LexicalNode>>;
+  /**
+   * `export` is called when the `$convertToMarkdownString` is called to convert the editor state into markdown.
+   *
+   * @return return null to cancel the export, even though the regex matched. Lexical will then search for the next transformer.
+   */
+  export?: (
+    node: LexicalNode,
+    // eslint-disable-next-line no-shadow
+    traverseChildren: (node: ElementNode) => string,
+  ) => string | null;
+  /**
+   * This regex determines when to start matching
+   */
+  regExpStart: RegExp;
+  /**
+   * This regex determines when to stop matching. Anything in between regExpStart and regExpEnd will be matched
+   */
+  regExpEnd?:
+    | RegExp
+    | {
+        /**
+         * Whether the end match is optional. If true, the end match is not required to match for the transformer to be triggered.
+         * The entire text from regexpStart to the end of the document will then be matched.
+         */
+        optional?: true;
+        regExp: RegExp;
+      };
+  /**
+   * `replace` is called only when markdown is imported in the editor, not when it's typed
+   *
+   * @return return false to cancel the transform, even though the regex matched. Lexical will then search for the next transformer.
+   */
+  replace: (
+    rootNode: ElementNode,
+    /**
+     * During markdown shortcut transforms, children nodes may be provided to the transformer. If this is the case, no `linesInBetween` will be provided and
+     * the children nodes should be used instead of the `linesInBetween` to create the new node.
+     */
+    children: Array<LexicalNode> | null,
+    startMatch: Array<string>,
+    endMatch: Array<string> | null,
+    /**
+     * linesInBetween includes the text between the start & end matches, split up by lines, not including the matches themselves.
+     * This is null when the transformer is triggered through markdown shortcuts (by typing in the editor)
+     */
+    linesInBetween: Array<string> | null,
+    /**
+     * Whether the match is from an import operation (e.g. through `$convertFromMarkdownString`) or not (e.g. through typing in the editor).
+     */
+    isImport: boolean,
+  ) => boolean | void;
+  type: 'multiline-element';
+};
+export type TextFormatTransformer = Readonly<{
+  format: ReadonlyArray<TextFormatType>;
+  tag: string;
+  intraword?: boolean;
+  type: 'text-format';
+}>;
+export type TextMatchTransformer = Readonly<{
+  dependencies: Array<Klass<LexicalNode>>;
+  /**
+   * Determines how a node should be exported to markdown
+   */
+  export?: (
+    node: LexicalNode,
+    // eslint-disable-next-line no-shadow
+    exportChildren: (node: ElementNode) => string,
+    // eslint-disable-next-line no-shadow
+    exportFormat: (node: TextNode, textContent: string) => string,
+  ) => string | null;
+  /**
+   * This regex determines what text is matched during markdown imports
+   */
+  importRegExp?: RegExp;
+  /**
+   * This regex determines what text is matched for markdown shortcuts while typing in the editor
+   */
+  regExp: RegExp;
+  /**
+   * Determines how the matched markdown text should be transformed into a node during the markdown import process
+   *
+   * @returns nothing, or a TextNode that may be a child of the new node that is created.
+   * If a TextNode is returned, text format matching will be applied to it (e.g. bold, italic, etc.)
+   */
+  replace?: (node: TextNode, match: RegExpMatchArray) => void | TextNode;
+  /**
+   * For import operations, this function can be used to determine the end index of the match, after `importRegExp` has matched.
+   * Without this function, the end index will be determined by the length of the match from `importRegExp`. Manually determining the end index can be useful if
+   * the match from `importRegExp` is not the entire text content of the node. That way, `importRegExp` can be used to match only the start of the node, and `getEndIndex`
+   * can be used to match the end of the node.
+   *
+   * @returns The end index of the match, or false if the match was unsuccessful and a different transformer should be tried.
+   */
+  getEndIndex?: (node: TextNode, match: RegExpMatchArray) => number | false;
+  /**
+   * Single character that allows the transformer to trigger when typed in the editor. This does not affect markdown imports outside of the markdown shortcut plugin.
+   * If the trigger is matched, the `regExp` will be used to match the text in the second step.
+   */
+  trigger?: string;
+  type: 'text-match';
+}>;
+const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
+const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
+const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i;
+const HEADING_REGEX = /^(#{1,6})\s/;
+const QUOTE_REGEX = /^>\s/;
+const CODE_START_REGEX = /^[ \t]*```(\w+)?/;
+const CODE_END_REGEX = /[ \t]*```$/;
+const CODE_SINGLE_LINE_REGEX =
+  /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/;
+const TABLE_ROW_REG_EXP = /^(?:\|)(.+)(?:\|)\s?$/;
+const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/;
+export const createBlockNode = (
+  createNode: (match: Array<string>) => ElementNode,
+): ElementTransformer['replace'] => {
+  return (parentNode, children, match) => {
+    const node = createNode(match);
+    node.append(...children);
+    parentNode.replace(node);
+    node.select(0, 0);
+  };
+};
+// Amount of spaces that define indentation level
+// TODO: should be an option
+const LIST_INDENT_SIZE = 4;
+function getIndent(whitespaces: string): number {
+  const tabs = whitespaces.match(/\t/g);
+  const spaces = whitespaces.match(/ /g);
+  let indent = 0;
+  if (tabs) {
+    indent += tabs.length;
+  }
+  if (spaces) {
+    indent += Math.floor(spaces.length / LIST_INDENT_SIZE);
+  }
+  return indent;
+}
+const listReplace = (listType: ListType): ElementTransformer['replace'] => {
+  return (parentNode, children, match) => {
+    const previousNode = parentNode.getPreviousSibling();
+    const nextNode = parentNode.getNextSibling();
+    const listItem = $createInstanceListItemNode(
+      listType === 'check' ? match[3] === 'x' : undefined,
+    );
+    if ($isListNode(nextNode) && nextNode.getListType() === listType) {
+      const firstChild = nextNode.getFirstChild();
+      if (firstChild !== null) {
+        firstChild.insertBefore(listItem);
+      } else {
+        // should never happen, but let's handle gracefully, just in case.
+        nextNode.append(listItem);
+      }
+      parentNode.remove();
+    } else if (
+      $isListNode(previousNode) &&
+      previousNode.getListType() === listType
+    ) {
+      previousNode.append(listItem);
+      parentNode.remove();
+    } else {
+      const list = $createInstanceListNode(
+        listType,
+        listType === 'number' ? Number(match[2]) : undefined,
+      );
+      list.append(listItem);
+      parentNode.replace(list);
+    }
+    listItem.append(...children);
+    listItem.select(0, 0);
+    const indent = getIndent(match[1]);
+    if (indent) {
+      listItem.setIndent(indent);
+    }
+  };
+};
+const listExport = (
+  listNode: ListNode,
+  exportChildren: (node: ElementNode) => string,
+  depth: number,
+): string => {
+  const output = [];
+  const children = listNode.getChildren();
+  let index = 0;
+  for (const listItemNode of children) {
+    if ($isListItemNode(listItemNode)) {
+      if (listItemNode.getChildrenSize() === 1) {
+        const firstChild = listItemNode.getFirstChild();
+        if ($isListNode(firstChild)) {
+          output.push(listExport(firstChild, exportChildren, depth + 1));
+          continue;
+        }
+      }
+      const indent = ' '.repeat(depth * LIST_INDENT_SIZE);
+      const listType = listNode.getListType();
+      const prefix =
+        listType === 'number'
+          ? `${listNode.getStart() + index}. `
+          : listType === 'check'
+          ? `- [${listItemNode.getChecked() ? 'x' : ' '}] `
+          : '- ';
+      output.push(indent + prefix + exportChildren(listItemNode));
+      index++;
+    }
+  }
+  return output.join('\n');
+};
+export const HEADING: ElementTransformer = {
+  dependencies: [HeadingNode],
+  export: (node, exportChildren) => {
+    if (!$isHeadingNode(node)) {
+      return null;
+    }
+    const level = Number(node.getTag().slice(1));
+    return '#'.repeat(level) + ' ' + exportChildren(node);
+  },
+  regExp: HEADING_REGEX,
+  replace: createBlockNode((match) => {
+    const tag = ('h' + match[1].length) as HeadingTagType;
+    return $createInstanceHeadingNode(tag);
+  }),
+  type: 'element',
+};
+export const QUOTE: ElementTransformer = {
+  dependencies: [QuoteNode],
+  export: (node, exportChildren) => {
+    if (!$isQuoteNode(node)) {
+      return null;
+    }
+    const lines = exportChildren(node).split('\n');
+    const output = [];
+    for (const line of lines) {
+      output.push('> ' + line);
+    }
+    return output.join('\n');
+  },
+  regExp: QUOTE_REGEX,
+  replace: (parentNode, children, _match, isImport) => {
+    if (isImport) {
+      const previousNode = parentNode.getPreviousSibling();
+      if ($isQuoteNode(previousNode)) {
+        previousNode.splice(previousNode.getChildrenSize(), 0, [
+          $createLineBreakNode(),
+          ...children,
+        ]);
+        previousNode.select(0, 0);
+        parentNode.remove();
+        return;
+      }
+    }
+    const node = $createInstanceQuoteNode();
+    node.append(...children);
+    parentNode.replace(node);
+    node.select(0, 0);
+  },
+  type: 'element',
+};
+export const CODE: MultilineElementTransformer = {
+  dependencies: [CodeNode],
+  export: (node: LexicalNode) => {
+    if (!$isCodeNode(node)) {
+      return null;
+    }
+    const textContent = node.getTextContent();
+    return (
+      '```' +
+      (node.getLanguage() || '') +
+      (textContent ? '\n' + textContent : '') +
+      '\n' +
+      '```'
+    );
+  },
+  regExpEnd: {
+    optional: true,
+    regExp: CODE_END_REGEX,
+  },
+  regExpStart: CODE_START_REGEX,
+  replace: (
+    rootNode,
+    children,
+    startMatch,
+    endMatch,
+    linesInBetween,
+    isImport,
+  ) => {
+    let codeBlockNode: CodeNode;
+    let code: string;
+    if (!children && linesInBetween) {
+      if (linesInBetween.length === 1) {
+        // Single-line code blocks
+        if (endMatch) {
+          // End match on same line. Example: ```markdown hello```. markdown should not be considered the language here.
+          codeBlockNode = $createInstanceCodeNode();
+          code = startMatch[1] + linesInBetween[0];
+        } else {
+          // No end match. We should assume the language is next to the backticks and that code will be typed on the next line in the future
+          codeBlockNode = $createInstanceCodeNode(startMatch[1]);
+          code = linesInBetween[0].startsWith(' ')
+            ? linesInBetween[0].slice(1)
+            : linesInBetween[0];
+        }
+      } else {
+        // Treat multi-line code blocks as if they always have an end match
+        codeBlockNode = $createInstanceCodeNode(startMatch[1]);
+        if (linesInBetween[0].trim().length === 0) {
+          // Filter out all start and end lines that are length 0 until we find the first line with content
+          while (linesInBetween.length > 0 && !linesInBetween[0].length) {
+            linesInBetween.shift();
+          }
+        } else {
+          // The first line already has content => Remove the first space of the line if it exists
+          linesInBetween[0] = linesInBetween[0].startsWith(' ')
+            ? linesInBetween[0].slice(1)
+            : linesInBetween[0];
+        }
+        // Filter out all end lines that are length 0 until we find the last line with content
+        while (
+          linesInBetween.length > 0 &&
+          !linesInBetween[linesInBetween.length - 1].length
+        ) {
+          linesInBetween.pop();
+        }
+        code = linesInBetween.join('\n');
+      }
+      const textNode = $createTextNode(code);
+      codeBlockNode.append(textNode);
+      rootNode.append(codeBlockNode);
+    } else if (children) {
+      createBlockNode((match) => {
+        return $createInstanceCodeNode(match ? match[1] : undefined);
+      })(rootNode, children, startMatch, isImport);
+    }
+  },
+  type: 'multiline-element',
+};
+export const UNORDERED_LIST: ElementTransformer = {
+  dependencies: [ListNode, ListItemNode],
+  export: (node, exportChildren) => {
+    return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
+  },
+  regExp: UNORDERED_LIST_REGEX,
+  replace: listReplace('bullet'),
+  type: 'element',
+};
+export const CHECK_LIST: ElementTransformer = {
+  dependencies: [ListNode, ListItemNode],
+  export: (node, exportChildren) => {
+    return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
+  },
+  regExp: CHECK_LIST_REGEX,
+  replace: listReplace('check'),
+  type: 'element',
+};
+export const ORDERED_LIST: ElementTransformer = {
+  dependencies: [ListNode, ListItemNode],
+  export: (node, exportChildren) => {
+    return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
+  },
+  regExp: ORDERED_LIST_REGEX,
+  replace: listReplace('number'),
+  type: 'element',
+};
+export const INLINE_CODE: TextFormatTransformer = {
+  format: ['code'],
+  tag: '`',
+  type: 'text-format',
+};
+export const HIGHLIGHT: TextFormatTransformer = {
+  format: ['highlight'],
+  tag: '==',
+  type: 'text-format',
+};
+export const BOLD_ITALIC_STAR: TextFormatTransformer = {
+  format: ['bold', 'italic'],
+  tag: '***',
+  type: 'text-format',
+};
+export const BOLD_ITALIC_UNDERSCORE: TextFormatTransformer = {
+  format: ['bold', 'italic'],
+  intraword: false,
+  tag: '___',
+  type: 'text-format',
+};
+export const BOLD_STAR: TextFormatTransformer = {
+  format: ['bold'],
+  tag: '**',
+  type: 'text-format',
+};
+export const BOLD_UNDERSCORE: TextFormatTransformer = {
+  format: ['bold'],
+  intraword: false,
+  tag: '__',
+  type: 'text-format',
+};
+export const STRIKETHROUGH: TextFormatTransformer = {
+  format: ['strikethrough'],
+  tag: '~~',
+  type: 'text-format',
+};
+export const ITALIC_STAR: TextFormatTransformer = {
+  format: ['italic'],
+  tag: '*',
+  type: 'text-format',
+};
+export const ITALIC_UNDERSCORE: TextFormatTransformer = {
+  format: ['italic'],
+  intraword: false,
+  tag: '_',
+  type: 'text-format',
+};
+// Order of text transformers matters:
+//
+// - code should go first as it prevents any transformations inside
+// - then longer tags match (e.g. ** or __ should go before * or _)
+export const LINK: TextMatchTransformer = {
+  dependencies: [LinkNode],
+  export: (node, exportChildren, exportFormat) => {
+    if (!$isLinkNode(node)) {
+      return null;
+    }
+    const title = node.getTitle();
+    const textContent = exportChildren(node);
+    const linkContent = title
+      ? `[${textContent}](${node.getURL()} "${title}")`
+      : `[${textContent}](${node.getURL()})`;
+    return linkContent;
+  },
+  importRegExp:
+    /(?:\[([^[]+)\])(?:\((?:([^()\s]+)(?:\s"((?:[^"]*\\")*[^"]*)"\s*)?)\))/,
+  regExp:
+    /(?:\[([^[]+)\])(?:\((?:([^()\s]+)(?:\s"((?:[^"]*\\")*[^"]*)"\s*)?)\))$/,
+  replace: (textNode, match) => {
+    const [, linkText, linkUrl, linkTitle] = match;
+    const linkNode = $createLinkNode(linkUrl, {title: linkTitle});
+    const linkTextNode = $createTextNode(linkText);
+    linkTextNode.setFormat(textNode.getFormat());
+    linkNode.append(linkTextNode);
+    textNode.replace(linkNode);
+    return linkTextNode;
+  },
+  trigger: ')',
+  type: 'text-match',
+};
+export function normalizeMarkdown(
+  input: string,
+  shouldMergeAdjacentLines = false,
+): string {
+  const lines = input.split('\n');
+  let inCodeBlock = false;
+  const sanitizedLines: string[] = [];
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    const lastLine = sanitizedLines[sanitizedLines.length - 1];
+    // Code blocks of ```single line``` don't toggle the inCodeBlock flag
+    if (CODE_SINGLE_LINE_REGEX.test(line)) {
+      sanitizedLines.push(line);
+      continue;
+    }
+    // Detect the start or end of a code block
+    if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) {
+      inCodeBlock = !inCodeBlock;
+      sanitizedLines.push(line);
+      continue;
+    }
+    // If we are inside a code block, keep the line unchanged
+    if (inCodeBlock) {
+      sanitizedLines.push(line);
+      continue;
+    }
+    // In markdown the concept of "empty paragraphs" does not exist.
+    // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
+    if (
+      line === '' ||
+      lastLine === '' ||
+      !lastLine ||
+      HEADING_REGEX.test(lastLine) ||
+      HEADING_REGEX.test(line) ||
+      QUOTE_REGEX.test(line) ||
+      ORDERED_LIST_REGEX.test(line) ||
+      UNORDERED_LIST_REGEX.test(line) ||
+      CHECK_LIST_REGEX.test(line) ||
+      TABLE_ROW_REG_EXP.test(line) ||
+      TABLE_ROW_DIVIDER_REG_EXP.test(line) ||
+      !shouldMergeAdjacentLines
+    ) {
+      sanitizedLines.push(line);
+    } else {
+      sanitizedLines[sanitizedLines.length - 1] = lastLine + line;
+    }
+  }
+  return sanitizedLines.join('\n');
+}