npm - overtype - Versions diffs - 1.2.3 → 1.2.5 - Mend

overtype 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/src/parser.js CHANGED Viewed

@@ -1,6 +1,6 @@
 /**
  * MarkdownParser - Parses markdown into HTML while preserving character alignment
- *
+ *
  * Key principles:
  * - Every character must occupy the exact same position as in the textarea
  * - No font-size changes, no padding/margin on inline elements
@@ -9,14 +9,14 @@
 export class MarkdownParser {
   // Track link index for anchor naming
   static linkIndex = 0;
   /**
    * Reset the static link counter to 0. The counter is read as `this.linkIndex++` when a link's `--link-N` anchor name is built, and parse() calls this before each run.
    */
   static resetLinkIndex() {
     this.linkIndex = 0;
   }
   /**
    * Escape HTML special characters
    * @param {string} text - Raw text to escape
@@ -134,8 +134,27 @@ export class MarkdownParser {
    * @returns {string} HTML with italic styling
    */
   static parseItalic(html) {
+    // Single asterisk: a delimiter is rejected when another asterisk sits directly before or after it
     html = html.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, '<em><span class="syntax-marker">*</span>$1<span class="syntax-marker">*</span></em>');
-    html = html.replace(/(?<!_)_(?!_)(.+?)(?<!_)_(?!_)/g, '<em><span class="syntax-marker">_</span>$1<span class="syntax-marker">_</span></em>');
+    // Single underscore: the opener must follow start-of-string or whitespace, and the closer must be
+    // followed by whitespace or end-of-string, so underscores inside words like "bold_with_underscore" are left alone
+    html = html.replace(/(?<=^|\s)_(?!_)(.+?)(?<!_)_(?!_)(?=\s|$)/g, '<em><span class="syntax-marker">_</span>$1<span class="syntax-marker">_</span></em>');
+    return html;
+  }
+  /**
+   * Wrap ~text~ and ~~text~~ in <del>, keeping the tilde delimiters visible inside syntax-marker spans
+   * Delimiters that touch an extra tilde (runs of 3+ tildes) are not matched
+   * @param {string} html - Text or HTML that may contain tilde-delimited strikethrough
+   * @returns {string} The input with each matched span wrapped in <del>
+   */
+  static parseStrikethrough(html) {
+    // Double tilde first: ~~text~~, where neither delimiter may touch a third tilde
+    html = html.replace(/(?<!~)~~(?!~)(.+?)(?<!~)~~(?!~)/g, '<del><span class="syntax-marker">~~</span>$1<span class="syntax-marker">~~</span></del>');
+    // Single tilde second: ~text~; the ~~ markers emitted above cannot match because each of their tildes has a tilde neighbour
+    html = html.replace(/(?<!~)~(?!~)(.+?)(?<!~)~(?!~)/g, '<del><span class="syntax-marker">~</span>$1<span class="syntax-marker">~</span></del>');
     return html;
   }
@@ -165,7 +184,7 @@ export class MarkdownParser {
     // Trim whitespace and convert to lowercase for protocol check
     const trimmed = url.trim();
     const lower = trimmed.toLowerCase();
     // Allow safe protocols
     const safeProtocols = [
       'http://',
@@ -174,22 +193,22 @@ export class MarkdownParser {
       'ftp://',
       'ftps://'
     ];
     // Check if URL starts with a safe protocol
     const hasSafeProtocol = safeProtocols.some(protocol => lower.startsWith(protocol));
     // Allow relative URLs (starting with / or # or no protocol)
-    const isRelative = trimmed.startsWith('/') ||
-                      trimmed.startsWith('#') ||
+    const isRelative = trimmed.startsWith('/') ||
+                      trimmed.startsWith('#') ||
                       trimmed.startsWith('?') ||
                       trimmed.startsWith('.') ||
                       (!trimmed.includes(':') && !trimmed.includes('//'));
     // If safe protocol or relative URL, return as-is
     if (hasSafeProtocol || isRelative) {
       return url;
     }
     // Block dangerous protocols (javascript:, data:, vbscript:, etc.)
     return '#';
   }
@@ -210,49 +229,158 @@ export class MarkdownParser {
   }
   /**
-   * Parse all inline elements in correct order
-   * @param {string} text - Text with potential inline markdown
-   * @returns {string} HTML with all inline styling
+   * Identify and protect sanctuaries (code and links) before parsing
+   * @param {string} text - Text with potential markdown
+   * @returns {Object} Object with protected text and sanctuary map
    */
-  static parseInlineElements(text) {
-    let html = text;
-    // Order matters: parse code first
-    html = this.parseInlineCode(html);
-    // Use placeholders to protect inline code while preserving formatting spans
-    // We use Unicode Private Use Area (U+E000-U+F8FF) as placeholders because:
-    // 1. These characters are reserved for application-specific use
-    // 2. They'll never appear in user text
-    // 3. They maintain single-character width (important for alignment)
-    // 4. They're invisible if accidentally rendered
+  static identifyAndProtectSanctuaries(text) {
     const sanctuaries = new Map();
+    let sanctuaryCounter = 0;
+    let protectedText = text;
+    // Array of { start, end } offsets into the original text that cover link URLs (URLs should not be processed)
+    const protectedRegions = [];
+    // First, find all links and record the offsets of their URL part
+    const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
+    let linkMatch;
+    while ((linkMatch = linkRegex.exec(text)) !== null) {
+      // Calculate the exact position of the URL part:
+      // linkMatch.index is the start of the whole [text](url) match,
+      // so locate "](" inside the match and add 2 to land on the first URL character
+      const bracketPos = linkMatch.index + linkMatch[0].indexOf('](');
+      const urlStart = bracketPos + 2;
+      const urlEnd = urlStart + linkMatch[2].length;
+      protectedRegions.push({ start: urlStart, end: urlEnd });
+    }
+    // Now collect inline code spans, skipping any that lie entirely inside a recorded URL region
+    const codeRegex = /(?<!`)(`+)(?!`)((?:(?!\1).)+?)(\1)(?!`)/g;
+    let codeMatch;
+    const codeMatches = [];
+    while ((codeMatch = codeRegex.exec(text)) !== null) {
+      const codeStart = codeMatch.index;
+      const codeEnd = codeMatch.index + codeMatch[0].length;
+      // Both ends of the code span must fall within a single URL region for it to be skipped
+      const inProtectedRegion = protectedRegions.some(region =>
+        codeStart >= region.start && codeEnd <= region.end
+      );
+      if (!inProtectedRegion) {
+        codeMatches.push({
+          match: codeMatch[0],
+          index: codeMatch.index,
+          openTicks: codeMatch[1],
+          content: codeMatch[2],
+          closeTicks: codeMatch[3]
+        });
+      }
+    }
-    // Protect code blocks
-    html = html.replace(/(<code>.*?<\/code>)/g, (match) => {
-      const placeholder = `\uE000${sanctuaries.size}\uE001`;
-      sanctuaries.set(placeholder, match);
+    // Replace from the last match to the first so earlier offsets stay valid while protectedText changes length
+    codeMatches.sort((a, b) => b.index - a.index);
+    codeMatches.forEach(codeInfo => {
+      const placeholder = `\uE000${sanctuaryCounter++}\uE001`;
+      sanctuaries.set(placeholder, {
+        type: 'code',
+        original: codeInfo.match,
+        openTicks: codeInfo.openTicks,
+        content: codeInfo.content,
+        closeTicks: codeInfo.closeTicks
+      });
+      protectedText = protectedText.substring(0, codeInfo.index) +
+                     placeholder +
+                     protectedText.substring(codeInfo.index + codeInfo.match.length);
+    });
+    // Then protect whole links - code in the link text is already a placeholder here; code inside the URL was left raw
+    protectedText = protectedText.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, linkText, url) => {
+      const placeholder = `\uE000${sanctuaryCounter++}\uE001`;
+      sanctuaries.set(placeholder, {
+        type: 'link',
+        original: match,
+        linkText,
+        url
+      });
       return placeholder;
     });
-    // Parse links AFTER protecting code but BEFORE bold/italic
-    // This ensures link URLs don't get processed as markdown
-    html = this.parseLinks(html);
+    return { protectedText, sanctuaries };
+  }
+  /**
+   * Restore and transform sanctuaries back to HTML
+   * @param {string} html - HTML with sanctuary placeholders
+   * @param {Map} sanctuaries - Map of sanctuaries to restore
+   * @returns {string} HTML with sanctuaries restored and transformed
+   */
+  static restoreAndTransformSanctuaries(html, sanctuaries) {
+    // Sort sanctuary placeholders by position to restore in order
+    const placeholders = Array.from(sanctuaries.keys()).sort((a, b) => {
+      const indexA = html.indexOf(a);
+      const indexB = html.indexOf(b);
+      return indexA - indexB;
+    });
-    // Protect entire link elements (not just the URL part)
-    html = html.replace(/(<a[^>]*>.*?<\/a>)/g, (match) => {
-      const placeholder = `\uE000${sanctuaries.size}\uE001`;
-      sanctuaries.set(placeholder, match);
-      return placeholder;
+    placeholders.forEach(placeholder => {
+      const sanctuary = sanctuaries.get(placeholder);
+      let replacement;
+      if (sanctuary.type === 'code') {
+        // Transform code sanctuary to HTML
+        replacement = `<code><span class="syntax-marker">${sanctuary.openTicks}</span>${this.escapeHtml(sanctuary.content)}<span class="syntax-marker">${sanctuary.closeTicks}</span></code>`;
+      } else if (sanctuary.type === 'link') {
+        // For links, we need to process the link text for markdown
+        let processedLinkText = sanctuary.linkText;
+        // First restore any sanctuary placeholders that were already in the link text
+        // (e.g., inline code that was protected before the link)
+        sanctuaries.forEach((innerSanctuary, innerPlaceholder) => {
+          if (processedLinkText.includes(innerPlaceholder)) {
+            if (innerSanctuary.type === 'code') {
+              const codeHtml = `<code><span class="syntax-marker">${innerSanctuary.openTicks}</span>${this.escapeHtml(innerSanctuary.content)}<span class="syntax-marker">${innerSanctuary.closeTicks}</span></code>`;
+              processedLinkText = processedLinkText.replace(innerPlaceholder, codeHtml);
+            }
+          }
+        });
+        // Now parse other markdown in the link text (bold, italic, etc)
+        processedLinkText = this.parseStrikethrough(processedLinkText);
+        processedLinkText = this.parseBold(processedLinkText);
+        processedLinkText = this.parseItalic(processedLinkText);
+        // Transform link sanctuary to HTML
+        // URL should NOT be processed for markdown - use it as-is
+        const anchorName = `--link-${this.linkIndex++}`;
+        const safeUrl = this.sanitizeUrl(sanctuary.url);
+        replacement = `<a href="${safeUrl}" style="anchor-name: ${anchorName}"><span class="syntax-marker">[</span>${processedLinkText}<span class="syntax-marker url-part">](${this.escapeHtml(sanctuary.url)})</span></a>`;
+      }
+      html = html.replace(placeholder, replacement);
     });
-    // Process other inline elements on text with placeholders
+    return html;
+  }
+  /**
+   * Parse all inline elements: protect code and links, style the remaining text, then restore them
+   * @param {string} text - Text with potential inline markdown
+   * @returns {string} HTML with all inline styling
+   */
+  static parseInlineElements(text) {
+    // Step 1: Swap code spans and links for placeholders so the passes below cannot touch them
+    const { protectedText, sanctuaries } = this.identifyAndProtectSanctuaries(text);
+    // Step 2: Run strikethrough, bold and italic, in that order, on the placeholder text
+    let html = protectedText;
+    html = this.parseStrikethrough(html);
     html = this.parseBold(html);
     html = this.parseItalic(html);
-    // Restore all sanctuaries
-    sanctuaries.forEach((content, placeholder) => {
-      html = html.replace(placeholder, content);
-    });
+    // Step 3: Replace each placeholder with the rendered HTML for its code span or link
+    html = this.restoreAndTransformSanctuaries(html, sanctuaries);
     return html;
   }
@@ -264,33 +392,33 @@ export class MarkdownParser {
    */
   static parseLine(line) {
     let html = this.escapeHtml(line);
     // Preserve indentation
     html = this.preserveIndentation(html, line);
     // Check for block elements first
     const horizontalRule = this.parseHorizontalRule(html);
     if (horizontalRule) return horizontalRule;
     const codeBlock = this.parseCodeBlock(html);
     if (codeBlock) return codeBlock;
     // Parse block elements
     html = this.parseHeader(html);
     html = this.parseBlockquote(html);
     html = this.parseBulletList(html);
     html = this.parseNumberedList(html);
     // Parse inline elements
     html = this.parseInlineElements(html);
     // Wrap in div to maintain line structure
     if (html.trim() === '') {
       // Intentionally use &nbsp; for empty lines to maintain vertical spacing
       // This causes a 0->1 character count difference but preserves visual alignment
       return '<div>&nbsp;</div>';
     }
     return `<div>${html}</div>`;
   }
@@ -304,17 +432,17 @@ export class MarkdownParser {
   static parse(text, activeLine = -1, showActiveLineRaw = false) {
     // Reset link counter for each parse
     this.resetLinkIndex();
     const lines = text.split('\n');
     let inCodeBlock = false;
     const parsedLines = lines.map((line, index) => {
       // Show raw markdown on active line if requested
       if (showActiveLineRaw && index === activeLine) {
         const content = this.escapeHtml(line) || '&nbsp;';
         return `<div class="raw-line">${content}</div>`;
       }
       // Check if this line is a code fence
       const codeFenceRegex = /^```[^`]*$/;
       if (codeFenceRegex.test(line)) {
@@ -322,21 +450,21 @@ export class MarkdownParser {
         // Parse fence markers normally to get styled output
         return this.parseLine(line);
       }
       // If we're inside a code block, don't parse as markdown
       if (inCodeBlock) {
         const escaped = this.escapeHtml(line);
         const indented = this.preserveIndentation(escaped, line);
         return `<div>${indented || '&nbsp;'}</div>`;
       }
       // Otherwise, parse the markdown normally
       return this.parseLine(line);
     });
     // Join without newlines to prevent extra spacing
     const html = parsedLines.join('');
     // Apply post-processing for list consolidation
     return this.postProcessHTML(html);
   }
@@ -352,25 +480,25 @@ export class MarkdownParser {
       // In Node.js environment - do manual post-processing
       return this.postProcessHTMLManual(html);
     }
     // Parse HTML string into DOM
     const container = document.createElement('div');
     container.innerHTML = html;
     let currentList = null;
     let listType = null;
     let currentCodeBlock = null;
     let inCodeBlock = false;
     // Process all direct children - need to be careful with live NodeList
     const children = Array.from(container.children);
     for (let i = 0; i < children.length; i++) {
       const child = children[i];
       // Skip if child was already processed/removed
       if (!child.parentNode) continue;
       // Check for code fence start/end
       const codeFence = child.querySelector('.code-fence');
       if (codeFence) {
@@ -379,22 +507,22 @@ export class MarkdownParser {
           if (!inCodeBlock) {
             // Start of code block - keep fence visible, then add pre/code
             inCodeBlock = true;
             // Create the code block that will follow the fence
             currentCodeBlock = document.createElement('pre');
             const codeElement = document.createElement('code');
             currentCodeBlock.appendChild(codeElement);
             currentCodeBlock.className = 'code-block';
             // Extract language if present
             const lang = fenceText.slice(3).trim();
             if (lang) {
               codeElement.className = `language-${lang}`;
             }
             // Insert code block after the fence div (don't remove the fence)
             container.insertBefore(currentCodeBlock, child.nextSibling);
             // Store reference to the code element for adding content
             currentCodeBlock._codeElement = codeElement;
             continue;
@@ -406,7 +534,7 @@ export class MarkdownParser {
           }
         }
       }
       // Check if we're in a code block - any div that's not a code fence
       if (inCodeBlock && currentCodeBlock && child.tagName === 'DIV' && !child.querySelector('.code-fence')) {
         const codeElement = currentCodeBlock._codeElement || currentCodeBlock.querySelector('code');
@@ -422,36 +550,52 @@ export class MarkdownParser {
         child.remove();
         continue;
       }
       // Check if this div contains a list item
       let listItem = null;
       if (child.tagName === 'DIV') {
         // Look for li inside the div
         listItem = child.querySelector('li');
       }
       if (listItem) {
         const isBullet = listItem.classList.contains('bullet-list');
         const isOrdered = listItem.classList.contains('ordered-list');
         if (!isBullet && !isOrdered) {
           currentList = null;
           listType = null;
           continue;
         }
         const newType = isBullet ? 'ul' : 'ol';
         // Start new list or continue current
         if (!currentList || listType !== newType) {
           currentList = document.createElement(newType);
           container.insertBefore(currentList, child);
           listType = newType;
         }
+        // Extract and preserve indentation from the div before moving the list item
+        const indentationNodes = [];
+        for (const node of child.childNodes) {
+          if (node.nodeType === 3 && node.textContent.match(/^\u00A0+$/)) {
+            // This is a text node containing only non-breaking spaces (indentation)
+            indentationNodes.push(node.cloneNode(true));
+          } else if (node === listItem) {
+            break; // Stop when we reach the list item
+          }
+        }
+        // Add indentation to the list item
+        indentationNodes.forEach(node => {
+          listItem.insertBefore(node, listItem.firstChild);
+        });
         // Move the list item to the current list
         currentList.appendChild(listItem);
         // Remove the now-empty div wrapper
         child.remove();
       } else {
@@ -460,7 +604,7 @@ export class MarkdownParser {
         listType = null;
       }
     }
     return container.innerHTML;
   }
@@ -471,25 +615,53 @@ export class MarkdownParser {
    */
   static postProcessHTMLManual(html) {
     let processed = html;
     // Process unordered lists
     processed = processed.replace(/((?:<div>(?:&nbsp;)*<li class="bullet-list">.*?<\/li><\/div>\s*)+)/gs, (match) => {
-      const items = match.match(/<li class="bullet-list">.*?<\/li>/gs) || [];
-      if (items.length > 0) {
+      const divs = match.match(/<div>(?:&nbsp;)*<li class="bullet-list">.*?<\/li><\/div>/gs) || [];
+      if (divs.length > 0) {
+        const items = divs.map(div => {
+          // Extract indentation and list item
+          const indentMatch = div.match(/<div>((?:&nbsp;)*)<li/);
+          const listItemMatch = div.match(/<li class="bullet-list">.*?<\/li>/);
+          if (indentMatch && listItemMatch) {
+            const indentation = indentMatch[1];
+            const listItem = listItemMatch[0];
+            // Insert indentation at the start of the list item content
+            return listItem.replace(/<li class="bullet-list">/, `<li class="bullet-list">${indentation}`);
+          }
+          return listItemMatch ? listItemMatch[0] : '';
+        }).filter(Boolean);
         return '<ul>' + items.join('') + '</ul>';
       }
       return match;
     });
     // Process ordered lists
     processed = processed.replace(/((?:<div>(?:&nbsp;)*<li class="ordered-list">.*?<\/li><\/div>\s*)+)/gs, (match) => {
-      const items = match.match(/<li class="ordered-list">.*?<\/li>/gs) || [];
-      if (items.length > 0) {
+      const divs = match.match(/<div>(?:&nbsp;)*<li class="ordered-list">.*?<\/li><\/div>/gs) || [];
+      if (divs.length > 0) {
+        const items = divs.map(div => {
+          // Extract indentation and list item
+          const indentMatch = div.match(/<div>((?:&nbsp;)*)<li/);
+          const listItemMatch = div.match(/<li class="ordered-list">.*?<\/li>/);
+          if (indentMatch && listItemMatch) {
+            const indentation = indentMatch[1];
+            const listItem = listItemMatch[0];
+            // Insert indentation at the start of the list item content
+            return listItem.replace(/<li class="ordered-list">/, `<li class="ordered-list">${indentation}`);
+          }
+          return listItemMatch ? listItemMatch[0] : '';
+        }).filter(Boolean);
         return '<ol>' + items.join('') + '</ol>';
       }
       return match;
     });
     // Process code blocks - KEEP the fence markers for alignment AND use semantic pre/code
     const codeBlockRegex = /<div><span class="code-fence">(```[^<]*)<\/span><\/div>(.*?)<div><span class="code-fence">(```)<\/span><\/div>/gs;
     processed = processed.replace(codeBlockRegex, (match, openFence, content, closeFence) => {
@@ -501,20 +673,20 @@ export class MarkdownParser {
           .replace(/&nbsp;/g, ' ');
         return text;
       }).join('\n');
       // Extract language from the opening fence
       const lang = openFence.slice(3).trim();
       const langClass = lang ? ` class="language-${lang}"` : '';
       // Keep fence markers visible as separate divs, with pre/code block between them
       let result = `<div><span class="code-fence">${openFence}</span></div>`;
       // Content is already escaped, don't double-escape
       result += `<pre class="code-block"><code${langClass}>${codeContent}</code></pre>`;
       result += `<div><span class="code-fence">${closeFence}</span></div>`;
       return result;
     });
     return processed;
   }
@@ -539,7 +711,7 @@ export class MarkdownParser {
     let currentPos = 0;
     let lineIndex = 0;
     let lineStart = 0;
     for (let i = 0; i < lines.length; i++) {
       const lineLength = lines[i].length;
       if (currentPos + lineLength >= cursorPosition) {
@@ -549,10 +721,10 @@ export class MarkdownParser {
       }
       currentPos += lineLength + 1; // +1 for newline
     }
     const currentLine = lines[lineIndex];
     const lineEnd = lineStart + currentLine.length;
     // Check for checkbox first (most specific)
     const checkboxMatch = currentLine.match(this.LIST_PATTERNS.checkbox);
     if (checkboxMatch) {
@@ -568,7 +740,7 @@ export class MarkdownParser {
         markerEndPos: lineStart + checkboxMatch[1].length + checkboxMatch[2].length + 5 // indent + "- [ ] "
       };
     }
     // Check for bullet list
     const bulletMatch = currentLine.match(this.LIST_PATTERNS.bullet);
     if (bulletMatch) {
@@ -583,7 +755,7 @@ export class MarkdownParser {
         markerEndPos: lineStart + bulletMatch[1].length + bulletMatch[2].length + 1 // indent + marker + space
       };
     }
     // Check for numbered list
     const numberedMatch = currentLine.match(this.LIST_PATTERNS.numbered);
     if (numberedMatch) {
@@ -598,7 +770,7 @@ export class MarkdownParser {
         markerEndPos: lineStart + numberedMatch[1].length + numberedMatch[2].length + 2 // indent + number + ". "
       };
     }
     // Not in a list
     return {
       inList: false,
@@ -639,31 +811,31 @@ export class MarkdownParser {
     const lines = text.split('\n');
     const numbersByIndent = new Map();
     let inList = false;
     const result = lines.map(line => {
       const match = line.match(this.LIST_PATTERNS.numbered);
       if (match) {
         const indent = match[1];
         const indentLevel = indent.length;
         const content = match[3];
         // If we weren't in a list or indent changed, reset lower levels
         if (!inList) {
           numbersByIndent.clear();
         }
         // Get the next number for this indent level
         const currentNumber = (numbersByIndent.get(indentLevel) || 0) + 1;
         numbersByIndent.set(indentLevel, currentNumber);
         // Clear deeper indent levels
         for (const [level] of numbersByIndent) {
           if (level > indentLevel) {
             numbersByIndent.delete(level);
           }
         }
         inList = true;
         return `${indent}${currentNumber}. ${content}`;
       } else {
@@ -676,7 +848,7 @@ export class MarkdownParser {
         return line;
       }
     });
     return result.join('\n');
   }
-}
+}