overtype 1.2.4 → 1.2.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/src/parser.js CHANGED
@@ -134,8 +134,13 @@ export class MarkdownParser {
134
134
  * @returns {string} HTML with italic styling
135
135
  */
136
136
  static parseItalic(html) {
137
+ // Single asterisk - must not be adjacent to other asterisks
137
138
  html = html.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, '<em><span class="syntax-marker">*</span>$1<span class="syntax-marker">*</span></em>');
138
- html = html.replace(/(?<!_)_(?!_)(.+?)(?<!_)_(?!_)/g, '<em><span class="syntax-marker">_</span>$1<span class="syntax-marker">_</span></em>');
139
+
140
+ // Single underscore - must be at word boundaries to avoid matching inside words
141
+ // This prevents matching underscores in the middle of words like "bold_with_underscore"
142
+ html = html.replace(/(?<=^|\s)_(?!_)(.+?)(?<!_)_(?!_)(?=\s|$)/g, '<em><span class="syntax-marker">_</span>$1<span class="syntax-marker">_</span></em>');
143
+
139
144
  return html;
140
145
  }
141
146
 
@@ -224,50 +229,158 @@ export class MarkdownParser {
224
229
  }
225
230
 
226
231
  /**
227
- * Parse all inline elements in correct order
228
- * @param {string} text - Text with potential inline markdown
229
- * @returns {string} HTML with all inline styling
232
+ * Identify and protect sanctuaries (code and links) before parsing
233
+ * @param {string} text - Text with potential markdown
234
+ * @returns {Object} Object with protected text and sanctuary map
230
235
  */
231
- static parseInlineElements(text) {
232
- let html = text;
233
- // Order matters: parse code first
234
- html = this.parseInlineCode(html);
235
-
236
- // Use placeholders to protect inline code while preserving formatting spans
237
- // We use Unicode Private Use Area (U+E000-U+F8FF) as placeholders because:
238
- // 1. These characters are reserved for application-specific use
239
- // 2. They'll never appear in user text
240
- // 3. They maintain single-character width (important for alignment)
241
- // 4. They're invisible if accidentally rendered
236
+ static identifyAndProtectSanctuaries(text) {
242
237
  const sanctuaries = new Map();
238
+ let sanctuaryCounter = 0;
239
+ let protectedText = text;
240
+
241
+ // Create a map to track protected regions (URLs should not be processed)
242
+ const protectedRegions = [];
243
+
244
+ // First, find all links and mark their URL regions as protected
245
+ const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
246
+ let linkMatch;
247
+ while ((linkMatch = linkRegex.exec(text)) !== null) {
248
+ // Calculate the exact position of the URL part
249
+ // linkMatch.index is the start of the match
250
+ // We need to find where "](" starts, then add 2 to get URL start
251
+ const bracketPos = linkMatch.index + linkMatch[0].indexOf('](');
252
+ const urlStart = bracketPos + 2;
253
+ const urlEnd = urlStart + linkMatch[2].length;
254
+ protectedRegions.push({ start: urlStart, end: urlEnd });
255
+ }
256
+
257
+ // Now protect inline code, but skip if it's inside a protected region (URL)
258
+ const codeRegex = /(?<!`)(`+)(?!`)((?:(?!\1).)+?)(\1)(?!`)/g;
259
+ let codeMatch;
260
+ const codeMatches = [];
243
261
 
244
- // Protect code blocks
245
- html = html.replace(/(<code>.*?<\/code>)/g, (match) => {
246
- const placeholder = `\uE000${sanctuaries.size}\uE001`;
247
- sanctuaries.set(placeholder, match);
262
+ while ((codeMatch = codeRegex.exec(text)) !== null) {
263
+ const codeStart = codeMatch.index;
264
+ const codeEnd = codeMatch.index + codeMatch[0].length;
265
+
266
+ // Check if this code is inside a protected URL region
267
+ const inProtectedRegion = protectedRegions.some(region =>
268
+ codeStart >= region.start && codeEnd <= region.end
269
+ );
270
+
271
+ if (!inProtectedRegion) {
272
+ codeMatches.push({
273
+ match: codeMatch[0],
274
+ index: codeMatch.index,
275
+ openTicks: codeMatch[1],
276
+ content: codeMatch[2],
277
+ closeTicks: codeMatch[3]
278
+ });
279
+ }
280
+ }
281
+
282
+ // Replace code matches from end to start to preserve indices
283
+ codeMatches.sort((a, b) => b.index - a.index);
284
+ codeMatches.forEach(codeInfo => {
285
+ const placeholder = `\uE000${sanctuaryCounter++}\uE001`;
286
+ sanctuaries.set(placeholder, {
287
+ type: 'code',
288
+ original: codeInfo.match,
289
+ openTicks: codeInfo.openTicks,
290
+ content: codeInfo.content,
291
+ closeTicks: codeInfo.closeTicks
292
+ });
293
+ protectedText = protectedText.substring(0, codeInfo.index) +
294
+ placeholder +
295
+ protectedText.substring(codeInfo.index + codeInfo.match.length);
296
+ });
297
+
298
+ // Then protect links - they can contain sanctuary placeholders for code but not raw code
299
+ protectedText = protectedText.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, linkText, url) => {
300
+ const placeholder = `\uE000${sanctuaryCounter++}\uE001`;
301
+ sanctuaries.set(placeholder, {
302
+ type: 'link',
303
+ original: match,
304
+ linkText,
305
+ url
306
+ });
248
307
  return placeholder;
249
308
  });
250
309
 
251
- // Parse links AFTER protecting code but BEFORE bold/italic
252
- // This ensures link URLs don't get processed as markdown
253
- html = this.parseLinks(html);
310
+ return { protectedText, sanctuaries };
311
+ }
312
+
313
+ /**
314
+ * Restore and transform sanctuaries back to HTML
315
+ * @param {string} html - HTML with sanctuary placeholders
316
+ * @param {Map} sanctuaries - Map of sanctuaries to restore
317
+ * @returns {string} HTML with sanctuaries restored and transformed
318
+ */
319
+ static restoreAndTransformSanctuaries(html, sanctuaries) {
320
+ // Sort sanctuary placeholders by position to restore in order
321
+ const placeholders = Array.from(sanctuaries.keys()).sort((a, b) => {
322
+ const indexA = html.indexOf(a);
323
+ const indexB = html.indexOf(b);
324
+ return indexA - indexB;
325
+ });
254
326
 
255
- // Protect entire link elements (not just the URL part)
256
- html = html.replace(/(<a[^>]*>.*?<\/a>)/g, (match) => {
257
- const placeholder = `\uE000${sanctuaries.size}\uE001`;
258
- sanctuaries.set(placeholder, match);
259
- return placeholder;
327
+ placeholders.forEach(placeholder => {
328
+ const sanctuary = sanctuaries.get(placeholder);
329
+ let replacement;
330
+
331
+ if (sanctuary.type === 'code') {
332
+ // Transform code sanctuary to HTML
333
+ replacement = `<code><span class="syntax-marker">${sanctuary.openTicks}</span>${this.escapeHtml(sanctuary.content)}<span class="syntax-marker">${sanctuary.closeTicks}</span></code>`;
334
+ } else if (sanctuary.type === 'link') {
335
+ // For links, we need to process the link text for markdown
336
+ let processedLinkText = sanctuary.linkText;
337
+
338
+ // First restore any sanctuary placeholders that were already in the link text
339
+ // (e.g., inline code that was protected before the link)
340
+ sanctuaries.forEach((innerSanctuary, innerPlaceholder) => {
341
+ if (processedLinkText.includes(innerPlaceholder)) {
342
+ if (innerSanctuary.type === 'code') {
343
+ const codeHtml = `<code><span class="syntax-marker">${innerSanctuary.openTicks}</span>${this.escapeHtml(innerSanctuary.content)}<span class="syntax-marker">${innerSanctuary.closeTicks}</span></code>`;
344
+ processedLinkText = processedLinkText.replace(innerPlaceholder, codeHtml);
345
+ }
346
+ }
347
+ });
348
+
349
+ // Now parse other markdown in the link text (bold, italic, etc)
350
+ processedLinkText = this.parseStrikethrough(processedLinkText);
351
+ processedLinkText = this.parseBold(processedLinkText);
352
+ processedLinkText = this.parseItalic(processedLinkText);
353
+
354
+ // Transform link sanctuary to HTML
355
+ // URL should NOT be processed for markdown - use it as-is
356
+ const anchorName = `--link-${this.linkIndex++}`;
357
+ const safeUrl = this.sanitizeUrl(sanctuary.url);
358
+ replacement = `<a href="${safeUrl}" style="anchor-name: ${anchorName}"><span class="syntax-marker">[</span>${processedLinkText}<span class="syntax-marker url-part">](${this.escapeHtml(sanctuary.url)})</span></a>`;
359
+ }
360
+
361
+ html = html.replace(placeholder, replacement);
260
362
  });
261
363
 
262
- // Process other inline elements on text with placeholders
364
+ return html;
365
+ }
366
+
367
+ /**
368
+ * Parse all inline elements in correct order
369
+ * @param {string} text - Text with potential inline markdown
370
+ * @returns {string} HTML with all inline styling
371
+ */
372
+ static parseInlineElements(text) {
373
+ // Step 1: Identify and protect sanctuaries (code and links)
374
+ const { protectedText, sanctuaries } = this.identifyAndProtectSanctuaries(text);
375
+
376
+ // Step 2: Parse other inline elements on protected text
377
+ let html = protectedText;
263
378
  html = this.parseStrikethrough(html);
264
379
  html = this.parseBold(html);
265
380
  html = this.parseItalic(html);
266
381
 
267
- // Restore all sanctuaries
268
- sanctuaries.forEach((content, placeholder) => {
269
- html = html.replace(placeholder, content);
270
- });
382
+ // Step 3: Restore and transform sanctuaries
383
+ html = this.restoreAndTransformSanctuaries(html, sanctuaries);
271
384
 
272
385
  return html;
273
386
  }