@arsedizioni/ars-utils 22.0.16 → 22.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,9 @@ import { SelectionModel } from '@angular/cdk/collections';
28
28
  * Supported syntax: headings, paragraphs, hard/soft breaks, hr, blockquotes
29
29
  * (nested), fenced code blocks, inline code, bold/italic/strikethrough, links
30
30
  * (with title), images, autolinked bare URLs, ordered/unordered lists (nested,
31
- * task lists, ordered start offset), GFM tables with alignment.
31
+ * task lists, ordered start offset), GFM tables with alignment, raw HTML
32
+ * blocks (e.g. <table>...</table>) passed through verbatim when escapeHtml
33
+ * is false: their newlines stay plain newlines, never converted to <br>.
32
34
  *
33
35
  * Known simplifications (documented, by design):
34
36
  * - A blank line terminates a list.
@@ -72,6 +74,161 @@ class MarkdownUtils {
72
74
  }
73
75
  // #endregion
74
76
  // #region Block parser (single pass)
77
+ /** Block-level HTML tags that start a raw HTML block (passthrough, no <p>/<br>). */
78
+ static { this.htmlBlockTags = new Set([
79
+ 'address', 'article', 'aside', 'blockquote', 'caption', 'colgroup', 'col',
80
+ 'dd', 'details', 'dialog', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
81
+ 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header',
82
+ 'hr', 'iframe', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'summary',
83
+ 'table', 'tbody', 'td', 'tfoot', 'thead', 'th', 'tr', 'ul',
84
+ ]); }
85
+ /** Void/self-contained tags: a single tag, no closing tag expected. */
86
+ static { this.htmlVoidTags = new Set(['hr', 'col', 'img', 'input']); }
87
+ /**
88
+ * Precompiled (global, case-insensitive): matches block-level tag occurrences, opening or closing, anywhere in a line.
89
+ * Anchored on the '<' literal, so scanning is cheap; the lookahead
90
+ * prevents partial matches ('<td' must not match inside '<tdx').
91
+ */
92
+ static { this.htmlBlockScanRe = new RegExp(`<(\\/?)(${[...MarkdownUtils.htmlBlockTags].join('|')})(?=[\\s/>])`, 'gi'); }
93
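+ /** Scans a line for the first raw HTML block start (a block-level tag or a `<!--` comment) outside inline code spans; returns the hit descriptor, or undefined when none is found. */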
94
+ static findHtmlBlockStart(line) {
95
+ // HTML comment candidate (skipping occurrences inside inline code spans)
96
+ let comment = line.indexOf('<!--');
97
+ while (comment !== -1 && this.insideCodeSpan(line, comment)) {
98
+ comment = line.indexOf('<!--', comment + 4);
99
+ }
100
+ // Block tag candidate (same code-span guard)
101
+ const re = this.htmlBlockScanRe;
102
+ re.lastIndex = 0;
103
+ let m;
104
+ let tagHit;
105
+ while ((m = re.exec(line)) !== null) {
106
+ if (!this.insideCodeSpan(line, m.index)) {
107
+ tagHit = { index: m.index, tag: m[2].toLowerCase(), isClose: m[1] === '/' };
108
+ break;
109
+ }
110
+ }
111
+ if (comment !== -1 && (!tagHit || comment < tagHit.index)) {
112
+ return { index: comment, tag: '', isClose: false, isComment: true };
113
+ }
114
+ return tagHit ? { ...tagHit, isComment: false } : undefined;
115
+ }
116
+ /** True when `index` falls inside an inline code span (odd backtick count before it). */
117
+ static insideCodeSpan(line, index) {
118
+ let count = 0;
119
+ for (let i = 0; i < index; i++) {
120
+ if (line.charCodeAt(i) === 96 /* ` */)
121
+ count++;
122
+ }
123
+ return (count & 1) === 1;
124
+ }
125
+ /**
126
+ * indexOf-based depth scanner (no regex): walks a lowercased copy of `text`
127
+ * adjusting `depth` for <tag ...> / </tag> occurrences of the SAME tag.
128
+ * Self-closing <tag ... /> forms do not alter depth.
129
+ * @returns [newDepth, endIndex] where endIndex is the position right after
130
+ * the '>' that balanced the element, or -1 when still open.
131
+ */
132
+ static scanHtmlDepth(text, tag, depth) {
133
+ const lower = text.toLowerCase();
134
+ const open = '<' + tag;
135
+ const close = '</' + tag;
136
+ let i = 0;
137
+ while ((i = lower.indexOf('<', i)) !== -1) {
138
+ if (lower.startsWith(close, i) && this.isTagBoundary(lower, i + close.length)) {
139
+ depth--;
140
+ const gt = lower.indexOf('>', i);
141
+ const end = gt === -1 ? lower.length : gt + 1;
142
+ if (depth <= 0)
143
+ return [0, end];
144
+ i = end;
145
+ }
146
+ else if (lower.startsWith(open, i) && this.isTagBoundary(lower, i + open.length)) {
147
+ const gt = lower.indexOf('>', i);
148
+ const selfClosing = gt !== -1 && lower.charCodeAt(gt - 1) === 47 /* / */;
149
+ if (selfClosing) {
150
+ if (depth === 0)
151
+ return [0, gt + 1]; // standalone self-closed element
152
+ }
153
+ else {
154
+ depth++;
155
+ }
156
+ i = gt === -1 ? lower.length : gt + 1;
157
+ }
158
+ else {
159
+ i++;
160
+ }
161
+ }
162
+ return [depth, -1];
163
+ }
164
+ /** A tag token must be followed by a space, a tab, '/', '>' or end of line. */
165
+ static isTagBoundary(s, idx) {
166
+ if (idx >= s.length)
167
+ return true; // tag opening continues on the next line
168
+ const c = s.charCodeAt(idx);
169
+ return c === 62 /* > */ || c === 47 /* / */ || c === 32 /* space */ || c === 9 /* tab */;
170
+ }
171
+ /**
172
+ * Re-injects the remainder of a partially consumed line so the main loop
173
+ * processes it as markdown. Returns the index the loop should resume from.
174
+ */
175
+ static pushBack(lines, i, remainder) {
176
+ if (remainder.trim()) {
177
+ lines[i] = remainder;
178
+ return i - 1; // the loop's i++ re-processes the remainder
179
+ }
180
+ return i;
181
+ }
182
+ /**
183
+ * Emits a raw HTML block starting at `hit.index` of lines[i], consuming
184
+ * following lines until the element is balanced. Text after the block on
185
+ * the closing line is pushed back for markdown processing.
186
+ * @returns the index the caller's loop resumes from: the last consumed line, or one less when trailing text was pushed back for re-processing
187
+ */
188
+ static emitHtmlBlock(lines, i, hit, out) {
189
+ const line = lines[i];
190
+ // HTML comment: raw until '-->'
191
+ if (hit.isComment) {
192
+ let end = line.indexOf('-->', hit.index);
193
+ if (end !== -1) {
194
+ out.push(line.slice(hit.index, end + 3));
195
+ return this.pushBack(lines, i, line.slice(end + 3));
196
+ }
197
+ out.push(line.slice(hit.index));
198
+ while (++i < lines.length) {
199
+ end = lines[i].indexOf('-->');
200
+ if (end !== -1) {
201
+ out.push(lines[i].slice(0, end + 3));
202
+ return this.pushBack(lines, i, lines[i].slice(end + 3));
203
+ }
204
+ out.push(lines[i]);
205
+ }
206
+ return lines.length - 1; // unterminated: consumed to EOF
207
+ }
208
+ // Stray closing tag or void tag: emit just the tag, resume after '>'
209
+ if (hit.isClose || this.htmlVoidTags.has(hit.tag)) {
210
+ const gt = line.indexOf('>', hit.index);
211
+ const end = gt === -1 ? line.length : gt + 1;
212
+ out.push(line.slice(hit.index, end));
213
+ return this.pushBack(lines, i, line.slice(end));
214
+ }
215
+ // Opening block tag: raw until the element is balanced (depth === 0)
216
+ let [depth, split] = this.scanHtmlDepth(line.slice(hit.index), hit.tag, 0);
217
+ if (split !== -1) {
218
+ out.push(line.slice(hit.index, hit.index + split));
219
+ return this.pushBack(lines, i, line.slice(hit.index + split));
220
+ }
221
+ out.push(line.slice(hit.index));
222
+ while (depth > 0 && ++i < lines.length) {
223
+ [depth, split] = this.scanHtmlDepth(lines[i], hit.tag, depth);
224
+ if (split !== -1) {
225
+ out.push(lines[i].slice(0, split));
226
+ return this.pushBack(lines, i, lines[i].slice(split));
227
+ }
228
+ out.push(lines[i]);
229
+ }
230
+ return Math.min(i, lines.length - 1);
231
+ }
75
232
  static parseBlocks(lines, opts) {
76
233
  const out = [];
77
234
  let paragraph = [];
@@ -104,6 +261,22 @@ class MarkdownUtils {
104
261
  flush();
105
262
  continue;
106
263
  }
264
+ // --- Raw HTML block passthrough (only with escaping disabled) ------------
265
+ // A block-level HTML element found ANYWHERE in the line starts a raw
266
+ // block: text before it joins the current paragraph, the element is
267
+ // emitted as-is (plain newlines, no <p>, no <br>) until balanced, and
268
+ // any text after the closing tag resumes markdown processing.
269
+ if (!opts.escapeHtml && line.includes('<')) {
270
+ const hit = this.findHtmlBlockStart(line);
271
+ if (hit) {
272
+ const before = line.slice(0, hit.index).trim();
273
+ if (before)
274
+ paragraph.push(before);
275
+ flush();
276
+ i = this.emitHtmlBlock(lines, i, hit, out);
277
+ continue;
278
+ }
279
+ }
107
280
  // --- ATX heading -------------------------------------------------------
108
281
  const heading = trimmed.match(/^(#{1,6})\s+(.+?)\s*#*\s*$/);
109
282
  if (heading) {