@herb-tools/formatter 0.7.5 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,510 @@
1
+ import { isNode, isERBNode, getTagName, isAnyOf, isERBControlFlowNode, hasERBOutput } from "@herb-tools/core"
2
+ import { Node, HTMLDoctypeNode, HTMLTextNode, HTMLElementNode, HTMLCommentNode, HTMLOpenTagNode, HTMLCloseTagNode, ERBIfNode, ERBContentNode, WhitespaceNode } from "@herb-tools/core"
3
+
4
+ // --- Types ---
5
+
/**
 * Result of analysing an HTMLElementNode for formatting: one flag for each
 * of the element's three parts.
 *
 * The flags are produced and consumed outside this file; the per-field
 * descriptions below are read off the field names only.
 */
export interface ElementFormattingAnalysis {
  /** Flag for the open tag (presumably: keep it on the current line). */
  openTagInline: boolean

  /** Flag for the element's content (presumably: keep it on the current line). */
  elementContentInline: boolean

  /** Flag for the close tag (presumably: keep it on the current line). */
  closeTagInline: boolean
}
14
+
/**
 * One piece of content taking part in text flow.
 *
 * A unit is either atomic (inline elements, ERB) or splittable (text).
 */
export interface ContentUnit {
  /** The unit's textual content. */
  content: string

  /** Category of the unit. */
  type:
    | 'text'
    | 'inline'
    | 'erb'
    | 'block'

  /** True for units that must be kept whole rather than split. */
  isAtomic: boolean

  /** Presumably marks units that interrupt the text flow — confirm against the code that builds units. */
  breaksFlow: boolean

  /** Optional marker; presumably set for units that come from a `herb:disable` comment — confirm. */
  isHerbDisable?: boolean
}
26
+
/**
 * Pairs a {@link ContentUnit} with the AST node it came from.
 */
export interface ContentUnitWithNode {
  /** The content unit itself. */
  unit: ContentUnit

  /** Source AST node for the unit, or `null` when there is none. */
  node: Node | null
}
34
+
35
+ // --- Constants ---
36
+
/**
 * Attribute names listed per tag name.
 *
 * The `'*'` key is presumably a wildcard that applies to every tag; the
 * lookup code is not in this file, so confirm against the consumer.
 *
 * TODO: we can probably expand this list with more tags/attributes
 */
export const FORMATTABLE_ATTRIBUTES: Record<string, string[]> = {
  '*': [
    'class',
  ],
  'img': [
    'srcset',
    'sizes',
  ],
}
42
+
/**
 * Lowercase tag names that the helpers in this file treat as inline elements.
 * Consulted by `isInlineElement` and `isBlockLevelNode`.
 */
export const INLINE_ELEMENTS = new Set([
  'a', 'abbr', 'acronym', 'b', 'bdo', 'big',
  'br', 'cite', 'code', 'dfn', 'em', 'hr',
  'i', 'img', 'kbd', 'label', 'map', 'object',
  'q', 'samp', 'small', 'span', 'strong', 'sub',
  'sup', 'tt', 'var', 'del', 'ins', 'mark',
  's', 'u', 'time', 'wbr',
])
49
+
/**
 * Lowercase tag names checked by `isContentPreserving`.
 */
export const CONTENT_PRESERVING_ELEMENTS = new Set([
  'script',
  'style',
  'pre',
  'textarea',
])
53
+
/**
 * Lowercase names of container tags.
 *
 * Not read anywhere in this file; presumably used by spacing logic in the
 * formatter itself — confirm against the consumer.
 */
export const SPACEABLE_CONTAINERS = new Set([
  'div', 'section', 'article', 'main',
  'header', 'footer', 'aside', 'figure',
  'details', 'summary', 'dialog', 'fieldset',
])
58
+
/**
 * Lowercase names of parent tags for "tight" groups.
 *
 * Not read anywhere in this file; presumably parents whose children are
 * kept together without blank lines — confirm against the consumer.
 */
export const TIGHT_GROUP_PARENTS = new Set([
  'ul', 'ol', 'nav', 'select', 'datalist',
  'optgroup', 'tr', 'thead', 'tbody', 'tfoot',
])
63
+
/**
 * Lowercase names of child tags for "tight" groups.
 *
 * Not read anywhere in this file; presumably the counterpart of
 * `TIGHT_GROUP_PARENTS` — confirm against the consumer.
 */
export const TIGHT_GROUP_CHILDREN = new Set([
  'li',
  'option',
  'td',
  'th',
  'dt',
  'dd',
])
67
+
/**
 * Numeric threshold used by spacing decisions.
 *
 * Not read anywhere in this file; what is being counted against it is
 * decided by the consumer — confirm there.
 */
export const SPACING_THRESHOLD = 3
69
+
/**
 * Attributes whose value is a space-separated token list; spacing around
 * ERB content inside them helps readability.
 */
export const TOKEN_LIST_ATTRIBUTES = new Set([
  'class',
  'data-controller',
  'data-action',
])
77
+
78
+
79
+ // --- Node Utility Functions ---
80
+
/**
 * Tell whether `node` is an HTML text node whose content is empty or
 * consists of whitespace only.
 *
 * @param node - Node to inspect.
 * @returns `true` only for text nodes whose trimmed content is `""`.
 */
export function isPureWhitespaceNode(node: Node): boolean {
  if (!isNode(node, HTMLTextNode)) {
    return false
  }

  const trimmedContent = node.content.trim()

  return trimmedContent === ""
}
87
+
/**
 * Tell whether `node` carries meaningful (non-whitespace) content.
 *
 * @param node - Node to inspect.
 * @returns `false` for WhitespaceNode instances and for text nodes whose
 *   trimmed content is empty; `true` for every other node.
 */
export function isNonWhitespaceNode(node: Node): boolean {
  if (isNode(node, WhitespaceNode)) {
    return false
  }

  // Text nodes count only when something is left after trimming.
  return isNode(node, HTMLTextNode) ? node.content.trim() !== "" : true
}
97
+
/**
 * Walk backwards from `currentIndex` and locate the nearest sibling that is
 * not whitespace (as decided by `isNonWhitespaceNode`).
 *
 * @param siblings - Sibling list to search.
 * @param currentIndex - Index to start from; the search begins one before it.
 * @returns Index of the nearest preceding meaningful sibling, or `-1` when
 *   there is none.
 */
export function findPreviousMeaningfulSibling(siblings: Node[], currentIndex: number): number {
  let cursor = currentIndex - 1

  while (cursor >= 0) {
    if (isNonWhitespaceNode(siblings[cursor])) return cursor

    cursor--
  }

  return -1
}
111
+
/**
 * Tell whether any node strictly between `startIndex` and `endIndex` is
 * whitespace (a WhitespaceNode or a whitespace-only text node).
 *
 * @param children - Node list to scan.
 * @param startIndex - Exclusive lower bound.
 * @param endIndex - Exclusive upper bound.
 * @returns `true` as soon as one whitespace node is found in the open interval.
 */
export function hasWhitespaceBetween(children: Node[], startIndex: number, endIndex: number): boolean {
  let position = startIndex + 1

  while (position < endIndex) {
    const candidate = children[position]

    if (isNode(candidate, WhitespaceNode) || isPureWhitespaceNode(candidate)) return true

    position++
  }

  return false
}
124
+
/**
 * Return the children of `body` with insignificant whitespace removed.
 *
 * Dropped: WhitespaceNode instances and whitespace-only text nodes.
 * Kept: everything else, including a text node that is exactly one space.
 *
 * @param body - Children to filter; the array itself is not modified.
 * @returns A new array holding the significant children in original order.
 */
export function filterSignificantChildren(body: Node[]): Node[] {
  return body.filter(child => {
    if (isNode(child, WhitespaceNode)) return false
    if (!isNode(child, HTMLTextNode)) return true

    // A lone single space is significant; any other blank text is not.
    return child.content === " " || child.content.trim() !== ""
  })
}
141
+
/**
 * Strip whitespace nodes from `nodes`, except that the first whitespace node
 * of a run directly preceding a herb:disable comment is kept, so that exactly
 * one whitespace node sits in front of such a comment.
 *
 * @param nodes - Nodes to filter; the array itself is not modified.
 * @returns A new array with the surviving nodes in original order.
 */
export function filterEmptyNodesForHerbDisable(nodes: Node[]): Node[] {
  const kept: Node[] = []
  // First whitespace node of the run currently being skipped, if any.
  let heldWhitespace: Node | null = null

  for (const node of nodes) {
    const blank = isNode(node, WhitespaceNode) || (isNode(node, HTMLTextNode) && node.content.trim() === "")

    if (blank) {
      if (!heldWhitespace) heldWhitespace = node
      continue
    }

    // Only a herb:disable comment gets its leading whitespace back.
    if (heldWhitespace && isNode(node, ERBContentNode) && isHerbDisableComment(node)) {
      kept.push(heldWhitespace)
    }

    heldWhitespace = null
    kept.push(node)
  }

  return kept
}
169
+
170
+ // --- Punctuation and Word Spacing Functions ---
171
+
/**
 * Tell whether `word` is made up solely of closing punctuation characters
 * (`. , ; : ! ? ) ]`), one or more of them.
 *
 * @param word - Candidate word.
 * @returns `true` when every character is closing punctuation; `false` for
 *   the empty string.
 */
export function isClosingPunctuation(word: string): boolean {
  const closingPunctuationOnly = /^[.,;:!?)\]]+$/

  return closingPunctuationOnly.test(word)
}
178
+
/**
 * Tell whether the last character of `line` is an opening bracket,
 * either `(` or `[`.
 *
 * @param line - Line text built so far.
 * @returns `true` when the line ends in `(` or `[`.
 */
export function lineEndsWithOpeningPunctuation(line: string): boolean {
  const trailingOpener = /[(\[]$/

  return trailingOpener.test(line)
}
185
+
/**
 * Tell whether `text`, ignoring surrounding whitespace, starts with `<%` and
 * ends with `%>` on a single line.
 *
 * The pattern does not match across line terminators, and it does not check
 * that only one tag is present between the first `<%` and the final `%>`.
 *
 * @param text - Text to test.
 * @returns `true` when the trimmed text matches.
 */
export function isERBTag(text: string): boolean {
  const candidate = text.trim()

  return /^<%.*?%>$/.test(candidate)
}
192
+
/**
 * Tell whether `text`, ignoring surrounding whitespace, ends with an ERB
 * closing delimiter (`%>`), optionally followed by a run of non-whitespace
 * characters glued to it (for example `%>,`).
 *
 * @param text - Text to test.
 * @returns `true` when the trimmed text ends that way.
 */
export function endsWithERBTag(text: string): boolean {
  const candidate = text.trim()

  if (/%>$/.test(candidate)) {
    return true
  }

  // Also accept a tag with trailing characters attached directly to it.
  return /%>\S+$/.test(candidate)
}
201
+
/**
 * Tell whether `text`, ignoring surrounding whitespace, begins with an ERB
 * opening delimiter (`<%`).
 *
 * @param text - Text to test.
 * @returns `true` when the trimmed text starts with `<%`.
 */
export function startsWithERBTag(text: string): boolean {
  const candidate = text.trim()

  return /^<%/.test(candidate)
}
208
+
/**
 * Decide whether a space must be inserted between `currentLine` and `word`.
 *
 * No space is inserted when any of these holds:
 * - `word` is closing punctuation,
 * - `currentLine` ends with an opening bracket,
 * - `currentLine` already ends with a space or `word` already starts with one,
 * - `currentLine` ends with an ERB tag and `word` starts with one.
 *
 * @param currentLine - Line text built so far.
 * @param word - Word about to be appended.
 * @returns `true` when a separating space is required.
 */
export function needsSpaceBetween(currentLine: string, word: string): boolean {
  const spaceIsUnwanted =
    isClosingPunctuation(word) ||
    lineEndsWithOpeningPunctuation(currentLine) ||
    currentLine.endsWith(' ') ||
    word.startsWith(' ') ||
    (endsWithERBTag(currentLine) && startsWithERBTag(word))

  return !spaceIsUnwanted
}
221
+
/**
 * Append `word` to `currentLine`, choosing the spacing between them.
 *
 * - An empty line yields `word` unchanged.
 * - A single-space word ensures the line ends with exactly one added space
 *   (none is added when the line already ends with a space).
 * - Closing punctuation is attached directly after trimming the line's
 *   trailing whitespace.
 * - Otherwise `needsSpaceBetween` decides whether a space separates them.
 *
 * @param currentLine - Line text built so far.
 * @param word - Word to append.
 * @returns The extended line.
 */
export function buildLineWithWord(currentLine: string, word: string): string {
  if (!currentLine) {
    return word
  }

  if (word === ' ') {
    if (currentLine.endsWith(' ')) return currentLine

    return currentLine + ' '
  }

  if (isClosingPunctuation(word)) {
    return currentLine.trimEnd() + word
  }

  const separator = needsSpaceBetween(currentLine, word) ? ' ' : ''

  return currentLine + separator + word
}
240
+
/**
 * Tell whether `node` is an ERB node or an HTML element whose tag name is
 * classified as inline by `isInlineElement`.
 *
 * @param node - Node to inspect.
 * @returns `true` for ERB nodes and inline HTML elements.
 */
export function isInlineOrERBNode(node: Node): boolean {
  if (isERBNode(node)) return true
  if (!isNode(node, HTMLElementNode)) return false

  return isInlineElement(getTagName(node))
}
247
+
/**
 * Tell whether `tagName` names an inline element, i.e. whether its
 * lowercased form is a member of `INLINE_ELEMENTS`.
 *
 * @param tagName - Tag name in any letter case.
 * @returns `true` when the tag is listed as inline.
 */
export function isInlineElement(tagName: string): boolean {
  const normalizedTagName = tagName.toLowerCase()

  return INLINE_ELEMENTS.has(normalizedTagName)
}
254
+
/**
 * Check whether the node at `index` directly follows an inline element or
 * ERB node, with no whitespace separating them.
 *
 * Two layouts count as adjacent:
 * - the immediately preceding sibling is inline/ERB, or
 * - the preceding sibling is a text node that does not start with
 *   whitespace, and the sibling before that text is inline/ERB.
 *
 * @param siblings - Sibling list containing the node.
 * @param index - Position of the node within `siblings`.
 * @returns `true` when the node is adjacent to a previous inline/ERB node;
 *   `false` when there is no preceding sibling at all.
 */
export function isAdjacentToPreviousInline(siblings: Node[], index: number): boolean {
  // Nothing precedes the first node; this also avoids handing
  // `siblings[-1]` (undefined) to the node predicates below.
  if (index <= 0) return false

  const previousNode = siblings[index - 1]

  if (isInlineOrERBNode(previousNode)) {
    return true
  }

  // Text that does not begin with whitespace is glued to whatever precedes it.
  if (index > 1 && isNode(previousNode, HTMLTextNode) && !/^\s/.test(previousNode.content)) {
    const twoBack = siblings[index - 2]

    return isInlineOrERBNode(twoBack)
  }

  return false
}
273
+
/**
 * Decide whether `child` should be appended to the last emitted line
 * instead of starting a new one.
 *
 * - The first sibling never is.
 * - A text node that does not start with whitespace is appended when the
 *   sibling right before it is inline/ERB.
 * - An inline HTML element is appended when `isAdjacentToPreviousInline`
 *   says so.
 * - An ERB content node is appended when the nearest preceding
 *   non-whitespace sibling ends on the source line the ERB node starts on
 *   (both nodes must have a location).
 *
 * @param child - Node under consideration.
 * @param siblings - Sibling list containing `child`.
 * @param index - Position of `child` within `siblings`.
 * @returns `true` when `child` belongs on the last line.
 */
export function shouldAppendToLastLine(child: Node, siblings: Node[], index: number): boolean {
  if (index === 0) return false

  if (isNode(child, HTMLTextNode) && !/^\s/.test(child.content)) {
    return isInlineOrERBNode(siblings[index - 1])
  }

  if (isNode(child, HTMLElementNode) && isInlineElement(getTagName(child))) {
    return isAdjacentToPreviousInline(siblings, index)
  }

  if (!isNode(child, ERBContentNode)) return false

  // Nearest sibling before `child` that is not whitespace.
  const anchorIndex = findPreviousMeaningfulSibling(siblings, index)

  if (anchorIndex < 0) return false

  const anchor = siblings[anchorIndex]

  if (!anchor.location || !child.location) return false

  return anchor.location.end.line === child.location.start.line
}
308
+
/**
 * Decide whether a whitespace node represents intentional vertical spacing
 * that should be preserved.
 *
 * That is the case when `child` is a whitespace-only text node containing a
 * blank line (`"\n\n"`) and both its immediate neighbours exist and are
 * non-whitespace nodes.
 *
 * @param child - Node under consideration.
 * @param siblings - Sibling list containing `child`.
 * @param index - Position of `child` within `siblings`.
 * @returns `true` when the spacing should be preserved.
 */
export function shouldPreserveUserSpacing(child: Node, siblings: Node[], index: number): boolean {
  if (!isPureWhitespaceNode(child)) return false
  if (!isNode(child, HTMLTextNode) || !child.content.includes('\n\n')) return false

  // A neighbour is required on each side.
  const lastIndex = siblings.length - 1

  if (index <= 0 || index >= lastIndex) return false

  return isNonWhitespaceNode(siblings[index - 1]) && isNonWhitespaceNode(siblings[index + 1])
}
321
+
322
+
/**
 * Check whether any text node in `children`, or nested inside the body of
 * any HTML element among them, contains a newline.
 *
 * Every child is examined: a text node without a newline does not end the
 * search, so later siblings and nested element bodies are still considered.
 *
 * @param children - Nodes to scan (recursively through element bodies).
 * @returns `true` when at least one text node contains `"\n"`.
 */
export function hasMultilineTextContent(children: Node[]): boolean {
  for (const child of children) {
    if (isNode(child, HTMLTextNode)) {
      if (child.content.includes('\n')) return true

      continue
    }

    if (isNode(child, HTMLElementNode) && hasMultilineTextContent(child.body)) {
      return true
    }
  }

  return false
}
339
+
/**
 * Check that `children` contain nothing but inline material, recursively.
 *
 * The check fails when an HTML element is not inline, when any element's
 * body fails the same check, or when a doctype, HTML comment or ERB
 * control-flow node is present. All other nodes are accepted.
 *
 * @param children - Nodes to scan (recursively through element bodies).
 * @returns `true` when every child passes.
 */
export function areAllNestedElementsInline(children: Node[]): boolean {
  return children.every(child => {
    if (isNode(child, HTMLElementNode)) {
      return isInlineElement(getTagName(child)) && areAllNestedElementsInline(child.body)
    }

    return !isAnyOf(child, HTMLDoctypeNode, HTMLCommentNode, isERBControlFlowNode)
  })
}
360
+
/**
 * Check whether `inlineNodes` contain an ERB `if` node that has at least one
 * statement and whose location spans more than one source line.
 *
 * Only `ERBIfNode` instances are considered; nodes without a location never
 * count.
 *
 * @param inlineNodes - Nodes to scan (top level only).
 * @returns `true` when such a multi-line `if` is found.
 */
export function hasComplexERBControlFlow(inlineNodes: Node[]): boolean {
  return inlineNodes.some(node => {
    if (!isNode(node, ERBIfNode)) return false
    if (node.statements.length === 0 || !node.location) return false

    const { start, end } = node.location

    return start.line !== end.line
  })
}
380
+
/**
 * Check whether `children` mix non-blank text with inline elements
 * (like "text<em>inline</em>text") or with ERB output
 * (like "<%= value %> text").
 *
 * Such content should be formatted inline even when the source contains
 * structural newlines.
 *
 * @param children - Direct children to inspect (not recursive here).
 * @returns `true` when non-blank text appears together with an inline
 *   element or with ERB output as reported by `hasERBOutput`.
 */
export function hasMixedTextAndInlineContent(children: Node[]): boolean {
  const containsText = children.some(
    child => isNode(child, HTMLTextNode) && child.content.trim() !== ""
  )

  // Without text there is nothing to mix with.
  if (!containsText) return false

  const containsInlineElement = children.some(
    child => isNode(child, HTMLElementNode) && isInlineElement(getTagName(child))
  )

  return containsInlineElement || hasERBOutput(children)
}
404
+
/**
 * Check whether `element` belongs to a tag listed in
 * `CONTENT_PRESERVING_ELEMENTS` (script, style, pre, textarea).
 *
 * The tag name is lowercased before the lookup, matching how
 * `isInlineElement` treats tag names, so `<PRE>` and `<pre>` agree.
 *
 * @param element - Element node, open tag or close tag to inspect.
 * @returns `true` when the tag is content-preserving.
 */
export function isContentPreserving(element: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode): boolean {
  const tagName = getTagName(element).toLowerCase()

  return CONTENT_PRESERVING_ELEMENTS.has(tagName)
}
410
+
/**
 * Count the run of inline elements / ERB content nodes at the start of
 * `children` that touch each other without whitespace in between.
 *
 * Whitespace before the first counted node is ignored. Counting stops at
 * the first node that is neither an inline HTML element nor an ERB content
 * node, or at the first counted candidate separated from the previous one
 * by whitespace.
 *
 * @param children - Nodes to scan from the beginning.
 * @returns Number of adjacent inline/ERB nodes found.
 */
export function countAdjacentInlineElements(children: Node[]): number {
  let adjacentCount = 0
  // Set when whitespace was skipped since the last counted node. Every node
  // skipped between two candidates is whitespace, so this flag is equivalent
  // to scanning that gap for a whitespace node.
  let whitespaceSinceLastMatch = false

  for (const child of children) {
    if (isPureWhitespaceNode(child) || isNode(child, WhitespaceNode)) {
      whitespaceSinceLastMatch = true
      continue
    }

    const isInlineHtmlElement = isNode(child, HTMLElementNode) && isInlineElement(getTagName(child))

    if (!isInlineHtmlElement && !isNode(child, ERBContentNode)) break
    if (adjacentCount > 0 && whitespaceSinceLastMatch) break

    adjacentCount += 1
    whitespaceSinceLastMatch = false
  }

  return adjacentCount
}
441
+
/**
 * Check whether `node` represents a block-level element: an HTML element
 * whose tag is not classified as inline.
 *
 * Classification is delegated to `isInlineElement`, so tag names are
 * compared case-insensitively (`<SPAN>` is inline, just like `<span>`).
 *
 * @param node - Node to inspect.
 * @returns `true` for non-inline HTML elements; `false` for inline elements
 *   and for every node that is not an HTML element.
 */
export function isBlockLevelNode(node: Node): boolean {
  if (!isNode(node, HTMLElementNode)) {
    return false
  }

  return !isInlineElement(getTagName(node))
}
458
+
/**
 * Check whether `node` is a line-breaking element, i.e. an HTML element
 * whose tag is `br` or `hr`.
 *
 * The tag name is lowercased before comparison, matching how
 * `isInlineElement` treats tag names, so `<BR>` and `<HR>` are recognised.
 *
 * @param node - Node to inspect.
 * @returns `true` for `br`/`hr` elements; `false` otherwise.
 */
export function isLineBreakingElement(node: Node): boolean {
  if (!isNode(node, HTMLElementNode)) {
    return false
  }

  const tagName = getTagName(node).toLowerCase()

  return tagName === 'br' || tagName === 'hr'
}
471
+
/**
 * Split `text` into words: surrounding whitespace is dropped and every run
 * of whitespace between words acts as a single separator.
 *
 * @param text - Text to split.
 * @returns The words in order. Empty or whitespace-only input yields an
 *   empty array rather than an array holding one empty string.
 */
export function normalizeAndSplitWords(text: string): string[] {
  const trimmed = text.trim()

  // `"".split(...)` would produce `[""]`, a phantom empty word.
  if (trimmed === "") return []

  return trimmed.split(/\s+/)
}
480
+
/**
 * Tell whether the last character of `text` is a whitespace character.
 *
 * @param text - Text to test.
 * @returns `true` when `text` ends with whitespace; `false` for the empty
 *   string.
 */
export function endsWithWhitespace(text: string): boolean {
  const trailingWhitespace = /\s$/

  return trailingWhitespace.test(text)
}
487
+
/**
 * Tell whether `node` is an ERB comment (`<%# ... %>`) whose trimmed content
 * begins with `herb:disable`.
 *
 * @param node - Node to inspect.
 * @returns `true` only for ERB content nodes opened with `<%#` whose content
 *   starts with `herb:disable`; missing content is treated as empty.
 */
export function isHerbDisableComment(node: Node): boolean {
  if (!isNode(node, ERBContentNode) || node.tag_opening?.value !== "<%#") {
    return false
  }

  const commentBody = (node?.content?.value || "").trim()

  return commentBody.startsWith("herb:disable")
}
500
+
/**
 * Type guard: tell whether `node` is a text node that looks like YAML
 * frontmatter, i.e. its trimmed content starts with `---` and ends with `---`.
 *
 * Both checks run against the same trimmed string and nothing verifies that
 * the opening and closing delimiters are distinct, so content that is just
 * `---` also passes.
 *
 * @param node - Node to inspect.
 * @returns `true` (narrowing `node` to `HTMLTextNode`) when the content is
 *   delimited by `---` at both ends.
 */
export function isFrontmatter(node: Node): node is HTMLTextNode {
  if (!isNode(node, HTMLTextNode)) return false

  const trimmedContent = node.content.trim()

  if (!trimmedContent.startsWith("---")) return false

  return /---\s*$/.test(trimmedContent)
}