@herb-tools/formatter 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,508 @@
1
+ import { isNode, isERBNode, getTagName, isAnyOf, isERBControlFlowNode, hasERBOutput } from "@herb-tools/core"
2
+ import { Node, HTMLDoctypeNode, HTMLTextNode, HTMLElementNode, HTMLCommentNode, HTMLOpenTagNode, HTMLCloseTagNode, ERBIfNode, ERBContentNode, WhitespaceNode } from "@herb-tools/core"
3
+
4
+ // --- Types ---
5
+
/**
 * Outcome of analysing an HTMLElementNode: three independent inline/not-inline
 * decisions, one per part of the element.
 */
export interface ElementFormattingAnalysis {
  /** Decision for the element's open tag. */
  openTagInline: boolean

  /** Decision for the element's content (its children). */
  elementContentInline: boolean

  /** Decision for the element's close tag. */
  closeTagInline: boolean
}
14
+
/**
 * A single piece of content taking part in text flow.
 *
 * A unit is either atomic (inline elements, ERB) or splittable (text).
 */
export interface ContentUnit {
  /** The unit's text. */
  content: string

  /** Category of the unit. */
  type: 'text' | 'inline' | 'erb' | 'block'

  /** `true` for atomic units, `false` for splittable ones. */
  isAtomic: boolean

  /** NOTE(review): presumably marks units that interrupt the surrounding text flow; confirm against the printer. */
  breaksFlow: boolean

  /** Optional flag; by its name it marks units that come from a herb:disable comment. */
  isHerbDisable?: boolean
}
26
+
/**
 * Couples a content unit with the AST node it came from.
 */
export interface ContentUnitWithNode {
  /** The content unit itself. */
  unit: ContentUnit

  /** Source AST node, or `null` when the unit has no associated node. */
  node: Node | null
}
34
+
35
+ // --- Constants ---
36
+
// TODO: we can probably expand this list with more tags/attributes
/**
 * Attribute names grouped by tag name.
 *
 * NOTE(review): the `'*'` key presumably applies to every tag; confirm at the lookup site.
 */
export const FORMATTABLE_ATTRIBUTES: Record<string, string[]> = {
  '*': [
    'class',
  ],
  'img': [
    'srcset',
    'sizes',
  ],
}
42
+
/**
 * Lower-case tag names that the formatter treats as inline elements.
 * Note that `br` and `hr` are part of this set.
 */
export const INLINE_ELEMENTS = new Set<string>([
  'a', 'abbr', 'acronym', 'b', 'bdo', 'big',
  'br', 'cite', 'code', 'dfn', 'em', 'hr',
  'i', 'img', 'kbd', 'label', 'map', 'object',
  'q', 'samp', 'small', 'span', 'strong', 'sub',
  'sup', 'tt', 'var', 'del', 'ins', 'mark',
  's', 'u', 'time', 'wbr',
])
49
+
/**
 * Lower-case tag names checked by `isContentPreserving`.
 */
export const CONTENT_PRESERVING_ELEMENTS = new Set<string>([
  'script',
  'style',
  'pre',
  'textarea',
])
53
+
/**
 * Lower-case tag names of container elements.
 *
 * NOTE(review): by its name, these are containers whose children may be separated
 * by blank lines; confirm against the spacing logic that reads this set.
 */
export const SPACEABLE_CONTAINERS = new Set<string>([
  'div', 'section', 'article', 'main',
  'header', 'footer', 'aside', 'figure',
  'details', 'summary', 'dialog', 'fieldset',
])
58
+
/**
 * Lower-case tag names of parent elements.
 *
 * NOTE(review): by its name, children of these parents are kept in a tight group
 * (no blank lines between them); confirm against the spacing logic.
 */
export const TIGHT_GROUP_PARENTS = new Set<string>([
  'ul', 'ol', 'nav', 'select', 'datalist',
  'optgroup', 'tr', 'thead', 'tbody', 'tfoot',
])
63
+
/**
 * Lower-case tag names of child elements.
 *
 * NOTE(review): by its name, these children are kept tightly grouped with their
 * siblings; confirm against the spacing logic.
 */
export const TIGHT_GROUP_CHILDREN = new Set<string>([
  'li',
  'option',
  'td',
  'th',
  'dt',
  'dd',
])
67
+
/**
 * Numeric threshold used by spacing decisions.
 *
 * NOTE(review): presumably a sibling count at which blank-line spacing starts
 * to apply; confirm at the use site.
 */
export const SPACING_THRESHOLD = 3
69
+
/**
 * Attributes whose value is a space-separated list of tokens. Such values
 * benefit from spacing around ERB content for readability.
 */
export const TOKEN_LIST_ATTRIBUTES = new Set<string>([
  'class',
  'data-controller',
  'data-action',
])
77
+
78
+
79
+ // --- Node Utility Functions ---
80
+
/**
 * Tell whether `node` is an HTML text node whose content is empty or made up
 * solely of whitespace.
 *
 * @param node - Node to inspect.
 * @returns `true` only for HTMLTextNodes whose trimmed content is empty.
 */
export function isPureWhitespaceNode(node: Node): boolean {
  if (!isNode(node, HTMLTextNode)) return false

  return node.content.trim() === ""
}
87
+
/**
 * Tell whether `node` carries meaningful content.
 *
 * WhitespaceNodes and text nodes that trim to the empty string are not
 * meaningful; every other node is.
 *
 * @param node - Node to inspect.
 */
export function isNonWhitespaceNode(node: Node): boolean {
  if (isNode(node, WhitespaceNode)) return false

  // Anything that is not a text node counts as content; text nodes count only
  // when something is left after trimming.
  return !isNode(node, HTMLTextNode) || node.content.trim() !== ""
}
97
+
/**
 * Walk backwards from `currentIndex` to the closest earlier sibling that is
 * meaningful (see `isNonWhitespaceNode`).
 *
 * @param siblings - Sibling list to search.
 * @param currentIndex - Index of the current node; the search starts just before it.
 * @returns Index of the found sibling, or -1 when there is none.
 */
export function findPreviousMeaningfulSibling(siblings: Node[], currentIndex: number): number {
  let candidate = currentIndex - 1

  while (candidate >= 0 && !isNonWhitespaceNode(siblings[candidate])) {
    candidate--
  }

  // Leaving the loop below zero means every earlier sibling was whitespace.
  return candidate >= 0 ? candidate : -1
}
111
+
/**
 * Report whether any node strictly between `startIndex` and `endIndex`
 * (both exclusive) is a WhitespaceNode or a whitespace-only text node.
 *
 * @param children - Node list to scan.
 * @param startIndex - Index just before the scanned range.
 * @param endIndex - Index just after the scanned range.
 */
export function hasWhitespaceBetween(children: Node[], startIndex: number, endIndex: number): boolean {
  let cursor = startIndex + 1

  while (cursor < endIndex) {
    const candidate = children[cursor]

    if (isPureWhitespaceNode(candidate) || isNode(candidate, WhitespaceNode)) return true

    cursor++
  }

  return false
}
124
+
/**
 * Return the children that matter for formatting, dropping insignificant whitespace.
 *
 * Dropped: WhitespaceNodes and whitespace-only text nodes.
 * Kept: a text node whose content is exactly one space, and every other node.
 *
 * @param body - Children to filter; the array itself is not modified.
 * @returns A new array with the significant children in their original order.
 */
export function filterSignificantChildren(body: Node[]): Node[] {
  const significant: Node[] = []

  for (const child of body) {
    if (isNode(child, WhitespaceNode)) continue

    // A lone " " is kept deliberately; any other blank text is noise.
    const isDroppableText = isNode(child, HTMLTextNode) && child.content !== " " && child.content.trim() === ""
    if (isDroppableText) continue

    significant.push(child)
  }

  return significant
}
141
+
/**
 * Strip whitespace nodes, except that exactly ONE whitespace node is kept
 * directly in front of each herb:disable comment that had whitespace before it.
 *
 * @param nodes - Nodes to filter; the array itself is not modified.
 * @returns A new array of the surviving nodes in their original order.
 */
export function filterEmptyNodesForHerbDisable(nodes: Node[]): Node[] {
  const kept: Node[] = []
  // First whitespace node of the current whitespace run, if any.
  let heldWhitespace: Node | null = null

  for (const node of nodes) {
    const blank = isNode(node, WhitespaceNode) || (isNode(node, HTMLTextNode) && node.content.trim() === "")

    if (blank) {
      // Only the first node of a run is remembered; the rest are discarded.
      if (heldWhitespace === null) heldWhitespace = node
      continue
    }

    const precedesDisableComment = isNode(node, ERBContentNode) && isHerbDisableComment(node)

    if (precedesDisableComment && heldWhitespace !== null) {
      kept.push(heldWhitespace)
    }

    heldWhitespace = null
    kept.push(node)
  }

  return kept
}
169
+
170
+ // --- Punctuation and Word Spacing Functions ---
171
+
/**
 * Tell whether `word` is made up entirely of closing punctuation characters:
 * `.`, `,`, `;`, `:`, `!`, `?`, `)` or `]`.
 *
 * @param word - Word to test.
 * @returns `true` for one or more such characters and nothing else; `false` for the empty string.
 */
export function isClosingPunctuation(word: string): boolean {
  const closingPunctuationOnly = /^[.,;:!?)\]]+$/

  return closingPunctuationOnly.test(word)
}
178
+
/**
 * Tell whether the last character of `line` is an opening bracket: `(` or `[`.
 *
 * @param line - Line to test.
 */
export function lineEndsWithOpeningPunctuation(line: string): boolean {
  const trailingOpener = /[(\[]$/

  return trailingOpener.test(line)
}
185
+
/**
 * Check if a string is an ERB tag.
 *
 * After trimming, the text must start with `<%` and end with `%>`. The tag
 * body may span multiple lines.
 *
 * Only the start and end are anchored, so text that begins with one tag and
 * ends with another (e.g. `<% a %> x <% b %>`) also matches.
 *
 * @param text - Text to test; surrounding whitespace is ignored.
 */
export function isERBTag(text: string): boolean {
  // `[\s\S]` instead of `.` so that newlines inside the tag body are accepted.
  return /^<%[\s\S]*?%>$/.test(text.trim())
}
192
+
/**
 * Tell whether `text`, ignoring surrounding whitespace, ends with the ERB
 * closing delimiter `%>`.
 *
 * @param text - Text to test.
 */
export function endsWithERBTag(text: string): boolean {
  const trimmed = text.trim()

  return /%>$/.test(trimmed)
}
199
+
/**
 * Tell whether `text`, ignoring surrounding whitespace, begins with the ERB
 * opening delimiter `<%`.
 *
 * @param text - Text to test.
 */
export function startsWithERBTag(text: string): boolean {
  const trimmed = text.trim()

  return /^<%/.test(trimmed)
}
206
+
/**
 * Decide whether a space has to be inserted between `currentLine` and `word`.
 *
 * No space is inserted when any of the following holds:
 * - `word` is closing punctuation,
 * - `currentLine` ends with opening punctuation,
 * - `currentLine` already ends with a space,
 * - `word` already starts with a space,
 * - `currentLine` ends with an ERB tag and `word` starts with one.
 *
 * @param currentLine - Line built so far.
 * @param word - Word about to be appended.
 */
export function needsSpaceBetween(currentLine: string, word: string): boolean {
  const spaceIsSuppressed =
    isClosingPunctuation(word) ||
    lineEndsWithOpeningPunctuation(currentLine) ||
    currentLine.endsWith(' ') ||
    word.startsWith(' ') ||
    (endsWithERBTag(currentLine) && startsWithERBTag(word))

  return !spaceIsSuppressed
}
219
+
/**
 * Append `word` to `currentLine`, inserting a separating space only where
 * `needsSpaceBetween` calls for one.
 *
 * @param currentLine - Line built so far; when empty, `word` is returned unchanged.
 * @param word - Word to append. A single space adds at most one trailing space.
 *   Closing punctuation is attached after trimming the line's trailing whitespace.
 * @returns The extended line.
 */
export function buildLineWithWord(currentLine: string, word: string): string {
  if (!currentLine) return word

  if (word === ' ') {
    // Never produce two trailing spaces in a row.
    if (currentLine.endsWith(' ')) return currentLine

    return currentLine + ' '
  }

  if (isClosingPunctuation(word)) {
    return currentLine.trimEnd() + word
  }

  const separator = needsSpaceBetween(currentLine, word) ? ' ' : ''

  return currentLine + separator + word
}
238
+
/**
 * Tell whether `node` is an ERB node or an HTML element whose tag is in the
 * inline set.
 *
 * @param node - Node to inspect.
 */
export function isInlineOrERBNode(node: Node): boolean {
  if (isERBNode(node)) return true
  if (!isNode(node, HTMLElementNode)) return false

  return isInlineElement(getTagName(node))
}
245
+
/**
 * Tell whether a tag name belongs to `INLINE_ELEMENTS`. The comparison is
 * case-insensitive.
 *
 * @param tagName - Tag name in any letter case.
 */
export function isInlineElement(tagName: string): boolean {
  const normalizedTagName = tagName.toLowerCase()

  return INLINE_ELEMENTS.has(normalizedTagName)
}
252
+
/**
 * Check if the current inline element is adjacent to a previous inline element
 * (no whitespace between).
 *
 * Adjacent means one of:
 * - the immediately preceding sibling is an inline element or ERB node, or
 * - the preceding sibling is a text node that does not start with whitespace
 *   and the sibling before that is an inline element or ERB node.
 *
 * @param siblings - Sibling list containing the current element.
 * @param index - Index of the current element within `siblings`.
 * @returns `false` when there is no previous sibling (index 0 or out of range).
 */
export function isAdjacentToPreviousInline(siblings: Node[], index: number): boolean {
  const previousNode = siblings[index - 1]

  // No previous sibling: nothing to be adjacent to, and the helpers below
  // must not be handed `undefined`.
  if (!previousNode) return false

  if (isInlineOrERBNode(previousNode)) {
    return true
  }

  if (index > 1 && isNode(previousNode, HTMLTextNode) && !/^\s/.test(previousNode.content)) {
    const twoBack = siblings[index - 2]

    return isInlineOrERBNode(twoBack)
  }

  return false
}
271
+
/**
 * Decide whether `child` should be appended to the last output line rather
 * than start a new one (adjacent inline elements and punctuation).
 *
 * - The first child (`index === 0`) is never appended.
 * - A text node that does not start with whitespace is appended when the
 *   previous sibling is an inline element or ERB node.
 * - An inline element is appended when `isAdjacentToPreviousInline` holds.
 * - An ERBContentNode is appended when the nearest earlier non-whitespace
 *   sibling ends on the source line where the ERB node starts; both nodes must
 *   have a location.
 *
 * @param child - Node being placed.
 * @param siblings - Sibling list containing `child`.
 * @param index - Index of `child` within `siblings`.
 */
export function shouldAppendToLastLine(child: Node, siblings: Node[], index: number): boolean {
  if (index === 0) return false

  if (isNode(child, HTMLTextNode) && !/^\s/.test(child.content)) {
    return isInlineOrERBNode(siblings[index - 1])
  }

  if (isNode(child, HTMLElementNode) && isInlineElement(getTagName(child))) {
    return isAdjacentToPreviousInline(siblings, index)
  }

  if (!isNode(child, ERBContentNode)) return false

  let cursor = index - 1

  while (cursor >= 0) {
    const earlier = siblings[cursor]
    cursor--

    const isBlank = isPureWhitespaceNode(earlier) || isNode(earlier, WhitespaceNode)
    if (isBlank) continue

    // Only the nearest non-whitespace sibling is considered.
    if (!earlier.location || !child.location) return false

    return earlier.location.end.line === child.location.start.line
  }

  return false
}
306
+
/**
 * Decide whether a whitespace node represents a blank line the user left
 * between two elements on purpose, and should therefore be preserved.
 *
 * All of the following must hold:
 * - `child` is a whitespace-only text node containing two consecutive newlines,
 * - the sibling directly before it is non-whitespace,
 * - the sibling directly after it is non-whitespace.
 *
 * @param child - Node being inspected.
 * @param siblings - Sibling list containing `child`.
 * @param index - Index of `child` within `siblings`.
 */
export function shouldPreserveUserSpacing(child: Node, siblings: Node[], index: number): boolean {
  if (!isPureWhitespaceNode(child)) return false

  const hasBlankLine = isNode(child, HTMLTextNode) && child.content.includes('\n\n')
  if (!hasBlankLine) return false

  // Both neighbours must exist before they can be inspected.
  const hasBothNeighbours = index > 0 && index < siblings.length - 1
  if (!hasBothNeighbours) return false

  return isNonWhitespaceNode(siblings[index - 1]) && isNonWhitespaceNode(siblings[index + 1])
}
319
+
320
+
/**
 * Check if children contain any text content with newlines.
 *
 * Every child is inspected: text nodes directly, HTML elements recursively
 * through their `body`. The search stops at the first text node that contains
 * a newline. A text node without a newline does not end the search.
 *
 * @param children - Nodes to inspect.
 * @returns `true` when any text node at any depth contains `\n`.
 */
export function hasMultilineTextContent(children: Node[]): boolean {
  return children.some(child => {
    if (isNode(child, HTMLTextNode)) {
      return child.content.includes('\n')
    }

    return isNode(child, HTMLElementNode) && hasMultilineTextContent(child.body)
  })
}
337
+
/**
 * Verify that `children` contain nothing but inline material, at any depth.
 *
 * The result is `false` when any HTML element (directly or nested) is not an
 * inline element, or when a doctype, an HTML comment or an ERB control-flow
 * node is encountered.
 *
 * @param children - Nodes to inspect.
 */
export function areAllNestedElementsInline(children: Node[]): boolean {
  return children.every(child => {
    if (isNode(child, HTMLElementNode)) {
      return isInlineElement(getTagName(child)) && areAllNestedElementsInline(child.body)
    }

    return !isAnyOf(child, HTMLDoctypeNode, HTMLCommentNode, isERBControlFlowNode)
  })
}
358
+
/**
 * Detect complex ERB control flow among `inlineNodes`.
 *
 * Only ERBIfNodes are considered. One counts as complex when it has at least
 * one statement, has a location, and that location starts and ends on
 * different source lines.
 *
 * @param inlineNodes - Nodes to inspect.
 */
export function hasComplexERBControlFlow(inlineNodes: Node[]): boolean {
  for (const node of inlineNodes) {
    if (!isNode(node, ERBIfNode)) continue
    if (!(node.statements.length > 0 && node.location)) continue

    const { start, end } = node.location

    if (start.line !== end.line) return true
  }

  return false
}
378
+
/**
 * Detect children that mix text with inline elements (like
 * "text<em>inline</em>text") or text with ERB output (like "<%= value %> text").
 * Such content should be formatted inline even with structural newlines.
 *
 * Text nodes that contain only whitespace do not count as text.
 *
 * @param children - Nodes to inspect.
 * @returns `true` when there is non-blank text together with an inline element or ERB output.
 */
export function hasMixedTextAndInlineContent(children: Node[]): boolean {
  const hasText = children.some(child => isNode(child, HTMLTextNode) && child.content.trim() !== "")

  // Without real text there is nothing to mix with.
  if (!hasText) return false

  const hasInlineElements = children.some(child => isNode(child, HTMLElementNode) && isInlineElement(getTagName(child)))

  return hasInlineElements || hasERBOutput(children)
}
402
+
/**
 * Check if an element's tag is one of the content-preserving elements listed
 * in `CONTENT_PRESERVING_ELEMENTS`.
 *
 * The tag name is lower-cased before the lookup, matching `isInlineElement`,
 * so `<PRE>` and `<pre>` are treated alike.
 *
 * @param element - Element, open tag or close tag whose tag name is checked.
 */
export function isContentPreserving(element: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode): boolean {
  const tagName = getTagName(element).toLowerCase()

  return CONTENT_PRESERVING_ELEMENTS.has(tagName)
}
408
+
/**
 * Count the leading run of inline elements / ERB content nodes in `children`
 * that sit next to each other with no whitespace in between.
 *
 * Whitespace nodes before the first qualifying node are skipped. Counting
 * stops at the first node that is neither an inline element nor an
 * ERBContentNode, or at the first qualifying node separated from the previous
 * counted one by whitespace.
 *
 * @param children - Nodes to inspect.
 * @returns Number of adjacent inline/ERB nodes at the start.
 */
export function countAdjacentInlineElements(children: Node[]): number {
  let adjacent = 0
  let previousCountedIndex = -1

  for (const [position, child] of children.entries()) {
    const isBlank = isPureWhitespaceNode(child) || isNode(child, WhitespaceNode)
    if (isBlank) continue

    const qualifies = (isNode(child, HTMLElementNode) && isInlineElement(getTagName(child))) || isNode(child, ERBContentNode)
    if (!qualifies) break

    const separatedByWhitespace = previousCountedIndex >= 0 && hasWhitespaceBetween(children, previousCountedIndex, position)
    if (separatedByWhitespace) break

    adjacent += 1
    previousCountedIndex = position
  }

  return adjacent
}
439
+
/**
 * Check if a node represents a block-level element.
 *
 * A node is block-level when it is an HTML element whose tag is not an inline
 * element. The inline check goes through `isInlineElement`, so the tag name is
 * compared case-insensitively.
 *
 * @param node - Node to inspect.
 * @returns `false` for anything that is not an HTMLElementNode.
 */
export function isBlockLevelNode(node: Node): boolean {
  if (!isNode(node, HTMLElementNode)) {
    return false
  }

  return !isInlineElement(getTagName(node))
}
456
+
/**
 * Check if an element is a line-breaking element (br or hr).
 *
 * The tag name is lower-cased before comparing, matching `isInlineElement`,
 * so `<BR>` and `<br>` are treated alike.
 *
 * @param node - Node to inspect.
 * @returns `false` for anything that is not an HTMLElementNode.
 */
export function isLineBreakingElement(node: Node): boolean {
  if (!isNode(node, HTMLElementNode)) {
    return false
  }

  const tagName = getTagName(node).toLowerCase()

  return tagName === 'br' || tagName === 'hr'
}
469
+
/**
 * Collapse every run of whitespace into a single space, trim the result and
 * split it into words on spaces.
 *
 * Note: empty or whitespace-only input yields `[""]` (one empty word), not an
 * empty array.
 *
 * @param text - Text to split.
 * @returns The words in order of appearance.
 */
export function normalizeAndSplitWords(text: string): string[] {
  const collapsed = text.replace(/\s+/g, ' ')
  const trimmed = collapsed.trim()

  return trimmed.split(' ')
}
478
+
/**
 * Tell whether the last character of `text` is a whitespace character.
 *
 * @param text - Text to test.
 * @returns `false` for the empty string.
 */
export function endsWithWhitespace(text: string): boolean {
  const trailingWhitespace = /\s$/

  return trailingWhitespace.test(text)
}
485
+
/**
 * Tell whether `node` is an ERB comment that carries a herb:disable directive.
 *
 * The node must be an ERBContentNode opened with `<%#`, and its content, once
 * trimmed, must start with `herb:disable`.
 *
 * @param node - Node to inspect.
 */
export function isHerbDisableComment(node: Node): boolean {
  if (!isNode(node, ERBContentNode) || node.tag_opening?.value !== "<%#") return false

  // Missing content is treated like an empty comment.
  const commentBody = (node.content?.value || "").trim()

  return commentBody.startsWith("herb:disable")
}
498
+
/**
 * Tell whether `node` is a text node holding YAML frontmatter, i.e. its
 * trimmed content both starts and ends with `---`.
 *
 * Note: the two checks may overlap, so a text node consisting of a single
 * `---` also qualifies.
 *
 * @param node - Node to inspect.
 */
export function isFrontmatter(node: Node): node is HTMLTextNode {
  if (!isNode(node, HTMLTextNode)) return false

  const trimmed = node.content.trim()

  if (!trimmed.startsWith("---")) return false

  return /---\s*$/.test(trimmed)
}