@herb-tools/formatter 0.8.10 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,311 @@
1
+ import { isNode, getTagName, isPureWhitespaceNode } from "@herb-tools/core"
2
+ import { Node, HTMLTextNode, HTMLElementNode, ERBContentNode, WhitespaceNode } from "@herb-tools/core"
3
+
4
+ import type { ContentUnitWithNode } from "./format-helpers.js"
5
+
6
+ import {
7
+ ASCII_WHITESPACE,
8
+ buildLineWithWord,
9
+ countAdjacentInlineElements,
10
+ isClosingPunctuation,
11
+ isInlineElement,
12
+ isLineBreakingElement,
13
+ needsSpaceBetween,
14
+ } from "./format-helpers.js"
15
+
16
+ import {
17
+ collectTextFlowRun as collectTextFlowRunHelper,
18
+ isInTextFlowContext as isInTextFlowContextHelper,
19
+ isTextFlowNode as isTextFlowNodeHelper,
20
+ tryMergePunctuationText as tryMergePunctuationTextHelper,
21
+ } from "./text-flow-helpers.js"
22
+
23
+ import { TextFlowAnalyzer } from "./text-flow-analyzer.js"
24
+ import type { TextFlowAnalyzerDelegate } from "./text-flow-analyzer.js"
25
+
26
/**
 * Rendering contract consumed by the TextFlowEngine.
 *
 * It extends TextFlowAnalyzerDelegate, so an implementer (the FormatPrinter)
 * must also provide everything the analyzer needs. The engine additionally
 * calls `renderERBAsString` on its delegate; that member is not declared here,
 * so it is presumably inherited from TextFlowAnalyzerDelegate (confirm there).
 */
export interface TextFlowDelegate extends TextFlowAnalyzerDelegate {
  // --- layout settings -----------------------------------------------------

  /**
   * Indentation string. The engine prefixes wrapped lines with it and
   * subtracts its length from `maxLineLength` to obtain the wrap width.
   */
  readonly indent: string

  /** Line-length budget that the wrap width is derived from. */
  readonly maxLineLength: number

  // --- output --------------------------------------------------------------

  /** Receives a finished line; the engine passes lines it has already prefixed with `indent`. */
  push(line: string): void

  /**
   * Receives content that the engine has NOT prefixed with `indent`
   * (presumably the implementer adds the indentation itself).
   */
  pushWithIndent(line: string): void

  // --- rendering -----------------------------------------------------------

  /** Produces the string form of an inline element for concatenation into a line. */
  renderInlineElementAsString(element: HTMLElementNode): string

  /** Called for nodes whose content unit breaks the text flow. */
  visit(node: Node): void
}
39
+
40
/**
 * Lays out "text flow": runs of text mixed with inline elements and ERB tags.
 *
 * Splitting children into content units is left to a TextFlowAnalyzer; this
 * class does the rendering through its delegate. Runs of adjacent inline
 * elements are concatenated onto a shared line, and everything else is split
 * into words and wrapped against the delegate's line-length budget.
 */
export class TextFlowEngine {
  private analyzer: TextFlowAnalyzer

  constructor(private delegate: TextFlowDelegate) {
    this.analyzer = new TextFlowAnalyzer(delegate)
  }

  /**
   * Formats a list of sibling nodes as text flow.
   *
   * If `countAdjacentInlineElements` reports a group of two or more, that group
   * is rendered first and the children it did not consume are processed
   * afterwards. Otherwise the whole list is wrapped as one flow.
   */
  visitTextFlowChildren(children: Node[]): void {
    const inlineGroupSize = countAdjacentInlineElements(children)

    if (inlineGroupSize < 2) {
      this.buildAndWrapTextFlow(children)
      return
    }

    const rendered = this.renderAdjacentInlineElements(children, inlineGroupSize)
    this.visitRemainingChildrenAsTextFlow(children, rendered.processedIndices)
  }

  /** Forwards to the `isInTextFlowContext` helper. */
  isInTextFlowContext(children: Node[]): boolean {
    return isInTextFlowContextHelper(children)
  }

  /** Forwards to the `collectTextFlowRun` helper. */
  collectTextFlowRun(body: Node[], startIndex: number): { nodes: Node[], endIndex: number } | null {
    return collectTextFlowRunHelper(body, startIndex)
  }

  /** Forwards to the `isTextFlowNode` helper. */
  isTextFlowNode(node: Node): boolean {
    return isTextFlowNodeHelper(node)
  }

  /**
   * Renders up to `count` inline elements / ERB tags, starting at `startIndex`,
   * as one concatenated string and emits it via `pushWithIndent`.
   *
   * Whitespace nodes and indices listed in `alreadyProcessed` are skipped.
   * After the group, directly following ERB tags and one text node that starts
   * with punctuation (`. ! ? : ; %`) are attached to the same line.
   *
   * @returns the indices consumed by this call and the last index consumed
   *   (or `startIndex + count - 1` when nothing was consumed).
   */
  private renderAdjacentInlineElements(children: Node[], count: number, startIndex = 0, alreadyProcessed?: Set<number>): { processedIndices: Set<number>; lastIndex: number } {
    const processedIndices = new Set<number>()
    let buffer = ""
    let consumed = 0
    let lastProcessedIndex = -1

    // Records a child as handled by this call.
    const markProcessed = (position: number): void => {
      processedIndices.add(position)
      lastProcessedIndex = position
    }

    // Phase 1: concatenate the inline elements / ERB tags that form the group.
    for (let position = startIndex; position < children.length && consumed < count; position++) {
      const node = children[position]

      if (isPureWhitespaceNode(node) || isNode(node, WhitespaceNode)) continue
      if (alreadyProcessed?.has(position)) continue

      if (isNode(node, HTMLElementNode) && isInlineElement(getTagName(node))) {
        buffer += this.delegate.renderInlineElementAsString(node)
        consumed++
        markProcessed(position)

        // A line-breaking element ends the current output line immediately.
        if (buffer && isLineBreakingElement(node)) {
          this.delegate.pushWithIndent(buffer)
          buffer = ""
        }

        continue
      }

      if (isNode(node, ERBContentNode)) {
        buffer += this.delegate.renderERBAsString(node)
        consumed++
        markProcessed(position)
      }
    }

    // Phase 2: attach trailing ERB tags and punctuation-led text to the open line.
    if (buffer && lastProcessedIndex >= 0) {
      let position = lastProcessedIndex + 1

      while (position < children.length) {
        const current = position++
        const node = children[current]

        if (isPureWhitespaceNode(node) || isNode(node, WhitespaceNode)) continue
        if (alreadyProcessed?.has(current)) break

        if (isNode(node, ERBContentNode)) {
          buffer += this.delegate.renderERBAsString(node)
          markProcessed(current)
          continue
        }

        // Anything other than punctuation-led text ends the trailing scan.
        if (!isNode(node, HTMLTextNode)) break

        const text = node.content.trim()
        if (!text || !/^[.!?:;%]/.test(text)) break

        const wrapWidth = this.delegate.maxLineLength - this.delegate.indent.length
        const merge = tryMergePunctuationTextHelper(buffer, text, wrapWidth, this.delegate.indent)

        buffer = merge.mergedContent
        markProcessed(current)

        if (merge.shouldStop) {
          // The text overflowed: emit the merged line, then the pre-wrapped rest.
          if (buffer) {
            this.delegate.pushWithIndent(buffer)
          }

          for (const wrappedLine of merge.wrappedLines) {
            this.delegate.push(wrappedLine)
          }

          return { processedIndices, lastIndex: lastProcessedIndex }
        }

        // Only a single text node is ever merged.
        break
      }
    }

    if (buffer) {
      this.delegate.pushWithIndent(buffer)
    }

    const lastIndex = lastProcessedIndex >= 0 ? lastProcessedIndex : startIndex + count - 1

    return { processedIndices, lastIndex }
  }

  /**
   * Walks `children`, skipping indices already in `processedIndices`.
   *
   * Further groups of two or more adjacent inline elements are rendered through
   * `renderAdjacentInlineElements`; all other nodes (whitespace included) are
   * buffered and wrapped as text flow. Indices consumed along the way are added
   * to the caller's `processedIndices` set.
   */
  private visitRemainingChildrenAsTextFlow(children: Node[], processedIndices: Set<number>): void {
    let pending: Node[] = []

    const flushPending = (): void => {
      if (pending.length === 0) return

      this.buildAndWrapTextFlow(pending)
      pending = []
    }

    let cursor = 0

    while (cursor < children.length) {
      if (processedIndices.has(cursor)) {
        cursor++
        continue
      }

      const node = children[cursor]
      const isWhitespace = isPureWhitespaceNode(node) || isNode(node, WhitespaceNode)

      // Whitespace never starts a group, so the adjacency count is only taken for other nodes.
      const groupSize = isWhitespace ? 0 : countAdjacentInlineElements(children, cursor, processedIndices)

      if (groupSize < 2) {
        pending.push(node)
        cursor++
        continue
      }

      flushPending()

      const group = this.renderAdjacentInlineElements(children, groupSize, cursor, processedIndices)

      for (const handled of group.processedIndices) {
        processedIndices.add(handled)
      }

      cursor = group.lastIndex + 1
    }

    flushPending()
  }

  /**
   * Converts `children` into content units (via the analyzer), turns those into
   * a word list and wraps it.
   *
   * Flow-breaking units flush the words gathered so far and are visited through
   * the delegate. Atomic units become a single word. Plain text is split on
   * spaces, and a trailing space on a word records that whitespace separated it
   * from what follows.
   */
  private buildAndWrapTextFlow(children: Node[]): void {
    const unitsWithNodes: ContentUnitWithNode[] = this.analyzer.buildContentUnits(children)
    const words: Array<{ word: string, isHerbDisable: boolean }> = []

    // Gives the most recent word a trailing space unless it already has one.
    const padLastWord = (): void => {
      if (words.length === 0) return

      const previous = words[words.length - 1]

      if (!previous.word.endsWith(' ')) {
        previous.word += ' '
      }
    }

    for (const entry of unitsWithNodes) {
      const { unit, node } = entry

      if (unit.breaksFlow) {
        this.flushWords(words)

        if (node) {
          this.delegate.visit(node)
        }

        continue
      }

      if (unit.isAtomic) {
        words.push({ word: unit.content, isHerbDisable: unit.isHerbDisable || false })
        continue
      }

      // ASCII_WHITESPACE matches are replaced by a single space before splitting.
      const spaced = unit.content.replace(ASCII_WHITESPACE, ' ')
      const core = spaced.trim()

      if (!core) {
        // Whitespace-only text still separates its neighbours.
        if (spaced === ' ') {
          padLastWord()
        }

        continue
      }

      if (spaced.startsWith(' ')) {
        padLastWord()
      }

      for (const piece of core.split(' ')) {
        words.push({ word: piece, isHerbDisable: false })
      }

      if (spaced.endsWith(' ') && words.length > 0) {
        const tail = words[words.length - 1]

        if (!isClosingPunctuation(tail.word)) {
          tail.word += ' '
        }
      }
    }

    // A trailing space on the final word only matters when something follows it;
    // strip it so it does not count towards the line length.
    if (words.length > 0) {
      const finalWord = words[words.length - 1]
      finalWord.word = finalWord.word.trimEnd()
    }

    this.flushWords(words)
  }

  /** Wraps and emits the collected words, then empties the array in place. */
  private flushWords(words: Array<{ word: string, isHerbDisable: boolean }>): void {
    if (words.length === 0) return

    this.wrapAndPushWords(words)
    words.length = 0
  }

  /**
   * Greedily packs words into lines no wider than
   * `maxLineLength - indent.length` and pushes each line prefixed with the indent.
   *
   * Words flagged `isHerbDisable` do not count towards the measured width, and
   * a closing-punctuation word is never moved to a new line.
   */
  private wrapAndPushWords(words: Array<{ word: string, isHerbDisable: boolean }>): void {
    const { indent, maxLineLength } = this.delegate
    const wrapWidth = maxLineLength - indent.length
    const lines: string[] = []
    let line = ""
    let width = 0

    for (const { word, isHerbDisable } of words) {
      const extended = buildLineWithWord(line, word)
      let extendedWidth = width

      if (!isHerbDisable) {
        const separator = line && needsSpaceBetween(line, word) ? 1 : 0
        extendedWidth = width + separator + word.length
      }

      const mustWrap = line && !isClosingPunctuation(word) && extendedWidth > wrapWidth

      if (mustWrap) {
        lines.push(indent + line.trim())
        line = word
        width = isHerbDisable ? 0 : word.length
      } else {
        line = extended
        width = extendedWidth
      }
    }

    if (line) {
      lines.push(indent + line.trim())
    }

    for (const finished of lines) {
      this.delegate.push(finished)
    }
  }
}
@@ -0,0 +1,319 @@
1
+ import { isNode, getTagName } from "@herb-tools/core"
2
+ import { Node, HTMLTextNode, HTMLElementNode, ERBContentNode, WhitespaceNode } from "@herb-tools/core"
3
+
4
+ import type { ContentUnitWithNode } from "./format-helpers.js"
5
+
6
+ import {
7
+ endsWithWhitespace,
8
+ hasWhitespaceBetween,
9
+ isInlineElement,
10
+ normalizeAndSplitWords,
11
+ } from "./format-helpers.js"
12
+
13
+
14
+ /**
15
+ * Check if a node participates in text flow
16
+ */
17
+ export function isTextFlowNode(node: Node): boolean {
18
+ if (isNode(node, ERBContentNode)) return true
19
+ if (isNode(node, HTMLTextNode) && node.content.trim() !== "") return true
20
+ if (isNode(node, HTMLElementNode) && isInlineElement(getTagName(node))) return true
21
+
22
+ return false
23
+ }
24
+
25
+ /**
26
+ * Check if a node is whitespace that can appear within a text flow run
27
+ */
28
+ export function isTextFlowWhitespace(node: Node): boolean {
29
+ if (isNode(node, WhitespaceNode)) return true
30
+ if (isNode(node, HTMLTextNode) && node.content.trim() === "" && !node.content.includes('\n\n')) return true
31
+
32
+ return false
33
+ }
34
+
35
+ /**
36
+ * Collect a run of text flow nodes starting at the given index.
37
+ * Returns the nodes in the run and the index after the last node.
38
+ * Returns null if the run doesn't qualify (needs 2+ text flow nodes with both text and atomic content).
39
+ */
40
+ export function collectTextFlowRun(body: Node[], startIndex: number): { nodes: Node[], endIndex: number } | null {
41
+ const nodes: Node[] = []
42
+ let index = startIndex
43
+ let textFlowCount = 0
44
+
45
+ while (index < body.length) {
46
+ const child = body[index]
47
+
48
+ if (isTextFlowNode(child)) {
49
+ nodes.push(child)
50
+ textFlowCount++
51
+ index++
52
+ } else if (isTextFlowWhitespace(child)) {
53
+ let hasMoreTextFlow = false
54
+
55
+ for (let lookaheadIndex = index + 1; lookaheadIndex < body.length; lookaheadIndex++) {
56
+ if (isTextFlowNode(body[lookaheadIndex])) {
57
+ hasMoreTextFlow = true
58
+ break
59
+ }
60
+
61
+ if (isTextFlowWhitespace(body[lookaheadIndex])) {
62
+ continue
63
+ }
64
+
65
+ break
66
+ }
67
+
68
+ if (hasMoreTextFlow) {
69
+ nodes.push(child)
70
+ index++
71
+ } else {
72
+ break
73
+ }
74
+ } else {
75
+ break
76
+ }
77
+ }
78
+
79
+ if (textFlowCount >= 2) {
80
+ const hasText = nodes.some(node => isNode(node, HTMLTextNode) && node.content.trim() !== "")
81
+ const hasAtomicContent = nodes.some(node => isNode(node, ERBContentNode) || (isNode(node, HTMLElementNode) && isInlineElement(getTagName(node))))
82
+
83
+ if (hasText && hasAtomicContent) {
84
+ return { nodes, endIndex: index }
85
+ }
86
+ }
87
+
88
+ return null
89
+ }
90
+
91
+ /**
92
+ * Check if children represent a text flow context
93
+ * (has text content mixed with inline elements or ERB)
94
+ */
95
+ export function isInTextFlowContext(children: Node[]): boolean {
96
+ const hasTextContent = children.some(child => isNode(child, HTMLTextNode) && child.content.trim() !== "")
97
+ const nonTextChildren = children.filter(child => !isNode(child, HTMLTextNode))
98
+
99
+ if (!hasTextContent) return false
100
+ if (nonTextChildren.length === 0) return false
101
+
102
+ const allInline = nonTextChildren.every(child => {
103
+ if (isNode(child, ERBContentNode)) return true
104
+
105
+ if (isNode(child, HTMLElementNode)) {
106
+ return isInlineElement(getTagName(child))
107
+ }
108
+
109
+ return false
110
+ })
111
+
112
+ if (!allInline) return false
113
+
114
+ return true
115
+ }
116
+
117
+ /**
118
+ * Try to merge text that follows an atomic unit (ERB/inline) with no whitespace.
119
+ * Merges the first word of the text into the preceding atomic unit.
120
+ * Returns true if merge was performed.
121
+ */
122
+ export function tryMergeTextAfterAtomic(result: ContentUnitWithNode[], textNode: HTMLTextNode): boolean {
123
+ if (result.length === 0) return false
124
+
125
+ const lastUnit = result[result.length - 1]
126
+
127
+ if (!lastUnit.unit.isAtomic || (lastUnit.unit.type !== 'erb' && lastUnit.unit.type !== 'inline')) {
128
+ return false
129
+ }
130
+
131
+ const words = normalizeAndSplitWords(textNode.content)
132
+ if (words.length === 0 || !words[0]) return false
133
+
134
+ const firstWord = words[0]
135
+ const firstChar = firstWord[0]
136
+
137
+ if (' \t\n\r'.includes(firstChar)) {
138
+ return false
139
+ }
140
+
141
+ lastUnit.unit.content += firstWord
142
+
143
+ if (words.length > 1) {
144
+ let remainingText = words.slice(1).join(' ')
145
+
146
+ if (endsWithWhitespace(textNode.content)) {
147
+ remainingText += ' '
148
+ }
149
+
150
+ result.push({
151
+ unit: { content: remainingText, type: 'text', isAtomic: false, breaksFlow: false },
152
+ node: textNode
153
+ })
154
+ } else if (endsWithWhitespace(textNode.content)) {
155
+ result.push({
156
+ unit: { content: ' ', type: 'text', isAtomic: false, breaksFlow: false },
157
+ node: textNode
158
+ })
159
+ }
160
+
161
+ return true
162
+ }
163
+
164
+ /**
165
+ * Try to merge an atomic unit (ERB/inline) with preceding text that has no whitespace.
166
+ * Splits preceding text, merges last word with atomic content.
167
+ * Returns true if merge was performed.
168
+ */
169
+ export function tryMergeAtomicAfterText(result: ContentUnitWithNode[], children: Node[], lastProcessedIndex: number, atomicContent: string, atomicType: 'erb' | 'inline', atomicNode: Node): boolean {
170
+ if (result.length === 0) return false
171
+
172
+ const lastUnit = result[result.length - 1]
173
+ if (lastUnit.unit.type !== 'text' || lastUnit.unit.isAtomic) return false
174
+
175
+ const words = normalizeAndSplitWords(lastUnit.unit.content)
176
+ const lastWord = words[words.length - 1]
177
+ if (!lastWord) return false
178
+
179
+ result.pop()
180
+
181
+ if (words.length > 1) {
182
+ const remainingText = words.slice(0, -1).join(' ')
183
+
184
+ result.push({
185
+ unit: { content: remainingText, type: 'text', isAtomic: false, breaksFlow: false },
186
+ node: children[lastProcessedIndex]
187
+ })
188
+ }
189
+
190
+ result.push({
191
+ unit: { content: lastWord + atomicContent, type: atomicType, isAtomic: true, breaksFlow: false },
192
+ node: atomicNode
193
+ })
194
+
195
+ return true
196
+ }
197
+
198
+ /**
199
+ * Check if there's whitespace between current node and last processed node
200
+ */
201
+ export function hasWhitespaceBeforeNode(children: Node[], lastProcessedIndex: number, currentIndex: number, currentNode: Node): boolean {
202
+ if (hasWhitespaceBetween(children, lastProcessedIndex, currentIndex)) {
203
+ return true
204
+ }
205
+
206
+ if (isNode(currentNode, HTMLTextNode) && /^[ \t\n\r]/.test(currentNode.content)) {
207
+ return true
208
+ }
209
+
210
+ return false
211
+ }
212
+
213
+ /**
214
+ * Check if last unit in result ends with whitespace
215
+ */
216
+ export function lastUnitEndsWithWhitespace(result: ContentUnitWithNode[]): boolean {
217
+ if (result.length === 0) return false
218
+
219
+ const lastUnit = result[result.length - 1]
220
+
221
+ return lastUnit.unit.type === 'text' && endsWithWhitespace(lastUnit.unit.content)
222
+ }
223
+
224
+ /**
225
+ * Wrap remaining words that don't fit on the current line.
226
+ * Returns the wrapped lines with proper indentation.
227
+ */
228
+ export function wrapRemainingWords(words: string[], wrapWidth: number, indent: string): string[] {
229
+ const lines: string[] = []
230
+ let line = ""
231
+
232
+ for (const word of words) {
233
+ const testLine = line + (line ? " " : "") + word
234
+
235
+ if (testLine.length > wrapWidth && line) {
236
+ lines.push(indent + line)
237
+ line = word
238
+ } else {
239
+ line = testLine
240
+ }
241
+ }
242
+
243
+ if (line) {
244
+ lines.push(indent + line)
245
+ }
246
+
247
+ return lines
248
+ }
249
+
250
+ /**
251
+ * Try to merge text starting with punctuation to inline content.
252
+ * Returns object with merged content and whether processing should stop.
253
+ */
254
+ export function tryMergePunctuationText(inlineContent: string, trimmedText: string, wrapWidth: number, indent: string): { mergedContent: string, shouldStop: boolean, wrappedLines: string[] } {
255
+ const combined = inlineContent + trimmedText
256
+
257
+ if (combined.length <= wrapWidth) {
258
+ return {
259
+ mergedContent: inlineContent + trimmedText,
260
+ shouldStop: false,
261
+ wrappedLines: []
262
+ }
263
+ }
264
+
265
+ const match = trimmedText.match(/^[.!?:;%]+/)
266
+
267
+ if (!match) {
268
+ return {
269
+ mergedContent: inlineContent,
270
+ shouldStop: false,
271
+ wrappedLines: []
272
+ }
273
+ }
274
+
275
+ const punctuation = match[0]
276
+ const restText = trimmedText.substring(punctuation.length).trim()
277
+
278
+ if (!restText) {
279
+ return {
280
+ mergedContent: inlineContent + punctuation,
281
+ shouldStop: false,
282
+ wrappedLines: []
283
+ }
284
+ }
285
+
286
+ const words = restText.split(/[ \t\n\r]+/)
287
+ let toMerge = punctuation
288
+ let mergedWordCount = 0
289
+
290
+ for (const word of words) {
291
+ const testMerge = toMerge + ' ' + word
292
+
293
+ if ((inlineContent + testMerge).length <= wrapWidth) {
294
+ toMerge = testMerge
295
+ mergedWordCount++
296
+ } else {
297
+ break
298
+ }
299
+ }
300
+
301
+ const mergedContent = inlineContent + toMerge
302
+
303
+ if (mergedWordCount >= words.length) {
304
+ return {
305
+ mergedContent,
306
+ shouldStop: false,
307
+ wrappedLines: []
308
+ }
309
+ }
310
+
311
+ const remainingWords = words.slice(mergedWordCount)
312
+ const wrappedLines = wrapRemainingWords(remainingWords, wrapWidth, indent)
313
+
314
+ return {
315
+ mergedContent,
316
+ shouldStop: true,
317
+ wrappedLines
318
+ }
319
+ }