methanol 0.0.21 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,591 @@
1
+ /* Copyright Yukino Song, SudoMaker Ltd.
2
+ *
3
+ * Licensed to the Apache Software Foundation (ASF) under one
4
+ * or more contributor license agreements. See the NOTICE file
5
+ * distributed with this work for additional information
6
+ * regarding copyright ownership. The ASF licenses this file
7
+ * to you under the Apache License, Version 2.0 (the
8
+ * "License"); you may not use this file except in compliance
9
+ * with the License. You may obtain a copy of the License at
10
+ *
11
+ * http://www.apache.org/licenses/LICENSE-2.0
12
+ *
13
+ * Unless required by applicable law or agreed to in writing,
14
+ * software distributed under the License is distributed on an
15
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16
+ * KIND, either express or implied. See the License for the
17
+ * specific language governing permissions and limitations
18
+ * under the License.
19
+ */
20
+
21
+ import { existsSync } from 'fs'
22
+ import { mkdir, writeFile } from 'fs/promises'
23
+ import { resolve } from 'path'
24
+ import { Parser } from 'htmlparser2'
25
+ import { state } from '../state.js'
26
+ import { resolveBasePrefix } from '../config.js'
27
+ import {
28
+ hashMd5,
29
+ splitUrlParts,
30
+ isExternalUrl,
31
+ resolveManifestKey,
32
+ joinBasePrefix,
33
+ stripBasePrefix
34
+ } from './utils.js'
35
+
36
// Set to true once the inline output directory has been created, so that
// ensureInlineDir() only calls mkdir on its first invocation.
let inlineReady = false
37
+
38
/**
 * Builds the absolute path of the `.methanol` directory located directly
 * under the configured pages directory (`state.PAGES_DIR`).
 *
 * @returns {string} Resolved path ending in `.methanol`.
 */
const resolveMethanolDir = () => {
  const pagesRoot = state.PAGES_DIR
  return resolve(pagesRoot, '.methanol')
}
39
/**
 * Tells whether a root-relative URL path points at something that exists
 * inside the static directory (`state.STATIC_DIR`).
 *
 * @param {string} resolvedPath Root-relative path such as `/img/logo.png`.
 * @returns {boolean} `false` when the path is empty, no static directory is
 *   configured, the path does not start with `/`, or nothing exists at the
 *   resolved location; `true` otherwise.
 */
const isStaticPath = (resolvedPath) => {
  if (!resolvedPath) return false
  const staticRoot = state.STATIC_DIR
  if (!staticRoot) return false
  if (!resolvedPath.startsWith('/')) return false
  // Drop the leading slash so the path resolves inside the static directory.
  const relativePath = resolvedPath.slice(1)
  return existsSync(resolve(staticRoot, relativePath))
}
44
+
45
/**
 * Creates the `inline` directory under the methanol directory (recursively)
 * the first time it is called. Later calls return without touching the file
 * system because `inlineReady` is set after the directory has been created.
 *
 * @returns {Promise<void>}
 */
const ensureInlineDir = async () => {
  if (!inlineReady) {
    const target = resolve(resolveMethanolDir(), 'inline')
    await mkdir(target, { recursive: true })
    inlineReady = true
  }
}
51
+
52
/**
 * Applies a list of `{ start, end, text }` replacements to a string.
 *
 * Patches whose `start` or `end` is not a number are ignored. The remaining
 * patches are applied from the highest `start` to the lowest so that the
 * offsets of patches that have not been applied yet stay valid.
 *
 * @param {string} html Text to patch.
 * @param {Array<{start: number, end: number, text?: string}>} [patches]
 * @returns {string} Patched text, or `html` itself when there are no patches.
 */
const applyPatches = (html, patches = []) => {
  if (!patches.length) return html

  const hasNumericRange = (patch) =>
    Boolean(patch) && typeof patch.start === 'number' && typeof patch.end === 'number'
  // Highest start first; for equal starts, the larger end first.
  const lastToFirst = (left, right) => (right.start - left.start) || (right.end - left.end)

  return patches
    .filter(hasNumericRange)
    .sort(lastToFirst)
    .reduce(
      (acc, { start, end, text }) => `${acc.slice(0, start)}${text ?? ''}${acc.slice(end)}`,
      html
    )
}
63
+
64
/**
 * Escapes a value so it can be placed inside a double-quoted HTML attribute
 * (this is how `serializeAttrs` emits attributes).
 *
 * - `"` becomes `&quot;` so the value cannot terminate the attribute early.
 * - A bare `&` becomes `&amp;`. An `&` that already starts a character
 *   reference (`&name;`, `&#123;`, `&#x1F;`) is left alone: the parsers in
 *   this file run with `decodeEntities: false`, so attribute values arrive
 *   still encoded and must not be encoded a second time.
 *
 * @param {unknown} value Attribute value; `null`/`undefined` yield `''`.
 * @returns {string} Escaped attribute text.
 */
const escapeAttr = (value) =>
  String(value ?? '')
    .replace(/&(?!(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#[xX][0-9a-fA-F]+);)/g, '&amp;')
    .replace(/"/g, '&quot;')
68
+
69
/**
 * Turns an attribute map into the text that follows a tag name.
 *
 * Entries with an empty name or a `null`/`undefined` value are skipped. An
 * empty-string value is written as a bare attribute name; every other value
 * is written as `name="escaped value"`.
 *
 * @param {Record<string, unknown>} [attrs]
 * @returns {string} Attribute text with a leading space, or `''` when no
 *   attribute is emitted.
 */
const serializeAttrs = (attrs = {}) => {
  const parts = []
  for (const [name, value] of Object.entries(attrs)) {
    if (!name || value == null) continue
    parts.push(value === '' ? name : `${name}="${escapeAttr(value)}"`)
  }
  return parts.length ? ` ${parts.join(' ')}` : ''
}
78
+
79
/**
 * Serializes an opening tag, optionally followed by its matching closing tag.
 *
 * @param {string} tag Tag name.
 * @param {Record<string, unknown>} [attrs] Attributes, serialized with
 *   `serializeAttrs`.
 * @param {{closeTag?: boolean}} [options] When `closeTag` is true the result
 *   is `<tag ...></tag>`; otherwise only `<tag ...>`.
 * @returns {string}
 */
const serializeTag = (tag, attrs = {}, { closeTag = false } = {}) => {
  const opening = `<${tag}${serializeAttrs(attrs)}>`
  return closeTag ? `${opening}</${tag}>` : opening
}
86
+
87
/**
 * Moves inline `<script type="module">` blocks (those without a `src`) out of
 * the HTML into files named `inline-<md5 of content>.js` inside the methanol
 * `inline` directory, and replaces each block with an external
 * `<script type="module" src="...">` tag pointing at `/.methanol/inline/...`
 * joined with the base prefix.
 *
 * @param {string} html Rendered HTML.
 * @param {string} routePath Accepted for signature parity; not read here.
 * @returns {Promise<{html: string, changed: boolean}>} The rewritten HTML and
 *   whether any inline module script was replaced.
 */
const rewriteInlineScripts = async (html, routePath) => {
  const basePrefix = resolveBasePrefix()
  const inlineDir = resolve(resolveMethanolDir(), 'inline')
  const inlineScripts = []
  let current = null

  // Converts a parser index into an exclusive end offset just past the
  // next '>' at or after that index.
  const resolveTagEnd = (index) => {
    if (typeof index !== 'number') return index
    if (html[index] === '>') return index + 1
    const next = html.indexOf('>', index)
    return next >= 0 ? next + 1 : index + 1
  }

  const parser = new Parser(
    {
      onopentag(name, attrs) {
        if (name !== 'script') return
        const type = (attrs?.type || '').toLowerCase()
        const src = attrs?.src
        // Only inline module scripts are extracted.
        if (type !== 'module' || src) {
          current = null
          return
        }
        current = {
          start: parser.startIndex,
          end: null,
          content: ''
        }
      },
      ontext(text) {
        if (current) {
          current.content += text
        }
      },
      onclosetag(name) {
        if (name !== 'script' || !current) return
        current.end = resolveTagEnd(parser.endIndex)
        inlineScripts.push(current)
        current = null
      }
    },
    {
      decodeEntities: false,
      lowerCaseTags: true,
      lowerCaseAttributeNames: true
    }
  )

  parser.write(html)
  parser.end()

  if (!inlineScripts.length) {
    return { html, changed: false }
  }

  // The directory only has to be ensured once, not once per script.
  await ensureInlineDir()

  // filename -> content already written during this call, so a script that is
  // repeated verbatim on the page is not written to disk again.
  const written = new Map()
  const patches = []
  for (const entry of inlineScripts) {
    const content = entry.content || ''
    const filename = `inline-${hashMd5(content)}.js`
    if (written.get(filename) !== content) {
      await writeFile(resolve(inlineDir, filename), content)
      written.set(filename, content)
    }
    const publicPath = `/.methanol/inline/${filename}`
    const srcAttr = `src="${joinBasePrefix(basePrefix, publicPath)}"`
    const replacement = `<script type="module" ${srcAttr}></script>`
    patches.push({ start: entry.start, end: entry.end, text: replacement })
  }

  // Every collected script produced a patch, so the HTML always changes here.
  return { html: applyPatches(html, patches), changed: true }
}
160
+
161
/**
 * Parses rendered HTML once and records which tags may need rewriting.
 *
 * Collected into `plan` (with source offsets of each tag):
 * - `scripts`: `<script type="module" src>` elements whose `src` resolves to
 *   a path, spanning the opening tag through the closing tag;
 * - `styles`: `<link rel="...stylesheet...">` whose `href` resolves to a path
 *   that is not a static file;
 * - `icons`: `<link>` whose `rel` contains `icon` and that has an `href`;
 * - `media`: `<img>`, `<source>`, `<video>`, `<audio>` with `src`, `poster`
 *   or `srcset`.
 *
 * Collected into `scan`: the distinct resolved script paths, style paths and
 * asset paths (assets found in the static directory are left out).
 *
 * @param {string} html Rendered HTML.
 * @param {string} routePath Route of the page, passed to `resolveManifestKey`.
 * @returns {Promise<{plan: object, scan: {scripts: string[], styles: string[], assets: string[]}}>}
 */
const buildRewritePlan = async (html, routePath) => {
  const basePrefix = resolveBasePrefix()
  const staticDir = state.STATIC_DIR
  const scriptPaths = new Set()
  const stylePaths = new Set()
  const assetPaths = new Set()
  const mediaTags = new Set(['img', 'source', 'video', 'audio'])
  const plan = {
    headEndOffset: null,
    scripts: [],
    styles: [],
    icons: [],
    media: []
  }
  // Script entries whose closing tag has not been seen yet.
  const openScripts = []

  // Converts a parser index into an exclusive end offset just past the
  // next '>' at or after that index.
  const resolveTagEnd = (index) => {
    if (typeof index !== 'number') return index
    if (html[index] === '>') return index + 1
    const closing = html.indexOf('>', index)
    return closing < 0 ? index + 1 : closing + 1
  }

  // Extracts the URL part of every candidate in a srcset value.
  const srcsetUrls = (value = '') => {
    const urls = []
    for (const piece of value.split(',')) {
      const candidate = piece.trim()
      if (!candidate) continue
      urls.push(candidate.split(/\s+/)[0])
    }
    return urls
  }

  const addAsset = (rawValue) => {
    if (!rawValue || isExternalUrl(rawValue)) return
    const resolvedPath = resolveManifestKey(rawValue, basePrefix, routePath)?.resolvedPath
    if (!resolvedPath || resolvedPath === '/') return
    // Files that exist in the static directory are not recorded as assets.
    const inStaticDir =
      Boolean(staticDir) &&
      resolvedPath.startsWith('/') &&
      existsSync(resolve(staticDir, resolvedPath.slice(1)))
    if (!inStaticDir) assetPaths.add(resolvedPath)
  }

  const recordScript = (tag, attrs, start, end) => {
    const type = (attrs?.type || '').toLowerCase()
    const src = attrs?.src
    let resolved = null
    if (type === 'module' && src) {
      resolved = resolveManifestKey(src, basePrefix, routePath)
    }
    const entry = {
      tag,
      attrs: { ...(attrs || {}) },
      src,
      start,
      end,
      resolvedPath: resolved?.resolvedPath,
      manifestKey: resolved?.key
    }
    // Every script is pushed so that close tags pair up with open tags.
    openScripts.push(entry)
    if (entry.resolvedPath) scriptPaths.add(entry.resolvedPath)
  }

  const recordLink = (tag, attrs, start, end) => {
    const rel = (attrs?.rel || '').toLowerCase()
    const href = attrs?.href

    if (rel.includes('stylesheet')) {
      const resolved = href ? resolveManifestKey(href, basePrefix, routePath) : null
      const resolvedPath = resolved?.resolvedPath
      if (resolvedPath && !isStaticPath(resolvedPath)) {
        stylePaths.add(resolvedPath)
        plan.styles.push({
          tag,
          attrs: { ...(attrs || {}) },
          href,
          start,
          end,
          resolvedPath,
          manifestKey: resolved.key
        })
      }
      return
    }

    const isIcon = rel.includes('icon') || rel.includes('apple-touch-icon')
    if (isIcon && href) {
      addAsset(href)
      plan.icons.push({
        tag,
        attrs: { ...(attrs || {}) },
        href,
        start,
        end
      })
    }
  }

  const recordMedia = (tag, attrs, start, end) => {
    const entry = {
      tag,
      attrs: { ...(attrs || {}) },
      start,
      end
    }
    const src = attrs?.src
    const poster = attrs?.poster
    const srcset = attrs?.srcset
    if (src) addAsset(src)
    if (poster) addAsset(poster)
    if (srcset) {
      for (const url of srcsetUrls(srcset)) {
        addAsset(url)
      }
    }
    if (src || poster || srcset) {
      plan.media.push(entry)
    }
  }

  const parser = new Parser(
    {
      onopentag(name, attrs) {
        const tag = name?.toLowerCase?.() || name
        const start = parser.startIndex
        const end = resolveTagEnd(parser.endIndex)

        if (tag === 'script') {
          recordScript(tag, attrs, start, end)
        } else if (tag === 'link') {
          recordLink(tag, attrs, start, end)
        } else if (mediaTags.has(tag)) {
          recordMedia(tag, attrs, start, end)
        }
      },
      onclosetag(name) {
        const tag = name?.toLowerCase?.() || name
        if (tag !== 'script') return
        const entry = openScripts.pop()
        if (!entry || !entry.resolvedPath) return
        // Extend the span so it covers the closing tag as well.
        entry.end = resolveTagEnd(parser.endIndex)
        plan.scripts.push(entry)
      }
    },
    {
      decodeEntities: false,
      lowerCaseTags: true,
      lowerCaseAttributeNames: true
    }
  )

  parser.write(html)
  parser.end()

  return {
    // V8 memory optimization: this blocks v8 from referencing string fragments
    // from potentially big html strings, which could lead to GBs of memory leak
    plan: JSON.parse(JSON.stringify(plan)),
    scan: {
      scripts: [...scriptPaths],
      styles: [...stylePaths],
      assets: [...assetPaths]
    }
  }
}
318
+
319
/**
 * Runs the two scanning passes over rendered HTML: inline module scripts are
 * externalized first, then a rewrite plan is built from the resulting HTML.
 *
 * @param {string} html Rendered HTML.
 * @param {string} routePath Route of the page being scanned.
 * @returns {Promise<{html: string, changed: boolean, plan: object, scan: object}>}
 *   `html` is the output of the inline-script pass and `changed` tells whether
 *   that pass modified it; `plan` and `scan` come from `buildRewritePlan`.
 */
export const scanRenderedHtml = async (html, routePath) => {
  const { html: nextHtml, changed } = await rewriteInlineScripts(html, routePath)
  const rewrite = await buildRewritePlan(nextHtml, routePath)
  return {
    html: nextHtml,
    changed,
    plan: rewrite.plan,
    scan: rewrite.scan
  }
}
330
+
331
/**
 * Looks a key up in a manifest object, trying the key as given first and
 * then the same key with a leading `/`.
 *
 * @param {Record<string, any>} manifest
 * @param {string} key
 * @returns {any|null} The first truthy entry found, or `null` when the
 *   manifest or key is missing or neither form of the key has an entry.
 */
export const resolveManifestEntry = (manifest, key) => {
  if (manifest && key) {
    for (const candidate of [key, `/${key}`]) {
      const hit = manifest[candidate]
      if (hit) return hit
    }
  }
  return null
}
337
+
338
/**
 * Rewrites tags in rendered HTML according to a rewrite plan.
 *
 * Each planned tag that needs a new URL is re-serialized and queued as a
 * "hole" (`start`, `end`, replacement text) over the original HTML:
 * - scripts are pointed at the file in `scriptMap`, or at `commonEntry.file`
 *   when the script is in `commonScripts` (only the first such script is
 *   kept, later ones are removed);
 * - stylesheets are pointed at the file in `styleMap`, else at a CSS file from
 *   the manifest entry, else at the first CSS file collected so far;
 * - icons and media (`src`, `poster`, `srcset`) are pointed at manifest files
 *   or prefixed with the base prefix.
 * CSS files referenced by rewritten scripts/styles that are not linked yet are
 * inserted as `<link rel="stylesheet">` before `</head>`.
 *
 * @param {string} html Rendered HTML the plan offsets refer to.
 * @param {object} plan Plan with optional `scripts`, `styles`, `icons`,
 *   `media` arrays and `headEndOffset`; a falsy plan returns `html` unchanged.
 * @param {string} routePath Route of the page, passed to `resolveManifestKey`.
 * @param {string} basePrefix Base prefix joined onto rewritten URLs.
 * @param {object} manifest Manifest looked up through `resolveManifestEntry`.
 * @param {Map<string, {file?: string, css?: string[]}>} scriptMap
 * @param {Map<string, {file?: string, css?: string[]}>} styleMap
 * @param {Set<string>} commonScripts Resolved paths served by the common entry.
 * @param {{file?: string, css?: string[]}} commonEntry
 * @returns {string} The rewritten HTML.
 */
export const rewriteHtmlByPlan = (
  html,
  plan,
  routePath,
  basePrefix,
  manifest,
  scriptMap,
  styleMap,
  commonScripts,
  commonEntry
) => {
  if (!plan) return html
  const holes = []
  const cssFiles = new Set()
  const linkedHrefs = new Set()
  let commonInserted = false

  const addHole = (start, end, text) => {
    if (typeof start !== 'number' || typeof end !== 'number') return
    holes.push({ start, end, text })
  }

  const replaceTag = (entry, tag, attrs, { closeTag = false } = {}) => {
    if (!entry || typeof entry.start !== 'number' || typeof entry.end !== 'number') return
    addHole(entry.start, entry.end, serializeTag(tag, attrs, { closeTag }))
  }

  // Base-prefixed URL for a built file, keeping the suffix of the original URL.
  const toPublicUrl = (file, originalUrl) =>
    joinBasePrefix(basePrefix, file) + splitUrlParts(originalUrl).suffix

  // Remembers CSS files that may have to be linked from <head>.
  const collectCss = (list) => {
    if (!Array.isArray(list)) return
    for (const css of list) {
      cssFiles.add(css)
    }
  }

  const parseSrcset = (value = '') =>
    value
      .split(',')
      .map((entry) => entry.trim())
      .filter(Boolean)
      .map((entry) => {
        const [url, ...rest] = entry.split(/\s+/)
        return { url, descriptor: rest.join(' ') }
      })

  const resolveAssetValue = (rawValue) => {
    const { path, suffix } = splitUrlParts(rawValue)
    const resolved = resolveManifestKey(rawValue, basePrefix, routePath)
    if (!resolved?.resolvedPath) return null
    const manifestEntry = resolveManifestEntry(manifest, resolved.key)
    if (manifestEntry?.file) {
      return joinBasePrefix(basePrefix, manifestEntry.file) + suffix
    }
    // Without a manifest entry, only root-relative paths that do not carry
    // the base prefix yet are rewritten (by prepending the prefix).
    if (!path?.startsWith('/') || !basePrefix || basePrefix === '/') return null
    if (stripBasePrefix(path, basePrefix) !== path) return null
    return joinBasePrefix(basePrefix, path) + suffix
  }

  for (const entry of plan.scripts || []) {
    const src = entry?.src || entry?.attrs?.src
    if (!src) continue
    const resolved = resolveManifestKey(src, basePrefix, routePath)
    const publicPath = resolved?.resolvedPath
    if (!publicPath) continue
    const attrs = { ...(entry.attrs || {}) }
    if (commonScripts.has(publicPath)) {
      if (!commonEntry?.file) {
        continue
      }
      if (commonInserted) {
        // The common bundle is already referenced once; drop this tag.
        addHole(entry.start, entry.end, '')
        continue
      }
      attrs.src = toPublicUrl(commonEntry.file, src)
      replaceTag(entry, 'script', attrs, { closeTag: true })
      commonInserted = true
      collectCss(commonEntry.css)
      continue
    }
    const entryInfo = scriptMap.get(publicPath)
    if (!entryInfo?.file) continue
    attrs.src = toPublicUrl(entryInfo.file, src)
    replaceTag(entry, 'script', attrs, { closeTag: true })
    collectCss(entryInfo.css)
  }

  for (const entry of plan.styles || []) {
    const href = entry?.href || entry?.attrs?.href
    if (!href) continue
    const resolved = resolveManifestKey(href, basePrefix, routePath)
    const publicPath = resolved?.resolvedPath
    if (!publicPath) continue
    const attrs = { ...(entry.attrs || {}) }
    const entryInfo = styleMap.get(publicPath)
    if (!entryInfo?.file) {
      const manifestEntry = resolveManifestEntry(manifest, resolved.key)
      const cssFile = manifestEntry?.css?.[0] || (manifestEntry?.file?.endsWith('.css') ? manifestEntry.file : null)
      if (cssFile) {
        const newHref = toPublicUrl(cssFile, href)
        attrs.href = newHref
        replaceTag(entry, 'link', attrs)
        linkedHrefs.add(newHref)
        if (Array.isArray(manifestEntry?.css) && manifestEntry.css.length > 1) {
          collectCss(manifestEntry.css.slice(1))
        }
        continue
      }
      if (cssFiles.size) {
        // Fall back to the first CSS file collected so far.
        const [fallbackCss] = Array.from(cssFiles)
        if (fallbackCss) {
          const newHref = toPublicUrl(fallbackCss, href)
          attrs.href = newHref
          replaceTag(entry, 'link', attrs)
          linkedHrefs.add(newHref)
          continue
        }
      }
      linkedHrefs.add(href)
      continue
    }
    const newHref = toPublicUrl(entryInfo.file, href)
    attrs.href = newHref
    replaceTag(entry, 'link', attrs)
    linkedHrefs.add(newHref)
    collectCss(entryInfo.css)
  }

  for (const entry of plan.icons || []) {
    const href = entry?.href || entry?.attrs?.href
    if (!href) continue
    const updated = resolveAssetValue(href)
    if (!updated) continue
    const attrs = { ...(entry.attrs || {}) }
    attrs.href = updated
    replaceTag(entry, 'link', attrs)
  }

  for (const entry of plan.media || []) {
    if (!entry?.attrs) continue
    const attrs = { ...(entry.attrs || {}) }
    let touched = false
    if (attrs.src) {
      const updated = resolveAssetValue(attrs.src)
      if (updated) {
        attrs.src = updated
        touched = true
      }
    }
    if (attrs.poster) {
      const updated = resolveAssetValue(attrs.poster)
      if (updated) {
        attrs.poster = updated
        touched = true
      }
    }
    if (attrs.srcset) {
      const updated = []
      let changed = false
      for (const item of parseSrcset(attrs.srcset)) {
        const unchanged = [item.url, item.descriptor].filter(Boolean).join(' ')
        if (!item.url || isExternalUrl(item.url)) {
          updated.push(unchanged)
          continue
        }
        const resolved = resolveManifestKey(item.url, basePrefix, routePath)
        if (!resolved?.resolvedPath) {
          updated.push(unchanged)
          continue
        }
        const manifestEntry = resolveManifestEntry(manifest, resolved.key)
        if (!manifestEntry?.file) {
          updated.push(unchanged)
          continue
        }
        const rewritten = toPublicUrl(manifestEntry.file, item.url)
        updated.push([rewritten, item.descriptor].filter(Boolean).join(' '))
        changed = true
      }
      if (changed) {
        attrs.srcset = updated.join(', ')
        touched = true
      }
    }
    if (touched) {
      replaceTag(entry, entry.tag || 'img', attrs)
    }
  }

  if (cssFiles.size) {
    const snippets = []
    for (const css of cssFiles) {
      const href = joinBasePrefix(basePrefix, css)
      if (linkedHrefs.has(href)) continue
      if (html.includes(`href="${href}"`) || html.includes(`href='${href}'`)) continue
      snippets.push(`<link rel="stylesheet" href="${href}">`)
    }
    if (snippets.length) {
      let insertAt = typeof plan.headEndOffset === 'number' ? plan.headEndOffset : null
      if (insertAt == null) {
        const index = html.indexOf('</head>')
        if (index >= 0) insertAt = index
      }
      if (insertAt != null) {
        addHole(insertAt, insertAt, snippets.join(''))
      }
    }
  }

  if (!holes.length) return html

  // Stitch the untouched slices and the replacements together in document
  // order. A hole that starts before the end of the previous one is skipped.
  // The pieces are joined from an array rather than spread into a call, so
  // the number of holes is not bounded by the engine's argument-count limit.
  holes.sort((a, b) => (a.start - b.start) || (b.end - a.end))
  const pieces = []
  let cursor = 0
  for (const hole of holes) {
    if (hole.start < cursor) continue
    pieces.push(html.slice(cursor, hole.start), hole.text ?? '')
    cursor = hole.end
  }
  pieces.push(html.slice(cursor))
  return pieces.join('')
}
567
+
568
/**
 * Entry point for rewriting rendered HTML: returns `html` untouched when no
 * plan is given, otherwise forwards every argument to `rewriteHtmlByPlan`.
 *
 * @param {string} html
 * @param {object} plan
 * @param {string} routePath
 * @param {string} basePrefix
 * @param {object} manifest
 * @param {Map<string, object>} scriptMap
 * @param {Map<string, object>} styleMap
 * @param {Set<string>} commonScripts
 * @param {object} commonEntry
 * @returns {string} The rewritten HTML.
 */
export const rewriteHtmlContent = (
  html,
  plan,
  routePath,
  basePrefix,
  manifest,
  scriptMap,
  styleMap,
  commonScripts,
  commonEntry
) =>
  plan
    ? rewriteHtmlByPlan(
        html,
        plan,
        routePath,
        basePrefix,
        manifest,
        scriptMap,
        styleMap,
        commonScripts,
        commonEntry
      )
    : html