html-to-gutenberg 4.2.9 → 4.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +20 -3
- package/.github/workflows/sync-npm.yml +154 -0
- package/fetch-page-assets.test.ts +448 -0
- package/index.d.ts +173 -0
- package/index.js +570 -224
- package/index.test.ts +633 -4
- package/index.ts +168 -63
- package/package.json +25 -24
- package/r2.js +163 -0
- package/readme.md +122 -88
- package/scripts/patch-fetch-page-assets.mjs +13 -0
- package/scripts/sync-from-npm.mjs +115 -0
- package/tsconfig.json +17 -2
- package/vendor/fetch-page-assets/LICENSE.MD +21 -0
- package/vendor/fetch-page-assets/README.md +117 -0
- package/vendor/fetch-page-assets/index.js +362 -0
- package/vendor/fetch-page-assets/package.json +48 -0
- package/.env +0 -1
package/.env.example
CHANGED
|
@@ -1,3 +1,20 @@
|
|
|
1
|
-
#
|
|
2
|
-
#
|
|
3
|
-
|
|
1
|
+
# Copy this file to .env and keep the real values private.
|
|
2
|
+
# Never commit your real tokens or keys.
|
|
3
|
+
|
|
4
|
+
# Optional: SnapAPI preview generation
|
|
5
|
+
SNAPAPI_KEY=sk_live_replace_me
|
|
6
|
+
|
|
7
|
+
# Optional: Cloudflare API token for verification/management workflows
|
|
8
|
+
CLOUDFLARE_API_TOKEN=cfut_replace_me
|
|
9
|
+
|
|
10
|
+
# Required for R2 uploads in job mode
|
|
11
|
+
CLOUDFLARE_R2_ACCOUNT_ID=replace_me
|
|
12
|
+
CLOUDFLARE_R2_BUCKET=replace_me
|
|
13
|
+
CLOUDFLARE_R2_ACCESS_KEY_ID=replace_me
|
|
14
|
+
CLOUDFLARE_R2_SECRET_ACCESS_KEY=replace_me
|
|
15
|
+
|
|
16
|
+
# Public base URL for generated assets and bundles
|
|
17
|
+
CLOUDFLARE_R2_PUBLIC_BASE_URL=https://storage.example.com
|
|
18
|
+
|
|
19
|
+
# Optional override for advanced setups
|
|
20
|
+
# CLOUDFLARE_R2_ENDPOINT=https://<account_id>.r2.cloudflarestorage.com
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# Keeps the repository in sync with the packages published on npm (scheduled or
# manual runs) and publishes html-to-gutenberg on pushes to main and on releases.
name: Sync And Publish NPM Packages

on:
  push:
    branches: [main]
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      html_to_gutenberg_version:
        description: Specific html-to-gutenberg npm version to sync. Leave empty to use the dist-tag.
        required: false
        type: string
      fetch_page_assets_version:
        description: Specific fetch-page-assets npm version to sync. Leave empty to use the dist-tag.
        required: false
        type: string
      dist_tag:
        description: npm dist-tag to sync when no explicit version is provided.
        required: false
        default: latest
        type: string
  schedule:
    - cron: '0 9 * * *'

# Never run two syncs/publishes at once; queue instead of cancelling.
concurrency:
  group: sync-npm
  cancel-in-progress: false

jobs:
  # Pulls the published package contents back into the repository.
  sync:
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Sync html-to-gutenberg from npm
        env:
          NPM_SYNC_VERSION: ${{ inputs.html_to_gutenberg_version }}
          # `inputs` is empty on scheduled runs (input defaults only apply to
          # manual dispatch), so fall back to the documented default explicitly.
          NPM_SYNC_DIST_TAG: ${{ inputs.dist_tag || 'latest' }}
          NPM_SYNC_PRESERVE_PATHS: .git,.github,node_modules,.env,scripts,vendor
        run: node ./scripts/sync-from-npm.mjs

      - name: Sync fetch-page-assets from npm
        env:
          NPM_PACKAGE_NAME: fetch-page-assets
          NPM_SYNC_TARGET_DIR: vendor/fetch-page-assets
          NPM_SYNC_VERSION: ${{ inputs.fetch_page_assets_version }}
          NPM_SYNC_DIST_TAG: ${{ inputs.dist_tag || 'latest' }}
        run: node ./scripts/sync-from-npm.mjs

      - name: Detect changes
        id: changes
        run: |
          # `git status --porcelain` also lists untracked files; `git diff --quiet`
          # only looks at tracked files and would miss files a sync newly added.
          if [ -z "$(git status --porcelain)" ]; then
            echo "changed=false" >> "$GITHUB_OUTPUT"
          else
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Read synced version
        if: steps.changes.outputs.changed == 'true'
        id: versions
        run: |
          # Quotes inside $( ... ) start a fresh quoting context, so they must
          # not be backslash-escaped.
          echo "html_to_gutenberg=$(node -p "require('./package.json').version")" >> "$GITHUB_OUTPUT"
          echo "fetch_page_assets=$(node -p "require('./vendor/fetch-page-assets/package.json').version")" >> "$GITHUB_OUTPUT"

      - name: Commit changes
        if: steps.changes.outputs.changed == 'true'
        env:
          # Passed through the environment instead of being interpolated into the
          # script, so a crafted version string cannot inject shell commands.
          HTML_TO_GUTENBERG_VERSION: ${{ steps.versions.outputs.html_to_gutenberg }}
          FETCH_PAGE_ASSETS_VERSION: ${{ steps.versions.outputs.fetch_page_assets }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "chore: sync npm packages html-to-gutenberg v${HTML_TO_GUTENBERG_VERSION} and fetch-page-assets v${FETCH_PAGE_ASSETS_VERSION}"

      - name: Push changes
        if: steps.changes.outputs.changed == 'true'
        run: git push

  # Builds, tests and publishes html-to-gutenberg. Pushes made by the bot itself
  # are skipped so the version-bump commit does not trigger another publish.
  publish_html_to_gutenberg:
    if: github.event_name == 'release' || (github.event_name == 'push' && github.actor != 'github-actions[bot]')
    runs-on: ubuntu-latest
    # NOTE(review): `.env` is used here as a deployment environment name - confirm
    # an environment with exactly this name exists in the repository settings.
    environment: .env
    permissions:
      contents: write
      id-token: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '24'
          registry-url: 'https://registry.npmjs.org'
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Build package
        run: npm run build

      - name: Run tests
        run: npm test

      - name: Bump patch version on main pushes
        if: github.event_name == 'push'
        id: bump_version
        run: |
          npm version patch --no-git-tag-version
          PACKAGE_VERSION=$(node -e "console.log(require('./package.json').version)")
          echo "package_version=$PACKAGE_VERSION" >> "$GITHUB_OUTPUT"

      - name: Check published version
        id: version_check
        run: |
          PACKAGE_VERSION=$(node -p "require('./package.json').version")
          # `|| true` keeps the step alive when the registry lookup fails; the
          # comparison below then sees an empty published version.
          PUBLISHED_VERSION=$(npm view html-to-gutenberg version 2>/dev/null || true)
          echo "package_version=$PACKAGE_VERSION" >> "$GITHUB_OUTPUT"
          echo "published_version=$PUBLISHED_VERSION" >> "$GITHUB_OUTPUT"
          if [ "$PACKAGE_VERSION" = "$PUBLISHED_VERSION" ]; then
            echo "should_publish=false" >> "$GITHUB_OUTPUT"
          else
            echo "should_publish=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Publish to npm
        if: steps.version_check.outputs.should_publish == 'true'
        run: npm publish --provenance

      - name: Commit version bump
        if: github.event_name == 'push'
        env:
          PACKAGE_VERSION: ${{ steps.version_check.outputs.package_version }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add package.json package-lock.json
          git commit -m "chore: release html-to-gutenberg v${PACKAGE_VERSION}" || echo "No version changes to commit"

      - name: Push version bump
        if: github.event_name == 'push'
        run: git push
|
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
import { expect } from 'chai';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import http from 'http';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
|
|
7
|
+
import extractAssets, {
|
|
8
|
+
h,
|
|
9
|
+
p,
|
|
10
|
+
d,
|
|
11
|
+
m,
|
|
12
|
+
u,
|
|
13
|
+
w,
|
|
14
|
+
$,
|
|
15
|
+
E,
|
|
16
|
+
g,
|
|
17
|
+
U,
|
|
18
|
+
v,
|
|
19
|
+
A,
|
|
20
|
+
F,
|
|
21
|
+
x,
|
|
22
|
+
P,
|
|
23
|
+
R,
|
|
24
|
+
D,
|
|
25
|
+
} from './vendor/fetch-page-assets/index.js';
|
|
26
|
+
|
|
27
|
+
// Hex-encoded bytes of a PNG whose header declares a 1x1 image; the test servers
// below send it as the body of every image response.
const tinyPng = Buffer.from('89504e470d0a1a0a0000000d4948445200000001000000010802000000907753de0000000c4944415408d763f8ffff3f0005fe02fea557a90000000049454e44ae426082', 'hex');
|
|
31
|
+
|
|
32
|
+
/**
 * Starts `server` on 127.0.0.1 with port 0 (the OS picks a free port).
 *
 * Resolves with the server's base URL and a promise-returning `close()`.
 * Rejects when the server emits 'error' or when no address object with a
 * port is available after listening.
 */
const listen = (server: http.Server) => {
  type Listening = { baseUrl: string; close: () => Promise<void> };

  // Promise wrapper around the callback-style server.close().
  const close = () =>
    new Promise<void>((closeResolve, closeReject) => {
      server.close((error) => {
        if (error) {
          closeReject(error);
        } else {
          closeResolve();
        }
      });
    });

  return new Promise<Listening>((resolve, reject) => {
    const onListening = () => {
      const address = server.address();

      // address() can be null or a string; only the object form carries a port.
      if (typeof address === 'string' || !address) {
        reject(new Error('Unable to determine test server address.'));
        return;
      }

      resolve({ baseUrl: `http://127.0.0.1:${address.port}`, close });
    };

    server.listen(0, '127.0.0.1', onListening);
    server.on('error', reject);
  });
};
|
|
61
|
+
|
|
62
|
+
describe('fetch-page-assets helpers', () => {
|
|
63
|
+
// Each helper is called once with `true` and once with `false` as its second
// argument; only the `true` calls are expected to reach the console.
it('logs through h and p only when verbose', () => {
  const captured = { error: [] as string[], log: [] as string[] };
  const realError = console.error;
  const realLog = console.log;

  // Record console output instead of printing it.
  console.error = (message?: any) => captured.error.push(String(message));
  console.log = (message?: any) => captured.log.push(String(message));

  try {
    h('failed', true);
    h('ignored', false);
    p('ok', true);
    p('ignored', false);
  } finally {
    // Always put the real console methods back, even if a helper throws.
    console.error = realError;
    console.log = realLog;
  }

  expect(captured.error).to.deep.equal(['[Error] failed']);
  expect(captured.log).to.deep.equal(['[Success] ok']);
});
|
|
85
|
+
|
|
86
|
+
// Pins the observable results of the small URL/path helpers exported by the
// vendored module, one input/output pair per line.
it('covers the URL and path helpers', () => {
  const check = (actual: unknown, expected: unknown) => {
    expect(actual).to.equal(expected);
  };

  // d / m: inputs starting with `//` versus inputs that already carry a scheme.
  check(d('//cdn.example.com'), true);
  check(d('https://example.com'), false);
  check(m('//cdn.example.com/image.png', 'http'), 'http://cdn.example.com/image.png');
  check(m('https://example.com/image.png'), 'https://example.com/image.png');

  // u / w: trailing-slash paths and relative-looking paths.
  check(u('/tmp/'), true);
  check(u('/tmp'), false);
  check(w('images/logo.png'), true);
  check(w('//cdn.example.com/logo.png'), false);

  // $: surrounding whitespace and quotes are removed.
  check($(' "hello" '), 'hello');

  // E: resolution against a base URL, with fallbacks for `//` and invalid bases.
  check(E('images/logo.png', 'https://example.com/base/page'), 'https://example.com/base/images/logo.png');
  check(E('//cdn.example.com/logo.png', '', 'http'), 'http://cdn.example.com/logo.png');
  check(E('images/logo.png', 'not a valid url'), 'images/logo.png');

  // g / A / F / x: path joining, query+hash stripping, and dot splitting.
  check(g(['a', 'b', 'c']), path.join('a', 'b', 'c'));
  check(A('image.png?foo=1#bar'), 'image.png');
  check(F('/tmp', 'asset.png?foo=1#bar'), path.join('/tmp', 'asset.png'));
  expect(x('archive.tar.gz')).to.deep.equal(['archive', 'tar', 'gz']);

  // P: exact-case header key wins; otherwise the lower-cased key is used.
  check(P({ Header: 'A', header: 'B' }, 'Header'), 'A');
  check(P({ header: 'B' }, 'Header'), 'B');
});
|
|
106
|
+
|
|
107
|
+
// Exercises the directory helper (U), the HTML write-back helper (v) and the
// filename-from-headers helper (R) against a throwaway temp directory.
it('covers file utilities and header parsing helpers', () => {
  const sandbox = fs.mkdtempSync(path.join(os.tmpdir(), 'fetch-assets-helpers-'));
  const deepDir = path.join(sandbox, 'nested', 'dir');
  const imagesDir = path.join(sandbox, 'images');
  const assetUrl = 'https://example.com/images/photo.png';
  const asset = {
    parsedUrl: assetUrl,
    destinationFilePath: path.join(imagesDir, 'photo.png'),
  };
  const html = '<img src="https://example.com/images/photo.png">';

  try {
    U(deepDir, false);
    expect(fs.existsSync(deepDir)).to.equal(true);

    fs.mkdirSync(imagesDir, { recursive: true });
    v(asset, html, sandbox, true, false);

    // v is expected to have written index.html into the sandbox.
    // NOTE(review): the input HTML already contains 'images/photo.png', so this
    // assertion cannot tell whether the URL was actually rewritten - confirm intent.
    const savedHtml = fs.readFileSync(path.join(sandbox, 'index.html'), 'utf8');
    expect(savedHtml).to.include('images/photo.png');

    const fromDisposition = R({ 'Content-Disposition': 'attachment; filename="report"' }, 'fallback');
    expect(fromDisposition).to.equal('report');

    const fromContentType = R({ 'Content-Type': 'image/png' }, 'fallback');
    expect(fromContentType).to.equal('fallback.png');
  } finally {
    fs.rmSync(sandbox, { recursive: true, force: true });
  }
});
|
|
138
|
+
|
|
139
|
+
// D is fed one normal progress event and one whose `loaded` is NaN; the NaN
// case is expected to be reported as 0%.
it('covers download progress logging for valid and invalid ratios', () => {
  const printed: string[] = [];
  const realLog = console.log;

  console.log = (message?: any) => printed.push(String(message));

  try {
    [50, Number.NaN].forEach((loaded) => {
      D({ loaded, total: 100 });
    });
  } finally {
    console.log = realLog;
  }

  expect(printed).to.deep.equal(['Download progress: 50%', 'Download progress: 0%']);
});
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
describe('fetch-page-assets integration', () => {
|
|
156
|
+
// A file:// asset should be copied under basePath (mirroring its absolute
// path) while the returned HTML stays unchanged.
it('downloads local file assets from file URLs', async () => {
  const makeTempDir = (prefix: string) => fs.mkdtempSync(path.join(os.tmpdir(), prefix));

  const tempRoot = makeTempDir('fetch-assets-local-');
  const sourceDir = makeTempDir('fetch-assets-source-');
  const localFile = path.join(sourceDir, 'asset.txt');

  fs.writeFileSync(localFile, 'local file content');

  try {
    const html = `<img src="file://${localFile}" />`;
    const output = await extractAssets(html, {
      basePath: tempRoot,
      verbose: false,
      maxRetryAttempts: 1,
      retryDelay: 0,
    });

    // Expected copy location: the source path with its leading slash removed,
    // re-rooted under tempRoot.
    const mirroredFile = path.join(tempRoot, localFile.replace(/^\//, ''));

    expect(output).to.equal(html);
    expect(fs.existsSync(mirroredFile)).to.equal(true);
  } finally {
    [tempRoot, sourceDir].forEach((dir) => {
      fs.rmSync(dir, { recursive: true, force: true });
    });
  }
});
|
|
179
|
+
|
|
180
|
+
// End-to-end run against a local HTTP server: the page is reached through a
// redirect, references the same redirected image twice, and pulls in a video,
// a script, a stylesheet and a CSS background image.
it('fetches remote HTML, follows redirects, and de-duplicates repeated asset downloads', async () => {
  type Reply = { status: number; headers: Record<string, string>; body?: string | Buffer };

  const pngHeaders = { 'Content-Type': 'image/png' };

  // One canned reply per known URL; anything else falls through to `notFound`.
  const replies = new Map<string, Reply>([
    ['/redirect-html', { status: 302, headers: { Location: '/page' } }],
    [
      '/page',
      {
        status: 200,
        headers: { 'Content-Type': 'text/html' },
        body: `
<html>
<body>
<img src="/redirect-image.png" srcset="/ignored-1x.png 1x" sizes="100vw" />
<img src="/redirect-image.png" />
<source src="/video.mp4" />
<script src="/script.js"></script>
<link rel="stylesheet" href="/styles/site.css" />
<div style="background-image:url('/backgrounds/bg.png')"></div>
</body>
</html>
`,
      },
    ],
    ['/redirect-image.png', { status: 302, headers: { Location: '/images/logo-download' } }],
    [
      '/images/logo-download',
      {
        status: 200,
        headers: {
          'Content-Type': 'image/png',
          'Content-Disposition': 'attachment; filename="logo-file"',
        },
        body: tinyPng,
      },
    ],
    ['/video.mp4', { status: 200, headers: { 'Content-Type': 'video/mp4' }, body: Buffer.from('video') }],
    [
      '/script.js',
      {
        status: 200,
        headers: { 'Content-Type': 'application/javascript' },
        body: 'window.__fetchedScript = true;',
      },
    ],
    [
      '/styles/site.css',
      {
        status: 200,
        headers: { 'Content-Type': 'text/css' },
        body: ".hero { background-image: url('/backgrounds/bg.png'); }",
      },
    ],
    ['/backgrounds/bg.png', { status: 200, headers: pngHeaders, body: tinyPng }],
  ]);
  const notFound: Reply = { status: 404, headers: { 'Content-Type': 'text/plain' }, body: 'not found' };

  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'fetch-assets-remote-'));
  const requestCounts: Record<string, number> = {};

  const server = http.createServer((req, res) => {
    const url = req.url || '/';
    // Count every hit so the de-duplication assertions can inspect it later.
    requestCounts[url] = (requestCounts[url] || 0) + 1;

    const reply = replies.get(url) || notFound;
    res.writeHead(reply.status, reply.headers);
    res.end(reply.body);
  });

  const { baseUrl, close } = await listen(server);

  try {
    const output = await extractAssets(`${baseUrl}/redirect-html`, {
      basePath: tempRoot,
      protocol: 'http',
      verbose: false,
      maxRetryAttempts: 1,
      retryDelay: 0,
      concurrency: 4,
    });

    // Markup: src is kept, responsive-image attributes are dropped.
    expect(output).to.include('<img src="/redirect-image.png"');
    expect(output).to.not.include('srcset=');
    expect(output).to.not.include('sizes=');

    // Files on disk, relative to tempRoot.
    const savedFiles = [
      ['logo-file.png'],
      ['styles', 'site.css'],
      ['script.js'],
      ['backgrounds', 'bg.png'],
    ];
    savedFiles.forEach((segments) => {
      expect(fs.existsSync(path.join(tempRoot, ...segments))).to.equal(true);
    });

    // The image referenced twice must have been requested only once.
    expect(requestCounts['/redirect-image.png']).to.equal(1);
    expect(requestCounts['/images/logo-download']).to.equal(1);
  } finally {
    await close();
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
});
|
|
279
|
+
|
|
280
|
+
// Four failure modes in one pass: non-string input, an unsupported URL scheme,
// a relative asset with no base URL, and an asset whose download returns 500.
// Each must resolve (not throw) and log a recognisable error.
it('handles invalid inputs, unsupported remote URLs, and failed asset downloads gracefully', async () => {
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'fetch-assets-errors-'));
  const errors: string[] = [];
  const originalError = console.error;
  let close: (() => Promise<void>) | undefined;

  const server = http.createServer((req, res) => {
    if (req.url === '/broken.png') {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
      res.end('broken');
      return;
    }

    res.writeHead(404, { 'Content-Type': 'text/plain' });
    res.end('not found');
  });

  try {
    // Starting the server happens inside the try so that a failed listen()
    // still removes the temp directory and never leaves console.error patched.
    const started = await listen(server);
    const { baseUrl } = started;
    close = started.close;

    // Capture error output only once everything that can fail during setup is done.
    console.error = (message?: any) => errors.push(String(message));

    const invalidInput = await extractAssets(123 as any, {
      basePath: tempRoot,
      verbose: true,
    } as any);
    const unsupportedProtocol = await extractAssets('ftp://example.com/file.html', {
      basePath: tempRoot,
      verbose: true,
    });
    const unresolvedRelative = await extractAssets('<img src="/missing.png" />', {
      basePath: tempRoot,
      verbose: true,
      maxRetryAttempts: 1,
      retryDelay: 0,
    });
    const failedDownload = await extractAssets(`<img src="${baseUrl}/broken.png" />`, {
      basePath: tempRoot,
      verbose: true,
      maxRetryAttempts: 1,
      retryDelay: 0,
    });

    expect(invalidInput).to.equal('');
    expect(unsupportedProtocol).to.equal('');
    expect(unresolvedRelative).to.equal('<img src="/missing.png" />');
    expect(failedDownload).to.equal(`<img src="${baseUrl}/broken.png" />`);
    expect(errors.some((message) => message.includes('Invalid user input'))).to.equal(true);
    expect(errors.some((message) => message.includes('Invalid baseUrl'))).to.equal(true);
    expect(errors.some((message) => message.includes('A source URL is required'))).to.equal(true);
    expect(errors.some((message) => message.includes('HTTP error! Status: 500'))).to.equal(true);
  } finally {
    console.error = originalError;
    if (close) {
      await close();
    }
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
});
|
|
335
|
+
|
|
336
|
+
// Two runs that share one `_assetTaskCache` must hit the server once; a
// download whose first attempt is cut off must succeed on the retry; a page
// that answers 500 must yield an empty string.
it('reuses a shared asset task cache and retries transient download failures', async () => {
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'fetch-assets-cache-'));
  const sharedCache = new Map();
  const requestCounts: Record<string, number> = {};

  const sendPng = (res: http.ServerResponse) => {
    res.writeHead(200, { 'Content-Type': 'image/png' });
    res.end(tinyPng);
  };
  const sendText = (res: http.ServerResponse, status: number, text: string) => {
    res.writeHead(status, { 'Content-Type': 'text/plain' });
    res.end(text);
  };

  const server = http.createServer((req, res) => {
    const url = req.url || '/';
    const hits = (requestCounts[url] || 0) + 1;
    requestCounts[url] = hits;

    switch (url) {
      case '/cached.png':
        sendPng(res);
        return;
      case '/flaky.png':
        // First attempt: drop the connection without answering.
        if (hits === 1) {
          req.socket.destroy();
          return;
        }
        sendPng(res);
        return;
      case '/broken-page':
        sendText(res, 500, 'broken');
        return;
      default:
        sendText(res, 404, 'not found');
    }
  });

  const { baseUrl, close } = await listen(server);

  try {
    // A fresh options object per call; only the cache instance is shared.
    const fetchCached = () =>
      extractAssets(`<img src="${baseUrl}/cached.png" />`, {
        basePath: tempRoot,
        verbose: false,
        maxRetryAttempts: 1,
        retryDelay: 0,
        _assetTaskCache: sharedCache,
      });

    await fetchCached();
    await fetchCached();
    await extractAssets(`<img src="${baseUrl}/flaky.png" />`, {
      basePath: tempRoot,
      verbose: false,
      maxRetryAttempts: 2,
      retryDelay: 0,
    });
    const brokenHtml = await extractAssets(`${baseUrl}/broken-page`, {
      basePath: tempRoot,
      verbose: false,
    });

    expect(requestCounts['/cached.png']).to.equal(1);
    expect(requestCounts['/flaky.png']).to.equal(2);
    expect(fs.existsSync(path.join(tempRoot, 'cached.png'))).to.equal(true);
    expect(fs.existsSync(path.join(tempRoot, 'flaky.png'))).to.equal(true);
    expect(brokenHtml).to.equal('');
  } finally {
    await close();
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
});
|
|
410
|
+
|
|
411
|
+
// With HTG_R2_MOCK set, an extracted asset should be reported with a public
// URL built from CLOUDFLARE_R2_PUBLIC_BASE_URL and the given r2Prefix.
it('can upload extracted assets to mocked R2 and return asset metadata', async () => {
  const server = http.createServer((req, res) => {
    if (req.url === '/asset.png') {
      res.writeHead(200, { 'Content-Type': 'image/png' });
      res.end(tinyPng);
      return;
    }

    res.writeHead(404, { 'Content-Type': 'text/plain' });
    res.end('not found');
  });

  const { baseUrl, close } = await listen(server);

  // Remember whatever the process already had for these variables so the
  // cleanup restores it instead of wiping values other tests (or a developer's
  // own environment) rely on.
  const overrides: Record<string, string> = {
    HTG_R2_MOCK: '1',
    CLOUDFLARE_R2_PUBLIC_BASE_URL: 'https://storage.example.com',
  };
  const previousEnv: Record<string, string | undefined> = {};
  Object.keys(overrides).forEach((key) => {
    previousEnv[key] = process.env[key];
    process.env[key] = overrides[key];
  });

  try {
    // NOTE(review): basePath is the current working directory rather than a
    // temp directory - confirm the mocked upload leaves no files behind there.
    const result = await extractAssets(`<img src="${baseUrl}/asset.png" />`, {
      basePath: process.cwd(),
      verbose: false,
      uploadToR2: true,
      returnDetails: true,
      jobId: 'conv_123',
      r2Prefix: 'generated/conv_123/assets',
    });

    expect(result.html).to.include(`${baseUrl}/asset.png`);
    expect(result.assets).to.have.length(1);
    expect(result.assets[0].name).to.equal('asset.png');
    expect(result.assets[0].url).to.equal('https://storage.example.com/generated/conv_123/assets/asset.png');
    expect(result.assets[0].path).to.equal('/generated/conv_123/assets/asset.png');
  } finally {
    Object.keys(previousEnv).forEach((key) => {
      const previous = previousEnv[key];
      if (previous === undefined) {
        // Assigning undefined would store the string "undefined"; delete instead.
        delete process.env[key];
      } else {
        process.env[key] = previous;
      }
    });
    await close();
  }
});
|
|
448
|
+
});
|