@kbediako/codex-orchestrator 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +7 -0
- package/README.md +238 -0
- package/dist/bin/codex-orchestrator.js +507 -0
- package/dist/orchestrator/src/agents/builder.js +16 -0
- package/dist/orchestrator/src/agents/index.js +4 -0
- package/dist/orchestrator/src/agents/planner.js +17 -0
- package/dist/orchestrator/src/agents/reviewer.js +13 -0
- package/dist/orchestrator/src/agents/tester.js +13 -0
- package/dist/orchestrator/src/cli/adapters/CommandBuilder.js +20 -0
- package/dist/orchestrator/src/cli/adapters/CommandPlanner.js +164 -0
- package/dist/orchestrator/src/cli/adapters/CommandReviewer.js +32 -0
- package/dist/orchestrator/src/cli/adapters/CommandTester.js +33 -0
- package/dist/orchestrator/src/cli/adapters/index.js +4 -0
- package/dist/orchestrator/src/cli/config/userConfig.js +28 -0
- package/dist/orchestrator/src/cli/doctor.js +48 -0
- package/dist/orchestrator/src/cli/events/runEvents.js +84 -0
- package/dist/orchestrator/src/cli/exec/command.js +56 -0
- package/dist/orchestrator/src/cli/exec/context.js +108 -0
- package/dist/orchestrator/src/cli/exec/experience.js +77 -0
- package/dist/orchestrator/src/cli/exec/finalization.js +140 -0
- package/dist/orchestrator/src/cli/exec/learning.js +62 -0
- package/dist/orchestrator/src/cli/exec/stageRunner.js +71 -0
- package/dist/orchestrator/src/cli/exec/summary.js +109 -0
- package/dist/orchestrator/src/cli/exec/telemetry.js +18 -0
- package/dist/orchestrator/src/cli/exec/tfgrpo.js +200 -0
- package/dist/orchestrator/src/cli/exec/tfgrpoArtifacts.js +19 -0
- package/dist/orchestrator/src/cli/exec/types.js +1 -0
- package/dist/orchestrator/src/cli/init.js +64 -0
- package/dist/orchestrator/src/cli/mcp.js +124 -0
- package/dist/orchestrator/src/cli/metrics/metricsAggregator.js +404 -0
- package/dist/orchestrator/src/cli/metrics/metricsRecorder.js +138 -0
- package/dist/orchestrator/src/cli/orchestrator.js +554 -0
- package/dist/orchestrator/src/cli/pipelines/defaultDiagnostics.js +32 -0
- package/dist/orchestrator/src/cli/pipelines/designReference.js +72 -0
- package/dist/orchestrator/src/cli/pipelines/hiFiDesignToolkit.js +71 -0
- package/dist/orchestrator/src/cli/pipelines/index.js +34 -0
- package/dist/orchestrator/src/cli/run/environment.js +24 -0
- package/dist/orchestrator/src/cli/run/manifest.js +367 -0
- package/dist/orchestrator/src/cli/run/manifestPersister.js +88 -0
- package/dist/orchestrator/src/cli/run/runPaths.js +30 -0
- package/dist/orchestrator/src/cli/selfCheck.js +12 -0
- package/dist/orchestrator/src/cli/services/commandRunner.js +420 -0
- package/dist/orchestrator/src/cli/services/controlPlaneService.js +107 -0
- package/dist/orchestrator/src/cli/services/execRuntime.js +69 -0
- package/dist/orchestrator/src/cli/services/pipelineResolver.js +47 -0
- package/dist/orchestrator/src/cli/services/runPreparation.js +82 -0
- package/dist/orchestrator/src/cli/services/runSummaryWriter.js +35 -0
- package/dist/orchestrator/src/cli/services/schedulerService.js +42 -0
- package/dist/orchestrator/src/cli/tasks/taskMetadata.js +19 -0
- package/dist/orchestrator/src/cli/telemetry/schema.js +8 -0
- package/dist/orchestrator/src/cli/types.js +1 -0
- package/dist/orchestrator/src/cli/ui/HudApp.js +112 -0
- package/dist/orchestrator/src/cli/ui/controller.js +26 -0
- package/dist/orchestrator/src/cli/ui/store.js +240 -0
- package/dist/orchestrator/src/cli/utils/enforcementMode.js +12 -0
- package/dist/orchestrator/src/cli/utils/fs.js +8 -0
- package/dist/orchestrator/src/cli/utils/interactive.js +25 -0
- package/dist/orchestrator/src/cli/utils/jsonlWriter.js +10 -0
- package/dist/orchestrator/src/cli/utils/optionalDeps.js +30 -0
- package/dist/orchestrator/src/cli/utils/packageInfo.js +25 -0
- package/dist/orchestrator/src/cli/utils/planFormatter.js +49 -0
- package/dist/orchestrator/src/cli/utils/runId.js +7 -0
- package/dist/orchestrator/src/cli/utils/specGuardRunner.js +26 -0
- package/dist/orchestrator/src/cli/utils/strings.js +8 -0
- package/dist/orchestrator/src/cli/utils/time.js +6 -0
- package/dist/orchestrator/src/control-plane/drift-reporter.js +109 -0
- package/dist/orchestrator/src/control-plane/index.js +3 -0
- package/dist/orchestrator/src/control-plane/request-builder.js +217 -0
- package/dist/orchestrator/src/control-plane/types.js +1 -0
- package/dist/orchestrator/src/control-plane/validator.js +50 -0
- package/dist/orchestrator/src/credentials/CredentialBroker.js +1 -0
- package/dist/orchestrator/src/events/EventBus.js +25 -0
- package/dist/orchestrator/src/learning/crystalizer.js +108 -0
- package/dist/orchestrator/src/learning/harvester.js +146 -0
- package/dist/orchestrator/src/learning/manifest.js +56 -0
- package/dist/orchestrator/src/learning/runner.js +177 -0
- package/dist/orchestrator/src/learning/validator.js +164 -0
- package/dist/orchestrator/src/logger.js +20 -0
- package/dist/orchestrator/src/manager.js +388 -0
- package/dist/orchestrator/src/persistence/ArtifactStager.js +95 -0
- package/dist/orchestrator/src/persistence/ExperienceStore.js +210 -0
- package/dist/orchestrator/src/persistence/PersistenceCoordinator.js +65 -0
- package/dist/orchestrator/src/persistence/RunManifestWriter.js +23 -0
- package/dist/orchestrator/src/persistence/TaskStateStore.js +172 -0
- package/dist/orchestrator/src/persistence/identifierGuards.js +1 -0
- package/dist/orchestrator/src/persistence/lockFile.js +26 -0
- package/dist/orchestrator/src/persistence/sanitizeIdentifier.js +26 -0
- package/dist/orchestrator/src/persistence/sanitizeRunId.js +8 -0
- package/dist/orchestrator/src/persistence/sanitizeTaskId.js +8 -0
- package/dist/orchestrator/src/persistence/writeAtomicFile.js +4 -0
- package/dist/orchestrator/src/privacy/guard.js +111 -0
- package/dist/orchestrator/src/scheduler/index.js +1 -0
- package/dist/orchestrator/src/scheduler/plan.js +171 -0
- package/dist/orchestrator/src/scheduler/types.js +1 -0
- package/dist/orchestrator/src/sync/CloudRunsClient.js +1 -0
- package/dist/orchestrator/src/sync/CloudRunsHttpClient.js +82 -0
- package/dist/orchestrator/src/sync/CloudSyncWorker.js +206 -0
- package/dist/orchestrator/src/sync/createCloudSyncWorker.js +15 -0
- package/dist/orchestrator/src/types.js +1 -0
- package/dist/orchestrator/src/utils/atomicWrite.js +15 -0
- package/dist/orchestrator/src/utils/errorMessage.js +14 -0
- package/dist/orchestrator/src/utils/executionMode.js +69 -0
- package/dist/packages/control-plane-schemas/src/index.js +1 -0
- package/dist/packages/control-plane-schemas/src/run-request.js +548 -0
- package/dist/packages/orchestrator/src/exec/handle-service.js +203 -0
- package/dist/packages/orchestrator/src/exec/session-manager.js +147 -0
- package/dist/packages/orchestrator/src/exec/unified-exec.js +432 -0
- package/dist/packages/orchestrator/src/index.js +3 -0
- package/dist/packages/orchestrator/src/instructions/loader.js +101 -0
- package/dist/packages/orchestrator/src/instructions/promptPacks.js +151 -0
- package/dist/packages/orchestrator/src/notifications/index.js +74 -0
- package/dist/packages/orchestrator/src/telemetry/otel-exporter.js +142 -0
- package/dist/packages/orchestrator/src/tool-orchestrator.js +161 -0
- package/dist/packages/sdk-node/src/orchestrator.js +195 -0
- package/dist/packages/shared/config/designConfig.js +495 -0
- package/dist/packages/shared/config/env.js +37 -0
- package/dist/packages/shared/config/index.js +2 -0
- package/dist/packages/shared/design-artifacts/writer.js +221 -0
- package/dist/packages/shared/events/serializer.js +84 -0
- package/dist/packages/shared/events/types.js +1 -0
- package/dist/packages/shared/manifest/artifactUtils.js +36 -0
- package/dist/packages/shared/manifest/designArtifacts.js +665 -0
- package/dist/packages/shared/manifest/fileIO.js +29 -0
- package/dist/packages/shared/manifest/toolRuns.js +78 -0
- package/dist/packages/shared/manifest/toolkitArtifacts.js +223 -0
- package/dist/packages/shared/manifest/types.js +5 -0
- package/dist/packages/shared/manifest/validator.js +73 -0
- package/dist/packages/shared/manifest/writer.js +2 -0
- package/dist/packages/shared/streams/stdio.js +112 -0
- package/dist/scripts/design/pipeline/advanced-assets.js +466 -0
- package/dist/scripts/design/pipeline/componentize.js +74 -0
- package/dist/scripts/design/pipeline/context.js +34 -0
- package/dist/scripts/design/pipeline/extract.js +249 -0
- package/dist/scripts/design/pipeline/optionalDeps.js +107 -0
- package/dist/scripts/design/pipeline/prepare.js +46 -0
- package/dist/scripts/design/pipeline/reference.js +94 -0
- package/dist/scripts/design/pipeline/state.js +206 -0
- package/dist/scripts/design/pipeline/toolkit/common.js +94 -0
- package/dist/scripts/design/pipeline/toolkit/extract.js +258 -0
- package/dist/scripts/design/pipeline/toolkit/publish.js +202 -0
- package/dist/scripts/design/pipeline/toolkit/publishActions.js +12 -0
- package/dist/scripts/design/pipeline/toolkit/reference.js +846 -0
- package/dist/scripts/design/pipeline/toolkit/snapshot.js +882 -0
- package/dist/scripts/design/pipeline/toolkit/tokens.js +456 -0
- package/dist/scripts/design/pipeline/visual-regression.js +137 -0
- package/dist/scripts/design/pipeline/write-artifacts.js +61 -0
- package/package.json +97 -0
- package/schemas/manifest.json +1064 -0
- package/templates/README.md +12 -0
- package/templates/codex/mcp-client.json +8 -0
|
@@ -0,0 +1,882 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
import { loadCheerio, loadPlaywright } from '../optionalDeps.js';
|
|
3
|
+
// Fallback cap on how many linked stylesheets capturePageSnapshot inlines when options.maxStylesheets is not provided.
const DEFAULT_MAX_STYLESHEETS = 24;
|
|
4
|
+
// Viewport returned (as a copy) by normalizeViewport when the caller supplies no positive width/height.
const DEFAULT_VIEWPORT = { width: 1440, height: 900 };
|
|
5
|
+
// User-agent string passed to browser.newContext() for the rendering session.
const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
|
6
|
+
// Request resource types that captureResponseAsset will record; responses of any other type are ignored.
const MIRRORABLE_RESOURCE_TYPES = new Set(['document', 'stylesheet', 'image', 'media', 'font', 'script', 'xhr']);
|
|
7
|
+
// Memoized result of loadCheerio(); populated on first use by getCheerio().
let cachedCheerio = null;
|
|
8
|
+
/**
 * Lazily loads the optional cheerio dependency and memoizes it in the
 * module-level `cachedCheerio` slot so later calls reuse the same value.
 *
 * @returns {Promise<*>} Whatever `loadCheerio()` resolved to on first use.
 */
async function getCheerio() {
    if (cachedCheerio) {
        return cachedCheerio;
    }
    cachedCheerio = await loadCheerio();
    return cachedCheerio;
}
|
|
14
|
+
/**
 * Resolves the optional Playwright dependency through `loadPlaywright()`.
 * Unlike `getCheerio`, nothing is cached at this level.
 *
 * @returns {Promise<*>} The value `loadPlaywright()` resolves to.
 */
async function getPlaywright() {
    const playwrightModule = await loadPlaywright();
    return playwrightModule;
}
|
|
17
|
+
/**
 * Picks the viewport to render with.
 *
 * @param {{width?: number, height?: number, deviceScaleFactor?: number}|null|undefined} viewport
 *   Caller-supplied viewport; accepted only when both width and height are > 0.
 * @returns {{width: number, height: number, deviceScaleFactor?: number}}
 *   A fresh object: the caller's width/height/deviceScaleFactor (the
 *   `deviceScaleFactor` key is always present, possibly `undefined`), or a
 *   copy of `DEFAULT_VIEWPORT` when the input is unusable.
 */
function normalizeViewport(viewport) {
    const usable = Boolean(viewport) && viewport.width > 0 && viewport.height > 0;
    if (!usable) {
        return { ...DEFAULT_VIEWPORT };
    }
    const { width, height, deviceScaleFactor } = viewport;
    return { width, height, deviceScaleFactor };
}
|
|
27
|
+
/**
 * Extracts the origin (scheme + host + port) of a URL string.
 *
 * @param {string} value - Candidate absolute URL.
 * @returns {string} The URL's `origin`, or an empty string when `value`
 *   cannot be parsed by the WHATWG `URL` constructor.
 */
function getPageOrigin(value) {
    let origin = '';
    try {
        ({ origin } = new URL(value));
    }
    catch {
        // Unparseable input: keep the empty-string fallback.
    }
    return origin;
}
|
|
35
|
+
/**
 * Records an asset that could not be mirrored, once per normalized URL.
 *
 * @param {{url: string, reason: string, missing: Array<{url: string, reason: string}>}} options
 *   `url` is normalized via `normalizeAssetUrl`; `missing` is mutated in
 *   place. If the normalized URL is already listed, nothing changes (the
 *   first recorded reason wins).
 * @returns {void}
 */
function registerMissingAsset(options) {
    const assetUrl = normalizeAssetUrl(options.url);
    const alreadyTracked = options.missing.some((entry) => entry.url === assetUrl);
    if (!alreadyTracked) {
        options.missing.push({ url: assetUrl, reason: options.reason });
    }
}
|
|
46
|
+
/**
 * Removes the first entry for `url` from a missing-asset list, if present.
 *
 * @param {Array<{url: string, reason: string}>} missing - List mutated in place.
 * @param {string} url - Asset URL; compared after `normalizeAssetUrl`.
 * @returns {void}
 */
function clearMissingAsset(missing, url) {
    const target = normalizeAssetUrl(url);
    for (let position = 0; position < missing.length; position += 1) {
        if (missing[position].url === target) {
            missing.splice(position, 1);
            return;
        }
    }
}
|
|
53
|
+
/**
 * Renders `url` in headless Chromium (via Playwright) and builds a
 * self-contained snapshot of the page: rewritten HTML, aggregated CSS,
 * extracted palette/fonts/sections and, optionally, mirrored asset buffers.
 *
 * @param {string} url - Page to render.
 * @param {object} [options]
 * @param {boolean} [options.mirrorAssets] - Capture network responses and
 *   rewrite references to local relative paths. When set, any asset still
 *   missing at the end causes the call to fail.
 * @param {boolean} [options.allowRemoteAssets] - Allow mirroring assets from
 *   origins other than the page's.
 * @param {{width: number, height: number, deviceScaleFactor?: number}} [options.viewport]
 * @param {boolean} [options.runInteractions] - Run `runDefaultInteractions`
 *   after load.
 * @param {boolean} [options.keepScripts] - Keep `<script>`/`<noscript>`
 *   elements (they are stripped by default).
 * @param {number} [options.maxStylesheets] - Cap on linked stylesheets to
 *   inline; defaults to `DEFAULT_MAX_STYLESHEETS`.
 * @returns {Promise<object>} Snapshot with `originalHtml`, `inlineHtml`,
 *   `aggregatedCss`, `colorPalette`, `fontFamilies`, `runtimeCanvasColors`,
 *   `resolvedFonts`, `sections`, `assets` and `assetWarnings`.
 * @throws {Error} Any failure inside the rendering pipeline is rethrown as
 *   `Failed to render <url> via Playwright: <reason>` with the original error
 *   attached as `cause`.
 */
export async function capturePageSnapshot(url, options) {
    const mirrorAssets = Boolean(options?.mirrorAssets);
    const allowRemoteAssets = Boolean(options?.allowRemoteAssets);
    const viewport = normalizeViewport(options?.viewport);
    const baseOrigin = getPageOrigin(url);
    const playwright = await getPlaywright();
    const browser = await playwright.chromium.launch({ headless: true });
    const assetTasks = [];
    const capturedAssets = [];
    const missingAssets = [];
    try {
        const context = await browser.newContext({
            userAgent: USER_AGENT,
            viewport,
            deviceScaleFactor: options?.viewport?.deviceScaleFactor
        });
        const page = await context.newPage();
        if (mirrorAssets) {
            // Each response is captured asynchronously; the promises are awaited together below.
            page.on('response', (response) => {
                assetTasks.push(captureResponseAsset(response, url, baseOrigin, capturedAssets, allowRemoteAssets, missingAssets));
            });
        }
        await page.goto(url, { waitUntil: 'networkidle', timeout: 120_000 });
        await page.waitForTimeout(2000);
        if (options?.runInteractions) {
            await runDefaultInteractions(page);
        }
        const runtimeMetadata = await collectRuntimeMetadata(page);
        const html = await page.content();
        await Promise.all(assetTasks);
        const cheerio = await getCheerio();
        const $ = cheerio.load(html);
        absolutizeDocument($, url);
        const assetRewrite = mirrorAssets ? buildAssetRewrite(capturedAssets, baseOrigin, missingAssets) : null;
        if (assetRewrite) {
            await ensureInlineAssets(html, url, assetRewrite, allowRemoteAssets, baseOrigin);
            await ensurePortfolioAssets(html, url, assetRewrite, allowRemoteAssets, baseOrigin);
            await ensureDocumentAssets($, url, assetRewrite, allowRemoteAssets, baseOrigin);
            rewriteDocumentAssets($, url, assetRewrite.map);
        }
        rewriteBaseHref($, mirrorAssets ? './' : deriveOriginBase(url));
        if (!options?.keepScripts) {
            stripExecutableContent($);
        }
        const maxSheets = options?.maxStylesheets ?? DEFAULT_MAX_STYLESHEETS;
        const stylesheetHrefs = collectStylesheetHrefs($, url, maxSheets);
        const inlineCss = [];
        $('style').each((_, element) => {
            const text = $(element).text();
            if (text && text.trim().length > 0) {
                inlineCss.push(text);
            }
        });
        for (const href of stylesheetHrefs) {
            const cssResponse = await fetchWithHeaders(href, url);
            if (!cssResponse.ok) {
                continue;
            }
            const cssText = await cssResponse.text();
            if (cssText.trim().length > 0) {
                // Relative url() references inside an external stylesheet resolve against the
                // stylesheet's own URL, not the page URL. Absolutize them here, while the
                // stylesheet href is still known; the page-level pass below then leaves these
                // already-absolute URLs untouched and only handles inline <style> content.
                inlineCss.push(`/* source: ${href} */\n${absolutizeCssUrls(cssText, href)}`);
            }
        }
        let aggregatedCss = absolutizeCssUrls(inlineCss.join('\n\n'), url);
        if (assetRewrite) {
            await ensureCssAssets(aggregatedCss, url, assetRewrite, allowRemoteAssets, baseOrigin);
            aggregatedCss = rewriteCssAssetUrls(aggregatedCss, url, assetRewrite.map);
        }
        if (aggregatedCss.trim().length > 0) {
            $('head').append(`\n<style data-inline-source="hi-fi-toolkit">\n${aggregatedCss}\n</style>`);
        }
        const colorPalette = computeColorPalette(aggregatedCss).slice(0, 24);
        const fontFamilies = computeFontFamilies(aggregatedCss).slice(0, 8);
        const sections = summarizeSections(cheerio, $);
        const inlineHtml = buildDocumentHtml($);
        if (assetRewrite && assetRewrite.missing.length > 0) {
            const missingPath = 'assets/missing-assets.json';
            assetRewrite.assets.push({
                sourceUrl: 'about:missing-assets',
                relativePath: missingPath,
                buffer: Buffer.from(JSON.stringify(assetRewrite.missing, null, 2))
            });
        }
        const warnings = assetRewrite?.missing ?? missingAssets;
        if (warnings.length > 0) {
            console.warn(`[snapshot] Missing assets while mirroring ${url}: ${warnings.length} issue${warnings.length === 1 ? '' : 's'}`);
            warnings.slice(0, 8).forEach((warning) => {
                console.warn(' -', warning.url, `(${warning.reason})`);
            });
            // Hard fail when mirroring is requested but assets are missing, to avoid shipping incomplete offline clones.
            if (mirrorAssets) {
                throw new Error(`Missing ${warnings.length} mirrored assets for ${url}; enable network/allow_remote_assets or fix URLs before cloning`);
            }
        }
        return {
            originalHtml: html,
            inlineHtml,
            aggregatedCss,
            colorPalette,
            fontFamilies,
            runtimeCanvasColors: runtimeMetadata.runtimeCanvasColors,
            resolvedFonts: runtimeMetadata.resolvedFonts,
            sections,
            assets: assetRewrite?.assets ?? [],
            assetWarnings: assetRewrite?.missing ?? missingAssets
        };
    }
    catch (error) {
        // Keep the original error reachable via `cause`, and tolerate non-Error throwables.
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to render ${url} via Playwright: ${reason}`, { cause: error });
    }
    finally {
        await browser.close();
    }
}
|
|
167
|
+
/**
 * Best-effort interaction macro run against a loaded Playwright page:
 * scrolls down twice and back up, clicks the first element matching each
 * known "next slide" selector, hovers the first lottie / autoplay-video
 * element, then requests every `[data-src]` image from inside the page.
 *
 * Failures never propagate: individual click/hover/evaluate rejections are
 * ignored, and anything else is logged with `console.warn`.
 *
 * @param {object} page - Playwright-style page (`mouse.wheel`,
 *   `waitForTimeout`, `$`, `evaluate`).
 * @returns {Promise<void>}
 */
export async function runDefaultInteractions(page) {
    try {
        const pause = (ms) => page.waitForTimeout(ms);
        // [vertical wheel delta, settle time in ms]
        const scrollSteps = [[1400, 600], [1400, 600], [-600, 400]];
        for (const [deltaY, settleMs] of scrollSteps) {
            await page.mouse.wheel(0, deltaY);
            await pause(settleMs);
        }
        // Applies `action` to the first match of each selector, ignoring rejections.
        const pokeEach = async (selectors, action, settleMs) => {
            for (const selector of selectors) {
                const target = await page.$(selector);
                if (!target) {
                    continue;
                }
                await target[action]().catch(() => { });
                await pause(settleMs);
            }
        };
        await pokeEach(['[data-slider="next"]', '.swiper-button-next', '.w-slider-arrow-right', '[data-scroll="next"]'], 'click', 350);
        await pokeEach(['[data-lottie]', 'video[autoplay]'], 'hover', 200);
        await pause(600);
        await page
            .evaluate(() => {
            // Runs in the browser: request each distinct data-src image once.
            const requested = new Set();
            const toRequestPath = (raw) => {
                if (!raw) {
                    return null;
                }
                const candidate = raw.trim();
                if (/^https?:\/\//i.test(candidate) || candidate.startsWith('/')) {
                    return candidate;
                }
                return candidate.startsWith('assets/') ? `/${candidate}` : `/assets/portfolio/${candidate}`;
            };
            for (const node of document.querySelectorAll('[data-src]')) {
                const target = toRequestPath(node.getAttribute('data-src') ?? '');
                if (!target || requested.has(target)) {
                    continue;
                }
                requested.add(target);
                const probe = new Image();
                probe.decoding = 'async';
                probe.src = target;
            }
        })
            .catch(() => { });
    }
    catch (error) {
        console.warn('[snapshot] interaction macro failed', error);
    }
}
|
|
230
|
+
/**
 * Response handler used while mirroring: stores the body of a mirrorable
 * response in `bucket`, or records why it could not be captured.
 *
 * @param {object} response - Playwright response.
 * @param {string} baseUrl - Page URL, used for the same-origin check.
 * @param {string} baseOrigin - Page origin (not read by this function).
 * @param {Array<{url: string, pathname: string, buffer: Buffer, origin: string}>} bucket
 *   Receives one record per captured response.
 * @param {boolean} allowRemoteAssets - When false, cross-origin responses are
 *   recorded as `remote-blocked` instead of captured.
 * @param {Array<{url: string, reason: string}>} missingAssets - Receives
 *   `remote-blocked`, `http-<status>` and `capture-error` entries.
 * @returns {Promise<void>} Never rejects for capture failures; they are
 *   logged and recorded instead.
 */
async function captureResponseAsset(response, baseUrl, baseOrigin, bucket, allowRemoteAssets, missingAssets) {
    const flagMissing = (url, reason) => registerMissingAsset({ url, reason, missing: missingAssets });
    try {
        const resourceType = response.request().resourceType();
        if (!MIRRORABLE_RESOURCE_TYPES.has(resourceType)) {
            return;
        }
        const assetUrl = response.url();
        const isRemote = !isSameOrigin(assetUrl, baseUrl);
        if (isRemote && !allowRemoteAssets) {
            flagMissing(assetUrl, 'remote-blocked');
            return;
        }
        if (!response.ok()) {
            flagMissing(assetUrl, `http-${response.status()}`);
            return;
        }
        const { pathname, origin } = new URL(assetUrl);
        const buffer = Buffer.from(await response.body());
        bucket.push({ url: normalizeAssetUrl(assetUrl), pathname, buffer, origin });
    }
    catch (error) {
        console.warn('[snapshot] Failed to capture asset', response.url(), 'due to', error);
        flagMissing(response.url(), 'capture-error');
    }
}
|
|
268
|
+
/**
 * Turns captured response records into the rewrite state used for mirroring.
 *
 * @param {Array<{url: string, origin?: string, buffer: Buffer}>} records
 *   Captured responses; only the first record per URL is kept.
 * @param {string} baseOrigin - Page origin, also the fallback when a record
 *   carries no origin.
 * @param {Array<{url: string, reason: string}>} [missingAssets=[]] - Shared
 *   missing list; entries for URLs that were captured are removed from it.
 * @returns {{assets: Array<{sourceUrl: string, relativePath: string, buffer: Buffer}>, map: Map<string, string>, missing: Array}}
 *   `map` goes from source URL to local relative path; `missing` is the
 *   same array instance that was passed in.
 */
function buildAssetRewrite(records, baseOrigin, missingAssets = []) {
    const pathBySource = new Map();
    const mirrored = [];
    for (const { url, origin, buffer } of records) {
        const relativePath = buildRelativeAssetPath(url, origin ?? baseOrigin, baseOrigin);
        if (!pathBySource.has(url)) {
            clearMissingAsset(missingAssets, url);
            pathBySource.set(url, relativePath);
            mirrored.push({ sourceUrl: url, relativePath, buffer });
        }
    }
    return { assets: mirrored, map: pathBySource, missing: missingAssets };
}
|
|
282
|
+
/**
 * Rewrites URL-bearing attributes of the loaded document to absolute URLs
 * and makes sure the document has a `<base>` element.
 *
 * @param {Function} $ - Cheerio-style document handle.
 * @param {string} baseUrl - URL that relative references are resolved against.
 * @returns {void} The document behind `$` is mutated in place.
 */
function absolutizeDocument($, baseUrl) {
    const targets = [
        { selector: 'img', attribute: 'src' },
        { selector: 'img', attribute: 'srcset', kind: 'srcset' },
        { selector: 'source', attribute: 'src' },
        { selector: 'source', attribute: 'srcset', kind: 'srcset' },
        { selector: 'video', attribute: 'poster' },
        { selector: 'video', attribute: 'src' },
        { selector: 'a', attribute: 'href' },
        { selector: 'link', attribute: 'href' },
        { selector: 'use', attribute: 'xlink:href' },
        { selector: 'img', attribute: 'data-src' }
    ];
    for (const { selector, attribute, kind } of targets) {
        $(selector).each((_, element) => {
            const node = $(element);
            const current = node.attr(attribute);
            if (!current) {
                return;
            }
            const resolved = kind === 'srcset'
                ? absolutizeSrcset(current, baseUrl)
                : absolutizeUrl(current, baseUrl);
            node.attr(attribute, resolved);
        });
    }
    if ($('head base').length === 0) {
        // baseUrl is caller-supplied text that ends up inside a double-quoted attribute:
        // escape `&` and `"` so it cannot terminate the attribute or inject markup.
        const safeHref = String(baseUrl).replace(/&/g, '&amp;').replace(/"/g, '&quot;');
        $('head').prepend(`<base href="${safeHref}">`);
    }
}
|
|
313
|
+
/**
 * Replaces any existing `<base>` element in `<head>` with one pointing at
 * `baseHref`.
 *
 * @param {Function} $ - Cheerio-style document handle.
 * @param {string} baseHref - New base reference (e.g. `./` for mirrored output).
 * @returns {void}
 */
function rewriteBaseHref($, baseHref) {
    $('head base').remove();
    // The value lands inside a double-quoted attribute: escape `&` and `"` so a hostile or
    // merely unusual URL cannot break out of the attribute.
    const safeHref = String(baseHref).replace(/&/g, '&amp;').replace(/"/g, '&quot;');
    $('head').prepend(`<base href="${safeHref}">`);
}
|
|
317
|
+
/**
 * Removes every `<script>` element, then every `<noscript>` element, from
 * the loaded document.
 *
 * @param {Function} $ - Cheerio-style document handle.
 * @returns {void}
 */
function stripExecutableContent($) {
    for (const tagName of ['script', 'noscript']) {
        $(tagName).remove();
    }
}
|
|
321
|
+
/**
 * Collects absolute hrefs of `<link rel="stylesheet">` elements in document
 * order, stopping once `max` have been gathered.
 *
 * @param {Function} $ - Cheerio-style document handle.
 * @param {string} baseUrl - Base for resolving relative hrefs.
 * @param {number} max - Maximum number of hrefs to return.
 * @returns {string[]} Up to `max` absolute stylesheet URLs; links without an
 *   href are skipped and do not count toward the limit.
 */
function collectStylesheetHrefs($, baseUrl, max) {
    const collected = [];
    $('link[rel="stylesheet"]').each((_, linkNode) => {
        const rawHref = $(linkNode).attr('href');
        if (rawHref) {
            if (collected.length >= max) {
                // Returning false stops the `.each` iteration early.
                return false;
            }
            collected.push(absolutizeUrl(rawHref, baseUrl));
        }
        return undefined;
    });
    return collected;
}
|
|
336
|
+
/**
 * Serializes the whole document and guarantees the output begins with a
 * doctype.
 *
 * @param {Function} $ - Cheerio-style document handle exposing `root()`.
 * @returns {string} The serialized markup as-is when it already starts with
 *   `<!doctype` (case-insensitive); otherwise `<!doctype html>` plus a
 *   newline is prepended (an empty/null document yields just that prefix).
 */
function buildDocumentHtml($) {
    const markup = $.root().html();
    const hasDoctype = Boolean(markup) && markup.toLowerCase().startsWith('<!doctype');
    return hasDoctype ? markup : `<!doctype html>\n${markup ?? ''}`;
}
|
|
343
|
+
/**
 * Points asset-bearing attributes at their mirrored local copies.
 *
 * @param {Function} $ - Cheerio-style document handle.
 * @param {string} baseUrl - Base for resolving the current attribute values.
 * @param {Map<string, string>} assetMap - Normalized source URL to local
 *   relative path. Attributes whose URL is not in the map are left alone.
 * @returns {void} Replacement paths are written with a leading `./` unless
 *   they already start with `.`.
 */
function rewriteDocumentAssets($, baseUrl, assetMap) {
    const targets = [
        ['img', 'src'],
        ['script', 'src'],
        ['link', 'href'],
        ['video', 'poster'],
        ['video source', 'src'],
        ['source', 'src'],
        ['use', 'xlink:href']
    ];
    for (const [selector, attribute] of targets) {
        $(selector).each((_, element) => {
            const node = $(element);
            const current = node.attr(attribute);
            if (!current) {
                return;
            }
            const localPath = assetMap.get(normalizeAssetUrl(absolutizeUrl(current, baseUrl)));
            if (!localPath) {
                return;
            }
            node.attr(attribute, localPath.startsWith('.') ? localPath : `./${localPath}`);
        });
    }
}
|
|
367
|
+
/**
 * Walks the asset-bearing attributes of the document and asks
 * `fetchMissingAsset` to handle each referenced URL, one at a time.
 *
 * @param {Function} $ - Cheerio-style document handle.
 * @param {string} baseUrl - Base for resolving attribute values.
 * @param {object} assetRewrite - Rewrite state from `buildAssetRewrite`.
 * @param {boolean} allowRemoteAssets - Forwarded to `fetchMissingAsset`.
 * @param {string} baseOrigin - Forwarded to `fetchMissingAsset`.
 * @returns {Promise<void>}
 */
async function ensureDocumentAssets($, baseUrl, assetRewrite, allowRemoteAssets, baseOrigin) {
    const targets = [
        ['img', 'src'],
        ['script', 'src'],
        ['link', 'href'],
        ['video', 'poster'],
        ['video source', 'src'],
        ['source', 'src'],
        ['use', 'xlink:href']
    ];
    for (const [selector, attribute] of targets) {
        const matched = $(selector);
        for (let position = 0; position < matched.length; position += 1) {
            const reference = $(matched[position]).attr(attribute);
            if (reference) {
                // Sequential on purpose: each fetch completes before the next one starts.
                await fetchMissingAsset(absolutizeUrl(reference, baseUrl), baseUrl, assetRewrite, allowRemoteAssets, baseOrigin);
            }
        }
    }
}
|
|
390
|
+
/**
 * Rewrites `url(...)` references in CSS to mirrored local paths.
 *
 * @param {string} css - Stylesheet text; falsy input is returned unchanged.
 * @param {string} baseUrl - Base for resolving each reference.
 * @param {Map<string, string>} assetMap - Normalized source URL to local
 *   relative path.
 * @returns {string} CSS where every mapped reference became an unquoted
 *   `url(./relative/path)`; `data:`, `mailto:`, `#fragment` and unmapped
 *   references are left exactly as written.
 */
function rewriteCssAssetUrls(css, baseUrl, assetMap) {
    if (!css) {
        return css;
    }
    const localize = (original, inner) => {
        const value = inner?.trim();
        if (!value) {
            return original;
        }
        const bare = value.replace(/^['"]|['"]$/g, '');
        if (/^(data:|mailto:|#)/i.test(bare)) {
            return original;
        }
        const localPath = assetMap.get(normalizeAssetUrl(absolutizeUrl(bare, baseUrl)));
        if (!localPath) {
            return original;
        }
        const relative = localPath.startsWith('.') ? localPath : `./${localPath}`;
        return `url(${relative})`;
    };
    return css.replace(/url\(([^)]+)\)/gi, localize);
}
|
|
411
|
+
/**
 * Scans raw HTML for quoted string literals that look like `assets/...`
 * paths (optionally prefixed with `/`, `./` or `../`) and asks
 * `fetchMissingAsset` to handle each distinct one.
 *
 * @param {string} html - Raw page markup; falsy input is a no-op.
 * @param {string} baseUrl - Base for resolving the discovered paths.
 * @param {object} assetRewrite - Rewrite state from `buildAssetRewrite`.
 * @param {boolean} allowRemoteAssets - Forwarded to `fetchMissingAsset`.
 * @param {string} baseOrigin - Forwarded to `fetchMissingAsset`.
 * @returns {Promise<void>}
 */
async function ensureInlineAssets(html, baseUrl, assetRewrite, allowRemoteAssets, baseOrigin) {
    if (!html) {
        return;
    }
    const references = new Set();
    for (const [, assetPath] of html.matchAll(/["'`](\.{0,2}\/?assets\/[^"'`]+)["'`]/gi)) {
        if (assetPath) {
            references.add(assetPath);
        }
    }
    for (const assetPath of references) {
        await fetchMissingAsset(absolutizeUrl(assetPath, baseUrl), baseUrl, assetRewrite, allowRemoteAssets, baseOrigin);
    }
}
|
|
430
|
+
/**
 * Scans raw HTML for `project_*.jpg` file names and asks `fetchMissingAsset`
 * to handle each distinct one under `/assets/portfolio/` on the page's host.
 *
 * @param {string} html - Raw page markup; falsy input is a no-op.
 * @param {string} baseUrl - Page URL used to build the absolute asset URL.
 * @param {object} assetRewrite - Rewrite state from `buildAssetRewrite`.
 * @param {boolean} allowRemoteAssets - Forwarded to `fetchMissingAsset`.
 * @param {string} baseOrigin - Forwarded to `fetchMissingAsset`.
 * @returns {Promise<void>}
 */
async function ensurePortfolioAssets(html, baseUrl, assetRewrite, allowRemoteAssets, baseOrigin) {
    if (!html) {
        return;
    }
    const filenames = new Set();
    for (const [filename] of html.matchAll(/project_[^"'`\\\s]+?\.jpg/gi)) {
        if (filename) {
            filenames.add(filename);
        }
    }
    for (const filename of filenames) {
        const assetUrl = new URL(`/assets/portfolio/${filename}`, baseUrl).toString();
        await fetchMissingAsset(assetUrl, baseUrl, assetRewrite, allowRemoteAssets, baseOrigin);
    }
}
|
|
448
|
+
/**
 * Asks `fetchMissingAsset` to handle every `url(...)` reference found in
 * the CSS, in order of appearance (duplicates are not filtered here).
 *
 * @param {string} css - Stylesheet text; falsy input is a no-op.
 * @param {string} baseUrl - Base for resolving each reference.
 * @param {object} assetRewrite - Rewrite state from `buildAssetRewrite`.
 * @param {boolean} allowRemoteAssets - Forwarded to `fetchMissingAsset`.
 * @param {string} baseOrigin - Forwarded to `fetchMissingAsset`.
 * @returns {Promise<void>} `data:`, `mailto:` and `#fragment` references
 *   are skipped.
 */
async function ensureCssAssets(css, baseUrl, assetRewrite, allowRemoteAssets, baseOrigin) {
    if (!css) {
        return;
    }
    for (const [, inner] of css.matchAll(/url\(([^)]+)\)/gi)) {
        if (!inner) {
            continue;
        }
        const bare = inner.trim().replace(/^['"]|['"]$/g, '');
        if (/^(data:|mailto:|#)/i.test(bare)) {
            continue;
        }
        await fetchMissingAsset(absolutizeUrl(bare, baseUrl), baseUrl, assetRewrite, allowRemoteAssets, baseOrigin);
    }
}
|
|
468
|
+
/**
 * Resolves a possibly-relative reference against `baseUrl`.
 *
 * @param {string} value - Reference as written in the document.
 * @param {string} baseUrl - Base URL for resolution.
 * @returns {string} The trimmed input unchanged when it starts with
 *   `http://`, `https://`, `data:` or `mailto:` (case-sensitive), or when
 *   resolution fails; otherwise the resolved absolute URL string.
 */
function absolutizeUrl(value, baseUrl) {
    const candidate = value.trim();
    const passthroughPrefixes = ['http://', 'https://', 'data:', 'mailto:'];
    if (passthroughPrefixes.some((prefix) => candidate.startsWith(prefix))) {
        return candidate;
    }
    let resolved = candidate;
    try {
        resolved = new URL(candidate, baseUrl).toString();
    }
    catch {
        // Unresolvable reference: hand back the trimmed input.
    }
    return resolved;
}
|
|
480
|
+
/**
 * Absolutizes every candidate URL of a `srcset` attribute value.
 *
 * @param {string} value - Raw `srcset` text (comma-separated candidates).
 * @param {string} baseUrl - Base for resolving each candidate URL.
 * @returns {string} Candidates re-joined with `, `; each keeps at most its
 *   first descriptor (e.g. `2x`, `640w`). Empty candidates are dropped.
 */
function absolutizeSrcset(value, baseUrl) {
    const rewritten = [];
    for (const rawCandidate of value.split(',')) {
        const candidate = rawCandidate.trim();
        if (candidate.length === 0) {
            continue;
        }
        const [url, descriptor] = candidate.split(/\s+/, 2);
        const absolute = absolutizeUrl(url, baseUrl);
        rewritten.push(descriptor ? `${absolute} ${descriptor}` : absolute);
    }
    return rewritten.join(', ');
}
|
|
492
|
+
/**
 * Rewrites relative `url(...)` references in CSS to absolute URLs.
 *
 * @param {string} css - Stylesheet text; falsy input is returned unchanged.
 * @param {string} baseUrl - Base for resolving relative references.
 * @returns {string} CSS where each relative reference is absolute, keeping
 *   the original quote character. `data:`, `http(s):`, `mailto:` and
 *   `#fragment` references are only whitespace-trimmed. References that
 *   cannot be resolved are logged and emitted without resolution.
 */
function absolutizeCssUrls(css, baseUrl) {
    if (!css) {
        return css;
    }
    const resolveReference = (_whole, inner) => {
        const value = inner.trim();
        const bare = value.replace(/^['"]|['"]$/g, '');
        if (/^(data:|https?:|mailto:|#)/i.test(bare)) {
            return `url(${value})`;
        }
        let resolved;
        try {
            resolved = new URL(bare, baseUrl).toString();
        }
        catch (error) {
            console.warn('[snapshot] Failed to absolutize CSS url', bare, 'due to', error);
            resolved = bare;
        }
        let quote = '';
        if (value.startsWith('"')) {
            quote = '"';
        }
        else if (value.startsWith('\'')) {
            quote = '\'';
        }
        return `url(${quote}${resolved}${quote})`;
    };
    return css.replace(/url\(([^)]+)\)/gi, resolveReference);
}
|
|
513
|
+
/**
 * Extracts the distinct colors mentioned in a stylesheet, in order of first
 * appearance: all 3- and 6-digit hex literals first, then lowercase
 * `rgb(...)` / `rgba(...)` values converted to hex.
 *
 * @param {string} css - Stylesheet text.
 * @returns {string[]} Distinct colors as lowercase `#rrggbb` strings.
 */
export function computeColorPalette(css) {
    // String() mirrors the implicit coercion RegExp.prototype.exec applies to its argument.
    const text = String(css);
    const seen = new Set();
    for (const [hexToken] of text.matchAll(/#([0-9a-fA-F]{3}|[0-9a-fA-F]{6})\b/g)) {
        seen.add(normalizeHex(hexToken));
    }
    for (const [rgbToken] of text.matchAll(/rgb(a)?\(([^)]+)\)/g)) {
        const converted = rgbToHex(rgbToken);
        if (converted) {
            seen.add(converted);
        }
    }
    return [...seen];
}
|
|
529
|
+
/**
 * Lowercases a hex color and expands the short `#rgb` form to `#rrggbb`.
 *
 * @param {string} value - Hex color including the leading `#`.
 * @returns {string} Lowercased color; 4-character inputs are expanded by
 *   doubling each digit, anything else is returned as-is (lowercased).
 */
function normalizeHex(value) {
    const lowered = value.toLowerCase();
    if (lowered.length !== 4) {
        return lowered;
    }
    const doubled = [1, 2, 3].map((position) => lowered[position].repeat(2));
    return `#${doubled.join('')}`;
}
|
|
536
|
+
/**
 * Converts a CSS `rgb()` / `rgba()` functional color to `#rrggbb`.
 *
 * Accepts both the legacy comma-separated form (`rgb(255, 0, 128)`,
 * `rgba(0, 0, 0, 0.5)`) and the space-separated form with an optional
 * slash-delimited alpha (`rgb(255 128 0 / 50%)`). Percentage channels are
 * scaled to 0–255; plain numbers are truncated to integers. Alpha is ignored.
 *
 * @param {string} value - The full functional notation, e.g. `rgb(1, 2, 3)`.
 * @returns {string|null} Lowercase `#rrggbb`, or `null` when fewer than three
 *   channels are present or a channel is not numeric.
 */
function rgbToHex(value) {
    const inner = value.replace(/rgba?\(/i, '').replace(')', '');
    // Everything after a `/` is the alpha component of the space-separated syntax.
    const [colorPart] = inner.split('/');
    const tokens = colorPart
        .split(/[\s,]+/)
        .filter((token) => token.length > 0)
        .slice(0, 3);
    if (tokens.length !== 3) {
        return null;
    }
    const channels = [];
    for (const token of tokens) {
        const numeric = token.endsWith('%')
            ? Math.round((Number.parseFloat(token) / 100) * 255)
            : Number.parseInt(token, 10);
        if (Number.isNaN(numeric)) {
            return null;
        }
        // Clamp out-of-range channel values into 0–255.
        channels.push(Math.max(0, Math.min(255, numeric)));
    }
    return `#${channels.map((channel) => channel.toString(16).padStart(2, '0')).join('')}`;
}
|
|
559
|
+
/**
 * Formats a number as hexadecimal, left-padded with `0` to two characters.
 *
 * @param {number} value - Number to format.
 * @returns {string} Hexadecimal text of at least two characters.
 */
function toHex(value) {
    const hexDigits = value.toString(16);
    return hexDigits.padStart(2, '0');
}
|
|
562
|
+
/**
 * Collects the distinct font family names declared in a stylesheet.
 *
 * Every `font-family:` declaration is split on commas; each entry is trimmed
 * and stripped of one leading/trailing quote. A trailing `!important` flag is
 * removed so it does not end up inside the last family name.
 *
 * @param {string} css - Stylesheet text to scan.
 * @returns {string[]} Unique family names in first-seen order.
 */
export function computeFontFamilies(css) {
    const families = new Set();
    const declarations = String(css ?? '').matchAll(/font-family\s*:\s*([^;{}]+)/gi);
    for (const [, declaration] of declarations) {
        // "Arial !important" would otherwise be reported as a family name.
        const value = declaration.replace(/\s*!\s*important\s*$/i, '');
        for (const segment of value.split(',')) {
            const family = segment.trim().replace(/^['"]|['"]$/g, '');
            if (family.length > 0) {
                families.add(family);
            }
        }
    }
    return Array.from(families);
}
|
|
576
|
+
/**
 * Builds up to eight `{ title, description }` summaries for a page.
 *
 * Each candidate from `collectSectionCandidates` contributes one summary
 * unless its extracted text is empty. The title is the text of the first
 * `h1`/`h2`/`h3` inside the candidate, or `Section <n>` (1-based candidate
 * position) when there is none. Descriptions are cut to 280 characters. When
 * no candidate yields text, the `body` text is used as a single
 * "Page overview" entry.
 *
 * @param {object} cheerio - Passed through to `extractSectionText`.
 * @param {Function} $ - Selector function for the loaded document
 *   (presumably a cheerio root — confirm with the caller).
 * @returns {Array<{title: string, description: string}>} At most 8 entries.
 */
function summarizeSections(cheerio, $) {
    const MAX_DESCRIPTION = 280;
    const collected = [];
    for (const [position, node] of collectSectionCandidates($).entries()) {
        const headingNode = $(node).find('h1, h2, h3').first().get(0);
        const title = headingNode ? extractSectionText(cheerio, $, headingNode) : '';
        const body = extractSectionText(cheerio, $, node);
        if (body.length === 0) {
            continue;
        }
        collected.push({
            title: title || `Section ${position + 1}`,
            description: body.slice(0, MAX_DESCRIPTION)
        });
    }
    if (collected.length === 0) {
        const overview = extractSectionText(cheerio, $, $('body').get(0)).slice(0, MAX_DESCRIPTION);
        if (overview.length > 0) {
            collected.push({ title: 'Page overview', description: overview });
        }
    }
    return collected.slice(0, 8);
}
|
|
599
|
+
/**
 * Gathers the elements that are treated as page sections.
 *
 * Selectors are tried in priority order; matches are de-duplicated and kept
 * in discovery order. Once a selector leaves the list with 12 or more
 * elements no further selectors are tried (the selector that crossed the
 * limit is still added in full). If nothing matches at all, the direct
 * children of `body` are used instead.
 *
 * @param {Function} $ - Selector function whose results expose
 *   `each((index, element) => ...)` and `children()`.
 * @returns {Array} Unique matched elements.
 */
function collectSectionCandidates($) {
    const prioritizedSelectors = [
        'section',
        '[data-section]',
        '[data-load-stage]',
        '[data-load-section]',
        '[data-scroll]',
        '[data-anchor]',
        '.section',
        '[class*="section"]',
        '.w-layout-blockcontainer',
        'main > div'
    ];
    // A Set keeps insertion order, so it doubles as the ordered result list.
    const unique = new Set();
    const remember = (_index, node) => {
        unique.add(node);
    };
    for (const selector of prioritizedSelectors) {
        $(selector).each(remember);
        if (unique.size >= 12) {
            break;
        }
    }
    if (unique.size === 0) {
        $('body').children().each(remember);
    }
    return [...unique];
}
|
|
634
|
+
/**
 * Extracts whitespace-normalised plain text for an element.
 *
 * When the element has inner HTML, a space is inserted between adjacent tags
 * (`><` becomes `> <`) and the markup is re-parsed inside a `<root>` wrapper
 * before reading its text, so text from neighbouring elements does not run
 * together. Otherwise the element's own text is used. Non-breaking spaces
 * become regular spaces, `normalizeSentenceSpacing` is applied, runs of
 * whitespace collapse to one space and the result is trimmed.
 *
 * @param {object} cheerio - Object exposing `load(html)`.
 * @param {Function} $ - Selector function for the source document.
 * @param {*} element - Element to read; falsy yields `''`.
 * @returns {string} Normalised text.
 */
function extractSectionText(cheerio, $, element) {
    if (!element) {
        return '';
    }
    const markup = $(element).html();
    let text;
    if (markup) {
        const fragment = cheerio.load(`<root>${markup.replace(/></g, '> <')}</root>`);
        text = fragment('root').text();
    }
    else {
        text = $(element).text();
    }
    const withoutNbsp = text.replace(/\u00a0/g, ' ');
    return normalizeSentenceSpacing(withoutNbsp)
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
655
|
+
/**
 * Inserts a space after `.`, `?`, `!` or `%` when the next character is an
 * ASCII letter or digit.
 *
 * @param {string} value - Text to adjust; falsy values yield `''`.
 * @returns {string} Text with the missing spaces added.
 */
export function normalizeSentenceSpacing(value) {
    return value
        ? value.replace(/([.?!%])(?!\s)(?=[A-Za-z0-9])/g, '$1 ')
        : '';
}
|
|
661
|
+
/**
 * Tells whether two absolute URLs have the same `origin`.
 *
 * @param {string} candidate - URL under test.
 * @param {string} baseUrl - URL to compare against.
 * @returns {boolean} `true` when both parse and their origins are equal;
 *   `false` when they differ or either one cannot be parsed.
 */
function isSameOrigin(candidate, baseUrl) {
    let candidateUrl;
    let referenceUrl;
    try {
        candidateUrl = new URL(candidate);
        referenceUrl = new URL(baseUrl);
    }
    catch {
        return false;
    }
    return candidateUrl.origin === referenceUrl.origin;
}
|
|
671
|
+
/**
 * Produces the lookup form of an asset URL by removing its fragment.
 *
 * @param {string} value - Absolute URL.
 * @returns {string} The serialised URL without `#fragment`, or `value`
 *   unchanged when it cannot be parsed.
 */
function normalizeAssetUrl(value) {
    let url;
    try {
        url = new URL(value);
    }
    catch {
        return value;
    }
    url.hash = '';
    return url.toString();
}
|
|
681
|
+
/**
 * Turns a URL pathname into a safe relative path.
 *
 * The input is split on `/` and on `\`, segments are trimmed, and empty, `.`
 * and `..` segments are dropped, so the result can never climb out of the
 * directory it is joined to. Backslashes are treated as separators because
 * this function may be handed raw, unparsed text in which a segment such as
 * `..\..\x` would act as a traversal on Windows.
 *
 * @param {string} pathname - Path text to sanitise.
 * @returns {string} `/`-joined safe segments, or `index.html` when none remain.
 */
function sanitizeAssetPath(pathname) {
    const segments = pathname
        .split(/[\\/]/)
        .map((segment) => segment.trim())
        .filter((segment) => segment.length > 0 && segment !== '.' && segment !== '..');
    if (segments.length === 0) {
        return 'index.html';
    }
    return segments.join('/');
}
|
|
691
|
+
/**
 * Computes the relative path under which an asset is stored.
 *
 * The path portion comes from `sanitizeAssetPath` (applied to the parsed
 * pathname, or to `resourceUrl` itself when it does not parse). Assets that
 * are not same-origin are placed under `remote/<sanitised host>/`. Paths that
 * do not already begin with `assets/` or `video/` (case-insensitive) are
 * prefixed with `assets/`.
 *
 * @param {string} resourceUrl - URL of the asset.
 * @param {string} assetOrigin - Origin of the asset; may be empty.
 * @param {string} baseOrigin - Origin of the page being captured.
 * @returns {string} Relative storage path.
 */
function buildRelativeAssetPath(resourceUrl, assetOrigin, baseOrigin) {
    let url;
    try {
        url = new URL(resourceUrl);
    }
    catch {
        url = null;
    }
    const safePath = sanitizeAssetPath(url?.pathname ?? resourceUrl);
    // Same-origin either by the declared origin or, when none was given, by the parsed URL.
    const isLocal = assetOrigin === baseOrigin || (!assetOrigin && url && url.origin === baseOrigin);
    let combined = safePath;
    if (!isLocal) {
        combined = `remote/${sanitizeHost(url?.host ?? assetOrigin)}/${safePath}`;
    }
    if (combined.length === 0) {
        return 'index.html';
    }
    const lowered = combined.toLowerCase();
    const alreadyRooted = ['assets/', 'video/'].some((root) => lowered.startsWith(root));
    return alreadyRooted ? combined : `assets/${combined}`;
}
|
|
715
|
+
/**
 * Converts a host name into a directory-safe slug.
 *
 * The host is lower-cased, every run of characters other than `a-z`, `0-9`,
 * `.` and `-` becomes a single `-`, repeated dashes collapse, and leading and
 * trailing dashes are removed.
 *
 * @param {string} host - Host (optionally with port); may be empty.
 * @returns {string} The slug, or `external` when the host is falsy or nothing
 *   is left after cleaning.
 */
function sanitizeHost(host) {
    if (!host) {
        return 'external';
    }
    const dashed = host.toLowerCase().replace(/[^a-z0-9.-]+/g, '-');
    const collapsed = dashed.replace(/-+/g, '-');
    const slug = collapsed.replace(/^-+|-+$/g, '');
    return slug || 'external';
}
|
|
725
|
+
/**
 * Reads font and canvas colour information from the live page.
 *
 * The callback handed to `page.evaluate` is self-contained (it references no
 * outer variables). It records the `family` of every entry in
 * `document.fonts`, with one surrounding quote stripped. For each `<canvas>`
 * that yields a 2D context and has a non-zero size it samples three pixels
 * (top-left, centre, bottom-right) and records the colour of each sample
 * whose alpha is above zero. Sampling of a canvas stops at the first
 * `getImageData` failure.
 *
 * @param {object} page - Object exposing `evaluate(fn)` (presumably a
 *   browser-automation page handle — confirm with the caller).
 * @returns {Promise<{runtimeCanvasColors: string[], resolvedFonts: string[]}>}
 *   At most 12 colours and 24 fonts; both lists are empty when evaluation
 *   fails (the failure is logged as a warning).
 */
async function collectRuntimeMetadata(page) {
    try {
        const snapshot = await page.evaluate(() => {
            const channelToHex = (channel) => {
                const bounded = Math.max(0, Math.min(255, Math.round(channel)));
                return bounded.toString(16).padStart(2, '0');
            };
            const fontNames = new Set();
            const sampledColors = new Set();
            try {
                if (document.fonts && typeof document.fonts.forEach === 'function') {
                    document.fonts.forEach((face) => {
                        if (!face || typeof face.family !== 'string') {
                            return;
                        }
                        const name = face.family.trim().replace(/^['"]|['"]$/g, '');
                        if (name) {
                            fontNames.add(name);
                        }
                    });
                }
            }
            catch {
                // Ignore document.fonts access errors.
            }
            for (const canvas of Array.from(document.querySelectorAll('canvas'))) {
                let context;
                try {
                    context = typeof canvas.getContext === 'function' ? canvas.getContext('2d') : null;
                }
                catch {
                    context = null;
                }
                if (!context) {
                    continue;
                }
                const width = canvas.width || canvas.clientWidth || 0;
                const height = canvas.height || canvas.clientHeight || 0;
                if (width === 0 || height === 0) {
                    continue;
                }
                const samplePoints = [
                    [0, 0],
                    [Math.floor(width / 2), Math.floor(height / 2)],
                    [Math.max(0, width - 1), Math.max(0, height - 1)]
                ];
                for (const [x, y] of samplePoints) {
                    try {
                        const pixel = context.getImageData(x, y, 1, 1).data;
                        // Fully transparent samples carry no colour information.
                        if (pixel && pixel[3] > 0) {
                            sampledColors.add(`#${channelToHex(pixel[0])}${channelToHex(pixel[1])}${channelToHex(pixel[2])}`);
                        }
                    }
                    catch {
                        break;
                    }
                }
            }
            return {
                runtimeCanvasColors: Array.from(sampledColors),
                resolvedFonts: Array.from(fontNames)
            };
        });
        const { runtimeCanvasColors, resolvedFonts } = snapshot;
        return {
            runtimeCanvasColors: runtimeCanvasColors.slice(0, 12),
            resolvedFonts: resolvedFonts.slice(0, 24)
        };
    }
    catch (error) {
        console.warn('[snapshot] Failed to collect runtime metadata', error);
        return { runtimeCanvasColors: [], resolvedFonts: [] };
    }
}
|
|
797
|
+
/**
 * Returns the origin of a page URL followed by exactly one `/`.
 *
 * @param {string} pageUrl - Absolute URL of the page.
 * @returns {string} Origin with a single trailing slash.
 * @throws {TypeError} When `pageUrl` cannot be parsed as a URL.
 */
function deriveOriginBase(pageUrl) {
    const { origin } = new URL(pageUrl);
    const withoutTrailingSlash = origin.replace(/\/$/, '');
    return `${withoutTrailingSlash}/`;
}
|
|
801
|
+
/**
 * Issues a `fetch` with the module's user agent plus fixed
 * `accept-language` and `accept` headers.
 *
 * @param {string} url - URL to request.
 * @param {string} [referer] - Sent as the `referer` header when truthy.
 * @returns {Promise<Response>} The pending `fetch` result.
 */
async function fetchWithHeaders(url, referer) {
    const headers = {
        'user-agent': USER_AGENT,
        'accept-language': 'en-US,en;q=0.9',
        accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        ...(referer ? { referer } : {})
    };
    return fetch(url, { headers });
}
|
|
812
|
+
/**
 * Downloads an asset that is referenced by the page but not yet captured, and
 * records it in `assetRewrite`.
 *
 * Nothing happens when the normalised URL is already in `assetRewrite.map`.
 * Failures are reported through `registerMissingAsset` with one of these
 * reasons: `invalid-url`, `remote-blocked` (cross-origin while
 * `allowRemoteAssets` is false), `http-<status>` (non-OK response),
 * `unqualified-path` (last path segment has no `.`) or `fetch-error`
 * (the request or body read threw). URLs whose protocol is not `http:` or
 * `https:` are skipped silently. This function does not throw for those cases.
 *
 * On success the asset is removed from the missing list, mapped to its
 * relative path and appended to `assetRewrite.assets` with its bytes.
 *
 * @param {string} absoluteUrl - Absolute URL of the asset.
 * @param {string} referer - Page URL; used for the same-origin check and as
 *   the request referer.
 * @param {{map: Map<string, string>, assets: Array, missing: *}} assetRewrite -
 *   Capture state that is updated in place.
 * @param {boolean} allowRemoteAssets - Whether cross-origin assets may be fetched.
 * @param {string} baseOrigin - Origin of the page, used to build the relative path.
 * @returns {Promise<void>}
 */
async function fetchMissingAsset(absoluteUrl, referer, assetRewrite, allowRemoteAssets, baseOrigin) {
    const normalized = normalizeAssetUrl(absoluteUrl);
    if (assetRewrite.map.has(normalized)) {
        return;
    }
    const reportMissing = (reason) => {
        registerMissingAsset({
            url: absoluteUrl,
            reason,
            missing: assetRewrite.missing
        });
    };
    let parsed;
    try {
        parsed = new URL(absoluteUrl);
    }
    catch {
        reportMissing('invalid-url');
        return;
    }
    if (!allowRemoteAssets && !isSameOrigin(absoluteUrl, referer)) {
        reportMissing('remote-blocked');
        return;
    }
    // Only network resources can be fetched; file:, data:, blob: etc. are skipped.
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
        return;
    }
    try {
        const response = await fetchWithHeaders(parsed.toString(), referer);
        if (!response.ok) {
            // `status` is a numeric property of a fetch Response, not a method.
            reportMissing(`http-${response.status}`);
            return;
        }
        const arrayBuffer = await response.arrayBuffer();
        const buffer = Buffer.from(arrayBuffer);
        const filename = parsed.pathname.split('/').pop() ?? '';
        if (!filename.includes('.')) {
            reportMissing('unqualified-path');
            return;
        }
        const relativePath = buildRelativeAssetPath(parsed.toString(), parsed.origin, baseOrigin);
        clearMissingAsset(assetRewrite.missing, normalized);
        assetRewrite.map.set(normalized, relativePath);
        assetRewrite.assets.push({
            sourceUrl: normalized,
            relativePath,
            buffer
        });
    }
    catch (error) {
        console.warn('[snapshot] Failed to fetch referenced asset', absoluteUrl, 'due to', error);
        reportMissing('fetch-error');
    }
}
|