smippo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +46 -1
- package/src/crawler.js +6 -0
- package/src/page-capture.js +317 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "smippo",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "S.M.I.P.P.O. — Structured Mirroring of Internet Pages and Public Objects. Modern website copier that captures sites exactly as they appear in your browser.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/cli.js
CHANGED
|
@@ -89,8 +89,47 @@ export function run() {
|
|
|
89
89
|
'Wait strategy: networkidle|load|domcontentloaded',
|
|
90
90
|
'networkidle',
|
|
91
91
|
)
|
|
92
|
-
.option(
|
|
92
|
+
.option(
|
|
93
|
+
'--wait-time <ms>',
|
|
94
|
+
'Additional wait time after network idle',
|
|
95
|
+
'500',
|
|
96
|
+
)
|
|
93
97
|
.option('--timeout <ms>', 'Page load timeout', '30000')
|
|
98
|
+
|
|
99
|
+
// Scroll and reveal options (for capturing dynamic content)
|
|
100
|
+
.option(
|
|
101
|
+
'--scroll',
|
|
102
|
+
'Pre-scroll page to trigger lazy content (default: true)',
|
|
103
|
+
)
|
|
104
|
+
.option('--no-scroll', 'Disable pre-scroll behavior')
|
|
105
|
+
.option(
|
|
106
|
+
'--scroll-wait <ms>',
|
|
107
|
+
'Wait time after scrolling for animations',
|
|
108
|
+
'1000',
|
|
109
|
+
)
|
|
110
|
+
.option(
|
|
111
|
+
'--scroll-step <px>',
|
|
112
|
+
'Pixels per scroll increment (default: 200)',
|
|
113
|
+
'200',
|
|
114
|
+
)
|
|
115
|
+
.option(
|
|
116
|
+
'--scroll-delay <ms>',
|
|
117
|
+
'Delay between scroll steps (default: 50)',
|
|
118
|
+
'50',
|
|
119
|
+
)
|
|
120
|
+
.option(
|
|
121
|
+
'--scroll-behavior <type>',
|
|
122
|
+
'Scroll behavior: smooth|instant (default: smooth)',
|
|
123
|
+
'smooth',
|
|
124
|
+
)
|
|
125
|
+
.option(
|
|
126
|
+
'--reveal-all',
|
|
127
|
+
'Force reveal scroll-triggered content like GSAP, AOS (default: true)',
|
|
128
|
+
)
|
|
129
|
+
.option(
|
|
130
|
+
'--no-reveal-all',
|
|
131
|
+
'Disable force-reveal of scroll-triggered content',
|
|
132
|
+
)
|
|
94
133
|
.option('--user-agent <string>', 'Custom user agent')
|
|
95
134
|
.option('--viewport <WxH>', 'Viewport size', '1920x1080')
|
|
96
135
|
.option('--device <name>', 'Emulate device (e.g., "iPhone 13")')
|
|
@@ -312,6 +351,12 @@ async function capture(url, options) {
|
|
|
312
351
|
wait: options.wait,
|
|
313
352
|
waitTime: parseInt(options.waitTime, 10),
|
|
314
353
|
timeout: parseInt(options.timeout, 10),
|
|
354
|
+
scroll: options.scroll,
|
|
355
|
+
scrollWait: parseInt(options.scrollWait, 10),
|
|
356
|
+
scrollStep: parseInt(options.scrollStep, 10),
|
|
357
|
+
scrollDelay: parseInt(options.scrollDelay, 10),
|
|
358
|
+
scrollBehavior: options.scrollBehavior,
|
|
359
|
+
revealAll: options.revealAll,
|
|
315
360
|
userAgent: options.userAgent,
|
|
316
361
|
viewport: parseViewport(options.viewport),
|
|
317
362
|
device: options.device,
|
package/src/crawler.js
CHANGED
|
@@ -270,6 +270,12 @@ export class Crawler extends EventEmitter {
|
|
|
270
270
|
mimeExclude: this.options.mimeExclude,
|
|
271
271
|
maxSize: this.options.maxSize,
|
|
272
272
|
minSize: this.options.minSize,
|
|
273
|
+
scroll: this.options.scroll,
|
|
274
|
+
scrollWait: this.options.scrollWait,
|
|
275
|
+
scrollStep: this.options.scrollStep,
|
|
276
|
+
scrollDelay: this.options.scrollDelay,
|
|
277
|
+
scrollBehavior: this.options.scrollBehavior,
|
|
278
|
+
revealAll: this.options.revealAll,
|
|
273
279
|
});
|
|
274
280
|
|
|
275
281
|
const result = await capture.capture(url);
|
package/src/page-capture.js
CHANGED
|
@@ -35,9 +35,26 @@ export class PageCapture {
|
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
// Additional wait time if specified
|
|
39
|
-
|
|
40
|
-
|
|
38
|
+
// Additional wait time if specified (default 500ms for animations to start)
|
|
39
|
+
const waitTime = this.options.waitTime ?? 500;
|
|
40
|
+
if (waitTime > 0) {
|
|
41
|
+
await this.page.waitForTimeout(waitTime);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Step 1: Force reveal all scroll-triggered content
|
|
45
|
+
if (this.options.revealAll !== false) {
|
|
46
|
+
await this._revealAllContent();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// Step 2: Pre-scroll the page to trigger scroll animations
|
|
50
|
+
if (this.options.scroll !== false) {
|
|
51
|
+
await this._scrollPage();
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Step 3: Additional wait after scroll for animations to complete
|
|
55
|
+
const scrollWait = this.options.scrollWait ?? 1000;
|
|
56
|
+
if (scrollWait > 0 && this.options.scroll !== false) {
|
|
57
|
+
await this.page.waitForTimeout(scrollWait);
|
|
41
58
|
}
|
|
42
59
|
|
|
43
60
|
// Get the rendered HTML
|
|
@@ -148,4 +165,301 @@ export class PageCapture {
|
|
|
148
165
|
return type === filter || type.startsWith(filter + ';');
|
|
149
166
|
});
|
|
150
167
|
}
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Pre-scroll the page to trigger scroll-based animations and lazy loading
|
|
171
|
+
* Performs smooth, incremental scrolling to trigger all scroll-based content
|
|
172
|
+
*/
|
|
173
|
+
async _scrollPage() {
|
|
174
|
+
const scrollBehavior = this.options.scrollBehavior || 'smooth';
|
|
175
|
+
const scrollStep = this.options.scrollStep || 200; // pixels per step
|
|
176
|
+
const scrollDelay = this.options.scrollDelay || 50; // ms between steps
|
|
177
|
+
|
|
178
|
+
/* eslint-disable no-undef */
|
|
179
|
+
await this.page.evaluate(
|
|
180
|
+
async ({step, delay, behavior}) => {
|
|
181
|
+
// Helper for smooth scrolling with requestAnimationFrame
|
|
182
|
+
const smoothScroll = (targetY, duration = 300) => {
|
|
183
|
+
return new Promise(resolve => {
|
|
184
|
+
const startY = window.scrollY;
|
|
185
|
+
const distance = targetY - startY;
|
|
186
|
+
const startTime = performance.now();
|
|
187
|
+
|
|
188
|
+
const animate = currentTime => {
|
|
189
|
+
const elapsed = currentTime - startTime;
|
|
190
|
+
const progress = Math.min(elapsed / duration, 1);
|
|
191
|
+
|
|
192
|
+
// Easing function (ease-out-cubic)
|
|
193
|
+
const eased = 1 - Math.pow(1 - progress, 3);
|
|
194
|
+
|
|
195
|
+
window.scrollTo(0, startY + distance * eased);
|
|
196
|
+
|
|
197
|
+
if (progress < 1) {
|
|
198
|
+
requestAnimationFrame(animate);
|
|
199
|
+
} else {
|
|
200
|
+
resolve();
|
|
201
|
+
}
|
|
202
|
+
};
|
|
203
|
+
|
|
204
|
+
requestAnimationFrame(animate);
|
|
205
|
+
});
|
|
206
|
+
};
|
|
207
|
+
|
|
208
|
+
// Get initial page height
|
|
209
|
+
let lastHeight = document.body.scrollHeight;
|
|
210
|
+
let currentY = 0;
|
|
211
|
+
|
|
212
|
+
// Phase 1: Scroll down incrementally
|
|
213
|
+
while (currentY < document.body.scrollHeight) {
|
|
214
|
+
const targetY = Math.min(currentY + step, document.body.scrollHeight);
|
|
215
|
+
|
|
216
|
+
if (behavior === 'smooth') {
|
|
217
|
+
await smoothScroll(targetY, delay * 2);
|
|
218
|
+
} else {
|
|
219
|
+
window.scrollTo(0, targetY);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
currentY = targetY;
|
|
223
|
+
await new Promise(r => setTimeout(r, delay));
|
|
224
|
+
|
|
225
|
+
// Check if page height increased (lazy content loaded)
|
|
226
|
+
const newHeight = document.body.scrollHeight;
|
|
227
|
+
if (newHeight > lastHeight) {
|
|
228
|
+
lastHeight = newHeight;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
// Phase 2: Wait at bottom for any pending lazy loads
|
|
233
|
+
await new Promise(r => setTimeout(r, 500));
|
|
234
|
+
|
|
235
|
+
// Check if more content loaded while waiting
|
|
236
|
+
if (document.body.scrollHeight > lastHeight) {
|
|
237
|
+
// Scroll to the new bottom
|
|
238
|
+
if (behavior === 'smooth') {
|
|
239
|
+
await smoothScroll(document.body.scrollHeight, 300);
|
|
240
|
+
} else {
|
|
241
|
+
window.scrollTo(0, document.body.scrollHeight);
|
|
242
|
+
}
|
|
243
|
+
await new Promise(r => setTimeout(r, 300));
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// Phase 3: Scroll back up slowly (some sites have scroll-up animations)
|
|
247
|
+
const scrollUpStep = step * 2; // Faster on the way up
|
|
248
|
+
currentY = window.scrollY;
|
|
249
|
+
|
|
250
|
+
while (currentY > 0) {
|
|
251
|
+
const targetY = Math.max(currentY - scrollUpStep, 0);
|
|
252
|
+
|
|
253
|
+
if (behavior === 'smooth') {
|
|
254
|
+
await smoothScroll(targetY, delay);
|
|
255
|
+
} else {
|
|
256
|
+
window.scrollTo(0, targetY);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
currentY = targetY;
|
|
260
|
+
await new Promise(r => setTimeout(r, delay / 2));
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
// Phase 4: Return to top and wait
|
|
264
|
+
window.scrollTo(0, 0);
|
|
265
|
+
await new Promise(r => setTimeout(r, 200));
|
|
266
|
+
},
|
|
267
|
+
{step: scrollStep, delay: scrollDelay, behavior: scrollBehavior},
|
|
268
|
+
);
|
|
269
|
+
/* eslint-enable no-undef */
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* Force reveal all scroll-triggered content by disabling/triggering
|
|
274
|
+
* common animation libraries like GSAP ScrollTrigger, AOS, etc.
|
|
275
|
+
*/
|
|
276
|
+
async _revealAllContent() {
|
|
277
|
+
/* eslint-disable no-undef */
|
|
278
|
+
await this.page.evaluate(() => {
|
|
279
|
+
// Helper to safely access nested properties
|
|
280
|
+
const safeGet = (obj, path) => {
|
|
281
|
+
try {
|
|
282
|
+
return path.split('.').reduce((o, k) => o?.[k], obj);
|
|
283
|
+
} catch {
|
|
284
|
+
return undefined;
|
|
285
|
+
}
|
|
286
|
+
};
|
|
287
|
+
|
|
288
|
+
// 1. GSAP ScrollTrigger - kill all triggers and show content
|
|
289
|
+
const ScrollTrigger = safeGet(window, 'ScrollTrigger');
|
|
290
|
+
if (ScrollTrigger) {
|
|
291
|
+
try {
|
|
292
|
+
// Get all ScrollTrigger instances
|
|
293
|
+
const triggers = ScrollTrigger.getAll?.() || [];
|
|
294
|
+
triggers.forEach(trigger => {
|
|
295
|
+
try {
|
|
296
|
+
// Kill the trigger to prevent it from hiding content
|
|
297
|
+
trigger.kill?.();
|
|
298
|
+
} catch (e) {
|
|
299
|
+
/* ignore */
|
|
300
|
+
}
|
|
301
|
+
});
|
|
302
|
+
// Refresh to ensure proper state
|
|
303
|
+
ScrollTrigger.refresh?.();
|
|
304
|
+
} catch (e) {
|
|
305
|
+
/* ignore */
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
// Also check for gsap.ScrollTrigger
|
|
310
|
+
const gsapScrollTrigger = safeGet(window, 'gsap.ScrollTrigger');
|
|
311
|
+
if (gsapScrollTrigger && gsapScrollTrigger !== ScrollTrigger) {
|
|
312
|
+
try {
|
|
313
|
+
const triggers = gsapScrollTrigger.getAll?.() || [];
|
|
314
|
+
triggers.forEach(trigger => trigger.kill?.());
|
|
315
|
+
} catch (e) {
|
|
316
|
+
/* ignore */
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// 2. AOS (Animate On Scroll) - reveal all elements
|
|
321
|
+
const AOS = safeGet(window, 'AOS');
|
|
322
|
+
if (AOS) {
|
|
323
|
+
try {
|
|
324
|
+
// Disable AOS and show all elements
|
|
325
|
+
document.querySelectorAll('[data-aos]').forEach(el => {
|
|
326
|
+
el.classList.add('aos-animate');
|
|
327
|
+
el.style.opacity = '1';
|
|
328
|
+
el.style.transform = 'none';
|
|
329
|
+
el.style.visibility = 'visible';
|
|
330
|
+
});
|
|
331
|
+
} catch (e) {
|
|
332
|
+
/* ignore */
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
// 3. WOW.js - reveal all elements
|
|
337
|
+
document.querySelectorAll('.wow').forEach(el => {
|
|
338
|
+
el.classList.add('animated');
|
|
339
|
+
el.style.visibility = 'visible';
|
|
340
|
+
el.style.opacity = '1';
|
|
341
|
+
el.style.animationName = 'none';
|
|
342
|
+
});
|
|
343
|
+
|
|
344
|
+
// 4. ScrollReveal - reveal all elements
|
|
345
|
+
const ScrollReveal = safeGet(window, 'ScrollReveal');
|
|
346
|
+
if (ScrollReveal) {
|
|
347
|
+
try {
|
|
348
|
+
document.querySelectorAll('[data-sr-id]').forEach(el => {
|
|
349
|
+
el.style.visibility = 'visible';
|
|
350
|
+
el.style.opacity = '1';
|
|
351
|
+
el.style.transform = 'none';
|
|
352
|
+
});
|
|
353
|
+
} catch (e) {
|
|
354
|
+
/* ignore */
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// 5. Intersection Observer based lazy loading - trigger all observers
|
|
359
|
+
// This is tricky since we can't access observers directly,
|
|
360
|
+
// but we can trigger the elements they're watching
|
|
361
|
+
|
|
362
|
+
// 6. Generic fixes for common hidden patterns
|
|
363
|
+
// Elements with opacity: 0 that are meant to fade in
|
|
364
|
+
document
|
|
365
|
+
.querySelectorAll('[style*="opacity: 0"], [style*="opacity:0"]')
|
|
366
|
+
.forEach(el => {
|
|
367
|
+
// Only reveal if it seems intentionally hidden for animation
|
|
368
|
+
const computedStyle = window.getComputedStyle(el);
|
|
369
|
+
if (
|
|
370
|
+
computedStyle.opacity === '0' &&
|
|
371
|
+
!el.hasAttribute('aria-hidden')
|
|
372
|
+
) {
|
|
373
|
+
el.style.opacity = '1';
|
|
374
|
+
}
|
|
375
|
+
});
|
|
376
|
+
|
|
377
|
+
// Elements with visibility: hidden that may animate in
|
|
378
|
+
document
|
|
379
|
+
.querySelectorAll(
|
|
380
|
+
'[style*="visibility: hidden"], [style*="visibility:hidden"]',
|
|
381
|
+
)
|
|
382
|
+
.forEach(el => {
|
|
383
|
+
el.style.visibility = 'visible';
|
|
384
|
+
});
|
|
385
|
+
|
|
386
|
+
// Elements with transform: translateY that slide in
|
|
387
|
+
document.querySelectorAll('[style*="translateY"]').forEach(el => {
|
|
388
|
+
const style = el.getAttribute('style') || '';
|
|
389
|
+
// Only fix if it looks like a scroll animation starting position
|
|
390
|
+
if (
|
|
391
|
+
style.includes('translateY(') &&
|
|
392
|
+
(style.includes('opacity') || el.classList.length > 0)
|
|
393
|
+
) {
|
|
394
|
+
el.style.transform = 'none';
|
|
395
|
+
}
|
|
396
|
+
});
|
|
397
|
+
|
|
398
|
+
// 7. Lazy-loaded images - force load
|
|
399
|
+
document
|
|
400
|
+
.querySelectorAll('img[data-src], img[data-lazy], img[loading="lazy"]')
|
|
401
|
+
.forEach(img => {
|
|
402
|
+
const src =
|
|
403
|
+
img.getAttribute('data-src') || img.getAttribute('data-lazy');
|
|
404
|
+
if (src && !img.src) {
|
|
405
|
+
img.src = src;
|
|
406
|
+
}
|
|
407
|
+
// Remove lazy loading to ensure images load
|
|
408
|
+
img.removeAttribute('loading');
|
|
409
|
+
});
|
|
410
|
+
|
|
411
|
+
// 8. Lazy-loaded iframes
|
|
412
|
+
document.querySelectorAll('iframe[data-src]').forEach(iframe => {
|
|
413
|
+
const src = iframe.getAttribute('data-src');
|
|
414
|
+
if (src && !iframe.src) {
|
|
415
|
+
iframe.src = src;
|
|
416
|
+
}
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
// 9. Picture elements with lazy loading
|
|
420
|
+
document
|
|
421
|
+
.querySelectorAll('picture source[data-srcset]')
|
|
422
|
+
.forEach(source => {
|
|
423
|
+
const srcset = source.getAttribute('data-srcset');
|
|
424
|
+
if (srcset) {
|
|
425
|
+
source.srcset = srcset;
|
|
426
|
+
}
|
|
427
|
+
});
|
|
428
|
+
|
|
429
|
+
// 10. Background images in data attributes
|
|
430
|
+
document.querySelectorAll('[data-bg], [data-background]').forEach(el => {
|
|
431
|
+
const bg =
|
|
432
|
+
el.getAttribute('data-bg') || el.getAttribute('data-background');
|
|
433
|
+
if (bg && !el.style.backgroundImage) {
|
|
434
|
+
el.style.backgroundImage = `url(${bg})`;
|
|
435
|
+
}
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
// 11. Lottie animations - try to advance to final state
|
|
439
|
+
const lottieElements = document.querySelectorAll(
|
|
440
|
+
'lottie-player, [data-lottie]',
|
|
441
|
+
);
|
|
442
|
+
lottieElements.forEach(el => {
|
|
443
|
+
try {
|
|
444
|
+
if (el.goToAndStop) {
|
|
445
|
+
el.goToAndStop(el.totalFrames - 1, true);
|
|
446
|
+
}
|
|
447
|
+
} catch (e) {
|
|
448
|
+
/* ignore */
|
|
449
|
+
}
|
|
450
|
+
});
|
|
451
|
+
|
|
452
|
+
// 12. Force all CSS animations to complete
|
|
453
|
+
document.querySelectorAll('*').forEach(el => {
|
|
454
|
+
const style = window.getComputedStyle(el);
|
|
455
|
+
if (style.animationName && style.animationName !== 'none') {
|
|
456
|
+
// Set animation to end state
|
|
457
|
+
el.style.animationPlayState = 'paused';
|
|
458
|
+
el.style.animationDelay = '0s';
|
|
459
|
+
el.style.animationDuration = '0.001s';
|
|
460
|
+
}
|
|
461
|
+
});
|
|
462
|
+
});
|
|
463
|
+
/* eslint-enable no-undef */
|
|
464
|
+
}
|
|
151
465
|
}
|