entari-plugin-hyw 4.0.0rc6__py3-none-any.whl → 4.0.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (114) hide show
  1. entari_plugin_hyw/Untitled-1 +1865 -0
  2. entari_plugin_hyw/__init__.py +733 -379
  3. entari_plugin_hyw/history.py +60 -57
  4. entari_plugin_hyw/misc.py +3 -0
  5. entari_plugin_hyw/search_cache.py +154 -0
  6. {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/METADATA +3 -1
  7. entari_plugin_hyw-4.0.0rc8.dist-info/RECORD +68 -0
  8. {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/WHEEL +1 -1
  9. {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/top_level.txt +1 -0
  10. hyw_core/__init__.py +94 -0
  11. hyw_core/browser_control/__init__.py +65 -0
  12. hyw_core/browser_control/assets/card-dist/index.html +409 -0
  13. hyw_core/browser_control/assets/index.html +5691 -0
  14. hyw_core/browser_control/engines/__init__.py +17 -0
  15. hyw_core/browser_control/engines/default.py +166 -0
  16. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/duckduckgo.py +42 -8
  17. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/google.py +1 -1
  18. {entari_plugin_hyw/browser → hyw_core/browser_control}/manager.py +15 -8
  19. entari_plugin_hyw/render_vue.py → hyw_core/browser_control/renderer.py +29 -14
  20. hyw_core/browser_control/service.py +720 -0
  21. hyw_core/config.py +154 -0
  22. hyw_core/core.py +322 -0
  23. hyw_core/definitions.py +83 -0
  24. entari_plugin_hyw/modular_pipeline.py → hyw_core/pipeline.py +204 -86
  25. {entari_plugin_hyw → hyw_core}/search.py +60 -19
  26. hyw_core/stages/__init__.py +21 -0
  27. entari_plugin_hyw/stage_base.py → hyw_core/stages/base.py +3 -0
  28. entari_plugin_hyw/stage_summary.py → hyw_core/stages/summary.py +36 -7
  29. entari_plugin_hyw/assets/card-dist/index.html +0 -387
  30. entari_plugin_hyw/browser/__init__.py +0 -10
  31. entari_plugin_hyw/browser/engines/bing.py +0 -95
  32. entari_plugin_hyw/browser/service.py +0 -304
  33. entari_plugin_hyw/card-ui/.gitignore +0 -24
  34. entari_plugin_hyw/card-ui/README.md +0 -5
  35. entari_plugin_hyw/card-ui/index.html +0 -16
  36. entari_plugin_hyw/card-ui/package-lock.json +0 -2342
  37. entari_plugin_hyw/card-ui/package.json +0 -31
  38. entari_plugin_hyw/card-ui/public/logos/anthropic.svg +0 -1
  39. entari_plugin_hyw/card-ui/public/logos/cerebras.svg +0 -9
  40. entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
  41. entari_plugin_hyw/card-ui/public/logos/gemini.svg +0 -1
  42. entari_plugin_hyw/card-ui/public/logos/google.svg +0 -1
  43. entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
  44. entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
  45. entari_plugin_hyw/card-ui/public/logos/microsoft.svg +0 -15
  46. entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
  47. entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
  48. entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
  49. entari_plugin_hyw/card-ui/public/logos/openai.svg +0 -1
  50. entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
  51. entari_plugin_hyw/card-ui/public/logos/perplexity.svg +0 -24
  52. entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
  53. entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
  54. entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
  55. entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
  56. entari_plugin_hyw/card-ui/public/vite.svg +0 -1
  57. entari_plugin_hyw/card-ui/src/App.vue +0 -756
  58. entari_plugin_hyw/card-ui/src/assets/vue.svg +0 -1
  59. entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +0 -41
  60. entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +0 -382
  61. entari_plugin_hyw/card-ui/src/components/SectionCard.vue +0 -41
  62. entari_plugin_hyw/card-ui/src/components/StageCard.vue +0 -240
  63. entari_plugin_hyw/card-ui/src/main.ts +0 -5
  64. entari_plugin_hyw/card-ui/src/style.css +0 -29
  65. entari_plugin_hyw/card-ui/src/test_regex.js +0 -103
  66. entari_plugin_hyw/card-ui/src/types.ts +0 -61
  67. entari_plugin_hyw/card-ui/tsconfig.app.json +0 -16
  68. entari_plugin_hyw/card-ui/tsconfig.json +0 -7
  69. entari_plugin_hyw/card-ui/tsconfig.node.json +0 -26
  70. entari_plugin_hyw/card-ui/vite.config.ts +0 -16
  71. entari_plugin_hyw/definitions.py +0 -155
  72. entari_plugin_hyw/stage_instruct.py +0 -345
  73. entari_plugin_hyw/stage_instruct_deepsearch.py +0 -104
  74. entari_plugin_hyw-4.0.0rc6.dist-info/RECORD +0 -100
  75. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/anthropic.svg +0 -0
  76. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/cerebras.svg +0 -0
  77. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/deepseek.png +0 -0
  78. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/gemini.svg +0 -0
  79. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/google.svg +0 -0
  80. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/grok.png +0 -0
  81. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/huggingface.png +0 -0
  82. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/microsoft.svg +0 -0
  83. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/minimax.png +0 -0
  84. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/mistral.png +0 -0
  85. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/nvida.png +0 -0
  86. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/openai.svg +0 -0
  87. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/openrouter.png +0 -0
  88. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/perplexity.svg +0 -0
  89. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/qwen.png +0 -0
  90. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/xai.png +0 -0
  91. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/xiaomi.png +0 -0
  92. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/zai.png +0 -0
  93. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/vite.svg +0 -0
  94. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/anthropic.svg +0 -0
  95. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/cerebras.svg +0 -0
  96. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/deepseek.png +0 -0
  97. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/gemini.svg +0 -0
  98. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/google.svg +0 -0
  99. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/grok.png +0 -0
  100. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/huggingface.png +0 -0
  101. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/microsoft.svg +0 -0
  102. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/minimax.png +0 -0
  103. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/mistral.png +0 -0
  104. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/nvida.png +0 -0
  105. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/openai.svg +0 -0
  106. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/openrouter.png +0 -0
  107. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/perplexity.svg +0 -0
  108. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/qwen.png +0 -0
  109. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/xai.png +0 -0
  110. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/xiaomi.png +0 -0
  111. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/zai.png +0 -0
  112. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/base.py +0 -0
  113. {entari_plugin_hyw/browser → hyw_core/browser_control}/landing.html +0 -0
  114. {entari_plugin_hyw → hyw_core}/image_cache.py +0 -0
@@ -0,0 +1,1865 @@
1
+ [Crawl4AI 文档 (v0.7.x)](https://crawl4ai.docslib.dev/)
2
+ -
3
+ [首页](https://crawl4ai.docslib.dev/)
4
+
5
+
6
+ -
7
+ [询问AI](https://crawl4ai.docslib.dev/core/ask-ai/)
8
+
9
+
10
+ -
11
+ [快速开始](https://crawl4ai.docslib.dev/core/quickstart/)
12
+
13
+
14
+ -
15
+ [代码示例](https://crawl4ai.docslib.dev/core/examples/)
16
+
17
+
18
+ -
19
+ [Search](https://crawl4ai.docslib.dev/advanced/lazy-loading/)
20
+
21
+
22
+ [首页](https://crawl4ai.docslib.dev/)
23
+ [询问AI](https://crawl4ai.docslib.dev/core/ask-ai/)
24
+ [快速开始](https://crawl4ai.docslib.dev/core/quickstart/)
25
+ [代码示例](https://crawl4ai.docslib.dev/core/examples/)
26
+ [Search](https://crawl4ai.docslib.dev/advanced/lazy-loading/)
27
+ -
28
+
29
+
30
+
31
+
32
+
33
+ [首页](https://crawl4ai.docslib.dev/)
34
+
35
+
36
+
37
+
38
+
39
+ -
40
+
41
+
42
+
43
+
44
+
45
+ [询问AI](https://crawl4ai.docslib.dev/core/ask-ai/)
46
+
47
+
48
+
49
+
50
+
51
+ -
52
+
53
+
54
+
55
+
56
+
57
+ [快速开始](https://crawl4ai.docslib.dev/core/quickstart/)
58
+
59
+
60
+
61
+
62
+
63
+ -
64
+
65
+
66
+
67
+
68
+
69
+ [代码示例](https://crawl4ai.docslib.dev/core/examples/)
70
+
71
+
72
+
73
+
74
+
75
+ -
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+ 应用程序
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+ [演示应用](https://crawl4ai.docslib.dev/apps/)
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+ [C4A脚本编辑器](https://crawl4ai.docslib.dev/apps/c4a-script/index.html)
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+ [LLM上下文构建器](https://crawl4ai.docslib.dev/apps/llmtxt/index.html)
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+ -
144
+
145
+
146
+
147
+ [演示应用](https://crawl4ai.docslib.dev/apps/)
148
+
149
+
150
+
151
+ -
152
+
153
+
154
+
155
+ [C4A脚本编辑器](https://crawl4ai.docslib.dev/apps/c4a-script/index.html)
156
+
157
+
158
+
159
+ -
160
+
161
+
162
+
163
+ [LLM上下文构建器](https://crawl4ai.docslib.dev/apps/llmtxt/index.html)
164
+
165
+
166
+
167
+ -
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+
180
+ 设置与[安装](https://crawl4ai.docslib.dev/core/installation/)
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+ 安装
198
+
199
+
200
+
201
+
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+
211
+ [Docker部署](https://crawl4ai.docslib.dev/core/docker-deployment/)
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+ -
222
+
223
+
224
+
225
+ [安装](https://crawl4ai.docslib.dev/core/installation/)
226
+
227
+
228
+
229
+ -
230
+
231
+
232
+
233
+ [Docker部署](https://crawl4ai.docslib.dev/core/docker-deployment/)
234
+
235
+
236
+
237
+ -
238
+
239
+
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+
249
+
250
+ 博客与[变更日志](https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md)
251
+
252
+
253
+
254
+
255
+
256
+
257
+
258
+
259
+
260
+
261
+
262
+
263
+
264
+
265
+
266
+
267
+ [博客首页](https://crawl4ai.docslib.dev/blog/)
268
+
269
+
270
+
271
+
272
+
273
+
274
+
275
+
276
+
277
+
278
+
279
+
280
+
281
+ 变更日志
282
+
283
+
284
+
285
+
286
+
287
+
288
+
289
+
290
+
291
+ -
292
+
293
+
294
+
295
+ [博客首页](https://crawl4ai.docslib.dev/blog/)
296
+
297
+
298
+
299
+ -
300
+
301
+
302
+
303
+ [变更日志](https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md)
304
+
305
+
306
+
307
+ -
308
+
309
+
310
+
311
+
312
+
313
+
314
+
315
+
316
+
317
+
318
+
319
+
320
+ 核心
321
+
322
+
323
+
324
+
325
+
326
+
327
+
328
+
329
+
330
+
331
+
332
+
333
+
334
+
335
+
336
+
337
+ [命令行界面](https://crawl4ai.docslib.dev/core/cli/)
338
+
339
+
340
+
341
+
342
+
343
+
344
+
345
+
346
+
347
+
348
+
349
+
350
+
351
+ [简单爬取](https://crawl4ai.docslib.dev/core/simple-crawling/)
352
+
353
+
354
+
355
+
356
+
357
+
358
+
359
+
360
+
361
+
362
+
363
+
364
+
365
+ [深度爬取](https://crawl4ai.docslib.dev/core/deep-crawling/)
366
+
367
+
368
+
369
+
370
+
371
+
372
+
373
+
374
+
375
+
376
+
377
+
378
+
379
+ [自适应爬取](https://crawl4ai.docslib.dev/core/adaptive-crawling/)
380
+
381
+
382
+
383
+
384
+
385
+
386
+
387
+
388
+
389
+
390
+
391
+
392
+
393
+ [URL播种](https://crawl4ai.docslib.dev/core/url-seeding/)
394
+
395
+
396
+
397
+
398
+
399
+
400
+
401
+
402
+
403
+
404
+
405
+
406
+
407
+ [C4A脚本](https://crawl4ai.docslib.dev/core/c4a-script/)
408
+
409
+
410
+
411
+
412
+
413
+
414
+
415
+
416
+
417
+
418
+
419
+
420
+
421
+ [爬虫结果](https://crawl4ai.docslib.dev/core/crawler-result/)
422
+
423
+
424
+
425
+
426
+
427
+
428
+
429
+
430
+
431
+
432
+
433
+
434
+
435
+ [浏览器、爬虫和LLM配置](https://crawl4ai.docslib.dev/core/browser-crawler-config/)
436
+
437
+
438
+
439
+
440
+
441
+
442
+
443
+
444
+
445
+
446
+
447
+
448
+
449
+ [Markdown生成](https://crawl4ai.docslib.dev/core/markdown-generation/)
450
+
451
+
452
+
453
+
454
+
455
+
456
+
457
+
458
+
459
+
460
+
461
+
462
+
463
+ [适合的Markdown](https://crawl4ai.docslib.dev/core/fit-markdown/)
464
+
465
+
466
+
467
+
468
+
469
+
470
+
471
+
472
+
473
+
474
+
475
+
476
+
477
+ [页面交互](https://crawl4ai.docslib.dev/core/page-interaction/)
478
+
479
+
480
+
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
+
491
+ [内容选择](https://crawl4ai.docslib.dev/core/content-selection/)
492
+
493
+
494
+
495
+
496
+
497
+
498
+
499
+
500
+
501
+
502
+
503
+
504
+
505
+ [缓存模式](https://crawl4ai.docslib.dev/core/cache-modes/)
506
+
507
+
508
+
509
+
510
+
511
+
512
+
513
+
514
+
515
+
516
+
517
+
518
+
519
+ [本地文件和原始HTML](https://crawl4ai.docslib.dev/core/local-files/)
520
+
521
+
522
+
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+ [链接与媒体](https://crawl4ai.docslib.dev/core/link-media/)
534
+
535
+
536
+
537
+
538
+
539
+
540
+
541
+
542
+
543
+ -
544
+
545
+
546
+
547
+ [命令行界面](https://crawl4ai.docslib.dev/core/cli/)
548
+
549
+
550
+
551
+ -
552
+
553
+
554
+
555
+ [简单爬取](https://crawl4ai.docslib.dev/core/simple-crawling/)
556
+
557
+
558
+
559
+ -
560
+
561
+
562
+
563
+ [深度爬取](https://crawl4ai.docslib.dev/core/deep-crawling/)
564
+
565
+
566
+
567
+ -
568
+
569
+
570
+
571
+ [自适应爬取](https://crawl4ai.docslib.dev/core/adaptive-crawling/)
572
+
573
+
574
+
575
+ -
576
+
577
+
578
+
579
+ [URL播种](https://crawl4ai.docslib.dev/core/url-seeding/)
580
+
581
+
582
+
583
+ -
584
+
585
+
586
+
587
+ [C4A脚本](https://crawl4ai.docslib.dev/core/c4a-script/)
588
+
589
+
590
+
591
+ -
592
+
593
+
594
+
595
+ [爬虫结果](https://crawl4ai.docslib.dev/core/crawler-result/)
596
+
597
+
598
+
599
+ -
600
+
601
+ [浏览器、爬虫和LLM配置](https://crawl4ai.docslib.dev/core/browser-crawler-config/)
602
+
603
+
604
+
605
+ -
606
+
607
+
608
+
609
+ [Markdown生成](https://crawl4ai.docslib.dev/core/markdown-generation/)
610
+
611
+
612
+
613
+ -
614
+
615
+
616
+
617
+ [适合的Markdown](https://crawl4ai.docslib.dev/core/fit-markdown/)
618
+
619
+
620
+
621
+ -
622
+
623
+
624
+
625
+ [页面交互](https://crawl4ai.docslib.dev/core/page-interaction/)
626
+
627
+
628
+
629
+ -
630
+
631
+
632
+
633
+ [内容选择](https://crawl4ai.docslib.dev/core/content-selection/)
634
+
635
+
636
+
637
+ -
638
+
639
+
640
+
641
+ [缓存模式](https://crawl4ai.docslib.dev/core/cache-modes/)
642
+
643
+
644
+
645
+ -
646
+
647
+
648
+
649
+ [本地文件和原始HTML](https://crawl4ai.docslib.dev/core/local-files/)
650
+
651
+
652
+
653
+ -
654
+
655
+
656
+
657
+ [链接与媒体](https://crawl4ai.docslib.dev/core/link-media/)
658
+
659
+
660
+
661
+ -
662
+
663
+
664
+
665
+
666
+
667
+
668
+
669
+
670
+
671
+ 高级
672
+
673
+
674
+
675
+
676
+
677
+
678
+
679
+
680
+
681
+
682
+
683
+
684
+
685
+
686
+
687
+ [概述](https://crawl4ai.docslib.dev/advanced/advanced-features.md)
688
+
689
+
690
+
691
+
692
+
693
+
694
+
695
+
696
+
697
+
698
+
699
+
700
+
701
+ [自适应策略](https://crawl4ai.docslib.dev/advanced/adaptive-strategies/)
702
+
703
+
704
+
705
+
706
+
707
+
708
+
709
+
710
+
711
+
712
+
713
+
714
+
715
+ [虚拟滚动](https://crawl4ai.docslib.dev/advanced/virtual-scroll/)
716
+
717
+
718
+
719
+
720
+
721
+
722
+
723
+
724
+
725
+
726
+
727
+
728
+
729
+ [文件下载](https://crawl4ai.docslib.dev/advanced/file-downloading/)
730
+
731
+
732
+
733
+
734
+
735
+
736
+
737
+
738
+
739
+
740
+
741
+
742
+ 懒加载
743
+
744
+
745
+
746
+
747
+
748
+
749
+
750
+
751
+
752
+
753
+
754
+
755
+ [钩子与认证](https://crawl4ai.docslib.dev/advanced/hooks-auth/)
756
+
757
+
758
+
759
+
760
+
761
+
762
+
763
+
764
+
765
+
766
+
767
+
768
+
769
+ [代理与安全](https://crawl4ai.docslib.dev/advanced/proxy-security/)
770
+
771
+
772
+
773
+
774
+
775
+
776
+
777
+
778
+
779
+
780
+
781
+
782
+
783
+ [无痕浏览器](https://crawl4ai.docslib.dev/advanced/undetected-browser/)
784
+
785
+
786
+
787
+
788
+
789
+
790
+
791
+
792
+
793
+
794
+
795
+
796
+
797
+ [会话管理](https://crawl4ai.docslib.dev/advanced/session-management/)
798
+
799
+
800
+
801
+
802
+
803
+
804
+
805
+
806
+
807
+
808
+
809
+
810
+
811
+ [多URL爬取](https://crawl4ai.docslib.dev/advanced/multi-url-crawling/)
812
+
813
+
814
+
815
+
816
+
817
+
818
+
819
+
820
+
821
+
822
+
823
+
824
+
825
+ [爬虫调度器](https://crawl4ai.docslib.dev/advanced/crawl-dispatcher/)
826
+
827
+
828
+
829
+
830
+
831
+
832
+
833
+
834
+
835
+
836
+
837
+
838
+
839
+ [基于身份的爬取](https://crawl4ai.docslib.dev/advanced/identity-based-crawling/)
840
+
841
+
842
+
843
+
844
+
845
+
846
+
847
+
848
+
849
+
850
+
851
+
852
+
853
+ [SSL证书](https://crawl4ai.docslib.dev/advanced/ssl-certificate/)
854
+
855
+
856
+
857
+
858
+
859
+
860
+
861
+
862
+
863
+
864
+
865
+
866
+
867
+ [网络与控制台捕获](https://crawl4ai.docslib.dev/advanced/network-console-capture/)
868
+
869
+
870
+
871
+
872
+
873
+
874
+
875
+
876
+
877
+
878
+
879
+
880
+
881
+ [PDF解析](https://crawl4ai.docslib.dev/advanced/pdf-parsing/)
882
+
883
+
884
+
885
+
886
+
887
+
888
+
889
+
890
+
891
+ -
892
+
893
+
894
+
895
+ [概述](https://crawl4ai.docslib.dev/advanced/advanced-features.md)
896
+
897
+
898
+
899
+ -
900
+
901
+
902
+
903
+ [自适应策略](https://crawl4ai.docslib.dev/advanced/adaptive-strategies/)
904
+
905
+
906
+
907
+ -
908
+
909
+
910
+
911
+ [虚拟滚动](https://crawl4ai.docslib.dev/advanced/virtual-scroll/)
912
+
913
+
914
+
915
+ -
916
+
917
+
918
+
919
+ [文件下载](https://crawl4ai.docslib.dev/advanced/file-downloading/)
920
+
921
+
922
+
923
+ -
924
+
925
+
926
+ 懒加载
927
+
928
+
929
+ -
930
+
931
+
932
+
933
+ [钩子与认证](https://crawl4ai.docslib.dev/advanced/hooks-auth/)
934
+
935
+
936
+
937
+ -
938
+
939
+
940
+
941
+ [代理与安全](https://crawl4ai.docslib.dev/advanced/proxy-security/)
942
+
943
+
944
+
945
+ -
946
+
947
+
948
+
949
+ [无痕浏览器](https://crawl4ai.docslib.dev/advanced/undetected-browser/)
950
+
951
+
952
+
953
+ -
954
+
955
+
956
+
957
+ [会话管理](https://crawl4ai.docslib.dev/advanced/session-management/)
958
+
959
+
960
+
961
+ -
962
+
963
+
964
+
965
+ [多URL爬取](https://crawl4ai.docslib.dev/advanced/multi-url-crawling/)
966
+
967
+
968
+
969
+ -
970
+
971
+
972
+
973
+ [爬虫调度器](https://crawl4ai.docslib.dev/advanced/crawl-dispatcher/)
974
+
975
+
976
+
977
+ -
978
+
979
+
980
+
981
+ [基于身份的爬取](https://crawl4ai.docslib.dev/advanced/identity-based-crawling/)
982
+
983
+
984
+
985
+ -
986
+
987
+
988
+
989
+ [SSL证书](https://crawl4ai.docslib.dev/advanced/ssl-certificate/)
990
+
991
+
992
+
993
+ -
994
+
995
+
996
+
997
+ [网络与控制台捕获](https://crawl4ai.docslib.dev/advanced/network-console-capture/)
998
+
999
+
1000
+
1001
+ -
1002
+
1003
+
1004
+
1005
+ [PDF解析](https://crawl4ai.docslib.dev/advanced/pdf-parsing/)
1006
+
1007
+
1008
+
1009
+ -
1010
+
1011
+
1012
+
1013
+
1014
+
1015
+
1016
+
1017
+
1018
+
1019
+
1020
+
1021
+
1022
+ 提取
1023
+
1024
+
1025
+
1026
+
1027
+
1028
+
1029
+
1030
+
1031
+
1032
+
1033
+
1034
+
1035
+
1036
+
1037
+
1038
+
1039
+ [无LLM策略](https://crawl4ai.docslib.dev/extraction/no-llm-strategies/)
1040
+
1041
+
1042
+
1043
+
1044
+
1045
+
1046
+
1047
+
1048
+
1049
+
1050
+
1051
+
1052
+
1053
+ LLM策略
1054
+
1055
+
1056
+
1057
+
1058
+
1059
+
1060
+
1061
+
1062
+
1063
+
1064
+
1065
+
1066
+
1067
+ [聚类策略](https://crawl4ai.docslib.dev/extraction/clustring-strategies/)
1068
+
1069
+
1070
+
1071
+
1072
+
1073
+
1074
+
1075
+
1076
+
1077
+
1078
+
1079
+
1080
+
1081
+ [分块](https://crawl4ai.docslib.dev/extraction/chunking/)
1082
+
1083
+
1084
+
1085
+
1086
+
1087
+
1088
+
1089
+
1090
+
1091
+ -
1092
+
1093
+
1094
+
1095
+ [无LLM策略](https://crawl4ai.docslib.dev/extraction/no-llm-strategies/)
1096
+
1097
+
1098
+
1099
+ -
1100
+
1101
+
1102
+
1103
+ [LLM策略](https://crawl4ai.docslib.dev/extraction/llm-strategies/)
1104
+
1105
+
1106
+
1107
+ -
1108
+
1109
+
1110
+
1111
+ [聚类策略](https://crawl4ai.docslib.dev/extraction/clustring-strategies/)
1112
+
1113
+
1114
+
1115
+ -
1116
+
1117
+
1118
+
1119
+ [分块](https://crawl4ai.docslib.dev/extraction/chunking/)
1120
+
1121
+
1122
+
1123
+ -
1124
+
1125
+
1126
+
1127
+
1128
+
1129
+
1130
+
1131
+
1132
+
1133
+
1134
+
1135
+
1136
+ API参考
1137
+
1138
+
1139
+
1140
+
1141
+
1142
+
1143
+
1144
+
1145
+
1146
+
1147
+
1148
+
1149
+
1150
+
1151
+
1152
+
1153
+ [异步网页爬虫](https://crawl4ai.docslib.dev/api/async-webcrawler/)
1154
+
1155
+ [arun()](https://crawl4ai.docslib.dev/api/arun/)
1156
+
1157
+
1158
+
1159
+
1160
+
1161
+
1162
+
1163
+
1164
+
1165
+
1166
+
1167
+
1168
+
1169
+ [arun_many()](https://crawl4ai.docslib.dev/api/arun_many/)
1170
+
1171
+
1172
+
1173
+
1174
+
1175
+
1176
+
1177
+
1178
+
1179
+
1180
+
1181
+
1182
+
1183
+ [浏览器、爬虫和LLM配置](https://crawl4ai.docslib.dev/api/parameters/)
1184
+
1185
+
1186
+
1187
+
1188
+
1189
+
1190
+
1191
+
1192
+
1193
+
1194
+
1195
+
1196
+
1197
+ [爬取结果](https://crawl4ai.docslib.dev/api/crawl-result/)
1198
+
1199
+
1200
+
1201
+
1202
+
1203
+
1204
+
1205
+
1206
+
1207
+
1208
+
1209
+
1210
+
1211
+ [策略](https://crawl4ai.docslib.dev/api/strategies/)
1212
+
1213
+
1214
+
1215
+
1216
+
1217
+
1218
+
1219
+
1220
+
1221
+
1222
+
1223
+
1224
+
1225
+ [C4A脚本参考](https://crawl4ai.docslib.dev/api/c4a-script-reference/)
1226
+
1227
+
1228
+
1229
+
1230
+
1231
+
1232
+
1233
+
1234
+
1235
+ -
1236
+
1237
+
1238
+
1239
+ [异步网页爬虫](https://crawl4ai.docslib.dev/api/async-webcrawler/)
1240
+
1241
+
1242
+
1243
+ -
1244
+
1245
+
1246
+
1247
+ [arun()](https://crawl4ai.docslib.dev/api/arun/)
1248
+
1249
+
1250
+
1251
+ -
1252
+
1253
+
1254
+
1255
+ [arun_many()](https://crawl4ai.docslib.dev/api/arun_many/)
1256
+
1257
+
1258
+
1259
+ -
1260
+
1261
+
1262
+
1263
+ [浏览器、爬虫和LLM配置](https://crawl4ai.docslib.dev/api/parameters/)
1264
+
1265
+
1266
+
1267
+ -
1268
+
1269
+
1270
+
1271
+ [爬取结果](https://crawl4ai.docslib.dev/api/crawl-result/)
1272
+
1273
+
1274
+
1275
+ -
1276
+
1277
+
1278
+
1279
+ [策略](https://crawl4ai.docslib.dev/api/strategies/)
1280
+
1281
+
1282
+
1283
+ -
1284
+
1285
+
1286
+
1287
+ [C4A脚本参考](https://crawl4ai.docslib.dev/api/c4a-script-reference/)
1288
+
1289
+
1290
+
1291
+ [首页](https://crawl4ai.docslib.dev/)
1292
+ [询问AI](https://crawl4ai.docslib.dev/core/ask-ai/)
1293
+ [快速开始](https://crawl4ai.docslib.dev/core/quickstart/)
1294
+ [代码示例](https://crawl4ai.docslib.dev/core/examples/)
1295
+ -
1296
+
1297
+
1298
+
1299
+ [演示应用](https://crawl4ai.docslib.dev/apps/)
1300
+
1301
+
1302
+
1303
+ -
1304
+
1305
+
1306
+
1307
+ [C4A脚本编辑器](https://crawl4ai.docslib.dev/apps/c4a-script/index.html)
1308
+
1309
+
1310
+
1311
+ -
1312
+
1313
+
1314
+
1315
+ [LLM上下文构建器](https://crawl4ai.docslib.dev/apps/llmtxt/index.html)
1316
+
1317
+
1318
+
1319
+ [演示应用](https://crawl4ai.docslib.dev/apps/)
1320
+ [C4A脚本编辑器](https://crawl4ai.docslib.dev/apps/c4a-script/index.html)
1321
+ [LLM上下文构建器](https://crawl4ai.docslib.dev/apps/llmtxt/index.html)
1322
+ -
1323
+
1324
+
1325
+
1326
+ [安装](https://crawl4ai.docslib.dev/core/installation/)
1327
+
1328
+
1329
+
1330
+ -
1331
+
1332
+
1333
+
1334
+ [Docker部署](https://crawl4ai.docslib.dev/core/docker-deployment/)
1335
+
1336
+
1337
+
1338
+ [安装](https://crawl4ai.docslib.dev/core/installation/)
1339
+ [Docker部署](https://crawl4ai.docslib.dev/core/docker-deployment/)
1340
+ -
1341
+
1342
+
1343
+
1344
+ [博客首页](https://crawl4ai.docslib.dev/blog/)
1345
+
1346
+
1347
+
1348
+ -
1349
+
1350
+
1351
+
1352
+ [变更日志](https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md)
1353
+
1354
+
1355
+
1356
+ [博客首页](https://crawl4ai.docslib.dev/blog/)
1357
+ [变更日志](https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md)
1358
+ -
1359
+
1360
+
1361
+
1362
+ [命令行界面](https://crawl4ai.docslib.dev/core/cli/)
1363
+
1364
+
1365
+
1366
+ -
1367
+
1368
+
1369
+
1370
+ [简单爬取](https://crawl4ai.docslib.dev/core/simple-crawling/)
1371
+
1372
+
1373
+
1374
+ -
1375
+
1376
+
1377
+
1378
+ [深度爬取](https://crawl4ai.docslib.dev/core/deep-crawling/)
1379
+
1380
+
1381
+
1382
+ -
1383
+
1384
+
1385
+
1386
+ [自适应爬取](https://crawl4ai.docslib.dev/core/adaptive-crawling/)
1387
+
1388
+
1389
+
1390
+ -
1391
+
1392
+
1393
+
1394
+ [URL播种](https://crawl4ai.docslib.dev/core/url-seeding/)
1395
+
1396
+
1397
+
1398
+ -
1399
+
1400
+
1401
+
1402
+ [C4A脚本](https://crawl4ai.docslib.dev/core/c4a-script/)
1403
+
1404
+
1405
+
1406
+ -
1407
+
1408
+
1409
+
1410
+ [爬虫结果](https://crawl4ai.docslib.dev/core/crawler-result/)
1411
+
1412
+
1413
+
1414
+ -
1415
+
1416
+
1417
+
1418
+ [浏览器、爬虫和LLM配置](https://crawl4ai.docslib.dev/core/browser-crawler-config/)
1419
+
1420
+
1421
+
1422
+ -
1423
+
1424
+
1425
+
1426
+ [Markdown生成](https://crawl4ai.docslib.dev/core/markdown-generation/)
1427
+
1428
+
1429
+
1430
+ -
1431
+
1432
+
1433
+
1434
+ [适合的Markdown](https://crawl4ai.docslib.dev/core/fit-markdown/)
1435
+
1436
+
1437
+
1438
+ -
1439
+
1440
+
1441
+
1442
+ [页面交互](https://crawl4ai.docslib.dev/core/page-interaction/)
1443
+
1444
+
1445
+
1446
+ -
1447
+
1448
+
1449
+
1450
+ [内容选择](https://crawl4ai.docslib.dev/core/content-selection/)
1451
+
1452
+
1453
+
1454
+ -
1455
+
1456
+
1457
+
1458
+ [缓存模式](https://crawl4ai.docslib.dev/core/cache-modes/)
1459
+
1460
+
1461
+
1462
+ -
1463
+
1464
+
1465
+
1466
+ [本地文件和原始HTML](https://crawl4ai.docslib.dev/core/local-files/)
1467
+
1468
+
1469
+
1470
+ -
1471
+
1472
+
1473
+
1474
+ [链接与媒体](https://crawl4ai.docslib.dev/core/link-media/)
1475
+
1476
+
1477
+
1478
+ [命令行界面](https://crawl4ai.docslib.dev/core/cli/)
1479
+ [简单爬取](https://crawl4ai.docslib.dev/core/simple-crawling/)
1480
+ [深度爬取](https://crawl4ai.docslib.dev/core/deep-crawling/)
1481
+ [自适应爬取](https://crawl4ai.docslib.dev/core/adaptive-crawling/)
1482
+ [URL播种](https://crawl4ai.docslib.dev/core/url-seeding/)
1483
+ [C4A脚本](https://crawl4ai.docslib.dev/core/c4a-script/)
1484
+ [爬虫结果](https://crawl4ai.docslib.dev/core/crawler-result/)
1485
+ [浏览器、爬虫和LLM配置](https://crawl4ai.docslib.dev/core/browser-crawler-config/)
1486
+ [Markdown生成](https://crawl4ai.docslib.dev/core/markdown-generation/)
1487
+ [适合的Markdown](https://crawl4ai.docslib.dev/core/fit-markdown/)
1488
+ [页面交互](https://crawl4ai.docslib.dev/core/page-interaction/)
1489
+
1490
+ [内容选择](https://crawl4ai.docslib.dev/core/content-selection/)
1491
+ [缓存模式](https://crawl4ai.docslib.dev/core/cache-modes/)
1492
+ [本地文件和原始HTML](https://crawl4ai.docslib.dev/core/local-files/)
1493
+ [链接与媒体](https://crawl4ai.docslib.dev/core/link-media/)
1494
+ -
1495
+
1496
+
1497
+
1498
+ [概述](https://crawl4ai.docslib.dev/advanced/advanced-features.md)
1499
+
1500
+
1501
+
1502
+ -
1503
+
1504
+
1505
+
1506
+ [自适应策略](https://crawl4ai.docslib.dev/advanced/adaptive-strategies/)
1507
+
1508
+
1509
+
1510
+ -
1511
+
1512
+
1513
+
1514
+ [虚拟滚动](https://crawl4ai.docslib.dev/advanced/virtual-scroll/)
1515
+
1516
+
1517
+
1518
+ -
1519
+
1520
+
1521
+
1522
+ [文件下载](https://crawl4ai.docslib.dev/advanced/file-downloading/)
1523
+
1524
+
1525
+
1526
+ -
1527
+
1528
+
1529
+ 懒加载
1530
+
1531
+
1532
+ -
1533
+
1534
+
1535
+
1536
+ [钩子与认证](https://crawl4ai.docslib.dev/advanced/hooks-auth/)
1537
+
1538
+
1539
+
1540
+ -
1541
+
1542
+
1543
+
1544
+ [代理与安全](https://crawl4ai.docslib.dev/advanced/proxy-security/)
1545
+
1546
+
1547
+
1548
+ -
1549
+
1550
+
1551
+
1552
+ [无痕浏览器](https://crawl4ai.docslib.dev/advanced/undetected-browser/)
1553
+
1554
+
1555
+
1556
+ -
1557
+
1558
+
1559
+
1560
+ [会话管理](https://crawl4ai.docslib.dev/advanced/session-management/)
1561
+
1562
+
1563
+
1564
+ -
1565
+
1566
+
1567
+
1568
+ [多URL爬取](https://crawl4ai.docslib.dev/advanced/multi-url-crawling/)
1569
+
1570
+
1571
+
1572
+ -
1573
+
1574
+
1575
+
1576
+ [爬虫调度器](https://crawl4ai.docslib.dev/advanced/crawl-dispatcher/)
1577
+
1578
+
1579
+
1580
+ -
1581
+
1582
+
1583
+
1584
+ [基于身份的爬取](https://crawl4ai.docslib.dev/advanced/identity-based-crawling/)
1585
+
1586
+
1587
+
1588
+ -
1589
+
1590
+
1591
+
1592
+ [SSL证书](https://crawl4ai.docslib.dev/advanced/ssl-certificate/)
1593
+
1594
+
1595
+
1596
+ -
1597
+
1598
+
1599
+
1600
+ [网络与控制台捕获](https://crawl4ai.docslib.dev/advanced/network-console-capture/)
1601
+
1602
+
1603
+
1604
+ -
1605
+
1606
+
1607
+
1608
+ [PDF解析](https://crawl4ai.docslib.dev/advanced/pdf-parsing/)
1609
+
1610
+
1611
+
1612
+ [概述](https://crawl4ai.docslib.dev/advanced/advanced-features.md)
1613
+ [自适应策略](https://crawl4ai.docslib.dev/advanced/adaptive-strategies/)
1614
+ [虚拟滚动](https://crawl4ai.docslib.dev/advanced/virtual-scroll/)
1615
+ [文件下载](https://crawl4ai.docslib.dev/advanced/file-downloading/)
1616
+ [钩子与认证](https://crawl4ai.docslib.dev/advanced/hooks-auth/)
1617
+ [代理与安全](https://crawl4ai.docslib.dev/advanced/proxy-security/)
1618
+ [无痕浏览器](https://crawl4ai.docslib.dev/advanced/undetected-browser/)
1619
+ [会话管理](https://crawl4ai.docslib.dev/advanced/session-management/)
1620
+ [多URL爬取](https://crawl4ai.docslib.dev/advanced/multi-url-crawling/)
1621
+ [爬虫调度器](https://crawl4ai.docslib.dev/advanced/crawl-dispatcher/)
1622
+ [基于身份的爬取](https://crawl4ai.docslib.dev/advanced/identity-based-crawling/)
1623
+ [SSL证书](https://crawl4ai.docslib.dev/advanced/ssl-certificate/)
1624
+ [网络与控制台捕获](https://crawl4ai.docslib.dev/advanced/network-console-capture/)
1625
+ [PDF解析](https://crawl4ai.docslib.dev/advanced/pdf-parsing/)
1626
+ -
1627
+
1628
+
1629
+
1630
+ [无LLM策略](https://crawl4ai.docslib.dev/extraction/no-llm-strategies/)
1631
+
1632
+
1633
+
1634
+ -
1635
+
1636
+
1637
+
1638
+ [LLM策略](https://crawl4ai.docslib.dev/extraction/llm-strategies/)
1639
+
1640
+
1641
+
1642
+ -
1643
+
1644
+
1645
+
1646
+ [聚类策略](https://crawl4ai.docslib.dev/extraction/clustring-strategies/)
1647
+
1648
+
1649
+
1650
+ -
1651
+
1652
+
1653
+
1654
+ [分块](https://crawl4ai.docslib.dev/extraction/chunking/)
1655
+
1656
+
1657
+
1658
+ [无LLM策略](https://crawl4ai.docslib.dev/extraction/no-llm-strategies/)
1659
+ [LLM策略](https://crawl4ai.docslib.dev/extraction/llm-strategies/)
1660
+ [聚类策略](https://crawl4ai.docslib.dev/extraction/clustring-strategies/)
1661
+ [分块](https://crawl4ai.docslib.dev/extraction/chunking/)
1662
+ -
1663
+
1664
+
1665
+
1666
+ [异步网页爬虫](https://crawl4ai.docslib.dev/api/async-webcrawler/)
1667
+
1668
+
1669
+
1670
+ -
1671
+
1672
+
1673
+
1674
+ [arun()](https://crawl4ai.docslib.dev/api/arun/)
1675
+
1676
+
1677
+
1678
+ -
1679
+
1680
+
1681
+
1682
+ [arun_many()](https://crawl4ai.docslib.dev/api/arun_many/)
1683
+
1684
+
1685
+
1686
+ -
1687
+
1688
+
1689
+
1690
+ [浏览器、爬虫和LLM配置](https://crawl4ai.docslib.dev/api/parameters/)
1691
+
1692
+
1693
+
1694
+ -
1695
+
1696
+
1697
+
1698
+ [爬取结果](https://crawl4ai.docslib.dev/api/crawl-result/)
1699
+
1700
+
1701
+
1702
+ -
1703
+
1704
+
1705
+
1706
+ [策略](https://crawl4ai.docslib.dev/api/strategies/)
1707
+
1708
+
1709
+
1710
+ -
1711
+
1712
+
1713
+
1714
+ [C4A脚本参考](https://crawl4ai.docslib.dev/api/c4a-script-reference/)
1715
+
1716
+
1717
+
1718
+ [异步网页爬虫](https://crawl4ai.docslib.dev/api/async-webcrawler/)
1719
+ [arun()](https://crawl4ai.docslib.dev/api/arun/)
1720
+ [arun_many()](https://crawl4ai.docslib.dev/api/arun_many/)
1721
+ [浏览器、爬虫和LLM配置](https://crawl4ai.docslib.dev/api/parameters/)
1722
+ [爬取结果](https://crawl4ai.docslib.dev/api/crawl-result/)
1723
+ [策略](https://crawl4ai.docslib.dev/api/strategies/)
1724
+ [C4A脚本参考](https://crawl4ai.docslib.dev/api/c4a-script-reference/)
1725
+ - [处理懒加载图片](https://crawl4ai.docslib.dev/advanced/lazy-loading/#_1)
1726
+
1727
+ - [示例:确保懒加载图片出现](https://crawl4ai.docslib.dev/advanced/lazy-loading/#_2)
1728
+ - [与其他链接和媒体过滤器结合使用](https://crawl4ai.docslib.dev/advanced/lazy-loading/#_3)
1729
+ - [技巧与故障排除](https://crawl4ai.docslib.dev/advanced/lazy-loading/#_4)
1730
+ [处理懒加载图片](https://crawl4ai.docslib.dev/advanced/lazy-loading/#_1)
1731
+ [示例:确保懒加载图片出现](https://crawl4ai.docslib.dev/advanced/lazy-loading/#_2)
1732
+ [与其他链接和媒体过滤器结合使用](https://crawl4ai.docslib.dev/advanced/lazy-loading/#_3)
1733
+ [技巧与故障排除](https://crawl4ai.docslib.dev/advanced/lazy-loading/#_4)
1734
+
1735
+ ## 处理懒加载图片
1736
+
1737
+ 如今许多网站在滚动时会懒加载图片。若需确保它们出现在最终爬取结果(及 result.media 中),可考虑:
1738
+
1739
+ ```
1740
+ result.media
1741
+ ```
1742
+
1743
+ 1. wait_for_images=True – 等待图片完全加载。
+ 2. scan_full_page – 强制爬虫滚动整个页面以触发懒加载。
+ 3. scroll_delay – 在滚动步骤间添加短暂延迟。
1744
+
1745
+ ```
1746
+ wait_for_images=True
1747
+ ```
1748
+
1749
+
1750
+ ```
1751
+ scan_full_page
1752
+ ```
1753
+
1754
+
1755
+ ```
1756
+ scroll_delay
1757
+ ```
1758
+
1759
+ 注意:若网站需要多次触发"加载更多"或复杂交互,请参阅[页面交互文档](https://crawl4ai.docslib.dev/core/page-interaction/)。对于虚拟滚动(Twitter/Instagram风格)的网站,请查看[虚拟滚动文档](https://crawl4ai.docslib.dev/advanced/virtual-scroll/)。
1760
+
1761
+ ### 示例:确保懒加载图片出现
1762
+
1763
+ ```
1764
+ import asyncio
+ from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, BrowserConfig
+ from crawl4ai.async_configs import CacheMode
+
+ async def main():
+     config = CrawlerRunConfig(
+         # 强制爬虫等待图片完全加载
+         wait_for_images=True,
+
+         # 选项1:若需自动滚动页面加载图片
+         scan_full_page=True,  # 指示爬虫尝试滚动整个页面
+         scroll_delay=0.5,     # 滚动步骤间的延迟(秒)
+
+         # 选项2:若网站使用"加载更多"或JS触发器加载图片,
+         # 可在此指定js_code或wait_for逻辑
+
+         cache_mode=CacheMode.BYPASS,
+         verbose=True
+     )
+
+     async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
+         result = await crawler.arun("https://www.example.com/gallery", config=config)
+
+         if result.success:
+             images = result.media.get("images", [])
+             print("Images found:", len(images))
+             for i, img in enumerate(images[:5]):
+                 print(f"[Image {i}] URL: {img['src']}, Score: {img.get('score','N/A')}")
+         else:
+             print("Error:", result.error_message)
+
+ if __name__ == "__main__":
+     asyncio.run(main())
1765
+ ```
1766
+
1767
+ ### 示例:确保懒加载图片出现
1768
+
1769
+ ```
1770
+ import asyncio
+ from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, BrowserConfig
+ from crawl4ai.async_configs import CacheMode
+
+ async def main():
+     config = CrawlerRunConfig(
+         # 强制爬虫等待图片完全加载
+         wait_for_images=True,
+
+         # 选项1:若需自动滚动页面加载图片
+         scan_full_page=True,  # 指示爬虫尝试滚动整个页面
+         scroll_delay=0.5,     # 滚动步骤间的延迟(秒)
+
+         # 选项2:若网站使用"加载更多"或JS触发器加载图片,
+         # 可在此指定js_code或wait_for逻辑
+
+         cache_mode=CacheMode.BYPASS,
+         verbose=True
+     )
+
+     async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
+         result = await crawler.arun("https://www.example.com/gallery", config=config)
+
+         if result.success:
+             images = result.media.get("images", [])
+             print("Images found:", len(images))
+             for i, img in enumerate(images[:5]):
+                 print(f"[Image {i}] URL: {img['src']}, Score: {img.get('score','N/A')}")
+         else:
+             print("Error:", result.error_message)
+
+ if __name__ == "__main__":
+     asyncio.run(main())
1771
+ ```
1772
+
1773
+ 说明:
1774
+ - wait_for_images=True
1775
+ 爬虫会尝试确保图片完成加载后再生成最终HTML。
1776
+ - scan_full_page=True
1777
+ 指示爬虫尝试从顶部滚动到底部。每次滚动有助于触发懒加载。
1778
+ - scroll_delay=0.5
1779
+ 每次滚动步骤间暂停0.5秒。让网站在继续前完成图片加载。
1780
+
1781
+ ```
1782
+ wait_for_images=True
1783
+ ```
1784
+
1785
+
1786
+ ```
1787
+ scan_full_page=True
1788
+ ```
1789
+
1790
+
1791
+ ```
1792
+ scroll_delay=0.5
1793
+ ```
1794
+
1795
+ 适用场景:
1796
+ - 懒加载:若图片仅在用户滚动到视窗内才显示,scan_full_page + scroll_delay 可帮助爬虫捕获它们。
1797
+ - 重型页面:若页面极长,需注意完整扫描可能较慢。可调整 scroll_delay 或最大滚动步数。
1798
+
1799
+ ```
1800
+ scan_full_page
1801
+ ```
1802
+
1803
+
1804
+ ```
1805
+ scroll_delay
1806
+ ```
1807
+
1808
+
1809
+ ```
1810
+ scroll_delay
1811
+ ```
1812
+
1813
+ ## 与其他链接和媒体过滤器结合使用
1814
+ 仍可将懒加载逻辑与常规的exclude_external_images、exclude_domains或链接过滤结合:
1815
+
1816
+ ```
1817
+ config = CrawlerRunConfig(
+     wait_for_images=True,
+     scan_full_page=True,
+     scroll_delay=0.5,
+
+     # 若只需本地图片则过滤外部图片
+     exclude_external_images=True,
+
+     # 排除特定域名的链接
+     exclude_domains=["spammycdn.com"],
+ )
1818
+ ```
1819
+
1820
+
1821
+ ```
1822
+ config = CrawlerRunConfig(
+     wait_for_images=True,
+     scan_full_page=True,
+     scroll_delay=0.5,
+
+     # 若只需本地图片则过滤外部图片
+     exclude_external_images=True,
+
+     # 排除特定域名的链接
+     exclude_domains=["spammycdn.com"],
+ )
1823
+ ```
1824
+
1825
+ 此方法确保捕获主域名的所有图片同时忽略外部图片,且爬虫会实际滚动整个页面以触发懒加载。
1826
+
1827
+ ## 技巧与故障排除
1828
+ 1. 长页面 - 在极长或无限滚动页面上设置 scan_full_page=True 可能消耗较多资源。 - 可考虑使用[钩子](https://crawl4ai.docslib.dev/core/page-interaction/)或专用逻辑来加载特定区域或重复触发"加载更多"。
1829
+
1830
+ ```
1831
+ scan_full_page=True
1832
+ ```
1833
+
1834
+ 2. 混合图片行为 - 部分网站滚动时批量加载图片。若遗漏图片,可增加 scroll_delay 或用JS代码/钩子循环调用部分滚动。
1835
+
1836
+ ```
1837
+ scroll_delay
1838
+ ```
1839
+
1840
+ 3. 与动态等待结合 - 若网站占位图需特定事件后才转为真实图片,可使用 wait_for="css:img.loaded" 或自定义JS wait_for。
1841
+
1842
+ ```
1843
+ wait_for="css:img.loaded"
1844
+ ```
1845
+
1846
+
1847
+ ```
1848
+ wait_for
1849
+ ```
1850
+
1851
+ 4. 缓存 - 若启用 cache_mode,重复爬取可能跳过部分网络请求。若怀疑缓存遗漏新图片,可设 cache_mode=CacheMode.BYPASS 强制重新获取。
1852
+
1853
+ ```
1854
+ cache_mode
1855
+ ```
1856
+
1857
+
1858
+ ```
1859
+ cache_mode=CacheMode.BYPASS
1860
+ ```
1861
+
1862
+ 通过懒加载支持、wait_for_images和scan_full_page设置,可捕获预期的完整图片库或信息流——即使网站仅在用户滚动时加载它们。结合标准媒体过滤和域名排除策略,形成完整的链接与媒体处理方案。
1863
+
1864
+ ##### Search
1865
+ Type to start searching