pod_ident 1.0.8 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,861 @@
1
+ ---
2
+ - app: AAABot - unknown bot
3
+ match:
4
+ regex: AAABot
5
+ platform: bot
6
+ test:
7
+ userAgents:
8
+ - userAgent: AAABot
9
+ - app: AhrefsBot
10
+ match:
11
+ regex: AhrefsBot/
12
+ platform: bot
13
+ test:
14
+ userAgents:
15
+ - userAgent: Mozilla/5.0 (compatible; AhrefsBot/7.0; http://ahrefs.com/robot/)
16
+ - userAgent: Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)
17
+ - app: AirableBot
18
+ match:
19
+ regex: AirableBot-Podcast/
20
+ platform: bot
21
+ test:
22
+ userAgents:
23
+ - userAgent: AirableBot-Podcast/1.0 (+https//www.airablenow.com)
24
+ - userAgent: AirableBot-Podcast/1.0 ( https//www.airablenow.com)
25
+ - app: Alexa Flash Briefing cache
26
+ match:
27
+ regex: "^AmazonNewsContentService"
28
+ platform: bot
29
+ - app: AlignaBot
30
+ match:
31
+ regex: "^Alignabot"
32
+ platform: bot
33
+ test:
34
+ userAgents:
35
+ - userAgent: Alignabot 1.0 (https://www.alignable.com/)
36
+ - app: Amazon Music Podcasts Bot
37
+ match:
38
+ regex: "^Amazon Music Podcast"
39
+ platform: bot
40
+ - app: Anchor Importer
41
+ match:
42
+ regex: AnchorImport
43
+ platform: bot
44
+ test:
45
+ userAgents:
46
+ - userAgent: AnchorImport/1.0
47
+ - app: Apple Podcasts (Watch)
48
+ match:
49
+ regex: "^atc/|\\(null\\) watchOS/"
50
+ platform: bot
51
+ test:
52
+ userAgents:
53
+ - userAgent: atc/1.0
54
+ - userAgent: atc/1.0 watchOS/6.2 model/Watch3,3 hwp/t8004 build/17T529 (6; dt:155)
55
+ - userAgent: atc/1.0 watchOS/6.2.8 model/Watch2,3 hwp/t8002 build/17U63 (6; dt:133)
56
+ - userAgent: atc/1.0 watchOS/6.2.8 model/Watch3,3 hwp/t8004 build/17U63 (6; dt:155)
57
+ - userAgent: atc/1.0 watchOS/6.2.8 model/Watch4,2 hwp/t8006 build/17U63 (6; dt:191)
58
+ - userAgent: atc/1.0 watchOS/7.0.2 model/Watch5,10 hwp/t8006 build/18R402 (6;
59
+ dt:233)
60
+ - userAgent: atc/1.0 watchOS/7.0.2 model/Watch5,11 hwp/t8006 build/18R402 (6;
61
+ dt:234)
62
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch4,2 hwp/t8006 build/18R590 (6; dt:191)
63
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch4,3 hwp/t8006 build/18R590 (6; dt:192)
64
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch4,4 hwp/t8006 build/18R590 (6; dt:193)
65
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch5,1 hwp/t8006 build/18R590 (6; dt:201)
66
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch5,3 hwp/t8006 build/18R590 (6; dt:202)
67
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch5,4 hwp/t8006 build/18R590 (6; dt:202)
68
+ - userAgent: "(null)/(null) watchOS/5.0.1 model/Watch3,3 hwp/t8004 build/16R381
69
+ (6; dt:155)"
70
+ - app: Apple Podcasts automated checks
71
+ match:
72
+ regex: "(iTMS|itunesstored)"
73
+ platform: bot
74
+ - app: Applebot
75
+ match:
76
+ regex: "^Applebot/"
77
+ platform: bot
78
+ - app: Archive.org
79
+ match:
80
+ regex: archive\.org_bot
81
+ platform: bot
82
+ test:
83
+ userAgents:
84
+ - userAgent: Mozilla/5.0 (compatible; archive.org_bot http://archive.org/details/archive.org_bot)
85
+ - app: atheerfm
86
+ match:
87
+ regex: "^atheerfm/"
88
+ platform: bot
89
+ test:
90
+ userAgents:
91
+ - userAgent: atheerfm/1 CFNetwork/758.3.15 Darwin/15.3.0
92
+ - app: Audiomack
93
+ match:
94
+ regex: "^Audiomack Podcast Processor/"
95
+ platform: bot
96
+ test:
97
+ userAgents:
98
+ - userAgent: Audiomack Podcast Processor/1.0 (https://audiomack.com/)
99
+ - app: AudioWave feed parser
100
+ match:
101
+ regex: "^AudioWaveBot/1\\.0"
102
+ platform: bot
103
+ test:
104
+ userAgents:
105
+ - userAgent: AudioWaveBot/1.0
106
+ - app: AwarioSmartBot
107
+ match:
108
+ regex: "^AwarioSmartBot/"
109
+ platform: bot
110
+ test:
111
+ userAgents:
112
+ - userAgent: AwarioSmartBot/1.0 (+https://awario.com/bots.html; bots@awario.com)
113
+ - app: Babbar
114
+ match:
115
+ regex: Barkrowler/
116
+ platform: bot
117
+ - app: Baidu
118
+ match:
119
+ regex: "\\(ce\\.baidu\\.com"
120
+ platform: bot
121
+ test:
122
+ userAgents:
123
+ - userAgent: Baidu-YunGuanCe-SLABot(ce.baidu.com)
124
+ - app: bbot
125
+ match:
126
+ regex: "^bbot/"
127
+ platform: bot
128
+ test:
129
+ userAgents:
130
+ - userAgent: bbot/0.1
131
+ - app: British Library
132
+ match:
133
+ regex: "^bl\\.uk_ldfc_bot"
134
+ platform: bot
135
+ test:
136
+ userAgents:
137
+ - userAgent: bl.uk_ldfc_bot/3.4.0-20200518 ( http://www.bl.uk/aboutus/legaldeposit/websites/websites/faqswebmaster/index.html)
138
+ - app: Blubrry Migration Service
139
+ match:
140
+ regex: "^Blubrry Migration Service"
141
+ platform: bot
142
+ test:
143
+ userAgents:
144
+ - userAgent: Blubrry Migration Service
145
+ - app: Buzzsprout Importer
146
+ match:
147
+ regex: "^Buzzsprout Importer"
148
+ platform: bot
149
+ test:
150
+ userAgents:
151
+ - userAgent: Buzzsprout Importer
152
+ - app: CastFeedValidator
153
+ match:
154
+ regex: "^CastFeedValidator/"
155
+ platform: bot
156
+ test:
157
+ userAgents:
158
+ - userAgent: CastFeedValidator/3.0.5 (https://castfeedvalidator.com)
159
+ - app: Castopod
160
+ match:
161
+ regex: Castopod/1\.0
162
+ platform: bot
163
+ test:
164
+ userAgents:
165
+ - userAgent: Castopod/1.0
166
+ - app: Chartable
167
+ match:
168
+ regex: "^Trackable/"
169
+ platform: bot
170
+ - app: Clark-Crawler, unknown
171
+ match:
172
+ regex: "^clark-crawler2"
173
+ platform: bot
174
+ - app: Critical Mention
175
+ match:
176
+ regex: "^Podcast-CriticalMention/"
177
+ platform: bot
178
+ test:
179
+ userAgents:
180
+ - userAgent: Podcast-CriticalMention/1.0
181
+ - app: curl
182
+ match:
183
+ regex: "^curl|^libcurl/|^PycURL/| curl/"
184
+ platform: bot
185
+ test:
186
+ userAgents:
187
+ - userAgent: libcurl/7.65.1
188
+ - userAgent: PycURL/7.43.0 libcurl/7.47.0 GnuTLS/3.4.10 zlib/1.2.8 libidn/1.32
189
+ librtmp/2.3
190
+ - userAgent: GuzzleHttp/6.1.0 curl/7.50.1 PHP/7.0.13-0ubuntu0.16.10.1
191
+ - app: DataforSEO
192
+ match:
193
+ regex: DataForSeoBot/
194
+ platform: bot
195
+ test:
196
+ userAgents:
197
+ - userAgent: Mozilla/5.0 (compatible; DataForSeoBot/1.0; +https://dataforseo.com/dataforseo-bot)
198
+ - app: Datagnion Bot
199
+ match:
200
+ regex: "^datagnionbot"
201
+ platform: bot
202
+ - app: dataprovider.com
203
+ match:
204
+ regex: Dataprovider\.com
205
+ platform: bot
206
+ test:
207
+ userAgents:
208
+ - userAgent: Mozilla/5.0 (compatible; Dataprovider.com)
209
+ - app: Daum
210
+ match:
211
+ regex: http://cs\.daum\.net
212
+ platform: bot
213
+ test:
214
+ userAgents:
215
+ - userAgent: Mozilla/5.0 (compatible; Daum/4.1; +http://cs.daum.net/faq/15/4118.html?faqId=28966)
216
+ - app: Deezer Podcasters
217
+ match:
218
+ regex: "^Deezer Podcasters/1\\.0"
219
+ platform: bot
220
+ - app: Digg
221
+ match:
222
+ regex: "^Digg "
223
+ platform: bot
224
+ test:
225
+ userAgents:
226
+ - userAgent: Digg Mozilla/5.0 (Digg/1.0; support@digg.com)
227
+ - app: dorada
228
+ match:
229
+ regex: support@dorada\.co\.uk
230
+ platform: bot
231
+ - app: DotBot
232
+ match:
233
+ regex: DotBot
234
+ platform: bot
235
+ test:
236
+ userAgents:
237
+ - userAgent: Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot,
238
+ help@moz.com)
239
+ - userAgent: Mozilla/5.0 (compatible; DotBot/1.2; https://opensiteexplorer.org/dotbot;
240
+ help@moz.com)
241
+ - app: Downcast Bot
242
+ match:
243
+ regex: downcast feed consumer/
244
+ platform: bot
245
+ test:
246
+ userAgents:
247
+ - userAgent: downcast feed consumer/0.0.175; (mode=dev; id=u2NgjBSPM6; downcast.fm)
248
+ - app: DuckDuckBot
249
+ match:
250
+ regex: DuckDuckBot
251
+ platform: bot
252
+ test:
253
+ userAgents:
254
+ - userAgent: Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)
255
+ - app: F-Secure Riddler
256
+ match:
257
+ regex: "^Riddler "
258
+ platform: bot
259
+ test:
260
+ userAgents:
261
+ - userAgent: Riddler (http://riddler.io/about)
262
+ - app: Facebook Bot
263
+ match:
264
+ regex: FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/
265
+ platform: bot
266
+ test:
267
+ userAgents:
268
+ - userAgent: facebookexternalhit/1.1 ( http://www.facebook.com/externalhit_uatext.php)
269
+ - userAgent: podcastbot
270
+ - userAgent: facebookexternalhit
271
+ - userAgent: Facebot
272
+ - userAgent: facebookexternalua
273
+ - userAgent: facebookplatform/1.0 (+http://developers.facebook.com)
274
+ - userAgent: acebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)
275
+ - app: Feedly
276
+ match:
277
+ regex: "^Feedly/"
278
+ platform: bot
279
+ test:
280
+ userAgents:
281
+ - userAgent: Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)
282
+ - app: FlexGet
283
+ match:
284
+ regex: "^FlexGet/"
285
+ platform: bot
286
+ test:
287
+ userAgents:
288
+ - userAgent: FlexGet/3.3.4 (www.flexget.com)
289
+ - app: Flipboard Proxy
290
+ match:
291
+ regex: FlipboardProxy/
292
+ platform: bot
293
+ test:
294
+ userAgents:
295
+ - userAgent: Mozilla/5.0 (compatible; FlipboardProxy/1.1; +http://flipboard.com/browserproxy)
296
+ - app: Fyyd
297
+ match:
298
+ regex: "^fyyd-poll"
299
+ platform: bot
300
+ - app: Go Storage Gateway V1
301
+ match:
302
+ regex: "^storagegw-v1-go$"
303
+ platform: bot
304
+ test:
305
+ userAgents:
306
+ - userAgent: storagegw-v1-go
307
+ - app: Goodpods Bot
308
+ match:
309
+ regex: Goodpods/\d+\.\d+
310
+ platform: bot
311
+ test:
312
+ userAgents:
313
+ - userAgent: Goodpods/2.2
314
+ - app: Google AdsBot
315
+ match:
316
+ regex: AdsBot-Google
317
+ platform: bot
318
+ - app: Google Podcasts Manager
319
+ match:
320
+ regex: Google-Podcast
321
+ platform: bot
322
+ - app: Googlebot
323
+ match:
324
+ regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google
325
+ platform: bot
326
+ test:
327
+ userAgents:
328
+ - userAgent: Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36
329
+ (KHTML, like Gecko) Chrome/86.0.4240.96 Mobile Safari/537.36 (compatible;
330
+ Googlebot/2.1; http://www.google.com/bot.html)
331
+ - userAgent: Googlebot-Image/1.0
332
+ - userAgent: Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers;
333
+ feed-id=4653447469100830145)
334
+ - app: Gumball
335
+ match:
336
+ regex: "^Gumball"
337
+ platform: bot
338
+ test:
339
+ userAgents:
340
+ - userAgent: Gumball.fm Analytics Prefix Checker
341
+ - app: Headliner
342
+ match:
343
+ regex: Headliner/
344
+ platform: bot
345
+ test:
346
+ userAgents:
347
+ - userAgent: Headliner/1.0.0 +https://headliner.app
348
+ - app: HubSpot Crawler
349
+ match:
350
+ regex: HubSpot Crawler
351
+ platform: bot
352
+ test:
353
+ userAgents:
354
+ - userAgent: Mozilla/5.0 (compatible; HubSpot Crawler; +https://www.hubspot.com)
355
+ - app: Internet Archive
356
+ match:
357
+ regex: Archive-It;|web\.archive\.org
358
+ platform: bot
359
+ test:
360
+ userAgents:
361
+ - userAgent: Mozilla/5.0 (compatible; special_archiver; Archive-It; http://archive-it.org/files/site-owners-special.html)
362
+ - userAgent: ia_archiver-web.archive.org
363
+ - app: Jaunt
364
+ match:
365
+ regex: "^Jaunt/"
366
+ platform: bot
367
+ test:
368
+ userAgents:
369
+ - userAgent: Jaunt/1.5
370
+ - app: l'Institut national de l'audiovisuel
371
+ match:
372
+ regex: INA dlweb
373
+ platform: bot
374
+ - app: Libsyn
375
+ match:
376
+ regex: "^Libsyn4"
377
+ platform: bot
378
+ test:
379
+ userAgents:
380
+ - userAgent: Libsyn4-Download
381
+ - app: libwww-perl
382
+ match:
383
+ regex: "^libwww-perl| libwww-perl"
384
+ platform: bot
385
+ test:
386
+ userAgents:
387
+ - userAgent: LWP::Simple/6.34 libwww-perl/6.34
388
+ - app: Livelap Crawler
389
+ match:
390
+ regex: LivelapBot
391
+ platform: bot
392
+ - app: LTX71
393
+ match:
394
+ regex: "^ltx71 "
395
+ platform: bot
396
+ test:
397
+ userAgents:
398
+ - userAgent: ltx71 - (http://ltx71.com/)
399
+ - app: MauiBot
400
+ match:
401
+ regex: "^MauiBot"
402
+ platform: bot
403
+ test:
404
+ userAgents:
405
+ - userAgent: MauiBot (crawler.feedback dc@gmail.com)
406
+ - app: Mastodon Bot
407
+ match:
408
+ regex: rb/.*Mastodon/
409
+ platform: bot
410
+ test:
411
+ userAgents:
412
+ - userAgent: http.rb/5.0.4 (Mastodon/3.5.3; +https://mastodon.xyz/) Bot
413
+ - app: Microsoft Bingbot
414
+ match:
415
+ regex: "(BingPreview/|adidxbot/|[bB]ingbot/)"
416
+ platform: bot
417
+ test:
418
+ userAgents:
419
+ - userAgent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534 (KHTML, like
420
+ Gecko) BingPreview/1.0b
421
+ - userAgent: Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)
422
+ - app: Microsoft Office
423
+ match:
424
+ regex: ms-office; MSOffice
425
+ platform: bot
426
+ test:
427
+ userAgents:
428
+ - userAgent: Mozilla/4.0 (compatible; ms-office; MSOffice 16)
429
+ - app: Mixcloud Podcast Importer
430
+ match:
431
+ regex: "^MixcloudPodcastImporter/"
432
+ platform: bot
433
+ test:
434
+ userAgents:
435
+ - userAgent: MixcloudPodcastImporter/2.0
436
+ - app: MJ12bot
437
+ match:
438
+ regex: ".*MJ12bot"
439
+ platform: bot
440
+ test:
441
+ userAgents:
442
+ - userAgent: Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)
443
+ - app: Mozilla Bot
444
+ match:
445
+ regex: "^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"
446
+ platform: bot
447
+ test:
448
+ userAgents:
449
+ - userAgent: Mozilla/5.0
450
+ - userAgent: Mozilla
451
+ - userAgent: Mozilla/5.0...
452
+ - userAgent: "'Mozilla/5.0"
453
+ - userAgent: "(Mozilla/5.0)"
454
+ - app: MSN Bot
455
+ match:
456
+ regex: "^msnbot/"
457
+ platform: bot
458
+ - app: Neevabot
459
+ match:
460
+ regex: ".*Neevabot"
461
+ platform: bot
462
+ test:
463
+ userAgents:
464
+ - userAgent: Mozilla/5.0 (compatible; Neevabot/1.0; https://neeva.com/neevabot)
465
+ - app: Netcraft Survey Agent
466
+ match:
467
+ regex: " NetcraftSurveyAgent/"
468
+ platform: bot
469
+ test:
470
+ userAgents:
471
+ - userAgent: Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)
472
+ - app: OgScrper
473
+ match:
474
+ regex: OgScrper
475
+ platform: bot
476
+ test:
477
+ userAgents:
478
+ - userAgent: OgScrper
479
+ - app: OkDownload
480
+ match:
481
+ regex: "^OkDownload/"
482
+ platform: bot
483
+ - app: OP3 Fetcher
484
+ match:
485
+ regex: "^op3-fetcher/"
486
+ platform: bot
487
+ test:
488
+ userAgents:
489
+ - userAgent: op3-fetcher/1.0 (https://op3.dev)
490
+ - app: Overcast feed parser
491
+ match:
492
+ regex: "^Overcast/1\\.0 Podcast Sync"
493
+ platform: bot
494
+ test:
495
+ userAgents:
496
+ - userAgent: Overcast/1.0 Podcast Sync
497
+ - app: Pandora RSS crawler
498
+ match:
499
+ regex: "^PandoraRSSCrawler"
500
+ platform: bot
501
+ - app: PaperLi
502
+ match:
503
+ regex: PaperLiBot/
504
+ platform: bot
505
+ test:
506
+ userAgents:
507
+ - userAgent: Mozilla/5.0 (compatible; PaperLiBot/2.1; https://support.paper.li/entries/20023257-what-is-paper-li)
508
+ - app: PetalBot
509
+ match:
510
+ regex: PetalBot
511
+ platform: bot
512
+ test:
513
+ userAgents:
514
+ - userAgent: Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like
515
+ Gecko) Mobile Safari/537.36 (compatible; PetalBot; https://aspiegel.com/petalbot)
516
+ - app: Pingdom
517
+ match:
518
+ regex: "^Pingdom"
519
+ platform: bot
520
+ - app: PlayerFM Podcast Sync
521
+ match:
522
+ regex: PlayerFM/.* Podcast Sync
523
+ platform: bot
524
+ test:
525
+ userAgents:
526
+ - userAgent: PlayerFM/1.0 Podcast Sync (0 subscribers; url=https://player.fm/series/series-2567872)
527
+ - app: Podbean Importer
528
+ match:
529
+ regex: "^Podbean Importer"
530
+ platform: bot
531
+ test:
532
+ userAgents:
533
+ - userAgent: Podbean Importer
534
+ - app: Podcastindex.org
535
+ match:
536
+ regex: "^Podcastindex\\.org/"
537
+ platform: bot
538
+ test:
539
+ userAgents:
540
+ - userAgent: Podcastindex.org/v0.3.3 (Aggrivate)
541
+ - app: PodCloud
542
+ match:
543
+ regex: podCloud
544
+ platform: bot
545
+ - app: Podcorn
546
+ match:
547
+ regex: Podcorn/
548
+ platform: bot
549
+ test:
550
+ userAgents:
551
+ - userAgent: Podcorn/1.0
552
+ - app: PodderBot
553
+ match:
554
+ regex: PodderBot/
555
+ platform: bot
556
+ test:
557
+ userAgents:
558
+ - userAgent: PodderBot/1.0
559
+ - app: Podfollow
560
+ match:
561
+ regex: podfollowbot/
562
+ platform: bot
563
+ test:
564
+ userAgents:
565
+ - userAgent: Mozilla/5.0 https://podfollow.com/crawling podfollowbot/1.0
566
+ - app: Podgrab
567
+ match:
568
+ regex: "^Podgrab$"
569
+ platform: bot
570
+ test:
571
+ userAgents:
572
+ - userAgent: Podgrab
573
+ - app: Podhound
574
+ match:
575
+ regex: PodhoundBeta
576
+ platform: bot
577
+ test:
578
+ userAgents:
579
+ - userAgent: PodhoundBeta
580
+ - app: Podio Bot
581
+ match:
582
+ regex: "^Podio/"
583
+ platform: bot
584
+ test:
585
+ userAgents:
586
+ - userAgent: Podio/1.0
587
+ - app: Podnews
588
+ match:
589
+ regex: PodnewsBot
590
+ platform: bot
591
+ - app: Podnods Bot
592
+ match:
593
+ regex: "(podnods-crawler|podnods)"
594
+ platform: bot
595
+ - app: Podscribe
596
+ match:
597
+ regex: "(^Adswizz-podscribe/|^Podscribe/)"
598
+ platform: bot
599
+ test:
600
+ userAgents:
601
+ - userAgent: Adswizz-podscribe/1.0
602
+ - userAgent: Podscribe/1.1
603
+ - app: Podverse Feed Parser
604
+ match:
605
+ regex: "^Podverse/Feed Parser"
606
+ platform: bot
607
+ - app: PodvineBot
608
+ match:
609
+ regex: "^PodvineBot/"
610
+ platform: bot
611
+ test:
612
+ userAgents:
613
+ - userAgent: PodvineBot/4.0.1 (www.podvine.com)
614
+ - app: PostRank Bot
615
+ match:
616
+ regex: "^PostRank/"
617
+ platform: bot
618
+ test:
619
+ userAgents:
620
+ - userAgent: PostRank/2.0 (postrank.com; 1 subscribers)
621
+ - app: PodwatchPro
622
+ match:
623
+ regex: Podwatch-Pro Crawler
624
+ platform: bot
625
+ test:
626
+ userAgents:
627
+ - userAgent: Podwatch-Pro Crawler
628
+ - app: Puppeteer
629
+ match:
630
+ regex: " HeadlessChrome/\\d"
631
+ platform: bot
632
+ test:
633
+ userAgents:
634
+ - userAgent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36
635
+ (KHTML, like Gecko) HeadlessChrome/105.0.5173.0 Safari/537.36
636
+ - app: python-requests
637
+ match:
638
+ regex: python-requests
639
+ platform: bot
640
+ - app: RedCircle
641
+ match:
642
+ regex: RedCircle
643
+ platform: bot
644
+ test:
645
+ userAgents:
646
+ - userAgent: RedCircle
647
+ - app: Ridder Bot
648
+ match:
649
+ regex: " RidderBot/"
650
+ platform: bot
651
+ test:
652
+ userAgents:
653
+ - userAgent: Mozilla/5.0 (compatible; RidderBot/1.0; bot@ridder.co)
654
+ - app: RSS to Telegram
655
+ match:
656
+ regex: "^RSStT"
657
+ platform: bot
658
+ test:
659
+ userAgents:
660
+ - userAgent: RSStT/2.2.1 RSS Reader
661
+ - app: RSSRadio
662
+ match:
663
+ regex: "^RSSRadio \\("
664
+ platform: bot
665
+ - app: Ruby Mechanize
666
+ match:
667
+ regex: "^Mechanize|[ -]Mechanize/"
668
+ platform: bot
669
+ test:
670
+ userAgents:
671
+ - userAgent: Mozilla/5.0 (compatible; Mechanize/2.7.4)
672
+ - userAgent: WWW-Mechanize/1.72
673
+ - app: Screaming Frog SEO Spider
674
+ match:
675
+ regex: "^Screaming Frog "
676
+ platform: bot
677
+ test:
678
+ userAgents:
679
+ - userAgent: Screaming Frog SEO Spider/5.1
680
+ - app: SearchAtlas.com SEO Crawler
681
+ match:
682
+ regex: "^SearchAtlas.*Crawler"
683
+ platform: bot
684
+ test:
685
+ userAgents:
686
+ - userAgent: SearchAtlas.com SEO Crawler
687
+ - app: SEMrushBot
688
+ match:
689
+ regex: SemrushBot/|^SEMrushBot$
690
+ platform: bot
691
+ test:
692
+ userAgents:
693
+ - userAgent: Mozilla/5.0 (compatible; SemrushBot/6~bl; http://www.semrush.com/bot.html)
694
+ - userAgent: SEMrushBot
695
+ - app: Serendeputy
696
+ match:
697
+ regex: SerendeputyBot/
698
+ platform: bot
699
+ test:
700
+ userAgents:
701
+ - userAgent: SerendeputyBot/0.8.6 (http://serendeputy.com/about/serendeputy-bot)
702
+ - app: Simplecast
703
+ match:
704
+ regex: "^Simplecast$"
705
+ platform: bot
706
+ test:
707
+ userAgents:
708
+ - userAgent: Simplecast
709
+ - app: Slack Bot
710
+ match:
711
+ regex: "^Slackbot 1\\.0"
712
+ platform: bot
713
+ test:
714
+ userAgents:
715
+ - userAgent: Slackbot 1.0 ( https://api.slack.com/robots)
716
+ - app: Snapchat Bot
717
+ match:
718
+ regex: "://developers\\.snap\\.com/robots"
719
+ platform: bot
720
+ test:
721
+ userAgents:
722
+ - userAgent: Snap URL Preview Service; bot; snapchat; https://developers.snap.com/robots
723
+ - app: SoundOn Bot
724
+ match:
725
+ regex: "^SoundOn/[\\d.]+\\s+\\(bot"
726
+ platform: bot
727
+ - app: Spotify cache service
728
+ match:
729
+ regex: "^Spotify/1\\.0$"
730
+ platform: bot
731
+ test:
732
+ userAgents:
733
+ - userAgent: Spotify/1.0
734
+ - app: Stitcher Bot
735
+ match:
736
+ regex: "^StitcherBot"
737
+ platform: bot
738
+ - app: Substack Content Fetcher
739
+ match:
740
+ regex: "^SubstackContentFetch/"
741
+ platform: bot
742
+ test:
743
+ userAgents:
744
+ - userAgent: SubstackContentFetch/1.0 (https://substack.com/)
745
+ - app: Timpi search crawler
746
+ match:
747
+ regex: Timpibot/
748
+ platform: bot
749
+ test:
750
+ userAgents:
751
+ - userAgent: Timpibot/0.8 ( http://www.timpi.io)
752
+ - app: Tiny Tiny RSS
753
+ match:
754
+ regex: "^Tiny Tiny RSS/"
755
+ platform: bot
756
+ test:
757
+ userAgents:
758
+ - userAgent: Tiny Tiny RSS/22.12-c30b24d09 (Unsupported) (https://tt-rss.org/)
759
+ - app: Trendsmap Resolver
760
+ match:
761
+ regex: TrendsmapResolver/
762
+ platform: bot
763
+ - app: Twingly Bot
764
+ match:
765
+ regex: Twingly Recon;
766
+ platform: bot
767
+ test:
768
+ userAgents:
769
+ - userAgent: Mozilla/5.0 (compatible; Twingly Recon; twingly.com)
770
+ - app: Twitterbot
771
+ match:
772
+ regex: "^Twitterbot"
773
+ platform: bot
774
+ - app: Typhoeus
775
+ match:
776
+ regex: "^Typhoeus"
777
+ platform: bot
778
+ - app: UCast
779
+ match:
780
+ regex: "^UCast/"
781
+ platform: bot
782
+ test:
783
+ userAgents:
784
+ - userAgent: UCast/1.0 Podcast Sync (1 subscribers; feed-id=aHR0cHM6Ly9mZWVkcy5idXp6c3Byb3V0LmNvbS8yMDg0OTQucnNzEAEBAD32-0242-42AC-8583-21E9BAD8C544;
785
+ +http://www.ucastapp.com/)
786
+ - app: TelegramBot
787
+ match:
788
+ regex: "^TelegramBot "
789
+ platform: bot
790
+ test:
791
+ userAgents:
792
+ - userAgent: TelegramBot (like TwitterBot)
793
+ - app: Vurbl
794
+ match:
795
+ regex: VurblBot
796
+ platform: bot
797
+ test:
798
+ userAgents:
799
+ - userAgent: Mozilla/5.0 https://vurbl.com VurblBot/1.0
800
+ - app: Wget
801
+ match:
802
+ regex: Wget
803
+ platform: bot
804
+ - app: weborama
805
+ match:
806
+ regex: "^weborama-fetcher"
807
+ platform: bot
808
+ test:
809
+ userAgents:
810
+ - userAgent: weborama-fetcher (+http://www.weborama.com)
811
+ - app: Windows Crawler
812
+ match:
813
+ regex: "^ZDM/.*Windows"
814
+ platform: bot
815
+ test:
816
+ userAgents:
817
+ - userAgent: ZDM/4.0; Windows Mobile 7.0;
818
+ - app: WordPress
819
+ match:
820
+ regex: "^WordPress"
821
+ platform: bot
822
+ - app: wsrv.nl
823
+ match:
824
+ regex: https?://wsrv\.nl/
825
+ platform: bot
826
+ test:
827
+ userAgents:
828
+ - userAgent: Mozilla/5.0 (compatible; ImageFetcher/9.0; +http://wsrv.nl/)
829
+ - app: YaCy
830
+ match:
831
+ regex: "^yacybot"
832
+ platform: bot
833
+ test:
834
+ userAgents:
835
+ - userAgent: yacybot (/global; amd64 Linux 5.9.8-zen1-1-zen; java 1.8.0_265; Europe/de)
836
+ http://yacy.net/bot.html
837
+ - app: Yahoo Crawler
838
+ match:
839
+ regex: Yahoo! Slurp
840
+ platform: bot
841
+ test:
842
+ userAgents:
843
+ - userAgent: Crawler Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
844
+ - app: YandexBot
845
+ match:
846
+ regex: YandexBot/
847
+ platform: bot
848
+ - app: Zapier
849
+ match:
850
+ regex: "^Zapier$"
851
+ platform: bot
852
+ test:
853
+ userAgents:
854
+ - userAgent: Zapier
855
+ - app: Zencast
856
+ match:
857
+ regex: "^Zencastr/"
858
+ platform: bot
859
+ test:
860
+ userAgents:
861
+ - userAgent: Zencastr/2.0