pod_ident 1.0.8 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,861 @@
1
+ ---
2
+ - app: AAABot - unknown bot
3
+ match:
4
+ regex: AAABot
5
+ platform: bot
6
+ test:
7
+ userAgents:
8
+ - userAgent: AAABot
9
+ - app: AhrefsBot
10
+ match:
11
+ regex: AhrefsBot/
12
+ platform: bot
13
+ test:
14
+ userAgents:
15
+ - userAgent: Mozilla/5.0 (compatible; AhrefsBot/7.0; http://ahrefs.com/robot/)
16
+ - userAgent: Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)
17
+ - app: AirableBot
18
+ match:
19
+ regex: AirableBot-Podcast/
20
+ platform: bot
21
+ test:
22
+ userAgents:
23
+ - userAgent: AirableBot-Podcast/1.0 (+https//www.airablenow.com)
24
+ - userAgent: AirableBot-Podcast/1.0 ( https//www.airablenow.com)
25
+ - app: Alexa Flash Briefing cache
26
+ match:
27
+ regex: "^AmazonNewsContentService"
28
+ platform: bot
29
+ - app: AlignaBot
30
+ match:
31
+ regex: "^Alignabot"
32
+ platform: bot
33
+ test:
34
+ userAgents:
35
+ - userAgent: Alignabot 1.0 (https://www.alignable.com/)
36
+ - app: Amazon Music Podcasts Bot
37
+ match:
38
+ regex: "^Amazon Music Podcast"
39
+ platform: bot
40
+ - app: Anchor Importer
41
+ match:
42
+ regex: AnchorImport
43
+ platform: bot
44
+ test:
45
+ userAgents:
46
+ - userAgent: AnchorImport/1.0
47
+ - app: Apple Podcasts (Watch)
48
+ match:
49
+ regex: "^atc/|\\(null\\) watchOS/"
50
+ platform: bot
51
+ test:
52
+ userAgents:
53
+ - userAgent: atc/1.0
54
+ - userAgent: atc/1.0 watchOS/6.2 model/Watch3,3 hwp/t8004 build/17T529 (6; dt:155)
55
+ - userAgent: atc/1.0 watchOS/6.2.8 model/Watch2,3 hwp/t8002 build/17U63 (6; dt:133)
56
+ - userAgent: atc/1.0 watchOS/6.2.8 model/Watch3,3 hwp/t8004 build/17U63 (6; dt:155)
57
+ - userAgent: atc/1.0 watchOS/6.2.8 model/Watch4,2 hwp/t8006 build/17U63 (6; dt:191)
58
+ - userAgent: atc/1.0 watchOS/7.0.2 model/Watch5,10 hwp/t8006 build/18R402 (6;
59
+ dt:233)
60
+ - userAgent: atc/1.0 watchOS/7.0.2 model/Watch5,11 hwp/t8006 build/18R402 (6;
61
+ dt:234)
62
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch4,2 hwp/t8006 build/18R590 (6; dt:191)
63
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch4,3 hwp/t8006 build/18R590 (6; dt:192)
64
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch4,4 hwp/t8006 build/18R590 (6; dt:193)
65
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch5,1 hwp/t8006 build/18R590 (6; dt:201)
66
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch5,3 hwp/t8006 build/18R590 (6; dt:202)
67
+ - userAgent: atc/1.0 watchOS/7.1 model/Watch5,4 hwp/t8006 build/18R590 (6; dt:202)
68
+ - userAgent: "(null)/(null) watchOS/5.0.1 model/Watch3,3 hwp/t8004 build/16R381
69
+ (6; dt:155)"
70
+ - app: Apple Podcasts automated checks
71
+ match:
72
+ regex: "(iTMS|itunesstored)"
73
+ platform: bot
74
+ - app: Applebot
75
+ match:
76
+ regex: "^Applebot/"
77
+ platform: bot
78
+ - app: Archive.org
79
+ match:
80
+ regex: archive\.org_bot
81
+ platform: bot
82
+ test:
83
+ userAgents:
84
+ - userAgent: Mozilla/5.0 (compatible; archive.org_bot http://archive.org/details/archive.org_bot)
85
+ - app: atheerfm
86
+ match:
87
+ regex: "^atheerfm/"
88
+ platform: bot
89
+ test:
90
+ userAgents:
91
+ - userAgent: atheerfm/1 CFNetwork/758.3.15 Darwin/15.3.0
92
+ - app: Audiomack
93
+ match:
94
+ regex: "^Audiomack Podcast Processor/"
95
+ platform: bot
96
+ test:
97
+ userAgents:
98
+ - userAgent: Audiomack Podcast Processor/1.0 (https://audiomack.com/)
99
+ - app: AudioWave feed parser
100
+ match:
101
+ regex: "^AudioWaveBot/1\\.0"
102
+ platform: bot
103
+ test:
104
+ userAgents:
105
+ - userAgent: AudioWaveBot/1.0
106
+ - app: AwarioSmartBot
107
+ match:
108
+ regex: "^AwarioSmartBot/"
109
+ platform: bot
110
+ test:
111
+ userAgents:
112
+ - userAgent: AwarioSmartBot/1.0 (+https://awario.com/bots.html; bots@awario.com)
113
+ - app: Babbar
114
+ match:
115
+ regex: Barkrowler/
116
+ platform: bot
117
+ - app: Baidu
118
+ match:
119
+ regex: "\\(ce\\.baidu\\.com"
120
+ platform: bot
121
+ test:
122
+ userAgents:
123
+ - userAgent: Baidu-YunGuanCe-SLABot(ce.baidu.com)
124
+ - app: bbot
125
+ match:
126
+ regex: "^bbot/"
127
+ platform: bot
128
+ test:
129
+ userAgents:
130
+ - userAgent: bbot/0.1
131
+ - app: British Library
132
+ match:
133
+ regex: "^bl\\.uk_ldfc_bot"
134
+ platform: bot
135
+ test:
136
+ userAgents:
137
+ - userAgent: bl.uk_ldfc_bot/3.4.0-20200518 ( http://www.bl.uk/aboutus/legaldeposit/websites/websites/faqswebmaster/index.html)
138
+ - app: Blubrry Migration Service
139
+ match:
140
+ regex: "^Blubrry Migration Service"
141
+ platform: bot
142
+ test:
143
+ userAgents:
144
+ - userAgent: Blubrry Migration Service
145
+ - app: Buzzsprout Importer
146
+ match:
147
+ regex: "^Buzzsprout Importer"
148
+ platform: bot
149
+ test:
150
+ userAgents:
151
+ - userAgent: Buzzsprout Importer
152
+ - app: CastFeedValidator
153
+ match:
154
+ regex: "^CastFeedValidator/"
155
+ platform: bot
156
+ test:
157
+ userAgents:
158
+ - userAgent: CastFeedValidator/3.0.5 (https://castfeedvalidator.com)
159
+ - app: Castopod
160
+ match:
161
+ regex: Castopod/1\.0
162
+ platform: bot
163
+ test:
164
+ userAgents:
165
+ - userAgent: Castopod/1.0
166
+ - app: Chartable
167
+ match:
168
+ regex: "^Trackable/"
169
+ platform: bot
170
+ - app: Clark-Crawler, unknown
171
+ match:
172
+ regex: "^clark-crawler2"
173
+ platform: bot
174
+ - app: Critical Mention
175
+ match:
176
+ regex: "^Podcast-CriticalMention/"
177
+ platform: bot
178
+ test:
179
+ userAgents:
180
+ - userAgent: Podcast-CriticalMention/1.0
181
+ - app: curl
182
+ match:
183
+ regex: "^curl|^libcurl/|^PycURL/| curl/"
184
+ platform: bot
185
+ test:
186
+ userAgents:
187
+ - userAgent: libcurl/7.65.1
188
+ - userAgent: PycURL/7.43.0 libcurl/7.47.0 GnuTLS/3.4.10 zlib/1.2.8 libidn/1.32
189
+ librtmp/2.3
190
+ - userAgent: GuzzleHttp/6.1.0 curl/7.50.1 PHP/7.0.13-0ubuntu0.16.10.1
191
+ - app: DataforSEO
192
+ match:
193
+ regex: DataForSeoBot/
194
+ platform: bot
195
+ test:
196
+ userAgents:
197
+ - userAgent: Mozilla/5.0 (compatible; DataForSeoBot/1.0; +https://dataforseo.com/dataforseo-bot)
198
+ - app: Datagnion Bot
199
+ match:
200
+ regex: "^datagnionbot"
201
+ platform: bot
202
+ - app: dataprovider.com
203
+ match:
204
+ regex: Dataprovider\.com
205
+ platform: bot
206
+ test:
207
+ userAgents:
208
+ - userAgent: Mozilla/5.0 (compatible; Dataprovider.com)
209
+ - app: Daum
210
+ match:
211
+ regex: http://cs\.daum\.net
212
+ platform: bot
213
+ test:
214
+ userAgents:
215
+ - userAgent: Mozilla/5.0 (compatible; Daum/4.1; +http://cs.daum.net/faq/15/4118.html?faqId=28966)
216
+ - app: Deezer Podcasters
217
+ match:
218
+ regex: "^Deezer Podcasters/1\\.0"
219
+ platform: bot
220
+ - app: Digg
221
+ match:
222
+ regex: "^Digg "
223
+ platform: bot
224
+ test:
225
+ userAgents:
226
+ - userAgent: Digg Mozilla/5.0 (Digg/1.0; support@digg.com)
227
+ - app: dorada
228
+ match:
229
+ regex: support@dorada\.co\.uk
230
+ platform: bot
231
+ - app: DotBot
232
+ match:
233
+ regex: DotBot
234
+ platform: bot
235
+ test:
236
+ userAgents:
237
+ - userAgent: Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot,
238
+ help@moz.com)
239
+ - userAgent: Mozilla/5.0 (compatible; DotBot/1.2; https://opensiteexplorer.org/dotbot;
240
+ help@moz.com)
241
+ - app: Downcast Bot
242
+ match:
243
+ regex: downcast feed consumer/
244
+ platform: bot
245
+ test:
246
+ userAgents:
247
+ - userAgent: downcast feed consumer/0.0.175; (mode=dev; id=u2NgjBSPM6; downcast.fm)
248
+ - app: DuckDuckBot
249
+ match:
250
+ regex: DuckDuckBot
251
+ platform: bot
252
+ test:
253
+ userAgents:
254
+ - userAgent: Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)
255
+ - app: F-Secure Riddler
256
+ match:
257
+ regex: "^Riddler "
258
+ platform: bot
259
+ test:
260
+ userAgents:
261
+ - userAgent: Riddler (http://riddler.io/about)
262
+ - app: Facebook Bot
263
+ match:
264
+ regex: FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/
265
+ platform: bot
266
+ test:
267
+ userAgents:
268
+ - userAgent: facebookexternalhit/1.1 ( http://www.facebook.com/externalhit_uatext.php)
269
+ - userAgent: podcastbot
270
+ - userAgent: facebookexternalhit
271
+ - userAgent: Facebot
272
+ - userAgent: facebookexternalua
273
+ - userAgent: facebookplatform/1.0 (+http://developers.facebook.com)
274
+ - userAgent: acebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)
275
+ - app: Feedly
276
+ match:
277
+ regex: "^Feedly/"
278
+ platform: bot
279
+ test:
280
+ userAgents:
281
+ - userAgent: Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)
282
+ - app: FlexGet
283
+ match:
284
+ regex: "^FlexGet/"
285
+ platform: bot
286
+ test:
287
+ userAgents:
288
+ - userAgent: FlexGet/3.3.4 (www.flexget.com)
289
+ - app: Flipboard Proxy
290
+ match:
291
+ regex: FlipboardProxy/
292
+ platform: bot
293
+ test:
294
+ userAgents:
295
+ - userAgent: Mozilla/5.0 (compatible; FlipboardProxy/1.1; +http://flipboard.com/browserproxy)
296
+ - app: Fyyd
297
+ match:
298
+ regex: "^fyyd-poll"
299
+ platform: bot
300
+ - app: Go Storage Gateway V1
301
+ match:
302
+ regex: "^storagegw-v1-go$"
303
+ platform: bot
304
+ test:
305
+ userAgents:
306
+ - userAgent: storagegw-v1-go
307
+ - app: Goodpods Bot
308
+ match:
309
+ regex: Goodpods/\d+\.\d+
310
+ platform: bot
311
+ test:
312
+ userAgents:
313
+ - userAgent: Goodpods/2.2
314
+ - app: Google AdsBot
315
+ match:
316
+ regex: AdsBot-Google
317
+ platform: bot
318
+ - app: Google Podcasts Manager
319
+ match:
320
+ regex: Google-Podcast
321
+ platform: bot
322
+ - app: Googlebot
323
+ match:
324
+ regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google
325
+ platform: bot
326
+ test:
327
+ userAgents:
328
+ - userAgent: Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36
329
+ (KHTML, like Gecko) Chrome/86.0.4240.96 Mobile Safari/537.36 (compatible;
330
+ Googlebot/2.1; http://www.google.com/bot.html)
331
+ - userAgent: Googlebot-Image/1.0
332
+ - userAgent: Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers;
333
+ feed-id=4653447469100830145)
334
+ - app: Gumball
335
+ match:
336
+ regex: "^Gumball"
337
+ platform: bot
338
+ test:
339
+ userAgents:
340
+ - userAgent: Gumball.fm Analytics Prefix Checker
341
+ - app: Headliner
342
+ match:
343
+ regex: Headliner/
344
+ platform: bot
345
+ test:
346
+ userAgents:
347
+ - userAgent: Headliner/1.0.0 +https://headliner.app
348
+ - app: HubSpot Crawler
349
+ match:
350
+ regex: HubSpot Crawler
351
+ platform: bot
352
+ test:
353
+ userAgents:
354
+ - userAgent: Mozilla/5.0 (compatible; HubSpot Crawler; +https://www.hubspot.com)
355
+ - app: Internet Archive
356
+ match:
357
+ regex: Archive-It;|web\.archive\.org
358
+ platform: bot
359
+ test:
360
+ userAgents:
361
+ - userAgent: Mozilla/5.0 (compatible; special_archiver; Archive-It; http://archive-it.org/files/site-owners-special.html)
362
+ - userAgent: ia_archiver-web.archive.org
363
+ - app: Jaunt
364
+ match:
365
+ regex: "^Jaunt/"
366
+ platform: bot
367
+ test:
368
+ userAgents:
369
+ - userAgent: Jaunt/1.5
370
+ - app: l'Institut national de l'audiovisuel
371
+ match:
372
+ regex: INA dlweb
373
+ platform: bot
374
+ - app: Libsyn
375
+ match:
376
+ regex: "^Libsyn4"
377
+ platform: bot
378
+ test:
379
+ userAgents:
380
+ - userAgent: Libsyn4-Download
381
+ - app: libwww-perl
382
+ match:
383
+ regex: "^libwww-perl| libwww-perl"
384
+ platform: bot
385
+ test:
386
+ userAgents:
387
+ - userAgent: LWP::Simple/6.34 libwww-perl/6.34
388
+ - app: Livelap Crawler
389
+ match:
390
+ regex: LivelapBot
391
+ platform: bot
392
+ - app: LTX71
393
+ match:
394
+ regex: "^ltx71 "
395
+ platform: bot
396
+ test:
397
+ userAgents:
398
+ - userAgent: ltx71 - (http://ltx71.com/)
399
+ - app: MauiBot
400
+ match:
401
+ regex: "^MauiBot"
402
+ platform: bot
403
+ test:
404
+ userAgents:
405
+ - userAgent: MauiBot (crawler.feedback dc@gmail.com)
406
+ - app: Mastodon Bot
407
+ match:
408
+ regex: rb/.*Mastodon/
409
+ platform: bot
410
+ test:
411
+ userAgents:
412
+ - userAgent: http.rb/5.0.4 (Mastodon/3.5.3; +https://mastodon.xyz/) Bot
413
+ - app: Microsoft Bingbot
414
+ match:
415
+ regex: "(BingPreview/|adidxbot/|[bB]ingbot/)"
416
+ platform: bot
417
+ test:
418
+ userAgents:
419
+ - userAgent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534 (KHTML, like
420
+ Gecko) BingPreview/1.0b
421
+ - userAgent: Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)
422
+ - app: Microsoft Office
423
+ match:
424
+ regex: ms-office; MSOffice
425
+ platform: bot
426
+ test:
427
+ userAgents:
428
+ - userAgent: Mozilla/4.0 (compatible; ms-office; MSOffice 16)
429
+ - app: Mixcloud Podcast Importer
430
+ match:
431
+ regex: "^MixcloudPodcastImporter/"
432
+ platform: bot
433
+ test:
434
+ userAgents:
435
+ - userAgent: MixcloudPodcastImporter/2.0
436
+ - app: MJ12bot
437
+ match:
438
+ regex: ".*MJ12bot"
439
+ platform: bot
440
+ test:
441
+ userAgents:
442
+ - userAgent: Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)
443
+ - app: Mozilla Bot
444
+ match:
445
+ regex: "^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"
446
+ platform: bot
447
+ test:
448
+ userAgents:
449
+ - userAgent: Mozilla/5.0
450
+ - userAgent: Mozilla
451
+ - userAgent: Mozilla/5.0...
452
+ - userAgent: "'Mozilla/5.0"
453
+ - userAgent: "(Mozilla/5.0)"
454
+ - app: MSN Bot
455
+ match:
456
+ regex: "^msnbot/"
457
+ platform: bot
458
+ - app: Neevabot
459
+ match:
460
+ regex: ".*Neevabot"
461
+ platform: bot
462
+ test:
463
+ userAgents:
464
+ - userAgent: Mozilla/5.0 (compatible; Neevabot/1.0; https://neeva.com/neevabot)
465
+ - app: Netcraft Survey Agent
466
+ match:
467
+ regex: " NetcraftSurveyAgent/"
468
+ platform: bot
469
+ test:
470
+ userAgents:
471
+ - userAgent: Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)
472
+ - app: OgScrper
473
+ match:
474
+ regex: OgScrper
475
+ platform: bot
476
+ test:
477
+ userAgents:
478
+ - userAgent: OgScrper
479
+ - app: OkDownload
480
+ match:
481
+ regex: "^OkDownload/"
482
+ platform: bot
483
+ - app: OP3 Fetcher
484
+ match:
485
+ regex: "^op3-fetcher/"
486
+ platform: bot
487
+ test:
488
+ userAgents:
489
+ - userAgent: op3-fetcher/1.0 (https://op3.dev)
490
+ - app: Overcast feed parser
491
+ match:
492
+ regex: "^Overcast/1\\.0 Podcast Sync"
493
+ platform: bot
494
+ test:
495
+ userAgents:
496
+ - userAgent: Overcast/1.0 Podcast Sync
497
+ - app: Pandora RSS crawler
498
+ match:
499
+ regex: "^PandoraRSSCrawler"
500
+ platform: bot
501
+ - app: PaperLi
502
+ match:
503
+ regex: PaperLiBot/
504
+ platform: bot
505
+ test:
506
+ userAgents:
507
+ - userAgent: Mozilla/5.0 (compatible; PaperLiBot/2.1; https://support.paper.li/entries/20023257-what-is-paper-li)
508
+ - app: PetalBot
509
+ match:
510
+ regex: PetalBot
511
+ platform: bot
512
+ test:
513
+ userAgents:
514
+ - userAgent: Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like
515
+ Gecko) Mobile Safari/537.36 (compatible; PetalBot; https://aspiegel.com/petalbot)
516
+ - app: Pingdom
517
+ match:
518
+ regex: "^Pingdom"
519
+ platform: bot
520
+ - app: PlayerFM Podcast Sync
521
+ match:
522
+ regex: PlayerFM/.* Podcast Sync
523
+ platform: bot
524
+ test:
525
+ userAgents:
526
+ - userAgent: PlayerFM/1.0 Podcast Sync (0 subscribers; url=https://player.fm/series/series-2567872)
527
+ - app: Podbean Importer
528
+ match:
529
+ regex: "^Podbean Importer"
530
+ platform: bot
531
+ test:
532
+ userAgents:
533
+ - userAgent: Podbean Importer
534
+ - app: Podcastindex.org
535
+ match:
536
+ regex: "^Podcastindex\\.org/"
537
+ platform: bot
538
+ test:
539
+ userAgents:
540
+ - userAgent: Podcastindex.org/v0.3.3 (Aggrivate)
541
+ - app: PodCloud
542
+ match:
543
+ regex: podCloud
544
+ platform: bot
545
+ - app: Podcorn
546
+ match:
547
+ regex: Podcorn/
548
+ platform: bot
549
+ test:
550
+ userAgents:
551
+ - userAgent: Podcorn/1.0
552
+ - app: PodderBot
553
+ match:
554
+ regex: PodderBot/
555
+ platform: bot
556
+ test:
557
+ userAgents:
558
+ - userAgent: PodderBot/1.0
559
+ - app: Podfollow
560
+ match:
561
+ regex: podfollowbot/
562
+ platform: bot
563
+ test:
564
+ userAgents:
565
+ - userAgent: Mozilla/5.0 https://podfollow.com/crawling podfollowbot/1.0
566
+ - app: Podgrab
567
+ match:
568
+ regex: "^Podgrab$"
569
+ platform: bot
570
+ test:
571
+ userAgents:
572
+ - userAgent: Podgrab
573
+ - app: Podhound
574
+ match:
575
+ regex: PodhoundBeta
576
+ platform: bot
577
+ test:
578
+ userAgents:
579
+ - userAgent: PodhoundBeta
580
+ - app: Podio Bot
581
+ match:
582
+ regex: "^Podio/"
583
+ platform: bot
584
+ test:
585
+ userAgents:
586
+ - userAgent: Podio/1.0
587
+ - app: Podnews
588
+ match:
589
+ regex: PodnewsBot
590
+ platform: bot
591
+ - app: Podnods Bot
592
+ match:
593
+ regex: "(podnods-crawler|podnods)"
594
+ platform: bot
595
+ - app: Podscribe
596
+ match:
597
+ regex: "(^Adswizz-podscribe/|^Podscribe/)"
598
+ platform: bot
599
+ test:
600
+ userAgents:
601
+ - userAgent: Adswizz-podscribe/1.0
602
+ - userAgent: Podscribe/1.1
603
+ - app: Podverse Feed Parser
604
+ match:
605
+ regex: "^Podverse/Feed Parser"
606
+ platform: bot
607
+ - app: PodvineBot
608
+ match:
609
+ regex: "^PodvineBot/"
610
+ platform: bot
611
+ test:
612
+ userAgents:
613
+ - userAgent: PodvineBot/4.0.1 (www.podvine.com)
614
+ - app: PostRank Bot
615
+ match:
616
+ regex: "^PostRank/"
617
+ platform: bot
618
+ test:
619
+ userAgents:
620
+ - userAgent: PostRank/2.0 (postrank.com; 1 subscribers)
621
+ - app: PodwatchPro
622
+ match:
623
+ regex: Podwatch-Pro Crawler
624
+ platform: bot
625
+ test:
626
+ userAgents:
627
+ - userAgent: Podwatch-Pro Crawler
628
+ - app: Puppeteer
629
+ match:
630
+ regex: " HeadlessChrome/\\d"
631
+ platform: bot
632
+ test:
633
+ userAgents:
634
+ - userAgent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36
635
+ (KHTML, like Gecko) HeadlessChrome/105.0.5173.0 Safari/537.36
636
+ - app: python-requests
637
+ match:
638
+ regex: python-requests
639
+ platform: bot
640
+ - app: RedCircle
641
+ match:
642
+ regex: RedCircle
643
+ platform: bot
644
+ test:
645
+ userAgents:
646
+ - userAgent: RedCircle
647
+ - app: Ridder Bot
648
+ match:
649
+ regex: " RidderBot/"
650
+ platform: bot
651
+ test:
652
+ userAgents:
653
+ - userAgent: Mozilla/5.0 (compatible; RidderBot/1.0; bot@ridder.co)
654
+ - app: RSS to Telegram
655
+ match:
656
+ regex: "^RSStT"
657
+ platform: bot
658
+ test:
659
+ userAgents:
660
+ - userAgent: RSStT/2.2.1 RSS Reader
661
+ - app: RSSRadio
662
+ match:
663
+ regex: "^RSSRadio \\("
664
+ platform: bot
665
+ - app: Ruby Mechanize
666
+ match:
667
+ regex: "^Mechanize|[ -]Mechanize/"
668
+ platform: bot
669
+ test:
670
+ userAgents:
671
+ - userAgent: Mozilla/5.0 (compatible; Mechanize/2.7.4)
672
+ - userAgent: WWW-Mechanize/1.72
673
+ - app: Screaming Frog SEO Spider
674
+ match:
675
+ regex: "^Screaming Frog "
676
+ platform: bot
677
+ test:
678
+ userAgents:
679
+ - userAgent: Screaming Frog SEO Spider/5.1
680
+ - app: SearchAtlas.com SEO Crawler
681
+ match:
682
+ regex: "^SearchAtlas.*Crawler"
683
+ platform: bot
684
+ test:
685
+ userAgents:
686
+ - userAgent: SearchAtlas.com SEO Crawler
687
+ - app: SEMrushBot
688
+ match:
689
+ regex: SemrushBot/|^SEMrushBot$
690
+ platform: bot
691
+ test:
692
+ userAgents:
693
+ - userAgent: Mozilla/5.0 (compatible; SemrushBot/6~bl; http://www.semrush.com/bot.html)
694
+ - userAgent: SEMrushBot
695
+ - app: Serendeputy
696
+ match:
697
+ regex: SerendeputyBot/
698
+ platform: bot
699
+ test:
700
+ userAgents:
701
+ - userAgent: SerendeputyBot/0.8.6 (http://serendeputy.com/about/serendeputy-bot)
702
+ - app: Simplecast
703
+ match:
704
+ regex: "^Simplecast$"
705
+ platform: bot
706
+ test:
707
+ userAgents:
708
+ - userAgent: Simplecast
709
+ - app: Slack Bot
710
+ match:
711
+ regex: "^Slackbot 1\\.0"
712
+ platform: bot
713
+ test:
714
+ userAgents:
715
+ - userAgent: Slackbot 1.0 ( https://api.slack.com/robots)
716
+ - app: Snapchat Bot
717
+ match:
718
+ regex: "://developers\\.snap\\.com/robots"
719
+ platform: bot
720
+ test:
721
+ userAgents:
722
+ - userAgent: Snap URL Preview Service; bot; snapchat; https://developers.snap.com/robots
723
+ - app: SoundOn Bot
724
+ match:
725
+ regex: "^SoundOn/[\\d.]+\\s+\\(bot"
726
+ platform: bot
727
+ - app: Spotify cache service
728
+ match:
729
+ regex: "^Spotify/1\\.0$"
730
+ platform: bot
731
+ test:
732
+ userAgents:
733
+ - userAgent: Spotify/1.0
734
+ - app: Stitcher Bot
735
+ match:
736
+ regex: "^StitcherBot"
737
+ platform: bot
738
+ - app: Substack Content Fetcher
739
+ match:
740
+ regex: "^SubstackContentFetch/"
741
+ platform: bot
742
+ test:
743
+ userAgents:
744
+ - userAgent: SubstackContentFetch/1.0 (https://substack.com/)
745
+ - app: Timpi search crawler
746
+ match:
747
+ regex: Timpibot/
748
+ platform: bot
749
+ test:
750
+ userAgents:
751
+ - userAgent: Timpibot/0.8 ( http://www.timpi.io)
752
+ - app: Tiny Tiny RSS
753
+ match:
754
+ regex: "^Tiny Tiny RSS/"
755
+ platform: bot
756
+ test:
757
+ userAgents:
758
+ - userAgent: Tiny Tiny RSS/22.12-c30b24d09 (Unsupported) (https://tt-rss.org/)
759
+ - app: Trendsmap Resolver
760
+ match:
761
+ regex: TrendsmapResolver/
762
+ platform: bot
763
+ - app: Twingly Bot
764
+ match:
765
+ regex: Twingly Recon;
766
+ platform: bot
767
+ test:
768
+ userAgents:
769
+ - userAgent: Mozilla/5.0 (compatible; Twingly Recon; twingly.com)
770
+ - app: Twitterbot
771
+ match:
772
+ regex: "^Twitterbot"
773
+ platform: bot
774
+ - app: Typhoeus
775
+ match:
776
+ regex: "^Typhoeus"
777
+ platform: bot
778
+ - app: UCast
779
+ match:
780
+ regex: "^UCast/"
781
+ platform: bot
782
+ test:
783
+ userAgents:
784
+ - userAgent: UCast/1.0 Podcast Sync (1 subscribers; feed-id=aHR0cHM6Ly9mZWVkcy5idXp6c3Byb3V0LmNvbS8yMDg0OTQucnNzEAEBAD32-0242-42AC-8583-21E9BAD8C544;
785
+ +http://www.ucastapp.com/)
786
+ - app: TelegramBot
787
+ match:
788
+ regex: "^TelegramBot "
789
+ platform: bot
790
+ test:
791
+ userAgents:
792
+ - userAgent: TelegramBot (like TwitterBot)
793
+ - app: Vurbl
794
+ match:
795
+ regex: VurblBot
796
+ platform: bot
797
+ test:
798
+ userAgents:
799
+ - userAgent: Mozilla/5.0 https://vurbl.com VurblBot/1.0
800
+ - app: Wget
801
+ match:
802
+ regex: Wget
803
+ platform: bot
804
+ - app: weborama
805
+ match:
806
+ regex: "^weborama-fetcher"
807
+ platform: bot
808
+ test:
809
+ userAgents:
810
+ - userAgent: weborama-fetcher (+http://www.weborama.com)
811
+ - app: Windows Crawler
812
+ match:
813
+ regex: "^ZDM/.*Windows"
814
+ platform: bot
815
+ test:
816
+ userAgents:
817
+ - userAgent: ZDM/4.0; Windows Mobile 7.0;
818
+ - app: WordPress
819
+ match:
820
+ regex: "^WordPress"
821
+ platform: bot
822
+ - app: wsrv.nl
823
+ match:
824
+     regex: https?://wsrv\.nl/
825
+ platform: bot
826
+ test:
827
+ userAgents:
828
+ - userAgent: Mozilla/5.0 (compatible; ImageFetcher/9.0; +http://wsrv.nl/)
829
+ - app: YaCy
830
+ match:
831
+ regex: "^yacybot"
832
+ platform: bot
833
+ test:
834
+ userAgents:
835
+ - userAgent: yacybot (/global; amd64 Linux 5.9.8-zen1-1-zen; java 1.8.0_265; Europe/de)
836
+ http://yacy.net/bot.html
837
+ - app: Yahoo Crawler
838
+ match:
839
+ regex: Yahoo! Slurp
840
+ platform: bot
841
+ test:
842
+ userAgents:
843
+ - userAgent: Crawler Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
844
+ - app: YandexBot
845
+ match:
846
+ regex: YandexBot/
847
+ platform: bot
848
+ - app: Zapier
849
+ match:
850
+ regex: "^Zapier$"
851
+ platform: bot
852
+ test:
853
+ userAgents:
854
+ - userAgent: Zapier
855
+ - app: Zencast
856
+ match:
857
+ regex: "^Zencastr/"
858
+ platform: bot
859
+ test:
860
+ userAgents:
861
+ - userAgent: Zencastr/2.0