pod_ident 1.0.7 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/.ruby-version +1 -1
- data/Gemfile.lock +3 -3
- data/README.md +21 -0
- data/Rakefile +58 -3
- data/lib/detection_rules.yml +25 -3
- data/lib/detection_rules_bots.yml +832 -0
- data/lib/pod_ident/detection_result.rb +4 -0
- data/lib/pod_ident/detection_rules.rb +1 -1
- data/lib/pod_ident/detection_rules_bots.rb +3 -0
- data/lib/pod_ident/rule_parser.rb +32 -1
- data/lib/pod_ident/version.rb +1 -1
- data/lib/pod_ident.rb +20 -2
- data/pod_ident.gemspec +1 -1
- metadata +7 -5
@@ -0,0 +1,832 @@
|
|
1
|
+
---
|
2
|
+
- app: AAABot - unknown bot
|
3
|
+
match:
|
4
|
+
regex: AAABot
|
5
|
+
platform: bot
|
6
|
+
test:
|
7
|
+
userAgents:
|
8
|
+
- userAgent: AAABot
|
9
|
+
- app: AhrefsBot
|
10
|
+
match:
|
11
|
+
regex: AhrefsBot/
|
12
|
+
platform: bot
|
13
|
+
test:
|
14
|
+
userAgents:
|
15
|
+
- userAgent: Mozilla/5.0 (compatible; AhrefsBot/7.0; http://ahrefs.com/robot/)
|
16
|
+
- userAgent: Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)
|
17
|
+
- app: AirableBot
|
18
|
+
match:
|
19
|
+
regex: AirableBot-Podcast/
|
20
|
+
platform: bot
|
21
|
+
test:
|
22
|
+
userAgents:
|
23
|
+
- userAgent: AirableBot-Podcast/1.0 (+https//www.airablenow.com)
|
24
|
+
- userAgent: AirableBot-Podcast/1.0 ( https//www.airablenow.com)
|
25
|
+
- app: Alexa Flash Briefing cache
|
26
|
+
match:
|
27
|
+
regex: "^AmazonNewsContentService"
|
28
|
+
platform: bot
|
29
|
+
- app: AlignaBot
|
30
|
+
match:
|
31
|
+
regex: "^Alignabot"
|
32
|
+
platform: bot
|
33
|
+
test:
|
34
|
+
userAgents:
|
35
|
+
- userAgent: Alignabot 1.0 (https://www.alignable.com/)
|
36
|
+
- app: Amazon Music Podcasts Bot
|
37
|
+
match:
|
38
|
+
regex: "^Amazon Music Podcast"
|
39
|
+
platform: bot
|
40
|
+
- app: Anchor Importer
|
41
|
+
match:
|
42
|
+
regex: AnchorImport
|
43
|
+
platform: bot
|
44
|
+
test:
|
45
|
+
userAgents:
|
46
|
+
- userAgent: AnchorImport/1.0
|
47
|
+
- app: Apple Podcasts (Watch)
|
48
|
+
match:
|
49
|
+
regex: "^atc/|\\(null\\) watchOS/"
|
50
|
+
platform: bot
|
51
|
+
test:
|
52
|
+
userAgents:
|
53
|
+
- userAgent: atc/1.0
|
54
|
+
- userAgent: atc/1.0 watchOS/6.2 model/Watch3,3 hwp/t8004 build/17T529 (6; dt:155)
|
55
|
+
- userAgent: atc/1.0 watchOS/6.2.8 model/Watch2,3 hwp/t8002 build/17U63 (6; dt:133)
|
56
|
+
- userAgent: atc/1.0 watchOS/6.2.8 model/Watch3,3 hwp/t8004 build/17U63 (6; dt:155)
|
57
|
+
- userAgent: atc/1.0 watchOS/6.2.8 model/Watch4,2 hwp/t8006 build/17U63 (6; dt:191)
|
58
|
+
- userAgent: atc/1.0 watchOS/7.0.2 model/Watch5,10 hwp/t8006 build/18R402 (6;
|
59
|
+
dt:233)
|
60
|
+
- userAgent: atc/1.0 watchOS/7.0.2 model/Watch5,11 hwp/t8006 build/18R402 (6;
|
61
|
+
dt:234)
|
62
|
+
- userAgent: atc/1.0 watchOS/7.1 model/Watch4,2 hwp/t8006 build/18R590 (6; dt:191)
|
63
|
+
- userAgent: atc/1.0 watchOS/7.1 model/Watch4,3 hwp/t8006 build/18R590 (6; dt:192)
|
64
|
+
- userAgent: atc/1.0 watchOS/7.1 model/Watch4,4 hwp/t8006 build/18R590 (6; dt:193)
|
65
|
+
- userAgent: atc/1.0 watchOS/7.1 model/Watch5,1 hwp/t8006 build/18R590 (6; dt:201)
|
66
|
+
- userAgent: atc/1.0 watchOS/7.1 model/Watch5,3 hwp/t8006 build/18R590 (6; dt:202)
|
67
|
+
- userAgent: atc/1.0 watchOS/7.1 model/Watch5,4 hwp/t8006 build/18R590 (6; dt:202)
|
68
|
+
- userAgent: "(null)/(null) watchOS/5.0.1 model/Watch3,3 hwp/t8004 build/16R381 (6; dt:155)"
|
69
|
+
- app: Apple Podcasts automated checks
|
70
|
+
match:
|
71
|
+
regex: "(iTMS|itunesstored)"
|
72
|
+
platform: bot
|
73
|
+
- app: Applebot
|
74
|
+
match:
|
75
|
+
regex: "^Applebot/"
|
76
|
+
platform: bot
|
77
|
+
- app: Archive.org
|
78
|
+
match:
|
79
|
+
regex: archive\.org_bot
|
80
|
+
platform: bot
|
81
|
+
test:
|
82
|
+
userAgents:
|
83
|
+
- userAgent: Mozilla/5.0 (compatible; archive.org_bot http://archive.org/details/archive.org_bot)
|
84
|
+
- app: atheerfm
|
85
|
+
match:
|
86
|
+
regex: "^atheerfm/"
|
87
|
+
platform: bot
|
88
|
+
test:
|
89
|
+
userAgents:
|
90
|
+
- userAgent: atheerfm/1 CFNetwork/758.3.15 Darwin/15.3.0
|
91
|
+
- app: Audiomack
|
92
|
+
match:
|
93
|
+
regex: "^Audiomack Podcast Processor/"
|
94
|
+
platform: bot
|
95
|
+
test:
|
96
|
+
userAgents:
|
97
|
+
- userAgent: Audiomack Podcast Processor/1.0 (https://audiomack.com/)
|
98
|
+
- app: AudioWave feed parser
|
99
|
+
match:
|
100
|
+
regex: "^AudioWaveBot/1\\.0"
|
101
|
+
platform: bot
|
102
|
+
test:
|
103
|
+
userAgents:
|
104
|
+
- userAgent: AudioWaveBot/1.0
|
105
|
+
- app: AwarioSmartBot
|
106
|
+
match:
|
107
|
+
regex: "^AwarioSmartBot/"
|
108
|
+
platform: bot
|
109
|
+
test:
|
110
|
+
userAgents:
|
111
|
+
- userAgent: AwarioSmartBot/1.0 (+https://awario.com/bots.html; bots@awario.com)
|
112
|
+
- app: Babbar
|
113
|
+
match:
|
114
|
+
regex: Barkrowler/
|
115
|
+
platform: bot
|
116
|
+
- app: Baidu
|
117
|
+
match:
|
118
|
+
regex: "\\(ce\\.baidu\\.com"
|
119
|
+
platform: bot
|
120
|
+
test:
|
121
|
+
userAgents:
|
122
|
+
- userAgent: Baidu-YunGuanCe-SLABot(ce.baidu.com)
|
123
|
+
- app: bbot
|
124
|
+
match:
|
125
|
+
regex: "^bbot/"
|
126
|
+
platform: bot
|
127
|
+
test:
|
128
|
+
userAgents:
|
129
|
+
- userAgent: bbot/0.1
|
130
|
+
- app: British Library
|
131
|
+
match:
|
132
|
+
regex: "^bl\\.uk_ldfc_bot"
|
133
|
+
platform: bot
|
134
|
+
test:
|
135
|
+
userAgents:
|
136
|
+
- userAgent: bl.uk_ldfc_bot/3.4.0-20200518 ( http://www.bl.uk/aboutus/legaldeposit/websites/websites/faqswebmaster/index.html)
|
137
|
+
- app: CastFeedValidator
|
138
|
+
match:
|
139
|
+
regex: "^CastFeedValidator/"
|
140
|
+
platform: bot
|
141
|
+
test:
|
142
|
+
userAgents:
|
143
|
+
- userAgent: CastFeedValidator/3.0.5 (https://castfeedvalidator.com)
|
144
|
+
- app: Castopod
|
145
|
+
match:
|
146
|
+
regex: Castopod/1\.0
|
147
|
+
platform: bot
|
148
|
+
test:
|
149
|
+
userAgents:
|
150
|
+
- userAgent: Castopod/1.0
|
151
|
+
- app: Chartable
|
152
|
+
match:
|
153
|
+
regex: "^Trackable/"
|
154
|
+
platform: bot
|
155
|
+
- app: Clark-Crawler, unknown
|
156
|
+
match:
|
157
|
+
regex: "^clark-crawler2"
|
158
|
+
platform: bot
|
159
|
+
- app: Critical Mention
|
160
|
+
match:
|
161
|
+
regex: "^Podcast-CriticalMention/"
|
162
|
+
platform: bot
|
163
|
+
test:
|
164
|
+
userAgents:
|
165
|
+
- userAgent: Podcast-CriticalMention/1.0
|
166
|
+
- app: curl
|
167
|
+
match:
|
168
|
+
regex: "^curl|^libcurl/|^PycURL/| curl/"
|
169
|
+
platform: bot
|
170
|
+
test:
|
171
|
+
userAgents:
|
172
|
+
- userAgent: libcurl/7.65.1
|
173
|
+
- userAgent: PycURL/7.43.0 libcurl/7.47.0 GnuTLS/3.4.10 zlib/1.2.8 libidn/1.32
|
174
|
+
librtmp/2.3
|
175
|
+
- userAgent: GuzzleHttp/6.1.0 curl/7.50.1 PHP/7.0.13-0ubuntu0.16.10.1
|
176
|
+
- app: DataforSEO
|
177
|
+
match:
|
178
|
+
regex: DataForSeoBot/
|
179
|
+
platform: bot
|
180
|
+
test:
|
181
|
+
userAgents:
|
182
|
+
- userAgent: Mozilla/5.0 (compatible; DataForSeoBot/1.0; +https://dataforseo.com/dataforseo-bot)
|
183
|
+
- app: Datagnion Bot
|
184
|
+
match:
|
185
|
+
regex: "^datagnionbot"
|
186
|
+
platform: bot
|
187
|
+
- app: Daum
|
188
|
+
match:
|
189
|
+
regex: http://cs\.daum\.net
|
190
|
+
platform: bot
|
191
|
+
test:
|
192
|
+
userAgents:
|
193
|
+
- userAgent: Mozilla/5.0 (compatible; Daum/4.1; +http://cs.daum.net/faq/15/4118.html?faqId=28966)
|
194
|
+
- app: Deezer Podcasters
|
195
|
+
match:
|
196
|
+
regex: "^Deezer Podcasters/1\\.0"
|
197
|
+
platform: bot
|
198
|
+
- app: Digg
|
199
|
+
match:
|
200
|
+
regex: "^Digg "
|
201
|
+
platform: bot
|
202
|
+
test:
|
203
|
+
userAgents:
|
204
|
+
- userAgent: Digg Mozilla/5.0 (Digg/1.0; support@digg.com)
|
205
|
+
- app: dorada
|
206
|
+
match:
|
207
|
+
regex: support@dorada\.co\.uk
|
208
|
+
platform: bot
|
209
|
+
- app: DotBot
|
210
|
+
match:
|
211
|
+
regex: DotBot
|
212
|
+
platform: bot
|
213
|
+
test:
|
214
|
+
userAgents:
|
215
|
+
- userAgent: Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot,
|
216
|
+
help@moz.com)
|
217
|
+
- userAgent: Mozilla/5.0 (compatible; DotBot/1.2; https://opensiteexplorer.org/dotbot;
|
218
|
+
help@moz.com)
|
219
|
+
- app: Downcast Bot
|
220
|
+
match:
|
221
|
+
regex: downcast feed consumer/
|
222
|
+
platform: bot
|
223
|
+
test:
|
224
|
+
userAgents:
|
225
|
+
- userAgent: downcast feed consumer/0.0.175; (mode=dev; id=u2NgjBSPM6; downcast.fm)
|
226
|
+
- app: DuckDuckBot
|
227
|
+
match:
|
228
|
+
regex: DuckDuckBot
|
229
|
+
platform: bot
|
230
|
+
test:
|
231
|
+
userAgents:
|
232
|
+
- userAgent: Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)
|
233
|
+
- app: F-Secure Riddler
|
234
|
+
match:
|
235
|
+
regex: "^Riddler "
|
236
|
+
platform: bot
|
237
|
+
test:
|
238
|
+
userAgents:
|
239
|
+
- userAgent: Riddler (http://riddler.io/about)
|
240
|
+
- app: Facebook Bot
|
241
|
+
match:
|
242
|
+
regex: FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/
|
243
|
+
platform: bot
|
244
|
+
test:
|
245
|
+
userAgents:
|
246
|
+
- userAgent: facebookexternalhit/1.1 ( http://www.facebook.com/externalhit_uatext.php)
|
247
|
+
- userAgent: podcastbot
|
248
|
+
- userAgent: facebookexternalhit
|
249
|
+
- userAgent: Facebot
|
250
|
+
- userAgent: facebookexternalua
|
251
|
+
- userAgent: facebookplatform/1.0 (+http://developers.facebook.com)
|
252
|
+
- userAgent: acebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)
|
253
|
+
- app: Feedly
|
254
|
+
match:
|
255
|
+
regex: "^Feedly/"
|
256
|
+
platform: bot
|
257
|
+
test:
|
258
|
+
userAgents:
|
259
|
+
- userAgent: Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)
|
260
|
+
- app: FlexGet
|
261
|
+
match:
|
262
|
+
regex: "^FlexGet/"
|
263
|
+
platform: bot
|
264
|
+
test:
|
265
|
+
userAgents:
|
266
|
+
- userAgent: FlexGet/3.3.4 (www.flexget.com)
|
267
|
+
- app: Flipboard Proxy
|
268
|
+
match:
|
269
|
+
regex: FlipboardProxy/
|
270
|
+
platform: bot
|
271
|
+
test:
|
272
|
+
userAgents:
|
273
|
+
- userAgent: Mozilla/5.0 (compatible; FlipboardProxy/1.1; +http://flipboard.com/browserproxy)
|
274
|
+
- app: Fyyd
|
275
|
+
match:
|
276
|
+
regex: "^fyyd-poll"
|
277
|
+
platform: bot
|
278
|
+
- app: Go Storage Gateway V1
|
279
|
+
match:
|
280
|
+
regex: "^storagegw-v1-go$"
|
281
|
+
platform: bot
|
282
|
+
test:
|
283
|
+
userAgents:
|
284
|
+
- userAgent: storagegw-v1-go
|
285
|
+
- app: Goodpods Bot
|
286
|
+
match:
|
287
|
+
regex: Goodpods/\d+\.\d+
|
288
|
+
platform: bot
|
289
|
+
test:
|
290
|
+
userAgents:
|
291
|
+
- userAgent: Goodpods/2.2
|
292
|
+
- app: Google AdsBot
|
293
|
+
match:
|
294
|
+
regex: AdsBot-Google
|
295
|
+
platform: bot
|
296
|
+
- app: Google Podcasts Manager
|
297
|
+
match:
|
298
|
+
regex: Google-Podcast
|
299
|
+
platform: bot
|
300
|
+
- app: Googlebot
|
301
|
+
match:
|
302
|
+
regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google
|
303
|
+
platform: bot
|
304
|
+
test:
|
305
|
+
userAgents:
|
306
|
+
- userAgent: Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36
|
307
|
+
(KHTML, like Gecko) Chrome/86.0.4240.96 Mobile Safari/537.36 (compatible;
|
308
|
+
Googlebot/2.1; http://www.google.com/bot.html)
|
309
|
+
- userAgent: Googlebot-Image/1.0
|
310
|
+
- userAgent: Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers;
|
311
|
+
feed-id=4653447469100830145)
|
312
|
+
- app: Gumball
|
313
|
+
match:
|
314
|
+
regex: "^Gumball"
|
315
|
+
platform: bot
|
316
|
+
test:
|
317
|
+
userAgents:
|
318
|
+
- userAgent: Gumball.fm Analytics Prefix Checker
|
319
|
+
- app: Headliner
|
320
|
+
match:
|
321
|
+
regex: Headliner/
|
322
|
+
platform: bot
|
323
|
+
test:
|
324
|
+
userAgents:
|
325
|
+
- userAgent: Headliner/1.0.0 +https://headliner.app
|
326
|
+
- app: HubSpot Crawler
|
327
|
+
match:
|
328
|
+
regex: HubSpot Crawler
|
329
|
+
platform: bot
|
330
|
+
test:
|
331
|
+
userAgents:
|
332
|
+
- userAgent: Mozilla/5.0 (compatible; HubSpot Crawler; +https://www.hubspot.com)
|
333
|
+
- app: Internet Archive
|
334
|
+
match:
|
335
|
+
regex: Archive-It;|web\.archive\.org
|
336
|
+
platform: bot
|
337
|
+
test:
|
338
|
+
userAgents:
|
339
|
+
- userAgent: Mozilla/5.0 (compatible; special_archiver; Archive-It; http://archive-it.org/files/site-owners-special.html)
|
340
|
+
- userAgent: ia_archiver-web.archive.org
|
341
|
+
- app: Jaunt
|
342
|
+
match:
|
343
|
+
regex: "^Jaunt/"
|
344
|
+
platform: bot
|
345
|
+
test:
|
346
|
+
userAgents:
|
347
|
+
- userAgent: Jaunt/1.5
|
348
|
+
- app: l'Institut national de l'audiovisuel
|
349
|
+
match:
|
350
|
+
regex: INA dlweb
|
351
|
+
platform: bot
|
352
|
+
- app: Libsyn
|
353
|
+
match:
|
354
|
+
regex: "^Libsyn4"
|
355
|
+
platform: bot
|
356
|
+
test:
|
357
|
+
userAgents:
|
358
|
+
- userAgent: Libsyn4-Download
|
359
|
+
- app: libwww-perl
|
360
|
+
match:
|
361
|
+
regex: "^libwww-perl| libwww-perl"
|
362
|
+
platform: bot
|
363
|
+
test:
|
364
|
+
userAgents:
|
365
|
+
- userAgent: LWP::Simple/6.34 libwww-perl/6.34
|
366
|
+
- app: Livelap Crawler
|
367
|
+
match:
|
368
|
+
regex: LivelapBot
|
369
|
+
platform: bot
|
370
|
+
- app: LTX71
|
371
|
+
match:
|
372
|
+
regex: "^ltx71 "
|
373
|
+
platform: bot
|
374
|
+
test:
|
375
|
+
userAgents:
|
376
|
+
- userAgent: ltx71 - (http://ltx71.com/)
|
377
|
+
- app: MauiBot
|
378
|
+
match:
|
379
|
+
regex: "^MauiBot"
|
380
|
+
platform: bot
|
381
|
+
test:
|
382
|
+
userAgents:
|
383
|
+
- userAgent: MauiBot (crawler.feedback dc@gmail.com)
|
384
|
+
- app: Mastodon Bot
|
385
|
+
match:
|
386
|
+
regex: rb/.*Mastodon/
|
387
|
+
platform: bot
|
388
|
+
test:
|
389
|
+
userAgents:
|
390
|
+
- userAgent: http.rb/5.0.4 (Mastodon/3.5.3; +https://mastodon.xyz/) Bot
|
391
|
+
- app: Microsoft Bingbot
|
392
|
+
match:
|
393
|
+
regex: "(BingPreview/|adidxbot/|[bB]ingbot/)"
|
394
|
+
platform: bot
|
395
|
+
test:
|
396
|
+
userAgents:
|
397
|
+
- userAgent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534 (KHTML, like
|
398
|
+
Gecko) BingPreview/1.0b
|
399
|
+
- userAgent: Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)
|
400
|
+
- app: Microsoft Office
|
401
|
+
match:
|
402
|
+
regex: ms-office; MSOffice
|
403
|
+
platform: bot
|
404
|
+
test:
|
405
|
+
userAgents:
|
406
|
+
- userAgent: Mozilla/4.0 (compatible; ms-office; MSOffice 16)
|
407
|
+
- app: Mixcloud Podcast Importer
|
408
|
+
match:
|
409
|
+
regex: "^MixcloudPodcastImporter/"
|
410
|
+
platform: bot
|
411
|
+
test:
|
412
|
+
userAgents:
|
413
|
+
- userAgent: MixcloudPodcastImporter/2.0
|
414
|
+
- app: MJ12bot
|
415
|
+
match:
|
416
|
+
regex: ".*MJ12bot"
|
417
|
+
platform: bot
|
418
|
+
test:
|
419
|
+
userAgents:
|
420
|
+
- userAgent: Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)
|
421
|
+
- app: Mozilla Bot
|
422
|
+
match:
|
423
|
+
regex: "^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"
|
424
|
+
platform: bot
|
425
|
+
test:
|
426
|
+
userAgents:
|
427
|
+
- userAgent: Mozilla/5.0
|
428
|
+
- userAgent: Mozilla
|
429
|
+
- userAgent: Mozilla/5.0...
|
430
|
+
- userAgent: "'Mozilla/5.0"
|
431
|
+
- userAgent: "(Mozilla/5.0)"
|
432
|
+
- app: MSN Bot
|
433
|
+
match:
|
434
|
+
regex: "^msnbot/"
|
435
|
+
platform: bot
|
436
|
+
- app: Neevabot
|
437
|
+
match:
|
438
|
+
regex: ".*Neevabot"
|
439
|
+
platform: bot
|
440
|
+
test:
|
441
|
+
userAgents:
|
442
|
+
- userAgent: Mozilla/5.0 (compatible; Neevabot/1.0; https://neeva.com/neevabot)
|
443
|
+
- app: Netcraft Survey Agent
|
444
|
+
match:
|
445
|
+
regex: " NetcraftSurveyAgent/"
|
446
|
+
platform: bot
|
447
|
+
test:
|
448
|
+
userAgents:
|
449
|
+
- userAgent: Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)
|
450
|
+
- app: OgScrper
|
451
|
+
match:
|
452
|
+
regex: OgScrper
|
453
|
+
platform: bot
|
454
|
+
test:
|
455
|
+
userAgents:
|
456
|
+
- userAgent: OgScrper
|
457
|
+
- app: OkDownload
|
458
|
+
match:
|
459
|
+
regex: "^OkDownload/"
|
460
|
+
platform: bot
|
461
|
+
- app: OP3 Fetcher
|
462
|
+
match:
|
463
|
+
regex: "^op3-fetcher/"
|
464
|
+
platform: bot
|
465
|
+
test:
|
466
|
+
userAgents:
|
467
|
+
- userAgent: op3-fetcher/1.0 (https://op3.dev)
|
468
|
+
- app: Overcast feed parser
|
469
|
+
match:
|
470
|
+
regex: "^Overcast/1\\.0 Podcast Sync"
|
471
|
+
platform: bot
|
472
|
+
test:
|
473
|
+
userAgents:
|
474
|
+
- userAgent: Overcast/1.0 Podcast Sync
|
475
|
+
- app: Pandora RSS crawler
|
476
|
+
match:
|
477
|
+
regex: "^PandoraRSSCrawler"
|
478
|
+
platform: bot
|
479
|
+
- app: PaperLi
|
480
|
+
match:
|
481
|
+
regex: PaperLiBot/
|
482
|
+
platform: bot
|
483
|
+
test:
|
484
|
+
userAgents:
|
485
|
+
- userAgent: Mozilla/5.0 (compatible; PaperLiBot/2.1; https://support.paper.li/entries/20023257-what-is-paper-li)
|
486
|
+
- app: PetalBot
|
487
|
+
match:
|
488
|
+
regex: PetalBot
|
489
|
+
platform: bot
|
490
|
+
test:
|
491
|
+
userAgents:
|
492
|
+
- userAgent: Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like
|
493
|
+
Gecko) Mobile Safari/537.36 (compatible; PetalBot; https://aspiegel.com/petalbot)
|
494
|
+
- app: Pingdom
|
495
|
+
match:
|
496
|
+
regex: "^Pingdom"
|
497
|
+
platform: bot
|
498
|
+
- app: PlayerFM Podcast Sync
|
499
|
+
match:
|
500
|
+
regex: PlayerFM/.* Podcast Sync
|
501
|
+
platform: bot
|
502
|
+
test:
|
503
|
+
userAgents:
|
504
|
+
- userAgent: PlayerFM/1.0 Podcast Sync (0 subscribers; url=https://player.fm/series/series-2567872)
|
505
|
+
- app: Podbean Importer
|
506
|
+
match:
|
507
|
+
regex: "^Podbean Importer"
|
508
|
+
platform: bot
|
509
|
+
test:
|
510
|
+
userAgents:
|
511
|
+
- userAgent: Podbean Importer
|
512
|
+
- app: Podcastindex.org
|
513
|
+
match:
|
514
|
+
regex: "^Podcastindex\\.org/"
|
515
|
+
platform: bot
|
516
|
+
test:
|
517
|
+
userAgents:
|
518
|
+
- userAgent: Podcastindex.org/v0.3.3 (Aggrivate)
|
519
|
+
- app: PodCloud
|
520
|
+
match:
|
521
|
+
regex: podCloud
|
522
|
+
platform: bot
|
523
|
+
- app: Podcorn
|
524
|
+
match:
|
525
|
+
regex: Podcorn/
|
526
|
+
platform: bot
|
527
|
+
test:
|
528
|
+
userAgents:
|
529
|
+
- userAgent: Podcorn/1.0
|
530
|
+
- app: PodderBot
|
531
|
+
match:
|
532
|
+
regex: PodderBot/
|
533
|
+
platform: bot
|
534
|
+
test:
|
535
|
+
userAgents:
|
536
|
+
- userAgent: PodderBot/1.0
|
537
|
+
- app: Podfollow
|
538
|
+
match:
|
539
|
+
regex: podfollowbot/
|
540
|
+
platform: bot
|
541
|
+
test:
|
542
|
+
userAgents:
|
543
|
+
- userAgent: Mozilla/5.0 https://podfollow.com/crawling podfollowbot/1.0
|
544
|
+
- app: Podgrab
|
545
|
+
match:
|
546
|
+
regex: "^Podgrab$"
|
547
|
+
platform: bot
|
548
|
+
test:
|
549
|
+
userAgents:
|
550
|
+
- userAgent: Podgrab
|
551
|
+
- app: Podhound
|
552
|
+
match:
|
553
|
+
regex: PodhoundBeta
|
554
|
+
platform: bot
|
555
|
+
test:
|
556
|
+
userAgents:
|
557
|
+
- userAgent: PodhoundBeta
|
558
|
+
- app: Podio Bot
|
559
|
+
match:
|
560
|
+
regex: "^Podio/"
|
561
|
+
platform: bot
|
562
|
+
test:
|
563
|
+
userAgents:
|
564
|
+
- userAgent: Podio/1.0
|
565
|
+
- app: Podnews
|
566
|
+
match:
|
567
|
+
regex: PodnewsBot
|
568
|
+
platform: bot
|
569
|
+
- app: Podnods Bot
|
570
|
+
match:
|
571
|
+
regex: "(podnods-crawler|podnods)"
|
572
|
+
platform: bot
|
573
|
+
- app: Podscribe
|
574
|
+
match:
|
575
|
+
regex: "(^Adswizz-podscribe/|^Podscribe/)"
|
576
|
+
platform: bot
|
577
|
+
test:
|
578
|
+
userAgents:
|
579
|
+
- userAgent: Adswizz-podscribe/1.0
|
580
|
+
- userAgent: Podscribe/1.1
|
581
|
+
- app: Podverse Feed Parser
|
582
|
+
match:
|
583
|
+
regex: "^Podverse/Feed Parser"
|
584
|
+
platform: bot
|
585
|
+
- app: PodvineBot
|
586
|
+
match:
|
587
|
+
regex: "^PodvineBot/"
|
588
|
+
platform: bot
|
589
|
+
test:
|
590
|
+
userAgents:
|
591
|
+
- userAgent: PodvineBot/4.0.1 (www.podvine.com)
|
592
|
+
- app: PostRank Bot
|
593
|
+
match:
|
594
|
+
regex: "^PostRank/"
|
595
|
+
platform: bot
|
596
|
+
test:
|
597
|
+
userAgents:
|
598
|
+
- userAgent: PostRank/2.0 (postrank.com; 1 subscribers)
|
599
|
+
- app: PodwatchPro
|
600
|
+
match:
|
601
|
+
regex: Podwatch-Pro Crawler
|
602
|
+
platform: bot
|
603
|
+
test:
|
604
|
+
userAgents:
|
605
|
+
- userAgent: Podwatch-Pro Crawler
|
606
|
+
- app: Puppeteer
|
607
|
+
match:
|
608
|
+
regex: " HeadlessChrome/\\d"
|
609
|
+
platform: bot
|
610
|
+
test:
|
611
|
+
userAgents:
|
612
|
+
- userAgent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36
|
613
|
+
(KHTML, like Gecko) HeadlessChrome/105.0.5173.0 Safari/537.36
|
614
|
+
- app: python-requests
|
615
|
+
match:
|
616
|
+
regex: python-requests
|
617
|
+
platform: bot
|
618
|
+
- app: RedCircle
|
619
|
+
match:
|
620
|
+
regex: RedCircle
|
621
|
+
platform: bot
|
622
|
+
test:
|
623
|
+
userAgents:
|
624
|
+
- userAgent: RedCircle
|
625
|
+
- app: Ridder Bot
|
626
|
+
match:
|
627
|
+
regex: " RidderBot/"
|
628
|
+
platform: bot
|
629
|
+
test:
|
630
|
+
userAgents:
|
631
|
+
- userAgent: Mozilla/5.0 (compatible; RidderBot/1.0; bot@ridder.co)
|
632
|
+
- app: RSS to Telegram
|
633
|
+
match:
|
634
|
+
regex: "^RSStT"
|
635
|
+
platform: bot
|
636
|
+
test:
|
637
|
+
userAgents:
|
638
|
+
- userAgent: RSStT/2.2.1 RSS Reader
|
639
|
+
- app: RSSRadio
|
640
|
+
match:
|
641
|
+
regex: "^RSSRadio \\("
|
642
|
+
platform: bot
|
643
|
+
- app: Ruby Mechanize
|
644
|
+
match:
|
645
|
+
regex: "^Mechanize|[ -]Mechanize/"
|
646
|
+
platform: bot
|
647
|
+
test:
|
648
|
+
userAgents:
|
649
|
+
- userAgent: Mozilla/5.0 (compatible; Mechanize/2.7.4)
|
650
|
+
- userAgent: WWW-Mechanize/1.72
|
651
|
+
- app: Screaming Frog SEO Spider
|
652
|
+
match:
|
653
|
+
regex: "^Screaming Frog "
|
654
|
+
platform: bot
|
655
|
+
test:
|
656
|
+
userAgents:
|
657
|
+
- userAgent: Screaming Frog SEO Spider/5.1
|
658
|
+
- app: SearchAtlas.com SEO Crawler
|
659
|
+
match:
|
660
|
+
regex: "^SearchAtlas.*Crawler"
|
661
|
+
platform: bot
|
662
|
+
test:
|
663
|
+
userAgents:
|
664
|
+
- userAgent: SearchAtlas.com SEO Crawler
|
665
|
+
- app: SEMrushBot
|
666
|
+
match:
|
667
|
+
regex: SemrushBot/|^SEMrushBot$
|
668
|
+
platform: bot
|
669
|
+
test:
|
670
|
+
userAgents:
|
671
|
+
- userAgent: Mozilla/5.0 (compatible; SemrushBot/6~bl; http://www.semrush.com/bot.html)
|
672
|
+
- userAgent: SEMrushBot
|
673
|
+
- app: Serendeputy
|
674
|
+
match:
|
675
|
+
regex: SerendeputyBot/
|
676
|
+
platform: bot
|
677
|
+
test:
|
678
|
+
userAgents:
|
679
|
+
- userAgent: SerendeputyBot/0.8.6 (http://serendeputy.com/about/serendeputy-bot)
|
680
|
+
- app: Simplecast
|
681
|
+
match:
|
682
|
+
regex: "^Simplecast$"
|
683
|
+
platform: bot
|
684
|
+
test:
|
685
|
+
userAgents:
|
686
|
+
- userAgent: Simplecast
|
687
|
+
- app: Slack Bot
|
688
|
+
match:
|
689
|
+
regex: "^Slackbot 1\\.0"
|
690
|
+
platform: bot
|
691
|
+
test:
|
692
|
+
userAgents:
|
693
|
+
- userAgent: Slackbot 1.0 ( https://api.slack.com/robots)
|
694
|
+
- app: Snapchat Bot
|
695
|
+
match:
|
696
|
+
regex: "://developers\\.snap\\.com/robots"
|
697
|
+
platform: bot
|
698
|
+
test:
|
699
|
+
userAgents:
|
700
|
+
- userAgent: Snap URL Preview Service; bot; snapchat; https://developers.snap.com/robots
|
701
|
+
- app: SoundOn Bot
|
702
|
+
match:
|
703
|
+
regex: "^SoundOn/[\\d.]+\\s+\\(bot"
|
704
|
+
platform: bot
|
705
|
+
- app: Spotify cache service
|
706
|
+
match:
|
707
|
+
regex: "^Spotify/1\\.0$"
|
708
|
+
platform: bot
|
709
|
+
test:
|
710
|
+
userAgents:
|
711
|
+
- userAgent: Spotify/1.0
|
712
|
+
- app: Stitcher Bot
|
713
|
+
match:
|
714
|
+
regex: "^StitcherBot"
|
715
|
+
platform: bot
|
716
|
+
- app: Timpi search crawler
|
717
|
+
match:
|
718
|
+
regex: Timpibot/
|
719
|
+
platform: bot
|
720
|
+
test:
|
721
|
+
userAgents:
|
722
|
+
- userAgent: Timpibot/0.8 ( http://www.timpi.io)
|
723
|
+
- app: Tiny Tiny RSS
|
724
|
+
match:
|
725
|
+
regex: "^Tiny Tiny RSS/"
|
726
|
+
platform: bot
|
727
|
+
test:
|
728
|
+
userAgents:
|
729
|
+
- userAgent: Tiny Tiny RSS/22.12-c30b24d09 (Unsupported) (https://tt-rss.org/)
|
730
|
+
- app: Trendsmap Resolver
|
731
|
+
match:
|
732
|
+
regex: TrendsmapResolver/
|
733
|
+
platform: bot
|
734
|
+
- app: Twingly Bot
|
735
|
+
match:
|
736
|
+
regex: Twingly Recon;
|
737
|
+
platform: bot
|
738
|
+
test:
|
739
|
+
userAgents:
|
740
|
+
- userAgent: Mozilla/5.0 (compatible; Twingly Recon; twingly.com)
|
741
|
+
- app: Twitterbot
|
742
|
+
match:
|
743
|
+
regex: "^Twitterbot"
|
744
|
+
platform: bot
|
745
|
+
- app: Typhoeus
|
746
|
+
match:
|
747
|
+
regex: "^Typhoeus"
|
748
|
+
platform: bot
|
749
|
+
- app: UCast
|
750
|
+
match:
|
751
|
+
regex: "^UCast/"
|
752
|
+
platform: bot
|
753
|
+
test:
|
754
|
+
userAgents:
|
755
|
+
- userAgent: UCast/1.0 Podcast Sync (1 subscribers; feed-id=aHR0cHM6Ly9mZWVkcy5idXp6c3Byb3V0LmNvbS8yMDg0OTQucnNzEAEBAD32-0242-42AC-8583-21E9BAD8C544;
|
756
|
+
+http://www.ucastapp.com/)
|
757
|
+
- app: TelegramBot
|
758
|
+
match:
|
759
|
+
regex: "^TelegramBot "
|
760
|
+
platform: bot
|
761
|
+
test:
|
762
|
+
userAgents:
|
763
|
+
- userAgent: TelegramBot (like TwitterBot)
|
764
|
+
- app: Vurbl
|
765
|
+
match:
|
766
|
+
regex: VurblBot
|
767
|
+
platform: bot
|
768
|
+
test:
|
769
|
+
userAgents:
|
770
|
+
- userAgent: Mozilla/5.0 https://vurbl.com VurblBot/1.0
|
771
|
+
- app: Wget
|
772
|
+
match:
|
773
|
+
regex: Wget
|
774
|
+
platform: bot
|
775
|
+
- app: weborama
|
776
|
+
match:
|
777
|
+
regex: "^weborama-fetcher"
|
778
|
+
platform: bot
|
779
|
+
test:
|
780
|
+
userAgents:
|
781
|
+
- userAgent: weborama-fetcher (+http://www.weborama.com)
|
782
|
+
- app: Windows Crawler
|
783
|
+
match:
|
784
|
+
regex: "^ZDM/.*Windows"
|
785
|
+
platform: bot
|
786
|
+
test:
|
787
|
+
userAgents:
|
788
|
+
- userAgent: ZDM/4.0; Windows Mobile 7.0;
|
789
|
+
- app: WordPress
|
790
|
+
match:
|
791
|
+
regex: "^WordPress"
|
792
|
+
platform: bot
|
793
|
+
- app: wsrv.nl
|
794
|
+
match:
|
795
|
+
regex: https?://wsrv\.nl/
|
796
|
+
platform: bot
|
797
|
+
test:
|
798
|
+
userAgents:
|
799
|
+
- userAgent: Mozilla/5.0 (compatible; ImageFetcher/9.0; +http://wsrv.nl/)
|
800
|
+
- app: YaCy
|
801
|
+
match:
|
802
|
+
regex: "^yacybot"
|
803
|
+
platform: bot
|
804
|
+
test:
|
805
|
+
userAgents:
|
806
|
+
- userAgent: yacybot (/global; amd64 Linux 5.9.8-zen1-1-zen; java 1.8.0_265; Europe/de)
|
807
|
+
http://yacy.net/bot.html
|
808
|
+
- app: Yahoo Crawler
|
809
|
+
match:
|
810
|
+
regex: Yahoo! Slurp
|
811
|
+
platform: bot
|
812
|
+
test:
|
813
|
+
userAgents:
|
814
|
+
- userAgent: Crawler Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
|
815
|
+
- app: YandexBot
|
816
|
+
match:
|
817
|
+
regex: YandexBot/
|
818
|
+
platform: bot
|
819
|
+
- app: Zapier
|
820
|
+
match:
|
821
|
+
regex: "^Zapier$"
|
822
|
+
platform: bot
|
823
|
+
test:
|
824
|
+
userAgents:
|
825
|
+
- userAgent: Zapier
|
826
|
+
- app: Zencastr
|
827
|
+
match:
|
828
|
+
regex: "^Zencastr/"
|
829
|
+
platform: bot
|
830
|
+
test:
|
831
|
+
userAgents:
|
832
|
+
- userAgent: Zencastr/2.0
|