pod_ident 1.1.2 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/lib/detection_rules.yml +6 -2
- data/lib/detection_rules_bots.yml +84 -3
- data/lib/detection_rules_custom_bots.yml +15 -0
- data/lib/pod_ident/detection_rules.rb +1 -1
- data/lib/pod_ident/detection_rules_bots.rb +1 -1
- data/lib/pod_ident/detection_rules_custom_bots.rb +3 -0
- data/lib/pod_ident/rule_parser.rb +33 -1
- data/lib/pod_ident/version.rb +1 -1
- data/lib/pod_ident.rb +12 -2
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e296aac18aa3dd0c789adaaffb6037075b87294486c04d244f44b2993d403c97
|
4
|
+
data.tar.gz: 4e8bbc8d91af324ac8ccfae2ade4f542fa6e1df91799a717ba2400e9d668a784
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5cea987f5581c97a005f02a137ae8fddc4e60b65c029eda4f41a957f736cfc759615565c084cce75975fababac84de6e217c8f5f9d80fdcca6744cf4ed716c42
|
7
|
+
data.tar.gz: 58a0e9f80a38cdaa167313666979313dba482f65760bdf0b2a4100097a3ab5b9b5ad12a34c7e90d2ed64954b0747d03cf71c80ad60e91374437bd144350adb10
|
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
data/lib/detection_rules.yml
CHANGED
@@ -191,7 +191,7 @@
|
|
191
191
|
- userAgent: Overcast/3.0 (+http://overcast.fm/; iOS podcast app) BMID/E67A460F81
|
192
192
|
platform: iOS
|
193
193
|
|
194
|
-
- app:
|
194
|
+
- app: radio.net
|
195
195
|
match:
|
196
196
|
startsWith:
|
197
197
|
- radio.net
|
@@ -204,14 +204,18 @@
|
|
204
204
|
- radio.pt
|
205
205
|
- radio.pl
|
206
206
|
platform:
|
207
|
-
regex: '(Android|Darwin)'
|
207
|
+
regex: '(Android|Darwin|Linux)'
|
208
208
|
replacements:
|
209
209
|
- name: Darwin
|
210
210
|
replaceWith: iOS
|
211
|
+
- name: Linux
|
212
|
+
replaceWith: Android
|
211
213
|
test:
|
212
214
|
userAgents:
|
213
215
|
- userAgent: radio.de 4.9.3 (softwinner/QT-7; Android 4.4.2; de_DE)
|
214
216
|
platform: Android
|
217
|
+
- userAgent: radio.de/5.13.0. 1-app (Linux; 13) ExoPlayerLib/2.19.1
|
218
|
+
platform: Android
|
215
219
|
- userAgent: radio.de/3735 CFNetwork/758.5.3 Darwin/15.6.0
|
216
220
|
platform: iOS
|
217
221
|
|
@@ -6,6 +6,13 @@
|
|
6
6
|
test:
|
7
7
|
userAgents:
|
8
8
|
- userAgent: AAABot
|
9
|
+
- app: Adswizz
|
10
|
+
match:
|
11
|
+
regex: "^AIS VirtualListener"
|
12
|
+
platform: bot
|
13
|
+
test:
|
14
|
+
userAgents:
|
15
|
+
- userAgent: AIS VirtualListener
|
9
16
|
- app: AhrefsBot
|
10
17
|
match:
|
11
18
|
regex: AhrefsBot/
|
@@ -69,7 +76,7 @@
|
|
69
76
|
(6; dt:155)"
|
70
77
|
- app: Apple Podcasts automated checks
|
71
78
|
match:
|
72
|
-
regex: "(iTMS|itunesstored)"
|
79
|
+
regex: "(iTMS|itunesstored|itms)"
|
73
80
|
platform: bot
|
74
81
|
- app: Applebot
|
75
82
|
match:
|
@@ -158,11 +165,12 @@
|
|
158
165
|
- userAgent: CastFeedValidator/3.0.5 (https://castfeedvalidator.com)
|
159
166
|
- app: Castopod
|
160
167
|
match:
|
161
|
-
regex: Castopod
|
168
|
+
regex: Castopod/\d
|
162
169
|
platform: bot
|
163
170
|
test:
|
164
171
|
userAgents:
|
165
172
|
- userAgent: Castopod/1.0
|
173
|
+
- userAgent: Castopod/1.1.2
|
166
174
|
- app: Chartable
|
167
175
|
match:
|
168
176
|
regex: "^Trackable/"
|
@@ -279,6 +287,14 @@
|
|
279
287
|
test:
|
280
288
|
userAgents:
|
281
289
|
- userAgent: Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)
|
290
|
+
- app: Feedspot Fetcher
|
291
|
+
match:
|
292
|
+
regex: https://www\.feedspot\.com/fs/fetcher
|
293
|
+
platform: bot
|
294
|
+
test:
|
295
|
+
userAgents:
|
296
|
+
- userAgent: Mozilla/5.0 (compatible; Feedspot/1.0 (+https://www.feedspot.com/fs/fetcher;
|
297
|
+
like FeedFetcher-Google)
|
282
298
|
- app: FlexGet
|
283
299
|
match:
|
284
300
|
regex: "^FlexGet/"
|
@@ -311,6 +327,13 @@
|
|
311
327
|
test:
|
312
328
|
userAgents:
|
313
329
|
- userAgent: Goodpods/2.2
|
330
|
+
- app: Google Adsense Bot
|
331
|
+
match:
|
332
|
+
regex: Mediapartners-Google
|
333
|
+
platform: bot
|
334
|
+
test:
|
335
|
+
userAgents:
|
336
|
+
- userAgent: Mediapartners-Google
|
314
337
|
- app: Google AdsBot
|
315
338
|
match:
|
316
339
|
regex: AdsBot-Google
|
@@ -321,7 +344,7 @@
|
|
321
344
|
platform: bot
|
322
345
|
- app: Googlebot
|
323
346
|
match:
|
324
|
-
regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google
|
347
|
+
regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google|google-xrawler
|
325
348
|
platform: bot
|
326
349
|
test:
|
327
350
|
userAgents:
|
@@ -331,6 +354,7 @@
|
|
331
354
|
- userAgent: Googlebot-Image/1.0
|
332
355
|
- userAgent: Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers;
|
333
356
|
feed-id=4653447469100830145)
|
357
|
+
- userAgent: google-xrawler
|
334
358
|
- app: Gumball
|
335
359
|
match:
|
336
360
|
regex: "^Gumball"
|
@@ -469,6 +493,13 @@
|
|
469
493
|
test:
|
470
494
|
userAgents:
|
471
495
|
- userAgent: Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)
|
496
|
+
- app: Newspaper
|
497
|
+
match:
|
498
|
+
regex: "^newspaper/\\d"
|
499
|
+
platform: bot
|
500
|
+
test:
|
501
|
+
userAgents:
|
502
|
+
- userAgent: newspaper/0.3.0
|
472
503
|
- app: OgScrper
|
473
504
|
match:
|
474
505
|
regex: OgScrper
|
@@ -538,6 +569,20 @@
|
|
538
569
|
test:
|
539
570
|
userAgents:
|
540
571
|
- userAgent: Podcastindex.org/v0.3.3 (Aggrivate)
|
572
|
+
- app: Podcast de facto Standard
|
573
|
+
match:
|
574
|
+
regex: "^PodcastStandard/"
|
575
|
+
platform: bot
|
576
|
+
test:
|
577
|
+
userAgents:
|
578
|
+
- userAgent: PodcastStandard/0.1.0 (+https://podcast-standard.org)
|
579
|
+
- app: Podcast Archiver
|
580
|
+
match:
|
581
|
+
regex: "^Podcast%20Archiver/"
|
582
|
+
platform: bot
|
583
|
+
test:
|
584
|
+
userAgents:
|
585
|
+
- userAgent: Podcast%20Archiver/3000000003 CFNetwork/1406.0.4 Darwin/22.4.0
|
541
586
|
- app: PodCloud
|
542
587
|
match:
|
543
588
|
regex: podCloud
|
@@ -637,6 +682,13 @@
|
|
637
682
|
match:
|
638
683
|
regex: python-requests
|
639
684
|
platform: bot
|
685
|
+
- app: Qiniu spider
|
686
|
+
match:
|
687
|
+
regex: qiniu.*spider
|
688
|
+
platform: bot
|
689
|
+
test:
|
690
|
+
userAgents:
|
691
|
+
- userAgent: z0-qiniu-imgstg-spider-1.0
|
640
692
|
- app: RedCircle
|
641
693
|
match:
|
642
694
|
regex: RedCircle
|
@@ -742,6 +794,14 @@
|
|
742
794
|
test:
|
743
795
|
userAgents:
|
744
796
|
- userAgent: SubstackContentFetch/1.0 (https://substack.com/)
|
797
|
+
- app: SupportingCast
|
798
|
+
match:
|
799
|
+
regex: "^SupportingCast(/.*)?$"
|
800
|
+
platform: bot
|
801
|
+
test:
|
802
|
+
userAgents:
|
803
|
+
- userAgent: SupportingCast/1.0
|
804
|
+
- userAgent: SupportingCast
|
745
805
|
- app: Timpi search crawler
|
746
806
|
match:
|
747
807
|
regex: Timpibot/
|
@@ -790,6 +850,20 @@
|
|
790
850
|
test:
|
791
851
|
userAgents:
|
792
852
|
- userAgent: TelegramBot (like TwitterBot)
|
853
|
+
- app: theTradeDesk Content Web Scraper
|
854
|
+
match:
|
855
|
+
regex: TTD-Content
|
856
|
+
platform: bot
|
857
|
+
test:
|
858
|
+
userAgents:
|
859
|
+
- userAgent: Mozilla/5.0 (compatible; TTD-Content; +https://www.thetradedesk.com/general/ttd-content)
|
860
|
+
- app: Veritone Engine Toolkit
|
861
|
+
match:
|
862
|
+
regex: "^veritone/engine-toolkit"
|
863
|
+
platform: bot
|
864
|
+
test:
|
865
|
+
userAgents:
|
866
|
+
- userAgent: veritone/engine-toolkit-3.0
|
793
867
|
- app: Vurbl
|
794
868
|
match:
|
795
869
|
regex: VurblBot
|
@@ -815,6 +889,13 @@
|
|
815
889
|
test:
|
816
890
|
userAgents:
|
817
891
|
- userAgent: ZDM/4.0; Windows Mobile 7.0;
|
892
|
+
- app: Windows PowerShell
|
893
|
+
match:
|
894
|
+
regex: WindowsPowerShell/
|
895
|
+
platform: bot
|
896
|
+
test:
|
897
|
+
userAgents:
|
898
|
+
- userAgent: Mozilla/5.0 (Windows NT; Windows NT 10.0; en-US) WindowsPowerShell/5.1.19041.2673
|
818
899
|
- app: WordPress
|
819
900
|
match:
|
820
901
|
regex: "^WordPress"
|
@@ -1,3 +1,3 @@
|
|
1
1
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running "bin/parse-rules"
|
2
2
|
|
3
|
-
RULES = [{:app=>"Apple Podcasts", :match=>{"startsWith"=>"AppleCoreMedia"}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"startsWith"=>"itunesstored"}, :platform=>{"regex"=>"(iPad|iPod|iPhone)"}}, {:app=>"iTunes", :match=>{"startsWith"=>"iTunes", "excluding"=>{"regex"=>"Downcast|iCatcher|SqueezeCenter|SqueezeNetwork|MusicServer"}}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"includes"=>"watchOS", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"text"=>"watchOS"}}, {:app=>"Spotify", :match=>{"startsWith"=>"Spotify", "excluding"=>{"regex"=>"^Spotify/1\\.0$"}}, :platform=>{"regexes"=>["(Android|iOS)"]}}, {:app=>"CastBox", :match=>{"startsWith"=>["CastBox", "Castbox"]}, :platform=>{"regexes"=>["(Android|iOS|OS\\sVersion)", "(CastBox)"], "replacements"=>[{"name"=>"OS Version", "replaceWith"=>"iOS"}, {"name"=>"CastBox", "replaceWith"=>"Android"}]}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Alexa"}, :platform=>{"text"=>"Alexa-capable device"}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Echo"}, :platform=>{"text"=>"Amazon Echo", "userAgents"=>[{"userAgent"=>"Echo/1.0(APNG)", "platform"=>"Amazon Echo"}]}}, {:app=>"Deezer", :match=>{"startsWith"=>"Deezer"}, :platform=>{"regexes"=>["(Android|Darwin)", "\\((\\w+\\s*\\w*)"], "replacements"=>[{"name"=>"osx", "replaceWith"=>"Mac"}, {"name"=>"Darwin", "replaceWith"=>"Apple Device"}]}}, {:app=>"Overcast", :match=>{"startsWith"=>"Overcast", "excluding"=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}}, :platform=>{"text"=>"iOS"}}, {:app=>"
|
3
|
+
RULES = [{:app=>"Apple Podcasts", :match=>{"startsWith"=>"AppleCoreMedia"}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"startsWith"=>"itunesstored"}, :platform=>{"regex"=>"(iPad|iPod|iPhone)"}}, {:app=>"iTunes", :match=>{"startsWith"=>"iTunes", "excluding"=>{"regex"=>"Downcast|iCatcher|SqueezeCenter|SqueezeNetwork|MusicServer"}}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"includes"=>"watchOS", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"text"=>"watchOS"}}, {:app=>"Spotify", :match=>{"startsWith"=>"Spotify", "excluding"=>{"regex"=>"^Spotify/1\\.0$"}}, :platform=>{"regexes"=>["(Android|iOS)"]}}, {:app=>"CastBox", :match=>{"startsWith"=>["CastBox", "Castbox"]}, :platform=>{"regexes"=>["(Android|iOS|OS\\sVersion)", "(CastBox)"], "replacements"=>[{"name"=>"OS Version", "replaceWith"=>"iOS"}, {"name"=>"CastBox", "replaceWith"=>"Android"}]}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Alexa"}, :platform=>{"text"=>"Alexa-capable device"}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Echo"}, :platform=>{"text"=>"Amazon Echo", "userAgents"=>[{"userAgent"=>"Echo/1.0(APNG)", "platform"=>"Amazon Echo"}]}}, {:app=>"Deezer", :match=>{"startsWith"=>"Deezer"}, :platform=>{"regexes"=>["(Android|Darwin)", "\\((\\w+\\s*\\w*)"], "replacements"=>[{"name"=>"osx", "replaceWith"=>"Mac"}, {"name"=>"Darwin", "replaceWith"=>"Apple Device"}]}}, {:app=>"Overcast", :match=>{"startsWith"=>"Overcast", "excluding"=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}}, :platform=>{"text"=>"iOS"}}, {:app=>"radio.net", :match=>{"startsWith"=>["radio.net", "radio.de", "radio.at", "radio.fr", "radio.dk", "radio.es", "radio.it", "radio.pt", "radio.pl"]}, :platform=>{"regex"=>"(Android|Darwin|Linux)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}, {"name"=>"Linux", 
"replaceWith"=>"Android"}]}}, {:app=>"PocketCasts", :match=>{"startsWith"=>["PocketCasts", "Pocket Casts", "Shifty Jelly Pocket Casts"]}, :platform=>{"regex"=>"(Android)"}}, {:app=>"Himalaya", :match=>{"startsWith"=>"Himalaya"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"ExoPlayer", :match=>{"startsWith"=>["ExoPlayer", "yourApplicationName", "null", "md5d42223d6ee7473da82e8136ffb794439.App"]}, :platform=>{"text"=>"Android"}}, {:app=>"Download Manager", :match=>{"startsWith"=>"AndroidDownloadManager"}, :platform=>{"text"=>"Android"}}, {:app=>"Castamatic", :match=>{"startsWith"=>"Castamatic"}, :platform=>{"text"=>"iOS"}}, {:app=>"The Podcast App (podcast.app)", :match=>{"includes"=>"The Podcast App"}, :platform=>{"text"=>"iOS"}}, {:app=>"CastMix", :match=>{"startsWith"=>"CastMix"}, :platform=>{"text"=>"Android"}}, {:app=>"Unknown App", :match=>{"startsWith"=>"okhttp"}, :platform=>{"text"=>"Android"}}, {:app=>"Stagefright Media Playback Engine", :match=>{"includes"=>"stagefright", "excluding"=>{"text"=>"stagefright alternative"}}, :platform=>{"regex"=>"(Fire OS|Android)"}}, {:app=>"LG Player", :match=>{"startsWith"=>"Player/LG Player", "includes"=>["LG Player", "LG-Player"]}, :platform=>{"text"=>"Android"}}, {:app=>"Android Browser", :match=>{"startsWith"=>"Dalvik"}, :platform=>{"text"=>"Android"}}, {:app=>"Acast", :match=>{"startsWith"=>"Acast"}, :platform=>{"regex"=>"(Darwin|Android|Windows)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Castro", :match=>{"startsWith"=>"Castro"}, :platform=>{"text"=>"iOS"}}, {:app=>"Breaker", :match=>{"startsWith"=>"Breaker"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Podcast Addict", :match=>{"startsWith"=>["PodcastAddict", "Podcast Addict"]}, :platform=>{"text"=>"Android"}}, {:app=>"Podbean", :match=>{"startsWith"=>"Podbean", "excluding"=>{"regex"=>"^Podbean Importer"}}, 
:platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Google Podcasts", :match=>{"includes"=>["GSA"], "excluding"=>{"regex"=>"iPhone|iPad"}}, :platform=>{"text"=>"Android"}}, {:app=>"Google Search App", :match=>{"includes"=>["GSA"], "excluding"=>{"regex"=>"Android"}}, :platform=>{"regexes"=>["(iPhone|iPad)"]}}, {:app=>"Google Podcasts", :match=>{"includes"=>"GoogleChirp"}, :platform=>{"text"=>"Google Smart Speaker"}}, {:app=>"Stitcher", :match=>{"startsWith"=>"Stitcher"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"TuneIn", :match=>{"startsWith"=>"TuneIn"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"PodCruncher", :match=>{"startsWith"=>"PodCruncher"}, :platform=>{"text"=>"iOS"}}, {:app=>"iCatcher!", :match=>{"startsWith"=>"iCatcher!", "includes"=>"iCatcher!"}, :platform=>{"regex"=>"\\((iPhone|iPad|iPod touch)", "fallback"=>"iOS"}}, {:app=>"Castaway", :match=>{"startsWith"=>"Castaway"}, :platform=>{"text"=>"iOS"}}, {:app=>"Instacast", :match=>{"startsWith"=>"Instacast"}, :platform=>{"text"=>"Apple Device"}}, {:app=>"VLC", :match=>{"startsWith"=>["VLC", "LibVLC"], "includes"=>"VLC"}, :platform=>{"regex"=>"(Android|iPhone)"}}, {:app=>"Podcast Republic", :match=>{"startsWith"=>"PodcastRepublic"}, :platform=>{"text"=>"Android"}}, {:app=>"DoggCatcher", :match=>{"includes"=>"DoggCatcher"}, :platform=>{"text"=>"Android"}}, {:app=>"Player FM", :match=>{"startsWith"=>["Player FM", "Player%20FM"]}, :platform=>{"regex"=>"(Darwin)", "fallback"=>"Android", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Podkicker", :match=>{"startsWith"=>"Podkicker"}, :platform=>{"text"=>"Android"}}, {:app=>"AntennaPod", :match=>{"startsWith"=>"AntennaPod"}, :platform=>{"text"=>"Android"}}, {:app=>"Downcast", :match=>{"startsWith"=>"Downcast", "includes"=>"Downcast"}, :platform=>{"regex"=>"\\((iPhone|iPad|iPod touch|Mac)"}}, {:app=>"gPodder", :match=>{"startsWith"=>"gPodder"}, 
:platform=>{"regex"=>"(Linux|Windows)"}}, {:app=>"Podcatcher Deluxe", :match=>{"includes"=>"Podcatcher Deluxe"}, :platform=>{"text"=>"Android"}}, {:app=>"Procast", :match=>{"startsWith"=>["Procast", "ProCast"]}, :platform=>{"text"=>"iOS"}}, {:app=>"RSSRadio", :match=>{"startsWith"=>"RSSRadio"}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch|Darwin)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}], "fallback"=>"iOS"}}, {:app=>"Podcat", :match=>{"startsWith"=>"Podcat", "excluding"=>{"regex"=>"Podcatcher"}}, :platform=>{"text"=>"iOS"}}, {:app=>"Audio Now", :match=>{"startsWith"=>"AudioNow", "includes"=>"audionow"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"DIE ZEIT App", :match=>{"includes"=>"ZONApp"}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch|Android)"}}, {:app=>"F.A.Z Der Tag App", :match=>{"includes"=>"FAZDERTAG"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"ANTENNE BAYERN App", :match=>{"includes"=>"AntenneBayern"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"BuzzFeed App", :match=>{"includes"=>"buzzfeed"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"Facebook in-app browser", :match=>{"includes"=>["FBAN", "FBAV"]}, :platform=>{"regexes"=>["\\((iPhone|iPad|iPod touch)", "(Android)"]}}, {:app=>"Instagram in-app browser", :match=>{"includes"=>"Instagram"}, :platform=>{"regexes"=>["(iPad)", "(iPhone|Android)"]}}, {:app=>"Twitter in-app browser", :match=>{"includes"=>"Twitter", "excluding"=>{"regex"=>"^TelegramBot "}}, :platform=>{"regex"=>"(iPhone|iPad|Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"Apple device"}]}}, {:app=>"Pinterest in-app browser", :match=>{"includes"=>"Pinterest"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"Windows Media Player", :match=>{"startsWith"=>["NSPlayer", "WMPlayer"]}, :platform=>{"text"=>"Windows"}}, {:app=>"Sonos", :match=>{"includes"=>"Sonos"}, 
:platform=>{"text"=>"Sonos"}}, {:app=>"Internet Explorer", :match=>{"includes"=>"Trident"}, :platform=>{"regex"=>"(Windows Phone)", "fallback"=>"Windows"}}, {:app=>"Kodi Media Center", :match=>{"startsWith"=>"Kodi", "includes"=>"Kodi"}, :platform=>{"regex"=>"(X11|Android|Windows)", "replacements"=>[{"name"=>"X11", "replaceWith"=>"Linux"}]}}, {:app=>"HermesPod", :match=>{"startsWith"=>"+hermespod.com"}, :platform=>{"text"=>"Windows"}}, {:app=>"ViennaRSS", :match=>{"includes"=>"Vienna"}, :platform=>{"text"=>"Mac"}}, {:app=>"Unknown client", :match=>{"startsWith"=>"(null)", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch)"}}, {:app=>"Clementine Music Player", :match=>{"startsWith"=>"Clementine"}, :platform=>{"text"=>"Unknown"}}, {:app=>"Flipboard", :match=>{"includes"=>"Flipboard", "excluding"=>{"regex"=>"FlipboardProxy/"}}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch|Macintosh)"], "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"iVoox", :match=>{"startsWith"=>["ivoox", "iVoox"]}, :platform=>{"regexes"=>["(Android)", "(Darwin)\\/", "\\((iPhone|iPad|iPod touch|Macintosh)"], "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"Apple device"}, {"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"FYEO", :match=>{"startsWith"=>"FYEO"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Amazon Music", :match=>{"startsWith"=>"AmazonMusic"}, :platform=>{"regex"=>"(iPhone|Android|MacOS|Fire OS|iPad)"}}, {:app=>"Podimo", :match=>{"startsWith"=>"Podimo"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"ARD Audiothek", :match=>{"startsWith"=>"ARD Audiothek"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Samsung Podcast", :match=>{"startsWith"=>"sp-agent"}, :platform=>{"text"=>"Android"}}, {:app=>"RTL+", :match=>{"startsWith"=>"RTL+"}, :platform=>{"regex"=>"(iOS|iPhone|Android)"}}].freeze
|
@@ -1,3 +1,3 @@
|
|
1
1
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running "bin/parse-rules"
|
2
2
|
|
3
|
-
BOTS_RULES = [{:app=>"AAABot - unknown bot", :match=>{"regex"=>"AAABot"}, :platform=>"bot"}, {:app=>"AhrefsBot", :match=>{"regex"=>"AhrefsBot/"}, :platform=>"bot"}, {:app=>"AirableBot", :match=>{"regex"=>"AirableBot-Podcast/"}, :platform=>"bot"}, {:app=>"Alexa Flash Briefing cache", :match=>{"regex"=>"^AmazonNewsContentService"}, :platform=>"bot"}, {:app=>"AlignaBot", :match=>{"regex"=>"^Alignabot"}, :platform=>"bot"}, {:app=>"Amazon Music Podcasts Bot", :match=>{"regex"=>"^Amazon Music Podcast"}, :platform=>"bot"}, {:app=>"Anchor Importer", :match=>{"regex"=>"AnchorImport"}, :platform=>"bot"}, {:app=>"Apple Podcasts (Watch)", :match=>{"regex"=>"^atc/|\\(null\\) watchOS/"}, :platform=>"bot"}, {:app=>"Apple Podcasts automated checks", :match=>{"regex"=>"(iTMS|itunesstored)"}, :platform=>"bot"}, {:app=>"Applebot", :match=>{"regex"=>"^Applebot/"}, :platform=>"bot"}, {:app=>"Archive.org", :match=>{"regex"=>"archive\\.org_bot"}, :platform=>"bot"}, {:app=>"atheerfm", :match=>{"regex"=>"^atheerfm/"}, :platform=>"bot"}, {:app=>"Audiomack", :match=>{"regex"=>"^Audiomack Podcast Processor/"}, :platform=>"bot"}, {:app=>"AudioWave feed parser", :match=>{"regex"=>"^AudioWaveBot/1\\.0"}, :platform=>"bot"}, {:app=>"AwarioSmartBot", :match=>{"regex"=>"^AwarioSmartBot/"}, :platform=>"bot"}, {:app=>"Babbar", :match=>{"regex"=>"Barkrowler/"}, :platform=>"bot"}, {:app=>"Baidu", :match=>{"regex"=>"\\(ce\\.baidu\\.com"}, :platform=>"bot"}, {:app=>"bbot", :match=>{"regex"=>"^bbot/"}, :platform=>"bot"}, {:app=>"British Library", :match=>{"regex"=>"^bl\\.uk_ldfc_bot"}, :platform=>"bot"}, {:app=>"Blubrry Migration Service", :match=>{"regex"=>"^Blubrry Migration Service"}, :platform=>"bot"}, {:app=>"Buzzsprout Importer", :match=>{"regex"=>"^Buzzsprout Importer"}, :platform=>"bot"}, {:app=>"CastFeedValidator", :match=>{"regex"=>"^CastFeedValidator/"}, :platform=>"bot"}, {:app=>"Castopod", :match=>{"regex"=>"Castopod/1\\.0"}, :platform=>"bot"}, {:app=>"Chartable", 
:match=>{"regex"=>"^Trackable/"}, :platform=>"bot"}, {:app=>"Clark-Crawler, unknown", :match=>{"regex"=>"^clark-crawler2"}, :platform=>"bot"}, {:app=>"Critical Mention", :match=>{"regex"=>"^Podcast-CriticalMention/"}, :platform=>"bot"}, {:app=>"curl", :match=>{"regex"=>"^curl|^libcurl/|^PycURL/| curl/"}, :platform=>"bot"}, {:app=>"DataforSEO", :match=>{"regex"=>"DataForSeoBot/"}, :platform=>"bot"}, {:app=>"Datagnion Bot", :match=>{"regex"=>"^datagnionbot"}, :platform=>"bot"}, {:app=>"dataprovider.com", :match=>{"regex"=>"Dataprovider\\.com"}, :platform=>"bot"}, {:app=>"Daum", :match=>{"regex"=>"http://cs\\.daum\\.net"}, :platform=>"bot"}, {:app=>"Deezer Podcasters", :match=>{"regex"=>"^Deezer Podcasters/1\\.0"}, :platform=>"bot"}, {:app=>"Digg", :match=>{"regex"=>"^Digg "}, :platform=>"bot"}, {:app=>"dorada", :match=>{"regex"=>"support@dorada\\.co\\.uk"}, :platform=>"bot"}, {:app=>"DotBot", :match=>{"regex"=>"DotBot"}, :platform=>"bot"}, {:app=>"Downcast Bot", :match=>{"regex"=>"downcast feed consumer/"}, :platform=>"bot"}, {:app=>"DuckDuckBot", :match=>{"regex"=>"DuckDuckBot"}, :platform=>"bot"}, {:app=>"F-Secure Riddler", :match=>{"regex"=>"^Riddler "}, :platform=>"bot"}, {:app=>"Facebook Bot", :match=>{"regex"=>"FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/"}, :platform=>"bot"}, {:app=>"Feedly", :match=>{"regex"=>"^Feedly/"}, :platform=>"bot"}, {:app=>"FlexGet", :match=>{"regex"=>"^FlexGet/"}, :platform=>"bot"}, {:app=>"Flipboard Proxy", :match=>{"regex"=>"FlipboardProxy/"}, :platform=>"bot"}, {:app=>"Fyyd", :match=>{"regex"=>"^fyyd-poll"}, :platform=>"bot"}, {:app=>"Go Storage Gateway V1", :match=>{"regex"=>"^storagegw-v1-go$"}, :platform=>"bot"}, {:app=>"Goodpods Bot", :match=>{"regex"=>"Goodpods/\\d+\\.\\d+"}, :platform=>"bot"}, {:app=>"Google AdsBot", :match=>{"regex"=>"AdsBot-Google"}, :platform=>"bot"}, {:app=>"Google Podcasts Manager", :match=>{"regex"=>"Google-Podcast"}, :platform=>"bot"}, 
{:app=>"Googlebot", :match=>{"regex"=>"Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google"}, :platform=>"bot"}, {:app=>"Gumball", :match=>{"regex"=>"^Gumball"}, :platform=>"bot"}, {:app=>"Headliner", :match=>{"regex"=>"Headliner/"}, :platform=>"bot"}, {:app=>"HubSpot Crawler", :match=>{"regex"=>"HubSpot Crawler"}, :platform=>"bot"}, {:app=>"Internet Archive", :match=>{"regex"=>"Archive-It;|web\\.archive\\.org"}, :platform=>"bot"}, {:app=>"Jaunt", :match=>{"regex"=>"^Jaunt/"}, :platform=>"bot"}, {:app=>"l'Institut national de l'audiovisuel", :match=>{"regex"=>"INA dlweb"}, :platform=>"bot"}, {:app=>"Libsyn", :match=>{"regex"=>"^Libsyn4"}, :platform=>"bot"}, {:app=>"libwww-perl", :match=>{"regex"=>"^libwww-perl| libwww-perl"}, :platform=>"bot"}, {:app=>"Livelap Crawler", :match=>{"regex"=>"LivelapBot"}, :platform=>"bot"}, {:app=>"LTX71", :match=>{"regex"=>"^ltx71 "}, :platform=>"bot"}, {:app=>"MauiBot", :match=>{"regex"=>"^MauiBot"}, :platform=>"bot"}, {:app=>"Mastodon Bot", :match=>{"regex"=>"rb/.*Mastodon/"}, :platform=>"bot"}, {:app=>"Microsoft Bingbot", :match=>{"regex"=>"(BingPreview/|adidxbot/|[bB]ingbot/)"}, :platform=>"bot"}, {:app=>"Microsoft Office", :match=>{"regex"=>"ms-office; MSOffice"}, :platform=>"bot"}, {:app=>"Mixcloud Podcast Importer", :match=>{"regex"=>"^MixcloudPodcastImporter/"}, :platform=>"bot"}, {:app=>"MJ12bot", :match=>{"regex"=>".*MJ12bot"}, :platform=>"bot"}, {:app=>"Mozilla Bot", :match=>{"regex"=>"^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"}, :platform=>"bot"}, {:app=>"MSN Bot", :match=>{"regex"=>"^msnbot/"}, :platform=>"bot"}, {:app=>"Neevabot", :match=>{"regex"=>".*Neevabot"}, :platform=>"bot"}, {:app=>"Netcraft Survey Agent", :match=>{"regex"=>" NetcraftSurveyAgent/"}, :platform=>"bot"}, {:app=>"OgScrper", :match=>{"regex"=>"OgScrper"}, :platform=>"bot"}, {:app=>"OkDownload", :match=>{"regex"=>"^OkDownload/"}, :platform=>"bot"}, {:app=>"OP3 Fetcher", :match=>{"regex"=>"^op3-fetcher/"}, :platform=>"bot"}, 
{:app=>"Overcast feed parser", :match=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}, :platform=>"bot"}, {:app=>"Pandora RSS crawler", :match=>{"regex"=>"^PandoraRSSCrawler"}, :platform=>"bot"}, {:app=>"PaperLi", :match=>{"regex"=>"PaperLiBot/"}, :platform=>"bot"}, {:app=>"PetalBot", :match=>{"regex"=>"PetalBot"}, :platform=>"bot"}, {:app=>"Pingdom", :match=>{"regex"=>"^Pingdom"}, :platform=>"bot"}, {:app=>"PlayerFM Podcast Sync", :match=>{"regex"=>"PlayerFM/.* Podcast Sync"}, :platform=>"bot"}, {:app=>"Podbean Importer", :match=>{"regex"=>"^Podbean Importer"}, :platform=>"bot"}, {:app=>"Podcastindex.org", :match=>{"regex"=>"^Podcastindex\\.org/"}, :platform=>"bot"}, {:app=>"PodCloud", :match=>{"regex"=>"podCloud"}, :platform=>"bot"}, {:app=>"Podcorn", :match=>{"regex"=>"Podcorn/"}, :platform=>"bot"}, {:app=>"PodderBot", :match=>{"regex"=>"PodderBot/"}, :platform=>"bot"}, {:app=>"Podfollow", :match=>{"regex"=>"podfollowbot/"}, :platform=>"bot"}, {:app=>"Podgrab", :match=>{"regex"=>"^Podgrab$"}, :platform=>"bot"}, {:app=>"Podhound", :match=>{"regex"=>"PodhoundBeta"}, :platform=>"bot"}, {:app=>"Podio Bot", :match=>{"regex"=>"^Podio/"}, :platform=>"bot"}, {:app=>"Podnews", :match=>{"regex"=>"PodnewsBot"}, :platform=>"bot"}, {:app=>"Podnods Bot", :match=>{"regex"=>"(podnods-crawler|podnods)"}, :platform=>"bot"}, {:app=>"Podscribe", :match=>{"regex"=>"(^Adswizz-podscribe/|^Podscribe/)"}, :platform=>"bot"}, {:app=>"Podverse Feed Parser", :match=>{"regex"=>"^Podverse/Feed Parser"}, :platform=>"bot"}, {:app=>"PodvineBot", :match=>{"regex"=>"^PodvineBot/"}, :platform=>"bot"}, {:app=>"PostRank Bot", :match=>{"regex"=>"^PostRank/"}, :platform=>"bot"}, {:app=>"PodwatchPro", :match=>{"regex"=>"Podwatch-Pro Crawler"}, :platform=>"bot"}, {:app=>"Puppeteer", :match=>{"regex"=>" HeadlessChrome/\\d"}, :platform=>"bot"}, {:app=>"python-requests", :match=>{"regex"=>"python-requests"}, :platform=>"bot"}, {:app=>"RedCircle", :match=>{"regex"=>"RedCircle"}, :platform=>"bot"}, {:app=>"Ridder 
Bot", :match=>{"regex"=>" RidderBot/"}, :platform=>"bot"}, {:app=>"RSS to Telegram", :match=>{"regex"=>"^RSStT"}, :platform=>"bot"}, {:app=>"RSSRadio", :match=>{"regex"=>"^RSSRadio \\("}, :platform=>"bot"}, {:app=>"Ruby Mechanize", :match=>{"regex"=>"^Mechanize|[ -]Mechanize/"}, :platform=>"bot"}, {:app=>"Screaming Frog SEO Spider", :match=>{"regex"=>"^Screaming Frog "}, :platform=>"bot"}, {:app=>"SearchAtlas.com SEO Crawler", :match=>{"regex"=>"^SearchAtlas.*Crawler"}, :platform=>"bot"}, {:app=>"SEMrushBot", :match=>{"regex"=>"SemrushBot/|^SEMrushBot$"}, :platform=>"bot"}, {:app=>"Serendeputy", :match=>{"regex"=>"SerendeputyBot/"}, :platform=>"bot"}, {:app=>"Simplecast", :match=>{"regex"=>"^Simplecast$"}, :platform=>"bot"}, {:app=>"Slack Bot", :match=>{"regex"=>"^Slackbot 1\\.0"}, :platform=>"bot"}, {:app=>"Snapchat Bot", :match=>{"regex"=>"://developers\\.snap\\.com/robots"}, :platform=>"bot"}, {:app=>"SoundOn Bot", :match=>{"regex"=>"^SoundOn/[\\d.]+\\s+\\(bot"}, :platform=>"bot"}, {:app=>"Spotify cache service", :match=>{"regex"=>"^Spotify/1\\.0$"}, :platform=>"bot"}, {:app=>"Stitcher Bot", :match=>{"regex"=>"^StitcherBot"}, :platform=>"bot"}, {:app=>"Substack Content Fetcher", :match=>{"regex"=>"^SubstackContentFetch/"}, :platform=>"bot"}, {:app=>"Timpi search crawler", :match=>{"regex"=>"Timpibot/"}, :platform=>"bot"}, {:app=>"Tiny Tiny RSS", :match=>{"regex"=>"^Tiny Tiny RSS/"}, :platform=>"bot"}, {:app=>"Trendsmap Resolver", :match=>{"regex"=>"TrendsmapResolver/"}, :platform=>"bot"}, {:app=>"Twingly Bot", :match=>{"regex"=>"Twingly Recon;"}, :platform=>"bot"}, {:app=>"Twitterbot", :match=>{"regex"=>"^Twitterbot"}, :platform=>"bot"}, {:app=>"Typhoeus", :match=>{"regex"=>"^Typhoeus"}, :platform=>"bot"}, {:app=>"UCast", :match=>{"regex"=>"^UCast/"}, :platform=>"bot"}, {:app=>"TelegramBot", :match=>{"regex"=>"^TelegramBot "}, :platform=>"bot"}, {:app=>"Vurbl", :match=>{"regex"=>"VurblBot"}, :platform=>"bot"}, {:app=>"Wget", :match=>{"regex"=>"Wget"}, 
:platform=>"bot"}, {:app=>"weborama", :match=>{"regex"=>"^weborama-fetcher"}, :platform=>"bot"}, {:app=>"Windows Crawler", :match=>{"regex"=>"^ZDM/.*Windows"}, :platform=>"bot"}, {:app=>"WordPress", :match=>{"regex"=>"^WordPress"}, :platform=>"bot"}, {:app=>"wsrv.nl", :match=>{"regex"=>"https?://wsrv.nl/"}, :platform=>"bot"}, {:app=>"YaCy", :match=>{"regex"=>"^yacybot"}, :platform=>"bot"}, {:app=>"Yahoo Crawler", :match=>{"regex"=>"Yahoo! Slurp"}, :platform=>"bot"}, {:app=>"YandexBot", :match=>{"regex"=>"YandexBot/"}, :platform=>"bot"}, {:app=>"Zapier", :match=>{"regex"=>"^Zapier$"}, :platform=>"bot"}, {:app=>"Zencast", :match=>{"regex"=>"^Zencastr/"}, :platform=>"bot"}].freeze
|
3
|
+
BOTS_RULES = [{:app=>"AAABot - unknown bot", :match=>{"regex"=>"AAABot"}, :platform=>"bot"}, {:app=>"Adswizz", :match=>{"regex"=>"^AIS VirtualListener"}, :platform=>"bot"}, {:app=>"AhrefsBot", :match=>{"regex"=>"AhrefsBot/"}, :platform=>"bot"}, {:app=>"AirableBot", :match=>{"regex"=>"AirableBot-Podcast/"}, :platform=>"bot"}, {:app=>"Alexa Flash Briefing cache", :match=>{"regex"=>"^AmazonNewsContentService"}, :platform=>"bot"}, {:app=>"AlignaBot", :match=>{"regex"=>"^Alignabot"}, :platform=>"bot"}, {:app=>"Amazon Music Podcasts Bot", :match=>{"regex"=>"^Amazon Music Podcast"}, :platform=>"bot"}, {:app=>"Anchor Importer", :match=>{"regex"=>"AnchorImport"}, :platform=>"bot"}, {:app=>"Apple Podcasts (Watch)", :match=>{"regex"=>"^atc/|\\(null\\) watchOS/"}, :platform=>"bot"}, {:app=>"Apple Podcasts automated checks", :match=>{"regex"=>"(iTMS|itunesstored|itms)"}, :platform=>"bot"}, {:app=>"Applebot", :match=>{"regex"=>"^Applebot/"}, :platform=>"bot"}, {:app=>"Archive.org", :match=>{"regex"=>"archive\\.org_bot"}, :platform=>"bot"}, {:app=>"atheerfm", :match=>{"regex"=>"^atheerfm/"}, :platform=>"bot"}, {:app=>"Audiomack", :match=>{"regex"=>"^Audiomack Podcast Processor/"}, :platform=>"bot"}, {:app=>"AudioWave feed parser", :match=>{"regex"=>"^AudioWaveBot/1\\.0"}, :platform=>"bot"}, {:app=>"AwarioSmartBot", :match=>{"regex"=>"^AwarioSmartBot/"}, :platform=>"bot"}, {:app=>"Babbar", :match=>{"regex"=>"Barkrowler/"}, :platform=>"bot"}, {:app=>"Baidu", :match=>{"regex"=>"\\(ce\\.baidu\\.com"}, :platform=>"bot"}, {:app=>"bbot", :match=>{"regex"=>"^bbot/"}, :platform=>"bot"}, {:app=>"British Library", :match=>{"regex"=>"^bl\\.uk_ldfc_bot"}, :platform=>"bot"}, {:app=>"Blubrry Migration Service", :match=>{"regex"=>"^Blubrry Migration Service"}, :platform=>"bot"}, {:app=>"Buzzsprout Importer", :match=>{"regex"=>"^Buzzsprout Importer"}, :platform=>"bot"}, {:app=>"CastFeedValidator", :match=>{"regex"=>"^CastFeedValidator/"}, :platform=>"bot"}, {:app=>"Castopod", 
:match=>{"regex"=>"Castopod/\\d"}, :platform=>"bot"}, {:app=>"Chartable", :match=>{"regex"=>"^Trackable/"}, :platform=>"bot"}, {:app=>"Clark-Crawler, unknown", :match=>{"regex"=>"^clark-crawler2"}, :platform=>"bot"}, {:app=>"Critical Mention", :match=>{"regex"=>"^Podcast-CriticalMention/"}, :platform=>"bot"}, {:app=>"curl", :match=>{"regex"=>"^curl|^libcurl/|^PycURL/| curl/"}, :platform=>"bot"}, {:app=>"DataforSEO", :match=>{"regex"=>"DataForSeoBot/"}, :platform=>"bot"}, {:app=>"Datagnion Bot", :match=>{"regex"=>"^datagnionbot"}, :platform=>"bot"}, {:app=>"dataprovider.com", :match=>{"regex"=>"Dataprovider\\.com"}, :platform=>"bot"}, {:app=>"Daum", :match=>{"regex"=>"http://cs\\.daum\\.net"}, :platform=>"bot"}, {:app=>"Deezer Podcasters", :match=>{"regex"=>"^Deezer Podcasters/1\\.0"}, :platform=>"bot"}, {:app=>"Digg", :match=>{"regex"=>"^Digg "}, :platform=>"bot"}, {:app=>"dorada", :match=>{"regex"=>"support@dorada\\.co\\.uk"}, :platform=>"bot"}, {:app=>"DotBot", :match=>{"regex"=>"DotBot"}, :platform=>"bot"}, {:app=>"Downcast Bot", :match=>{"regex"=>"downcast feed consumer/"}, :platform=>"bot"}, {:app=>"DuckDuckBot", :match=>{"regex"=>"DuckDuckBot"}, :platform=>"bot"}, {:app=>"F-Secure Riddler", :match=>{"regex"=>"^Riddler "}, :platform=>"bot"}, {:app=>"Facebook Bot", :match=>{"regex"=>"FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/"}, :platform=>"bot"}, {:app=>"Feedly", :match=>{"regex"=>"^Feedly/"}, :platform=>"bot"}, {:app=>"Feedspot Fetcher", :match=>{"regex"=>"https://www\\.feedspot\\.com/fs/fetcher"}, :platform=>"bot"}, {:app=>"FlexGet", :match=>{"regex"=>"^FlexGet/"}, :platform=>"bot"}, {:app=>"Flipboard Proxy", :match=>{"regex"=>"FlipboardProxy/"}, :platform=>"bot"}, {:app=>"Fyyd", :match=>{"regex"=>"^fyyd-poll"}, :platform=>"bot"}, {:app=>"Go Storage Gateway V1", :match=>{"regex"=>"^storagegw-v1-go$"}, :platform=>"bot"}, {:app=>"Goodpods Bot", :match=>{"regex"=>"Goodpods/\\d+\\.\\d+"}, :platform=>"bot"}, 
{:app=>"Google Adsense Bot", :match=>{"regex"=>"Mediapartners-Google"}, :platform=>"bot"}, {:app=>"Google AdsBot", :match=>{"regex"=>"AdsBot-Google"}, :platform=>"bot"}, {:app=>"Google Podcasts Manager", :match=>{"regex"=>"Google-Podcast"}, :platform=>"bot"}, {:app=>"Googlebot", :match=>{"regex"=>"Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google|google-xrawler"}, :platform=>"bot"}, {:app=>"Gumball", :match=>{"regex"=>"^Gumball"}, :platform=>"bot"}, {:app=>"Headliner", :match=>{"regex"=>"Headliner/"}, :platform=>"bot"}, {:app=>"HubSpot Crawler", :match=>{"regex"=>"HubSpot Crawler"}, :platform=>"bot"}, {:app=>"Internet Archive", :match=>{"regex"=>"Archive-It;|web\\.archive\\.org"}, :platform=>"bot"}, {:app=>"Jaunt", :match=>{"regex"=>"^Jaunt/"}, :platform=>"bot"}, {:app=>"l'Institut national de l'audiovisuel", :match=>{"regex"=>"INA dlweb"}, :platform=>"bot"}, {:app=>"Libsyn", :match=>{"regex"=>"^Libsyn4"}, :platform=>"bot"}, {:app=>"libwww-perl", :match=>{"regex"=>"^libwww-perl| libwww-perl"}, :platform=>"bot"}, {:app=>"Livelap Crawler", :match=>{"regex"=>"LivelapBot"}, :platform=>"bot"}, {:app=>"LTX71", :match=>{"regex"=>"^ltx71 "}, :platform=>"bot"}, {:app=>"MauiBot", :match=>{"regex"=>"^MauiBot"}, :platform=>"bot"}, {:app=>"Mastodon Bot", :match=>{"regex"=>"rb/.*Mastodon/"}, :platform=>"bot"}, {:app=>"Microsoft Bingbot", :match=>{"regex"=>"(BingPreview/|adidxbot/|[bB]ingbot/)"}, :platform=>"bot"}, {:app=>"Microsoft Office", :match=>{"regex"=>"ms-office; MSOffice"}, :platform=>"bot"}, {:app=>"Mixcloud Podcast Importer", :match=>{"regex"=>"^MixcloudPodcastImporter/"}, :platform=>"bot"}, {:app=>"MJ12bot", :match=>{"regex"=>".*MJ12bot"}, :platform=>"bot"}, {:app=>"Mozilla Bot", :match=>{"regex"=>"^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"}, :platform=>"bot"}, {:app=>"MSN Bot", :match=>{"regex"=>"^msnbot/"}, :platform=>"bot"}, {:app=>"Neevabot", :match=>{"regex"=>".*Neevabot"}, :platform=>"bot"}, {:app=>"Netcraft Survey Agent", 
:match=>{"regex"=>" NetcraftSurveyAgent/"}, :platform=>"bot"}, {:app=>"Newspaper", :match=>{"regex"=>"^newspaper/\\d"}, :platform=>"bot"}, {:app=>"OgScrper", :match=>{"regex"=>"OgScrper"}, :platform=>"bot"}, {:app=>"OkDownload", :match=>{"regex"=>"^OkDownload/"}, :platform=>"bot"}, {:app=>"OP3 Fetcher", :match=>{"regex"=>"^op3-fetcher/"}, :platform=>"bot"}, {:app=>"Overcast feed parser", :match=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}, :platform=>"bot"}, {:app=>"Pandora RSS crawler", :match=>{"regex"=>"^PandoraRSSCrawler"}, :platform=>"bot"}, {:app=>"PaperLi", :match=>{"regex"=>"PaperLiBot/"}, :platform=>"bot"}, {:app=>"PetalBot", :match=>{"regex"=>"PetalBot"}, :platform=>"bot"}, {:app=>"Pingdom", :match=>{"regex"=>"^Pingdom"}, :platform=>"bot"}, {:app=>"PlayerFM Podcast Sync", :match=>{"regex"=>"PlayerFM/.* Podcast Sync"}, :platform=>"bot"}, {:app=>"Podbean Importer", :match=>{"regex"=>"^Podbean Importer"}, :platform=>"bot"}, {:app=>"Podcastindex.org", :match=>{"regex"=>"^Podcastindex\\.org/"}, :platform=>"bot"}, {:app=>"Podcast de facto Standard", :match=>{"regex"=>"^PodcastStandard/"}, :platform=>"bot"}, {:app=>"Podcast Archiver", :match=>{"regex"=>"^Podcast%20Archiver/"}, :platform=>"bot"}, {:app=>"PodCloud", :match=>{"regex"=>"podCloud"}, :platform=>"bot"}, {:app=>"Podcorn", :match=>{"regex"=>"Podcorn/"}, :platform=>"bot"}, {:app=>"PodderBot", :match=>{"regex"=>"PodderBot/"}, :platform=>"bot"}, {:app=>"Podfollow", :match=>{"regex"=>"podfollowbot/"}, :platform=>"bot"}, {:app=>"Podgrab", :match=>{"regex"=>"^Podgrab$"}, :platform=>"bot"}, {:app=>"Podhound", :match=>{"regex"=>"PodhoundBeta"}, :platform=>"bot"}, {:app=>"Podio Bot", :match=>{"regex"=>"^Podio/"}, :platform=>"bot"}, {:app=>"Podnews", :match=>{"regex"=>"PodnewsBot"}, :platform=>"bot"}, {:app=>"Podnods Bot", :match=>{"regex"=>"(podnods-crawler|podnods)"}, :platform=>"bot"}, {:app=>"Podscribe", :match=>{"regex"=>"(^Adswizz-podscribe/|^Podscribe/)"}, :platform=>"bot"}, {:app=>"Podverse Feed Parser", 
:match=>{"regex"=>"^Podverse/Feed Parser"}, :platform=>"bot"}, {:app=>"PodvineBot", :match=>{"regex"=>"^PodvineBot/"}, :platform=>"bot"}, {:app=>"PostRank Bot", :match=>{"regex"=>"^PostRank/"}, :platform=>"bot"}, {:app=>"PodwatchPro", :match=>{"regex"=>"Podwatch-Pro Crawler"}, :platform=>"bot"}, {:app=>"Puppeteer", :match=>{"regex"=>" HeadlessChrome/\\d"}, :platform=>"bot"}, {:app=>"python-requests", :match=>{"regex"=>"python-requests"}, :platform=>"bot"}, {:app=>"Qiniu spider", :match=>{"regex"=>"qiniu.*spider"}, :platform=>"bot"}, {:app=>"RedCircle", :match=>{"regex"=>"RedCircle"}, :platform=>"bot"}, {:app=>"Ridder Bot", :match=>{"regex"=>" RidderBot/"}, :platform=>"bot"}, {:app=>"RSS to Telegram", :match=>{"regex"=>"^RSStT"}, :platform=>"bot"}, {:app=>"RSSRadio", :match=>{"regex"=>"^RSSRadio \\("}, :platform=>"bot"}, {:app=>"Ruby Mechanize", :match=>{"regex"=>"^Mechanize|[ -]Mechanize/"}, :platform=>"bot"}, {:app=>"Screaming Frog SEO Spider", :match=>{"regex"=>"^Screaming Frog "}, :platform=>"bot"}, {:app=>"SearchAtlas.com SEO Crawler", :match=>{"regex"=>"^SearchAtlas.*Crawler"}, :platform=>"bot"}, {:app=>"SEMrushBot", :match=>{"regex"=>"SemrushBot/|^SEMrushBot$"}, :platform=>"bot"}, {:app=>"Serendeputy", :match=>{"regex"=>"SerendeputyBot/"}, :platform=>"bot"}, {:app=>"Simplecast", :match=>{"regex"=>"^Simplecast$"}, :platform=>"bot"}, {:app=>"Slack Bot", :match=>{"regex"=>"^Slackbot 1\\.0"}, :platform=>"bot"}, {:app=>"Snapchat Bot", :match=>{"regex"=>"://developers\\.snap\\.com/robots"}, :platform=>"bot"}, {:app=>"SoundOn Bot", :match=>{"regex"=>"^SoundOn/[\\d.]+\\s+\\(bot"}, :platform=>"bot"}, {:app=>"Spotify cache service", :match=>{"regex"=>"^Spotify/1\\.0$"}, :platform=>"bot"}, {:app=>"Stitcher Bot", :match=>{"regex"=>"^StitcherBot"}, :platform=>"bot"}, {:app=>"Substack Content Fetcher", :match=>{"regex"=>"^SubstackContentFetch/"}, :platform=>"bot"}, {:app=>"SupportingCast", :match=>{"regex"=>"^SupportingCast(/.*)?$"}, :platform=>"bot"}, {:app=>"Timpi search 
crawler", :match=>{"regex"=>"Timpibot/"}, :platform=>"bot"}, {:app=>"Tiny Tiny RSS", :match=>{"regex"=>"^Tiny Tiny RSS/"}, :platform=>"bot"}, {:app=>"Trendsmap Resolver", :match=>{"regex"=>"TrendsmapResolver/"}, :platform=>"bot"}, {:app=>"Twingly Bot", :match=>{"regex"=>"Twingly Recon;"}, :platform=>"bot"}, {:app=>"Twitterbot", :match=>{"regex"=>"^Twitterbot"}, :platform=>"bot"}, {:app=>"Typhoeus", :match=>{"regex"=>"^Typhoeus"}, :platform=>"bot"}, {:app=>"UCast", :match=>{"regex"=>"^UCast/"}, :platform=>"bot"}, {:app=>"TelegramBot", :match=>{"regex"=>"^TelegramBot "}, :platform=>"bot"}, {:app=>"theTradeDesk Content Web Scraper", :match=>{"regex"=>"TTD-Content"}, :platform=>"bot"}, {:app=>"Veritone Engine Toolkit", :match=>{"regex"=>"^veritone/engine-toolkit"}, :platform=>"bot"}, {:app=>"Vurbl", :match=>{"regex"=>"VurblBot"}, :platform=>"bot"}, {:app=>"Wget", :match=>{"regex"=>"Wget"}, :platform=>"bot"}, {:app=>"weborama", :match=>{"regex"=>"^weborama-fetcher"}, :platform=>"bot"}, {:app=>"Windows Crawler", :match=>{"regex"=>"^ZDM/.*Windows"}, :platform=>"bot"}, {:app=>"Windows PowerShell", :match=>{"regex"=>"WindowsPowerShell/"}, :platform=>"bot"}, {:app=>"WordPress", :match=>{"regex"=>"^WordPress"}, :platform=>"bot"}, {:app=>"wsrv.nl", :match=>{"regex"=>"https?://wsrv.nl/"}, :platform=>"bot"}, {:app=>"YaCy", :match=>{"regex"=>"^yacybot"}, :platform=>"bot"}, {:app=>"Yahoo Crawler", :match=>{"regex"=>"Yahoo! Slurp"}, :platform=>"bot"}, {:app=>"YandexBot", :match=>{"regex"=>"YandexBot/"}, :platform=>"bot"}, {:app=>"Zapier", :match=>{"regex"=>"^Zapier$"}, :platform=>"bot"}, {:app=>"Zencast", :match=>{"regex"=>"^Zencastr/"}, :platform=>"bot"}].freeze
|
@@ -7,15 +7,19 @@ module PodIdent
|
|
7
7
|
class RuleParser
|
8
8
|
RULES_YAML = File.expand_path('../detection_rules.yml', __dir__)
|
9
9
|
RULES_BOTS_YAML = File.expand_path('../detection_rules_bots.yml', __dir__)
|
10
|
+
CUSTOM_RULES_BOTS_YAML = File.expand_path('../detection_rules_custom_bots.yml', __dir__)
|
10
11
|
RULES_RUBY = File.expand_path('detection_rules.rb', __dir__)
|
11
12
|
RULES_BOTS_RUBY = File.expand_path('detection_rules_bots.rb', __dir__)
|
13
|
+
CUSTOM_RULES_BOTS_RUBY = File.expand_path('detection_rules_custom_bots.rb', __dir__)
|
12
14
|
RULES_SPEC_RUBY = File.expand_path('../../spec/detection_rules.rb', __dir__)
|
13
15
|
RULES_SPEC_BOTS_RUBY = File.expand_path('../../spec/detection_rules_bots.rb', __dir__)
|
16
|
+
RULES_SPEC_CUSTOM_BOTS_RUBY = File.expand_path('../../spec/detection_rules_custom_bots.rb',
|
17
|
+
__dir__)
|
14
18
|
DO_NOT_EDIT_TEXT = <<~HEREDOC
|
15
19
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running \"bin/parse-rules\"\n
|
16
20
|
HEREDOC
|
17
21
|
|
18
|
-
attr_accessor :bots_rules, :rules
|
22
|
+
attr_accessor :bots_rules, :custom_bots_rules, :rules
|
19
23
|
|
20
24
|
def call
|
21
25
|
parse_yaml
|
@@ -42,6 +46,14 @@ module PodIdent
|
|
42
46
|
}
|
43
47
|
end
|
44
48
|
|
49
|
+
cleaned_custom_bots_rules = custom_bots_rules.dup.map do |rule|
|
50
|
+
{
|
51
|
+
app: rule['app'],
|
52
|
+
match: rule['match'],
|
53
|
+
platform: rule['platform']
|
54
|
+
}
|
55
|
+
end
|
56
|
+
|
45
57
|
File.open(RULES_RUBY, 'w') do |file|
|
46
58
|
file.write(DO_NOT_EDIT_TEXT)
|
47
59
|
file.write("RULES = #{cleaned_rules}.freeze")
|
@@ -51,6 +63,11 @@ module PodIdent
|
|
51
63
|
file.write(DO_NOT_EDIT_TEXT)
|
52
64
|
file.write("BOTS_RULES = #{cleaned_bots_rules}.freeze")
|
53
65
|
end
|
66
|
+
|
67
|
+
File.open(CUSTOM_RULES_BOTS_RUBY, 'w') do |file|
|
68
|
+
file.write(DO_NOT_EDIT_TEXT)
|
69
|
+
file.write("CUSTOM_BOTS_RULES = #{cleaned_custom_bots_rules}.freeze")
|
70
|
+
end
|
54
71
|
end
|
55
72
|
|
56
73
|
def write_rules_spec_rb
|
@@ -64,6 +81,11 @@ module PodIdent
|
|
64
81
|
Hash[rule.map { |(k, v)| [k.to_sym, v] }]
|
65
82
|
end
|
66
83
|
|
84
|
+
all_custom_bot_rules = custom_bots_rules.dup.map do |rule|
|
85
|
+
# symbolize keys
|
86
|
+
Hash[rule.map { |(k, v)| [k.to_sym, v] }]
|
87
|
+
end
|
88
|
+
|
67
89
|
File.open(RULES_SPEC_RUBY, 'w') do |file|
|
68
90
|
file.write(DO_NOT_EDIT_TEXT)
|
69
91
|
file.write("RULES = #{all_rules}.freeze")
|
@@ -73,11 +95,17 @@ module PodIdent
|
|
73
95
|
file.write(DO_NOT_EDIT_TEXT)
|
74
96
|
file.write("BOTS_RULES = #{all_bot_rules}.freeze")
|
75
97
|
end
|
98
|
+
|
99
|
+
File.open(RULES_SPEC_CUSTOM_BOTS_RUBY, 'w') do |file|
|
100
|
+
file.write(DO_NOT_EDIT_TEXT)
|
101
|
+
file.write("CUSTOM_BOTS_RULES = #{all_custom_bot_rules}.freeze")
|
102
|
+
end
|
76
103
|
end
|
77
104
|
|
78
105
|
def parse_yaml
|
79
106
|
@rules = YAML.safe_load(rules_yaml_file_content)
|
80
107
|
@bots_rules = YAML.safe_load(bot_rules_yaml_file_content)
|
108
|
+
@custom_bots_rules = YAML.safe_load(custom_bot_rules_yaml_file_content)
|
81
109
|
end
|
82
110
|
|
83
111
|
def rules_yaml_file_content
|
@@ -87,5 +115,9 @@ module PodIdent
|
|
87
115
|
def bot_rules_yaml_file_content
|
88
116
|
File.read(RULES_BOTS_YAML)
|
89
117
|
end
|
118
|
+
|
119
|
+
def custom_bot_rules_yaml_file_content
|
120
|
+
File.read(CUSTOM_RULES_BOTS_YAML)
|
121
|
+
end
|
90
122
|
end
|
91
123
|
end
|
data/lib/pod_ident/version.rb
CHANGED
data/lib/pod_ident.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require 'pod_ident/version'
|
4
4
|
require 'pod_ident/detection_rules'
|
5
5
|
require 'pod_ident/detection_rules_bots'
|
6
|
+
require 'pod_ident/detection_rules_custom_bots'
|
6
7
|
require 'pod_ident/detection_result'
|
7
8
|
|
8
9
|
module PodIdent
|
@@ -22,7 +23,7 @@ module PodIdent
|
|
22
23
|
# !~ /[^[:space:]]/ is what Active Support does to detect blank strings
|
23
24
|
return nil if user_agent_string !~ /[^[:space:]]/
|
24
25
|
|
25
|
-
rule = find_rule || find_rule_bots
|
26
|
+
rule = find_rule || find_rule_bots || find_rule_custom_bots
|
26
27
|
|
27
28
|
self.result = DetectionResult.new(rule, user_agent_string)
|
28
29
|
identify_platform if result.positive?
|
@@ -31,7 +32,7 @@ module PodIdent
|
|
31
32
|
end
|
32
33
|
|
33
34
|
def self.bot?
|
34
|
-
find_rule_bots
|
35
|
+
find_rule_bots || find_rule_custom_bots
|
35
36
|
end
|
36
37
|
|
37
38
|
private
|
@@ -67,6 +68,15 @@ module PodIdent
|
|
67
68
|
end
|
68
69
|
end
|
69
70
|
|
71
|
+
def find_rule_custom_bots
|
72
|
+
CUSTOM_BOTS_RULES.detect do |rule|
|
73
|
+
match = rule.fetch(:match)
|
74
|
+
regex = match['regex']
|
75
|
+
match = Regexp.new(regex).match(user_agent_string)
|
76
|
+
!match.nil?
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
70
80
|
def apply_starts_with(starts_with, found)
|
71
81
|
return found unless starts_with
|
72
82
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pod_ident
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.2
|
4
|
+
version: 1.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Podigee GmbH
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -96,6 +96,7 @@ files:
|
|
96
96
|
- ".rspec"
|
97
97
|
- ".rubocop.yml"
|
98
98
|
- ".ruby-version"
|
99
|
+
- CHANGELOG.md
|
99
100
|
- Gemfile
|
100
101
|
- Gemfile.lock
|
101
102
|
- README.md
|
@@ -105,10 +106,12 @@ files:
|
|
105
106
|
- bin/setup
|
106
107
|
- lib/detection_rules.yml
|
107
108
|
- lib/detection_rules_bots.yml
|
109
|
+
- lib/detection_rules_custom_bots.yml
|
108
110
|
- lib/pod_ident.rb
|
109
111
|
- lib/pod_ident/detection_result.rb
|
110
112
|
- lib/pod_ident/detection_rules.rb
|
111
113
|
- lib/pod_ident/detection_rules_bots.rb
|
114
|
+
- lib/pod_ident/detection_rules_custom_bots.rb
|
112
115
|
- lib/pod_ident/rule_parser.rb
|
113
116
|
- lib/pod_ident/version.rb
|
114
117
|
- pod_ident.gemspec
|