pod_ident 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/lib/detection_rules.yml +6 -2
- data/lib/detection_rules_bots.yml +84 -3
- data/lib/detection_rules_custom_bots.yml +15 -0
- data/lib/pod_ident/detection_rules.rb +1 -1
- data/lib/pod_ident/detection_rules_bots.rb +1 -1
- data/lib/pod_ident/detection_rules_custom_bots.rb +3 -0
- data/lib/pod_ident/rule_parser.rb +33 -1
- data/lib/pod_ident/version.rb +1 -1
- data/lib/pod_ident.rb +12 -2
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e296aac18aa3dd0c789adaaffb6037075b87294486c04d244f44b2993d403c97
|
|
4
|
+
data.tar.gz: 4e8bbc8d91af324ac8ccfae2ade4f542fa6e1df91799a717ba2400e9d668a784
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5cea987f5581c97a005f02a137ae8fddc4e60b65c029eda4f41a957f736cfc759615565c084cce75975fababac84de6e217c8f5f9d80fdcca6744cf4ed716c42
|
|
7
|
+
data.tar.gz: 58a0e9f80a38cdaa167313666979313dba482f65760bdf0b2a4100097a3ab5b9b5ad12a34c7e90d2ed64954b0747d03cf71c80ad60e91374437bd144350adb10
|
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
data/lib/detection_rules.yml
CHANGED
|
@@ -191,7 +191,7 @@
|
|
|
191
191
|
- userAgent: Overcast/3.0 (+http://overcast.fm/; iOS podcast app) BMID/E67A460F81
|
|
192
192
|
platform: iOS
|
|
193
193
|
|
|
194
|
-
- app:
|
|
194
|
+
- app: radio.net
|
|
195
195
|
match:
|
|
196
196
|
startsWith:
|
|
197
197
|
- radio.net
|
|
@@ -204,14 +204,18 @@
|
|
|
204
204
|
- radio.pt
|
|
205
205
|
- radio.pl
|
|
206
206
|
platform:
|
|
207
|
-
regex: '(Android|Darwin)'
|
|
207
|
+
regex: '(Android|Darwin|Linux)'
|
|
208
208
|
replacements:
|
|
209
209
|
- name: Darwin
|
|
210
210
|
replaceWith: iOS
|
|
211
|
+
- name: Linux
|
|
212
|
+
replaceWith: Android
|
|
211
213
|
test:
|
|
212
214
|
userAgents:
|
|
213
215
|
- userAgent: radio.de 4.9.3 (softwinner/QT-7; Android 4.4.2; de_DE)
|
|
214
216
|
platform: Android
|
|
217
|
+
- userAgent: radio.de/5.13.0. 1-app (Linux; 13) ExoPlayerLib/2.19.1
|
|
218
|
+
platform: Android
|
|
215
219
|
- userAgent: radio.de/3735 CFNetwork/758.5.3 Darwin/15.6.0
|
|
216
220
|
platform: iOS
|
|
217
221
|
|
|
@@ -6,6 +6,13 @@
|
|
|
6
6
|
test:
|
|
7
7
|
userAgents:
|
|
8
8
|
- userAgent: AAABot
|
|
9
|
+
- app: Adswizz
|
|
10
|
+
match:
|
|
11
|
+
regex: "^AIS VirtualListener"
|
|
12
|
+
platform: bot
|
|
13
|
+
test:
|
|
14
|
+
userAgents:
|
|
15
|
+
- userAgent: AIS VirtualListener
|
|
9
16
|
- app: AhrefsBot
|
|
10
17
|
match:
|
|
11
18
|
regex: AhrefsBot/
|
|
@@ -69,7 +76,7 @@
|
|
|
69
76
|
(6; dt:155)"
|
|
70
77
|
- app: Apple Podcasts automated checks
|
|
71
78
|
match:
|
|
72
|
-
regex: "(iTMS|itunesstored)"
|
|
79
|
+
regex: "(iTMS|itunesstored|itms)"
|
|
73
80
|
platform: bot
|
|
74
81
|
- app: Applebot
|
|
75
82
|
match:
|
|
@@ -158,11 +165,12 @@
|
|
|
158
165
|
- userAgent: CastFeedValidator/3.0.5 (https://castfeedvalidator.com)
|
|
159
166
|
- app: Castopod
|
|
160
167
|
match:
|
|
161
|
-
regex: Castopod
|
|
168
|
+
regex: Castopod/\d
|
|
162
169
|
platform: bot
|
|
163
170
|
test:
|
|
164
171
|
userAgents:
|
|
165
172
|
- userAgent: Castopod/1.0
|
|
173
|
+
- userAgent: Castopod/1.1.2
|
|
166
174
|
- app: Chartable
|
|
167
175
|
match:
|
|
168
176
|
regex: "^Trackable/"
|
|
@@ -279,6 +287,14 @@
|
|
|
279
287
|
test:
|
|
280
288
|
userAgents:
|
|
281
289
|
- userAgent: Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)
|
|
290
|
+
- app: Feedspot Fetcher
|
|
291
|
+
match:
|
|
292
|
+
regex: https://www\.feedspot\.com/fs/fetcher
|
|
293
|
+
platform: bot
|
|
294
|
+
test:
|
|
295
|
+
userAgents:
|
|
296
|
+
- userAgent: Mozilla/5.0 (compatible; Feedspot/1.0 (+https://www.feedspot.com/fs/fetcher;
|
|
297
|
+
like FeedFetcher-Google)
|
|
282
298
|
- app: FlexGet
|
|
283
299
|
match:
|
|
284
300
|
regex: "^FlexGet/"
|
|
@@ -311,6 +327,13 @@
|
|
|
311
327
|
test:
|
|
312
328
|
userAgents:
|
|
313
329
|
- userAgent: Goodpods/2.2
|
|
330
|
+
- app: Google Adsense Bot
|
|
331
|
+
match:
|
|
332
|
+
regex: Mediapartners-Google
|
|
333
|
+
platform: bot
|
|
334
|
+
test:
|
|
335
|
+
userAgents:
|
|
336
|
+
- userAgent: Mediapartners-Google
|
|
314
337
|
- app: Google AdsBot
|
|
315
338
|
match:
|
|
316
339
|
regex: AdsBot-Google
|
|
@@ -321,7 +344,7 @@
|
|
|
321
344
|
platform: bot
|
|
322
345
|
- app: Googlebot
|
|
323
346
|
match:
|
|
324
|
-
regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google
|
|
347
|
+
regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google|google-xrawler
|
|
325
348
|
platform: bot
|
|
326
349
|
test:
|
|
327
350
|
userAgents:
|
|
@@ -331,6 +354,7 @@
|
|
|
331
354
|
- userAgent: Googlebot-Image/1.0
|
|
332
355
|
- userAgent: Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers;
|
|
333
356
|
feed-id=4653447469100830145)
|
|
357
|
+
- userAgent: google-xrawler
|
|
334
358
|
- app: Gumball
|
|
335
359
|
match:
|
|
336
360
|
regex: "^Gumball"
|
|
@@ -469,6 +493,13 @@
|
|
|
469
493
|
test:
|
|
470
494
|
userAgents:
|
|
471
495
|
- userAgent: Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)
|
|
496
|
+
- app: Newspaper
|
|
497
|
+
match:
|
|
498
|
+
regex: "^newspaper/\\d"
|
|
499
|
+
platform: bot
|
|
500
|
+
test:
|
|
501
|
+
userAgents:
|
|
502
|
+
- userAgent: newspaper/0.3.0
|
|
472
503
|
- app: OgScrper
|
|
473
504
|
match:
|
|
474
505
|
regex: OgScrper
|
|
@@ -538,6 +569,20 @@
|
|
|
538
569
|
test:
|
|
539
570
|
userAgents:
|
|
540
571
|
- userAgent: Podcastindex.org/v0.3.3 (Aggrivate)
|
|
572
|
+
- app: Podcast de facto Standard
|
|
573
|
+
match:
|
|
574
|
+
regex: "^PodcastStandard/"
|
|
575
|
+
platform: bot
|
|
576
|
+
test:
|
|
577
|
+
userAgents:
|
|
578
|
+
- userAgent: PodcastStandard/0.1.0 (+https://podcast-standard.org)
|
|
579
|
+
- app: Podcast Archiver
|
|
580
|
+
match:
|
|
581
|
+
regex: "^Podcast%20Archiver/"
|
|
582
|
+
platform: bot
|
|
583
|
+
test:
|
|
584
|
+
userAgents:
|
|
585
|
+
- userAgent: Podcast%20Archiver/3000000003 CFNetwork/1406.0.4 Darwin/22.4.0
|
|
541
586
|
- app: PodCloud
|
|
542
587
|
match:
|
|
543
588
|
regex: podCloud
|
|
@@ -637,6 +682,13 @@
|
|
|
637
682
|
match:
|
|
638
683
|
regex: python-requests
|
|
639
684
|
platform: bot
|
|
685
|
+
- app: Qiniu spider
|
|
686
|
+
match:
|
|
687
|
+
regex: qiniu.*spider
|
|
688
|
+
platform: bot
|
|
689
|
+
test:
|
|
690
|
+
userAgents:
|
|
691
|
+
- userAgent: z0-qiniu-imgstg-spider-1.0
|
|
640
692
|
- app: RedCircle
|
|
641
693
|
match:
|
|
642
694
|
regex: RedCircle
|
|
@@ -742,6 +794,14 @@
|
|
|
742
794
|
test:
|
|
743
795
|
userAgents:
|
|
744
796
|
- userAgent: SubstackContentFetch/1.0 (https://substack.com/)
|
|
797
|
+
- app: SupportingCast
|
|
798
|
+
match:
|
|
799
|
+
regex: "^SupportingCast(/.*)?$"
|
|
800
|
+
platform: bot
|
|
801
|
+
test:
|
|
802
|
+
userAgents:
|
|
803
|
+
- userAgent: SupportingCast/1.0
|
|
804
|
+
- userAgent: SupportingCast
|
|
745
805
|
- app: Timpi search crawler
|
|
746
806
|
match:
|
|
747
807
|
regex: Timpibot/
|
|
@@ -790,6 +850,20 @@
|
|
|
790
850
|
test:
|
|
791
851
|
userAgents:
|
|
792
852
|
- userAgent: TelegramBot (like TwitterBot)
|
|
853
|
+
- app: theTradeDesk Content Web Scraper
|
|
854
|
+
match:
|
|
855
|
+
regex: TTD-Content
|
|
856
|
+
platform: bot
|
|
857
|
+
test:
|
|
858
|
+
userAgents:
|
|
859
|
+
- userAgent: Mozilla/5.0 (compatible; TTD-Content; +https://www.thetradedesk.com/general/ttd-content)
|
|
860
|
+
- app: Veritone Engine Toolkit
|
|
861
|
+
match:
|
|
862
|
+
regex: "^veritone/engine-toolkit"
|
|
863
|
+
platform: bot
|
|
864
|
+
test:
|
|
865
|
+
userAgents:
|
|
866
|
+
- userAgent: veritone/engine-toolkit-3.0
|
|
793
867
|
- app: Vurbl
|
|
794
868
|
match:
|
|
795
869
|
regex: VurblBot
|
|
@@ -815,6 +889,13 @@
|
|
|
815
889
|
test:
|
|
816
890
|
userAgents:
|
|
817
891
|
- userAgent: ZDM/4.0; Windows Mobile 7.0;
|
|
892
|
+
- app: Windows PowerShell
|
|
893
|
+
match:
|
|
894
|
+
regex: WindowsPowerShell/
|
|
895
|
+
platform: bot
|
|
896
|
+
test:
|
|
897
|
+
userAgents:
|
|
898
|
+
- userAgent: Mozilla/5.0 (Windows NT; Windows NT 10.0; en-US) WindowsPowerShell/5.1.19041.2673
|
|
818
899
|
- app: WordPress
|
|
819
900
|
match:
|
|
820
901
|
regex: "^WordPress"
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running "bin/parse-rules"
|
|
2
2
|
|
|
3
|
-
RULES = [{:app=>"Apple Podcasts", :match=>{"startsWith"=>"AppleCoreMedia"}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"startsWith"=>"itunesstored"}, :platform=>{"regex"=>"(iPad|iPod|iPhone)"}}, {:app=>"iTunes", :match=>{"startsWith"=>"iTunes", "excluding"=>{"regex"=>"Downcast|iCatcher|SqueezeCenter|SqueezeNetwork|MusicServer"}}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"includes"=>"watchOS", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"text"=>"watchOS"}}, {:app=>"Spotify", :match=>{"startsWith"=>"Spotify", "excluding"=>{"regex"=>"^Spotify/1\\.0$"}}, :platform=>{"regexes"=>["(Android|iOS)"]}}, {:app=>"CastBox", :match=>{"startsWith"=>["CastBox", "Castbox"]}, :platform=>{"regexes"=>["(Android|iOS|OS\\sVersion)", "(CastBox)"], "replacements"=>[{"name"=>"OS Version", "replaceWith"=>"iOS"}, {"name"=>"CastBox", "replaceWith"=>"Android"}]}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Alexa"}, :platform=>{"text"=>"Alexa-capable device"}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Echo"}, :platform=>{"text"=>"Amazon Echo", "userAgents"=>[{"userAgent"=>"Echo/1.0(APNG)", "platform"=>"Amazon Echo"}]}}, {:app=>"Deezer", :match=>{"startsWith"=>"Deezer"}, :platform=>{"regexes"=>["(Android|Darwin)", "\\((\\w+\\s*\\w*)"], "replacements"=>[{"name"=>"osx", "replaceWith"=>"Mac"}, {"name"=>"Darwin", "replaceWith"=>"Apple Device"}]}}, {:app=>"Overcast", :match=>{"startsWith"=>"Overcast", "excluding"=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}}, :platform=>{"text"=>"iOS"}}, {:app=>"
|
|
3
|
+
RULES = [{:app=>"Apple Podcasts", :match=>{"startsWith"=>"AppleCoreMedia"}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"startsWith"=>"itunesstored"}, :platform=>{"regex"=>"(iPad|iPod|iPhone)"}}, {:app=>"iTunes", :match=>{"startsWith"=>"iTunes", "excluding"=>{"regex"=>"Downcast|iCatcher|SqueezeCenter|SqueezeNetwork|MusicServer"}}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"includes"=>"watchOS", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"text"=>"watchOS"}}, {:app=>"Spotify", :match=>{"startsWith"=>"Spotify", "excluding"=>{"regex"=>"^Spotify/1\\.0$"}}, :platform=>{"regexes"=>["(Android|iOS)"]}}, {:app=>"CastBox", :match=>{"startsWith"=>["CastBox", "Castbox"]}, :platform=>{"regexes"=>["(Android|iOS|OS\\sVersion)", "(CastBox)"], "replacements"=>[{"name"=>"OS Version", "replaceWith"=>"iOS"}, {"name"=>"CastBox", "replaceWith"=>"Android"}]}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Alexa"}, :platform=>{"text"=>"Alexa-capable device"}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Echo"}, :platform=>{"text"=>"Amazon Echo", "userAgents"=>[{"userAgent"=>"Echo/1.0(APNG)", "platform"=>"Amazon Echo"}]}}, {:app=>"Deezer", :match=>{"startsWith"=>"Deezer"}, :platform=>{"regexes"=>["(Android|Darwin)", "\\((\\w+\\s*\\w*)"], "replacements"=>[{"name"=>"osx", "replaceWith"=>"Mac"}, {"name"=>"Darwin", "replaceWith"=>"Apple Device"}]}}, {:app=>"Overcast", :match=>{"startsWith"=>"Overcast", "excluding"=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}}, :platform=>{"text"=>"iOS"}}, {:app=>"radio.net", :match=>{"startsWith"=>["radio.net", "radio.de", "radio.at", "radio.fr", "radio.dk", "radio.es", "radio.it", "radio.pt", "radio.pl"]}, :platform=>{"regex"=>"(Android|Darwin|Linux)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}, {"name"=>"Linux", "replaceWith"=>"Android"}]}}, {:app=>"PocketCasts", :match=>{"startsWith"=>["PocketCasts", "Pocket Casts", "Shifty Jelly Pocket Casts"]}, :platform=>{"regex"=>"(Android)"}}, {:app=>"Himalaya", :match=>{"startsWith"=>"Himalaya"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"ExoPlayer", :match=>{"startsWith"=>["ExoPlayer", "yourApplicationName", "null", "md5d42223d6ee7473da82e8136ffb794439.App"]}, :platform=>{"text"=>"Android"}}, {:app=>"Download Manager", :match=>{"startsWith"=>"AndroidDownloadManager"}, :platform=>{"text"=>"Android"}}, {:app=>"Castamatic", :match=>{"startsWith"=>"Castamatic"}, :platform=>{"text"=>"iOS"}}, {:app=>"The Podcast App (podcast.app)", :match=>{"includes"=>"The Podcast App"}, :platform=>{"text"=>"iOS"}}, {:app=>"CastMix", :match=>{"startsWith"=>"CastMix"}, :platform=>{"text"=>"Android"}}, {:app=>"Unknown App", :match=>{"startsWith"=>"okhttp"}, :platform=>{"text"=>"Android"}}, {:app=>"Stagefright Media Playback Engine", :match=>{"includes"=>"stagefright", "excluding"=>{"text"=>"stagefright alternative"}}, :platform=>{"regex"=>"(Fire OS|Android)"}}, {:app=>"LG Player", :match=>{"startsWith"=>"Player/LG Player", "includes"=>["LG Player", "LG-Player"]}, :platform=>{"text"=>"Android"}}, {:app=>"Android Browser", :match=>{"startsWith"=>"Dalvik"}, :platform=>{"text"=>"Android"}}, {:app=>"Acast", :match=>{"startsWith"=>"Acast"}, :platform=>{"regex"=>"(Darwin|Android|Windows)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Castro", :match=>{"startsWith"=>"Castro"}, :platform=>{"text"=>"iOS"}}, {:app=>"Breaker", :match=>{"startsWith"=>"Breaker"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Podcast Addict", :match=>{"startsWith"=>["PodcastAddict", "Podcast Addict"]}, :platform=>{"text"=>"Android"}}, {:app=>"Podbean", :match=>{"startsWith"=>"Podbean", "excluding"=>{"regex"=>"^Podbean Importer"}}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Google Podcasts", :match=>{"includes"=>["GSA"], "excluding"=>{"regex"=>"iPhone|iPad"}}, :platform=>{"text"=>"Android"}}, {:app=>"Google Search App", :match=>{"includes"=>["GSA"], "excluding"=>{"regex"=>"Android"}}, :platform=>{"regexes"=>["(iPhone|iPad)"]}}, {:app=>"Google Podcasts", :match=>{"includes"=>"GoogleChirp"}, :platform=>{"text"=>"Google Smart Speaker"}}, {:app=>"Stitcher", :match=>{"startsWith"=>"Stitcher"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"TuneIn", :match=>{"startsWith"=>"TuneIn"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"PodCruncher", :match=>{"startsWith"=>"PodCruncher"}, :platform=>{"text"=>"iOS"}}, {:app=>"iCatcher!", :match=>{"startsWith"=>"iCatcher!", "includes"=>"iCatcher!"}, :platform=>{"regex"=>"\\((iPhone|iPad|iPod touch)", "fallback"=>"iOS"}}, {:app=>"Castaway", :match=>{"startsWith"=>"Castaway"}, :platform=>{"text"=>"iOS"}}, {:app=>"Instacast", :match=>{"startsWith"=>"Instacast"}, :platform=>{"text"=>"Apple Device"}}, {:app=>"VLC", :match=>{"startsWith"=>["VLC", "LibVLC"], "includes"=>"VLC"}, :platform=>{"regex"=>"(Android|iPhone)"}}, {:app=>"Podcast Republic", :match=>{"startsWith"=>"PodcastRepublic"}, :platform=>{"text"=>"Android"}}, {:app=>"DoggCatcher", :match=>{"includes"=>"DoggCatcher"}, :platform=>{"text"=>"Android"}}, {:app=>"Player FM", :match=>{"startsWith"=>["Player FM", "Player%20FM"]}, :platform=>{"regex"=>"(Darwin)", "fallback"=>"Android", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Podkicker", :match=>{"startsWith"=>"Podkicker"}, :platform=>{"text"=>"Android"}}, {:app=>"AntennaPod", :match=>{"startsWith"=>"AntennaPod"}, :platform=>{"text"=>"Android"}}, {:app=>"Downcast", :match=>{"startsWith"=>"Downcast", "includes"=>"Downcast"}, :platform=>{"regex"=>"\\((iPhone|iPad|iPod touch|Mac)"}}, {:app=>"gPodder", :match=>{"startsWith"=>"gPodder"}, :platform=>{"regex"=>"(Linux|Windows)"}}, {:app=>"Podcatcher Deluxe", :match=>{"includes"=>"Podcatcher Deluxe"}, :platform=>{"text"=>"Android"}}, {:app=>"Procast", :match=>{"startsWith"=>["Procast", "ProCast"]}, :platform=>{"text"=>"iOS"}}, {:app=>"RSSRadio", :match=>{"startsWith"=>"RSSRadio"}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch|Darwin)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}], "fallback"=>"iOS"}}, {:app=>"Podcat", :match=>{"startsWith"=>"Podcat", "excluding"=>{"regex"=>"Podcatcher"}}, :platform=>{"text"=>"iOS"}}, {:app=>"Audio Now", :match=>{"startsWith"=>"AudioNow", "includes"=>"audionow"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"DIE ZEIT App", :match=>{"includes"=>"ZONApp"}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch|Android)"}}, {:app=>"F.A.Z Der Tag App", :match=>{"includes"=>"FAZDERTAG"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"ANTENNE BAYERN App", :match=>{"includes"=>"AntenneBayern"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"BuzzFeed App", :match=>{"includes"=>"buzzfeed"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"Facebook in-app browser", :match=>{"includes"=>["FBAN", "FBAV"]}, :platform=>{"regexes"=>["\\((iPhone|iPad|iPod touch)", "(Android)"]}}, {:app=>"Instagram in-app browser", :match=>{"includes"=>"Instagram"}, :platform=>{"regexes"=>["(iPad)", "(iPhone|Android)"]}}, {:app=>"Twitter in-app browser", :match=>{"includes"=>"Twitter", "excluding"=>{"regex"=>"^TelegramBot "}}, :platform=>{"regex"=>"(iPhone|iPad|Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"Apple device"}]}}, {:app=>"Pinterest in-app browser", :match=>{"includes"=>"Pinterest"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"Windows Media Player", :match=>{"startsWith"=>["NSPlayer", "WMPlayer"]}, :platform=>{"text"=>"Windows"}}, {:app=>"Sonos", :match=>{"includes"=>"Sonos"}, :platform=>{"text"=>"Sonos"}}, {:app=>"Internet Explorer", :match=>{"includes"=>"Trident"}, :platform=>{"regex"=>"(Windows Phone)", "fallback"=>"Windows"}}, {:app=>"Kodi Media Center", :match=>{"startsWith"=>"Kodi", "includes"=>"Kodi"}, :platform=>{"regex"=>"(X11|Android|Windows)", "replacements"=>[{"name"=>"X11", "replaceWith"=>"Linux"}]}}, {:app=>"HermesPod", :match=>{"startsWith"=>"+hermespod.com"}, :platform=>{"text"=>"Windows"}}, {:app=>"ViennaRSS", :match=>{"includes"=>"Vienna"}, :platform=>{"text"=>"Mac"}}, {:app=>"Unknown client", :match=>{"startsWith"=>"(null)", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch)"}}, {:app=>"Clementine Music Player", :match=>{"startsWith"=>"Clementine"}, :platform=>{"text"=>"Unknown"}}, {:app=>"Flipboard", :match=>{"includes"=>"Flipboard", "excluding"=>{"regex"=>"FlipboardProxy/"}}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch|Macintosh)"], "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"iVoox", :match=>{"startsWith"=>["ivoox", "iVoox"]}, :platform=>{"regexes"=>["(Android)", "(Darwin)\\/", "\\((iPhone|iPad|iPod touch|Macintosh)"], "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"Apple device"}, {"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"FYEO", :match=>{"startsWith"=>"FYEO"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Amazon Music", :match=>{"startsWith"=>"AmazonMusic"}, :platform=>{"regex"=>"(iPhone|Android|MacOS|Fire OS|iPad)"}}, {:app=>"Podimo", :match=>{"startsWith"=>"Podimo"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"ARD Audiothek", :match=>{"startsWith"=>"ARD Audiothek"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Samsung Podcast", :match=>{"startsWith"=>"sp-agent"}, :platform=>{"text"=>"Android"}}, {:app=>"RTL+", :match=>{"startsWith"=>"RTL+"}, :platform=>{"regex"=>"(iOS|iPhone|Android)"}}].freeze
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running "bin/parse-rules"
|
|
2
2
|
|
|
3
|
-
BOTS_RULES = [{:app=>"AAABot - unknown bot", :match=>{"regex"=>"AAABot"}, :platform=>"bot"}, {:app=>"AhrefsBot", :match=>{"regex"=>"AhrefsBot/"}, :platform=>"bot"}, {:app=>"AirableBot", :match=>{"regex"=>"AirableBot-Podcast/"}, :platform=>"bot"}, {:app=>"Alexa Flash Briefing cache", :match=>{"regex"=>"^AmazonNewsContentService"}, :platform=>"bot"}, {:app=>"AlignaBot", :match=>{"regex"=>"^Alignabot"}, :platform=>"bot"}, {:app=>"Amazon Music Podcasts Bot", :match=>{"regex"=>"^Amazon Music Podcast"}, :platform=>"bot"}, {:app=>"Anchor Importer", :match=>{"regex"=>"AnchorImport"}, :platform=>"bot"}, {:app=>"Apple Podcasts (Watch)", :match=>{"regex"=>"^atc/|\\(null\\) watchOS/"}, :platform=>"bot"}, {:app=>"Apple Podcasts automated checks", :match=>{"regex"=>"(iTMS|itunesstored)"}, :platform=>"bot"}, {:app=>"Applebot", :match=>{"regex"=>"^Applebot/"}, :platform=>"bot"}, {:app=>"Archive.org", :match=>{"regex"=>"archive\\.org_bot"}, :platform=>"bot"}, {:app=>"atheerfm", :match=>{"regex"=>"^atheerfm/"}, :platform=>"bot"}, {:app=>"Audiomack", :match=>{"regex"=>"^Audiomack Podcast Processor/"}, :platform=>"bot"}, {:app=>"AudioWave feed parser", :match=>{"regex"=>"^AudioWaveBot/1\\.0"}, :platform=>"bot"}, {:app=>"AwarioSmartBot", :match=>{"regex"=>"^AwarioSmartBot/"}, :platform=>"bot"}, {:app=>"Babbar", :match=>{"regex"=>"Barkrowler/"}, :platform=>"bot"}, {:app=>"Baidu", :match=>{"regex"=>"\\(ce\\.baidu\\.com"}, :platform=>"bot"}, {:app=>"bbot", :match=>{"regex"=>"^bbot/"}, :platform=>"bot"}, {:app=>"British Library", :match=>{"regex"=>"^bl\\.uk_ldfc_bot"}, :platform=>"bot"}, {:app=>"Blubrry Migration Service", :match=>{"regex"=>"^Blubrry Migration Service"}, :platform=>"bot"}, {:app=>"Buzzsprout Importer", :match=>{"regex"=>"^Buzzsprout Importer"}, :platform=>"bot"}, {:app=>"CastFeedValidator", :match=>{"regex"=>"^CastFeedValidator/"}, :platform=>"bot"}, {:app=>"Castopod", :match=>{"regex"=>"Castopod/1\\.0"}, :platform=>"bot"}, {:app=>"Chartable", :match=>{"regex"=>"^Trackable/"}, :platform=>"bot"}, {:app=>"Clark-Crawler, unknown", :match=>{"regex"=>"^clark-crawler2"}, :platform=>"bot"}, {:app=>"Critical Mention", :match=>{"regex"=>"^Podcast-CriticalMention/"}, :platform=>"bot"}, {:app=>"curl", :match=>{"regex"=>"^curl|^libcurl/|^PycURL/| curl/"}, :platform=>"bot"}, {:app=>"DataforSEO", :match=>{"regex"=>"DataForSeoBot/"}, :platform=>"bot"}, {:app=>"Datagnion Bot", :match=>{"regex"=>"^datagnionbot"}, :platform=>"bot"}, {:app=>"dataprovider.com", :match=>{"regex"=>"Dataprovider\\.com"}, :platform=>"bot"}, {:app=>"Daum", :match=>{"regex"=>"http://cs\\.daum\\.net"}, :platform=>"bot"}, {:app=>"Deezer Podcasters", :match=>{"regex"=>"^Deezer Podcasters/1\\.0"}, :platform=>"bot"}, {:app=>"Digg", :match=>{"regex"=>"^Digg "}, :platform=>"bot"}, {:app=>"dorada", :match=>{"regex"=>"support@dorada\\.co\\.uk"}, :platform=>"bot"}, {:app=>"DotBot", :match=>{"regex"=>"DotBot"}, :platform=>"bot"}, {:app=>"Downcast Bot", :match=>{"regex"=>"downcast feed consumer/"}, :platform=>"bot"}, {:app=>"DuckDuckBot", :match=>{"regex"=>"DuckDuckBot"}, :platform=>"bot"}, {:app=>"F-Secure Riddler", :match=>{"regex"=>"^Riddler "}, :platform=>"bot"}, {:app=>"Facebook Bot", :match=>{"regex"=>"FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/"}, :platform=>"bot"}, {:app=>"Feedly", :match=>{"regex"=>"^Feedly/"}, :platform=>"bot"}, {:app=>"FlexGet", :match=>{"regex"=>"^FlexGet/"}, :platform=>"bot"}, {:app=>"Flipboard Proxy", :match=>{"regex"=>"FlipboardProxy/"}, :platform=>"bot"}, {:app=>"Fyyd", :match=>{"regex"=>"^fyyd-poll"}, :platform=>"bot"}, {:app=>"Go Storage Gateway V1", :match=>{"regex"=>"^storagegw-v1-go$"}, :platform=>"bot"}, {:app=>"Goodpods Bot", :match=>{"regex"=>"Goodpods/\\d+\\.\\d+"}, :platform=>"bot"}, {:app=>"Google AdsBot", :match=>{"regex"=>"AdsBot-Google"}, :platform=>"bot"}, {:app=>"Google Podcasts Manager", :match=>{"regex"=>"Google-Podcast"}, :platform=>"bot"}, {:app=>"Googlebot", :match=>{"regex"=>"Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google"}, :platform=>"bot"}, {:app=>"Gumball", :match=>{"regex"=>"^Gumball"}, :platform=>"bot"}, {:app=>"Headliner", :match=>{"regex"=>"Headliner/"}, :platform=>"bot"}, {:app=>"HubSpot Crawler", :match=>{"regex"=>"HubSpot Crawler"}, :platform=>"bot"}, {:app=>"Internet Archive", :match=>{"regex"=>"Archive-It;|web\\.archive\\.org"}, :platform=>"bot"}, {:app=>"Jaunt", :match=>{"regex"=>"^Jaunt/"}, :platform=>"bot"}, {:app=>"l'Institut national de l'audiovisuel", :match=>{"regex"=>"INA dlweb"}, :platform=>"bot"}, {:app=>"Libsyn", :match=>{"regex"=>"^Libsyn4"}, :platform=>"bot"}, {:app=>"libwww-perl", :match=>{"regex"=>"^libwww-perl| libwww-perl"}, :platform=>"bot"}, {:app=>"Livelap Crawler", :match=>{"regex"=>"LivelapBot"}, :platform=>"bot"}, {:app=>"LTX71", :match=>{"regex"=>"^ltx71 "}, :platform=>"bot"}, {:app=>"MauiBot", :match=>{"regex"=>"^MauiBot"}, :platform=>"bot"}, {:app=>"Mastodon Bot", :match=>{"regex"=>"rb/.*Mastodon/"}, :platform=>"bot"}, {:app=>"Microsoft Bingbot", :match=>{"regex"=>"(BingPreview/|adidxbot/|[bB]ingbot/)"}, :platform=>"bot"}, {:app=>"Microsoft Office", :match=>{"regex"=>"ms-office; MSOffice"}, :platform=>"bot"}, {:app=>"Mixcloud Podcast Importer", :match=>{"regex"=>"^MixcloudPodcastImporter/"}, :platform=>"bot"}, {:app=>"MJ12bot", :match=>{"regex"=>".*MJ12bot"}, :platform=>"bot"}, {:app=>"Mozilla Bot", :match=>{"regex"=>"^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"}, :platform=>"bot"}, {:app=>"MSN Bot", :match=>{"regex"=>"^msnbot/"}, :platform=>"bot"}, {:app=>"Neevabot", :match=>{"regex"=>".*Neevabot"}, :platform=>"bot"}, {:app=>"Netcraft Survey Agent", :match=>{"regex"=>" NetcraftSurveyAgent/"}, :platform=>"bot"}, {:app=>"OgScrper", :match=>{"regex"=>"OgScrper"}, :platform=>"bot"}, {:app=>"OkDownload", :match=>{"regex"=>"^OkDownload/"}, :platform=>"bot"}, {:app=>"OP3 Fetcher", :match=>{"regex"=>"^op3-fetcher/"}, :platform=>"bot"}, {:app=>"Overcast feed parser", :match=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}, :platform=>"bot"}, {:app=>"Pandora RSS crawler", :match=>{"regex"=>"^PandoraRSSCrawler"}, :platform=>"bot"}, {:app=>"PaperLi", :match=>{"regex"=>"PaperLiBot/"}, :platform=>"bot"}, {:app=>"PetalBot", :match=>{"regex"=>"PetalBot"}, :platform=>"bot"}, {:app=>"Pingdom", :match=>{"regex"=>"^Pingdom"}, :platform=>"bot"}, {:app=>"PlayerFM Podcast Sync", :match=>{"regex"=>"PlayerFM/.* Podcast Sync"}, :platform=>"bot"}, {:app=>"Podbean Importer", :match=>{"regex"=>"^Podbean Importer"}, :platform=>"bot"}, {:app=>"Podcastindex.org", :match=>{"regex"=>"^Podcastindex\\.org/"}, :platform=>"bot"}, {:app=>"PodCloud", :match=>{"regex"=>"podCloud"}, :platform=>"bot"}, {:app=>"Podcorn", :match=>{"regex"=>"Podcorn/"}, :platform=>"bot"}, {:app=>"PodderBot", :match=>{"regex"=>"PodderBot/"}, :platform=>"bot"}, {:app=>"Podfollow", :match=>{"regex"=>"podfollowbot/"}, :platform=>"bot"}, {:app=>"Podgrab", :match=>{"regex"=>"^Podgrab$"}, :platform=>"bot"}, {:app=>"Podhound", :match=>{"regex"=>"PodhoundBeta"}, :platform=>"bot"}, {:app=>"Podio Bot", :match=>{"regex"=>"^Podio/"}, :platform=>"bot"}, {:app=>"Podnews", :match=>{"regex"=>"PodnewsBot"}, :platform=>"bot"}, {:app=>"Podnods Bot", :match=>{"regex"=>"(podnods-crawler|podnods)"}, :platform=>"bot"}, {:app=>"Podscribe", :match=>{"regex"=>"(^Adswizz-podscribe/|^Podscribe/)"}, :platform=>"bot"}, {:app=>"Podverse Feed Parser", :match=>{"regex"=>"^Podverse/Feed Parser"}, :platform=>"bot"}, {:app=>"PodvineBot", :match=>{"regex"=>"^PodvineBot/"}, :platform=>"bot"}, {:app=>"PostRank Bot", :match=>{"regex"=>"^PostRank/"}, :platform=>"bot"}, {:app=>"PodwatchPro", :match=>{"regex"=>"Podwatch-Pro Crawler"}, :platform=>"bot"}, {:app=>"Puppeteer", :match=>{"regex"=>" HeadlessChrome/\\d"}, :platform=>"bot"}, {:app=>"python-requests", :match=>{"regex"=>"python-requests"}, :platform=>"bot"}, {:app=>"RedCircle", :match=>{"regex"=>"RedCircle"}, :platform=>"bot"}, {:app=>"Ridder Bot", :match=>{"regex"=>" RidderBot/"}, :platform=>"bot"}, {:app=>"RSS to Telegram", :match=>{"regex"=>"^RSStT"}, :platform=>"bot"}, {:app=>"RSSRadio", :match=>{"regex"=>"^RSSRadio \\("}, :platform=>"bot"}, {:app=>"Ruby Mechanize", :match=>{"regex"=>"^Mechanize|[ -]Mechanize/"}, :platform=>"bot"}, {:app=>"Screaming Frog SEO Spider", :match=>{"regex"=>"^Screaming Frog "}, :platform=>"bot"}, {:app=>"SearchAtlas.com SEO Crawler", :match=>{"regex"=>"^SearchAtlas.*Crawler"}, :platform=>"bot"}, {:app=>"SEMrushBot", :match=>{"regex"=>"SemrushBot/|^SEMrushBot$"}, :platform=>"bot"}, {:app=>"Serendeputy", :match=>{"regex"=>"SerendeputyBot/"}, :platform=>"bot"}, {:app=>"Simplecast", :match=>{"regex"=>"^Simplecast$"}, :platform=>"bot"}, {:app=>"Slack Bot", :match=>{"regex"=>"^Slackbot 1\\.0"}, :platform=>"bot"}, {:app=>"Snapchat Bot", :match=>{"regex"=>"://developers\\.snap\\.com/robots"}, :platform=>"bot"}, {:app=>"SoundOn Bot", :match=>{"regex"=>"^SoundOn/[\\d.]+\\s+\\(bot"}, :platform=>"bot"}, {:app=>"Spotify cache service", :match=>{"regex"=>"^Spotify/1\\.0$"}, :platform=>"bot"}, {:app=>"Stitcher Bot", :match=>{"regex"=>"^StitcherBot"}, :platform=>"bot"}, {:app=>"Substack Content Fetcher", :match=>{"regex"=>"^SubstackContentFetch/"}, :platform=>"bot"}, {:app=>"Timpi search crawler", :match=>{"regex"=>"Timpibot/"}, :platform=>"bot"}, {:app=>"Tiny Tiny RSS", :match=>{"regex"=>"^Tiny Tiny RSS/"}, :platform=>"bot"}, {:app=>"Trendsmap Resolver", :match=>{"regex"=>"TrendsmapResolver/"}, :platform=>"bot"}, {:app=>"Twingly Bot", :match=>{"regex"=>"Twingly Recon;"}, :platform=>"bot"}, {:app=>"Twitterbot", :match=>{"regex"=>"^Twitterbot"}, :platform=>"bot"}, {:app=>"Typhoeus", :match=>{"regex"=>"^Typhoeus"}, :platform=>"bot"}, {:app=>"UCast", :match=>{"regex"=>"^UCast/"}, :platform=>"bot"}, {:app=>"TelegramBot", :match=>{"regex"=>"^TelegramBot "}, :platform=>"bot"}, {:app=>"Vurbl", :match=>{"regex"=>"VurblBot"}, :platform=>"bot"}, {:app=>"Wget", :match=>{"regex"=>"Wget"}, :platform=>"bot"}, {:app=>"weborama", :match=>{"regex"=>"^weborama-fetcher"}, :platform=>"bot"}, {:app=>"Windows Crawler", :match=>{"regex"=>"^ZDM/.*Windows"}, :platform=>"bot"}, {:app=>"WordPress", :match=>{"regex"=>"^WordPress"}, :platform=>"bot"}, {:app=>"wsrv.nl", :match=>{"regex"=>"https?://wsrv.nl/"}, :platform=>"bot"}, {:app=>"YaCy", :match=>{"regex"=>"^yacybot"}, :platform=>"bot"}, {:app=>"Yahoo Crawler", :match=>{"regex"=>"Yahoo! Slurp"}, :platform=>"bot"}, {:app=>"YandexBot", :match=>{"regex"=>"YandexBot/"}, :platform=>"bot"}, {:app=>"Zapier", :match=>{"regex"=>"^Zapier$"}, :platform=>"bot"}, {:app=>"Zencast", :match=>{"regex"=>"^Zencastr/"}, :platform=>"bot"}].freeze
|
|
3
|
+
BOTS_RULES = [{:app=>"AAABot - unknown bot", :match=>{"regex"=>"AAABot"}, :platform=>"bot"}, {:app=>"Adswizz", :match=>{"regex"=>"^AIS VirtualListener"}, :platform=>"bot"}, {:app=>"AhrefsBot", :match=>{"regex"=>"AhrefsBot/"}, :platform=>"bot"}, {:app=>"AirableBot", :match=>{"regex"=>"AirableBot-Podcast/"}, :platform=>"bot"}, {:app=>"Alexa Flash Briefing cache", :match=>{"regex"=>"^AmazonNewsContentService"}, :platform=>"bot"}, {:app=>"AlignaBot", :match=>{"regex"=>"^Alignabot"}, :platform=>"bot"}, {:app=>"Amazon Music Podcasts Bot", :match=>{"regex"=>"^Amazon Music Podcast"}, :platform=>"bot"}, {:app=>"Anchor Importer", :match=>{"regex"=>"AnchorImport"}, :platform=>"bot"}, {:app=>"Apple Podcasts (Watch)", :match=>{"regex"=>"^atc/|\\(null\\) watchOS/"}, :platform=>"bot"}, {:app=>"Apple Podcasts automated checks", :match=>{"regex"=>"(iTMS|itunesstored|itms)"}, :platform=>"bot"}, {:app=>"Applebot", :match=>{"regex"=>"^Applebot/"}, :platform=>"bot"}, {:app=>"Archive.org", :match=>{"regex"=>"archive\\.org_bot"}, :platform=>"bot"}, {:app=>"atheerfm", :match=>{"regex"=>"^atheerfm/"}, :platform=>"bot"}, {:app=>"Audiomack", :match=>{"regex"=>"^Audiomack Podcast Processor/"}, :platform=>"bot"}, {:app=>"AudioWave feed parser", :match=>{"regex"=>"^AudioWaveBot/1\\.0"}, :platform=>"bot"}, {:app=>"AwarioSmartBot", :match=>{"regex"=>"^AwarioSmartBot/"}, :platform=>"bot"}, {:app=>"Babbar", :match=>{"regex"=>"Barkrowler/"}, :platform=>"bot"}, {:app=>"Baidu", :match=>{"regex"=>"\\(ce\\.baidu\\.com"}, :platform=>"bot"}, {:app=>"bbot", :match=>{"regex"=>"^bbot/"}, :platform=>"bot"}, {:app=>"British Library", :match=>{"regex"=>"^bl\\.uk_ldfc_bot"}, :platform=>"bot"}, {:app=>"Blubrry Migration Service", :match=>{"regex"=>"^Blubrry Migration Service"}, :platform=>"bot"}, {:app=>"Buzzsprout Importer", :match=>{"regex"=>"^Buzzsprout Importer"}, :platform=>"bot"}, {:app=>"CastFeedValidator", :match=>{"regex"=>"^CastFeedValidator/"}, :platform=>"bot"}, {:app=>"Castopod", :match=>{"regex"=>"Castopod/\\d"}, :platform=>"bot"}, {:app=>"Chartable", :match=>{"regex"=>"^Trackable/"}, :platform=>"bot"}, {:app=>"Clark-Crawler, unknown", :match=>{"regex"=>"^clark-crawler2"}, :platform=>"bot"}, {:app=>"Critical Mention", :match=>{"regex"=>"^Podcast-CriticalMention/"}, :platform=>"bot"}, {:app=>"curl", :match=>{"regex"=>"^curl|^libcurl/|^PycURL/| curl/"}, :platform=>"bot"}, {:app=>"DataforSEO", :match=>{"regex"=>"DataForSeoBot/"}, :platform=>"bot"}, {:app=>"Datagnion Bot", :match=>{"regex"=>"^datagnionbot"}, :platform=>"bot"}, {:app=>"dataprovider.com", :match=>{"regex"=>"Dataprovider\\.com"}, :platform=>"bot"}, {:app=>"Daum", :match=>{"regex"=>"http://cs\\.daum\\.net"}, :platform=>"bot"}, {:app=>"Deezer Podcasters", :match=>{"regex"=>"^Deezer Podcasters/1\\.0"}, :platform=>"bot"}, {:app=>"Digg", :match=>{"regex"=>"^Digg "}, :platform=>"bot"}, {:app=>"dorada", :match=>{"regex"=>"support@dorada\\.co\\.uk"}, :platform=>"bot"}, {:app=>"DotBot", :match=>{"regex"=>"DotBot"}, :platform=>"bot"}, {:app=>"Downcast Bot", :match=>{"regex"=>"downcast feed consumer/"}, :platform=>"bot"}, {:app=>"DuckDuckBot", :match=>{"regex"=>"DuckDuckBot"}, :platform=>"bot"}, {:app=>"F-Secure Riddler", :match=>{"regex"=>"^Riddler "}, :platform=>"bot"}, {:app=>"Facebook Bot", :match=>{"regex"=>"FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/"}, :platform=>"bot"}, {:app=>"Feedly", :match=>{"regex"=>"^Feedly/"}, :platform=>"bot"}, {:app=>"Feedspot Fetcher", :match=>{"regex"=>"https://www\\.feedspot\\.com/fs/fetcher"}, :platform=>"bot"}, {:app=>"FlexGet", :match=>{"regex"=>"^FlexGet/"}, :platform=>"bot"}, {:app=>"Flipboard Proxy", :match=>{"regex"=>"FlipboardProxy/"}, :platform=>"bot"}, {:app=>"Fyyd", :match=>{"regex"=>"^fyyd-poll"}, :platform=>"bot"}, {:app=>"Go Storage Gateway V1", :match=>{"regex"=>"^storagegw-v1-go$"}, :platform=>"bot"}, {:app=>"Goodpods Bot", :match=>{"regex"=>"Goodpods/\\d+\\.\\d+"}, :platform=>"bot"}, {:app=>"Google Adsense Bot", :match=>{"regex"=>"Mediapartners-Google"}, :platform=>"bot"}, {:app=>"Google AdsBot", :match=>{"regex"=>"AdsBot-Google"}, :platform=>"bot"}, {:app=>"Google Podcasts Manager", :match=>{"regex"=>"Google-Podcast"}, :platform=>"bot"}, {:app=>"Googlebot", :match=>{"regex"=>"Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google|google-xrawler"}, :platform=>"bot"}, {:app=>"Gumball", :match=>{"regex"=>"^Gumball"}, :platform=>"bot"}, {:app=>"Headliner", :match=>{"regex"=>"Headliner/"}, :platform=>"bot"}, {:app=>"HubSpot Crawler", :match=>{"regex"=>"HubSpot Crawler"}, :platform=>"bot"}, {:app=>"Internet Archive", :match=>{"regex"=>"Archive-It;|web\\.archive\\.org"}, :platform=>"bot"}, {:app=>"Jaunt", :match=>{"regex"=>"^Jaunt/"}, :platform=>"bot"}, {:app=>"l'Institut national de l'audiovisuel", :match=>{"regex"=>"INA dlweb"}, :platform=>"bot"}, {:app=>"Libsyn", :match=>{"regex"=>"^Libsyn4"}, :platform=>"bot"}, {:app=>"libwww-perl", :match=>{"regex"=>"^libwww-perl| libwww-perl"}, :platform=>"bot"}, {:app=>"Livelap Crawler", :match=>{"regex"=>"LivelapBot"}, :platform=>"bot"}, {:app=>"LTX71", :match=>{"regex"=>"^ltx71 "}, :platform=>"bot"}, {:app=>"MauiBot", :match=>{"regex"=>"^MauiBot"}, :platform=>"bot"}, {:app=>"Mastodon Bot", :match=>{"regex"=>"rb/.*Mastodon/"}, :platform=>"bot"}, {:app=>"Microsoft Bingbot", :match=>{"regex"=>"(BingPreview/|adidxbot/|[bB]ingbot/)"}, :platform=>"bot"}, {:app=>"Microsoft Office", :match=>{"regex"=>"ms-office; MSOffice"}, :platform=>"bot"}, {:app=>"Mixcloud Podcast Importer", :match=>{"regex"=>"^MixcloudPodcastImporter/"}, :platform=>"bot"}, {:app=>"MJ12bot", :match=>{"regex"=>".*MJ12bot"}, :platform=>"bot"}, {:app=>"Mozilla Bot", :match=>{"regex"=>"^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"}, :platform=>"bot"}, {:app=>"MSN Bot", :match=>{"regex"=>"^msnbot/"}, :platform=>"bot"}, {:app=>"Neevabot", :match=>{"regex"=>".*Neevabot"}, :platform=>"bot"}, {:app=>"Netcraft Survey Agent", :match=>{"regex"=>" NetcraftSurveyAgent/"}, :platform=>"bot"}, {:app=>"Newspaper", :match=>{"regex"=>"^newspaper/\\d"}, :platform=>"bot"}, {:app=>"OgScrper", :match=>{"regex"=>"OgScrper"}, :platform=>"bot"}, {:app=>"OkDownload", :match=>{"regex"=>"^OkDownload/"}, :platform=>"bot"}, {:app=>"OP3 Fetcher", :match=>{"regex"=>"^op3-fetcher/"}, :platform=>"bot"}, {:app=>"Overcast feed parser", :match=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}, :platform=>"bot"}, {:app=>"Pandora RSS crawler", :match=>{"regex"=>"^PandoraRSSCrawler"}, :platform=>"bot"}, {:app=>"PaperLi", :match=>{"regex"=>"PaperLiBot/"}, :platform=>"bot"}, {:app=>"PetalBot", :match=>{"regex"=>"PetalBot"}, :platform=>"bot"}, {:app=>"Pingdom", :match=>{"regex"=>"^Pingdom"}, :platform=>"bot"}, {:app=>"PlayerFM Podcast Sync", :match=>{"regex"=>"PlayerFM/.* Podcast Sync"}, :platform=>"bot"}, {:app=>"Podbean Importer", :match=>{"regex"=>"^Podbean Importer"}, :platform=>"bot"}, {:app=>"Podcastindex.org", :match=>{"regex"=>"^Podcastindex\\.org/"}, :platform=>"bot"}, {:app=>"Podcast de facto Standard", :match=>{"regex"=>"^PodcastStandard/"}, :platform=>"bot"}, {:app=>"Podcast Archiver", :match=>{"regex"=>"^Podcast%20Archiver/"}, :platform=>"bot"}, {:app=>"PodCloud", :match=>{"regex"=>"podCloud"}, :platform=>"bot"}, {:app=>"Podcorn", :match=>{"regex"=>"Podcorn/"}, :platform=>"bot"}, {:app=>"PodderBot", :match=>{"regex"=>"PodderBot/"}, :platform=>"bot"}, {:app=>"Podfollow", :match=>{"regex"=>"podfollowbot/"}, :platform=>"bot"}, {:app=>"Podgrab", :match=>{"regex"=>"^Podgrab$"}, :platform=>"bot"}, {:app=>"Podhound", :match=>{"regex"=>"PodhoundBeta"}, :platform=>"bot"}, {:app=>"Podio Bot", :match=>{"regex"=>"^Podio/"}, :platform=>"bot"}, {:app=>"Podnews", :match=>{"regex"=>"PodnewsBot"}, :platform=>"bot"}, {:app=>"Podnods Bot", :match=>{"regex"=>"(podnods-crawler|podnods)"}, :platform=>"bot"}, {:app=>"Podscribe", :match=>{"regex"=>"(^Adswizz-podscribe/|^Podscribe/)"}, :platform=>"bot"}, {:app=>"Podverse Feed Parser", :match=>{"regex"=>"^Podverse/Feed Parser"}, :platform=>"bot"}, {:app=>"PodvineBot", :match=>{"regex"=>"^PodvineBot/"}, :platform=>"bot"}, {:app=>"PostRank Bot", :match=>{"regex"=>"^PostRank/"}, :platform=>"bot"}, {:app=>"PodwatchPro", :match=>{"regex"=>"Podwatch-Pro Crawler"}, :platform=>"bot"}, {:app=>"Puppeteer", :match=>{"regex"=>" HeadlessChrome/\\d"}, :platform=>"bot"}, {:app=>"python-requests", :match=>{"regex"=>"python-requests"}, :platform=>"bot"}, {:app=>"Qiniu spider", :match=>{"regex"=>"qiniu.*spider"}, :platform=>"bot"}, {:app=>"RedCircle", :match=>{"regex"=>"RedCircle"}, :platform=>"bot"}, {:app=>"Ridder Bot", :match=>{"regex"=>" RidderBot/"}, :platform=>"bot"}, {:app=>"RSS to Telegram", :match=>{"regex"=>"^RSStT"}, :platform=>"bot"}, {:app=>"RSSRadio", :match=>{"regex"=>"^RSSRadio \\("}, :platform=>"bot"}, {:app=>"Ruby Mechanize", :match=>{"regex"=>"^Mechanize|[ -]Mechanize/"}, :platform=>"bot"}, {:app=>"Screaming Frog SEO Spider", :match=>{"regex"=>"^Screaming Frog "}, :platform=>"bot"}, {:app=>"SearchAtlas.com SEO Crawler", :match=>{"regex"=>"^SearchAtlas.*Crawler"}, :platform=>"bot"}, {:app=>"SEMrushBot", :match=>{"regex"=>"SemrushBot/|^SEMrushBot$"}, :platform=>"bot"}, {:app=>"Serendeputy", :match=>{"regex"=>"SerendeputyBot/"}, :platform=>"bot"}, {:app=>"Simplecast", :match=>{"regex"=>"^Simplecast$"}, :platform=>"bot"}, {:app=>"Slack Bot", :match=>{"regex"=>"^Slackbot 1\\.0"}, :platform=>"bot"}, {:app=>"Snapchat Bot", :match=>{"regex"=>"://developers\\.snap\\.com/robots"}, :platform=>"bot"}, {:app=>"SoundOn Bot", :match=>{"regex"=>"^SoundOn/[\\d.]+\\s+\\(bot"}, :platform=>"bot"}, {:app=>"Spotify cache service", :match=>{"regex"=>"^Spotify/1\\.0$"}, :platform=>"bot"}, {:app=>"Stitcher Bot", :match=>{"regex"=>"^StitcherBot"}, :platform=>"bot"}, {:app=>"Substack Content Fetcher", :match=>{"regex"=>"^SubstackContentFetch/"}, :platform=>"bot"}, {:app=>"SupportingCast", :match=>{"regex"=>"^SupportingCast(/.*)?$"}, :platform=>"bot"}, {:app=>"Timpi search crawler", :match=>{"regex"=>"Timpibot/"}, :platform=>"bot"}, {:app=>"Tiny Tiny RSS", :match=>{"regex"=>"^Tiny Tiny RSS/"}, :platform=>"bot"}, {:app=>"Trendsmap Resolver", :match=>{"regex"=>"TrendsmapResolver/"}, :platform=>"bot"}, {:app=>"Twingly Bot", :match=>{"regex"=>"Twingly Recon;"}, :platform=>"bot"}, {:app=>"Twitterbot", :match=>{"regex"=>"^Twitterbot"}, :platform=>"bot"}, {:app=>"Typhoeus", :match=>{"regex"=>"^Typhoeus"}, :platform=>"bot"}, {:app=>"UCast", :match=>{"regex"=>"^UCast/"}, :platform=>"bot"}, {:app=>"TelegramBot", :match=>{"regex"=>"^TelegramBot "}, :platform=>"bot"}, {:app=>"theTradeDesk Content Web Scraper", :match=>{"regex"=>"TTD-Content"}, :platform=>"bot"}, {:app=>"Veritone Engine Toolkit", :match=>{"regex"=>"^veritone/engine-toolkit"}, :platform=>"bot"}, {:app=>"Vurbl", :match=>{"regex"=>"VurblBot"}, :platform=>"bot"}, {:app=>"Wget", :match=>{"regex"=>"Wget"}, :platform=>"bot"}, {:app=>"weborama", :match=>{"regex"=>"^weborama-fetcher"}, :platform=>"bot"}, {:app=>"Windows Crawler", :match=>{"regex"=>"^ZDM/.*Windows"}, :platform=>"bot"}, {:app=>"Windows PowerShell", :match=>{"regex"=>"WindowsPowerShell/"}, :platform=>"bot"}, {:app=>"WordPress", :match=>{"regex"=>"^WordPress"}, :platform=>"bot"}, {:app=>"wsrv.nl", :match=>{"regex"=>"https?://wsrv.nl/"}, :platform=>"bot"}, {:app=>"YaCy", :match=>{"regex"=>"^yacybot"}, :platform=>"bot"}, {:app=>"Yahoo Crawler", :match=>{"regex"=>"Yahoo! Slurp"}, :platform=>"bot"}, {:app=>"YandexBot", :match=>{"regex"=>"YandexBot/"}, :platform=>"bot"}, {:app=>"Zapier", :match=>{"regex"=>"^Zapier$"}, :platform=>"bot"}, {:app=>"Zencast", :match=>{"regex"=>"^Zencastr/"}, :platform=>"bot"}].freeze
|
|
@@ -7,15 +7,19 @@ module PodIdent
|
|
|
7
7
|
class RuleParser
|
|
8
8
|
RULES_YAML = File.expand_path('../detection_rules.yml', __dir__)
|
|
9
9
|
RULES_BOTS_YAML = File.expand_path('../detection_rules_bots.yml', __dir__)
|
|
10
|
+
CUSTOM_RULES_BOTS_YAML = File.expand_path('../detection_rules_custom_bots.yml', __dir__)
|
|
10
11
|
RULES_RUBY = File.expand_path('detection_rules.rb', __dir__)
|
|
11
12
|
RULES_BOTS_RUBY = File.expand_path('detection_rules_bots.rb', __dir__)
|
|
13
|
+
CUSTOM_RULES_BOTS_RUBY = File.expand_path('detection_rules_custom_bots.rb', __dir__)
|
|
12
14
|
RULES_SPEC_RUBY = File.expand_path('../../spec/detection_rules.rb', __dir__)
|
|
13
15
|
RULES_SPEC_BOTS_RUBY = File.expand_path('../../spec/detection_rules_bots.rb', __dir__)
|
|
16
|
+
RULES_SPEC_CUSTOM_BOTS_RUBY = File.expand_path('../../spec/detection_rules_custom_bots.rb',
|
|
17
|
+
__dir__)
|
|
14
18
|
DO_NOT_EDIT_TEXT = <<~HEREDOC
|
|
15
19
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running \"bin/parse-rules\"\n
|
|
16
20
|
HEREDOC
|
|
17
21
|
|
|
18
|
-
attr_accessor :bots_rules, :rules
|
|
22
|
+
attr_accessor :bots_rules, :custom_bots_rules, :rules
|
|
19
23
|
|
|
20
24
|
def call
|
|
21
25
|
parse_yaml
|
|
@@ -42,6 +46,14 @@ module PodIdent
|
|
|
42
46
|
}
|
|
43
47
|
end
|
|
44
48
|
|
|
49
|
+
cleaned_custom_bots_rules = custom_bots_rules.dup.map do |rule|
|
|
50
|
+
{
|
|
51
|
+
app: rule['app'],
|
|
52
|
+
match: rule['match'],
|
|
53
|
+
platform: rule['platform']
|
|
54
|
+
}
|
|
55
|
+
end
|
|
56
|
+
|
|
45
57
|
File.open(RULES_RUBY, 'w') do |file|
|
|
46
58
|
file.write(DO_NOT_EDIT_TEXT)
|
|
47
59
|
file.write("RULES = #{cleaned_rules}.freeze")
|
|
@@ -51,6 +63,11 @@ module PodIdent
|
|
|
51
63
|
file.write(DO_NOT_EDIT_TEXT)
|
|
52
64
|
file.write("BOTS_RULES = #{cleaned_bots_rules}.freeze")
|
|
53
65
|
end
|
|
66
|
+
|
|
67
|
+
File.open(CUSTOM_RULES_BOTS_RUBY, 'w') do |file|
|
|
68
|
+
file.write(DO_NOT_EDIT_TEXT)
|
|
69
|
+
file.write("CUSTOM_BOTS_RULES = #{cleaned_custom_bots_rules}.freeze")
|
|
70
|
+
end
|
|
54
71
|
end
|
|
55
72
|
|
|
56
73
|
def write_rules_spec_rb
|
|
@@ -64,6 +81,11 @@ module PodIdent
|
|
|
64
81
|
Hash[rule.map { |(k, v)| [k.to_sym, v] }]
|
|
65
82
|
end
|
|
66
83
|
|
|
84
|
+
all_custom_bot_rules = custom_bots_rules.dup.map do |rule|
|
|
85
|
+
# symbolize keys
|
|
86
|
+
Hash[rule.map { |(k, v)| [k.to_sym, v] }]
|
|
87
|
+
end
|
|
88
|
+
|
|
67
89
|
File.open(RULES_SPEC_RUBY, 'w') do |file|
|
|
68
90
|
file.write(DO_NOT_EDIT_TEXT)
|
|
69
91
|
file.write("RULES = #{all_rules}.freeze")
|
|
@@ -73,11 +95,17 @@ module PodIdent
|
|
|
73
95
|
file.write(DO_NOT_EDIT_TEXT)
|
|
74
96
|
file.write("BOTS_RULES = #{all_bot_rules}.freeze")
|
|
75
97
|
end
|
|
98
|
+
|
|
99
|
+
File.open(RULES_SPEC_CUSTOM_BOTS_RUBY, 'w') do |file|
|
|
100
|
+
file.write(DO_NOT_EDIT_TEXT)
|
|
101
|
+
file.write("CUSTOM_BOTS_RULES = #{all_custom_bot_rules}.freeze")
|
|
102
|
+
end
|
|
76
103
|
end
|
|
77
104
|
|
|
78
105
|
def parse_yaml
|
|
79
106
|
@rules = YAML.safe_load(rules_yaml_file_content)
|
|
80
107
|
@bots_rules = YAML.safe_load(bot_rules_yaml_file_content)
|
|
108
|
+
@custom_bots_rules = YAML.safe_load(custom_bot_rules_yaml_file_content)
|
|
81
109
|
end
|
|
82
110
|
|
|
83
111
|
def rules_yaml_file_content
|
|
@@ -87,5 +115,9 @@ module PodIdent
|
|
|
87
115
|
def bot_rules_yaml_file_content
|
|
88
116
|
File.read(RULES_BOTS_YAML)
|
|
89
117
|
end
|
|
118
|
+
|
|
119
|
+
def custom_bot_rules_yaml_file_content
|
|
120
|
+
File.read(CUSTOM_RULES_BOTS_YAML)
|
|
121
|
+
end
|
|
90
122
|
end
|
|
91
123
|
end
|
data/lib/pod_ident/version.rb
CHANGED
data/lib/pod_ident.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require 'pod_ident/version'
|
|
4
4
|
require 'pod_ident/detection_rules'
|
|
5
5
|
require 'pod_ident/detection_rules_bots'
|
|
6
|
+
require 'pod_ident/detection_rules_custom_bots'
|
|
6
7
|
require 'pod_ident/detection_result'
|
|
7
8
|
|
|
8
9
|
module PodIdent
|
|
@@ -22,7 +23,7 @@ module PodIdent
|
|
|
22
23
|
# !~ /[^[:space:]]/ is what Active Support does to detect blank strings
|
|
23
24
|
return nil if user_agent_string !~ /[^[:space:]]/
|
|
24
25
|
|
|
25
|
-
rule = find_rule || find_rule_bots
|
|
26
|
+
rule = find_rule || find_rule_bots || find_rule_custom_bots
|
|
26
27
|
|
|
27
28
|
self.result = DetectionResult.new(rule, user_agent_string)
|
|
28
29
|
identify_platform if result.positive?
|
|
@@ -31,7 +32,7 @@ module PodIdent
|
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
def self.bot?
|
|
34
|
-
find_rule_bots
|
|
35
|
+
find_rule_bots || find_rule_custom_bots
|
|
35
36
|
end
|
|
36
37
|
|
|
37
38
|
private
|
|
@@ -67,6 +68,15 @@ module PodIdent
|
|
|
67
68
|
end
|
|
68
69
|
end
|
|
69
70
|
|
|
71
|
+
def find_rule_custom_bots
|
|
72
|
+
CUSTOM_BOTS_RULES.detect do |rule|
|
|
73
|
+
match = rule.fetch(:match)
|
|
74
|
+
regex = match['regex']
|
|
75
|
+
match = Regexp.new(regex).match(user_agent_string)
|
|
76
|
+
!match.nil?
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
70
80
|
def apply_starts_with(starts_with, found)
|
|
71
81
|
return found unless starts_with
|
|
72
82
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pod_ident
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.
|
|
4
|
+
version: 1.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Podigee GmbH
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-10-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -96,6 +96,7 @@ files:
|
|
|
96
96
|
- ".rspec"
|
|
97
97
|
- ".rubocop.yml"
|
|
98
98
|
- ".ruby-version"
|
|
99
|
+
- CHANGELOG.md
|
|
99
100
|
- Gemfile
|
|
100
101
|
- Gemfile.lock
|
|
101
102
|
- README.md
|
|
@@ -105,10 +106,12 @@ files:
|
|
|
105
106
|
- bin/setup
|
|
106
107
|
- lib/detection_rules.yml
|
|
107
108
|
- lib/detection_rules_bots.yml
|
|
109
|
+
- lib/detection_rules_custom_bots.yml
|
|
108
110
|
- lib/pod_ident.rb
|
|
109
111
|
- lib/pod_ident/detection_result.rb
|
|
110
112
|
- lib/pod_ident/detection_rules.rb
|
|
111
113
|
- lib/pod_ident/detection_rules_bots.rb
|
|
114
|
+
- lib/pod_ident/detection_rules_custom_bots.rb
|
|
112
115
|
- lib/pod_ident/rule_parser.rb
|
|
113
116
|
- lib/pod_ident/version.rb
|
|
114
117
|
- pod_ident.gemspec
|