pod_ident 1.1.2 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/lib/detection_rules.yml +6 -2
- data/lib/detection_rules_bots.yml +92 -3
- data/lib/detection_rules_custom_bots.yml +15 -0
- data/lib/pod_ident/detection_rules.rb +1 -1
- data/lib/pod_ident/detection_rules_bots.rb +1 -1
- data/lib/pod_ident/detection_rules_custom_bots.rb +3 -0
- data/lib/pod_ident/rule_parser.rb +33 -1
- data/lib/pod_ident/version.rb +1 -1
- data/lib/pod_ident.rb +12 -2
- data/pod_ident.gemspec +4 -0
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: afcdcf2885da59cf91c1f669a4a8702d837b4845853d7bc6ea9547b11b0c4d9d
|
4
|
+
data.tar.gz: 38f68e13ac6f5c61e5871a548c5ed00ccd946d0f53234feb2e88f45b792c5874
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 471b01dc07a2fcb7f968a7157ce7bd557e8a9ca23d7fa906b7074a5a42a143209d277792905b96ea31844ea86020e3e20ce5b40a8dc03cd2ad6cfa342cc1f9c8
|
7
|
+
data.tar.gz: 8c1e868617be4bacaa491db6d8f8cb1edf23bf92a57fd03b000e0f1dd3d40f4f1abbb46be7298d45c10126bc85f66190ae76545590d53fc697f15211195524bc
|
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
data/lib/detection_rules.yml
CHANGED
@@ -191,7 +191,7 @@
|
|
191
191
|
- userAgent: Overcast/3.0 (+http://overcast.fm/; iOS podcast app) BMID/E67A460F81
|
192
192
|
platform: iOS
|
193
193
|
|
194
|
-
- app:
|
194
|
+
- app: radio.net
|
195
195
|
match:
|
196
196
|
startsWith:
|
197
197
|
- radio.net
|
@@ -204,14 +204,18 @@
|
|
204
204
|
- radio.pt
|
205
205
|
- radio.pl
|
206
206
|
platform:
|
207
|
-
regex: '(Android|Darwin)'
|
207
|
+
regex: '(Android|Darwin|Linux)'
|
208
208
|
replacements:
|
209
209
|
- name: Darwin
|
210
210
|
replaceWith: iOS
|
211
|
+
- name: Linux
|
212
|
+
replaceWith: Android
|
211
213
|
test:
|
212
214
|
userAgents:
|
213
215
|
- userAgent: radio.de 4.9.3 (softwinner/QT-7; Android 4.4.2; de_DE)
|
214
216
|
platform: Android
|
217
|
+
- userAgent: radio.de/5.13.0. 1-app (Linux; 13) ExoPlayerLib/2.19.1
|
218
|
+
platform: Android
|
215
219
|
- userAgent: radio.de/3735 CFNetwork/758.5.3 Darwin/15.6.0
|
216
220
|
platform: iOS
|
217
221
|
|
@@ -6,6 +6,13 @@
|
|
6
6
|
test:
|
7
7
|
userAgents:
|
8
8
|
- userAgent: AAABot
|
9
|
+
- app: Adswizz
|
10
|
+
match:
|
11
|
+
regex: "^AIS VirtualListener"
|
12
|
+
platform: bot
|
13
|
+
test:
|
14
|
+
userAgents:
|
15
|
+
- userAgent: AIS VirtualListener
|
9
16
|
- app: AhrefsBot
|
10
17
|
match:
|
11
18
|
regex: AhrefsBot/
|
@@ -44,6 +51,14 @@
|
|
44
51
|
test:
|
45
52
|
userAgents:
|
46
53
|
- userAgent: AnchorImport/1.0
|
54
|
+
- app: Apache HTTPClient
|
55
|
+
match:
|
56
|
+
regex: "^Apache\\-HttpClient"
|
57
|
+
platform: bot
|
58
|
+
test:
|
59
|
+
userAgents:
|
60
|
+
- userAgent: Apache-HttpClient/5.1.4 (Java/1.8.0_144)
|
61
|
+
- userAgent: Apache-HttpClient/5.1.4 (Java/1.8.0_91)
|
47
62
|
- app: Apple Podcasts (Watch)
|
48
63
|
match:
|
49
64
|
regex: "^atc/|\\(null\\) watchOS/"
|
@@ -69,7 +84,7 @@
|
|
69
84
|
(6; dt:155)"
|
70
85
|
- app: Apple Podcasts automated checks
|
71
86
|
match:
|
72
|
-
regex: "(iTMS|itunesstored)"
|
87
|
+
regex: "(iTMS|itunesstored|itms)"
|
73
88
|
platform: bot
|
74
89
|
- app: Applebot
|
75
90
|
match:
|
@@ -158,11 +173,12 @@
|
|
158
173
|
- userAgent: CastFeedValidator/3.0.5 (https://castfeedvalidator.com)
|
159
174
|
- app: Castopod
|
160
175
|
match:
|
161
|
-
regex: Castopod
|
176
|
+
regex: Castopod/\d
|
162
177
|
platform: bot
|
163
178
|
test:
|
164
179
|
userAgents:
|
165
180
|
- userAgent: Castopod/1.0
|
181
|
+
- userAgent: Castopod/1.1.2
|
166
182
|
- app: Chartable
|
167
183
|
match:
|
168
184
|
regex: "^Trackable/"
|
@@ -279,6 +295,14 @@
|
|
279
295
|
test:
|
280
296
|
userAgents:
|
281
297
|
- userAgent: Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)
|
298
|
+
- app: Feedspot Fetcher
|
299
|
+
match:
|
300
|
+
regex: https://www\.feedspot\.com/fs/fetcher
|
301
|
+
platform: bot
|
302
|
+
test:
|
303
|
+
userAgents:
|
304
|
+
- userAgent: Mozilla/5.0 (compatible; Feedspot/1.0 (+https://www.feedspot.com/fs/fetcher;
|
305
|
+
like FeedFetcher-Google)
|
282
306
|
- app: FlexGet
|
283
307
|
match:
|
284
308
|
regex: "^FlexGet/"
|
@@ -311,6 +335,13 @@
|
|
311
335
|
test:
|
312
336
|
userAgents:
|
313
337
|
- userAgent: Goodpods/2.2
|
338
|
+
- app: Google Adsense Bot
|
339
|
+
match:
|
340
|
+
regex: Mediapartners-Google
|
341
|
+
platform: bot
|
342
|
+
test:
|
343
|
+
userAgents:
|
344
|
+
- userAgent: Mediapartners-Google
|
314
345
|
- app: Google AdsBot
|
315
346
|
match:
|
316
347
|
regex: AdsBot-Google
|
@@ -321,7 +352,7 @@
|
|
321
352
|
platform: bot
|
322
353
|
- app: Googlebot
|
323
354
|
match:
|
324
|
-
regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google
|
355
|
+
regex: Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google|google-xrawler
|
325
356
|
platform: bot
|
326
357
|
test:
|
327
358
|
userAgents:
|
@@ -331,6 +362,7 @@
|
|
331
362
|
- userAgent: Googlebot-Image/1.0
|
332
363
|
- userAgent: Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers;
|
333
364
|
feed-id=4653447469100830145)
|
365
|
+
- userAgent: google-xrawler
|
334
366
|
- app: Gumball
|
335
367
|
match:
|
336
368
|
regex: "^Gumball"
|
@@ -469,6 +501,13 @@
|
|
469
501
|
test:
|
470
502
|
userAgents:
|
471
503
|
- userAgent: Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)
|
504
|
+
- app: Newspaper
|
505
|
+
match:
|
506
|
+
regex: "^newspaper/\\d"
|
507
|
+
platform: bot
|
508
|
+
test:
|
509
|
+
userAgents:
|
510
|
+
- userAgent: newspaper/0.3.0
|
472
511
|
- app: OgScrper
|
473
512
|
match:
|
474
513
|
regex: OgScrper
|
@@ -538,6 +577,20 @@
|
|
538
577
|
test:
|
539
578
|
userAgents:
|
540
579
|
- userAgent: Podcastindex.org/v0.3.3 (Aggrivate)
|
580
|
+
- app: Podcast de facto Standard
|
581
|
+
match:
|
582
|
+
regex: "^PodcastStandard/"
|
583
|
+
platform: bot
|
584
|
+
test:
|
585
|
+
userAgents:
|
586
|
+
- userAgent: PodcastStandard/0.1.0 (+https://podcast-standard.org)
|
587
|
+
- app: Podcast Archiver
|
588
|
+
match:
|
589
|
+
regex: "^Podcast%20Archiver/"
|
590
|
+
platform: bot
|
591
|
+
test:
|
592
|
+
userAgents:
|
593
|
+
- userAgent: Podcast%20Archiver/3000000003 CFNetwork/1406.0.4 Darwin/22.4.0
|
541
594
|
- app: PodCloud
|
542
595
|
match:
|
543
596
|
regex: podCloud
|
@@ -637,6 +690,13 @@
|
|
637
690
|
match:
|
638
691
|
regex: python-requests
|
639
692
|
platform: bot
|
693
|
+
- app: Qiniu spider
|
694
|
+
match:
|
695
|
+
regex: qiniu.*spider
|
696
|
+
platform: bot
|
697
|
+
test:
|
698
|
+
userAgents:
|
699
|
+
- userAgent: z0-qiniu-imgstg-spider-1.0
|
640
700
|
- app: RedCircle
|
641
701
|
match:
|
642
702
|
regex: RedCircle
|
@@ -742,6 +802,14 @@
|
|
742
802
|
test:
|
743
803
|
userAgents:
|
744
804
|
- userAgent: SubstackContentFetch/1.0 (https://substack.com/)
|
805
|
+
- app: SupportingCast
|
806
|
+
match:
|
807
|
+
regex: "^SupportingCast(/.*)?$"
|
808
|
+
platform: bot
|
809
|
+
test:
|
810
|
+
userAgents:
|
811
|
+
- userAgent: SupportingCast/1.0
|
812
|
+
- userAgent: SupportingCast
|
745
813
|
- app: Timpi search crawler
|
746
814
|
match:
|
747
815
|
regex: Timpibot/
|
@@ -790,6 +858,20 @@
|
|
790
858
|
test:
|
791
859
|
userAgents:
|
792
860
|
- userAgent: TelegramBot (like TwitterBot)
|
861
|
+
- app: theTradeDesk Content Web Scraper
|
862
|
+
match:
|
863
|
+
regex: TTD-Content
|
864
|
+
platform: bot
|
865
|
+
test:
|
866
|
+
userAgents:
|
867
|
+
- userAgent: Mozilla/5.0 (compatible; TTD-Content; +https://www.thetradedesk.com/general/ttd-content)
|
868
|
+
- app: Veritone Engine Toolkit
|
869
|
+
match:
|
870
|
+
regex: "^veritone/engine-toolkit"
|
871
|
+
platform: bot
|
872
|
+
test:
|
873
|
+
userAgents:
|
874
|
+
- userAgent: veritone/engine-toolkit-3.0
|
793
875
|
- app: Vurbl
|
794
876
|
match:
|
795
877
|
regex: VurblBot
|
@@ -815,6 +897,13 @@
|
|
815
897
|
test:
|
816
898
|
userAgents:
|
817
899
|
- userAgent: ZDM/4.0; Windows Mobile 7.0;
|
900
|
+
- app: Windows PowerShell
|
901
|
+
match:
|
902
|
+
regex: WindowsPowerShell/
|
903
|
+
platform: bot
|
904
|
+
test:
|
905
|
+
userAgents:
|
906
|
+
- userAgent: Mozilla/5.0 (Windows NT; Windows NT 10.0; en-US) WindowsPowerShell/5.1.19041.2673
|
818
907
|
- app: WordPress
|
819
908
|
match:
|
820
909
|
regex: "^WordPress"
|
@@ -1,3 +1,3 @@
|
|
1
1
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running "bin/parse-rules"
|
2
2
|
|
3
|
-
RULES = [{:app=>"Apple Podcasts", :match=>{"startsWith"=>"AppleCoreMedia"}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"startsWith"=>"itunesstored"}, :platform=>{"regex"=>"(iPad|iPod|iPhone)"}}, {:app=>"iTunes", :match=>{"startsWith"=>"iTunes", "excluding"=>{"regex"=>"Downcast|iCatcher|SqueezeCenter|SqueezeNetwork|MusicServer"}}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"includes"=>"watchOS", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"text"=>"watchOS"}}, {:app=>"Spotify", :match=>{"startsWith"=>"Spotify", "excluding"=>{"regex"=>"^Spotify/1\\.0$"}}, :platform=>{"regexes"=>["(Android|iOS)"]}}, {:app=>"CastBox", :match=>{"startsWith"=>["CastBox", "Castbox"]}, :platform=>{"regexes"=>["(Android|iOS|OS\\sVersion)", "(CastBox)"], "replacements"=>[{"name"=>"OS Version", "replaceWith"=>"iOS"}, {"name"=>"CastBox", "replaceWith"=>"Android"}]}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Alexa"}, :platform=>{"text"=>"Alexa-capable device"}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Echo"}, :platform=>{"text"=>"Amazon Echo", "userAgents"=>[{"userAgent"=>"Echo/1.0(APNG)", "platform"=>"Amazon Echo"}]}}, {:app=>"Deezer", :match=>{"startsWith"=>"Deezer"}, :platform=>{"regexes"=>["(Android|Darwin)", "\\((\\w+\\s*\\w*)"], "replacements"=>[{"name"=>"osx", "replaceWith"=>"Mac"}, {"name"=>"Darwin", "replaceWith"=>"Apple Device"}]}}, {:app=>"Overcast", :match=>{"startsWith"=>"Overcast", "excluding"=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}}, :platform=>{"text"=>"iOS"}}, {:app=>"
|
3
|
+
RULES = [{:app=>"Apple Podcasts", :match=>{"startsWith"=>"AppleCoreMedia"}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"startsWith"=>"itunesstored"}, :platform=>{"regex"=>"(iPad|iPod|iPhone)"}}, {:app=>"iTunes", :match=>{"startsWith"=>"iTunes", "excluding"=>{"regex"=>"Downcast|iCatcher|SqueezeCenter|SqueezeNetwork|MusicServer"}}, :platform=>{"regex"=>"\\((\\w+\\s*\\w*)", "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"Apple Podcasts", :match=>{"includes"=>"watchOS", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"text"=>"watchOS"}}, {:app=>"Spotify", :match=>{"startsWith"=>"Spotify", "excluding"=>{"regex"=>"^Spotify/1\\.0$"}}, :platform=>{"regexes"=>["(Android|iOS)"]}}, {:app=>"CastBox", :match=>{"startsWith"=>["CastBox", "Castbox"]}, :platform=>{"regexes"=>["(Android|iOS|OS\\sVersion)", "(CastBox)"], "replacements"=>[{"name"=>"OS Version", "replaceWith"=>"iOS"}, {"name"=>"CastBox", "replaceWith"=>"Android"}]}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Alexa"}, :platform=>{"text"=>"Alexa-capable device"}}, {:app=>"Amazon Alexa", :match=>{"startsWith"=>"Echo"}, :platform=>{"text"=>"Amazon Echo", "userAgents"=>[{"userAgent"=>"Echo/1.0(APNG)", "platform"=>"Amazon Echo"}]}}, {:app=>"Deezer", :match=>{"startsWith"=>"Deezer"}, :platform=>{"regexes"=>["(Android|Darwin)", "\\((\\w+\\s*\\w*)"], "replacements"=>[{"name"=>"osx", "replaceWith"=>"Mac"}, {"name"=>"Darwin", "replaceWith"=>"Apple Device"}]}}, {:app=>"Overcast", :match=>{"startsWith"=>"Overcast", "excluding"=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}}, :platform=>{"text"=>"iOS"}}, {:app=>"radio.net", :match=>{"startsWith"=>["radio.net", "radio.de", "radio.at", "radio.fr", "radio.dk", "radio.es", "radio.it", "radio.pt", "radio.pl"]}, :platform=>{"regex"=>"(Android|Darwin|Linux)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}, {"name"=>"Linux", 
"replaceWith"=>"Android"}]}}, {:app=>"PocketCasts", :match=>{"startsWith"=>["PocketCasts", "Pocket Casts", "Shifty Jelly Pocket Casts"]}, :platform=>{"regex"=>"(Android)"}}, {:app=>"Himalaya", :match=>{"startsWith"=>"Himalaya"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"ExoPlayer", :match=>{"startsWith"=>["ExoPlayer", "yourApplicationName", "null", "md5d42223d6ee7473da82e8136ffb794439.App"]}, :platform=>{"text"=>"Android"}}, {:app=>"Download Manager", :match=>{"startsWith"=>"AndroidDownloadManager"}, :platform=>{"text"=>"Android"}}, {:app=>"Castamatic", :match=>{"startsWith"=>"Castamatic"}, :platform=>{"text"=>"iOS"}}, {:app=>"The Podcast App (podcast.app)", :match=>{"includes"=>"The Podcast App"}, :platform=>{"text"=>"iOS"}}, {:app=>"CastMix", :match=>{"startsWith"=>"CastMix"}, :platform=>{"text"=>"Android"}}, {:app=>"Unknown App", :match=>{"startsWith"=>"okhttp"}, :platform=>{"text"=>"Android"}}, {:app=>"Stagefright Media Playback Engine", :match=>{"includes"=>"stagefright", "excluding"=>{"text"=>"stagefright alternative"}}, :platform=>{"regex"=>"(Fire OS|Android)"}}, {:app=>"LG Player", :match=>{"startsWith"=>"Player/LG Player", "includes"=>["LG Player", "LG-Player"]}, :platform=>{"text"=>"Android"}}, {:app=>"Android Browser", :match=>{"startsWith"=>"Dalvik"}, :platform=>{"text"=>"Android"}}, {:app=>"Acast", :match=>{"startsWith"=>"Acast"}, :platform=>{"regex"=>"(Darwin|Android|Windows)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Castro", :match=>{"startsWith"=>"Castro"}, :platform=>{"text"=>"iOS"}}, {:app=>"Breaker", :match=>{"startsWith"=>"Breaker"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Podcast Addict", :match=>{"startsWith"=>["PodcastAddict", "Podcast Addict"]}, :platform=>{"text"=>"Android"}}, {:app=>"Podbean", :match=>{"startsWith"=>"Podbean", "excluding"=>{"regex"=>"^Podbean Importer"}}, 
:platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Google Podcasts", :match=>{"includes"=>["GSA"], "excluding"=>{"regex"=>"iPhone|iPad"}}, :platform=>{"text"=>"Android"}}, {:app=>"Google Search App", :match=>{"includes"=>["GSA"], "excluding"=>{"regex"=>"Android"}}, :platform=>{"regexes"=>["(iPhone|iPad)"]}}, {:app=>"Google Podcasts", :match=>{"includes"=>"GoogleChirp"}, :platform=>{"text"=>"Google Smart Speaker"}}, {:app=>"Stitcher", :match=>{"startsWith"=>"Stitcher"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"TuneIn", :match=>{"startsWith"=>"TuneIn"}, :platform=>{"regex"=>"(Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"PodCruncher", :match=>{"startsWith"=>"PodCruncher"}, :platform=>{"text"=>"iOS"}}, {:app=>"iCatcher!", :match=>{"startsWith"=>"iCatcher!", "includes"=>"iCatcher!"}, :platform=>{"regex"=>"\\((iPhone|iPad|iPod touch)", "fallback"=>"iOS"}}, {:app=>"Castaway", :match=>{"startsWith"=>"Castaway"}, :platform=>{"text"=>"iOS"}}, {:app=>"Instacast", :match=>{"startsWith"=>"Instacast"}, :platform=>{"text"=>"Apple Device"}}, {:app=>"VLC", :match=>{"startsWith"=>["VLC", "LibVLC"], "includes"=>"VLC"}, :platform=>{"regex"=>"(Android|iPhone)"}}, {:app=>"Podcast Republic", :match=>{"startsWith"=>"PodcastRepublic"}, :platform=>{"text"=>"Android"}}, {:app=>"DoggCatcher", :match=>{"includes"=>"DoggCatcher"}, :platform=>{"text"=>"Android"}}, {:app=>"Player FM", :match=>{"startsWith"=>["Player FM", "Player%20FM"]}, :platform=>{"regex"=>"(Darwin)", "fallback"=>"Android", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}]}}, {:app=>"Podkicker", :match=>{"startsWith"=>"Podkicker"}, :platform=>{"text"=>"Android"}}, {:app=>"AntennaPod", :match=>{"startsWith"=>"AntennaPod"}, :platform=>{"text"=>"Android"}}, {:app=>"Downcast", :match=>{"startsWith"=>"Downcast", "includes"=>"Downcast"}, :platform=>{"regex"=>"\\((iPhone|iPad|iPod touch|Mac)"}}, {:app=>"gPodder", :match=>{"startsWith"=>"gPodder"}, 
:platform=>{"regex"=>"(Linux|Windows)"}}, {:app=>"Podcatcher Deluxe", :match=>{"includes"=>"Podcatcher Deluxe"}, :platform=>{"text"=>"Android"}}, {:app=>"Procast", :match=>{"startsWith"=>["Procast", "ProCast"]}, :platform=>{"text"=>"iOS"}}, {:app=>"RSSRadio", :match=>{"startsWith"=>"RSSRadio"}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch|Darwin)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"iOS"}], "fallback"=>"iOS"}}, {:app=>"Podcat", :match=>{"startsWith"=>"Podcat", "excluding"=>{"regex"=>"Podcatcher"}}, :platform=>{"text"=>"iOS"}}, {:app=>"Audio Now", :match=>{"startsWith"=>"AudioNow", "includes"=>"audionow"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"DIE ZEIT App", :match=>{"includes"=>"ZONApp"}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch|Android)"}}, {:app=>"F.A.Z Der Tag App", :match=>{"includes"=>"FAZDERTAG"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"ANTENNE BAYERN App", :match=>{"includes"=>"AntenneBayern"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"BuzzFeed App", :match=>{"includes"=>"buzzfeed"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"Facebook in-app browser", :match=>{"includes"=>["FBAN", "FBAV"]}, :platform=>{"regexes"=>["\\((iPhone|iPad|iPod touch)", "(Android)"]}}, {:app=>"Instagram in-app browser", :match=>{"includes"=>"Instagram"}, :platform=>{"regexes"=>["(iPad)", "(iPhone|Android)"]}}, {:app=>"Twitter in-app browser", :match=>{"includes"=>"Twitter", "excluding"=>{"regex"=>"^TelegramBot "}}, :platform=>{"regex"=>"(iPhone|iPad|Darwin|Android)", "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"Apple device"}]}}, {:app=>"Pinterest in-app browser", :match=>{"includes"=>"Pinterest"}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch)"]}}, {:app=>"Windows Media Player", :match=>{"startsWith"=>["NSPlayer", "WMPlayer"]}, :platform=>{"text"=>"Windows"}}, {:app=>"Sonos", :match=>{"includes"=>"Sonos"}, 
:platform=>{"text"=>"Sonos"}}, {:app=>"Internet Explorer", :match=>{"includes"=>"Trident"}, :platform=>{"regex"=>"(Windows Phone)", "fallback"=>"Windows"}}, {:app=>"Kodi Media Center", :match=>{"startsWith"=>"Kodi", "includes"=>"Kodi"}, :platform=>{"regex"=>"(X11|Android|Windows)", "replacements"=>[{"name"=>"X11", "replaceWith"=>"Linux"}]}}, {:app=>"HermesPod", :match=>{"startsWith"=>"+hermespod.com"}, :platform=>{"text"=>"Windows"}}, {:app=>"ViennaRSS", :match=>{"includes"=>"Vienna"}, :platform=>{"text"=>"Mac"}}, {:app=>"Unknown client", :match=>{"startsWith"=>"(null)", "excluding"=>{"regex"=>"^atc/|\\(null\\) watchOS/"}}, :platform=>{"regex"=>"(iPhone|iPad|iPod touch)"}}, {:app=>"Clementine Music Player", :match=>{"startsWith"=>"Clementine"}, :platform=>{"text"=>"Unknown"}}, {:app=>"Flipboard", :match=>{"includes"=>"Flipboard", "excluding"=>{"regex"=>"FlipboardProxy/"}}, :platform=>{"regexes"=>["(Android)", "\\((iPhone|iPad|iPod touch|Macintosh)"], "replacements"=>[{"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"iVoox", :match=>{"startsWith"=>["ivoox", "iVoox"]}, :platform=>{"regexes"=>["(Android)", "(Darwin)\\/", "\\((iPhone|iPad|iPod touch|Macintosh)"], "replacements"=>[{"name"=>"Darwin", "replaceWith"=>"Apple device"}, {"name"=>"Macintosh", "replaceWith"=>"Mac"}]}}, {:app=>"FYEO", :match=>{"startsWith"=>"FYEO"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Amazon Music", :match=>{"startsWith"=>"AmazonMusic"}, :platform=>{"regex"=>"(iPhone|Android|MacOS|Fire OS|iPad)"}}, {:app=>"Podimo", :match=>{"startsWith"=>"Podimo"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"ARD Audiothek", :match=>{"startsWith"=>"ARD Audiothek"}, :platform=>{"regex"=>"(iOS|Android)"}}, {:app=>"Samsung Podcast", :match=>{"startsWith"=>"sp-agent"}, :platform=>{"text"=>"Android"}}, {:app=>"RTL+", :match=>{"startsWith"=>"RTL+"}, :platform=>{"regex"=>"(iOS|iPhone|Android)"}}].freeze
|
@@ -1,3 +1,3 @@
|
|
1
1
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running "bin/parse-rules"
|
2
2
|
|
3
|
-
BOTS_RULES = [{:app=>"AAABot - unknown bot", :match=>{"regex"=>"AAABot"}, :platform=>"bot"}, {:app=>"AhrefsBot", :match=>{"regex"=>"AhrefsBot/"}, :platform=>"bot"}, {:app=>"AirableBot", :match=>{"regex"=>"AirableBot-Podcast/"}, :platform=>"bot"}, {:app=>"Alexa Flash Briefing cache", :match=>{"regex"=>"^AmazonNewsContentService"}, :platform=>"bot"}, {:app=>"AlignaBot", :match=>{"regex"=>"^Alignabot"}, :platform=>"bot"}, {:app=>"Amazon Music Podcasts Bot", :match=>{"regex"=>"^Amazon Music Podcast"}, :platform=>"bot"}, {:app=>"Anchor Importer", :match=>{"regex"=>"AnchorImport"}, :platform=>"bot"}, {:app=>"Apple Podcasts (Watch)", :match=>{"regex"=>"^atc/|\\(null\\) watchOS/"}, :platform=>"bot"}, {:app=>"Apple Podcasts automated checks", :match=>{"regex"=>"(iTMS|itunesstored)"}, :platform=>"bot"}, {:app=>"Applebot", :match=>{"regex"=>"^Applebot/"}, :platform=>"bot"}, {:app=>"Archive.org", :match=>{"regex"=>"archive\\.org_bot"}, :platform=>"bot"}, {:app=>"atheerfm", :match=>{"regex"=>"^atheerfm/"}, :platform=>"bot"}, {:app=>"Audiomack", :match=>{"regex"=>"^Audiomack Podcast Processor/"}, :platform=>"bot"}, {:app=>"AudioWave feed parser", :match=>{"regex"=>"^AudioWaveBot/1\\.0"}, :platform=>"bot"}, {:app=>"AwarioSmartBot", :match=>{"regex"=>"^AwarioSmartBot/"}, :platform=>"bot"}, {:app=>"Babbar", :match=>{"regex"=>"Barkrowler/"}, :platform=>"bot"}, {:app=>"Baidu", :match=>{"regex"=>"\\(ce\\.baidu\\.com"}, :platform=>"bot"}, {:app=>"bbot", :match=>{"regex"=>"^bbot/"}, :platform=>"bot"}, {:app=>"British Library", :match=>{"regex"=>"^bl\\.uk_ldfc_bot"}, :platform=>"bot"}, {:app=>"Blubrry Migration Service", :match=>{"regex"=>"^Blubrry Migration Service"}, :platform=>"bot"}, {:app=>"Buzzsprout Importer", :match=>{"regex"=>"^Buzzsprout Importer"}, :platform=>"bot"}, {:app=>"CastFeedValidator", :match=>{"regex"=>"^CastFeedValidator/"}, :platform=>"bot"}, {:app=>"Castopod", :match=>{"regex"=>"Castopod/1\\.0"}, :platform=>"bot"}, {:app=>"Chartable", 
:match=>{"regex"=>"^Trackable/"}, :platform=>"bot"}, {:app=>"Clark-Crawler, unknown", :match=>{"regex"=>"^clark-crawler2"}, :platform=>"bot"}, {:app=>"Critical Mention", :match=>{"regex"=>"^Podcast-CriticalMention/"}, :platform=>"bot"}, {:app=>"curl", :match=>{"regex"=>"^curl|^libcurl/|^PycURL/| curl/"}, :platform=>"bot"}, {:app=>"DataforSEO", :match=>{"regex"=>"DataForSeoBot/"}, :platform=>"bot"}, {:app=>"Datagnion Bot", :match=>{"regex"=>"^datagnionbot"}, :platform=>"bot"}, {:app=>"dataprovider.com", :match=>{"regex"=>"Dataprovider\\.com"}, :platform=>"bot"}, {:app=>"Daum", :match=>{"regex"=>"http://cs\\.daum\\.net"}, :platform=>"bot"}, {:app=>"Deezer Podcasters", :match=>{"regex"=>"^Deezer Podcasters/1\\.0"}, :platform=>"bot"}, {:app=>"Digg", :match=>{"regex"=>"^Digg "}, :platform=>"bot"}, {:app=>"dorada", :match=>{"regex"=>"support@dorada\\.co\\.uk"}, :platform=>"bot"}, {:app=>"DotBot", :match=>{"regex"=>"DotBot"}, :platform=>"bot"}, {:app=>"Downcast Bot", :match=>{"regex"=>"downcast feed consumer/"}, :platform=>"bot"}, {:app=>"DuckDuckBot", :match=>{"regex"=>"DuckDuckBot"}, :platform=>"bot"}, {:app=>"F-Secure Riddler", :match=>{"regex"=>"^Riddler "}, :platform=>"bot"}, {:app=>"Facebook Bot", :match=>{"regex"=>"FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/"}, :platform=>"bot"}, {:app=>"Feedly", :match=>{"regex"=>"^Feedly/"}, :platform=>"bot"}, {:app=>"FlexGet", :match=>{"regex"=>"^FlexGet/"}, :platform=>"bot"}, {:app=>"Flipboard Proxy", :match=>{"regex"=>"FlipboardProxy/"}, :platform=>"bot"}, {:app=>"Fyyd", :match=>{"regex"=>"^fyyd-poll"}, :platform=>"bot"}, {:app=>"Go Storage Gateway V1", :match=>{"regex"=>"^storagegw-v1-go$"}, :platform=>"bot"}, {:app=>"Goodpods Bot", :match=>{"regex"=>"Goodpods/\\d+\\.\\d+"}, :platform=>"bot"}, {:app=>"Google AdsBot", :match=>{"regex"=>"AdsBot-Google"}, :platform=>"bot"}, {:app=>"Google Podcasts Manager", :match=>{"regex"=>"Google-Podcast"}, :platform=>"bot"}, 
{:app=>"Googlebot", :match=>{"regex"=>"Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google"}, :platform=>"bot"}, {:app=>"Gumball", :match=>{"regex"=>"^Gumball"}, :platform=>"bot"}, {:app=>"Headliner", :match=>{"regex"=>"Headliner/"}, :platform=>"bot"}, {:app=>"HubSpot Crawler", :match=>{"regex"=>"HubSpot Crawler"}, :platform=>"bot"}, {:app=>"Internet Archive", :match=>{"regex"=>"Archive-It;|web\\.archive\\.org"}, :platform=>"bot"}, {:app=>"Jaunt", :match=>{"regex"=>"^Jaunt/"}, :platform=>"bot"}, {:app=>"l'Institut national de l'audiovisuel", :match=>{"regex"=>"INA dlweb"}, :platform=>"bot"}, {:app=>"Libsyn", :match=>{"regex"=>"^Libsyn4"}, :platform=>"bot"}, {:app=>"libwww-perl", :match=>{"regex"=>"^libwww-perl| libwww-perl"}, :platform=>"bot"}, {:app=>"Livelap Crawler", :match=>{"regex"=>"LivelapBot"}, :platform=>"bot"}, {:app=>"LTX71", :match=>{"regex"=>"^ltx71 "}, :platform=>"bot"}, {:app=>"MauiBot", :match=>{"regex"=>"^MauiBot"}, :platform=>"bot"}, {:app=>"Mastodon Bot", :match=>{"regex"=>"rb/.*Mastodon/"}, :platform=>"bot"}, {:app=>"Microsoft Bingbot", :match=>{"regex"=>"(BingPreview/|adidxbot/|[bB]ingbot/)"}, :platform=>"bot"}, {:app=>"Microsoft Office", :match=>{"regex"=>"ms-office; MSOffice"}, :platform=>"bot"}, {:app=>"Mixcloud Podcast Importer", :match=>{"regex"=>"^MixcloudPodcastImporter/"}, :platform=>"bot"}, {:app=>"MJ12bot", :match=>{"regex"=>".*MJ12bot"}, :platform=>"bot"}, {:app=>"Mozilla Bot", :match=>{"regex"=>"^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"}, :platform=>"bot"}, {:app=>"MSN Bot", :match=>{"regex"=>"^msnbot/"}, :platform=>"bot"}, {:app=>"Neevabot", :match=>{"regex"=>".*Neevabot"}, :platform=>"bot"}, {:app=>"Netcraft Survey Agent", :match=>{"regex"=>" NetcraftSurveyAgent/"}, :platform=>"bot"}, {:app=>"OgScrper", :match=>{"regex"=>"OgScrper"}, :platform=>"bot"}, {:app=>"OkDownload", :match=>{"regex"=>"^OkDownload/"}, :platform=>"bot"}, {:app=>"OP3 Fetcher", :match=>{"regex"=>"^op3-fetcher/"}, :platform=>"bot"}, 
{:app=>"Overcast feed parser", :match=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}, :platform=>"bot"}, {:app=>"Pandora RSS crawler", :match=>{"regex"=>"^PandoraRSSCrawler"}, :platform=>"bot"}, {:app=>"PaperLi", :match=>{"regex"=>"PaperLiBot/"}, :platform=>"bot"}, {:app=>"PetalBot", :match=>{"regex"=>"PetalBot"}, :platform=>"bot"}, {:app=>"Pingdom", :match=>{"regex"=>"^Pingdom"}, :platform=>"bot"}, {:app=>"PlayerFM Podcast Sync", :match=>{"regex"=>"PlayerFM/.* Podcast Sync"}, :platform=>"bot"}, {:app=>"Podbean Importer", :match=>{"regex"=>"^Podbean Importer"}, :platform=>"bot"}, {:app=>"Podcastindex.org", :match=>{"regex"=>"^Podcastindex\\.org/"}, :platform=>"bot"}, {:app=>"PodCloud", :match=>{"regex"=>"podCloud"}, :platform=>"bot"}, {:app=>"Podcorn", :match=>{"regex"=>"Podcorn/"}, :platform=>"bot"}, {:app=>"PodderBot", :match=>{"regex"=>"PodderBot/"}, :platform=>"bot"}, {:app=>"Podfollow", :match=>{"regex"=>"podfollowbot/"}, :platform=>"bot"}, {:app=>"Podgrab", :match=>{"regex"=>"^Podgrab$"}, :platform=>"bot"}, {:app=>"Podhound", :match=>{"regex"=>"PodhoundBeta"}, :platform=>"bot"}, {:app=>"Podio Bot", :match=>{"regex"=>"^Podio/"}, :platform=>"bot"}, {:app=>"Podnews", :match=>{"regex"=>"PodnewsBot"}, :platform=>"bot"}, {:app=>"Podnods Bot", :match=>{"regex"=>"(podnods-crawler|podnods)"}, :platform=>"bot"}, {:app=>"Podscribe", :match=>{"regex"=>"(^Adswizz-podscribe/|^Podscribe/)"}, :platform=>"bot"}, {:app=>"Podverse Feed Parser", :match=>{"regex"=>"^Podverse/Feed Parser"}, :platform=>"bot"}, {:app=>"PodvineBot", :match=>{"regex"=>"^PodvineBot/"}, :platform=>"bot"}, {:app=>"PostRank Bot", :match=>{"regex"=>"^PostRank/"}, :platform=>"bot"}, {:app=>"PodwatchPro", :match=>{"regex"=>"Podwatch-Pro Crawler"}, :platform=>"bot"}, {:app=>"Puppeteer", :match=>{"regex"=>" HeadlessChrome/\\d"}, :platform=>"bot"}, {:app=>"python-requests", :match=>{"regex"=>"python-requests"}, :platform=>"bot"}, {:app=>"RedCircle", :match=>{"regex"=>"RedCircle"}, :platform=>"bot"}, {:app=>"Ridder 
Bot", :match=>{"regex"=>" RidderBot/"}, :platform=>"bot"}, {:app=>"RSS to Telegram", :match=>{"regex"=>"^RSStT"}, :platform=>"bot"}, {:app=>"RSSRadio", :match=>{"regex"=>"^RSSRadio \\("}, :platform=>"bot"}, {:app=>"Ruby Mechanize", :match=>{"regex"=>"^Mechanize|[ -]Mechanize/"}, :platform=>"bot"}, {:app=>"Screaming Frog SEO Spider", :match=>{"regex"=>"^Screaming Frog "}, :platform=>"bot"}, {:app=>"SearchAtlas.com SEO Crawler", :match=>{"regex"=>"^SearchAtlas.*Crawler"}, :platform=>"bot"}, {:app=>"SEMrushBot", :match=>{"regex"=>"SemrushBot/|^SEMrushBot$"}, :platform=>"bot"}, {:app=>"Serendeputy", :match=>{"regex"=>"SerendeputyBot/"}, :platform=>"bot"}, {:app=>"Simplecast", :match=>{"regex"=>"^Simplecast$"}, :platform=>"bot"}, {:app=>"Slack Bot", :match=>{"regex"=>"^Slackbot 1\\.0"}, :platform=>"bot"}, {:app=>"Snapchat Bot", :match=>{"regex"=>"://developers\\.snap\\.com/robots"}, :platform=>"bot"}, {:app=>"SoundOn Bot", :match=>{"regex"=>"^SoundOn/[\\d.]+\\s+\\(bot"}, :platform=>"bot"}, {:app=>"Spotify cache service", :match=>{"regex"=>"^Spotify/1\\.0$"}, :platform=>"bot"}, {:app=>"Stitcher Bot", :match=>{"regex"=>"^StitcherBot"}, :platform=>"bot"}, {:app=>"Substack Content Fetcher", :match=>{"regex"=>"^SubstackContentFetch/"}, :platform=>"bot"}, {:app=>"Timpi search crawler", :match=>{"regex"=>"Timpibot/"}, :platform=>"bot"}, {:app=>"Tiny Tiny RSS", :match=>{"regex"=>"^Tiny Tiny RSS/"}, :platform=>"bot"}, {:app=>"Trendsmap Resolver", :match=>{"regex"=>"TrendsmapResolver/"}, :platform=>"bot"}, {:app=>"Twingly Bot", :match=>{"regex"=>"Twingly Recon;"}, :platform=>"bot"}, {:app=>"Twitterbot", :match=>{"regex"=>"^Twitterbot"}, :platform=>"bot"}, {:app=>"Typhoeus", :match=>{"regex"=>"^Typhoeus"}, :platform=>"bot"}, {:app=>"UCast", :match=>{"regex"=>"^UCast/"}, :platform=>"bot"}, {:app=>"TelegramBot", :match=>{"regex"=>"^TelegramBot "}, :platform=>"bot"}, {:app=>"Vurbl", :match=>{"regex"=>"VurblBot"}, :platform=>"bot"}, {:app=>"Wget", :match=>{"regex"=>"Wget"}, 
:platform=>"bot"}, {:app=>"weborama", :match=>{"regex"=>"^weborama-fetcher"}, :platform=>"bot"}, {:app=>"Windows Crawler", :match=>{"regex"=>"^ZDM/.*Windows"}, :platform=>"bot"}, {:app=>"WordPress", :match=>{"regex"=>"^WordPress"}, :platform=>"bot"}, {:app=>"wsrv.nl", :match=>{"regex"=>"https?://wsrv.nl/"}, :platform=>"bot"}, {:app=>"YaCy", :match=>{"regex"=>"^yacybot"}, :platform=>"bot"}, {:app=>"Yahoo Crawler", :match=>{"regex"=>"Yahoo! Slurp"}, :platform=>"bot"}, {:app=>"YandexBot", :match=>{"regex"=>"YandexBot/"}, :platform=>"bot"}, {:app=>"Zapier", :match=>{"regex"=>"^Zapier$"}, :platform=>"bot"}, {:app=>"Zencast", :match=>{"regex"=>"^Zencastr/"}, :platform=>"bot"}].freeze
|
3
|
+
BOTS_RULES = [{:app=>"AAABot - unknown bot", :match=>{"regex"=>"AAABot"}, :platform=>"bot"}, {:app=>"Adswizz", :match=>{"regex"=>"^AIS VirtualListener"}, :platform=>"bot"}, {:app=>"AhrefsBot", :match=>{"regex"=>"AhrefsBot/"}, :platform=>"bot"}, {:app=>"AirableBot", :match=>{"regex"=>"AirableBot-Podcast/"}, :platform=>"bot"}, {:app=>"Alexa Flash Briefing cache", :match=>{"regex"=>"^AmazonNewsContentService"}, :platform=>"bot"}, {:app=>"AlignaBot", :match=>{"regex"=>"^Alignabot"}, :platform=>"bot"}, {:app=>"Amazon Music Podcasts Bot", :match=>{"regex"=>"^Amazon Music Podcast"}, :platform=>"bot"}, {:app=>"Anchor Importer", :match=>{"regex"=>"AnchorImport"}, :platform=>"bot"}, {:app=>"Apache HTTPClient", :match=>{"regex"=>"^Apache\\-HttpClient"}, :platform=>"bot"}, {:app=>"Apple Podcasts (Watch)", :match=>{"regex"=>"^atc/|\\(null\\) watchOS/"}, :platform=>"bot"}, {:app=>"Apple Podcasts automated checks", :match=>{"regex"=>"(iTMS|itunesstored|itms)"}, :platform=>"bot"}, {:app=>"Applebot", :match=>{"regex"=>"^Applebot/"}, :platform=>"bot"}, {:app=>"Archive.org", :match=>{"regex"=>"archive\\.org_bot"}, :platform=>"bot"}, {:app=>"atheerfm", :match=>{"regex"=>"^atheerfm/"}, :platform=>"bot"}, {:app=>"Audiomack", :match=>{"regex"=>"^Audiomack Podcast Processor/"}, :platform=>"bot"}, {:app=>"AudioWave feed parser", :match=>{"regex"=>"^AudioWaveBot/1\\.0"}, :platform=>"bot"}, {:app=>"AwarioSmartBot", :match=>{"regex"=>"^AwarioSmartBot/"}, :platform=>"bot"}, {:app=>"Babbar", :match=>{"regex"=>"Barkrowler/"}, :platform=>"bot"}, {:app=>"Baidu", :match=>{"regex"=>"\\(ce\\.baidu\\.com"}, :platform=>"bot"}, {:app=>"bbot", :match=>{"regex"=>"^bbot/"}, :platform=>"bot"}, {:app=>"British Library", :match=>{"regex"=>"^bl\\.uk_ldfc_bot"}, :platform=>"bot"}, {:app=>"Blubrry Migration Service", :match=>{"regex"=>"^Blubrry Migration Service"}, :platform=>"bot"}, {:app=>"Buzzsprout Importer", :match=>{"regex"=>"^Buzzsprout Importer"}, :platform=>"bot"}, {:app=>"CastFeedValidator", 
:match=>{"regex"=>"^CastFeedValidator/"}, :platform=>"bot"}, {:app=>"Castopod", :match=>{"regex"=>"Castopod/\\d"}, :platform=>"bot"}, {:app=>"Chartable", :match=>{"regex"=>"^Trackable/"}, :platform=>"bot"}, {:app=>"Clark-Crawler, unknown", :match=>{"regex"=>"^clark-crawler2"}, :platform=>"bot"}, {:app=>"Critical Mention", :match=>{"regex"=>"^Podcast-CriticalMention/"}, :platform=>"bot"}, {:app=>"curl", :match=>{"regex"=>"^curl|^libcurl/|^PycURL/| curl/"}, :platform=>"bot"}, {:app=>"DataforSEO", :match=>{"regex"=>"DataForSeoBot/"}, :platform=>"bot"}, {:app=>"Datagnion Bot", :match=>{"regex"=>"^datagnionbot"}, :platform=>"bot"}, {:app=>"dataprovider.com", :match=>{"regex"=>"Dataprovider\\.com"}, :platform=>"bot"}, {:app=>"Daum", :match=>{"regex"=>"http://cs\\.daum\\.net"}, :platform=>"bot"}, {:app=>"Deezer Podcasters", :match=>{"regex"=>"^Deezer Podcasters/1\\.0"}, :platform=>"bot"}, {:app=>"Digg", :match=>{"regex"=>"^Digg "}, :platform=>"bot"}, {:app=>"dorada", :match=>{"regex"=>"support@dorada\\.co\\.uk"}, :platform=>"bot"}, {:app=>"DotBot", :match=>{"regex"=>"DotBot"}, :platform=>"bot"}, {:app=>"Downcast Bot", :match=>{"regex"=>"downcast feed consumer/"}, :platform=>"bot"}, {:app=>"DuckDuckBot", :match=>{"regex"=>"DuckDuckBot"}, :platform=>"bot"}, {:app=>"F-Secure Riddler", :match=>{"regex"=>"^Riddler "}, :platform=>"bot"}, {:app=>"Facebook Bot", :match=>{"regex"=>"FacebookBot|f?acebookexternalhit/?|^podcastbot$|Facebot|facebookexternalua|^facebookplatform/"}, :platform=>"bot"}, {:app=>"Feedly", :match=>{"regex"=>"^Feedly/"}, :platform=>"bot"}, {:app=>"Feedspot Fetcher", :match=>{"regex"=>"https://www\\.feedspot\\.com/fs/fetcher"}, :platform=>"bot"}, {:app=>"FlexGet", :match=>{"regex"=>"^FlexGet/"}, :platform=>"bot"}, {:app=>"Flipboard Proxy", :match=>{"regex"=>"FlipboardProxy/"}, :platform=>"bot"}, {:app=>"Fyyd", :match=>{"regex"=>"^fyyd-poll"}, :platform=>"bot"}, {:app=>"Go Storage Gateway V1", :match=>{"regex"=>"^storagegw-v1-go$"}, :platform=>"bot"}, 
{:app=>"Goodpods Bot", :match=>{"regex"=>"Goodpods/\\d+\\.\\d+"}, :platform=>"bot"}, {:app=>"Google Adsense Bot", :match=>{"regex"=>"Mediapartners-Google"}, :platform=>"bot"}, {:app=>"Google AdsBot", :match=>{"regex"=>"AdsBot-Google"}, :platform=>"bot"}, {:app=>"Google Podcasts Manager", :match=>{"regex"=>"Google-Podcast"}, :platform=>"bot"}, {:app=>"Googlebot", :match=>{"regex"=>"Googlebot/|Googlebot-Video/|Googlebot-Image/|^Feedfetcher-Google|google-xrawler"}, :platform=>"bot"}, {:app=>"Gumball", :match=>{"regex"=>"^Gumball"}, :platform=>"bot"}, {:app=>"Headliner", :match=>{"regex"=>"Headliner/"}, :platform=>"bot"}, {:app=>"HubSpot Crawler", :match=>{"regex"=>"HubSpot Crawler"}, :platform=>"bot"}, {:app=>"Internet Archive", :match=>{"regex"=>"Archive-It;|web\\.archive\\.org"}, :platform=>"bot"}, {:app=>"Jaunt", :match=>{"regex"=>"^Jaunt/"}, :platform=>"bot"}, {:app=>"l'Institut national de l'audiovisuel", :match=>{"regex"=>"INA dlweb"}, :platform=>"bot"}, {:app=>"Libsyn", :match=>{"regex"=>"^Libsyn4"}, :platform=>"bot"}, {:app=>"libwww-perl", :match=>{"regex"=>"^libwww-perl| libwww-perl"}, :platform=>"bot"}, {:app=>"Livelap Crawler", :match=>{"regex"=>"LivelapBot"}, :platform=>"bot"}, {:app=>"LTX71", :match=>{"regex"=>"^ltx71 "}, :platform=>"bot"}, {:app=>"MauiBot", :match=>{"regex"=>"^MauiBot"}, :platform=>"bot"}, {:app=>"Mastodon Bot", :match=>{"regex"=>"rb/.*Mastodon/"}, :platform=>"bot"}, {:app=>"Microsoft Bingbot", :match=>{"regex"=>"(BingPreview/|adidxbot/|[bB]ingbot/)"}, :platform=>"bot"}, {:app=>"Microsoft Office", :match=>{"regex"=>"ms-office; MSOffice"}, :platform=>"bot"}, {:app=>"Mixcloud Podcast Importer", :match=>{"regex"=>"^MixcloudPodcastImporter/"}, :platform=>"bot"}, {:app=>"MJ12bot", :match=>{"regex"=>".*MJ12bot"}, :platform=>"bot"}, {:app=>"Mozilla Bot", :match=>{"regex"=>"^'?Mozilla(/5\\.0(\\.\\.\\.)?)?$|^\\(Mozilla/5\\.0\\)$"}, :platform=>"bot"}, {:app=>"MSN Bot", :match=>{"regex"=>"^msnbot/"}, :platform=>"bot"}, {:app=>"Neevabot", 
:match=>{"regex"=>".*Neevabot"}, :platform=>"bot"}, {:app=>"Netcraft Survey Agent", :match=>{"regex"=>" NetcraftSurveyAgent/"}, :platform=>"bot"}, {:app=>"Newspaper", :match=>{"regex"=>"^newspaper/\\d"}, :platform=>"bot"}, {:app=>"OgScrper", :match=>{"regex"=>"OgScrper"}, :platform=>"bot"}, {:app=>"OkDownload", :match=>{"regex"=>"^OkDownload/"}, :platform=>"bot"}, {:app=>"OP3 Fetcher", :match=>{"regex"=>"^op3-fetcher/"}, :platform=>"bot"}, {:app=>"Overcast feed parser", :match=>{"regex"=>"^Overcast/1\\.0 Podcast Sync"}, :platform=>"bot"}, {:app=>"Pandora RSS crawler", :match=>{"regex"=>"^PandoraRSSCrawler"}, :platform=>"bot"}, {:app=>"PaperLi", :match=>{"regex"=>"PaperLiBot/"}, :platform=>"bot"}, {:app=>"PetalBot", :match=>{"regex"=>"PetalBot"}, :platform=>"bot"}, {:app=>"Pingdom", :match=>{"regex"=>"^Pingdom"}, :platform=>"bot"}, {:app=>"PlayerFM Podcast Sync", :match=>{"regex"=>"PlayerFM/.* Podcast Sync"}, :platform=>"bot"}, {:app=>"Podbean Importer", :match=>{"regex"=>"^Podbean Importer"}, :platform=>"bot"}, {:app=>"Podcastindex.org", :match=>{"regex"=>"^Podcastindex\\.org/"}, :platform=>"bot"}, {:app=>"Podcast de facto Standard", :match=>{"regex"=>"^PodcastStandard/"}, :platform=>"bot"}, {:app=>"Podcast Archiver", :match=>{"regex"=>"^Podcast%20Archiver/"}, :platform=>"bot"}, {:app=>"PodCloud", :match=>{"regex"=>"podCloud"}, :platform=>"bot"}, {:app=>"Podcorn", :match=>{"regex"=>"Podcorn/"}, :platform=>"bot"}, {:app=>"PodderBot", :match=>{"regex"=>"PodderBot/"}, :platform=>"bot"}, {:app=>"Podfollow", :match=>{"regex"=>"podfollowbot/"}, :platform=>"bot"}, {:app=>"Podgrab", :match=>{"regex"=>"^Podgrab$"}, :platform=>"bot"}, {:app=>"Podhound", :match=>{"regex"=>"PodhoundBeta"}, :platform=>"bot"}, {:app=>"Podio Bot", :match=>{"regex"=>"^Podio/"}, :platform=>"bot"}, {:app=>"Podnews", :match=>{"regex"=>"PodnewsBot"}, :platform=>"bot"}, {:app=>"Podnods Bot", :match=>{"regex"=>"(podnods-crawler|podnods)"}, :platform=>"bot"}, {:app=>"Podscribe", 
:match=>{"regex"=>"(^Adswizz-podscribe/|^Podscribe/)"}, :platform=>"bot"}, {:app=>"Podverse Feed Parser", :match=>{"regex"=>"^Podverse/Feed Parser"}, :platform=>"bot"}, {:app=>"PodvineBot", :match=>{"regex"=>"^PodvineBot/"}, :platform=>"bot"}, {:app=>"PostRank Bot", :match=>{"regex"=>"^PostRank/"}, :platform=>"bot"}, {:app=>"PodwatchPro", :match=>{"regex"=>"Podwatch-Pro Crawler"}, :platform=>"bot"}, {:app=>"Puppeteer", :match=>{"regex"=>" HeadlessChrome/\\d"}, :platform=>"bot"}, {:app=>"python-requests", :match=>{"regex"=>"python-requests"}, :platform=>"bot"}, {:app=>"Qiniu spider", :match=>{"regex"=>"qiniu.*spider"}, :platform=>"bot"}, {:app=>"RedCircle", :match=>{"regex"=>"RedCircle"}, :platform=>"bot"}, {:app=>"Ridder Bot", :match=>{"regex"=>" RidderBot/"}, :platform=>"bot"}, {:app=>"RSS to Telegram", :match=>{"regex"=>"^RSStT"}, :platform=>"bot"}, {:app=>"RSSRadio", :match=>{"regex"=>"^RSSRadio \\("}, :platform=>"bot"}, {:app=>"Ruby Mechanize", :match=>{"regex"=>"^Mechanize|[ -]Mechanize/"}, :platform=>"bot"}, {:app=>"Screaming Frog SEO Spider", :match=>{"regex"=>"^Screaming Frog "}, :platform=>"bot"}, {:app=>"SearchAtlas.com SEO Crawler", :match=>{"regex"=>"^SearchAtlas.*Crawler"}, :platform=>"bot"}, {:app=>"SEMrushBot", :match=>{"regex"=>"SemrushBot/|^SEMrushBot$"}, :platform=>"bot"}, {:app=>"Serendeputy", :match=>{"regex"=>"SerendeputyBot/"}, :platform=>"bot"}, {:app=>"Simplecast", :match=>{"regex"=>"^Simplecast$"}, :platform=>"bot"}, {:app=>"Slack Bot", :match=>{"regex"=>"^Slackbot 1\\.0"}, :platform=>"bot"}, {:app=>"Snapchat Bot", :match=>{"regex"=>"://developers\\.snap\\.com/robots"}, :platform=>"bot"}, {:app=>"SoundOn Bot", :match=>{"regex"=>"^SoundOn/[\\d.]+\\s+\\(bot"}, :platform=>"bot"}, {:app=>"Spotify cache service", :match=>{"regex"=>"^Spotify/1\\.0$"}, :platform=>"bot"}, {:app=>"Stitcher Bot", :match=>{"regex"=>"^StitcherBot"}, :platform=>"bot"}, {:app=>"Substack Content Fetcher", :match=>{"regex"=>"^SubstackContentFetch/"}, :platform=>"bot"}, 
{:app=>"SupportingCast", :match=>{"regex"=>"^SupportingCast(/.*)?$"}, :platform=>"bot"}, {:app=>"Timpi search crawler", :match=>{"regex"=>"Timpibot/"}, :platform=>"bot"}, {:app=>"Tiny Tiny RSS", :match=>{"regex"=>"^Tiny Tiny RSS/"}, :platform=>"bot"}, {:app=>"Trendsmap Resolver", :match=>{"regex"=>"TrendsmapResolver/"}, :platform=>"bot"}, {:app=>"Twingly Bot", :match=>{"regex"=>"Twingly Recon;"}, :platform=>"bot"}, {:app=>"Twitterbot", :match=>{"regex"=>"^Twitterbot"}, :platform=>"bot"}, {:app=>"Typhoeus", :match=>{"regex"=>"^Typhoeus"}, :platform=>"bot"}, {:app=>"UCast", :match=>{"regex"=>"^UCast/"}, :platform=>"bot"}, {:app=>"TelegramBot", :match=>{"regex"=>"^TelegramBot "}, :platform=>"bot"}, {:app=>"theTradeDesk Content Web Scraper", :match=>{"regex"=>"TTD-Content"}, :platform=>"bot"}, {:app=>"Veritone Engine Toolkit", :match=>{"regex"=>"^veritone/engine-toolkit"}, :platform=>"bot"}, {:app=>"Vurbl", :match=>{"regex"=>"VurblBot"}, :platform=>"bot"}, {:app=>"Wget", :match=>{"regex"=>"Wget"}, :platform=>"bot"}, {:app=>"weborama", :match=>{"regex"=>"^weborama-fetcher"}, :platform=>"bot"}, {:app=>"Windows Crawler", :match=>{"regex"=>"^ZDM/.*Windows"}, :platform=>"bot"}, {:app=>"Windows PowerShell", :match=>{"regex"=>"WindowsPowerShell/"}, :platform=>"bot"}, {:app=>"WordPress", :match=>{"regex"=>"^WordPress"}, :platform=>"bot"}, {:app=>"wsrv.nl", :match=>{"regex"=>"https?://wsrv.nl/"}, :platform=>"bot"}, {:app=>"YaCy", :match=>{"regex"=>"^yacybot"}, :platform=>"bot"}, {:app=>"Yahoo Crawler", :match=>{"regex"=>"Yahoo! Slurp"}, :platform=>"bot"}, {:app=>"YandexBot", :match=>{"regex"=>"YandexBot/"}, :platform=>"bot"}, {:app=>"Zapier", :match=>{"regex"=>"^Zapier$"}, :platform=>"bot"}, {:app=>"Zencast", :match=>{"regex"=>"^Zencastr/"}, :platform=>"bot"}].freeze
|
@@ -7,15 +7,19 @@ module PodIdent
|
|
7
7
|
class RuleParser
|
8
8
|
RULES_YAML = File.expand_path('../detection_rules.yml', __dir__)
|
9
9
|
RULES_BOTS_YAML = File.expand_path('../detection_rules_bots.yml', __dir__)
|
10
|
+
CUSTOM_RULES_BOTS_YAML = File.expand_path('../detection_rules_custom_bots.yml', __dir__)
|
10
11
|
RULES_RUBY = File.expand_path('detection_rules.rb', __dir__)
|
11
12
|
RULES_BOTS_RUBY = File.expand_path('detection_rules_bots.rb', __dir__)
|
13
|
+
CUSTOM_RULES_BOTS_RUBY = File.expand_path('detection_rules_custom_bots.rb', __dir__)
|
12
14
|
RULES_SPEC_RUBY = File.expand_path('../../spec/detection_rules.rb', __dir__)
|
13
15
|
RULES_SPEC_BOTS_RUBY = File.expand_path('../../spec/detection_rules_bots.rb', __dir__)
|
16
|
+
RULES_SPEC_CUSTOM_BOTS_RUBY = File.expand_path('../../spec/detection_rules_custom_bots.rb',
|
17
|
+
__dir__)
|
14
18
|
DO_NOT_EDIT_TEXT = <<~HEREDOC
|
15
19
|
# DO NOT EDIT THIS FILE - it gets automatically generated by running \"bin/parse-rules\"\n
|
16
20
|
HEREDOC
|
17
21
|
|
18
|
-
attr_accessor :bots_rules, :rules
|
22
|
+
attr_accessor :bots_rules, :custom_bots_rules, :rules
|
19
23
|
|
20
24
|
def call
|
21
25
|
parse_yaml
|
@@ -42,6 +46,14 @@ module PodIdent
|
|
42
46
|
}
|
43
47
|
end
|
44
48
|
|
49
|
+
cleaned_custom_bots_rules = custom_bots_rules.dup.map do |rule|
|
50
|
+
{
|
51
|
+
app: rule['app'],
|
52
|
+
match: rule['match'],
|
53
|
+
platform: rule['platform']
|
54
|
+
}
|
55
|
+
end
|
56
|
+
|
45
57
|
File.open(RULES_RUBY, 'w') do |file|
|
46
58
|
file.write(DO_NOT_EDIT_TEXT)
|
47
59
|
file.write("RULES = #{cleaned_rules}.freeze")
|
@@ -51,6 +63,11 @@ module PodIdent
|
|
51
63
|
file.write(DO_NOT_EDIT_TEXT)
|
52
64
|
file.write("BOTS_RULES = #{cleaned_bots_rules}.freeze")
|
53
65
|
end
|
66
|
+
|
67
|
+
File.open(CUSTOM_RULES_BOTS_RUBY, 'w') do |file|
|
68
|
+
file.write(DO_NOT_EDIT_TEXT)
|
69
|
+
file.write("CUSTOM_BOTS_RULES = #{cleaned_custom_bots_rules}.freeze")
|
70
|
+
end
|
54
71
|
end
|
55
72
|
|
56
73
|
def write_rules_spec_rb
|
@@ -64,6 +81,11 @@ module PodIdent
|
|
64
81
|
Hash[rule.map { |(k, v)| [k.to_sym, v] }]
|
65
82
|
end
|
66
83
|
|
84
|
+
all_custom_bot_rules = custom_bots_rules.dup.map do |rule|
|
85
|
+
# symbolize keys
|
86
|
+
Hash[rule.map { |(k, v)| [k.to_sym, v] }]
|
87
|
+
end
|
88
|
+
|
67
89
|
File.open(RULES_SPEC_RUBY, 'w') do |file|
|
68
90
|
file.write(DO_NOT_EDIT_TEXT)
|
69
91
|
file.write("RULES = #{all_rules}.freeze")
|
@@ -73,11 +95,17 @@ module PodIdent
|
|
73
95
|
file.write(DO_NOT_EDIT_TEXT)
|
74
96
|
file.write("BOTS_RULES = #{all_bot_rules}.freeze")
|
75
97
|
end
|
98
|
+
|
99
|
+
File.open(RULES_SPEC_CUSTOM_BOTS_RUBY, 'w') do |file|
|
100
|
+
file.write(DO_NOT_EDIT_TEXT)
|
101
|
+
file.write("CUSTOM_BOTS_RULES = #{all_custom_bot_rules}.freeze")
|
102
|
+
end
|
76
103
|
end
|
77
104
|
|
78
105
|
# Parses the three rule sources with YAML.safe_load and stores the results in
# @rules, @bots_rules and @custom_bots_rules. The YAML text comes from the
# corresponding *_yaml_file_content helper methods.
def parse_yaml
|
79
106
|
@rules = YAML.safe_load(rules_yaml_file_content)
|
80
107
|
@bots_rules = YAML.safe_load(bot_rules_yaml_file_content)
|
108
|
+
@custom_bots_rules = YAML.safe_load(custom_bot_rules_yaml_file_content)
|
81
109
|
end
|
82
110
|
|
83
111
|
def rules_yaml_file_content
|
@@ -87,5 +115,9 @@ module PodIdent
|
|
87
115
|
# Returns the raw text of the bot rules YAML file located at RULES_BOTS_YAML.
def bot_rules_yaml_file_content
|
88
116
|
File.read(RULES_BOTS_YAML)
|
89
117
|
end
|
118
|
+
|
119
|
+
# Returns the raw text of the custom bot rules YAML file located at
# CUSTOM_RULES_BOTS_YAML.
def custom_bot_rules_yaml_file_content
|
120
|
+
File.read(CUSTOM_RULES_BOTS_YAML)
|
121
|
+
end
|
90
122
|
end
|
91
123
|
end
|
data/lib/pod_ident/version.rb
CHANGED
data/lib/pod_ident.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require 'pod_ident/version'
|
4
4
|
require 'pod_ident/detection_rules'
|
5
5
|
require 'pod_ident/detection_rules_bots'
|
6
|
+
require 'pod_ident/detection_rules_custom_bots'
|
6
7
|
require 'pod_ident/detection_result'
|
7
8
|
|
8
9
|
module PodIdent
|
@@ -22,7 +23,7 @@ module PodIdent
|
|
22
23
|
# !~ /[^[:space:]]/ is what Active Support does to detect blank strings
|
23
24
|
return nil if user_agent_string !~ /[^[:space:]]/
|
24
25
|
|
25
|
-
rule = find_rule || find_rule_bots
|
26
|
+
rule = find_rule || find_rule_bots || find_rule_custom_bots
|
26
27
|
|
27
28
|
self.result = DetectionResult.new(rule, user_agent_string)
|
28
29
|
identify_platform if result.positive?
|
@@ -31,7 +32,7 @@ module PodIdent
|
|
31
32
|
end
|
32
33
|
|
33
34
|
# Reports whether the current user agent matches a bot rule. As of this change
# the custom bot rules are consulted when find_rule_bots yields nothing.
# NOTE(review): the return value is whatever the find_* helpers return (the
# matched rule, or nil from find_rule_custom_bots), not a strict true/false.
def self.bot?
|
34
|
-
find_rule_bots
|
35
|
+
find_rule_bots || find_rule_custom_bots
|
35
36
|
end
|
36
37
|
|
37
38
|
private
|
@@ -67,6 +68,15 @@ module PodIdent
|
|
67
68
|
end
|
68
69
|
end
|
69
70
|
|
71
|
+
# Returns the first entry of CUSTOM_BOTS_RULES whose match['regex'] pattern
# matches user_agent_string, or nil when no entry matches.
# Each rule must have a :match key (fetch raises KeyError otherwise); a new
# Regexp is compiled from the rule's 'regex' string on every call.
def find_rule_custom_bots
|
72
|
+
CUSTOM_BOTS_RULES.detect do |rule|
|
73
|
+
match = rule.fetch(:match)
|
74
|
+
regex = match['regex']
|
75
|
+
match = Regexp.new(regex).match(user_agent_string)
|
76
|
+
!match.nil?
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
70
80
|
def apply_starts_with(starts_with, found)
|
71
81
|
return found unless starts_with
|
72
82
|
|
data/pod_ident.gemspec
CHANGED
@@ -15,6 +15,10 @@ Gem::Specification.new do |spec|
|
|
15
15
|
Library to identify podcast client user agents and translate them into human readable information.'
|
16
16
|
HEREDOC
|
17
17
|
spec.homepage = 'https://www.podigee.com'
|
18
|
+
spec.metadata = {
|
19
|
+
'source_code_uri' => 'https://github.com/podigee/pod-ident.rb',
|
20
|
+
'changelog_uri' => 'https://github.com/podigee/pod-ident.rb/blob/master/CHANGELOG.md'
|
21
|
+
}
|
18
22
|
|
19
23
|
# Specify which files should be added to the gem when it is released.
|
20
24
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pod_ident
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.2
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Podigee GmbH
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -96,6 +96,7 @@ files:
|
|
96
96
|
- ".rspec"
|
97
97
|
- ".rubocop.yml"
|
98
98
|
- ".ruby-version"
|
99
|
+
- CHANGELOG.md
|
99
100
|
- Gemfile
|
100
101
|
- Gemfile.lock
|
101
102
|
- README.md
|
@@ -105,16 +106,20 @@ files:
|
|
105
106
|
- bin/setup
|
106
107
|
- lib/detection_rules.yml
|
107
108
|
- lib/detection_rules_bots.yml
|
109
|
+
- lib/detection_rules_custom_bots.yml
|
108
110
|
- lib/pod_ident.rb
|
109
111
|
- lib/pod_ident/detection_result.rb
|
110
112
|
- lib/pod_ident/detection_rules.rb
|
111
113
|
- lib/pod_ident/detection_rules_bots.rb
|
114
|
+
- lib/pod_ident/detection_rules_custom_bots.rb
|
112
115
|
- lib/pod_ident/rule_parser.rb
|
113
116
|
- lib/pod_ident/version.rb
|
114
117
|
- pod_ident.gemspec
|
115
118
|
homepage: https://www.podigee.com
|
116
119
|
licenses: []
|
117
|
-
metadata:
|
120
|
+
metadata:
|
121
|
+
source_code_uri: https://github.com/podigee/pod-ident.rb
|
122
|
+
changelog_uri: https://github.com/podigee/pod-ident.rb/blob/master/CHANGELOG.md
|
118
123
|
post_install_message:
|
119
124
|
rdoc_options: []
|
120
125
|
require_paths:
|
@@ -130,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
130
135
|
- !ruby/object:Gem::Version
|
131
136
|
version: '0'
|
132
137
|
requirements: []
|
133
|
-
rubygems_version: 3.4
|
138
|
+
rubygems_version: 3.5.4
|
134
139
|
signing_key:
|
135
140
|
specification_version: 4
|
136
141
|
summary: Identifies podcast client user agents
|