device_detector 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -6
- data/lib/device_detector/browser.rb +364 -0
- data/lib/device_detector/client.rb +8 -0
- data/lib/device_detector/device.rb +1124 -4
- data/lib/device_detector/os.rb +36 -11
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector/version_extractor.rb +9 -0
- data/lib/device_detector.rb +57 -13
- data/regexes/bots.yml +551 -58
- data/regexes/client/browser_engine.yml +7 -1
- data/regexes/client/browsers.yml +594 -71
- data/regexes/client/feed_readers.yml +4 -10
- data/regexes/client/libraries.yml +47 -2
- data/regexes/client/mediaplayers.yml +25 -1
- data/regexes/client/mobile_apps.yml +447 -77
- data/regexes/client/pim.yml +49 -1
- data/regexes/device/cameras.yml +5 -5
- data/regexes/device/car_browsers.yml +16 -0
- data/regexes/device/consoles.yml +6 -0
- data/regexes/device/mobiles.yml +12905 -4561
- data/regexes/device/portable_media_player.yml +20 -3
- data/regexes/device/shell_tv.yml +117 -0
- data/regexes/device/televisions.yml +426 -35
- data/regexes/oss.yml +567 -139
- data/spec/device_detector/concrete_user_agent_spec.rb +30 -42
- data/spec/device_detector/detector_fixtures_spec.rb +9 -5
- data/spec/device_detector/device_spec.rb +26 -10
- data/spec/fixtures/client/browser.yml +1463 -391
- data/spec/fixtures/client/feed_reader.yml +0 -12
- data/spec/fixtures/client/library.yml +91 -37
- data/spec/fixtures/client/mediaplayer.yml +30 -0
- data/spec/fixtures/client/mobile_app.yml +498 -45
- data/spec/fixtures/client/pim.yml +60 -0
- data/spec/fixtures/detector/bots.yml +1189 -568
- data/spec/fixtures/detector/camera.yml +12 -26
- data/spec/fixtures/detector/car_browser.yml +151 -15
- data/spec/fixtures/detector/console.yml +70 -48
- data/spec/fixtures/detector/desktop.yml +2041 -916
- data/spec/fixtures/detector/feature_phone.yml +895 -189
- data/spec/fixtures/detector/feed_reader.yml +50 -77
- data/spec/fixtures/detector/mediaplayer.yml +79 -26
- data/spec/fixtures/detector/mobile_apps.yml +726 -72
- data/spec/fixtures/detector/peripheral.yml +271 -0
- data/spec/fixtures/detector/phablet.yml +3635 -1596
- data/spec/fixtures/detector/portable_media_player.yml +355 -46
- data/spec/fixtures/detector/smart_display.yml +183 -9
- data/spec/fixtures/detector/smart_speaker.yml +13 -8
- data/spec/fixtures/detector/smartphone-1.yml +4002 -4286
- data/spec/fixtures/detector/smartphone-10.yml +3771 -4763
- data/spec/fixtures/detector/smartphone-11.yml +3615 -4692
- data/spec/fixtures/detector/smartphone-12.yml +3856 -4764
- data/spec/fixtures/detector/smartphone-13.yml +4213 -4713
- data/spec/fixtures/detector/smartphone-14.yml +4039 -4497
- data/spec/fixtures/detector/smartphone-15.yml +5642 -2956
- data/spec/fixtures/detector/smartphone-16.yml +4739 -5082
- data/spec/fixtures/detector/smartphone-17.yml +4832 -4275
- data/spec/fixtures/detector/smartphone-18.yml +9806 -0
- data/spec/fixtures/detector/smartphone-19.yml +9965 -0
- data/spec/fixtures/detector/smartphone-2.yml +4842 -2589
- data/spec/fixtures/detector/smartphone-20.yml +9710 -0
- data/spec/fixtures/detector/smartphone-21.yml +8693 -0
- data/spec/fixtures/detector/smartphone-22.yml +10178 -0
- data/spec/fixtures/detector/smartphone-23.yml +9453 -0
- data/spec/fixtures/detector/smartphone-24.yml +9843 -0
- data/spec/fixtures/detector/smartphone-25.yml +9703 -0
- data/spec/fixtures/detector/smartphone-26.yml +10007 -0
- data/spec/fixtures/detector/smartphone-27.yml +4927 -0
- data/spec/fixtures/detector/smartphone-3.yml +4387 -4427
- data/spec/fixtures/detector/smartphone-4.yml +3597 -4582
- data/spec/fixtures/detector/smartphone-5.yml +4066 -5022
- data/spec/fixtures/detector/smartphone-6.yml +3455 -4621
- data/spec/fixtures/detector/smartphone-7.yml +3574 -4574
- data/spec/fixtures/detector/smartphone-8.yml +4617 -4704
- data/spec/fixtures/detector/smartphone-9.yml +4080 -5035
- data/spec/fixtures/detector/smartphone.yml +3244 -4234
- data/spec/fixtures/detector/tablet-1.yml +4652 -4492
- data/spec/fixtures/detector/tablet-2.yml +3515 -4434
- data/spec/fixtures/detector/tablet-3.yml +3418 -4351
- data/spec/fixtures/detector/tablet-4.yml +5149 -3200
- data/spec/fixtures/detector/tablet-5.yml +9273 -0
- data/spec/fixtures/detector/tablet-6.yml +4588 -0
- data/spec/fixtures/detector/tablet.yml +1621 -2613
- data/spec/fixtures/detector/tv-1.yml +2501 -0
- data/spec/fixtures/detector/tv.yml +7826 -3114
- data/spec/fixtures/detector/unknown.yml +370 -531
- data/spec/fixtures/detector/wearable.yml +863 -9
- data/spec/fixtures/parser/oss.yml +1350 -21
- data/spec/fixtures/parser/vendorfragments.yml +53 -53
- metadata +35 -5
data/regexes/bots.yml
CHANGED
@@ -5,10 +5,10 @@
|
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
8
|
-
- regex: '360Spider
|
8
|
+
- regex: '360Spider'
|
9
9
|
name: '360Spider'
|
10
10
|
category: 'Search bot'
|
11
|
-
url: '
|
11
|
+
url: 'https://www.so.com/help/help_3_2.html'
|
12
12
|
producer:
|
13
13
|
name: 'Online Media Group, Inc.'
|
14
14
|
url: ''
|
@@ -40,26 +40,34 @@
|
|
40
40
|
- regex: 'AhrefsBot'
|
41
41
|
name: 'aHrefs Bot'
|
42
42
|
category: 'Crawler'
|
43
|
-
url: '
|
43
|
+
url: 'https://ahrefs.com/robot'
|
44
44
|
producer:
|
45
45
|
name: 'Ahrefs Pte Ltd'
|
46
|
-
url: '
|
46
|
+
url: 'https://ahrefs.com/robot'
|
47
47
|
|
48
48
|
- regex: 'ia_archiver|alexabot|verifybot'
|
49
49
|
name: 'Alexa Crawler'
|
50
50
|
category: 'Search bot'
|
51
|
-
url: 'https://alexa.
|
51
|
+
url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
|
52
52
|
producer:
|
53
53
|
name: 'Alexa Internet'
|
54
|
-
url: '
|
54
|
+
url: 'https://www.alexa.com'
|
55
55
|
|
56
56
|
- regex: 'alexa site audit'
|
57
57
|
name: 'Alexa Site Audit'
|
58
58
|
category: 'Site Monitor'
|
59
|
-
url: '
|
59
|
+
url: 'https://support.alexa.com/hc/en-us/articles/200450194'
|
60
60
|
producer:
|
61
61
|
name: 'Alexa Internet'
|
62
|
-
url: '
|
62
|
+
url: 'https://www.alexa.com'
|
63
|
+
|
64
|
+
- regex: 'Amazonbot'
|
65
|
+
name: 'Amazon Bot'
|
66
|
+
category: 'Crawler'
|
67
|
+
url: 'https://developer.amazon.com/support/amazonbot'
|
68
|
+
producer:
|
69
|
+
name: 'Amazon.com, Inc.'
|
70
|
+
url: 'https://www.amazon.com/'
|
63
71
|
|
64
72
|
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
65
73
|
name: 'Amazon Route53 Health Check'
|
@@ -82,23 +90,31 @@
|
|
82
90
|
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
|
83
91
|
producer:
|
84
92
|
name: 'The Apache Software Foundation'
|
85
|
-
url: '
|
93
|
+
url: 'https://www.apache.org/foundation/'
|
86
94
|
|
87
95
|
- regex: 'Applebot'
|
88
96
|
name: 'Applebot'
|
89
97
|
category: 'Crawler'
|
90
|
-
url: '
|
98
|
+
url: 'https://support.apple.com/en-us/HT204683'
|
91
99
|
producer:
|
92
100
|
name: 'Apple Inc'
|
93
|
-
url: '
|
101
|
+
url: 'https://www.apple.com'
|
102
|
+
|
103
|
+
- regex: "AppSignalBot"
|
104
|
+
name: "AppSignalBot"
|
105
|
+
category: "Site Monitor"
|
106
|
+
url: "https://docs.appsignal.com/uptime-monitoring/"
|
107
|
+
producer:
|
108
|
+
name: "AppSignal"
|
109
|
+
url: "https://appsignal.com/"
|
94
110
|
|
95
111
|
- regex: 'Arachni'
|
96
112
|
name: 'Arachni'
|
97
113
|
category: 'Security Checker'
|
98
|
-
url: '
|
114
|
+
url: 'https://www.arachni-scanner.com/'
|
99
115
|
producer:
|
100
116
|
name: 'Sarosys LLC'
|
101
|
-
url: '
|
117
|
+
url: 'https://www.sarosys.com/'
|
102
118
|
|
103
119
|
- regex: 'AspiegelBot'
|
104
120
|
name: 'AspiegelBot'
|
@@ -112,7 +128,7 @@
|
|
112
128
|
name: 'Castro 2'
|
113
129
|
category: 'Service Agent'
|
114
130
|
url: 'http://supertop.co/castro/'
|
115
|
-
producer:
|
131
|
+
producer:
|
116
132
|
name: 'Supertop'
|
117
133
|
url: 'http://supertop.co'
|
118
134
|
|
@@ -127,10 +143,10 @@
|
|
127
143
|
- regex: 'archive\.org_bot|special_archiver'
|
128
144
|
name: 'archive.org bot'
|
129
145
|
category: 'Crawler'
|
130
|
-
url: '
|
146
|
+
url: 'https://archive.org/details/archive.org_bot'
|
131
147
|
producer:
|
132
148
|
name: 'The Internet Archive'
|
133
|
-
url: '
|
149
|
+
url: 'https://archive.org'
|
134
150
|
|
135
151
|
- regex: 'Ask Jeeves/Teoma'
|
136
152
|
name: 'Ask Jeeves'
|
@@ -156,7 +172,7 @@
|
|
156
172
|
name: '2.0Promotion GbR'
|
157
173
|
url: 'http://www.backlinktest.com'
|
158
174
|
|
159
|
-
- regex: '
|
175
|
+
- regex: 'Baidu.*spider|baidu Transcoder'
|
160
176
|
name: 'Baidu Spider'
|
161
177
|
category: 'Search bot'
|
162
178
|
url: 'http://www.baidu.com/search/spider.htm'
|
@@ -172,6 +188,14 @@
|
|
172
188
|
name: ''
|
173
189
|
url: ''
|
174
190
|
|
191
|
+
- regex: 'Better Uptime Bot'
|
192
|
+
name: 'Better Uptime Bot'
|
193
|
+
category: 'Site Monitor'
|
194
|
+
url: 'https://betteruptime.com/faq'
|
195
|
+
producer:
|
196
|
+
name: 'Better Uptime'
|
197
|
+
url: 'https://betteruptime.com/'
|
198
|
+
|
175
199
|
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
176
200
|
name: 'BingBot'
|
177
201
|
category: 'Search bot'
|
@@ -188,7 +212,7 @@
|
|
188
212
|
name: 'Blekko'
|
189
213
|
url: 'http://blekko.com'
|
190
214
|
|
191
|
-
- regex: 'BLEXBot
|
215
|
+
- regex: 'BLEXBot'
|
192
216
|
name: 'BLEXBot Crawler'
|
193
217
|
category: 'Crawler'
|
194
218
|
url: 'http://webmeup-crawler.com'
|
@@ -217,7 +241,7 @@
|
|
217
241
|
category: 'Crawler'
|
218
242
|
producer:
|
219
243
|
name: 'BoardReader'
|
220
|
-
url: '
|
244
|
+
url: 'https://boardreader.com/'
|
221
245
|
|
222
246
|
- regex: 'BountiiBot'
|
223
247
|
name: 'Bountii Bot'
|
@@ -283,6 +307,14 @@
|
|
283
307
|
name: 'CloudFlare'
|
284
308
|
url: 'http://www.cloudflare.com'
|
285
309
|
|
310
|
+
- regex: 'CloudflareDiagnostics'
|
311
|
+
name: 'Cloudflare Diagnostics'
|
312
|
+
category: 'Site Monitor'
|
313
|
+
url: 'https://www.cloudflare.com/'
|
314
|
+
producer:
|
315
|
+
name: 'Cloudflare'
|
316
|
+
url: 'https://www.cloudflare.com'
|
317
|
+
|
286
318
|
- regex: 'CloudFlare-AlwaysOnline'
|
287
319
|
name: 'CloudFlare Always Online'
|
288
320
|
category: 'Site Monitor'
|
@@ -291,7 +323,7 @@
|
|
291
323
|
name: 'CloudFlare'
|
292
324
|
url: 'http://www.cloudflare.com'
|
293
325
|
|
294
|
-
- regex: 'coccoc
|
326
|
+
- regex: 'coccoc.com'
|
295
327
|
name: 'Cốc Cốc Bot'
|
296
328
|
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
297
329
|
category: 'Search bot'
|
@@ -339,7 +371,6 @@
|
|
339
371
|
name: 'Datanyze'
|
340
372
|
url: 'https://www.datanyze.com'
|
341
373
|
|
342
|
-
|
343
374
|
- regex: 'Dataprovider'
|
344
375
|
name: 'Dataprovider'
|
345
376
|
category: 'Crawler'
|
@@ -364,7 +395,7 @@
|
|
364
395
|
name: 'DAZOO.FR'
|
365
396
|
url: 'http://dazoo.fr'
|
366
397
|
|
367
|
-
- regex: 'discobot
|
398
|
+
- regex: 'discobot'
|
368
399
|
name: 'Discobot'
|
369
400
|
category: 'Search bot'
|
370
401
|
url: 'http://discoveryengine.com/discobot.html'
|
@@ -427,7 +458,7 @@
|
|
427
458
|
name: 'eVenture Capital Partners II, LLC'
|
428
459
|
url: 'http://www.eventures.vc/'
|
429
460
|
|
430
|
-
- regex: 'Exabot
|
461
|
+
- regex: 'Exabot|ExaleadCloudview'
|
431
462
|
name: 'ExaBot'
|
432
463
|
category: 'Crawler'
|
433
464
|
url: 'http://www.exabot.com/go/robot'
|
@@ -483,7 +514,7 @@
|
|
483
514
|
name: 'David Smith & Developing Perspective, LLC'
|
484
515
|
url: 'https://david-smith.org'
|
485
516
|
|
486
|
-
- regex: '
|
517
|
+
- regex: 'Feedly'
|
487
518
|
name: 'Feedly'
|
488
519
|
url: 'http://www.feedly.com'
|
489
520
|
category: 'Feed Fetcher'
|
@@ -549,6 +580,10 @@
|
|
549
580
|
name: ''
|
550
581
|
url: ''
|
551
582
|
|
583
|
+
- regex: 'gobuster'
|
584
|
+
name: 'Gobuster'
|
585
|
+
url: 'https://github.com/OJ/gobuster'
|
586
|
+
|
552
587
|
- regex: 'ichiro/mobile goo'
|
553
588
|
name: 'Goo'
|
554
589
|
category: 'Search bot'
|
@@ -557,6 +592,10 @@
|
|
557
592
|
name: 'NTT Resonant'
|
558
593
|
url: 'http://goo.ne.jp'
|
559
594
|
|
595
|
+
- regex: 'Storebot-Google'
|
596
|
+
name: 'Google StoreBot'
|
597
|
+
category: 'Crawler'
|
598
|
+
|
560
599
|
- regex: 'Google Favicon'
|
561
600
|
name: 'Google Favicon'
|
562
601
|
category: 'Crawler'
|
@@ -649,7 +688,7 @@
|
|
649
688
|
name: 'Visual Meta'
|
650
689
|
url: 'https://www.shopalike.cz/'
|
651
690
|
|
652
|
-
- regex: 'AdsBot-Google
|
691
|
+
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
|
653
692
|
name: 'Googlebot'
|
654
693
|
category: 'Search bot'
|
655
694
|
url: 'http://www.google.com/bot.html'
|
@@ -663,7 +702,7 @@
|
|
663
702
|
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
|
664
703
|
producer:
|
665
704
|
name: 'The Internet Archive'
|
666
|
-
url: '
|
705
|
+
url: 'https://archive.org'
|
667
706
|
|
668
707
|
- regex: 'HubSpot '
|
669
708
|
name: 'HubSpot'
|
@@ -672,7 +711,6 @@
|
|
672
711
|
name: 'HubSpot Inc.'
|
673
712
|
url: 'https://www.hubspot.com'
|
674
713
|
|
675
|
-
|
676
714
|
- regex: 'HTTPMon'
|
677
715
|
name: 'HTTPMon'
|
678
716
|
category: 'Site Monitor'
|
@@ -704,7 +742,7 @@
|
|
704
742
|
|
705
743
|
- regex: 'ips-agent'
|
706
744
|
name: 'IPS Agent'
|
707
|
-
category: '
|
745
|
+
category: 'Crawler'
|
708
746
|
producer:
|
709
747
|
name: 'VeriSign, Inc'
|
710
748
|
url: 'http://www.verisign.com/'
|
@@ -717,6 +755,10 @@
|
|
717
755
|
name: ''
|
718
756
|
url: 'https://ip-guide.com'
|
719
757
|
|
758
|
+
- regex: 'k6/[0-9\.]+'
|
759
|
+
name: 'K6'
|
760
|
+
url: 'https://k6.io/'
|
761
|
+
|
720
762
|
- regex: 'kouio'
|
721
763
|
name: 'Kouio'
|
722
764
|
url: 'http://kouio.com/'
|
@@ -741,7 +783,7 @@
|
|
741
783
|
name: 'Lighthouse'
|
742
784
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
743
785
|
|
744
|
-
- regex: 'linkdexbot
|
786
|
+
- regex: 'linkdexbot|linkdex\.com'
|
745
787
|
name: 'Linkdex Bot'
|
746
788
|
category: 'Search bot'
|
747
789
|
url: 'http://www.linkdex.com/bots'
|
@@ -764,7 +806,7 @@
|
|
764
806
|
name: ''
|
765
807
|
url: ''
|
766
808
|
|
767
|
-
- regex: 'Mail\.RU
|
809
|
+
- regex: 'Mail\.RU'
|
768
810
|
name: 'Mail.Ru Bot'
|
769
811
|
category: 'Search bot'
|
770
812
|
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
|
@@ -788,7 +830,7 @@
|
|
788
830
|
name: ''
|
789
831
|
url: ''
|
790
832
|
|
791
|
-
- regex
|
833
|
+
- regex: 'masscan'
|
792
834
|
name: 'masscan'
|
793
835
|
url: 'https://github.com/robertdavidgraham/masscan'
|
794
836
|
category: 'Crawler'
|
@@ -941,12 +983,12 @@
|
|
941
983
|
category: 'Crawler'
|
942
984
|
producer:
|
943
985
|
name: 'Nuzzel'
|
944
|
-
url: https://www.nuzzel.com/
|
986
|
+
url: 'https://www.nuzzel.com/'
|
945
987
|
|
946
988
|
- regex: 'Octopus [0-9]'
|
947
989
|
name: 'Octopus'
|
948
990
|
|
949
|
-
- regex: 'omgili
|
991
|
+
- regex: 'omgili'
|
950
992
|
name: 'Omgili bot'
|
951
993
|
category: 'Search bot'
|
952
994
|
url: 'http://www.omgili.com/Crawler.html'
|
@@ -1023,7 +1065,15 @@
|
|
1023
1065
|
name: 'Bitlove'
|
1024
1066
|
url: 'http://bitlove.org/'
|
1025
1067
|
|
1026
|
-
- regex: '
|
1068
|
+
- regex: 'PRTG Network Monitor'
|
1069
|
+
name: 'PRTG Network Monitor'
|
1070
|
+
category: 'Network Monitor'
|
1071
|
+
url: 'https://www.paessler.com/prtg'
|
1072
|
+
producer:
|
1073
|
+
name: 'Paessler AG'
|
1074
|
+
url: 'https://www.paessler.com'
|
1075
|
+
|
1076
|
+
- regex: 'psbot'
|
1027
1077
|
name: 'Picsearch bot'
|
1028
1078
|
category: 'Search bot'
|
1029
1079
|
url: 'http://www.picsearch.com/bot.html'
|
@@ -1031,7 +1081,7 @@
|
|
1031
1081
|
name: 'Picsearch'
|
1032
1082
|
url: 'http://www.picsearch.com'
|
1033
1083
|
|
1034
|
-
- regex: 'Pingdom
|
1084
|
+
- regex: 'Pingdom(?:\.com|TMS)'
|
1035
1085
|
name: 'Pingdom Bot'
|
1036
1086
|
category: 'Site Monitor'
|
1037
1087
|
url: ''
|
@@ -1047,6 +1097,14 @@
|
|
1047
1097
|
name: 'Quora'
|
1048
1098
|
url: 'http://www.quora.com'
|
1049
1099
|
|
1100
|
+
- regex: 'Quora-Bot'
|
1101
|
+
name: 'Quora Bot'
|
1102
|
+
category: 'Crawler'
|
1103
|
+
url: ''
|
1104
|
+
producer:
|
1105
|
+
name: 'Quora'
|
1106
|
+
url: 'https://www.quora.com/'
|
1107
|
+
|
1050
1108
|
- regex: 'RamblerMail'
|
1051
1109
|
name: 'RamblerMail Image Proxy'
|
1052
1110
|
category: 'Crawler'
|
@@ -1275,6 +1333,14 @@
|
|
1275
1333
|
name: 'Tencent Holdings'
|
1276
1334
|
url: 'http://www.soso.com'
|
1277
1335
|
|
1336
|
+
- regex: 'Sprinklr'
|
1337
|
+
name: 'Sprinklr'
|
1338
|
+
category: 'Crawler'
|
1339
|
+
url: ''
|
1340
|
+
producer:
|
1341
|
+
name: 'Sprinklr, Inc.'
|
1342
|
+
url: 'https://www.sprinklr.com/'
|
1343
|
+
|
1278
1344
|
- regex: 'sqlmap/'
|
1279
1345
|
name: 'sqlmap'
|
1280
1346
|
category: 'Security Checker'
|
@@ -1320,13 +1386,20 @@
|
|
1320
1386
|
name: 'Tailrank Inc'
|
1321
1387
|
url: 'http://spinn3r.com'
|
1322
1388
|
|
1323
|
-
- regex: '
|
1389
|
+
- regex: 'SputnikBot'
|
1324
1390
|
name: 'Sputnik Bot'
|
1325
|
-
category: ''
|
1391
|
+
category: 'Crawler'
|
1392
|
+
url: ''
|
1393
|
+
|
1394
|
+
- regex: 'SputnikFaviconBot'
|
1395
|
+
name: 'Sputnik Favicon Bot'
|
1396
|
+
category: 'Crawler'
|
1397
|
+
url: ''
|
1398
|
+
|
1399
|
+
- regex: 'SputnikImageBot'
|
1400
|
+
name: 'Sputnik Image Bot'
|
1401
|
+
category: 'Crawler'
|
1326
1402
|
url: ''
|
1327
|
-
producer:
|
1328
|
-
name: ''
|
1329
|
-
url: ''
|
1330
1403
|
|
1331
1404
|
- regex: 'SurveyBot'
|
1332
1405
|
name: 'Survey Bot'
|
@@ -1545,7 +1618,7 @@
|
|
1545
1618
|
category: 'Site Monitor'
|
1546
1619
|
url: 'https://www.webpagetest.org'
|
1547
1620
|
|
1548
|
-
- regex: 'WeSEE
|
1621
|
+
- regex: 'WeSEE'
|
1549
1622
|
name: 'WeSEE:Search'
|
1550
1623
|
category: 'Search bot'
|
1551
1624
|
url: 'http://www.wesee.com/bot'
|
@@ -1617,6 +1690,14 @@
|
|
1617
1690
|
name: 'Yahoo! Inc.'
|
1618
1691
|
url: 'http://www.yahoo.com'
|
1619
1692
|
|
1693
|
+
- regex: 'YahooMailProxy'
|
1694
|
+
name: 'Yahoo! Mail Proxy'
|
1695
|
+
category: 'Service Agent'
|
1696
|
+
url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
|
1697
|
+
producer:
|
1698
|
+
name: 'Yahoo! Inc.'
|
1699
|
+
url: 'http://www.yahoo.com'
|
1700
|
+
|
1620
1701
|
- regex: 'YahooCacheSystem'
|
1621
1702
|
name: 'Yahoo! Cache System'
|
1622
1703
|
category: 'Crawler'
|
@@ -1633,7 +1714,7 @@
|
|
1633
1714
|
name: 'Yahoo! Japan Corp.'
|
1634
1715
|
url: 'https://www.yahoo.co.jp/'
|
1635
1716
|
|
1636
|
-
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News
|
1717
|
+
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
|
1637
1718
|
name: 'Yandex Bot'
|
1638
1719
|
category: 'Search bot'
|
1639
1720
|
url: 'http://www.yandex.com/bots'
|
@@ -1718,18 +1799,18 @@
|
|
1718
1799
|
- regex: 'HubPages.*crawlingpolicy'
|
1719
1800
|
name: 'HubPages'
|
1720
1801
|
category: 'Crawler'
|
1721
|
-
url: '
|
1802
|
+
url: 'https://hubpages.com/help/crawlingpolicy'
|
1722
1803
|
producer:
|
1723
|
-
name: 'HubPages'
|
1724
|
-
url: '
|
1804
|
+
name: 'HubPages, Inc.'
|
1805
|
+
url: 'https://discover.hubpages.com/'
|
1725
1806
|
|
1726
1807
|
- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
|
1727
1808
|
name: 'Pinterest'
|
1728
|
-
url: '
|
1809
|
+
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
|
1729
1810
|
category: 'Crawler'
|
1730
1811
|
producer:
|
1731
1812
|
name: 'Pinterest'
|
1732
|
-
url: '
|
1813
|
+
url: 'https://www.pinterest.com/'
|
1733
1814
|
|
1734
1815
|
- regex: 'Site24x7'
|
1735
1816
|
name: 'Site24x7 Website Monitoring'
|
@@ -1771,13 +1852,13 @@
|
|
1771
1852
|
name: 'Monitor.Us'
|
1772
1853
|
url: 'http://www.monitor.us'
|
1773
1854
|
|
1774
|
-
- regex: 'Catchpoint
|
1855
|
+
- regex: 'Catchpoint'
|
1775
1856
|
name: 'Catchpoint'
|
1776
1857
|
category: 'Site Monitor'
|
1777
|
-
url: ''
|
1858
|
+
url: 'https://www.catchpoint.com/'
|
1778
1859
|
producer:
|
1779
1860
|
name: 'Catchpoint Systems'
|
1780
|
-
url: '
|
1861
|
+
url: 'https://www.catchpoint.com/'
|
1781
1862
|
|
1782
1863
|
- regex: 'bitlybot'
|
1783
1864
|
name: 'BitlyBot'
|
@@ -1845,7 +1926,7 @@
|
|
1845
1926
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
1846
1927
|
name: 'RSSRadio Bot'
|
1847
1928
|
|
1848
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
|
1929
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent)'
|
1849
1930
|
name: 'Generic Bot'
|
1850
1931
|
|
1851
1932
|
- regex: '^sentry'
|
@@ -1955,10 +2036,10 @@
|
|
1955
2036
|
- regex: 'BoardReader Favicon Fetcher'
|
1956
2037
|
name: 'BoardReader'
|
1957
2038
|
category: 'Search bot'
|
1958
|
-
url: '
|
2039
|
+
url: 'https://boardreader.com/'
|
1959
2040
|
producer:
|
1960
2041
|
name: 'Effyis Inc'
|
1961
|
-
url: '
|
2042
|
+
url: 'https://boardreader.com/'
|
1962
2043
|
|
1963
2044
|
- regex: 'IDG/IT'
|
1964
2045
|
name: 'IDG/IT'
|
@@ -2003,7 +2084,7 @@
|
|
2003
2084
|
- regex: 'oBot'
|
2004
2085
|
name: 'oBot'
|
2005
2086
|
category: 'Search bot'
|
2006
|
-
url: '
|
2087
|
+
url: 'https://www.xforce-security.com/crawler/'
|
2007
2088
|
producer:
|
2008
2089
|
name: 'IBM Germany Research & Development GmbH'
|
2009
2090
|
url: 'https://exchange.xforce.ibmcloud.com/'
|
@@ -2046,7 +2127,7 @@
|
|
2046
2127
|
url: 'https://nutch.apache.org'
|
2047
2128
|
producer:
|
2048
2129
|
name: 'The Apache Software Foundation'
|
2049
|
-
url: '
|
2130
|
+
url: 'https://www.apache.org/foundation/'
|
2050
2131
|
|
2051
2132
|
- regex: 'Seobility'
|
2052
2133
|
name: 'Seobility'
|
@@ -2061,7 +2142,7 @@
|
|
2061
2142
|
- regex: 'Grammarly'
|
2062
2143
|
name: 'Grammarly'
|
2063
2144
|
category: 'Service bot'
|
2064
|
-
url: '
|
2145
|
+
url: 'https://www.grammarly.com'
|
2065
2146
|
|
2066
2147
|
- regex: 'Robozilla'
|
2067
2148
|
name: 'Robozilla'
|
@@ -2080,7 +2161,7 @@
|
|
2080
2161
|
- regex: 'SerendeputyBot'
|
2081
2162
|
name: 'Serendeputy Bot'
|
2082
2163
|
category: 'Crawler'
|
2083
|
-
url: '
|
2164
|
+
url: 'https://serendeputy.com/about/serendeputy-bot'
|
2084
2165
|
|
2085
2166
|
- regex: 'ias-va.*admantx.*service-fetcher'
|
2086
2167
|
name: 'ADmantX Service Fetcher'
|
@@ -2102,7 +2183,419 @@
|
|
2102
2183
|
category: 'Crawler'
|
2103
2184
|
url: 'http://www.exensa.com/crawl'
|
2104
2185
|
|
2105
|
-
|
2186
|
+
- regex: 'BDCbot'
|
2187
|
+
name: 'BDCbot'
|
2188
|
+
category: 'Crawler'
|
2189
|
+
url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
|
2190
|
+
producer:
|
2191
|
+
name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
|
2192
|
+
url: 'https://bigdatacorp.com.br/'
|
2193
|
+
|
2194
|
+
- regex: 'adbeat'
|
2195
|
+
name: 'Adbeat'
|
2196
|
+
category: 'Crawler'
|
2197
|
+
url: 'https://www.adbeat.com/operation_policy'
|
2198
|
+
producer:
|
2199
|
+
name: 'PPC Labs LLC'
|
2200
|
+
url: 'https://www.adbeat.com/'
|
2201
|
+
|
2202
|
+
- regex: 'BW/(?:(\d+[\.\d]+))'
|
2203
|
+
name: 'BuiltWith'
|
2204
|
+
category: 'Crawler'
|
2205
|
+
url: 'https://builtwith.com/biup'
|
2206
|
+
producer:
|
2207
|
+
name: 'BuiltWith Pty Ltd'
|
2208
|
+
url: 'https://builtwith.com/'
|
2209
|
+
|
2210
|
+
- regex: 'https://whatis.contentkingapp.com'
|
2211
|
+
name: 'ContentKing'
|
2212
|
+
category: 'Site Monitor'
|
2213
|
+
url: 'https://whatis.contentkingapp.com/'
|
2214
|
+
producer:
|
2215
|
+
name: 'ContentKing BV'
|
2216
|
+
url: 'https://www.contentkingapp.com/'
|
2217
|
+
|
2218
|
+
- regex: 'MicroAdBot'
|
2219
|
+
name: 'MicroAdBot'
|
2220
|
+
category: 'Crawler'
|
2221
|
+
url: 'https://www.microad.co.jp/'
|
2222
|
+
producer:
|
2223
|
+
name: 'MicroAd, Inc.'
|
2224
|
+
url: 'https://www.microad.co.jp/'
|
2225
|
+
|
2226
|
+
- regex: 'PingAdmin.Ru'
|
2227
|
+
name: 'PingAdmin.Ru'
|
2228
|
+
category: 'Site Monitor'
|
2229
|
+
url: 'https://ping-admin.ru/'
|
2230
|
+
|
2231
|
+
- regex: 'notifyninja.+monitoring'
|
2232
|
+
name: 'Notify Ninja'
|
2233
|
+
category: 'Site Monitor'
|
2234
|
+
url: 'http://notifyninja.com'
|
2235
|
+
|
2236
|
+
- regex: 'WebDataStats'
|
2237
|
+
name: 'WebDataStats'
|
2238
|
+
category: 'Crawler'
|
2239
|
+
url: 'https://webdatastats.com/policy.html'
|
2240
|
+
producer:
|
2241
|
+
name: 'WebTehRazrabotka LLC'
|
2242
|
+
url: 'https://webdatastats.com/'
|
2243
|
+
|
2244
|
+
- regex: 'parse.ly scraper'
|
2245
|
+
name: 'parse.ly'
|
2246
|
+
category: 'Crawler'
|
2247
|
+
url: 'https://www.parse.ly/help/integration/crawler'
|
2248
|
+
producer:
|
2249
|
+
name: 'Parsely, Inc.'
|
2250
|
+
url: 'https://www.parse.ly/'
|
2251
|
+
|
2252
|
+
- regex: 'Nimbostratus-Bot'
|
2253
|
+
name: 'Nimbostratus Bot'
|
2254
|
+
category: 'Site Monitor'
|
2255
|
+
url: 'http://cloudsystemnetworks.com'
|
2256
|
+
|
2257
|
+
- regex: 'HeartRails_Capture/\d'
|
2258
|
+
name: 'Heart Rails Capture'
|
2259
|
+
category: 'Service Agent'
|
2260
|
+
url: 'http://capture.heartrails.com'
|
2261
|
+
|
2262
|
+
- regex: 'Project-Resonance'
|
2263
|
+
name: 'Project Resonance'
|
2264
|
+
category: 'Crawler'
|
2265
|
+
url: 'http://project-resonance.com'
|
2266
|
+
|
2267
|
+
- regex: 'DataXu/\d'
|
2268
|
+
name: 'DataXu'
|
2269
|
+
category: 'Service Agent'
|
2270
|
+
url: 'https://advertising.roku.com/dataxu'
|
2271
|
+
producer:
|
2272
|
+
name: 'Roku, Inc.'
|
2273
|
+
url: 'https://roku.com'
|
2274
|
+
|
2275
|
+
- regex: 'Cocolyzebot'
|
2276
|
+
name: 'Cocolyzebot'
|
2277
|
+
category: 'Crawler'
|
2278
|
+
url: 'https://cocolyze.com/en/cocolyzebot'
|
2279
|
+
producer:
|
2280
|
+
name: 'VSI INNOVATION SAS'
|
2281
|
+
url: 'https://vsi-innovation.com/'
|
2282
|
+
|
2283
|
+
- regex: 'veryhip'
|
2284
|
+
name: 'VeryHip'
|
2285
|
+
category: 'Crawler'
|
2286
|
+
url: 'https://veryhip.com/'
|
2287
|
+
producer:
|
2288
|
+
name: 'VeryHip'
|
2289
|
+
url: 'https://veryhip.com/'
|
2290
|
+
|
2291
|
+
- regex: 'LinkpadBot'
|
2292
|
+
name: 'LinkpadBot'
|
2293
|
+
category: 'Crawler'
|
2294
|
+
url: 'https://www.linkpad.org/'
|
2295
|
+
producer:
|
2296
|
+
name: 'Solomono LLC'
|
2297
|
+
url: 'https://www.linkpad.org/'
|
2298
|
+
|
2299
|
+
- regex: 'MuscatFerret'
|
2300
|
+
name: 'MuscatFerret'
|
2301
|
+
category: 'Crawler'
|
2302
|
+
url: 'http://www.webtop.com/'
|
2303
|
+
|
2304
|
+
- regex: 'PageThing.com'
|
2305
|
+
name: 'PageThing'
|
2306
|
+
category: 'Crawler'
|
2307
|
+
url: 'https://www.pagething.com/'
|
2308
|
+
producer:
|
2309
|
+
name: 'SPECIALNOISE LTD'
|
2310
|
+
url: 'https://www.specialnoise.com/'
|
2311
|
+
|
2312
|
+
- regex: 'ArchiveBox'
|
2313
|
+
name: 'ArchiveBox'
|
2314
|
+
url: 'https://archivebox.io/'
|
2315
|
+
category: 'Crawler'
|
2316
|
+
producer:
|
2317
|
+
name: ''
|
2318
|
+
url: ''
|
2319
|
+
|
2320
|
+
- regex: 'Choosito'
|
2321
|
+
name: 'Choosito'
|
2322
|
+
url: 'https://www.choosito.com/'
|
2323
|
+
category: 'Crawler'
|
2324
|
+
producer:
|
2325
|
+
name: 'Choosito! Inc.'
|
2326
|
+
url: 'https://www.choosito.com/'
|
2327
|
+
|
2328
|
+
- regex: 'datagnionbot'
|
2329
|
+
name: 'datagnionbot'
|
2330
|
+
url: 'https://www.datagnion.com/bot.html'
|
2331
|
+
category: 'Crawler'
|
2332
|
+
producer:
|
2333
|
+
name: 'DATAGNION GMBH'
|
2334
|
+
url: 'https://www.datagnion.com/'
|
2335
|
+
|
2336
|
+
- regex: 'WhatCMS'
|
2337
|
+
name: 'WhatCMS'
|
2338
|
+
url: 'https://whatcms.org/'
|
2339
|
+
category: 'Crawler'
|
2340
|
+
producer:
|
2341
|
+
name: 'Nineteen Ten LLC'
|
2342
|
+
url: 'https://whatcms.org/'
|
2343
|
+
|
2344
|
+
- regex: 'httpx'
|
2345
|
+
name: 'httpx'
|
2346
|
+
url: 'https://github.com/projectdiscovery/httpx'
|
2347
|
+
category: 'Crawler'
|
2348
|
+
producer:
|
2349
|
+
name: ''
|
2350
|
+
url: ''
|
2351
|
+
|
2352
|
+
- regex: 'scaninfo@expanseinc.com'
|
2353
|
+
name: 'Expanse'
|
2354
|
+
category: 'Security Checker'
|
2355
|
+
url: 'https://expanse.co/'
|
2356
|
+
producer:
|
2357
|
+
name: 'Expanse Inc.'
|
2358
|
+
url: 'https://expanse.co/'
|
2359
|
+
|
2360
|
+
- regex: 'HuaweiWebCatBot'
|
2361
|
+
name: 'HuaweiWebCatBot'
|
2362
|
+
category: 'Crawler'
|
2363
|
+
url: 'https://isecurity.huawei.com'
|
2364
|
+
producer:
|
2365
|
+
name: 'Huawei Technologies Co., Ltd.'
|
2366
|
+
url: 'https://huawei.com'
|
2367
|
+
|
2368
|
+
- regex: 'Hatena-Favicon'
|
2369
|
+
name: 'Hatena Favicon'
|
2370
|
+
category: 'Crawler'
|
2371
|
+
url: 'https://www.hatena.ne.jp/faq/'
|
2372
|
+
producer:
|
2373
|
+
name: 'Hatena Co., Ltd.'
|
2374
|
+
url: 'https://www.hatena.ne.jp'
|
2375
|
+
|
2376
|
+
- regex: 'RyowlEngine/(\d+)'
|
2377
|
+
name: 'Ryowl'
|
2378
|
+
category: 'Crawler'
|
2379
|
+
url: 'https://ryowl.org'
|
2380
|
+
|
2381
|
+
- regex: 'OdklBot/(\d+)'
|
2382
|
+
name: 'Odnoklassniki Bot'
|
2383
|
+
category: 'Crawler'
|
2384
|
+
url: 'https://odnoklassniki.ru'
|
2385
|
+
|
2386
|
+
- regex: 'Mediatoolkitbot'
|
2387
|
+
name: 'Mediatoolkit Bot'
|
2388
|
+
category: 'Crawler'
|
2389
|
+
url: 'https://mediatoolkit.com'
|
2390
|
+
|
2391
|
+
- regex: 'ZoominfoBot'
|
2392
|
+
name: 'ZoominfoBot'
|
2393
|
+
category: 'Crawler'
|
2394
|
+
url: 'https://www.zoominfo.com'
|
2395
|
+
|
2396
|
+
- regex: 'WeViKaBot/([\d+\.])'
|
2397
|
+
name: 'WeViKaBot'
|
2398
|
+
category: 'Crawler'
|
2399
|
+
url: 'http://www.wevika.de'
|
2400
|
+
|
2401
|
+
- regex: 'SEOkicks'
|
2402
|
+
name: 'SEOkicks'
|
2403
|
+
category: 'Crawler'
|
2404
|
+
url: 'https://www.seokicks.de/robot.html'
|
2405
|
+
|
2406
|
+
- regex: 'Plukkie/([\d+\.])'
|
2407
|
+
name: 'Plukkie'
|
2408
|
+
category: 'Crawler'
|
2409
|
+
url: 'http://www.botje.com/plukkie.htm'
|
2410
|
+
|
2411
|
+
- regex: 'proximic;'
|
2412
|
+
name: 'Comscore'
|
2413
|
+
category: 'Crawler'
|
2414
|
+
url: 'https://www.comscore.com/Web-Crawler'
|
2415
|
+
|
2416
|
+
- regex: 'SurdotlyBot/([\d+\.])'
|
2417
|
+
name: 'SurdotlyBot'
|
2418
|
+
category: 'Crawler'
|
2419
|
+
url: 'http://sur.ly/bot.html'
|
2420
|
+
|
2421
|
+
- regex: 'Gowikibot/([\d+\.])'
|
2422
|
+
name: 'Gowikibot'
|
2423
|
+
category: 'Crawler'
|
2424
|
+
url: 'http://www.gowikibot.com'
|
2425
|
+
|
2426
|
+
- regex: 'SabsimBot/([\d+\.])'
|
2427
|
+
name: 'SabsimBot'
|
2428
|
+
category: 'Crawler'
|
2429
|
+
url: 'https://sabsim.com'
|
2430
|
+
|
2431
|
+
- regex: 'LumtelBot/([\d+\.])'
|
2432
|
+
name: 'LumtelBot'
|
2433
|
+
category: 'Crawler'
|
2434
|
+
url: 'https://umtel.com'
|
2435
|
+
|
2436
|
+
- regex: 'PiplBot'
|
2437
|
+
name: 'PiplBot'
|
2438
|
+
category: 'Crawler'
|
2439
|
+
url: 'http://www.pipl.com/bot'
|
2440
|
+
|
2441
|
+
- regex: 'woobot/([\d+\.])'
|
2442
|
+
name: 'WooRank'
|
2443
|
+
category: 'Crawler'
|
2444
|
+
url: 'https://www.woorank.com/bot'
|
2445
|
+
|
2446
|
+
- regex: 'Cookiebot/([\d+\.])'
|
2447
|
+
name: 'Cookiebot'
|
2448
|
+
category: 'Crawler'
|
2449
|
+
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
|
2450
|
+
producer:
|
2451
|
+
name: 'Cybot A/S'
|
2452
|
+
url: 'https://www.cybot.com/'
|
2453
|
+
|
2454
|
+
- regex: 'NetSystemsResearch'
|
2455
|
+
name: 'NetSystemsResearch'
|
2456
|
+
category: 'Security Checker'
|
2457
|
+
url: 'https://www.netsystemsresearch.com/'
|
2458
|
+
producer:
|
2459
|
+
name: 'NET SYSTEMS RESEARCH LLC'
|
2460
|
+
url: 'https://www.netsystemsresearch.com/'
|
2461
|
+
|
2462
|
+
- regex: 'CensysInspect/([\d+\.])'
|
2463
|
+
name: 'CensysInspect'
|
2464
|
+
category: 'Security Checker'
|
2465
|
+
url: 'https://about.censys.io/'
|
2466
|
+
producer:
|
2467
|
+
name: 'Censys, Inc.'
|
2468
|
+
url: 'https://censys.io/'
|
2106
2469
|
|
2470
|
+
- regex: 'gdnplus.com'
|
2471
|
+
name: 'GDNP'
|
2472
|
+
category: 'Crawler'
|
2473
|
+
url: 'https://gdnplus.com/'
|
2474
|
+
producer:
|
2475
|
+
name: 'Global Digital Network Plus, LLC'
|
2476
|
+
url: 'https://gdnplus.com/'
|
2477
|
+
|
2478
|
+
- regex: 'WellKnownBot/([\d+\.])'
|
2479
|
+
name: 'WellKnownBot'
|
2480
|
+
category: 'Crawler'
|
2481
|
+
url: 'https://well-known.dev'
|
2482
|
+
|
2483
|
+
- regex: 'Adsbot/([\d+\.])'
|
2484
|
+
name: 'Adsbot'
|
2485
|
+
category: 'Crawler'
|
2486
|
+
url: 'https://seostar.co/robot/'
|
2487
|
+
|
2488
|
+
- regex: 'MTRobot/([\d+\.])'
|
2489
|
+
name: 'MTRobot'
|
2490
|
+
category: 'Crawler'
|
2491
|
+
url: 'https://metrics-tools.de/robot.html'
|
2492
|
+
producer:
|
2493
|
+
name: 'Metrics Tools'
|
2494
|
+
url: 'https://metrics-tools.de/'
|
2495
|
+
|
2496
|
+
- regex: 'serpstatbot/([\d+\.])'
|
2497
|
+
name: 'serpstatbot'
|
2498
|
+
category: 'Crawler'
|
2499
|
+
url: 'http://serpstatbot.com/'
|
2500
|
+
producer:
|
2501
|
+
name: 'Netpeak Ltd'
|
2502
|
+
url: 'https://netpeak.net/'
|
2503
|
+
|
2504
|
+
- regex: 'colly'
|
2505
|
+
name: 'colly'
|
2506
|
+
category: 'Crawler'
|
2507
|
+
url: 'https://github.com/gocolly/colly/'
|
2508
|
+
|
2509
|
+
- regex: 'l9tcpid/v([\d+\.])'
|
2510
|
+
name: 'l9tcpid'
|
2511
|
+
category: 'Security Checker'
|
2512
|
+
url: 'https://github.com/LeakIX/l9tcpid'
|
2513
|
+
|
2514
|
+
- regex: 'MegaIndex.ru/([\d+\.])'
|
2515
|
+
name: 'MegaIndex'
|
2516
|
+
category: 'Crawler'
|
2517
|
+
url: 'https://megaindex.com/crawler'
|
2518
|
+
|
2519
|
+
- regex: 'Seekport'
|
2520
|
+
name: 'Seekport'
|
2521
|
+
category: 'Crawler'
|
2522
|
+
url: 'http://www.seekport.com/'
|
2523
|
+
producer:
|
2524
|
+
name: 'SISTRIX GmbH'
|
2525
|
+
url: 'https://www.sistrix.de/'
|
2526
|
+
|
2527
|
+
- regex: 'seolyt/([\d+\.])'
|
2528
|
+
name: 'seolyt'
|
2529
|
+
category: 'Crawler'
|
2530
|
+
url: 'https://seolyt.com/'
|
2531
|
+
|
2532
|
+
- regex: 'YaK/([\d+\.])'
|
2533
|
+
name: 'YaK'
|
2534
|
+
category: 'Crawler'
|
2535
|
+
url: 'https://www.linkfluence.com/'
|
2536
|
+
producer:
|
2537
|
+
name: 'Linkfluence SAS'
|
2538
|
+
url: 'https://www.linkfluence.com/'
|
2539
|
+
|
2540
|
+
- regex: 'KomodiaBot/([\d+\.])'
|
2541
|
+
name: 'KomodiaBot'
|
2542
|
+
category: 'Crawler'
|
2543
|
+
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
|
2544
|
+
producer:
|
2545
|
+
name: 'Komodia Inc.'
|
2546
|
+
url: 'https://www.komodia.com/'
|
2547
|
+
|
2548
|
+
- regex: 'Neevabot/([\d+\.])'
|
2549
|
+
name: 'Neevabot'
|
2550
|
+
category: 'Search bot'
|
2551
|
+
url: 'https://neeva.com/neevabot'
|
2552
|
+
producer:
|
2553
|
+
name: 'Neeva Inc.'
|
2554
|
+
url: 'https://neeva.com/'
|
2555
|
+
|
2556
|
+
- regex: 'LinkPreview/([\d+\.])'
|
2557
|
+
name: 'LinkPreview'
|
2558
|
+
category: 'Service Agent'
|
2559
|
+
url: 'https://www.linkpreview.net/'
|
2560
|
+
|
2561
|
+
- regex: 'JungleKeyThumbnail/([\d+\.])'
|
2562
|
+
name: 'JungleKeyThumbnail'
|
2563
|
+
category: 'Crawler'
|
2564
|
+
url: 'https://junglekey.com/'
|
2565
|
+
|
2566
|
+
- regex: 'rocketmonitor(?: |bot/)([\d+\.])'
|
2567
|
+
name: 'RocketMonitorBot'
|
2568
|
+
category: 'Site Monitor'
|
2569
|
+
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
|
2570
|
+
producer:
|
2571
|
+
name: 'Radio Mast, Inc.'
|
2572
|
+
url: 'https://www.radiomast.io/'
|
2573
|
+
|
2574
|
+
- regex: 'SitemapParser-VIPnytt/([\d+\.])'
|
2575
|
+
name: 'SitemapParser-VIPnytt'
|
2576
|
+
category: 'Crawler'
|
2577
|
+
url: 'https://github.com/VIPnytt/SitemapParser/'
|
2578
|
+
|
2579
|
+
|
2580
|
+
- regex: '^Turnitin'
|
2581
|
+
name: 'Turnitin'
|
2582
|
+
category: 'Crawler'
|
2583
|
+
url: 'https://turnitin.com/robot/crawlerinfo.html'
|
2584
|
+
|
2585
|
+
- regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
|
2586
|
+
name: 'Dotcom Monitor'
|
2587
|
+
category: 'Site Monitor'
|
2588
|
+
url: 'https://www.dotcom-monitor.com'
|
2589
|
+
|
2590
|
+
- regex: 'ThinkChaos/'
|
2591
|
+
name: 'ThinkChaos'
|
2592
|
+
category: 'Crawler'
|
2593
|
+
|
2594
|
+
- regex: 'DataForSeoBot'
|
2595
|
+
name: 'DataForSeoBot'
|
2596
|
+
category: 'Crawler'
|
2597
|
+
url: 'https://dataforseo.com/dataforseo-bot'
|
2598
|
+
|
2599
|
+
# Generic detections
|
2107
2600
|
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
2108
2601
|
name: 'Generic Bot'
|