device_detector 1.0.4 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -7
- data/lib/device_detector/browser.rb +364 -0
- data/lib/device_detector/client.rb +8 -0
- data/lib/device_detector/device.rb +1204 -2
- data/lib/device_detector/os.rb +39 -12
- data/lib/device_detector/parser.rb +1 -1
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector/version_extractor.rb +28 -0
- data/lib/device_detector.rb +57 -13
- data/regexes/bots.yml +651 -61
- data/regexes/client/browser_engine.yml +7 -1
- data/regexes/client/browsers.yml +774 -78
- data/regexes/client/feed_readers.yml +4 -10
- data/regexes/client/libraries.yml +71 -2
- data/regexes/client/mediaplayers.yml +25 -1
- data/regexes/client/mobile_apps.yml +923 -73
- data/regexes/client/pim.yml +65 -2
- data/regexes/device/cameras.yml +5 -5
- data/regexes/device/car_browsers.yml +16 -0
- data/regexes/device/consoles.yml +13 -1
- data/regexes/device/mobiles.yml +17029 -3889
- data/regexes/device/notebooks.yml +114 -0
- data/regexes/device/portable_media_player.yml +36 -9
- data/regexes/device/shell_tv.yml +117 -0
- data/regexes/device/televisions.yml +439 -34
- data/regexes/oss.yml +620 -284
- metadata +9 -131
- data/.gitignore +0 -14
- data/.rubocop.yml +0 -49
- data/.ruby-version +0 -1
- data/.travis.yml +0 -14
- data/Gemfile +0 -8
- data/Rakefile +0 -85
- data/device_detector.gemspec +0 -27
- data/spec/device_detector/bot_fixtures_spec.rb +0 -30
- data/spec/device_detector/client_fixtures_spec.rb +0 -31
- data/spec/device_detector/concrete_user_agent_spec.rb +0 -135
- data/spec/device_detector/detector_fixtures_spec.rb +0 -100
- data/spec/device_detector/device_fixtures_spec.rb +0 -36
- data/spec/device_detector/device_spec.rb +0 -131
- data/spec/device_detector/memory_cache_spec.rb +0 -148
- data/spec/device_detector/model_extractor_spec.rb +0 -63
- data/spec/device_detector/os_fixtures_spec.rb +0 -26
- data/spec/device_detector/version_extractor_spec.rb +0 -79
- data/spec/device_detector_spec.rb +0 -189
- data/spec/fixtures/client/browser.yml +0 -2836
- data/spec/fixtures/client/feed_reader.yml +0 -199
- data/spec/fixtures/client/library.yml +0 -193
- data/spec/fixtures/client/mediaplayer.yml +0 -163
- data/spec/fixtures/client/mobile_app.yml +0 -217
- data/spec/fixtures/client/pim.yml +0 -115
- data/spec/fixtures/detector/bots.yml +0 -3726
- data/spec/fixtures/detector/camera.yml +0 -141
- data/spec/fixtures/detector/car_browser.yml +0 -81
- data/spec/fixtures/detector/console.yml +0 -321
- data/spec/fixtures/detector/desktop.yml +0 -5461
- data/spec/fixtures/detector/feature_phone.yml +0 -891
- data/spec/fixtures/detector/feed_reader.yml +0 -551
- data/spec/fixtures/detector/mediaplayer.yml +0 -253
- data/spec/fixtures/detector/mobile_apps.yml +0 -494
- data/spec/fixtures/detector/phablet.yml +0 -4167
- data/spec/fixtures/detector/portable_media_player.yml +0 -178
- data/spec/fixtures/detector/smart_display.yml +0 -61
- data/spec/fixtures/detector/smart_speaker.yml +0 -55
- data/spec/fixtures/detector/smartphone-1.yml +0 -9927
- data/spec/fixtures/detector/smartphone-10.yml +0 -9977
- data/spec/fixtures/detector/smartphone-11.yml +0 -9891
- data/spec/fixtures/detector/smartphone-12.yml +0 -9906
- data/spec/fixtures/detector/smartphone-13.yml +0 -9920
- data/spec/fixtures/detector/smartphone-14.yml +0 -2662
- data/spec/fixtures/detector/smartphone-2.yml +0 -9992
- data/spec/fixtures/detector/smartphone-3.yml +0 -9945
- data/spec/fixtures/detector/smartphone-4.yml +0 -9923
- data/spec/fixtures/detector/smartphone-5.yml +0 -9914
- data/spec/fixtures/detector/smartphone-6.yml +0 -9962
- data/spec/fixtures/detector/smartphone-7.yml +0 -9899
- data/spec/fixtures/detector/smartphone-8.yml +0 -9931
- data/spec/fixtures/detector/smartphone-9.yml +0 -9899
- data/spec/fixtures/detector/smartphone.yml +0 -9984
- data/spec/fixtures/detector/tablet-1.yml +0 -9995
- data/spec/fixtures/detector/tablet-2.yml +0 -9977
- data/spec/fixtures/detector/tablet-3.yml +0 -9959
- data/spec/fixtures/detector/tablet-4.yml +0 -4528
- data/spec/fixtures/detector/tablet.yml +0 -9971
- data/spec/fixtures/detector/tv.yml +0 -4933
- data/spec/fixtures/detector/unknown.yml +0 -3236
- data/spec/fixtures/detector/wearable.yml +0 -61
- data/spec/fixtures/device/camera.yml +0 -19
- data/spec/fixtures/device/car_browser.yml +0 -13
- data/spec/fixtures/device/console.yml +0 -79
- data/spec/fixtures/parser/oss.yml +0 -1082
- data/spec/fixtures/parser/vendorfragments.yml +0 -168
- data/spec/spec_helper.rb +0 -9
data/regexes/bots.yml
CHANGED
|
@@ -5,10 +5,10 @@
|
|
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
|
6
6
|
###############
|
|
7
7
|
|
|
8
|
-
- regex: '360Spider
|
|
8
|
+
- regex: '360Spider'
|
|
9
9
|
name: '360Spider'
|
|
10
10
|
category: 'Search bot'
|
|
11
|
-
url: '
|
|
11
|
+
url: 'https://www.so.com/help/help_3_2.html'
|
|
12
12
|
producer:
|
|
13
13
|
name: 'Online Media Group, Inc.'
|
|
14
14
|
url: ''
|
|
@@ -40,26 +40,34 @@
|
|
|
40
40
|
- regex: 'AhrefsBot'
|
|
41
41
|
name: 'aHrefs Bot'
|
|
42
42
|
category: 'Crawler'
|
|
43
|
-
url: '
|
|
43
|
+
url: 'https://ahrefs.com/robot'
|
|
44
44
|
producer:
|
|
45
45
|
name: 'Ahrefs Pte Ltd'
|
|
46
|
-
url: '
|
|
46
|
+
url: 'https://ahrefs.com/robot'
|
|
47
47
|
|
|
48
48
|
- regex: 'ia_archiver|alexabot|verifybot'
|
|
49
49
|
name: 'Alexa Crawler'
|
|
50
50
|
category: 'Search bot'
|
|
51
|
-
url: 'https://alexa.
|
|
51
|
+
url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
|
|
52
52
|
producer:
|
|
53
53
|
name: 'Alexa Internet'
|
|
54
|
-
url: '
|
|
54
|
+
url: 'https://www.alexa.com'
|
|
55
55
|
|
|
56
56
|
- regex: 'alexa site audit'
|
|
57
57
|
name: 'Alexa Site Audit'
|
|
58
58
|
category: 'Site Monitor'
|
|
59
|
-
url: '
|
|
59
|
+
url: 'https://support.alexa.com/hc/en-us/articles/200450194'
|
|
60
60
|
producer:
|
|
61
61
|
name: 'Alexa Internet'
|
|
62
|
-
url: '
|
|
62
|
+
url: 'https://www.alexa.com'
|
|
63
|
+
|
|
64
|
+
- regex: 'Amazonbot'
|
|
65
|
+
name: 'Amazon Bot'
|
|
66
|
+
category: 'Crawler'
|
|
67
|
+
url: 'https://developer.amazon.com/support/amazonbot'
|
|
68
|
+
producer:
|
|
69
|
+
name: 'Amazon.com, Inc.'
|
|
70
|
+
url: 'https://www.amazon.com/'
|
|
63
71
|
|
|
64
72
|
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
|
65
73
|
name: 'Amazon Route53 Health Check'
|
|
@@ -82,23 +90,31 @@
|
|
|
82
90
|
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
|
|
83
91
|
producer:
|
|
84
92
|
name: 'The Apache Software Foundation'
|
|
85
|
-
url: '
|
|
93
|
+
url: 'https://www.apache.org/foundation/'
|
|
86
94
|
|
|
87
95
|
- regex: 'Applebot'
|
|
88
96
|
name: 'Applebot'
|
|
89
97
|
category: 'Crawler'
|
|
90
|
-
url: '
|
|
98
|
+
url: 'https://support.apple.com/en-us/HT204683'
|
|
91
99
|
producer:
|
|
92
100
|
name: 'Apple Inc'
|
|
93
|
-
url: '
|
|
101
|
+
url: 'https://www.apple.com'
|
|
102
|
+
|
|
103
|
+
- regex: "AppSignalBot"
|
|
104
|
+
name: "AppSignalBot"
|
|
105
|
+
category: "Site Monitor"
|
|
106
|
+
url: "https://docs.appsignal.com/uptime-monitoring/"
|
|
107
|
+
producer:
|
|
108
|
+
name: "AppSignal"
|
|
109
|
+
url: "https://appsignal.com/"
|
|
94
110
|
|
|
95
111
|
- regex: 'Arachni'
|
|
96
112
|
name: 'Arachni'
|
|
97
113
|
category: 'Security Checker'
|
|
98
|
-
url: '
|
|
114
|
+
url: 'https://www.arachni-scanner.com/'
|
|
99
115
|
producer:
|
|
100
116
|
name: 'Sarosys LLC'
|
|
101
|
-
url: '
|
|
117
|
+
url: 'https://www.sarosys.com/'
|
|
102
118
|
|
|
103
119
|
- regex: 'AspiegelBot'
|
|
104
120
|
name: 'AspiegelBot'
|
|
@@ -112,7 +128,7 @@
|
|
|
112
128
|
name: 'Castro 2'
|
|
113
129
|
category: 'Service Agent'
|
|
114
130
|
url: 'http://supertop.co/castro/'
|
|
115
|
-
producer:
|
|
131
|
+
producer:
|
|
116
132
|
name: 'Supertop'
|
|
117
133
|
url: 'http://supertop.co'
|
|
118
134
|
|
|
@@ -127,10 +143,10 @@
|
|
|
127
143
|
- regex: 'archive\.org_bot|special_archiver'
|
|
128
144
|
name: 'archive.org bot'
|
|
129
145
|
category: 'Crawler'
|
|
130
|
-
url: '
|
|
146
|
+
url: 'https://archive.org/details/archive.org_bot'
|
|
131
147
|
producer:
|
|
132
148
|
name: 'The Internet Archive'
|
|
133
|
-
url: '
|
|
149
|
+
url: 'https://archive.org'
|
|
134
150
|
|
|
135
151
|
- regex: 'Ask Jeeves/Teoma'
|
|
136
152
|
name: 'Ask Jeeves'
|
|
@@ -156,7 +172,7 @@
|
|
|
156
172
|
name: '2.0Promotion GbR'
|
|
157
173
|
url: 'http://www.backlinktest.com'
|
|
158
174
|
|
|
159
|
-
- regex: '
|
|
175
|
+
- regex: 'Baidu.*spider|baidu Transcoder'
|
|
160
176
|
name: 'Baidu Spider'
|
|
161
177
|
category: 'Search bot'
|
|
162
178
|
url: 'http://www.baidu.com/search/spider.htm'
|
|
@@ -172,6 +188,14 @@
|
|
|
172
188
|
name: ''
|
|
173
189
|
url: ''
|
|
174
190
|
|
|
191
|
+
- regex: 'Better Uptime Bot'
|
|
192
|
+
name: 'Better Uptime Bot'
|
|
193
|
+
category: 'Site Monitor'
|
|
194
|
+
url: 'https://betteruptime.com/faq'
|
|
195
|
+
producer:
|
|
196
|
+
name: 'Better Uptime'
|
|
197
|
+
url: 'https://betteruptime.com/'
|
|
198
|
+
|
|
175
199
|
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
|
176
200
|
name: 'BingBot'
|
|
177
201
|
category: 'Search bot'
|
|
@@ -188,7 +212,7 @@
|
|
|
188
212
|
name: 'Blekko'
|
|
189
213
|
url: 'http://blekko.com'
|
|
190
214
|
|
|
191
|
-
- regex: 'BLEXBot
|
|
215
|
+
- regex: 'BLEXBot'
|
|
192
216
|
name: 'BLEXBot Crawler'
|
|
193
217
|
category: 'Crawler'
|
|
194
218
|
url: 'http://webmeup-crawler.com'
|
|
@@ -217,7 +241,7 @@
|
|
|
217
241
|
category: 'Crawler'
|
|
218
242
|
producer:
|
|
219
243
|
name: 'BoardReader'
|
|
220
|
-
url: '
|
|
244
|
+
url: 'https://boardreader.com/'
|
|
221
245
|
|
|
222
246
|
- regex: 'BountiiBot'
|
|
223
247
|
name: 'Bountii Bot'
|
|
@@ -283,6 +307,14 @@
|
|
|
283
307
|
name: 'CloudFlare'
|
|
284
308
|
url: 'http://www.cloudflare.com'
|
|
285
309
|
|
|
310
|
+
- regex: 'CloudflareDiagnostics'
|
|
311
|
+
name: 'Cloudflare Diagnostics'
|
|
312
|
+
category: 'Site Monitor'
|
|
313
|
+
url: 'https://www.cloudflare.com/'
|
|
314
|
+
producer:
|
|
315
|
+
name: 'Cloudflare'
|
|
316
|
+
url: 'https://www.cloudflare.com'
|
|
317
|
+
|
|
286
318
|
- regex: 'CloudFlare-AlwaysOnline'
|
|
287
319
|
name: 'CloudFlare Always Online'
|
|
288
320
|
category: 'Site Monitor'
|
|
@@ -291,7 +323,7 @@
|
|
|
291
323
|
name: 'CloudFlare'
|
|
292
324
|
url: 'http://www.cloudflare.com'
|
|
293
325
|
|
|
294
|
-
- regex: 'coccoc
|
|
326
|
+
- regex: 'coccoc.com'
|
|
295
327
|
name: 'Cốc Cốc Bot'
|
|
296
328
|
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
|
297
329
|
category: 'Search bot'
|
|
@@ -339,7 +371,6 @@
|
|
|
339
371
|
name: 'Datanyze'
|
|
340
372
|
url: 'https://www.datanyze.com'
|
|
341
373
|
|
|
342
|
-
|
|
343
374
|
- regex: 'Dataprovider'
|
|
344
375
|
name: 'Dataprovider'
|
|
345
376
|
category: 'Crawler'
|
|
@@ -364,7 +395,7 @@
|
|
|
364
395
|
name: 'DAZOO.FR'
|
|
365
396
|
url: 'http://dazoo.fr'
|
|
366
397
|
|
|
367
|
-
- regex: 'discobot
|
|
398
|
+
- regex: 'discobot'
|
|
368
399
|
name: 'Discobot'
|
|
369
400
|
category: 'Search bot'
|
|
370
401
|
url: 'http://discoveryengine.com/discobot.html'
|
|
@@ -427,7 +458,7 @@
|
|
|
427
458
|
name: 'eVenture Capital Partners II, LLC'
|
|
428
459
|
url: 'http://www.eventures.vc/'
|
|
429
460
|
|
|
430
|
-
- regex: 'Exabot
|
|
461
|
+
- regex: 'Exabot|ExaleadCloudview'
|
|
431
462
|
name: 'ExaBot'
|
|
432
463
|
category: 'Crawler'
|
|
433
464
|
url: 'http://www.exabot.com/go/robot'
|
|
@@ -451,7 +482,7 @@
|
|
|
451
482
|
name: 'SEOmoz, Inc.'
|
|
452
483
|
url: 'http://moz.com/'
|
|
453
484
|
|
|
454
|
-
- regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
|
|
485
|
+
- regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
|
|
455
486
|
name: 'Facebook External Hit'
|
|
456
487
|
category: 'Social Media Agent'
|
|
457
488
|
url: 'https://www.facebook.com/externalhit_uatext.php'
|
|
@@ -483,7 +514,7 @@
|
|
|
483
514
|
name: 'David Smith & Developing Perspective, LLC'
|
|
484
515
|
url: 'https://david-smith.org'
|
|
485
516
|
|
|
486
|
-
- regex: '
|
|
517
|
+
- regex: 'Feedly'
|
|
487
518
|
name: 'Feedly'
|
|
488
519
|
url: 'http://www.feedly.com'
|
|
489
520
|
category: 'Feed Fetcher'
|
|
@@ -549,6 +580,10 @@
|
|
|
549
580
|
name: ''
|
|
550
581
|
url: ''
|
|
551
582
|
|
|
583
|
+
- regex: 'gobuster'
|
|
584
|
+
name: 'Gobuster'
|
|
585
|
+
url: 'https://github.com/OJ/gobuster'
|
|
586
|
+
|
|
552
587
|
- regex: 'ichiro/mobile goo'
|
|
553
588
|
name: 'Goo'
|
|
554
589
|
category: 'Search bot'
|
|
@@ -557,6 +592,10 @@
|
|
|
557
592
|
name: 'NTT Resonant'
|
|
558
593
|
url: 'http://goo.ne.jp'
|
|
559
594
|
|
|
595
|
+
- regex: 'Storebot-Google'
|
|
596
|
+
name: 'Google StoreBot'
|
|
597
|
+
category: 'Crawler'
|
|
598
|
+
|
|
560
599
|
- regex: 'Google Favicon'
|
|
561
600
|
name: 'Google Favicon'
|
|
562
601
|
category: 'Crawler'
|
|
@@ -649,7 +688,7 @@
|
|
|
649
688
|
name: 'Visual Meta'
|
|
650
689
|
url: 'https://www.shopalike.cz/'
|
|
651
690
|
|
|
652
|
-
- regex: 'AdsBot-Google
|
|
691
|
+
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
|
|
653
692
|
name: 'Googlebot'
|
|
654
693
|
category: 'Search bot'
|
|
655
694
|
url: 'http://www.google.com/bot.html'
|
|
@@ -663,7 +702,7 @@
|
|
|
663
702
|
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
|
|
664
703
|
producer:
|
|
665
704
|
name: 'The Internet Archive'
|
|
666
|
-
url: '
|
|
705
|
+
url: 'https://archive.org'
|
|
667
706
|
|
|
668
707
|
- regex: 'HubSpot '
|
|
669
708
|
name: 'HubSpot'
|
|
@@ -672,7 +711,6 @@
|
|
|
672
711
|
name: 'HubSpot Inc.'
|
|
673
712
|
url: 'https://www.hubspot.com'
|
|
674
713
|
|
|
675
|
-
|
|
676
714
|
- regex: 'HTTPMon'
|
|
677
715
|
name: 'HTTPMon'
|
|
678
716
|
category: 'Site Monitor'
|
|
@@ -704,7 +742,7 @@
|
|
|
704
742
|
|
|
705
743
|
- regex: 'ips-agent'
|
|
706
744
|
name: 'IPS Agent'
|
|
707
|
-
category: '
|
|
745
|
+
category: 'Crawler'
|
|
708
746
|
producer:
|
|
709
747
|
name: 'VeriSign, Inc'
|
|
710
748
|
url: 'http://www.verisign.com/'
|
|
@@ -717,6 +755,10 @@
|
|
|
717
755
|
name: ''
|
|
718
756
|
url: 'https://ip-guide.com'
|
|
719
757
|
|
|
758
|
+
- regex: 'k6/[0-9\.]+'
|
|
759
|
+
name: 'K6'
|
|
760
|
+
url: 'https://k6.io/'
|
|
761
|
+
|
|
720
762
|
- regex: 'kouio'
|
|
721
763
|
name: 'Kouio'
|
|
722
764
|
url: 'http://kouio.com/'
|
|
@@ -741,7 +783,7 @@
|
|
|
741
783
|
name: 'Lighthouse'
|
|
742
784
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
|
743
785
|
|
|
744
|
-
- regex: 'linkdexbot
|
|
786
|
+
- regex: 'linkdexbot|linkdex\.com'
|
|
745
787
|
name: 'Linkdex Bot'
|
|
746
788
|
category: 'Search bot'
|
|
747
789
|
url: 'http://www.linkdex.com/bots'
|
|
@@ -764,7 +806,7 @@
|
|
|
764
806
|
name: ''
|
|
765
807
|
url: ''
|
|
766
808
|
|
|
767
|
-
- regex: 'Mail\.RU
|
|
809
|
+
- regex: 'Mail\.RU'
|
|
768
810
|
name: 'Mail.Ru Bot'
|
|
769
811
|
category: 'Search bot'
|
|
770
812
|
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
|
|
@@ -788,7 +830,7 @@
|
|
|
788
830
|
name: ''
|
|
789
831
|
url: ''
|
|
790
832
|
|
|
791
|
-
- regex
|
|
833
|
+
- regex: 'masscan'
|
|
792
834
|
name: 'masscan'
|
|
793
835
|
url: 'https://github.com/robertdavidgraham/masscan'
|
|
794
836
|
category: 'Crawler'
|
|
@@ -941,12 +983,12 @@
|
|
|
941
983
|
category: 'Crawler'
|
|
942
984
|
producer:
|
|
943
985
|
name: 'Nuzzel'
|
|
944
|
-
url: https://www.nuzzel.com/
|
|
986
|
+
url: 'https://www.nuzzel.com/'
|
|
945
987
|
|
|
946
988
|
- regex: 'Octopus [0-9]'
|
|
947
989
|
name: 'Octopus'
|
|
948
990
|
|
|
949
|
-
- regex: 'omgili
|
|
991
|
+
- regex: 'omgili'
|
|
950
992
|
name: 'Omgili bot'
|
|
951
993
|
category: 'Search bot'
|
|
952
994
|
url: 'http://www.omgili.com/Crawler.html'
|
|
@@ -1023,7 +1065,15 @@
|
|
|
1023
1065
|
name: 'Bitlove'
|
|
1024
1066
|
url: 'http://bitlove.org/'
|
|
1025
1067
|
|
|
1026
|
-
- regex: '
|
|
1068
|
+
- regex: 'PRTG Network Monitor'
|
|
1069
|
+
name: 'PRTG Network Monitor'
|
|
1070
|
+
category: 'Network Monitor'
|
|
1071
|
+
url: 'https://www.paessler.com/prtg'
|
|
1072
|
+
producer:
|
|
1073
|
+
name: 'Paessler AG'
|
|
1074
|
+
url: 'https://www.paessler.com'
|
|
1075
|
+
|
|
1076
|
+
- regex: 'psbot'
|
|
1027
1077
|
name: 'Picsearch bot'
|
|
1028
1078
|
category: 'Search bot'
|
|
1029
1079
|
url: 'http://www.picsearch.com/bot.html'
|
|
@@ -1031,7 +1081,7 @@
|
|
|
1031
1081
|
name: 'Picsearch'
|
|
1032
1082
|
url: 'http://www.picsearch.com'
|
|
1033
1083
|
|
|
1034
|
-
- regex: 'Pingdom
|
|
1084
|
+
- regex: 'Pingdom(?:\.com|TMS)'
|
|
1035
1085
|
name: 'Pingdom Bot'
|
|
1036
1086
|
category: 'Site Monitor'
|
|
1037
1087
|
url: ''
|
|
@@ -1047,6 +1097,14 @@
|
|
|
1047
1097
|
name: 'Quora'
|
|
1048
1098
|
url: 'http://www.quora.com'
|
|
1049
1099
|
|
|
1100
|
+
- regex: 'Quora-Bot'
|
|
1101
|
+
name: 'Quora Bot'
|
|
1102
|
+
category: 'Crawler'
|
|
1103
|
+
url: ''
|
|
1104
|
+
producer:
|
|
1105
|
+
name: 'Quora'
|
|
1106
|
+
url: 'https://www.quora.com/'
|
|
1107
|
+
|
|
1050
1108
|
- regex: 'RamblerMail'
|
|
1051
1109
|
name: 'RamblerMail Image Proxy'
|
|
1052
1110
|
category: 'Crawler'
|
|
@@ -1230,7 +1288,7 @@
|
|
|
1230
1288
|
name: 'SISTRIX GmbH'
|
|
1231
1289
|
url: 'http://www.sistrix.de'
|
|
1232
1290
|
|
|
1233
|
-
- regex: 'SISTRIX Optimizer'
|
|
1291
|
+
- regex: 'compatible; (?:SISTRIX )?Optimizer'
|
|
1234
1292
|
name: 'SISTRIX Optimizer'
|
|
1235
1293
|
category: 'Crawler'
|
|
1236
1294
|
url: 'https://optimizer.sistrix.com'
|
|
@@ -1275,6 +1333,14 @@
|
|
|
1275
1333
|
name: 'Tencent Holdings'
|
|
1276
1334
|
url: 'http://www.soso.com'
|
|
1277
1335
|
|
|
1336
|
+
- regex: 'Sprinklr'
|
|
1337
|
+
name: 'Sprinklr'
|
|
1338
|
+
category: 'Crawler'
|
|
1339
|
+
url: ''
|
|
1340
|
+
producer:
|
|
1341
|
+
name: 'Sprinklr, Inc.'
|
|
1342
|
+
url: 'https://www.sprinklr.com/'
|
|
1343
|
+
|
|
1278
1344
|
- regex: 'sqlmap/'
|
|
1279
1345
|
name: 'sqlmap'
|
|
1280
1346
|
category: 'Security Checker'
|
|
@@ -1320,13 +1386,20 @@
|
|
|
1320
1386
|
name: 'Tailrank Inc'
|
|
1321
1387
|
url: 'http://spinn3r.com'
|
|
1322
1388
|
|
|
1323
|
-
- regex: '
|
|
1389
|
+
- regex: 'SputnikBot'
|
|
1324
1390
|
name: 'Sputnik Bot'
|
|
1325
|
-
category: ''
|
|
1391
|
+
category: 'Crawler'
|
|
1392
|
+
url: ''
|
|
1393
|
+
|
|
1394
|
+
- regex: 'SputnikFaviconBot'
|
|
1395
|
+
name: 'Sputnik Favicon Bot'
|
|
1396
|
+
category: 'Crawler'
|
|
1397
|
+
url: ''
|
|
1398
|
+
|
|
1399
|
+
- regex: 'SputnikImageBot'
|
|
1400
|
+
name: 'Sputnik Image Bot'
|
|
1401
|
+
category: 'Crawler'
|
|
1326
1402
|
url: ''
|
|
1327
|
-
producer:
|
|
1328
|
-
name: ''
|
|
1329
|
-
url: ''
|
|
1330
1403
|
|
|
1331
1404
|
- regex: 'SurveyBot'
|
|
1332
1405
|
name: 'Survey Bot'
|
|
@@ -1545,7 +1618,7 @@
|
|
|
1545
1618
|
category: 'Site Monitor'
|
|
1546
1619
|
url: 'https://www.webpagetest.org'
|
|
1547
1620
|
|
|
1548
|
-
- regex: 'WeSEE
|
|
1621
|
+
- regex: 'WeSEE'
|
|
1549
1622
|
name: 'WeSEE:Search'
|
|
1550
1623
|
category: 'Search bot'
|
|
1551
1624
|
url: 'http://www.wesee.com/bot'
|
|
@@ -1617,6 +1690,14 @@
|
|
|
1617
1690
|
name: 'Yahoo! Inc.'
|
|
1618
1691
|
url: 'http://www.yahoo.com'
|
|
1619
1692
|
|
|
1693
|
+
- regex: 'YahooMailProxy'
|
|
1694
|
+
name: 'Yahoo! Mail Proxy'
|
|
1695
|
+
category: 'Service Agent'
|
|
1696
|
+
url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
|
|
1697
|
+
producer:
|
|
1698
|
+
name: 'Yahoo! Inc.'
|
|
1699
|
+
url: 'http://www.yahoo.com'
|
|
1700
|
+
|
|
1620
1701
|
- regex: 'YahooCacheSystem'
|
|
1621
1702
|
name: 'Yahoo! Cache System'
|
|
1622
1703
|
category: 'Crawler'
|
|
@@ -1633,7 +1714,7 @@
|
|
|
1633
1714
|
name: 'Yahoo! Japan Corp.'
|
|
1634
1715
|
url: 'https://www.yahoo.co.jp/'
|
|
1635
1716
|
|
|
1636
|
-
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News
|
|
1717
|
+
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
|
|
1637
1718
|
name: 'Yandex Bot'
|
|
1638
1719
|
category: 'Search bot'
|
|
1639
1720
|
url: 'http://www.yandex.com/bots'
|
|
@@ -1641,7 +1722,7 @@
|
|
|
1641
1722
|
name: 'Yandex LLC'
|
|
1642
1723
|
url: 'http://company.yandex.com'
|
|
1643
1724
|
|
|
1644
|
-
- regex: 'Yeti|NaverJapan'
|
|
1725
|
+
- regex: 'Yeti|NaverJapan|AdsBot-Naver'
|
|
1645
1726
|
name: 'Yeti/Naverbot'
|
|
1646
1727
|
category: 'Search bot'
|
|
1647
1728
|
url: 'http://help.naver.com/robots/'
|
|
@@ -1718,18 +1799,18 @@
|
|
|
1718
1799
|
- regex: 'HubPages.*crawlingpolicy'
|
|
1719
1800
|
name: 'HubPages'
|
|
1720
1801
|
category: 'Crawler'
|
|
1721
|
-
url: '
|
|
1802
|
+
url: 'https://hubpages.com/help/crawlingpolicy'
|
|
1722
1803
|
producer:
|
|
1723
|
-
name: 'HubPages'
|
|
1724
|
-
url: '
|
|
1804
|
+
name: 'HubPages, Inc.'
|
|
1805
|
+
url: 'https://discover.hubpages.com/'
|
|
1725
1806
|
|
|
1726
1807
|
- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
|
|
1727
1808
|
name: 'Pinterest'
|
|
1728
|
-
url: '
|
|
1809
|
+
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
|
|
1729
1810
|
category: 'Crawler'
|
|
1730
1811
|
producer:
|
|
1731
1812
|
name: 'Pinterest'
|
|
1732
|
-
url: '
|
|
1813
|
+
url: 'https://www.pinterest.com/'
|
|
1733
1814
|
|
|
1734
1815
|
- regex: 'Site24x7'
|
|
1735
1816
|
name: 'Site24x7 Website Monitoring'
|
|
@@ -1771,13 +1852,13 @@
|
|
|
1771
1852
|
name: 'Monitor.Us'
|
|
1772
1853
|
url: 'http://www.monitor.us'
|
|
1773
1854
|
|
|
1774
|
-
- regex: 'Catchpoint
|
|
1855
|
+
- regex: 'Catchpoint'
|
|
1775
1856
|
name: 'Catchpoint'
|
|
1776
1857
|
category: 'Site Monitor'
|
|
1777
|
-
url: ''
|
|
1858
|
+
url: 'https://www.catchpoint.com/'
|
|
1778
1859
|
producer:
|
|
1779
1860
|
name: 'Catchpoint Systems'
|
|
1780
|
-
url: '
|
|
1861
|
+
url: 'https://www.catchpoint.com/'
|
|
1781
1862
|
|
|
1782
1863
|
- regex: 'bitlybot'
|
|
1783
1864
|
name: 'BitlyBot'
|
|
@@ -1845,7 +1926,7 @@
|
|
|
1845
1926
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
|
1846
1927
|
name: 'RSSRadio Bot'
|
|
1847
1928
|
|
|
1848
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
|
|
1929
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
|
|
1849
1930
|
name: 'Generic Bot'
|
|
1850
1931
|
|
|
1851
1932
|
- regex: '^sentry'
|
|
@@ -1955,10 +2036,10 @@
|
|
|
1955
2036
|
- regex: 'BoardReader Favicon Fetcher'
|
|
1956
2037
|
name: 'BoardReader'
|
|
1957
2038
|
category: 'Search bot'
|
|
1958
|
-
url: '
|
|
2039
|
+
url: 'https://boardreader.com/'
|
|
1959
2040
|
producer:
|
|
1960
2041
|
name: 'Effyis Inc'
|
|
1961
|
-
url: '
|
|
2042
|
+
url: 'https://boardreader.com/'
|
|
1962
2043
|
|
|
1963
2044
|
- regex: 'IDG/IT'
|
|
1964
2045
|
name: 'IDG/IT'
|
|
@@ -2003,7 +2084,7 @@
|
|
|
2003
2084
|
- regex: 'oBot'
|
|
2004
2085
|
name: 'oBot'
|
|
2005
2086
|
category: 'Search bot'
|
|
2006
|
-
url: '
|
|
2087
|
+
url: 'https://www.xforce-security.com/crawler/'
|
|
2007
2088
|
producer:
|
|
2008
2089
|
name: 'IBM Germany Research & Development GmbH'
|
|
2009
2090
|
url: 'https://exchange.xforce.ibmcloud.com/'
|
|
@@ -2040,15 +2121,524 @@
|
|
|
2040
2121
|
name: 'Carbon60 Operating Co. Ltd.'
|
|
2041
2122
|
url: 'https://www.carbon60.com/'
|
|
2042
2123
|
|
|
2043
|
-
# Generic detections
|
|
2044
|
-
|
|
2045
2124
|
- regex: 'Nutch'
|
|
2046
2125
|
name: 'Nutch-based Bot'
|
|
2047
2126
|
category: 'Crawler'
|
|
2048
2127
|
url: 'https://nutch.apache.org'
|
|
2049
2128
|
producer:
|
|
2050
2129
|
name: 'The Apache Software Foundation'
|
|
2051
|
-
url: '
|
|
2130
|
+
url: 'https://www.apache.org/foundation/'
|
|
2131
|
+
|
|
2132
|
+
- regex: 'Seobility'
|
|
2133
|
+
name: 'Seobility'
|
|
2134
|
+
category: 'Crawler'
|
|
2135
|
+
url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
|
|
2136
|
+
|
|
2137
|
+
- regex: 'Vercelbot'
|
|
2138
|
+
name: 'Vercel Bot'
|
|
2139
|
+
category: 'Service bot'
|
|
2140
|
+
url: 'https://vercel.com'
|
|
2141
|
+
|
|
2142
|
+
- regex: 'Grammarly'
|
|
2143
|
+
name: 'Grammarly'
|
|
2144
|
+
category: 'Service bot'
|
|
2145
|
+
url: 'https://www.grammarly.com'
|
|
2146
|
+
|
|
2147
|
+
- regex: 'Robozilla'
|
|
2148
|
+
name: 'Robozilla'
|
|
2149
|
+
category: 'Crawler'
|
|
2150
|
+
|
|
2151
|
+
- regex: 'Domains Project'
|
|
2152
|
+
name: 'Domains Project'
|
|
2153
|
+
category: 'Crawler'
|
|
2154
|
+
url: 'https://domainsproject.org'
|
|
2155
|
+
|
|
2156
|
+
- regex: 'PetalBot'
|
|
2157
|
+
name: 'Petal Bot'
|
|
2158
|
+
category: 'Crawler'
|
|
2159
|
+
url: 'https://aspiegel.com/petalbot'
|
|
2160
|
+
|
|
2161
|
+
- regex: 'SerendeputyBot'
|
|
2162
|
+
name: 'Serendeputy Bot'
|
|
2163
|
+
category: 'Crawler'
|
|
2164
|
+
url: 'https://serendeputy.com/about/serendeputy-bot'
|
|
2165
|
+
|
|
2166
|
+
- regex: 'ias-va.*admantx.*service-fetcher'
|
|
2167
|
+
name: 'ADmantX Service Fetcher'
|
|
2168
|
+
category: 'Service bot'
|
|
2169
|
+
url: 'https://www.admantx.com/service-fetcher.html'
|
|
2170
|
+
|
|
2171
|
+
- regex: 'SemanticScholarBot'
|
|
2172
|
+
name: 'Semantic Scholar Bot'
|
|
2173
|
+
category: 'Crawler'
|
|
2174
|
+
url: 'https://www.semanticscholar.org/crawler'
|
|
2175
|
+
|
|
2176
|
+
- regex: 'VelenPublicWebCrawler'
|
|
2177
|
+
name: 'Velen Public Web Crawler'
|
|
2178
|
+
category: 'Crawler'
|
|
2179
|
+
url: 'https://hunter.io/robot'
|
|
2180
|
+
|
|
2181
|
+
- regex: 'Barkrowler'
|
|
2182
|
+
name: 'Barkrowler'
|
|
2183
|
+
category: 'Crawler'
|
|
2184
|
+
url: 'http://www.exensa.com/crawl'
|
|
2185
|
+
|
|
2186
|
+
- regex: 'BDCbot'
|
|
2187
|
+
name: 'BDCbot'
|
|
2188
|
+
category: 'Crawler'
|
|
2189
|
+
url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
|
|
2190
|
+
producer:
|
|
2191
|
+
name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
|
|
2192
|
+
url: 'https://bigdatacorp.com.br/'
|
|
2193
|
+
|
|
2194
|
+
- regex: 'adbeat'
|
|
2195
|
+
name: 'Adbeat'
|
|
2196
|
+
category: 'Crawler'
|
|
2197
|
+
url: 'https://www.adbeat.com/operation_policy'
|
|
2198
|
+
producer:
|
|
2199
|
+
name: 'PPC Labs LLC'
|
|
2200
|
+
url: 'https://www.adbeat.com/'
|
|
2201
|
+
|
|
2202
|
+
- regex: 'BW/(?:(\d+[\.\d]+))'
|
|
2203
|
+
name: 'BuiltWith'
|
|
2204
|
+
category: 'Crawler'
|
|
2205
|
+
url: 'https://builtwith.com/biup'
|
|
2206
|
+
producer:
|
|
2207
|
+
name: 'BuiltWith Pty Ltd'
|
|
2208
|
+
url: 'https://builtwith.com/'
|
|
2209
|
+
|
|
2210
|
+
- regex: 'https://whatis.contentkingapp.com'
|
|
2211
|
+
name: 'ContentKing'
|
|
2212
|
+
category: 'Site Monitor'
|
|
2213
|
+
url: 'https://whatis.contentkingapp.com/'
|
|
2214
|
+
producer:
|
|
2215
|
+
name: 'ContentKing BV'
|
|
2216
|
+
url: 'https://www.contentkingapp.com/'
|
|
2217
|
+
|
|
2218
|
+
- regex: 'MicroAdBot'
|
|
2219
|
+
name: 'MicroAdBot'
|
|
2220
|
+
category: 'Crawler'
|
|
2221
|
+
url: 'https://www.microad.co.jp/'
|
|
2222
|
+
producer:
|
|
2223
|
+
name: 'MicroAd, Inc.'
|
|
2224
|
+
url: 'https://www.microad.co.jp/'
|
|
2225
|
+
|
|
2226
|
+
- regex: 'PingAdmin.Ru'
|
|
2227
|
+
name: 'PingAdmin.Ru'
|
|
2228
|
+
category: 'Site Monitor'
|
|
2229
|
+
url: 'https://ping-admin.ru/'
|
|
2230
|
+
|
|
2231
|
+
- regex: 'notifyninja.+monitoring'
|
|
2232
|
+
name: 'Notify Ninja'
|
|
2233
|
+
category: 'Site Monitor'
|
|
2234
|
+
url: 'http://notifyninja.com'
|
|
2235
|
+
|
|
2236
|
+
- regex: 'WebDataStats'
|
|
2237
|
+
name: 'WebDataStats'
|
|
2238
|
+
category: 'Crawler'
|
|
2239
|
+
url: 'https://webdatastats.com/policy.html'
|
|
2240
|
+
producer:
|
|
2241
|
+
name: 'WebTehRazrabotka LLC'
|
|
2242
|
+
url: 'https://webdatastats.com/'
|
|
2243
|
+
|
|
2244
|
+
- regex: 'parse.ly scraper'
|
|
2245
|
+
name: 'parse.ly'
|
|
2246
|
+
category: 'Crawler'
|
|
2247
|
+
url: 'https://www.parse.ly/help/integration/crawler'
|
|
2248
|
+
producer:
|
|
2249
|
+
name: 'Parsely, Inc.'
|
|
2250
|
+
url: 'https://www.parse.ly/'
|
|
2251
|
+
|
|
2252
|
+
- regex: 'Nimbostratus-Bot'
|
|
2253
|
+
name: 'Nimbostratus Bot'
|
|
2254
|
+
category: 'Site Monitor'
|
|
2255
|
+
url: 'http://cloudsystemnetworks.com'
|
|
2052
2256
|
|
|
2053
|
-
- regex: '
|
|
2257
|
+
- regex: 'HeartRails_Capture/\d'
|
|
2258
|
+
name: 'Heart Rails Capture'
|
|
2259
|
+
category: 'Service Agent'
|
|
2260
|
+
url: 'http://capture.heartrails.com'
|
|
2261
|
+
|
|
2262
|
+
- regex: 'Project-Resonance'
|
|
2263
|
+
name: 'Project Resonance'
|
|
2264
|
+
category: 'Crawler'
|
|
2265
|
+
url: 'http://project-resonance.com'
|
|
2266
|
+
|
|
2267
|
+
- regex: 'DataXu/\d'
|
|
2268
|
+
name: 'DataXu'
|
|
2269
|
+
category: 'Service Agent'
|
|
2270
|
+
url: 'https://advertising.roku.com/dataxu'
|
|
2271
|
+
producer:
|
|
2272
|
+
name: 'Roku, Inc.'
|
|
2273
|
+
url: 'https://roku.com'
|
|
2274
|
+
|
|
2275
|
+
- regex: 'Cocolyzebot'
|
|
2276
|
+
name: 'Cocolyzebot'
|
|
2277
|
+
category: 'Crawler'
|
|
2278
|
+
url: 'https://cocolyze.com/en/cocolyzebot'
|
|
2279
|
+
producer:
|
|
2280
|
+
name: 'VSI INNOVATION SAS'
|
|
2281
|
+
url: 'https://vsi-innovation.com/'
|
|
2282
|
+
|
|
2283
|
+
- regex: 'veryhip'
|
|
2284
|
+
name: 'VeryHip'
|
|
2285
|
+
category: 'Crawler'
|
|
2286
|
+
url: 'https://veryhip.com/'
|
|
2287
|
+
producer:
|
|
2288
|
+
name: 'VeryHip'
|
|
2289
|
+
url: 'https://veryhip.com/'
|
|
2290
|
+
|
|
2291
|
+
- regex: 'LinkpadBot'
|
|
2292
|
+
name: 'LinkpadBot'
|
|
2293
|
+
category: 'Crawler'
|
|
2294
|
+
url: 'https://www.linkpad.org/'
|
|
2295
|
+
producer:
|
|
2296
|
+
name: 'Solomono LLC'
|
|
2297
|
+
url: 'https://www.linkpad.org/'
|
|
2298
|
+
|
|
2299
|
+
- regex: 'MuscatFerret'
|
|
2300
|
+
name: 'MuscatFerret'
|
|
2301
|
+
category: 'Crawler'
|
|
2302
|
+
url: 'http://www.webtop.com/'
|
|
2303
|
+
|
|
2304
|
+
- regex: 'PageThing.com'
|
|
2305
|
+
name: 'PageThing'
|
|
2306
|
+
category: 'Crawler'
|
|
2307
|
+
url: 'https://www.pagething.com/'
|
|
2308
|
+
producer:
|
|
2309
|
+
name: 'SPECIALNOISE LTD'
|
|
2310
|
+
url: 'https://www.specialnoise.com/'
|
|
2311
|
+
|
|
2312
|
+
- regex: 'ArchiveBox'
|
|
2313
|
+
name: 'ArchiveBox'
|
|
2314
|
+
url: 'https://archivebox.io/'
|
|
2315
|
+
category: 'Crawler'
|
|
2316
|
+
producer:
|
|
2317
|
+
name: ''
|
|
2318
|
+
url: ''
|
|
2319
|
+
|
|
2320
|
+
- regex: 'Choosito'
|
|
2321
|
+
name: 'Choosito'
|
|
2322
|
+
url: 'https://www.choosito.com/'
|
|
2323
|
+
category: 'Crawler'
|
|
2324
|
+
producer:
|
|
2325
|
+
name: 'Choosito! Inc.'
|
|
2326
|
+
url: 'https://www.choosito.com/'
|
|
2327
|
+
|
|
2328
|
+
- regex: 'datagnionbot'
|
|
2329
|
+
name: 'datagnionbot'
|
|
2330
|
+
url: 'https://www.datagnion.com/bot.html'
|
|
2331
|
+
category: 'Crawler'
|
|
2332
|
+
producer:
|
|
2333
|
+
name: 'DATAGNION GMBH'
|
|
2334
|
+
url: 'https://www.datagnion.com/'
|
|
2335
|
+
|
|
2336
|
+
- regex: 'WhatCMS'
|
|
2337
|
+
name: 'WhatCMS'
|
|
2338
|
+
url: 'https://whatcms.org/'
|
|
2339
|
+
category: 'Crawler'
|
|
2340
|
+
producer:
|
|
2341
|
+
name: 'Nineteen Ten LLC'
|
|
2342
|
+
url: 'https://whatcms.org/'
|
|
2343
|
+
|
|
2344
|
+
- regex: 'httpx'
|
|
2345
|
+
name: 'httpx'
|
|
2346
|
+
url: 'https://github.com/projectdiscovery/httpx'
|
|
2347
|
+
category: 'Crawler'
|
|
2348
|
+
producer:
|
|
2349
|
+
name: ''
|
|
2350
|
+
url: ''
|
|
2351
|
+
|
|
2352
|
+
- regex: 'scaninfo@expanseinc.com'
|
|
2353
|
+
name: 'Expanse'
|
|
2354
|
+
category: 'Security Checker'
|
|
2355
|
+
url: 'https://expanse.co/'
|
|
2356
|
+
producer:
|
|
2357
|
+
name: 'Expanse Inc.'
|
|
2358
|
+
url: 'https://expanse.co/'
|
|
2359
|
+
|
|
2360
|
+
- regex: 'HuaweiWebCatBot'
|
|
2361
|
+
name: 'HuaweiWebCatBot'
|
|
2362
|
+
category: 'Crawler'
|
|
2363
|
+
url: 'https://isecurity.huawei.com'
|
|
2364
|
+
producer:
|
|
2365
|
+
name: 'Huawei Technologies Co., Ltd.'
|
|
2366
|
+
url: 'https://huawei.com'
|
|
2367
|
+
|
|
2368
|
+
- regex: 'Hatena-Favicon'
|
|
2369
|
+
name: 'Hatena Favicon'
|
|
2370
|
+
category: 'Crawler'
|
|
2371
|
+
url: 'https://www.hatena.ne.jp/faq/'
|
|
2372
|
+
producer:
|
|
2373
|
+
name: 'Hatena Co., Ltd.'
|
|
2374
|
+
url: 'https://www.hatena.ne.jp'
|
|
2375
|
+
|
|
2376
|
+
- regex: 'RyowlEngine/(\d+)'
|
|
2377
|
+
name: 'Ryowl'
|
|
2378
|
+
category: 'Crawler'
|
|
2379
|
+
url: 'https://ryowl.org'
|
|
2380
|
+
|
|
2381
|
+
- regex: 'OdklBot/(\d+)'
|
|
2382
|
+
name: 'Odnoklassniki Bot'
|
|
2383
|
+
category: 'Crawler'
|
|
2384
|
+
url: 'https://odnoklassniki.ru'
|
|
2385
|
+
|
|
2386
|
+
- regex: 'Mediatoolkitbot'
|
|
2387
|
+
name: 'Mediatoolkit Bot'
|
|
2388
|
+
category: 'Crawler'
|
|
2389
|
+
url: 'https://mediatoolkit.com'
|
|
2390
|
+
|
|
2391
|
+
- regex: 'ZoominfoBot'
|
|
2392
|
+
name: 'ZoominfoBot'
|
|
2393
|
+
category: 'Crawler'
|
|
2394
|
+
url: 'https://www.zoominfo.com'
|
|
2395
|
+
|
|
2396
|
+
- regex: 'WeViKaBot/([\d+\.])'
|
|
2397
|
+
name: 'WeViKaBot'
|
|
2398
|
+
category: 'Crawler'
|
|
2399
|
+
url: 'http://www.wevika.de'
|
|
2400
|
+
|
|
2401
|
+
- regex: 'SEOkicks'
|
|
2402
|
+
name: 'SEOkicks'
|
|
2403
|
+
category: 'Crawler'
|
|
2404
|
+
url: 'https://www.seokicks.de/robot.html'
|
|
2405
|
+
|
|
2406
|
+
- regex: 'Plukkie/([\d+\.])'
|
|
2407
|
+
name: 'Plukkie'
|
|
2408
|
+
category: 'Crawler'
|
|
2409
|
+
url: 'http://www.botje.com/plukkie.htm'
|
|
2410
|
+
|
|
2411
|
+
- regex: 'proximic;'
|
|
2412
|
+
name: 'Comscore'
|
|
2413
|
+
category: 'Crawler'
|
|
2414
|
+
url: 'https://www.comscore.com/Web-Crawler'
|
|
2415
|
+
|
|
2416
|
+
- regex: 'SurdotlyBot/([\d+\.])'
|
|
2417
|
+
name: 'SurdotlyBot'
|
|
2418
|
+
category: 'Crawler'
|
|
2419
|
+
url: 'http://sur.ly/bot.html'
|
|
2420
|
+
|
|
2421
|
+
- regex: 'Gowikibot/([\d+\.])'
|
|
2422
|
+
name: 'Gowikibot'
|
|
2423
|
+
category: 'Crawler'
|
|
2424
|
+
url: 'http:/www.gowikibot.com'
|
|
2425
|
+
|
|
2426
|
+
- regex: 'SabsimBot/([\d+\.])'
|
|
2427
|
+
name: 'SabsimBot'
|
|
2428
|
+
category: 'Crawler'
|
|
2429
|
+
url: 'https://sabsim.com'
|
|
2430
|
+
|
|
2431
|
+
- regex: 'LumtelBot/([\d+\.])'
|
|
2432
|
+
name: 'LumtelBot'
|
|
2433
|
+
category: 'Crawler'
|
|
2434
|
+
url: 'https://umtel.com'
|
|
2435
|
+
|
|
2436
|
+
- regex: 'PiplBot'
|
|
2437
|
+
name: 'PiplBot'
|
|
2438
|
+
category: 'Crawler'
|
|
2439
|
+
url: 'http://www.pipl.com/bot'
|
|
2440
|
+
|
|
2441
|
+
- regex: 'woobot/([\d+\.])'
|
|
2442
|
+
name: 'WooRank'
|
|
2443
|
+
category: 'Crawler'
|
|
2444
|
+
url: 'https://www.woorank.com/bot'
|
|
2445
|
+
|
|
2446
|
+
- regex: 'Cookiebot/([\d+\.])'
|
|
2447
|
+
name: 'Cookiebot'
|
|
2448
|
+
category: 'Crawler'
|
|
2449
|
+
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
|
|
2450
|
+
producer:
|
|
2451
|
+
name: 'Cybot A/S'
|
|
2452
|
+
url: 'https://www.cybot.com/'
|
|
2453
|
+
|
|
2454
|
+
- regex: 'NetSystemsResearch'
|
|
2455
|
+
name: 'NetSystemsResearch'
|
|
2456
|
+
category: 'Security Checker'
|
|
2457
|
+
url: 'https://www.netsystemsresearch.com/'
|
|
2458
|
+
producer:
|
|
2459
|
+
name: 'NET SYSTEMS RESEARCH LLC'
|
|
2460
|
+
url: 'https://www.netsystemsresearch.com/'
|
|
2461
|
+
|
|
2462
|
+
- regex: 'CensysInspect/([\d+\.])'
|
|
2463
|
+
name: 'CensysInspect'
|
|
2464
|
+
category: 'Security Checker'
|
|
2465
|
+
url: 'https://about.censys.io/'
|
|
2466
|
+
producer:
|
|
2467
|
+
name: 'Censys, Inc.'
|
|
2468
|
+
url: 'https://censys.io/'
|
|
2469
|
+
|
|
2470
|
+
- regex: 'gdnplus.com'
|
|
2471
|
+
name: 'GDNP'
|
|
2472
|
+
category: 'Crawler'
|
|
2473
|
+
url: 'https://gdnplus.com/'
|
|
2474
|
+
producer:
|
|
2475
|
+
name: 'Global Digital Network Plus, LLC'
|
|
2476
|
+
url: 'https://gdnplus.com/'
|
|
2477
|
+
|
|
2478
|
+
- regex: 'WellKnownBot/([\d+\.])'
|
|
2479
|
+
name: 'WellKnownBot'
|
|
2480
|
+
category: 'Crawler'
|
|
2481
|
+
url: 'https://well-known.dev'
|
|
2482
|
+
|
|
2483
|
+
- regex: 'Adsbot/([\d+\.])'
|
|
2484
|
+
name: 'Adsbot'
|
|
2485
|
+
category: 'Crawler'
|
|
2486
|
+
url: 'https://seostar.co/robot/'
|
|
2487
|
+
|
|
2488
|
+
- regex: 'MTRobot/([\d+\.])'
|
|
2489
|
+
name: 'MTRobot'
|
|
2490
|
+
category: 'Crawler'
|
|
2491
|
+
url: 'https://metrics-tools.de/robot.html'
|
|
2492
|
+
producer:
|
|
2493
|
+
name: 'Metrics Tools'
|
|
2494
|
+
url: 'https://metrics-tools.de/'
|
|
2495
|
+
|
|
2496
|
+
- regex: 'serpstatbot/([\d+\.])'
|
|
2497
|
+
name: 'serpstatbot'
|
|
2498
|
+
category: 'Crawler'
|
|
2499
|
+
url: 'http://serpstatbot.com/'
|
|
2500
|
+
producer:
|
|
2501
|
+
name: 'Netpeak Ltd'
|
|
2502
|
+
url: 'https://netpeak.net/'
|
|
2503
|
+
|
|
2504
|
+
- regex: 'colly'
|
|
2505
|
+
name: 'colly'
|
|
2506
|
+
category: 'Crawler'
|
|
2507
|
+
url: 'https://github.com/gocolly/colly/'
|
|
2508
|
+
|
|
2509
|
+
- regex: 'l9tcpid/v([\d+\.])'
|
|
2510
|
+
name: 'l9tcpid'
|
|
2511
|
+
category: 'Security Checker'
|
|
2512
|
+
url: 'https://github.com/LeakIX/l9tcpid'
|
|
2513
|
+
|
|
2514
|
+
- regex: 'MegaIndex.ru/([\d+\.])'
|
|
2515
|
+
name: 'MegaIndex'
|
|
2516
|
+
category: 'Crawler'
|
|
2517
|
+
url: 'https://megaindex.com/crawler'
|
|
2518
|
+
|
|
2519
|
+
- regex: 'Seekport'
|
|
2520
|
+
name: 'Seekport'
|
|
2521
|
+
category: 'Crawler'
|
|
2522
|
+
url: 'http://www.seekport.com/'
|
|
2523
|
+
producer:
|
|
2524
|
+
name: 'SISTRIX GmbH'
|
|
2525
|
+
url: 'https://www.sistrix.de/'
|
|
2526
|
+
|
|
2527
|
+
- regex: 'seolyt/([\d+\.])'
|
|
2528
|
+
name: 'seolyt'
|
|
2529
|
+
category: 'Crawler'
|
|
2530
|
+
url: 'https://seolyt.com/'
|
|
2531
|
+
|
|
2532
|
+
- regex: 'YaK/([\d+\.])'
|
|
2533
|
+
name: 'YaK'
|
|
2534
|
+
category: 'Crawler'
|
|
2535
|
+
url: 'https://www.linkfluence.com/'
|
|
2536
|
+
producer:
|
|
2537
|
+
name: 'Linkfluence SAS'
|
|
2538
|
+
url: 'https://www.linkfluence.com/'
|
|
2539
|
+
|
|
2540
|
+
- regex: 'KomodiaBot/([\d+\.])'
|
|
2541
|
+
name: 'KomodiaBot'
|
|
2542
|
+
category: 'Crawler'
|
|
2543
|
+
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
|
|
2544
|
+
producer:
|
|
2545
|
+
name: 'Komodia Inc.'
|
|
2546
|
+
url: 'https://www.komodia.com/'
|
|
2547
|
+
|
|
2548
|
+
- regex: 'Neevabot/([\d+\.])'
|
|
2549
|
+
name: 'Neevabot'
|
|
2550
|
+
category: 'Search bot'
|
|
2551
|
+
url: 'https://neeva.com/neevabot'
|
|
2552
|
+
producer:
|
|
2553
|
+
name: 'Neeva Inc.'
|
|
2554
|
+
url: 'https://neeva.com/'
|
|
2555
|
+
|
|
2556
|
+
- regex: 'LinkPreview/([\d+\.])'
|
|
2557
|
+
name: 'LinkPreview'
|
|
2558
|
+
category: 'Service Agent'
|
|
2559
|
+
url: 'https://www.linkpreview.net/'
|
|
2560
|
+
|
|
2561
|
+
- regex: 'JungleKeyThumbnail/([\d+\.])'
|
|
2562
|
+
name: 'JungleKeyThumbnail'
|
|
2563
|
+
category: 'Crawler'
|
|
2564
|
+
url: 'https://junglekey.com/'
|
|
2565
|
+
|
|
2566
|
+
- regex: 'rocketmonitor(?: |bot/)([\d+\.])'
|
|
2567
|
+
name: 'RocketMonitorBot'
|
|
2568
|
+
category: 'Site Monitor'
|
|
2569
|
+
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
|
|
2570
|
+
producer:
|
|
2571
|
+
name: 'Radio Mast, Inc.'
|
|
2572
|
+
url: 'https://www.radiomast.io/'
|
|
2573
|
+
|
|
2574
|
+
- regex: 'SitemapParser-VIPnytt/([\d+\.])'
|
|
2575
|
+
name: 'SitemapParser-VIPnytt'
|
|
2576
|
+
category: 'Crawler'
|
|
2577
|
+
url: 'https://github.com/VIPnytt/SitemapParser/'
|
|
2578
|
+
|
|
2579
|
+
- regex: '^Turnitin'
|
|
2580
|
+
name: 'Turnitin'
|
|
2581
|
+
category: 'Crawler'
|
|
2582
|
+
url: 'https://turnitin.com/robot/crawlerinfo.html'
|
|
2583
|
+
|
|
2584
|
+
- regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
|
|
2585
|
+
name: 'Dotcom Monitor'
|
|
2586
|
+
category: 'Site Monitor'
|
|
2587
|
+
url: 'https://www.dotcom-monitor.com'
|
|
2588
|
+
|
|
2589
|
+
- regex: 'ThinkChaos/'
|
|
2590
|
+
name: 'ThinkChaos'
|
|
2591
|
+
category: 'Crawler'
|
|
2592
|
+
|
|
2593
|
+
- regex: 'DataForSeoBot'
|
|
2594
|
+
name: 'DataForSeoBot'
|
|
2595
|
+
category: 'Crawler'
|
|
2596
|
+
url: 'https://dataforseo.com/dataforseo-bot'
|
|
2597
|
+
|
|
2598
|
+
- regex: 'Discordbot/([\d+.]+)'
|
|
2599
|
+
name: 'Discord Bot'
|
|
2600
|
+
category: 'Service Agent'
|
|
2601
|
+
url: 'https://discordapp.com'
|
|
2602
|
+
|
|
2603
|
+
- regex: 'Linespider/([\d+.]+)'
|
|
2604
|
+
name: 'Linespider'
|
|
2605
|
+
category: 'Crawler'
|
|
2606
|
+
url: 'https://lin.ee/4dwXkTH'
|
|
2607
|
+
|
|
2608
|
+
- regex: 'Cincraw/([\d+.]+)'
|
|
2609
|
+
name: 'Cincraw'
|
|
2610
|
+
category: 'Crawler'
|
|
2611
|
+
url: 'http://cincrawdata.net/bot/'
|
|
2612
|
+
|
|
2613
|
+
- regex: 'CISPA Web Analyzer'
|
|
2614
|
+
name: 'CISPA Web Analyzer'
|
|
2615
|
+
category: 'Crawler'
|
|
2616
|
+
url: 'https://notify.cispa.de/'
|
|
2617
|
+
producer:
|
|
2618
|
+
name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
|
|
2619
|
+
url: 'https://cispa.de/en'
|
|
2620
|
+
|
|
2621
|
+
- regex: 'IonCrawl'
|
|
2622
|
+
name: 'IONOS Crawler'
|
|
2623
|
+
category: 'Crawler'
|
|
2624
|
+
url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
|
|
2625
|
+
producer:
|
|
2626
|
+
name: 'IONOS SE'
|
|
2627
|
+
url: 'https://www.ionos.de/'
|
|
2628
|
+
|
|
2629
|
+
- regex: 'Crawldad'
|
|
2630
|
+
name: 'Crawldad'
|
|
2631
|
+
category: 'Crawler'
|
|
2632
|
+
url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
|
|
2633
|
+
|
|
2634
|
+
- regex: 'https://securitytxt-scan.cs.hm.edu/'
|
|
2635
|
+
name: 'security.txt scanserver'
|
|
2636
|
+
category: 'Security Checker'
|
|
2637
|
+
url: 'https://securitytxt-scan.cs.hm.edu/'
|
|
2638
|
+
producer:
|
|
2639
|
+
name: 'Hochschule für angewandte Wissenschaften München'
|
|
2640
|
+
url: 'https://www.hm.edu/'
|
|
2641
|
+
|
|
2642
|
+
# Generic detections
|
|
2643
|
+
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
|
2054
2644
|
name: 'Generic Bot'
|