device_detector 1.0.7 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +25 -4
- data/lib/device_detector/browser.rb +178 -5
- data/lib/device_detector/client_hint.rb +166 -0
- data/lib/device_detector/device.rb +291 -2
- data/lib/device_detector/os.rb +33 -3
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector.rb +50 -15
- data/regexes/bots.yml +594 -16
- data/regexes/client/browsers.yml +334 -40
- data/regexes/client/hints/apps.yml +102 -0
- data/regexes/client/hints/browsers.yml +190 -0
- data/regexes/client/libraries.yml +199 -2
- data/regexes/client/mobile_apps.yml +320 -12
- data/regexes/client/pim.yml +15 -0
- data/regexes/device/car_browsers.yml +1 -1
- data/regexes/device/consoles.yml +6 -1
- data/regexes/device/mobiles.yml +8406 -1948
- data/regexes/device/portable_media_player.yml +18 -2
- data/regexes/device/shell_tv.yml +11 -0
- data/regexes/device/televisions.yml +26 -4
- data/regexes/oss.yml +329 -47
- metadata +10 -7
data/regexes/bots.yml
CHANGED
@@ -5,6 +5,14 @@
|
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
8
|
+
- regex: 'monitoring360bot'
|
9
|
+
name: '360 Monitoring'
|
10
|
+
category: 'Site Monitor'
|
11
|
+
url: 'https://www.360monitoring.io'
|
12
|
+
producer:
|
13
|
+
name: 'Plesk International GmbH'
|
14
|
+
url: 'https://www.plesk.com'
|
15
|
+
|
8
16
|
- regex: '360Spider'
|
9
17
|
name: '360Spider'
|
10
18
|
category: 'Search bot'
|
@@ -45,6 +53,14 @@
|
|
45
53
|
name: 'Ahrefs Pte Ltd'
|
46
54
|
url: 'https://ahrefs.com/robot'
|
47
55
|
|
56
|
+
- regex: 'AhrefsSiteAudit/([\d+.]+)'
|
57
|
+
name: 'AhrefsSiteAudit'
|
58
|
+
category: 'Site Monitor'
|
59
|
+
url: 'https://ahrefs.com/robot/site-audit'
|
60
|
+
producer:
|
61
|
+
name: 'Ahrefs Pte Ltd'
|
62
|
+
url: 'https://ahrefs.com/'
|
63
|
+
|
48
64
|
- regex: 'ia_archiver|alexabot|verifybot'
|
49
65
|
name: 'Alexa Crawler'
|
50
66
|
category: 'Search bot'
|
@@ -100,13 +116,13 @@
|
|
100
116
|
name: 'Apple Inc'
|
101
117
|
url: 'https://www.apple.com'
|
102
118
|
|
103
|
-
- regex:
|
104
|
-
name:
|
105
|
-
category:
|
106
|
-
url:
|
119
|
+
- regex: 'AppSignalBot'
|
120
|
+
name: 'AppSignalBot'
|
121
|
+
category: 'Site Monitor'
|
122
|
+
url: 'https://docs.appsignal.com/uptime-monitoring/'
|
107
123
|
producer:
|
108
|
-
name:
|
109
|
-
url:
|
124
|
+
name: 'AppSignal'
|
125
|
+
url: 'https://appsignal.com/'
|
110
126
|
|
111
127
|
- regex: 'Arachni'
|
112
128
|
name: 'Arachni'
|
@@ -355,7 +371,7 @@
|
|
355
371
|
name: 'Certified Security Solutions'
|
356
372
|
url: 'https://www.css-security.com/company/about-us/'
|
357
373
|
|
358
|
-
- regex: 'Datadog Agent'
|
374
|
+
- regex: 'Datadog Agent|Datadog/?Synthetics'
|
359
375
|
name: 'Datadog Agent'
|
360
376
|
url: 'https://github.com/DataDog/dd-agent'
|
361
377
|
category: 'Site Monitor'
|
@@ -688,7 +704,7 @@
|
|
688
704
|
name: 'Visual Meta'
|
689
705
|
url: 'https://www.shopalike.cz/'
|
690
706
|
|
691
|
-
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|
|
707
|
+
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Producer)|Google.*/\+/web/snippet'
|
692
708
|
name: 'Googlebot'
|
693
709
|
category: 'Search bot'
|
694
710
|
url: 'http://www.google.com/bot.html'
|
@@ -783,6 +799,14 @@
|
|
783
799
|
name: 'Lighthouse'
|
784
800
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
785
801
|
|
802
|
+
- regex: 'last-modified\.com'
|
803
|
+
name: 'LastMod Bot'
|
804
|
+
category: 'Site Monitor'
|
805
|
+
url: 'https://last-modified.com/en/about'
|
806
|
+
producer:
|
807
|
+
name: ''
|
808
|
+
url: 'https://last-modified.com/en'
|
809
|
+
|
786
810
|
- regex: 'linkdexbot|linkdex\.com'
|
787
811
|
name: 'Linkdex Bot'
|
788
812
|
category: 'Search bot'
|
@@ -830,6 +854,14 @@
|
|
830
854
|
name: ''
|
831
855
|
url: ''
|
832
856
|
|
857
|
+
- regex: 'masscan-ng/([\d+.]+)'
|
858
|
+
name: 'masscan-ng'
|
859
|
+
url: 'https://github.com/bi-zone/masscan-ng'
|
860
|
+
category: 'Crawler'
|
861
|
+
producer:
|
862
|
+
name: 'BIZON, OOO'
|
863
|
+
url: 'https://bi.zone/'
|
864
|
+
|
833
865
|
- regex: 'masscan'
|
834
866
|
name: 'masscan'
|
835
867
|
url: 'https://github.com/robertdavidgraham/masscan'
|
@@ -1049,12 +1081,12 @@
|
|
1049
1081
|
name: 'PHP Server Monitor'
|
1050
1082
|
url: 'http://www.phpservermonitor.org/'
|
1051
1083
|
|
1052
|
-
- regex: '
|
1053
|
-
name: '
|
1084
|
+
- regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
|
1085
|
+
name: 'Pocket'
|
1054
1086
|
category: 'Read-it-later Service'
|
1055
1087
|
url: 'https://getpocket.com/pocketparser_ua'
|
1056
1088
|
producer:
|
1057
|
-
name: '
|
1089
|
+
name: 'Read It Later, Inc.'
|
1058
1090
|
url: 'https://getpocket.com/'
|
1059
1091
|
|
1060
1092
|
- regex: 'PritTorrent'
|
@@ -1317,7 +1349,7 @@
|
|
1317
1349
|
name: 'Slack Technologies'
|
1318
1350
|
url: 'http://slack.com'
|
1319
1351
|
|
1320
|
-
- regex: '(Sogou (
|
1352
|
+
- regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
|
1321
1353
|
name: 'Sogou Spider'
|
1322
1354
|
category: 'Search bot'
|
1323
1355
|
url: 'http://www.sogou.com/docs/help/webmasters.htm'
|
@@ -1828,6 +1860,14 @@
|
|
1828
1860
|
name: 'Snapchat Inc.'
|
1829
1861
|
url: 'https://www.snapchat.com'
|
1830
1862
|
|
1863
|
+
- regex: 'Snap URL Preview Service'
|
1864
|
+
name: 'Snap URL Preview Service'
|
1865
|
+
category: 'Service Agent'
|
1866
|
+
url: 'https://developers.snap.com/robots'
|
1867
|
+
producer:
|
1868
|
+
name: 'Snapchat Inc.'
|
1869
|
+
url: 'https://www.snapchat.com/'
|
1870
|
+
|
1831
1871
|
- regex: "Let's Encrypt validation server"
|
1832
1872
|
name: "Let's Encrypt Validation"
|
1833
1873
|
category: 'Service Agent'
|
@@ -1926,7 +1966,7 @@
|
|
1926
1966
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
1927
1967
|
name: 'RSSRadio Bot'
|
1928
1968
|
|
1929
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
|
1969
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
|
1930
1970
|
name: 'Generic Bot'
|
1931
1971
|
|
1932
1972
|
- regex: '^sentry'
|
@@ -1935,7 +1975,7 @@
|
|
1935
1975
|
name: 'Sentry'
|
1936
1976
|
url: 'https://sentry.io'
|
1937
1977
|
|
1938
|
-
- regex: '^Spotify'
|
1978
|
+
- regex: '^Spotify/(\d+[\.\d]+)$'
|
1939
1979
|
name: 'Spotify'
|
1940
1980
|
producer:
|
1941
1981
|
name: 'Spotify'
|
@@ -2163,7 +2203,7 @@
|
|
2163
2203
|
category: 'Crawler'
|
2164
2204
|
url: 'https://serendeputy.com/about/serendeputy-bot'
|
2165
2205
|
|
2166
|
-
- regex: 'ias-va.*admantx.*service-fetcher'
|
2206
|
+
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
|
2167
2207
|
name: 'ADmantX Service Fetcher'
|
2168
2208
|
category: 'Service bot'
|
2169
2209
|
url: 'https://www.admantx.com/service-fetcher.html'
|
@@ -2349,7 +2389,7 @@
|
|
2349
2389
|
name: ''
|
2350
2390
|
url: ''
|
2351
2391
|
|
2352
|
-
- regex: 'scaninfo@expanseinc.com'
|
2392
|
+
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
|
2353
2393
|
name: 'Expanse'
|
2354
2394
|
category: 'Security Checker'
|
2355
2395
|
url: 'https://expanse.co/'
|
@@ -2511,6 +2551,11 @@
|
|
2511
2551
|
category: 'Security Checker'
|
2512
2552
|
url: 'https://github.com/LeakIX/l9tcpid'
|
2513
2553
|
|
2554
|
+
- regex: 'l9explore/([\d+\.])'
|
2555
|
+
name: 'l9explore'
|
2556
|
+
category: 'Security Checker'
|
2557
|
+
url: 'https://github.com/LeakIX/l9explore'
|
2558
|
+
|
2514
2559
|
- regex: 'MegaIndex.ru/([\d+\.])'
|
2515
2560
|
name: 'MegaIndex'
|
2516
2561
|
category: 'Crawler'
|
@@ -2639,6 +2684,539 @@
|
|
2639
2684
|
name: 'Hochschule für angewandte Wissenschaften München'
|
2640
2685
|
url: 'https://www.hm.edu/'
|
2641
2686
|
|
2687
|
+
- regex: 'TigerBot/([\d+.]+)'
|
2688
|
+
name: 'TigerBot'
|
2689
|
+
category: 'Crawler'
|
2690
|
+
url: 'https://tiger.ch/'
|
2691
|
+
|
2692
|
+
- regex: 'TestCrawler/([\d+.]+)'
|
2693
|
+
name: 'TestCrawler'
|
2694
|
+
category: 'Crawler'
|
2695
|
+
url: 'https://www.comcepta.com/'
|
2696
|
+
|
2697
|
+
- regex: 'CrowdTanglebot/([\d+.]+)'
|
2698
|
+
name: 'CrowdTangle'
|
2699
|
+
category: 'Crawler'
|
2700
|
+
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
|
2701
|
+
producer:
|
2702
|
+
name: 'CrowdTangle, Inc.'
|
2703
|
+
url: 'https://www.crowdtangle.com/'
|
2704
|
+
|
2705
|
+
- regex: 'Sellers.Guide Crawler by Primis'
|
2706
|
+
name: 'Sellers.Guide'
|
2707
|
+
category: 'Crawler'
|
2708
|
+
url: 'https://sellers.guide/'
|
2709
|
+
producer:
|
2710
|
+
name: 'McCann Disciplines, Ltd.'
|
2711
|
+
url: 'https://www.primis.tech/'
|
2712
|
+
|
2713
|
+
- regex: 'OnalyticaBot'
|
2714
|
+
name: 'Onalytica'
|
2715
|
+
category: 'Crawler'
|
2716
|
+
url: 'https://www.airslate.com/bot/explore/onalytica-bot'
|
2717
|
+
producer:
|
2718
|
+
name: 'airSlate, Inc.'
|
2719
|
+
url: 'https://www.airslate.com/'
|
2720
|
+
|
2721
|
+
- regex: 'deepnoc'
|
2722
|
+
name: 'deepnoc'
|
2723
|
+
category: 'Crawler'
|
2724
|
+
url: 'https://deepnoc.com/bot'
|
2725
|
+
producer:
|
2726
|
+
name: 'deepnoc, GmbH'
|
2727
|
+
url: 'https://deepnoc.com/'
|
2728
|
+
|
2729
|
+
- regex: 'Newslitbot/([\d+.]+)'
|
2730
|
+
name: 'Newslitbot'
|
2731
|
+
category: 'Crawler'
|
2732
|
+
url: 'https://www.newslit.co/'
|
2733
|
+
producer:
|
2734
|
+
name: 'Newslit, LLC.'
|
2735
|
+
url: 'https://www.newslit.co/'
|
2736
|
+
|
2737
|
+
- regex: 'um-LN/([\d+.]+)'
|
2738
|
+
name: 'uMBot'
|
2739
|
+
category: 'Crawler'
|
2740
|
+
url: 'https://www.ubermetrics-technologies.com/'
|
2741
|
+
producer:
|
2742
|
+
name: 'Ubermetrics Technologies GmbH'
|
2743
|
+
url: 'https://www.ubermetrics-technologies.com/'
|
2744
|
+
|
2745
|
+
- regex: 'Abonti/([\d+.]+)'
|
2746
|
+
name: 'Abonti'
|
2747
|
+
category: 'Crawler'
|
2748
|
+
url: 'http://abonti.com/'
|
2749
|
+
|
2750
|
+
- regex: 'collection@infegy.com'
|
2751
|
+
name: 'Infegy'
|
2752
|
+
category: 'Crawler'
|
2753
|
+
url: 'https://infegy.com/'
|
2754
|
+
producer:
|
2755
|
+
name: 'Infegy, Inc.'
|
2756
|
+
url: 'https://infegy.com/'
|
2757
|
+
|
2758
|
+
- regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
|
2759
|
+
name: 'IPIP'
|
2760
|
+
category: 'Security Checker'
|
2761
|
+
url: 'https://security.ipip.net/'
|
2762
|
+
producer:
|
2763
|
+
name: 'Beijing Tiantexin Tech. Co., Ltd.'
|
2764
|
+
url: 'https://en.ipip.net/'
|
2765
|
+
|
2766
|
+
- regex: 'ev-crawler/([\d+.]+)'
|
2767
|
+
name: 'Headline'
|
2768
|
+
category: 'Crawler'
|
2769
|
+
url: 'https://headline.com/legal/crawler'
|
2770
|
+
producer:
|
2771
|
+
name: 'e.ventures Managementgesellschaft mbH'
|
2772
|
+
url: 'https://headline.com/'
|
2773
|
+
|
2774
|
+
- regex: 'webprosbot/([\d+.]+)'
|
2775
|
+
name: 'WebPros'
|
2776
|
+
category: 'Crawler'
|
2777
|
+
url: 'https://webpros.com/'
|
2778
|
+
producer:
|
2779
|
+
name: 'WebPros Holdco B.V.'
|
2780
|
+
url: 'https://webpros.com/'
|
2781
|
+
|
2782
|
+
- regex: 'ELB-HealthChecker'
|
2783
|
+
name: 'Amazon ELB'
|
2784
|
+
category: 'Site Monitor'
|
2785
|
+
url: 'https://aws.amazon.com/elasticloadbalancing/'
|
2786
|
+
producer:
|
2787
|
+
name: 'Amazon.com, Inc.'
|
2788
|
+
url: 'https://www.amazon.com/'
|
2789
|
+
|
2790
|
+
- regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
|
2791
|
+
name: 'WhereGoes'
|
2792
|
+
category: 'Crawler'
|
2793
|
+
url: 'https://wheregoes.com/'
|
2794
|
+
|
2795
|
+
- regex: 'project_patchwatch'
|
2796
|
+
name: 'Project Patchwatch'
|
2797
|
+
category: 'Crawler'
|
2798
|
+
url: 'http://66.240.192.82/'
|
2799
|
+
|
2800
|
+
- regex: 'InternetMeasurement/([\d+.]+)'
|
2801
|
+
name: 'InternetMeasurement'
|
2802
|
+
category: 'Crawler'
|
2803
|
+
url: 'https://internet-measurement.com/'
|
2804
|
+
|
2805
|
+
- regex: 'DomainAppender /([\d+.]+)'
|
2806
|
+
name: 'DomainAppender'
|
2807
|
+
category: 'Crawler'
|
2808
|
+
url: 'https://www.profound.net/product/domain_append/'
|
2809
|
+
producer:
|
2810
|
+
name: 'Profound Networks, LLC'
|
2811
|
+
url: 'https://www.profound.net/'
|
2812
|
+
|
2813
|
+
- regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
|
2814
|
+
name: 'FreeWebMonitoring'
|
2815
|
+
category: 'Site Monitor'
|
2816
|
+
url: 'https://www.freewebmonitoring.com/bot.html'
|
2817
|
+
producer:
|
2818
|
+
name: 'GreenWave Online, Inc.'
|
2819
|
+
url: 'http://www.greenwaveonline.com/'
|
2820
|
+
|
2821
|
+
- regex: 'Page Modified Pinger'
|
2822
|
+
name: 'Page Modified Pinger'
|
2823
|
+
category: 'Site Monitor'
|
2824
|
+
url: 'https://www.pagemodified.com/'
|
2825
|
+
producer:
|
2826
|
+
name: 'Valley Hosting, LLC'
|
2827
|
+
url: 'https://www.pagemodified.com/'
|
2828
|
+
|
2829
|
+
- regex: 'adstxtlab.com'
|
2830
|
+
name: 'adstxtlab.com'
|
2831
|
+
category: 'Crawler'
|
2832
|
+
url: 'https://adstxtlab.com/validator.php'
|
2833
|
+
producer:
|
2834
|
+
name: 'Jaohawi AB'
|
2835
|
+
url: 'https://adstxtlab.com/'
|
2836
|
+
|
2837
|
+
- regex: 'Iframely/([\d+.]+)'
|
2838
|
+
name: 'Iframely'
|
2839
|
+
category: 'Crawler'
|
2840
|
+
url: 'https://iframely.com/'
|
2841
|
+
producer:
|
2842
|
+
name: 'Itteco Software, Corp.'
|
2843
|
+
url: 'https://iframely.com/'
|
2844
|
+
|
2845
|
+
- regex: 'DomainStatsBot/([\d+.]+)'
|
2846
|
+
name: 'DomainStatsBot'
|
2847
|
+
category: 'Crawler'
|
2848
|
+
url: 'https://domainstats.com/pages/our-bot'
|
2849
|
+
producer:
|
2850
|
+
name: 'Domainstats Ltd'
|
2851
|
+
url: 'https://domainstats.com/'
|
2852
|
+
|
2853
|
+
- regex: 'aiHitBot/([\d+.]+)'
|
2854
|
+
name: 'aiHitBot'
|
2855
|
+
category: 'Crawler'
|
2856
|
+
url: 'https://www.aihitdata.com/about'
|
2857
|
+
|
2858
|
+
- regex: 'DomainCrawler/'
|
2859
|
+
name: 'DomainCrawler'
|
2860
|
+
category: 'Crawler'
|
2861
|
+
url: 'https://domaincrawler.com/about-us/'
|
2862
|
+
|
2863
|
+
- regex: 'DNSResearchBot'
|
2864
|
+
name: 'DNSResearchBot'
|
2865
|
+
category: 'Crawler'
|
2866
|
+
|
2867
|
+
- regex: 'GitCrawlerBot'
|
2868
|
+
name: 'GitCrawlerBot'
|
2869
|
+
category: 'Crawler'
|
2870
|
+
|
2871
|
+
- regex: 'AdAuth/([\d+.]+)'
|
2872
|
+
name: 'AdAuth'
|
2873
|
+
category: 'Crawler'
|
2874
|
+
url: 'https://www.adauth.com'
|
2875
|
+
|
2876
|
+
- regex: 'faveeo.com'
|
2877
|
+
name: 'Faveeo'
|
2878
|
+
category: 'Crawler'
|
2879
|
+
url: 'http://www.faveeo.com'
|
2880
|
+
|
2881
|
+
- regex: 'kozmonavt\.'
|
2882
|
+
name: 'Kozmonavt'
|
2883
|
+
category: 'Crawler'
|
2884
|
+
url: 'https://kozmonavt.ml'
|
2885
|
+
|
2886
|
+
- regex: 'CriteoBot/'
|
2887
|
+
name: 'CriteoBot'
|
2888
|
+
category: 'Crawler'
|
2889
|
+
url: 'https://www.criteo.com/criteo-crawler/'
|
2890
|
+
|
2891
|
+
- regex: 'PayPal IPN'
|
2892
|
+
name: 'PayPal IPN'
|
2893
|
+
category: 'Service Agent'
|
2894
|
+
url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
|
2895
|
+
producer:
|
2896
|
+
name: 'PayPal, Inc.'
|
2897
|
+
url: 'https://www.paypal.com/'
|
2898
|
+
|
2899
|
+
- regex: 'MaCoCu'
|
2900
|
+
name: 'MaCoCu'
|
2901
|
+
category: 'Crawler'
|
2902
|
+
url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
|
2903
|
+
producer:
|
2904
|
+
name: 'Jožef Stefan Institute'
|
2905
|
+
url: 'https://www.ijs.si/ijsw/JSI'
|
2906
|
+
|
2907
|
+
- regex: 'dnt-policy@eff.org'
|
2908
|
+
name: 'EFF Do Not Track Verifier'
|
2909
|
+
category: 'Crawler'
|
2910
|
+
url: 'https://www.eff.org/issues/do-not-track'
|
2911
|
+
producer:
|
2912
|
+
name: 'Electronic Frontier Foundation'
|
2913
|
+
url: 'https://www.eff.org/'
|
2914
|
+
|
2915
|
+
- regex: 'InfoTigerBot'
|
2916
|
+
name: 'InfoTigerBot'
|
2917
|
+
category: 'Crawler'
|
2918
|
+
url: 'https://infotiger.com/bot'
|
2919
|
+
producer:
|
2920
|
+
name: 'Infotiger UG'
|
2921
|
+
url: 'https://infotiger.com/'
|
2922
|
+
|
2923
|
+
- regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
|
2924
|
+
name: 'Birdcrawlerbot'
|
2925
|
+
category: 'Crawler'
|
2926
|
+
url: 'https://crawla.de/de/index.php'
|
2927
|
+
producer:
|
2928
|
+
name: 'Swoppen Systems GmbH'
|
2929
|
+
url: 'https://www.swoppen.com/de'
|
2930
|
+
|
2931
|
+
- regex: 'ScamadviserExternalHit/([\d+.]+)'
|
2932
|
+
name: 'Scamadviser External Hit'
|
2933
|
+
category: 'Crawler'
|
2934
|
+
url: 'https://www.scamadviser.com/'
|
2935
|
+
producer:
|
2936
|
+
name: 'Ecommerce Operations B.V.'
|
2937
|
+
url: 'https://www.scamadviser.com/'
|
2938
|
+
|
2939
|
+
- regex: 'ZaldamoSearchBot'
|
2940
|
+
name: 'Zaldamo'
|
2941
|
+
category: 'Crawler'
|
2942
|
+
url: 'https://www.zaldamo.com/search.html'
|
2943
|
+
producer:
|
2944
|
+
name: 'Project Orlando, LLC.'
|
2945
|
+
url: 'https://www.projectorlando.com/'
|
2946
|
+
|
2947
|
+
- regex: 'AFB/([\d+.]+)'
|
2948
|
+
name: 'Allloadin Favicon Bot'
|
2949
|
+
category: 'Crawler'
|
2950
|
+
url: 'https://allloadin.com/'
|
2951
|
+
|
2952
|
+
- regex: 'SeolytBot/([\d+.]+)'
|
2953
|
+
name: 'Seolyt Bot'
|
2954
|
+
category: 'Crawler'
|
2955
|
+
url: 'https://seolyt.com'
|
2956
|
+
|
2957
|
+
- regex: 'LinkWalker/([\d+.]+)'
|
2958
|
+
name: 'LinkWalker'
|
2959
|
+
category: 'Crawler'
|
2960
|
+
url: 'https://www.phishlabs.com/'
|
2961
|
+
producer:
|
2962
|
+
name: 'PhishLabs, Inc.'
|
2963
|
+
url: 'https://www.phishlabs.com/'
|
2964
|
+
|
2965
|
+
- regex: 'RenovateBot/([\d+.]+)'
|
2966
|
+
name: 'RenovateBot'
|
2967
|
+
category: 'Security Checker'
|
2968
|
+
url: 'https://github.com/renovatebot/renovate'
|
2969
|
+
producer:
|
2970
|
+
name: 'White Source Ltd.'
|
2971
|
+
url: 'https://www.mend.io/free-developer-tools/renovate/'
|
2972
|
+
|
2973
|
+
- regex: 'INETDEX-BOT/([\d+.]+)'
|
2974
|
+
name: 'Inetdex Bot'
|
2975
|
+
category: 'Crawler'
|
2976
|
+
url: 'https://www.inetdex.com/'
|
2977
|
+
|
2978
|
+
- regex: 'NETZZAPPEN'
|
2979
|
+
name: 'NETZZAPPEN'
|
2980
|
+
category: 'Crawler'
|
2981
|
+
url: 'https://www.netzzappen.com/'
|
2982
|
+
producer:
|
2983
|
+
name: 'Marc Huemer'
|
2984
|
+
url: 'https://www.netzzappen.com/'
|
2985
|
+
|
2986
|
+
- regex: 'SerpReputationManagementAgent/([\d+.]+)'
|
2987
|
+
name: 'SEMrush Reputation Management'
|
2988
|
+
category: 'Service Agent'
|
2989
|
+
url: 'https://www.semrush.com/bot/'
|
2990
|
+
producer:
|
2991
|
+
name: 'SEMrush'
|
2992
|
+
url: 'https://www.semrush.com/'
|
2993
|
+
|
2994
|
+
- regex: 'panscient.com'
|
2995
|
+
name: 'Panscient'
|
2996
|
+
category: 'Crawler'
|
2997
|
+
url: 'https://www.panscient.com/faq.htm'
|
2998
|
+
producer:
|
2999
|
+
name: 'Panscient, Inc.'
|
3000
|
+
url: 'https://www.panscient.com/'
|
3001
|
+
|
3002
|
+
- regex: 'research@pdrlabs.net'
|
3003
|
+
name: 'PDR Labs'
|
3004
|
+
category: 'Security Checker'
|
3005
|
+
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
3006
|
+
producer:
|
3007
|
+
name: 'PDR Labs'
|
3008
|
+
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
3009
|
+
|
3010
|
+
- regex: 'Nicecrawler/([\d+.]+)'
|
3011
|
+
name: 'NiceCrawler'
|
3012
|
+
category: 'Crawler'
|
3013
|
+
url: 'https://www.nicecrawler.com/'
|
3014
|
+
producer:
|
3015
|
+
name: 'Intelium Corp.'
|
3016
|
+
url: 'https://www.intelium.com/'
|
3017
|
+
|
3018
|
+
- regex: 't3versionsBot/([\d+.]+)'
|
3019
|
+
name: 't3versions'
|
3020
|
+
category: 'Crawler'
|
3021
|
+
url: 'https://www.t3versions.com/bot'
|
3022
|
+
producer:
|
3023
|
+
name: 'Torben Hansen'
|
3024
|
+
url: 'https://www.t3versions.com/'
|
3025
|
+
|
3026
|
+
- regex: 'Crawlson/([\d+.]+)'
|
3027
|
+
name: 'Crawlson'
|
3028
|
+
category: 'Crawler'
|
3029
|
+
url: 'https://www.crawlson.com/about'
|
3030
|
+
producer:
|
3031
|
+
name: 'Crawlson'
|
3032
|
+
url: 'https://www.crawlson.com/'
|
3033
|
+
|
3034
|
+
- regex: 'tchelebi/([\d+.]+)'
|
3035
|
+
name: 'tchelebi'
|
3036
|
+
category: 'Crawler'
|
3037
|
+
url: 'https://tchelebi.io/'
|
3038
|
+
producer:
|
3039
|
+
name: 'NormShield, Inc.'
|
3040
|
+
url: 'https://blackkite.com/'
|
3041
|
+
|
3042
|
+
- regex: 'JobboerseBot'
|
3043
|
+
name: 'JobboerseBot'
|
3044
|
+
category: 'Crawler'
|
3045
|
+
url: 'https://www.xing.com/jobs'
|
3046
|
+
producer:
|
3047
|
+
name: 'New Work SE'
|
3048
|
+
url: 'https://www.xing.com/'
|
3049
|
+
|
3050
|
+
- regex: '^Lkx-(.*)/([\d+.]+)'
|
3051
|
+
name: 'LeakIX'
|
3052
|
+
category: 'Security Checker'
|
3053
|
+
url: 'https://leakix.net/'
|
3054
|
+
producer:
|
3055
|
+
name: 'BaDaaS SRL'
|
3056
|
+
url: 'https://leakix.net/'
|
3057
|
+
|
3058
|
+
- regex: 'RepoLookoutBot/([\d+.]+)'
|
3059
|
+
name: 'Repo Lookout'
|
3060
|
+
category: 'Security Checker'
|
3061
|
+
url: 'https://www.repo-lookout.org/'
|
3062
|
+
producer:
|
3063
|
+
name: 'Crissy Field GmbH'
|
3064
|
+
url: 'https://www.crissyfield.de/'
|
3065
|
+
|
3066
|
+
- regex: 'PATHspider'
|
3067
|
+
name: 'PATHspider'
|
3068
|
+
category: 'Security Checker'
|
3069
|
+
url: 'https://pathspider.net/'
|
3070
|
+
producer:
|
3071
|
+
name: 'MAMI Project'
|
3072
|
+
url: 'https://mami-project.eu/'
|
3073
|
+
|
3074
|
+
- regex: 'everyfeed-spider/([\d+.]+)'
|
3075
|
+
name: 'Everyfeed'
|
3076
|
+
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
|
3077
|
+
category: 'Feed Fetcher'
|
3078
|
+
producer:
|
3079
|
+
name: ''
|
3080
|
+
url: ''
|
3081
|
+
|
3082
|
+
- regex: 'Exchange check'
|
3083
|
+
name: 'Exchange check'
|
3084
|
+
category: 'Security Checker'
|
3085
|
+
url: 'https://github.com/GossiTheDog/scanning'
|
3086
|
+
producer:
|
3087
|
+
name: 'Kevin Beaumont'
|
3088
|
+
url: 'https://doublepulsar.com/'
|
3089
|
+
|
3090
|
+
- regex: 'Sublinq'
|
3091
|
+
name: 'Sublinq'
|
3092
|
+
category: 'Crawler'
|
3093
|
+
url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
|
3094
|
+
producer:
|
3095
|
+
name: ''
|
3096
|
+
url: ''
|
3097
|
+
|
3098
|
+
- regex: 'Gregarius/([\d+.]+)'
|
3099
|
+
name: 'Gregarius'
|
3100
|
+
category: 'Feed Fetcher'
|
3101
|
+
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
|
3102
|
+
producer:
|
3103
|
+
name: ''
|
3104
|
+
url: ''
|
3105
|
+
|
3106
|
+
- regex: 'COMODO DCV'
|
3107
|
+
name: 'COMODO DCV'
|
3108
|
+
category: 'Service Agent'
|
3109
|
+
url: 'https://www.comodo.com/'
|
3110
|
+
producer:
|
3111
|
+
name: 'Comodo Security Solutions, Inc.'
|
3112
|
+
url: 'https://www.comodo.com/'
|
3113
|
+
|
3114
|
+
- regex: 'Sectigo DCV'
|
3115
|
+
name: 'Sectigo DCV'
|
3116
|
+
category: 'Service Agent'
|
3117
|
+
url: 'https://sectigo.com/'
|
3118
|
+
producer:
|
3119
|
+
name: 'Sectigo Limited'
|
3120
|
+
url: 'https://sectigo.com/'
|
3121
|
+
|
3122
|
+
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
|
3123
|
+
name: 'KlarnaBot'
|
3124
|
+
category: 'Crawler'
|
3125
|
+
url: 'https://docs.klarna.com/klarna-bot/'
|
3126
|
+
producer:
|
3127
|
+
name: 'Klarna Bank AB'
|
3128
|
+
url: 'https://www.klarna.com/'
|
3129
|
+
|
3130
|
+
- regex: 'Taboolabot/([\d+.]+)'
|
3131
|
+
name: 'Taboolabot'
|
3132
|
+
category: 'Crawler'
|
3133
|
+
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
|
3134
|
+
producer:
|
3135
|
+
name: 'Taboola, Inc.'
|
3136
|
+
url: 'https://www.taboola.com/'
|
3137
|
+
|
3138
|
+
- regex: 'Asana/([\d+.]+)'
|
3139
|
+
name: 'Asana'
|
3140
|
+
category: 'Crawler'
|
3141
|
+
url: 'https://asana.com/'
|
3142
|
+
producer:
|
3143
|
+
name: 'Asana, Inc.'
|
3144
|
+
url: 'https://asana.com/'
|
3145
|
+
|
3146
|
+
- regex: 'Chrome Privacy Preserving Prefetch Proxy'
|
3147
|
+
name: 'Chrome Privacy Preserving Prefetch Proxy'
|
3148
|
+
category: 'Service Agent'
|
3149
|
+
url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
|
3150
|
+
producer:
|
3151
|
+
name: 'Google Inc.'
|
3152
|
+
url: 'https://www.google.com/'
|
3153
|
+
|
3154
|
+
- regex: 'URLinspectorBot/([\d+.]+)'
|
3155
|
+
name: 'URLinspector'
|
3156
|
+
category: 'Site Monitor'
|
3157
|
+
url: 'https://www.urlinspector.com/bot/'
|
3158
|
+
producer:
|
3159
|
+
name: 'LinkResearchTools GmbH'
|
3160
|
+
url: 'https://www.linkresearchtools.com/'
|
3161
|
+
|
3162
|
+
- regex: 'EntferBot/([\d+.]+)'
|
3163
|
+
name: 'Entfer'
|
3164
|
+
category: 'Crawler'
|
3165
|
+
url: 'https://entfer.com/'
|
3166
|
+
producer:
|
3167
|
+
name: 'Entfer Ltd.'
|
3168
|
+
url: 'https://entfer.com/'
|
3169
|
+
|
3170
|
+
- regex: 'TagInspector/([\d+.]+)'
|
3171
|
+
name: 'Tag Inspector'
|
3172
|
+
category: 'Crawler'
|
3173
|
+
url: 'https://taginspector.com/'
|
3174
|
+
producer:
|
3175
|
+
name: 'InfoTrust, LLC'
|
3176
|
+
url: 'https://infotrust.com/'
|
3177
|
+
|
3178
|
+
- regex: 'pageburst'
|
3179
|
+
name: 'Pageburst'
|
3180
|
+
category: 'Crawler'
|
3181
|
+
url: 'https://pageburstls.elsevier.com/'
|
3182
|
+
producer:
|
3183
|
+
name: 'Elsevier Ltd'
|
3184
|
+
url: 'https://www.elsevier.com/'
|
3185
|
+
|
3186
|
+
- regex: '.+diffbot'
|
3187
|
+
name: 'Diffbot'
|
3188
|
+
category: 'Crawler'
|
3189
|
+
url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
|
3190
|
+
producer:
|
3191
|
+
name: 'Diffbot Technologies Corp.'
|
3192
|
+
url: 'https://www.diffbot.com/'
|
3193
|
+
|
3194
|
+
- regex: 'DisqusAdstxtCrawler/([\d+.]+)'
|
3195
|
+
name: 'Disqus'
|
3196
|
+
category: 'Crawler'
|
3197
|
+
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
|
3198
|
+
producer:
|
3199
|
+
name: 'Disqus, Inc.'
|
3200
|
+
url: 'https://disqus.com/'
|
3201
|
+
|
3202
|
+
- regex: 'startmebot/([\d+.]+)'
|
3203
|
+
name: 'start.me'
|
3204
|
+
category: 'Crawler'
|
3205
|
+
url: 'https://about.start.me/'
|
3206
|
+
producer:
|
3207
|
+
name: 'start.me BV'
|
3208
|
+
url: 'https://about.start.me/'
|
3209
|
+
|
3210
|
+
- regex: '2ip bot/([\d+.]+)'
|
3211
|
+
name: '2ip'
|
3212
|
+
category: 'Crawler'
|
3213
|
+
url: 'https://2ip.io/'
|
3214
|
+
|
3215
|
+
- regex: 'ReqBin Curl Client/([\d+.]+)'
|
3216
|
+
name: 'ReqBin'
|
3217
|
+
category: 'Crawler'
|
3218
|
+
url: 'https://reqbin.com/curl'
|
3219
|
+
|
2642
3220
|
# Generic detections
|
2643
3221
|
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
2644
3222
|
name: 'Generic Bot'
|