device_detector 1.0.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +25 -4
- data/lib/device_detector/browser.rb +178 -5
- data/lib/device_detector/client_hint.rb +166 -0
- data/lib/device_detector/device.rb +291 -2
- data/lib/device_detector/os.rb +33 -3
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector.rb +50 -15
- data/regexes/bots.yml +594 -16
- data/regexes/client/browsers.yml +334 -40
- data/regexes/client/hints/apps.yml +102 -0
- data/regexes/client/hints/browsers.yml +190 -0
- data/regexes/client/libraries.yml +199 -2
- data/regexes/client/mobile_apps.yml +320 -12
- data/regexes/client/pim.yml +15 -0
- data/regexes/device/car_browsers.yml +1 -1
- data/regexes/device/consoles.yml +6 -1
- data/regexes/device/mobiles.yml +8406 -1948
- data/regexes/device/portable_media_player.yml +18 -2
- data/regexes/device/shell_tv.yml +11 -0
- data/regexes/device/televisions.yml +26 -4
- data/regexes/oss.yml +329 -47
- metadata +10 -7
data/regexes/bots.yml
CHANGED
@@ -5,6 +5,14 @@
|
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
8
|
+
- regex: 'monitoring360bot'
|
9
|
+
name: '360 Monitoring'
|
10
|
+
category: 'Site Monitor'
|
11
|
+
url: 'https://www.360monitoring.io'
|
12
|
+
producer:
|
13
|
+
name: 'Plesk International GmbH'
|
14
|
+
url: 'https://www.plesk.com'
|
15
|
+
|
8
16
|
- regex: '360Spider'
|
9
17
|
name: '360Spider'
|
10
18
|
category: 'Search bot'
|
@@ -45,6 +53,14 @@
|
|
45
53
|
name: 'Ahrefs Pte Ltd'
|
46
54
|
url: 'https://ahrefs.com/robot'
|
47
55
|
|
56
|
+
- regex: 'AhrefsSiteAudit/([\d+.]+)'
|
57
|
+
name: 'AhrefsSiteAudit'
|
58
|
+
category: 'Site Monitor'
|
59
|
+
url: 'https://ahrefs.com/robot/site-audit'
|
60
|
+
producer:
|
61
|
+
name: 'Ahrefs Pte Ltd'
|
62
|
+
url: 'https://ahrefs.com/'
|
63
|
+
|
48
64
|
- regex: 'ia_archiver|alexabot|verifybot'
|
49
65
|
name: 'Alexa Crawler'
|
50
66
|
category: 'Search bot'
|
@@ -100,13 +116,13 @@
|
|
100
116
|
name: 'Apple Inc'
|
101
117
|
url: 'https://www.apple.com'
|
102
118
|
|
103
|
-
- regex:
|
104
|
-
name:
|
105
|
-
category:
|
106
|
-
url:
|
119
|
+
- regex: 'AppSignalBot'
|
120
|
+
name: 'AppSignalBot'
|
121
|
+
category: 'Site Monitor'
|
122
|
+
url: 'https://docs.appsignal.com/uptime-monitoring/'
|
107
123
|
producer:
|
108
|
-
name:
|
109
|
-
url:
|
124
|
+
name: 'AppSignal'
|
125
|
+
url: 'https://appsignal.com/'
|
110
126
|
|
111
127
|
- regex: 'Arachni'
|
112
128
|
name: 'Arachni'
|
@@ -355,7 +371,7 @@
|
|
355
371
|
name: 'Certified Security Solutions'
|
356
372
|
url: 'https://www.css-security.com/company/about-us/'
|
357
373
|
|
358
|
-
- regex: 'Datadog Agent'
|
374
|
+
- regex: 'Datadog Agent|Datadog/?Synthetics'
|
359
375
|
name: 'Datadog Agent'
|
360
376
|
url: 'https://github.com/DataDog/dd-agent'
|
361
377
|
category: 'Site Monitor'
|
@@ -688,7 +704,7 @@
|
|
688
704
|
name: 'Visual Meta'
|
689
705
|
url: 'https://www.shopalike.cz/'
|
690
706
|
|
691
|
-
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|
|
707
|
+
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Producer)|Google.*/\+/web/snippet'
|
692
708
|
name: 'Googlebot'
|
693
709
|
category: 'Search bot'
|
694
710
|
url: 'http://www.google.com/bot.html'
|
@@ -783,6 +799,14 @@
|
|
783
799
|
name: 'Lighthouse'
|
784
800
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
785
801
|
|
802
|
+
- regex: 'last-modified\.com'
|
803
|
+
name: 'LastMod Bot'
|
804
|
+
category: 'Site Monitor'
|
805
|
+
url: 'https://last-modified.com/en/about'
|
806
|
+
producer:
|
807
|
+
name: ''
|
808
|
+
url: 'https://last-modified.com/en'
|
809
|
+
|
786
810
|
- regex: 'linkdexbot|linkdex\.com'
|
787
811
|
name: 'Linkdex Bot'
|
788
812
|
category: 'Search bot'
|
@@ -830,6 +854,14 @@
|
|
830
854
|
name: ''
|
831
855
|
url: ''
|
832
856
|
|
857
|
+
- regex: 'masscan-ng/([\d+.]+)'
|
858
|
+
name: 'masscan-ng'
|
859
|
+
url: 'https://github.com/bi-zone/masscan-ng'
|
860
|
+
category: 'Crawler'
|
861
|
+
producer:
|
862
|
+
name: 'BIZON, OOO'
|
863
|
+
url: 'https://bi.zone/'
|
864
|
+
|
833
865
|
- regex: 'masscan'
|
834
866
|
name: 'masscan'
|
835
867
|
url: 'https://github.com/robertdavidgraham/masscan'
|
@@ -1049,12 +1081,12 @@
|
|
1049
1081
|
name: 'PHP Server Monitor'
|
1050
1082
|
url: 'http://www.phpservermonitor.org/'
|
1051
1083
|
|
1052
|
-
- regex: '
|
1053
|
-
name: '
|
1084
|
+
- regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
|
1085
|
+
name: 'Pocket'
|
1054
1086
|
category: 'Read-it-later Service'
|
1055
1087
|
url: 'https://getpocket.com/pocketparser_ua'
|
1056
1088
|
producer:
|
1057
|
-
name: '
|
1089
|
+
name: 'Read It Later, Inc.'
|
1058
1090
|
url: 'https://getpocket.com/'
|
1059
1091
|
|
1060
1092
|
- regex: 'PritTorrent'
|
@@ -1317,7 +1349,7 @@
|
|
1317
1349
|
name: 'Slack Technologies'
|
1318
1350
|
url: 'http://slack.com'
|
1319
1351
|
|
1320
|
-
- regex: '(Sogou (
|
1352
|
+
- regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
|
1321
1353
|
name: 'Sogou Spider'
|
1322
1354
|
category: 'Search bot'
|
1323
1355
|
url: 'http://www.sogou.com/docs/help/webmasters.htm'
|
@@ -1828,6 +1860,14 @@
|
|
1828
1860
|
name: 'Snapchat Inc.'
|
1829
1861
|
url: 'https://www.snapchat.com'
|
1830
1862
|
|
1863
|
+
- regex: 'Snap URL Preview Service'
|
1864
|
+
name: 'Snap URL Preview Service'
|
1865
|
+
category: 'Service Agent'
|
1866
|
+
url: 'https://developers.snap.com/robots'
|
1867
|
+
producer:
|
1868
|
+
name: 'Snapchat Inc.'
|
1869
|
+
url: 'https://www.snapchat.com/'
|
1870
|
+
|
1831
1871
|
- regex: "Let's Encrypt validation server"
|
1832
1872
|
name: "Let's Encrypt Validation"
|
1833
1873
|
category: 'Service Agent'
|
@@ -1926,7 +1966,7 @@
|
|
1926
1966
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
1927
1967
|
name: 'RSSRadio Bot'
|
1928
1968
|
|
1929
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
|
1969
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
|
1930
1970
|
name: 'Generic Bot'
|
1931
1971
|
|
1932
1972
|
- regex: '^sentry'
|
@@ -1935,7 +1975,7 @@
|
|
1935
1975
|
name: 'Sentry'
|
1936
1976
|
url: 'https://sentry.io'
|
1937
1977
|
|
1938
|
-
- regex: '^Spotify'
|
1978
|
+
- regex: '^Spotify/(\d+[\.\d]+)$'
|
1939
1979
|
name: 'Spotify'
|
1940
1980
|
producer:
|
1941
1981
|
name: 'Spotify'
|
@@ -2163,7 +2203,7 @@
|
|
2163
2203
|
category: 'Crawler'
|
2164
2204
|
url: 'https://serendeputy.com/about/serendeputy-bot'
|
2165
2205
|
|
2166
|
-
- regex: 'ias-va.*admantx.*service-fetcher'
|
2206
|
+
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
|
2167
2207
|
name: 'ADmantX Service Fetcher'
|
2168
2208
|
category: 'Service bot'
|
2169
2209
|
url: 'https://www.admantx.com/service-fetcher.html'
|
@@ -2349,7 +2389,7 @@
|
|
2349
2389
|
name: ''
|
2350
2390
|
url: ''
|
2351
2391
|
|
2352
|
-
- regex: 'scaninfo@expanseinc.com'
|
2392
|
+
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
|
2353
2393
|
name: 'Expanse'
|
2354
2394
|
category: 'Security Checker'
|
2355
2395
|
url: 'https://expanse.co/'
|
@@ -2511,6 +2551,11 @@
|
|
2511
2551
|
category: 'Security Checker'
|
2512
2552
|
url: 'https://github.com/LeakIX/l9tcpid'
|
2513
2553
|
|
2554
|
+
- regex: 'l9explore/([\d+\.])'
|
2555
|
+
name: 'l9explore'
|
2556
|
+
category: 'Security Checker'
|
2557
|
+
url: 'https://github.com/LeakIX/l9explore'
|
2558
|
+
|
2514
2559
|
- regex: 'MegaIndex.ru/([\d+\.])'
|
2515
2560
|
name: 'MegaIndex'
|
2516
2561
|
category: 'Crawler'
|
@@ -2639,6 +2684,539 @@
|
|
2639
2684
|
name: 'Hochschule für angewandte Wissenschaften München'
|
2640
2685
|
url: 'https://www.hm.edu/'
|
2641
2686
|
|
2687
|
+
- regex: 'TigerBot/([\d+.]+)'
|
2688
|
+
name: 'TigerBot'
|
2689
|
+
category: 'Crawler'
|
2690
|
+
url: 'https://tiger.ch/'
|
2691
|
+
|
2692
|
+
- regex: 'TestCrawler/([\d+.]+)'
|
2693
|
+
name: 'TestCrawler'
|
2694
|
+
category: 'Crawler'
|
2695
|
+
url: 'https://www.comcepta.com/'
|
2696
|
+
|
2697
|
+
- regex: 'CrowdTanglebot/([\d+.]+)'
|
2698
|
+
name: 'CrowdTangle'
|
2699
|
+
category: 'Crawler'
|
2700
|
+
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
|
2701
|
+
producer:
|
2702
|
+
name: 'CrowdTangle, Inc.'
|
2703
|
+
url: 'https://www.crowdtangle.com/'
|
2704
|
+
|
2705
|
+
- regex: 'Sellers.Guide Crawler by Primis'
|
2706
|
+
name: 'Sellers.Guide'
|
2707
|
+
category: 'Crawler'
|
2708
|
+
url: 'https://sellers.guide/'
|
2709
|
+
producer:
|
2710
|
+
name: 'McCann Disciplines, Ltd.'
|
2711
|
+
url: 'https://www.primis.tech/'
|
2712
|
+
|
2713
|
+
- regex: 'OnalyticaBot'
|
2714
|
+
name: 'Onalytica'
|
2715
|
+
category: 'Crawler'
|
2716
|
+
url: 'https://www.airslate.com/bot/explore/onalytica-bot'
|
2717
|
+
producer:
|
2718
|
+
name: 'airSlate, Inc.'
|
2719
|
+
url: 'https://www.airslate.com/'
|
2720
|
+
|
2721
|
+
- regex: 'deepnoc'
|
2722
|
+
name: 'deepnoc'
|
2723
|
+
category: 'Crawler'
|
2724
|
+
url: 'https://deepnoc.com/bot'
|
2725
|
+
producer:
|
2726
|
+
name: 'deepnoc, GmbH'
|
2727
|
+
url: 'https://deepnoc.com/'
|
2728
|
+
|
2729
|
+
- regex: 'Newslitbot/([\d+.]+)'
|
2730
|
+
name: 'Newslitbot'
|
2731
|
+
category: 'Crawler'
|
2732
|
+
url: 'https://www.newslit.co/'
|
2733
|
+
producer:
|
2734
|
+
name: 'Newslit, LLC.'
|
2735
|
+
url: 'https://www.newslit.co/'
|
2736
|
+
|
2737
|
+
- regex: 'um-LN/([\d+.]+)'
|
2738
|
+
name: 'uMBot'
|
2739
|
+
category: 'Crawler'
|
2740
|
+
url: 'https://www.ubermetrics-technologies.com/'
|
2741
|
+
producer:
|
2742
|
+
name: 'Ubermetrics Technologies GmbH'
|
2743
|
+
url: 'https://www.ubermetrics-technologies.com/'
|
2744
|
+
|
2745
|
+
- regex: 'Abonti/([\d+.]+)'
|
2746
|
+
name: 'Abonti'
|
2747
|
+
category: 'Crawler'
|
2748
|
+
url: 'http://abonti.com/'
|
2749
|
+
|
2750
|
+
- regex: 'collection@infegy.com'
|
2751
|
+
name: 'Infegy'
|
2752
|
+
category: 'Crawler'
|
2753
|
+
url: 'https://infegy.com/'
|
2754
|
+
producer:
|
2755
|
+
name: 'Infegy, Inc.'
|
2756
|
+
url: 'https://infegy.com/'
|
2757
|
+
|
2758
|
+
- regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
|
2759
|
+
name: 'IPIP'
|
2760
|
+
category: 'Security Checker'
|
2761
|
+
url: 'https://security.ipip.net/'
|
2762
|
+
producer:
|
2763
|
+
name: 'Beijing Tiantexin Tech. Co., Ltd.'
|
2764
|
+
url: 'https://en.ipip.net/'
|
2765
|
+
|
2766
|
+
- regex: 'ev-crawler/([\d+.]+)'
|
2767
|
+
name: 'Headline'
|
2768
|
+
category: 'Crawler'
|
2769
|
+
url: 'https://headline.com/legal/crawler'
|
2770
|
+
producer:
|
2771
|
+
name: 'e.ventures Managementgesellschaft mbH'
|
2772
|
+
url: 'https://headline.com/'
|
2773
|
+
|
2774
|
+
- regex: 'webprosbot/([\d+.]+)'
|
2775
|
+
name: 'WebPros'
|
2776
|
+
category: 'Crawler'
|
2777
|
+
url: 'https://webpros.com/'
|
2778
|
+
producer:
|
2779
|
+
name: 'WebPros Holdco B.V.'
|
2780
|
+
url: 'https://webpros.com/'
|
2781
|
+
|
2782
|
+
- regex: 'ELB-HealthChecker'
|
2783
|
+
name: 'Amazon ELB'
|
2784
|
+
category: 'Site Monitor'
|
2785
|
+
url: 'https://aws.amazon.com/elasticloadbalancing/'
|
2786
|
+
producer:
|
2787
|
+
name: 'Amazon.com, Inc.'
|
2788
|
+
url: 'https://www.amazon.com/'
|
2789
|
+
|
2790
|
+
- regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
|
2791
|
+
name: 'WhereGoes'
|
2792
|
+
category: 'Crawler'
|
2793
|
+
url: 'https://wheregoes.com/'
|
2794
|
+
|
2795
|
+
- regex: 'project_patchwatch'
|
2796
|
+
name: 'Project Patchwatch'
|
2797
|
+
category: 'Crawler'
|
2798
|
+
url: 'http://66.240.192.82/'
|
2799
|
+
|
2800
|
+
- regex: 'InternetMeasurement/([\d+.]+)'
|
2801
|
+
name: 'InternetMeasurement'
|
2802
|
+
category: 'Crawler'
|
2803
|
+
url: 'https://internet-measurement.com/'
|
2804
|
+
|
2805
|
+
- regex: 'DomainAppender /([\d+.]+)'
|
2806
|
+
name: 'DomainAppender'
|
2807
|
+
category: 'Crawler'
|
2808
|
+
url: 'https://www.profound.net/product/domain_append/'
|
2809
|
+
producer:
|
2810
|
+
name: 'Profound Networks, LLC'
|
2811
|
+
url: 'https://www.profound.net/'
|
2812
|
+
|
2813
|
+
- regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
|
2814
|
+
name: 'FreeWebMonitoring'
|
2815
|
+
category: 'Site Monitor'
|
2816
|
+
url: 'https://www.freewebmonitoring.com/bot.html'
|
2817
|
+
producer:
|
2818
|
+
name: 'GreenWave Online, Inc.'
|
2819
|
+
url: 'http://www.greenwaveonline.com/'
|
2820
|
+
|
2821
|
+
- regex: 'Page Modified Pinger'
|
2822
|
+
name: 'Page Modified Pinger'
|
2823
|
+
category: 'Site Monitor'
|
2824
|
+
url: 'https://www.pagemodified.com/'
|
2825
|
+
producer:
|
2826
|
+
name: 'Valley Hosting, LLC'
|
2827
|
+
url: 'https://www.pagemodified.com/'
|
2828
|
+
|
2829
|
+
- regex: 'adstxtlab.com'
|
2830
|
+
name: 'adstxtlab.com'
|
2831
|
+
category: 'Crawler'
|
2832
|
+
url: 'https://adstxtlab.com/validator.php'
|
2833
|
+
producer:
|
2834
|
+
name: 'Jaohawi AB'
|
2835
|
+
url: 'https://adstxtlab.com/'
|
2836
|
+
|
2837
|
+
- regex: 'Iframely/([\d+.]+)'
|
2838
|
+
name: 'Iframely'
|
2839
|
+
category: 'Crawler'
|
2840
|
+
url: 'https://iframely.com/'
|
2841
|
+
producer:
|
2842
|
+
name: 'Itteco Software, Corp.'
|
2843
|
+
url: 'https://iframely.com/'
|
2844
|
+
|
2845
|
+
- regex: 'DomainStatsBot/([\d+.]+)'
|
2846
|
+
name: 'DomainStatsBot'
|
2847
|
+
category: 'Crawler'
|
2848
|
+
url: 'https://domainstats.com/pages/our-bot'
|
2849
|
+
producer:
|
2850
|
+
name: 'Domainstats Ltd'
|
2851
|
+
url: 'https://domainstats.com/'
|
2852
|
+
|
2853
|
+
- regex: 'aiHitBot/([\d+.]+)'
|
2854
|
+
name: 'aiHitBot'
|
2855
|
+
category: 'Crawler'
|
2856
|
+
url: 'https://www.aihitdata.com/about'
|
2857
|
+
|
2858
|
+
- regex: 'DomainCrawler/'
|
2859
|
+
name: 'DomainCrawler'
|
2860
|
+
category: 'Crawler'
|
2861
|
+
url: 'https://domaincrawler.com/about-us/'
|
2862
|
+
|
2863
|
+
- regex: 'DNSResearchBot'
|
2864
|
+
name: 'DNSResearchBot'
|
2865
|
+
category: 'Crawler'
|
2866
|
+
|
2867
|
+
- regex: 'GitCrawlerBot'
|
2868
|
+
name: 'GitCrawlerBot'
|
2869
|
+
category: 'Crawler'
|
2870
|
+
|
2871
|
+
- regex: 'AdAuth/([\d+.]+)'
|
2872
|
+
name: 'AdAuth'
|
2873
|
+
category: 'Crawler'
|
2874
|
+
url: 'https://www.adauth.com'
|
2875
|
+
|
2876
|
+
- regex: 'faveeo.com'
|
2877
|
+
name: 'Faveeo'
|
2878
|
+
category: 'Crawler'
|
2879
|
+
url: 'http://www.faveeo.com'
|
2880
|
+
|
2881
|
+
- regex: 'kozmonavt\.'
|
2882
|
+
name: 'Kozmonavt'
|
2883
|
+
category: 'Crawler'
|
2884
|
+
url: 'https://kozmonavt.ml'
|
2885
|
+
|
2886
|
+
- regex: 'CriteoBot/'
|
2887
|
+
name: 'CriteoBot'
|
2888
|
+
category: 'Crawler'
|
2889
|
+
url: 'https://www.criteo.com/criteo-crawler/'
|
2890
|
+
|
2891
|
+
- regex: 'PayPal IPN'
|
2892
|
+
name: 'PayPal IPN'
|
2893
|
+
category: 'Service Agent'
|
2894
|
+
url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
|
2895
|
+
producer:
|
2896
|
+
name: 'PayPal, Inc.'
|
2897
|
+
url: 'https://www.paypal.com/'
|
2898
|
+
|
2899
|
+
- regex: 'MaCoCu'
|
2900
|
+
name: 'MaCoCu'
|
2901
|
+
category: 'Crawler'
|
2902
|
+
url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
|
2903
|
+
producer:
|
2904
|
+
name: 'Jožef Stefan Institute'
|
2905
|
+
url: 'https://www.ijs.si/ijsw/JSI'
|
2906
|
+
|
2907
|
+
- regex: 'dnt-policy@eff.org'
|
2908
|
+
name: 'EFF Do Not Track Verifier'
|
2909
|
+
category: 'Crawler'
|
2910
|
+
url: 'https://www.eff.org/issues/do-not-track'
|
2911
|
+
producer:
|
2912
|
+
name: 'Electronic Frontier Foundation'
|
2913
|
+
url: 'https://www.eff.org/'
|
2914
|
+
|
2915
|
+
- regex: 'InfoTigerBot'
|
2916
|
+
name: 'InfoTigerBot'
|
2917
|
+
category: 'Crawler'
|
2918
|
+
url: 'https://infotiger.com/bot'
|
2919
|
+
producer:
|
2920
|
+
name: 'Infotiger UG'
|
2921
|
+
url: 'https://infotiger.com/'
|
2922
|
+
|
2923
|
+
- regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
|
2924
|
+
name: 'Birdcrawlerbot'
|
2925
|
+
category: 'Crawler'
|
2926
|
+
url: 'https://crawla.de/de/index.php'
|
2927
|
+
producer:
|
2928
|
+
name: 'Swoppen Systems GmbH'
|
2929
|
+
url: 'https://www.swoppen.com/de'
|
2930
|
+
|
2931
|
+
- regex: 'ScamadviserExternalHit/([\d+.]+)'
|
2932
|
+
name: 'Scamadviser External Hit'
|
2933
|
+
category: 'Crawler'
|
2934
|
+
url: 'https://www.scamadviser.com/'
|
2935
|
+
producer:
|
2936
|
+
name: 'Ecommerce Operations B.V.'
|
2937
|
+
url: 'https://www.scamadviser.com/'
|
2938
|
+
|
2939
|
+
- regex: 'ZaldamoSearchBot'
|
2940
|
+
name: 'Zaldamo'
|
2941
|
+
category: 'Crawler'
|
2942
|
+
url: 'https://www.zaldamo.com/search.html'
|
2943
|
+
producer:
|
2944
|
+
name: 'Project Orlando, LLC.'
|
2945
|
+
url: 'https://www.projectorlando.com/'
|
2946
|
+
|
2947
|
+
- regex: 'AFB/([\d+.]+)'
|
2948
|
+
name: 'Allloadin Favicon Bot'
|
2949
|
+
category: 'Crawler'
|
2950
|
+
url: 'https://allloadin.com/'
|
2951
|
+
|
2952
|
+
- regex: 'SeolytBot/([\d+.]+)'
|
2953
|
+
name: 'Seolyt Bot'
|
2954
|
+
category: 'Crawler'
|
2955
|
+
url: 'https://seolyt.com'
|
2956
|
+
|
2957
|
+
- regex: 'LinkWalker/([\d+.]+)'
|
2958
|
+
name: 'LinkWalker'
|
2959
|
+
category: 'Crawler'
|
2960
|
+
url: 'https://www.phishlabs.com/'
|
2961
|
+
producer:
|
2962
|
+
name: 'PhishLabs, Inc.'
|
2963
|
+
url: 'https://www.phishlabs.com/'
|
2964
|
+
|
2965
|
+
- regex: 'RenovateBot/([\d+.]+)'
|
2966
|
+
name: 'RenovateBot'
|
2967
|
+
category: 'Security Checker'
|
2968
|
+
url: 'https://github.com/renovatebot/renovate'
|
2969
|
+
producer:
|
2970
|
+
name: 'White Source Ltd.'
|
2971
|
+
url: 'https://www.mend.io/free-developer-tools/renovate/'
|
2972
|
+
|
2973
|
+
- regex: 'INETDEX-BOT/([\d+.]+)'
|
2974
|
+
name: 'Inetdex Bot'
|
2975
|
+
category: 'Crawler'
|
2976
|
+
url: 'https://www.inetdex.com/'
|
2977
|
+
|
2978
|
+
- regex: 'NETZZAPPEN'
|
2979
|
+
name: 'NETZZAPPEN'
|
2980
|
+
category: 'Crawler'
|
2981
|
+
url: 'https://www.netzzappen.com/'
|
2982
|
+
producer:
|
2983
|
+
name: 'Marc Huemer'
|
2984
|
+
url: 'https://www.netzzappen.com/'
|
2985
|
+
|
2986
|
+
- regex: 'SerpReputationManagementAgent/([\d+.]+)'
|
2987
|
+
name: 'SEMrush Reputation Management'
|
2988
|
+
category: 'Service Agent'
|
2989
|
+
url: 'https://www.semrush.com/bot/'
|
2990
|
+
producer:
|
2991
|
+
name: 'SEMrush'
|
2992
|
+
url: 'https://www.semrush.com/'
|
2993
|
+
|
2994
|
+
- regex: 'panscient.com'
|
2995
|
+
name: 'Panscient'
|
2996
|
+
category: 'Crawler'
|
2997
|
+
url: 'https://www.panscient.com/faq.htm'
|
2998
|
+
producer:
|
2999
|
+
name: 'Panscient, Inc.'
|
3000
|
+
url: 'https://www.panscient.com/'
|
3001
|
+
|
3002
|
+
- regex: 'research@pdrlabs.net'
|
3003
|
+
name: 'PDR Labs'
|
3004
|
+
category: 'Security Checker'
|
3005
|
+
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
3006
|
+
producer:
|
3007
|
+
name: 'PDR Labs'
|
3008
|
+
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
3009
|
+
|
3010
|
+
- regex: 'Nicecrawler/([\d+.]+)'
|
3011
|
+
name: 'NiceCrawler'
|
3012
|
+
category: 'Crawler'
|
3013
|
+
url: 'https://www.nicecrawler.com/'
|
3014
|
+
producer:
|
3015
|
+
name: 'Intelium Corp.'
|
3016
|
+
url: 'https://www.intelium.com/'
|
3017
|
+
|
3018
|
+
- regex: 't3versionsBot/([\d+.]+)'
|
3019
|
+
name: 't3versions'
|
3020
|
+
category: 'Crawler'
|
3021
|
+
url: 'https://www.t3versions.com/bot'
|
3022
|
+
producer:
|
3023
|
+
name: 'Torben Hansen'
|
3024
|
+
url: 'https://www.t3versions.com/'
|
3025
|
+
|
3026
|
+
- regex: 'Crawlson/([\d+.]+)'
|
3027
|
+
name: 'Crawlson'
|
3028
|
+
category: 'Crawler'
|
3029
|
+
url: 'https://www.crawlson.com/about'
|
3030
|
+
producer:
|
3031
|
+
name: 'Crawlson'
|
3032
|
+
url: 'https://www.crawlson.com/'
|
3033
|
+
|
3034
|
+
- regex: 'tchelebi/([\d+.]+)'
|
3035
|
+
name: 'tchelebi'
|
3036
|
+
category: 'Crawler'
|
3037
|
+
url: 'https://tchelebi.io/'
|
3038
|
+
producer:
|
3039
|
+
name: 'NormShield, Inc.'
|
3040
|
+
url: 'https://blackkite.com/'
|
3041
|
+
|
3042
|
+
- regex: 'JobboerseBot'
|
3043
|
+
name: 'JobboerseBot'
|
3044
|
+
category: 'Crawler'
|
3045
|
+
url: 'https://www.xing.com/jobs'
|
3046
|
+
producer:
|
3047
|
+
name: 'New Work SE'
|
3048
|
+
url: 'https://www.xing.com/'
|
3049
|
+
|
3050
|
+
- regex: '^Lkx-(.*)/([\d+.]+)'
|
3051
|
+
name: 'LeakIX'
|
3052
|
+
category: 'Security Checker'
|
3053
|
+
url: 'https://leakix.net/'
|
3054
|
+
producer:
|
3055
|
+
name: 'BaDaaS SRL'
|
3056
|
+
url: 'https://leakix.net/'
|
3057
|
+
|
3058
|
+
- regex: 'RepoLookoutBot/([\d+.]+)'
|
3059
|
+
name: 'Repo Lookout'
|
3060
|
+
category: 'Security Checker'
|
3061
|
+
url: 'https://www.repo-lookout.org/'
|
3062
|
+
producer:
|
3063
|
+
name: 'Crissy Field GmbH'
|
3064
|
+
url: 'https://www.crissyfield.de/'
|
3065
|
+
|
3066
|
+
- regex: 'PATHspider'
|
3067
|
+
name: 'PATHspider'
|
3068
|
+
category: 'Security Checker'
|
3069
|
+
url: 'https://pathspider.net/'
|
3070
|
+
producer:
|
3071
|
+
name: 'MAMI Project'
|
3072
|
+
url: 'https://mami-project.eu/'
|
3073
|
+
|
3074
|
+
- regex: 'everyfeed-spider/([\d+.]+)'
|
3075
|
+
name: 'Everyfeed'
|
3076
|
+
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
|
3077
|
+
category: 'Feed Fetcher'
|
3078
|
+
producer:
|
3079
|
+
name: ''
|
3080
|
+
url: ''
|
3081
|
+
|
3082
|
+
- regex: 'Exchange check'
|
3083
|
+
name: 'Exchange check'
|
3084
|
+
category: 'Security Checker'
|
3085
|
+
url: 'https://github.com/GossiTheDog/scanning'
|
3086
|
+
producer:
|
3087
|
+
name: 'Kevin Beaumont'
|
3088
|
+
url: 'https://doublepulsar.com/'
|
3089
|
+
|
3090
|
+
- regex: 'Sublinq'
|
3091
|
+
name: 'Sublinq'
|
3092
|
+
category: 'Crawler'
|
3093
|
+
url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
|
3094
|
+
producer:
|
3095
|
+
name: ''
|
3096
|
+
url: ''
|
3097
|
+
|
3098
|
+
- regex: 'Gregarius/([\d+.]+)'
|
3099
|
+
name: 'Gregarius'
|
3100
|
+
category: 'Feed Fetcher'
|
3101
|
+
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
|
3102
|
+
producer:
|
3103
|
+
name: ''
|
3104
|
+
url: ''
|
3105
|
+
|
3106
|
+
- regex: 'COMODO DCV'
|
3107
|
+
name: 'COMODO DCV'
|
3108
|
+
category: 'Service Agent'
|
3109
|
+
url: 'https://www.comodo.com/'
|
3110
|
+
producer:
|
3111
|
+
name: 'Comodo Security Solutions, Inc.'
|
3112
|
+
url: 'https://www.comodo.com/'
|
3113
|
+
|
3114
|
+
- regex: 'Sectigo DCV'
|
3115
|
+
name: 'Sectigo DCV'
|
3116
|
+
category: 'Service Agent'
|
3117
|
+
url: 'https://sectigo.com/'
|
3118
|
+
producer:
|
3119
|
+
name: 'Sectigo Limited'
|
3120
|
+
url: 'https://sectigo.com/'
|
3121
|
+
|
3122
|
+
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
|
3123
|
+
name: 'KlarnaBot'
|
3124
|
+
category: 'Crawler'
|
3125
|
+
url: 'https://docs.klarna.com/klarna-bot/'
|
3126
|
+
producer:
|
3127
|
+
name: 'Klarna Bank AB'
|
3128
|
+
url: 'https://www.klarna.com/'
|
3129
|
+
|
3130
|
+
- regex: 'Taboolabot/([\d+.]+)'
|
3131
|
+
name: 'Taboolabot'
|
3132
|
+
category: 'Crawler'
|
3133
|
+
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
|
3134
|
+
producer:
|
3135
|
+
name: 'Taboola, Inc.'
|
3136
|
+
url: 'https://www.taboola.com/'
|
3137
|
+
|
3138
|
+
- regex: 'Asana/([\d+.]+)'
|
3139
|
+
name: 'Asana'
|
3140
|
+
category: 'Crawler'
|
3141
|
+
url: 'https://asana.com/'
|
3142
|
+
producer:
|
3143
|
+
name: 'Asana, Inc.'
|
3144
|
+
url: 'https://asana.com/'
|
3145
|
+
|
3146
|
+
- regex: 'Chrome Privacy Preserving Prefetch Proxy'
|
3147
|
+
name: 'Chrome Privacy Preserving Prefetch Proxy'
|
3148
|
+
category: 'Service Agent'
|
3149
|
+
url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
|
3150
|
+
producer:
|
3151
|
+
name: 'Google Inc.'
|
3152
|
+
url: 'https://www.google.com/'
|
3153
|
+
|
3154
|
+
- regex: 'URLinspectorBot/([\d+.]+)'
|
3155
|
+
name: 'URLinspector'
|
3156
|
+
category: 'Site Monitor'
|
3157
|
+
url: 'https://www.urlinspector.com/bot/'
|
3158
|
+
producer:
|
3159
|
+
name: 'LinkResearchTools GmbH'
|
3160
|
+
url: 'https://www.linkresearchtools.com/'
|
3161
|
+
|
3162
|
+
- regex: 'EntferBot/([\d+.]+)'
|
3163
|
+
name: 'Entfer'
|
3164
|
+
category: 'Crawler'
|
3165
|
+
url: 'https://entfer.com/'
|
3166
|
+
producer:
|
3167
|
+
name: 'Entfer Ltd.'
|
3168
|
+
url: 'https://entfer.com/'
|
3169
|
+
|
3170
|
+
- regex: 'TagInspector/([\d+.]+)'
|
3171
|
+
name: 'Tag Inspector'
|
3172
|
+
category: 'Crawler'
|
3173
|
+
url: 'https://taginspector.com/'
|
3174
|
+
producer:
|
3175
|
+
name: 'InfoTrust, LLC'
|
3176
|
+
url: 'https://infotrust.com/'
|
3177
|
+
|
3178
|
+
- regex: 'pageburst'
|
3179
|
+
name: 'Pageburst'
|
3180
|
+
category: 'Crawler'
|
3181
|
+
url: 'https://pageburstls.elsevier.com/'
|
3182
|
+
producer:
|
3183
|
+
name: 'Elsevier Ltd'
|
3184
|
+
url: 'https://www.elsevier.com/'
|
3185
|
+
|
3186
|
+
- regex: '.+diffbot'
|
3187
|
+
name: 'Diffbot'
|
3188
|
+
category: 'Crawler'
|
3189
|
+
url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
|
3190
|
+
producer:
|
3191
|
+
name: 'Diffbot Technologies Corp.'
|
3192
|
+
url: 'https://www.diffbot.com/'
|
3193
|
+
|
3194
|
+
- regex: 'DisqusAdstxtCrawler/([\d+.]+)'
|
3195
|
+
name: 'Disqus'
|
3196
|
+
category: 'Crawler'
|
3197
|
+
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
|
3198
|
+
producer:
|
3199
|
+
name: 'Disqus, Inc.'
|
3200
|
+
url: 'https://disqus.com/'
|
3201
|
+
|
3202
|
+
- regex: 'startmebot/([\d+.]+)'
|
3203
|
+
name: 'start.me'
|
3204
|
+
category: 'Crawler'
|
3205
|
+
url: 'https://about.start.me/'
|
3206
|
+
producer:
|
3207
|
+
name: 'start.me BV'
|
3208
|
+
url: 'https://about.start.me/'
|
3209
|
+
|
3210
|
+
- regex: '2ip bot/([\d+.]+)'
|
3211
|
+
name: '2ip'
|
3212
|
+
category: 'Crawler'
|
3213
|
+
url: 'https://2ip.io/'
|
3214
|
+
|
3215
|
+
- regex: 'ReqBin Curl Client/([\d+.]+)'
|
3216
|
+
name: 'ReqBin'
|
3217
|
+
category: 'Crawler'
|
3218
|
+
url: 'https://reqbin.com/curl'
|
3219
|
+
|
2642
3220
|
# Generic detections
|
2643
3221
|
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
2644
3222
|
name: 'Generic Bot'
|