device_detector 1.0.7 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/regexes/bots.yml CHANGED
@@ -5,6 +5,22 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
+ - regex: 'monitoring360bot'
9
+ name: '360 Monitoring'
10
+ category: 'Site Monitor'
11
+ url: 'https://www.360monitoring.io'
12
+ producer:
13
+ name: 'Plesk International GmbH'
14
+ url: 'https://www.plesk.com'
15
+
16
+ - regex: 'Cloudflare-Healthchecks'
17
+ name: 'Cloudflare Health Checks'
18
+ category: 'Service Agent'
19
+ url: 'https://developers.cloudflare.com/health-checks/'
20
+ producer:
21
+ name: 'CloudFlare'
22
+ url: 'http://www.cloudflare.com'
23
+
8
24
  - regex: '360Spider'
9
25
  name: '360Spider'
10
26
  category: 'Search bot'
@@ -45,6 +61,14 @@
45
61
  name: 'Ahrefs Pte Ltd'
46
62
  url: 'https://ahrefs.com/robot'
47
63
 
64
+ - regex: 'AhrefsSiteAudit/([\d+.]+)'
65
+ name: 'AhrefsSiteAudit'
66
+ category: 'Site Monitor'
67
+ url: 'https://ahrefs.com/robot/site-audit'
68
+ producer:
69
+ name: 'Ahrefs Pte Ltd'
70
+ url: 'https://ahrefs.com/'
71
+
48
72
  - regex: 'ia_archiver|alexabot|verifybot'
49
73
  name: 'Alexa Crawler'
50
74
  category: 'Search bot'
@@ -100,13 +124,13 @@
100
124
  name: 'Apple Inc'
101
125
  url: 'https://www.apple.com'
102
126
 
103
- - regex: "AppSignalBot"
104
- name: "AppSignalBot"
105
- category: "Site Monitor"
106
- url: "https://docs.appsignal.com/uptime-monitoring/"
127
+ - regex: 'AppSignalBot'
128
+ name: 'AppSignalBot'
129
+ category: 'Site Monitor'
130
+ url: 'https://docs.appsignal.com/uptime-monitoring/'
107
131
  producer:
108
- name: "AppSignal"
109
- url: "https://appsignal.com/"
132
+ name: 'AppSignal'
133
+ url: 'https://appsignal.com/'
110
134
 
111
135
  - regex: 'Arachni'
112
136
  name: 'Arachni'
@@ -355,7 +379,7 @@
355
379
  name: 'Certified Security Solutions'
356
380
  url: 'https://www.css-security.com/company/about-us/'
357
381
 
358
- - regex: 'Datadog Agent'
382
+ - regex: 'Datadog Agent|Datadog/?Synthetics'
359
383
  name: 'Datadog Agent'
360
384
  url: 'https://github.com/DataDog/dd-agent'
361
385
  category: 'Site Monitor'
@@ -688,7 +712,7 @@
688
712
  name: 'Visual Meta'
689
713
  url: 'https://www.shopalike.cz/'
690
714
 
691
- - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
715
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Prober|Producer)|Google.*/\+/web/snippet'
692
716
  name: 'Googlebot'
693
717
  category: 'Search bot'
694
718
  url: 'http://www.google.com/bot.html'
@@ -711,6 +735,11 @@
711
735
  name: 'HubSpot Inc.'
712
736
  url: 'https://www.hubspot.com'
713
737
 
738
+ - regex: 'vuhuvBot'
739
+ name: 'Vuhuv Bot'
740
+ category: 'Crawler'
741
+ url: 'http://vuhuv.com/bot.html'
742
+
714
743
  - regex: 'HTTPMon'
715
744
  name: 'HTTPMon'
716
745
  category: 'Site Monitor'
@@ -783,6 +812,14 @@
783
812
  name: 'Lighthouse'
784
813
  url: 'https://developers.google.com/web/tools/lighthouse'
785
814
 
815
+ - regex: 'last-modified\.com'
816
+ name: 'LastMod Bot'
817
+ category: 'Site Monitor'
818
+ url: 'https://last-modified.com/en/about'
819
+ producer:
820
+ name: ''
821
+ url: 'https://last-modified.com/en'
822
+
786
823
  - regex: 'linkdexbot|linkdex\.com'
787
824
  name: 'Linkdex Bot'
788
825
  category: 'Search bot'
@@ -830,6 +867,14 @@
830
867
  name: ''
831
868
  url: ''
832
869
 
870
+ - regex: 'masscan-ng/([\d+.]+)'
871
+ name: 'masscan-ng'
872
+ url: 'https://github.com/bi-zone/masscan-ng'
873
+ category: 'Crawler'
874
+ producer:
875
+ name: 'BIZON, OOO'
876
+ url: 'https://bi.zone/'
877
+
833
878
  - regex: 'masscan'
834
879
  name: 'masscan'
835
880
  url: 'https://github.com/robertdavidgraham/masscan'
@@ -988,6 +1033,14 @@
988
1033
  - regex: 'Octopus [0-9]'
989
1034
  name: 'Octopus'
990
1035
 
1036
+ - regex: 'OnlineOrNot.com_bot'
1037
+ name: 'OnlineOrNot Bot'
1038
+ category: 'Site Monitor'
1039
+ url: 'https://onlineornot.com/website-monitoring'
1040
+ producer:
1041
+ name: 'OnlineOrNot'
1042
+ url: 'https://onlineornot.com'
1043
+
991
1044
  - regex: 'omgili'
992
1045
  name: 'Omgili bot'
993
1046
  category: 'Search bot'
@@ -1049,12 +1102,12 @@
1049
1102
  name: 'PHP Server Monitor'
1050
1103
  url: 'http://www.phpservermonitor.org/'
1051
1104
 
1052
- - regex: 'PocketParser'
1053
- name: 'PocketParser'
1105
+ - regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
1106
+ name: 'Pocket'
1054
1107
  category: 'Read-it-later Service'
1055
1108
  url: 'https://getpocket.com/pocketparser_ua'
1056
1109
  producer:
1057
- name: 'Pocket'
1110
+ name: 'Read It Later, Inc.'
1058
1111
  url: 'https://getpocket.com/'
1059
1112
 
1060
1113
  - regex: 'PritTorrent'
@@ -1317,7 +1370,7 @@
1317
1370
  name: 'Slack Technologies'
1318
1371
  url: 'http://slack.com'
1319
1372
 
1320
- - regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
1373
+ - regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
1321
1374
  name: 'Sogou Spider'
1322
1375
  category: 'Search bot'
1323
1376
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
@@ -1828,6 +1881,14 @@
1828
1881
  name: 'Snapchat Inc.'
1829
1882
  url: 'https://www.snapchat.com'
1830
1883
 
1884
+ - regex: 'Snap URL Preview Service'
1885
+ name: 'Snap URL Preview Service'
1886
+ category: 'Service Agent'
1887
+ url: 'https://developers.snap.com/robots'
1888
+ producer:
1889
+ name: 'Snapchat Inc.'
1890
+ url: 'https://www.snapchat.com/'
1891
+
1831
1892
  - regex: "Let's Encrypt validation server"
1832
1893
  name: "Let's Encrypt Validation"
1833
1894
  category: 'Service Agent'
@@ -1926,7 +1987,7 @@
1926
1987
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1927
1988
  name: 'RSSRadio Bot'
1928
1989
 
1929
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
1990
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
1930
1991
  name: 'Generic Bot'
1931
1992
 
1932
1993
  - regex: '^sentry'
@@ -1935,7 +1996,7 @@
1935
1996
  name: 'Sentry'
1936
1997
  url: 'https://sentry.io'
1937
1998
 
1938
- - regex: '^Spotify'
1999
+ - regex: '^Spotify/(\d+[\.\d]+)$'
1939
2000
  name: 'Spotify'
1940
2001
  producer:
1941
2002
  name: 'Spotify'
@@ -1985,7 +2046,15 @@
1985
2046
  name: 'WooRank sprl'
1986
2047
  url: 'https://www.woorank.com/'
1987
2048
 
1988
- - regex: '(Match|LinkCheck) by Siteimprove.com'
2049
+ - regex: 'by Siteimprove\.com'
2050
+ name: 'Siteimprove'
2051
+ category: 'Search bot'
2052
+ url: 'https://siteimprove.com/'
2053
+ producer:
2054
+ name: 'Siteimprove GmbH'
2055
+ url: 'https://siteimprove.com/'
2056
+
2057
+ - regex: 'Image size by Siteimprove\.com'
1989
2058
  name: 'Siteimprove'
1990
2059
  category: 'Search bot'
1991
2060
  url: 'https://siteimprove.com/'
@@ -2113,6 +2182,14 @@
2113
2182
  name: 'Startpagina B.V.'
2114
2183
  url: 'https://www.startpagina.nl/'
2115
2184
 
2185
+ - regex: 'MoodleBot-Linkchecker'
2186
+ name: 'MoodleBot Linkchecker'
2187
+ category: 'Search bot'
2188
+ url: 'https://docs.moodle.org/en/Usage'
2189
+ producer:
2190
+ name: 'Moodle Pty Ltd'
2191
+ url: 'https://moodle.org/'
2192
+
2116
2193
  - regex: 'GTmetrix'
2117
2194
  name: 'GTmetrix'
2118
2195
  category: 'Crawler'
@@ -2163,7 +2240,7 @@
2163
2240
  category: 'Crawler'
2164
2241
  url: 'https://serendeputy.com/about/serendeputy-bot'
2165
2242
 
2166
- - regex: 'ias-va.*admantx.*service-fetcher'
2243
+ - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
2167
2244
  name: 'ADmantX Service Fetcher'
2168
2245
  category: 'Service bot'
2169
2246
  url: 'https://www.admantx.com/service-fetcher.html'
@@ -2349,7 +2426,7 @@
2349
2426
  name: ''
2350
2427
  url: ''
2351
2428
 
2352
- - regex: 'scaninfo@expanseinc.com'
2429
+ - regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
2353
2430
  name: 'Expanse'
2354
2431
  category: 'Security Checker'
2355
2432
  url: 'https://expanse.co/'
@@ -2372,6 +2449,13 @@
2372
2449
  producer:
2373
2450
  name: 'Hatena Co., Ltd.'
2374
2451
  url: 'https://www.hatena.ne.jp'
2452
+ - regex: 'Hatena-?Bookmark'
2453
+ name: 'Hatena Bookmark'
2454
+ category: 'Crawler'
2455
+ url: 'https://www.hatena.ne.jp/faq/'
2456
+ producer:
2457
+ name: 'Hatena Co., Ltd.'
2458
+ url: 'https://www.hatena.ne.jp'
2375
2459
 
2376
2460
  - regex: 'RyowlEngine/(\d+)'
2377
2461
  name: 'Ryowl'
@@ -2511,6 +2595,19 @@
2511
2595
  category: 'Security Checker'
2512
2596
  url: 'https://github.com/LeakIX/l9tcpid'
2513
2597
 
2598
+ - regex: 'l9explore/([\d+\.]+)'
2599
+ name: 'l9explore'
2600
+ category: 'Security Checker'
2601
+ url: 'https://github.com/LeakIX/l9explore'
2602
+
2603
+ - regex: 'l9scan/|^Lkx-(.*)/([\d+.]+)'
2604
+ name: 'LeakIX'
2605
+ category: 'Security Checker'
2606
+ url: 'https://leakix.net/'
2607
+ producer:
2608
+ name: 'BaDaaS SRL'
2609
+ url: 'https://leakix.net/'
2610
+
2514
2611
  - regex: 'MegaIndex.ru/([\d+\.]+)'
2515
2612
  name: 'MegaIndex'
2516
2613
  category: 'Crawler'
@@ -2639,6 +2736,649 @@
2639
2736
  name: 'Hochschule für angewandte Wissenschaften München'
2640
2737
  url: 'https://www.hm.edu/'
2641
2738
 
2739
+ - regex: 'TigerBot/([\d+.]+)'
2740
+ name: 'TigerBot'
2741
+ category: 'Crawler'
2742
+ url: 'https://tiger.ch/'
2743
+
2744
+ - regex: 'TestCrawler/([\d+.]+)'
2745
+ name: 'TestCrawler'
2746
+ category: 'Crawler'
2747
+ url: 'https://www.comcepta.com/'
2748
+
2749
+ - regex: 'CrowdTanglebot/([\d+.]+)'
2750
+ name: 'CrowdTangle'
2751
+ category: 'Crawler'
2752
+ url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
2753
+ producer:
2754
+ name: 'CrowdTangle, Inc.'
2755
+ url: 'https://www.crowdtangle.com/'
2756
+
2757
+ - regex: 'Sellers.Guide Crawler by Primis'
2758
+ name: 'Sellers.Guide'
2759
+ category: 'Crawler'
2760
+ url: 'https://sellers.guide/'
2761
+ producer:
2762
+ name: 'McCann Disciplines, Ltd.'
2763
+ url: 'https://www.primis.tech/'
2764
+
2765
+ - regex: 'OnalyticaBot'
2766
+ name: 'Onalytica'
2767
+ category: 'Crawler'
2768
+ url: 'https://www.airslate.com/bot/explore/onalytica-bot'
2769
+ producer:
2770
+ name: 'airSlate, Inc.'
2771
+ url: 'https://www.airslate.com/'
2772
+
2773
+ - regex: 'deepnoc'
2774
+ name: 'deepnoc'
2775
+ category: 'Crawler'
2776
+ url: 'https://deepnoc.com/bot'
2777
+ producer:
2778
+ name: 'deepnoc, GmbH'
2779
+ url: 'https://deepnoc.com/'
2780
+
2781
+ - regex: 'Newslitbot/([\d+.]+)'
2782
+ name: 'Newslitbot'
2783
+ category: 'Crawler'
2784
+ url: 'https://www.newslit.co/'
2785
+ producer:
2786
+ name: 'Newslit, LLC.'
2787
+ url: 'https://www.newslit.co/'
2788
+
2789
+ - regex: 'um-LN/([\d+.]+)'
2790
+ name: 'uMBot'
2791
+ category: 'Crawler'
2792
+ url: 'https://www.ubermetrics-technologies.com/'
2793
+ producer:
2794
+ name: 'Ubermetrics Technologies GmbH'
2795
+ url: 'https://www.ubermetrics-technologies.com/'
2796
+
2797
+ - regex: 'Abonti/([\d+.]+)'
2798
+ name: 'Abonti'
2799
+ category: 'Crawler'
2800
+ url: 'http://abonti.com/'
2801
+
2802
+ - regex: 'collection@infegy.com'
2803
+ name: 'Infegy'
2804
+ category: 'Crawler'
2805
+ url: 'https://infegy.com/'
2806
+ producer:
2807
+ name: 'Infegy, Inc.'
2808
+ url: 'https://infegy.com/'
2809
+
2810
+ - regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
2811
+ name: 'IPIP'
2812
+ category: 'Security Checker'
2813
+ url: 'https://security.ipip.net/'
2814
+ producer:
2815
+ name: 'Beijing Tiantexin Tech. Co., Ltd.'
2816
+ url: 'https://en.ipip.net/'
2817
+
2818
+ - regex: 'ev-crawler/([\d+.]+)'
2819
+ name: 'Headline'
2820
+ category: 'Crawler'
2821
+ url: 'https://headline.com/legal/crawler'
2822
+ producer:
2823
+ name: 'e.ventures Managementgesellschaft mbH'
2824
+ url: 'https://headline.com/'
2825
+
2826
+ - regex: 'webprosbot/([\d+.]+)'
2827
+ name: 'WebPros'
2828
+ category: 'Crawler'
2829
+ url: 'https://webpros.com/'
2830
+ producer:
2831
+ name: 'WebPros Holdco B.V.'
2832
+ url: 'https://webpros.com/'
2833
+
2834
+ - regex: 'ELB-HealthChecker'
2835
+ name: 'Amazon ELB'
2836
+ category: 'Site Monitor'
2837
+ url: 'https://aws.amazon.com/elasticloadbalancing/'
2838
+ producer:
2839
+ name: 'Amazon.com, Inc.'
2840
+ url: 'https://www.amazon.com/'
2841
+
2842
+ - regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
2843
+ name: 'WhereGoes'
2844
+ category: 'Crawler'
2845
+ url: 'https://wheregoes.com/'
2846
+
2847
+ - regex: 'project_patchwatch'
2848
+ name: 'Project Patchwatch'
2849
+ category: 'Crawler'
2850
+ url: 'http://66.240.192.82/'
2851
+
2852
+ - regex: 'InternetMeasurement/([\d+.]+)'
2853
+ name: 'InternetMeasurement'
2854
+ category: 'Crawler'
2855
+ url: 'https://internet-measurement.com/'
2856
+
2857
+ - regex: 'DomainAppender /([\d+.]+)'
2858
+ name: 'DomainAppender'
2859
+ category: 'Crawler'
2860
+ url: 'https://www.profound.net/product/domain_append/'
2861
+ producer:
2862
+ name: 'Profound Networks, LLC'
2863
+ url: 'https://www.profound.net/'
2864
+
2865
+ - regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
2866
+ name: 'FreeWebMonitoring'
2867
+ category: 'Site Monitor'
2868
+ url: 'https://www.freewebmonitoring.com/bot.html'
2869
+ producer:
2870
+ name: 'GreenWave Online, Inc.'
2871
+ url: 'http://www.greenwaveonline.com/'
2872
+
2873
+ - regex: 'Page Modified Pinger'
2874
+ name: 'Page Modified Pinger'
2875
+ category: 'Site Monitor'
2876
+ url: 'https://www.pagemodified.com/'
2877
+ producer:
2878
+ name: 'Valley Hosting, LLC'
2879
+ url: 'https://www.pagemodified.com/'
2880
+
2881
+ - regex: 'adstxtlab.com'
2882
+ name: 'adstxtlab.com'
2883
+ category: 'Crawler'
2884
+ url: 'https://adstxtlab.com/validator.php'
2885
+ producer:
2886
+ name: 'Jaohawi AB'
2887
+ url: 'https://adstxtlab.com/'
2888
+
2889
+ - regex: 'Iframely/([\d+.]+)'
2890
+ name: 'Iframely'
2891
+ category: 'Crawler'
2892
+ url: 'https://iframely.com/'
2893
+ producer:
2894
+ name: 'Itteco Software, Corp.'
2895
+ url: 'https://iframely.com/'
2896
+
2897
+ - regex: 'DomainStatsBot/([\d+.]+)'
2898
+ name: 'DomainStatsBot'
2899
+ category: 'Crawler'
2900
+ url: 'https://domainstats.com/pages/our-bot'
2901
+ producer:
2902
+ name: 'Domainstats Ltd'
2903
+ url: 'https://domainstats.com/'
2904
+
2905
+ - regex: 'aiHitBot/([\d+.]+)'
2906
+ name: 'aiHitBot'
2907
+ category: 'Crawler'
2908
+ url: 'https://www.aihitdata.com/about'
2909
+
2910
+ - regex: 'DomainCrawler/'
2911
+ name: 'DomainCrawler'
2912
+ category: 'Crawler'
2913
+ url: 'https://domaincrawler.com/about-us/'
2914
+
2915
+ - regex: 'DNSResearchBot'
2916
+ name: 'DNSResearchBot'
2917
+ category: 'Crawler'
2918
+
2919
+ - regex: 'GitCrawlerBot'
2920
+ name: 'GitCrawlerBot'
2921
+ category: 'Crawler'
2922
+
2923
+ - regex: 'AdAuth/([\d+.]+)'
2924
+ name: 'AdAuth'
2925
+ category: 'Crawler'
2926
+ url: 'https://www.adauth.com'
2927
+
2928
+ - regex: 'faveeo.com'
2929
+ name: 'Faveeo'
2930
+ category: 'Crawler'
2931
+ url: 'http://www.faveeo.com'
2932
+
2933
+ - regex: 'kozmonavt\.'
2934
+ name: 'Kozmonavt'
2935
+ category: 'Crawler'
2936
+ url: 'https://kozmonavt.ml'
2937
+
2938
+ - regex: 'CriteoBot/'
2939
+ name: 'CriteoBot'
2940
+ category: 'Crawler'
2941
+ url: 'https://www.criteo.com/criteo-crawler/'
2942
+
2943
+ - regex: 'PayPal IPN'
2944
+ name: 'PayPal IPN'
2945
+ category: 'Service Agent'
2946
+ url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
2947
+ producer:
2948
+ name: 'PayPal, Inc.'
2949
+ url: 'https://www.paypal.com/'
2950
+
2951
+ - regex: 'MaCoCu'
2952
+ name: 'MaCoCu'
2953
+ category: 'Crawler'
2954
+ url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
2955
+ producer:
2956
+ name: 'Jožef Stefan Institute'
2957
+ url: 'https://www.ijs.si/ijsw/JSI'
2958
+
2959
+ - regex: 'dnt-policy@eff.org'
2960
+ name: 'EFF Do Not Track Verifier'
2961
+ category: 'Crawler'
2962
+ url: 'https://www.eff.org/issues/do-not-track'
2963
+ producer:
2964
+ name: 'Electronic Frontier Foundation'
2965
+ url: 'https://www.eff.org/'
2966
+
2967
+ - regex: 'InfoTigerBot'
2968
+ name: 'InfoTigerBot'
2969
+ category: 'Crawler'
2970
+ url: 'https://infotiger.com/bot'
2971
+ producer:
2972
+ name: 'Infotiger UG'
2973
+ url: 'https://infotiger.com/'
2974
+
2975
+ - regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
2976
+ name: 'Birdcrawlerbot'
2977
+ category: 'Crawler'
2978
+ url: 'https://crawla.de/de/index.php'
2979
+ producer:
2980
+ name: 'Swoppen Systems GmbH'
2981
+ url: 'https://www.swoppen.com/de'
2982
+
2983
+ - regex: 'ScamadviserExternalHit/([\d+.]+)'
2984
+ name: 'Scamadviser External Hit'
2985
+ category: 'Crawler'
2986
+ url: 'https://www.scamadviser.com/'
2987
+ producer:
2988
+ name: 'Ecommerce Operations B.V.'
2989
+ url: 'https://www.scamadviser.com/'
2990
+
2991
+ - regex: 'ZaldamoSearchBot'
2992
+ name: 'Zaldamo'
2993
+ category: 'Crawler'
2994
+ url: 'https://www.zaldamo.com/search.html'
2995
+ producer:
2996
+ name: 'Project Orlando, LLC.'
2997
+ url: 'https://www.projectorlando.com/'
2998
+
2999
+ - regex: 'AFB/([\d+.]+)'
3000
+ name: 'Allloadin Favicon Bot'
3001
+ category: 'Crawler'
3002
+ url: 'https://allloadin.com/'
3003
+
3004
+ - regex: 'SeolytBot/([\d+.]+)'
3005
+ name: 'Seolyt Bot'
3006
+ category: 'Crawler'
3007
+ url: 'https://seolyt.com'
3008
+
3009
+ - regex: 'LinkWalker/([\d+.]+)'
3010
+ name: 'LinkWalker'
3011
+ category: 'Crawler'
3012
+ url: 'https://www.phishlabs.com/'
3013
+ producer:
3014
+ name: 'PhishLabs, Inc.'
3015
+ url: 'https://www.phishlabs.com/'
3016
+
3017
+ - regex: 'RenovateBot/([\d+.]+)'
3018
+ name: 'RenovateBot'
3019
+ category: 'Security Checker'
3020
+ url: 'https://github.com/renovatebot/renovate'
3021
+ producer:
3022
+ name: 'White Source Ltd.'
3023
+ url: 'https://www.mend.io/free-developer-tools/renovate/'
3024
+
3025
+ - regex: 'INETDEX-BOT/([\d+.]+)'
3026
+ name: 'Inetdex Bot'
3027
+ category: 'Crawler'
3028
+ url: 'https://www.inetdex.com/'
3029
+
3030
+ - regex: 'NETZZAPPEN'
3031
+ name: 'NETZZAPPEN'
3032
+ category: 'Crawler'
3033
+ url: 'https://www.netzzappen.com/'
3034
+ producer:
3035
+ name: 'Marc Huemer'
3036
+ url: 'https://www.netzzappen.com/'
3037
+
3038
+ - regex: 'SerpReputationManagementAgent/([\d+.]+)'
3039
+ name: 'SEMrush Reputation Management'
3040
+ category: 'Service Agent'
3041
+ url: 'https://www.semrush.com/bot/'
3042
+ producer:
3043
+ name: 'SEMrush'
3044
+ url: 'https://www.semrush.com/'
3045
+
3046
+ - regex: 'panscient.com'
3047
+ name: 'Panscient'
3048
+ category: 'Crawler'
3049
+ url: 'https://www.panscient.com/faq.htm'
3050
+ producer:
3051
+ name: 'Panscient, Inc.'
3052
+ url: 'https://www.panscient.com/'
3053
+
3054
+ - regex: 'research@pdrlabs.net'
3055
+ name: 'PDR Labs'
3056
+ category: 'Security Checker'
3057
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3058
+ producer:
3059
+ name: 'PDR Labs'
3060
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3061
+
3062
+ - regex: 'Nicecrawler/([\d+.]+)'
3063
+ name: 'NiceCrawler'
3064
+ category: 'Crawler'
3065
+ url: 'https://www.nicecrawler.com/'
3066
+ producer:
3067
+ name: 'Intelium Corp.'
3068
+ url: 'https://www.intelium.com/'
3069
+
3070
+ - regex: 't3versionsBot/([\d+.]+)'
3071
+ name: 't3versions'
3072
+ category: 'Crawler'
3073
+ url: 'https://www.t3versions.com/bot'
3074
+ producer:
3075
+ name: 'Torben Hansen'
3076
+ url: 'https://www.t3versions.com/'
3077
+
3078
+ - regex: 'Crawlson/([\d+.]+)'
3079
+ name: 'Crawlson'
3080
+ category: 'Crawler'
3081
+ url: 'https://www.crawlson.com/about'
3082
+ producer:
3083
+ name: 'Crawlson'
3084
+ url: 'https://www.crawlson.com/'
3085
+
3086
+ - regex: 'tchelebi/([\d+.]+)'
3087
+ name: 'tchelebi'
3088
+ category: 'Crawler'
3089
+ url: 'https://tchelebi.io/'
3090
+ producer:
3091
+ name: 'NormShield, Inc.'
3092
+ url: 'https://blackkite.com/'
3093
+
3094
+ - regex: 'JobboerseBot'
3095
+ name: 'JobboerseBot'
3096
+ category: 'Crawler'
3097
+ url: 'https://www.xing.com/jobs'
3098
+ producer:
3099
+ name: 'New Work SE'
3100
+ url: 'https://www.xing.com/'
3101
+
3102
+ - regex: 'RepoLookoutBot/([\d+.]+)'
3103
+ name: 'Repo Lookout'
3104
+ category: 'Security Checker'
3105
+ url: 'https://www.repo-lookout.org/'
3106
+ producer:
3107
+ name: 'Crissy Field GmbH'
3108
+ url: 'https://www.crissyfield.de/'
3109
+
3110
+ - regex: 'PATHspider'
3111
+ name: 'PATHspider'
3112
+ category: 'Security Checker'
3113
+ url: 'https://pathspider.net/'
3114
+ producer:
3115
+ name: 'MAMI Project'
3116
+ url: 'https://mami-project.eu/'
3117
+
3118
+ - regex: 'everyfeed-spider/([\d+.]+)'
3119
+ name: 'Everyfeed'
3120
+ url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
3121
+ category: 'Feed Fetcher'
3122
+ producer:
3123
+ name: ''
3124
+ url: ''
3125
+
3126
+ - regex: 'Exchange check'
3127
+ name: 'Exchange check'
3128
+ category: 'Security Checker'
3129
+ url: 'https://github.com/GossiTheDog/scanning'
3130
+ producer:
3131
+ name: 'Kevin Beaumont'
3132
+ url: 'https://doublepulsar.com/'
3133
+
3134
+ - regex: 'Sublinq'
3135
+ name: 'Sublinq'
3136
+ category: 'Crawler'
3137
+ url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
3138
+ producer:
3139
+ name: ''
3140
+ url: ''
3141
+
3142
+ - regex: 'Gregarius/([\d+.]+)'
3143
+ name: 'Gregarius'
3144
+ category: 'Feed Fetcher'
3145
+ url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
3146
+ producer:
3147
+ name: ''
3148
+ url: ''
3149
+
3150
+ - regex: 'COMODO DCV'
3151
+ name: 'COMODO DCV'
3152
+ category: 'Service Agent'
3153
+ url: 'https://www.comodo.com/'
3154
+ producer:
3155
+ name: 'Comodo Security Solutions, Inc.'
3156
+ url: 'https://www.comodo.com/'
3157
+
3158
+ - regex: 'Sectigo DCV'
3159
+ name: 'Sectigo DCV'
3160
+ category: 'Service Agent'
3161
+ url: 'https://sectigo.com/'
3162
+ producer:
3163
+ name: 'Sectigo Limited'
3164
+ url: 'https://sectigo.com/'
3165
+
3166
+ - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
3167
+ name: 'KlarnaBot'
3168
+ category: 'Crawler'
3169
+ url: 'https://docs.klarna.com/klarna-bot/'
3170
+ producer:
3171
+ name: 'Klarna Bank AB'
3172
+ url: 'https://www.klarna.com/'
3173
+
3174
+ - regex: 'Taboolabot/([\d+.]+)'
3175
+ name: 'Taboolabot'
3176
+ category: 'Crawler'
3177
+ url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
3178
+ producer:
3179
+ name: 'Taboola, Inc.'
3180
+ url: 'https://www.taboola.com/'
3181
+
3182
+ - regex: 'Asana/([\d+.]+)'
3183
+ name: 'Asana'
3184
+ category: 'Crawler'
3185
+ url: 'https://asana.com/'
3186
+ producer:
3187
+ name: 'Asana, Inc.'
3188
+ url: 'https://asana.com/'
3189
+
3190
+ - regex: 'Chrome Privacy Preserving Prefetch Proxy'
3191
+ name: 'Chrome Privacy Preserving Prefetch Proxy'
3192
+ category: 'Service Agent'
3193
+ url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
3194
+ producer:
3195
+ name: 'Google Inc.'
3196
+ url: 'https://www.google.com/'
3197
+
3198
+ - regex: 'URLinspectorBot/([\d+.]+)'
3199
+ name: 'URLinspector'
3200
+ category: 'Site Monitor'
3201
+ url: 'https://www.urlinspector.com/bot/'
3202
+ producer:
3203
+ name: 'LinkResearchTools GmbH'
3204
+ url: 'https://www.linkresearchtools.com/'
3205
+
3206
+ - regex: 'EntferBot/([\d+.]+)'
3207
+ name: 'Entfer'
3208
+ category: 'Crawler'
3209
+ url: 'https://entfer.com/'
3210
+ producer:
3211
+ name: 'Entfer Ltd.'
3212
+ url: 'https://entfer.com/'
3213
+
3214
+ - regex: 'TagInspector/([\d+.]+)'
3215
+ name: 'Tag Inspector'
3216
+ category: 'Crawler'
3217
+ url: 'https://taginspector.com/'
3218
+ producer:
3219
+ name: 'InfoTrust, LLC'
3220
+ url: 'https://infotrust.com/'
3221
+
3222
+ - regex: 'pageburst'
3223
+ name: 'Pageburst'
3224
+ category: 'Crawler'
3225
+ url: 'https://pageburstls.elsevier.com/'
3226
+ producer:
3227
+ name: 'Elsevier Ltd'
3228
+ url: 'https://www.elsevier.com/'
3229
+
3230
+ - regex: '.+diffbot'
3231
+ name: 'Diffbot'
3232
+ category: 'Crawler'
3233
+ url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
3234
+ producer:
3235
+ name: 'Diffbot Technologies Corp.'
3236
+ url: 'https://www.diffbot.com/'
3237
+
3238
+ - regex: 'DisqusAdstxtCrawler/([\d+.]+)'
3239
+ name: 'Disqus'
3240
+ category: 'Crawler'
3241
+ url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
3242
+ producer:
3243
+ name: 'Disqus, Inc.'
3244
+ url: 'https://disqus.com/'
3245
+
3246
+ - regex: 'startmebot/([\d+.]+)'
3247
+ name: 'start.me'
3248
+ category: 'Crawler'
3249
+ url: 'https://about.start.me/'
3250
+ producer:
3251
+ name: 'start.me BV'
3252
+ url: 'https://about.start.me/'
3253
+
3254
+ - regex: '2ip bot/([\d+.]+)'
3255
+ name: '2ip'
3256
+ category: 'Crawler'
3257
+ url: 'https://2ip.io/'
3258
+
3259
+ - regex: 'ReqBin Curl Client/([\d+.]+)'
3260
+ name: 'ReqBin'
3261
+ category: 'Crawler'
3262
+ url: 'https://reqbin.com/curl'
3263
+
3264
+ - regex: 'XoviBot/([\d+.]+)'
3265
+ name: 'XoviBot'
3266
+ category: 'Crawler'
3267
+ url: 'https://www.xovibot.net'
3268
+ producer:
3269
+ name: 'Xovi GmbH'
3270
+ url: 'http://www.xovi.de'
3271
+
3272
+ - regex: 'Overcast/([\d+.]+) Podcast Sync'
3273
+ name: 'Overcast Podcast Sync'
3274
+ category: 'Service Agent'
3275
+ url: 'https://overcast.fm/podcasterinfo'
3276
+
3277
+ - regex: '^Verity/([\d+.]+)'
3278
+ name: 'GumGum Verity'
3279
+ category: 'Service Agent'
3280
+ url: 'https://gumgum.com/verity'
3281
+
3282
+ - regex: 'hackermention'
3283
+ name: 'hackermention'
3284
+ category: 'Feed Reader'
3285
+ url: 'https://github.com/snarfed/hackermention'
3286
+
3287
+ - regex: 'BitSightBot/([\d+.]+)'
3288
+ name: 'BitSight'
3289
+ category: 'Security Checker'
3290
+ url: 'https://www.bitsight.com/'
3291
+ producer:
3292
+ name: 'BitSight Technologies, Inc.'
3293
+ url: 'https://www.bitsight.com/'
3294
+
3295
+ - regex: 'Ezgif/([\d+.]+)'
3296
+ name: 'Ezgif'
3297
+ category: 'Service Agent'
3298
+ url: 'https://ezgif.com/about'
3299
+
3300
+ - regex: 'intelx.io_bot'
3301
+ name: 'Intelligence X'
3302
+ category: 'Crawler'
3303
+ url: 'https://intelx.io/'
3304
+ producer:
3305
+ name: 'Kleissner Investments s.r.o.'
3306
+ url: 'https://intelx.io/'
3307
+
3308
+ - regex: 'FemtosearchBot/([\d+.]+)'
3309
+ name: 'Femtosearch'
3310
+ category: 'Crawler'
3311
+ url: 'http://femtosearch.com/'
3312
+ producer:
3313
+ name: 'Grier Forensics, LLC'
3314
+ url: 'https://www.grierforensics.com/'
3315
+
3316
+ - regex: 'AdsTxtCrawler/([\d+.]+)'
3317
+ name: 'AdsTxtCrawler'
3318
+ category: 'Crawler'
3319
+ url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
3320
+ producer:
3321
+ name: 'IAB Technology Laboratory, Inc.'
3322
+ url: 'https://iabtechlab.com/'
3323
+
3324
+ - regex: 'Morningscore'
3325
+ name: 'Morningscore Bot'
3326
+ category: 'Crawler'
3327
+ url: 'https://morningscore.io/'
3328
+ producer:
3329
+ name: 'Morningscore'
3330
+ url: 'https://morningscore.io/'
3331
+
3332
+ - regex: 'Uptime-Kuma/([\d+.]+)'
3333
+ name: 'Uptime-Kuma'
3334
+ category: 'Site Monitor'
3335
+ url: 'https://github.com/louislam/uptime-kuma'
3336
+
3337
+ - regex: 'ChatGPT-User'
3338
+ name: 'ChatGPT'
3339
+ category: 'Crawler'
3340
+ url: 'https://platform.openai.com/docs/plugins/bot'
3341
+ producer:
3342
+ name: 'OpenAI OpCo, LLC'
3343
+ url: 'https://openai.com/'
3344
+
3345
+ - regex: 'BrightEdge Crawler/([\d+.]+)'
3346
+ name: 'BrightEdge'
3347
+ category: 'Crawler'
3348
+ url: 'https://www.brightedge.com/'
3349
+ producer:
3350
+ name: 'BrightEdge Technologies, Inc'
3351
+ url: 'https://www.brightedge.com/'
3352
+
3353
+ - regex: 'sfFeedReader/([\d+.]+)'
3354
+ name: 'sfFeedReader'
3355
+ url: 'https://github.com/diem-project/sfFeed2Plugin'
3356
+ category: 'Feed Fetcher'
3357
+
3358
+ - regex: 'cyberscan.io'
3359
+ name: 'Cyberscan'
3360
+ category: 'Security Checker'
3361
+ url: 'https://www.cyberscan.io/'
3362
+ producer:
3363
+ name: 'DGC Verwaltungs GmbH'
3364
+ url: 'https://dgc.org/'
3365
+
3366
+ - regex: 'deepcrawl\.com'
3367
+ name: 'Lumar'
3368
+ category: 'Crawler'
3369
+ url: 'https://deepcrawl.com/bot'
3370
+ producer:
3371
+ name: 'Lumar'
3372
+ url: 'https://www.lumar.io/'
3373
+
3374
+ - regex: 'RepoLookoutBot'
3375
+ name: 'Repo Lookout'
3376
+ category: 'Crawler'
3377
+ url: 'https://www.repo-lookout.org/'
3378
+ producer:
3379
+ name: 'Crissy Field GmbH'
3380
+ url: 'https://www.crissyfield.de/'
3381
+
2642
3382
  # Generic detections
2643
3383
  - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2644
3384
  name: 'Generic Bot'