device_detector 1.0.7 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/regexes/bots.yml CHANGED
@@ -5,6 +5,14 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
+ - regex: 'monitoring360bot'
9
+ name: '360 Monitoring'
10
+ category: 'Site Monitor'
11
+ url: 'https://www.360monitoring.io'
12
+ producer:
13
+ name: 'Plesk International GmbH'
14
+ url: 'https://www.plesk.com'
15
+
8
16
  - regex: '360Spider'
9
17
  name: '360Spider'
10
18
  category: 'Search bot'
@@ -45,6 +53,14 @@
45
53
  name: 'Ahrefs Pte Ltd'
46
54
  url: 'https://ahrefs.com/robot'
47
55
 
56
+ - regex: 'AhrefsSiteAudit/([\d+.]+)'
57
+ name: 'AhrefsSiteAudit'
58
+ category: 'Site Monitor'
59
+ url: 'https://ahrefs.com/robot/site-audit'
60
+ producer:
61
+ name: 'Ahrefs Pte Ltd'
62
+ url: 'https://ahrefs.com/'
63
+
48
64
  - regex: 'ia_archiver|alexabot|verifybot'
49
65
  name: 'Alexa Crawler'
50
66
  category: 'Search bot'
@@ -100,13 +116,13 @@
100
116
  name: 'Apple Inc'
101
117
  url: 'https://www.apple.com'
102
118
 
103
- - regex: "AppSignalBot"
104
- name: "AppSignalBot"
105
- category: "Site Monitor"
106
- url: "https://docs.appsignal.com/uptime-monitoring/"
119
+ - regex: 'AppSignalBot'
120
+ name: 'AppSignalBot'
121
+ category: 'Site Monitor'
122
+ url: 'https://docs.appsignal.com/uptime-monitoring/'
107
123
  producer:
108
- name: "AppSignal"
109
- url: "https://appsignal.com/"
124
+ name: 'AppSignal'
125
+ url: 'https://appsignal.com/'
110
126
 
111
127
  - regex: 'Arachni'
112
128
  name: 'Arachni'
@@ -355,7 +371,7 @@
355
371
  name: 'Certified Security Solutions'
356
372
  url: 'https://www.css-security.com/company/about-us/'
357
373
 
358
- - regex: 'Datadog Agent'
374
+ - regex: 'Datadog Agent|Datadog/?Synthetics'
359
375
  name: 'Datadog Agent'
360
376
  url: 'https://github.com/DataDog/dd-agent'
361
377
  category: 'Site Monitor'
@@ -688,7 +704,7 @@
688
704
  name: 'Visual Meta'
689
705
  url: 'https://www.shopalike.cz/'
690
706
 
691
- - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
707
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Producer)|Google.*/\+/web/snippet'
692
708
  name: 'Googlebot'
693
709
  category: 'Search bot'
694
710
  url: 'http://www.google.com/bot.html'
@@ -783,6 +799,14 @@
783
799
  name: 'Lighthouse'
784
800
  url: 'https://developers.google.com/web/tools/lighthouse'
785
801
 
802
+ - regex: 'last-modified\.com'
803
+ name: 'LastMod Bot'
804
+ category: 'Site Monitor'
805
+ url: 'https://last-modified.com/en/about'
806
+ producer:
807
+ name: ''
808
+ url: 'https://last-modified.com/en'
809
+
786
810
  - regex: 'linkdexbot|linkdex\.com'
787
811
  name: 'Linkdex Bot'
788
812
  category: 'Search bot'
@@ -830,6 +854,14 @@
830
854
  name: ''
831
855
  url: ''
832
856
 
857
+ - regex: 'masscan-ng/([\d+.]+)'
858
+ name: 'masscan-ng'
859
+ url: 'https://github.com/bi-zone/masscan-ng'
860
+ category: 'Crawler'
861
+ producer:
862
+ name: 'BIZON, OOO'
863
+ url: 'https://bi.zone/'
864
+
833
865
  - regex: 'masscan'
834
866
  name: 'masscan'
835
867
  url: 'https://github.com/robertdavidgraham/masscan'
@@ -1049,12 +1081,12 @@
1049
1081
  name: 'PHP Server Monitor'
1050
1082
  url: 'http://www.phpservermonitor.org/'
1051
1083
 
1052
- - regex: 'PocketParser'
1053
- name: 'PocketParser'
1084
+ - regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
1085
+ name: 'Pocket'
1054
1086
  category: 'Read-it-later Service'
1055
1087
  url: 'https://getpocket.com/pocketparser_ua'
1056
1088
  producer:
1057
- name: 'Pocket'
1089
+ name: 'Read It Later, Inc.'
1058
1090
  url: 'https://getpocket.com/'
1059
1091
 
1060
1092
  - regex: 'PritTorrent'
@@ -1317,7 +1349,7 @@
1317
1349
  name: 'Slack Technologies'
1318
1350
  url: 'http://slack.com'
1319
1351
 
1320
- - regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
1352
+ - regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
1321
1353
  name: 'Sogou Spider'
1322
1354
  category: 'Search bot'
1323
1355
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
@@ -1828,6 +1860,14 @@
1828
1860
  name: 'Snapchat Inc.'
1829
1861
  url: 'https://www.snapchat.com'
1830
1862
 
1863
+ - regex: 'Snap URL Preview Service'
1864
+ name: 'Snap URL Preview Service'
1865
+ category: 'Service Agent'
1866
+ url: 'https://developers.snap.com/robots'
1867
+ producer:
1868
+ name: 'Snapchat Inc.'
1869
+ url: 'https://www.snapchat.com/'
1870
+
1831
1871
  - regex: "Let's Encrypt validation server"
1832
1872
  name: "Let's Encrypt Validation"
1833
1873
  category: 'Service Agent'
@@ -1926,7 +1966,7 @@
1926
1966
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1927
1967
  name: 'RSSRadio Bot'
1928
1968
 
1929
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
1969
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
1930
1970
  name: 'Generic Bot'
1931
1971
 
1932
1972
  - regex: '^sentry'
@@ -1935,7 +1975,7 @@
1935
1975
  name: 'Sentry'
1936
1976
  url: 'https://sentry.io'
1937
1977
 
1938
- - regex: '^Spotify'
1978
+ - regex: '^Spotify/(\d+[\.\d]+)$'
1939
1979
  name: 'Spotify'
1940
1980
  producer:
1941
1981
  name: 'Spotify'
@@ -2163,7 +2203,7 @@
2163
2203
  category: 'Crawler'
2164
2204
  url: 'https://serendeputy.com/about/serendeputy-bot'
2165
2205
 
2166
- - regex: 'ias-va.*admantx.*service-fetcher'
2206
+ - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
2167
2207
  name: 'ADmantX Service Fetcher'
2168
2208
  category: 'Service bot'
2169
2209
  url: 'https://www.admantx.com/service-fetcher.html'
@@ -2349,7 +2389,7 @@
2349
2389
  name: ''
2350
2390
  url: ''
2351
2391
 
2352
- - regex: 'scaninfo@expanseinc.com'
2392
+ - regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
2353
2393
  name: 'Expanse'
2354
2394
  category: 'Security Checker'
2355
2395
  url: 'https://expanse.co/'
@@ -2511,6 +2551,11 @@
2511
2551
  category: 'Security Checker'
2512
2552
  url: 'https://github.com/LeakIX/l9tcpid'
2513
2553
 
2554
+ - regex: 'l9explore/([\d+\.])'
2555
+ name: 'l9explore'
2556
+ category: 'Security Checker'
2557
+ url: 'https://github.com/LeakIX/l9explore'
2558
+
2514
2559
  - regex: 'MegaIndex.ru/([\d+\.])'
2515
2560
  name: 'MegaIndex'
2516
2561
  category: 'Crawler'
@@ -2639,6 +2684,539 @@
2639
2684
  name: 'Hochschule für angewandte Wissenschaften München'
2640
2685
  url: 'https://www.hm.edu/'
2641
2686
 
2687
+ - regex: 'TigerBot/([\d+.]+)'
2688
+ name: 'TigerBot'
2689
+ category: 'Crawler'
2690
+ url: 'https://tiger.ch/'
2691
+
2692
+ - regex: 'TestCrawler/([\d+.]+)'
2693
+ name: 'TestCrawler'
2694
+ category: 'Crawler'
2695
+ url: 'https://www.comcepta.com/'
2696
+
2697
+ - regex: 'CrowdTanglebot/([\d+.]+)'
2698
+ name: 'CrowdTangle'
2699
+ category: 'Crawler'
2700
+ url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
2701
+ producer:
2702
+ name: 'CrowdTangle, Inc.'
2703
+ url: 'https://www.crowdtangle.com/'
2704
+
2705
+ - regex: 'Sellers.Guide Crawler by Primis'
2706
+ name: 'Sellers.Guide'
2707
+ category: 'Crawler'
2708
+ url: 'https://sellers.guide/'
2709
+ producer:
2710
+ name: 'McCann Disciplines, Ltd.'
2711
+ url: 'https://www.primis.tech/'
2712
+
2713
+ - regex: 'OnalyticaBot'
2714
+ name: 'Onalytica'
2715
+ category: 'Crawler'
2716
+ url: 'https://www.airslate.com/bot/explore/onalytica-bot'
2717
+ producer:
2718
+ name: 'airSlate, Inc.'
2719
+ url: 'https://www.airslate.com/'
2720
+
2721
+ - regex: 'deepnoc'
2722
+ name: 'deepnoc'
2723
+ category: 'Crawler'
2724
+ url: 'https://deepnoc.com/bot'
2725
+ producer:
2726
+ name: 'deepnoc, GmbH'
2727
+ url: 'https://deepnoc.com/'
2728
+
2729
+ - regex: 'Newslitbot/([\d+.]+)'
2730
+ name: 'Newslitbot'
2731
+ category: 'Crawler'
2732
+ url: 'https://www.newslit.co/'
2733
+ producer:
2734
+ name: 'Newslit, LLC.'
2735
+ url: 'https://www.newslit.co/'
2736
+
2737
+ - regex: 'um-LN/([\d+.]+)'
2738
+ name: 'uMBot'
2739
+ category: 'Crawler'
2740
+ url: 'https://www.ubermetrics-technologies.com/'
2741
+ producer:
2742
+ name: 'Ubermetrics Technologies GmbH'
2743
+ url: 'https://www.ubermetrics-technologies.com/'
2744
+
2745
+ - regex: 'Abonti/([\d+.]+)'
2746
+ name: 'Abonti'
2747
+ category: 'Crawler'
2748
+ url: 'http://abonti.com/'
2749
+
2750
+ - regex: 'collection@infegy.com'
2751
+ name: 'Infegy'
2752
+ category: 'Crawler'
2753
+ url: 'https://infegy.com/'
2754
+ producer:
2755
+ name: 'Infegy, Inc.'
2756
+ url: 'https://infegy.com/'
2757
+
2758
+ - regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
2759
+ name: 'IPIP'
2760
+ category: 'Security Checker'
2761
+ url: 'https://security.ipip.net/'
2762
+ producer:
2763
+ name: 'Beijing Tiantexin Tech. Co., Ltd.'
2764
+ url: 'https://en.ipip.net/'
2765
+
2766
+ - regex: 'ev-crawler/([\d+.]+)'
2767
+ name: 'Headline'
2768
+ category: 'Crawler'
2769
+ url: 'https://headline.com/legal/crawler'
2770
+ producer:
2771
+ name: 'e.ventures Managementgesellschaft mbH'
2772
+ url: 'https://headline.com/'
2773
+
2774
+ - regex: 'webprosbot/([\d+.]+)'
2775
+ name: 'WebPros'
2776
+ category: 'Crawler'
2777
+ url: 'https://webpros.com/'
2778
+ producer:
2779
+ name: 'WebPros Holdco B.V.'
2780
+ url: 'https://webpros.com/'
2781
+
2782
+ - regex: 'ELB-HealthChecker'
2783
+ name: 'Amazon ELB'
2784
+ category: 'Site Monitor'
2785
+ url: 'https://aws.amazon.com/elasticloadbalancing/'
2786
+ producer:
2787
+ name: 'Amazon.com, Inc.'
2788
+ url: 'https://www.amazon.com/'
2789
+
2790
+ - regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
2791
+ name: 'WhereGoes'
2792
+ category: 'Crawler'
2793
+ url: 'https://wheregoes.com/'
2794
+
2795
+ - regex: 'project_patchwatch'
2796
+ name: 'Project Patchwatch'
2797
+ category: 'Crawler'
2798
+ url: 'http://66.240.192.82/'
2799
+
2800
+ - regex: 'InternetMeasurement/([\d+.]+)'
2801
+ name: 'InternetMeasurement'
2802
+ category: 'Crawler'
2803
+ url: 'https://internet-measurement.com/'
2804
+
2805
+ - regex: 'DomainAppender /([\d+.]+)'
2806
+ name: 'DomainAppender'
2807
+ category: 'Crawler'
2808
+ url: 'https://www.profound.net/product/domain_append/'
2809
+ producer:
2810
+ name: 'Profound Networks, LLC'
2811
+ url: 'https://www.profound.net/'
2812
+
2813
+ - regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
2814
+ name: 'FreeWebMonitoring'
2815
+ category: 'Site Monitor'
2816
+ url: 'https://www.freewebmonitoring.com/bot.html'
2817
+ producer:
2818
+ name: 'GreenWave Online, Inc.'
2819
+ url: 'http://www.greenwaveonline.com/'
2820
+
2821
+ - regex: 'Page Modified Pinger'
2822
+ name: 'Page Modified Pinger'
2823
+ category: 'Site Monitor'
2824
+ url: 'https://www.pagemodified.com/'
2825
+ producer:
2826
+ name: 'Valley Hosting, LLC'
2827
+ url: 'https://www.pagemodified.com/'
2828
+
2829
+ - regex: 'adstxtlab.com'
2830
+ name: 'adstxtlab.com'
2831
+ category: 'Crawler'
2832
+ url: 'https://adstxtlab.com/validator.php'
2833
+ producer:
2834
+ name: 'Jaohawi AB'
2835
+ url: 'https://adstxtlab.com/'
2836
+
2837
+ - regex: 'Iframely/([\d+.]+)'
2838
+ name: 'Iframely'
2839
+ category: 'Crawler'
2840
+ url: 'https://iframely.com/'
2841
+ producer:
2842
+ name: 'Itteco Software, Corp.'
2843
+ url: 'https://iframely.com/'
2844
+
2845
+ - regex: 'DomainStatsBot/([\d+.]+)'
2846
+ name: 'DomainStatsBot'
2847
+ category: 'Crawler'
2848
+ url: 'https://domainstats.com/pages/our-bot'
2849
+ producer:
2850
+ name: 'Domainstats Ltd'
2851
+ url: 'https://domainstats.com/'
2852
+
2853
+ - regex: 'aiHitBot/([\d+.]+)'
2854
+ name: 'aiHitBot'
2855
+ category: 'Crawler'
2856
+ url: 'https://www.aihitdata.com/about'
2857
+
2858
+ - regex: 'DomainCrawler/'
2859
+ name: 'DomainCrawler'
2860
+ category: 'Crawler'
2861
+ url: 'https://domaincrawler.com/about-us/'
2862
+
2863
+ - regex: 'DNSResearchBot'
2864
+ name: 'DNSResearchBot'
2865
+ category: 'Crawler'
2866
+
2867
+ - regex: 'GitCrawlerBot'
2868
+ name: 'GitCrawlerBot'
2869
+ category: 'Crawler'
2870
+
2871
+ - regex: 'AdAuth/([\d+.]+)'
2872
+ name: 'AdAuth'
2873
+ category: 'Crawler'
2874
+ url: 'https://www.adauth.com'
2875
+
2876
+ - regex: 'faveeo.com'
2877
+ name: 'Faveeo'
2878
+ category: 'Crawler'
2879
+ url: 'http://www.faveeo.com'
2880
+
2881
+ - regex: 'kozmonavt\.'
2882
+ name: 'Kozmonavt'
2883
+ category: 'Crawler'
2884
+ url: 'https://kozmonavt.ml'
2885
+
2886
+ - regex: 'CriteoBot/'
2887
+ name: 'CriteoBot'
2888
+ category: 'Crawler'
2889
+ url: 'https://www.criteo.com/criteo-crawler/'
2890
+
2891
+ - regex: 'PayPal IPN'
2892
+ name: 'PayPal IPN'
2893
+ category: 'Service Agent'
2894
+ url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
2895
+ producer:
2896
+ name: 'PayPal, Inc.'
2897
+ url: 'https://www.paypal.com/'
2898
+
2899
+ - regex: 'MaCoCu'
2900
+ name: 'MaCoCu'
2901
+ category: 'Crawler'
2902
+ url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
2903
+ producer:
2904
+ name: 'Jožef Stefan Institute'
2905
+ url: 'https://www.ijs.si/ijsw/JSI'
2906
+
2907
+ - regex: 'dnt-policy@eff.org'
2908
+ name: 'EFF Do Not Track Verifier'
2909
+ category: 'Crawler'
2910
+ url: 'https://www.eff.org/issues/do-not-track'
2911
+ producer:
2912
+ name: 'Electronic Frontier Foundation'
2913
+ url: 'https://www.eff.org/'
2914
+
2915
+ - regex: 'InfoTigerBot'
2916
+ name: 'InfoTigerBot'
2917
+ category: 'Crawler'
2918
+ url: 'https://infotiger.com/bot'
2919
+ producer:
2920
+ name: 'Infotiger UG'
2921
+ url: 'https://infotiger.com/'
2922
+
2923
+ - regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
2924
+ name: 'Birdcrawlerbot'
2925
+ category: 'Crawler'
2926
+ url: 'https://crawla.de/de/index.php'
2927
+ producer:
2928
+ name: 'Swoppen Systems GmbH'
2929
+ url: 'https://www.swoppen.com/de'
2930
+
2931
+ - regex: 'ScamadviserExternalHit/([\d+.]+)'
2932
+ name: 'Scamadviser External Hit'
2933
+ category: 'Crawler'
2934
+ url: 'https://www.scamadviser.com/'
2935
+ producer:
2936
+ name: 'Ecommerce Operations B.V.'
2937
+ url: 'https://www.scamadviser.com/'
2938
+
2939
+ - regex: 'ZaldamoSearchBot'
2940
+ name: 'Zaldamo'
2941
+ category: 'Crawler'
2942
+ url: 'https://www.zaldamo.com/search.html'
2943
+ producer:
2944
+ name: 'Project Orlando, LLC.'
2945
+ url: 'https://www.projectorlando.com/'
2946
+
2947
+ - regex: 'AFB/([\d+.]+)'
2948
+ name: 'Allloadin Favicon Bot'
2949
+ category: 'Crawler'
2950
+ url: 'https://allloadin.com/'
2951
+
2952
+ - regex: 'SeolytBot/([\d+.]+)'
2953
+ name: 'Seolyt Bot'
2954
+ category: 'Crawler'
2955
+ url: 'https://seolyt.com'
2956
+
2957
+ - regex: 'LinkWalker/([\d+.]+)'
2958
+ name: 'LinkWalker'
2959
+ category: 'Crawler'
2960
+ url: 'https://www.phishlabs.com/'
2961
+ producer:
2962
+ name: 'PhishLabs, Inc.'
2963
+ url: 'https://www.phishlabs.com/'
2964
+
2965
+ - regex: 'RenovateBot/([\d+.]+)'
2966
+ name: 'RenovateBot'
2967
+ category: 'Security Checker'
2968
+ url: 'https://github.com/renovatebot/renovate'
2969
+ producer:
2970
+ name: 'White Source Ltd.'
2971
+ url: 'https://www.mend.io/free-developer-tools/renovate/'
2972
+
2973
+ - regex: 'INETDEX-BOT/([\d+.]+)'
2974
+ name: 'Inetdex Bot'
2975
+ category: 'Crawler'
2976
+ url: 'https://www.inetdex.com/'
2977
+
2978
+ - regex: 'NETZZAPPEN'
2979
+ name: 'NETZZAPPEN'
2980
+ category: 'Crawler'
2981
+ url: 'https://www.netzzappen.com/'
2982
+ producer:
2983
+ name: 'Marc Huemer'
2984
+ url: 'https://www.netzzappen.com/'
2985
+
2986
+ - regex: 'SerpReputationManagementAgent/([\d+.]+)'
2987
+ name: 'SEMrush Reputation Management'
2988
+ category: 'Service Agent'
2989
+ url: 'https://www.semrush.com/bot/'
2990
+ producer:
2991
+ name: 'SEMrush'
2992
+ url: 'https://www.semrush.com/'
2993
+
2994
+ - regex: 'panscient.com'
2995
+ name: 'Panscient'
2996
+ category: 'Crawler'
2997
+ url: 'https://www.panscient.com/faq.htm'
2998
+ producer:
2999
+ name: 'Panscient, Inc.'
3000
+ url: 'https://www.panscient.com/'
3001
+
3002
+ - regex: 'research@pdrlabs.net'
3003
+ name: 'PDR Labs'
3004
+ category: 'Security Checker'
3005
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3006
+ producer:
3007
+ name: 'PDR Labs'
3008
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3009
+
3010
+ - regex: 'Nicecrawler/([\d+.]+)'
3011
+ name: 'NiceCrawler'
3012
+ category: 'Crawler'
3013
+ url: 'https://www.nicecrawler.com/'
3014
+ producer:
3015
+ name: 'Intelium Corp.'
3016
+ url: 'https://www.intelium.com/'
3017
+
3018
+ - regex: 't3versionsBot/([\d+.]+)'
3019
+ name: 't3versions'
3020
+ category: 'Crawler'
3021
+ url: 'https://www.t3versions.com/bot'
3022
+ producer:
3023
+ name: 'Torben Hansen'
3024
+ url: 'https://www.t3versions.com/'
3025
+
3026
+ - regex: 'Crawlson/([\d+.]+)'
3027
+ name: 'Crawlson'
3028
+ category: 'Crawler'
3029
+ url: 'https://www.crawlson.com/about'
3030
+ producer:
3031
+ name: 'Crawlson'
3032
+ url: 'https://www.crawlson.com/'
3033
+
3034
+ - regex: 'tchelebi/([\d+.]+)'
3035
+ name: 'tchelebi'
3036
+ category: 'Crawler'
3037
+ url: 'https://tchelebi.io/'
3038
+ producer:
3039
+ name: 'NormShield, Inc.'
3040
+ url: 'https://blackkite.com/'
3041
+
3042
+ - regex: 'JobboerseBot'
3043
+ name: 'JobboerseBot'
3044
+ category: 'Crawler'
3045
+ url: 'https://www.xing.com/jobs'
3046
+ producer:
3047
+ name: 'New Work SE'
3048
+ url: 'https://www.xing.com/'
3049
+
3050
+ - regex: '^Lkx-(.*)/([\d+.]+)'
3051
+ name: 'LeakIX'
3052
+ category: 'Security Checker'
3053
+ url: 'https://leakix.net/'
3054
+ producer:
3055
+ name: 'BaDaaS SRL'
3056
+ url: 'https://leakix.net/'
3057
+
3058
+ - regex: 'RepoLookoutBot/([\d+.]+)'
3059
+ name: 'Repo Lookout'
3060
+ category: 'Security Checker'
3061
+ url: 'https://www.repo-lookout.org/'
3062
+ producer:
3063
+ name: 'Crissy Field GmbH'
3064
+ url: 'https://www.crissyfield.de/'
3065
+
3066
+ - regex: 'PATHspider'
3067
+ name: 'PATHspider'
3068
+ category: 'Security Checker'
3069
+ url: 'https://pathspider.net/'
3070
+ producer:
3071
+ name: 'MAMI Project'
3072
+ url: 'https://mami-project.eu/'
3073
+
3074
+ - regex: 'everyfeed-spider/([\d+.]+)'
3075
+ name: 'Everyfeed'
3076
+ url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
3077
+ category: 'Feed Fetcher'
3078
+ producer:
3079
+ name: ''
3080
+ url: ''
3081
+
3082
+ - regex: 'Exchange check'
3083
+ name: 'Exchange check'
3084
+ category: 'Security Checker'
3085
+ url: 'https://github.com/GossiTheDog/scanning'
3086
+ producer:
3087
+ name: 'Kevin Beaumont'
3088
+ url: 'https://doublepulsar.com/'
3089
+
3090
+ - regex: 'Sublinq'
3091
+ name: 'Sublinq'
3092
+ category: 'Crawler'
3093
+ url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
3094
+ producer:
3095
+ name: ''
3096
+ url: ''
3097
+
3098
+ - regex: 'Gregarius/([\d+.]+)'
3099
+ name: 'Gregarius'
3100
+ category: 'Feed Fetcher'
3101
+ url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
3102
+ producer:
3103
+ name: ''
3104
+ url: ''
3105
+
3106
+ - regex: 'COMODO DCV'
3107
+ name: 'COMODO DCV'
3108
+ category: 'Service Agent'
3109
+ url: 'https://www.comodo.com/'
3110
+ producer:
3111
+ name: 'Comodo Security Solutions, Inc.'
3112
+ url: 'https://www.comodo.com/'
3113
+
3114
+ - regex: 'Sectigo DCV'
3115
+ name: 'Sectigo DCV'
3116
+ category: 'Service Agent'
3117
+ url: 'https://sectigo.com/'
3118
+ producer:
3119
+ name: 'Sectigo Limited'
3120
+ url: 'https://sectigo.com/'
3121
+
3122
+ - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
3123
+ name: 'KlarnaBot'
3124
+ category: 'Crawler'
3125
+ url: 'https://docs.klarna.com/klarna-bot/'
3126
+ producer:
3127
+ name: 'Klarna Bank AB'
3128
+ url: 'https://www.klarna.com/'
3129
+
3130
+ - regex: 'Taboolabot/([\d+.]+)'
3131
+ name: 'Taboolabot'
3132
+ category: 'Crawler'
3133
+ url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
3134
+ producer:
3135
+ name: 'Taboola, Inc.'
3136
+ url: 'https://www.taboola.com/'
3137
+
3138
+ - regex: 'Asana/([\d+.]+)'
3139
+ name: 'Asana'
3140
+ category: 'Crawler'
3141
+ url: 'https://asana.com/'
3142
+ producer:
3143
+ name: 'Asana, Inc.'
3144
+ url: 'https://asana.com/'
3145
+
3146
+ - regex: 'Chrome Privacy Preserving Prefetch Proxy'
3147
+ name: 'Chrome Privacy Preserving Prefetch Proxy'
3148
+ category: 'Service Agent'
3149
+ url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
3150
+ producer:
3151
+ name: 'Google Inc.'
3152
+ url: 'https://www.google.com/'
3153
+
3154
+ - regex: 'URLinspectorBot/([\d+.]+)'
3155
+ name: 'URLinspector'
3156
+ category: 'Site Monitor'
3157
+ url: 'https://www.urlinspector.com/bot/'
3158
+ producer:
3159
+ name: 'LinkResearchTools GmbH'
3160
+ url: 'https://www.linkresearchtools.com/'
3161
+
3162
+ - regex: 'EntferBot/([\d+.]+)'
3163
+ name: 'Entfer'
3164
+ category: 'Crawler'
3165
+ url: 'https://entfer.com/'
3166
+ producer:
3167
+ name: 'Entfer Ltd.'
3168
+ url: 'https://entfer.com/'
3169
+
3170
+ - regex: 'TagInspector/([\d+.]+)'
3171
+ name: 'Tag Inspector'
3172
+ category: 'Crawler'
3173
+ url: 'https://taginspector.com/'
3174
+ producer:
3175
+ name: 'InfoTrust, LLC'
3176
+ url: 'https://infotrust.com/'
3177
+
3178
+ - regex: 'pageburst'
3179
+ name: 'Pageburst'
3180
+ category: 'Crawler'
3181
+ url: 'https://pageburstls.elsevier.com/'
3182
+ producer:
3183
+ name: 'Elsevier Ltd'
3184
+ url: 'https://www.elsevier.com/'
3185
+
3186
+ - regex: '.+diffbot'
3187
+ name: 'Diffbot'
3188
+ category: 'Crawler'
3189
+ url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
3190
+ producer:
3191
+ name: 'Diffbot Technologies Corp.'
3192
+ url: 'https://www.diffbot.com/'
3193
+
3194
+ - regex: 'DisqusAdstxtCrawler/([\d+.]+)'
3195
+ name: 'Disqus'
3196
+ category: 'Crawler'
3197
+ url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
3198
+ producer:
3199
+ name: 'Disqus, Inc.'
3200
+ url: 'https://disqus.com/'
3201
+
3202
+ - regex: 'startmebot/([\d+.]+)'
3203
+ name: 'start.me'
3204
+ category: 'Crawler'
3205
+ url: 'https://about.start.me/'
3206
+ producer:
3207
+ name: 'start.me BV'
3208
+ url: 'https://about.start.me/'
3209
+
3210
+ - regex: '2ip bot/([\d+.]+)'
3211
+ name: '2ip'
3212
+ category: 'Crawler'
3213
+ url: 'https://2ip.io/'
3214
+
3215
+ - regex: 'ReqBin Curl Client/([\d+.]+)'
3216
+ name: 'ReqBin'
3217
+ category: 'Crawler'
3218
+ url: 'https://reqbin.com/curl'
3219
+
2642
3220
  # Generic detections
2643
3221
  - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2644
3222
  name: 'Generic Bot'