device_detector 1.0.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/regexes/bots.yml CHANGED
@@ -5,6 +5,14 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
+ - regex: 'monitoring360bot'
9
+ name: '360 Monitoring'
10
+ category: 'Site Monitor'
11
+ url: 'https://www.360monitoring.io'
12
+ producer:
13
+ name: 'Plesk International GmbH'
14
+ url: 'https://www.plesk.com'
15
+
8
16
  - regex: '360Spider'
9
17
  name: '360Spider'
10
18
  category: 'Search bot'
@@ -45,6 +53,14 @@
45
53
  name: 'Ahrefs Pte Ltd'
46
54
  url: 'https://ahrefs.com/robot'
47
55
 
56
+ - regex: 'AhrefsSiteAudit/([\d+.]+)'
57
+ name: 'AhrefsSiteAudit'
58
+ category: 'Site Monitor'
59
+ url: 'https://ahrefs.com/robot/site-audit'
60
+ producer:
61
+ name: 'Ahrefs Pte Ltd'
62
+ url: 'https://ahrefs.com/'
63
+
48
64
  - regex: 'ia_archiver|alexabot|verifybot'
49
65
  name: 'Alexa Crawler'
50
66
  category: 'Search bot'
@@ -100,13 +116,13 @@
100
116
  name: 'Apple Inc'
101
117
  url: 'https://www.apple.com'
102
118
 
103
- - regex: "AppSignalBot"
104
- name: "AppSignalBot"
105
- category: "Site Monitor"
106
- url: "https://docs.appsignal.com/uptime-monitoring/"
119
+ - regex: 'AppSignalBot'
120
+ name: 'AppSignalBot'
121
+ category: 'Site Monitor'
122
+ url: 'https://docs.appsignal.com/uptime-monitoring/'
107
123
  producer:
108
- name: "AppSignal"
109
- url: "https://appsignal.com/"
124
+ name: 'AppSignal'
125
+ url: 'https://appsignal.com/'
110
126
 
111
127
  - regex: 'Arachni'
112
128
  name: 'Arachni'
@@ -355,7 +371,7 @@
355
371
  name: 'Certified Security Solutions'
356
372
  url: 'https://www.css-security.com/company/about-us/'
357
373
 
358
- - regex: 'Datadog Agent'
374
+ - regex: 'Datadog Agent|Datadog/?Synthetics'
359
375
  name: 'Datadog Agent'
360
376
  url: 'https://github.com/DataDog/dd-agent'
361
377
  category: 'Site Monitor'
@@ -688,7 +704,7 @@
688
704
  name: 'Visual Meta'
689
705
  url: 'https://www.shopalike.cz/'
690
706
 
691
- - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
707
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Producer)|Google.*/\+/web/snippet'
692
708
  name: 'Googlebot'
693
709
  category: 'Search bot'
694
710
  url: 'http://www.google.com/bot.html'
@@ -783,6 +799,14 @@
783
799
  name: 'Lighthouse'
784
800
  url: 'https://developers.google.com/web/tools/lighthouse'
785
801
 
802
+ - regex: 'last-modified\.com'
803
+ name: 'LastMod Bot'
804
+ category: 'Site Monitor'
805
+ url: 'https://last-modified.com/en/about'
806
+ producer:
807
+ name: ''
808
+ url: 'https://last-modified.com/en'
809
+
786
810
  - regex: 'linkdexbot|linkdex\.com'
787
811
  name: 'Linkdex Bot'
788
812
  category: 'Search bot'
@@ -830,6 +854,14 @@
830
854
  name: ''
831
855
  url: ''
832
856
 
857
+ - regex: 'masscan-ng/([\d+.]+)'
858
+ name: 'masscan-ng'
859
+ url: 'https://github.com/bi-zone/masscan-ng'
860
+ category: 'Crawler'
861
+ producer:
862
+ name: 'BIZON, OOO'
863
+ url: 'https://bi.zone/'
864
+
833
865
  - regex: 'masscan'
834
866
  name: 'masscan'
835
867
  url: 'https://github.com/robertdavidgraham/masscan'
@@ -1049,12 +1081,12 @@
1049
1081
  name: 'PHP Server Monitor'
1050
1082
  url: 'http://www.phpservermonitor.org/'
1051
1083
 
1052
- - regex: 'PocketParser'
1053
- name: 'PocketParser'
1084
+ - regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
1085
+ name: 'Pocket'
1054
1086
  category: 'Read-it-later Service'
1055
1087
  url: 'https://getpocket.com/pocketparser_ua'
1056
1088
  producer:
1057
- name: 'Pocket'
1089
+ name: 'Read It Later, Inc.'
1058
1090
  url: 'https://getpocket.com/'
1059
1091
 
1060
1092
  - regex: 'PritTorrent'
@@ -1317,7 +1349,7 @@
1317
1349
  name: 'Slack Technologies'
1318
1350
  url: 'http://slack.com'
1319
1351
 
1320
- - regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
1352
+ - regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
1321
1353
  name: 'Sogou Spider'
1322
1354
  category: 'Search bot'
1323
1355
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
@@ -1828,6 +1860,14 @@
1828
1860
  name: 'Snapchat Inc.'
1829
1861
  url: 'https://www.snapchat.com'
1830
1862
 
1863
+ - regex: 'Snap URL Preview Service'
1864
+ name: 'Snap URL Preview Service'
1865
+ category: 'Service Agent'
1866
+ url: 'https://developers.snap.com/robots'
1867
+ producer:
1868
+ name: 'Snapchat Inc.'
1869
+ url: 'https://www.snapchat.com/'
1870
+
1831
1871
  - regex: "Let's Encrypt validation server"
1832
1872
  name: "Let's Encrypt Validation"
1833
1873
  category: 'Service Agent'
@@ -1926,7 +1966,7 @@
1926
1966
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1927
1967
  name: 'RSSRadio Bot'
1928
1968
 
1929
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
1969
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
1930
1970
  name: 'Generic Bot'
1931
1971
 
1932
1972
  - regex: '^sentry'
@@ -1935,7 +1975,7 @@
1935
1975
  name: 'Sentry'
1936
1976
  url: 'https://sentry.io'
1937
1977
 
1938
- - regex: '^Spotify'
1978
+ - regex: '^Spotify/(\d+[\.\d]+)$'
1939
1979
  name: 'Spotify'
1940
1980
  producer:
1941
1981
  name: 'Spotify'
@@ -2163,7 +2203,7 @@
2163
2203
  category: 'Crawler'
2164
2204
  url: 'https://serendeputy.com/about/serendeputy-bot'
2165
2205
 
2166
- - regex: 'ias-va.*admantx.*service-fetcher'
2206
+ - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
2167
2207
  name: 'ADmantX Service Fetcher'
2168
2208
  category: 'Service bot'
2169
2209
  url: 'https://www.admantx.com/service-fetcher.html'
@@ -2349,7 +2389,7 @@
2349
2389
  name: ''
2350
2390
  url: ''
2351
2391
 
2352
- - regex: 'scaninfo@expanseinc.com'
2392
+ - regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
2353
2393
  name: 'Expanse'
2354
2394
  category: 'Security Checker'
2355
2395
  url: 'https://expanse.co/'
@@ -2511,6 +2551,11 @@
2511
2551
  category: 'Security Checker'
2512
2552
  url: 'https://github.com/LeakIX/l9tcpid'
2513
2553
 
2554
+ - regex: 'l9explore/([\d+\.])'
2555
+ name: 'l9explore'
2556
+ category: 'Security Checker'
2557
+ url: 'https://github.com/LeakIX/l9explore'
2558
+
2514
2559
  - regex: 'MegaIndex.ru/([\d+\.])'
2515
2560
  name: 'MegaIndex'
2516
2561
  category: 'Crawler'
@@ -2639,6 +2684,539 @@
2639
2684
  name: 'Hochschule für angewandte Wissenschaften München'
2640
2685
  url: 'https://www.hm.edu/'
2641
2686
 
2687
+ - regex: 'TigerBot/([\d+.]+)'
2688
+ name: 'TigerBot'
2689
+ category: 'Crawler'
2690
+ url: 'https://tiger.ch/'
2691
+
2692
+ - regex: 'TestCrawler/([\d+.]+)'
2693
+ name: 'TestCrawler'
2694
+ category: 'Crawler'
2695
+ url: 'https://www.comcepta.com/'
2696
+
2697
+ - regex: 'CrowdTanglebot/([\d+.]+)'
2698
+ name: 'CrowdTangle'
2699
+ category: 'Crawler'
2700
+ url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
2701
+ producer:
2702
+ name: 'CrowdTangle, Inc.'
2703
+ url: 'https://www.crowdtangle.com/'
2704
+
2705
+ - regex: 'Sellers.Guide Crawler by Primis'
2706
+ name: 'Sellers.Guide'
2707
+ category: 'Crawler'
2708
+ url: 'https://sellers.guide/'
2709
+ producer:
2710
+ name: 'McCann Disciplines, Ltd.'
2711
+ url: 'https://www.primis.tech/'
2712
+
2713
+ - regex: 'OnalyticaBot'
2714
+ name: 'Onalytica'
2715
+ category: 'Crawler'
2716
+ url: 'https://www.airslate.com/bot/explore/onalytica-bot'
2717
+ producer:
2718
+ name: 'airSlate, Inc.'
2719
+ url: 'https://www.airslate.com/'
2720
+
2721
+ - regex: 'deepnoc'
2722
+ name: 'deepnoc'
2723
+ category: 'Crawler'
2724
+ url: 'https://deepnoc.com/bot'
2725
+ producer:
2726
+ name: 'deepnoc, GmbH'
2727
+ url: 'https://deepnoc.com/'
2728
+
2729
+ - regex: 'Newslitbot/([\d+.]+)'
2730
+ name: 'Newslitbot'
2731
+ category: 'Crawler'
2732
+ url: 'https://www.newslit.co/'
2733
+ producer:
2734
+ name: 'Newslit, LLC.'
2735
+ url: 'https://www.newslit.co/'
2736
+
2737
+ - regex: 'um-LN/([\d+.]+)'
2738
+ name: 'uMBot'
2739
+ category: 'Crawler'
2740
+ url: 'https://www.ubermetrics-technologies.com/'
2741
+ producer:
2742
+ name: 'Ubermetrics Technologies GmbH'
2743
+ url: 'https://www.ubermetrics-technologies.com/'
2744
+
2745
+ - regex: 'Abonti/([\d+.]+)'
2746
+ name: 'Abonti'
2747
+ category: 'Crawler'
2748
+ url: 'http://abonti.com/'
2749
+
2750
+ - regex: 'collection@infegy.com'
2751
+ name: 'Infegy'
2752
+ category: 'Crawler'
2753
+ url: 'https://infegy.com/'
2754
+ producer:
2755
+ name: 'Infegy, Inc.'
2756
+ url: 'https://infegy.com/'
2757
+
2758
+ - regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
2759
+ name: 'IPIP'
2760
+ category: 'Security Checker'
2761
+ url: 'https://security.ipip.net/'
2762
+ producer:
2763
+ name: 'Beijing Tiantexin Tech. Co., Ltd.'
2764
+ url: 'https://en.ipip.net/'
2765
+
2766
+ - regex: 'ev-crawler/([\d+.]+)'
2767
+ name: 'Headline'
2768
+ category: 'Crawler'
2769
+ url: 'https://headline.com/legal/crawler'
2770
+ producer:
2771
+ name: 'e.ventures Managementgesellschaft mbH'
2772
+ url: 'https://headline.com/'
2773
+
2774
+ - regex: 'webprosbot/([\d+.]+)'
2775
+ name: 'WebPros'
2776
+ category: 'Crawler'
2777
+ url: 'https://webpros.com/'
2778
+ producer:
2779
+ name: 'WebPros Holdco B.V.'
2780
+ url: 'https://webpros.com/'
2781
+
2782
+ - regex: 'ELB-HealthChecker'
2783
+ name: 'Amazon ELB'
2784
+ category: 'Site Monitor'
2785
+ url: 'https://aws.amazon.com/elasticloadbalancing/'
2786
+ producer:
2787
+ name: 'Amazon.com, Inc.'
2788
+ url: 'https://www.amazon.com/'
2789
+
2790
+ - regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
2791
+ name: 'WhereGoes'
2792
+ category: 'Crawler'
2793
+ url: 'https://wheregoes.com/'
2794
+
2795
+ - regex: 'project_patchwatch'
2796
+ name: 'Project Patchwatch'
2797
+ category: 'Crawler'
2798
+ url: 'http://66.240.192.82/'
2799
+
2800
+ - regex: 'InternetMeasurement/([\d+.]+)'
2801
+ name: 'InternetMeasurement'
2802
+ category: 'Crawler'
2803
+ url: 'https://internet-measurement.com/'
2804
+
2805
+ - regex: 'DomainAppender /([\d+.]+)'
2806
+ name: 'DomainAppender'
2807
+ category: 'Crawler'
2808
+ url: 'https://www.profound.net/product/domain_append/'
2809
+ producer:
2810
+ name: 'Profound Networks, LLC'
2811
+ url: 'https://www.profound.net/'
2812
+
2813
+ - regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
2814
+ name: 'FreeWebMonitoring'
2815
+ category: 'Site Monitor'
2816
+ url: 'https://www.freewebmonitoring.com/bot.html'
2817
+ producer:
2818
+ name: 'GreenWave Online, Inc.'
2819
+ url: 'http://www.greenwaveonline.com/'
2820
+
2821
+ - regex: 'Page Modified Pinger'
2822
+ name: 'Page Modified Pinger'
2823
+ category: 'Site Monitor'
2824
+ url: 'https://www.pagemodified.com/'
2825
+ producer:
2826
+ name: 'Valley Hosting, LLC'
2827
+ url: 'https://www.pagemodified.com/'
2828
+
2829
+ - regex: 'adstxtlab.com'
2830
+ name: 'adstxtlab.com'
2831
+ category: 'Crawler'
2832
+ url: 'https://adstxtlab.com/validator.php'
2833
+ producer:
2834
+ name: 'Jaohawi AB'
2835
+ url: 'https://adstxtlab.com/'
2836
+
2837
+ - regex: 'Iframely/([\d+.]+)'
2838
+ name: 'Iframely'
2839
+ category: 'Crawler'
2840
+ url: 'https://iframely.com/'
2841
+ producer:
2842
+ name: 'Itteco Software, Corp.'
2843
+ url: 'https://iframely.com/'
2844
+
2845
+ - regex: 'DomainStatsBot/([\d+.]+)'
2846
+ name: 'DomainStatsBot'
2847
+ category: 'Crawler'
2848
+ url: 'https://domainstats.com/pages/our-bot'
2849
+ producer:
2850
+ name: 'Domainstats Ltd'
2851
+ url: 'https://domainstats.com/'
2852
+
2853
+ - regex: 'aiHitBot/([\d+.]+)'
2854
+ name: 'aiHitBot'
2855
+ category: 'Crawler'
2856
+ url: 'https://www.aihitdata.com/about'
2857
+
2858
+ - regex: 'DomainCrawler/'
2859
+ name: 'DomainCrawler'
2860
+ category: 'Crawler'
2861
+ url: 'https://domaincrawler.com/about-us/'
2862
+
2863
+ - regex: 'DNSResearchBot'
2864
+ name: 'DNSResearchBot'
2865
+ category: 'Crawler'
2866
+
2867
+ - regex: 'GitCrawlerBot'
2868
+ name: 'GitCrawlerBot'
2869
+ category: 'Crawler'
2870
+
2871
+ - regex: 'AdAuth/([\d+.]+)'
2872
+ name: 'AdAuth'
2873
+ category: 'Crawler'
2874
+ url: 'https://www.adauth.com'
2875
+
2876
+ - regex: 'faveeo.com'
2877
+ name: 'Faveeo'
2878
+ category: 'Crawler'
2879
+ url: 'http://www.faveeo.com'
2880
+
2881
+ - regex: 'kozmonavt\.'
2882
+ name: 'Kozmonavt'
2883
+ category: 'Crawler'
2884
+ url: 'https://kozmonavt.ml'
2885
+
2886
+ - regex: 'CriteoBot/'
2887
+ name: 'CriteoBot'
2888
+ category: 'Crawler'
2889
+ url: 'https://www.criteo.com/criteo-crawler/'
2890
+
2891
+ - regex: 'PayPal IPN'
2892
+ name: 'PayPal IPN'
2893
+ category: 'Service Agent'
2894
+ url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
2895
+ producer:
2896
+ name: 'PayPal, Inc.'
2897
+ url: 'https://www.paypal.com/'
2898
+
2899
+ - regex: 'MaCoCu'
2900
+ name: 'MaCoCu'
2901
+ category: 'Crawler'
2902
+ url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
2903
+ producer:
2904
+ name: 'Jožef Stefan Institute'
2905
+ url: 'https://www.ijs.si/ijsw/JSI'
2906
+
2907
+ - regex: 'dnt-policy@eff.org'
2908
+ name: 'EFF Do Not Track Verifier'
2909
+ category: 'Crawler'
2910
+ url: 'https://www.eff.org/issues/do-not-track'
2911
+ producer:
2912
+ name: 'Electronic Frontier Foundation'
2913
+ url: 'https://www.eff.org/'
2914
+
2915
+ - regex: 'InfoTigerBot'
2916
+ name: 'InfoTigerBot'
2917
+ category: 'Crawler'
2918
+ url: 'https://infotiger.com/bot'
2919
+ producer:
2920
+ name: 'Infotiger UG'
2921
+ url: 'https://infotiger.com/'
2922
+
2923
+ - regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
2924
+ name: 'Birdcrawlerbot'
2925
+ category: 'Crawler'
2926
+ url: 'https://crawla.de/de/index.php'
2927
+ producer:
2928
+ name: 'Swoppen Systems GmbH'
2929
+ url: 'https://www.swoppen.com/de'
2930
+
2931
+ - regex: 'ScamadviserExternalHit/([\d+.]+)'
2932
+ name: 'Scamadviser External Hit'
2933
+ category: 'Crawler'
2934
+ url: 'https://www.scamadviser.com/'
2935
+ producer:
2936
+ name: 'Ecommerce Operations B.V.'
2937
+ url: 'https://www.scamadviser.com/'
2938
+
2939
+ - regex: 'ZaldamoSearchBot'
2940
+ name: 'Zaldamo'
2941
+ category: 'Crawler'
2942
+ url: 'https://www.zaldamo.com/search.html'
2943
+ producer:
2944
+ name: 'Project Orlando, LLC.'
2945
+ url: 'https://www.projectorlando.com/'
2946
+
2947
+ - regex: 'AFB/([\d+.]+)'
2948
+ name: 'Allloadin Favicon Bot'
2949
+ category: 'Crawler'
2950
+ url: 'https://allloadin.com/'
2951
+
2952
+ - regex: 'SeolytBot/([\d+.]+)'
2953
+ name: 'Seolyt Bot'
2954
+ category: 'Crawler'
2955
+ url: 'https://seolyt.com'
2956
+
2957
+ - regex: 'LinkWalker/([\d+.]+)'
2958
+ name: 'LinkWalker'
2959
+ category: 'Crawler'
2960
+ url: 'https://www.phishlabs.com/'
2961
+ producer:
2962
+ name: 'PhishLabs, Inc.'
2963
+ url: 'https://www.phishlabs.com/'
2964
+
2965
+ - regex: 'RenovateBot/([\d+.]+)'
2966
+ name: 'RenovateBot'
2967
+ category: 'Security Checker'
2968
+ url: 'https://github.com/renovatebot/renovate'
2969
+ producer:
2970
+ name: 'White Source Ltd.'
2971
+ url: 'https://www.mend.io/free-developer-tools/renovate/'
2972
+
2973
+ - regex: 'INETDEX-BOT/([\d+.]+)'
2974
+ name: 'Inetdex Bot'
2975
+ category: 'Crawler'
2976
+ url: 'https://www.inetdex.com/'
2977
+
2978
+ - regex: 'NETZZAPPEN'
2979
+ name: 'NETZZAPPEN'
2980
+ category: 'Crawler'
2981
+ url: 'https://www.netzzappen.com/'
2982
+ producer:
2983
+ name: 'Marc Huemer'
2984
+ url: 'https://www.netzzappen.com/'
2985
+
2986
+ - regex: 'SerpReputationManagementAgent/([\d+.]+)'
2987
+ name: 'SEMrush Reputation Management'
2988
+ category: 'Service Agent'
2989
+ url: 'https://www.semrush.com/bot/'
2990
+ producer:
2991
+ name: 'SEMrush'
2992
+ url: 'https://www.semrush.com/'
2993
+
2994
+ - regex: 'panscient.com'
2995
+ name: 'Panscient'
2996
+ category: 'Crawler'
2997
+ url: 'https://www.panscient.com/faq.htm'
2998
+ producer:
2999
+ name: 'Panscient, Inc.'
3000
+ url: 'https://www.panscient.com/'
3001
+
3002
+ - regex: 'research@pdrlabs.net'
3003
+ name: 'PDR Labs'
3004
+ category: 'Security Checker'
3005
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3006
+ producer:
3007
+ name: 'PDR Labs'
3008
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3009
+
3010
+ - regex: 'Nicecrawler/([\d+.]+)'
3011
+ name: 'NiceCrawler'
3012
+ category: 'Crawler'
3013
+ url: 'https://www.nicecrawler.com/'
3014
+ producer:
3015
+ name: 'Intelium Corp.'
3016
+ url: 'https://www.intelium.com/'
3017
+
3018
+ - regex: 't3versionsBot/([\d+.]+)'
3019
+ name: 't3versions'
3020
+ category: 'Crawler'
3021
+ url: 'https://www.t3versions.com/bot'
3022
+ producer:
3023
+ name: 'Torben Hansen'
3024
+ url: 'https://www.t3versions.com/'
3025
+
3026
+ - regex: 'Crawlson/([\d+.]+)'
3027
+ name: 'Crawlson'
3028
+ category: 'Crawler'
3029
+ url: 'https://www.crawlson.com/about'
3030
+ producer:
3031
+ name: 'Crawlson'
3032
+ url: 'https://www.crawlson.com/'
3033
+
3034
+ - regex: 'tchelebi/([\d+.]+)'
3035
+ name: 'tchelebi'
3036
+ category: 'Crawler'
3037
+ url: 'https://tchelebi.io/'
3038
+ producer:
3039
+ name: 'NormShield, Inc.'
3040
+ url: 'https://blackkite.com/'
3041
+
3042
+ - regex: 'JobboerseBot'
3043
+ name: 'JobboerseBot'
3044
+ category: 'Crawler'
3045
+ url: 'https://www.xing.com/jobs'
3046
+ producer:
3047
+ name: 'New Work SE'
3048
+ url: 'https://www.xing.com/'
3049
+
3050
+ - regex: '^Lkx-(.*)/([\d+.]+)'
3051
+ name: 'LeakIX'
3052
+ category: 'Security Checker'
3053
+ url: 'https://leakix.net/'
3054
+ producer:
3055
+ name: 'BaDaaS SRL'
3056
+ url: 'https://leakix.net/'
3057
+
3058
+ - regex: 'RepoLookoutBot/([\d+.]+)'
3059
+ name: 'Repo Lookout'
3060
+ category: 'Security Checker'
3061
+ url: 'https://www.repo-lookout.org/'
3062
+ producer:
3063
+ name: 'Crissy Field GmbH'
3064
+ url: 'https://www.crissyfield.de/'
3065
+
3066
+ - regex: 'PATHspider'
3067
+ name: 'PATHspider'
3068
+ category: 'Security Checker'
3069
+ url: 'https://pathspider.net/'
3070
+ producer:
3071
+ name: 'MAMI Project'
3072
+ url: 'https://mami-project.eu/'
3073
+
3074
+ - regex: 'everyfeed-spider/([\d+.]+)'
3075
+ name: 'Everyfeed'
3076
+ url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
3077
+ category: 'Feed Fetcher'
3078
+ producer:
3079
+ name: ''
3080
+ url: ''
3081
+
3082
+ - regex: 'Exchange check'
3083
+ name: 'Exchange check'
3084
+ category: 'Security Checker'
3085
+ url: 'https://github.com/GossiTheDog/scanning'
3086
+ producer:
3087
+ name: 'Kevin Beaumont'
3088
+ url: 'https://doublepulsar.com/'
3089
+
3090
+ - regex: 'Sublinq'
3091
+ name: 'Sublinq'
3092
+ category: 'Crawler'
3093
+ url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
3094
+ producer:
3095
+ name: ''
3096
+ url: ''
3097
+
3098
+ - regex: 'Gregarius/([\d+.]+)'
3099
+ name: 'Gregarius'
3100
+ category: 'Feed Fetcher'
3101
+ url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
3102
+ producer:
3103
+ name: ''
3104
+ url: ''
3105
+
3106
+ - regex: 'COMODO DCV'
3107
+ name: 'COMODO DCV'
3108
+ category: 'Service Agent'
3109
+ url: 'https://www.comodo.com/'
3110
+ producer:
3111
+ name: 'Comodo Security Solutions, Inc.'
3112
+ url: 'https://www.comodo.com/'
3113
+
3114
+ - regex: 'Sectigo DCV'
3115
+ name: 'Sectigo DCV'
3116
+ category: 'Service Agent'
3117
+ url: 'https://sectigo.com/'
3118
+ producer:
3119
+ name: 'Sectigo Limited'
3120
+ url: 'https://sectigo.com/'
3121
+
3122
+ - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
3123
+ name: 'KlarnaBot'
3124
+ category: 'Crawler'
3125
+ url: 'https://docs.klarna.com/klarna-bot/'
3126
+ producer:
3127
+ name: 'Klarna Bank AB'
3128
+ url: 'https://www.klarna.com/'
3129
+
3130
+ - regex: 'Taboolabot/([\d+.]+)'
3131
+ name: 'Taboolabot'
3132
+ category: 'Crawler'
3133
+ url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
3134
+ producer:
3135
+ name: 'Taboola, Inc.'
3136
+ url: 'https://www.taboola.com/'
3137
+
3138
+ - regex: 'Asana/([\d+.]+)'
3139
+ name: 'Asana'
3140
+ category: 'Crawler'
3141
+ url: 'https://asana.com/'
3142
+ producer:
3143
+ name: 'Asana, Inc.'
3144
+ url: 'https://asana.com/'
3145
+
3146
+ - regex: 'Chrome Privacy Preserving Prefetch Proxy'
3147
+ name: 'Chrome Privacy Preserving Prefetch Proxy'
3148
+ category: 'Service Agent'
3149
+ url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
3150
+ producer:
3151
+ name: 'Google Inc.'
3152
+ url: 'https://www.google.com/'
3153
+
3154
+ - regex: 'URLinspectorBot/([\d+.]+)'
3155
+ name: 'URLinspector'
3156
+ category: 'Site Monitor'
3157
+ url: 'https://www.urlinspector.com/bot/'
3158
+ producer:
3159
+ name: 'LinkResearchTools GmbH'
3160
+ url: 'https://www.linkresearchtools.com/'
3161
+
3162
+ - regex: 'EntferBot/([\d+.]+)'
3163
+ name: 'Entfer'
3164
+ category: 'Crawler'
3165
+ url: 'https://entfer.com/'
3166
+ producer:
3167
+ name: 'Entfer Ltd.'
3168
+ url: 'https://entfer.com/'
3169
+
3170
+ - regex: 'TagInspector/([\d+.]+)'
3171
+ name: 'Tag Inspector'
3172
+ category: 'Crawler'
3173
+ url: 'https://taginspector.com/'
3174
+ producer:
3175
+ name: 'InfoTrust, LLC'
3176
+ url: 'https://infotrust.com/'
3177
+
3178
+ - regex: 'pageburst'
3179
+ name: 'Pageburst'
3180
+ category: 'Crawler'
3181
+ url: 'https://pageburstls.elsevier.com/'
3182
+ producer:
3183
+ name: 'Elsevier Ltd'
3184
+ url: 'https://www.elsevier.com/'
3185
+
3186
+ - regex: '.+diffbot'
3187
+ name: 'Diffbot'
3188
+ category: 'Crawler'
3189
+ url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
3190
+ producer:
3191
+ name: 'Diffbot Technologies Corp.'
3192
+ url: 'https://www.diffbot.com/'
3193
+
3194
+ - regex: 'DisqusAdstxtCrawler/([\d+.]+)'
3195
+ name: 'Disqus'
3196
+ category: 'Crawler'
3197
+ url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
3198
+ producer:
3199
+ name: 'Disqus, Inc.'
3200
+ url: 'https://disqus.com/'
3201
+
3202
+ - regex: 'startmebot/([\d+.]+)'
3203
+ name: 'start.me'
3204
+ category: 'Crawler'
3205
+ url: 'https://about.start.me/'
3206
+ producer:
3207
+ name: 'start.me BV'
3208
+ url: 'https://about.start.me/'
3209
+
3210
+ - regex: '2ip bot/([\d+.]+)'
3211
+ name: '2ip'
3212
+ category: 'Crawler'
3213
+ url: 'https://2ip.io/'
3214
+
3215
+ - regex: 'ReqBin Curl Client/([\d+.]+)'
3216
+ name: 'ReqBin'
3217
+ category: 'Crawler'
3218
+ url: 'https://reqbin.com/curl'
3219
+
2642
3220
  # Generic detections
2643
3221
  - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2644
3222
  name: 'Generic Bot'