device_detector 1.0.7 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/regexes/bots.yml CHANGED
@@ -5,6 +5,22 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
+ - regex: 'monitoring360bot'
9
+ name: '360 Monitoring'
10
+ category: 'Site Monitor'
11
+ url: 'https://www.360monitoring.io'
12
+ producer:
13
+ name: 'Plesk International GmbH'
14
+ url: 'https://www.plesk.com'
15
+
16
+ - regex: 'Cloudflare-Healthchecks'
17
+ name: 'Cloudflare Health Checks'
18
+ category: 'Service Agent'
19
+ url: 'https://developers.cloudflare.com/health-checks/'
20
+ producer:
21
+ name: 'CloudFlare'
22
+ url: 'http://www.cloudflare.com'
23
+
8
24
  - regex: '360Spider'
9
25
  name: '360Spider'
10
26
  category: 'Search bot'
@@ -45,6 +61,14 @@
45
61
  name: 'Ahrefs Pte Ltd'
46
62
  url: 'https://ahrefs.com/robot'
47
63
 
64
+ - regex: 'AhrefsSiteAudit/([\d+.]+)'
65
+ name: 'AhrefsSiteAudit'
66
+ category: 'Site Monitor'
67
+ url: 'https://ahrefs.com/robot/site-audit'
68
+ producer:
69
+ name: 'Ahrefs Pte Ltd'
70
+ url: 'https://ahrefs.com/'
71
+
48
72
  - regex: 'ia_archiver|alexabot|verifybot'
49
73
  name: 'Alexa Crawler'
50
74
  category: 'Search bot'
@@ -100,13 +124,13 @@
100
124
  name: 'Apple Inc'
101
125
  url: 'https://www.apple.com'
102
126
 
103
- - regex: "AppSignalBot"
104
- name: "AppSignalBot"
105
- category: "Site Monitor"
106
- url: "https://docs.appsignal.com/uptime-monitoring/"
127
+ - regex: 'AppSignalBot'
128
+ name: 'AppSignalBot'
129
+ category: 'Site Monitor'
130
+ url: 'https://docs.appsignal.com/uptime-monitoring/'
107
131
  producer:
108
- name: "AppSignal"
109
- url: "https://appsignal.com/"
132
+ name: 'AppSignal'
133
+ url: 'https://appsignal.com/'
110
134
 
111
135
  - regex: 'Arachni'
112
136
  name: 'Arachni'
@@ -355,7 +379,7 @@
355
379
  name: 'Certified Security Solutions'
356
380
  url: 'https://www.css-security.com/company/about-us/'
357
381
 
358
- - regex: 'Datadog Agent'
382
+ - regex: 'Datadog Agent|Datadog/?Synthetics'
359
383
  name: 'Datadog Agent'
360
384
  url: 'https://github.com/DataDog/dd-agent'
361
385
  category: 'Site Monitor'
@@ -688,7 +712,7 @@
688
712
  name: 'Visual Meta'
689
713
  url: 'https://www.shopalike.cz/'
690
714
 
691
- - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
715
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Prober|Producer)|Google.*/\+/web/snippet'
692
716
  name: 'Googlebot'
693
717
  category: 'Search bot'
694
718
  url: 'http://www.google.com/bot.html'
@@ -711,6 +735,11 @@
711
735
  name: 'HubSpot Inc.'
712
736
  url: 'https://www.hubspot.com'
713
737
 
738
+ - regex: 'vuhuvBot'
739
+ name: 'Vuhuv Bot'
740
+ category: 'Crawler'
741
+ url: 'http://vuhuv.com/bot.html'
742
+
714
743
  - regex: 'HTTPMon'
715
744
  name: 'HTTPMon'
716
745
  category: 'Site Monitor'
@@ -783,6 +812,14 @@
783
812
  name: 'Lighthouse'
784
813
  url: 'https://developers.google.com/web/tools/lighthouse'
785
814
 
815
+ - regex: 'last-modified\.com'
816
+ name: 'LastMod Bot'
817
+ category: 'Site Monitor'
818
+ url: 'https://last-modified.com/en/about'
819
+ producer:
820
+ name: ''
821
+ url: 'https://last-modified.com/en'
822
+
786
823
  - regex: 'linkdexbot|linkdex\.com'
787
824
  name: 'Linkdex Bot'
788
825
  category: 'Search bot'
@@ -830,6 +867,14 @@
830
867
  name: ''
831
868
  url: ''
832
869
 
870
+ - regex: 'masscan-ng/([\d+.]+)'
871
+ name: 'masscan-ng'
872
+ url: 'https://github.com/bi-zone/masscan-ng'
873
+ category: 'Crawler'
874
+ producer:
875
+ name: 'BIZON, OOO'
876
+ url: 'https://bi.zone/'
877
+
833
878
  - regex: 'masscan'
834
879
  name: 'masscan'
835
880
  url: 'https://github.com/robertdavidgraham/masscan'
@@ -988,6 +1033,14 @@
988
1033
  - regex: 'Octopus [0-9]'
989
1034
  name: 'Octopus'
990
1035
 
1036
+ - regex: 'OnlineOrNot.com_bot'
1037
+ name: 'OnlineOrNot Bot'
1038
+ category: 'Site Monitor'
1039
+ url: 'https://onlineornot.com/website-monitoring'
1040
+ producer:
1041
+ name: 'OnlineOrNot'
1042
+ url: 'https://onlineornot.com'
1043
+
991
1044
  - regex: 'omgili'
992
1045
  name: 'Omgili bot'
993
1046
  category: 'Search bot'
@@ -1049,12 +1102,12 @@
1049
1102
  name: 'PHP Server Monitor'
1050
1103
  url: 'http://www.phpservermonitor.org/'
1051
1104
 
1052
- - regex: 'PocketParser'
1053
- name: 'PocketParser'
1105
+ - regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
1106
+ name: 'Pocket'
1054
1107
  category: 'Read-it-later Service'
1055
1108
  url: 'https://getpocket.com/pocketparser_ua'
1056
1109
  producer:
1057
- name: 'Pocket'
1110
+ name: 'Read It Later, Inc.'
1058
1111
  url: 'https://getpocket.com/'
1059
1112
 
1060
1113
  - regex: 'PritTorrent'
@@ -1317,7 +1370,7 @@
1317
1370
  name: 'Slack Technologies'
1318
1371
  url: 'http://slack.com'
1319
1372
 
1320
- - regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
1373
+ - regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
1321
1374
  name: 'Sogou Spider'
1322
1375
  category: 'Search bot'
1323
1376
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
@@ -1828,6 +1881,14 @@
1828
1881
  name: 'Snapchat Inc.'
1829
1882
  url: 'https://www.snapchat.com'
1830
1883
 
1884
+ - regex: 'Snap URL Preview Service'
1885
+ name: 'Snap URL Preview Service'
1886
+ category: 'Service Agent'
1887
+ url: 'https://developers.snap.com/robots'
1888
+ producer:
1889
+ name: 'Snapchat Inc.'
1890
+ url: 'https://www.snapchat.com/'
1891
+
1831
1892
  - regex: "Let's Encrypt validation server"
1832
1893
  name: "Let's Encrypt Validation"
1833
1894
  category: 'Service Agent'
@@ -1926,7 +1987,7 @@
1926
1987
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1927
1988
  name: 'RSSRadio Bot'
1928
1989
 
1929
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
1990
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
1930
1991
  name: 'Generic Bot'
1931
1992
 
1932
1993
  - regex: '^sentry'
@@ -1935,7 +1996,7 @@
1935
1996
  name: 'Sentry'
1936
1997
  url: 'https://sentry.io'
1937
1998
 
1938
- - regex: '^Spotify'
1999
+ - regex: '^Spotify/(\d+[\.\d]+)$'
1939
2000
  name: 'Spotify'
1940
2001
  producer:
1941
2002
  name: 'Spotify'
@@ -1985,7 +2046,15 @@
1985
2046
  name: 'WooRank sprl'
1986
2047
  url: 'https://www.woorank.com/'
1987
2048
 
1988
- - regex: '(Match|LinkCheck) by Siteimprove.com'
2049
+ - regex: 'by Siteimprove\.com'
2050
+ name: 'Siteimprove'
2051
+ category: 'Search bot'
2052
+ url: 'https://siteimprove.com/'
2053
+ producer:
2054
+ name: 'Siteimprove GmbH'
2055
+ url: 'https://siteimprove.com/'
2056
+
2057
+ - regex: 'Image size by Siteimprove\.com'
1989
2058
  name: 'Siteimprove'
1990
2059
  category: 'Search bot'
1991
2060
  url: 'https://siteimprove.com/'
@@ -2113,6 +2182,14 @@
2113
2182
  name: 'Startpagina B.V.'
2114
2183
  url: 'https://www.startpagina.nl/'
2115
2184
 
2185
+ - regex: 'MoodleBot-Linkchecker'
2186
+ name: 'MoodleBot Linkchecker'
2187
+ category: 'Search bot'
2188
+ url: 'https://docs.moodle.org/en/Usage'
2189
+ producer:
2190
+ name: 'Moodle Pty Ltd'
2191
+ url: 'https://moodle.org/'
2192
+
2116
2193
  - regex: 'GTmetrix'
2117
2194
  name: 'GTmetrix'
2118
2195
  category: 'Crawler'
@@ -2163,7 +2240,7 @@
2163
2240
  category: 'Crawler'
2164
2241
  url: 'https://serendeputy.com/about/serendeputy-bot'
2165
2242
 
2166
- - regex: 'ias-va.*admantx.*service-fetcher'
2243
+ - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
2167
2244
  name: 'ADmantX Service Fetcher'
2168
2245
  category: 'Service bot'
2169
2246
  url: 'https://www.admantx.com/service-fetcher.html'
@@ -2349,7 +2426,7 @@
2349
2426
  name: ''
2350
2427
  url: ''
2351
2428
 
2352
- - regex: 'scaninfo@expanseinc.com'
2429
+ - regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
2353
2430
  name: 'Expanse'
2354
2431
  category: 'Security Checker'
2355
2432
  url: 'https://expanse.co/'
@@ -2372,6 +2449,13 @@
2372
2449
  producer:
2373
2450
  name: 'Hatena Co., Ltd.'
2374
2451
  url: 'https://www.hatena.ne.jp'
2452
+ - regex: 'Hatena-?Bookmark'
2453
+ name: 'Hatena Bookmark'
2454
+ category: 'Crawler'
2455
+ url: 'https://www.hatena.ne.jp/faq/'
2456
+ producer:
2457
+ name: 'Hatena Co., Ltd.'
2458
+ url: 'https://www.hatena.ne.jp'
2375
2459
 
2376
2460
  - regex: 'RyowlEngine/(\d+)'
2377
2461
  name: 'Ryowl'
@@ -2511,6 +2595,19 @@
2511
2595
  category: 'Security Checker'
2512
2596
  url: 'https://github.com/LeakIX/l9tcpid'
2513
2597
 
2598
+ - regex: 'l9explore/([\d+\.]+)'
2599
+ name: 'l9explore'
2600
+ category: 'Security Checker'
2601
+ url: 'https://github.com/LeakIX/l9explore'
2602
+
2603
+ - regex: 'l9scan/|^Lkx-(.*)/([\d+.]+)'
2604
+ name: 'LeakIX'
2605
+ category: 'Security Checker'
2606
+ url: 'https://leakix.net/'
2607
+ producer:
2608
+ name: 'BaDaaS SRL'
2609
+ url: 'https://leakix.net/'
2610
+
2514
2611
  - regex: 'MegaIndex.ru/([\d+\.])'
2515
2612
  name: 'MegaIndex'
2516
2613
  category: 'Crawler'
@@ -2639,6 +2736,649 @@
2639
2736
  name: 'Hochschule für angewandte Wissenschaften München'
2640
2737
  url: 'https://www.hm.edu/'
2641
2738
 
2739
+ - regex: 'TigerBot/([\d+.]+)'
2740
+ name: 'TigerBot'
2741
+ category: 'Crawler'
2742
+ url: 'https://tiger.ch/'
2743
+
2744
+ - regex: 'TestCrawler/([\d+.]+)'
2745
+ name: 'TestCrawler'
2746
+ category: 'Crawler'
2747
+ url: 'https://www.comcepta.com/'
2748
+
2749
+ - regex: 'CrowdTanglebot/([\d+.]+)'
2750
+ name: 'CrowdTangle'
2751
+ category: 'Crawler'
2752
+ url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
2753
+ producer:
2754
+ name: 'CrowdTangle, Inc.'
2755
+ url: 'https://www.crowdtangle.com/'
2756
+
2757
+ - regex: 'Sellers.Guide Crawler by Primis'
2758
+ name: 'Sellers.Guide'
2759
+ category: 'Crawler'
2760
+ url: 'https://sellers.guide/'
2761
+ producer:
2762
+ name: 'McCann Disciplines, Ltd.'
2763
+ url: 'https://www.primis.tech/'
2764
+
2765
+ - regex: 'OnalyticaBot'
2766
+ name: 'Onalytica'
2767
+ category: 'Crawler'
2768
+ url: 'https://www.airslate.com/bot/explore/onalytica-bot'
2769
+ producer:
2770
+ name: 'airSlate, Inc.'
2771
+ url: 'https://www.airslate.com/'
2772
+
2773
+ - regex: 'deepnoc'
2774
+ name: 'deepnoc'
2775
+ category: 'Crawler'
2776
+ url: 'https://deepnoc.com/bot'
2777
+ producer:
2778
+ name: 'deepnoc, GmbH'
2779
+ url: 'https://deepnoc.com/'
2780
+
2781
+ - regex: 'Newslitbot/([\d+.]+)'
2782
+ name: 'Newslitbot'
2783
+ category: 'Crawler'
2784
+ url: 'https://www.newslit.co/'
2785
+ producer:
2786
+ name: 'Newslit, LLC.'
2787
+ url: 'https://www.newslit.co/'
2788
+
2789
+ - regex: 'um-LN/([\d+.]+)'
2790
+ name: 'uMBot'
2791
+ category: 'Crawler'
2792
+ url: 'https://www.ubermetrics-technologies.com/'
2793
+ producer:
2794
+ name: 'Ubermetrics Technologies GmbH'
2795
+ url: 'https://www.ubermetrics-technologies.com/'
2796
+
2797
+ - regex: 'Abonti/([\d+.]+)'
2798
+ name: 'Abonti'
2799
+ category: 'Crawler'
2800
+ url: 'http://abonti.com/'
2801
+
2802
+ - regex: 'collection@infegy.com'
2803
+ name: 'Infegy'
2804
+ category: 'Crawler'
2805
+ url: 'https://infegy.com/'
2806
+ producer:
2807
+ name: 'Infegy, Inc.'
2808
+ url: 'https://infegy.com/'
2809
+
2810
+ - regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
2811
+ name: 'IPIP'
2812
+ category: 'Security Checker'
2813
+ url: 'https://security.ipip.net/'
2814
+ producer:
2815
+ name: 'Beijing Tiantexin Tech. Co., Ltd.'
2816
+ url: 'https://en.ipip.net/'
2817
+
2818
+ - regex: 'ev-crawler/([\d+.]+)'
2819
+ name: 'Headline'
2820
+ category: 'Crawler'
2821
+ url: 'https://headline.com/legal/crawler'
2822
+ producer:
2823
+ name: 'e.ventures Managementgesellschaft mbH'
2824
+ url: 'https://headline.com/'
2825
+
2826
+ - regex: 'webprosbot/([\d+.]+)'
2827
+ name: 'WebPros'
2828
+ category: 'Crawler'
2829
+ url: 'https://webpros.com/'
2830
+ producer:
2831
+ name: 'WebPros Holdco B.V.'
2832
+ url: 'https://webpros.com/'
2833
+
2834
+ - regex: 'ELB-HealthChecker'
2835
+ name: 'Amazon ELB'
2836
+ category: 'Site Monitor'
2837
+ url: 'https://aws.amazon.com/elasticloadbalancing/'
2838
+ producer:
2839
+ name: 'Amazon.com, Inc.'
2840
+ url: 'https://www.amazon.com/'
2841
+
2842
+ - regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
2843
+ name: 'WhereGoes'
2844
+ category: 'Crawler'
2845
+ url: 'https://wheregoes.com/'
2846
+
2847
+ - regex: 'project_patchwatch'
2848
+ name: 'Project Patchwatch'
2849
+ category: 'Crawler'
2850
+ url: 'http://66.240.192.82/'
2851
+
2852
+ - regex: 'InternetMeasurement/([\d+.]+)'
2853
+ name: 'InternetMeasurement'
2854
+ category: 'Crawler'
2855
+ url: 'https://internet-measurement.com/'
2856
+
2857
+ - regex: 'DomainAppender /([\d+.]+)'
2858
+ name: 'DomainAppender'
2859
+ category: 'Crawler'
2860
+ url: 'https://www.profound.net/product/domain_append/'
2861
+ producer:
2862
+ name: 'Profound Networks, LLC'
2863
+ url: 'https://www.profound.net/'
2864
+
2865
+ - regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
2866
+ name: 'FreeWebMonitoring'
2867
+ category: 'Site Monitor'
2868
+ url: 'https://www.freewebmonitoring.com/bot.html'
2869
+ producer:
2870
+ name: 'GreenWave Online, Inc.'
2871
+ url: 'http://www.greenwaveonline.com/'
2872
+
2873
+ - regex: 'Page Modified Pinger'
2874
+ name: 'Page Modified Pinger'
2875
+ category: 'Site Monitor'
2876
+ url: 'https://www.pagemodified.com/'
2877
+ producer:
2878
+ name: 'Valley Hosting, LLC'
2879
+ url: 'https://www.pagemodified.com/'
2880
+
2881
+ - regex: 'adstxtlab.com'
2882
+ name: 'adstxtlab.com'
2883
+ category: 'Crawler'
2884
+ url: 'https://adstxtlab.com/validator.php'
2885
+ producer:
2886
+ name: 'Jaohawi AB'
2887
+ url: 'https://adstxtlab.com/'
2888
+
2889
+ - regex: 'Iframely/([\d+.]+)'
2890
+ name: 'Iframely'
2891
+ category: 'Crawler'
2892
+ url: 'https://iframely.com/'
2893
+ producer:
2894
+ name: 'Itteco Software, Corp.'
2895
+ url: 'https://iframely.com/'
2896
+
2897
+ - regex: 'DomainStatsBot/([\d+.]+)'
2898
+ name: 'DomainStatsBot'
2899
+ category: 'Crawler'
2900
+ url: 'https://domainstats.com/pages/our-bot'
2901
+ producer:
2902
+ name: 'Domainstats Ltd'
2903
+ url: 'https://domainstats.com/'
2904
+
2905
+ - regex: 'aiHitBot/([\d+.]+)'
2906
+ name: 'aiHitBot'
2907
+ category: 'Crawler'
2908
+ url: 'https://www.aihitdata.com/about'
2909
+
2910
+ - regex: 'DomainCrawler/'
2911
+ name: 'DomainCrawler'
2912
+ category: 'Crawler'
2913
+ url: 'https://domaincrawler.com/about-us/'
2914
+
2915
+ - regex: 'DNSResearchBot'
2916
+ name: 'DNSResearchBot'
2917
+ category: 'Crawler'
2918
+
2919
+ - regex: 'GitCrawlerBot'
2920
+ name: 'GitCrawlerBot'
2921
+ category: 'Crawler'
2922
+
2923
+ - regex: 'AdAuth/([\d+.]+)'
2924
+ name: 'AdAuth'
2925
+ category: 'Crawler'
2926
+ url: 'https://www.adauth.com'
2927
+
2928
+ - regex: 'faveeo.com'
2929
+ name: 'Faveeo'
2930
+ category: 'Crawler'
2931
+ url: 'http://www.faveeo.com'
2932
+
2933
+ - regex: 'kozmonavt\.'
2934
+ name: 'Kozmonavt'
2935
+ category: 'Crawler'
2936
+ url: 'https://kozmonavt.ml'
2937
+
2938
+ - regex: 'CriteoBot/'
2939
+ name: 'CriteoBot'
2940
+ category: 'Crawler'
2941
+ url: 'https://www.criteo.com/criteo-crawler/'
2942
+
2943
+ - regex: 'PayPal IPN'
2944
+ name: 'PayPal IPN'
2945
+ category: 'Service Agent'
2946
+ url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
2947
+ producer:
2948
+ name: 'PayPal, Inc.'
2949
+ url: 'https://www.paypal.com/'
2950
+
2951
+ - regex: 'MaCoCu'
2952
+ name: 'MaCoCu'
2953
+ category: 'Crawler'
2954
+ url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
2955
+ producer:
2956
+ name: 'Jožef Stefan Institute'
2957
+ url: 'https://www.ijs.si/ijsw/JSI'
2958
+
2959
+ - regex: 'dnt-policy@eff.org'
2960
+ name: 'EFF Do Not Track Verifier'
2961
+ category: 'Crawler'
2962
+ url: 'https://www.eff.org/issues/do-not-track'
2963
+ producer:
2964
+ name: 'Electronic Frontier Foundation'
2965
+ url: 'https://www.eff.org/'
2966
+
2967
+ - regex: 'InfoTigerBot'
2968
+ name: 'InfoTigerBot'
2969
+ category: 'Crawler'
2970
+ url: 'https://infotiger.com/bot'
2971
+ producer:
2972
+ name: 'Infotiger UG'
2973
+ url: 'https://infotiger.com/'
2974
+
2975
+ - regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
2976
+ name: 'Birdcrawlerbot'
2977
+ category: 'Crawler'
2978
+ url: 'https://crawla.de/de/index.php'
2979
+ producer:
2980
+ name: 'Swoppen Systems GmbH'
2981
+ url: 'https://www.swoppen.com/de'
2982
+
2983
+ - regex: 'ScamadviserExternalHit/([\d+.]+)'
2984
+ name: 'Scamadviser External Hit'
2985
+ category: 'Crawler'
2986
+ url: 'https://www.scamadviser.com/'
2987
+ producer:
2988
+ name: 'Ecommerce Operations B.V.'
2989
+ url: 'https://www.scamadviser.com/'
2990
+
2991
+ - regex: 'ZaldamoSearchBot'
2992
+ name: 'Zaldamo'
2993
+ category: 'Crawler'
2994
+ url: 'https://www.zaldamo.com/search.html'
2995
+ producer:
2996
+ name: 'Project Orlando, LLC.'
2997
+ url: 'https://www.projectorlando.com/'
2998
+
2999
+ - regex: 'AFB/([\d+.]+)'
3000
+ name: 'Allloadin Favicon Bot'
3001
+ category: 'Crawler'
3002
+ url: 'https://allloadin.com/'
3003
+
3004
+ - regex: 'SeolytBot/([\d+.]+)'
3005
+ name: 'Seolyt Bot'
3006
+ category: 'Crawler'
3007
+ url: 'https://seolyt.com'
3008
+
3009
+ - regex: 'LinkWalker/([\d+.]+)'
3010
+ name: 'LinkWalker'
3011
+ category: 'Crawler'
3012
+ url: 'https://www.phishlabs.com/'
3013
+ producer:
3014
+ name: 'PhishLabs, Inc.'
3015
+ url: 'https://www.phishlabs.com/'
3016
+
3017
+ - regex: 'RenovateBot/([\d+.]+)'
3018
+ name: 'RenovateBot'
3019
+ category: 'Security Checker'
3020
+ url: 'https://github.com/renovatebot/renovate'
3021
+ producer:
3022
+ name: 'White Source Ltd.'
3023
+ url: 'https://www.mend.io/free-developer-tools/renovate/'
3024
+
3025
+ - regex: 'INETDEX-BOT/([\d+.]+)'
3026
+ name: 'Inetdex Bot'
3027
+ category: 'Crawler'
3028
+ url: 'https://www.inetdex.com/'
3029
+
3030
+ - regex: 'NETZZAPPEN'
3031
+ name: 'NETZZAPPEN'
3032
+ category: 'Crawler'
3033
+ url: 'https://www.netzzappen.com/'
3034
+ producer:
3035
+ name: 'Marc Huemer'
3036
+ url: 'https://www.netzzappen.com/'
3037
+
3038
+ - regex: 'SerpReputationManagementAgent/([\d+.]+)'
3039
+ name: 'SEMrush Reputation Management'
3040
+ category: 'Service Agent'
3041
+ url: 'https://www.semrush.com/bot/'
3042
+ producer:
3043
+ name: 'SEMrush'
3044
+ url: 'https://www.semrush.com/'
3045
+
3046
+ - regex: 'panscient.com'
3047
+ name: 'Panscient'
3048
+ category: 'Crawler'
3049
+ url: 'https://www.panscient.com/faq.htm'
3050
+ producer:
3051
+ name: 'Panscient, Inc.'
3052
+ url: 'https://www.panscient.com/'
3053
+
3054
+ - regex: 'research@pdrlabs.net'
3055
+ name: 'PDR Labs'
3056
+ category: 'Security Checker'
3057
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3058
+ producer:
3059
+ name: 'PDR Labs'
3060
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3061
+
3062
+ - regex: 'Nicecrawler/([\d+.]+)'
3063
+ name: 'NiceCrawler'
3064
+ category: 'Crawler'
3065
+ url: 'https://www.nicecrawler.com/'
3066
+ producer:
3067
+ name: 'Intelium Corp.'
3068
+ url: 'https://www.intelium.com/'
3069
+
3070
+ - regex: 't3versionsBot/([\d+.]+)'
3071
+ name: 't3versions'
3072
+ category: 'Crawler'
3073
+ url: 'https://www.t3versions.com/bot'
3074
+ producer:
3075
+ name: 'Torben Hansen'
3076
+ url: 'https://www.t3versions.com/'
3077
+
3078
+ - regex: 'Crawlson/([\d+.]+)'
3079
+ name: 'Crawlson'
3080
+ category: 'Crawler'
3081
+ url: 'https://www.crawlson.com/about'
3082
+ producer:
3083
+ name: 'Crawlson'
3084
+ url: 'https://www.crawlson.com/'
3085
+
3086
+ - regex: 'tchelebi/([\d+.]+)'
3087
+ name: 'tchelebi'
3088
+ category: 'Crawler'
3089
+ url: 'https://tchelebi.io/'
3090
+ producer:
3091
+ name: 'NormShield, Inc.'
3092
+ url: 'https://blackkite.com/'
3093
+
3094
+ - regex: 'JobboerseBot'
3095
+ name: 'JobboerseBot'
3096
+ category: 'Crawler'
3097
+ url: 'https://www.xing.com/jobs'
3098
+ producer:
3099
+ name: 'New Work SE'
3100
+ url: 'https://www.xing.com/'
3101
+
3102
+ - regex: 'RepoLookoutBot/([\d+.]+)'
3103
+ name: 'Repo Lookout'
3104
+ category: 'Security Checker'
3105
+ url: 'https://www.repo-lookout.org/'
3106
+ producer:
3107
+ name: 'Crissy Field GmbH'
3108
+ url: 'https://www.crissyfield.de/'
3109
+
3110
+ - regex: 'PATHspider'
3111
+ name: 'PATHspider'
3112
+ category: 'Security Checker'
3113
+ url: 'https://pathspider.net/'
3114
+ producer:
3115
+ name: 'MAMI Project'
3116
+ url: 'https://mami-project.eu/'
3117
+
3118
+ - regex: 'everyfeed-spider/([\d+.]+)'
3119
+ name: 'Everyfeed'
3120
+ url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
3121
+ category: 'Feed Fetcher'
3122
+ producer:
3123
+ name: ''
3124
+ url: ''
3125
+
3126
+ - regex: 'Exchange check'
3127
+ name: 'Exchange check'
3128
+ category: 'Security Checker'
3129
+ url: 'https://github.com/GossiTheDog/scanning'
3130
+ producer:
3131
+ name: 'Kevin Beaumont'
3132
+ url: 'https://doublepulsar.com/'
3133
+
3134
+ - regex: 'Sublinq'
3135
+ name: 'Sublinq'
3136
+ category: 'Crawler'
3137
+ url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
3138
+ producer:
3139
+ name: ''
3140
+ url: ''
3141
+
3142
+ - regex: 'Gregarius/([\d+.]+)'
3143
+ name: 'Gregarius'
3144
+ category: 'Feed Fetcher'
3145
+ url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
3146
+ producer:
3147
+ name: ''
3148
+ url: ''
3149
+
3150
+ - regex: 'COMODO DCV'
3151
+ name: 'COMODO DCV'
3152
+ category: 'Service Agent'
3153
+ url: 'https://www.comodo.com/'
3154
+ producer:
3155
+ name: 'Comodo Security Solutions, Inc.'
3156
+ url: 'https://www.comodo.com/'
3157
+
3158
+ - regex: 'Sectigo DCV'
3159
+ name: 'Sectigo DCV'
3160
+ category: 'Service Agent'
3161
+ url: 'https://sectigo.com/'
3162
+ producer:
3163
+ name: 'Sectigo Limited'
3164
+ url: 'https://sectigo.com/'
3165
+
3166
+ - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
3167
+ name: 'KlarnaBot'
3168
+ category: 'Crawler'
3169
+ url: 'https://docs.klarna.com/klarna-bot/'
3170
+ producer:
3171
+ name: 'Klarna Bank AB'
3172
+ url: 'https://www.klarna.com/'
3173
+
3174
+ - regex: 'Taboolabot/([\d+.]+)'
3175
+ name: 'Taboolabot'
3176
+ category: 'Crawler'
3177
+ url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
3178
+ producer:
3179
+ name: 'Taboola, Inc.'
3180
+ url: 'https://www.taboola.com/'
3181
+
3182
+ - regex: 'Asana/([\d+.]+)'
3183
+ name: 'Asana'
3184
+ category: 'Crawler'
3185
+ url: 'https://asana.com/'
3186
+ producer:
3187
+ name: 'Asana, Inc.'
3188
+ url: 'https://asana.com/'
3189
+
3190
+ - regex: 'Chrome Privacy Preserving Prefetch Proxy'
3191
+ name: 'Chrome Privacy Preserving Prefetch Proxy'
3192
+ category: 'Service Agent'
3193
+ url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
3194
+ producer:
3195
+ name: 'Google Inc.'
3196
+ url: 'https://www.google.com/'
3197
+
3198
+ - regex: 'URLinspectorBot/([\d+.]+)'
3199
+ name: 'URLinspector'
3200
+ category: 'Site Monitor'
3201
+ url: 'https://www.urlinspector.com/bot/'
3202
+ producer:
3203
+ name: 'LinkResearchTools GmbH'
3204
+ url: 'https://www.linkresearchtools.com/'
3205
+
3206
+ - regex: 'EntferBot/([\d+.]+)'
3207
+ name: 'Entfer'
3208
+ category: 'Crawler'
3209
+ url: 'https://entfer.com/'
3210
+ producer:
3211
+ name: 'Entfer Ltd.'
3212
+ url: 'https://entfer.com/'
3213
+
3214
+ - regex: 'TagInspector/([\d+.]+)'
3215
+ name: 'Tag Inspector'
3216
+ category: 'Crawler'
3217
+ url: 'https://taginspector.com/'
3218
+ producer:
3219
+ name: 'InfoTrust, LLC'
3220
+ url: 'https://infotrust.com/'
3221
+
3222
+ - regex: 'pageburst'
3223
+ name: 'Pageburst'
3224
+ category: 'Crawler'
3225
+ url: 'https://pageburstls.elsevier.com/'
3226
+ producer:
3227
+ name: 'Elsevier Ltd'
3228
+ url: 'https://www.elsevier.com/'
3229
+
3230
+ - regex: '.+diffbot'
3231
+ name: 'Diffbot'
3232
+ category: 'Crawler'
3233
+ url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
3234
+ producer:
3235
+ name: 'Diffbot Technologies Corp.'
3236
+ url: 'https://www.diffbot.com/'
3237
+
3238
+ - regex: 'DisqusAdstxtCrawler/([\d+.]+)'
3239
+ name: 'Disqus'
3240
+ category: 'Crawler'
3241
+ url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
3242
+ producer:
3243
+ name: 'Disqus, Inc.'
3244
+ url: 'https://disqus.com/'
3245
+
3246
+ - regex: 'startmebot/([\d+.]+)'
3247
+ name: 'start.me'
3248
+ category: 'Crawler'
3249
+ url: 'https://about.start.me/'
3250
+ producer:
3251
+ name: 'start.me BV'
3252
+ url: 'https://about.start.me/'
3253
+
3254
+ - regex: '2ip bot/([\d+.]+)'
3255
+ name: '2ip'
3256
+ category: 'Crawler'
3257
+ url: 'https://2ip.io/'
3258
+
3259
+ - regex: 'ReqBin Curl Client/([\d+.]+)'
3260
+ name: 'ReqBin'
3261
+ category: 'Crawler'
3262
+ url: 'https://reqbin.com/curl'
3263
+
3264
+ - regex: 'XoviBot/([\d+.]+)'
3265
+ name: 'XoviBot'
3266
+ category: 'Crawler'
3267
+ url: 'https://www.xovibot.net'
3268
+ producer:
3269
+ name: 'Xovi GmbH'
3270
+ url: 'http://www.xovi.de'
3271
+
3272
+ - regex: 'Overcast/([\d+.]+) Podcast Sync'
3273
+ name: 'Overcast Podcast Sync'
3274
+ category: 'Service Agent'
3275
+ url: 'https://overcast.fm/podcasterinfo'
3276
+
3277
+ - regex: '^Verity/([\d+.]+)'
3278
+ name: 'GumGum Verity'
3279
+ category: 'Service Agent'
3280
+ url: 'https://gumgum.com/verity'
3281
+
3282
+ - regex: 'hackermention'
3283
+ name: 'hackermention'
3284
+ category: 'Feed Reader'
3285
+ url: 'https://github.com/snarfed/hackermention'
3286
+
3287
+ - regex: 'BitSightBot/([\d+.]+)'
3288
+ name: 'BitSight'
3289
+ category: 'Security Checker'
3290
+ url: 'https://www.bitsight.com/'
3291
+ producer:
3292
+ name: 'BitSight Technologies, Inc.'
3293
+ url: 'https://www.bitsight.com/'
3294
+
3295
+ - regex: 'Ezgif/([\d+.]+)'
3296
+ name: 'Ezgif'
3297
+ category: 'Service Agent'
3298
+ url: 'https://ezgif.com/about'
3299
+
3300
+ - regex: 'intelx.io_bot'
3301
+ name: 'Intelligence X'
3302
+ category: 'Crawler'
3303
+ url: 'https://intelx.io/'
3304
+ producer:
3305
+ name: 'Kleissner Investments s.r.o.'
3306
+ url: 'https://intelx.io/'
3307
+
3308
+ - regex: 'FemtosearchBot/([\d+.]+)'
3309
+ name: 'Femtosearch'
3310
+ category: 'Crawler'
3311
+ url: 'http://femtosearch.com/'
3312
+ producer:
3313
+ name: 'Grier Forensics, LLC'
3314
+ url: 'https://www.grierforensics.com/'
3315
+
3316
+ - regex: 'AdsTxtCrawler/([\d+.]+)'
3317
+ name: 'AdsTxtCrawler'
3318
+ category: 'Crawler'
3319
+ url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
3320
+ producer:
3321
+ name: 'IAB Technology Laboratory, Inc.'
3322
+ url: 'https://iabtechlab.com/'
3323
+
3324
+ - regex: 'Morningscore'
3325
+ name: 'Morningscore Bot'
3326
+ category: 'Crawler'
3327
+ url: 'https://morningscore.io/'
3328
+ producer:
3329
+ name: 'Morningscore'
3330
+ url: 'https://morningscore.io/'
3331
+
3332
+ - regex: 'Uptime-Kuma/([\d+.]+)'
3333
+ name: 'Uptime-Kuma'
3334
+ category: 'Site Monitor'
3335
+ url: 'https://github.com/louislam/uptime-kuma'
3336
+
3337
+ - regex: 'ChatGPT-User'
3338
+ name: 'ChatGPT'
3339
+ category: 'Crawler'
3340
+ url: 'https://platform.openai.com/docs/plugins/bot'
3341
+ producer:
3342
+ name: 'OpenAI OpCo, LLC'
3343
+ url: 'https://openai.com/'
3344
+
3345
+ - regex: 'BrightEdge Crawler/([\d+.]+)'
3346
+ name: 'BrightEdge'
3347
+ category: 'Crawler'
3348
+ url: 'https://www.brightedge.com/'
3349
+ producer:
3350
+ name: 'BrightEdge Technologies, Inc'
3351
+ url: 'https://www.brightedge.com/'
3352
+
3353
+ - regex: 'sfFeedReader/([\d+.]+)'
3354
+ name: 'sfFeedReader'
3355
+ url: 'https://github.com/diem-project/sfFeed2Plugin'
3356
+ category: 'Feed Fetcher'
3357
+
3358
+ - regex: 'cyberscan.io'
3359
+ name: 'Cyberscan'
3360
+ category: 'Security Checker'
3361
+ url: 'https://www.cyberscan.io/'
3362
+ producer:
3363
+ name: 'DGC Verwaltungs GmbH'
3364
+ url: 'https://dgc.org/'
3365
+
3366
+ - regex: 'deepcrawl\.com'
3367
+ name: 'Lumar'
3368
+ category: 'Crawler'
3369
+ url: 'https://deepcrawl.com/bot'
3370
+ producer:
3371
+ name: 'Lumar'
3372
+ url: 'https://www.lumar.io/'
3373
+
3374
+ - regex: 'RepoLookoutBot'
3375
+ name: 'Repo Lookout'
3376
+ category: 'Crawler'
3377
+ url: 'https://www.repo-lookout.org/'
3378
+ producer:
3379
+ name: 'Crissy Field GmbH'
3380
+ url: 'https://www.crissyfield.de/'
3381
+
2642
3382
  # Generic detections
2643
3383
  - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2644
3384
  name: 'Generic Bot'