device_detector 1.0.7 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +25 -4
- data/lib/device_detector/browser.rb +184 -5
- data/lib/device_detector/client_hint.rb +181 -0
- data/lib/device_detector/device.rb +522 -3
- data/lib/device_detector/os.rb +33 -3
- data/lib/device_detector/parser.rb +25 -3
- data/lib/device_detector/vendor_fragment.rb +25 -0
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector.rb +82 -16
- data/regexes/bots.yml +757 -17
- data/regexes/client/browsers.yml +365 -54
- data/regexes/client/hints/apps.yml +102 -0
- data/regexes/client/hints/browsers.yml +195 -0
- data/regexes/client/libraries.yml +200 -3
- data/regexes/client/mobile_apps.yml +396 -14
- data/regexes/client/pim.yml +15 -0
- data/regexes/device/car_browsers.yml +1 -1
- data/regexes/device/consoles.yml +8 -3
- data/regexes/device/mobiles.yml +12126 -2425
- data/regexes/device/portable_media_player.yml +24 -2
- data/regexes/device/shell_tv.yml +16 -0
- data/regexes/device/televisions.yml +26 -4
- data/regexes/oss.yml +381 -50
- metadata +11 -7
data/regexes/bots.yml
CHANGED
|
@@ -5,6 +5,22 @@
|
|
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
|
6
6
|
###############
|
|
7
7
|
|
|
8
|
+
- regex: 'monitoring360bot'
|
|
9
|
+
name: '360 Monitoring'
|
|
10
|
+
category: 'Site Monitor'
|
|
11
|
+
url: 'https://www.360monitoring.io'
|
|
12
|
+
producer:
|
|
13
|
+
name: 'Plesk International GmbH'
|
|
14
|
+
url: 'https://www.plesk.com'
|
|
15
|
+
|
|
16
|
+
- regex: 'Cloudflare-Healthchecks'
|
|
17
|
+
name: 'Cloudflare Health Checks'
|
|
18
|
+
category: 'Service Agent'
|
|
19
|
+
url: 'https://developers.cloudflare.com/health-checks/'
|
|
20
|
+
producer:
|
|
21
|
+
name: 'CloudFlare'
|
|
22
|
+
url: 'http://www.cloudflare.com'
|
|
23
|
+
|
|
8
24
|
- regex: '360Spider'
|
|
9
25
|
name: '360Spider'
|
|
10
26
|
category: 'Search bot'
|
|
@@ -45,6 +61,14 @@
|
|
|
45
61
|
name: 'Ahrefs Pte Ltd'
|
|
46
62
|
url: 'https://ahrefs.com/robot'
|
|
47
63
|
|
|
64
|
+
- regex: 'AhrefsSiteAudit/([\d+.]+)'
|
|
65
|
+
name: 'AhrefsSiteAudit'
|
|
66
|
+
category: 'Site Monitor'
|
|
67
|
+
url: 'https://ahrefs.com/robot/site-audit'
|
|
68
|
+
producer:
|
|
69
|
+
name: 'Ahrefs Pte Ltd'
|
|
70
|
+
url: 'https://ahrefs.com/'
|
|
71
|
+
|
|
48
72
|
- regex: 'ia_archiver|alexabot|verifybot'
|
|
49
73
|
name: 'Alexa Crawler'
|
|
50
74
|
category: 'Search bot'
|
|
@@ -100,13 +124,13 @@
|
|
|
100
124
|
name: 'Apple Inc'
|
|
101
125
|
url: 'https://www.apple.com'
|
|
102
126
|
|
|
103
|
-
- regex:
|
|
104
|
-
name:
|
|
105
|
-
category:
|
|
106
|
-
url:
|
|
127
|
+
- regex: 'AppSignalBot'
|
|
128
|
+
name: 'AppSignalBot'
|
|
129
|
+
category: 'Site Monitor'
|
|
130
|
+
url: 'https://docs.appsignal.com/uptime-monitoring/'
|
|
107
131
|
producer:
|
|
108
|
-
name:
|
|
109
|
-
url:
|
|
132
|
+
name: 'AppSignal'
|
|
133
|
+
url: 'https://appsignal.com/'
|
|
110
134
|
|
|
111
135
|
- regex: 'Arachni'
|
|
112
136
|
name: 'Arachni'
|
|
@@ -355,7 +379,7 @@
|
|
|
355
379
|
name: 'Certified Security Solutions'
|
|
356
380
|
url: 'https://www.css-security.com/company/about-us/'
|
|
357
381
|
|
|
358
|
-
- regex: 'Datadog Agent'
|
|
382
|
+
- regex: 'Datadog Agent|Datadog/?Synthetics'
|
|
359
383
|
name: 'Datadog Agent'
|
|
360
384
|
url: 'https://github.com/DataDog/dd-agent'
|
|
361
385
|
category: 'Site Monitor'
|
|
@@ -688,7 +712,7 @@
|
|
|
688
712
|
name: 'Visual Meta'
|
|
689
713
|
url: 'https://www.shopalike.cz/'
|
|
690
714
|
|
|
691
|
-
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|
|
|
715
|
+
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Prober|Producer)|Google.*/\+/web/snippet'
|
|
692
716
|
name: 'Googlebot'
|
|
693
717
|
category: 'Search bot'
|
|
694
718
|
url: 'http://www.google.com/bot.html'
|
|
@@ -711,6 +735,11 @@
|
|
|
711
735
|
name: 'HubSpot Inc.'
|
|
712
736
|
url: 'https://www.hubspot.com'
|
|
713
737
|
|
|
738
|
+
- regex: 'vuhuvBot'
|
|
739
|
+
name: 'Vuhuv Bot'
|
|
740
|
+
category: 'Crawler'
|
|
741
|
+
url: 'http://vuhuv.com/bot.html'
|
|
742
|
+
|
|
714
743
|
- regex: 'HTTPMon'
|
|
715
744
|
name: 'HTTPMon'
|
|
716
745
|
category: 'Site Monitor'
|
|
@@ -783,6 +812,14 @@
|
|
|
783
812
|
name: 'Lighthouse'
|
|
784
813
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
|
785
814
|
|
|
815
|
+
- regex: 'last-modified\.com'
|
|
816
|
+
name: 'LastMod Bot'
|
|
817
|
+
category: 'Site Monitor'
|
|
818
|
+
url: 'https://last-modified.com/en/about'
|
|
819
|
+
producer:
|
|
820
|
+
name: ''
|
|
821
|
+
url: 'https://last-modified.com/en'
|
|
822
|
+
|
|
786
823
|
- regex: 'linkdexbot|linkdex\.com'
|
|
787
824
|
name: 'Linkdex Bot'
|
|
788
825
|
category: 'Search bot'
|
|
@@ -830,6 +867,14 @@
|
|
|
830
867
|
name: ''
|
|
831
868
|
url: ''
|
|
832
869
|
|
|
870
|
+
- regex: 'masscan-ng/([\d+.]+)'
|
|
871
|
+
name: 'masscan-ng'
|
|
872
|
+
url: 'https://github.com/bi-zone/masscan-ng'
|
|
873
|
+
category: 'Crawler'
|
|
874
|
+
producer:
|
|
875
|
+
name: 'BIZON, OOO'
|
|
876
|
+
url: 'https://bi.zone/'
|
|
877
|
+
|
|
833
878
|
- regex: 'masscan'
|
|
834
879
|
name: 'masscan'
|
|
835
880
|
url: 'https://github.com/robertdavidgraham/masscan'
|
|
@@ -988,6 +1033,14 @@
|
|
|
988
1033
|
- regex: 'Octopus [0-9]'
|
|
989
1034
|
name: 'Octopus'
|
|
990
1035
|
|
|
1036
|
+
- regex: 'OnlineOrNot.com_bot'
|
|
1037
|
+
name: 'OnlineOrNot Bot'
|
|
1038
|
+
category: 'Site Monitor'
|
|
1039
|
+
url: 'https://onlineornot.com/website-monitoring'
|
|
1040
|
+
producer:
|
|
1041
|
+
name: 'OnlineOrNot'
|
|
1042
|
+
url: 'https://onlineornot.com'
|
|
1043
|
+
|
|
991
1044
|
- regex: 'omgili'
|
|
992
1045
|
name: 'Omgili bot'
|
|
993
1046
|
category: 'Search bot'
|
|
@@ -1049,12 +1102,12 @@
|
|
|
1049
1102
|
name: 'PHP Server Monitor'
|
|
1050
1103
|
url: 'http://www.phpservermonitor.org/'
|
|
1051
1104
|
|
|
1052
|
-
- regex: '
|
|
1053
|
-
name: '
|
|
1105
|
+
- regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
|
|
1106
|
+
name: 'Pocket'
|
|
1054
1107
|
category: 'Read-it-later Service'
|
|
1055
1108
|
url: 'https://getpocket.com/pocketparser_ua'
|
|
1056
1109
|
producer:
|
|
1057
|
-
name: '
|
|
1110
|
+
name: 'Read It Later, Inc.'
|
|
1058
1111
|
url: 'https://getpocket.com/'
|
|
1059
1112
|
|
|
1060
1113
|
- regex: 'PritTorrent'
|
|
@@ -1317,7 +1370,7 @@
|
|
|
1317
1370
|
name: 'Slack Technologies'
|
|
1318
1371
|
url: 'http://slack.com'
|
|
1319
1372
|
|
|
1320
|
-
- regex: '(Sogou (
|
|
1373
|
+
- regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
|
|
1321
1374
|
name: 'Sogou Spider'
|
|
1322
1375
|
category: 'Search bot'
|
|
1323
1376
|
url: 'http://www.sogou.com/docs/help/webmasters.htm'
|
|
@@ -1828,6 +1881,14 @@
|
|
|
1828
1881
|
name: 'Snapchat Inc.'
|
|
1829
1882
|
url: 'https://www.snapchat.com'
|
|
1830
1883
|
|
|
1884
|
+
- regex: 'Snap URL Preview Service'
|
|
1885
|
+
name: 'Snap URL Preview Service'
|
|
1886
|
+
category: 'Service Agent'
|
|
1887
|
+
url: 'https://developers.snap.com/robots'
|
|
1888
|
+
producer:
|
|
1889
|
+
name: 'Snapchat Inc.'
|
|
1890
|
+
url: 'https://www.snapchat.com/'
|
|
1891
|
+
|
|
1831
1892
|
- regex: "Let's Encrypt validation server"
|
|
1832
1893
|
name: "Let's Encrypt Validation"
|
|
1833
1894
|
category: 'Service Agent'
|
|
@@ -1926,7 +1987,7 @@
|
|
|
1926
1987
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
|
1927
1988
|
name: 'RSSRadio Bot'
|
|
1928
1989
|
|
|
1929
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
|
|
1990
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
|
|
1930
1991
|
name: 'Generic Bot'
|
|
1931
1992
|
|
|
1932
1993
|
- regex: '^sentry'
|
|
@@ -1935,7 +1996,7 @@
|
|
|
1935
1996
|
name: 'Sentry'
|
|
1936
1997
|
url: 'https://sentry.io'
|
|
1937
1998
|
|
|
1938
|
-
- regex: '^Spotify'
|
|
1999
|
+
- regex: '^Spotify/(\d+[\.\d]+)$'
|
|
1939
2000
|
name: 'Spotify'
|
|
1940
2001
|
producer:
|
|
1941
2002
|
name: 'Spotify'
|
|
@@ -1985,7 +2046,15 @@
|
|
|
1985
2046
|
name: 'WooRank sprl'
|
|
1986
2047
|
url: 'https://www.woorank.com/'
|
|
1987
2048
|
|
|
1988
|
-
- regex: '
|
|
2049
|
+
- regex: 'by Siteimprove\.com'
|
|
2050
|
+
name: 'Siteimprove'
|
|
2051
|
+
category: 'Search bot'
|
|
2052
|
+
url: 'https://siteimprove.com/'
|
|
2053
|
+
producer:
|
|
2054
|
+
name: 'Siteimprove GmbH'
|
|
2055
|
+
url: 'https://siteimprove.com/'
|
|
2056
|
+
|
|
2057
|
+
- regex: 'Image size by Siteimprove\.com'
|
|
1989
2058
|
name: 'Siteimprove'
|
|
1990
2059
|
category: 'Search bot'
|
|
1991
2060
|
url: 'https://siteimprove.com/'
|
|
@@ -2113,6 +2182,14 @@
|
|
|
2113
2182
|
name: 'Startpagina B.V.'
|
|
2114
2183
|
url: 'https://www.startpagina.nl/'
|
|
2115
2184
|
|
|
2185
|
+
- regex: 'MoodleBot-Linkchecker'
|
|
2186
|
+
name: 'MoodleBot Linkchecker'
|
|
2187
|
+
category: 'Search bot'
|
|
2188
|
+
url: 'https://docs.moodle.org/en/Usage'
|
|
2189
|
+
producer:
|
|
2190
|
+
name: 'Moodle Pty Ltd'
|
|
2191
|
+
url: 'https://moodle.org/'
|
|
2192
|
+
|
|
2116
2193
|
- regex: 'GTmetrix'
|
|
2117
2194
|
name: 'GTmetrix'
|
|
2118
2195
|
category: 'Crawler'
|
|
@@ -2163,7 +2240,7 @@
|
|
|
2163
2240
|
category: 'Crawler'
|
|
2164
2241
|
url: 'https://serendeputy.com/about/serendeputy-bot'
|
|
2165
2242
|
|
|
2166
|
-
- regex: 'ias-va.*admantx.*service-fetcher'
|
|
2243
|
+
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
|
|
2167
2244
|
name: 'ADmantX Service Fetcher'
|
|
2168
2245
|
category: 'Service bot'
|
|
2169
2246
|
url: 'https://www.admantx.com/service-fetcher.html'
|
|
@@ -2349,7 +2426,7 @@
|
|
|
2349
2426
|
name: ''
|
|
2350
2427
|
url: ''
|
|
2351
2428
|
|
|
2352
|
-
- regex: 'scaninfo@expanseinc.com'
|
|
2429
|
+
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
|
|
2353
2430
|
name: 'Expanse'
|
|
2354
2431
|
category: 'Security Checker'
|
|
2355
2432
|
url: 'https://expanse.co/'
|
|
@@ -2372,6 +2449,13 @@
|
|
|
2372
2449
|
producer:
|
|
2373
2450
|
name: 'Hatena Co., Ltd.'
|
|
2374
2451
|
url: 'https://www.hatena.ne.jp'
|
|
2452
|
+
- regex: 'Hatena-?Bookmark'
|
|
2453
|
+
name: 'Hatena Bookmark'
|
|
2454
|
+
category: 'Crawler'
|
|
2455
|
+
url: 'https://www.hatena.ne.jp/faq/'
|
|
2456
|
+
producer:
|
|
2457
|
+
name: 'Hatena Co., Ltd.'
|
|
2458
|
+
url: 'https://www.hatena.ne.jp'
|
|
2375
2459
|
|
|
2376
2460
|
- regex: 'RyowlEngine/(\d+)'
|
|
2377
2461
|
name: 'Ryowl'
|
|
@@ -2511,6 +2595,19 @@
|
|
|
2511
2595
|
category: 'Security Checker'
|
|
2512
2596
|
url: 'https://github.com/LeakIX/l9tcpid'
|
|
2513
2597
|
|
|
2598
|
+
- regex: 'l9explore/([\d+\.]+)'
|
|
2599
|
+
name: 'l9explore'
|
|
2600
|
+
category: 'Security Checker'
|
|
2601
|
+
url: 'https://github.com/LeakIX/l9explore'
|
|
2602
|
+
|
|
2603
|
+
- regex: 'l9scan/|^Lkx-(.*)/([\d+.]+)'
|
|
2604
|
+
name: 'LeakIX'
|
|
2605
|
+
category: 'Security Checker'
|
|
2606
|
+
url: 'https://leakix.net/'
|
|
2607
|
+
producer:
|
|
2608
|
+
name: 'BaDaaS SRL'
|
|
2609
|
+
url: 'https://leakix.net/'
|
|
2610
|
+
|
|
2514
2611
|
- regex: 'MegaIndex.ru/([\d+\.]+)'
|
|
2515
2612
|
name: 'MegaIndex'
|
|
2516
2613
|
category: 'Crawler'
|
|
@@ -2639,6 +2736,649 @@
|
|
|
2639
2736
|
name: 'Hochschule für angewandte Wissenschaften München'
|
|
2640
2737
|
url: 'https://www.hm.edu/'
|
|
2641
2738
|
|
|
2739
|
+
- regex: 'TigerBot/([\d+.]+)'
|
|
2740
|
+
name: 'TigerBot'
|
|
2741
|
+
category: 'Crawler'
|
|
2742
|
+
url: 'https://tiger.ch/'
|
|
2743
|
+
|
|
2744
|
+
- regex: 'TestCrawler/([\d+.]+)'
|
|
2745
|
+
name: 'TestCrawler'
|
|
2746
|
+
category: 'Crawler'
|
|
2747
|
+
url: 'https://www.comcepta.com/'
|
|
2748
|
+
|
|
2749
|
+
- regex: 'CrowdTanglebot/([\d+.]+)'
|
|
2750
|
+
name: 'CrowdTangle'
|
|
2751
|
+
category: 'Crawler'
|
|
2752
|
+
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
|
|
2753
|
+
producer:
|
|
2754
|
+
name: 'CrowdTangle, Inc.'
|
|
2755
|
+
url: 'https://www.crowdtangle.com/'
|
|
2756
|
+
|
|
2757
|
+
- regex: 'Sellers.Guide Crawler by Primis'
|
|
2758
|
+
name: 'Sellers.Guide'
|
|
2759
|
+
category: 'Crawler'
|
|
2760
|
+
url: 'https://sellers.guide/'
|
|
2761
|
+
producer:
|
|
2762
|
+
name: 'McCann Disciplines, Ltd.'
|
|
2763
|
+
url: 'https://www.primis.tech/'
|
|
2764
|
+
|
|
2765
|
+
- regex: 'OnalyticaBot'
|
|
2766
|
+
name: 'Onalytica'
|
|
2767
|
+
category: 'Crawler'
|
|
2768
|
+
url: 'https://www.airslate.com/bot/explore/onalytica-bot'
|
|
2769
|
+
producer:
|
|
2770
|
+
name: 'airSlate, Inc.'
|
|
2771
|
+
url: 'https://www.airslate.com/'
|
|
2772
|
+
|
|
2773
|
+
- regex: 'deepnoc'
|
|
2774
|
+
name: 'deepnoc'
|
|
2775
|
+
category: 'Crawler'
|
|
2776
|
+
url: 'https://deepnoc.com/bot'
|
|
2777
|
+
producer:
|
|
2778
|
+
name: 'deepnoc, GmbH'
|
|
2779
|
+
url: 'https://deepnoc.com/'
|
|
2780
|
+
|
|
2781
|
+
- regex: 'Newslitbot/([\d+.]+)'
|
|
2782
|
+
name: 'Newslitbot'
|
|
2783
|
+
category: 'Crawler'
|
|
2784
|
+
url: 'https://www.newslit.co/'
|
|
2785
|
+
producer:
|
|
2786
|
+
name: 'Newslit, LLC.'
|
|
2787
|
+
url: 'https://www.newslit.co/'
|
|
2788
|
+
|
|
2789
|
+
- regex: 'um-LN/([\d+.]+)'
|
|
2790
|
+
name: 'uMBot'
|
|
2791
|
+
category: 'Crawler'
|
|
2792
|
+
url: 'https://www.ubermetrics-technologies.com/'
|
|
2793
|
+
producer:
|
|
2794
|
+
name: 'Ubermetrics Technologies GmbH'
|
|
2795
|
+
url: 'https://www.ubermetrics-technologies.com/'
|
|
2796
|
+
|
|
2797
|
+
- regex: 'Abonti/([\d+.]+)'
|
|
2798
|
+
name: 'Abonti'
|
|
2799
|
+
category: 'Crawler'
|
|
2800
|
+
url: 'http://abonti.com/'
|
|
2801
|
+
|
|
2802
|
+
- regex: 'collection@infegy.com'
|
|
2803
|
+
name: 'Infegy'
|
|
2804
|
+
category: 'Crawler'
|
|
2805
|
+
url: 'https://infegy.com/'
|
|
2806
|
+
producer:
|
|
2807
|
+
name: 'Infegy, Inc.'
|
|
2808
|
+
url: 'https://infegy.com/'
|
|
2809
|
+
|
|
2810
|
+
- regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
|
|
2811
|
+
name: 'IPIP'
|
|
2812
|
+
category: 'Security Checker'
|
|
2813
|
+
url: 'https://security.ipip.net/'
|
|
2814
|
+
producer:
|
|
2815
|
+
name: 'Beijing Tiantexin Tech. Co., Ltd.'
|
|
2816
|
+
url: 'https://en.ipip.net/'
|
|
2817
|
+
|
|
2818
|
+
- regex: 'ev-crawler/([\d+.]+)'
|
|
2819
|
+
name: 'Headline'
|
|
2820
|
+
category: 'Crawler'
|
|
2821
|
+
url: 'https://headline.com/legal/crawler'
|
|
2822
|
+
producer:
|
|
2823
|
+
name: 'e.ventures Managementgesellschaft mbH'
|
|
2824
|
+
url: 'https://headline.com/'
|
|
2825
|
+
|
|
2826
|
+
- regex: 'webprosbot/([\d+.]+)'
|
|
2827
|
+
name: 'WebPros'
|
|
2828
|
+
category: 'Crawler'
|
|
2829
|
+
url: 'https://webpros.com/'
|
|
2830
|
+
producer:
|
|
2831
|
+
name: 'WebPros Holdco B.V.'
|
|
2832
|
+
url: 'https://webpros.com/'
|
|
2833
|
+
|
|
2834
|
+
- regex: 'ELB-HealthChecker'
|
|
2835
|
+
name: 'Amazon ELB'
|
|
2836
|
+
category: 'Site Monitor'
|
|
2837
|
+
url: 'https://aws.amazon.com/elasticloadbalancing/'
|
|
2838
|
+
producer:
|
|
2839
|
+
name: 'Amazon.com, Inc.'
|
|
2840
|
+
url: 'https://www.amazon.com/'
|
|
2841
|
+
|
|
2842
|
+
- regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
|
|
2843
|
+
name: 'WhereGoes'
|
|
2844
|
+
category: 'Crawler'
|
|
2845
|
+
url: 'https://wheregoes.com/'
|
|
2846
|
+
|
|
2847
|
+
- regex: 'project_patchwatch'
|
|
2848
|
+
name: 'Project Patchwatch'
|
|
2849
|
+
category: 'Crawler'
|
|
2850
|
+
url: 'http://66.240.192.82/'
|
|
2851
|
+
|
|
2852
|
+
- regex: 'InternetMeasurement/([\d+.]+)'
|
|
2853
|
+
name: 'InternetMeasurement'
|
|
2854
|
+
category: 'Crawler'
|
|
2855
|
+
url: 'https://internet-measurement.com/'
|
|
2856
|
+
|
|
2857
|
+
- regex: 'DomainAppender /([\d+.]+)'
|
|
2858
|
+
name: 'DomainAppender'
|
|
2859
|
+
category: 'Crawler'
|
|
2860
|
+
url: 'https://www.profound.net/product/domain_append/'
|
|
2861
|
+
producer:
|
|
2862
|
+
name: 'Profound Networks, LLC'
|
|
2863
|
+
url: 'https://www.profound.net/'
|
|
2864
|
+
|
|
2865
|
+
- regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
|
|
2866
|
+
name: 'FreeWebMonitoring'
|
|
2867
|
+
category: 'Site Monitor'
|
|
2868
|
+
url: 'https://www.freewebmonitoring.com/bot.html'
|
|
2869
|
+
producer:
|
|
2870
|
+
name: 'GreenWave Online, Inc.'
|
|
2871
|
+
url: 'http://www.greenwaveonline.com/'
|
|
2872
|
+
|
|
2873
|
+
- regex: 'Page Modified Pinger'
|
|
2874
|
+
name: 'Page Modified Pinger'
|
|
2875
|
+
category: 'Site Monitor'
|
|
2876
|
+
url: 'https://www.pagemodified.com/'
|
|
2877
|
+
producer:
|
|
2878
|
+
name: 'Valley Hosting, LLC'
|
|
2879
|
+
url: 'https://www.pagemodified.com/'
|
|
2880
|
+
|
|
2881
|
+
- regex: 'adstxtlab.com'
|
|
2882
|
+
name: 'adstxtlab.com'
|
|
2883
|
+
category: 'Crawler'
|
|
2884
|
+
url: 'https://adstxtlab.com/validator.php'
|
|
2885
|
+
producer:
|
|
2886
|
+
name: 'Jaohawi AB'
|
|
2887
|
+
url: 'https://adstxtlab.com/'
|
|
2888
|
+
|
|
2889
|
+
- regex: 'Iframely/([\d+.]+)'
|
|
2890
|
+
name: 'Iframely'
|
|
2891
|
+
category: 'Crawler'
|
|
2892
|
+
url: 'https://iframely.com/'
|
|
2893
|
+
producer:
|
|
2894
|
+
name: 'Itteco Software, Corp.'
|
|
2895
|
+
url: 'https://iframely.com/'
|
|
2896
|
+
|
|
2897
|
+
- regex: 'DomainStatsBot/([\d+.]+)'
|
|
2898
|
+
name: 'DomainStatsBot'
|
|
2899
|
+
category: 'Crawler'
|
|
2900
|
+
url: 'https://domainstats.com/pages/our-bot'
|
|
2901
|
+
producer:
|
|
2902
|
+
name: 'Domainstats Ltd'
|
|
2903
|
+
url: 'https://domainstats.com/'
|
|
2904
|
+
|
|
2905
|
+
- regex: 'aiHitBot/([\d+.]+)'
|
|
2906
|
+
name: 'aiHitBot'
|
|
2907
|
+
category: 'Crawler'
|
|
2908
|
+
url: 'https://www.aihitdata.com/about'
|
|
2909
|
+
|
|
2910
|
+
- regex: 'DomainCrawler/'
|
|
2911
|
+
name: 'DomainCrawler'
|
|
2912
|
+
category: 'Crawler'
|
|
2913
|
+
url: 'https://domaincrawler.com/about-us/'
|
|
2914
|
+
|
|
2915
|
+
- regex: 'DNSResearchBot'
|
|
2916
|
+
name: 'DNSResearchBot'
|
|
2917
|
+
category: 'Crawler'
|
|
2918
|
+
|
|
2919
|
+
- regex: 'GitCrawlerBot'
|
|
2920
|
+
name: 'GitCrawlerBot'
|
|
2921
|
+
category: 'Crawler'
|
|
2922
|
+
|
|
2923
|
+
- regex: 'AdAuth/([\d+.]+)'
|
|
2924
|
+
name: 'AdAuth'
|
|
2925
|
+
category: 'Crawler'
|
|
2926
|
+
url: 'https://www.adauth.com'
|
|
2927
|
+
|
|
2928
|
+
- regex: 'faveeo.com'
|
|
2929
|
+
name: 'Faveeo'
|
|
2930
|
+
category: 'Crawler'
|
|
2931
|
+
url: 'http://www.faveeo.com'
|
|
2932
|
+
|
|
2933
|
+
- regex: 'kozmonavt\.'
|
|
2934
|
+
name: 'Kozmonavt'
|
|
2935
|
+
category: 'Crawler'
|
|
2936
|
+
url: 'https://kozmonavt.ml'
|
|
2937
|
+
|
|
2938
|
+
- regex: 'CriteoBot/'
|
|
2939
|
+
name: 'CriteoBot'
|
|
2940
|
+
category: 'Crawler'
|
|
2941
|
+
url: 'https://www.criteo.com/criteo-crawler/'
|
|
2942
|
+
|
|
2943
|
+
- regex: 'PayPal IPN'
|
|
2944
|
+
name: 'PayPal IPN'
|
|
2945
|
+
category: 'Service Agent'
|
|
2946
|
+
url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
|
|
2947
|
+
producer:
|
|
2948
|
+
name: 'PayPal, Inc.'
|
|
2949
|
+
url: 'https://www.paypal.com/'
|
|
2950
|
+
|
|
2951
|
+
- regex: 'MaCoCu'
|
|
2952
|
+
name: 'MaCoCu'
|
|
2953
|
+
category: 'Crawler'
|
|
2954
|
+
url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
|
|
2955
|
+
producer:
|
|
2956
|
+
name: 'Jožef Stefan Institute'
|
|
2957
|
+
url: 'https://www.ijs.si/ijsw/JSI'
|
|
2958
|
+
|
|
2959
|
+
- regex: 'dnt-policy@eff.org'
|
|
2960
|
+
name: 'EFF Do Not Track Verifier'
|
|
2961
|
+
category: 'Crawler'
|
|
2962
|
+
url: 'https://www.eff.org/issues/do-not-track'
|
|
2963
|
+
producer:
|
|
2964
|
+
name: 'Electronic Frontier Foundation'
|
|
2965
|
+
url: 'https://www.eff.org/'
|
|
2966
|
+
|
|
2967
|
+
- regex: 'InfoTigerBot'
|
|
2968
|
+
name: 'InfoTigerBot'
|
|
2969
|
+
category: 'Crawler'
|
|
2970
|
+
url: 'https://infotiger.com/bot'
|
|
2971
|
+
producer:
|
|
2972
|
+
name: 'Infotiger UG'
|
|
2973
|
+
url: 'https://infotiger.com/'
|
|
2974
|
+
|
|
2975
|
+
- regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
|
|
2976
|
+
name: 'Birdcrawlerbot'
|
|
2977
|
+
category: 'Crawler'
|
|
2978
|
+
url: 'https://crawla.de/de/index.php'
|
|
2979
|
+
producer:
|
|
2980
|
+
name: 'Swoppen Systems GmbH'
|
|
2981
|
+
url: 'https://www.swoppen.com/de'
|
|
2982
|
+
|
|
2983
|
+
- regex: 'ScamadviserExternalHit/([\d+.]+)'
|
|
2984
|
+
name: 'Scamadviser External Hit'
|
|
2985
|
+
category: 'Crawler'
|
|
2986
|
+
url: 'https://www.scamadviser.com/'
|
|
2987
|
+
producer:
|
|
2988
|
+
name: 'Ecommerce Operations B.V.'
|
|
2989
|
+
url: 'https://www.scamadviser.com/'
|
|
2990
|
+
|
|
2991
|
+
- regex: 'ZaldamoSearchBot'
|
|
2992
|
+
name: 'Zaldamo'
|
|
2993
|
+
category: 'Crawler'
|
|
2994
|
+
url: 'https://www.zaldamo.com/search.html'
|
|
2995
|
+
producer:
|
|
2996
|
+
name: 'Project Orlando, LLC.'
|
|
2997
|
+
url: 'https://www.projectorlando.com/'
|
|
2998
|
+
|
|
2999
|
+
- regex: 'AFB/([\d+.]+)'
|
|
3000
|
+
name: 'Allloadin Favicon Bot'
|
|
3001
|
+
category: 'Crawler'
|
|
3002
|
+
url: 'https://allloadin.com/'
|
|
3003
|
+
|
|
3004
|
+
- regex: 'SeolytBot/([\d+.]+)'
|
|
3005
|
+
name: 'Seolyt Bot'
|
|
3006
|
+
category: 'Crawler'
|
|
3007
|
+
url: 'https://seolyt.com'
|
|
3008
|
+
|
|
3009
|
+
- regex: 'LinkWalker/([\d+.]+)'
|
|
3010
|
+
name: 'LinkWalker'
|
|
3011
|
+
category: 'Crawler'
|
|
3012
|
+
url: 'https://www.phishlabs.com/'
|
|
3013
|
+
producer:
|
|
3014
|
+
name: 'PhishLabs, Inc.'
|
|
3015
|
+
url: 'https://www.phishlabs.com/'
|
|
3016
|
+
|
|
3017
|
+
- regex: 'RenovateBot/([\d+.]+)'
|
|
3018
|
+
name: 'RenovateBot'
|
|
3019
|
+
category: 'Security Checker'
|
|
3020
|
+
url: 'https://github.com/renovatebot/renovate'
|
|
3021
|
+
producer:
|
|
3022
|
+
name: 'White Source Ltd.'
|
|
3023
|
+
url: 'https://www.mend.io/free-developer-tools/renovate/'
|
|
3024
|
+
|
|
3025
|
+
- regex: 'INETDEX-BOT/([\d+.]+)'
|
|
3026
|
+
name: 'Inetdex Bot'
|
|
3027
|
+
category: 'Crawler'
|
|
3028
|
+
url: 'https://www.inetdex.com/'
|
|
3029
|
+
|
|
3030
|
+
- regex: 'NETZZAPPEN'
|
|
3031
|
+
name: 'NETZZAPPEN'
|
|
3032
|
+
category: 'Crawler'
|
|
3033
|
+
url: 'https://www.netzzappen.com/'
|
|
3034
|
+
producer:
|
|
3035
|
+
name: 'Marc Huemer'
|
|
3036
|
+
url: 'https://www.netzzappen.com/'
|
|
3037
|
+
|
|
3038
|
+
- regex: 'SerpReputationManagementAgent/([\d+.]+)'
|
|
3039
|
+
name: 'SEMrush Reputation Management'
|
|
3040
|
+
category: 'Service Agent'
|
|
3041
|
+
url: 'https://www.semrush.com/bot/'
|
|
3042
|
+
producer:
|
|
3043
|
+
name: 'SEMrush'
|
|
3044
|
+
url: 'https://www.semrush.com/'
|
|
3045
|
+
|
|
3046
|
+
- regex: 'panscient.com'
|
|
3047
|
+
name: 'Panscient'
|
|
3048
|
+
category: 'Crawler'
|
|
3049
|
+
url: 'https://www.panscient.com/faq.htm'
|
|
3050
|
+
producer:
|
|
3051
|
+
name: 'Panscient, Inc.'
|
|
3052
|
+
url: 'https://www.panscient.com/'
|
|
3053
|
+
|
|
3054
|
+
- regex: 'research@pdrlabs.net'
|
|
3055
|
+
name: 'PDR Labs'
|
|
3056
|
+
category: 'Security Checker'
|
|
3057
|
+
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
|
3058
|
+
producer:
|
|
3059
|
+
name: 'PDR Labs'
|
|
3060
|
+
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
|
3061
|
+
|
|
3062
|
+
- regex: 'Nicecrawler/([\d+.]+)'
|
|
3063
|
+
name: 'NiceCrawler'
|
|
3064
|
+
category: 'Crawler'
|
|
3065
|
+
url: 'https://www.nicecrawler.com/'
|
|
3066
|
+
producer:
|
|
3067
|
+
name: 'Intelium Corp.'
|
|
3068
|
+
url: 'https://www.intelium.com/'
|
|
3069
|
+
|
|
3070
|
+
- regex: 't3versionsBot/([\d+.]+)'
|
|
3071
|
+
name: 't3versions'
|
|
3072
|
+
category: 'Crawler'
|
|
3073
|
+
url: 'https://www.t3versions.com/bot'
|
|
3074
|
+
producer:
|
|
3075
|
+
name: 'Torben Hansen'
|
|
3076
|
+
url: 'https://www.t3versions.com/'
|
|
3077
|
+
|
|
3078
|
+
- regex: 'Crawlson/([\d+.]+)'
|
|
3079
|
+
name: 'Crawlson'
|
|
3080
|
+
category: 'Crawler'
|
|
3081
|
+
url: 'https://www.crawlson.com/about'
|
|
3082
|
+
producer:
|
|
3083
|
+
name: 'Crawlson'
|
|
3084
|
+
url: 'https://www.crawlson.com/'
|
|
3085
|
+
|
|
3086
|
+
- regex: 'tchelebi/([\d+.]+)'
|
|
3087
|
+
name: 'tchelebi'
|
|
3088
|
+
category: 'Crawler'
|
|
3089
|
+
url: 'https://tchelebi.io/'
|
|
3090
|
+
producer:
|
|
3091
|
+
name: 'NormShield, Inc.'
|
|
3092
|
+
url: 'https://blackkite.com/'
|
|
3093
|
+
|
|
3094
|
+
- regex: 'JobboerseBot'
|
|
3095
|
+
name: 'JobboerseBot'
|
|
3096
|
+
category: 'Crawler'
|
|
3097
|
+
url: 'https://www.xing.com/jobs'
|
|
3098
|
+
producer:
|
|
3099
|
+
name: 'New Work SE'
|
|
3100
|
+
url: 'https://www.xing.com/'
|
|
3101
|
+
|
|
3102
|
+
- regex: 'RepoLookoutBot/([\d+.]+)'
|
|
3103
|
+
name: 'Repo Lookout'
|
|
3104
|
+
category: 'Security Checker'
|
|
3105
|
+
url: 'https://www.repo-lookout.org/'
|
|
3106
|
+
producer:
|
|
3107
|
+
name: 'Crissy Field GmbH'
|
|
3108
|
+
url: 'https://www.crissyfield.de/'
|
|
3109
|
+
|
|
3110
|
+
- regex: 'PATHspider'
|
|
3111
|
+
name: 'PATHspider'
|
|
3112
|
+
category: 'Security Checker'
|
|
3113
|
+
url: 'https://pathspider.net/'
|
|
3114
|
+
producer:
|
|
3115
|
+
name: 'MAMI Project'
|
|
3116
|
+
url: 'https://mami-project.eu/'
|
|
3117
|
+
|
|
3118
|
+
- regex: 'everyfeed-spider/([\d+.]+)'
|
|
3119
|
+
name: 'Everyfeed'
|
|
3120
|
+
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
|
|
3121
|
+
category: 'Feed Fetcher'
|
|
3122
|
+
producer:
|
|
3123
|
+
name: ''
|
|
3124
|
+
url: ''
|
|
3125
|
+
|
|
3126
|
+
- regex: 'Exchange check'
|
|
3127
|
+
name: 'Exchange check'
|
|
3128
|
+
category: 'Security Checker'
|
|
3129
|
+
url: 'https://github.com/GossiTheDog/scanning'
|
|
3130
|
+
producer:
|
|
3131
|
+
name: 'Kevin Beaumont'
|
|
3132
|
+
url: 'https://doublepulsar.com/'
|
|
3133
|
+
|
|
3134
|
+
- regex: 'Sublinq'
|
|
3135
|
+
name: 'Sublinq'
|
|
3136
|
+
category: 'Crawler'
|
|
3137
|
+
url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
|
|
3138
|
+
producer:
|
|
3139
|
+
name: ''
|
|
3140
|
+
url: ''
|
|
3141
|
+
|
|
3142
|
+
- regex: 'Gregarius/([\d+.]+)'
|
|
3143
|
+
name: 'Gregarius'
|
|
3144
|
+
category: 'Feed Fetcher'
|
|
3145
|
+
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
|
|
3146
|
+
producer:
|
|
3147
|
+
name: ''
|
|
3148
|
+
url: ''
|
|
3149
|
+
|
|
3150
|
+
- regex: 'COMODO DCV'
|
|
3151
|
+
name: 'COMODO DCV'
|
|
3152
|
+
category: 'Service Agent'
|
|
3153
|
+
url: 'https://www.comodo.com/'
|
|
3154
|
+
producer:
|
|
3155
|
+
name: 'Comodo Security Solutions, Inc.'
|
|
3156
|
+
url: 'https://www.comodo.com/'
|
|
3157
|
+
|
|
3158
|
+
- regex: 'Sectigo DCV'
|
|
3159
|
+
name: 'Sectigo DCV'
|
|
3160
|
+
category: 'Service Agent'
|
|
3161
|
+
url: 'https://sectigo.com/'
|
|
3162
|
+
producer:
|
|
3163
|
+
name: 'Sectigo Limited'
|
|
3164
|
+
url: 'https://sectigo.com/'
|
|
3165
|
+
|
|
3166
|
+
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
|
|
3167
|
+
name: 'KlarnaBot'
|
|
3168
|
+
category: 'Crawler'
|
|
3169
|
+
url: 'https://docs.klarna.com/klarna-bot/'
|
|
3170
|
+
producer:
|
|
3171
|
+
name: 'Klarna Bank AB'
|
|
3172
|
+
url: 'https://www.klarna.com/'
|
|
3173
|
+
|
|
3174
|
+
- regex: 'Taboolabot/([\d+.]+)'
|
|
3175
|
+
name: 'Taboolabot'
|
|
3176
|
+
category: 'Crawler'
|
|
3177
|
+
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
|
|
3178
|
+
producer:
|
|
3179
|
+
name: 'Taboola, Inc.'
|
|
3180
|
+
url: 'https://www.taboola.com/'
|
|
3181
|
+
|
|
3182
|
+
- regex: 'Asana/([\d+.]+)'
|
|
3183
|
+
name: 'Asana'
|
|
3184
|
+
category: 'Crawler'
|
|
3185
|
+
url: 'https://asana.com/'
|
|
3186
|
+
producer:
|
|
3187
|
+
name: 'Asana, Inc.'
|
|
3188
|
+
url: 'https://asana.com/'
|
|
3189
|
+
|
|
3190
|
+
- regex: 'Chrome Privacy Preserving Prefetch Proxy'
|
|
3191
|
+
name: 'Chrome Privacy Preserving Prefetch Proxy'
|
|
3192
|
+
category: 'Service Agent'
|
|
3193
|
+
url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
|
|
3194
|
+
producer:
|
|
3195
|
+
name: 'Google Inc.'
|
|
3196
|
+
url: 'https://www.google.com/'
|
|
3197
|
+
|
|
3198
|
+
- regex: 'URLinspectorBot/([\d+.]+)'
|
|
3199
|
+
name: 'URLinspector'
|
|
3200
|
+
category: 'Site Monitor'
|
|
3201
|
+
url: 'https://www.urlinspector.com/bot/'
|
|
3202
|
+
producer:
|
|
3203
|
+
name: 'LinkResearchTools GmbH'
|
|
3204
|
+
url: 'https://www.linkresearchtools.com/'
|
|
3205
|
+
|
|
3206
|
+
- regex: 'EntferBot/([\d+.]+)'
|
|
3207
|
+
name: 'Entfer'
|
|
3208
|
+
category: 'Crawler'
|
|
3209
|
+
url: 'https://entfer.com/'
|
|
3210
|
+
producer:
|
|
3211
|
+
name: 'Entfer Ltd.'
|
|
3212
|
+
url: 'https://entfer.com/'
|
|
3213
|
+
|
|
3214
|
+
- regex: 'TagInspector/([\d+.]+)'
|
|
3215
|
+
name: 'Tag Inspector'
|
|
3216
|
+
category: 'Crawler'
|
|
3217
|
+
url: 'https://taginspector.com/'
|
|
3218
|
+
producer:
|
|
3219
|
+
name: 'InfoTrust, LLC'
|
|
3220
|
+
url: 'https://infotrust.com/'
|
|
3221
|
+
|
|
3222
|
+
- regex: 'pageburst'
|
|
3223
|
+
name: 'Pageburst'
|
|
3224
|
+
category: 'Crawler'
|
|
3225
|
+
url: 'https://pageburstls.elsevier.com/'
|
|
3226
|
+
producer:
|
|
3227
|
+
name: 'Elsevier Ltd'
|
|
3228
|
+
url: 'https://www.elsevier.com/'
|
|
3229
|
+
|
|
3230
|
+
- regex: '.+diffbot'
|
|
3231
|
+
name: 'Diffbot'
|
|
3232
|
+
category: 'Crawler'
|
|
3233
|
+
url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
|
|
3234
|
+
producer:
|
|
3235
|
+
name: 'Diffbot Technologies Corp.'
|
|
3236
|
+
url: 'https://www.diffbot.com/'
|
|
3237
|
+
|
|
3238
|
+
- regex: 'DisqusAdstxtCrawler/([\d+.]+)'
|
|
3239
|
+
name: 'Disqus'
|
|
3240
|
+
category: 'Crawler'
|
|
3241
|
+
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
|
|
3242
|
+
producer:
|
|
3243
|
+
name: 'Disqus, Inc.'
|
|
3244
|
+
url: 'https://disqus.com/'
|
|
3245
|
+
|
|
3246
|
+
- regex: 'startmebot/([\d+.]+)'
|
|
3247
|
+
name: 'start.me'
|
|
3248
|
+
category: 'Crawler'
|
|
3249
|
+
url: 'https://about.start.me/'
|
|
3250
|
+
producer:
|
|
3251
|
+
name: 'start.me BV'
|
|
3252
|
+
url: 'https://about.start.me/'
|
|
3253
|
+
|
|
3254
|
+
- regex: '2ip bot/([\d+.]+)'
|
|
3255
|
+
name: '2ip'
|
|
3256
|
+
category: 'Crawler'
|
|
3257
|
+
url: 'https://2ip.io/'
|
|
3258
|
+
|
|
3259
|
+
- regex: 'ReqBin Curl Client/([\d+.]+)'
|
|
3260
|
+
name: 'ReqBin'
|
|
3261
|
+
category: 'Crawler'
|
|
3262
|
+
url: 'https://reqbin.com/curl'
|
|
3263
|
+
|
|
3264
|
+
- regex: 'XoviBot/([\d+.]+)'
|
|
3265
|
+
name: 'XoviBot'
|
|
3266
|
+
category: 'Crawler'
|
|
3267
|
+
url: 'https://www.xovibot.net'
|
|
3268
|
+
producer:
|
|
3269
|
+
name: 'Xovi GmbH'
|
|
3270
|
+
url: 'http://www.xovi.de'
|
|
3271
|
+
|
|
3272
|
+
- regex: 'Overcast/([\d+.]+) Podcast Sync'
|
|
3273
|
+
name: 'Overcast Podcast Sync'
|
|
3274
|
+
category: 'Service Agent'
|
|
3275
|
+
url: 'https://overcast.fm/podcasterinfo'
|
|
3276
|
+
|
|
3277
|
+
- regex: '^Verity/([\d+.]+)'
|
|
3278
|
+
name: 'GumGum Verity'
|
|
3279
|
+
category: 'Service Agent'
|
|
3280
|
+
url: 'https://gumgum.com/verity'
|
|
3281
|
+
|
|
3282
|
+
- regex: 'hackermention'
|
|
3283
|
+
name: 'hackermention'
|
|
3284
|
+
category: 'Feed Reader'
|
|
3285
|
+
url: 'https://github.com/snarfed/hackermention'
|
|
3286
|
+
|
|
3287
|
+
- regex: 'BitSightBot/([\d+.]+)'
|
|
3288
|
+
name: 'BitSight'
|
|
3289
|
+
category: 'Security Checker'
|
|
3290
|
+
url: 'https://www.bitsight.com/'
|
|
3291
|
+
producer:
|
|
3292
|
+
name: 'BitSight Technologies, Inc.'
|
|
3293
|
+
url: 'https://www.bitsight.com/'
|
|
3294
|
+
|
|
3295
|
+
- regex: 'Ezgif/([\d+.]+)'
|
|
3296
|
+
name: 'Ezgif'
|
|
3297
|
+
category: 'Service Agent'
|
|
3298
|
+
url: 'https://ezgif.com/about'
|
|
3299
|
+
|
|
3300
|
+
- regex: 'intelx.io_bot'
|
|
3301
|
+
name: 'Intelligence X'
|
|
3302
|
+
category: 'Crawler'
|
|
3303
|
+
url: 'https://intelx.io/'
|
|
3304
|
+
producer:
|
|
3305
|
+
name: 'Kleissner Investments s.r.o.'
|
|
3306
|
+
url: 'https://intelx.io/'
|
|
3307
|
+
|
|
3308
|
+
- regex: 'FemtosearchBot/([\d+.]+)'
|
|
3309
|
+
name: 'Femtosearch'
|
|
3310
|
+
category: 'Crawler'
|
|
3311
|
+
url: 'http://femtosearch.com/'
|
|
3312
|
+
producer:
|
|
3313
|
+
name: 'Grier Forensics, LLC'
|
|
3314
|
+
url: 'https://www.grierforensics.com/'
|
|
3315
|
+
|
|
3316
|
+
- regex: 'AdsTxtCrawler/([\d+.]+)'
|
|
3317
|
+
name: 'AdsTxtCrawler'
|
|
3318
|
+
category: 'Crawler'
|
|
3319
|
+
url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
|
|
3320
|
+
producer:
|
|
3321
|
+
name: 'IAB Technology Laboratory, Inc.'
|
|
3322
|
+
url: 'https://iabtechlab.com/'
|
|
3323
|
+
|
|
3324
|
+
- regex: 'Morningscore'
|
|
3325
|
+
name: 'Morningscore Bot'
|
|
3326
|
+
category: 'Crawler'
|
|
3327
|
+
url: 'https://morningscore.io/'
|
|
3328
|
+
producer:
|
|
3329
|
+
name: 'Morningscore'
|
|
3330
|
+
url: 'https://morningscore.io/'
|
|
3331
|
+
|
|
3332
|
+
- regex: 'Uptime-Kuma/([\d+.]+)'
|
|
3333
|
+
name: 'Uptime-Kuma'
|
|
3334
|
+
category: 'Site Monitor'
|
|
3335
|
+
url: 'https://github.com/louislam/uptime-kuma'
|
|
3336
|
+
|
|
3337
|
+
- regex: 'ChatGPT-User'
|
|
3338
|
+
name: 'ChatGPT'
|
|
3339
|
+
category: 'Crawler'
|
|
3340
|
+
url: 'https://platform.openai.com/docs/plugins/bot'
|
|
3341
|
+
producer:
|
|
3342
|
+
name: 'OpenAI OpCo, LLC'
|
|
3343
|
+
url: 'https://openai.com/'
|
|
3344
|
+
|
|
3345
|
+
- regex: 'BrightEdge Crawler/([\d+.]+)'
|
|
3346
|
+
name: 'BrightEdge'
|
|
3347
|
+
category: 'Crawler'
|
|
3348
|
+
url: 'https://www.brightedge.com/'
|
|
3349
|
+
producer:
|
|
3350
|
+
name: 'BrightEdge Technologies, Inc'
|
|
3351
|
+
url: 'https://www.brightedge.com/'
|
|
3352
|
+
|
|
3353
|
+
- regex: 'sfFeedReader/([\d+.]+)'
|
|
3354
|
+
name: 'sfFeedReader'
|
|
3355
|
+
url: 'https://github.com/diem-project/sfFeed2Plugin'
|
|
3356
|
+
category: 'Feed Fetcher'
|
|
3357
|
+
|
|
3358
|
+
- regex: 'cyberscan.io'
|
|
3359
|
+
name: 'Cyberscan'
|
|
3360
|
+
category: 'Security Checker'
|
|
3361
|
+
url: 'https://www.cyberscan.io/'
|
|
3362
|
+
producer:
|
|
3363
|
+
name: 'DGC Verwaltungs GmbH'
|
|
3364
|
+
url: 'https://dgc.org/'
|
|
3365
|
+
|
|
3366
|
+
- regex: 'deepcrawl\.com'
|
|
3367
|
+
name: 'Lumar'
|
|
3368
|
+
category: 'Crawler'
|
|
3369
|
+
url: 'https://deepcrawl.com/bot'
|
|
3370
|
+
producer:
|
|
3371
|
+
name: 'Lumar'
|
|
3372
|
+
url: 'https://www.lumar.io/'
|
|
3373
|
+
|
|
3374
|
+
- regex: 'RepoLookoutBot'
|
|
3375
|
+
name: 'Repo Lookout'
|
|
3376
|
+
category: 'Crawler'
|
|
3377
|
+
url: 'https://www.repo-lookout.org/'
|
|
3378
|
+
producer:
|
|
3379
|
+
name: 'Crissy Field GmbH'
|
|
3380
|
+
url: 'https://www.crissyfield.de/'
|
|
3381
|
+
|
|
2642
3382
|
# Generic detections
|
|
2643
3383
|
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
|
2644
3384
|
name: 'Generic Bot'
|