crawler_detect 0.1.12 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -168
- data/.travis.yml +15 -5
- data/CHANGELOG.md +32 -0
- data/Gemfile.lock +38 -2
- data/README.md +13 -0
- data/bin/update_raw_files +10 -0
- data/crawler_detect.gemspec +6 -2
- data/lib/crawler_detect.rb +29 -7
- data/lib/crawler_detect/config.rb +29 -0
- data/lib/crawler_detect/detector.rb +27 -14
- data/lib/crawler_detect/library.rb +9 -3
- data/lib/crawler_detect/library/crawlers.rb +6 -1277
- data/lib/crawler_detect/library/exclusions.rb +6 -50
- data/lib/crawler_detect/library/headers.rb +6 -17
- data/lib/crawler_detect/library/loader.rb +18 -0
- data/lib/crawler_detect/library/raw/Crawlers.json +1 -0
- data/lib/crawler_detect/library/raw/Exclusions.json +1 -0
- data/lib/crawler_detect/library/raw/Headers.json +1 -0
- data/lib/crawler_detect/version.rb +2 -1
- data/lib/rack/crawler_detect.rb +20 -17
- metadata +52 -3
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CrawlerDetect
|
4
|
+
# Configuration of CrawlerDetect
|
5
|
+
#
|
6
|
+
# @see settings
|
7
|
+
# @since 1.0.0
|
8
|
+
class Config < ::Qonfig::DataSet
|
9
|
+
CUR_PATH = File.dirname(File.expand_path(__FILE__)).freeze
|
10
|
+
RAW_PATH = File.join(CUR_PATH, "library/raw").freeze
|
11
|
+
|
12
|
+
RAW_CRAWLERS_PATH = File.join(RAW_PATH, "Crawlers.json").freeze
|
13
|
+
RAW_EXCLUSIONS_PATH = File.join(RAW_PATH, "Exclusions.json").freeze
|
14
|
+
RAW_HEADERS_PATH = File.join(RAW_PATH, "Headers.json").freeze
|
15
|
+
|
16
|
+
# @return [String] path to crawlers raw JSON file
|
17
|
+
setting :raw_crawlers_path, RAW_CRAWLERS_PATH
|
18
|
+
|
19
|
+
# @return [String] path to exclusions raw JSON file
|
20
|
+
setting :raw_exclusions_path, RAW_EXCLUSIONS_PATH
|
21
|
+
|
22
|
+
# @return [String] path to headers raw JSON file
|
23
|
+
setting :raw_headers_path, RAW_HEADERS_PATH
|
24
|
+
|
25
|
+
validate :raw_crawlers_path, :string, strict: true
|
26
|
+
validate :raw_exclusions_path, :string, strict: true
|
27
|
+
validate :raw_headers_path, :string, strict: true
|
28
|
+
end
|
29
|
+
end
|
@@ -1,17 +1,22 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module CrawlerDetect
|
4
|
+
# @since 0.1.0
|
4
5
|
class Detector
|
6
|
+
# @param user_agent [String] User-agent string to detect
|
7
|
+
# @return [CrawlerDetect::Detector] instance of detector class
|
5
8
|
def initialize(user_agent)
|
6
9
|
@user_agent = user_agent.to_s.dup
|
7
10
|
end
|
8
11
|
|
12
|
+
# @return [true, false] Is User-agent a crawler?
|
9
13
|
def is_crawler?
|
10
14
|
@is_crawler ||= begin
|
11
15
|
!completely_exclusion? && matches_crawler_list?
|
12
16
|
end
|
13
17
|
end
|
14
18
|
|
19
|
+
# @return [String, nil] The detected crawler name from RAW data, or nil when the User-agent is not a crawler
|
15
20
|
def crawler_name
|
16
21
|
return unless is_crawler?
|
17
22
|
@crawler_name
|
@@ -19,22 +24,30 @@ module CrawlerDetect
|
|
19
24
|
|
20
25
|
private
|
21
26
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
27
|
+
# @private
|
28
|
+
# @return [true, false] Is User-agent in white-list?
|
29
|
+
def completely_exclusion?
|
30
|
+
@user_agent.gsub!(exclusions_matcher, "")
|
31
|
+
@user_agent.strip.length.zero?
|
32
|
+
end
|
26
33
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
34
|
+
# @private
|
35
|
+
# @return [true, false] Is User-agent in black-list?
|
36
|
+
def matches_crawler_list?
|
37
|
+
@crawler_name = crawlers_matcher.match(@user_agent).to_s.strip
|
38
|
+
!@crawler_name.empty?
|
39
|
+
end
|
31
40
|
|
32
|
-
|
33
|
-
|
34
|
-
|
41
|
+
# @private
|
42
|
+
# @return [Regexp] White-list of User-agents
|
43
|
+
def exclusions_matcher
|
44
|
+
CrawlerDetect::Library.get_regexp("exclusions")
|
45
|
+
end
|
35
46
|
|
36
|
-
|
37
|
-
|
38
|
-
|
47
|
+
# @private
|
48
|
+
# @return [Regexp] Black-list of User-agents
|
49
|
+
def crawlers_matcher
|
50
|
+
CrawlerDetect::Library.get_regexp("crawlers")
|
51
|
+
end
|
39
52
|
end
|
40
53
|
end
|
@@ -1,16 +1,22 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module CrawlerDetect
|
4
|
+
# @since 0.1.0
|
4
5
|
module Library
|
6
|
+
DATA_CLASSES = [Library::Headers, Library::Exclusions, Library::Crawlers].freeze
|
7
|
+
|
5
8
|
class << self
|
9
|
+
# @param param [String] Name of raw data
|
10
|
+
# @return [Regexp]
|
6
11
|
def get_regexp(param)
|
7
12
|
data = get_array(param)
|
8
|
-
%r
|
13
|
+
%r{#{data.join('|')}}i
|
9
14
|
end
|
10
15
|
|
16
|
+
# @param param [String] Name of raw data
|
17
|
+
# @return [Array]
|
11
18
|
def get_array(param)
|
12
|
-
|
13
|
-
const_get(const_name)
|
19
|
+
const_get("CrawlerDetect::Library::#{param.capitalize}").send(:data)
|
14
20
|
end
|
15
21
|
end
|
16
22
|
end
|
@@ -1,1285 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# rubocop:disable Layout/TrailingWhitespace
|
4
3
|
module CrawlerDetect
|
5
4
|
module Library
|
5
|
+
# @since 0.1.0
|
6
6
|
module Crawlers
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
^Calypso v\/
|
13
|
-
^COMODO DCV
|
14
|
-
^DangDang
|
15
|
-
^DavClnt
|
16
|
-
^FDM
|
17
|
-
^git\/
|
18
|
-
^Goose\/
|
19
|
-
^Grabber
|
20
|
-
^HTTPClient\/
|
21
|
-
^Java\/
|
22
|
-
^Jeode\/
|
23
|
-
^Jetty\/
|
24
|
-
^Mail\/
|
25
|
-
^Mget
|
26
|
-
^Microsoft URL Control
|
27
|
-
^NG\/[0-9\.]
|
28
|
-
^NING\/
|
29
|
-
^PHP\/[0-9]
|
30
|
-
^RMA\/
|
31
|
-
^Ruby|Ruby\/[0-9]
|
32
|
-
^VSE\/[0-9]
|
33
|
-
^WordPress\.com
|
34
|
-
^XRL\/[0-9]
|
35
|
-
^ZmEu
|
36
|
-
008\/
|
37
|
-
13TABS
|
38
|
-
192\.comAgent
|
39
|
-
2ip\.ru
|
40
|
-
404enemy
|
41
|
-
7Siters
|
42
|
-
80legs
|
43
|
-
a\.pr-cy\.ru
|
44
|
-
a3logics\.in
|
45
|
-
A6-Indexer
|
46
|
-
Abonti
|
47
|
-
Aboundex
|
48
|
-
aboutthedomain
|
49
|
-
Accoona-AI-Agent
|
50
|
-
acoon
|
51
|
-
acrylicapps\.com\/pulp
|
52
|
-
Acunetix
|
53
|
-
AdAuth\/
|
54
|
-
adbeat
|
55
|
-
AddThis
|
56
|
-
ADmantX
|
57
|
-
AdminLabs
|
58
|
-
adressendeutschland
|
59
|
-
adreview\/
|
60
|
-
adscanner
|
61
|
-
Adstxtaggregator
|
62
|
-
adstxt-worker
|
63
|
-
adstxt\.com
|
64
|
-
agentslug
|
65
|
-
AHC
|
66
|
-
aihit
|
67
|
-
aiohttp\/
|
68
|
-
Airmail
|
69
|
-
akka-http\/
|
70
|
-
akula\/
|
71
|
-
alertra
|
72
|
-
alexa site audit
|
73
|
-
Alibaba\.Security\.Heimdall
|
74
|
-
Alligator
|
75
|
-
allloadin
|
76
|
-
AllSubmitter
|
77
|
-
alyze\.info
|
78
|
-
amagit
|
79
|
-
^Amazon Simple Notification Service Agent$
|
80
|
-
Anarchie
|
81
|
-
AndroidDownloadManager
|
82
|
-
Anemone
|
83
|
-
AngleSharp
|
84
|
-
annotate_google
|
85
|
-
Ant\.com
|
86
|
-
Anturis Agent
|
87
|
-
AnyEvent-HTTP\/
|
88
|
-
Apache Droid
|
89
|
-
Apache OpenOffice
|
90
|
-
Apache-HttpAsyncClient
|
91
|
-
Apache-HttpClient
|
92
|
-
ApacheBench
|
93
|
-
Apexoo
|
94
|
-
APIs-Google
|
95
|
-
AportWorm\/
|
96
|
-
AppBeat\/
|
97
|
-
AppEngine-Google
|
98
|
-
AppleSyndication
|
99
|
-
Aprc\/[0-9]
|
100
|
-
Arachmo
|
101
|
-
arachnode
|
102
|
-
Arachnophilia
|
103
|
-
aria2
|
104
|
-
Arukereso
|
105
|
-
asafaweb
|
106
|
-
AskQuickly
|
107
|
-
Ask Jeeves
|
108
|
-
ASPSeek
|
109
|
-
Asterias
|
110
|
-
Astute
|
111
|
-
asynchttp
|
112
|
-
Attach
|
113
|
-
attohttpc
|
114
|
-
autocite
|
115
|
-
AutomaticWPTester
|
116
|
-
Autonomy
|
117
|
-
axios\/
|
118
|
-
AWS Security Scanner
|
119
|
-
B-l-i-t-z-B-O-T
|
120
|
-
Backlink-Ceck
|
121
|
-
backlink-check
|
122
|
-
BacklinkHttpStatus
|
123
|
-
BackStreet
|
124
|
-
BackWeb
|
125
|
-
Bad-Neighborhood
|
126
|
-
Badass
|
127
|
-
baidu\.com
|
128
|
-
Bandit
|
129
|
-
basicstate
|
130
|
-
BatchFTP
|
131
|
-
Battlezta Bazinga
|
132
|
-
baypup\/
|
133
|
-
BazQux
|
134
|
-
BBBike
|
135
|
-
BCKLINKS
|
136
|
-
BDFetch
|
137
|
-
BegunAdvertising
|
138
|
-
Bewica-security-scan
|
139
|
-
Bidtellect
|
140
|
-
BigBozz
|
141
|
-
Bigfoot
|
142
|
-
biglotron
|
143
|
-
BingLocalSearch
|
144
|
-
BingPreview
|
145
|
-
binlar
|
146
|
-
biNu image cacher
|
147
|
-
Bitacle
|
148
|
-
biz_Directory
|
149
|
-
Black Hole
|
150
|
-
Blackboard Safeassign
|
151
|
-
BlackWidow
|
152
|
-
BlockNote\.Net
|
153
|
-
BlogBridge
|
154
|
-
Bloglines
|
155
|
-
Bloglovin
|
156
|
-
BlogPulseLive
|
157
|
-
BlogSearch
|
158
|
-
Blogtrottr
|
159
|
-
BlowFish
|
160
|
-
boitho\.com-dc
|
161
|
-
Boost\.Beast
|
162
|
-
BPImageWalker
|
163
|
-
Braintree-Webhooks
|
164
|
-
Branch Metrics API
|
165
|
-
Branch-Passthrough
|
166
|
-
Brandprotect
|
167
|
-
BrandVerity
|
168
|
-
Brandwatch
|
169
|
-
Brodie\/
|
170
|
-
Browsershots
|
171
|
-
BUbiNG
|
172
|
-
Buck\/
|
173
|
-
Buddy
|
174
|
-
BuiltWith
|
175
|
-
Bullseye
|
176
|
-
BunnySlippers
|
177
|
-
Burf Search
|
178
|
-
Butterfly\/
|
179
|
-
BuzzSumo
|
180
|
-
CAAM\/[0-9]
|
181
|
-
CakePHP
|
182
|
-
Calculon
|
183
|
-
Canary%20Mail
|
184
|
-
CaretNail
|
185
|
-
catexplorador
|
186
|
-
CC Metadata Scaper
|
187
|
-
Cegbfeieh
|
188
|
-
censys
|
189
|
-
Cerberian Drtrs
|
190
|
-
CERT\.at-Statistics-Survey
|
191
|
-
cg-eye
|
192
|
-
changedetection
|
193
|
-
ChangesMeter
|
194
|
-
Charlotte
|
195
|
-
CheckHost
|
196
|
-
checkprivacy
|
197
|
-
CherryPicker
|
198
|
-
ChinaClaw
|
199
|
-
Chirp\/
|
200
|
-
chkme\.com
|
201
|
-
Chlooe
|
202
|
-
Chromaxa
|
203
|
-
CirrusExplorer
|
204
|
-
CISPA Vulnerability Notification
|
205
|
-
Citoid
|
206
|
-
CJNetworkQuality
|
207
|
-
Clarsentia
|
208
|
-
clips\.ua\.ac\.be
|
209
|
-
Cloud mapping
|
210
|
-
CloudEndure
|
211
|
-
CloudFlare-AlwaysOnline
|
212
|
-
Cloudflare-Healthchecks
|
213
|
-
Cloudinary
|
214
|
-
cmcm\.com
|
215
|
-
coccoc
|
216
|
-
cognitiveseo
|
217
|
-
colly -
|
218
|
-
CommaFeed
|
219
|
-
Commons-HttpClient
|
220
|
-
commonscan
|
221
|
-
contactbigdatafr
|
222
|
-
contentkingapp
|
223
|
-
convera
|
224
|
-
CookieReports
|
225
|
-
copyright sheriff
|
226
|
-
CopyRightCheck
|
227
|
-
Copyscape
|
228
|
-
cortex\/
|
229
|
-
Cosmos4j\.feedback
|
230
|
-
Covario-IDS
|
231
|
-
Craw\/
|
232
|
-
Crescent
|
233
|
-
Crowsnest
|
234
|
-
Criteo
|
235
|
-
CSHttp
|
236
|
-
CSSCheck
|
237
|
-
curb
|
238
|
-
Curious George
|
239
|
-
curl
|
240
|
-
cuwhois\/
|
241
|
-
cybo\.com
|
242
|
-
DAP\/NetHTTP
|
243
|
-
DareBoost
|
244
|
-
DatabaseDriverMysqli
|
245
|
-
DataCha0s
|
246
|
-
Datafeedwatch
|
247
|
-
Datanyze
|
248
|
-
DataparkSearch
|
249
|
-
dataprovider
|
250
|
-
DataXu
|
251
|
-
Daum(oa)?[ \/][0-9]
|
252
|
-
dBpoweramp
|
253
|
-
ddline
|
254
|
-
deeris
|
255
|
-
delve\.ai
|
256
|
-
Demon
|
257
|
-
DeuSu
|
258
|
-
developers\.google\.com\/\+\/web\/snippet\/
|
259
|
-
Devil
|
260
|
-
Digg
|
261
|
-
Digincore
|
262
|
-
DigitalPebble
|
263
|
-
Dirbuster
|
264
|
-
Discourse Forum Onebox
|
265
|
-
Disqus\/
|
266
|
-
Dispatch\/
|
267
|
-
DittoSpyder
|
268
|
-
dlvr
|
269
|
-
DMBrowser
|
270
|
-
DNSPod-reporting
|
271
|
-
docoloc
|
272
|
-
Dolphin http client
|
273
|
-
DomainAppender
|
274
|
-
DomainLabz
|
275
|
-
Donuts Content Explorer
|
276
|
-
dotMailer content retrieval
|
277
|
-
dotSemantic
|
278
|
-
downforeveryoneorjustme
|
279
|
-
Download Wonder
|
280
|
-
downnotifier
|
281
|
-
DowntimeDetector
|
282
|
-
Drip
|
283
|
-
drupact
|
284
|
-
Drupal \(\+http:\/\/drupal\.org\/\)
|
285
|
-
DTS Agent
|
286
|
-
dubaiindex
|
287
|
-
DuplexWeb-Google
|
288
|
-
DynatraceSynthetic
|
289
|
-
EARTHCOM
|
290
|
-
Easy-Thumb
|
291
|
-
EasyDL
|
292
|
-
Ebingbong
|
293
|
-
ec2linkfinder
|
294
|
-
eCairn-Grabber
|
295
|
-
eCatch
|
296
|
-
ECCP
|
297
|
-
eContext\/
|
298
|
-
Ecxi
|
299
|
-
EirGrabber
|
300
|
-
ElectricMonk
|
301
|
-
elefent
|
302
|
-
EMail Exractor
|
303
|
-
EMail Wolf
|
304
|
-
EmailWolf
|
305
|
-
Embarcadero
|
306
|
-
Embed PHP Library
|
307
|
-
Embedly
|
308
|
-
endo\/
|
309
|
-
europarchive\.org
|
310
|
-
evc-batch
|
311
|
-
EventMachine HttpClient
|
312
|
-
Everwall Link Expander
|
313
|
-
Evidon
|
314
|
-
Evrinid
|
315
|
-
ExactSearch
|
316
|
-
ExaleadCloudview
|
317
|
-
Excel\/
|
318
|
-
exif
|
319
|
-
ExoRank
|
320
|
-
Exploratodo
|
321
|
-
Express WebPictures
|
322
|
-
Extreme Picture Finder
|
323
|
-
EyeNetIE
|
324
|
-
ezooms
|
325
|
-
facebookexternalhit
|
326
|
-
facebookexternalua
|
327
|
-
facebookplatform
|
328
|
-
fairshare
|
329
|
-
Faraday v
|
330
|
-
fasthttp
|
331
|
-
Faveeo
|
332
|
-
Favicon downloader
|
333
|
-
faviconkit
|
334
|
-
faviconarchive
|
335
|
-
FavOrg
|
336
|
-
Feed Wrangler
|
337
|
-
Feedable\/
|
338
|
-
Feedbin
|
339
|
-
FeedBooster
|
340
|
-
FeedBucket
|
341
|
-
FeedBunch\/
|
342
|
-
FeedBurner
|
343
|
-
feeder
|
344
|
-
Feedly
|
345
|
-
FeedshowOnline
|
346
|
-
Feedspot
|
347
|
-
Feedwind\/
|
348
|
-
FeedZcollector
|
349
|
-
feeltiptop
|
350
|
-
Fetch API
|
351
|
-
Fetch\/[0-9]
|
352
|
-
Fever\/[0-9]
|
353
|
-
FHscan
|
354
|
-
Filestack
|
355
|
-
Fimap
|
356
|
-
findlink
|
357
|
-
findthatfile
|
358
|
-
FlashGet
|
359
|
-
FlipboardBrowserProxy
|
360
|
-
FlipboardProxy
|
361
|
-
FlipboardRSS
|
362
|
-
Flock\/
|
363
|
-
fluffy
|
364
|
-
Flunky
|
365
|
-
flynxapp
|
366
|
-
forensiq
|
367
|
-
FoundSeoTool
|
368
|
-
http:\/\/www.neomo.de\/
|
369
|
-
free thumbnails
|
370
|
-
Freeuploader
|
371
|
-
Funnelback
|
372
|
-
Fuzz Faster U Fool
|
373
|
-
G-i-g-a-b-o-t
|
374
|
-
g00g1e\.net
|
375
|
-
ganarvisitas
|
376
|
-
geek-tools
|
377
|
-
Genieo
|
378
|
-
GentleSource
|
379
|
-
GetCode
|
380
|
-
Getintent
|
381
|
-
GetLinkInfo
|
382
|
-
getprismatic
|
383
|
-
GetRight
|
384
|
-
getroot
|
385
|
-
GetURLInfo\/
|
386
|
-
GetWeb
|
387
|
-
Geziyor
|
388
|
-
Ghost Inspector
|
389
|
-
GigablastOpenSource
|
390
|
-
GIS-LABS
|
391
|
-
github-camo
|
392
|
-
github\.com
|
393
|
-
Goldfire Server
|
394
|
-
Go [\d\.]* package http
|
395
|
-
Go http package
|
396
|
-
Go-Ahead-Got-It
|
397
|
-
Go-http-client
|
398
|
-
Go!Zilla
|
399
|
-
gobyus
|
400
|
-
gofetch
|
401
|
-
GomezAgent
|
402
|
-
gooblog
|
403
|
-
Goodzer\/
|
404
|
-
Google AppsViewer
|
405
|
-
Google Desktop
|
406
|
-
Google favicon
|
407
|
-
Google Keyword Suggestion
|
408
|
-
Google Keyword Tool
|
409
|
-
Google Page Speed Insights
|
410
|
-
Google PP Default
|
411
|
-
Google Search Console
|
412
|
-
Google Web Preview
|
413
|
-
Google-Ads-Overview
|
414
|
-
Google-Adwords
|
415
|
-
Google-Apps-Script
|
416
|
-
Google-Calendar-Importer
|
417
|
-
Google-HotelAdsVerifier
|
418
|
-
Google-HTTP-Java-Client
|
419
|
-
Google-Publisher-Plugin
|
420
|
-
Google-Read-Aloud
|
421
|
-
Google-SearchByImage
|
422
|
-
Google-Site-Verification
|
423
|
-
Google-speakr
|
424
|
-
Google-Structured-Data-Testing-Tool
|
425
|
-
Google-Youtube-Links
|
426
|
-
google-xrawler
|
427
|
-
GoogleDocs
|
428
|
-
GoogleHC\/
|
429
|
-
GoogleProducer
|
430
|
-
GoogleSites
|
431
|
-
Google-Transparency-Report
|
432
|
-
Gookey
|
433
|
-
GoSpotCheck
|
434
|
-
gosquared-thumbnailer
|
435
|
-
Gotit
|
436
|
-
GoZilla
|
437
|
-
grabify
|
438
|
-
GrabNet
|
439
|
-
Grafula
|
440
|
-
Grammarly
|
441
|
-
GrapeFX
|
442
|
-
GreatNews
|
443
|
-
Gregarius
|
444
|
-
GRequests
|
445
|
-
grokkit
|
446
|
-
grouphigh
|
447
|
-
grub-client
|
448
|
-
gSOAP\/
|
449
|
-
GT::WWW
|
450
|
-
GTmetrix
|
451
|
-
GuzzleHttp
|
452
|
-
gvfs\/
|
453
|
-
HAA(A)?RTLAND http client
|
454
|
-
Haansoft
|
455
|
-
hackney\/
|
456
|
-
Hadi Agent
|
457
|
-
HappyApps-WebCheck
|
458
|
-
Hatena
|
459
|
-
Havij
|
460
|
-
HaxerMen
|
461
|
-
HeadlessChrome
|
462
|
-
HEADMasterSEO
|
463
|
-
HeartRails_Capture
|
464
|
-
help@dataminr\.com
|
465
|
-
heritrix
|
466
|
-
Hexometer
|
467
|
-
historious
|
468
|
-
hkedcity
|
469
|
-
hledejLevne\.cz
|
470
|
-
Hloader
|
471
|
-
HMView
|
472
|
-
Holmes
|
473
|
-
HonesoSearchEngine
|
474
|
-
HootSuite Image proxy
|
475
|
-
Hootsuite-WebFeed
|
476
|
-
hosterstats
|
477
|
-
HostTracker
|
478
|
-
ht:\/\/check
|
479
|
-
htdig
|
480
|
-
HTMLparser
|
481
|
-
htmlyse
|
482
|
-
HTTP Banner Detection
|
483
|
-
HTTP_Compression_Test
|
484
|
-
http_request2
|
485
|
-
http_requester
|
486
|
-
http-get
|
487
|
-
HTTP-Header-Abfrage
|
488
|
-
http-kit
|
489
|
-
http-request\/
|
490
|
-
HTTP-Tiny
|
491
|
-
HTTP::Lite
|
492
|
-
http\.rb\/
|
493
|
-
http_get
|
494
|
-
HttpComponents
|
495
|
-
httphr
|
496
|
-
HTTPMon
|
497
|
-
HTTPie
|
498
|
-
httpRequest
|
499
|
-
httpscheck
|
500
|
-
httpssites_power
|
501
|
-
httpunit
|
502
|
-
HttpUrlConnection
|
503
|
-
httrack
|
504
|
-
huaweisymantec
|
505
|
-
HubSpot
|
506
|
-
Humanlinks
|
507
|
-
i2kconnect\/
|
508
|
-
Iblog
|
509
|
-
ichiro
|
510
|
-
Id-search
|
511
|
-
IdeelaborPlagiaat
|
512
|
-
IDG Twitter Links Resolver
|
513
|
-
IDwhois\/
|
514
|
-
Iframely
|
515
|
-
igdeSpyder
|
516
|
-
IlTrovatore
|
517
|
-
Image Fetch
|
518
|
-
Image Sucker
|
519
|
-
ImageEngine\/
|
520
|
-
ImageVisu\/
|
521
|
-
Imagga
|
522
|
-
imagineeasy
|
523
|
-
imgsizer
|
524
|
-
InAGist
|
525
|
-
inbound\.li parser
|
526
|
-
InDesign%20CC
|
527
|
-
Indy Library
|
528
|
-
InetURL
|
529
|
-
infegy
|
530
|
-
infohelfer
|
531
|
-
InfoTekies
|
532
|
-
InfoWizards Reciprocal Link
|
533
|
-
inpwrd\.com
|
534
|
-
instabid
|
535
|
-
Instapaper
|
536
|
-
Integrity
|
537
|
-
integromedb
|
538
|
-
Intelliseek
|
539
|
-
InterGET
|
540
|
-
internet_archive
|
541
|
-
Internet Ninja
|
542
|
-
InternetSeer
|
543
|
-
internetVista monitor
|
544
|
-
internetwache
|
545
|
-
intraVnews
|
546
|
-
IODC
|
547
|
-
IOI
|
548
|
-
iplabel
|
549
|
-
ips-agent
|
550
|
-
IPS\/[0-9]
|
551
|
-
IPWorks HTTP\/S Component
|
552
|
-
iqdb\/
|
553
|
-
Iria
|
554
|
-
Irokez
|
555
|
-
isitup\.org
|
556
|
-
iskanie
|
557
|
-
isUp\.li
|
558
|
-
iThemes Sync\/
|
559
|
-
IZaBEE
|
560
|
-
iZSearch
|
561
|
-
JAHHO
|
562
|
-
janforman
|
563
|
-
Jaunt\/
|
564
|
-
Jbrofuzz
|
565
|
-
Jersey\/
|
566
|
-
JetCar
|
567
|
-
Jigsaw
|
568
|
-
Jobboerse
|
569
|
-
JobFeed discovery
|
570
|
-
Jobg8 URL Monitor
|
571
|
-
jobo
|
572
|
-
Jobrapido
|
573
|
-
Jobsearch1\.5
|
574
|
-
JoinVision Generic
|
575
|
-
JolokiaPwn
|
576
|
-
Joomla
|
577
|
-
Jorgee
|
578
|
-
JS-Kit
|
579
|
-
JustView
|
580
|
-
Kaspersky Lab CFR link resolver
|
581
|
-
Kelny\/
|
582
|
-
Kerrigan\/
|
583
|
-
KeyCDN
|
584
|
-
Keyword Density
|
585
|
-
Keywords Research
|
586
|
-
khttp\/
|
587
|
-
KickFire
|
588
|
-
KimonoLabs\/
|
589
|
-
Kml-Google
|
590
|
-
knows\.is
|
591
|
-
KOCMOHABT
|
592
|
-
kouio
|
593
|
-
kubectl
|
594
|
-
kube-probe
|
595
|
-
kulturarw3
|
596
|
-
KumKie
|
597
|
-
L\.webis
|
598
|
-
Larbin
|
599
|
-
Lavf\/
|
600
|
-
LeechFTP
|
601
|
-
LeechGet
|
602
|
-
letsencrypt
|
603
|
-
Lftp
|
604
|
-
LibVLC
|
605
|
-
LibWeb
|
606
|
-
Libwhisker
|
607
|
-
libwww
|
608
|
-
Licorne
|
609
|
-
Liferea\/
|
610
|
-
Lightspeedsystems
|
611
|
-
Lighthouse
|
612
|
-
Likse
|
613
|
-
limber\.io
|
614
|
-
Link Valet
|
615
|
-
link_thumbnailer
|
616
|
-
LinkAlarm\/
|
617
|
-
linkCheck
|
618
|
-
linkdex
|
619
|
-
LinkExaminer
|
620
|
-
linkfluence
|
621
|
-
linkpeek
|
622
|
-
LinkPreviewGenerator
|
623
|
-
LinkScan
|
624
|
-
LinksManager
|
625
|
-
LinkTiger
|
626
|
-
LinkWalker
|
627
|
-
Lipperhey
|
628
|
-
Litemage_walker
|
629
|
-
livedoor ScreenShot
|
630
|
-
LoadImpactRload
|
631
|
-
localsearch-web
|
632
|
-
LongURL API
|
633
|
-
longurl-r-package
|
634
|
-
looid\.com
|
635
|
-
looksystems\.net
|
636
|
-
ltx71
|
637
|
-
lua-resty-http
|
638
|
-
lwp-request
|
639
|
-
lwp-trivial
|
640
|
-
LWP::Simple
|
641
|
-
lycos
|
642
|
-
LYT\.SR
|
643
|
-
mabontland
|
644
|
-
Mag-Net
|
645
|
-
MagpieRSS
|
646
|
-
Mail\.Ru
|
647
|
-
MailChimp
|
648
|
-
Majestic12
|
649
|
-
makecontact\/
|
650
|
-
Mandrill
|
651
|
-
MapperCmd
|
652
|
-
marketinggrader
|
653
|
-
MarkMonitor
|
654
|
-
MarkWatch
|
655
|
-
Mass Downloader
|
656
|
-
masscan\/
|
657
|
-
Mata Hari
|
658
|
-
Mediametric
|
659
|
-
Mediapartners-Google
|
660
|
-
mediawords
|
661
|
-
MegaIndex\.ru
|
662
|
-
MeltwaterNews
|
663
|
-
Melvil Rawi
|
664
|
-
MemGator
|
665
|
-
Metaspinner
|
666
|
-
MetaURI
|
667
|
-
MFC_Tear_Sample
|
668
|
-
MicroMessenger\/
|
669
|
-
Microsearch
|
670
|
-
Microsoft Office
|
671
|
-
Microsoft Outlook
|
672
|
-
Microsoft Windows Network Diagnostics
|
673
|
-
Microsoft-WebDAV-MiniRedir
|
674
|
-
Microsoft Data Access
|
675
|
-
MIDown tool
|
676
|
-
MIIxpc
|
677
|
-
Mindjet
|
678
|
-
Miniature\.io
|
679
|
-
Miniflux
|
680
|
-
Mister PiX
|
681
|
-
mixdata dot com
|
682
|
-
mixed-content-scan
|
683
|
-
Mixmax-LinkPreview
|
684
|
-
mixnode
|
685
|
-
Mnogosearch
|
686
|
-
mogimogi
|
687
|
-
Mojeek
|
688
|
-
Mojolicious \(Perl\)
|
689
|
-
Monit\/
|
690
|
-
monitis
|
691
|
-
Monitority\/
|
692
|
-
montastic
|
693
|
-
MonTools
|
694
|
-
Moreover
|
695
|
-
Morfeus Fucking Scanner
|
696
|
-
Morning Paper
|
697
|
-
MovableType
|
698
|
-
mowser
|
699
|
-
Mr\.4x3 Powered
|
700
|
-
Mrcgiguy
|
701
|
-
MS Web Services Client Protocol
|
702
|
-
MSFrontPage
|
703
|
-
mShots
|
704
|
-
MuckRack\/
|
705
|
-
muhstik-scan
|
706
|
-
MVAClient
|
707
|
-
MxToolbox\/
|
708
|
-
nagios
|
709
|
-
Najdi\.si
|
710
|
-
Name Intelligence
|
711
|
-
Nameprotect
|
712
|
-
Navroad
|
713
|
-
NearSite
|
714
|
-
Needle
|
715
|
-
Nessus
|
716
|
-
Net Vampire
|
717
|
-
NetAnts
|
718
|
-
NETCRAFT
|
719
|
-
NetLyzer
|
720
|
-
NetMechanic
|
721
|
-
NetNewsWire
|
722
|
-
Netpursual
|
723
|
-
netresearch
|
724
|
-
NetShelter ContentScan
|
725
|
-
Netsparker
|
726
|
-
NetTrack
|
727
|
-
Netvibes
|
728
|
-
NetZIP
|
729
|
-
Neustar WPM
|
730
|
-
NeutrinoAPI
|
731
|
-
NewRelicPinger
|
732
|
-
NewsBlur .*Finder
|
733
|
-
NewsGator
|
734
|
-
newsme
|
735
|
-
newspaper\/
|
736
|
-
NetSystemsResearch
|
737
|
-
Nexgate Ruby Client
|
738
|
-
NG-Search
|
739
|
-
Nibbler
|
740
|
-
NICErsPRO
|
741
|
-
Nikto
|
742
|
-
nineconnections
|
743
|
-
NLNZ_IAHarvester
|
744
|
-
Nmap Scripting Engine
|
745
|
-
node-superagent
|
746
|
-
node-urllib
|
747
|
-
node\.io
|
748
|
-
Nodemeter
|
749
|
-
NodePing
|
750
|
-
nominet\.org\.uk
|
751
|
-
nominet\.uk
|
752
|
-
Norton-Safeweb
|
753
|
-
Notifixious
|
754
|
-
notifyninja
|
755
|
-
NotionEmbedder
|
756
|
-
nuhk
|
757
|
-
nutch
|
758
|
-
Nuzzel
|
759
|
-
nWormFeedFinder
|
760
|
-
nyawc\/
|
761
|
-
Nymesis
|
762
|
-
NYU
|
763
|
-
Ocelli\/
|
764
|
-
Octopus
|
765
|
-
oegp
|
766
|
-
Offline Explorer
|
767
|
-
Offline Navigator
|
768
|
-
OgScrper
|
769
|
-
okhttp
|
770
|
-
omgili
|
771
|
-
OMSC
|
772
|
-
Online Domain Tools
|
773
|
-
OpenCalaisSemanticProxy
|
774
|
-
Openfind
|
775
|
-
OpenLinkProfiler
|
776
|
-
Openstat\/
|
777
|
-
OpenVAS
|
778
|
-
OPPO A33
|
779
|
-
Optimizer
|
780
|
-
Orbiter
|
781
|
-
OrgProbe\/
|
782
|
-
orion-semantics
|
783
|
-
Outlook-Express
|
784
|
-
Outlook-iOS
|
785
|
-
ow\.ly
|
786
|
-
Owler
|
787
|
-
ownCloud News
|
788
|
-
OxfordCloudService
|
789
|
-
Page Valet
|
790
|
-
page_verifier
|
791
|
-
page scorer
|
792
|
-
page2rss
|
793
|
-
PageFreezer
|
794
|
-
PageGrabber
|
795
|
-
PagePeeker
|
796
|
-
PageScorer
|
797
|
-
Pagespeed\/
|
798
|
-
Panopta
|
799
|
-
panscient
|
800
|
-
Papa Foto
|
801
|
-
parsijoo
|
802
|
-
Pavuk
|
803
|
-
PayPal IPN
|
804
|
-
pcBrowser
|
805
|
-
Pcore-HTTP
|
806
|
-
Pearltrees
|
807
|
-
PECL::HTTP
|
808
|
-
peerindex
|
809
|
-
Peew
|
810
|
-
PeoplePal
|
811
|
-
Perlu -
|
812
|
-
PhantomJS Screenshoter
|
813
|
-
PhantomJS\/
|
814
|
-
Photon\/
|
815
|
-
phpservermon
|
816
|
-
Pi-Monster
|
817
|
-
Picscout
|
818
|
-
Picsearch
|
819
|
-
PictureFinder
|
820
|
-
Pimonster
|
821
|
-
ping\.blo\.gs
|
822
|
-
Pingability
|
823
|
-
PingAdmin\.Ru
|
824
|
-
Pingdom
|
825
|
-
Pingoscope
|
826
|
-
PingSpot
|
827
|
-
pinterest\.com
|
828
|
-
Pixray
|
829
|
-
Pizilla
|
830
|
-
Plagger\/
|
831
|
-
Ploetz \+ Zeller
|
832
|
-
Plukkie
|
833
|
-
plumanalytics
|
834
|
-
PocketImageCache
|
835
|
-
PocketParser
|
836
|
-
Pockey
|
837
|
-
POE-Component-Client-HTTP
|
838
|
-
Polymail\/
|
839
|
-
Pompos
|
840
|
-
Porkbun
|
841
|
-
Port Monitor
|
842
|
-
postano
|
843
|
-
PostmanRuntime
|
844
|
-
PostPost
|
845
|
-
postrank
|
846
|
-
PowerPoint\/
|
847
|
-
Prebid
|
848
|
-
Priceonomics Analysis Engine
|
849
|
-
PrintFriendly
|
850
|
-
PritTorrent
|
851
|
-
Prlog
|
852
|
-
probethenet
|
853
|
-
Project 25499
|
854
|
-
prospectb2b
|
855
|
-
Protopage
|
856
|
-
ProWebWalker
|
857
|
-
proximic
|
858
|
-
PRTG Network Monitor
|
859
|
-
pshtt, https scanning
|
860
|
-
PTST
|
861
|
-
PTST\/[0-9]+
|
862
|
-
Pump
|
863
|
-
python-httpx
|
864
|
-
Python-httplib2
|
865
|
-
python-requests
|
866
|
-
Python-urllib
|
867
|
-
Qirina Hurdler
|
868
|
-
QQDownload
|
869
|
-
QrafterPro
|
870
|
-
Qseero
|
871
|
-
Qualidator
|
872
|
-
QueryN Metasearch
|
873
|
-
queuedriver
|
874
|
-
Quora Link Preview
|
875
|
-
Qwantify
|
876
|
-
Radian6
|
877
|
-
RankActive
|
878
|
-
RankFlex
|
879
|
-
RankSonicSiteAuditor
|
880
|
-
Re-re Studio
|
881
|
-
ReactorNetty
|
882
|
-
Readability
|
883
|
-
RealDownload
|
884
|
-
RealPlayer%20Downloader
|
885
|
-
RebelMouse
|
886
|
-
Recorder
|
887
|
-
RecurPost\/
|
888
|
-
redback\/
|
889
|
-
ReederForMac
|
890
|
-
Reeder\/
|
891
|
-
ReGet
|
892
|
-
RepoMonkey
|
893
|
-
request\.js
|
894
|
-
reqwest\/
|
895
|
-
ResponseCodeTest
|
896
|
-
RestSharp
|
897
|
-
Riddler
|
898
|
-
Rival IQ
|
899
|
-
Robosourcer
|
900
|
-
Robozilla
|
901
|
-
ROI Hunter
|
902
|
-
RPT-HTTPClient
|
903
|
-
RSSOwl
|
904
|
-
RyowlEngine
|
905
|
-
safe-agent-scanner
|
906
|
-
SalesIntelligent
|
907
|
-
Saleslift
|
908
|
-
Sendsay\.Ru
|
909
|
-
SauceNAO
|
910
|
-
SBIder
|
911
|
-
sc-downloader
|
912
|
-
scalaj-http
|
913
|
-
Scamadviser-Frontend
|
914
|
-
scan\.lol
|
915
|
-
ScanAlert
|
916
|
-
Scoop
|
917
|
-
scooter
|
918
|
-
ScoutJet
|
919
|
-
ScoutURLMonitor
|
920
|
-
ScrapeBox Page Scanner
|
921
|
-
Scrapy
|
922
|
-
Screaming
|
923
|
-
ScreenShotService
|
924
|
-
Scrubby
|
925
|
-
Scrutiny\/
|
926
|
-
search\.thunderstone
|
927
|
-
Search37
|
928
|
-
searchenginepromotionhelp
|
929
|
-
Searchestate
|
930
|
-
SearchExpress
|
931
|
-
SearchSight
|
932
|
-
Seeker
|
933
|
-
semanticdiscovery
|
934
|
-
semanticjuice
|
935
|
-
Semiocast HTTP client
|
936
|
-
Semrush
|
937
|
-
sentry\/
|
938
|
-
SEO Browser
|
939
|
-
Seo Servis
|
940
|
-
seo-nastroj\.cz
|
941
|
-
seo4ajax
|
942
|
-
Seobility
|
943
|
-
SEOCentro
|
944
|
-
SeoCheck
|
945
|
-
SEOkicks
|
946
|
-
SEOlizer
|
947
|
-
Seomoz
|
948
|
-
SEOprofiler
|
949
|
-
SEOsearch
|
950
|
-
seoscanners
|
951
|
-
seositecheckup
|
952
|
-
SEOstats
|
953
|
-
servernfo
|
954
|
-
sexsearcher
|
955
|
-
Seznam
|
956
|
-
Shelob
|
957
|
-
Shodan
|
958
|
-
Shoppimon
|
959
|
-
ShopWiki
|
960
|
-
shortURL lengthener
|
961
|
-
ShortLinkTranslate
|
962
|
-
shrinktheweb
|
963
|
-
Sideqik
|
964
|
-
SimplePie
|
965
|
-
SimplyFast
|
966
|
-
Siphon
|
967
|
-
SISTRIX
|
968
|
-
Site-Shot\/
|
969
|
-
Site Sucker
|
970
|
-
Site24x7
|
971
|
-
SiteBar
|
972
|
-
Sitebeam
|
973
|
-
Sitebulb\/
|
974
|
-
SiteCondor
|
975
|
-
SiteExplorer
|
976
|
-
SiteGuardian
|
977
|
-
Siteimprove
|
978
|
-
SiteIndexed
|
979
|
-
Sitemap(s)? Generator
|
980
|
-
SitemapGenerator
|
981
|
-
SiteMonitor
|
982
|
-
Siteshooter B0t
|
983
|
-
SiteSnagger
|
984
|
-
SiteSucker
|
985
|
-
SiteTruth
|
986
|
-
Sitevigil
|
987
|
-
sitexy\.com
|
988
|
-
SkypeUriPreview
|
989
|
-
Slack\/
|
990
|
-
slider\.com
|
991
|
-
slurp
|
992
|
-
SlySearch
|
993
|
-
SmartDownload
|
994
|
-
SMRF URL Expander
|
995
|
-
SMUrlExpander
|
996
|
-
Snake
|
997
|
-
Snappy
|
998
|
-
SnapSearch
|
999
|
-
Snarfer\/
|
1000
|
-
SniffRSS
|
1001
|
-
sniptracker
|
1002
|
-
Snoopy
|
1003
|
-
SnowHaze Search
|
1004
|
-
sogou web
|
1005
|
-
SortSite
|
1006
|
-
Sottopop
|
1007
|
-
sovereign\.ai
|
1008
|
-
SpaceBison
|
1009
|
-
SpamExperts
|
1010
|
-
Spammen
|
1011
|
-
Spanner
|
1012
|
-
spaziodati
|
1013
|
-
SPDYCheck
|
1014
|
-
Specificfeeds
|
1015
|
-
speedy
|
1016
|
-
SPEng
|
1017
|
-
Spinn3r
|
1018
|
-
spray-can
|
1019
|
-
Sprinklr
|
1020
|
-
spyonweb
|
1021
|
-
sqlmap
|
1022
|
-
Sqlworm
|
1023
|
-
Sqworm
|
1024
|
-
SSL Labs
|
1025
|
-
ssl-tools
|
1026
|
-
StackRambler
|
1027
|
-
Statastico\/
|
1028
|
-
StatusCake
|
1029
|
-
Steeler
|
1030
|
-
Stratagems Kumo
|
1031
|
-
Stroke\.cz
|
1032
|
-
StudioFACA
|
1033
|
-
StumbleUpon
|
1034
|
-
suchen
|
1035
|
-
Sucuri
|
1036
|
-
summify
|
1037
|
-
SuperHTTP
|
1038
|
-
Surphace Scout
|
1039
|
-
Suzuran
|
1040
|
-
Symfony BrowserKit
|
1041
|
-
Symfony2 BrowserKit
|
1042
|
-
SynHttpClient-Built
|
1043
|
-
Sysomos
|
1044
|
-
sysscan
|
1045
|
-
Szukacz
|
1046
|
-
T0PHackTeam
|
1047
|
-
tAkeOut
|
1048
|
-
Tarantula\/
|
1049
|
-
Taringa UGC
|
1050
|
-
TarmotGezgin
|
1051
|
-
Teleport
|
1052
|
-
Telesoft
|
1053
|
-
Telesphoreo
|
1054
|
-
Telesphorep
|
1055
|
-
Tenon\.io
|
1056
|
-
teoma
|
1057
|
-
terrainformatica
|
1058
|
-
Test Certificate Info
|
1059
|
-
testuri
|
1060
|
-
Tetrahedron
|
1061
|
-
TextRazor Downloader
|
1062
|
-
The Drop Reaper
|
1063
|
-
The Expert HTML Source Viewer
|
1064
|
-
The Knowledge AI
|
1065
|
-
The Intraformant
|
1066
|
-
theinternetrules
|
1067
|
-
TheNomad
|
1068
|
-
Thinklab
|
1069
|
-
Thumbshots
|
1070
|
-
ThumbSniper
|
1071
|
-
Thumbor
|
1072
|
-
timewe\.net
|
1073
|
-
TinEye
|
1074
|
-
Tiny Tiny RSS
|
1075
|
-
TLSProbe\/
|
1076
|
-
Toata
|
1077
|
-
topster
|
1078
|
-
touche\.com
|
1079
|
-
Traackr\.com
|
1080
|
-
tracemyfile
|
1081
|
-
Trackuity
|
1082
|
-
TrapitAgent
|
1083
|
-
Trendiction
|
1084
|
-
Trendsmap
|
1085
|
-
trendspottr
|
1086
|
-
truwoGPS
|
1087
|
-
TryJsoup
|
1088
|
-
TulipChain
|
1089
|
-
Turingos
|
1090
|
-
Turnitin
|
1091
|
-
tweetedtimes
|
1092
|
-
Tweetminster
|
1093
|
-
Tweezler\/
|
1094
|
-
twibble
|
1095
|
-
Twice
|
1096
|
-
Twikle
|
1097
|
-
Twingly
|
1098
|
-
Twisted PageGetter
|
1099
|
-
Typhoeus
|
1100
|
-
ubermetrics-technologies
|
1101
|
-
uclassify
|
1102
|
-
UdmSearch
|
1103
|
-
unchaos
|
1104
|
-
unirest-java
|
1105
|
-
UniversalFeedParser
|
1106
|
-
Unshorten\.It
|
1107
|
-
Untiny
|
1108
|
-
UnwindFetchor
|
1109
|
-
updated
|
1110
|
-
updown\.io daemon
|
1111
|
-
Upflow
|
1112
|
-
Uptimia
|
1113
|
-
Urlcheckr
|
1114
|
-
URL Verifier
|
1115
|
-
URLitor
|
1116
|
-
urlresolver
|
1117
|
-
Urlstat
|
1118
|
-
URLTester
|
1119
|
-
UrlTrends Ranking Updater
|
1120
|
-
URLy Warning
|
1121
|
-
URLy\.Warning
|
1122
|
-
Vacuum
|
1123
|
-
Vagabondo
|
1124
|
-
VB Project
|
1125
|
-
vBSEO
|
1126
|
-
VCI
|
1127
|
-
via ggpht\.com GoogleImageProxy
|
1128
|
-
Virusdie
|
1129
|
-
visionutils
|
1130
|
-
vkShare
|
1131
|
-
VoidEYE
|
1132
|
-
Voil
|
1133
|
-
voltron
|
1134
|
-
voyager\/
|
1135
|
-
VSAgent\/
|
1136
|
-
VSB-TUO\/
|
1137
|
-
Vulnbusters Meter
|
1138
|
-
VYU2
|
1139
|
-
w3af\.org
|
1140
|
-
W3C_Unicorn
|
1141
|
-
W3C-checklink
|
1142
|
-
W3C-mobileOK
|
1143
|
-
WAC-OFU
|
1144
|
-
Wallpapers\/[0-9]+
|
1145
|
-
WallpapersHD
|
1146
|
-
wangling
|
1147
|
-
Wappalyzer
|
1148
|
-
WatchMouse
|
1149
|
-
WbSrch\/
|
1150
|
-
WDT\.io
|
1151
|
-
web-capture\.net
|
1152
|
-
Web-sniffer
|
1153
|
-
Web Auto
|
1154
|
-
Web Collage
|
1155
|
-
Web Enhancer
|
1156
|
-
Web Fetch
|
1157
|
-
Web Fuck
|
1158
|
-
Web Pix
|
1159
|
-
Web Sauger
|
1160
|
-
Web spyder
|
1161
|
-
Web Sucker
|
1162
|
-
Webalta
|
1163
|
-
Webauskunft
|
1164
|
-
WebAuto
|
1165
|
-
WebCapture
|
1166
|
-
WebClient\/
|
1167
|
-
webcollage
|
1168
|
-
WebCookies
|
1169
|
-
WebCopier
|
1170
|
-
WebCorp
|
1171
|
-
WebDataStats
|
1172
|
-
WebDoc
|
1173
|
-
WebEnhancer
|
1174
|
-
WebFetch
|
1175
|
-
WebFuck
|
1176
|
-
WebGazer
|
1177
|
-
WebGo IS
|
1178
|
-
WebImageCollector
|
1179
|
-
WebImages
|
1180
|
-
WebIndex
|
1181
|
-
webkit2png
|
1182
|
-
WebLeacher
|
1183
|
-
webmastercoffee
|
1184
|
-
webmon\s
|
1185
|
-
WebPix
|
1186
|
-
WebReaper
|
1187
|
-
WebSauger
|
1188
|
-
webscreenie
|
1189
|
-
Webshag
|
1190
|
-
Webshot
|
1191
|
-
Website Quester
|
1192
|
-
websitepulse agent
|
1193
|
-
WebsiteQuester
|
1194
|
-
Websnapr
|
1195
|
-
WebSniffer
|
1196
|
-
Webster
|
1197
|
-
WebStripper
|
1198
|
-
WebSucker
|
1199
|
-
Webthumb\/
|
1200
|
-
WebThumbnail
|
1201
|
-
WebWhacker
|
1202
|
-
WebZIP
|
1203
|
-
WeLikeLinks
|
1204
|
-
WEPA
|
1205
|
-
WeSEE
|
1206
|
-
wf84
|
1207
|
-
Wfuzz\/
|
1208
|
-
wget
|
1209
|
-
WhatsApp
|
1210
|
-
WhatsMyIP
|
1211
|
-
WhatWeb
|
1212
|
-
WhereGoes\?
|
1213
|
-
Whibse
|
1214
|
-
WhoRunsCoinHive
|
1215
|
-
Whynder Magnet
|
1216
|
-
WinHttp-Autoproxy-Service
|
1217
|
-
Windows-RSS-Platform
|
1218
|
-
WinPodder
|
1219
|
-
wkhtmlto
|
1220
|
-
wmtips
|
1221
|
-
Woko
|
1222
|
-
Wolfram HTTPClient
|
1223
|
-
woorankreview
|
1224
|
-
Word\/
|
1225
|
-
WordPress\/
|
1226
|
-
worldping-api
|
1227
|
-
WordupinfoSearch
|
1228
|
-
wotbox
|
1229
|
-
WP Engine Install Performance API
|
1230
|
-
wpif
|
1231
|
-
wprecon\.com survey
|
1232
|
-
WPScan
|
1233
|
-
wscheck
|
1234
|
-
Wtrace
|
1235
|
-
WWW-Collector-E
|
1236
|
-
WWW-Mechanize
|
1237
|
-
WWW::Document
|
1238
|
-
WWW::Mechanize
|
1239
|
-
www\.monitor\.us
|
1240
|
-
WWWOFFLE
|
1241
|
-
x09Mozilla
|
1242
|
-
x22Mozilla
|
1243
|
-
XaxisSemanticsClassifier
|
1244
|
-
Xenu Link Sleuth
|
1245
|
-
XING-contenttabreceiver
|
1246
|
-
xpymep([0-9]?)\.exe
|
1247
|
-
Y!J-(ASR|BSC)
|
1248
|
-
Y\!J-BRW
|
1249
|
-
Yaanb
|
1250
|
-
yacy
|
1251
|
-
Yahoo Link Preview
|
1252
|
-
YahooCacheSystem
|
1253
|
-
YahooYSMcm
|
1254
|
-
YandeG
|
1255
|
-
Yandex(?!Search)
|
1256
|
-
yanga
|
1257
|
-
yeti
|
1258
|
-
Yo-yo
|
1259
|
-
Yoleo Consumer
|
1260
|
-
yoogliFetchAgent
|
1261
|
-
YottaaMonitor
|
1262
|
-
Your-Website-Sucks
|
1263
|
-
yourls\.org
|
1264
|
-
YoYs\.net
|
1265
|
-
YP\.PL
|
1266
|
-
Zabbix
|
1267
|
-
Zade
|
1268
|
-
Zao
|
1269
|
-
Zauba
|
1270
|
-
Zemanta Aggregator
|
1271
|
-
Zend_Http_Client
|
1272
|
-
Zend\\\Http\\\Client
|
1273
|
-
Zermelo
|
1274
|
-
Zeus
|
1275
|
-
zgrab
|
1276
|
-
ZnajdzFoto
|
1277
|
-
ZnHTTP
|
1278
|
-
Zombie\.js
|
1279
|
-
Zoom\.Mac
|
1280
|
-
ZyBorg
|
1281
|
-
[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)
|
1282
|
-
].strip.split(/\n+/).freeze
|
7
|
+
extend Loader
|
8
|
+
|
9
|
+
def self.data
|
10
|
+
@data ||= load_raw(CrawlerDetect.config.settings.raw_crawlers_path).freeze
|
11
|
+
end
|
1283
12
|
end
|
1284
13
|
end
|
1285
14
|
end
|