crawler_detect 0.1.9 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -168
- data/.travis.yml +16 -6
- data/CHANGELOG.md +32 -0
- data/Gemfile.lock +119 -0
- data/README.md +13 -0
- data/bin/update_raw_files +10 -0
- data/crawler_detect.gemspec +7 -3
- data/lib/crawler_detect.rb +29 -7
- data/lib/crawler_detect/config.rb +29 -0
- data/lib/crawler_detect/detector.rb +27 -14
- data/lib/crawler_detect/library.rb +9 -3
- data/lib/crawler_detect/library/crawlers.rb +6 -1249
- data/lib/crawler_detect/library/exclusions.rb +6 -50
- data/lib/crawler_detect/library/headers.rb +6 -17
- data/lib/crawler_detect/library/loader.rb +18 -0
- data/lib/crawler_detect/library/raw/Crawlers.json +1 -0
- data/lib/crawler_detect/library/raw/Exclusions.json +1 -0
- data/lib/crawler_detect/library/raw/Headers.json +1 -0
- data/lib/crawler_detect/version.rb +2 -1
- data/lib/rack/crawler_detect.rb +22 -19
- metadata +55 -6
data/lib/crawler_detect.rb
CHANGED
@@ -1,24 +1,46 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "
|
4
|
-
require "
|
5
|
-
require "crawler_detect/library/crawlers"
|
6
|
-
require "crawler_detect/library/exclusions"
|
7
|
-
require "crawler_detect/library/headers"
|
8
|
-
require "crawler_detect/version"
|
3
|
+
require "oj"
|
4
|
+
require "qonfig"
|
9
5
|
|
10
|
-
|
6
|
+
require_relative "crawler_detect/config"
|
7
|
+
require_relative "crawler_detect/detector"
|
8
|
+
require_relative "crawler_detect/library/loader"
|
9
|
+
require_relative "crawler_detect/library/crawlers"
|
10
|
+
require_relative "crawler_detect/library/exclusions"
|
11
|
+
require_relative "crawler_detect/library/headers"
|
12
|
+
require_relative "crawler_detect/library"
|
13
|
+
require_relative "crawler_detect/version"
|
14
|
+
require_relative "rack/crawler_detect"
|
11
15
|
|
16
|
+
# @since 0.1.0
|
12
17
|
module CrawlerDetect
|
13
18
|
class << self
|
19
|
+
# @param user_agent [String] User-agent string to detect
|
20
|
+
# @return [CrawlerDetect::Detector] Instance of detector class
|
14
21
|
def new(user_agent)
|
15
22
|
detector(user_agent)
|
16
23
|
end
|
17
24
|
|
25
|
+
# @param user_agent [String] User-agent string to detect
|
26
|
+
# @return [true, false] Is User-agent a crawler?
|
18
27
|
def is_crawler?(user_agent)
|
19
28
|
detector(user_agent).is_crawler?
|
20
29
|
end
|
21
30
|
|
31
|
+
# @since 1.0.0
|
32
|
+
# @param config [Proc]
|
33
|
+
def setup!(&config)
|
34
|
+
@config = CrawlerDetect::Config.new(&config)
|
35
|
+
Library::DATA_CLASSES.each(&:reload_data)
|
36
|
+
end
|
37
|
+
|
38
|
+
# @since 1.0.0
|
39
|
+
# @return [CrawlerDetect::Config] Instance of configuration class
|
40
|
+
def config
|
41
|
+
@config ||= CrawlerDetect::Config.new
|
42
|
+
end
|
43
|
+
|
22
44
|
private
|
23
45
|
|
24
46
|
def detector(user_agent)
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CrawlerDetect
|
4
|
+
# Configuration of CrawlerDetect
|
5
|
+
#
|
6
|
+
# @see settings
|
7
|
+
# @since 1.0.0
|
8
|
+
class Config < ::Qonfig::DataSet
|
9
|
+
CUR_PATH = File.dirname(File.expand_path(__FILE__)).freeze
|
10
|
+
RAW_PATH = File.join(CUR_PATH, "library/raw").freeze
|
11
|
+
|
12
|
+
RAW_CRAWLERS_PATH = File.join(RAW_PATH, "Crawlers.json").freeze
|
13
|
+
RAW_EXCLUSIONS_PATH = File.join(RAW_PATH, "Exclusions.json").freeze
|
14
|
+
RAW_HEADERS_PATH = File.join(RAW_PATH, "Headers.json").freeze
|
15
|
+
|
16
|
+
# @return [String] path to crawlers raw JSON file
|
17
|
+
setting :raw_crawlers_path, RAW_CRAWLERS_PATH
|
18
|
+
|
19
|
+
# @return [String] path to exclusions raw JSON file
|
20
|
+
setting :raw_exclusions_path, RAW_EXCLUSIONS_PATH
|
21
|
+
|
22
|
+
# @return [String] path to headers raw JSON file
|
23
|
+
setting :raw_headers_path, RAW_HEADERS_PATH
|
24
|
+
|
25
|
+
validate :raw_crawlers_path, :string, strict: true
|
26
|
+
validate :raw_exclusions_path, :string, strict: true
|
27
|
+
validate :raw_headers_path, :string, strict: true
|
28
|
+
end
|
29
|
+
end
|
@@ -1,17 +1,22 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module CrawlerDetect
|
4
|
+
# @since 0.1.0
|
4
5
|
class Detector
|
6
|
+
# @param user_agent [String] User-agent string to detect
|
7
|
+
# @return [CrawlerDetect::Detector] instance of detector class
|
5
8
|
def initialize(user_agent)
|
6
9
|
@user_agent = user_agent.to_s.dup
|
7
10
|
end
|
8
11
|
|
12
|
+
# @return [true, false] Is User-agent a crawler?
|
9
13
|
def is_crawler?
|
10
14
|
@is_crawler ||= begin
|
11
15
|
!completely_exclusion? && matches_crawler_list?
|
12
16
|
end
|
13
17
|
end
|
14
18
|
|
19
|
+
# @return [String, nil] The detected crawler name from RAW data, or nil when the User-agent is not a crawler
|
15
20
|
def crawler_name
|
16
21
|
return unless is_crawler?
|
17
22
|
@crawler_name
|
@@ -19,22 +24,30 @@ module CrawlerDetect
|
|
19
24
|
|
20
25
|
private
|
21
26
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
27
|
+
# @private
|
28
|
+
# @return [true, false] Is User-agent in white-list?
|
29
|
+
def completely_exclusion?
|
30
|
+
@user_agent.gsub!(exclusions_matcher, "")
|
31
|
+
@user_agent.strip.length.zero?
|
32
|
+
end
|
26
33
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
34
|
+
# @private
|
35
|
+
# @return [true, false] Is User-agent in black-list?
|
36
|
+
def matches_crawler_list?
|
37
|
+
@crawler_name = crawlers_matcher.match(@user_agent).to_s.strip
|
38
|
+
!@crawler_name.empty?
|
39
|
+
end
|
31
40
|
|
32
|
-
|
33
|
-
|
34
|
-
|
41
|
+
# @private
|
42
|
+
# @return [Regexp] White-list of User-agents
|
43
|
+
def exclusions_matcher
|
44
|
+
CrawlerDetect::Library.get_regexp("exclusions")
|
45
|
+
end
|
35
46
|
|
36
|
-
|
37
|
-
|
38
|
-
|
47
|
+
# @private
|
48
|
+
# @return [Regexp] Black-list of User-agents
|
49
|
+
def crawlers_matcher
|
50
|
+
CrawlerDetect::Library.get_regexp("crawlers")
|
51
|
+
end
|
39
52
|
end
|
40
53
|
end
|
@@ -1,16 +1,22 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module CrawlerDetect
|
4
|
+
# @since 0.1.0
|
4
5
|
module Library
|
6
|
+
DATA_CLASSES = [Library::Headers, Library::Exclusions, Library::Crawlers].freeze
|
7
|
+
|
5
8
|
class << self
|
9
|
+
# @param param [String] Name of raw data
|
10
|
+
# @return [Regexp]
|
6
11
|
def get_regexp(param)
|
7
12
|
data = get_array(param)
|
8
|
-
%r
|
13
|
+
%r{#{data.join('|')}}i
|
9
14
|
end
|
10
15
|
|
16
|
+
# @param param [String] Name of raw data
|
17
|
+
# @return [Array]
|
11
18
|
def get_array(param)
|
12
|
-
|
13
|
-
const_get(const_name)
|
19
|
+
const_get("CrawlerDetect::Library::#{param.capitalize}").send(:data)
|
14
20
|
end
|
15
21
|
end
|
16
22
|
end
|
@@ -1,1257 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# rubocop:disable Layout/TrailingWhitespace
|
4
3
|
module CrawlerDetect
|
5
4
|
module Library
|
5
|
+
# @since 0.1.0
|
6
6
|
module Crawlers
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
^Calypso v\/
|
13
|
-
^COMODO DCV
|
14
|
-
^DangDang
|
15
|
-
^DavClnt
|
16
|
-
^FDM
|
17
|
-
^git\/
|
18
|
-
^Goose\/
|
19
|
-
^Grabber
|
20
|
-
^HTTPClient\/
|
21
|
-
^Java\/
|
22
|
-
^Jeode\/
|
23
|
-
^Jetty\/
|
24
|
-
^Mail\/
|
25
|
-
^Mget
|
26
|
-
^Microsoft URL Control
|
27
|
-
^NG\/[0-9\.]
|
28
|
-
^NING\/
|
29
|
-
^PHP\/[0-9]
|
30
|
-
^RMA\/
|
31
|
-
^Ruby|Ruby\/[0-9]
|
32
|
-
^VSE\/[0-9]
|
33
|
-
^WordPress\.com
|
34
|
-
^XRL\/[0-9]
|
35
|
-
^ZmEu
|
36
|
-
008\/
|
37
|
-
13TABS
|
38
|
-
192\.comAgent
|
39
|
-
2ip\.ru
|
40
|
-
404enemy
|
41
|
-
7Siters
|
42
|
-
80legs
|
43
|
-
a\.pr-cy\.ru
|
44
|
-
a3logics\.in
|
45
|
-
A6-Indexer
|
46
|
-
Abonti
|
47
|
-
Aboundex
|
48
|
-
aboutthedomain
|
49
|
-
Accoona-AI-Agent
|
50
|
-
acoon
|
51
|
-
acrylicapps\.com\/pulp
|
52
|
-
Acunetix
|
53
|
-
AdAuth\/
|
54
|
-
adbeat
|
55
|
-
AddThis
|
56
|
-
ADmantX
|
57
|
-
AdminLabs
|
58
|
-
adressendeutschland
|
59
|
-
adscanner
|
60
|
-
Adstxtaggregator
|
61
|
-
adstxt-worker
|
62
|
-
adstxt\.com
|
63
|
-
agentslug
|
64
|
-
AHC
|
65
|
-
aihit
|
66
|
-
aiohttp\/
|
67
|
-
Airmail
|
68
|
-
akka-http\/
|
69
|
-
akula\/
|
70
|
-
alertra
|
71
|
-
alexa site audit
|
72
|
-
Alibaba\.Security\.Heimdall
|
73
|
-
Alligator
|
74
|
-
allloadin
|
75
|
-
AllSubmitter
|
76
|
-
alyze\.info
|
77
|
-
amagit
|
78
|
-
^Amazon Simple Notification Service Agent$
|
79
|
-
Anarchie
|
80
|
-
AndroidDownloadManager
|
81
|
-
Anemone
|
82
|
-
AngleSharp
|
83
|
-
annotate_google
|
84
|
-
Ant\.com
|
85
|
-
Anturis Agent
|
86
|
-
AnyEvent-HTTP\/
|
87
|
-
Apache Droid
|
88
|
-
Apache OpenOffice
|
89
|
-
Apache-HttpAsyncClient
|
90
|
-
Apache-HttpClient
|
91
|
-
ApacheBench
|
92
|
-
Apexoo
|
93
|
-
APIs-Google
|
94
|
-
AportWorm\/
|
95
|
-
AppBeat\/
|
96
|
-
AppEngine-Google
|
97
|
-
AppleSyndication
|
98
|
-
AppStoreScraperZ
|
99
|
-
Aprc\/[0-9]
|
100
|
-
Arachmo
|
101
|
-
arachnode
|
102
|
-
Arachnophilia
|
103
|
-
aria2
|
104
|
-
Arukereso
|
105
|
-
asafaweb
|
106
|
-
AskQuickly
|
107
|
-
Ask Jeeves
|
108
|
-
ASPSeek
|
109
|
-
Asterias
|
110
|
-
Astute
|
111
|
-
asynchttp
|
112
|
-
Attach
|
113
|
-
autocite
|
114
|
-
AutomaticWPTester
|
115
|
-
Autonomy
|
116
|
-
axios\/
|
117
|
-
B-l-i-t-z-B-O-T
|
118
|
-
Backlink-Ceck
|
119
|
-
backlink-check
|
120
|
-
BacklinkHttpStatus
|
121
|
-
BackStreet
|
122
|
-
BackWeb
|
123
|
-
Bad-Neighborhood
|
124
|
-
Badass
|
125
|
-
baidu\.com
|
126
|
-
Bandit
|
127
|
-
basicstate
|
128
|
-
BatchFTP
|
129
|
-
Battlezta Bazinga
|
130
|
-
baypup\/
|
131
|
-
BazQux
|
132
|
-
BBBike
|
133
|
-
BCKLINKS
|
134
|
-
BDFetch
|
135
|
-
BegunAdvertising
|
136
|
-
Bidtellect
|
137
|
-
BigBozz
|
138
|
-
Bigfoot
|
139
|
-
biglotron
|
140
|
-
BingLocalSearch
|
141
|
-
BingPreview
|
142
|
-
binlar
|
143
|
-
biNu image cacher
|
144
|
-
Bitacle
|
145
|
-
biz_Directory
|
146
|
-
Black Hole
|
147
|
-
Blackboard Safeassign
|
148
|
-
BlackWidow
|
149
|
-
BlockNote\.Net
|
150
|
-
Bloglines
|
151
|
-
Bloglovin
|
152
|
-
BlogPulseLive
|
153
|
-
BlogSearch
|
154
|
-
Blogtrottr
|
155
|
-
BlowFish
|
156
|
-
boitho\.com-dc
|
157
|
-
BPImageWalker
|
158
|
-
Braintree-Webhooks
|
159
|
-
Branch Metrics API
|
160
|
-
Branch-Passthrough
|
161
|
-
Brandprotect
|
162
|
-
BrandVerity
|
163
|
-
Brandwatch
|
164
|
-
Brodie\/
|
165
|
-
Browsershots
|
166
|
-
BUbiNG
|
167
|
-
Buck\/
|
168
|
-
Buddy
|
169
|
-
BuiltWith
|
170
|
-
Bullseye
|
171
|
-
BunnySlippers
|
172
|
-
Burf Search
|
173
|
-
Butterfly\/
|
174
|
-
BuzzSumo
|
175
|
-
CAAM\/[0-9]
|
176
|
-
CakePHP
|
177
|
-
Calculon
|
178
|
-
Canary%20Mail
|
179
|
-
CaretNail
|
180
|
-
catexplorador
|
181
|
-
CC Metadata Scaper
|
182
|
-
Cegbfeieh
|
183
|
-
censys
|
184
|
-
Cerberian Drtrs
|
185
|
-
CERT\.at-Statistics-Survey
|
186
|
-
cg-eye
|
187
|
-
changedetection
|
188
|
-
ChangesMeter
|
189
|
-
Charlotte
|
190
|
-
CheckHost
|
191
|
-
checkprivacy
|
192
|
-
CherryPicker
|
193
|
-
ChinaClaw
|
194
|
-
Chirp\/
|
195
|
-
chkme\.com
|
196
|
-
Chlooe
|
197
|
-
Chromaxa
|
198
|
-
CirrusExplorer
|
199
|
-
CISPA Vulnerability Notification
|
200
|
-
Citoid
|
201
|
-
CJNetworkQuality
|
202
|
-
Clarsentia
|
203
|
-
clips\.ua\.ac\.be
|
204
|
-
Cloud mapping
|
205
|
-
CloudEndure
|
206
|
-
CloudFlare-AlwaysOnline
|
207
|
-
Cloudinary
|
208
|
-
cmcm\.com
|
209
|
-
coccoc
|
210
|
-
cognitiveseo
|
211
|
-
colly -
|
212
|
-
CommaFeed
|
213
|
-
Commons-HttpClient
|
214
|
-
commonscan
|
215
|
-
contactbigdatafr
|
216
|
-
contentkingapp
|
217
|
-
convera
|
218
|
-
CookieReports
|
219
|
-
copyright sheriff
|
220
|
-
CopyRightCheck
|
221
|
-
Copyscape
|
222
|
-
cortex\/
|
223
|
-
Cosmos4j\.feedback
|
224
|
-
Covario-IDS
|
225
|
-
Craw\/
|
226
|
-
Crescent
|
227
|
-
Crowsnest
|
228
|
-
Criteo
|
229
|
-
CSHttp
|
230
|
-
CSSCheck
|
231
|
-
curb
|
232
|
-
Curious George
|
233
|
-
curl
|
234
|
-
cuwhois\/
|
235
|
-
cybo\.com
|
236
|
-
DAP\/NetHTTP
|
237
|
-
DareBoost
|
238
|
-
DatabaseDriverMysqli
|
239
|
-
DataCha0s
|
240
|
-
Datafeedwatch
|
241
|
-
Datanyze
|
242
|
-
DataparkSearch
|
243
|
-
dataprovider
|
244
|
-
DataXu
|
245
|
-
Daum(oa)?[ \/][0-9]
|
246
|
-
dBpoweramp
|
247
|
-
ddline
|
248
|
-
deeris
|
249
|
-
Demon
|
250
|
-
DeuSu
|
251
|
-
developers\.google\.com\/\+\/web\/snippet\/
|
252
|
-
Devil
|
253
|
-
Digg
|
254
|
-
Digincore
|
255
|
-
DigitalPebble
|
256
|
-
Dirbuster
|
257
|
-
Discourse Forum Onebox
|
258
|
-
Disqus\/
|
259
|
-
Dispatch\/
|
260
|
-
DittoSpyder
|
261
|
-
dlvr
|
262
|
-
DMBrowser
|
263
|
-
DNSPod-reporting
|
264
|
-
docoloc
|
265
|
-
Dolphin http client
|
266
|
-
DomainAppender
|
267
|
-
Donuts Content Explorer
|
268
|
-
dotMailer content retrieval
|
269
|
-
dotSemantic
|
270
|
-
downforeveryoneorjustme
|
271
|
-
Download Wonder
|
272
|
-
downnotifier
|
273
|
-
DowntimeDetector
|
274
|
-
Drip
|
275
|
-
drupact
|
276
|
-
Drupal \(\+http:\/\/drupal\.org\/\)
|
277
|
-
DTS Agent
|
278
|
-
dubaiindex
|
279
|
-
DuplexWeb-Google
|
280
|
-
EARTHCOM
|
281
|
-
Easy-Thumb
|
282
|
-
EasyDL
|
283
|
-
Ebingbong
|
284
|
-
ec2linkfinder
|
285
|
-
eCairn-Grabber
|
286
|
-
eCatch
|
287
|
-
ECCP
|
288
|
-
eContext\/
|
289
|
-
Ecxi
|
290
|
-
EirGrabber
|
291
|
-
ElectricMonk
|
292
|
-
elefent
|
293
|
-
EMail Exractor
|
294
|
-
EMail Wolf
|
295
|
-
EmailWolf
|
296
|
-
Embarcadero
|
297
|
-
Embed PHP Library
|
298
|
-
Embedly
|
299
|
-
endo\/
|
300
|
-
europarchive\.org
|
301
|
-
evc-batch
|
302
|
-
EventMachine HttpClient
|
303
|
-
Everwall Link Expander
|
304
|
-
Evidon
|
305
|
-
Evrinid
|
306
|
-
ExactSearch
|
307
|
-
ExaleadCloudview
|
308
|
-
Excel\/
|
309
|
-
exif
|
310
|
-
Exploratodo
|
311
|
-
Express WebPictures
|
312
|
-
Extreme Picture Finder
|
313
|
-
EyeNetIE
|
314
|
-
ezooms
|
315
|
-
facebookexternalhit
|
316
|
-
facebookexternalua
|
317
|
-
facebookplatform
|
318
|
-
fairshare
|
319
|
-
Faraday v
|
320
|
-
fasthttp
|
321
|
-
Faveeo
|
322
|
-
Favicon downloader
|
323
|
-
faviconkit
|
324
|
-
faviconarchive
|
325
|
-
FavOrg
|
326
|
-
Feed Wrangler
|
327
|
-
Feedable\/
|
328
|
-
Feedbin
|
329
|
-
FeedBooster
|
330
|
-
FeedBucket
|
331
|
-
FeedBunch\/
|
332
|
-
FeedBurner
|
333
|
-
feeder
|
334
|
-
Feedly
|
335
|
-
FeedshowOnline
|
336
|
-
Feedspot
|
337
|
-
Feedwind\/
|
338
|
-
FeedZcollector
|
339
|
-
feeltiptop
|
340
|
-
Fetch API
|
341
|
-
Fetch\/[0-9]
|
342
|
-
Fever\/[0-9]
|
343
|
-
FHscan
|
344
|
-
Fimap
|
345
|
-
findlink
|
346
|
-
findthatfile
|
347
|
-
FlashGet
|
348
|
-
FlipboardBrowserProxy
|
349
|
-
FlipboardProxy
|
350
|
-
FlipboardRSS
|
351
|
-
Flock\/
|
352
|
-
fluffy
|
353
|
-
Flunky
|
354
|
-
flynxapp
|
355
|
-
forensiq
|
356
|
-
FoundSeoTool
|
357
|
-
http:\/\/www.neomo.de\/
|
358
|
-
free thumbnails
|
359
|
-
Freeuploader
|
360
|
-
Funnelback
|
361
|
-
G-i-g-a-b-o-t
|
362
|
-
g00g1e\.net
|
363
|
-
ganarvisitas
|
364
|
-
geek-tools
|
365
|
-
Genieo
|
366
|
-
GentleSource
|
367
|
-
GetCode
|
368
|
-
Getintent
|
369
|
-
GetLinkInfo
|
370
|
-
getprismatic
|
371
|
-
GetRight
|
372
|
-
getroot
|
373
|
-
GetURLInfo\/
|
374
|
-
GetWeb
|
375
|
-
Geziyor
|
376
|
-
Ghost Inspector
|
377
|
-
GigablastOpenSource
|
378
|
-
GIS-LABS
|
379
|
-
github-camo
|
380
|
-
github\.com
|
381
|
-
Go [\d\.]* package http
|
382
|
-
Go http package
|
383
|
-
Go-Ahead-Got-It
|
384
|
-
Go-http-client
|
385
|
-
Go!Zilla
|
386
|
-
gobyus
|
387
|
-
gofetch
|
388
|
-
GomezAgent
|
389
|
-
gooblog
|
390
|
-
Goodzer\/
|
391
|
-
Google AppsViewer
|
392
|
-
Google Desktop
|
393
|
-
Google favicon
|
394
|
-
Google Keyword Suggestion
|
395
|
-
Google Keyword Tool
|
396
|
-
Google Page Speed Insights
|
397
|
-
Google PP Default
|
398
|
-
Google Search Console
|
399
|
-
Google Web Preview
|
400
|
-
Google-Adwords
|
401
|
-
Google-Apps-Script
|
402
|
-
Google-Calendar-Importer
|
403
|
-
Google-HotelAdsVerifier
|
404
|
-
Google-HTTP-Java-Client
|
405
|
-
Google-Publisher-Plugin
|
406
|
-
Google-Read-Aloud
|
407
|
-
Google-SearchByImage
|
408
|
-
Google-Site-Verification
|
409
|
-
Google-Structured-Data-Testing-Tool
|
410
|
-
Google-Youtube-Links
|
411
|
-
google-xrawler
|
412
|
-
GoogleDocs
|
413
|
-
GoogleHC\/
|
414
|
-
GoogleProducer
|
415
|
-
GoogleSites
|
416
|
-
Google-Transparency-Report
|
417
|
-
Gookey
|
418
|
-
GoScraper
|
419
|
-
GoSpotCheck
|
420
|
-
gosquared-thumbnailer
|
421
|
-
Gotit
|
422
|
-
GoZilla
|
423
|
-
grabify
|
424
|
-
GrabNet
|
425
|
-
Grafula
|
426
|
-
Grammarly
|
427
|
-
GrapeFX
|
428
|
-
GreatNews
|
429
|
-
Gregarius
|
430
|
-
GRequests
|
431
|
-
grokkit
|
432
|
-
grouphigh
|
433
|
-
grub-client
|
434
|
-
gSOAP\/
|
435
|
-
GT::WWW
|
436
|
-
GTmetrix
|
437
|
-
GuzzleHttp
|
438
|
-
gvfs\/
|
439
|
-
HAA(A)?RTLAND http client
|
440
|
-
Haansoft
|
441
|
-
hackney\/
|
442
|
-
Hadi Agent
|
443
|
-
HappyApps-WebCheck
|
444
|
-
Hatena
|
445
|
-
Havij
|
446
|
-
HeadlessChrome
|
447
|
-
HEADMasterSEO
|
448
|
-
HeartRails_Capture
|
449
|
-
help@dataminr\.com
|
450
|
-
heritrix
|
451
|
-
historious
|
452
|
-
hkedcity
|
453
|
-
hledejLevne\.cz
|
454
|
-
Hloader
|
455
|
-
HMView
|
456
|
-
Holmes
|
457
|
-
HonesoSearchEngine
|
458
|
-
HootSuite Image proxy
|
459
|
-
Hootsuite-WebFeed
|
460
|
-
hosterstats
|
461
|
-
HostTracker
|
462
|
-
ht:\/\/check
|
463
|
-
htdig
|
464
|
-
HTMLparser
|
465
|
-
htmlyse
|
466
|
-
HTTP Banner Detection
|
467
|
-
HTTP_Compression_Test
|
468
|
-
http_request2
|
469
|
-
http_requester
|
470
|
-
http-get
|
471
|
-
HTTP-Header-Abfrage
|
472
|
-
http-kit
|
473
|
-
http-request\/
|
474
|
-
HTTP-Tiny
|
475
|
-
HTTP::Lite
|
476
|
-
http\.rb\/
|
477
|
-
http_get
|
478
|
-
HttpComponents
|
479
|
-
httphr
|
480
|
-
HTTPMon
|
481
|
-
HTTPie
|
482
|
-
httpRequest
|
483
|
-
httpscheck
|
484
|
-
httpssites_power
|
485
|
-
httpunit
|
486
|
-
HttpUrlConnection
|
487
|
-
httrack
|
488
|
-
huaweisymantec
|
489
|
-
HubSpot
|
490
|
-
Humanlinks
|
491
|
-
i2kconnect\/
|
492
|
-
Iblog
|
493
|
-
ichiro
|
494
|
-
Id-search
|
495
|
-
IdeelaborPlagiaat
|
496
|
-
IDG Twitter Links Resolver
|
497
|
-
IDwhois\/
|
498
|
-
Iframely
|
499
|
-
igdeSpyder
|
500
|
-
IlTrovatore
|
501
|
-
Image Fetch
|
502
|
-
Image Sucker
|
503
|
-
ImageEngine\/
|
504
|
-
ImageVisu\/
|
505
|
-
Imagga
|
506
|
-
imagineeasy
|
507
|
-
imgsizer
|
508
|
-
InAGist
|
509
|
-
inbound\.li parser
|
510
|
-
InDesign%20CC
|
511
|
-
Indy Library
|
512
|
-
InetURL
|
513
|
-
infegy
|
514
|
-
infohelfer
|
515
|
-
InfoTekies
|
516
|
-
InfoWizards Reciprocal Link
|
517
|
-
inpwrd\.com
|
518
|
-
instabid
|
519
|
-
Instapaper
|
520
|
-
Integrity
|
521
|
-
integromedb
|
522
|
-
Intelliseek
|
523
|
-
InterGET
|
524
|
-
internet_archive
|
525
|
-
Internet Ninja
|
526
|
-
InternetSeer
|
527
|
-
internetVista monitor
|
528
|
-
internetwache
|
529
|
-
intraVnews
|
530
|
-
IODC
|
531
|
-
IOI
|
532
|
-
iplabel
|
533
|
-
ips-agent
|
534
|
-
IPS\/[0-9]
|
535
|
-
IPWorks HTTP\/S Component
|
536
|
-
iqdb\/
|
537
|
-
Iria
|
538
|
-
Irokez
|
539
|
-
isitup\.org
|
540
|
-
iskanie
|
541
|
-
isUp\.li
|
542
|
-
iThemes Sync\/
|
543
|
-
IZaBEE
|
544
|
-
iZSearch
|
545
|
-
JAHHO
|
546
|
-
janforman
|
547
|
-
Jaunt\/
|
548
|
-
Jbrofuzz
|
549
|
-
Jersey\/
|
550
|
-
JetCar
|
551
|
-
Jigsaw
|
552
|
-
Jobboerse
|
553
|
-
JobFeed discovery
|
554
|
-
Jobg8 URL Monitor
|
555
|
-
jobo
|
556
|
-
Jobrapido
|
557
|
-
Jobsearch1\.5
|
558
|
-
JoinVision Generic
|
559
|
-
JolokiaPwn
|
560
|
-
Joomla
|
561
|
-
Jorgee
|
562
|
-
JS-Kit
|
563
|
-
JustView
|
564
|
-
Kaspersky Lab CFR link resolver
|
565
|
-
Kelny\/
|
566
|
-
Kerrigan\/
|
567
|
-
KeyCDN
|
568
|
-
Keyword Density
|
569
|
-
Keywords Research
|
570
|
-
khttp\/
|
571
|
-
KickFire
|
572
|
-
KimonoLabs\/
|
573
|
-
Kml-Google
|
574
|
-
knows\.is
|
575
|
-
KOCMOHABT
|
576
|
-
kouio
|
577
|
-
kube-probe
|
578
|
-
kulturarw3
|
579
|
-
KumKie
|
580
|
-
L\.webis
|
581
|
-
Larbin
|
582
|
-
Lavf\/
|
583
|
-
LeechFTP
|
584
|
-
LeechGet
|
585
|
-
letsencrypt
|
586
|
-
Lftp
|
587
|
-
LibVLC
|
588
|
-
LibWeb
|
589
|
-
Libwhisker
|
590
|
-
libwww
|
591
|
-
Licorne
|
592
|
-
Liferea\/
|
593
|
-
Lightspeedsystems
|
594
|
-
Lighthouse
|
595
|
-
Likse
|
596
|
-
Link Valet
|
597
|
-
link_thumbnailer
|
598
|
-
LinkAlarm\/
|
599
|
-
linkCheck
|
600
|
-
linkdex
|
601
|
-
LinkExaminer
|
602
|
-
linkfluence
|
603
|
-
linkpeek
|
604
|
-
LinkPreviewGenerator
|
605
|
-
LinkScan
|
606
|
-
LinksManager
|
607
|
-
LinkTiger
|
608
|
-
LinkWalker
|
609
|
-
Lipperhey
|
610
|
-
Litemage_walker
|
611
|
-
livedoor ScreenShot
|
612
|
-
LoadImpactRload
|
613
|
-
localsearch-web
|
614
|
-
LongURL API
|
615
|
-
looid\.com
|
616
|
-
looksystems\.net
|
617
|
-
ltx71
|
618
|
-
lua-resty-http
|
619
|
-
lwp-request
|
620
|
-
lwp-trivial
|
621
|
-
LWP::Simple
|
622
|
-
lycos
|
623
|
-
LYT\.SR
|
624
|
-
mabontland
|
625
|
-
Mag-Net
|
626
|
-
MagpieRSS
|
627
|
-
Mail\.Ru
|
628
|
-
MailChimp
|
629
|
-
Majestic12
|
630
|
-
makecontact\/
|
631
|
-
Mandrill
|
632
|
-
MapperCmd
|
633
|
-
marketinggrader
|
634
|
-
MarkMonitor
|
635
|
-
MarkWatch
|
636
|
-
Mass Downloader
|
637
|
-
masscan\/
|
638
|
-
Mata Hari
|
639
|
-
Mediametric
|
640
|
-
Mediapartners-Google
|
641
|
-
mediawords
|
642
|
-
MegaIndex\.ru
|
643
|
-
MeltwaterNews
|
644
|
-
Melvil Rawi
|
645
|
-
MemGator
|
646
|
-
Metaspinner
|
647
|
-
MetaURI
|
648
|
-
MFC_Tear_Sample
|
649
|
-
Microsearch
|
650
|
-
Microsoft Office
|
651
|
-
Microsoft Outlook
|
652
|
-
Microsoft Windows Network Diagnostics
|
653
|
-
Microsoft-WebDAV-MiniRedir
|
654
|
-
Microsoft Data Access
|
655
|
-
MIDown tool
|
656
|
-
MIIxpc
|
657
|
-
Mindjet
|
658
|
-
Miniature\.io
|
659
|
-
Miniflux
|
660
|
-
Mister PiX
|
661
|
-
mixdata dot com
|
662
|
-
mixed-content-scan
|
663
|
-
Mixmax-LinkPreview
|
664
|
-
mixnode
|
665
|
-
Mnogosearch
|
666
|
-
mogimogi
|
667
|
-
Mojeek
|
668
|
-
Mojolicious \(Perl\)
|
669
|
-
Monit\/
|
670
|
-
monitis
|
671
|
-
Monitority\/
|
672
|
-
montastic
|
673
|
-
MonTools
|
674
|
-
Moreover
|
675
|
-
Morfeus Fucking Scanner
|
676
|
-
Morning Paper
|
677
|
-
MovableType
|
678
|
-
mowser
|
679
|
-
Mrcgiguy
|
680
|
-
MS Web Services Client Protocol
|
681
|
-
MSFrontPage
|
682
|
-
mShots
|
683
|
-
MuckRack\/
|
684
|
-
muhstik-scan
|
685
|
-
MVAClient
|
686
|
-
MxToolbox\/
|
687
|
-
nagios
|
688
|
-
Najdi\.si
|
689
|
-
Name Intelligence
|
690
|
-
Nameprotect
|
691
|
-
Navroad
|
692
|
-
NearSite
|
693
|
-
Needle
|
694
|
-
Nessus
|
695
|
-
Net Vampire
|
696
|
-
NetAnts
|
697
|
-
NETCRAFT
|
698
|
-
NetLyzer
|
699
|
-
NetMechanic
|
700
|
-
NetNewsWire
|
701
|
-
Netpursual
|
702
|
-
netresearch
|
703
|
-
NetShelter ContentScan
|
704
|
-
Netsparker
|
705
|
-
NetTrack
|
706
|
-
Netvibes
|
707
|
-
NetZIP
|
708
|
-
Neustar WPM
|
709
|
-
NeutrinoAPI
|
710
|
-
NewRelicPinger
|
711
|
-
NewsBlur .*Finder
|
712
|
-
NewsGator
|
713
|
-
newsme
|
714
|
-
newspaper\/
|
715
|
-
Nexgate Ruby Client
|
716
|
-
NG-Search
|
717
|
-
Nibbler
|
718
|
-
NICErsPRO
|
719
|
-
Nikto
|
720
|
-
nineconnections
|
721
|
-
NLNZ_IAHarvester
|
722
|
-
Nmap Scripting Engine
|
723
|
-
node-superagent
|
724
|
-
node-urllib
|
725
|
-
node\.io
|
726
|
-
Nodemeter
|
727
|
-
NodePing
|
728
|
-
nominet\.org\.uk
|
729
|
-
nominet\.uk
|
730
|
-
Norton-Safeweb
|
731
|
-
Notifixious
|
732
|
-
notifyninja
|
733
|
-
NotionEmbedder
|
734
|
-
nuhk
|
735
|
-
nutch
|
736
|
-
Nuzzel
|
737
|
-
nWormFeedFinder
|
738
|
-
nyawc\/
|
739
|
-
Nymesis
|
740
|
-
NYU
|
741
|
-
Ocelli\/
|
742
|
-
Octopus
|
743
|
-
oegp
|
744
|
-
Offline Explorer
|
745
|
-
Offline Navigator
|
746
|
-
OgScrper
|
747
|
-
og-scraper
|
748
|
-
okhttp
|
749
|
-
omgili
|
750
|
-
OMSC
|
751
|
-
Online Domain Tools
|
752
|
-
OpenCalaisSemanticProxy
|
753
|
-
Openfind
|
754
|
-
OpenLinkProfiler
|
755
|
-
Openstat\/
|
756
|
-
OpenVAS
|
757
|
-
Optimizer
|
758
|
-
Orbiter
|
759
|
-
OrgProbe\/
|
760
|
-
orion-semantics
|
761
|
-
Outlook-Express
|
762
|
-
Outlook-iOS
|
763
|
-
ow\.ly
|
764
|
-
Owler
|
765
|
-
ownCloud News
|
766
|
-
OxfordCloudService
|
767
|
-
Page Valet
|
768
|
-
page_verifier
|
769
|
-
page scorer
|
770
|
-
page2rss
|
771
|
-
PageGrabber
|
772
|
-
PagePeeker
|
773
|
-
PageScorer
|
774
|
-
Pagespeed\/
|
775
|
-
Panopta
|
776
|
-
panscient
|
777
|
-
Papa Foto
|
778
|
-
parsijoo
|
779
|
-
Pavuk
|
780
|
-
PayPal IPN
|
781
|
-
pcBrowser
|
782
|
-
Pcore-HTTP
|
783
|
-
Pearltrees
|
784
|
-
PECL::HTTP
|
785
|
-
peerindex
|
786
|
-
Peew
|
787
|
-
PeoplePal
|
788
|
-
Perlu -
|
789
|
-
PhantomJS Screenshoter
|
790
|
-
PhantomJS\/
|
791
|
-
Photon\/
|
792
|
-
phpservermon
|
793
|
-
Pi-Monster
|
794
|
-
Picscout
|
795
|
-
Picsearch
|
796
|
-
PictureFinder
|
797
|
-
Pimonster
|
798
|
-
ping\.blo\.gs
|
799
|
-
Pingability
|
800
|
-
PingAdmin\.Ru
|
801
|
-
Pingdom
|
802
|
-
Pingoscope
|
803
|
-
PingSpot
|
804
|
-
pinterest\.com
|
805
|
-
Pixray
|
806
|
-
Pizilla
|
807
|
-
Plagger\/
|
808
|
-
Ploetz \+ Zeller
|
809
|
-
Plukkie
|
810
|
-
plumanalytics
|
811
|
-
PocketImageCache
|
812
|
-
PocketParser
|
813
|
-
Pockey
|
814
|
-
POE-Component-Client-HTTP
|
815
|
-
Polymail\/
|
816
|
-
Pompos
|
817
|
-
Porkbun
|
818
|
-
Port Monitor
|
819
|
-
postano
|
820
|
-
PostmanRuntime
|
821
|
-
PostPost
|
822
|
-
postrank
|
823
|
-
PowerPoint\/
|
824
|
-
Priceonomics Analysis Engine
|
825
|
-
PrintFriendly
|
826
|
-
PritTorrent
|
827
|
-
Prlog
|
828
|
-
probethenet
|
829
|
-
Project 25499
|
830
|
-
prospectb2b
|
831
|
-
Protopage
|
832
|
-
ProWebWalker
|
833
|
-
proximic
|
834
|
-
PRTG Network Monitor
|
835
|
-
pshtt, https scanning
|
836
|
-
PTST
|
837
|
-
PTST\/[0-9]+
|
838
|
-
Pulsepoint XT3 web scraper
|
839
|
-
Pump
|
840
|
-
Python-httplib2
|
841
|
-
python-requests
|
842
|
-
Python-urllib
|
843
|
-
Qirina Hurdler
|
844
|
-
QQDownload
|
845
|
-
QrafterPro
|
846
|
-
Qseero
|
847
|
-
Qualidator
|
848
|
-
QueryN Metasearch
|
849
|
-
queuedriver
|
850
|
-
Quora Link Preview
|
851
|
-
Qwantify
|
852
|
-
Radian6
|
853
|
-
RankActive
|
854
|
-
RankFlex
|
855
|
-
RankSonicSiteAuditor
|
856
|
-
Re-re Studio
|
857
|
-
ReactorNetty
|
858
|
-
Readability
|
859
|
-
RealDownload
|
860
|
-
RealPlayer%20Downloader
|
861
|
-
RebelMouse
|
862
|
-
Recorder
|
863
|
-
RecurPost\/
|
864
|
-
redback\/
|
865
|
-
ReederForMac
|
866
|
-
Reeder\/
|
867
|
-
ReGet
|
868
|
-
RepoMonkey
|
869
|
-
request\.js
|
870
|
-
reqwest\/
|
871
|
-
ResponseCodeTest
|
872
|
-
RestSharp
|
873
|
-
Riddler
|
874
|
-
Rival IQ
|
875
|
-
Robosourcer
|
876
|
-
Robozilla
|
877
|
-
ROI Hunter
|
878
|
-
RPT-HTTPClient
|
879
|
-
RSSOwl
|
880
|
-
safe-agent-scanner
|
881
|
-
SalesIntelligent
|
882
|
-
Saleslift
|
883
|
-
Sendsay\.Ru
|
884
|
-
SauceNAO
|
885
|
-
SBIder
|
886
|
-
scalaj-http
|
887
|
-
scan\.lol
|
888
|
-
ScanAlert
|
889
|
-
Scoop
|
890
|
-
scooter
|
891
|
-
ScoutJet
|
892
|
-
ScoutURLMonitor
|
893
|
-
ScrapeBox Page Scanner
|
894
|
-
SimpleScraper
|
895
|
-
Scrapy
|
896
|
-
Screaming
|
897
|
-
ScreenShotService
|
898
|
-
Scrubby
|
899
|
-
Scrutiny\/
|
900
|
-
search\.thunderstone
|
901
|
-
Search37
|
902
|
-
searchenginepromotionhelp
|
903
|
-
Searchestate
|
904
|
-
SearchExpress
|
905
|
-
SearchSight
|
906
|
-
Seeker
|
907
|
-
semanticdiscovery
|
908
|
-
semanticjuice
|
909
|
-
Semiocast HTTP client
|
910
|
-
Semrush
|
911
|
-
sentry\/
|
912
|
-
SEO Browser
|
913
|
-
Seo Servis
|
914
|
-
seo-nastroj\.cz
|
915
|
-
seo4ajax
|
916
|
-
Seobility
|
917
|
-
SEOCentro
|
918
|
-
SeoCheck
|
919
|
-
SEOkicks
|
920
|
-
Seomoz
|
921
|
-
SEOprofiler
|
922
|
-
SEOsearch
|
923
|
-
seoscanners
|
924
|
-
seositecheckup
|
925
|
-
SEOstats
|
926
|
-
servernfo
|
927
|
-
sexsearcher
|
928
|
-
Seznam
|
929
|
-
Shelob
|
930
|
-
Shodan
|
931
|
-
Shoppimon
|
932
|
-
ShopWiki
|
933
|
-
ShortLinkTranslate
|
934
|
-
shrinktheweb
|
935
|
-
Sideqik
|
936
|
-
SimplePie
|
937
|
-
SimplyFast
|
938
|
-
Siphon
|
939
|
-
SISTRIX
|
940
|
-
Site-Shot\/
|
941
|
-
Site Sucker
|
942
|
-
Site24x7
|
943
|
-
SiteBar
|
944
|
-
Sitebeam
|
945
|
-
Sitebulb\/
|
946
|
-
SiteCondor
|
947
|
-
SiteExplorer
|
948
|
-
SiteGuardian
|
949
|
-
Siteimprove
|
950
|
-
SiteIndexed
|
951
|
-
Sitemap(s)? Generator
|
952
|
-
SitemapGenerator
|
953
|
-
SiteMonitor
|
954
|
-
Siteshooter B0t
|
955
|
-
SiteSnagger
|
956
|
-
SiteSucker
|
957
|
-
SiteTruth
|
958
|
-
Sitevigil
|
959
|
-
sitexy\.com
|
960
|
-
SkypeUriPreview
|
961
|
-
Slack\/
|
962
|
-
slider\.com
|
963
|
-
slurp
|
964
|
-
SlySearch
|
965
|
-
SmartDownload
|
966
|
-
SMRF URL Expander
|
967
|
-
SMUrlExpander
|
968
|
-
Snake
|
969
|
-
Snappy
|
970
|
-
SnapSearch
|
971
|
-
Snarfer\/
|
972
|
-
SniffRSS
|
973
|
-
sniptracker
|
974
|
-
Snoopy
|
975
|
-
SnowHaze Search
|
976
|
-
sogou web
|
977
|
-
SortSite
|
978
|
-
Sottopop
|
979
|
-
sovereign\.ai
|
980
|
-
SpaceBison
|
981
|
-
SpamExperts
|
982
|
-
Spammen
|
983
|
-
Spanner
|
984
|
-
spaziodati
|
985
|
-
SPDYCheck
|
986
|
-
Specificfeeds
|
987
|
-
speedy
|
988
|
-
SPEng
|
989
|
-
Spinn3r
|
990
|
-
spray-can
|
991
|
-
Sprinklr
|
992
|
-
spyonweb
|
993
|
-
sqlmap
|
994
|
-
Sqlworm
|
995
|
-
Sqworm
|
996
|
-
SSL Labs
|
997
|
-
ssl-tools
|
998
|
-
StackRambler
|
999
|
-
Statastico\/
|
1000
|
-
StatusCake
|
1001
|
-
Steeler
|
1002
|
-
Stratagems Kumo
|
1003
|
-
Stroke\.cz
|
1004
|
-
StudioFACA
|
1005
|
-
StumbleUpon
|
1006
|
-
suchen
|
1007
|
-
Sucuri
|
1008
|
-
summify
|
1009
|
-
SuperHTTP
|
1010
|
-
Surphace Scout
|
1011
|
-
Suzuran
|
1012
|
-
SwiteScraper
|
1013
|
-
Symfony BrowserKit
|
1014
|
-
Symfony2 BrowserKit
|
1015
|
-
SynHttpClient-Built
|
1016
|
-
Sysomos
|
1017
|
-
sysscan
|
1018
|
-
Szukacz
|
1019
|
-
T0PHackTeam
|
1020
|
-
tAkeOut
|
1021
|
-
Tarantula\/
|
1022
|
-
Taringa UGC
|
1023
|
-
TarmotGezgin
|
1024
|
-
Teleport
|
1025
|
-
Telesoft
|
1026
|
-
Telesphoreo
|
1027
|
-
Telesphorep
|
1028
|
-
Tenon\.io
|
1029
|
-
teoma
|
1030
|
-
terrainformatica
|
1031
|
-
Test Certificate Info
|
1032
|
-
testuri
|
1033
|
-
Tetrahedron
|
1034
|
-
TextRazor Downloader
|
1035
|
-
The Drop Reaper
|
1036
|
-
The Expert HTML Source Viewer
|
1037
|
-
The Knowledge AI
|
1038
|
-
The Intraformant
|
1039
|
-
theinternetrules
|
1040
|
-
TheNomad
|
1041
|
-
Thinklab
|
1042
|
-
Thumbshots
|
1043
|
-
ThumbSniper
|
1044
|
-
Thumbor
|
1045
|
-
timewe\.net
|
1046
|
-
TinEye
|
1047
|
-
Tiny Tiny RSS
|
1048
|
-
TLSProbe\/
|
1049
|
-
Toata
|
1050
|
-
topster
|
1051
|
-
touche\.com
|
1052
|
-
Traackr\.com
|
1053
|
-
tracemyfile
|
1054
|
-
Trackuity
|
1055
|
-
TrapitAgent
|
1056
|
-
Trendiction
|
1057
|
-
Trendsmap
|
1058
|
-
trendspottr
|
1059
|
-
truwoGPS
|
1060
|
-
TryJsoup
|
1061
|
-
TulipChain
|
1062
|
-
Turingos
|
1063
|
-
Turnitin
|
1064
|
-
tweetedtimes
|
1065
|
-
Tweetminster
|
1066
|
-
Tweezler\/
|
1067
|
-
twibble
|
1068
|
-
Twice
|
1069
|
-
Twikle
|
1070
|
-
Twingly
|
1071
|
-
Twisted PageGetter
|
1072
|
-
Typhoeus
|
1073
|
-
ubermetrics-technologies
|
1074
|
-
uclassify
|
1075
|
-
UdmSearch
|
1076
|
-
unchaos
|
1077
|
-
unirest-java
|
1078
|
-
UniversalFeedParser
|
1079
|
-
Unshorten\.It
|
1080
|
-
Untiny
|
1081
|
-
UnwindFetchor
|
1082
|
-
updated
|
1083
|
-
updown\.io daemon
|
1084
|
-
Upflow
|
1085
|
-
Uptimia
|
1086
|
-
Urlcheckr
|
1087
|
-
URL Verifier
|
1088
|
-
URLitor
|
1089
|
-
urlresolver
|
1090
|
-
Urlstat
|
1091
|
-
URLTester
|
1092
|
-
UrlTrends Ranking Updater
|
1093
|
-
URLy Warning
|
1094
|
-
URLy\.Warning
|
1095
|
-
Vacuum
|
1096
|
-
Vagabondo
|
1097
|
-
VB Project
|
1098
|
-
vBSEO
|
1099
|
-
VCI
|
1100
|
-
via ggpht\.com GoogleImageProxy
|
1101
|
-
VidibleScraper
|
1102
|
-
Virusdie
|
1103
|
-
visionutils
|
1104
|
-
vkShare
|
1105
|
-
VoidEYE
|
1106
|
-
Voil
|
1107
|
-
voltron
|
1108
|
-
voyager\/
|
1109
|
-
VSAgent\/
|
1110
|
-
VSB-TUO\/
|
1111
|
-
Vulnbusters Meter
|
1112
|
-
VYU2
|
1113
|
-
w3af\.org
|
1114
|
-
W3C_Unicorn
|
1115
|
-
W3C-checklink
|
1116
|
-
W3C-mobileOK
|
1117
|
-
WAC-OFU
|
1118
|
-
Wallpapers\/[0-9]+
|
1119
|
-
WallpapersHD
|
1120
|
-
wangling
|
1121
|
-
Wappalyzer
|
1122
|
-
WatchMouse
|
1123
|
-
WbSrch\/
|
1124
|
-
WDT\.io
|
1125
|
-
web-capture\.net
|
1126
|
-
Web-sniffer
|
1127
|
-
Web Auto
|
1128
|
-
Web Collage
|
1129
|
-
Web Enhancer
|
1130
|
-
Web Fetch
|
1131
|
-
Web Fuck
|
1132
|
-
Web Pix
|
1133
|
-
Web Sauger
|
1134
|
-
Web spyder
|
1135
|
-
Web Sucker
|
1136
|
-
Webalta
|
1137
|
-
Webauskunft
|
1138
|
-
WebAuto
|
1139
|
-
WebCapture
|
1140
|
-
WebClient\/
|
1141
|
-
webcollage
|
1142
|
-
WebCookies
|
1143
|
-
WebCopier
|
1144
|
-
WebCorp
|
1145
|
-
WebDataStats
|
1146
|
-
WebDoc
|
1147
|
-
WebEnhancer
|
1148
|
-
WebFetch
|
1149
|
-
WebFuck
|
1150
|
-
WebGazer
|
1151
|
-
WebGo IS
|
1152
|
-
WebImageCollector
|
1153
|
-
WebImages
|
1154
|
-
WebIndex
|
1155
|
-
webkit2png
|
1156
|
-
WebLeacher
|
1157
|
-
webmastercoffee
|
1158
|
-
webmon\s
|
1159
|
-
WebPix
|
1160
|
-
WebReaper
|
1161
|
-
WebSauger
|
1162
|
-
webscreenie
|
1163
|
-
Webshag
|
1164
|
-
Webshot
|
1165
|
-
Website Quester
|
1166
|
-
websitepulse agent
|
1167
|
-
WebsiteQuester
|
1168
|
-
Websnapr
|
1169
|
-
WebSniffer
|
1170
|
-
Webster
|
1171
|
-
WebStripper
|
1172
|
-
WebSucker
|
1173
|
-
Webthumb\/
|
1174
|
-
WebThumbnail
|
1175
|
-
WebWhacker
|
1176
|
-
WebZIP
|
1177
|
-
WeLikeLinks
|
1178
|
-
WEPA
|
1179
|
-
WeSEE
|
1180
|
-
wf84
|
1181
|
-
Wfuzz\/
|
1182
|
-
wget
|
1183
|
-
WhatsApp
|
1184
|
-
WhatsMyIP
|
1185
|
-
WhatWeb
|
1186
|
-
WhereGoes\?
|
1187
|
-
Whibse
|
1188
|
-
WhoRunsCoinHive
|
1189
|
-
Whynder Magnet
|
1190
|
-
Windows-RSS-Platform
|
1191
|
-
WinPodder
|
1192
|
-
wkhtmlto
|
1193
|
-
wmtips
|
1194
|
-
Woko
|
1195
|
-
woorankreview
|
1196
|
-
Word\/
|
1197
|
-
WordPress\/
|
1198
|
-
worldping-api
|
1199
|
-
WordupinfoSearch
|
1200
|
-
wotbox
|
1201
|
-
WP Engine Install Performance API
|
1202
|
-
wpif
|
1203
|
-
wprecon\.com survey
|
1204
|
-
WPScan
|
1205
|
-
wscheck
|
1206
|
-
Wtrace
|
1207
|
-
WWW-Collector-E
|
1208
|
-
WWW-Mechanize
|
1209
|
-
WWW::Document
|
1210
|
-
WWW::Mechanize
|
1211
|
-
www\.monitor\.us
|
1212
|
-
WWWOFFLE
|
1213
|
-
x09Mozilla
|
1214
|
-
x22Mozilla
|
1215
|
-
XaxisSemanticsClassifier
|
1216
|
-
Xenu Link Sleuth
|
1217
|
-
XING-contenttabreceiver
|
1218
|
-
xpymep([0-9]?)\.exe
|
1219
|
-
Y!J-(ASR|BSC)
|
1220
|
-
Y\!J-BRW
|
1221
|
-
Yaanb
|
1222
|
-
yacy
|
1223
|
-
Yahoo Link Preview
|
1224
|
-
YahooCacheSystem
|
1225
|
-
YahooYSMcm
|
1226
|
-
YandeG
|
1227
|
-
Yandex(?!Search)
|
1228
|
-
yanga
|
1229
|
-
yeti
|
1230
|
-
Yo-yo
|
1231
|
-
Yoleo Consumer
|
1232
|
-
yoogliFetchAgent
|
1233
|
-
YottaaMonitor
|
1234
|
-
Your-Website-Sucks
|
1235
|
-
yourls\.org
|
1236
|
-
YoYs\.net
|
1237
|
-
YP\.PL
|
1238
|
-
Zabbix
|
1239
|
-
Zade
|
1240
|
-
Zao
|
1241
|
-
Zauba
|
1242
|
-
Zemanta Aggregator
|
1243
|
-
Zend_Http_Client
|
1244
|
-
Zend\\\Http\\\Client
|
1245
|
-
Zermelo
|
1246
|
-
Zeus
|
1247
|
-
zgrab
|
1248
|
-
ZnajdzFoto
|
1249
|
-
ZnHTTP
|
1250
|
-
Zombie\.js
|
1251
|
-
Zoom\.Mac
|
1252
|
-
ZyBorg
|
1253
|
-
[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer)
|
1254
|
-
].strip.split(/\n+/).freeze
|
7
|
+
extend Loader
|
8
|
+
|
9
|
+
def self.data
|
10
|
+
@data ||= load_raw(CrawlerDetect.config.settings.raw_crawlers_path).freeze
|
11
|
+
end
|
1255
12
|
end
|
1256
13
|
end
|
1257
14
|
end
|