crawler_detect 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4d6a083297a36f03c1ac9af1e0d7bf644faa228a04d1e0c310be521ca396e430
4
+ data.tar.gz: e089000e6f93f8ac54646855a719a63279cbed7f24dc5fae94d6c9f388cf823d
5
+ SHA512:
6
+ metadata.gz: c8008174d874b18ca0ae75224ede7287e92c5e5e82d81778f131f7eafb030935bb79e47814d12e90d3e72a8e17e6b1b2584621bbf81579e92654ad3558ca8325
7
+ data.tar.gz: 6c15fc963c79b145a32c174275ba12b39ee3ac3f206eb6643cae651c3ea9171ab3fa35f98f77f63d3d1985f2d202c1d8b04ba588fde543f84e68fca94ff8a40a
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format Fuubar
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,174 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.2
3
+ # RuboCop has a bunch of cops enabled by default. This setting tells RuboCop
4
+ # to ignore them, so only the ones explicitly set in this file are enabled.
5
+ DisabledByDefault: true
6
+ Exclude:
7
+ - '**/templates/**/*'
8
+ - '**/vendor/**/*'
9
+ - '**/vendor/**/.*'
10
+ - '**/node_modules/**/*'
11
+ - 'actionpack/lib/action_dispatch/journey/parser.rb'
12
+
13
+ # Prefer assert_not_x over refute_x
14
+ CustomCops/RefuteNot:
15
+ Include:
16
+ - '**/test/**/*'
17
+
18
+ # Prefer &&/|| over and/or.
19
+ Style/AndOr:
20
+ Enabled: true
21
+
22
+ # Do not use braces for hash literals when they are the last argument of a
23
+ # method call.
24
+ Style/BracesAroundHashParameters:
25
+ Enabled: true
26
+ EnforcedStyle: context_dependent
27
+
28
+ # Align `when` with `case`.
29
+ Layout/CaseIndentation:
30
+ Enabled: true
31
+
32
+ # Align comments with method definitions.
33
+ Layout/CommentIndentation:
34
+ Enabled: true
35
+
36
+ Layout/ElseAlignment:
37
+ Enabled: true
38
+
39
+ # Align `end` with the matching keyword or starting expression except for
40
+ # assignments, where it should be aligned with the LHS.
41
+ Layout/EndAlignment:
42
+ Enabled: true
43
+ EnforcedStyleAlignWith: variable
44
+ AutoCorrect: true
45
+
46
+ Layout/EmptyLineAfterMagicComment:
47
+ Enabled: true
48
+
49
+ # In a regular class definition, no empty lines around the body.
50
+ Layout/EmptyLinesAroundClassBody:
51
+ Enabled: true
52
+
53
+ # In a regular method definition, no empty lines around the body.
54
+ Layout/EmptyLinesAroundMethodBody:
55
+ Enabled: true
56
+
57
+ # In a regular module definition, no empty lines around the body.
58
+ Layout/EmptyLinesAroundModuleBody:
59
+ Enabled: true
60
+
61
+ Layout/FirstParameterIndentation:
62
+ Enabled: true
63
+
64
+ # Use Ruby >= 1.9 syntax for hashes. Prefer { a: :b } over { :a => :b }.
65
+ Style/HashSyntax:
66
+ Enabled: true
67
+
68
+ # Method definitions after `private` or `protected` isolated calls need one
69
+ # extra level of indentation.
70
+ Layout/IndentationConsistency:
71
+ Enabled: true
72
+ EnforcedStyle: rails
73
+
74
+ # Two spaces, no tabs (for indentation).
75
+ Layout/IndentationWidth:
76
+ Enabled: true
77
+
78
+ Layout/LeadingCommentSpace:
79
+ Enabled: true
80
+
81
+ Layout/SpaceAfterColon:
82
+ Enabled: true
83
+
84
+ Layout/SpaceAfterComma:
85
+ Enabled: true
86
+
87
+ Layout/SpaceAroundEqualsInParameterDefault:
88
+ Enabled: true
89
+
90
+ Layout/SpaceAroundKeyword:
91
+ Enabled: true
92
+
93
+ Layout/SpaceAroundOperators:
94
+ Enabled: true
95
+
96
+ Layout/SpaceBeforeComma:
97
+ Enabled: true
98
+
99
+ Layout/SpaceBeforeFirstArg:
100
+ Enabled: true
101
+
102
+ Style/DefWithParentheses:
103
+ Enabled: true
104
+
105
+ # Defining a method with parameters needs parentheses.
106
+ Style/MethodDefParentheses:
107
+ Enabled: true
108
+
109
+ Style/FrozenStringLiteralComment:
110
+ Enabled: true
111
+ EnforcedStyle: always
112
+ Exclude:
113
+ - 'actionview/test/**/*.builder'
114
+ - 'actionview/test/**/*.ruby'
115
+ - 'actionpack/test/**/*.builder'
116
+ - 'actionpack/test/**/*.ruby'
117
+ - 'activestorage/db/migrate/**/*.rb'
118
+ - 'db/migrate/**/*.rb'
119
+ - 'db/*.rb'
120
+
121
+ # Use `foo {}` not `foo{}`.
122
+ Layout/SpaceBeforeBlockBraces:
123
+ Enabled: true
124
+
125
+ # Use `foo { bar }` not `foo {bar}`.
126
+ Layout/SpaceInsideBlockBraces:
127
+ Enabled: true
128
+
129
+ # Use `{ a: 1 }` not `{a:1}`.
130
+ Layout/SpaceInsideHashLiteralBraces:
131
+ Enabled: true
132
+
133
+ Layout/SpaceInsideParens:
134
+ Enabled: true
135
+
136
+ # Check quotes usage according to lint rule below.
137
+ Style/StringLiterals:
138
+ Enabled: true
139
+ EnforcedStyle: double_quotes
140
+
141
+ # Detect hard tabs, no hard tabs.
142
+ Layout/Tab:
143
+ Enabled: true
144
+
145
+ # Blank lines should not have any spaces.
146
+ Layout/TrailingBlankLines:
147
+ Enabled: true
148
+
149
+ # No trailing whitespace.
150
+ Layout/TrailingWhitespace:
151
+ Enabled: true
152
+
153
+ # Use quotes for string literals when they are enough.
154
+ Style/UnneededPercentQ:
155
+ Enabled: true
156
+
157
+ # Use my_method(my_arg) not my_method( my_arg ) or my_method my_arg.
158
+ Lint/RequireParentheses:
159
+ Enabled: true
160
+
161
+ Lint/StringConversionInInterpolation:
162
+ Enabled: true
163
+
164
+ Style/RedundantReturn:
165
+ Enabled: true
166
+ AllowMultipleReturnValues: true
167
+
168
+ Style/Semicolon:
169
+ Enabled: true
170
+ AllowAsExpressionSeparator: true
171
+
172
+ # Prefer Foo.method over Foo::method
173
+ Style/ColonMethodCall:
174
+ Enabled: true
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
---
# Travis CI build matrix for the gem.
sudo: false
language: ruby
cache: bundler
# Versions are quoted so the YAML loader keeps them as strings; a bare
# scalar such as 2.10 would otherwise be read as the float 2.1.
rvm:
  - "2.2"
  - "2.3"
  - "2.4"
  - "2.5"
before_install: gem install bundler -v 1.16.2
script:
  - bundle exec parallel_rspec spec/
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
+
7
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Pavel Kozlov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,56 @@
1
+ # CrawlerDetect
2
+
3
+ [![Build Status](https://travis-ci.org/loadkpi/crawler_detect.svg?branch=master)](https://travis-ci.org/loadkpi/crawler_detect)
4
+
5
+ ## About
6
+ **CrawlerDetect** is a Ruby port of the PHP class [CrawlerDetect](https://github.com/JayBizzle/Crawler-Detect).
7
+
8
+ It helps to detect bots/crawlers/spiders via the user agent and other HTTP headers. It can currently detect thousands of bots/spiders/crawlers.
9
+ ### Why CrawlerDetect?
10
+ Comparison with other popular bot-detection gems:
11
+
12
+ | | CrawlerDetect | Voight-Kampff | Browser |
13
+ |--|--|--|--|
14
+ | Number of bot-patterns | >1000 | ~280 | ~280 |
15
+ | Number of checked HTTP-headers | 10 | 1 | 1 |
16
+ | Number of updates of bot-list *(1st half of 2018)* | 14 | 1 | 7 |
17
+
18
+ ## Installation
19
+ Add this line to your application's Gemfile:
20
+
21
+ `gem 'crawler_detect'`
22
+ ## Basic Usage
23
+ ```
24
+ CrawlerDetect.is_crawler?("Bot user agent")
25
+ => true
26
+ ```
27
+ Or, if you need the crawler's name:
28
+ ```
29
+ detector = CrawlerDetect.new("Googlebot/2.1 (http://www.google.com/bot.html)")
30
+ detector.is_crawler?
31
+ => true
32
+ detector.crawler_name
33
+ => "Googlebot"
34
+ ```
35
+ ## Rack::Request extension
36
+ **Optionally** you can add additional methods for `request`:
37
+ ```
38
+ request.is_crawler?
39
+ => false
40
+ request.crawler_name
41
+ => nil
42
+ ```
43
+ The only thing you have to do is configure the `Rack::CrawlerDetect` middleware:
44
+ ### Rails
45
+ ```
46
+ class Application < Rails::Application
47
+ ...
48
+ config.middleware.use Rack::CrawlerDetect
49
+ end
50
+ ```
51
+ ### Rack
52
+ ```
53
+ use Rack::CrawlerDetect
54
+ ```
55
+ ## License
56
+ MIT License
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path("../lib", __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "crawler_detect/version"
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = "crawler_detect"
9
+ spec.version = CrawlerDetect::VERSION
10
+ spec.authors = ["Pavel Kozlov"]
11
+ spec.email = ["loadkpi@gmail.com"]
12
+
13
+ spec.summary = "CrawlerDetect: detect bots/crawlers"
14
+ spec.description = "CrawlerDetect is a library to detect bots/crawlers via the user agent"
15
+ spec.homepage = "https://github.com/loadkpi/crawler_detect"
16
+ spec.license = "MIT"
17
+
18
+ # Specify which files should be added to the gem when it is released.
19
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
20
+ spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
21
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
+ end
23
+ spec.bindir = "exe"
24
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
+ spec.require_paths = ["lib"]
26
+
27
+ spec.add_development_dependency "bundler", "~> 1.15"
28
+ spec.add_development_dependency "rake", "~> 10.0"
29
+ spec.add_development_dependency "rspec", "~> 3.0"
30
+
31
+ spec.add_development_dependency "fuubar", "~> 2.0"
32
+ spec.add_development_dependency "parallel_tests", "~> 2.0"
33
+ spec.add_development_dependency "pry-meta", "~> 0.0.10"
34
+ spec.add_development_dependency "rack-test", "~> 1.1"
35
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "crawler_detect/detector"
4
+ require "crawler_detect/library"
5
+ require "crawler_detect/library/crawlers"
6
+ require "crawler_detect/library/exclusions"
7
+ require "crawler_detect/library/headers"
8
+ require "crawler_detect/version"
9
+
10
+ require "rack/crawler_detect"
11
+
12
# Module-level convenience API: both entry points delegate to
# CrawlerDetect::Detector.
module CrawlerDetect
  class << self
    # Returns a CrawlerDetect::Detector built for the given user agent, so
    # `CrawlerDetect.new(ua)` can be used like a constructor.
    def new(user_agent)
      build_detector(user_agent)
    end

    # One-shot check: returns whatever Detector#is_crawler? reports for the
    # given user agent.
    def is_crawler?(user_agent)
      build_detector(user_agent).is_crawler?
    end

    private

    # Single place where the detector class is instantiated.
    def build_detector(user_agent)
      ::CrawlerDetect::Detector.new(user_agent)
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module CrawlerDetect
  # Decides whether a single user agent string belongs to a crawler and, if
  # so, which part of the string matched the crawler list.
  class Detector
    # user_agent - the raw User-Agent value. It is coerced with #to_s, so a
    #              missing header (nil) is handled as an empty user agent
    #              instead of raising.
    def initialize(user_agent)
      # Keep a private, unfrozen copy: the exclusion pass edits it in place.
      @user_agent = user_agent.to_s.dup
    end

    # Returns true when, after the excluded fragments are removed, something
    # is left and it matches the crawler list; false otherwise.
    def is_crawler?
      # Memoize on "already computed" rather than on truthiness: with `||=`
      # a false answer was recomputed (two large regexp passes) on each call.
      return @is_crawler if defined?(@is_crawler)
      @is_crawler = !completely_exclusion? && matches_crawler_list?
    end

    # Returns the fragment of the user agent that matched the crawler list,
    # or nil when the user agent is not a crawler.
    def crawler_name
      return unless is_crawler?
      @crawler_name
    end

    private

    # Strips every excluded fragment from the user agent (in place) and
    # reports whether nothing but whitespace remains.
    def completely_exclusion?
      @user_agent.gsub!(exclusions_matcher, "")
      @user_agent.strip.empty?
    end

    # Stores the first crawler-list match (an empty string when there is
    # none) and reports whether there was a match.
    def matches_crawler_list?
      @crawler_name = crawlers_matcher.match(@user_agent).to_s
      !@crawler_name.empty?
    end

    # Regexp for the "exclusions" list, as provided by CrawlerDetect::Library.
    def exclusions_matcher
      CrawlerDetect::Library.get_regexp("exclusions")
    end

    # Regexp for the "crawlers" list, as provided by CrawlerDetect::Library.
    def crawlers_matcher
      CrawlerDetect::Library.get_regexp("crawlers")
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module CrawlerDetect
  # Access to the pattern lists stored in the nested library modules
  # (for a list named "crawlers": Library::Crawlers::CRAWLERS).
  module Library
    class << self
      # Returns one case-insensitive Regexp that is the alternation of every
      # entry in the named list.
      #
      # param - list name, e.g. "crawlers" or "exclusions".
      #
      # The compiled Regexp is cached per list name: the lists are large and
      # joining/compiling them again for every detection is wasted work.
      def get_regexp(param)
        @regexps ||= {}
        @regexps[param] ||= Regexp.new(get_array(param).join("|"), Regexp::IGNORECASE)
      end

      # Returns the raw pattern list for the given name by resolving the
      # constant CrawlerDetect::Library::<Capitalized>::<UPCASED>.
      # Raises NameError when no such constant is defined.
      def get_array(param)
        const_name = "CrawlerDetect::Library::#{param.capitalize}::#{param.upcase}"
        const_get(const_name)
      end
    end
  end
end
@@ -0,0 +1,1170 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rubocop:disable Layout/TrailingWhitespace
4
+ module CrawlerDetect
5
+ module Library
6
+ module Crawlers
7
+ CRAWLERS = %q[
8
+ .*Java.*outbrain
9
+ YLT
10
+ 008\/
11
+ 192\.comAgent
12
+ 2ip\.ru
13
+ 404checker
14
+ 404enemy
15
+ 80legs
16
+ ^b0t$
17
+ ^bluefish
18
+ ^Calypso v\/
19
+ ^COMODO DCV
20
+ ^DangDang
21
+ ^DavClnt
22
+ ^FDM
23
+ ^git\/
24
+ ^Goose\/
25
+ ^Grabber
26
+ ^HTTPClient\/
27
+ ^Java\/
28
+ ^Jeode\/
29
+ ^Jetty\/
30
+ ^Mail\/
31
+ ^Mget
32
+ ^Microsoft URL Control
33
+ ^NG\/[0-9\.]
34
+ ^NING\/
35
+ ^PHP\/[0-9]
36
+ ^RMA\/
37
+ ^Ruby|Ruby\/[0-9]
38
+ ^scrutiny\/
39
+ ^VSE\/[0-9]
40
+ ^WordPress\.com
41
+ ^XRL\/[0-9]
42
+ ^ZmEu
43
+ a3logics\.in
44
+ A6-Indexer
45
+ a\.pr-cy\.ru
46
+ Abonti
47
+ Aboundex
48
+ aboutthedomain
49
+ Accoona-AI-Agent
50
+ acoon
51
+ acrylicapps\.com\/pulp
52
+ Acunetix
53
+ AdAuth\/
54
+ adbeat
55
+ AddThis
56
+ ADmantX
57
+ adressendeutschland
58
+ adscanner\/
59
+ Advanced Email Extractor v
60
+ agentslug
61
+ AHC
62
+ aihit
63
+ aiohttp\/
64
+ Airmail
65
+ akka-http\/
66
+ akula\/
67
+ alertra
68
+ alexa site audit
69
+ Alibaba\.Security\.Heimdall
70
+ Alligator
71
+ allloadin\.com
72
+ AllSubmitter
73
+ alyze\.info
74
+ amagit
75
+ Anarchie
76
+ AndroidDownloadManager
77
+ Anemone
78
+ AngleSharp\/
79
+ Ant\.com
80
+ Anturis Agent
81
+ AnyEvent-HTTP\/
82
+ Apache Droid
83
+ Apache-HttpAsyncClient\/
84
+ Apache-HttpClient\/
85
+ ApacheBench\/
86
+ Apexoo
87
+ APIs-Google
88
+ AportWorm\/[0-9]
89
+ AppBeat\/[0-9]
90
+ AppEngine-Google
91
+ AppStoreScraperZ
92
+ Arachmo
93
+ arachnode
94
+ Arachnophilia
95
+ aria2
96
+ Arukereso
97
+ asafaweb.com
98
+ AskQuickly
99
+ ASPSeek
100
+ Asterias
101
+ Astute
102
+ asynchttp
103
+ Attach
104
+ autocite
105
+ Autonomy
106
+ axios\/
107
+ B-l-i-t-z-B-O-T
108
+ Backlink-Ceck
109
+ backlink-check
110
+ BackStreet
111
+ BackWeb
112
+ Bad-Neighborhood
113
+ Badass
114
+ baidu\.com
115
+ Bandit
116
+ BatchFTP
117
+ Battleztar\ Bazinga
118
+ baypup\/[0-9]
119
+ baypup\/colbert
120
+ BazQux
121
+ BBBike
122
+ BCKLINKS
123
+ BDFetch
124
+ BegunAdvertising\/
125
+ BigBozz
126
+ Bigfoot
127
+ biglotron
128
+ BingLocalSearch
129
+ BingPreview
130
+ binlar
131
+ biNu image cacher
132
+ Bitacle
133
+ biz_Directory
134
+ Black\ Hole
135
+ Blackboard Safeassign
136
+ BlackWidow
137
+ Bloglovin
138
+ BlogPulseLive
139
+ BlogSearch
140
+ Blogtrottr
141
+ BlowFish
142
+ Boardreader
143
+ boitho\.com-dc
144
+ BPImageWalker
145
+ Braintree-Webhooks
146
+ Branch Metrics API
147
+ Branch-Passthrough
148
+ Brandprotect
149
+ Brandwatch
150
+ Brodie\/
151
+ Browsershots
152
+ BUbiNG
153
+ Buck\/
154
+ Buddy
155
+ BuiltWith
156
+ Bullseye
157
+ BunnySlippers
158
+ Burf Search
159
+ Butterfly\/
160
+ BuzzSumo
161
+ CAAM\/[0-9]
162
+ CakePHP
163
+ Calculon
164
+ CapsuleChecker
165
+ CaretNail
166
+ catexplorador
167
+ cb crawl
168
+ CC Metadata Scaper
169
+ Cegbfeieh
170
+ Cerberian Drtrs
171
+ CERT\.at-Statistics-Survey
172
+ cg-eye
173
+ changedetection
174
+ ChangesMeter\/
175
+ Charlotte
176
+ CheckHost
177
+ checkprivacy
178
+ CherryPicker
179
+ ChinaClaw
180
+ Chirp\/[0-9]
181
+ chkme\.com
182
+ Chlooe
183
+ CirrusExplorer\/
184
+ CISPA Vulnerability Notification
185
+ Citoid
186
+ CJNetworkQuality
187
+ Clarsentia
188
+ clips\.ua\.ac\.be
189
+ Cloud\ mapping
190
+ CloudEndure
191
+ CloudFlare-AlwaysOnline
192
+ Cloudinary\/[0-9]
193
+ cmcm\.com
194
+ coccoc
195
+ cognitiveseo
196
+ colly -
197
+ CommaFeed
198
+ Commons-HttpClient
199
+ Comodo SSL Checker
200
+ contactbigdatafr
201
+ contentkingapp
202
+ convera
203
+ CookieReports\.com
204
+ copyright sheriff
205
+ CopyRightCheck
206
+ Copyscape
207
+ Cosmos4j\.feedback
208
+ Covario-IDS
209
+ CrawlForMe\/[0-9]
210
+ Crescent
211
+ cron-job\.org
212
+ Crowsnest
213
+ CSHttp
214
+ curb
215
+ Curious George
216
+ curl
217
+ cuwhois\/[0-9]
218
+ cybo\.com
219
+ DareBoost
220
+ DatabaseDriverMysqli
221
+ DataCha0s
222
+ DataparkSearch
223
+ dataprovider
224
+ DataXu
225
+ Daum(oa)?[ \/][0-9]
226
+ Demon
227
+ DeuSu
228
+ developers\.google\.com\/\+\/web\/snippet\/
229
+ Devil
230
+ Digg
231
+ Digincore
232
+ DigitalPebble
233
+ Dirbuster
234
+ Dispatch\/
235
+ DittoSpyder
236
+ dlvr
237
+ DMBrowser
238
+ DNS-Tools Header-Analyzer
239
+ DNSPod-reporting
240
+ docoloc
241
+ Dolphin http client\/
242
+ DomainAppender
243
+ Donuts Content Explorer
244
+ dotMailer content retrieval
245
+ dotSemantic
246
+ downforeveryoneorjustme
247
+ Download\ Wonder
248
+ downnotifier\.com
249
+ DowntimeDetector
250
+ Dragonfly File Reader
251
+ Drip
252
+ drupact
253
+ Drupal \(\+http:\/\/drupal\.org\/\)
254
+ DTS\ Agent
255
+ dubaiindex
256
+ EARTHCOM
257
+ Easy-Thumb
258
+ EasyDL
259
+ Ebingbong
260
+ ec2linkfinder
261
+ eCairn-Grabber
262
+ eCatch
263
+ ECCP
264
+ echocrawl
265
+ eContext\/
266
+ Ecxi
267
+ EirGrabber
268
+ ElectricMonk
269
+ elefent
270
+ EMail Exractor
271
+ Email%20Extractor%20Lite
272
+ EMail\ Wolf
273
+ EmailWolf
274
+ Embed PHP Library
275
+ Embedly
276
+ europarchive\.org
277
+ evc-batch
278
+ EventMachine HttpClient
279
+ Everwall Link Expander
280
+ Evidon
281
+ Evrinid
282
+ ExactSearch
283
+ ExaleadCloudview
284
+ Excel\/
285
+ Exif Viewer
286
+ ExperianCrawlUK
287
+ Exploratodo
288
+ Express WebPictures
289
+ ExtractorPro
290
+ Extreme\ Picture\ Finder
291
+ EyeNetIE
292
+ ezooms
293
+ facebookexternalhit
294
+ facebookplatform
295
+ fairshare
296
+ Faraday v
297
+ fasthttp
298
+ Faveeo
299
+ Favicon downloader
300
+ FavOrg
301
+ Feed Wrangler
302
+ Feedbin
303
+ FeedBooster
304
+ FeedBucket
305
+ FeedBunch\/[0-9]
306
+ FeedBurner
307
+ FeedChecker
308
+ Feedly
309
+ Feedspot
310
+ Feedwind\/[0-9]
311
+ feeltiptop
312
+ Fetch API
313
+ Fetch\/[0-9]
314
+ Fever\/[0-9]
315
+ FHscan
316
+ Fimap
317
+ findlink
318
+ findthatfile
319
+ FlashGet
320
+ FlipboardBrowserProxy
321
+ FlipboardProxy
322
+ FlipboardRSS
323
+ Flock\/
324
+ fluffy
325
+ Flunky
326
+ flynxapp
327
+ forensiq
328
+ FoundSeoTool\/[0-9]
329
+ free thumbnails
330
+ Freeuploader
331
+ FreeWebMonitoring SiteChecker
332
+ Funnelback
333
+ G-i-g-a-b-o-t
334
+ g00g1e\.net
335
+ GAChecker
336
+ ganarvisitas\/[0-9]
337
+ geek-tools
338
+ Genderanalyzer
339
+ Genieo
340
+ GentleSource
341
+ Getintent
342
+ GetLinkInfo
343
+ getprismatic\.com
344
+ GetRight
345
+ GetURLInfo\/[0-9]
346
+ GetWeb
347
+ Ghost Inspector
348
+ GigablastOpenSource
349
+ GIS-LABS
350
+ github-camo
351
+ github\.com\/
352
+ Go [\d\.]* package http
353
+ Go http package
354
+ Go!Zilla
355
+ Go-Ahead-Got-It
356
+ Go-http-client
357
+ gobyus
358
+ gofetch
359
+ GomezAgent
360
+ gooblog
361
+ Goodzer\/[0-9]
362
+ GoogleCloudMonitoring
363
+ Google favicon
364
+ Google Keyword Suggestion
365
+ Google Keyword Tool
366
+ Google Page Speed Insights
367
+ Google PP Default
368
+ Google Search Console
369
+ Google Web Preview
370
+ Google-Adwords
371
+ Google-Apps-Script
372
+ Google-Calendar-Importer
373
+ Google-HotelAdsVerifier
374
+ Google-HTTP-Java-Client
375
+ Google-Publisher-Plugin
376
+ Google-SearchByImage
377
+ Google-Site-Verification
378
+ Google-Structured-Data-Testing-Tool
379
+ Google-Youtube-Links
380
+ google_partner_monitoring
381
+ GoogleDocs
382
+ GoogleHC\/
383
+ GoogleProducer
384
+ Gookey
385
+ GoScraper
386
+ GoSpotCheck
387
+ GoSquared-Status-Checker
388
+ gosquared-thumbnailer
389
+ Gotit
390
+ GoZilla
391
+ grabify
392
+ GrabNet
393
+ Grafula
394
+ Grammarly
395
+ GrapeFX
396
+ grokkit
397
+ grouphigh
398
+ grub-client
399
+ gSOAP\/
400
+ GT::WWW
401
+ GTmetrix
402
+ GuzzleHttp
403
+ gvfs\/
404
+ HAA(A)?RTLAND http client
405
+ Haansoft
406
+ hackney\/
407
+ Hatena
408
+ Havij
409
+ hawkReader
410
+ HEADMasterSEO
411
+ HeartRails_Capture
412
+ help@dataminr\.com
413
+ heritrix
414
+ historious\/
415
+ hledejLevne\.cz\/[0-9]
416
+ Hloader
417
+ HMView
418
+ Holmes
419
+ HonesoSearchEngine\/
420
+ HootSuite Image proxy
421
+ Hootsuite-WebFeed\/[0-9]
422
+ hosterstats
423
+ HostTracker
424
+ ht:\/\/check
425
+ htdig
426
+ HTMLparser
427
+ http-get
428
+ HTTP-Header-Abfrage
429
+ http-kit
430
+ http-request\/
431
+ HTTP-Tiny
432
+ HTTP::Lite
433
+ http\.rb\/
434
+ HTTP_Compression_Test
435
+ http_request2
436
+ http_requester
437
+ HttpComponents
438
+ httphr
439
+ HTTPMon
440
+ httpscheck
441
+ httpssites_power
442
+ httpunit
443
+ HttpUrlConnection
444
+ httrack
445
+ huaweisymantec
446
+ HubPages.*crawlingpolicy
447
+ HubSpot
448
+ Humanlinks
449
+ HyperZbozi.cz Feeder
450
+ i2kconnect\/
451
+ Iblog
452
+ ichiro
453
+ Id-search
454
+ IdeelaborPlagiaat
455
+ IDG Twitter Links Resolver
456
+ IDwhois\/[0-9]
457
+ Iframely
458
+ igdeSpyder
459
+ IlTrovatore
460
+ Image\ Fetch
461
+ Image\ Sucker
462
+ ImageEngine\/
463
+ Imagga
464
+ imgsizer
465
+ InAGist
466
+ inbound\.li parser
467
+ InDesign%20CC
468
+ Indy\ Library
469
+ infegy
470
+ infohelfer
471
+ InfoTekies
472
+ InfoWizards Reciprocal Link System PRO
473
+ inpwrd\.com
474
+ instabid
475
+ Instapaper
476
+ Integrity
477
+ integromedb
478
+ Intelliseek
479
+ InterGET
480
+ Internet\ Ninja
481
+ internet_archive
482
+ InternetSeer
483
+ internetVista monitor
484
+ intraVnews
485
+ IODC
486
+ IOI
487
+ iplabel
488
+ ips-agent
489
+ IPS\/[0-9]
490
+ IPWorks HTTP\/S Component
491
+ iqdb\/
492
+ Iria
493
+ Irokez
494
+ isitup\.org
495
+ iskanie
496
+ iZSearch
497
+ janforman
498
+ Jaunt\/
499
+ Jbrofuzz
500
+ Jersey\/
501
+ JetCar
502
+ Jigsaw
503
+ Jobboerse
504
+ JobFeed discovery
505
+ Jobg8 URL Monitor
506
+ jobo
507
+ Jobrapido
508
+ Jobsearch1\.5
509
+ JoinVision Generic
510
+ JolokiaPwn
511
+ Joomla
512
+ Jorgee
513
+ JS-Kit
514
+ JustView
515
+ Kaspersky Lab CFR link resolver
516
+ KeepRight OpenStreetMap Checker
517
+ Kelny\/
518
+ Kerrigan\/
519
+ KeyCDN
520
+ Keyword Extractor
521
+ Keyword\ Density
522
+ Keywords Research
523
+ KickFire
524
+ KimonoLabs\/
525
+ Kml-Google
526
+ knows\.is
527
+ KOCMOHABT
528
+ kouio
529
+ kube-probe
530
+ kulturarw3
531
+ KumKie
532
+ L\.webis
533
+ Larbin
534
+ Lavf\/
535
+ LayeredExtractor
536
+ LeechFTP
537
+ LeechGet
538
+ letsencrypt
539
+ Lftp
540
+ LibVLC
541
+ LibWeb
542
+ Libwhisker
543
+ libwww
544
+ Licorne Image Snapshot
545
+ Liferea\/
546
+ Lightspeedsystems
547
+ Likse
548
+ link checker
549
+ Link Valet
550
+ link_thumbnailer
551
+ LinkAlarm\/
552
+ linkCheck
553
+ linkdex
554
+ LinkExaminer
555
+ linkfluence
556
+ linkpeek
557
+ LinkPreviewGenerator
558
+ LinkScan
559
+ LinksManager
560
+ LinkTiger
561
+ LinkWalker
562
+ Lipperhey
563
+ Litemage_walker
564
+ livedoor ScreenShot
565
+ LoadImpactRload
566
+ LongURL API
567
+ looksystems\.net
568
+ ltx71
569
+ lua-resty-http
570
+ lwp-request
571
+ lwp-trivial
572
+ LWP::Simple
573
+ lycos
574
+ LYT\.SR
575
+ mabontland
576
+ Mag-Net
577
+ MagpieRSS
578
+ Mail.Ru
579
+ MailChimp
580
+ Majestic12
581
+ makecontact\/
582
+ Mandrill
583
+ MapperCmd
584
+ marketinggrader
585
+ MarkMonitor
586
+ MarkWatch
587
+ Mass\ Downloader
588
+ masscan\/[0-9]
589
+ Mata\ Hari
590
+ Mediapartners-Google
591
+ mediawords
592
+ MegaIndex\.ru
593
+ Melvil Rawi\/
594
+ MergeFlow-PageReader
595
+ Metaspinner
596
+ MetaURI
597
+ MFC_Tear_Sample
598
+ Microsearch
599
+ Microsoft Office
600
+ Microsoft Outlook
601
+ Microsoft Windows Network Diagnostics
602
+ Microsoft-WebDAV-MiniRedir
603
+ Microsoft\ Data\ Access
604
+ MIDown\ tool
605
+ MIIxpc
606
+ Mindjet
607
+ Miniature.io\/
608
+ Miniflux
609
+ Mister\ PiX
610
+ mixdata dot com
611
+ mixed-content-scan
612
+ mixnode
613
+ Mnogosearch
614
+ mogimogi
615
+ Mojeek
616
+ Mojolicious \(Perl\)
617
+ monitis
618
+ Monitority\/[0-9]
619
+ montastic
620
+ MonTools
621
+ Moreover
622
+ Morfeus\ Fucking\ Scanner
623
+ Morning Paper
624
+ MovableType
625
+ mowser
626
+ Mrcgiguy
627
+ MS\ Web\ Services\ Client\ Protocol
628
+ MSFrontPage
629
+ mShots
630
+ MuckRack\/
631
+ muhstik-scan
632
+ MVAClient
633
+ MxToolbox\/
634
+ nagios
635
+ Najdi\.si\/
636
+ Name\ Intelligence
637
+ Nameprotect
638
+ Navroad
639
+ NearSite
640
+ Needle
641
+ Nessus
642
+ Net\ Vampire
643
+ NetAnts
644
+ NETCRAFT
645
+ NetLyzer
646
+ NetMechanic
647
+ Netpursual
648
+ netresearch
649
+ NetShelter ContentScan
650
+ Netsparker
651
+ NetTrack
652
+ Netvibes
653
+ NetZIP
654
+ Neustar WPM
655
+ NeutrinoAPI
656
+ NewRelicPinger\/1.0 \(\d+\)
657
+ NewsBlur .*Finder
658
+ NewsGator
659
+ newsme
660
+ newspaper\/
661
+ Nexgate Ruby Client
662
+ NG-Search
663
+ Nibbler
664
+ NICErsPRO
665
+ Nikto
666
+ nineconnections\.com
667
+ NLNZ_IAHarvester
668
+ Nmap Scripting Engine
669
+ node-superagent
670
+ node-urllib\/
671
+ node\.io
672
+ nominet\.org\.uk
673
+ Norton-Safeweb
674
+ Notifixious
675
+ notifyninja
676
+ nuhk
677
+ nutch
678
+ Nuzzel
679
+ nWormFeedFinder
680
+ Nymesis
681
+ NYU
682
+ Ocelli\/[0-9]
683
+ Octopus
684
+ oegp
685
+ Offline Explorer
686
+ Offline\ Navigator
687
+ okhttp
688
+ Omea Reader
689
+ omgili
690
+ OMSC
691
+ Online Domain Tools
692
+ OpenCalaisSemanticProxy
693
+ Openfind
694
+ OpenLinkProfiler
695
+ Openstat\/
696
+ OpenVAS
697
+ Optimizer
698
+ Orbiter
699
+ OrgProbe\/[0-9]
700
+ orion-semantics
701
+ Outlook-Express
702
+ ow\.ly
703
+ Owler
704
+ ownCloud News
705
+ OxfordCloudService\/[0-9]
706
+ Page Analyzer
707
+ Page Valet
708
+ page2rss
709
+ page\ scorer
710
+ page_verifier
711
+ PageAnalyzer
712
+ PageGrabber
713
+ PagePeeker
714
+ PageScorer
715
+ Pagespeed\/[0-9]
716
+ Panopta
717
+ panscient
718
+ Papa\ Foto
719
+ parsijoo
720
+ Pavuk
721
+ PayPal IPN
722
+ pcBrowser
723
+ Pcore-HTTP
724
+ PEAR HTTPRequest
725
+ Pearltrees
726
+ PECL::HTTP
727
+ peerindex
728
+ Peew
729
+ PeoplePal
730
+ Perlu -
731
+ PhantomJS Screenshoter
732
+ PhantomJS\/
733
+ Photon\/
734
+ phpcrawl
735
+ phpservermon
736
+ Pi-Monster
737
+ Picscout
738
+ Picsearch
739
+ PictureFinder
740
+ Pimonster
741
+ ping\.blo\.gs\/
742
+ Pingability
743
+ Pingdom
744
+ Pingoscope
745
+ PingSpot
746
+ pinterest\.com
747
+ Pixray
748
+ Pizilla
749
+ PleaseCrawl
750
+ Ploetz \+ Zeller
751
+ Plukkie
752
+ plumanalytics
753
+ PocketParser
754
+ Pockey
755
+ POE-Component-Client-HTTP
756
+ Pompos
757
+ Porkbun
758
+ Port Monitor
759
+ postano
760
+ PostmanRuntime\/
761
+ PostPost
762
+ postrank
763
+ PowerPoint\/
764
+ Priceonomics Analysis Engine
765
+ PrintFriendly\.com
766
+ PritTorrent\/[0-9]
767
+ Prlog
768
+ probethenet
769
+ Project 25499
770
+ Promotion_Tools_www.searchenginepromotionhelp.com
771
+ prospectb2b
772
+ Protopage
773
+ ProWebWalker
774
+ proximic
775
+ PRTG Network Monitor
776
+ pshtt, https scanning
777
+ PTST
778
+ PTST\/[0-9]+
779
+ Pulsepoint XT3 web scraper
780
+ Pump
781
+ Python-httplib2
782
+ python-requests
783
+ Python-urllib
784
+ Qirina Hurdler
785
+ QQDownload
786
+ QrafterPro
787
+ Qseero
788
+ Qualidator.com SiteAnalyzer
789
+ QueryN\ Metasearch
790
+ Quora Link Preview
791
+ Qwantify
792
+ Radian6
793
+ RankActive
794
+ RankFlex
795
+ RankSonicSiteAuditor
796
+ Readability
797
+ RealDownload
798
+ RealPlayer%20Downloader
799
+ RebelMouse
800
+ Recorder
801
+ RecurPost\/
802
+ redback\/
803
+ Redirect Checker Tool
804
+ ReederForMac
805
+ ReGet
806
+ RepoMonkey
807
+ request\.js
808
+ ResponseCodeTest\/[0-9]
809
+ RestSharp
810
+ Riddler
811
+ Rival IQ
812
+ Robosourcer
813
+ Robozilla\/[0-9]
814
+ ROI Hunter
815
+ RPT-HTTPClient
816
+ RSSOwl
817
+ safe-agent-scanner
818
+ SalesIntelligent
819
+ Saleslift
820
+ SauceNAO
821
+ SBIder
822
+ scalaj-http
823
+ scan\.lol
824
+ ScanAlert
825
+ Scoop
826
+ scooter
827
+ ScoutJet
828
+ ScoutURLMonitor
829
+ Scrapy
830
+ Screaming
831
+ ScreenShotService\/[0-9]
832
+ Scrubby
833
+ Search37\/
834
+ search\.thunderstone
835
+ Searchestate
836
+ SearchSight
837
+ Seeker
838
+ semanticdiscovery
839
+ semanticjuice
840
+ Semiocast HTTP client
841
+ Semrush
842
+ sentry\/
843
+ SEO Browser
844
+ Seo Servis
845
+ seo-nastroj.cz
846
+ Seobility
847
+ SEOCentro
848
+ SeoCheck
849
+ SEOkicks
850
+ Seomoz
851
+ SEOprofiler
852
+ SeopultContentAnalyzer
853
+ seoscanners
854
+ SEOstats
855
+ Server Density Service Monitoring
856
+ servernfo\.com
857
+ SetCronJob\/
858
+ sexsearcher
859
+ Seznam
860
+ Shelob
861
+ Shodan
862
+ Shoppimon Analyzer
863
+ ShoppimonAgent\/[0-9]
864
+ ShopWiki
865
+ ShortLinkTranslate
866
+ shrinktheweb
867
+ Sideqik
868
+ SilverReader
869
+ SimplePie
870
+ SimplyFast
871
+ Siphon
872
+ SISTRIX
873
+ Site-Shot\/
874
+ Site24x7
875
+ Site\ Sucker
876
+ SiteBar
877
+ Sitebeam
878
+ Sitebulb\/
879
+ SiteCondor
880
+ SiteExplorer
881
+ SiteGuardian
882
+ Siteimprove
883
+ SiteIndexed
884
+ Sitemap(s)? Generator
885
+ SiteMonitor
886
+ Siteshooter B0t
887
+ SiteSnagger
888
+ SiteSucker
889
+ SiteTruth
890
+ Sitevigil
891
+ sitexy\.com
892
+ SkypeUriPreview
893
+ Slack\/
894
+ slider\.com
895
+ slurp
896
+ SlySearch
897
+ SmartDownload
898
+ SMRF URL Expander
899
+ SMUrlExpander
900
+ Snake
901
+ Snappy
902
+ SniffRSS
903
+ sniptracker
904
+ Snoopy
905
+ SnowHaze Search
906
+ sogou web
907
+ SortSite
908
+ Sottopop
909
+ sovereign\.ai
910
+ SpaceBison
911
+ Spammen
912
+ Spanner
913
+ spaziodati
914
+ SPDYCheck
915
+ Specificfeeds
916
+ speedy
917
+ SPEng
918
+ Spinn3r
919
+ spray-can
920
+ Sprinklr
921
+ spyonweb
922
+ sqlmap
923
+ Sqlworm
924
+ Sqworm
925
+ SSL Labs
926
+ ssl-tools
927
+ StackRambler
928
+ Statastico\/
929
+ StatusCake
930
+ Steeler
931
+ Stratagems Kumo
932
+ Stroke.cz
933
+ StudioFACA
934
+ suchen
935
+ Sucuri
936
+ summify
937
+ Super Monitoring
938
+ SuperHTTP
939
+ Surphace Scout
940
+ Suzuran
941
+ SwiteScraper
942
+ Symfony BrowserKit
943
+ Symfony2 BrowserKit
944
+ SynHttpClient-Built
945
+ Sysomos
946
+ sysscan
947
+ Szukacz
948
+ T0PHackTeam
949
+ tAkeOut
950
+ Tarantula\/
951
+ Taringa UGC
952
+ Teleport
953
+ Telesoft
954
+ Telesphoreo
955
+ Telesphorep
956
+ Tenon\.io
957
+ teoma
958
+ terrainformatica\.com
959
+ Test Certificate Info
960
+ Tetrahedron\/[0-9]
961
+ The Drop Reaper
962
+ The Expert HTML Source Viewer
963
+ The Knowledge AI
964
+ The\ Intraformant
965
+ theinternetrules
966
+ TheNomad
967
+ theoldreader\.com
968
+ Thinklab
969
+ Thumbshots
970
+ ThumbSniper
971
+ TinEye
972
+ Tiny Tiny RSS
973
+ TLSProbe\/
974
+ Toata
975
+ topster
976
+ touche.com
977
+ Traackr.com
978
+ TrapitAgent
979
+ Trendiction
980
+ Trendsmap Resolver
981
+ trendspottr\.com
982
+ truwoGPS
983
+ TulipChain
984
+ Turingos
985
+ Turnitin
986
+ tweetedtimes\.com
987
+ Tweetminster
988
+ Tweezler\/
989
+ twibble
990
+ Twice
991
+ Twikle
992
+ Twingly
993
+ Twisted PageGetter
994
+ Typhoeus
995
+ ubermetrics-technologies
996
+ uclassify
997
+ uCrawlr\/
998
+ UdmSearch
999
+ UniversalFeedParser
1000
+ Unshorten\.It\!\/[0-9]
1001
+ Untiny
1002
+ UnwindFetchor
1003
+ updated
1004
+ updown\.io daemon
1005
+ Upflow
1006
+ Uptimia
1007
+ URL Verifier
1008
+ URLChecker
1009
+ URLitor.com
1010
+ urlresolver
1011
+ Urlstat
1012
+ UrlTrends Ranking Updater
1013
+ URLy\ Warning
1014
+ URLy\.Warning
1015
+ Vacuum
1016
+ Vagabondo
1017
+ VB\ Project
1018
+ vBSEO
1019
+ VCI
1020
+ via ggpht\.com GoogleImageProxy
1021
+ VidibleScraper
1022
+ Virusdie
1023
+ visionutils
1024
+ vkShare
1025
+ VoidEYE
1026
+ Voil
1027
+ voltron
1028
+ voyager\/
1029
+ VSAgent\/[0-9]
1030
+ VSB-TUO\/[0-9]
1031
+ Vulnbusters Meter
1032
+ VYU2
1033
+ w3af\.org
1034
+ W3C-checklink
1035
+ W3C-mobileOK
1036
+ W3C_I18n-Checker
1037
+ W3C_Unicorn
1038
+ Wallpapers\/[0-9]+
1039
+ WallpapersHD
1040
+ wangling
1041
+ Wappalyzer
1042
+ WatchMouse
1043
+ WbSrch\/
1044
+ web-capture\.net
1045
+ Web-Monitoring
1046
+ Web-sniffer
1047
+ Web\ Auto
1048
+ Web\ Collage
1049
+ Web\ Enhancer
1050
+ Web\ Fetch
1051
+ Web\ Fuck
1052
+ Web\ Pix
1053
+ Web\ Sauger
1054
+ Web\ Sucker
1055
+ Webalta
1056
+ Webauskunft
1057
+ WebAuto
1058
+ WebCapture
1059
+ WebClient\/
1060
+ webcollage
1061
+ WebCookies
1062
+ WebCopier
1063
+ WebCorp
1064
+ WebDoc
1065
+ WebEnhancer
1066
+ WebFetch
1067
+ WebFuck
1068
+ WebGo\ IS
1069
+ WebImageCollector
1070
+ WebImages
1071
+ WebIndex
1072
+ webkit2png
1073
+ WebLeacher
1074
+ webmastercoffee
1075
+ webmon
1076
+ WebPix
1077
+ WebReaper
1078
+ WebSauger
1079
+ webscreenie
1080
+ Webshag
1081
+ Webshot
1082
+ Website Analyzer\/
1083
+ Website\ Quester
1084
+ WebsiteExtractor
1085
+ websitepulse agent
1086
+ websitepulse[+ ]checker
1087
+ WebsiteQuester
1088
+ Websnapr\/
1089
+ Webster
1090
+ WebStripper
1091
+ WebSucker
1092
+ Webthumb\/[0-9]
1093
+ WebThumbnail
1094
+ WebWhacker
1095
+ WebZIP
1096
+ WeCrawlForThePeace
1097
+ WeLikeLinks
1098
+ WEPA
1099
+ WeSEE
1100
+ wf84
1101
+ Wfuzz\/
1102
+ wget
1103
+ WhatsApp
1104
+ WhatsMyIP
1105
+ WhatWeb
1106
+ WhereGoes\?
1107
+ Whibse
1108
+ WhoRunsCoinHive
1109
+ Whynder Magnet
1110
+ Windows-RSS-Platform
1111
+ WinHttpRequest
1112
+ wkhtmlto
1113
+ wmtips
1114
+ Woko
1115
+ Word\/
1116
+ WordPress\/
1117
+ wotbox
1118
+ WP Engine Install Performance API
1119
+ wpif
1120
+ wprecon\.com survey
1121
+ WPScan
1122
+ wscheck
1123
+ Wtrace
1124
+ WWW-Collector-E
1125
+ WWW-Mechanize
1126
+ WWW::Mechanize
1127
+ www\.monitor\.us
1128
+ WWWOFFLE
1129
+ x09Mozilla
1130
+ x22Mozilla
1131
+ XaxisSemanticsClassifier
1132
+ Xenu Link Sleuth
1133
+ XING-contenttabreceiver\/[0-9]
1134
+ XmlSitemapGenerator
1135
+ xpymep([0-9]?)\.exe
1136
+ Y!J-(ASR|BSC)
1137
+ Yaanb
1138
+ yacy
1139
+ Yahoo Ad monitoring
1140
+ Yahoo Link Preview
1141
+ YahooCacheSystem
1142
+ YahooYSMcm
1143
+ YandeG
1144
+ Yandex(?!Search)
1145
+ yanga
1146
+ yeti
1147
+ Yo-yo
1148
+ Yoleo Consumer
1149
+ yoogliFetchAgent
1150
+ YottaaMonitor
1151
+ Your-Website-Sucks\/[0-9]
1152
+ yourls\.org
1153
+ Zade
1154
+ Zao
1155
+ Zauba
1156
+ Zemanta Aggregator
1157
+ Zend\\\\Http\\\\Client
1158
+ Zend_Http_Client
1159
+ Zermelo
1160
+ Zeus
1161
+ zgrab
1162
+ ZnajdzFoto
1163
+ Zombie\.js
1164
+ ZyBorg
1165
+ SpamExperts
1166
+ [a-z0-9\-_]*(bot|crawler|archiver|transcoder|spider|uptime|validator|fetcher)
1167
+ ].strip.split(/\n+/).freeze
1168
+ end
1169
+ end
1170
+ end