crawler_detect 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4d6a083297a36f03c1ac9af1e0d7bf644faa228a04d1e0c310be521ca396e430
4
+ data.tar.gz: e089000e6f93f8ac54646855a719a63279cbed7f24dc5fae94d6c9f388cf823d
5
+ SHA512:
6
+ metadata.gz: c8008174d874b18ca0ae75224ede7287e92c5e5e82d81778f131f7eafb030935bb79e47814d12e90d3e72a8e17e6b1b2584621bbf81579e92654ad3558ca8325
7
+ data.tar.gz: 6c15fc963c79b145a32c174275ba12b39ee3ac3f206eb6643cae651c3ea9171ab3fa35f98f77f63d3d1985f2d202c1d8b04ba588fde543f84e68fca94ff8a40a
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format Fuubar
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,174 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.2
3
+ # RuboCop has a bunch of cops enabled by default. This setting tells RuboCop
4
+ # to ignore them, so only the ones explicitly set in this file are enabled.
5
+ DisabledByDefault: true
6
+ Exclude:
7
+ - '**/templates/**/*'
8
+ - '**/vendor/**/*'
9
+ - '**/vendor/**/.*'
10
+ - '**/node_modules/**/*'
11
+ - 'actionpack/lib/action_dispatch/journey/parser.rb'
12
+
13
+ # Prefer assert_not_x over refute_x
14
+ CustomCops/RefuteNot:
15
+ Include:
16
+ - '**/test/**/*'
17
+
18
+ # Prefer &&/|| over and/or.
19
+ Style/AndOr:
20
+ Enabled: true
21
+
22
+ # Do not use braces for hash literals when they are the last argument of a
23
+ # method call.
24
+ Style/BracesAroundHashParameters:
25
+ Enabled: true
26
+ EnforcedStyle: context_dependent
27
+
28
+ # Align `when` with `case`.
29
+ Layout/CaseIndentation:
30
+ Enabled: true
31
+
32
+ # Align comments with method definitions.
33
+ Layout/CommentIndentation:
34
+ Enabled: true
35
+
36
+ Layout/ElseAlignment:
37
+ Enabled: true
38
+
39
+ # Align `end` with the matching keyword or starting expression except for
40
+ # assignments, where it should be aligned with the LHS.
41
+ Layout/EndAlignment:
42
+ Enabled: true
43
+ EnforcedStyleAlignWith: variable
44
+ AutoCorrect: true
45
+
46
+ Layout/EmptyLineAfterMagicComment:
47
+ Enabled: true
48
+
49
+ # In a regular class definition, no empty lines around the body.
50
+ Layout/EmptyLinesAroundClassBody:
51
+ Enabled: true
52
+
53
+ # In a regular method definition, no empty lines around the body.
54
+ Layout/EmptyLinesAroundMethodBody:
55
+ Enabled: true
56
+
57
+ # In a regular module definition, no empty lines around the body.
58
+ Layout/EmptyLinesAroundModuleBody:
59
+ Enabled: true
60
+
61
+ Layout/FirstParameterIndentation:
62
+ Enabled: true
63
+
64
+ # Use Ruby >= 1.9 syntax for hashes. Prefer { a: :b } over { :a => :b }.
65
+ Style/HashSyntax:
66
+ Enabled: true
67
+
68
+ # Method definitions after `private` or `protected` isolated calls need one
69
+ # extra level of indentation.
70
+ Layout/IndentationConsistency:
71
+ Enabled: true
72
+ EnforcedStyle: rails
73
+
74
+ # Two spaces, no tabs (for indentation).
75
+ Layout/IndentationWidth:
76
+ Enabled: true
77
+
78
+ Layout/LeadingCommentSpace:
79
+ Enabled: true
80
+
81
+ Layout/SpaceAfterColon:
82
+ Enabled: true
83
+
84
+ Layout/SpaceAfterComma:
85
+ Enabled: true
86
+
87
+ Layout/SpaceAroundEqualsInParameterDefault:
88
+ Enabled: true
89
+
90
+ Layout/SpaceAroundKeyword:
91
+ Enabled: true
92
+
93
+ Layout/SpaceAroundOperators:
94
+ Enabled: true
95
+
96
+ Layout/SpaceBeforeComma:
97
+ Enabled: true
98
+
99
+ Layout/SpaceBeforeFirstArg:
100
+ Enabled: true
101
+
102
+ Style/DefWithParentheses:
103
+ Enabled: true
104
+
105
+ # Defining a method with parameters needs parentheses.
106
+ Style/MethodDefParentheses:
107
+ Enabled: true
108
+
109
+ Style/FrozenStringLiteralComment:
110
+ Enabled: true
111
+ EnforcedStyle: always
112
+ Exclude:
113
+ - 'actionview/test/**/*.builder'
114
+ - 'actionview/test/**/*.ruby'
115
+ - 'actionpack/test/**/*.builder'
116
+ - 'actionpack/test/**/*.ruby'
117
+ - 'activestorage/db/migrate/**/*.rb'
118
+ - 'db/migrate/**/*.rb'
119
+ - 'db/*.rb'
120
+
121
+ # Use `foo {}` not `foo{}`.
122
+ Layout/SpaceBeforeBlockBraces:
123
+ Enabled: true
124
+
125
+ # Use `foo { bar }` not `foo {bar}`.
126
+ Layout/SpaceInsideBlockBraces:
127
+ Enabled: true
128
+
129
+ # Use `{ a: 1 }` not `{a:1}`.
130
+ Layout/SpaceInsideHashLiteralBraces:
131
+ Enabled: true
132
+
133
+ Layout/SpaceInsideParens:
134
+ Enabled: true
135
+
136
+ # Check quotes usage according to lint rule below.
137
+ Style/StringLiterals:
138
+ Enabled: true
139
+ EnforcedStyle: double_quotes
140
+
141
+ # Detect hard tabs, no hard tabs.
142
+ Layout/Tab:
143
+ Enabled: true
144
+
145
+ # Blank lines should not have any spaces.
146
+ Layout/TrailingBlankLines:
147
+ Enabled: true
148
+
149
+ # No trailing whitespace.
150
+ Layout/TrailingWhitespace:
151
+ Enabled: true
152
+
153
+ # Use quotes for string literals when they are enough.
154
+ Style/UnneededPercentQ:
155
+ Enabled: true
156
+
157
+ # Use my_method(my_arg) not my_method( my_arg ) or my_method my_arg.
158
+ Lint/RequireParentheses:
159
+ Enabled: true
160
+
161
+ Lint/StringConversionInInterpolation:
162
+ Enabled: true
163
+
164
+ Style/RedundantReturn:
165
+ Enabled: true
166
+ AllowMultipleReturnValues: true
167
+
168
+ Style/Semicolon:
169
+ Enabled: true
170
+ AllowAsExpressionSeparator: true
171
+
172
+ # Prefer Foo.method over Foo::method
173
+ Style/ColonMethodCall:
174
+ Enabled: true
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:  # quoted so versions stay strings (an unquoted 2.10 would parse as 2.1)
6
+   - "2.2"
7
+   - "2.3"
8
+   - "2.4"
9
+   - "2.5"
10
+ before_install: gem install bundler -v 1.16.2
11
+ script:
12
+   - bundle exec parallel_rspec spec/
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
+
7
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Pavel Kozlov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,56 @@
1
+ # CrawlerDetect
2
+
3
+ [![Build Status](https://travis-ci.org/loadkpi/crawler_detect.svg?branch=master)](https://travis-ci.org/loadkpi/crawler_detect)
4
+
5
+ ## About
6
+ **CrawlerDetect** is a Ruby port of the PHP class [CrawlerDetect](https://github.com/JayBizzle/Crawler-Detect).
7
+
8
+ It detects bots/crawlers/spiders via the user agent and other HTTP headers. It can currently detect thousands of bots/spiders/crawlers.
9
+ ### Why CrawlerDetect?
10
+ Comparison with other popular bot-detection gems:
11
+
12
+ | | CrawlerDetect | Voight-Kampff | Browser |
13
+ |--|--|--|--|
14
+ | Number of bot-patterns | >1000 | ~280 | ~280 |
15
+ | Number of checked HTTP-headers | 10 | 1 | 1 |
16
+ | Number of updates of bot-list *(1st half of 2018)* | 14 | 1 | 7 |
17
+
18
+ ## Installation
19
+ Add this line to your application's Gemfile:
20
+
21
+ `gem 'crawler_detect'`
22
+ ## Basic Usage
23
+ ```
24
+ CrawlerDetect.is_crawler?("Bot user agent")
25
+ => true
26
+ ```
27
+ Or if you need crawler name:
28
+ ```
29
+ detector = CrawlerDetect.new("Googlebot/2.1 (http://www.google.com/bot.html)")
30
+ detector.is_crawler?
31
+ => true
32
+ detector.crawler_name
33
+ => "Googlebot"
34
+ ```
35
+ ## Rack::Request extension
36
+ **Optionally** you can add additional methods for `request`:
37
+ ```
38
+ request.is_crawler?
39
+ => false
40
+ request.crawler_name
41
+ => nil
42
+ ```
43
+ The only thing you have to do is configure the `Rack::CrawlerDetect` middleware:
44
+ ### Rails
45
+ ```
46
+ class Application < Rails::Application
47
+ ...
48
+ config.middleware.use Rack::CrawlerDetect
49
+ end
50
+ ```
51
+ ### Rack
52
+ ```
53
+ use Rack::CrawlerDetect
54
+ ```
55
+ ## License
56
+ MIT License
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path("../lib", __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "crawler_detect/version"
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = "crawler_detect"
9
+ spec.version = CrawlerDetect::VERSION
10
+ spec.authors = ["Pavel Kozlov"]
11
+ spec.email = ["loadkpi@gmail.com"]
12
+ # Descriptive metadata: summary, description, homepage and license.
13
+ spec.summary = "CrawlerDetect: detect bots/crawlers"
14
+ spec.description = "CrawlerDetect is a library to detect bots/crawlers via the user agent"
15
+ spec.homepage = "https://github.com/loadkpi/crawler_detect"
16
+ spec.license = "MIT"
17
+
18
+ # Specify which files should be added to the gem when it is released.
19
+ # `git ls-files -z` lists the files tracked by git (NUL-separated); paths under test/, spec/ or features/ are left out.
20
+ spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
21
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
+ end
23
+ spec.bindir = "exe"
24
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
+ spec.require_paths = ["lib"]
26
+ # Everything below is declared as a development dependency; no runtime dependency is declared in this block.
27
+ spec.add_development_dependency "bundler", "~> 1.15"
28
+ spec.add_development_dependency "rake", "~> 10.0"
29
+ spec.add_development_dependency "rspec", "~> 3.0"
30
+
31
+ spec.add_development_dependency "fuubar", "~> 2.0"
32
+ spec.add_development_dependency "parallel_tests", "~> 2.0"
33
+ spec.add_development_dependency "pry-meta", "~> 0.0.10"
34
+ spec.add_development_dependency "rack-test", "~> 1.1"
35
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "crawler_detect/detector"
4
+ require "crawler_detect/library"
5
+ require "crawler_detect/library/crawlers"
6
+ require "crawler_detect/library/exclusions"
7
+ require "crawler_detect/library/headers"
8
+ require "crawler_detect/version"
9
+
10
+ require "rack/crawler_detect"
11
+
12
+ module CrawlerDetect
13
+ class << self
14
+ def new(user_agent)
15
+ detector(user_agent)
16
+ end
17
+
18
+ def is_crawler?(user_agent)
19
+ detector(user_agent).is_crawler?
20
+ end
21
+
22
+ private
23
+
24
+ def detector(user_agent)
25
+ CrawlerDetect::Detector.new(user_agent)
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CrawlerDetect
4
+ class Detector
5
+ def initialize(user_agent)
6
+ @user_agent = user_agent.dup
7
+ end
8
+
9
+ def is_crawler?
10
+ @is_crawler ||= begin
11
+ !completely_exclusion? && matches_crawler_list?
12
+ end
13
+ end
14
+
15
+ def crawler_name
16
+ return unless is_crawler?
17
+ @crawler_name
18
+ end
19
+
20
+ private
21
+
22
+ def completely_exclusion?
23
+ @user_agent.gsub!(exclusions_matcher, "")
24
+ @user_agent.strip.length == 0
25
+ end
26
+
27
+ def matches_crawler_list?
28
+ @crawler_name = crawlers_matcher.match(@user_agent).to_s
29
+ !@crawler_name.empty?
30
+ end
31
+
32
+ def exclusions_matcher
33
+ CrawlerDetect::Library.get_regexp("exclusions")
34
+ end
35
+
36
+ def crawlers_matcher
37
+ CrawlerDetect::Library.get_regexp("crawlers")
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CrawlerDetect
4
+ module Library
5
+ class << self
6
+ def get_regexp(param)
7
+ data = get_array(param)
8
+ %r[#{data.join('|')}]i
9
+ end
10
+
11
+ def get_array(param)
12
+ const_name = "CrawlerDetect::Library::#{param.capitalize}::#{param.upcase}"
13
+ const_get(const_name)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,1170 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rubocop:disable Layout/TrailingWhitespace
4
+ module CrawlerDetect
5
+ module Library
6
+ module Crawlers
7
+ CRAWLERS = %q[
8
+ .*Java.*outbrain
9
+ YLT
10
+ 008\/
11
+ 192\.comAgent
12
+ 2ip\.ru
13
+ 404checker
14
+ 404enemy
15
+ 80legs
16
+ ^b0t$
17
+ ^bluefish
18
+ ^Calypso v\/
19
+ ^COMODO DCV
20
+ ^DangDang
21
+ ^DavClnt
22
+ ^FDM
23
+ ^git\/
24
+ ^Goose\/
25
+ ^Grabber
26
+ ^HTTPClient\/
27
+ ^Java\/
28
+ ^Jeode\/
29
+ ^Jetty\/
30
+ ^Mail\/
31
+ ^Mget
32
+ ^Microsoft URL Control
33
+ ^NG\/[0-9\.]
34
+ ^NING\/
35
+ ^PHP\/[0-9]
36
+ ^RMA\/
37
+ ^Ruby|Ruby\/[0-9]
38
+ ^scrutiny\/
39
+ ^VSE\/[0-9]
40
+ ^WordPress\.com
41
+ ^XRL\/[0-9]
42
+ ^ZmEu
43
+ a3logics\.in
44
+ A6-Indexer
45
+ a\.pr-cy\.ru
46
+ Abonti
47
+ Aboundex
48
+ aboutthedomain
49
+ Accoona-AI-Agent
50
+ acoon
51
+ acrylicapps\.com\/pulp
52
+ Acunetix
53
+ AdAuth\/
54
+ adbeat
55
+ AddThis
56
+ ADmantX
57
+ adressendeutschland
58
+ adscanner\/
59
+ Advanced Email Extractor v
60
+ agentslug
61
+ AHC
62
+ aihit
63
+ aiohttp\/
64
+ Airmail
65
+ akka-http\/
66
+ akula\/
67
+ alertra
68
+ alexa site audit
69
+ Alibaba\.Security\.Heimdall
70
+ Alligator
71
+ allloadin\.com
72
+ AllSubmitter
73
+ alyze\.info
74
+ amagit
75
+ Anarchie
76
+ AndroidDownloadManager
77
+ Anemone
78
+ AngleSharp\/
79
+ Ant\.com
80
+ Anturis Agent
81
+ AnyEvent-HTTP\/
82
+ Apache Droid
83
+ Apache-HttpAsyncClient\/
84
+ Apache-HttpClient\/
85
+ ApacheBench\/
86
+ Apexoo
87
+ APIs-Google
88
+ AportWorm\/[0-9]
89
+ AppBeat\/[0-9]
90
+ AppEngine-Google
91
+ AppStoreScraperZ
92
+ Arachmo
93
+ arachnode
94
+ Arachnophilia
95
+ aria2
96
+ Arukereso
97
+ asafaweb.com
98
+ AskQuickly
99
+ ASPSeek
100
+ Asterias
101
+ Astute
102
+ asynchttp
103
+ Attach
104
+ autocite
105
+ Autonomy
106
+ axios\/
107
+ B-l-i-t-z-B-O-T
108
+ Backlink-Ceck
109
+ backlink-check
110
+ BackStreet
111
+ BackWeb
112
+ Bad-Neighborhood
113
+ Badass
114
+ baidu\.com
115
+ Bandit
116
+ BatchFTP
117
+ Battleztar\ Bazinga
118
+ baypup\/[0-9]
119
+ baypup\/colbert
120
+ BazQux
121
+ BBBike
122
+ BCKLINKS
123
+ BDFetch
124
+ BegunAdvertising\/
125
+ BigBozz
126
+ Bigfoot
127
+ biglotron
128
+ BingLocalSearch
129
+ BingPreview
130
+ binlar
131
+ biNu image cacher
132
+ Bitacle
133
+ biz_Directory
134
+ Black\ Hole
135
+ Blackboard Safeassign
136
+ BlackWidow
137
+ Bloglovin
138
+ BlogPulseLive
139
+ BlogSearch
140
+ Blogtrottr
141
+ BlowFish
142
+ Boardreader
143
+ boitho\.com-dc
144
+ BPImageWalker
145
+ Braintree-Webhooks
146
+ Branch Metrics API
147
+ Branch-Passthrough
148
+ Brandprotect
149
+ Brandwatch
150
+ Brodie\/
151
+ Browsershots
152
+ BUbiNG
153
+ Buck\/
154
+ Buddy
155
+ BuiltWith
156
+ Bullseye
157
+ BunnySlippers
158
+ Burf Search
159
+ Butterfly\/
160
+ BuzzSumo
161
+ CAAM\/[0-9]
162
+ CakePHP
163
+ Calculon
164
+ CapsuleChecker
165
+ CaretNail
166
+ catexplorador
167
+ cb crawl
168
+ CC Metadata Scaper
169
+ Cegbfeieh
170
+ Cerberian Drtrs
171
+ CERT\.at-Statistics-Survey
172
+ cg-eye
173
+ changedetection
174
+ ChangesMeter\/
175
+ Charlotte
176
+ CheckHost
177
+ checkprivacy
178
+ CherryPicker
179
+ ChinaClaw
180
+ Chirp\/[0-9]
181
+ chkme\.com
182
+ Chlooe
183
+ CirrusExplorer\/
184
+ CISPA Vulnerability Notification
185
+ Citoid
186
+ CJNetworkQuality
187
+ Clarsentia
188
+ clips\.ua\.ac\.be
189
+ Cloud\ mapping
190
+ CloudEndure
191
+ CloudFlare-AlwaysOnline
192
+ Cloudinary\/[0-9]
193
+ cmcm\.com
194
+ coccoc
195
+ cognitiveseo
196
+ colly -
197
+ CommaFeed
198
+ Commons-HttpClient
199
+ Comodo SSL Checker
200
+ contactbigdatafr
201
+ contentkingapp
202
+ convera
203
+ CookieReports\.com
204
+ copyright sheriff
205
+ CopyRightCheck
206
+ Copyscape
207
+ Cosmos4j\.feedback
208
+ Covario-IDS
209
+ CrawlForMe\/[0-9]
210
+ Crescent
211
+ cron-job\.org
212
+ Crowsnest
213
+ CSHttp
214
+ curb
215
+ Curious George
216
+ curl
217
+ cuwhois\/[0-9]
218
+ cybo\.com
219
+ DareBoost
220
+ DatabaseDriverMysqli
221
+ DataCha0s
222
+ DataparkSearch
223
+ dataprovider
224
+ DataXu
225
+ Daum(oa)?[ \/][0-9]
226
+ Demon
227
+ DeuSu
228
+ developers\.google\.com\/\+\/web\/snippet\/
229
+ Devil
230
+ Digg
231
+ Digincore
232
+ DigitalPebble
233
+ Dirbuster
234
+ Dispatch\/
235
+ DittoSpyder
236
+ dlvr
237
+ DMBrowser
238
+ DNS-Tools Header-Analyzer
239
+ DNSPod-reporting
240
+ docoloc
241
+ Dolphin http client\/
242
+ DomainAppender
243
+ Donuts Content Explorer
244
+ dotMailer content retrieval
245
+ dotSemantic
246
+ downforeveryoneorjustme
247
+ Download\ Wonder
248
+ downnotifier\.com
249
+ DowntimeDetector
250
+ Dragonfly File Reader
251
+ Drip
252
+ drupact
253
+ Drupal \(\+http:\/\/drupal\.org\/\)
254
+ DTS\ Agent
255
+ dubaiindex
256
+ EARTHCOM
257
+ Easy-Thumb
258
+ EasyDL
259
+ Ebingbong
260
+ ec2linkfinder
261
+ eCairn-Grabber
262
+ eCatch
263
+ ECCP
264
+ echocrawl
265
+ eContext\/
266
+ Ecxi
267
+ EirGrabber
268
+ ElectricMonk
269
+ elefent
270
+ EMail Exractor
271
+ Email%20Extractor%20Lite
272
+ EMail\ Wolf
273
+ EmailWolf
274
+ Embed PHP Library
275
+ Embedly
276
+ europarchive\.org
277
+ evc-batch
278
+ EventMachine HttpClient
279
+ Everwall Link Expander
280
+ Evidon
281
+ Evrinid
282
+ ExactSearch
283
+ ExaleadCloudview
284
+ Excel\/
285
+ Exif Viewer
286
+ ExperianCrawlUK
287
+ Exploratodo
288
+ Express WebPictures
289
+ ExtractorPro
290
+ Extreme\ Picture\ Finder
291
+ EyeNetIE
292
+ ezooms
293
+ facebookexternalhit
294
+ facebookplatform
295
+ fairshare
296
+ Faraday v
297
+ fasthttp
298
+ Faveeo
299
+ Favicon downloader
300
+ FavOrg
301
+ Feed Wrangler
302
+ Feedbin
303
+ FeedBooster
304
+ FeedBucket
305
+ FeedBunch\/[0-9]
306
+ FeedBurner
307
+ FeedChecker
308
+ Feedly
309
+ Feedspot
310
+ Feedwind\/[0-9]
311
+ feeltiptop
312
+ Fetch API
313
+ Fetch\/[0-9]
314
+ Fever\/[0-9]
315
+ FHscan
316
+ Fimap
317
+ findlink
318
+ findthatfile
319
+ FlashGet
320
+ FlipboardBrowserProxy
321
+ FlipboardProxy
322
+ FlipboardRSS
323
+ Flock\/
324
+ fluffy
325
+ Flunky
326
+ flynxapp
327
+ forensiq
328
+ FoundSeoTool\/[0-9]
329
+ free thumbnails
330
+ Freeuploader
331
+ FreeWebMonitoring SiteChecker
332
+ Funnelback
333
+ G-i-g-a-b-o-t
334
+ g00g1e\.net
335
+ GAChecker
336
+ ganarvisitas\/[0-9]
337
+ geek-tools
338
+ Genderanalyzer
339
+ Genieo
340
+ GentleSource
341
+ Getintent
342
+ GetLinkInfo
343
+ getprismatic\.com
344
+ GetRight
345
+ GetURLInfo\/[0-9]
346
+ GetWeb
347
+ Ghost Inspector
348
+ GigablastOpenSource
349
+ GIS-LABS
350
+ github-camo
351
+ github\.com\/
352
+ Go [\d\.]* package http
353
+ Go http package
354
+ Go!Zilla
355
+ Go-Ahead-Got-It
356
+ Go-http-client
357
+ gobyus
358
+ gofetch
359
+ GomezAgent
360
+ gooblog
361
+ Goodzer\/[0-9]
362
+ GoogleCloudMonitoring
363
+ Google favicon
364
+ Google Keyword Suggestion
365
+ Google Keyword Tool
366
+ Google Page Speed Insights
367
+ Google PP Default
368
+ Google Search Console
369
+ Google Web Preview
370
+ Google-Adwords
371
+ Google-Apps-Script
372
+ Google-Calendar-Importer
373
+ Google-HotelAdsVerifier
374
+ Google-HTTP-Java-Client
375
+ Google-Publisher-Plugin
376
+ Google-SearchByImage
377
+ Google-Site-Verification
378
+ Google-Structured-Data-Testing-Tool
379
+ Google-Youtube-Links
380
+ google_partner_monitoring
381
+ GoogleDocs
382
+ GoogleHC\/
383
+ GoogleProducer
384
+ Gookey
385
+ GoScraper
386
+ GoSpotCheck
387
+ GoSquared-Status-Checker
388
+ gosquared-thumbnailer
389
+ Gotit
390
+ GoZilla
391
+ grabify
392
+ GrabNet
393
+ Grafula
394
+ Grammarly
395
+ GrapeFX
396
+ grokkit
397
+ grouphigh
398
+ grub-client
399
+ gSOAP\/
400
+ GT::WWW
401
+ GTmetrix
402
+ GuzzleHttp
403
+ gvfs\/
404
+ HAA(A)?RTLAND http client
405
+ Haansoft
406
+ hackney\/
407
+ Hatena
408
+ Havij
409
+ hawkReader
410
+ HEADMasterSEO
411
+ HeartRails_Capture
412
+ help@dataminr\.com
413
+ heritrix
414
+ historious\/
415
+ hledejLevne\.cz\/[0-9]
416
+ Hloader
417
+ HMView
418
+ Holmes
419
+ HonesoSearchEngine\/
420
+ HootSuite Image proxy
421
+ Hootsuite-WebFeed\/[0-9]
422
+ hosterstats
423
+ HostTracker
424
+ ht:\/\/check
425
+ htdig
426
+ HTMLparser
427
+ http-get
428
+ HTTP-Header-Abfrage
429
+ http-kit
430
+ http-request\/
431
+ HTTP-Tiny
432
+ HTTP::Lite
433
+ http\.rb\/
434
+ HTTP_Compression_Test
435
+ http_request2
436
+ http_requester
437
+ HttpComponents
438
+ httphr
439
+ HTTPMon
440
+ httpscheck
441
+ httpssites_power
442
+ httpunit
443
+ HttpUrlConnection
444
+ httrack
445
+ huaweisymantec
446
+ HubPages.*crawlingpolicy
447
+ HubSpot
448
+ Humanlinks
449
+ HyperZbozi.cz Feeder
450
+ i2kconnect\/
451
+ Iblog
452
+ ichiro
453
+ Id-search
454
+ IdeelaborPlagiaat
455
+ IDG Twitter Links Resolver
456
+ IDwhois\/[0-9]
457
+ Iframely
458
+ igdeSpyder
459
+ IlTrovatore
460
+ Image\ Fetch
461
+ Image\ Sucker
462
+ ImageEngine\/
463
+ Imagga
464
+ imgsizer
465
+ InAGist
466
+ inbound\.li parser
467
+ InDesign%20CC
468
+ Indy\ Library
469
+ infegy
470
+ infohelfer
471
+ InfoTekies
472
+ InfoWizards Reciprocal Link System PRO
473
+ inpwrd\.com
474
+ instabid
475
+ Instapaper
476
+ Integrity
477
+ integromedb
478
+ Intelliseek
479
+ InterGET
480
+ Internet\ Ninja
481
+ internet_archive
482
+ InternetSeer
483
+ internetVista monitor
484
+ intraVnews
485
+ IODC
486
+ IOI
487
+ iplabel
488
+ ips-agent
489
+ IPS\/[0-9]
490
+ IPWorks HTTP\/S Component
491
+ iqdb\/
492
+ Iria
493
+ Irokez
494
+ isitup\.org
495
+ iskanie
496
+ iZSearch
497
+ janforman
498
+ Jaunt\/
499
+ Jbrofuzz
500
+ Jersey\/
501
+ JetCar
502
+ Jigsaw
503
+ Jobboerse
504
+ JobFeed discovery
505
+ Jobg8 URL Monitor
506
+ jobo
507
+ Jobrapido
508
+ Jobsearch1\.5
509
+ JoinVision Generic
510
+ JolokiaPwn
511
+ Joomla
512
+ Jorgee
513
+ JS-Kit
514
+ JustView
515
+ Kaspersky Lab CFR link resolver
516
+ KeepRight OpenStreetMap Checker
517
+ Kelny\/
518
+ Kerrigan\/
519
+ KeyCDN
520
+ Keyword Extractor
521
+ Keyword\ Density
522
+ Keywords Research
523
+ KickFire
524
+ KimonoLabs\/
525
+ Kml-Google
526
+ knows\.is
527
+ KOCMOHABT
528
+ kouio
529
+ kube-probe
530
+ kulturarw3
531
+ KumKie
532
+ L\.webis
533
+ Larbin
534
+ Lavf\/
535
+ LayeredExtractor
536
+ LeechFTP
537
+ LeechGet
538
+ letsencrypt
539
+ Lftp
540
+ LibVLC
541
+ LibWeb
542
+ Libwhisker
543
+ libwww
544
+ Licorne Image Snapshot
545
+ Liferea\/
546
+ Lightspeedsystems
547
+ Likse
548
+ link checker
549
+ Link Valet
550
+ link_thumbnailer
551
+ LinkAlarm\/
552
+ linkCheck
553
+ linkdex
554
+ LinkExaminer
555
+ linkfluence
556
+ linkpeek
557
+ LinkPreviewGenerator
558
+ LinkScan
559
+ LinksManager
560
+ LinkTiger
561
+ LinkWalker
562
+ Lipperhey
563
+ Litemage_walker
564
+ livedoor ScreenShot
565
+ LoadImpactRload
566
+ LongURL API
567
+ looksystems\.net
568
+ ltx71
569
+ lua-resty-http
570
+ lwp-request
571
+ lwp-trivial
572
+ LWP::Simple
573
+ lycos
574
+ LYT\.SR
575
+ mabontland
576
+ Mag-Net
577
+ MagpieRSS
578
+ Mail.Ru
579
+ MailChimp
580
+ Majestic12
581
+ makecontact\/
582
+ Mandrill
583
+ MapperCmd
584
+ marketinggrader
585
+ MarkMonitor
586
+ MarkWatch
587
+ Mass\ Downloader
588
+ masscan\/[0-9]
589
+ Mata\ Hari
590
+ Mediapartners-Google
591
+ mediawords
592
+ MegaIndex\.ru
593
+ Melvil Rawi\/
594
+ MergeFlow-PageReader
595
+ Metaspinner
596
+ MetaURI
597
+ MFC_Tear_Sample
598
+ Microsearch
599
+ Microsoft Office
600
+ Microsoft Outlook
601
+ Microsoft Windows Network Diagnostics
602
+ Microsoft-WebDAV-MiniRedir
603
+ Microsoft\ Data\ Access
604
+ MIDown\ tool
605
+ MIIxpc
606
+ Mindjet
607
+ Miniature.io\/
608
+ Miniflux
609
+ Mister\ PiX
610
+ mixdata dot com
611
+ mixed-content-scan
612
+ mixnode
613
+ Mnogosearch
614
+ mogimogi
615
+ Mojeek
616
+ Mojolicious \(Perl\)
617
+ monitis
618
+ Monitority\/[0-9]
619
+ montastic
620
+ MonTools
621
+ Moreover
622
+ Morfeus\ Fucking\ Scanner
623
+ Morning Paper
624
+ MovableType
625
+ mowser
626
+ Mrcgiguy
627
+ MS\ Web\ Services\ Client\ Protocol
628
+ MSFrontPage
629
+ mShots
630
+ MuckRack\/
631
+ muhstik-scan
632
+ MVAClient
633
+ MxToolbox\/
634
+ nagios
635
+ Najdi\.si\/
636
+ Name\ Intelligence
637
+ Nameprotect
638
+ Navroad
639
+ NearSite
640
+ Needle
641
+ Nessus
642
+ Net\ Vampire
643
+ NetAnts
644
+ NETCRAFT
645
+ NetLyzer
646
+ NetMechanic
647
+ Netpursual
648
+ netresearch
649
+ NetShelter ContentScan
650
+ Netsparker
651
+ NetTrack
652
+ Netvibes
653
+ NetZIP
654
+ Neustar WPM
655
+ NeutrinoAPI
656
+ NewRelicPinger\/1.0 \(\d+\)
657
+ NewsBlur .*Finder
658
+ NewsGator
659
+ newsme
660
+ newspaper\/
661
+ Nexgate Ruby Client
662
+ NG-Search
663
+ Nibbler
664
+ NICErsPRO
665
+ Nikto
666
+ nineconnections\.com
667
+ NLNZ_IAHarvester
668
+ Nmap Scripting Engine
669
+ node-superagent
670
+ node-urllib\/
671
+ node\.io
672
+ nominet\.org\.uk
673
+ Norton-Safeweb
674
+ Notifixious
675
+ notifyninja
676
+ nuhk
677
+ nutch
678
+ Nuzzel
679
+ nWormFeedFinder
680
+ Nymesis
681
+ NYU
682
+ Ocelli\/[0-9]
683
+ Octopus
684
+ oegp
685
+ Offline Explorer
686
+ Offline\ Navigator
687
+ okhttp
688
+ Omea Reader
689
+ omgili
690
+ OMSC
691
+ Online Domain Tools
692
+ OpenCalaisSemanticProxy
693
+ Openfind
694
+ OpenLinkProfiler
695
+ Openstat\/
696
+ OpenVAS
697
+ Optimizer
698
+ Orbiter
699
+ OrgProbe\/[0-9]
700
+ orion-semantics
701
+ Outlook-Express
702
+ ow\.ly
703
+ Owler
704
+ ownCloud News
705
+ OxfordCloudService\/[0-9]
706
+ Page Analyzer
707
+ Page Valet
708
+ page2rss
709
+ page\ scorer
710
+ page_verifier
711
+ PageAnalyzer
712
+ PageGrabber
713
+ PagePeeker
714
+ PageScorer
715
+ Pagespeed\/[0-9]
716
+ Panopta
717
+ panscient
718
+ Papa\ Foto
719
+ parsijoo
720
+ Pavuk
721
+ PayPal IPN
722
+ pcBrowser
723
+ Pcore-HTTP
724
+ PEAR HTTPRequest
725
+ Pearltrees
726
+ PECL::HTTP
727
+ peerindex
728
+ Peew
729
+ PeoplePal
730
+ Perlu -
731
+ PhantomJS Screenshoter
732
+ PhantomJS\/
733
+ Photon\/
734
+ phpcrawl
735
+ phpservermon
736
+ Pi-Monster
737
+ Picscout
738
+ Picsearch
739
+ PictureFinder
740
+ Pimonster
741
+ ping\.blo\.gs\/
742
+ Pingability
743
+ Pingdom
744
+ Pingoscope
745
+ PingSpot
746
+ pinterest\.com
747
+ Pixray
748
+ Pizilla
749
+ PleaseCrawl
750
+ Ploetz \+ Zeller
751
+ Plukkie
752
+ plumanalytics
753
+ PocketParser
754
+ Pockey
755
+ POE-Component-Client-HTTP
756
+ Pompos
757
+ Porkbun
758
+ Port Monitor
759
+ postano
760
+ PostmanRuntime\/
761
+ PostPost
762
+ postrank
763
+ PowerPoint\/
764
+ Priceonomics Analysis Engine
765
+ PrintFriendly\.com
766
+ PritTorrent\/[0-9]
767
+ Prlog
768
+ probethenet
769
+ Project 25499
770
+ Promotion_Tools_www.searchenginepromotionhelp.com
771
+ prospectb2b
772
+ Protopage
773
+ ProWebWalker
774
+ proximic
775
+ PRTG Network Monitor
776
+ pshtt, https scanning
777
+ PTST
778
+ PTST\/[0-9]+
779
+ Pulsepoint XT3 web scraper
780
+ Pump
781
+ Python-httplib2
782
+ python-requests
783
+ Python-urllib
784
+ Qirina Hurdler
785
+ QQDownload
786
+ QrafterPro
787
+ Qseero
788
+ Qualidator.com SiteAnalyzer
789
+ QueryN\ Metasearch
790
+ Quora Link Preview
791
+ Qwantify
792
+ Radian6
793
+ RankActive
794
+ RankFlex
795
+ RankSonicSiteAuditor
796
+ Readability
797
+ RealDownload
798
+ RealPlayer%20Downloader
799
+ RebelMouse
800
+ Recorder
801
+ RecurPost\/
802
+ redback\/
803
+ Redirect Checker Tool
804
+ ReederForMac
805
+ ReGet
806
+ RepoMonkey
807
+ request\.js
808
+ ResponseCodeTest\/[0-9]
809
+ RestSharp
810
+ Riddler
811
+ Rival IQ
812
+ Robosourcer
813
+ Robozilla\/[0-9]
814
+ ROI Hunter
815
+ RPT-HTTPClient
816
+ RSSOwl
817
+ safe-agent-scanner
818
+ SalesIntelligent
819
+ Saleslift
820
+ SauceNAO
821
+ SBIder
822
+ scalaj-http
823
+ scan\.lol
824
+ ScanAlert
825
+ Scoop
826
+ scooter
827
+ ScoutJet
828
+ ScoutURLMonitor
829
+ Scrapy
830
+ Screaming
831
+ ScreenShotService\/[0-9]
832
+ Scrubby
833
+ Search37\/
834
+ search\.thunderstone
835
+ Searchestate
836
+ SearchSight
837
+ Seeker
838
+ semanticdiscovery
839
+ semanticjuice
840
+ Semiocast HTTP client
841
+ Semrush
842
+ sentry\/
843
+ SEO Browser
844
+ Seo Servis
845
+ seo-nastroj.cz
846
+ Seobility
847
+ SEOCentro
848
+ SeoCheck
849
+ SEOkicks
850
+ Seomoz
851
+ SEOprofiler
852
+ SeopultContentAnalyzer
853
+ seoscanners
854
+ SEOstats
855
+ Server Density Service Monitoring
856
+ servernfo\.com
857
+ SetCronJob\/
858
+ sexsearcher
859
+ Seznam
860
+ Shelob
861
+ Shodan
862
+ Shoppimon Analyzer
863
+ ShoppimonAgent\/[0-9]
864
+ ShopWiki
865
+ ShortLinkTranslate
866
+ shrinktheweb
867
+ Sideqik
868
+ SilverReader
869
+ SimplePie
870
+ SimplyFast
871
+ Siphon
872
+ SISTRIX
873
+ Site-Shot\/
874
+ Site24x7
875
+ Site\ Sucker
876
+ SiteBar
877
+ Sitebeam
878
+ Sitebulb\/
879
+ SiteCondor
880
+ SiteExplorer
881
+ SiteGuardian
882
+ Siteimprove
883
+ SiteIndexed
884
+ Sitemap(s)? Generator
885
+ SiteMonitor
886
+ Siteshooter B0t
887
+ SiteSnagger
888
+ SiteSucker
889
+ SiteTruth
890
+ Sitevigil
891
+ sitexy\.com
892
+ SkypeUriPreview
893
+ Slack\/
894
+ slider\.com
895
+ slurp
896
+ SlySearch
897
+ SmartDownload
898
+ SMRF URL Expander
899
+ SMUrlExpander
900
+ Snake
901
+ Snappy
902
+ SniffRSS
903
+ sniptracker
904
+ Snoopy
905
+ SnowHaze Search
906
+ sogou web
907
+ SortSite
908
+ Sottopop
909
+ sovereign\.ai
910
+ SpaceBison
911
+ Spammen
912
+ Spanner
913
+ spaziodati
914
+ SPDYCheck
915
+ Specificfeeds
916
+ speedy
917
+ SPEng
918
+ Spinn3r
919
+ spray-can
920
+ Sprinklr
921
+ spyonweb
922
+ sqlmap
923
+ Sqlworm
924
+ Sqworm
925
+ SSL Labs
926
+ ssl-tools
927
+ StackRambler
928
+ Statastico\/
929
+ StatusCake
930
+ Steeler
931
+ Stratagems Kumo
932
+ Stroke.cz
933
+ StudioFACA
934
+ suchen
935
+ Sucuri
936
+ summify
937
+ Super Monitoring
938
+ SuperHTTP
939
+ Surphace Scout
940
+ Suzuran
941
+ SwiteScraper
942
+ Symfony BrowserKit
943
+ Symfony2 BrowserKit
944
+ SynHttpClient-Built
945
+ Sysomos
946
+ sysscan
947
+ Szukacz
948
+ T0PHackTeam
949
+ tAkeOut
950
+ Tarantula\/
951
+ Taringa UGC
952
+ Teleport
953
+ Telesoft
954
+ Telesphoreo
955
+ Telesphorep
956
+ Tenon\.io
957
+ teoma
958
+ terrainformatica\.com
959
+ Test Certificate Info
960
+ Tetrahedron\/[0-9]
961
+ The Drop Reaper
962
+ The Expert HTML Source Viewer
963
+ The Knowledge AI
964
+ The\ Intraformant
965
+ theinternetrules
966
+ TheNomad
967
+ theoldreader\.com
968
+ Thinklab
969
+ Thumbshots
970
+ ThumbSniper
971
+ TinEye
972
+ Tiny Tiny RSS
973
+ TLSProbe\/
974
+ Toata
975
+ topster
976
+ touche.com
977
+ Traackr.com
978
+ TrapitAgent
979
+ Trendiction
980
+ Trendsmap Resolver
981
+ trendspottr\.com
982
+ truwoGPS
983
+ TulipChain
984
+ Turingos
985
+ Turnitin
986
+ tweetedtimes\.com
987
+ Tweetminster
988
+ Tweezler\/
989
+ twibble
990
+ Twice
991
+ Twikle
992
+ Twingly
993
+ Twisted PageGetter
994
+ Typhoeus
995
+ ubermetrics-technologies
996
+ uclassify
997
+ uCrawlr\/
998
+ UdmSearch
999
+ UniversalFeedParser
1000
+ Unshorten\.It\!\/[0-9]
1001
+ Untiny
1002
+ UnwindFetchor
1003
+ updated
1004
+ updown\.io daemon
1005
+ Upflow
1006
+ Uptimia
1007
+ URL Verifier
1008
+ URLChecker
1009
+ URLitor.com
1010
+ urlresolver
1011
+ Urlstat
1012
+ UrlTrends Ranking Updater
1013
+ URLy\ Warning
1014
+ URLy\.Warning
1015
+ Vacuum
1016
+ Vagabondo
1017
+ VB\ Project
1018
+ vBSEO
1019
+ VCI
1020
+ via ggpht\.com GoogleImageProxy
1021
+ VidibleScraper
1022
+ Virusdie
1023
+ visionutils
1024
+ vkShare
1025
+ VoidEYE
1026
+ Voil
1027
+ voltron
1028
+ voyager\/
1029
+ VSAgent\/[0-9]
1030
+ VSB-TUO\/[0-9]
1031
+ Vulnbusters Meter
1032
+ VYU2
1033
+ w3af\.org
1034
+ W3C-checklink
1035
+ W3C-mobileOK
1036
+ W3C_I18n-Checker
1037
+ W3C_Unicorn
1038
+ Wallpapers\/[0-9]+
1039
+ WallpapersHD
1040
+ wangling
1041
+ Wappalyzer
1042
+ WatchMouse
1043
+ WbSrch\/
1044
+ web-capture\.net
1045
+ Web-Monitoring
1046
+ Web-sniffer
1047
+ Web\ Auto
1048
+ Web\ Collage
1049
+ Web\ Enhancer
1050
+ Web\ Fetch
1051
+ Web\ Fuck
1052
+ Web\ Pix
1053
+ Web\ Sauger
1054
+ Web\ Sucker
1055
+ Webalta
1056
+ Webauskunft
1057
+ WebAuto
1058
+ WebCapture
1059
+ WebClient\/
1060
+ webcollage
1061
+ WebCookies
1062
+ WebCopier
1063
+ WebCorp
1064
+ WebDoc
1065
+ WebEnhancer
1066
+ WebFetch
1067
+ WebFuck
1068
+ WebGo\ IS
1069
+ WebImageCollector
1070
+ WebImages
1071
+ WebIndex
1072
+ webkit2png
1073
+ WebLeacher
1074
+ webmastercoffee
1075
+ webmon
1076
+ WebPix
1077
+ WebReaper
1078
+ WebSauger
1079
+ webscreenie
1080
+ Webshag
1081
+ Webshot
1082
+ Website Analyzer\/
1083
+ Website\ Quester
1084
+ WebsiteExtractor
1085
+ websitepulse agent
1086
+ websitepulse[+ ]checker
1087
+ WebsiteQuester
1088
+ Websnapr\/
1089
+ Webster
1090
+ WebStripper
1091
+ WebSucker
1092
+ Webthumb\/[0-9]
1093
+ WebThumbnail
1094
+ WebWhacker
1095
+ WebZIP
1096
+ WeCrawlForThePeace
1097
+ WeLikeLinks
1098
+ WEPA
1099
+ WeSEE
1100
+ wf84
1101
+ Wfuzz\/
1102
+ wget
1103
+ WhatsApp
1104
+ WhatsMyIP
1105
+ WhatWeb
1106
+ WhereGoes\?
1107
+ Whibse
1108
+ WhoRunsCoinHive
1109
+ Whynder Magnet
1110
+ Windows-RSS-Platform
1111
+ WinHttpRequest
1112
+ wkhtmlto
1113
+ wmtips
1114
+ Woko
1115
+ Word\/
1116
+ WordPress\/
1117
+ wotbox
1118
+ WP Engine Install Performance API
1119
+ wpif
1120
+ wprecon\.com survey
1121
+ WPScan
1122
+ wscheck
1123
+ Wtrace
1124
+ WWW-Collector-E
1125
+ WWW-Mechanize
1126
+ WWW::Mechanize
1127
+ www\.monitor\.us
1128
+ WWWOFFLE
1129
+ x09Mozilla
1130
+ x22Mozilla
1131
+ XaxisSemanticsClassifier
1132
+ Xenu Link Sleuth
1133
+ XING-contenttabreceiver\/[0-9]
1134
+ XmlSitemapGenerator
1135
+ xpymep([0-9]?)\.exe
1136
+ Y!J-(ASR|BSC)
1137
+ Yaanb
1138
+ yacy
1139
+ Yahoo Ad monitoring
1140
+ Yahoo Link Preview
1141
+ YahooCacheSystem
1142
+ YahooYSMcm
1143
+ YandeG
1144
+ Yandex(?!Search)
1145
+ yanga
1146
+ yeti
1147
+ Yo-yo
1148
+ Yoleo Consumer
1149
+ yoogliFetchAgent
1150
+ YottaaMonitor
1151
+ Your-Website-Sucks\/[0-9]
1152
+ yourls\.org
1153
+ Zade
1154
+ Zao
1155
+ Zauba
1156
+ Zemanta Aggregator
1157
+ Zend\\\\Http\\\\Client
1158
+ Zend_Http_Client
1159
+ Zermelo
1160
+ Zeus
1161
+ zgrab
1162
+ ZnajdzFoto
1163
+ Zombie\.js
1164
+ ZyBorg
1165
+ SpamExperts
1166
+ [a-z0-9\-_]*(bot|crawler|archiver|transcoder|spider|uptime|validator|fetcher)
1167
+ ].strip.split(/\n+/).freeze
1168
+ end
1169
+ end
1170
+ end