crawler_detect 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.rubocop.yml +174 -0
- data/.travis.yml +12 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +56 -0
- data/Rakefile +8 -0
- data/crawler_detect.gemspec +35 -0
- data/lib/crawler_detect.rb +28 -0
- data/lib/crawler_detect/detector.rb +40 -0
- data/lib/crawler_detect/library.rb +17 -0
- data/lib/crawler_detect/library/crawlers.rb +1170 -0
- data/lib/crawler_detect/library/exclusions.rb +57 -0
- data/lib/crawler_detect/library/headers.rb +25 -0
- data/lib/crawler_detect/version.rb +5 -0
- data/lib/rack/crawler_detect.rb +47 -0
- metadata +159 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4d6a083297a36f03c1ac9af1e0d7bf644faa228a04d1e0c310be521ca396e430
|
4
|
+
data.tar.gz: e089000e6f93f8ac54646855a719a63279cbed7f24dc5fae94d6c9f388cf823d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c8008174d874b18ca0ae75224ede7287e92c5e5e82d81778f131f7eafb030935bb79e47814d12e90d3e72a8e17e6b1b2584621bbf81579e92654ad3558ca8325
|
7
|
+
data.tar.gz: 6c15fc963c79b145a32c174275ba12b39ee3ac3f206eb6643cae651c3ea9171ab3fa35f98f77f63d3d1985f2d202c1d8b04ba588fde543f84e68fca94ff8a40a
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,174 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.2
|
3
|
+
# RuboCop has a bunch of cops enabled by default. This setting tells RuboCop
|
4
|
+
# to ignore them, so only the ones explicitly set in this file are enabled.
|
5
|
+
DisabledByDefault: true
|
6
|
+
Exclude:
|
7
|
+
- '**/templates/**/*'
|
8
|
+
- '**/vendor/**/*'
|
9
|
+
- '**/vendor/**/.*'
|
10
|
+
- '**/node_modules/**/*'
|
11
|
+
- 'actionpack/lib/action_dispatch/journey/parser.rb'
|
12
|
+
|
13
|
+
# Prefer assert_not_x over refute_x
|
14
|
+
CustomCops/RefuteNot:
|
15
|
+
Include:
|
16
|
+
- '**/test/**/*'
|
17
|
+
|
18
|
+
# Prefer &&/|| over and/or.
|
19
|
+
Style/AndOr:
|
20
|
+
Enabled: true
|
21
|
+
|
22
|
+
# Do not use braces for hash literals when they are the last argument of a
|
23
|
+
# method call.
|
24
|
+
Style/BracesAroundHashParameters:
|
25
|
+
Enabled: true
|
26
|
+
EnforcedStyle: context_dependent
|
27
|
+
|
28
|
+
# Align `when` with `case`.
|
29
|
+
Layout/CaseIndentation:
|
30
|
+
Enabled: true
|
31
|
+
|
32
|
+
# Align comments with method definitions.
|
33
|
+
Layout/CommentIndentation:
|
34
|
+
Enabled: true
|
35
|
+
|
36
|
+
Layout/ElseAlignment:
|
37
|
+
Enabled: true
|
38
|
+
|
39
|
+
# Align `end` with the matching keyword or starting expression except for
|
40
|
+
# assignments, where it should be aligned with the LHS.
|
41
|
+
Layout/EndAlignment:
|
42
|
+
Enabled: true
|
43
|
+
EnforcedStyleAlignWith: variable
|
44
|
+
AutoCorrect: true
|
45
|
+
|
46
|
+
Layout/EmptyLineAfterMagicComment:
|
47
|
+
Enabled: true
|
48
|
+
|
49
|
+
# In a regular class definition, no empty lines around the body.
|
50
|
+
Layout/EmptyLinesAroundClassBody:
|
51
|
+
Enabled: true
|
52
|
+
|
53
|
+
# In a regular method definition, no empty lines around the body.
|
54
|
+
Layout/EmptyLinesAroundMethodBody:
|
55
|
+
Enabled: true
|
56
|
+
|
57
|
+
# In a regular module definition, no empty lines around the body.
|
58
|
+
Layout/EmptyLinesAroundModuleBody:
|
59
|
+
Enabled: true
|
60
|
+
|
61
|
+
Layout/FirstParameterIndentation:
|
62
|
+
Enabled: true
|
63
|
+
|
64
|
+
# Use Ruby >= 1.9 syntax for hashes. Prefer { a: :b } over { :a => :b }.
|
65
|
+
Style/HashSyntax:
|
66
|
+
Enabled: true
|
67
|
+
|
68
|
+
# Method definitions after `private` or `protected` isolated calls need one
|
69
|
+
# extra level of indentation.
|
70
|
+
Layout/IndentationConsistency:
|
71
|
+
Enabled: true
|
72
|
+
EnforcedStyle: rails
|
73
|
+
|
74
|
+
# Two spaces, no tabs (for indentation).
|
75
|
+
Layout/IndentationWidth:
|
76
|
+
Enabled: true
|
77
|
+
|
78
|
+
Layout/LeadingCommentSpace:
|
79
|
+
Enabled: true
|
80
|
+
|
81
|
+
Layout/SpaceAfterColon:
|
82
|
+
Enabled: true
|
83
|
+
|
84
|
+
Layout/SpaceAfterComma:
|
85
|
+
Enabled: true
|
86
|
+
|
87
|
+
Layout/SpaceAroundEqualsInParameterDefault:
|
88
|
+
Enabled: true
|
89
|
+
|
90
|
+
Layout/SpaceAroundKeyword:
|
91
|
+
Enabled: true
|
92
|
+
|
93
|
+
Layout/SpaceAroundOperators:
|
94
|
+
Enabled: true
|
95
|
+
|
96
|
+
Layout/SpaceBeforeComma:
|
97
|
+
Enabled: true
|
98
|
+
|
99
|
+
Layout/SpaceBeforeFirstArg:
|
100
|
+
Enabled: true
|
101
|
+
|
102
|
+
Style/DefWithParentheses:
|
103
|
+
Enabled: true
|
104
|
+
|
105
|
+
# Defining a method with parameters needs parentheses.
|
106
|
+
Style/MethodDefParentheses:
|
107
|
+
Enabled: true
|
108
|
+
|
109
|
+
Style/FrozenStringLiteralComment:
|
110
|
+
Enabled: true
|
111
|
+
EnforcedStyle: always
|
112
|
+
Exclude:
|
113
|
+
- 'actionview/test/**/*.builder'
|
114
|
+
- 'actionview/test/**/*.ruby'
|
115
|
+
- 'actionpack/test/**/*.builder'
|
116
|
+
- 'actionpack/test/**/*.ruby'
|
117
|
+
- 'activestorage/db/migrate/**/*.rb'
|
118
|
+
- 'db/migrate/**/*.rb'
|
119
|
+
- 'db/*.rb'
|
120
|
+
|
121
|
+
# Use `foo {}` not `foo{}`.
|
122
|
+
Layout/SpaceBeforeBlockBraces:
|
123
|
+
Enabled: true
|
124
|
+
|
125
|
+
# Use `foo { bar }` not `foo {bar}`.
|
126
|
+
Layout/SpaceInsideBlockBraces:
|
127
|
+
Enabled: true
|
128
|
+
|
129
|
+
# Use `{ a: 1 }` not `{a:1}`.
|
130
|
+
Layout/SpaceInsideHashLiteralBraces:
|
131
|
+
Enabled: true
|
132
|
+
|
133
|
+
Layout/SpaceInsideParens:
|
134
|
+
Enabled: true
|
135
|
+
|
136
|
+
# Check quotes usage according to lint rule below.
|
137
|
+
Style/StringLiterals:
|
138
|
+
Enabled: true
|
139
|
+
EnforcedStyle: double_quotes
|
140
|
+
|
141
|
+
# Detect hard tabs, no hard tabs.
|
142
|
+
Layout/Tab:
|
143
|
+
Enabled: true
|
144
|
+
|
145
|
+
# Files should end with a single newline and no trailing blank lines.
|
146
|
+
Layout/TrailingBlankLines:
|
147
|
+
Enabled: true
|
148
|
+
|
149
|
+
# No trailing whitespace.
|
150
|
+
Layout/TrailingWhitespace:
|
151
|
+
Enabled: true
|
152
|
+
|
153
|
+
# Use quotes for string literals when they are enough.
|
154
|
+
Style/UnneededPercentQ:
|
155
|
+
Enabled: true
|
156
|
+
|
157
|
+
# Require parentheses in predicate method calls whose last argument contains && or ||.
|
158
|
+
Lint/RequireParentheses:
|
159
|
+
Enabled: true
|
160
|
+
|
161
|
+
Lint/StringConversionInInterpolation:
|
162
|
+
Enabled: true
|
163
|
+
|
164
|
+
Style/RedundantReturn:
|
165
|
+
Enabled: true
|
166
|
+
AllowMultipleReturnValues: true
|
167
|
+
|
168
|
+
Style/Semicolon:
|
169
|
+
Enabled: true
|
170
|
+
AllowAsExpressionSeparator: true
|
171
|
+
|
172
|
+
# Prefer Foo.method over Foo::method
|
173
|
+
Style/ColonMethodCall:
|
174
|
+
Enabled: true
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Pavel Kozlov
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# CrawlerDetect
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/loadkpi/crawler_detect.svg?branch=master)](https://travis-ci.org/loadkpi/crawler_detect)
|
4
|
+
|
5
|
+
## About
|
6
|
+
**CrawlerDetect** is a Ruby port of the PHP class [CrawlerDetect](https://github.com/JayBizzle/Crawler-Detect).
|
7
|
+
|
8
|
+
It helps to detect bots/crawlers/spiders via the user agent and other HTTP headers. It can currently detect thousands of bots/spiders/crawlers.
|
9
|
+
### Why CrawlerDetect?
|
10
|
+
Comparison with other popular bot-detection gems:
|
11
|
+
|
12
|
+
| | CrawlerDetect | Voight-Kampff | Browser |
|
13
|
+
|--|--|--|--|
|
14
|
+
| Number of bot-patterns | >1000 | ~280 | ~280 |
|
15
|
+
| Number of checked HTTP-headers | 10 | 1 | 1 |
|
16
|
+
| Number of updates of bot-list *(1st half of 2018)* | 14 | 1 | 7 |
|
17
|
+
|
18
|
+
## Installation
|
19
|
+
Add this line to your application's Gemfile:
|
20
|
+
|
21
|
+
`gem 'crawler_detect'`
|
22
|
+
## Basic Usage
|
23
|
+
```
|
24
|
+
CrawlerDetect.is_crawler?("Bot user agent")
|
25
|
+
=> true
|
26
|
+
```
|
27
|
+
Or, if you need the crawler name:
|
28
|
+
```
|
29
|
+
detector = CrawlerDetect.new("Googlebot/2.1 (http://www.google.com/bot.html)")
|
30
|
+
detector.is_crawler?
|
31
|
+
=> true
|
32
|
+
detector.crawler_name
|
33
|
+
=> "Googlebot"
|
34
|
+
```
|
35
|
+
## Rack::Request extension
|
36
|
+
**Optionally** you can add additional methods for `request`:
|
37
|
+
```
|
38
|
+
request.is_crawler?
|
39
|
+
=> false
|
40
|
+
request.crawler_name
|
41
|
+
=> nil
|
42
|
+
```
|
43
|
+
The only thing you have to do is configure the `Rack::CrawlerDetect` middleware:
|
44
|
+
### Rails
|
45
|
+
```
|
46
|
+
class Application < Rails::Application
|
47
|
+
...
|
48
|
+
config.middleware.use Rack::CrawlerDetect
|
49
|
+
end
|
50
|
+
```
|
51
|
+
### Rack
|
52
|
+
```
|
53
|
+
use Rack::CrawlerDetect
|
54
|
+
```
|
55
|
+
## License
|
56
|
+
MIT License
|
data/Rakefile
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path("../lib", __FILE__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require "crawler_detect/version"
|
6
|
+
|
7
|
+
# Gem specification for crawler_detect: identity metadata, the list of
# packaged files, and the gems needed only for development.
Gem::Specification.new do |spec|
  spec.name        = "crawler_detect"
  spec.version     = CrawlerDetect::VERSION
  spec.authors     = ["Pavel Kozlov"]
  spec.email       = ["loadkpi@gmail.com"]

  spec.summary     = "CrawlerDetect: detect bots/crawlers"
  spec.description = "CrawlerDetect is a library to detect bots/crawlers via the user agent"
  spec.homepage    = "https://github.com/loadkpi/crawler_detect"
  spec.license     = "MIT"

  # Package every file tracked by git (listed from the directory holding this
  # file), leaving out anything under test/, spec/ or features/.
  project_root = File.expand_path("..", __FILE__)
  non_packaged = %r{^(test|spec|features)/}
  spec.files = Dir.chdir(project_root) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(non_packaged) }
  end

  spec.bindir        = "exe"
  # Executables are the base names of the packaged files that live under exe/.
  spec.executables   = spec.files.grep(%r{^exe/}).map { |exe| File.basename(exe) }
  spec.require_paths = ["lib"]

  # Development-only dependencies, declared in the original order.
  {
    "bundler"        => "~> 1.15",
    "rake"           => "~> 10.0",
    "rspec"          => "~> 3.0",
    "fuubar"         => "~> 2.0",
    "parallel_tests" => "~> 2.0",
    "pry-meta"       => "~> 0.0.10",
    "rack-test"      => "~> 1.1"
  }.each do |gem_name, constraint|
    spec.add_development_dependency gem_name, constraint
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "crawler_detect/detector"
|
4
|
+
require "crawler_detect/library"
|
5
|
+
require "crawler_detect/library/crawlers"
|
6
|
+
require "crawler_detect/library/exclusions"
|
7
|
+
require "crawler_detect/library/headers"
|
8
|
+
require "crawler_detect/version"
|
9
|
+
|
10
|
+
require "rack/crawler_detect"
|
11
|
+
|
12
|
+
# Top-level convenience API of the gem. Both public methods delegate to
# CrawlerDetect::Detector.
module CrawlerDetect
  # Wraps a user agent in a detector object.
  #
  # @param user_agent [String] value of the User-Agent header
  # @return [CrawlerDetect::Detector]
  def self.new(user_agent)
    detector(user_agent)
  end

  # One-shot check that does not expose the detector object.
  #
  # @param user_agent [String] value of the User-Agent header
  # @return [Boolean] whatever Detector#is_crawler? reports for the agent
  def self.is_crawler?(user_agent)
    detector(user_agent).is_crawler?
  end

  # Single place where detectors are instantiated; not part of the public API.
  def self.detector(user_agent)
    CrawlerDetect::Detector.new(user_agent)
  end
  private_class_method :detector
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CrawlerDetect
  # Decides whether a single User-Agent string belongs to a crawler.
  #
  # The verdict is computed on the first call to #is_crawler? and then
  # remembered, so the pattern libraries are consulted at most once per
  # instance.
  class Detector
    # @param user_agent [String, nil] raw User-Agent value. It is copied, so
    #   the caller's string is never modified; nil is treated as an empty
    #   user agent instead of raising later on.
    def initialize(user_agent)
      @user_agent = user_agent.to_s.dup
    end

    # @return [Boolean] true when the user agent, after every exclusion
    #   pattern has been removed from it, matches the crawler list.
    def is_crawler?
      # `||=` cannot memoize a false verdict: it would rebuild both regexps
      # and strip exclusions from @user_agent again on every call.
      return @is_crawler if defined?(@is_crawler)

      @is_crawler = !completely_exclusion? && matches_crawler_list?
    end

    # @return [String, nil] the part of the user agent matched by the crawler
    #   list, or nil when the user agent is not a crawler.
    def crawler_name
      return unless is_crawler?

      @crawler_name
    end

    private

      # Removes every exclusion match from @user_agent in place and reports
      # whether only whitespace is left. The stripped string is what
      # #matches_crawler_list? inspects afterwards.
      def completely_exclusion?
        @user_agent.gsub!(exclusions_matcher, "")
        @user_agent.strip.empty?
      end

      # Stores the matched text (an empty string when nothing matches) in
      # @crawler_name and reports whether there was a match.
      def matches_crawler_list?
        @crawler_name = crawlers_matcher.match(@user_agent).to_s
        !@crawler_name.empty?
      end

      # Regexp built from the "exclusions" list of CrawlerDetect::Library.
      def exclusions_matcher
        CrawlerDetect::Library.get_regexp("exclusions")
      end

      # Regexp built from the "crawlers" list of CrawlerDetect::Library.
      def crawlers_matcher
        CrawlerDetect::Library.get_regexp("crawlers")
      end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CrawlerDetect
  # Access point for the pattern lists stored in constants under
  # CrawlerDetect::Library (for example Crawlers::CRAWLERS).
  module Library
    class << self
      # Combines every pattern of a list into one case-insensitive regexp.
      #
      # The compiled regexp is cached per list name, so the alternation is
      # built once instead of on every detection.
      #
      # @param param [String] lower-case list name, e.g. "crawlers"
      # @return [Regexp] alternation of all patterns in the list
      def get_regexp(param)
        @regexps ||= {}
        @regexps[param.to_s] ||= begin
          pattern = get_array(param).join("|")
          /#{pattern}/i
        end
      end

      # Looks up the constant holding the patterns of a list: the list name
      # capitalized is the module, the list name upcased is the constant.
      #
      # @param param [String] lower-case list name, e.g. "crawlers"
      # @return [Object] the constant's value; #get_regexp calls #join on it,
      #   so it is expected to be an Array of pattern strings
      # @raise [NameError] when no such module or constant exists
      def get_array(param)
        const_name = "CrawlerDetect::Library::#{param.capitalize}::#{param.upcase}"
        const_get(const_name)
      end
    end
  end
end
|
@@ -0,0 +1,1170 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# rubocop:disable Layout/TrailingWhitespace
|
4
|
+
module CrawlerDetect
|
5
|
+
module Library
|
6
|
+
module Crawlers
|
7
|
+
CRAWLERS = %q[
|
8
|
+
.*Java.*outbrain
|
9
|
+
YLT
|
10
|
+
008\/
|
11
|
+
192\.comAgent
|
12
|
+
2ip\.ru
|
13
|
+
404checker
|
14
|
+
404enemy
|
15
|
+
80legs
|
16
|
+
^b0t$
|
17
|
+
^bluefish
|
18
|
+
^Calypso v\/
|
19
|
+
^COMODO DCV
|
20
|
+
^DangDang
|
21
|
+
^DavClnt
|
22
|
+
^FDM
|
23
|
+
^git\/
|
24
|
+
^Goose\/
|
25
|
+
^Grabber
|
26
|
+
^HTTPClient\/
|
27
|
+
^Java\/
|
28
|
+
^Jeode\/
|
29
|
+
^Jetty\/
|
30
|
+
^Mail\/
|
31
|
+
^Mget
|
32
|
+
^Microsoft URL Control
|
33
|
+
^NG\/[0-9\.]
|
34
|
+
^NING\/
|
35
|
+
^PHP\/[0-9]
|
36
|
+
^RMA\/
|
37
|
+
^Ruby|Ruby\/[0-9]
|
38
|
+
^scrutiny\/
|
39
|
+
^VSE\/[0-9]
|
40
|
+
^WordPress\.com
|
41
|
+
^XRL\/[0-9]
|
42
|
+
^ZmEu
|
43
|
+
a3logics\.in
|
44
|
+
A6-Indexer
|
45
|
+
a\.pr-cy\.ru
|
46
|
+
Abonti
|
47
|
+
Aboundex
|
48
|
+
aboutthedomain
|
49
|
+
Accoona-AI-Agent
|
50
|
+
acoon
|
51
|
+
acrylicapps\.com\/pulp
|
52
|
+
Acunetix
|
53
|
+
AdAuth\/
|
54
|
+
adbeat
|
55
|
+
AddThis
|
56
|
+
ADmantX
|
57
|
+
adressendeutschland
|
58
|
+
adscanner\/
|
59
|
+
Advanced Email Extractor v
|
60
|
+
agentslug
|
61
|
+
AHC
|
62
|
+
aihit
|
63
|
+
aiohttp\/
|
64
|
+
Airmail
|
65
|
+
akka-http\/
|
66
|
+
akula\/
|
67
|
+
alertra
|
68
|
+
alexa site audit
|
69
|
+
Alibaba\.Security\.Heimdall
|
70
|
+
Alligator
|
71
|
+
allloadin\.com
|
72
|
+
AllSubmitter
|
73
|
+
alyze\.info
|
74
|
+
amagit
|
75
|
+
Anarchie
|
76
|
+
AndroidDownloadManager
|
77
|
+
Anemone
|
78
|
+
AngleSharp\/
|
79
|
+
Ant\.com
|
80
|
+
Anturis Agent
|
81
|
+
AnyEvent-HTTP\/
|
82
|
+
Apache Droid
|
83
|
+
Apache-HttpAsyncClient\/
|
84
|
+
Apache-HttpClient\/
|
85
|
+
ApacheBench\/
|
86
|
+
Apexoo
|
87
|
+
APIs-Google
|
88
|
+
AportWorm\/[0-9]
|
89
|
+
AppBeat\/[0-9]
|
90
|
+
AppEngine-Google
|
91
|
+
AppStoreScraperZ
|
92
|
+
Arachmo
|
93
|
+
arachnode
|
94
|
+
Arachnophilia
|
95
|
+
aria2
|
96
|
+
Arukereso
|
97
|
+
asafaweb.com
|
98
|
+
AskQuickly
|
99
|
+
ASPSeek
|
100
|
+
Asterias
|
101
|
+
Astute
|
102
|
+
asynchttp
|
103
|
+
Attach
|
104
|
+
autocite
|
105
|
+
Autonomy
|
106
|
+
axios\/
|
107
|
+
B-l-i-t-z-B-O-T
|
108
|
+
Backlink-Ceck
|
109
|
+
backlink-check
|
110
|
+
BackStreet
|
111
|
+
BackWeb
|
112
|
+
Bad-Neighborhood
|
113
|
+
Badass
|
114
|
+
baidu\.com
|
115
|
+
Bandit
|
116
|
+
BatchFTP
|
117
|
+
Battleztar\ Bazinga
|
118
|
+
baypup\/[0-9]
|
119
|
+
baypup\/colbert
|
120
|
+
BazQux
|
121
|
+
BBBike
|
122
|
+
BCKLINKS
|
123
|
+
BDFetch
|
124
|
+
BegunAdvertising\/
|
125
|
+
BigBozz
|
126
|
+
Bigfoot
|
127
|
+
biglotron
|
128
|
+
BingLocalSearch
|
129
|
+
BingPreview
|
130
|
+
binlar
|
131
|
+
biNu image cacher
|
132
|
+
Bitacle
|
133
|
+
biz_Directory
|
134
|
+
Black\ Hole
|
135
|
+
Blackboard Safeassign
|
136
|
+
BlackWidow
|
137
|
+
Bloglovin
|
138
|
+
BlogPulseLive
|
139
|
+
BlogSearch
|
140
|
+
Blogtrottr
|
141
|
+
BlowFish
|
142
|
+
Boardreader
|
143
|
+
boitho\.com-dc
|
144
|
+
BPImageWalker
|
145
|
+
Braintree-Webhooks
|
146
|
+
Branch Metrics API
|
147
|
+
Branch-Passthrough
|
148
|
+
Brandprotect
|
149
|
+
Brandwatch
|
150
|
+
Brodie\/
|
151
|
+
Browsershots
|
152
|
+
BUbiNG
|
153
|
+
Buck\/
|
154
|
+
Buddy
|
155
|
+
BuiltWith
|
156
|
+
Bullseye
|
157
|
+
BunnySlippers
|
158
|
+
Burf Search
|
159
|
+
Butterfly\/
|
160
|
+
BuzzSumo
|
161
|
+
CAAM\/[0-9]
|
162
|
+
CakePHP
|
163
|
+
Calculon
|
164
|
+
CapsuleChecker
|
165
|
+
CaretNail
|
166
|
+
catexplorador
|
167
|
+
cb crawl
|
168
|
+
CC Metadata Scaper
|
169
|
+
Cegbfeieh
|
170
|
+
Cerberian Drtrs
|
171
|
+
CERT\.at-Statistics-Survey
|
172
|
+
cg-eye
|
173
|
+
changedetection
|
174
|
+
ChangesMeter\/
|
175
|
+
Charlotte
|
176
|
+
CheckHost
|
177
|
+
checkprivacy
|
178
|
+
CherryPicker
|
179
|
+
ChinaClaw
|
180
|
+
Chirp\/[0-9]
|
181
|
+
chkme\.com
|
182
|
+
Chlooe
|
183
|
+
CirrusExplorer\/
|
184
|
+
CISPA Vulnerability Notification
|
185
|
+
Citoid
|
186
|
+
CJNetworkQuality
|
187
|
+
Clarsentia
|
188
|
+
clips\.ua\.ac\.be
|
189
|
+
Cloud\ mapping
|
190
|
+
CloudEndure
|
191
|
+
CloudFlare-AlwaysOnline
|
192
|
+
Cloudinary\/[0-9]
|
193
|
+
cmcm\.com
|
194
|
+
coccoc
|
195
|
+
cognitiveseo
|
196
|
+
colly -
|
197
|
+
CommaFeed
|
198
|
+
Commons-HttpClient
|
199
|
+
Comodo SSL Checker
|
200
|
+
contactbigdatafr
|
201
|
+
contentkingapp
|
202
|
+
convera
|
203
|
+
CookieReports\.com
|
204
|
+
copyright sheriff
|
205
|
+
CopyRightCheck
|
206
|
+
Copyscape
|
207
|
+
Cosmos4j\.feedback
|
208
|
+
Covario-IDS
|
209
|
+
CrawlForMe\/[0-9]
|
210
|
+
Crescent
|
211
|
+
cron-job\.org
|
212
|
+
Crowsnest
|
213
|
+
CSHttp
|
214
|
+
curb
|
215
|
+
Curious George
|
216
|
+
curl
|
217
|
+
cuwhois\/[0-9]
|
218
|
+
cybo\.com
|
219
|
+
DareBoost
|
220
|
+
DatabaseDriverMysqli
|
221
|
+
DataCha0s
|
222
|
+
DataparkSearch
|
223
|
+
dataprovider
|
224
|
+
DataXu
|
225
|
+
Daum(oa)?[ \/][0-9]
|
226
|
+
Demon
|
227
|
+
DeuSu
|
228
|
+
developers\.google\.com\/\+\/web\/snippet\/
|
229
|
+
Devil
|
230
|
+
Digg
|
231
|
+
Digincore
|
232
|
+
DigitalPebble
|
233
|
+
Dirbuster
|
234
|
+
Dispatch\/
|
235
|
+
DittoSpyder
|
236
|
+
dlvr
|
237
|
+
DMBrowser
|
238
|
+
DNS-Tools Header-Analyzer
|
239
|
+
DNSPod-reporting
|
240
|
+
docoloc
|
241
|
+
Dolphin http client\/
|
242
|
+
DomainAppender
|
243
|
+
Donuts Content Explorer
|
244
|
+
dotMailer content retrieval
|
245
|
+
dotSemantic
|
246
|
+
downforeveryoneorjustme
|
247
|
+
Download\ Wonder
|
248
|
+
downnotifier\.com
|
249
|
+
DowntimeDetector
|
250
|
+
Dragonfly File Reader
|
251
|
+
Drip
|
252
|
+
drupact
|
253
|
+
Drupal \(\+http:\/\/drupal\.org\/\)
|
254
|
+
DTS\ Agent
|
255
|
+
dubaiindex
|
256
|
+
EARTHCOM
|
257
|
+
Easy-Thumb
|
258
|
+
EasyDL
|
259
|
+
Ebingbong
|
260
|
+
ec2linkfinder
|
261
|
+
eCairn-Grabber
|
262
|
+
eCatch
|
263
|
+
ECCP
|
264
|
+
echocrawl
|
265
|
+
eContext\/
|
266
|
+
Ecxi
|
267
|
+
EirGrabber
|
268
|
+
ElectricMonk
|
269
|
+
elefent
|
270
|
+
EMail Exractor
|
271
|
+
Email%20Extractor%20Lite
|
272
|
+
EMail\ Wolf
|
273
|
+
EmailWolf
|
274
|
+
Embed PHP Library
|
275
|
+
Embedly
|
276
|
+
europarchive\.org
|
277
|
+
evc-batch
|
278
|
+
EventMachine HttpClient
|
279
|
+
Everwall Link Expander
|
280
|
+
Evidon
|
281
|
+
Evrinid
|
282
|
+
ExactSearch
|
283
|
+
ExaleadCloudview
|
284
|
+
Excel\/
|
285
|
+
Exif Viewer
|
286
|
+
ExperianCrawlUK
|
287
|
+
Exploratodo
|
288
|
+
Express WebPictures
|
289
|
+
ExtractorPro
|
290
|
+
Extreme\ Picture\ Finder
|
291
|
+
EyeNetIE
|
292
|
+
ezooms
|
293
|
+
facebookexternalhit
|
294
|
+
facebookplatform
|
295
|
+
fairshare
|
296
|
+
Faraday v
|
297
|
+
fasthttp
|
298
|
+
Faveeo
|
299
|
+
Favicon downloader
|
300
|
+
FavOrg
|
301
|
+
Feed Wrangler
|
302
|
+
Feedbin
|
303
|
+
FeedBooster
|
304
|
+
FeedBucket
|
305
|
+
FeedBunch\/[0-9]
|
306
|
+
FeedBurner
|
307
|
+
FeedChecker
|
308
|
+
Feedly
|
309
|
+
Feedspot
|
310
|
+
Feedwind\/[0-9]
|
311
|
+
feeltiptop
|
312
|
+
Fetch API
|
313
|
+
Fetch\/[0-9]
|
314
|
+
Fever\/[0-9]
|
315
|
+
FHscan
|
316
|
+
Fimap
|
317
|
+
findlink
|
318
|
+
findthatfile
|
319
|
+
FlashGet
|
320
|
+
FlipboardBrowserProxy
|
321
|
+
FlipboardProxy
|
322
|
+
FlipboardRSS
|
323
|
+
Flock\/
|
324
|
+
fluffy
|
325
|
+
Flunky
|
326
|
+
flynxapp
|
327
|
+
forensiq
|
328
|
+
FoundSeoTool\/[0-9]
|
329
|
+
free thumbnails
|
330
|
+
Freeuploader
|
331
|
+
FreeWebMonitoring SiteChecker
|
332
|
+
Funnelback
|
333
|
+
G-i-g-a-b-o-t
|
334
|
+
g00g1e\.net
|
335
|
+
GAChecker
|
336
|
+
ganarvisitas\/[0-9]
|
337
|
+
geek-tools
|
338
|
+
Genderanalyzer
|
339
|
+
Genieo
|
340
|
+
GentleSource
|
341
|
+
Getintent
|
342
|
+
GetLinkInfo
|
343
|
+
getprismatic\.com
|
344
|
+
GetRight
|
345
|
+
GetURLInfo\/[0-9]
|
346
|
+
GetWeb
|
347
|
+
Ghost Inspector
|
348
|
+
GigablastOpenSource
|
349
|
+
GIS-LABS
|
350
|
+
github-camo
|
351
|
+
github\.com\/
|
352
|
+
Go [\d\.]* package http
|
353
|
+
Go http package
|
354
|
+
Go!Zilla
|
355
|
+
Go-Ahead-Got-It
|
356
|
+
Go-http-client
|
357
|
+
gobyus
|
358
|
+
gofetch
|
359
|
+
GomezAgent
|
360
|
+
gooblog
|
361
|
+
Goodzer\/[0-9]
|
362
|
+
GoogleCloudMonitoring
|
363
|
+
Google favicon
|
364
|
+
Google Keyword Suggestion
|
365
|
+
Google Keyword Tool
|
366
|
+
Google Page Speed Insights
|
367
|
+
Google PP Default
|
368
|
+
Google Search Console
|
369
|
+
Google Web Preview
|
370
|
+
Google-Adwords
|
371
|
+
Google-Apps-Script
|
372
|
+
Google-Calendar-Importer
|
373
|
+
Google-HotelAdsVerifier
|
374
|
+
Google-HTTP-Java-Client
|
375
|
+
Google-Publisher-Plugin
|
376
|
+
Google-SearchByImage
|
377
|
+
Google-Site-Verification
|
378
|
+
Google-Structured-Data-Testing-Tool
|
379
|
+
Google-Youtube-Links
|
380
|
+
google_partner_monitoring
|
381
|
+
GoogleDocs
|
382
|
+
GoogleHC\/
|
383
|
+
GoogleProducer
|
384
|
+
Gookey
|
385
|
+
GoScraper
|
386
|
+
GoSpotCheck
|
387
|
+
GoSquared-Status-Checker
|
388
|
+
gosquared-thumbnailer
|
389
|
+
Gotit
|
390
|
+
GoZilla
|
391
|
+
grabify
|
392
|
+
GrabNet
|
393
|
+
Grafula
|
394
|
+
Grammarly
|
395
|
+
GrapeFX
|
396
|
+
grokkit
|
397
|
+
grouphigh
|
398
|
+
grub-client
|
399
|
+
gSOAP\/
|
400
|
+
GT::WWW
|
401
|
+
GTmetrix
|
402
|
+
GuzzleHttp
|
403
|
+
gvfs\/
|
404
|
+
HAA(A)?RTLAND http client
|
405
|
+
Haansoft
|
406
|
+
hackney\/
|
407
|
+
Hatena
|
408
|
+
Havij
|
409
|
+
hawkReader
|
410
|
+
HEADMasterSEO
|
411
|
+
HeartRails_Capture
|
412
|
+
help@dataminr\.com
|
413
|
+
heritrix
|
414
|
+
historious\/
|
415
|
+
hledejLevne\.cz\/[0-9]
|
416
|
+
Hloader
|
417
|
+
HMView
|
418
|
+
Holmes
|
419
|
+
HonesoSearchEngine\/
|
420
|
+
HootSuite Image proxy
|
421
|
+
Hootsuite-WebFeed\/[0-9]
|
422
|
+
hosterstats
|
423
|
+
HostTracker
|
424
|
+
ht:\/\/check
|
425
|
+
htdig
|
426
|
+
HTMLparser
|
427
|
+
http-get
|
428
|
+
HTTP-Header-Abfrage
|
429
|
+
http-kit
|
430
|
+
http-request\/
|
431
|
+
HTTP-Tiny
|
432
|
+
HTTP::Lite
|
433
|
+
http\.rb\/
|
434
|
+
HTTP_Compression_Test
|
435
|
+
http_request2
|
436
|
+
http_requester
|
437
|
+
HttpComponents
|
438
|
+
httphr
|
439
|
+
HTTPMon
|
440
|
+
httpscheck
|
441
|
+
httpssites_power
|
442
|
+
httpunit
|
443
|
+
HttpUrlConnection
|
444
|
+
httrack
|
445
|
+
huaweisymantec
|
446
|
+
HubPages.*crawlingpolicy
|
447
|
+
HubSpot
|
448
|
+
Humanlinks
|
449
|
+
HyperZbozi.cz Feeder
|
450
|
+
i2kconnect\/
|
451
|
+
Iblog
|
452
|
+
ichiro
|
453
|
+
Id-search
|
454
|
+
IdeelaborPlagiaat
|
455
|
+
IDG Twitter Links Resolver
|
456
|
+
IDwhois\/[0-9]
|
457
|
+
Iframely
|
458
|
+
igdeSpyder
|
459
|
+
IlTrovatore
|
460
|
+
Image\ Fetch
|
461
|
+
Image\ Sucker
|
462
|
+
ImageEngine\/
|
463
|
+
Imagga
|
464
|
+
imgsizer
|
465
|
+
InAGist
|
466
|
+
inbound\.li parser
|
467
|
+
InDesign%20CC
|
468
|
+
Indy\ Library
|
469
|
+
infegy
|
470
|
+
infohelfer
|
471
|
+
InfoTekies
|
472
|
+
InfoWizards Reciprocal Link System PRO
|
473
|
+
inpwrd\.com
|
474
|
+
instabid
|
475
|
+
Instapaper
|
476
|
+
Integrity
|
477
|
+
integromedb
|
478
|
+
Intelliseek
|
479
|
+
InterGET
|
480
|
+
Internet\ Ninja
|
481
|
+
internet_archive
|
482
|
+
InternetSeer
|
483
|
+
internetVista monitor
|
484
|
+
intraVnews
|
485
|
+
IODC
|
486
|
+
IOI
|
487
|
+
iplabel
|
488
|
+
ips-agent
|
489
|
+
IPS\/[0-9]
|
490
|
+
IPWorks HTTP\/S Component
|
491
|
+
iqdb\/
|
492
|
+
Iria
|
493
|
+
Irokez
|
494
|
+
isitup\.org
|
495
|
+
iskanie
|
496
|
+
iZSearch
|
497
|
+
janforman
|
498
|
+
Jaunt\/
|
499
|
+
Jbrofuzz
|
500
|
+
Jersey\/
|
501
|
+
JetCar
|
502
|
+
Jigsaw
|
503
|
+
Jobboerse
|
504
|
+
JobFeed discovery
|
505
|
+
Jobg8 URL Monitor
|
506
|
+
jobo
|
507
|
+
Jobrapido
|
508
|
+
Jobsearch1\.5
|
509
|
+
JoinVision Generic
|
510
|
+
JolokiaPwn
|
511
|
+
Joomla
|
512
|
+
Jorgee
|
513
|
+
JS-Kit
|
514
|
+
JustView
|
515
|
+
Kaspersky Lab CFR link resolver
|
516
|
+
KeepRight OpenStreetMap Checker
|
517
|
+
Kelny\/
|
518
|
+
Kerrigan\/
|
519
|
+
KeyCDN
|
520
|
+
Keyword Extractor
|
521
|
+
Keyword\ Density
|
522
|
+
Keywords Research
|
523
|
+
KickFire
|
524
|
+
KimonoLabs\/
|
525
|
+
Kml-Google
|
526
|
+
knows\.is
|
527
|
+
KOCMOHABT
|
528
|
+
kouio
|
529
|
+
kube-probe
|
530
|
+
kulturarw3
|
531
|
+
KumKie
|
532
|
+
L\.webis
|
533
|
+
Larbin
|
534
|
+
Lavf\/
|
535
|
+
LayeredExtractor
|
536
|
+
LeechFTP
|
537
|
+
LeechGet
|
538
|
+
letsencrypt
|
539
|
+
Lftp
|
540
|
+
LibVLC
|
541
|
+
LibWeb
|
542
|
+
Libwhisker
|
543
|
+
libwww
|
544
|
+
Licorne Image Snapshot
|
545
|
+
Liferea\/
|
546
|
+
Lightspeedsystems
|
547
|
+
Likse
|
548
|
+
link checker
|
549
|
+
Link Valet
|
550
|
+
link_thumbnailer
|
551
|
+
LinkAlarm\/
|
552
|
+
linkCheck
|
553
|
+
linkdex
|
554
|
+
LinkExaminer
|
555
|
+
linkfluence
|
556
|
+
linkpeek
|
557
|
+
LinkPreviewGenerator
|
558
|
+
LinkScan
|
559
|
+
LinksManager
|
560
|
+
LinkTiger
|
561
|
+
LinkWalker
|
562
|
+
Lipperhey
|
563
|
+
Litemage_walker
|
564
|
+
livedoor ScreenShot
|
565
|
+
LoadImpactRload
|
566
|
+
LongURL API
|
567
|
+
looksystems\.net
|
568
|
+
ltx71
|
569
|
+
lua-resty-http
|
570
|
+
lwp-request
|
571
|
+
lwp-trivial
|
572
|
+
LWP::Simple
|
573
|
+
lycos
|
574
|
+
LYT\.SR
|
575
|
+
mabontland
|
576
|
+
Mag-Net
|
577
|
+
MagpieRSS
|
578
|
+
Mail.Ru
|
579
|
+
MailChimp
|
580
|
+
Majestic12
|
581
|
+
makecontact\/
|
582
|
+
Mandrill
|
583
|
+
MapperCmd
|
584
|
+
marketinggrader
|
585
|
+
MarkMonitor
|
586
|
+
MarkWatch
|
587
|
+
Mass\ Downloader
|
588
|
+
masscan\/[0-9]
|
589
|
+
Mata\ Hari
|
590
|
+
Mediapartners-Google
|
591
|
+
mediawords
|
592
|
+
MegaIndex\.ru
|
593
|
+
Melvil Rawi\/
|
594
|
+
MergeFlow-PageReader
|
595
|
+
Metaspinner
|
596
|
+
MetaURI
|
597
|
+
MFC_Tear_Sample
|
598
|
+
Microsearch
|
599
|
+
Microsoft Office
|
600
|
+
Microsoft Outlook
|
601
|
+
Microsoft Windows Network Diagnostics
|
602
|
+
Microsoft-WebDAV-MiniRedir
|
603
|
+
Microsoft\ Data\ Access
|
604
|
+
MIDown\ tool
|
605
|
+
MIIxpc
|
606
|
+
Mindjet
|
607
|
+
Miniature.io\/
|
608
|
+
Miniflux
|
609
|
+
Mister\ PiX
|
610
|
+
mixdata dot com
|
611
|
+
mixed-content-scan
|
612
|
+
mixnode
|
613
|
+
Mnogosearch
|
614
|
+
mogimogi
|
615
|
+
Mojeek
|
616
|
+
Mojolicious \(Perl\)
|
617
|
+
monitis
|
618
|
+
Monitority\/[0-9]
|
619
|
+
montastic
|
620
|
+
MonTools
|
621
|
+
Moreover
|
622
|
+
Morfeus\ Fucking\ Scanner
|
623
|
+
Morning Paper
|
624
|
+
MovableType
|
625
|
+
mowser
|
626
|
+
Mrcgiguy
|
627
|
+
MS\ Web\ Services\ Client\ Protocol
|
628
|
+
MSFrontPage
|
629
|
+
mShots
|
630
|
+
MuckRack\/
|
631
|
+
muhstik-scan
|
632
|
+
MVAClient
|
633
|
+
MxToolbox\/
|
634
|
+
nagios
|
635
|
+
Najdi\.si\/
|
636
|
+
Name\ Intelligence
|
637
|
+
Nameprotect
|
638
|
+
Navroad
|
639
|
+
NearSite
|
640
|
+
Needle
|
641
|
+
Nessus
|
642
|
+
Net\ Vampire
|
643
|
+
NetAnts
|
644
|
+
NETCRAFT
|
645
|
+
NetLyzer
|
646
|
+
NetMechanic
|
647
|
+
Netpursual
|
648
|
+
netresearch
|
649
|
+
NetShelter ContentScan
|
650
|
+
Netsparker
|
651
|
+
NetTrack
|
652
|
+
Netvibes
|
653
|
+
NetZIP
|
654
|
+
Neustar WPM
|
655
|
+
NeutrinoAPI
|
656
|
+
NewRelicPinger\/1.0 \(\d+\)
|
657
|
+
NewsBlur .*Finder
|
658
|
+
NewsGator
|
659
|
+
newsme
|
660
|
+
newspaper\/
|
661
|
+
Nexgate Ruby Client
|
662
|
+
NG-Search
|
663
|
+
Nibbler
|
664
|
+
NICErsPRO
|
665
|
+
Nikto
|
666
|
+
nineconnections\.com
|
667
|
+
NLNZ_IAHarvester
|
668
|
+
Nmap Scripting Engine
|
669
|
+
node-superagent
|
670
|
+
node-urllib\/
|
671
|
+
node\.io
|
672
|
+
nominet\.org\.uk
|
673
|
+
Norton-Safeweb
|
674
|
+
Notifixious
|
675
|
+
notifyninja
|
676
|
+
nuhk
|
677
|
+
nutch
|
678
|
+
Nuzzel
|
679
|
+
nWormFeedFinder
|
680
|
+
Nymesis
|
681
|
+
NYU
|
682
|
+
Ocelli\/[0-9]
|
683
|
+
Octopus
|
684
|
+
oegp
|
685
|
+
Offline Explorer
|
686
|
+
Offline\ Navigator
|
687
|
+
okhttp
|
688
|
+
Omea Reader
|
689
|
+
omgili
|
690
|
+
OMSC
|
691
|
+
Online Domain Tools
|
692
|
+
OpenCalaisSemanticProxy
|
693
|
+
Openfind
|
694
|
+
OpenLinkProfiler
|
695
|
+
Openstat\/
|
696
|
+
OpenVAS
|
697
|
+
Optimizer
|
698
|
+
Orbiter
|
699
|
+
OrgProbe\/[0-9]
|
700
|
+
orion-semantics
|
701
|
+
Outlook-Express
|
702
|
+
ow\.ly
|
703
|
+
Owler
|
704
|
+
ownCloud News
|
705
|
+
OxfordCloudService\/[0-9]
|
706
|
+
Page Analyzer
|
707
|
+
Page Valet
|
708
|
+
page2rss
|
709
|
+
page\ scorer
|
710
|
+
page_verifier
|
711
|
+
PageAnalyzer
|
712
|
+
PageGrabber
|
713
|
+
PagePeeker
|
714
|
+
PageScorer
|
715
|
+
Pagespeed\/[0-9]
|
716
|
+
Panopta
|
717
|
+
panscient
|
718
|
+
Papa\ Foto
|
719
|
+
parsijoo
|
720
|
+
Pavuk
|
721
|
+
PayPal IPN
|
722
|
+
pcBrowser
|
723
|
+
Pcore-HTTP
|
724
|
+
PEAR HTTPRequest
|
725
|
+
Pearltrees
|
726
|
+
PECL::HTTP
|
727
|
+
peerindex
|
728
|
+
Peew
|
729
|
+
PeoplePal
|
730
|
+
Perlu -
|
731
|
+
PhantomJS Screenshoter
|
732
|
+
PhantomJS\/
|
733
|
+
Photon\/
|
734
|
+
phpcrawl
|
735
|
+
phpservermon
|
736
|
+
Pi-Monster
|
737
|
+
Picscout
|
738
|
+
Picsearch
|
739
|
+
PictureFinder
|
740
|
+
Pimonster
|
741
|
+
ping\.blo\.gs\/
|
742
|
+
Pingability
|
743
|
+
Pingdom
|
744
|
+
Pingoscope
|
745
|
+
PingSpot
|
746
|
+
pinterest\.com
|
747
|
+
Pixray
|
748
|
+
Pizilla
|
749
|
+
PleaseCrawl
|
750
|
+
Ploetz \+ Zeller
|
751
|
+
Plukkie
|
752
|
+
plumanalytics
|
753
|
+
PocketParser
|
754
|
+
Pockey
|
755
|
+
POE-Component-Client-HTTP
|
756
|
+
Pompos
|
757
|
+
Porkbun
|
758
|
+
Port Monitor
|
759
|
+
postano
|
760
|
+
PostmanRuntime\/
|
761
|
+
PostPost
|
762
|
+
postrank
|
763
|
+
PowerPoint\/
|
764
|
+
Priceonomics Analysis Engine
|
765
|
+
PrintFriendly\.com
|
766
|
+
PritTorrent\/[0-9]
|
767
|
+
Prlog
|
768
|
+
probethenet
|
769
|
+
Project 25499
|
770
|
+
Promotion_Tools_www.searchenginepromotionhelp.com
|
771
|
+
prospectb2b
|
772
|
+
Protopage
|
773
|
+
ProWebWalker
|
774
|
+
proximic
|
775
|
+
PRTG Network Monitor
|
776
|
+
pshtt, https scanning
|
777
|
+
PTST
|
778
|
+
PTST\/[0-9]+
|
779
|
+
Pulsepoint XT3 web scraper
|
780
|
+
Pump
|
781
|
+
Python-httplib2
|
782
|
+
python-requests
|
783
|
+
Python-urllib
|
784
|
+
Qirina Hurdler
|
785
|
+
QQDownload
|
786
|
+
QrafterPro
|
787
|
+
Qseero
|
788
|
+
Qualidator.com SiteAnalyzer
|
789
|
+
QueryN\ Metasearch
|
790
|
+
Quora Link Preview
|
791
|
+
Qwantify
|
792
|
+
Radian6
|
793
|
+
RankActive
|
794
|
+
RankFlex
|
795
|
+
RankSonicSiteAuditor
|
796
|
+
Readability
|
797
|
+
RealDownload
|
798
|
+
RealPlayer%20Downloader
|
799
|
+
RebelMouse
|
800
|
+
Recorder
|
801
|
+
RecurPost\/
|
802
|
+
redback\/
|
803
|
+
Redirect Checker Tool
|
804
|
+
ReederForMac
|
805
|
+
ReGet
|
806
|
+
RepoMonkey
|
807
|
+
request\.js
|
808
|
+
ResponseCodeTest\/[0-9]
|
809
|
+
RestSharp
|
810
|
+
Riddler
|
811
|
+
Rival IQ
|
812
|
+
Robosourcer
|
813
|
+
Robozilla\/[0-9]
|
814
|
+
ROI Hunter
|
815
|
+
RPT-HTTPClient
|
816
|
+
RSSOwl
|
817
|
+
safe-agent-scanner
|
818
|
+
SalesIntelligent
|
819
|
+
Saleslift
|
820
|
+
SauceNAO
|
821
|
+
SBIder
|
822
|
+
scalaj-http
|
823
|
+
scan\.lol
|
824
|
+
ScanAlert
|
825
|
+
Scoop
|
826
|
+
scooter
|
827
|
+
ScoutJet
|
828
|
+
ScoutURLMonitor
|
829
|
+
Scrapy
|
830
|
+
Screaming
|
831
|
+
ScreenShotService\/[0-9]
|
832
|
+
Scrubby
|
833
|
+
Search37\/
|
834
|
+
search\.thunderstone
|
835
|
+
Searchestate
|
836
|
+
SearchSight
|
837
|
+
Seeker
|
838
|
+
semanticdiscovery
|
839
|
+
semanticjuice
|
840
|
+
Semiocast HTTP client
|
841
|
+
Semrush
|
842
|
+
sentry\/
|
843
|
+
SEO Browser
|
844
|
+
Seo Servis
|
845
|
+
seo-nastroj.cz
|
846
|
+
Seobility
|
847
|
+
SEOCentro
|
848
|
+
SeoCheck
|
849
|
+
SEOkicks
|
850
|
+
Seomoz
|
851
|
+
SEOprofiler
|
852
|
+
SeopultContentAnalyzer
|
853
|
+
seoscanners
|
854
|
+
SEOstats
|
855
|
+
Server Density Service Monitoring
|
856
|
+
servernfo\.com
|
857
|
+
SetCronJob\/
|
858
|
+
sexsearcher
|
859
|
+
Seznam
|
860
|
+
Shelob
|
861
|
+
Shodan
|
862
|
+
Shoppimon Analyzer
|
863
|
+
ShoppimonAgent\/[0-9]
|
864
|
+
ShopWiki
|
865
|
+
ShortLinkTranslate
|
866
|
+
shrinktheweb
|
867
|
+
Sideqik
|
868
|
+
SilverReader
|
869
|
+
SimplePie
|
870
|
+
SimplyFast
|
871
|
+
Siphon
|
872
|
+
SISTRIX
|
873
|
+
Site-Shot\/
|
874
|
+
Site24x7
|
875
|
+
Site\ Sucker
|
876
|
+
SiteBar
|
877
|
+
Sitebeam
|
878
|
+
Sitebulb\/
|
879
|
+
SiteCondor
|
880
|
+
SiteExplorer
|
881
|
+
SiteGuardian
|
882
|
+
Siteimprove
|
883
|
+
SiteIndexed
|
884
|
+
Sitemap(s)? Generator
|
885
|
+
SiteMonitor
|
886
|
+
Siteshooter B0t
|
887
|
+
SiteSnagger
|
888
|
+
SiteSucker
|
889
|
+
SiteTruth
|
890
|
+
Sitevigil
|
891
|
+
sitexy\.com
|
892
|
+
SkypeUriPreview
|
893
|
+
Slack\/
|
894
|
+
slider\.com
|
895
|
+
slurp
|
896
|
+
SlySearch
|
897
|
+
SmartDownload
|
898
|
+
SMRF URL Expander
|
899
|
+
SMUrlExpander
|
900
|
+
Snake
|
901
|
+
Snappy
|
902
|
+
SniffRSS
|
903
|
+
sniptracker
|
904
|
+
Snoopy
|
905
|
+
SnowHaze Search
|
906
|
+
sogou web
|
907
|
+
SortSite
|
908
|
+
Sottopop
|
909
|
+
sovereign\.ai
|
910
|
+
SpaceBison
|
911
|
+
Spammen
|
912
|
+
Spanner
|
913
|
+
spaziodati
|
914
|
+
SPDYCheck
|
915
|
+
Specificfeeds
|
916
|
+
speedy
|
917
|
+
SPEng
|
918
|
+
Spinn3r
|
919
|
+
spray-can
|
920
|
+
Sprinklr
|
921
|
+
spyonweb
|
922
|
+
sqlmap
|
923
|
+
Sqlworm
|
924
|
+
Sqworm
|
925
|
+
SSL Labs
|
926
|
+
ssl-tools
|
927
|
+
StackRambler
|
928
|
+
Statastico\/
|
929
|
+
StatusCake
|
930
|
+
Steeler
|
931
|
+
Stratagems Kumo
|
932
|
+
Stroke.cz
|
933
|
+
StudioFACA
|
934
|
+
suchen
|
935
|
+
Sucuri
|
936
|
+
summify
|
937
|
+
Super Monitoring
|
938
|
+
SuperHTTP
|
939
|
+
Surphace Scout
|
940
|
+
Suzuran
|
941
|
+
SwiteScraper
|
942
|
+
Symfony BrowserKit
|
943
|
+
Symfony2 BrowserKit
|
944
|
+
SynHttpClient-Built
|
945
|
+
Sysomos
|
946
|
+
sysscan
|
947
|
+
Szukacz
|
948
|
+
T0PHackTeam
|
949
|
+
tAkeOut
|
950
|
+
Tarantula\/
|
951
|
+
Taringa UGC
|
952
|
+
Teleport
|
953
|
+
Telesoft
|
954
|
+
Telesphoreo
|
955
|
+
Telesphorep
|
956
|
+
Tenon\.io
|
957
|
+
teoma
|
958
|
+
terrainformatica\.com
|
959
|
+
Test Certificate Info
|
960
|
+
Tetrahedron\/[0-9]
|
961
|
+
The Drop Reaper
|
962
|
+
The Expert HTML Source Viewer
|
963
|
+
The Knowledge AI
|
964
|
+
The\ Intraformant
|
965
|
+
theinternetrules
|
966
|
+
TheNomad
|
967
|
+
theoldreader\.com
|
968
|
+
Thinklab
|
969
|
+
Thumbshots
|
970
|
+
ThumbSniper
|
971
|
+
TinEye
|
972
|
+
Tiny Tiny RSS
|
973
|
+
TLSProbe\/
|
974
|
+
Toata
|
975
|
+
topster
|
976
|
+
touche.com
|
977
|
+
Traackr.com
|
978
|
+
TrapitAgent
|
979
|
+
Trendiction
|
980
|
+
Trendsmap Resolver
|
981
|
+
trendspottr\.com
|
982
|
+
truwoGPS
|
983
|
+
TulipChain
|
984
|
+
Turingos
|
985
|
+
Turnitin
|
986
|
+
tweetedtimes\.com
|
987
|
+
Tweetminster
|
988
|
+
Tweezler\/
|
989
|
+
twibble
|
990
|
+
Twice
|
991
|
+
Twikle
|
992
|
+
Twingly
|
993
|
+
Twisted PageGetter
|
994
|
+
Typhoeus
|
995
|
+
ubermetrics-technologies
|
996
|
+
uclassify
|
997
|
+
uCrawlr\/
|
998
|
+
UdmSearch
|
999
|
+
UniversalFeedParser
|
1000
|
+
Unshorten\.It\!\/[0-9]
|
1001
|
+
Untiny
|
1002
|
+
UnwindFetchor
|
1003
|
+
updated
|
1004
|
+
updown\.io daemon
|
1005
|
+
Upflow
|
1006
|
+
Uptimia
|
1007
|
+
URL Verifier
|
1008
|
+
URLChecker
|
1009
|
+
URLitor.com
|
1010
|
+
urlresolver
|
1011
|
+
Urlstat
|
1012
|
+
UrlTrends Ranking Updater
|
1013
|
+
URLy\ Warning
|
1014
|
+
URLy\.Warning
|
1015
|
+
Vacuum
|
1016
|
+
Vagabondo
|
1017
|
+
VB\ Project
|
1018
|
+
vBSEO
|
1019
|
+
VCI
|
1020
|
+
via ggpht\.com GoogleImageProxy
|
1021
|
+
VidibleScraper
|
1022
|
+
Virusdie
|
1023
|
+
visionutils
|
1024
|
+
vkShare
|
1025
|
+
VoidEYE
|
1026
|
+
Voil
|
1027
|
+
voltron
|
1028
|
+
voyager\/
|
1029
|
+
VSAgent\/[0-9]
|
1030
|
+
VSB-TUO\/[0-9]
|
1031
|
+
Vulnbusters Meter
|
1032
|
+
VYU2
|
1033
|
+
w3af\.org
|
1034
|
+
W3C-checklink
|
1035
|
+
W3C-mobileOK
|
1036
|
+
W3C_I18n-Checker
|
1037
|
+
W3C_Unicorn
|
1038
|
+
Wallpapers\/[0-9]+
|
1039
|
+
WallpapersHD
|
1040
|
+
wangling
|
1041
|
+
Wappalyzer
|
1042
|
+
WatchMouse
|
1043
|
+
WbSrch\/
|
1044
|
+
web-capture\.net
|
1045
|
+
Web-Monitoring
|
1046
|
+
Web-sniffer
|
1047
|
+
Web\ Auto
|
1048
|
+
Web\ Collage
|
1049
|
+
Web\ Enhancer
|
1050
|
+
Web\ Fetch
|
1051
|
+
Web\ Fuck
|
1052
|
+
Web\ Pix
|
1053
|
+
Web\ Sauger
|
1054
|
+
Web\ Sucker
|
1055
|
+
Webalta
|
1056
|
+
Webauskunft
|
1057
|
+
WebAuto
|
1058
|
+
WebCapture
|
1059
|
+
WebClient\/
|
1060
|
+
webcollage
|
1061
|
+
WebCookies
|
1062
|
+
WebCopier
|
1063
|
+
WebCorp
|
1064
|
+
WebDoc
|
1065
|
+
WebEnhancer
|
1066
|
+
WebFetch
|
1067
|
+
WebFuck
|
1068
|
+
WebGo\ IS
|
1069
|
+
WebImageCollector
|
1070
|
+
WebImages
|
1071
|
+
WebIndex
|
1072
|
+
webkit2png
|
1073
|
+
WebLeacher
|
1074
|
+
webmastercoffee
|
1075
|
+
webmon
|
1076
|
+
WebPix
|
1077
|
+
WebReaper
|
1078
|
+
WebSauger
|
1079
|
+
webscreenie
|
1080
|
+
Webshag
|
1081
|
+
Webshot
|
1082
|
+
Website Analyzer\/
|
1083
|
+
Website\ Quester
|
1084
|
+
WebsiteExtractor
|
1085
|
+
websitepulse agent
|
1086
|
+
websitepulse[+ ]checker
|
1087
|
+
WebsiteQuester
|
1088
|
+
Websnapr\/
|
1089
|
+
Webster
|
1090
|
+
WebStripper
|
1091
|
+
WebSucker
|
1092
|
+
Webthumb\/[0-9]
|
1093
|
+
WebThumbnail
|
1094
|
+
WebWhacker
|
1095
|
+
WebZIP
|
1096
|
+
WeCrawlForThePeace
|
1097
|
+
WeLikeLinks
|
1098
|
+
WEPA
|
1099
|
+
WeSEE
|
1100
|
+
wf84
|
1101
|
+
Wfuzz\/
|
1102
|
+
wget
|
1103
|
+
WhatsApp
|
1104
|
+
WhatsMyIP
|
1105
|
+
WhatWeb
|
1106
|
+
WhereGoes\?
|
1107
|
+
Whibse
|
1108
|
+
WhoRunsCoinHive
|
1109
|
+
Whynder Magnet
|
1110
|
+
Windows-RSS-Platform
|
1111
|
+
WinHttpRequest
|
1112
|
+
wkhtmlto
|
1113
|
+
wmtips
|
1114
|
+
Woko
|
1115
|
+
Word\/
|
1116
|
+
WordPress\/
|
1117
|
+
wotbox
|
1118
|
+
WP Engine Install Performance API
|
1119
|
+
wpif
|
1120
|
+
wprecon\.com survey
|
1121
|
+
WPScan
|
1122
|
+
wscheck
|
1123
|
+
Wtrace
|
1124
|
+
WWW-Collector-E
|
1125
|
+
WWW-Mechanize
|
1126
|
+
WWW::Mechanize
|
1127
|
+
www\.monitor\.us
|
1128
|
+
WWWOFFLE
|
1129
|
+
x09Mozilla
|
1130
|
+
x22Mozilla
|
1131
|
+
XaxisSemanticsClassifier
|
1132
|
+
Xenu Link Sleuth
|
1133
|
+
XING-contenttabreceiver\/[0-9]
|
1134
|
+
XmlSitemapGenerator
|
1135
|
+
xpymep([0-9]?)\.exe
|
1136
|
+
Y!J-(ASR|BSC)
|
1137
|
+
Yaanb
|
1138
|
+
yacy
|
1139
|
+
Yahoo Ad monitoring
|
1140
|
+
Yahoo Link Preview
|
1141
|
+
YahooCacheSystem
|
1142
|
+
YahooYSMcm
|
1143
|
+
YandeG
|
1144
|
+
Yandex(?!Search)
|
1145
|
+
yanga
|
1146
|
+
yeti
|
1147
|
+
Yo-yo
|
1148
|
+
Yoleo Consumer
|
1149
|
+
yoogliFetchAgent
|
1150
|
+
YottaaMonitor
|
1151
|
+
Your-Website-Sucks\/[0-9]
|
1152
|
+
yourls\.org
|
1153
|
+
Zade
|
1154
|
+
Zao
|
1155
|
+
Zauba
|
1156
|
+
Zemanta Aggregator
|
1157
|
+
Zend\\\\Http\\\\Client
|
1158
|
+
Zend_Http_Client
|
1159
|
+
Zermelo
|
1160
|
+
Zeus
|
1161
|
+
zgrab
|
1162
|
+
ZnajdzFoto
|
1163
|
+
Zombie\.js
|
1164
|
+
ZyBorg
|
1165
|
+
SpamExperts
|
1166
|
+
[a-z0-9\-_]*(bot|crawler|archiver|transcoder|spider|uptime|validator|fetcher)
|
1167
|
+
].strip.split(/\n+/).freeze
|
1168
|
+
end
|
1169
|
+
end
|
1170
|
+
end
|