ronin-web 0.3.0.rc1 → 1.0.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.editorconfig +11 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.gitignore +13 -0
- data/.mailmap +1 -0
- data/.ruby-version +1 -0
- data/COPYING.txt +3 -3
- data/ChangeLog.md +115 -70
- data/Gemfile +42 -37
- data/README.md +159 -145
- data/Rakefile +12 -3
- data/bin/ronin-web +9 -17
- data/data/new/nokogiri.rb.erb +12 -0
- data/data/new/server.rb.erb +22 -0
- data/data/new/spider.rb.erb +26 -0
- data/data/new/webapp/.gitignore +15 -0
- data/data/new/webapp/.ruby-version.erb +1 -0
- data/data/new/webapp/Dockerfile.erb +11 -0
- data/data/new/webapp/Gemfile +6 -0
- data/data/new/webapp/app.rb.erb +15 -0
- data/data/new/webapp/config.ru +4 -0
- data/data/new/webapp/docker-compose.yml.erb +9 -0
- data/gemspec.yml +32 -14
- data/lib/ronin/web/cli/command.rb +36 -0
- data/lib/ronin/web/cli/commands/diff.rb +106 -0
- data/lib/ronin/web/cli/commands/html.rb +174 -0
- data/lib/ronin/web/cli/commands/irb.rb +56 -0
- data/lib/ronin/web/cli/commands/new/nokogiri.rb +85 -0
- data/lib/ronin/web/cli/commands/new/server.rb +96 -0
- data/lib/ronin/web/cli/commands/new/spider.rb +315 -0
- data/lib/ronin/web/cli/commands/new/webapp.rb +123 -0
- data/lib/ronin/web/cli/commands/new.rb +64 -0
- data/lib/ronin/web/cli/commands/reverse_proxy.rb +215 -0
- data/lib/ronin/web/cli/commands/server.rb +155 -0
- data/lib/ronin/web/cli/commands/spider.rb +822 -0
- data/lib/ronin/web/cli/ruby_shell.rb +50 -0
- data/lib/ronin/web/cli.rb +44 -0
- data/lib/ronin/web/html.rb +85 -0
- data/lib/ronin/web/mechanize.rb +34 -36
- data/lib/ronin/web/root.rb +27 -0
- data/lib/ronin/web/version.rb +7 -10
- data/lib/ronin/web/xml.rb +85 -0
- data/lib/ronin/web.rb +372 -13
- data/man/ronin-web-diff.1 +41 -0
- data/man/ronin-web-diff.1.md +30 -0
- data/man/ronin-web-html.1 +89 -0
- data/man/ronin-web-html.1.md +66 -0
- data/man/ronin-web-irb.1 +31 -0
- data/man/ronin-web-irb.1.md +22 -0
- data/man/ronin-web-new-nokogiri.1 +41 -0
- data/man/ronin-web-new-nokogiri.1.md +30 -0
- data/man/ronin-web-new-server.1 +45 -0
- data/man/ronin-web-new-server.1.md +33 -0
- data/man/ronin-web-new-spider.1 +173 -0
- data/man/ronin-web-new-spider.1.md +129 -0
- data/man/ronin-web-new-webapp.1 +53 -0
- data/man/ronin-web-new-webapp.1.md +39 -0
- data/man/ronin-web-new.1 +59 -0
- data/man/ronin-web-new.1.md +44 -0
- data/man/ronin-web-reverse-proxy.1 +63 -0
- data/man/ronin-web-reverse-proxy.1.md +47 -0
- data/man/ronin-web-server.1 +59 -0
- data/man/ronin-web-server.1.md +43 -0
- data/man/ronin-web-spider.1 +225 -0
- data/man/ronin-web-spider.1.md +168 -0
- data/man/ronin-web.1 +41 -0
- data/man/ronin-web.1.md +30 -0
- data/ronin-web.gemspec +39 -109
- data/spec/cli/ruby_shell_spec.rb +14 -0
- data/spec/html_spec.rb +43 -0
- data/spec/mechanize_spec.rb +72 -0
- data/spec/spec_helper.rb +5 -3
- data/spec/web_spec.rb +97 -0
- data/spec/xml_spec.rb +42 -0
- metadata +236 -224
- data/.gemtest +0 -0
- data/data/ronin/web/user_agents.yml +0 -247
- data/lib/ronin/network/mixins/web.rb +0 -258
- data/lib/ronin/web/config.rb +0 -34
- data/lib/ronin/web/extensions/nokogiri/xml/attr.rb +0 -47
- data/lib/ronin/web/extensions/nokogiri/xml/document.rb +0 -48
- data/lib/ronin/web/extensions/nokogiri/xml/element.rb +0 -57
- data/lib/ronin/web/extensions/nokogiri/xml/node.rb +0 -86
- data/lib/ronin/web/extensions/nokogiri/xml/text.rb +0 -47
- data/lib/ronin/web/extensions/nokogiri/xml.rb +0 -27
- data/lib/ronin/web/extensions/nokogiri.rb +0 -23
- data/lib/ronin/web/extensions.rb +0 -23
- data/lib/ronin/web/middleware/base.rb +0 -144
- data/lib/ronin/web/middleware/directories.rb +0 -179
- data/lib/ronin/web/middleware/files.rb +0 -144
- data/lib/ronin/web/middleware/filters/campaign_filter.rb +0 -77
- data/lib/ronin/web/middleware/filters/ip_filter.rb +0 -73
- data/lib/ronin/web/middleware/filters/path_filter.rb +0 -73
- data/lib/ronin/web/middleware/filters/referer_filter.rb +0 -71
- data/lib/ronin/web/middleware/filters/user_agent_filter.rb +0 -71
- data/lib/ronin/web/middleware/filters/vhost_filter.rb +0 -71
- data/lib/ronin/web/middleware/filters.rb +0 -28
- data/lib/ronin/web/middleware/helpers.rb +0 -145
- data/lib/ronin/web/middleware/proxy.rb +0 -265
- data/lib/ronin/web/middleware/proxy_request.rb +0 -262
- data/lib/ronin/web/middleware/request.rb +0 -79
- data/lib/ronin/web/middleware/response.rb +0 -33
- data/lib/ronin/web/middleware/router.rb +0 -167
- data/lib/ronin/web/middleware/rule.rb +0 -103
- data/lib/ronin/web/middleware.rb +0 -27
- data/lib/ronin/web/proxy/app.rb +0 -32
- data/lib/ronin/web/proxy/base.rb +0 -46
- data/lib/ronin/web/proxy/web.rb +0 -46
- data/lib/ronin/web/proxy.rb +0 -25
- data/lib/ronin/web/server/app.rb +0 -32
- data/lib/ronin/web/server/base.rb +0 -461
- data/lib/ronin/web/server/web.rb +0 -66
- data/lib/ronin/web/server.rb +0 -25
- data/lib/ronin/web/spider.rb +0 -120
- data/lib/ronin/web/user_agents.rb +0 -196
- data/lib/ronin/web/web.rb +0 -560
- data/spec/helpers/output.rb +0 -3
- data/spec/web/extensions/nokogiri_spec.rb +0 -38
- data/spec/web/helpers/rack_app.rb +0 -24
- data/spec/web/helpers/root/test1/index.html +0 -1
- data/spec/web/helpers/root/test1/test1.txt +0 -1
- data/spec/web/helpers/root/test1.txt +0 -1
- data/spec/web/helpers/root/test2/test2.txt +0 -1
- data/spec/web/helpers/root/test2.txt +0 -1
- data/spec/web/helpers/root/test3/test3.txt +0 -1
- data/spec/web/helpers/root/test3.txt +0 -1
- data/spec/web/helpers/root.rb +0 -15
- data/spec/web/mechanize_spec.rb +0 -62
- data/spec/web/middleware/directories_spec.rb +0 -86
- data/spec/web/middleware/files_spec.rb +0 -57
- data/spec/web/middleware/filters/campaign_filter_spec.rb +0 -30
- data/spec/web/middleware/filters/ip_filter_spec.rb +0 -25
- data/spec/web/middleware/filters/path_filter_spec.rb +0 -29
- data/spec/web/middleware/filters/referer_filter_spec.rb +0 -25
- data/spec/web/middleware/filters/user_agent_filter_spec.rb +0 -25
- data/spec/web/middleware/filters/vhost_filter_spec.rb +0 -23
- data/spec/web/middleware/proxy_spec.rb +0 -67
- data/spec/web/middleware/response_spec.rb +0 -20
- data/spec/web/middleware/router_spec.rb +0 -65
- data/spec/web/middleware/rule_spec.rb +0 -37
- data/spec/web/proxy/base_spec.rb +0 -8
- data/spec/web/server/base_spec.rb +0 -77
- data/spec/web/server/classes/public1/static1.txt +0 -1
- data/spec/web/server/classes/public2/static2.txt +0 -1
- data/spec/web/server/classes/sub_app.rb +0 -13
- data/spec/web/server/classes/test_app.rb +0 -20
- data/spec/web/user_agents_spec.rb +0 -56
- data/spec/web/web_spec.rb +0 -101
@@ -1,196 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Ronin Web - A Ruby library for Ronin that provides support for web
|
3
|
-
# scraping and spidering functionality.
|
4
|
-
#
|
5
|
-
# Copyright (c) 2006-2011 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
|
-
#
|
7
|
-
# This file is part of Ronin Web.
|
8
|
-
#
|
9
|
-
# Ronin is free software: you can redistribute it and/or modify
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# Ronin is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with Ronin. If not, see <http://www.gnu.org/licenses/>.
|
21
|
-
#
|
22
|
-
|
23
|
-
require 'ronin/web/config'
|
24
|
-
|
25
|
-
require 'set'
|
26
|
-
|
27
|
-
module Ronin
|
28
|
-
module Web
|
29
|
-
#
|
30
|
-
# Represents the set of `User-Agent` strings loaded from all
|
31
|
-
# `data/ronin/web/user_agents.yml` files.
|
32
|
-
#
|
33
|
-
# ## ronin/web/user_agents.yml
|
34
|
-
#
|
35
|
-
# The `user_agents.yml` files are essentially YAML files listing
|
36
|
-
# `User-Agent` strings grouped by category:
|
37
|
-
#
|
38
|
-
# ---
|
39
|
-
# :googlebot:
|
40
|
-
# - "Googlebot/2.1 ( http://www.googlebot.com/bot.html)"
|
41
|
-
# - "Googlebot-Image/1.0 ( http://www.googlebot.com/bot.html)"
|
42
|
-
# - "Mediapartners-Google/2.1"
|
43
|
-
# - "Google-Sitemaps/1.0"
|
44
|
-
#
|
45
|
-
# These files can be added to Ronin Repositories or to Ronin libraries,
|
46
|
-
# and will be loaded by the {UserAgents} objects.
|
47
|
-
#
|
48
|
-
# @since 0.3.0
|
49
|
-
#
|
50
|
-
class UserAgents
|
51
|
-
|
52
|
-
include Enumerable
|
53
|
-
|
54
|
-
# Relative path to the User-Agents file.
|
55
|
-
FILE = File.join('ronin','web','user_agents.yml')
|
56
|
-
|
57
|
-
#
|
58
|
-
# Creates a new User-Agent set.
|
59
|
-
#
|
60
|
-
# @api semipublic
|
61
|
-
#
|
62
|
-
def initialize
|
63
|
-
@files = Set[]
|
64
|
-
@user_agents = Hash.new { |hash,key| hash[key] = Set[] }
|
65
|
-
end
|
66
|
-
|
67
|
-
#
|
68
|
-
# The categories of `User-Agent` strings.
|
69
|
-
#
|
70
|
-
# @return [Array<Symbol>]
|
71
|
-
# The names of the categories.
|
72
|
-
#
|
73
|
-
# @api public
|
74
|
-
#
|
75
|
-
def categories
|
76
|
-
reload!
|
77
|
-
|
78
|
-
@user_agents.keys
|
79
|
-
end
|
80
|
-
|
81
|
-
#
|
82
|
-
# Iterates over each User-Agent in the set.
|
83
|
-
#
|
84
|
-
# @yield [ua]
|
85
|
-
# The given block will be passed each User-Agent.
|
86
|
-
#
|
87
|
-
# @yieldparam [String] ua
|
88
|
-
# A User-Agent string within the set.
|
89
|
-
#
|
90
|
-
# @return [Enumerator]
|
91
|
-
# If no block is given, an Enumerator will be returned.
|
92
|
-
#
|
93
|
-
# @api public
|
94
|
-
#
|
95
|
-
def each(&block)
|
96
|
-
return enum_for(:each) unless block_given?
|
97
|
-
|
98
|
-
@user_agents.each do |name,strings|
|
99
|
-
strings.each(&block)
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
#
|
104
|
-
# Selects a `User-Agent` string from the set.
|
105
|
-
#
|
106
|
-
# @param [Symbol, String, Regexp] key
|
107
|
-
# The User-Agents group name, sub-string or Regexp to search for.
|
108
|
-
#
|
109
|
-
# @return [String, nil]
|
110
|
-
# The matching `User-Agent` string.
|
111
|
-
#
|
112
|
-
# @api public
|
113
|
-
#
|
114
|
-
def [](key)
|
115
|
-
reload!
|
116
|
-
|
117
|
-
case key
|
118
|
-
when Symbol
|
119
|
-
if @user_agents.has_key?(key)
|
120
|
-
strings = @user_agents[key]
|
121
|
-
return strings.entries[rand(strings.length)]
|
122
|
-
end
|
123
|
-
when String
|
124
|
-
@user_agents.each do |name,strings|
|
125
|
-
strings.each do |string|
|
126
|
-
return string if string.include?(key)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
return nil
|
131
|
-
when Regexp
|
132
|
-
@user_agents.each do |name,strings|
|
133
|
-
strings.each do |string|
|
134
|
-
return string if string =~ key
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
return nil
|
139
|
-
else
|
140
|
-
raise(TypeError,"key must be a Symbol, String or Regexp")
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
#
|
145
|
-
# Fetches a `User-Agent` string from the set.
|
146
|
-
#
|
147
|
-
# @param [Symbol, String, Regexp] key
|
148
|
-
# The User-Agents group name, sub-string or Regexp to search for.
|
149
|
-
#
|
150
|
-
# @param [String] default
|
151
|
-
# The `User-Agent` string to default to if no match is found.
|
152
|
-
#
|
153
|
-
# @return [String]
|
154
|
-
# The matching `User-Agent` string.
|
155
|
-
#
|
156
|
-
# @raise [ArgumentError]
|
157
|
-
# No matching `User-Agent` string was found, and no default value
|
158
|
-
# was given.
|
159
|
-
#
|
160
|
-
# @api public
|
161
|
-
#
|
162
|
-
def fetch(key,default=nil)
|
163
|
-
unless (string = (self[key] || default))
|
164
|
-
raise(ArgumentError,"no User-Agent strings match #{key.inspect}")
|
165
|
-
end
|
166
|
-
|
167
|
-
return string
|
168
|
-
end
|
169
|
-
|
170
|
-
protected
|
171
|
-
|
172
|
-
#
|
173
|
-
# Reloads the set of User-Agents.
|
174
|
-
#
|
175
|
-
# @api private
|
176
|
-
#
|
177
|
-
def reload!
|
178
|
-
Config.each_data_file(FILE) do |path|
|
179
|
-
next if @files.include?(path)
|
180
|
-
|
181
|
-
data = YAML.load_file(path)
|
182
|
-
|
183
|
-
unless data.kind_of?(Hash)
|
184
|
-
warn "#{path.dump} did not contain a Hash"
|
185
|
-
next
|
186
|
-
end
|
187
|
-
|
188
|
-
data.each do |name,strings|
|
189
|
-
@user_agents[name.to_sym].merge(strings)
|
190
|
-
end
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
end
|
195
|
-
end
|
196
|
-
end
|