ronin-web 1.0.2 → 2.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +3 -2
- data/.gitignore +1 -0
- data/.rubocop.yml +5 -0
- data/ChangeLog.md +46 -1
- data/Gemfile +25 -12
- data/README.md +257 -51
- data/Rakefile +9 -0
- data/data/completions/ronin-web +203 -0
- data/gemspec.yml +18 -5
- data/lib/ronin/web/cli/browser_options.rb +92 -0
- data/lib/ronin/web/cli/browser_shell.rb +448 -0
- data/lib/ronin/web/cli/command.rb +1 -1
- data/lib/ronin/web/cli/commands/browser.rb +373 -0
- data/lib/ronin/web/cli/commands/completion.rb +63 -0
- data/lib/ronin/web/cli/commands/diff.rb +60 -8
- data/lib/ronin/web/cli/commands/html.rb +21 -33
- data/lib/ronin/web/cli/commands/irb.rb +1 -1
- data/lib/ronin/web/cli/commands/new/{webapp.rb → app.rb} +8 -8
- data/lib/ronin/web/cli/commands/new/nokogiri.rb +4 -4
- data/lib/ronin/web/cli/commands/new/server.rb +1 -1
- data/lib/ronin/web/cli/commands/new/spider.rb +1 -1
- data/lib/ronin/web/cli/commands/new.rb +5 -3
- data/lib/ronin/web/cli/commands/reverse_proxy.rb +1 -1
- data/lib/ronin/web/cli/commands/screenshot.rb +186 -0
- data/lib/ronin/web/cli/commands/server.rb +1 -1
- data/lib/ronin/web/cli/commands/session_cookie.rb +265 -0
- data/lib/ronin/web/cli/commands/spider.rb +61 -467
- data/lib/ronin/web/cli/commands/user_agent.rb +177 -0
- data/lib/ronin/web/cli/commands/vulns.rb +463 -0
- data/lib/ronin/web/cli/commands/wordlist.rb +484 -0
- data/lib/ronin/web/cli/commands/xml.rb +149 -0
- data/lib/ronin/web/cli/js_shell.rb +69 -0
- data/lib/ronin/web/cli/ruby_shell.rb +1 -1
- data/lib/ronin/web/cli/spider_options.rb +919 -0
- data/lib/ronin/web/cli.rb +3 -1
- data/lib/ronin/web/html.rb +1 -1
- data/lib/ronin/web/root.rb +1 -1
- data/lib/ronin/web/version.rb +2 -2
- data/lib/ronin/web/xml.rb +1 -1
- data/lib/ronin/web.rb +4 -364
- data/man/ronin-web-browser.1 +92 -0
- data/man/ronin-web-browser.1.md +96 -0
- data/man/ronin-web-completion.1 +76 -0
- data/man/ronin-web-completion.1.md +78 -0
- data/man/ronin-web-diff.1 +14 -21
- data/man/ronin-web-diff.1.md +13 -6
- data/man/ronin-web-html.1 +30 -46
- data/man/ronin-web-html.1.md +27 -17
- data/man/ronin-web-irb.1 +9 -16
- data/man/ronin-web-irb.1.md +6 -2
- data/man/ronin-web-new-app.1.md +39 -0
- data/man/ronin-web-new-nokogiri.1 +9 -20
- data/man/ronin-web-new-nokogiri.1.md +5 -5
- data/man/ronin-web-new-server.1 +11 -23
- data/man/ronin-web-new-server.1.md +5 -5
- data/man/ronin-web-new-spider.1 +44 -88
- data/man/ronin-web-new-spider.1.md +37 -37
- data/man/ronin-web-new.1 +18 -30
- data/man/ronin-web-new.1.md +15 -11
- data/man/ronin-web-reverse-proxy.1 +33 -38
- data/man/ronin-web-reverse-proxy.1.md +20 -14
- data/man/ronin-web-screenshot.1 +56 -0
- data/man/ronin-web-screenshot.1.md +56 -0
- data/man/ronin-web-server.1 +15 -29
- data/man/ronin-web-server.1.md +13 -9
- data/man/ronin-web-session-cookie.1 +38 -0
- data/man/ronin-web-session-cookie.1.md +41 -0
- data/man/ronin-web-spider.1 +121 -130
- data/man/ronin-web-spider.1.md +115 -66
- data/man/ronin-web-user-agent.1 +44 -0
- data/man/ronin-web-user-agent.1.md +46 -0
- data/man/ronin-web-vulns.1 +175 -0
- data/man/ronin-web-vulns.1.md +177 -0
- data/man/ronin-web-wordlist.1 +258 -0
- data/man/ronin-web-wordlist.1.md +263 -0
- data/man/ronin-web-xml.1 +43 -0
- data/man/ronin-web-xml.1.md +46 -0
- data/man/ronin-web.1 +67 -18
- data/man/ronin-web.1.md +55 -4
- data/scripts/setup +58 -0
- metadata +122 -31
- data/lib/ronin/web/mechanize.rb +0 -84
- data/man/ronin-web-new-webapp.1.md +0 -39
- /data/data/new/{webapp → app}/.gitignore +0 -0
- /data/data/new/{webapp → app}/.ruby-version.erb +0 -0
- /data/data/new/{webapp → app}/Dockerfile.erb +0 -0
- /data/data/new/{webapp → app}/Gemfile +0 -0
- /data/data/new/{webapp → app}/app.rb.erb +0 -0
- /data/data/new/{webapp → app}/config.ru +0 -0
- /data/data/new/{webapp → app}/docker-compose.yml.erb +0 -0
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# ronin-web - A collection of useful web helper methods and commands.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2006-
|
5
|
+
# Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# ronin-web is free software: you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -19,7 +19,7 @@
|
|
19
19
|
#
|
20
20
|
|
21
21
|
require 'ronin/web/cli/command'
|
22
|
-
require 'ronin/web/
|
22
|
+
require 'ronin/web/cli/spider_options'
|
23
23
|
require 'ronin/web/spider/archive'
|
24
24
|
require 'ronin/web/spider/git_archive'
|
25
25
|
require 'ronin/support/network/http/user_agents'
|
@@ -41,18 +41,20 @@ module Ronin
|
|
41
41
|
#
|
42
42
|
# ## Options
|
43
43
|
#
|
44
|
-
#
|
44
|
+
# --host HOST Spiders the specific HOST
|
45
|
+
# --domain DOMAIN Spiders the whole domain
|
46
|
+
# --site URL Spiders the website, starting at the URL
|
45
47
|
# --open-timeout SECS Sets the connection open timeout
|
46
48
|
# --read-timeout SECS Sets the read timeout
|
47
49
|
# --ssl-timeout SECS Sets the SSL connection timeout
|
48
50
|
# --continue-timeout SECS Sets the continue timeout
|
49
51
|
# --keep-alive-timeout SECS Sets the connection keep alive timeout
|
50
|
-
# -P, --proxy PROXY Sets the proxy to use
|
52
|
+
# -P, --proxy PROXY Sets the proxy to use
|
51
53
|
# -H, --header NAME: VALUE Sets a default header
|
52
54
|
# --host-header NAME=VALUE Sets a default header
|
55
|
+
# -U, --user-agent-string STRING The User-Agent string to use
|
53
56
|
# -u chrome-linux|chrome-macos|chrome-windows|chrome-iphone|chrome-ipad|chrome-android|firefox-linux|firefox-macos|firefox-windows|firefox-iphone|firefox-ipad|firefox-android|safari-macos|safari-iphone|safari-ipad|edge,
|
54
57
|
# --user-agent The User-Agent to use
|
55
|
-
# -U, --user-agent-string STRING The User-Agent string to use
|
56
58
|
# -R, --referer URL Sets the Referer URL
|
57
59
|
# --delay SECS Sets the delay in seconds between each request
|
58
60
|
# -l, --limit COUNT Only spiders up to COUNT pages
|
@@ -61,6 +63,11 @@ module Ronin
|
|
61
63
|
# --visited URL Marks the URL as previously visited
|
62
64
|
# --strip-fragments Enables/disables stripping the fragment component of every URL
|
63
65
|
# --strip-query Enables/disables stripping the query component of every URL
|
66
|
+
# --visit-scheme SCHEME Visit URLs with the URI scheme
|
67
|
+
# --visit-schemes-like /REGEX/ Visit URLs with URI schemes that match the REGEX
|
68
|
+
# --ignore-scheme SCHEME Ignore the URLs with the URI scheme
|
69
|
+
# --ignore-schemes-like /REGEX/
|
70
|
+
# Ignore the URLs with URI schemes matching the REGEX
|
64
71
|
# --visit-host HOST Visit URLs with the matching host name
|
65
72
|
# --visit-hosts-like /REGEX/ Visit URLs with hostnames that match the REGEX
|
66
73
|
# --ignore-host HOST Ignore the host name
|
@@ -78,10 +85,8 @@ module Ronin
|
|
78
85
|
# --ignore-ext FILE_EXT Ignore the URLs with the file ext
|
79
86
|
# --ignore-exts-like /REGEX/ Ignore URLs with file exts matching the REGEX
|
80
87
|
# -r, --robots Specifies whether to honor robots.txt
|
81
|
-
# --host HOST Spiders the specific HOST
|
82
|
-
# --domain DOMAIN Spiders the whole domain
|
83
|
-
# --site URL Spiders the website, starting at the URL
|
84
|
-
# --print-status Print the status codes for each URL
|
88
|
+
# -v, --verbose Enables verbose output
|
89
|
+
# --print-status Print the status codes for each URL
|
85
90
|
# --print-headers Print response headers for each URL
|
86
91
|
# --print-header NAME Prints a specific header
|
87
92
|
# --history FILE The history file
|
@@ -89,6 +94,19 @@ module Ronin
|
|
89
94
|
# --git-archive DIR Archive every visited page to the git repository
|
90
95
|
# -X, --xpath XPATH Evaluates the XPath on each HTML page
|
91
96
|
# -C, --css-path XPATH Evaluates the CSS-path on each HTML page
|
97
|
+
# --print-hosts Print all discovered hostnames
|
98
|
+
# --print-certs Print all encountered SSL/TLS certificates
|
99
|
+
# --save-certs Saves all encountered SSL/TLS certificates
|
100
|
+
# --print-js-strings Print all JavaScript strings
|
101
|
+
# --print-js-url-strings Print URL strings found in JavaScript
|
102
|
+
# --print-js-path-strings Print path strings found in JavaScript
|
103
|
+
# --print-js-absolute-path-strings
|
104
|
+
# Only print absolute path strings found in JavaScript
|
105
|
+
# --print-js-relative-path-strings
|
106
|
+
# Only print relative path strings found in JavaScript
|
107
|
+
# --print-html-comments Print HTML comments
|
108
|
+
# --print-js-comments Print JavaScript comments
|
109
|
+
# --print-comments Print all HTML and JavaScript comments
|
92
110
|
# -h, --help Print help information
|
93
111
|
#
|
94
112
|
# ## Examples
|
@@ -99,298 +117,14 @@ module Ronin
|
|
99
117
|
#
|
100
118
|
class Spider < Command
|
101
119
|
|
120
|
+
include SpiderOptions
|
102
121
|
include CommandKit::Colors
|
103
122
|
include CommandKit::Printing::Indent
|
104
123
|
include CommandKit::Options::Verbose
|
105
124
|
|
106
125
|
usage '[options] {--host HOST | --domain DOMAIN | --site URL}'
|
107
126
|
|
108
|
-
option :open_timeout, value: {
|
109
|
-
type: Integer,
|
110
|
-
usage: 'SECS',
|
111
|
-
default: Spidr.open_timeout
|
112
|
-
},
|
113
|
-
desc: 'Sets the connection open timeout'
|
114
|
-
|
115
|
-
option :read_timeout, value: {
|
116
|
-
type: Integer,
|
117
|
-
usage: 'SECS',
|
118
|
-
default: Spidr.read_timeout
|
119
|
-
},
|
120
|
-
desc: 'Sets the read timeout'
|
121
|
-
|
122
|
-
option :ssl_timeout, value: {
|
123
|
-
type: Integer,
|
124
|
-
usage: 'SECS',
|
125
|
-
default: Spidr.ssl_timeout
|
126
|
-
},
|
127
|
-
desc: 'Sets the SSL connection timeout'
|
128
|
-
|
129
|
-
option :continue_timeout, value: {
|
130
|
-
type: Integer,
|
131
|
-
usage: 'SECS',
|
132
|
-
default: Spidr.continue_timeout
|
133
|
-
},
|
134
|
-
desc: 'Sets the continue timeout'
|
135
|
-
|
136
|
-
option :keep_alive_timeout, value: {
|
137
|
-
type: Integer,
|
138
|
-
usage: 'SECS',
|
139
|
-
default: Spidr.keep_alive_timeout
|
140
|
-
},
|
141
|
-
desc: 'Sets the connection keep alive timeout'
|
142
|
-
|
143
|
-
option :proxy, short: '-P',
|
144
|
-
value: {
|
145
|
-
type: String,
|
146
|
-
usage: 'PROXY'
|
147
|
-
},
|
148
|
-
desc: 'Sets the proxy to use'
|
149
|
-
|
150
|
-
option :header, short: '-H',
|
151
|
-
value: {
|
152
|
-
type: /\A[^\s:]+:.*\z/,
|
153
|
-
usage: 'NAME: VALUE'
|
154
|
-
},
|
155
|
-
desc: 'Sets a default header' do |header|
|
156
|
-
name, value = header.split(/:\s*/,2)
|
157
|
-
|
158
|
-
@default_headers[name] = value
|
159
|
-
end
|
160
|
-
|
161
|
-
option :host_header, value: {
|
162
|
-
type: /\A[^\s=]+=[^\s=]+\z/,
|
163
|
-
usage: 'NAME=VALUE'
|
164
|
-
},
|
165
|
-
desc: 'Sets a default header' do |name_value|
|
166
|
-
name, value = name_value.split('=',2)
|
167
|
-
|
168
|
-
@host_headers[name] = value
|
169
|
-
end
|
170
|
-
|
171
|
-
option :user_agent, value: {
|
172
|
-
type: String,
|
173
|
-
usage: 'USER-AGENT'
|
174
|
-
},
|
175
|
-
desc: 'Sets the User-Agent string'
|
176
|
-
|
177
|
-
option :user_agent_string, short: '-U',
|
178
|
-
value: {
|
179
|
-
type: String,
|
180
|
-
usage: 'STRING'
|
181
|
-
},
|
182
|
-
desc: 'The User-Agent string to use' do |ua|
|
183
|
-
@user_agent = ua
|
184
|
-
end
|
185
|
-
|
186
|
-
option :user_agent, short: '-u',
|
187
|
-
value: {
|
188
|
-
type: Support::Network::HTTP::UserAgents::ALIASES.transform_keys { |key|
|
189
|
-
key.to_s.tr('_','-')
|
190
|
-
}
|
191
|
-
},
|
192
|
-
desc: 'The User-Agent to use' do |name|
|
193
|
-
@user_agent = name
|
194
|
-
end
|
195
|
-
|
196
|
-
option :referer, short: '-R',
|
197
|
-
value: {
|
198
|
-
type: String,
|
199
|
-
usage: 'URL'
|
200
|
-
},
|
201
|
-
desc: 'Sets the Referer URL'
|
202
|
-
|
203
|
-
option :delay, short: '-d',
|
204
|
-
value: {
|
205
|
-
type: Numeric,
|
206
|
-
usage: 'SECS'
|
207
|
-
},
|
208
|
-
desc: 'Sets the delay in seconds between each request'
|
209
|
-
|
210
|
-
option :limit, short: '-l',
|
211
|
-
value: {
|
212
|
-
type: Integer,
|
213
|
-
usage: 'COUNT'
|
214
|
-
},
|
215
|
-
desc: 'Only spiders up to COUNT pages'
|
216
|
-
|
217
|
-
option :max_depth, short: '-d',
|
218
|
-
value: {
|
219
|
-
type: Integer,
|
220
|
-
usage: 'DEPTH'
|
221
|
-
},
|
222
|
-
desc: 'Only spiders up to max depth'
|
223
|
-
|
224
|
-
option :enqueue, value: {
|
225
|
-
type: String,
|
226
|
-
usage: 'URL'
|
227
|
-
},
|
228
|
-
desc: 'Adds the URL to the queue' do |url|
|
229
|
-
@queue << url
|
230
|
-
end
|
231
|
-
|
232
|
-
option :visited, value: {
|
233
|
-
type: String,
|
234
|
-
usage: 'URL'
|
235
|
-
},
|
236
|
-
desc: 'Marks the URL as previously visited' do |url|
|
237
|
-
@history << url
|
238
|
-
end
|
239
|
-
|
240
|
-
option :strip_fragments, desc: 'Enables/disables stripping the fragment component of every URL'
|
241
|
-
|
242
|
-
option :strip_query, desc: 'Enables/disables stripping the query component of every URL'
|
243
|
-
|
244
|
-
option :visit_host, value: {
|
245
|
-
type: String,
|
246
|
-
usage: 'HOST'
|
247
|
-
},
|
248
|
-
desc: 'Visit URLs with the matching host name' do |host|
|
249
|
-
@visit_hosts << host
|
250
|
-
end
|
251
|
-
|
252
|
-
option :visit_hosts_like, value: {
|
253
|
-
type: Regexp,
|
254
|
-
usage: '/REGEX/'
|
255
|
-
},
|
256
|
-
desc: 'Visit URLs with hostnames that match the REGEX' do |regex|
|
257
|
-
@visit_hosts << regex
|
258
|
-
end
|
259
|
-
|
260
|
-
option :ignore_host, value: {
|
261
|
-
type: String,
|
262
|
-
usage: 'HOST'
|
263
|
-
},
|
264
|
-
desc: 'Ignore the host name' do |host|
|
265
|
-
@ignore_hosts << host
|
266
|
-
end
|
267
|
-
|
268
|
-
option :ignore_hosts_like, value: {
|
269
|
-
type: Regexp,
|
270
|
-
usage: '/REGEX/'
|
271
|
-
},
|
272
|
-
desc: 'Ignore the host names matching the REGEX' do |regex|
|
273
|
-
@ignore_hosts << regex
|
274
|
-
end
|
275
|
-
|
276
|
-
option :visit_port, value: {
|
277
|
-
type: Integer,
|
278
|
-
usage: 'PORT'
|
279
|
-
},
|
280
|
-
desc: 'Visit URLs with the matching port number' do |port|
|
281
|
-
@visit_ports << port
|
282
|
-
end
|
283
|
-
|
284
|
-
option :visit_ports_like, value: {
|
285
|
-
type: Regexp,
|
286
|
-
usage: '/REGEX/'
|
287
|
-
},
|
288
|
-
desc: 'Visit URLs with port numbers that match the REGEX' do |regex|
|
289
|
-
@visit_ports << regex
|
290
|
-
end
|
291
|
-
|
292
|
-
option :ignore_port, value: {
|
293
|
-
type: Integer,
|
294
|
-
usage: 'PORT'
|
295
|
-
},
|
296
|
-
desc: 'Ignore the port number' do |port|
|
297
|
-
@ignore_ports << port
|
298
|
-
end
|
299
|
-
|
300
|
-
option :ignore_ports_like, value: {
|
301
|
-
type: Regexp,
|
302
|
-
usage: '/REGEX/'
|
303
|
-
},
|
304
|
-
desc: 'Ignore the port numbers matching the REGEXP' do |regex|
|
305
|
-
@ignore_ports << regex
|
306
|
-
end
|
307
|
-
|
308
|
-
option :visit_link, value: {
|
309
|
-
type: String,
|
310
|
-
usage: 'URL'
|
311
|
-
},
|
312
|
-
desc: 'Visit the URL' do |link|
|
313
|
-
@visit_links << link
|
314
|
-
end
|
315
|
-
|
316
|
-
option :visit_links_like, value: {
|
317
|
-
type: Regexp,
|
318
|
-
usage: '/REGEX/'
|
319
|
-
},
|
320
|
-
desc: 'Visit URLs that match the REGEX' do |regex|
|
321
|
-
@visit_links << regex
|
322
|
-
end
|
323
|
-
|
324
|
-
option :ignore_link, value: {
|
325
|
-
type: String,
|
326
|
-
usage: 'URL'
|
327
|
-
},
|
328
|
-
desc: 'Ignore the URL' do |link|
|
329
|
-
@ignore_links << link
|
330
|
-
end
|
331
|
-
|
332
|
-
option :ignore_links_like, value: {
|
333
|
-
type: Regexp,
|
334
|
-
usage: '/REGEX/'
|
335
|
-
},
|
336
|
-
desc: 'Ignore URLs matching the REGEX' do |regex|
|
337
|
-
@ignore_links << regex
|
338
|
-
end
|
339
|
-
|
340
|
-
option :visit_ext, value: {
|
341
|
-
type: String,
|
342
|
-
usage: 'FILE_EXT'
|
343
|
-
},
|
344
|
-
desc: 'Visit URLs with the matching file ext' do |ext|
|
345
|
-
@visit_exts << ext
|
346
|
-
end
|
347
|
-
|
348
|
-
option :visit_exts_like, value: {
|
349
|
-
type: Regexp,
|
350
|
-
usage: '/REGEX/'
|
351
|
-
},
|
352
|
-
desc: 'Visit URLs with file exts that match the REGEX' do |regex|
|
353
|
-
@visit_exts << regex
|
354
|
-
end
|
355
|
-
|
356
|
-
option :ignore_ext, value: {
|
357
|
-
type: String,
|
358
|
-
usage: 'FILE_EXT'
|
359
|
-
},
|
360
|
-
desc: 'Ignore the URLs with the file ext' do |ext|
|
361
|
-
@ignore_exts << ext
|
362
|
-
end
|
363
|
-
|
364
|
-
option :ignore_exts_like, value: {
|
365
|
-
type: Regexp,
|
366
|
-
usage: '/REGEX/'
|
367
|
-
},
|
368
|
-
desc: 'Ignore URLs with file exts matching the REGEX' do |regex|
|
369
|
-
@ignore_exts << regex
|
370
|
-
end
|
371
|
-
|
372
|
-
option :robots, short: '-r',
|
373
|
-
desc: 'Specifies whether to honor robots.txt'
|
374
|
-
|
375
|
-
option :host, value: {
|
376
|
-
type: String,
|
377
|
-
usage: 'HOST'
|
378
|
-
},
|
379
|
-
desc: 'Spiders the specific HOST'
|
380
|
-
|
381
|
-
option :domain, value: {
|
382
|
-
type: String,
|
383
|
-
usage: 'DOMAIN'
|
384
|
-
},
|
385
|
-
desc: 'Spiders the whole domain'
|
386
|
-
|
387
|
-
option :site, value: {
|
388
|
-
type: String,
|
389
|
-
usage: 'URL'
|
390
|
-
},
|
391
|
-
desc: 'Spiders the website, starting at the URL'
|
392
|
-
|
393
|
-
option :print_status, desc: 'Print the status codes for each URL'
|
127
|
+
option :print_status, desc: 'Print the status codes for each URL'
|
394
128
|
|
395
129
|
option :print_headers, desc: 'Print response headers for each URL'
|
396
130
|
|
@@ -440,6 +174,14 @@ module Ronin
|
|
440
174
|
|
441
175
|
option :print_js_strings, desc: 'Print all JavaScript strings'
|
442
176
|
|
177
|
+
option :print_js_url_strings, desc: 'Print URL strings found in JavaScript'
|
178
|
+
|
179
|
+
option :print_js_path_strings, desc: 'Print path strings found in JavaScript'
|
180
|
+
|
181
|
+
option :print_js_absolute_path_strings, desc: 'Only print absolute path strings found in JavaScript'
|
182
|
+
|
183
|
+
option :print_js_relative_path_strings, desc: 'Only print relative path strings found in JavaScript'
|
184
|
+
|
443
185
|
option :print_html_comments, desc: 'Print HTML comments'
|
444
186
|
|
445
187
|
option :print_js_comments, desc: 'Print JavaScript comments'
|
@@ -456,98 +198,6 @@ module Ronin
|
|
456
198
|
|
457
199
|
man_page 'ronin-web-spider.1'
|
458
200
|
|
459
|
-
# The default HTTP headers to send with every request.
|
460
|
-
#
|
461
|
-
# @return [Hash{String => String}]
|
462
|
-
attr_reader :default_headers
|
463
|
-
|
464
|
-
# The mapping of custom `Host` headers.
|
465
|
-
#
|
466
|
-
# @return [Hash{String => String}]
|
467
|
-
attr_reader :host_headers
|
468
|
-
|
469
|
-
# The pre-existing queue of URLs to start spidering with.
|
470
|
-
#
|
471
|
-
# @return [Array<String>]
|
472
|
-
attr_reader :queue
|
473
|
-
|
474
|
-
# The pre-existing history of previously visited URLs to start spidering with.
|
475
|
-
#
|
476
|
-
# @return [Array<String>]
|
477
|
-
attr_reader :history
|
478
|
-
|
479
|
-
# The schemes to visit.
|
480
|
-
#
|
481
|
-
# @return [Array<String>]
|
482
|
-
attr_reader :visit_schemes
|
483
|
-
|
484
|
-
# The hosts to visit.
|
485
|
-
#
|
486
|
-
# @return [Array<String, Regexp>]
|
487
|
-
attr_reader :visit_hosts
|
488
|
-
|
489
|
-
# The port numbers to visit.
|
490
|
-
#
|
491
|
-
# @return [Array<Integer, Regexp>]
|
492
|
-
attr_reader :visit_ports
|
493
|
-
|
494
|
-
# The links to visit.
|
495
|
-
#
|
496
|
-
# @return [Array<String, Regexp>]
|
497
|
-
attr_reader :visit_links
|
498
|
-
|
499
|
-
# The URL file extensions to visit.
|
500
|
-
#
|
501
|
-
# @return [Array<String, Regexp>]
|
502
|
-
attr_reader :visit_exts
|
503
|
-
|
504
|
-
# The hosts to ignore.
|
505
|
-
#
|
506
|
-
# @return [Array<String, Regexp>]
|
507
|
-
attr_reader :ignore_hosts
|
508
|
-
|
509
|
-
# The port numbers to ignore.
|
510
|
-
#
|
511
|
-
# @return [Array<Integer, Regexp>]
|
512
|
-
attr_reader :ignore_ports
|
513
|
-
|
514
|
-
# The links to ignore.
|
515
|
-
#
|
516
|
-
# @return [Array<String, Regexp>]
|
517
|
-
attr_reader :ignore_links
|
518
|
-
|
519
|
-
# The URL file extensions to ignore.
|
520
|
-
#
|
521
|
-
# @return [Array<String, Regexp>]
|
522
|
-
attr_reader :ignore_exts
|
523
|
-
|
524
|
-
#
|
525
|
-
# Initializes the spider command.
|
526
|
-
#
|
527
|
-
# @param [Hash{Symbol => Object}] kwargs
|
528
|
-
# Additional keyword arguments.
|
529
|
-
#
|
530
|
-
def initialize(**kwargs)
|
531
|
-
super(**kwargs)
|
532
|
-
|
533
|
-
@default_headers = {}
|
534
|
-
@host_headers = {}
|
535
|
-
|
536
|
-
@queue = []
|
537
|
-
@history = []
|
538
|
-
|
539
|
-
@visit_schemes = []
|
540
|
-
@visit_hosts = []
|
541
|
-
@visit_ports = []
|
542
|
-
@visit_links = []
|
543
|
-
@visit_exts = []
|
544
|
-
|
545
|
-
@ignore_hosts = []
|
546
|
-
@ignore_ports = []
|
547
|
-
@ignore_links = []
|
548
|
-
@ignore_exts = []
|
549
|
-
end
|
550
|
-
|
551
201
|
#
|
552
202
|
# Runs the `ronin-web spider` command.
|
553
203
|
#
|
@@ -646,6 +296,30 @@ module Ronin
|
|
646
296
|
end
|
647
297
|
end
|
648
298
|
|
299
|
+
if options[:print_js_url_strings]
|
300
|
+
agent.every_js_url_string do |url|
|
301
|
+
print_content url
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
305
|
+
if options[:print_js_path_strings]
|
306
|
+
agent.every_js_path_string do |path|
|
307
|
+
print_content path
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
if options[:print_js_absolute_path_strings]
|
312
|
+
agent.every_js_absolute_path_string do |path|
|
313
|
+
print_content path
|
314
|
+
end
|
315
|
+
end
|
316
|
+
|
317
|
+
if options[:print_js_relative_path_strings]
|
318
|
+
agent.every_js_relative_path_string do |path|
|
319
|
+
print_content path
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
649
323
|
if options[:print_html_comments]
|
650
324
|
agent.every_html_comment do |comment|
|
651
325
|
print_content comment
|
@@ -665,86 +339,6 @@ module Ronin
|
|
665
339
|
end
|
666
340
|
end
|
667
341
|
|
668
|
-
#
|
669
|
-
# Creates a new web spider agent.
|
670
|
-
#
|
671
|
-
# @yield [agent]
|
672
|
-
# The given block will be given the newly created and configured
|
673
|
-
# web spider agent.
|
674
|
-
#
|
675
|
-
# @yieldparam [Ronin::Web::Spider::Agent] agent
|
676
|
-
# The newly created web spider agent.
|
677
|
-
#
|
678
|
-
# @return [Ronin::Web::Spider::Agent]
|
679
|
-
# The newly created web spider agent, after the agent has completed
|
680
|
-
# its spidering.
|
681
|
-
#
|
682
|
-
def new_agent(&block)
|
683
|
-
if options[:host]
|
684
|
-
Web::Spider.host(options[:host],**agent_kwargs,&block)
|
685
|
-
elsif options[:domain]
|
686
|
-
Web::Spider.domain(options[:domain],**agent_kwargs,&block)
|
687
|
-
elsif options[:site]
|
688
|
-
Web::Spider.site(options[:site],**agent_kwargs,&block)
|
689
|
-
else
|
690
|
-
print_error "must specify --host, --domain, or --site"
|
691
|
-
exit(-1)
|
692
|
-
end
|
693
|
-
end
|
694
|
-
|
695
|
-
#
|
696
|
-
# Builds keyword arguments for `Ronin::Web::Spider::Agent#initialize`.
|
697
|
-
#
|
698
|
-
# @return [Hash{Symbol => Object}]
|
699
|
-
# The keyword arguments for `Ronin::Web::Spider::Agent#initialize`.
|
700
|
-
#
|
701
|
-
def agent_kwargs
|
702
|
-
kwargs = {}
|
703
|
-
|
704
|
-
kwargs[:proxy] = options[:proxy] if options[:proxy]
|
705
|
-
|
706
|
-
unless @default_headers.empty?
|
707
|
-
kwargs[:default_headers] = @default_headers
|
708
|
-
end
|
709
|
-
|
710
|
-
unless @host_headers.empty?
|
711
|
-
kwargs[:host_headers] = @host_headers
|
712
|
-
end
|
713
|
-
|
714
|
-
kwargs[:user_agent] = @user_agent if @user_agent
|
715
|
-
kwargs[:referer] = options[:referer] if options[:referer]
|
716
|
-
|
717
|
-
kwargs[:delay] = options[:delay] if options[:delay]
|
718
|
-
kwargs[:limit] = options[:limit] if options[:limit]
|
719
|
-
kwargs[:max_depth] = options[:max_depth] if options[:max_depth]
|
720
|
-
|
721
|
-
kwargs[:queue] = @queue unless @queue.empty?
|
722
|
-
kwargs[:history] = @history unless @history.empty?
|
723
|
-
|
724
|
-
if options.has_key?(:strip_fragments)
|
725
|
-
kwargs[:strip_fragments] = options[:strip_fragments]
|
726
|
-
end
|
727
|
-
|
728
|
-
if options.has_key?(:strip_query)
|
729
|
-
kwargs[:strip_query] = options[:strip_query]
|
730
|
-
end
|
731
|
-
|
732
|
-
kwargs[:schemes] = @visit_schemes unless @visit_schemes.empty?
|
733
|
-
kwargs[:hosts] = @visit_hosts unless @visit_hosts.empty?
|
734
|
-
kwargs[:ports] = @visit_ports unless @visit_ports.empty?
|
735
|
-
kwargs[:links] = @visit_links unless @visit_links.empty?
|
736
|
-
kwargs[:exts] = @visit_exts unless @visit_exts.empty?
|
737
|
-
|
738
|
-
kwargs[:ignore_hosts] = @ignore_hosts unless @ignore_hosts.empty?
|
739
|
-
kwargs[:ignore_ports] = @ignore_ports unless @ignore_ports.empty?
|
740
|
-
kwargs[:ignore_links] = @ignore_links unless @ignore_links.empty?
|
741
|
-
kwargs[:ignore_exts] = @ignore_exts unless @ignore_exts.empty?
|
742
|
-
|
743
|
-
kwargs[:robots] = options[:robots] if options.has_key?(:robots)
|
744
|
-
|
745
|
-
return kwargs
|
746
|
-
end
|
747
|
-
|
748
342
|
#
|
749
343
|
# Prints the status of a page.
|
750
344
|
#
|