ronin-web 0.3.0.rc1 → 1.0.0.beta1

Files changed (148)
  1. checksums.yaml +7 -0
  2. data/.editorconfig +11 -0
  3. data/.github/workflows/ruby.yml +31 -0
  4. data/.gitignore +13 -0
  5. data/.mailmap +1 -0
  6. data/.ruby-version +1 -0
  7. data/COPYING.txt +3 -3
  8. data/ChangeLog.md +115 -70
  9. data/Gemfile +42 -37
  10. data/README.md +159 -145
  11. data/Rakefile +12 -3
  12. data/bin/ronin-web +9 -17
  13. data/data/new/nokogiri.rb.erb +12 -0
  14. data/data/new/server.rb.erb +22 -0
  15. data/data/new/spider.rb.erb +26 -0
  16. data/data/new/webapp/.gitignore +15 -0
  17. data/data/new/webapp/.ruby-version.erb +1 -0
  18. data/data/new/webapp/Dockerfile.erb +11 -0
  19. data/data/new/webapp/Gemfile +6 -0
  20. data/data/new/webapp/app.rb.erb +15 -0
  21. data/data/new/webapp/config.ru +4 -0
  22. data/data/new/webapp/docker-compose.yml.erb +9 -0
  23. data/gemspec.yml +32 -14
  24. data/lib/ronin/web/cli/command.rb +36 -0
  25. data/lib/ronin/web/cli/commands/diff.rb +106 -0
  26. data/lib/ronin/web/cli/commands/html.rb +174 -0
  27. data/lib/ronin/web/cli/commands/irb.rb +56 -0
  28. data/lib/ronin/web/cli/commands/new/nokogiri.rb +85 -0
  29. data/lib/ronin/web/cli/commands/new/server.rb +96 -0
  30. data/lib/ronin/web/cli/commands/new/spider.rb +315 -0
  31. data/lib/ronin/web/cli/commands/new/webapp.rb +123 -0
  32. data/lib/ronin/web/cli/commands/new.rb +64 -0
  33. data/lib/ronin/web/cli/commands/reverse_proxy.rb +215 -0
  34. data/lib/ronin/web/cli/commands/server.rb +155 -0
  35. data/lib/ronin/web/cli/commands/spider.rb +822 -0
  36. data/lib/ronin/web/cli/ruby_shell.rb +50 -0
  37. data/lib/ronin/web/cli.rb +44 -0
  38. data/lib/ronin/web/html.rb +85 -0
  39. data/lib/ronin/web/mechanize.rb +34 -36
  40. data/lib/ronin/web/root.rb +27 -0
  41. data/lib/ronin/web/version.rb +7 -10
  42. data/lib/ronin/web/xml.rb +85 -0
  43. data/lib/ronin/web.rb +372 -13
  44. data/man/ronin-web-diff.1 +41 -0
  45. data/man/ronin-web-diff.1.md +30 -0
  46. data/man/ronin-web-html.1 +89 -0
  47. data/man/ronin-web-html.1.md +66 -0
  48. data/man/ronin-web-irb.1 +31 -0
  49. data/man/ronin-web-irb.1.md +22 -0
  50. data/man/ronin-web-new-nokogiri.1 +41 -0
  51. data/man/ronin-web-new-nokogiri.1.md +30 -0
  52. data/man/ronin-web-new-server.1 +45 -0
  53. data/man/ronin-web-new-server.1.md +33 -0
  54. data/man/ronin-web-new-spider.1 +173 -0
  55. data/man/ronin-web-new-spider.1.md +129 -0
  56. data/man/ronin-web-new-webapp.1 +53 -0
  57. data/man/ronin-web-new-webapp.1.md +39 -0
  58. data/man/ronin-web-new.1 +59 -0
  59. data/man/ronin-web-new.1.md +44 -0
  60. data/man/ronin-web-reverse-proxy.1 +63 -0
  61. data/man/ronin-web-reverse-proxy.1.md +47 -0
  62. data/man/ronin-web-server.1 +59 -0
  63. data/man/ronin-web-server.1.md +43 -0
  64. data/man/ronin-web-spider.1 +225 -0
  65. data/man/ronin-web-spider.1.md +168 -0
  66. data/man/ronin-web.1 +41 -0
  67. data/man/ronin-web.1.md +30 -0
  68. data/ronin-web.gemspec +39 -109
  69. data/spec/cli/ruby_shell_spec.rb +14 -0
  70. data/spec/html_spec.rb +43 -0
  71. data/spec/mechanize_spec.rb +72 -0
  72. data/spec/spec_helper.rb +5 -3
  73. data/spec/web_spec.rb +97 -0
  74. data/spec/xml_spec.rb +42 -0
  75. metadata +236 -224
  76. data/.gemtest +0 -0
  77. data/data/ronin/web/user_agents.yml +0 -247
  78. data/lib/ronin/network/mixins/web.rb +0 -258
  79. data/lib/ronin/web/config.rb +0 -34
  80. data/lib/ronin/web/extensions/nokogiri/xml/attr.rb +0 -47
  81. data/lib/ronin/web/extensions/nokogiri/xml/document.rb +0 -48
  82. data/lib/ronin/web/extensions/nokogiri/xml/element.rb +0 -57
  83. data/lib/ronin/web/extensions/nokogiri/xml/node.rb +0 -86
  84. data/lib/ronin/web/extensions/nokogiri/xml/text.rb +0 -47
  85. data/lib/ronin/web/extensions/nokogiri/xml.rb +0 -27
  86. data/lib/ronin/web/extensions/nokogiri.rb +0 -23
  87. data/lib/ronin/web/extensions.rb +0 -23
  88. data/lib/ronin/web/middleware/base.rb +0 -144
  89. data/lib/ronin/web/middleware/directories.rb +0 -179
  90. data/lib/ronin/web/middleware/files.rb +0 -144
  91. data/lib/ronin/web/middleware/filters/campaign_filter.rb +0 -77
  92. data/lib/ronin/web/middleware/filters/ip_filter.rb +0 -73
  93. data/lib/ronin/web/middleware/filters/path_filter.rb +0 -73
  94. data/lib/ronin/web/middleware/filters/referer_filter.rb +0 -71
  95. data/lib/ronin/web/middleware/filters/user_agent_filter.rb +0 -71
  96. data/lib/ronin/web/middleware/filters/vhost_filter.rb +0 -71
  97. data/lib/ronin/web/middleware/filters.rb +0 -28
  98. data/lib/ronin/web/middleware/helpers.rb +0 -145
  99. data/lib/ronin/web/middleware/proxy.rb +0 -265
  100. data/lib/ronin/web/middleware/proxy_request.rb +0 -262
  101. data/lib/ronin/web/middleware/request.rb +0 -79
  102. data/lib/ronin/web/middleware/response.rb +0 -33
  103. data/lib/ronin/web/middleware/router.rb +0 -167
  104. data/lib/ronin/web/middleware/rule.rb +0 -103
  105. data/lib/ronin/web/middleware.rb +0 -27
  106. data/lib/ronin/web/proxy/app.rb +0 -32
  107. data/lib/ronin/web/proxy/base.rb +0 -46
  108. data/lib/ronin/web/proxy/web.rb +0 -46
  109. data/lib/ronin/web/proxy.rb +0 -25
  110. data/lib/ronin/web/server/app.rb +0 -32
  111. data/lib/ronin/web/server/base.rb +0 -461
  112. data/lib/ronin/web/server/web.rb +0 -66
  113. data/lib/ronin/web/server.rb +0 -25
  114. data/lib/ronin/web/spider.rb +0 -120
  115. data/lib/ronin/web/user_agents.rb +0 -196
  116. data/lib/ronin/web/web.rb +0 -560
  117. data/spec/helpers/output.rb +0 -3
  118. data/spec/web/extensions/nokogiri_spec.rb +0 -38
  119. data/spec/web/helpers/rack_app.rb +0 -24
  120. data/spec/web/helpers/root/test1/index.html +0 -1
  121. data/spec/web/helpers/root/test1/test1.txt +0 -1
  122. data/spec/web/helpers/root/test1.txt +0 -1
  123. data/spec/web/helpers/root/test2/test2.txt +0 -1
  124. data/spec/web/helpers/root/test2.txt +0 -1
  125. data/spec/web/helpers/root/test3/test3.txt +0 -1
  126. data/spec/web/helpers/root/test3.txt +0 -1
  127. data/spec/web/helpers/root.rb +0 -15
  128. data/spec/web/mechanize_spec.rb +0 -62
  129. data/spec/web/middleware/directories_spec.rb +0 -86
  130. data/spec/web/middleware/files_spec.rb +0 -57
  131. data/spec/web/middleware/filters/campaign_filter_spec.rb +0 -30
  132. data/spec/web/middleware/filters/ip_filter_spec.rb +0 -25
  133. data/spec/web/middleware/filters/path_filter_spec.rb +0 -29
  134. data/spec/web/middleware/filters/referer_filter_spec.rb +0 -25
  135. data/spec/web/middleware/filters/user_agent_filter_spec.rb +0 -25
  136. data/spec/web/middleware/filters/vhost_filter_spec.rb +0 -23
  137. data/spec/web/middleware/proxy_spec.rb +0 -67
  138. data/spec/web/middleware/response_spec.rb +0 -20
  139. data/spec/web/middleware/router_spec.rb +0 -65
  140. data/spec/web/middleware/rule_spec.rb +0 -37
  141. data/spec/web/proxy/base_spec.rb +0 -8
  142. data/spec/web/server/base_spec.rb +0 -77
  143. data/spec/web/server/classes/public1/static1.txt +0 -1
  144. data/spec/web/server/classes/public2/static2.txt +0 -1
  145. data/spec/web/server/classes/sub_app.rb +0 -13
  146. data/spec/web/server/classes/test_app.rb +0 -20
  147. data/spec/web/user_agents_spec.rb +0 -56
  148. data/spec/web/web_spec.rb +0 -101
@@ -0,0 +1,822 @@
+ #
+ # ronin-web - A collection of useful web helper methods and commands.
+ #
+ # Copyright (c) 2006-2022 Hal Brodigan (postmodern.mod3 at gmail.com)
+ #
+ # ronin-web is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # ronin-web is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
+ #
+
+ require 'ronin/web/cli/command'
+ require 'ronin/web/spider'
+ require 'ronin/web/spider/archive'
+ require 'ronin/web/spider/git_archive'
+ require 'ronin/support/network/http/user_agents'
+
+ require 'command_kit/colors'
+ require 'command_kit/printing/indent'
+ require 'command_kit/options/verbose'
+
+ module Ronin
+ module Web
+ class CLI
+ module Commands
+ #
+ # Spiders a website.
+ #
+ # ## Usage
+ #
+ # ronin-web spider [options] {--host HOST | --domain DOMAIN | --site URL}
+ #
+ # ## Options
+ #
+ # -v, --verbose Enables verbose output
+ # --open-timeout SECS Sets the connection open timeout
+ # --read-timeout SECS Sets the read timeout
+ # --ssl-timeout SECS Sets the SSL connection timeout
+ # --continue-timeout SECS Sets the continue timeout
+ # --keep-alive-timeout SECS Sets the connection keep alive timeout
+ # -P, --proxy PROXY Sets the proxy to use.
+ # -H, --header NAME: VALUE Sets a default header
+ # --host-header NAME=VALUE Sets a default Host header
+ # -u chrome_linux|chrome_macos|chrome_windows|chrome_iphone|chrome_ipad|chrome_android|firefox_linux|firefox_macos|firefox_windows|firefox_iphone|firefox_ipad|firefox_android|safari_macos|safari_iphone|safari_ipad|edge,
+ # --user-agent The User-Agent to use
+ # -U, --user-agent-string STRING The User-Agent string to use
+ # -R, --referer URL Sets the Referer URL
+ # --delay SECS Sets the delay in seconds between each request
+ # -l, --limit COUNT Only spiders up to COUNT pages
+ # -d, --max-depth DEPTH Only spiders up to max depth
+ # --enqueue URL Adds the URL to the queue
+ # --visited URL Marks the URL as previously visited
+ # --strip-fragments Enables/disables stripping the fragment component of every URL
+ # --strip-query Enables/disables stripping the query component of every URL
+ # --visit-host HOST Visit URLs with the matching host name
+ # --visit-hosts-like /REGEX/ Visit URLs with hostnames that match the REGEX
+ # --ignore-host HOST Ignore the host name
+ # --ignore-hosts-like /REGEX/ Ignore the host names matching the REGEX
+ # --visit-port PORT Visit URLs with the matching port number
+ # --visit-ports-like /REGEX/ Visit URLs with port numbers that match the REGEX
+ # --ignore-port PORT Ignore the port number
+ # --ignore-ports-like /REGEX/ Ignore the port numbers matching the REGEXP
+ # --visit-link URL Visit the URL
+ # --visit-links-like /REGEX/ Visit URLs that match the REGEX
+ # --ignore-link URL Ignore the URL
+ # --ignore-links-like /REGEX/ Ignore URLs matching the REGEX
+ # --visit-ext FILE_EXT Visit URLs with the matching file ext
+ # --visit-exts-like /REGEX/ Visit URLs with file exts that match the REGEX
+ # --ignore-ext FILE_EXT Ignore the URLs with the file ext
+ # --ignore-exts-like /REGEX/ Ignore URLs with file exts matching the REGEX
+ # -r, --robots Specifies whether to honor robots.txt
+ # --host HOST Spiders the specific HOST
+ # --domain DOMAIN Spiders the whole domain
+ # --site URL Spiders the website, starting at the URL
+ # --print-status Print the status codes for each URL
+ # --print-headers Print response headers for each URL
+ # --print-header NAME Prints a specific header
+ # --archive DIR Archive every visited page to the DIR
+ # --git-archive DIR Archive every visited page to the git repository
+ # -X, --xpath XPATH Evaluates the XPath on each HTML page
+ # -C, --css-path XPATH Evaluates the CSS-path on each HTML page
+ # -h, --help Print help information
+ #
+ # ## Examples
+ #
+ # ronin-web spider --host scanme.nmap.org
+ # ronin-web spider --domain nmap.org
+ # ronin-web spider --site https://scanme.nmap.org/
+ #
+ class Spider < Command
+
+ include CommandKit::Colors
+ include CommandKit::Printing::Indent
+ include CommandKit::Options::Verbose
+
+ usage '[options] {--host HOST | --domain DOMAIN | --site URL}'
+
+ option :open_timeout, value: {
+ type: Integer,
+ usage: 'SECS',
+ default: Spidr.open_timeout
+ },
+ desc: 'Sets the connection open timeout'
+
+ option :read_timeout, value: {
+ type: Integer,
+ usage: 'SECS',
+ default: Spidr.read_timeout
+ },
+ desc: 'Sets the read timeout'
+
+ option :ssl_timeout, value: {
+ type: Integer,
+ usage: 'SECS',
+ default: Spidr.ssl_timeout
+ },
+ desc: 'Sets the SSL connection timeout'
+
+ option :continue_timeout, value: {
+ type: Integer,
+ usage: 'SECS',
+ default: Spidr.continue_timeout
+ },
+ desc: 'Sets the continue timeout'
+
+ option :keep_alive_timeout, value: {
+ type: Integer,
+ usage: 'SECS',
+ default: Spidr.keep_alive_timeout
+ },
+ desc: 'Sets the connection keep alive timeout'
+
+ option :proxy, short: '-P',
+ value: {
+ type: String,
+ usage: 'PROXY'
+ },
+ desc: 'Sets the proxy to use'
+
+ option :header, short: '-H',
+ value: {
+ type: /\A[^\s:]+:.*\z/,
+ usage: 'NAME: VALUE'
+ },
+ desc: 'Sets a default header' do |header|
+ name, value = header.split(/:\s*/,2)
+
+ @default_headers[name] = value
+ end
+
+ option :host_header, value: {
+ type: /\A[^\s=]+=[^\s=]+\z/,
+ usage: 'NAME=VALUE'
+ },
+ desc: 'Sets a default Host header' do |name_value|
+ name, value = name_value.split('=',2)
+
+ @host_headers[name] = value
+ end
+
+ option :user_agent, value: {
+ type: String,
+ usage: 'USER-AGENT'
+ },
+ desc: 'Sets the User-Agent string'
+
+ option :user_agent_string, short: '-U',
+ value: {
+ type: String,
+ usage: 'STRING'
+ },
+ desc: 'The User-Agent string to use' do |ua|
+ @user_agent = ua
+ end
+
+ option :user_agent, short: '-u',
+ value: {
+ type: Support::Network::HTTP::UserAgents::ALIASES.keys
+ },
+ desc: 'The User-Agent to use' do |name|
+ @user_agent = name
+ end
+
+ option :referer, short: '-R',
+ value: {
+ type: String,
+ usage: 'URL'
+ },
+ desc: 'Sets the Referer URL'
+
+ option :delay,
+ value: {
+ type: Numeric,
+ usage: 'SECS'
+ },
+ desc: 'Sets the delay in seconds between each request'
+
+ option :limit, short: '-l',
+ value: {
+ type: Integer,
+ usage: 'COUNT'
+ },
+ desc: 'Only spiders up to COUNT pages'
+
+ option :max_depth, short: '-d',
+ value: {
+ type: Integer,
+ usage: 'DEPTH',
+ },
+ desc: 'Only spiders up to max depth'
+
+ option :enqueue, value: {
+ type: String,
+ usage: 'URL'
+ },
+ desc: 'Adds the URL to the queue' do |url|
+ @queue << url
+ end
+
+ option :visited, value: {
+ type: String,
+ usage: 'URL'
+ },
+ desc: 'Marks the URL as previously visited' do |url|
+ @history << url
+ end
+
+ option :strip_fragments, desc: 'Enables/disables stripping the fragment component of every URL'
+
+ option :strip_query, desc: 'Enables/disables stripping the query component of every URL'
+
+ option :visit_host, value: {
+ type: String,
+ usage: 'HOST'
+ },
+ desc: 'Visit URLs with the matching host name' do |host|
+ @visit_hosts << host
+ end
+
+ option :visit_hosts_like, value: {
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Visit URLs with hostnames that match the REGEX' do |regex|
+ @visit_hosts << regex
+ end
+
+ option :ignore_host, value: {
+ type: String,
+ usage: 'HOST'
+ },
+ desc: 'Ignore the host name' do |host|
+ @ignore_hosts << host
+ end
+
+ option :ignore_hosts_like, value: {
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Ignore the host names matching the REGEX' do |regex|
+ @ignore_hosts << regex
+ end
+
+ option :visit_port, value: {
+ type: Integer,
+ usage: 'PORT'
+ },
+ desc: 'Visit URLs with the matching port number' do |port|
+ @visit_ports << port
+ end
+
+ option :visit_ports_like, value: {
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Visit URLs with port numbers that match the REGEX' do |regex|
+ @visit_ports << regex
+ end
+
+ option :ignore_port, value: {
+ type: Integer,
+ usage: 'PORT'
+ },
+ desc: 'Ignore the port number' do |port|
+ @ignore_ports << port
+ end
+
+ option :ignore_ports_like, value: {
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Ignore the port numbers matching the REGEXP' do |regex|
+ @ignore_ports << regex
+ end
+
+ option :visit_link, value: {
+ type: String,
+ usage: 'URL'
+ },
+ desc: 'Visit the URL' do |link|
+ @visit_links << link
+ end
+
+ option :visit_links_like, value: {
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Visit URLs that match the REGEX' do |regex|
+ @visit_links << regex
+ end
+
+ option :ignore_link, value: {
+ type: String,
+ usage: 'URL'
+ },
+ desc: 'Ignore the URL' do |link|
+ @ignore_links << link
+ end
+
+ option :ignore_links_like, value: {
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Ignore URLs matching the REGEX' do |regex|
+ @ignore_links << regex
+ end
+
+ option :visit_ext, value: {
+ type: String,
+ usage: 'FILE_EXT'
+ },
+ desc: 'Visit URLs with the matching file ext' do |ext|
+ @visit_exts << ext
+ end
+
+ option :visit_exts_like, value: {
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Visit URLs with file exts that match the REGEX' do |regex|
+ @visit_exts << regex
+ end
+
+ option :ignore_ext, value: {
+ type: String,
+ usage: 'FILE_EXT'
+ },
+ desc: 'Ignore the URLs with the file ext' do |ext|
+ @ignore_exts << ext
+ end
+
+ option :ignore_exts_like, value: {
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Ignore URLs with file exts matching the REGEX' do |regex|
+ @ignore_exts << regex
+ end
+
+ option :robots, short: '-r',
+ desc: 'Specifies whether to honor robots.txt'
+
+ option :host, value: {
+ type: String,
+ usage: 'HOST'
+ },
+ desc: 'Spiders the specific HOST'
+
+ option :domain, value: {
+ type: String,
+ usage: 'DOMAIN',
+ },
+ desc: 'Spiders the whole domain'
+
+ option :site, value: {
+ type: String,
+ usage: 'URL'
+ },
+ desc: 'Spiders the website, starting at the URL'
+
+ option :print_status, desc: 'Print the status codes for each URL'
+
+ option :print_headers, desc: 'Print response headers for each URL'
+
+ option :print_header, value: {
+ type: String,
+ usage: 'NAME'
+ },
+ desc: 'Prints a specific header'
+
+ option :archive, value: {
+ type: String,
+ usage: 'DIR'
+ },
+ desc: 'Archive every visited page to the DIR'
+
+ option :git_archive, value: {
+ type: String,
+ usage: 'DIR'
+ },
+ desc: 'Archive every visited page to the git repository'
+
+ option :xpath, short: '-X',
+ value: {
+ type: String,
+ usage: 'XPATH'
+ },
+ desc: 'Evaluates the XPath on each HTML page'
+
+ option :css_path, short: '-C',
+ value: {
+ type: String,
+ usage: 'XPATH'
+ },
+ desc: 'Evaluates the CSS-path on each HTML page'
+
+ option :print_hosts, desc: 'Print all discovered hostnames'
+
+ option :print_certs, desc: 'Print all encountered SSL/TLS certificates'
+
+ option :save_certs, desc: 'Saves all encountered SSL/TLS certificates'
+
+ option :print_js_strings, desc: 'Print all JavaScript strings'
+
+ option :print_html_comments, desc: 'Print HTML comments'
+
+ option :print_js_comments, desc: 'Print JavaScript comments'
+
+ option :print_comments, desc: 'Print all HTML and JavaScript comments'
+
+ description 'Spiders a website'
+
+ examples [
+ "--host scanme.nmap.org",
+ "--domain nmap.org",
+ "--site https://scanme.nmap.org/"
+ ]
+
+ man_page 'ronin-web-spider.1'
+
+ # The default HTTP headers to send with every request.
+ #
+ # @return [Hash{String => String}]
+ attr_reader :default_headers
+
+ # The mapping of custom `Host` headers.
+ #
+ # @return [Hash{String => String}]
+ attr_reader :host_headers
+
+ # The pre-existing queue of URLs to start spidering with.
+ #
+ # @return [Array<String>]
+ attr_reader :queue
+
+ # The pre-existing list of previously visited URLs to start spidering with.
+ #
+ # @return [Array<String>]
+ attr_reader :history
+
+ # The schemes to visit.
+ #
+ # @return [Array<String>]
+ attr_reader :visit_schemes
+
+ # The hosts to visit.
+ #
+ # @return [Array<String, Regexp>]
+ attr_reader :visit_hosts
+
+ # The port numbers to visit.
+ #
+ # @return [Array<Integer, Regexp>]
+ attr_reader :visit_ports
+
+ # The links to visit.
+ #
+ # @return [Array<String, Regexp>]
+ attr_reader :visit_links
+
+ # The URL file extensions to visit.
+ #
+ # @return [Array<String, Regexp>]
+ attr_reader :visit_exts
+
+ # The hosts to ignore.
+ #
+ # @return [Array<String, Regexp>]
+ attr_reader :ignore_hosts
+
+ # The port numbers to ignore.
+ #
+ # @return [Array<Integer, Regexp>]
+ attr_reader :ignore_ports
+
+ # The links to ignore.
+ #
+ # @return [Array<String, Regexp>]
+ attr_reader :ignore_links
+
+ # The URL file extensions to ignore.
+ #
+ # @return [Array<String, Regexp>]
+ attr_reader :ignore_exts
+
+ #
+ # Initializes the spider command.
+ #
+ # @param [Hash{Symbol => Object}] kwargs
+ # Additional keyword arguments.
+ #
+ def initialize(**kwargs)
+ super(**kwargs)
+
+ @default_headers = {}
+ @host_headers = {}
+
+ @queue = []
+ @history = []
+
+ @visit_schemes = []
+ @visit_hosts = []
+ @visit_ports = []
+ @visit_links = []
+ @visit_exts = []
+
+ @ignore_hosts = []
+ @ignore_ports = []
+ @ignore_links = []
+ @ignore_exts = []
+ end
+
+ #
+ # Runs the `ronin-web spider` command.
+ #
+ def run
+ archive = if options[:archive]
+ Web::Spider::Archive.open(options[:archive])
+ elsif options[:git_archive]
+ Web::Spider::GitArchive.open(options[:git_archive])
+ end
+
+ agent = new_agent do |agent|
+ agent.every_page do |page|
+ print_page(page)
+ end
+
+ agent.every_failed_url do |url|
+ print_verbose "failed to request #{url}"
+ end
+
+ if options[:print_hosts]
+ agent.every_host do |host|
+ print_verbose "spidering new host #{host}"
+ end
+ end
+
+ if options[:print_certs]
+ agent.every_cert do |cert|
+ print_verbose "encountered new certificate for #{cert.subject.common_name}"
+ end
+ end
+
+ if options[:print_js_strings]
+ agent.every_js_string do |string|
+ print_content string
+ end
+ end
+
+ if options[:print_html_comments]
+ agent.every_html_comment do |comment|
+ print_content comment
+ end
+ end
+
+ if options[:print_js_comments]
+ agent.every_js_comment do |comment|
+ print_content comment
+ end
+ end
+
+ if options[:print_comments]
+ agent.every_comment do |comment|
+ print_content comment
+ end
+ end
+
+ if archive
+ agent.every_ok_page do |page|
+ archive.write(page.url,page.body)
+ end
+ end
+ end
+
+ if options[:git_archive]
+ archive.commit "Updated #{Time.now}"
+ end
+
+ if options[:print_hosts]
+ puts
+ puts "Spidered the following hosts:"
+ puts
+
+ indent do
+ agent.visited_hosts.each do |host|
+ puts host
+ end
+ end
+ end
+
+ if options[:print_certs]
+ puts
+ puts "Discovered the following certs:"
+ puts
+
+ agent.collected_certs.each do |cert|
+ puts cert
+ puts
+ end
+ end
+ end
+
+ #
+ # Creates a new web spider agent.
+ #
+ # @yield [agent]
+ # The given block will be given the newly created and configured
+ # web spider agent.
+ #
+ # @yieldparam [Ronin::Web::Spider::Agent] agent
+ # The newly created web spider agent.
+ #
+ # @return [Ronin::Web::Spider::Agent]
+ # The newly created web spider agent, after the agent has completed
+ # its spidering.
+ #
+ def new_agent(&block)
+ if options[:host]
+ Web::Spider.host(options[:host],**agent_kwargs,&block)
+ elsif options[:domain]
+ Web::Spider.domain(options[:domain],**agent_kwargs,&block)
+ elsif options[:site]
+ Web::Spider.site(options[:site],**agent_kwargs,&block)
+ else
+ print_error "must specify --host, --domain, or --site"
+ exit(-1)
+ end
+ end
+
+ #
+ # Builds keyword arguments for `Ronin::Web::Spider::Agent#initialize`.
+ #
+ # @return [Hash{Symbol => Object}]
+ # The keyword arguments for `Ronin::Web::Spider::Agent#initialize`.
+ #
+ def agent_kwargs
+ kwargs = {}
+ kwargs[:proxy] = options[:proxy] if options[:proxy]
+
+ unless @default_headers.empty?
+ kwargs[:default_headers] = @default_headers
+ end
+
+ unless @host_headers.empty?
+ kwargs[:host_headers] = @host_headers
+ end
+
+ kwargs[:user_agent] = @user_agent if @user_agent
+ kwargs[:referer] = options[:referer] if options[:referer]
+
+ kwargs[:delay] = options[:delay] if options[:delay]
+ kwargs[:limit] = options[:limit] if options[:limit]
+ kwargs[:max_depth] = options[:max_depth] if options[:max_depth]
+
+ kwargs[:queue] = @queue unless @queue.empty?
+ kwargs[:history] = @history unless @history.empty?
+
+ if options.has_key?(:strip_fragments)
+ kwargs[:strip_fragments] = options[:strip_fragments]
+ end
+
+ if options.has_key?(:strip_query)
+ kwargs[:strip_query] = options[:strip_query]
+ end
+
+ kwargs[:schemes] = @visit_schemes unless @visit_schemes.empty?
+ kwargs[:hosts] = @visit_hosts unless @visit_hosts.empty?
+ kwargs[:ports] = @visit_ports unless @visit_ports.empty?
+ kwargs[:links] = @visit_links unless @visit_links.empty?
+ kwargs[:exts] = @visit_exts unless @visit_exts.empty?
+
+ kwargs[:ignore_hosts] = @ignore_hosts unless @ignore_hosts.empty?
+ kwargs[:ignore_ports] = @ignore_ports unless @ignore_ports.empty?
+ kwargs[:ignore_links] = @ignore_links unless @ignore_links.empty?
+ kwargs[:ignore_exts] = @ignore_exts unless @ignore_exts.empty?
+
+ kwargs[:robots] = options[:robots] if options.has_key?(:robots)
+
+ return kwargs
+ end
+
+ #
+ # Prints the status of a page.
+ #
+ # @param [Spidr::Page] page
+ # A spidered page.
+ #
+ def print_status(page)
+ if page.code < 300
+ print "#{colors.bright_green(page.code)} "
+ elsif page.code < 400
+ print "#{colors.bright_yellow(page.code)} "
+ elsif page.code < 500
+ print "#{colors.bright_red(page.code)} "
+ else
+ print "#{colors.bold(colors.bright_red(page.code))} "
+ end
+ end
+
+ #
+ # Prints the URL for a page.
+ #
+ # @param [Spidr::Page] page
+ # A spidered page.
+ #
+ def print_url(page)
+ if page.code < 300
+ puts "#{colors.green(page.url)} "
+ elsif page.code < 400
+ puts "#{colors.yellow(page.url)} "
+ elsif page.code < 500
+ puts "#{colors.red(page.url)} "
+ else
+ puts "#{colors.bold(colors.red(page.url))} "
+ end
+ end
+
+ #
+ # Prints a page.
+ #
+ # @param [Spidr::Page] page
+ # A spidered page.
+ #
+ def print_page(page)
+ print_status(page) if options[:print_status]
+ print_url(page)
+
+ if options[:print_headers]
+ print_headers(page)
+ elsif options[:print_header]
+ if (header = page.response[options[:print_header]])
+ print_content header
+ end
+ end
+
+ print_query(page) if (options[:xpath] || options[:css_path])
+ end
+
+ #
+ # Prints the headers of a page.
+ #
+ # @param [Spidr::Page] page
+ # A spidered page.
+ #
+ def print_headers(page)
+ page.response.each_capitalized do |name,value|
+ print_content "#{name}: #{value}"
+ end
+ end
+
+ #
+ # Prints the XPath or CSS-path query result for the page.
+ #
+ # @param [Spidr::Page] page
+ # A spidered page.
+ #
+ def print_query(page)
+ if page.html?
+ if options[:xpath]
+ print_content page.doc.xpath(options[:xpath])
+ elsif options[:css_path]
+ print_content page.doc.css(options[:css_path])
+ end
+ end
+ end
+
+ #
+ # Prints an information message.
+ #
+ # @param [String] message
+ #
+ def print_verbose(message)
+ if verbose?
+ puts colors.yellow("* #{message}")
+ end
+ end
+
+ #
+ # Print content from a page.
+ #
+ # @param [#to_s] content
+ # The content to print.
+ #
+ def print_content(content)
+ content.to_s.each_line do |line|
+ puts " #{line}"
+ end
+ end
+
+ end
+ end
+ end
+ end
+ end
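
For context, the new `ronin-web spider` command is a thin CLI front-end for the ronin-web-spider library: `new_agent` dispatches to `Ronin::Web::Spider.host`, `.domain`, or `.site`, and `agent_kwargs` maps the CLI options onto the keyword arguments shown above. Below is a minimal Ruby sketch of the equivalent library call, assuming only the `Ronin::Web::Spider.site` entry point and the Spidr-style keywords that `agent_kwargs` builds; the URL and option values are illustrative, not part of the diff.

    require 'ronin/web/spider'

    # Roughly what `ronin-web spider --site https://scanme.nmap.org/ \
    #   --limit 100 -d 3 --ignore-ext pdf --print-status` does,
    # minus the option parsing, archiving hooks, and colorized output.
    Ronin::Web::Spider.site('https://scanme.nmap.org/',
                            limit:       100,                  # --limit COUNT
                            max_depth:   3,                    # -d, --max-depth DEPTH
                            ignore_exts: %w[pdf]) do |agent|   # --ignore-ext FILE_EXT
      # Equivalent of the command's every_page/print_page callback.
      agent.every_page do |page|
        puts "#{page.code} #{page.url}"
      end
    end

The command adds the archive/git-archive callbacks and the various print_* options on top of this same agent.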