ronin-web 1.0.2 → 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +3 -2
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +5 -0
  5. data/ChangeLog.md +46 -1
  6. data/Gemfile +25 -12
  7. data/README.md +257 -51
  8. data/Rakefile +9 -0
  9. data/data/completions/ronin-web +203 -0
  10. data/gemspec.yml +18 -5
  11. data/lib/ronin/web/cli/browser_options.rb +92 -0
  12. data/lib/ronin/web/cli/browser_shell.rb +448 -0
  13. data/lib/ronin/web/cli/command.rb +1 -1
  14. data/lib/ronin/web/cli/commands/browser.rb +373 -0
  15. data/lib/ronin/web/cli/commands/completion.rb +63 -0
  16. data/lib/ronin/web/cli/commands/diff.rb +60 -8
  17. data/lib/ronin/web/cli/commands/html.rb +21 -33
  18. data/lib/ronin/web/cli/commands/irb.rb +1 -1
  19. data/lib/ronin/web/cli/commands/new/{webapp.rb → app.rb} +8 -8
  20. data/lib/ronin/web/cli/commands/new/nokogiri.rb +4 -4
  21. data/lib/ronin/web/cli/commands/new/server.rb +1 -1
  22. data/lib/ronin/web/cli/commands/new/spider.rb +1 -1
  23. data/lib/ronin/web/cli/commands/new.rb +5 -3
  24. data/lib/ronin/web/cli/commands/reverse_proxy.rb +1 -1
  25. data/lib/ronin/web/cli/commands/screenshot.rb +186 -0
  26. data/lib/ronin/web/cli/commands/server.rb +1 -1
  27. data/lib/ronin/web/cli/commands/session_cookie.rb +265 -0
  28. data/lib/ronin/web/cli/commands/spider.rb +61 -467
  29. data/lib/ronin/web/cli/commands/user_agent.rb +177 -0
  30. data/lib/ronin/web/cli/commands/vulns.rb +463 -0
  31. data/lib/ronin/web/cli/commands/wordlist.rb +484 -0
  32. data/lib/ronin/web/cli/commands/xml.rb +149 -0
  33. data/lib/ronin/web/cli/js_shell.rb +69 -0
  34. data/lib/ronin/web/cli/ruby_shell.rb +1 -1
  35. data/lib/ronin/web/cli/spider_options.rb +919 -0
  36. data/lib/ronin/web/cli.rb +3 -1
  37. data/lib/ronin/web/html.rb +1 -1
  38. data/lib/ronin/web/root.rb +1 -1
  39. data/lib/ronin/web/version.rb +2 -2
  40. data/lib/ronin/web/xml.rb +1 -1
  41. data/lib/ronin/web.rb +4 -364
  42. data/man/ronin-web-browser.1 +92 -0
  43. data/man/ronin-web-browser.1.md +96 -0
  44. data/man/ronin-web-completion.1 +76 -0
  45. data/man/ronin-web-completion.1.md +78 -0
  46. data/man/ronin-web-diff.1 +14 -21
  47. data/man/ronin-web-diff.1.md +13 -6
  48. data/man/ronin-web-html.1 +30 -46
  49. data/man/ronin-web-html.1.md +27 -17
  50. data/man/ronin-web-irb.1 +9 -16
  51. data/man/ronin-web-irb.1.md +6 -2
  52. data/man/ronin-web-new-app.1.md +39 -0
  53. data/man/ronin-web-new-nokogiri.1 +9 -20
  54. data/man/ronin-web-new-nokogiri.1.md +5 -5
  55. data/man/ronin-web-new-server.1 +11 -23
  56. data/man/ronin-web-new-server.1.md +5 -5
  57. data/man/ronin-web-new-spider.1 +44 -88
  58. data/man/ronin-web-new-spider.1.md +37 -37
  59. data/man/ronin-web-new.1 +18 -30
  60. data/man/ronin-web-new.1.md +15 -11
  61. data/man/ronin-web-reverse-proxy.1 +33 -38
  62. data/man/ronin-web-reverse-proxy.1.md +20 -14
  63. data/man/ronin-web-screenshot.1 +56 -0
  64. data/man/ronin-web-screenshot.1.md +56 -0
  65. data/man/ronin-web-server.1 +15 -29
  66. data/man/ronin-web-server.1.md +13 -9
  67. data/man/ronin-web-session-cookie.1 +38 -0
  68. data/man/ronin-web-session-cookie.1.md +41 -0
  69. data/man/ronin-web-spider.1 +121 -130
  70. data/man/ronin-web-spider.1.md +115 -66
  71. data/man/ronin-web-user-agent.1 +44 -0
  72. data/man/ronin-web-user-agent.1.md +46 -0
  73. data/man/ronin-web-vulns.1 +175 -0
  74. data/man/ronin-web-vulns.1.md +177 -0
  75. data/man/ronin-web-wordlist.1 +258 -0
  76. data/man/ronin-web-wordlist.1.md +263 -0
  77. data/man/ronin-web-xml.1 +43 -0
  78. data/man/ronin-web-xml.1.md +46 -0
  79. data/man/ronin-web.1 +67 -18
  80. data/man/ronin-web.1.md +55 -4
  81. data/scripts/setup +58 -0
  82. metadata +122 -31
  83. data/lib/ronin/web/mechanize.rb +0 -84
  84. data/man/ronin-web-new-webapp.1.md +0 -39
  85. /data/data/new/{webapp → app}/.gitignore +0 -0
  86. /data/data/new/{webapp → app}/.ruby-version.erb +0 -0
  87. /data/data/new/{webapp → app}/Dockerfile.erb +0 -0
  88. /data/data/new/{webapp → app}/Gemfile +0 -0
  89. /data/data/new/{webapp → app}/app.rb.erb +0 -0
  90. /data/data/new/{webapp → app}/config.ru +0 -0
  91. /data/data/new/{webapp → app}/docker-compose.yml.erb +0 -0
@@ -0,0 +1,484 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # ronin-web - A collection of useful web helper methods and commands.
4
+ #
5
+ # Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
6
+ #
7
+ # ronin-web is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation, either version 3 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # ronin-web is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ require 'ronin/web/cli/command'
22
+ require 'ronin/web/cli/spider_options'
23
+ require 'ronin/core/cli/logging'
24
+
25
+ require 'wordlist/builder'
26
+ require 'nokogiri'
27
+
28
+ module Ronin
29
+ module Web
30
+ class CLI
31
+ module Commands
32
+ #
33
+ # Builds a wordlist by spidering a website.
34
+ #
35
+ # ## Usage
36
+ #
37
+ # ronin-web wordlist [options] {--host HOST | --domain DOMAIN | --site URL}
38
+ #
39
+ # ## Options
40
+ #
41
+ # --open-timeout SECS Sets the connection open timeout
42
+ # --read-timeout SECS Sets the read timeout
43
+ # --ssl-timeout SECS Sets the SSL connection timeout
44
+ # --continue-timeout SECS Sets the continue timeout
45
+ # --keep-alive-timeout SECS Sets the connection keep alive timeout
46
+ # -P, --proxy PROXY Sets the proxy to use
47
+ # -H, --header NAME: VALUE Sets a default header
48
+ # --host-header NAME=VALUE Sets a default header
49
+ # -u chrome-linux|chrome-macos|chrome-windows|chrome-iphone|chrome-ipad|chrome-android|firefox-linux|firefox-macos|firefox-windows|firefox-iphone|firefox-ipad|firefox-android|safari-macos|safari-iphone|safari-ipad|edge,
50
+ # --user-agent The User-Agent to use
51
+ # -U, --user-agent-string STRING The User-Agent string to use
52
+ # -R, --referer URL Sets the Referer URL
53
+ # --delay SECS Sets the delay in seconds between each request
54
+ # -l, --limit COUNT Only spiders up to COUNT pages
55
+ # -d, --max-depth DEPTH Only spiders up to max depth
56
+ # --enqueue URL Adds the URL to the queue
57
+ # --visited URL Marks the URL as previously visited
58
+ # --strip-fragments Enables/disables stripping the fragment component of every URL
59
+ # --strip-query Enables/disables stripping the query component of every URL
60
+ # --visit-host HOST Visit URLs with the matching host name
61
+ # --visit-hosts-like /REGEX/ Visit URLs with hostnames that match the REGEX
62
+ # --ignore-host HOST Ignore the host name
63
+ # --ignore-hosts-like /REGEX/ Ignore the host names matching the REGEX
64
+ # --visit-port PORT Visit URLs with the matching port number
65
+ # --visit-ports-like /REGEX/ Visit URLs with port numbers that match the REGEX
66
+ # --ignore-port PORT Ignore the port number
67
+ # --ignore-ports-like /REGEX/ Ignore the port numbers matching the REGEXP
68
+ # --visit-link URL Visit the URL
69
+ # --visit-links-like /REGEX/ Visit URLs that match the REGEX
70
+ # --ignore-link URL Ignore the URL
71
+ # --ignore-links-like /REGEX/ Ignore URLs matching the REGEX
72
+ # --visit-ext FILE_EXT Visit URLs with the matching file ext
73
+ # --visit-exts-like /REGEX/ Visit URLs with file exts that match the REGEX
74
+ # --ignore-ext FILE_EXT Ignore the URLs with the file ext
75
+ # --ignore-exts-like /REGEX/ Ignore URLs with file exts matching the REGEX
76
+ # -r, --robots Specifies whether to honor robots.txt
77
+ # --host HOST Spiders the specific HOST
78
+ # --domain DOMAIN Spiders the whole domain
79
+ # --site URL Spiders the website, starting at the URL
80
+ # -o, --output PATH The wordlist to write to
81
+ # -X, --content-xpath XPATH The XPath for the content (Default: //body)
82
+ # -C, --content-css-path XPATH The XPath for the content
83
+ # --meta-tags Parse certain meta-tags (Default: enabled)
84
+ # --no-meta-tags Ignore meta-tags
85
+ # --alt-tags Parse alt-tags on images (Default: enabled)
86
+ # --no-alt-tags Also parse alt-tags on images
87
+ # --paths Also parse URL paths
88
+ # --query-params-names Also parse URL query param names
89
+ # --query-param-values Also parse URL query param values
90
+ # --only-paths Only build a wordlist based on the paths
91
+ # --only-query-param Only build a wordlist based on the query param names
92
+ # --only-query-param-values Only build a wordlist based on the query param values
93
+ # -f, --format txt|gz|bzip2|xz Specifies the format of the wordlist file
94
+ # -A, --append Append new words to the wordlist file intead of overwriting the file
95
+ # -L, --lang LANG The language of the text to parse
96
+ # --stop-word WORD A stop-word to ignore
97
+ # --only-query-param-values Only build a wordlist based on the query param values
98
+ # -f, --format txt|gz|bzip2|xz Specifies the format of the wordlist file
99
+ # -A, --append Append new words to the wordlist file intead of overwriting the file
100
+ # -L, --lang LANG The language of the text to parse
101
+ # --stop-word WORD A stop-word to ignore
102
+ # --ignore-word WORD Ignores the word
103
+ # --digits Accepts words containing digits (Default: enabled)
104
+ # --no-digits Ignores words containing digits
105
+ # --special-char CHAR Allows a special character within a word (Default: _, -, ')
106
+ # --numbers Accepts numbers as words (Default: disabled)
107
+ # --no-numbers Ignores numbers
108
+ # --acronyms Treats acronyms as words (Default: enabled)
109
+ # --no-acronyms Ignores acronyms
110
+ # --normalize-case Converts all words to lowercase
111
+ # --no-normalize-case Preserve the case of words and letters (Default: enabled)
112
+ # --normalize-apostrophes Removes apostrophes from words
113
+ # --no-normalize-apostrophes Preserve apostrophes from words (Default: enabled)
114
+ # --normalize-acronyms Removes '.' characters from acronyms
115
+ # --no-normalize-acronyms Preserve '.' characters in acronyms (Default: enabled)
116
+ # -h, --help Print help information
117
+ #
118
+ class Wordlist < Command
119
+
120
+ include Core::CLI::Logging
121
+ include SpiderOptions
122
+
123
+ option :output, short: '-o',
124
+ value: {
125
+ type: String,
126
+ usage: 'PATH'
127
+ },
128
+ desc: 'The wordlist to write to'
129
+
130
+ option :content_xpath, short: '-X',
131
+ value: {
132
+ type: String,
133
+ usage: 'XPATH'
134
+ },
135
+ desc: 'The XPath for the content. (Default: //body)' do |xpath|
136
+ @content_xpath = xpath
137
+ end
138
+
139
+ option :content_css_path, short: '-C',
140
+ value: {
141
+ type: String,
142
+ usage: 'CSS-path'
143
+ },
144
+ desc: 'The XPath for the content' do |css_path|
145
+ @content_xpath = Nokogiri::CSS.xpath_for(css_path).first
146
+ end
147
+
148
+ option :meta_tags, desc: 'Parse certain meta-tags (Default: enabled)' do
149
+ @parse_meta_tags = true
150
+ end
151
+ option :no_meta_tags, desc: 'Ignore meta-tags' do
152
+ @parse_meta_tags = false
153
+ end
154
+
155
+ option :comments, desc: 'Parse HTML comments (Default: enabled)' do
156
+ @parse_comments = true
157
+ end
158
+ option :no_comments, desc: 'Ignore HTML comments' do
159
+ @parse_comments = false
160
+ end
161
+
162
+ option :alt_tags, desc: 'Parse alt-tags on images (Default: enabled)' do
163
+ @parse_alt_tags = true
164
+ end
165
+ option :no_alt_tags, desc: 'Ignore alt-tags on images' do
166
+ @parse_alt_tags = false
167
+ end
168
+
169
+ option :paths, desc: 'Also parse URL paths'
170
+ option :query_params_names, desc: 'Also parse URL query param names'
171
+ option :query_param_values, desc: 'Also parse URL query param values'
172
+
173
+ option :only_paths, desc: 'Only build a wordlist based on the paths'
174
+ option :only_query_param, desc: 'Only build a wordlist based on the query param names'
175
+ option :only_query_param_values, desc: 'Only build a wordlist based on the query param values'
176
+
177
+ option :format, short: '-f',
178
+ value: {
179
+ type: [:txt, :gz, :bzip2, :xz]
180
+ },
181
+ desc: 'Specifies the format of the wordlist file'
182
+
183
+ option :append, short: '-A',
184
+ desc: 'Append new words to the wordlist file intead of overwriting the file'
185
+
186
+ option :lang, short: '-L',
187
+ value: {
188
+ type: String,
189
+ usage: 'LANG'
190
+ },
191
+ desc: 'The language of the text to parse' do |lang|
192
+ options[:lang] = lang.to_sym
193
+ end
194
+
195
+ option :stop_word, value: {
196
+ type: String,
197
+ usage: 'WORD'
198
+ },
199
+ desc: 'A stop-word to ignore' do |word|
200
+ @stop_words << word
201
+ end
202
+
203
+ option :ignore_word, value: {
204
+ type: String,
205
+ usage: 'WORD'
206
+ },
207
+ desc: 'Ignores the word' do |word|
208
+ @ignore_words << word
209
+ end
210
+
211
+ option :digits, desc: 'Accepts words containing digits (Default: enabled)'
212
+ option :no_digits, desc: 'Ignores words containing digits' do
213
+ options[:digits] = false
214
+ end
215
+
216
+ option :special_char, value: {
217
+ type: String,
218
+ usage: 'CHAR'
219
+ },
220
+ desc: 'Allows a special character within a word (Default: _, -, \')' do |char|
221
+ @special_chars << char
222
+ end
223
+
224
+ option :numbers, desc: 'Accepts numbers as words (Default: disabled)'
225
+ option :no_numbers, desc: 'Ignores numbers' do
226
+ options[:numbers] = false
227
+ end
228
+
229
+ option :acronyms, desc: 'Treats acronyms as words (Default: enabled)'
230
+ option :no_acronyms, desc: 'Ignores acronyms' do
231
+ options[:acronyms] = false
232
+ end
233
+
234
+ option :normalize_case, desc: 'Converts all words to lowercase'
235
+ option :no_normalize_case, desc: 'Preserve the case of words and letters (Default: enabled)' do
236
+ options[:normalize_case] = false
237
+ end
238
+
239
+ option :normalize_apostrophes, desc: 'Removes apostrophes from words'
240
+ option :no_normalize_apostrophes, desc: 'Preserve apostrophes from words (Default: enabled)' do
241
+ options[:normalize_apostrophes] = false
242
+ end
243
+
244
+ option :normalize_acronyms, desc: "Removes '.' characters from acronyms"
245
+ option :no_normalize_acronyms, desc: "Preserve '.' characters in acronyms (Default: enabled)" do
246
+ options[:no_normalize_acronyms] = false
247
+ end
248
+
249
+ description "Builds a wordlist by spidering a website"
250
+
251
+ man_page 'ronin-web-wordlist.1'
252
+
253
+ # The XPath or CSS-path for the page's content.
254
+ #
255
+ # @return [String]
256
+ attr_reader :content_xpath
257
+
258
+ # List of stop-words to ignore.
259
+ #
260
+ # @return [Array<String>]
261
+ attr_reader :stop_words
262
+
263
+ # List of words to ignore.
264
+ #
265
+ # @return [Array<String>]
266
+ attr_reader :ignore_words
267
+
268
+ # The list of special characters to allow in words.
269
+ #
270
+ # @return [Array<String>]
271
+ attr_reader :special_chars
272
+
273
+ #
274
+ # Initializes the `ronin-web wordlist` command.
275
+ #
276
+ # @param [Hash{Symbol => Object}] kwargs
277
+ # Additional keyword arguments for the command.
278
+ #
279
+ def initialize(**kwargs)
280
+ super(**kwargs)
281
+
282
+ @content_xpath = nil
283
+
284
+ @parse_meta_tags = true
285
+ @parse_comments = true
286
+ @parse_alt_tags = true
287
+
288
+ @stop_words = []
289
+ @ignore_words = []
290
+ @special_chars = []
291
+ end
292
+
293
+ # XPath to find `description` and `keywords` `meta`-tags.
294
+ META_TAGS_XPATH = '/head/meta[@name="description" or @name="keywords"]/@content'
295
+
296
+ # XPath to find all text elements.
297
+ TEXT_XPATH = '//text()[not (ancestor-or-self::script or ancestor-or-self::style)]'
298
+
299
+ # XPath to find all HTML comments.
300
+ COMMENT_XPATH = '//comment()'
301
+
302
+ # XPath which finds all image `alt`-tags, SVG `desc` elements, and `a`
303
+ # `title` attributes.
304
+ ALT_TAGS_XPATH = '//img/@alt|//area/@alt|//input/@alt|//a/@title'
305
+
306
+ #
307
+ # Runs the `ronin-web wordlist` command.
308
+ #
309
+ def run
310
+ @wordlist = ::Wordlist::Builder.new(wordlist_path,**wordlist_builder_kwargs)
311
+
312
+ @xpath = "#{@content_xpath}#{TEXT_XPATH}"
313
+ @xpath << "|#{META_TAGS_XPATH}" if @parse_meta_tags
314
+ @xpath << "|#{@content_xpath}#{COMMENT_XPATH}" if @parse_comments
315
+ @xpath << "|#{@content_xpath}#{ALT_TAGS_XPATH}" if @parse_alt_tags
316
+
317
+ begin
318
+ new_agent do |agent|
319
+ if options[:only_paths]
320
+ agent.every_url(&method(:parse_url_path))
321
+ elsif options[:only_query_param_names]
322
+ agent.every_url(&method(:parse_url_query_param_names))
323
+ elsif options[:only_query_param_values]
324
+ agent.every_url(&method(:parse_url_query_param_values))
325
+ else
326
+ agent.every_url(&method(:parse_url_path)) if options[:paths]
327
+
328
+ agent.every_url(&method(:parse_url_query_param_names)) if options[:query_param_names]
329
+ agent.every_url(&method(:parse_url_query_param_values)) if options[:query_param_values]
330
+
331
+ agent.every_ok_page(&method(:parse_page))
332
+ end
333
+ end
334
+ ensure
335
+ @wordlist.close
336
+ end
337
+ end
338
+
339
+ #
340
+ # The wordlist output path.
341
+ #
342
+ # @return [String]
343
+ #
344
+ def wordlist_path
345
+ options.fetch(:output) { infer_wordlist_path }
346
+ end
347
+
348
+ #
349
+ # Generates the wordlist output path based on the `--host`,
350
+ # `--domain`, or `--site` options.
351
+ #
352
+ # @return [String]
353
+ # The generated wordlist output path.
354
+ #
355
+ def infer_wordlist_path
356
+ if options[:host] then "#{options[:host]}.txt"
357
+ elsif options[:domain] then "#{options[:domain]}.txt"
358
+ elsif options[:site]
359
+ uri = URI.parse(options[:site])
360
+
361
+ unless uri.port == uri.default_port
362
+ "#{uri.host}:#{uri.port}.txt"
363
+ else
364
+ "#{uri.host}.txt"
365
+ end
366
+ else
367
+ print_error "must specify --host, --domain, or --site"
368
+ exit(1)
369
+ end
370
+ end
371
+
372
+ # List of command `options` that directly map to the keyword arguments
373
+ # of `Wordlist::Builder.new`.
374
+ WORDLIST_BUILDER_OPTIONS = [
375
+ :format,
376
+ :append,
377
+ :lang,
378
+ :digits,
379
+ :numbers,
380
+ :acronyms,
381
+ :normalize_case,
382
+ :normalize_apostrophes,
383
+ :normalize_acronyms
384
+ ]
385
+
386
+ #
387
+ # Creates a keyword arguments `Hash` of all command `options` that
388
+ # will be directly passed to `Wordlist::Builder.new`
389
+ #
390
+ def wordlist_builder_kwargs
391
+ kwargs = {}
392
+
393
+ WORDLIST_BUILDER_OPTIONS.each do |key|
394
+ kwargs[key] = options[key] if options.has_key?(key)
395
+ end
396
+
397
+ kwargs[:stop_words] = @stop_words unless @stop_words.empty?
398
+ kwargs[:ignore_words] = @ignore_words unless @ignore_words.empty?
399
+ kwargs[:special_chars] = @special_chars unless @special_chars.empty?
400
+
401
+ return kwargs
402
+ end
403
+
404
+ #
405
+ # Parses the URL's directory names of a spidered page and adds them to
406
+ # the wordlist.
407
+ #
408
+ # @param [URI::HTTP] url
409
+ # A spidered URL.
410
+ #
411
+ def parse_url_path(url)
412
+ log_info "Parsing #{url} ..."
413
+
414
+ url.path.split('/').each do |dirname|
415
+ @wordlist.add(dirname) unless dirname.empty?
416
+ end
417
+ end
418
+
419
+ #
420
+ # Parses the URL's query param names of a spidered page and adds them
421
+ # to the wordlist.
422
+ #
423
+ # @param [URI::HTTP] url
424
+ # A spidered URL.
425
+ #
426
+ def parse_url_query_param_names(url)
427
+ unless url.query_params.empty?
428
+ log_info "Parsing query param for #{url} ..."
429
+ @wordlist.append(url.query_params.keys)
430
+ end
431
+ end
432
+
433
+ #
434
+ # Parses the URL's query param values of a spidered page and adds them
435
+ # to the wordlist.
436
+ #
437
+ # @param [URI::HTTP] url
438
+ # A spidered URL.
439
+ #
440
+ def parse_url_query_param_values(url)
441
+ unless url.query_params.empty?
442
+ log_info "Parsing query param values for #{url} ..."
443
+
444
+ url.query_params.each_value do |value|
445
+ @wordlist.add(value)
446
+ end
447
+ end
448
+ end
449
+
450
+ #
451
+ # Parses the spidered page's content and adds the words to the
452
+ # wordlist.
453
+ #
454
+ # @param [Spidr::Page] page
455
+ # A spidered page.
456
+ #
457
+ def parse_page(page)
458
+ if page.html?
459
+ log_info "Parsing HTML on #{page.url} ..."
460
+ parse_html(page)
461
+ end
462
+ end
463
+
464
+ #
465
+ # Parses the spidered page's HTML and adds the words to the
466
+ # wordlist.
467
+ #
468
+ # @param [Spidr::Page] page
469
+ # A spidered page.
470
+ #
471
+ def parse_html(page)
472
+ page.search(@xpath).each do |node|
473
+ text = node.inner_text
474
+ text.strip!
475
+
476
+ @wordlist.parse(text) unless text.empty?
477
+ end
478
+ end
479
+
480
+ end
481
+ end
482
+ end
483
+ end
484
+ end
# frozen_string_literal: true
#
# ronin-web - A collection of useful web helper methods and commands.
#
# Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# ronin-web is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ronin-web is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
#

require 'ronin/web/cli/command'
require 'ronin/support/network/http'

require 'nokogiri'

module Ronin
  module Web
    class CLI
      module Commands
        #
        # Performs XPath queries on a URL or XML file.
        #
        # ## Usage
        #
        #     ronin-web xml [options] {URL | FILE} [XPATH]
        #
        # ## Options
        #
        #     -X, --xpath XPATH                XPath query
        #     -t, --text                       Prints the inner-text
        #     -F, --first                      Only print the first match
        #     -h, --help                       Print help information
        #
        # ## Arguments
        #
        #     URL | FILE                       The URL or FILE to search
        #     XPATH                            The XPath query
        #
        # @since 2.0.0
        #
        class Xml < Command

          usage '[options] {URL | FILE} [XPATH]'

          option :xpath, short: '-X',
                         value: {
                           type: String,
                           usage: 'XPATH'
                         },
                         desc: 'XPath query' do |xpath|
                           @query = xpath
                         end

          option :first, short: '-F',
                         desc: 'Only print the first match'

          option :text, short: '-t',
                        desc: 'Prints the elements inner text'

          argument :source, required: true,
                            usage: 'URL | FILE',
                            desc: 'The URL or FILE to search'

          argument :query, required: false,
                           usage: 'XPATH',
                           desc: 'The XPath query'

          # BUG FIX: this is the `xml` command, so the help description must
          # say "XML file", not "HTML file".
          description 'Performs XPath queries on a URL or XML file'

          man_page 'ronin-web-xml.1'

          # The XPath expression.
          #
          # @return [String, nil]
          attr_reader :query

          #
          # Runs the `ronin-web xml` command.
          #
          # @param [String] source
          #   The `URL` or `FILE` argument.
          #
          # @param [String, nil] query
          #   The optional XPath argument. Defaults to the `--xpath` option.
          #
          def run(source,query=@query)
            unless query
              print_error "must specify --xpath or an XPath argument"
              exit(-1)
            end

            doc   = parse(read(source))
            nodes = if options[:first] then doc.at(query)
                    else                    doc.search(query)
                    end

            if options[:text]
              puts nodes.inner_text
            else
              puts nodes
            end
          end

          #
          # Reads a URI or file.
          #
          # @param [String] source
          #   The URI or file path.
          #
          # @return [File, String]
          #   The contents of the URI or file.
          #
          def read(source)
            if source.start_with?('https://') ||
               source.start_with?('http://')
              Support::Network::HTTP.get_body(source)
            else
              # Nokogiri accepts an IO object directly, so avoid slurping
              File.new(source)
            end
          end

          #
          # Parses the XML source code.
          #
          # @param [String, File] xml
          #   The raw unparsed XML.
          #
          # @return [Nokogiri::XML::Document]
          #   The parsed XML document.
          #
          def parse(xml)
            Nokogiri::XML(xml)
          end

        end
      end
    end
  end
end
# frozen_string_literal: true
#
# ronin-web - A collection of useful web helper methods and commands.
#
# Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# ronin-web is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ronin-web is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
#

require 'ronin/core/cli/shell'

module Ronin
  module Web
    class CLI
      #
      # An interactive JavaScript shell that evaluates input within a
      # browser's current page.
      #
      class JSShell < Core::CLI::Shell

        shell_name 'js'

        # The parent browser.
        #
        # @return [Ronin::Web::Browser::Agent]
        attr_reader :browser

        #
        # Initializes the JavaScript shell.
        #
        # @param [Ronin::Web::Browser::Agent] browser
        #   The browser instance.
        #
        def initialize(browser,**kwargs)
          super(**kwargs)

          @browser = browser
        end

        #
        # Evaluates the given JavaScript in the current browser page and
        # prints the resulting value, if any.
        #
        # @param [String] js
        #   The JavaScript to evaluate.
        #
        def exec(js)
          result = @browser.eval_js(js)

          p result unless result.nil?
        rescue Ferrum::JavaScriptError => error
          puts error.message
        end

      end
    end
  end
end