ronin-web 1.0.2 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +3 -2
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +5 -0
  5. data/ChangeLog.md +46 -1
  6. data/Gemfile +25 -12
  7. data/README.md +257 -51
  8. data/Rakefile +9 -0
  9. data/data/completions/ronin-web +203 -0
  10. data/gemspec.yml +18 -5
  11. data/lib/ronin/web/cli/browser_options.rb +92 -0
  12. data/lib/ronin/web/cli/browser_shell.rb +448 -0
  13. data/lib/ronin/web/cli/command.rb +1 -1
  14. data/lib/ronin/web/cli/commands/browser.rb +373 -0
  15. data/lib/ronin/web/cli/commands/completion.rb +63 -0
  16. data/lib/ronin/web/cli/commands/diff.rb +60 -8
  17. data/lib/ronin/web/cli/commands/html.rb +21 -33
  18. data/lib/ronin/web/cli/commands/irb.rb +1 -1
  19. data/lib/ronin/web/cli/commands/new/{webapp.rb → app.rb} +8 -8
  20. data/lib/ronin/web/cli/commands/new/nokogiri.rb +4 -4
  21. data/lib/ronin/web/cli/commands/new/server.rb +1 -1
  22. data/lib/ronin/web/cli/commands/new/spider.rb +1 -1
  23. data/lib/ronin/web/cli/commands/new.rb +5 -3
  24. data/lib/ronin/web/cli/commands/reverse_proxy.rb +1 -1
  25. data/lib/ronin/web/cli/commands/screenshot.rb +186 -0
  26. data/lib/ronin/web/cli/commands/server.rb +1 -1
  27. data/lib/ronin/web/cli/commands/session_cookie.rb +265 -0
  28. data/lib/ronin/web/cli/commands/spider.rb +61 -467
  29. data/lib/ronin/web/cli/commands/user_agent.rb +177 -0
  30. data/lib/ronin/web/cli/commands/vulns.rb +463 -0
  31. data/lib/ronin/web/cli/commands/wordlist.rb +484 -0
  32. data/lib/ronin/web/cli/commands/xml.rb +149 -0
  33. data/lib/ronin/web/cli/js_shell.rb +69 -0
  34. data/lib/ronin/web/cli/ruby_shell.rb +1 -1
  35. data/lib/ronin/web/cli/spider_options.rb +919 -0
  36. data/lib/ronin/web/cli.rb +3 -1
  37. data/lib/ronin/web/html.rb +1 -1
  38. data/lib/ronin/web/root.rb +1 -1
  39. data/lib/ronin/web/version.rb +2 -2
  40. data/lib/ronin/web/xml.rb +1 -1
  41. data/lib/ronin/web.rb +4 -364
  42. data/man/ronin-web-browser.1 +92 -0
  43. data/man/ronin-web-browser.1.md +96 -0
  44. data/man/ronin-web-completion.1 +76 -0
  45. data/man/ronin-web-completion.1.md +78 -0
  46. data/man/ronin-web-diff.1 +14 -21
  47. data/man/ronin-web-diff.1.md +13 -6
  48. data/man/ronin-web-html.1 +30 -46
  49. data/man/ronin-web-html.1.md +27 -17
  50. data/man/ronin-web-irb.1 +9 -16
  51. data/man/ronin-web-irb.1.md +6 -2
  52. data/man/ronin-web-new-app.1.md +39 -0
  53. data/man/ronin-web-new-nokogiri.1 +9 -20
  54. data/man/ronin-web-new-nokogiri.1.md +5 -5
  55. data/man/ronin-web-new-server.1 +11 -23
  56. data/man/ronin-web-new-server.1.md +5 -5
  57. data/man/ronin-web-new-spider.1 +44 -88
  58. data/man/ronin-web-new-spider.1.md +37 -37
  59. data/man/ronin-web-new.1 +18 -30
  60. data/man/ronin-web-new.1.md +15 -11
  61. data/man/ronin-web-reverse-proxy.1 +33 -38
  62. data/man/ronin-web-reverse-proxy.1.md +20 -14
  63. data/man/ronin-web-screenshot.1 +56 -0
  64. data/man/ronin-web-screenshot.1.md +56 -0
  65. data/man/ronin-web-server.1 +15 -29
  66. data/man/ronin-web-server.1.md +13 -9
  67. data/man/ronin-web-session-cookie.1 +38 -0
  68. data/man/ronin-web-session-cookie.1.md +41 -0
  69. data/man/ronin-web-spider.1 +121 -130
  70. data/man/ronin-web-spider.1.md +115 -66
  71. data/man/ronin-web-user-agent.1 +44 -0
  72. data/man/ronin-web-user-agent.1.md +46 -0
  73. data/man/ronin-web-vulns.1 +175 -0
  74. data/man/ronin-web-vulns.1.md +177 -0
  75. data/man/ronin-web-wordlist.1 +258 -0
  76. data/man/ronin-web-wordlist.1.md +263 -0
  77. data/man/ronin-web-xml.1 +43 -0
  78. data/man/ronin-web-xml.1.md +46 -0
  79. data/man/ronin-web.1 +67 -18
  80. data/man/ronin-web.1.md +55 -4
  81. data/scripts/setup +58 -0
  82. metadata +122 -31
  83. data/lib/ronin/web/mechanize.rb +0 -84
  84. data/man/ronin-web-new-webapp.1.md +0 -39
  85. /data/data/new/{webapp → app}/.gitignore +0 -0
  86. /data/data/new/{webapp → app}/.ruby-version.erb +0 -0
  87. /data/data/new/{webapp → app}/Dockerfile.erb +0 -0
  88. /data/data/new/{webapp → app}/Gemfile +0 -0
  89. /data/data/new/{webapp → app}/app.rb.erb +0 -0
  90. /data/data/new/{webapp → app}/config.ru +0 -0
  91. /data/data/new/{webapp → app}/docker-compose.yml.erb +0 -0
@@ -0,0 +1,484 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # ronin-web - A collection of useful web helper methods and commands.
4
+ #
5
+ # Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
6
+ #
7
+ # ronin-web is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation, either version 3 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # ronin-web is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ require 'ronin/web/cli/command'
22
+ require 'ronin/web/cli/spider_options'
23
+ require 'ronin/core/cli/logging'
24
+
25
+ require 'wordlist/builder'
26
+ require 'nokogiri'
27
+
28
module Ronin
  module Web
    class CLI
      module Commands
        #
        # Builds a wordlist by spidering a website.
        #
        # ## Usage
        #
        #     ronin-web wordlist [options] {--host HOST | --domain DOMAIN | --site URL}
        #
        # ## Options
        #
        #         --open-timeout SECS          Sets the connection open timeout
        #         --read-timeout SECS          Sets the read timeout
        #         --ssl-timeout SECS           Sets the SSL connection timeout
        #         --continue-timeout SECS      Sets the continue timeout
        #         --keep-alive-timeout SECS    Sets the connection keep alive timeout
        #     -P, --proxy PROXY                Sets the proxy to use
        #     -H, --header NAME: VALUE         Sets a default header
        #         --host-header NAME=VALUE     Sets a default header
        #     -u chrome-linux|chrome-macos|chrome-windows|chrome-iphone|chrome-ipad|chrome-android|firefox-linux|firefox-macos|firefox-windows|firefox-iphone|firefox-ipad|firefox-android|safari-macos|safari-iphone|safari-ipad|edge,
        #         --user-agent                 The User-Agent to use
        #     -U, --user-agent-string STRING   The User-Agent string to use
        #     -R, --referer URL                Sets the Referer URL
        #         --delay SECS                 Sets the delay in seconds between each request
        #     -l, --limit COUNT                Only spiders up to COUNT pages
        #     -d, --max-depth DEPTH            Only spiders up to max depth
        #         --enqueue URL                Adds the URL to the queue
        #         --visited URL                Marks the URL as previously visited
        #         --strip-fragments            Enables/disables stripping the fragment component of every URL
        #         --strip-query                Enables/disables stripping the query component of every URL
        #         --visit-host HOST            Visit URLs with the matching host name
        #         --visit-hosts-like /REGEX/   Visit URLs with hostnames that match the REGEX
        #         --ignore-host HOST           Ignore the host name
        #         --ignore-hosts-like /REGEX/  Ignore the host names matching the REGEX
        #         --visit-port PORT            Visit URLs with the matching port number
        #         --visit-ports-like /REGEX/   Visit URLs with port numbers that match the REGEX
        #         --ignore-port PORT           Ignore the port number
        #         --ignore-ports-like /REGEX/  Ignore the port numbers matching the REGEXP
        #         --visit-link URL             Visit the URL
        #         --visit-links-like /REGEX/   Visit URLs that match the REGEX
        #         --ignore-link URL            Ignore the URL
        #         --ignore-links-like /REGEX/  Ignore URLs matching the REGEX
        #         --visit-ext FILE_EXT         Visit URLs with the matching file ext
        #         --visit-exts-like /REGEX/    Visit URLs with file exts that match the REGEX
        #         --ignore-ext FILE_EXT        Ignore the URLs with the file ext
        #         --ignore-exts-like /REGEX/   Ignore URLs with file exts matching the REGEX
        #     -r, --robots                     Specifies whether to honor robots.txt
        #         --host HOST                  Spiders the specific HOST
        #         --domain DOMAIN              Spiders the whole domain
        #         --site URL                   Spiders the website, starting at the URL
        #     -o, --output PATH                The wordlist to write to
        #     -X, --content-xpath XPATH        The XPath for the content (Default: //body)
        #     -C, --content-css-path CSS-path  The CSS-path for the content
        #         --meta-tags                  Parse certain meta-tags (Default: enabled)
        #         --no-meta-tags               Ignore meta-tags
        #         --comments                   Parse HTML comments (Default: enabled)
        #         --no-comments                Ignore HTML comments
        #         --alt-tags                   Parse alt-tags on images (Default: enabled)
        #         --no-alt-tags                Ignore alt-tags on images
        #         --paths                      Also parse URL paths
        #         --query-params-names         Also parse URL query param names
        #         --query-param-values         Also parse URL query param values
        #         --only-paths                 Only build a wordlist based on the paths
        #         --only-query-param           Only build a wordlist based on the query param names
        #         --only-query-param-values    Only build a wordlist based on the query param values
        #     -f, --format txt|gz|bzip2|xz     Specifies the format of the wordlist file
        #     -A, --append                     Append new words to the wordlist file instead of overwriting the file
        #     -L, --lang LANG                  The language of the text to parse
        #         --stop-word WORD             A stop-word to ignore
        #         --ignore-word WORD           Ignores the word
        #         --digits                     Accepts words containing digits (Default: enabled)
        #         --no-digits                  Ignores words containing digits
        #         --special-char CHAR          Allows a special character within a word (Default: _, -, ')
        #         --numbers                    Accepts numbers as words (Default: disabled)
        #         --no-numbers                 Ignores numbers
        #         --acronyms                   Treats acronyms as words (Default: enabled)
        #         --no-acronyms                Ignores acronyms
        #         --normalize-case             Converts all words to lowercase
        #         --no-normalize-case          Preserve the case of words and letters (Default: enabled)
        #         --normalize-apostrophes      Removes apostrophes from words
        #         --no-normalize-apostrophes   Preserve apostrophes from words (Default: enabled)
        #         --normalize-acronyms         Removes '.' characters from acronyms
        #         --no-normalize-acronyms      Preserve '.' characters in acronyms (Default: enabled)
        #     -h, --help                       Print help information
        #
        class Wordlist < Command

          include Core::CLI::Logging
          include SpiderOptions

          option :output, short: '-o',
                          value: {
                            type:  String,
                            usage: 'PATH'
                          },
                          desc: 'The wordlist to write to'

          option :content_xpath, short: '-X',
                                 value: {
                                   type:  String,
                                   usage: 'XPATH'
                                 },
                                 desc: 'The XPath for the content. (Default: //body)' do |xpath|
                                   @content_xpath = xpath
                                 end

          option :content_css_path, short: '-C',
                                    value: {
                                      type:  String,
                                      usage: 'CSS-path'
                                    },
                                    # fixed: desc previously said "XPath" for the CSS-path option
                                    desc: 'The CSS-path for the content' do |css_path|
                                      # translate the CSS-path into an equivalent XPath query
                                      @content_xpath = Nokogiri::CSS.xpath_for(css_path).first
                                    end

          option :meta_tags, desc: 'Parse certain meta-tags (Default: enabled)' do
            @parse_meta_tags = true
          end
          option :no_meta_tags, desc: 'Ignore meta-tags' do
            @parse_meta_tags = false
          end

          option :comments, desc: 'Parse HTML comments (Default: enabled)' do
            @parse_comments = true
          end
          option :no_comments, desc: 'Ignore HTML comments' do
            @parse_comments = false
          end

          option :alt_tags, desc: 'Parse alt-tags on images (Default: enabled)' do
            @parse_alt_tags = true
          end
          option :no_alt_tags, desc: 'Ignore alt-tags on images' do
            @parse_alt_tags = false
          end

          option :paths, desc: 'Also parse URL paths'
          option :query_params_names, desc: 'Also parse URL query param names'
          option :query_param_values, desc: 'Also parse URL query param values'

          option :only_paths, desc: 'Only build a wordlist based on the paths'
          option :only_query_param, desc: 'Only build a wordlist based on the query param names'
          option :only_query_param_values, desc: 'Only build a wordlist based on the query param values'

          option :format, short: '-f',
                          value: {
                            type: [:txt, :gz, :bzip2, :xz]
                          },
                          desc: 'Specifies the format of the wordlist file'

          option :append, short: '-A',
                          # fixed typo: "intead" -> "instead"
                          desc: 'Append new words to the wordlist file instead of overwriting the file'

          option :lang, short: '-L',
                        value: {
                          type:  String,
                          usage: 'LANG'
                        },
                        desc: 'The language of the text to parse' do |lang|
                          options[:lang] = lang.to_sym
                        end

          option :stop_word, value: {
                               type:  String,
                               usage: 'WORD'
                             },
                             desc: 'A stop-word to ignore' do |word|
                               @stop_words << word
                             end

          option :ignore_word, value: {
                                 type:  String,
                                 usage: 'WORD'
                               },
                               desc: 'Ignores the word' do |word|
                                 @ignore_words << word
                               end

          option :digits, desc: 'Accepts words containing digits (Default: enabled)'
          option :no_digits, desc: 'Ignores words containing digits' do
            options[:digits] = false
          end

          option :special_char, value: {
                                  type:  String,
                                  usage: 'CHAR'
                                },
                                desc: 'Allows a special character within a word (Default: _, -, \')' do |char|
                                  @special_chars << char
                                end

          option :numbers, desc: 'Accepts numbers as words (Default: disabled)'
          option :no_numbers, desc: 'Ignores numbers' do
            options[:numbers] = false
          end

          option :acronyms, desc: 'Treats acronyms as words (Default: enabled)'
          option :no_acronyms, desc: 'Ignores acronyms' do
            options[:acronyms] = false
          end

          option :normalize_case, desc: 'Converts all words to lowercase'
          option :no_normalize_case, desc: 'Preserve the case of words and letters (Default: enabled)' do
            options[:normalize_case] = false
          end

          option :normalize_apostrophes, desc: 'Removes apostrophes from words'
          option :no_normalize_apostrophes, desc: 'Preserve apostrophes from words (Default: enabled)' do
            options[:normalize_apostrophes] = false
          end

          option :normalize_acronyms, desc: "Removes '.' characters from acronyms"
          option :no_normalize_acronyms, desc: "Preserve '.' characters in acronyms (Default: enabled)" do
            # fixed: previously set options[:no_normalize_acronyms] = false, a key
            # that WORDLIST_BUILDER_OPTIONS never reads, so the flag was a no-op.
            options[:normalize_acronyms] = false
          end

          description "Builds a wordlist by spidering a website"

          man_page 'ronin-web-wordlist.1'

          # The XPath for the page's content (a CSS-path given via `-C` is
          # converted to an XPath).
          #
          # @return [String, nil]
          attr_reader :content_xpath

          # List of stop-words to ignore.
          #
          # @return [Array<String>]
          attr_reader :stop_words

          # List of words to ignore.
          #
          # @return [Array<String>]
          attr_reader :ignore_words

          # The list of special characters to allow in words.
          #
          # @return [Array<String>]
          attr_reader :special_chars

          #
          # Initializes the `ronin-web wordlist` command.
          #
          # @param [Hash{Symbol => Object}] kwargs
          #   Additional keyword arguments for the command.
          #
          def initialize(**kwargs)
            super(**kwargs)

            # NOTE(review): the `-X` option help advertises a `//body` default,
            # but no default XPath is actually set here — confirm intent.
            @content_xpath = nil

            # which page features to parse; all enabled by default
            @parse_meta_tags = true
            @parse_comments  = true
            @parse_alt_tags  = true

            @stop_words    = []
            @ignore_words  = []
            @special_chars = []
          end

          # XPath to find `description` and `keywords` `meta`-tags.
          META_TAGS_XPATH = '/head/meta[@name="description" or @name="keywords"]/@content'

          # XPath to find all text elements.
          TEXT_XPATH = '//text()[not (ancestor-or-self::script or ancestor-or-self::style)]'

          # XPath to find all HTML comments.
          COMMENT_XPATH = '//comment()'

          # XPath which finds all image `alt`-tags, SVG `desc` elements, and `a`
          # `title` attributes.
          ALT_TAGS_XPATH = '//img/@alt|//area/@alt|//input/@alt|//a/@title'

          #
          # Runs the `ronin-web wordlist` command.
          #
          def run
            @wordlist = ::Wordlist::Builder.new(wordlist_path,**wordlist_builder_kwargs)

            # build the combined XPath once, based on the enabled features
            @xpath = "#{@content_xpath}#{TEXT_XPATH}"
            @xpath << "|#{META_TAGS_XPATH}"                 if @parse_meta_tags
            @xpath << "|#{@content_xpath}#{COMMENT_XPATH}"  if @parse_comments
            @xpath << "|#{@content_xpath}#{ALT_TAGS_XPATH}" if @parse_alt_tags

            begin
              new_agent do |agent|
                if options[:only_paths]
                  agent.every_url(&method(:parse_url_path))
                # fixed: previously checked options[:only_query_param_names],
                # a key no option ever sets (the option is :only_query_param).
                elsif options[:only_query_param]
                  agent.every_url(&method(:parse_url_query_param_names))
                elsif options[:only_query_param_values]
                  agent.every_url(&method(:parse_url_query_param_values))
                else
                  agent.every_url(&method(:parse_url_path)) if options[:paths]

                  # fixed: previously checked options[:query_param_names],
                  # but the declared option is :query_params_names.
                  agent.every_url(&method(:parse_url_query_param_names))  if options[:query_params_names]
                  agent.every_url(&method(:parse_url_query_param_values)) if options[:query_param_values]

                  agent.every_ok_page(&method(:parse_page))
                end
              end
            ensure
              # always flush/close the wordlist, even if spidering raises
              @wordlist.close
            end
          end

          #
          # The wordlist output path.
          #
          # @return [String]
          #
          def wordlist_path
            options.fetch(:output) { infer_wordlist_path }
          end

          #
          # Generates the wordlist output path based on the `--host`,
          # `--domain`, or `--site` options.
          #
          # @return [String]
          #   The generated wordlist output path.
          #
          def infer_wordlist_path
            if    options[:host]   then "#{options[:host]}.txt"
            elsif options[:domain] then "#{options[:domain]}.txt"
            elsif options[:site]
              uri = URI.parse(options[:site])

              # only include the port when it is non-standard for the scheme
              if uri.port == uri.default_port
                "#{uri.host}.txt"
              else
                "#{uri.host}:#{uri.port}.txt"
              end
            else
              print_error "must specify --host, --domain, or --site"
              exit(1)
            end
          end

          # List of command `options` that directly map to the keyword arguments
          # of `Wordlist::Builder.new`.
          WORDLIST_BUILDER_OPTIONS = [
            :format,
            :append,
            :lang,
            :digits,
            :numbers,
            :acronyms,
            :normalize_case,
            :normalize_apostrophes,
            :normalize_acronyms
          ].freeze

          #
          # Creates a keyword arguments `Hash` of all command `options` that
          # will be directly passed to `Wordlist::Builder.new`.
          #
          # @return [Hash{Symbol => Object}]
          #
          def wordlist_builder_kwargs
            kwargs = {}

            WORDLIST_BUILDER_OPTIONS.each do |key|
              kwargs[key] = options[key] if options.has_key?(key)
            end

            kwargs[:stop_words]    = @stop_words    unless @stop_words.empty?
            kwargs[:ignore_words]  = @ignore_words  unless @ignore_words.empty?
            kwargs[:special_chars] = @special_chars unless @special_chars.empty?

            return kwargs
          end

          #
          # Parses the URL's directory names of a spidered page and adds them to
          # the wordlist.
          #
          # @param [URI::HTTP] url
          #   A spidered URL.
          #
          def parse_url_path(url)
            log_info "Parsing #{url} ..."

            url.path.split('/').each do |dirname|
              @wordlist.add(dirname) unless dirname.empty?
            end
          end

          #
          # Parses the URL's query param names of a spidered page and adds them
          # to the wordlist.
          #
          # @param [URI::HTTP] url
          #   A spidered URL.
          #
          def parse_url_query_param_names(url)
            unless url.query_params.empty?
              log_info "Parsing query param for #{url} ..."
              @wordlist.append(url.query_params.keys)
            end
          end

          #
          # Parses the URL's query param values of a spidered page and adds them
          # to the wordlist.
          #
          # @param [URI::HTTP] url
          #   A spidered URL.
          #
          def parse_url_query_param_values(url)
            unless url.query_params.empty?
              log_info "Parsing query param values for #{url} ..."

              url.query_params.each_value do |value|
                @wordlist.add(value)
              end
            end
          end

          #
          # Parses the spidered page's content and adds the words to the
          # wordlist.
          #
          # @param [Spidr::Page] page
          #   A spidered page.
          #
          def parse_page(page)
            if page.html?
              log_info "Parsing HTML on #{page.url} ..."
              parse_html(page)
            end
          end

          #
          # Parses the spidered page's HTML and adds the words to the
          # wordlist.
          #
          # @param [Spidr::Page] page
          #   A spidered page.
          #
          def parse_html(page)
            page.search(@xpath).each do |node|
              text = node.inner_text
              text.strip!

              @wordlist.parse(text) unless text.empty?
            end
          end

        end
      end
    end
  end
end
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # ronin-web - A collection of useful web helper methods and commands.
4
+ #
5
+ # Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
6
+ #
7
+ # ronin-web is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation, either version 3 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # ronin-web is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ require 'ronin/web/cli/command'
22
+ require 'ronin/support/network/http'
23
+
24
+ require 'nokogiri'
25
+
26
module Ronin
  module Web
    class CLI
      module Commands
        #
        # Performs XPath queries on a URL or XML file.
        #
        # ## Usage
        #
        #     ronin-web xml [options] {URL | FILE} [XPATH]
        #
        # ## Options
        #
        #     -X, --xpath XPATH                XPath query
        #     -t, --text                       Prints the inner-text
        #     -F, --first                      Only print the first match
        #     -h, --help                       Print help information
        #
        # ## Arguments
        #
        #     URL | FILE                       The URL or FILE to search
        #     XPATH                            The XPath query
        #
        # @since 2.0.0
        #
        class Xml < Command

          usage '[options] {URL | FILE} [XPATH]'

          option :xpath, short: '-X',
                         value: {
                           type:  String,
                           usage: 'XPATH'
                         },
                         desc: 'XPath query' do |xpath|
                           @query = xpath
                         end

          option :first, short: '-F',
                         desc: 'Only print the first match'

          option :text, short: '-t',
                        desc: 'Prints the elements inner text'

          argument :source, required: true,
                            usage: 'URL | FILE',
                            desc: 'The URL or FILE to search'

          argument :query, required: false,
                           usage: 'XPATH',
                           desc: 'The XPath query'

          # fixed: previously said "HTML file", but this command parses XML
          description 'Performs XPath queries on a URL or XML file'

          man_page 'ronin-web-xml.1'

          # The XPath expression.
          #
          # @return [String, nil]
          attr_reader :query

          #
          # Runs the `ronin-web xml` command.
          #
          # @param [String] source
          #   The `URL` or `FILE` argument.
          #
          # @param [String, nil] query
          #   The optional XPath argument. Defaults to the `--xpath` option.
          #
          def run(source,query=@query)
            unless query
              print_error "must specify --xpath or an XPath argument"
              # fixed: was exit(-1); sibling commands exit with status 1 on
              # usage errors
              exit(1)
            end

            doc   = parse(read(source))
            nodes = if options[:first] then doc.at(query)
                    else                    doc.search(query)
                    end

            if options[:text]
              puts nodes.inner_text
            else
              puts nodes
            end
          end

          #
          # Reads a URI or file.
          #
          # @param [String] source
          #   The URI or file path.
          #
          # @return [String]
          #   The contents of the URI or file.
          #
          def read(source)
            if source.start_with?('https://') ||
               source.start_with?('http://')
              Support::Network::HTTP.get_body(source)
            else
              # fixed: was File.new(source), which leaked an open file handle;
              # File.read slurps and closes the file
              File.read(source)
            end
          end

          #
          # Parses the XML source code.
          #
          # @param [String] xml
          #   The raw unparsed XML.
          #
          # @return [Nokogiri::XML::Document]
          #   The parsed XML document.
          #
          def parse(xml)
            Nokogiri::XML(xml)
          end

        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # ronin-web - A collection of useful web helper methods and commands.
4
+ #
5
+ # Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
6
+ #
7
+ # ronin-web is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation, either version 3 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # ronin-web is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ require 'ronin/core/cli/shell'
22
+
23
module Ronin
  module Web
    class CLI
      #
      # An interactive JavaScript shell bound to a browser instance.
      #
      class JSShell < Core::CLI::Shell

        shell_name 'js'

        # The parent browser.
        #
        # @return [Ronin::Web::Browser::Agent]
        attr_reader :browser

        #
        # Initializes the JavaScript shell.
        #
        # @param [Ronin::Web::Browser::Agent] browser
        #   The browser that each line of JavaScript will be evaluated in.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Additional keyword arguments for the shell.
        #
        def initialize(browser,**kwargs)
          super(**kwargs)

          @browser = browser
        end

        #
        # Evaluates the JavaScript in the current browser page and prints the
        # result, if any.
        #
        # @param [String] js
        #   The JavaScript to evaluate.
        #
        def exec(js)
          result = @browser.eval_js(js)

          # only print a result when the expression produced one
          p result unless result.nil?
        rescue Ferrum::JavaScriptError => error
          # surface in-page JS errors to the user instead of crashing the shell
          puts error.message
        end

      end
    end
  end
end