ronin-web-spider 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +16 -1
- data/.rubocop.yml +11 -0
- data/ChangeLog.md +20 -1
- data/Gemfile +3 -0
- data/README.md +1 -2
- data/Rakefile +2 -2
- data/gemspec.yml +1 -1
- data/lib/ronin/web/spider/agent.rb +63 -7
- data/lib/ronin/web/spider/archive.rb +2 -1
- data/lib/ronin/web/spider/exceptions.rb +1 -0
- data/lib/ronin/web/spider/git_archive.rb +2 -1
- data/lib/ronin/web/spider/version.rb +2 -1
- data/lib/ronin/web/spider.rb +63 -62
- data/ronin-web-spider.gemspec +3 -3
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dab34842325a731e13f23303b1dec66c7ab9d78b4805b982d518ba01024c9352
|
4
|
+
data.tar.gz: '0668f126b3e828c6409cc7b7adef2d74cd527f48de334b10a3d54f3767fe3afd'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d474705a601b7fe27be2a9c5f5e5485ed39b38dec1581db295b0e1ff524c987c9e74522afd24569829bc7e64f6930767104f9bd927a3007a17f627de003492f7
|
7
|
+
data.tar.gz: 397b84308ec62d51e1dba64cff37c75eda8ebd8c2e6a792487468158fb774aae566b8be5cd0388521774d62873e5ba6b751423bc1138ac9e3d47804bdd877a81
|
data/.github/workflows/ruby.yml
CHANGED
@@ -12,11 +12,12 @@ jobs:
|
|
12
12
|
- '3.0'
|
13
13
|
- '3.1'
|
14
14
|
- '3.2'
|
15
|
+
- '3.3'
|
15
16
|
- jruby
|
16
17
|
- truffleruby
|
17
18
|
name: Ruby ${{ matrix.ruby }}
|
18
19
|
steps:
|
19
|
-
- uses: actions/checkout@
|
20
|
+
- uses: actions/checkout@v4
|
20
21
|
- name: Set up Ruby
|
21
22
|
uses: ruby/setup-ruby@v1
|
22
23
|
with:
|
@@ -26,3 +27,17 @@ jobs:
|
|
26
27
|
run: bundle install --jobs 4 --retry 3
|
27
28
|
- name: Run tests
|
28
29
|
run: bundle exec rake test
|
30
|
+
|
31
|
+
# rubocop linting
|
32
|
+
rubocop:
|
33
|
+
runs-on: ubuntu-latest
|
34
|
+
steps:
|
35
|
+
- uses: actions/checkout@v4
|
36
|
+
- name: Set up Ruby
|
37
|
+
uses: ruby/setup-ruby@v1
|
38
|
+
with:
|
39
|
+
ruby-version: 3.0
|
40
|
+
- name: Install dependencies
|
41
|
+
run: bundle install --jobs 4 --retry 3
|
42
|
+
- name: Run rubocop
|
43
|
+
run: bundle exec rubocop --parallel
|
data/.rubocop.yml
ADDED
data/ChangeLog.md
CHANGED
@@ -1,4 +1,22 @@
|
|
1
|
-
### 0.1.0 / 2023-02-01
|
1
|
+
### 0.1.1 / 2024-06-19
|
2
|
+
|
3
|
+
* Fixed {Ronin::Web::Spider::Agent#every_html_comment} and
|
4
|
+
{Ronin::Web::Spider::Agent#every_javascript} when the page's `Content-Type`
|
5
|
+
header included `text/html` but lacked a response body, causing `page.doc` to
|
6
|
+
be `nil`.
|
7
|
+
* Fixed a bug in {Ronin::Web::Spider::Agent#every_javascript} where parsed
|
8
|
+
JavaScript source code strings containing UTF-8 characters were being
|
9
|
+
incorrectly encoded as ASCII-8bit strings, if the page's `Content-Type` header
|
10
|
+
did not include a `charset=` attribute.
|
11
|
+
* Fixed a bug in {Ronin::Web::Spider::Agent#every_javascript_string} where
|
12
|
+
inline JavaScript regexes containing the `"` or `'` characters (ex: `/["'=]/`)
|
13
|
+
would incorrectly be treated as the beginning or ends of JavaScript string
|
14
|
+
literals. Note that while this greatly improves the accuracy of
|
15
|
+
{Ronin::Web::Spider::Agent#every_javascript_string}, it still does not
|
16
|
+
support parsing JavaScript template literals that may also contain string
|
17
|
+
literals (ex: ````Hello \"World\"```` or ````Hello ${myFunc("string literal")}````).
|
18
|
+
|
19
|
+
### 0.1.0 / 2023-02-01
|
2
20
|
|
3
21
|
* Extracted and refactored from [ronin-web](https://github.com/ronin-rb/ronin-web/tree/v0.3.0.rc1).
|
4
22
|
* Relicensed as LGPL-3.0.
|
@@ -20,3 +38,4 @@
|
|
20
38
|
* `every_comment` - yields every HTML or JavaScript comment.
|
21
39
|
* Supports archiving spidered pages to a directory or git repository.
|
22
40
|
|
41
|
+
[spidr]: https://github.com/postmodern/spidr#readme
|
data/Gemfile
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
source 'https://rubygems.org'
|
2
3
|
|
3
4
|
gemspec
|
@@ -28,4 +29,6 @@ group :development do
|
|
28
29
|
gem 'dead_end', require: false
|
29
30
|
gem 'sord', require: false, platform: :mri
|
30
31
|
gem 'stackprof', require: false, platform: :mri
|
32
|
+
gem 'rubocop', require: false, platform: :mri
|
33
|
+
gem 'rubocop-ronin', require: false, platform: :mri
|
31
34
|
end
|
data/README.md
CHANGED
@@ -9,7 +9,6 @@
|
|
9
9
|
* [Issues](https://github.com/ronin-rb/ronin-web-spider/issues)
|
10
10
|
* [Documentation](https://ronin-rb.dev/docs/ronin-web-spider/frames)
|
11
11
|
* [Discord](https://discord.gg/6WAb3PsVX9) |
|
12
|
-
[Twitter](https://twitter.com/ronin_rb) |
|
13
12
|
[Mastodon](https://infosec.exchange/@ronin_rb)
|
14
13
|
|
15
14
|
## Description
|
@@ -38,7 +37,7 @@ ronin-web-spider is a collection of common web spidering routines using the
|
|
38
37
|
* [every_comment][docs-every_comment] - yields every HTML or JavaScript
|
39
38
|
comment.
|
40
39
|
* Supports archiving spidered pages to a directory or git repository.
|
41
|
-
* Has
|
40
|
+
* Has 97% documentation coverage.
|
42
41
|
* Has 94% test coverage.
|
43
42
|
|
44
43
|
[docs-every_host]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_host-instance_method
|
data/Rakefile
CHANGED
data/gemspec.yml
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#
|
2
3
|
# ronin-web-spider - A collection of common web spidering routines.
|
3
4
|
#
|
@@ -237,6 +238,8 @@ module Ronin
|
|
237
238
|
#
|
238
239
|
def every_html_comment
|
239
240
|
every_html_page do |page|
|
241
|
+
next unless page.doc
|
242
|
+
|
240
243
|
page.doc.xpath('//comment()').each do |comment|
|
241
244
|
comment_text = comment.inner_text.strip
|
242
245
|
|
@@ -267,20 +270,60 @@ module Ronin
|
|
267
270
|
# yield inner text of every `<script type="text/javascript">` tag
|
268
271
|
# and every `.js` URL.
|
269
272
|
every_html_page do |page|
|
273
|
+
next unless page.doc
|
274
|
+
|
270
275
|
page.doc.xpath('//script[@type="text/javascript"]').each do |script|
|
271
|
-
|
272
|
-
|
276
|
+
source = script.inner_text
|
277
|
+
source.force_encoding(Encoding::UTF_8)
|
278
|
+
|
279
|
+
unless source.empty?
|
280
|
+
yield source
|
273
281
|
end
|
274
282
|
end
|
275
283
|
end
|
276
284
|
|
277
285
|
every_javascript_page do |page|
|
278
|
-
|
286
|
+
source = page.body
|
287
|
+
source.force_encoding(Encoding::UTF_8)
|
288
|
+
|
289
|
+
yield source
|
279
290
|
end
|
280
291
|
end
|
281
292
|
|
282
293
|
alias every_js every_javascript
|
283
294
|
|
295
|
+
# Regex to match and skip JavaScript inline regexes.
|
296
|
+
#
|
297
|
+
# @api private
|
298
|
+
#
|
299
|
+
# @since 0.1.1
|
300
|
+
JAVASCRIPT_INLINE_REGEX = %r{
|
301
|
+
(?# match before the regex to avoid matching division operators )
|
302
|
+
(?:[\{\[\(;:,]\s*|=\s*)
|
303
|
+
/
|
304
|
+
(?# inline regex contents )
|
305
|
+
(?:
|
306
|
+
\[ (?:\\. | [^\]]) \] (?# [...] ) |
|
307
|
+
\\. (?# backslash escaped characters ) |
|
308
|
+
[^/] (?# everything else )
|
309
|
+
)+
|
310
|
+
/[dgimsuvy]* (?# also match any regex flags )
|
311
|
+
}mx
|
312
|
+
|
313
|
+
# Regex to match and skip JavaScript template literals.
|
314
|
+
#
|
315
|
+
# @note
|
316
|
+
# This regex will not properly match nested template literals:
|
317
|
+
#
|
318
|
+
# ```javascript
|
319
|
+
# `foo ${`bar ${1+1}`}`
|
320
|
+
# ```
|
321
|
+
#
|
322
|
+
# @api private
|
323
|
+
#
|
324
|
+
# @since 0.1.1
|
325
|
+
JAVASCRIPT_TEMPLATE_LITERAL = /`(?:\\`|[^`])+`/m
|
326
|
+
|
284
327
|
#
|
285
328
|
# Passes every JavaScript string value to the given block.
|
286
329
|
#
|
@@ -293,15 +336,28 @@ module Ronin
|
|
293
336
|
#
|
294
337
|
# @example
|
295
338
|
# spider.every_javascript_string do |str|
|
296
|
-
#
|
297
|
-
#
|
339
|
+
# puts str
|
340
|
+
# end
|
298
341
|
#
|
299
342
|
# @api public
|
300
343
|
#
|
301
344
|
def every_javascript_string
|
302
345
|
every_javascript do |js|
|
303
|
-
|
304
|
-
|
346
|
+
scanner = StringScanner.new(js)
|
347
|
+
|
348
|
+
until scanner.eos?
|
349
|
+
# NOTE: this is a naive JavaScript string scanner and should
|
350
|
+
# eventually be replaced with a real JavaScript lexer or parser.
|
351
|
+
case scanner.peek(1)
|
352
|
+
when '"', "'" # beginning of a quoted string
|
353
|
+
js_string = scanner.scan(Support::Text::Patterns::STRING)
|
354
|
+
|
355
|
+
yield Support::Encoding::JS.unquote(js_string)
|
356
|
+
else
|
357
|
+
scanner.skip(JAVASCRIPT_INLINE_REGEX) ||
|
358
|
+
scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL) ||
|
359
|
+
scanner.getch
|
360
|
+
end
|
305
361
|
end
|
306
362
|
end
|
307
363
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#
|
2
3
|
# ronin-web-spider - A collection of common web spidering routines.
|
3
4
|
#
|
@@ -31,7 +32,7 @@ module Ronin
|
|
31
32
|
#
|
32
33
|
# require 'ronin/web/spider'
|
33
34
|
# require 'ronin/web/spider/archive'
|
34
|
-
#
|
35
|
+
#
|
35
36
|
# Ronin::Web::Spider::Archive.open('path/to/root') do |archive|
|
36
37
|
# Ronin::Web::Spider.every_page(host: 'example.com') do |page|
|
37
38
|
# archive.write(page.url,page.body)
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#
|
2
3
|
# ronin-web-spider - A collection of common web spidering routines.
|
3
4
|
#
|
@@ -33,7 +34,7 @@ module Ronin
|
|
33
34
|
# require 'ronin/web/spider'
|
34
35
|
# require 'ronin/web/spider/git_archive'
|
35
36
|
# require 'date'
|
36
|
-
#
|
37
|
+
#
|
37
38
|
# Ronin::Web::Spider::GitArchive.open('path/to/root') do |archive|
|
38
39
|
# archive.commit("Updated #{Date.today}") do
|
39
40
|
# Ronin::Web::Spider.every_page(host: 'example.com') do |page|
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#
|
2
3
|
# ronin-web-spider - A collection of common web spidering routines.
|
3
4
|
#
|
@@ -21,7 +22,7 @@ module Ronin
|
|
21
22
|
module Web
|
22
23
|
module Spider
|
23
24
|
# ronin-web-spider version
|
24
|
-
VERSION = '0.1.0'
|
25
|
+
VERSION = '0.1.1'
|
25
26
|
end
|
26
27
|
end
|
27
28
|
end
|
data/lib/ronin/web/spider.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#
|
2
3
|
# ronin-web-spider - A collection of common web spidering routines.
|
3
4
|
#
|
@@ -30,136 +31,136 @@ module Ronin
|
|
30
31
|
# ## Examples
|
31
32
|
#
|
32
33
|
# Spider a host:
|
33
|
-
#
|
34
|
+
#
|
34
35
|
# ```ruby
|
35
36
|
# require 'ronin/web/spider'
|
36
|
-
#
|
37
|
+
#
|
37
38
|
# Ronin::Web::Spider.start_at('http://tenderlovemaking.com/') do |agent|
|
38
39
|
# # ...
|
39
40
|
# end
|
40
41
|
# ```
|
41
|
-
#
|
42
|
+
#
|
42
43
|
# Spider a host:
|
43
|
-
#
|
44
|
+
#
|
44
45
|
# ```ruby
|
45
46
|
# Ronin::Web::Spider.host('solnic.eu') do |agent|
|
46
47
|
# # ...
|
47
48
|
# end
|
48
49
|
# ```
|
49
|
-
#
|
50
|
+
#
|
50
51
|
# Spider a domain (and any sub-domains):
|
51
|
-
#
|
52
|
+
#
|
52
53
|
# ```ruby
|
53
54
|
# Ronin::Web::Spider.domain('ruby-lang.org') do |agent|
|
54
55
|
# # ...
|
55
56
|
# end
|
56
57
|
# ```
|
57
|
-
#
|
58
|
+
#
|
58
59
|
# Spider a site:
|
59
|
-
#
|
60
|
+
#
|
60
61
|
# ```ruby
|
61
62
|
# Ronin::Web::Spider.site('http://www.rubyflow.com/') do |agent|
|
62
63
|
# # ...
|
63
64
|
# end
|
64
65
|
# ```
|
65
|
-
#
|
66
|
+
#
|
66
67
|
# Spider multiple hosts:
|
67
|
-
#
|
68
|
+
#
|
68
69
|
# ```ruby
|
69
70
|
# Ronin::Web::Spider.start_at('http://company.com/', hosts: ['company.com', /host[\d]+\.company\.com/]) do |agent|
|
70
71
|
# # ...
|
71
72
|
# end
|
72
73
|
# ```
|
73
|
-
#
|
74
|
+
#
|
74
75
|
# Do not spider certain links:
|
75
|
-
#
|
76
|
+
#
|
76
77
|
# ```ruby
|
77
78
|
# Ronin::Web::Spider.site('http://company.com/', ignore_links: [%r{^/blog/}]) do |agent|
|
78
79
|
# # ...
|
79
80
|
# end
|
80
81
|
# ```
|
81
|
-
#
|
82
|
+
#
|
82
83
|
# Do not spider links on certain ports:
|
83
|
-
#
|
84
|
+
#
|
84
85
|
# ```ruby
|
85
86
|
# Ronin::Web::Spider.site('http://company.com/', ignore_ports: [8000, 8010, 8080]) do |agent|
|
86
87
|
# # ...
|
87
88
|
# end
|
88
89
|
# ```
|
89
|
-
#
|
90
|
+
#
|
90
91
|
# Do not spider links blacklisted in robots.txt:
|
91
|
-
#
|
92
|
+
#
|
92
93
|
# ```ruby
|
93
94
|
# Ronin::Web::Spider.site('http://company.com/', robots: true) do |agent|
|
94
95
|
# # ...
|
95
96
|
# end
|
96
97
|
# ```
|
97
|
-
#
|
98
|
+
#
|
98
99
|
# Print out visited URLs:
|
99
|
-
#
|
100
|
+
#
|
100
101
|
# ```ruby
|
101
102
|
# Ronin::Web::Spider.site('http://www.rubyinside.com/') do |spider|
|
102
103
|
# spider.every_url { |url| puts url }
|
103
104
|
# end
|
104
105
|
# ```
|
105
|
-
#
|
106
|
+
#
|
106
107
|
# Build a URL map of a site:
|
107
|
-
#
|
108
|
+
#
|
108
109
|
# ```ruby
|
109
110
|
# url_map = Hash.new { |hash,key| hash[key] = [] }
|
110
|
-
#
|
111
|
+
#
|
111
112
|
# Ronin::Web::Spider.site('http://intranet.com/') do |spider|
|
112
113
|
# spider.every_link do |origin,dest|
|
113
114
|
# url_map[dest] << origin
|
114
115
|
# end
|
115
116
|
# end
|
116
117
|
# ```
|
117
|
-
#
|
118
|
+
#
|
118
119
|
# Print out the URLs that could not be requested:
|
119
|
-
#
|
120
|
+
#
|
120
121
|
# ```ruby
|
121
122
|
# Ronin::Web::Spider.site('http://company.com/') do |spider|
|
122
123
|
# spider.every_failed_url { |url| puts url }
|
123
124
|
# end
|
124
125
|
# ```
|
125
|
-
#
|
126
|
+
#
|
126
127
|
# Finds all pages which have broken links:
|
127
|
-
#
|
128
|
+
#
|
128
129
|
# ```ruby
|
129
130
|
# url_map = Hash.new { |hash,key| hash[key] = [] }
|
130
|
-
#
|
131
|
+
#
|
131
132
|
# spider = Ronin::Web::Spider.site('http://intranet.com/') do |spider|
|
132
133
|
# spider.every_link do |origin,dest|
|
133
134
|
# url_map[dest] << origin
|
134
135
|
# end
|
135
136
|
# end
|
136
|
-
#
|
137
|
+
#
|
137
138
|
# spider.failures.each do |url|
|
138
139
|
# puts "Broken link #{url} found in:"
|
139
|
-
#
|
140
|
+
#
|
140
141
|
# url_map[url].each { |page| puts " #{page}" }
|
141
142
|
# end
|
142
143
|
# ```
|
143
|
-
#
|
144
|
+
#
|
144
145
|
# Search HTML and XML pages:
|
145
|
-
#
|
146
|
+
#
|
146
147
|
# ```ruby
|
147
148
|
# Ronin::Web::Spider.site('http://company.com/') do |spider|
|
148
149
|
# spider.every_page do |page|
|
149
150
|
# puts ">>> #{page.url}"
|
150
|
-
#
|
151
|
+
#
|
151
152
|
# page.search('//meta').each do |meta|
|
152
153
|
# name = (meta.attributes['name'] || meta.attributes['http-equiv'])
|
153
154
|
# value = meta.attributes['content']
|
154
|
-
#
|
155
|
+
#
|
155
156
|
# puts " #{name} = #{value}"
|
156
157
|
# end
|
157
158
|
# end
|
158
159
|
# end
|
159
160
|
# ```
|
160
|
-
#
|
161
|
+
#
|
161
162
|
# Print out the titles from every page:
|
162
|
-
#
|
163
|
+
#
|
163
164
|
# ```ruby
|
164
165
|
# Ronin::Web::Spider.site('https://www.ruby-lang.org/') do |spider|
|
165
166
|
# spider.every_html_page do |page|
|
@@ -167,9 +168,9 @@ module Ronin
|
|
167
168
|
# end
|
168
169
|
# end
|
169
170
|
# ```
|
170
|
-
#
|
171
|
+
#
|
171
172
|
# Print out every HTTP redirect:
|
172
|
-
#
|
173
|
+
#
|
173
174
|
# ```ruby
|
174
175
|
# Ronin::Web::Spider.host('company.com') do |spider|
|
175
176
|
# spider.every_redirect_page do |page|
|
@@ -177,21 +178,21 @@ module Ronin
|
|
177
178
|
# end
|
178
179
|
# end
|
179
180
|
# ```
|
180
|
-
#
|
181
|
+
#
|
181
182
|
# Find what kinds of web servers a host is using, by accessing the headers:
|
182
|
-
#
|
183
|
+
#
|
183
184
|
# ```ruby
|
184
185
|
# servers = Set[]
|
185
|
-
#
|
186
|
+
#
|
186
187
|
# Ronin::Web::Spider.host('company.com') do |spider|
|
187
188
|
# spider.all_headers do |headers|
|
188
189
|
# servers << headers['server']
|
189
190
|
# end
|
190
191
|
# end
|
191
192
|
# ```
|
192
|
-
#
|
193
|
+
#
|
193
194
|
# Pause the spider on a forbidden page:
|
194
|
-
#
|
195
|
+
#
|
195
196
|
# ```ruby
|
196
197
|
# Ronin::Web::Spider.host('company.com') do |spider|
|
197
198
|
# spider.every_forbidden_page do |page|
|
@@ -199,9 +200,9 @@ module Ronin
|
|
199
200
|
# end
|
200
201
|
# end
|
201
202
|
# ```
|
202
|
-
#
|
203
|
+
#
|
203
204
|
# Skip the processing of a page:
|
204
|
-
#
|
205
|
+
#
|
205
206
|
# ```ruby
|
206
207
|
# Ronin::Web::Spider.host('company.com') do |spider|
|
207
208
|
# spider.every_missing_page do |page|
|
@@ -209,9 +210,9 @@ module Ronin
|
|
209
210
|
# end
|
210
211
|
# end
|
211
212
|
# ```
|
212
|
-
#
|
213
|
+
#
|
213
214
|
# Skip the processing of links:
|
214
|
-
#
|
215
|
+
#
|
215
216
|
# ```ruby
|
216
217
|
# Ronin::Web::Spider.host('company.com') do |spider|
|
217
218
|
# spider.every_url do |url|
|
@@ -221,9 +222,9 @@ module Ronin
|
|
221
222
|
# end
|
222
223
|
# end
|
223
224
|
# ```
|
224
|
-
#
|
225
|
+
#
|
225
226
|
# Detect when a new host name is spidered:
|
226
|
-
#
|
227
|
+
#
|
227
228
|
# ```ruby
|
228
229
|
# Ronin::Web::Spider.domain('example.com') do |spider|
|
229
230
|
# spider.every_host do |host|
|
@@ -231,9 +232,9 @@ module Ronin
|
|
231
232
|
# end
|
232
233
|
# end
|
233
234
|
# ```
|
234
|
-
#
|
235
|
+
#
|
235
236
|
# Detect when a new SSL/TLS certificate is encountered:
|
236
|
-
#
|
237
|
+
#
|
237
238
|
# ```ruby
|
238
239
|
# Ronin::Web::Spider.domain('example.com') do |spider|
|
239
240
|
# spider.every_cert do |cert|
|
@@ -241,9 +242,9 @@ module Ronin
|
|
241
242
|
# end
|
242
243
|
# end
|
243
244
|
# ```
|
244
|
-
#
|
245
|
+
#
|
245
246
|
# Print the MD5 checksum of every `favicon.ico` file:
|
246
|
-
#
|
247
|
+
#
|
247
248
|
# ```ruby
|
248
249
|
# Ronin::Web::Spider.domain('example.com') do |spider|
|
249
250
|
# spider.every_favicon do |page|
|
@@ -251,9 +252,9 @@ module Ronin
|
|
251
252
|
# end
|
252
253
|
# end
|
253
254
|
# ```
|
254
|
-
#
|
255
|
+
#
|
255
256
|
# Print every HTML comment:
|
256
|
-
#
|
257
|
+
#
|
257
258
|
# ```ruby
|
258
259
|
# Ronin::Web::Spider.domain('example.com') do |spider|
|
259
260
|
# spider.every_html_comment do |comment|
|
@@ -261,9 +262,9 @@ module Ronin
|
|
261
262
|
# end
|
262
263
|
# end
|
263
264
|
# ```
|
264
|
-
#
|
265
|
+
#
|
265
266
|
# Print all JavaScript source code:
|
266
|
-
#
|
267
|
+
#
|
267
268
|
# ```ruby
|
268
269
|
# Ronin::Web::Spider.domain('example.com') do |spider|
|
269
270
|
# spider.every_javascript do |js|
|
@@ -271,9 +272,9 @@ module Ronin
|
|
271
272
|
# end
|
272
273
|
# end
|
273
274
|
# ```
|
274
|
-
#
|
275
|
+
#
|
275
276
|
# Print every JavaScript string literal:
|
276
|
-
#
|
277
|
+
#
|
277
278
|
# ```ruby
|
278
279
|
# Ronin::Web::Spider.domain('example.com') do |spider|
|
279
280
|
# spider.every_javascript_string do |str|
|
@@ -281,9 +282,9 @@ module Ronin
|
|
281
282
|
# end
|
282
283
|
# end
|
283
284
|
# ```
|
284
|
-
#
|
285
|
+
#
|
285
286
|
# Print every JavaScript comment:
|
286
|
-
#
|
287
|
+
#
|
287
288
|
# ```ruby
|
288
289
|
# Ronin::Web::Spider.domain('example.com') do |spider|
|
289
290
|
# spider.every_javascript_comment do |comment|
|
@@ -291,9 +292,9 @@ module Ronin
|
|
291
292
|
# end
|
292
293
|
# end
|
293
294
|
# ```
|
294
|
-
#
|
295
|
+
#
|
295
296
|
# Print every HTML and JavaScript comment:
|
296
|
-
#
|
297
|
+
#
|
297
298
|
# ```ruby
|
298
299
|
# Ronin::Web::Spider.domain('example.com') do |spider|
|
299
300
|
# spider.every_comment do |comment|
|
@@ -301,7 +302,7 @@ module Ronin
|
|
301
302
|
# end
|
302
303
|
# end
|
303
304
|
# ```
|
304
|
-
#
|
305
|
+
#
|
305
306
|
module Spider
|
306
307
|
#
|
307
308
|
# Creates a new agent and begin spidering at the given URL.
|
data/ronin-web-spider.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'yaml'
|
4
4
|
|
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
gem.homepage = gemspec['homepage']
|
23
23
|
gem.metadata = gemspec['metadata'] if gemspec['metadata']
|
24
24
|
|
25
|
-
glob =
|
25
|
+
glob = ->(patterns) { gem.files & Dir[*patterns] }
|
26
26
|
|
27
27
|
gem.files = `git ls-files`.split($/)
|
28
28
|
gem.files = glob[gemspec['files']] if gemspec['files']
|
@@ -46,7 +46,7 @@ Gem::Specification.new do |gem|
|
|
46
46
|
gem.required_rubygems_version = gemspec['required_rubygems_version']
|
47
47
|
gem.post_install_message = gemspec['post_install_message']
|
48
48
|
|
49
|
-
split =
|
49
|
+
split = ->(string) { string.split(/,\s*/) }
|
50
50
|
|
51
51
|
if gemspec['dependencies']
|
52
52
|
gemspec['dependencies'].each do |name,versions|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ronin-web-spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- ".github/workflows/ruby.yml"
|
67
67
|
- ".gitignore"
|
68
68
|
- ".rspec"
|
69
|
+
- ".rubocop.yml"
|
69
70
|
- ".ruby-version"
|
70
71
|
- ".yardopts"
|
71
72
|
- COPYING.txt
|
@@ -105,8 +106,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
105
106
|
- !ruby/object:Gem::Version
|
106
107
|
version: '0'
|
107
108
|
requirements: []
|
108
|
-
rubygems_version: 3.3.
|
109
|
+
rubygems_version: 3.3.27
|
109
110
|
signing_key:
|
110
111
|
specification_version: 4
|
111
|
-
summary: collection of common web spidering routines
|
112
|
+
summary: A collection of common web spidering routines.
|
112
113
|
test_files: []
|