ronin-web-spider 0.1.0.beta2 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +17 -5
- data/.rubocop.yml +11 -0
- data/.yardopts +1 -1
- data/ChangeLog.md +23 -1
- data/Gemfile +3 -0
- data/README.md +303 -32
- data/Rakefile +2 -2
- data/gemspec.yml +4 -4
- data/lib/ronin/web/spider/agent.rb +123 -7
- data/lib/ronin/web/spider/archive.rb +4 -0
- data/lib/ronin/web/spider/exceptions.rb +2 -1
- data/lib/ronin/web/spider/git_archive.rb +3 -2
- data/lib/ronin/web/spider/version.rb +3 -2
- data/lib/ronin/web/spider.rb +290 -1
- data/ronin-web-spider.gemspec +5 -4
- metadata +10 -19
- data/spec/agent_spec.rb +0 -585
- data/spec/archive_spec.rb +0 -91
- data/spec/example_app.rb +0 -27
- data/spec/git_archive_spec.rb +0 -137
- data/spec/spec_helper.rb +0 -4
- data/spec/spider_spec.rb +0 -252
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ronin-web-spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0.beta2
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.0
|
33
|
+
version: '1.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.0
|
40
|
+
version: '1.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- ".github/workflows/ruby.yml"
|
67
67
|
- ".gitignore"
|
68
68
|
- ".rspec"
|
69
|
+
- ".rubocop.yml"
|
69
70
|
- ".ruby-version"
|
70
71
|
- ".yardopts"
|
71
72
|
- COPYING.txt
|
@@ -81,20 +82,14 @@ files:
|
|
81
82
|
- lib/ronin/web/spider/git_archive.rb
|
82
83
|
- lib/ronin/web/spider/version.rb
|
83
84
|
- ronin-web-spider.gemspec
|
84
|
-
- spec/agent_spec.rb
|
85
|
-
- spec/archive_spec.rb
|
86
|
-
- spec/example_app.rb
|
87
|
-
- spec/git_archive_spec.rb
|
88
|
-
- spec/spec_helper.rb
|
89
|
-
- spec/spider_spec.rb
|
90
85
|
homepage: https://ronin-rb.dev/
|
91
86
|
licenses:
|
92
87
|
- LGPL-3.0
|
93
88
|
metadata:
|
94
|
-
documentation_uri: https://
|
89
|
+
documentation_uri: https://ronin-rb.dev/docs/ronin-web-spider
|
95
90
|
source_code_uri: https://github.com/ronin-rb/ronin-web-spider
|
96
91
|
bug_tracker_uri: https://github.com/ronin-rb/ronin-web-spider/issues
|
97
|
-
changelog_uri: https://github.com/ronin-rb/ronin-web-spider/blob/
|
92
|
+
changelog_uri: https://github.com/ronin-rb/ronin-web-spider/blob/main/ChangeLog.md
|
98
93
|
rubygems_mfa_required: 'true'
|
99
94
|
post_install_message:
|
100
95
|
rdoc_options: []
|
@@ -111,12 +106,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
106
|
- !ruby/object:Gem::Version
|
112
107
|
version: '0'
|
113
108
|
requirements: []
|
114
|
-
rubygems_version: 3.3.
|
109
|
+
rubygems_version: 3.3.27
|
115
110
|
signing_key:
|
116
111
|
specification_version: 4
|
117
|
-
summary: collection of common web spidering routines
|
118
|
-
test_files:
|
119
|
-
- spec/agent_spec.rb
|
120
|
-
- spec/archive_spec.rb
|
121
|
-
- spec/git_archive_spec.rb
|
122
|
-
- spec/spider_spec.rb
|
112
|
+
summary: A collection of common web spidering routines.
|
113
|
+
test_files: []
|
data/spec/agent_spec.rb
DELETED
@@ -1,585 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'ronin/web/spider/agent'
|
3
|
-
|
4
|
-
require 'webmock/rspec'
|
5
|
-
require 'sinatra/base'
|
6
|
-
|
7
|
-
describe Ronin::Web::Spider::Agent do
|
8
|
-
describe "#initialize" do
|
9
|
-
context "when Ronin::Support::Network::HTTP.proxy is set" do
|
10
|
-
let(:proxy_host) { 'example.com' }
|
11
|
-
let(:proxy_port) { 8080 }
|
12
|
-
let(:proxy_uri) { URI::HTTP.build(host: proxy_host, port: proxy_port) }
|
13
|
-
|
14
|
-
before { Ronin::Support::Network::HTTP.proxy = proxy_uri }
|
15
|
-
|
16
|
-
it "must parse ENV['RONIN_HTTP_USER_AGENT'] and set #proxy" do
|
17
|
-
expect(subject.proxy).to be_kind_of(Spidr::Proxy)
|
18
|
-
expect(subject.proxy.host).to eq(proxy_host)
|
19
|
-
expect(subject.proxy.port).to eq(proxy_port)
|
20
|
-
end
|
21
|
-
|
22
|
-
after { Ronin::Support::Network::HTTP.proxy = nil }
|
23
|
-
end
|
24
|
-
|
25
|
-
context "when Ronin::Support::Network::HTTP.user_agent is set" do
|
26
|
-
let(:user_agent) { 'Foo Bar' }
|
27
|
-
|
28
|
-
before { Ronin::Support::Network::HTTP.user_agent = user_agent }
|
29
|
-
|
30
|
-
it "must default #user_agent to ENV['RONIN_HTTP_USER_AGENT']" do
|
31
|
-
expect(subject.user_agent).to eq(user_agent)
|
32
|
-
end
|
33
|
-
|
34
|
-
after { Ronin::Support::Network::HTTP.user_agent = nil }
|
35
|
-
end
|
36
|
-
|
37
|
-
context "when given the proxy: keyword argument" do
|
38
|
-
let(:proxy_host) { 'example.com' }
|
39
|
-
let(:proxy_port) { 8080 }
|
40
|
-
|
41
|
-
context "and it's an Addressable::URI" do
|
42
|
-
let(:proxy) { Addressable::URI.new(host: proxy_host, port: proxy_port) }
|
43
|
-
|
44
|
-
subject { described_class.new(proxy: proxy) }
|
45
|
-
|
46
|
-
it "must convert it to a Spidr::Proxy object" do
|
47
|
-
expect(subject.proxy).to be_kind_of(Spidr::Proxy)
|
48
|
-
expect(subject.proxy.host).to eq(proxy_host)
|
49
|
-
expect(subject.proxy.port).to eq(proxy_port)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
context "and it's an URI::HTTP" do
|
54
|
-
let(:proxy) { URI::HTTP.build(host: proxy_host, port: proxy_port) }
|
55
|
-
|
56
|
-
subject { described_class.new(proxy: proxy) }
|
57
|
-
|
58
|
-
it "must convert it to a Spidr::Proxy object" do
|
59
|
-
expect(subject.proxy).to be_kind_of(Spidr::Proxy)
|
60
|
-
expect(subject.proxy.host).to eq(proxy_host)
|
61
|
-
expect(subject.proxy.port).to eq(proxy_port)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
context "and it's a Hash" do
|
66
|
-
let(:proxy) do
|
67
|
-
{host: proxy_host, port: proxy_port}
|
68
|
-
end
|
69
|
-
|
70
|
-
subject { described_class.new(proxy: proxy) }
|
71
|
-
|
72
|
-
it "must convert it to a Spidr::Proxy object" do
|
73
|
-
expect(subject.proxy).to be_kind_of(Spidr::Proxy)
|
74
|
-
expect(subject.proxy.host).to eq(proxy_host)
|
75
|
-
expect(subject.proxy.port).to eq(proxy_port)
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
context "and it's a String" do
|
80
|
-
let(:proxy) { "http://#{proxy_host}:#{proxy_port}" }
|
81
|
-
|
82
|
-
subject { described_class.new(proxy: proxy) }
|
83
|
-
|
84
|
-
it "must convert it to a Spidr::Proxy object" do
|
85
|
-
expect(subject.proxy).to be_kind_of(Spidr::Proxy)
|
86
|
-
expect(subject.proxy.host).to eq(proxy_host)
|
87
|
-
expect(subject.proxy.port).to eq(proxy_port)
|
88
|
-
end
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
context "when given the user_agent: keyword argument" do
|
93
|
-
context "and it's a String" do
|
94
|
-
let(:user_agent) { "test user-agent" }
|
95
|
-
|
96
|
-
subject { described_class.new(user_agent: user_agent) }
|
97
|
-
|
98
|
-
it "must set the #user_agent" do
|
99
|
-
expect(subject.user_agent).to eq(user_agent)
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
context "and it's a Symbol" do
|
104
|
-
let(:user_agent) { :chrome_linux }
|
105
|
-
let(:expected_user_agent) do
|
106
|
-
Ronin::Support::Network::HTTP::UserAgents[user_agent]
|
107
|
-
end
|
108
|
-
|
109
|
-
subject { described_class.new(user_agent: user_agent) }
|
110
|
-
|
111
|
-
it "must map the Symbol to one of Ronin::Support::Network::HTTP::UserAgents" do
|
112
|
-
expect(subject.user_agent).to eq(expected_user_agent)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
it "must default #visited_hosts to nil" do
|
118
|
-
expect(subject.visited_hosts).to be(nil)
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
describe "#every_host" do
|
123
|
-
module TestAgentEveryHost
|
124
|
-
class Host1 < Sinatra::Base
|
125
|
-
|
126
|
-
set :host, 'host1.example.com'
|
127
|
-
set :port, 80
|
128
|
-
|
129
|
-
get '/' do
|
130
|
-
<<~HTML
|
131
|
-
<html>
|
132
|
-
<body>
|
133
|
-
<a href="/link1">link1</a>
|
134
|
-
<a href="http://host2.example.com/offsite-link">offsite link</a>
|
135
|
-
<a href="/link2">link2</a>
|
136
|
-
</body>
|
137
|
-
</html>
|
138
|
-
HTML
|
139
|
-
end
|
140
|
-
|
141
|
-
get '/link1' do
|
142
|
-
'<html><body>got here</body></html>'
|
143
|
-
end
|
144
|
-
|
145
|
-
get '/link2' do
|
146
|
-
'<html><body>got here</body></html>'
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
class Host2 < Sinatra::Base
|
151
|
-
|
152
|
-
set :host, 'host2.example.com'
|
153
|
-
set :port, 80
|
154
|
-
|
155
|
-
get '/offsite-link' do
|
156
|
-
'<html><body>should not get here</body></html>'
|
157
|
-
end
|
158
|
-
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
let(:host1) { 'host1.example.com' }
|
163
|
-
let(:host2) { 'host2.example.com' }
|
164
|
-
|
165
|
-
let(:host1_app) { TestAgentEveryHost::Host1 }
|
166
|
-
let(:host2_app) { TestAgentEveryHost::Host2 }
|
167
|
-
|
168
|
-
before do
|
169
|
-
stub_request(:any, /#{Regexp.escape(host1)}/).to_rack(host1_app)
|
170
|
-
stub_request(:any, /#{Regexp.escape(host2)}/).to_rack(host2_app)
|
171
|
-
end
|
172
|
-
|
173
|
-
it "must yield every newly discovered hostname while spidering" do
|
174
|
-
yielded_hosts = []
|
175
|
-
|
176
|
-
subject.every_host do |host|
|
177
|
-
yielded_hosts << host
|
178
|
-
end
|
179
|
-
|
180
|
-
subject.start_at("http://#{host1}/")
|
181
|
-
|
182
|
-
expect(yielded_hosts).to eq([host1, host2])
|
183
|
-
end
|
184
|
-
|
185
|
-
it "must popualte #visited_hosts" do
|
186
|
-
subject.every_host { |host| }
|
187
|
-
subject.start_at("http://#{host1}/")
|
188
|
-
|
189
|
-
expect(subject.visited_hosts).to be_kind_of(Set)
|
190
|
-
expect(subject.visited_hosts.entries).to eq([host1, host2])
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
# TODO: need to figure out how to test #every_cert using webmock.
|
195
|
-
describe "#every_cert"
|
196
|
-
|
197
|
-
describe "#every_favicon" do
|
198
|
-
module TestAgentEveryHost
|
199
|
-
class TestApp < Sinatra::Base
|
200
|
-
|
201
|
-
set :host, 'example.com'
|
202
|
-
set :port, 80
|
203
|
-
|
204
|
-
get '/' do
|
205
|
-
<<~HTML
|
206
|
-
<html>
|
207
|
-
<head>
|
208
|
-
<link rel="favicon" href="/favicon1.ico" type="image/x-icon"/>
|
209
|
-
</head>
|
210
|
-
<body>
|
211
|
-
<a href="/link1">link1</a>
|
212
|
-
<a href="http://host2.example.com/offsite-link">offsite link</a>
|
213
|
-
<a href="/link2">link2</a>
|
214
|
-
</body>
|
215
|
-
</html>
|
216
|
-
HTML
|
217
|
-
end
|
218
|
-
|
219
|
-
get '/favicon1.ico' do
|
220
|
-
content_type 'image/x-icon'
|
221
|
-
|
222
|
-
"favicon1"
|
223
|
-
end
|
224
|
-
|
225
|
-
get '/favicon2.ico' do
|
226
|
-
content_type 'image/vnd.microsoft.icon'
|
227
|
-
|
228
|
-
"favicon2"
|
229
|
-
end
|
230
|
-
|
231
|
-
get '/link1' do
|
232
|
-
'<html><body>got here</body></html>'
|
233
|
-
end
|
234
|
-
|
235
|
-
get '/link2' do
|
236
|
-
<<~HTML
|
237
|
-
<html>
|
238
|
-
<head>
|
239
|
-
<link rel="favicon" href="/favicon2.ico" type="image/x-icon"/>
|
240
|
-
</head>
|
241
|
-
<body>got here</body>
|
242
|
-
</html>
|
243
|
-
HTML
|
244
|
-
end
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
let(:host) { 'example.com' }
|
249
|
-
|
250
|
-
let(:test_app) { TestAgentEveryHost::TestApp }
|
251
|
-
|
252
|
-
before do
|
253
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(test_app)
|
254
|
-
end
|
255
|
-
|
256
|
-
it "must yield Spidr::Page objects for each encountered .ico file" do
|
257
|
-
yielded_favicons = []
|
258
|
-
|
259
|
-
subject.every_favicon do |favicon|
|
260
|
-
yielded_favicons << favicon
|
261
|
-
end
|
262
|
-
|
263
|
-
subject.start_at("http://#{host}/")
|
264
|
-
|
265
|
-
expect(yielded_favicons).to_not be_empty
|
266
|
-
|
267
|
-
expect(yielded_favicons[0]).to be_kind_of(Spidr::Page)
|
268
|
-
expect(yielded_favicons[0].content_type).to eq('image/x-icon')
|
269
|
-
expect(yielded_favicons[0].url).to eq(URI("http://#{host}/favicon1.ico"))
|
270
|
-
|
271
|
-
expect(yielded_favicons[1]).to be_kind_of(Spidr::Page)
|
272
|
-
expect(yielded_favicons[1].content_type).to eq('image/vnd.microsoft.icon')
|
273
|
-
expect(yielded_favicons[1].url).to eq(URI("http://#{host}/favicon2.ico"))
|
274
|
-
end
|
275
|
-
end
|
276
|
-
|
277
|
-
describe "#every_html_comment" do
|
278
|
-
module TestAgentEveryHTMLComment
|
279
|
-
class TestApp < Sinatra::Base
|
280
|
-
|
281
|
-
set :host, 'example.com'
|
282
|
-
set :port, 80
|
283
|
-
|
284
|
-
get '/' do
|
285
|
-
<<~HTML
|
286
|
-
<html>
|
287
|
-
<head>
|
288
|
-
<!-- comment 1 -->
|
289
|
-
</head>
|
290
|
-
<!-- -->
|
291
|
-
<body>
|
292
|
-
<!-- comment 2 -->
|
293
|
-
</body>
|
294
|
-
</html>
|
295
|
-
HTML
|
296
|
-
end
|
297
|
-
end
|
298
|
-
end
|
299
|
-
|
300
|
-
let(:host) { 'example.com' }
|
301
|
-
|
302
|
-
let(:test_app) { TestAgentEveryHTMLComment::TestApp }
|
303
|
-
|
304
|
-
before do
|
305
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(test_app)
|
306
|
-
end
|
307
|
-
|
308
|
-
it "must yield every non-empty/non-whitespace HTML comment String" do
|
309
|
-
yielded_comments = []
|
310
|
-
|
311
|
-
subject.every_html_comment do |comment|
|
312
|
-
yielded_comments << comment
|
313
|
-
end
|
314
|
-
|
315
|
-
subject.start_at("http://#{host}/")
|
316
|
-
|
317
|
-
expect(yielded_comments).to match_array(
|
318
|
-
[
|
319
|
-
'comment 1',
|
320
|
-
'comment 2'
|
321
|
-
]
|
322
|
-
)
|
323
|
-
end
|
324
|
-
end
|
325
|
-
|
326
|
-
describe "#every_javascript" do
|
327
|
-
module TestAgentEveryJavaScript
|
328
|
-
class TestApp < Sinatra::Base
|
329
|
-
|
330
|
-
set :host, 'example.com'
|
331
|
-
set :port, 80
|
332
|
-
|
333
|
-
get '/' do
|
334
|
-
<<~HTML
|
335
|
-
<html>
|
336
|
-
<head>
|
337
|
-
<script type="text/javascript" src="/javascript1.js"></script>
|
338
|
-
<script type="text/javascript">javascript2</script>
|
339
|
-
</head>
|
340
|
-
<body>
|
341
|
-
<a href="/link1">link1</a>
|
342
|
-
<a href="http://host2.example.com/offsite-link">offsite link</a>
|
343
|
-
<a href="/link2">link2</a>
|
344
|
-
</body>
|
345
|
-
</html>
|
346
|
-
HTML
|
347
|
-
end
|
348
|
-
|
349
|
-
get '/javascript1.js' do
|
350
|
-
content_type 'text/javascript'
|
351
|
-
"javascript1"
|
352
|
-
end
|
353
|
-
end
|
354
|
-
end
|
355
|
-
|
356
|
-
let(:host) { 'example.com' }
|
357
|
-
|
358
|
-
let(:test_app) { TestAgentEveryJavaScript::TestApp }
|
359
|
-
|
360
|
-
before do
|
361
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(test_app)
|
362
|
-
end
|
363
|
-
|
364
|
-
it "must yield both the contents of .js files and inline <script> tags" do
|
365
|
-
yielded_javascripts = []
|
366
|
-
|
367
|
-
subject.every_javascript do |js|
|
368
|
-
yielded_javascripts << js
|
369
|
-
end
|
370
|
-
|
371
|
-
subject.start_at("http://#{host}/")
|
372
|
-
|
373
|
-
expect(yielded_javascripts).to match_array(%w[javascript1 javascript2])
|
374
|
-
end
|
375
|
-
end
|
376
|
-
|
377
|
-
describe "#every_javascript_string" do
|
378
|
-
module TestAgentEveryJavaScriptString
|
379
|
-
class TestApp < Sinatra::Base
|
380
|
-
|
381
|
-
set :host, 'example.com'
|
382
|
-
set :port, 80
|
383
|
-
|
384
|
-
get '/' do
|
385
|
-
<<~HTML
|
386
|
-
<html>
|
387
|
-
<head>
|
388
|
-
<script type="text/javascript" src="/javascript1.js"></script>
|
389
|
-
<script type="text/javascript">
|
390
|
-
var str3 = "string #3";
|
391
|
-
var str4 = 'string #4';
|
392
|
-
</script>
|
393
|
-
</head>
|
394
|
-
<body>
|
395
|
-
<a href="/link1">link1</a>
|
396
|
-
<a href="http://host2.example.com/offsite-link">offsite link</a>
|
397
|
-
<a href="/link2">link2</a>
|
398
|
-
</body>
|
399
|
-
</html>
|
400
|
-
HTML
|
401
|
-
end
|
402
|
-
|
403
|
-
get '/javascript1.js' do
|
404
|
-
content_type 'text/javascript'
|
405
|
-
<<~JS
|
406
|
-
var str1 = "string #1";
|
407
|
-
var str2 = 'string #2';
|
408
|
-
JS
|
409
|
-
end
|
410
|
-
end
|
411
|
-
end
|
412
|
-
|
413
|
-
let(:host) { 'example.com' }
|
414
|
-
|
415
|
-
let(:test_app) { TestAgentEveryJavaScriptString::TestApp }
|
416
|
-
|
417
|
-
before do
|
418
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(test_app)
|
419
|
-
end
|
420
|
-
|
421
|
-
it "must yield every JavaScript string from any <script> tag" do
|
422
|
-
yielded_javascript_strings = []
|
423
|
-
|
424
|
-
subject.every_javascript_string do |string|
|
425
|
-
yielded_javascript_strings << string
|
426
|
-
end
|
427
|
-
|
428
|
-
subject.start_at("http://#{host}/")
|
429
|
-
|
430
|
-
expect(yielded_javascript_strings).to match_array(
|
431
|
-
[
|
432
|
-
'string #1',
|
433
|
-
'string #2',
|
434
|
-
'string #3',
|
435
|
-
'string #4'
|
436
|
-
]
|
437
|
-
)
|
438
|
-
end
|
439
|
-
end
|
440
|
-
|
441
|
-
describe "#every_javascript_comment" do
|
442
|
-
module TestAgentEveryJavaScriptComment
|
443
|
-
class TestApp < Sinatra::Base
|
444
|
-
|
445
|
-
set :host, 'example.com'
|
446
|
-
set :port, 80
|
447
|
-
|
448
|
-
get '/' do
|
449
|
-
<<~HTML
|
450
|
-
<html>
|
451
|
-
<head>
|
452
|
-
<script type="text/javascript" src="/javascript1.js"></script>
|
453
|
-
<script type="text/javascript">
|
454
|
-
// comment 3
|
455
|
-
var str3 = "string #3";
|
456
|
-
/*
|
457
|
-
comment 4
|
458
|
-
*/
|
459
|
-
var str4 = 'string #4';
|
460
|
-
</script>
|
461
|
-
</head>
|
462
|
-
<body>
|
463
|
-
<a href="/link1">link1</a>
|
464
|
-
<a href="http://host2.example.com/offsite-link">offsite link</a>
|
465
|
-
<a href="/link2">link2</a>
|
466
|
-
</body>
|
467
|
-
</html>
|
468
|
-
HTML
|
469
|
-
end
|
470
|
-
|
471
|
-
get '/javascript1.js' do
|
472
|
-
content_type 'text/javascript'
|
473
|
-
<<~JS
|
474
|
-
// comment 1
|
475
|
-
var str1 = "string #1";
|
476
|
-
/* comment 2 */
|
477
|
-
var str2 = 'string #2';
|
478
|
-
JS
|
479
|
-
end
|
480
|
-
end
|
481
|
-
end
|
482
|
-
|
483
|
-
let(:host) { 'example.com' }
|
484
|
-
|
485
|
-
let(:test_app) { TestAgentEveryJavaScriptComment::TestApp }
|
486
|
-
|
487
|
-
before do
|
488
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(test_app)
|
489
|
-
end
|
490
|
-
|
491
|
-
it "must yield every JavaScript comment from any <script> tag" do
|
492
|
-
yielded_javascript_comments = []
|
493
|
-
|
494
|
-
subject.every_javascript_comment do |comment|
|
495
|
-
yielded_javascript_comments << comment
|
496
|
-
end
|
497
|
-
|
498
|
-
subject.start_at("http://#{host}/")
|
499
|
-
|
500
|
-
expect(yielded_javascript_comments).to match_array(
|
501
|
-
[
|
502
|
-
"// comment 1\n",
|
503
|
-
"/* comment 2 */",
|
504
|
-
"// comment 3\n",
|
505
|
-
"/*\n comment 4\n */"
|
506
|
-
]
|
507
|
-
)
|
508
|
-
end
|
509
|
-
end
|
510
|
-
|
511
|
-
describe "#every_comment" do
|
512
|
-
module TestAgentEveryComment
|
513
|
-
class TestApp < Sinatra::Base
|
514
|
-
|
515
|
-
set :host, 'example.com'
|
516
|
-
set :port, 80
|
517
|
-
|
518
|
-
get '/' do
|
519
|
-
<<~HTML
|
520
|
-
<html>
|
521
|
-
<head>
|
522
|
-
<!-- HTML comment 1 -->
|
523
|
-
<script type="text/javascript" src="/javascript1.js"></script>
|
524
|
-
<script type="text/javascript">
|
525
|
-
// JavaScript comment 3
|
526
|
-
var str3 = "string #3";
|
527
|
-
/*
|
528
|
-
JavaScript comment 4
|
529
|
-
*/
|
530
|
-
var str4 = 'string #4';
|
531
|
-
</script>
|
532
|
-
</head>
|
533
|
-
<!-- -->
|
534
|
-
<body>
|
535
|
-
<!-- HTML comment 2 -->
|
536
|
-
<a href="/link1">link1</a>
|
537
|
-
<a href="http://host2.example.com/offsite-link">offsite link</a>
|
538
|
-
<a href="/link2">link2</a>
|
539
|
-
</body>
|
540
|
-
</html>
|
541
|
-
HTML
|
542
|
-
end
|
543
|
-
|
544
|
-
get '/javascript1.js' do
|
545
|
-
content_type 'text/javascript'
|
546
|
-
<<~JS
|
547
|
-
// JavaScript comment 1
|
548
|
-
var str1 = "string #1";
|
549
|
-
/* JavaScript comment 2 */
|
550
|
-
var str2 = 'string #2';
|
551
|
-
JS
|
552
|
-
end
|
553
|
-
end
|
554
|
-
end
|
555
|
-
|
556
|
-
let(:host) { 'example.com' }
|
557
|
-
|
558
|
-
let(:test_app) { TestAgentEveryComment::TestApp }
|
559
|
-
|
560
|
-
before do
|
561
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(test_app)
|
562
|
-
end
|
563
|
-
|
564
|
-
it "must yield every HTML and JavaScript comment from any <script> tag" do
|
565
|
-
yielded_comments = []
|
566
|
-
|
567
|
-
subject.every_comment do |comment|
|
568
|
-
yielded_comments << comment
|
569
|
-
end
|
570
|
-
|
571
|
-
subject.start_at("http://#{host}/")
|
572
|
-
|
573
|
-
expect(yielded_comments).to match_array(
|
574
|
-
[
|
575
|
-
"HTML comment 1",
|
576
|
-
"// JavaScript comment 1\n",
|
577
|
-
"/* JavaScript comment 2 */",
|
578
|
-
"// JavaScript comment 3\n",
|
579
|
-
"/*\n JavaScript comment 4\n */",
|
580
|
-
"HTML comment 2"
|
581
|
-
]
|
582
|
-
)
|
583
|
-
end
|
584
|
-
end
|
585
|
-
end
|