content_urls 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Gemfile +1 -1
- data/content_urls.gemspec +5 -5
- data/lib/content_urls/parsers/java_script_parser.rb +2 -3
- data/lib/content_urls/version.rb +1 -1
- data/spec/html_parser_spec.rb +32 -16
- data/spec/java_script_parser_spec.rb +2 -1
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
!binary "U0hBMQ==":
|
|
3
3
|
metadata.gz: !binary |-
|
|
4
|
-
|
|
4
|
+
MTUxOTdmMDIxMmQzZDQ4ZGE0YWVlNTk5YjBlOTAxM2NkOWZmOTk4Mg==
|
|
5
5
|
data.tar.gz: !binary |-
|
|
6
|
-
|
|
6
|
+
MGM3ODQ4MzQ2NTdiYzk1NWYxZDgyMzA1ZjNlOTMzYzZlMTM1Yjc5Yg==
|
|
7
7
|
!binary "U0hBNTEy":
|
|
8
8
|
metadata.gz: !binary |-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
ZTIxYjJhN2FkYzc5NTA1NTdjN2IxOTJmNGNmNzM4OWYwYWE5NjAzYzI4NDU1
|
|
10
|
+
ZWRhMDcyYzA0ZjY0OTI5NTY3MjZjMjZhNWRjOTYyNzJhYThjZmQ4MjcxNmI0
|
|
11
|
+
MjAzMGJhY2M4YTRkNDllMWI0YmE5NTkwMzljZGFlYWYzMTYyNmU=
|
|
12
12
|
data.tar.gz: !binary |-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
NTQ0MDg3YTJlYmRiZmVmNDU5NzUzYWQyMDFiNmI4YTU1YWE2YWNjODQ3MzQ0
|
|
14
|
+
NzAzNWY1ZTFiNjAyYWFkNTY1YjNmZjc4OTc2MTE1NTJkNTQwYjA3MzIzMDE4
|
|
15
|
+
OTg2NmQ4NTM2YTU3NTcyN2Q1ODRiN2I3MmU4N2NlNDBjODAzZTE=
|
data/Gemfile
CHANGED
data/content_urls.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = "content_urls"
|
|
8
|
-
s.version = "0.1.5"
|
|
8
|
+
s.version = "0.1.6"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Dennis Sutch"]
|
|
12
|
-
s.date = "2013-07-
|
|
12
|
+
s.date = "2013-07-16"
|
|
13
13
|
s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
|
|
14
14
|
s.email = "dennis@sutch.com"
|
|
15
15
|
s.extra_rdoc_files = [
|
|
@@ -46,7 +46,7 @@ Gem::Specification.new do |s|
|
|
|
46
46
|
|
|
47
47
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
48
48
|
s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
|
49
|
-
s.add_development_dependency(%q<rspec>, ["~> 2.
|
|
49
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.14.1"])
|
|
50
50
|
s.add_development_dependency(%q<yard>, ["~> 0.7"])
|
|
51
51
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
|
52
52
|
s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
|
|
@@ -54,7 +54,7 @@ Gem::Specification.new do |s|
|
|
|
54
54
|
s.add_development_dependency(%q<rake>, ["~> 10.1.0"])
|
|
55
55
|
else
|
|
56
56
|
s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
|
57
|
-
s.add_dependency(%q<rspec>, ["~> 2.
|
|
57
|
+
s.add_dependency(%q<rspec>, ["~> 2.14.1"])
|
|
58
58
|
s.add_dependency(%q<yard>, ["~> 0.7"])
|
|
59
59
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
|
60
60
|
s.add_dependency(%q<bundler>, ["~> 1.3.5"])
|
|
@@ -63,7 +63,7 @@ Gem::Specification.new do |s|
|
|
|
63
63
|
end
|
|
64
64
|
else
|
|
65
65
|
s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
|
66
|
-
s.add_dependency(%q<rspec>, ["~> 2.
|
|
66
|
+
s.add_dependency(%q<rspec>, ["~> 2.14.1"])
|
|
67
67
|
s.add_dependency(%q<yard>, ["~> 0.7"])
|
|
68
68
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
|
69
69
|
s.add_dependency(%q<bundler>, ["~> 1.3.5"])
|
data/lib/content_urls/parsers/java_script_parser.rb
CHANGED
|
@@ -46,12 +46,11 @@ class ContentUrls
|
|
|
46
46
|
rewritten += match.pre_match
|
|
47
47
|
replacement = url.nil? ? nil : (yield url)
|
|
48
48
|
if replacement.nil? or replacement == url # no change in URL
|
|
49
|
-
rewritten += url
|
|
50
|
-
remaining = url[1..-1] + match.post_match
|
|
49
|
+
rewritten += url
|
|
51
50
|
else
|
|
52
51
|
rewritten += replacement
|
|
53
|
-
remaining = match.post_match
|
|
54
52
|
end
|
|
53
|
+
remaining = match.post_match
|
|
55
54
|
else
|
|
56
55
|
rewritten += remaining
|
|
57
56
|
remaining = ''
|
data/lib/content_urls/version.rb
CHANGED
data/spec/html_parser_spec.rb
CHANGED
|
@@ -10,13 +10,14 @@ describe ContentUrls::HtmlParser do
|
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
describe ContentUrls::HtmlParser do
|
|
13
|
-
it "should return the
|
|
13
|
+
it "should return the URL in the content" do
|
|
14
14
|
ContentUrls::HtmlParser.urls("<a href='index.html").first.should eq 'index.html'
|
|
15
|
+
ContentUrls::HtmlParser.urls("<a href='index.html").count.should eq 1
|
|
15
16
|
end
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
describe ContentUrls::HtmlParser do
|
|
19
|
-
it "should parse HTML Sample 1 and return all a links" do
|
|
20
|
+
it "should parse HTML Sample 1 and return all a links and no other links" do
|
|
20
21
|
|
|
21
22
|
html_sample_1 =<<SAMPLE_1
|
|
22
23
|
<html>
|
|
@@ -36,6 +37,7 @@ SAMPLE_1
|
|
|
36
37
|
urls.include?('a-href-link-1.html').should eq true
|
|
37
38
|
urls.include?('http://www.example.com/1/2/3/a-href-link-2.html').should eq true
|
|
38
39
|
urls.include?('/folder/a-href-link-3.html?a=1').should eq true
|
|
40
|
+
urls.count.should eq 3
|
|
39
41
|
end
|
|
40
42
|
end
|
|
41
43
|
|
|
@@ -69,7 +71,7 @@ SAMPLE_1
|
|
|
69
71
|
end
|
|
70
72
|
|
|
71
73
|
describe ContentUrls::HtmlParser do
|
|
72
|
-
it "should parse HTML Sample 2 and return all 'area href' URLs" do
|
|
74
|
+
it "should parse HTML Sample 2 and return all 'area href' URLs and no other URLs" do
|
|
73
75
|
|
|
74
76
|
html_sample_2 =<<SAMPLE_2
|
|
75
77
|
<html>
|
|
@@ -78,7 +80,9 @@ html_sample_2 =<<SAMPLE_2
|
|
|
78
80
|
</head>
|
|
79
81
|
<body>
|
|
80
82
|
<h1>HTML Sample 2</h1>
|
|
81
|
-
|
|
83
|
+
<!-- commented out in order to not affect URL count
|
|
84
|
+
<img src="sample.gif" width="200" height="200" alt="Click somewhere" usemap="#sample-map">
|
|
85
|
+
-->
|
|
82
86
|
<map name="sample-map">
|
|
83
87
|
<area shape="rect" coords="0,0,100,100" href="area-href-link-1.html" alt="link 1">
|
|
84
88
|
<area shape="circle" coords="150,150,2" href="http://www.example.com/1/2/3/area-href-link-2.html" alt="link 2">
|
|
@@ -92,11 +96,12 @@ SAMPLE_2
|
|
|
92
96
|
urls.include?('area-href-link-1.html').should eq true
|
|
93
97
|
urls.include?('http://www.example.com/1/2/3/area-href-link-2.html').should eq true
|
|
94
98
|
urls.include?('/folder/area-href-link-3.html?a=1').should eq true
|
|
99
|
+
urls.count.should eq 3
|
|
95
100
|
end
|
|
96
101
|
end
|
|
97
102
|
|
|
98
103
|
describe ContentUrls::HtmlParser do
|
|
99
|
-
it "should parse HTML Sample 3 and return 'body background' URL" do
|
|
104
|
+
it "should parse HTML Sample 3 and return 'body background' URL and no other URLs" do
|
|
100
105
|
|
|
101
106
|
html_sample_3 =<<SAMPLE_3
|
|
102
107
|
<html>
|
|
@@ -111,11 +116,12 @@ SAMPLE_3
|
|
|
111
116
|
|
|
112
117
|
urls = ContentUrls::HtmlParser.urls(html_sample_3)
|
|
113
118
|
urls.first.should eq '/images/background.png'
|
|
119
|
+
urls.count.should eq 1
|
|
114
120
|
end
|
|
115
121
|
end
|
|
116
122
|
|
|
117
123
|
describe ContentUrls::HtmlParser do
|
|
118
|
-
it "should parse HTML Sample 4 and return 'embed src' URL" do
|
|
124
|
+
it "should parse HTML Sample 4 and return 'embed src' URL and no other URLs" do
|
|
119
125
|
|
|
120
126
|
html_sample_4 =<<SAMPLE_4
|
|
121
127
|
<html>
|
|
@@ -131,11 +137,12 @@ SAMPLE_4
|
|
|
131
137
|
|
|
132
138
|
urls = ContentUrls::HtmlParser.urls(html_sample_4)
|
|
133
139
|
urls.first.should eq 'sample.swf'
|
|
140
|
+
urls.count.should eq 1
|
|
134
141
|
end
|
|
135
142
|
end
|
|
136
143
|
|
|
137
144
|
describe ContentUrls::HtmlParser do
|
|
138
|
-
it "should parse HTML Sample 5 and return 'img src' URL" do
|
|
145
|
+
it "should parse HTML Sample 5 and return 'img src' URL and no other URLs" do
|
|
139
146
|
|
|
140
147
|
html_sample_5 =<<SAMPLE_5
|
|
141
148
|
<html>
|
|
@@ -151,11 +158,12 @@ SAMPLE_5
|
|
|
151
158
|
|
|
152
159
|
urls = ContentUrls::HtmlParser.urls(html_sample_5)
|
|
153
160
|
urls.first.should eq 'sample.gif'
|
|
161
|
+
urls.count.should eq 1
|
|
154
162
|
end
|
|
155
163
|
end
|
|
156
164
|
|
|
157
165
|
describe ContentUrls::HtmlParser do
|
|
158
|
-
it "should parse HTML Sample 6 and return 'link href' URL" do
|
|
166
|
+
it "should parse HTML Sample 6 and return 'link href' URL and no other URLs" do
|
|
159
167
|
|
|
160
168
|
html_sample_6 =<<SAMPLE_6
|
|
161
169
|
<html>
|
|
@@ -171,11 +179,12 @@ SAMPLE_6
|
|
|
171
179
|
|
|
172
180
|
urls = ContentUrls::HtmlParser.urls(html_sample_6)
|
|
173
181
|
urls.first.should eq '/index.php'
|
|
182
|
+
urls.count.should eq 1
|
|
174
183
|
end
|
|
175
184
|
end
|
|
176
185
|
|
|
177
186
|
describe ContentUrls::HtmlParser do
|
|
178
|
-
it "should parse HTML Sample 7 and return 'object data' URL" do
|
|
187
|
+
it "should parse HTML Sample 7 and return 'object data' URL and no other URLs" do
|
|
179
188
|
|
|
180
189
|
html_sample_7 =<<SAMPLE_7
|
|
181
190
|
<html>
|
|
@@ -191,11 +200,12 @@ SAMPLE_7
|
|
|
191
200
|
|
|
192
201
|
urls = ContentUrls::HtmlParser.urls(html_sample_7)
|
|
193
202
|
urls.first.should eq '/stuff/example.swf'
|
|
203
|
+
urls.count.should eq 1
|
|
194
204
|
end
|
|
195
205
|
end
|
|
196
206
|
|
|
197
207
|
describe ContentUrls::HtmlParser do
|
|
198
|
-
it "should parse HTML Sample 8 and return 'script src' URL" do
|
|
208
|
+
it "should parse HTML Sample 8 and return 'script src' URL and no other URLs" do
|
|
199
209
|
|
|
200
210
|
html_sample_8 =<<SAMPLE_8
|
|
201
211
|
<html>
|
|
@@ -211,11 +221,12 @@ SAMPLE_8
|
|
|
211
221
|
|
|
212
222
|
urls = ContentUrls::HtmlParser.urls(html_sample_8)
|
|
213
223
|
urls.first.should eq '../scripts/go.js'
|
|
224
|
+
urls.count.should eq 1
|
|
214
225
|
end
|
|
215
226
|
end
|
|
216
227
|
|
|
217
228
|
describe ContentUrls::HtmlParser do
|
|
218
|
-
it "should parse HTML Sample 9 and return 'meta content' URL" do
|
|
229
|
+
it "should parse HTML Sample 9 and return 'meta content' URL and no other URLs" do
|
|
219
230
|
|
|
220
231
|
html_sample_9 =<<SAMPLE_9
|
|
221
232
|
<html>
|
|
@@ -231,11 +242,12 @@ SAMPLE_9
|
|
|
231
242
|
|
|
232
243
|
urls = ContentUrls::HtmlParser.urls(html_sample_9)
|
|
233
244
|
urls.first.should eq 'http://example.com/'
|
|
245
|
+
urls.count.should eq 1
|
|
234
246
|
end
|
|
235
247
|
end
|
|
236
248
|
|
|
237
249
|
describe ContentUrls::HtmlParser do
|
|
238
|
-
it "should parse HTML Sample 10 and return URLs found within 'style' attributes" do
|
|
250
|
+
it "should parse HTML Sample 10 and return URLs found within 'style' attributes, and return no other URLs" do
|
|
239
251
|
|
|
240
252
|
html_sample_10 =<<SAMPLE_10
|
|
241
253
|
<html>
|
|
@@ -250,11 +262,12 @@ SAMPLE_10
|
|
|
250
262
|
|
|
251
263
|
urls = ContentUrls::HtmlParser.urls(html_sample_10)
|
|
252
264
|
urls.first.should eq 'background.jpg'
|
|
265
|
+
urls.count.should eq 1
|
|
253
266
|
end
|
|
254
267
|
end
|
|
255
268
|
|
|
256
269
|
describe ContentUrls::HtmlParser do
|
|
257
|
-
it "should parse HTML Sample 11 and return URLs found within 'style' tags" do
|
|
270
|
+
it "should parse HTML Sample 11 and return URLs found within 'style' tags, and return no other URLs" do
|
|
258
271
|
|
|
259
272
|
html_sample_11 =<<SAMPLE_11
|
|
260
273
|
<html>
|
|
@@ -272,11 +285,12 @@ SAMPLE_11
|
|
|
272
285
|
|
|
273
286
|
urls = ContentUrls::HtmlParser.urls(html_sample_11)
|
|
274
287
|
urls.first.should eq '/image/background.jpg'
|
|
288
|
+
urls.count.should eq 1
|
|
275
289
|
end
|
|
276
290
|
end
|
|
277
291
|
|
|
278
292
|
describe ContentUrls::HtmlParser do
|
|
279
|
-
it "should parse HTML Sample 12 and return URLs found within 'script' tags" do
|
|
293
|
+
it "should parse HTML Sample 12 and return URLs found within 'script' tags, and return no other URLs" do
|
|
280
294
|
|
|
281
295
|
html_sample_12 =<<SAMPLE_12
|
|
282
296
|
<html>
|
|
@@ -295,11 +309,12 @@ SAMPLE_12
|
|
|
295
309
|
|
|
296
310
|
urls = ContentUrls::HtmlParser.urls(html_sample_12)
|
|
297
311
|
urls.first.should eq 'http://www.sample.com/index.html'
|
|
312
|
+
urls.count.should eq 1
|
|
298
313
|
end
|
|
299
314
|
end
|
|
300
315
|
|
|
301
316
|
describe ContentUrls::HtmlParser do
|
|
302
|
-
it "should parse HTML Sample 13 and return 'frame src' URL" do
|
|
317
|
+
it "should parse HTML Sample 13 and return 'frame src' URL and no other URLs" do
|
|
303
318
|
|
|
304
319
|
html_sample_13 =<<SAMPLE_13
|
|
305
320
|
<html>
|
|
@@ -315,11 +330,12 @@ SAMPLE_13
|
|
|
315
330
|
|
|
316
331
|
urls = ContentUrls::HtmlParser.urls(html_sample_13)
|
|
317
332
|
urls.first.should eq '/info.html'
|
|
333
|
+
urls.count.should eq 1
|
|
318
334
|
end
|
|
319
335
|
end
|
|
320
336
|
|
|
321
337
|
describe ContentUrls::HtmlParser do
|
|
322
|
-
it "should parse the HTML and return the 'base' URL" do
|
|
338
|
+
it "should parse the HTML and return the 'base' URL and no other URLs" do
|
|
323
339
|
|
|
324
340
|
html_base_sample =<<BASE_SAMPLE
|
|
325
341
|
<html>
|
data/spec/java_script_parser_spec.rb
CHANGED
|
@@ -7,8 +7,9 @@ describe ContentUrls::JavaScriptParser do
|
|
|
7
7
|
end
|
|
8
8
|
|
|
9
9
|
describe ContentUrls::JavaScriptParser do
|
|
10
|
-
it "should return the URLs in the content" do
|
|
10
|
+
it "should return only the URLs in the content" do
|
|
11
11
|
ContentUrls::JavaScriptParser.urls('var link="http://www.sample.com/index.html"').first.should eq 'http://www.sample.com/index.html'
|
|
12
|
+
ContentUrls::JavaScriptParser.urls('var link="http://www.sample.com/index.html"').count.should eq 1
|
|
12
13
|
end
|
|
13
14
|
end
|
|
14
15
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: content_urls
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.5
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dennis Sutch
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2013-07-
|
|
11
|
+
date: 2013-07-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -30,14 +30,14 @@ dependencies:
|
|
|
30
30
|
requirements:
|
|
31
31
|
- - ~>
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: 2.
|
|
33
|
+
version: 2.14.1
|
|
34
34
|
type: :development
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - ~>
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: 2.
|
|
40
|
+
version: 2.14.1
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: yard
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|