content_urls 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/Gemfile +1 -1
- data/content_urls.gemspec +5 -5
- data/lib/content_urls/parsers/java_script_parser.rb +2 -3
- data/lib/content_urls/version.rb +1 -1
- data/spec/html_parser_spec.rb +32 -16
- data/spec/java_script_parser_spec.rb +2 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MTUxOTdmMDIxMmQzZDQ4ZGE0YWVlNTk5YjBlOTAxM2NkOWZmOTk4Mg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MGM3ODQ4MzQ2NTdiYzk1NWYxZDgyMzA1ZjNlOTMzYzZlMTM1Yjc5Yg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZTIxYjJhN2FkYzc5NTA1NTdjN2IxOTJmNGNmNzM4OWYwYWE5NjAzYzI4NDU1
|
10
|
+
ZWRhMDcyYzA0ZjY0OTI5NTY3MjZjMjZhNWRjOTYyNzJhYThjZmQ4MjcxNmI0
|
11
|
+
MjAzMGJhY2M4YTRkNDllMWI0YmE5NTkwMzljZGFlYWYzMTYyNmU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NTQ0MDg3YTJlYmRiZmVmNDU5NzUzYWQyMDFiNmI4YTU1YWE2YWNjODQ3MzQ0
|
14
|
+
NzAzNWY1ZTFiNjAyYWFkNTY1YjNmZjc4OTc2MTE1NTJkNTQwYjA3MzIzMDE4
|
15
|
+
OTg2NmQ4NTM2YTU3NTcyN2Q1ODRiN2I3MmU4N2NlNDBjODAzZTE=
|
data/Gemfile
CHANGED
data/content_urls.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "content_urls"
|
8
|
-
s.version = "0.1.
|
8
|
+
s.version = "0.1.6"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Dennis Sutch"]
|
12
|
-
s.date = "2013-07-
|
12
|
+
s.date = "2013-07-16"
|
13
13
|
s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
|
14
14
|
s.email = "dennis@sutch.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -46,7 +46,7 @@ Gem::Specification.new do |s|
|
|
46
46
|
|
47
47
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
48
48
|
s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
49
|
-
s.add_development_dependency(%q<rspec>, ["~> 2.
|
49
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.14.1"])
|
50
50
|
s.add_development_dependency(%q<yard>, ["~> 0.7"])
|
51
51
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
52
52
|
s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
|
@@ -54,7 +54,7 @@ Gem::Specification.new do |s|
|
|
54
54
|
s.add_development_dependency(%q<rake>, ["~> 10.1.0"])
|
55
55
|
else
|
56
56
|
s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
57
|
-
s.add_dependency(%q<rspec>, ["~> 2.
|
57
|
+
s.add_dependency(%q<rspec>, ["~> 2.14.1"])
|
58
58
|
s.add_dependency(%q<yard>, ["~> 0.7"])
|
59
59
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
60
60
|
s.add_dependency(%q<bundler>, ["~> 1.3.5"])
|
@@ -63,7 +63,7 @@ Gem::Specification.new do |s|
|
|
63
63
|
end
|
64
64
|
else
|
65
65
|
s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
66
|
-
s.add_dependency(%q<rspec>, ["~> 2.
|
66
|
+
s.add_dependency(%q<rspec>, ["~> 2.14.1"])
|
67
67
|
s.add_dependency(%q<yard>, ["~> 0.7"])
|
68
68
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
69
69
|
s.add_dependency(%q<bundler>, ["~> 1.3.5"])
|
@@ -46,12 +46,11 @@ class ContentUrls
|
|
46
46
|
rewritten += match.pre_match
|
47
47
|
replacement = url.nil? ? nil : (yield url)
|
48
48
|
if replacement.nil? or replacement == url # no change in URL
|
49
|
-
rewritten += url
|
50
|
-
remaining = url[1..-1] + match.post_match
|
49
|
+
rewritten += url
|
51
50
|
else
|
52
51
|
rewritten += replacement
|
53
|
-
remaining = match.post_match
|
54
52
|
end
|
53
|
+
remaining = match.post_match
|
55
54
|
else
|
56
55
|
rewritten += remaining
|
57
56
|
remaining = ''
|
data/lib/content_urls/version.rb
CHANGED
data/spec/html_parser_spec.rb
CHANGED
@@ -10,13 +10,14 @@ describe ContentUrls::HtmlParser do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
describe ContentUrls::HtmlParser do
|
13
|
-
it "should return the
|
13
|
+
it "should return the URL in the content" do
|
14
14
|
ContentUrls::HtmlParser.urls("<a href='index.html").first.should eq 'index.html'
|
15
|
+
ContentUrls::HtmlParser.urls("<a href='index.html").count.should eq 1
|
15
16
|
end
|
16
17
|
end
|
17
18
|
|
18
19
|
describe ContentUrls::HtmlParser do
|
19
|
-
it "should parse HTML Sample 1 and return all a links" do
|
20
|
+
it "should parse HTML Sample 1 and return all a links and no other links" do
|
20
21
|
|
21
22
|
html_sample_1 =<<SAMPLE_1
|
22
23
|
<html>
|
@@ -36,6 +37,7 @@ SAMPLE_1
|
|
36
37
|
urls.include?('a-href-link-1.html').should eq true
|
37
38
|
urls.include?('http://www.example.com/1/2/3/a-href-link-2.html').should eq true
|
38
39
|
urls.include?('/folder/a-href-link-3.html?a=1').should eq true
|
40
|
+
urls.count.should eq 3
|
39
41
|
end
|
40
42
|
end
|
41
43
|
|
@@ -69,7 +71,7 @@ SAMPLE_1
|
|
69
71
|
end
|
70
72
|
|
71
73
|
describe ContentUrls::HtmlParser do
|
72
|
-
it "should parse HTML Sample 2 and return all 'area href' URLs" do
|
74
|
+
it "should parse HTML Sample 2 and return all 'area href' URLs and no other URLs" do
|
73
75
|
|
74
76
|
html_sample_2 =<<SAMPLE_2
|
75
77
|
<html>
|
@@ -78,7 +80,9 @@ html_sample_2 =<<SAMPLE_2
|
|
78
80
|
</head>
|
79
81
|
<body>
|
80
82
|
<h1>HTML Sample 2</h1>
|
81
|
-
|
83
|
+
<!-- commented out in order to not affect URL count
|
84
|
+
<img src="sample.gif" width="200" height="200" alt="Click somewhere" usemap="#sample-map">
|
85
|
+
-->
|
82
86
|
<map name="sample-map">
|
83
87
|
<area shape="rect" coords="0,0,100,100" href="area-href-link-1.html" alt="link 1">
|
84
88
|
<area shape="circle" coords="150,150,2" href="http://www.example.com/1/2/3/area-href-link-2.html" alt="link 2">
|
@@ -92,11 +96,12 @@ SAMPLE_2
|
|
92
96
|
urls.include?('area-href-link-1.html').should eq true
|
93
97
|
urls.include?('http://www.example.com/1/2/3/area-href-link-2.html').should eq true
|
94
98
|
urls.include?('/folder/area-href-link-3.html?a=1').should eq true
|
99
|
+
urls.count.should eq 3
|
95
100
|
end
|
96
101
|
end
|
97
102
|
|
98
103
|
describe ContentUrls::HtmlParser do
|
99
|
-
it "should parse HTML Sample 3 and return 'body background' URL" do
|
104
|
+
it "should parse HTML Sample 3 and return 'body background' URL and no other URLs" do
|
100
105
|
|
101
106
|
html_sample_3 =<<SAMPLE_3
|
102
107
|
<html>
|
@@ -111,11 +116,12 @@ SAMPLE_3
|
|
111
116
|
|
112
117
|
urls = ContentUrls::HtmlParser.urls(html_sample_3)
|
113
118
|
urls.first.should eq '/images/background.png'
|
119
|
+
urls.count.should eq 1
|
114
120
|
end
|
115
121
|
end
|
116
122
|
|
117
123
|
describe ContentUrls::HtmlParser do
|
118
|
-
it "should parse HTML Sample 4 and return 'embed src' URL" do
|
124
|
+
it "should parse HTML Sample 4 and return 'embed src' URL and no other URLs" do
|
119
125
|
|
120
126
|
html_sample_4 =<<SAMPLE_4
|
121
127
|
<html>
|
@@ -131,11 +137,12 @@ SAMPLE_4
|
|
131
137
|
|
132
138
|
urls = ContentUrls::HtmlParser.urls(html_sample_4)
|
133
139
|
urls.first.should eq 'sample.swf'
|
140
|
+
urls.count.should eq 1
|
134
141
|
end
|
135
142
|
end
|
136
143
|
|
137
144
|
describe ContentUrls::HtmlParser do
|
138
|
-
it "should parse HTML Sample 5 and return 'img src' URL" do
|
145
|
+
it "should parse HTML Sample 5 and return 'img src' URL and no other URLs" do
|
139
146
|
|
140
147
|
html_sample_5 =<<SAMPLE_5
|
141
148
|
<html>
|
@@ -151,11 +158,12 @@ SAMPLE_5
|
|
151
158
|
|
152
159
|
urls = ContentUrls::HtmlParser.urls(html_sample_5)
|
153
160
|
urls.first.should eq 'sample.gif'
|
161
|
+
urls.count.should eq 1
|
154
162
|
end
|
155
163
|
end
|
156
164
|
|
157
165
|
describe ContentUrls::HtmlParser do
|
158
|
-
it "should parse HTML Sample 6 and return 'link href' URL" do
|
166
|
+
it "should parse HTML Sample 6 and return 'link href' URL and no other URLs" do
|
159
167
|
|
160
168
|
html_sample_6 =<<SAMPLE_6
|
161
169
|
<html>
|
@@ -171,11 +179,12 @@ SAMPLE_6
|
|
171
179
|
|
172
180
|
urls = ContentUrls::HtmlParser.urls(html_sample_6)
|
173
181
|
urls.first.should eq '/index.php'
|
182
|
+
urls.count.should eq 1
|
174
183
|
end
|
175
184
|
end
|
176
185
|
|
177
186
|
describe ContentUrls::HtmlParser do
|
178
|
-
it "should parse HTML Sample 7 and return 'object data' URL" do
|
187
|
+
it "should parse HTML Sample 7 and return 'object data' URL and no other URLs" do
|
179
188
|
|
180
189
|
html_sample_7 =<<SAMPLE_7
|
181
190
|
<html>
|
@@ -191,11 +200,12 @@ SAMPLE_7
|
|
191
200
|
|
192
201
|
urls = ContentUrls::HtmlParser.urls(html_sample_7)
|
193
202
|
urls.first.should eq '/stuff/example.swf'
|
203
|
+
urls.count.should eq 1
|
194
204
|
end
|
195
205
|
end
|
196
206
|
|
197
207
|
describe ContentUrls::HtmlParser do
|
198
|
-
it "should parse HTML Sample 8 and return 'script src' URL" do
|
208
|
+
it "should parse HTML Sample 8 and return 'script src' URL and no other URLs" do
|
199
209
|
|
200
210
|
html_sample_8 =<<SAMPLE_8
|
201
211
|
<html>
|
@@ -211,11 +221,12 @@ SAMPLE_8
|
|
211
221
|
|
212
222
|
urls = ContentUrls::HtmlParser.urls(html_sample_8)
|
213
223
|
urls.first.should eq '../scripts/go.js'
|
224
|
+
urls.count.should eq 1
|
214
225
|
end
|
215
226
|
end
|
216
227
|
|
217
228
|
describe ContentUrls::HtmlParser do
|
218
|
-
it "should parse HTML Sample 9 and return 'meta content' URL" do
|
229
|
+
it "should parse HTML Sample 9 and return 'meta content' URL and no other URLs" do
|
219
230
|
|
220
231
|
html_sample_9 =<<SAMPLE_9
|
221
232
|
<html>
|
@@ -231,11 +242,12 @@ SAMPLE_9
|
|
231
242
|
|
232
243
|
urls = ContentUrls::HtmlParser.urls(html_sample_9)
|
233
244
|
urls.first.should eq 'http://example.com/'
|
245
|
+
urls.count.should eq 1
|
234
246
|
end
|
235
247
|
end
|
236
248
|
|
237
249
|
describe ContentUrls::HtmlParser do
|
238
|
-
it "should parse HTML Sample 10 and return URLs found within 'style' attributes" do
|
250
|
+
it "should parse HTML Sample 10 and return URLs found within 'style' attributes, and return no other URLs" do
|
239
251
|
|
240
252
|
html_sample_10 =<<SAMPLE_10
|
241
253
|
<html>
|
@@ -250,11 +262,12 @@ SAMPLE_10
|
|
250
262
|
|
251
263
|
urls = ContentUrls::HtmlParser.urls(html_sample_10)
|
252
264
|
urls.first.should eq 'background.jpg'
|
265
|
+
urls.count.should eq 1
|
253
266
|
end
|
254
267
|
end
|
255
268
|
|
256
269
|
describe ContentUrls::HtmlParser do
|
257
|
-
it "should parse HTML Sample 11 and return URLs found within 'style' tags" do
|
270
|
+
it "should parse HTML Sample 11 and return URLs found within 'style' tags, and return no other URLs" do
|
258
271
|
|
259
272
|
html_sample_11 =<<SAMPLE_11
|
260
273
|
<html>
|
@@ -272,11 +285,12 @@ SAMPLE_11
|
|
272
285
|
|
273
286
|
urls = ContentUrls::HtmlParser.urls(html_sample_11)
|
274
287
|
urls.first.should eq '/image/background.jpg'
|
288
|
+
urls.count.should eq 1
|
275
289
|
end
|
276
290
|
end
|
277
291
|
|
278
292
|
describe ContentUrls::HtmlParser do
|
279
|
-
it "should parse HTML Sample 12 and return URLs found within 'script' tags" do
|
293
|
+
it "should parse HTML Sample 12 and return URLs found within 'script' tags, and return no other URLs" do
|
280
294
|
|
281
295
|
html_sample_12 =<<SAMPLE_12
|
282
296
|
<html>
|
@@ -295,11 +309,12 @@ SAMPLE_12
|
|
295
309
|
|
296
310
|
urls = ContentUrls::HtmlParser.urls(html_sample_12)
|
297
311
|
urls.first.should eq 'http://www.sample.com/index.html'
|
312
|
+
urls.count.should eq 1
|
298
313
|
end
|
299
314
|
end
|
300
315
|
|
301
316
|
describe ContentUrls::HtmlParser do
|
302
|
-
it "should parse HTML Sample 13 and return 'frame src' URL" do
|
317
|
+
it "should parse HTML Sample 13 and return 'frame src' URL and no other URLs" do
|
303
318
|
|
304
319
|
html_sample_13 =<<SAMPLE_13
|
305
320
|
<html>
|
@@ -315,11 +330,12 @@ SAMPLE_13
|
|
315
330
|
|
316
331
|
urls = ContentUrls::HtmlParser.urls(html_sample_13)
|
317
332
|
urls.first.should eq '/info.html'
|
333
|
+
urls.count.should eq 1
|
318
334
|
end
|
319
335
|
end
|
320
336
|
|
321
337
|
describe ContentUrls::HtmlParser do
|
322
|
-
it "should parse the HTML and return the 'base' URL" do
|
338
|
+
it "should parse the HTML and return the 'base' URL and no other URLs" do
|
323
339
|
|
324
340
|
html_base_sample =<<BASE_SAMPLE
|
325
341
|
<html>
|
@@ -7,8 +7,9 @@ describe ContentUrls::JavaScriptParser do
|
|
7
7
|
end
|
8
8
|
|
9
9
|
describe ContentUrls::JavaScriptParser do
|
10
|
-
it "should return the URLs in the content" do
|
10
|
+
it "should return only the URLs in the content" do
|
11
11
|
ContentUrls::JavaScriptParser.urls('var link="http://www.sample.com/index.html"').first.should eq 'http://www.sample.com/index.html'
|
12
|
+
ContentUrls::JavaScriptParser.urls('var link="http://www.sample.com/index.html"').count.should eq 1
|
12
13
|
end
|
13
14
|
end
|
14
15
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: content_urls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dennis Sutch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.
|
33
|
+
version: 2.14.1
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 2.
|
40
|
+
version: 2.14.1
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: yard
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|