content_urls 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.document +0 -0
- data/.rspec +0 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +0 -0
- data/README.rdoc +2 -1
- data/Rakefile +0 -0
- data/content_urls.gemspec +80 -74
- data/lib/content_urls/parsers/css_parser.rb +174 -29
- data/lib/content_urls/parsers/html_parser.rb +1 -1
- data/lib/content_urls/parsers/java_script_parser.rb +24 -18
- data/lib/content_urls/version.rb +1 -1
- data/lib/content_urls.rb +1 -0
- data/spec/content_urls_spec.rb +0 -0
- data/spec/css_parser_spec.rb +16 -0
- data/spec/html_parser_spec.rb +0 -0
- data/spec/java_script_parser_spec.rb +18 -0
- data/spec/spec_helper.rb +0 -0
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OTA3MGM5MWE0ZTY5MTA0M2E2MTQyMTI5ZWEwODA0NzZiY2M1YTI5Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MGY3ZjAzZTQ0NGM5NTRiZDBiMTk3NWQ4Y2EzNWNkZGRjZjE2YjIwMw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YTY2NzBlZTIzYjI1MGRlMmU1YTQyN2I2Y2E2ZDgyOGMyOTY0Y2Y0NTY2ZDMz
|
10
|
+
ZmQ0N2JlMTYyMDlmNDdlY2M4MmYwNjQ3MzkzYzZkNzllNjg5YzY0NGYwYzM1
|
11
|
+
OGM1NWQzN2JmYWQ3Yzg4ZWZmZmZlODY1ZmI1YzUzYzZjNWU5NDI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
Y2JhOTRhNGYzODRlNmQxNzEyYTc4NTA0YWJmZWRmYzNkOGNjYTMzMTU0MDc5
|
14
|
+
MjY1NmQ2OWQ3YjQ2ZDkyMmRlYTY2OGI2NzI0MTJhY2FlM2VmOGE5MzQyMTk0
|
15
|
+
NTYzMjc4OWI1NzljZjRiNjM1ZGI2NDg0YjcxZDZhOGNiNDZlZWI=
|
data/.document
CHANGED
File without changes
|
data/.rspec
CHANGED
File without changes
|
data/Gemfile
CHANGED
data/LICENSE.txt
CHANGED
File without changes
|
data/README.rdoc
CHANGED
@@ -43,13 +43,14 @@ Provide the HTML content, the content type, and a block to rewrite each URL's ex
|
|
43
43
|
|
44
44
|
== Requirements
|
45
45
|
* nokogiri
|
46
|
+
* css_parser
|
47
|
+
* rkelly
|
46
48
|
|
47
49
|
== Development
|
48
50
|
To test and develop this gem, additional requirements are:
|
49
51
|
* bundler
|
50
52
|
* jeweler
|
51
53
|
* rake
|
52
|
-
* rcov
|
53
54
|
* rdoc
|
54
55
|
* rspec
|
55
56
|
* yard
|
data/Rakefile
CHANGED
File without changes
|
data/content_urls.gemspec
CHANGED
@@ -1,74 +1,80 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in
|
4
|
-
# -*- encoding: utf-8 -*-
|
5
|
-
|
6
|
-
Gem::Specification.new do |s|
|
7
|
-
s.name = "content_urls"
|
8
|
-
s.version = "0.1.
|
9
|
-
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["Dennis Sutch"]
|
12
|
-
s.date = "2013-
|
13
|
-
s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
|
14
|
-
s.email = "dennis@sutch.com"
|
15
|
-
s.extra_rdoc_files = [
|
16
|
-
"LICENSE.txt",
|
17
|
-
"README.rdoc"
|
18
|
-
]
|
19
|
-
s.files = [
|
20
|
-
".document",
|
21
|
-
".rspec",
|
22
|
-
"Gemfile",
|
23
|
-
"LICENSE.txt",
|
24
|
-
"README.rdoc",
|
25
|
-
"Rakefile",
|
26
|
-
"content_urls.gemspec",
|
27
|
-
"lib/content_urls.rb",
|
28
|
-
"lib/content_urls/parsers/css_parser.rb",
|
29
|
-
"lib/content_urls/parsers/html_parser.rb",
|
30
|
-
"lib/content_urls/parsers/java_script_parser.rb",
|
31
|
-
"lib/content_urls/version.rb",
|
32
|
-
"spec/content_urls_spec.rb",
|
33
|
-
"spec/css_parser_spec.rb",
|
34
|
-
"spec/html_parser_spec.rb",
|
35
|
-
"spec/java_script_parser_spec.rb",
|
36
|
-
"spec/spec_helper.rb"
|
37
|
-
]
|
38
|
-
s.homepage = "http://github.com/sutch/content_urls"
|
39
|
-
s.licenses = ["MIT"]
|
40
|
-
s.require_paths = ["lib"]
|
41
|
-
s.rubygems_version = "2.0.
|
42
|
-
s.summary = "Find and rewrite URLs in different types of content."
|
43
|
-
|
44
|
-
if s.respond_to? :specification_version then
|
45
|
-
s.specification_version = 4
|
46
|
-
|
47
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
48
|
-
s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
49
|
-
s.
|
50
|
-
s.
|
51
|
-
s.add_development_dependency(%q<
|
52
|
-
s.add_development_dependency(%q<
|
53
|
-
s.add_development_dependency(%q<
|
54
|
-
s.add_development_dependency(%q<
|
55
|
-
|
56
|
-
s.
|
57
|
-
|
58
|
-
s.add_dependency(%q<
|
59
|
-
s.add_dependency(%q<
|
60
|
-
s.add_dependency(%q<
|
61
|
-
s.add_dependency(%q<
|
62
|
-
s.add_dependency(%q<
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
s.add_dependency(%q<
|
70
|
-
s.add_dependency(%q<
|
71
|
-
s.add_dependency(%q<
|
72
|
-
|
73
|
-
|
74
|
-
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "content_urls"
|
8
|
+
s.version = "0.1.8"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Dennis Sutch"]
|
12
|
+
s.date = "2013-08-01"
|
13
|
+
s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
|
14
|
+
s.email = "dennis@sutch.com"
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".rspec",
|
22
|
+
"Gemfile",
|
23
|
+
"LICENSE.txt",
|
24
|
+
"README.rdoc",
|
25
|
+
"Rakefile",
|
26
|
+
"content_urls.gemspec",
|
27
|
+
"lib/content_urls.rb",
|
28
|
+
"lib/content_urls/parsers/css_parser.rb",
|
29
|
+
"lib/content_urls/parsers/html_parser.rb",
|
30
|
+
"lib/content_urls/parsers/java_script_parser.rb",
|
31
|
+
"lib/content_urls/version.rb",
|
32
|
+
"spec/content_urls_spec.rb",
|
33
|
+
"spec/css_parser_spec.rb",
|
34
|
+
"spec/html_parser_spec.rb",
|
35
|
+
"spec/java_script_parser_spec.rb",
|
36
|
+
"spec/spec_helper.rb"
|
37
|
+
]
|
38
|
+
s.homepage = "http://github.com/sutch/content_urls"
|
39
|
+
s.licenses = ["MIT"]
|
40
|
+
s.require_paths = ["lib"]
|
41
|
+
s.rubygems_version = "2.0.6"
|
42
|
+
s.summary = "Find and rewrite URLs in different types of content."
|
43
|
+
|
44
|
+
if s.respond_to? :specification_version then
|
45
|
+
s.specification_version = 4
|
46
|
+
|
47
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
48
|
+
s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
49
|
+
s.add_runtime_dependency(%q<css_parser>, ["~> 1.3.4"])
|
50
|
+
s.add_runtime_dependency(%q<rkelly>, ["~> 1.0.7"])
|
51
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.14.1"])
|
52
|
+
s.add_development_dependency(%q<yard>, ["~> 0.7"])
|
53
|
+
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
54
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
|
55
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.6"])
|
56
|
+
s.add_development_dependency(%q<rake>, ["~> 10.1.0"])
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
59
|
+
s.add_dependency(%q<css_parser>, ["~> 1.3.4"])
|
60
|
+
s.add_dependency(%q<rkelly>, ["~> 1.0.7"])
|
61
|
+
s.add_dependency(%q<rspec>, ["~> 2.14.1"])
|
62
|
+
s.add_dependency(%q<yard>, ["~> 0.7"])
|
63
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
64
|
+
s.add_dependency(%q<bundler>, ["~> 1.3.5"])
|
65
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
|
66
|
+
s.add_dependency(%q<rake>, ["~> 10.1.0"])
|
67
|
+
end
|
68
|
+
else
|
69
|
+
s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
70
|
+
s.add_dependency(%q<css_parser>, ["~> 1.3.4"])
|
71
|
+
s.add_dependency(%q<rkelly>, ["~> 1.0.7"])
|
72
|
+
s.add_dependency(%q<rspec>, ["~> 2.14.1"])
|
73
|
+
s.add_dependency(%q<yard>, ["~> 0.7"])
|
74
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
75
|
+
s.add_dependency(%q<bundler>, ["~> 1.3.5"])
|
76
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
|
77
|
+
s.add_dependency(%q<rake>, ["~> 10.1.0"])
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'css_parser'
|
2
|
+
|
1
3
|
class ContentUrls
|
2
4
|
|
3
5
|
# +CssParser+ finds and rewrites URLs in CSS content.
|
@@ -17,25 +19,12 @@ class ContentUrls
|
|
17
19
|
# puts "Found URL: #{url}"
|
18
20
|
# end
|
19
21
|
# # => "Found URL: /images/rainbows.jpg"
|
22
|
+
#
|
20
23
|
def self.urls(content)
|
21
24
|
urls = []
|
22
|
-
|
23
|
-
while ! remaining.empty?
|
24
|
-
if @@regex_uri =~ remaining
|
25
|
-
match = $1
|
26
|
-
url = $7 || $14 || $23
|
27
|
-
#if @@regex_baduri =~ match ## bad URL
|
28
|
-
# remaining = remaining[Regexp.last_match.begin(0)+1..-1] # Use last_match from regex_uri test
|
29
|
-
#else
|
30
|
-
remaining = Regexp.last_match.post_match
|
31
|
-
urls << url
|
32
|
-
#end
|
33
|
-
else
|
34
|
-
remaining = ''
|
35
|
-
end
|
36
|
-
end
|
25
|
+
rewrite_each_url(content) { |url| urls << url; url }
|
37
26
|
urls.uniq!
|
38
|
-
urls
|
27
|
+
urls
|
39
28
|
end
|
40
29
|
|
41
30
|
# Rewrites each URL in the CSS content by calling the supplied block with each URL.
|
@@ -49,22 +38,177 @@ class ContentUrls
|
|
49
38
|
# # => "Rewritten: body { background: url(/images/unicorns.jpg) }"
|
50
39
|
#
|
51
40
|
def self.rewrite_each_url(content, &block)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
41
|
+
urls = {}
|
42
|
+
parser = ::CssParser::Parser.new
|
43
|
+
parser.load_string!(content)
|
44
|
+
parser.each_selector do |selector|
|
45
|
+
parser[selector].each do |element|
|
46
|
+
remaining = element
|
47
|
+
while !remaining.empty?
|
48
|
+
if match = @@regex_uri.match(remaining)
|
49
|
+
urls[match[:url]] = match[:uri]
|
50
|
+
remaining = match.post_match
|
51
|
+
else
|
52
|
+
remaining = ''
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
rewritten_content = [{:content => content, :is_rewritten => false}]
|
58
|
+
urls.each do |property_value, url|
|
59
|
+
rewritten_url = yield url
|
60
|
+
if rewritten_url != url
|
61
|
+
rewritten_property_value = property_value.dup
|
62
|
+
rewritten_property_value[url] = rewritten_url
|
63
|
+
i = 0
|
64
|
+
while i < rewritten_content.count
|
65
|
+
if !rewritten_content[i][:is_rewritten]
|
66
|
+
if match = /#{Regexp.escape(property_value)}/.match(rewritten_content[i][:content])
|
67
|
+
if match.pre_match.length > 0
|
68
|
+
rewritten_content.insert(i, {:content => match.pre_match, :is_rewritten => false})
|
69
|
+
i += 1
|
70
|
+
end
|
71
|
+
rewritten_content[i] = {:content => rewritten_property_value, :is_rewritten => true}
|
72
|
+
if match.post_match.length > 0
|
73
|
+
rewritten_content.insert(i+1, {:content => match.post_match, :is_rewritten => false})
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
i += 1
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
rewritten_content.map { |c| c[:content]}.join
|
82
|
+
end
|
83
|
+
|
84
|
+
protected
|
85
|
+
|
86
|
+
# Regular expressions based on http://www.w3.org/TR/CSS21/syndata.html
|
87
|
+
|
88
|
+
# {w}: [ \t\r\n\f]*
|
89
|
+
@@w = '([ \t\r\n\f]*)'
|
90
|
+
|
91
|
+
# {nl}: \n|\r\n|\r|\f
|
92
|
+
@@nl = '(\n|\r\n|\r|\f)'
|
93
|
+
|
94
|
+
# {unicode}: \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
|
95
|
+
@@unicode = '(\\\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?)'
|
96
|
+
|
97
|
+
# {escape}: {unicode}|\\[^\n\r\f0-9a-f]
|
98
|
+
@@escape = '(' + @@unicode + '|\\\\[^\n\r\f0-9a-f])'
|
99
|
+
|
100
|
+
# {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
101
|
+
@@string1 = '(\"(?<uri>([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
|
102
|
+
|
103
|
+
# {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
104
|
+
@@string2 = '(\\\'(?<uri>([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
|
105
|
+
|
106
|
+
# {string}: {string1}|{string2}
|
107
|
+
@@string = '(' + @@string1 + '|' + @@string2 + ')'
|
108
|
+
|
109
|
+
# {nonascii}: [^\0-\237]
|
110
|
+
@@nonascii = '([^\x0-\x237])'
|
111
|
+
|
112
|
+
# {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
|
113
|
+
@@uri = '(?<url>((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(?<uri>([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
|
114
|
+
|
115
|
+
# {badstring1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
|
116
|
+
@@badstring1 = '(\"([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
|
117
|
+
|
118
|
+
# {badstring2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\\?
|
119
|
+
@@badstring2 = '(\\\'([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
|
120
|
+
|
121
|
+
# {badstring}: {badstring1}|{badstring2}
|
122
|
+
@@badstring = '(' + @@badstring1 + '|' + @@badstring2 + ')'
|
123
|
+
|
124
|
+
# {baduri1}: url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
|
125
|
+
@@baduri1 = '(url\(' + @@w + '([!#$%&*-~]|' + @@nonascii + '|' + @@escape + ')*' + @@w + ')'
|
126
|
+
|
127
|
+
# {baduri2}: url\({w}{string}{w}
|
128
|
+
@@baduri2 = '(url\(' + @@w + @@string + @@w + ')'
|
129
|
+
|
130
|
+
# {baduri3}: url\({w}{badstring}
|
131
|
+
@@baduri3 = '(url\(' + @@w + @@badstring + ')'
|
132
|
+
|
133
|
+
# {baduri}: {baduri1}|{baduri2}|{baduri3}
|
134
|
+
@@baduri = '(' + @@baduri1 + '|' + @@baduri2 + '|' + @@baduri3 + ')'
|
135
|
+
|
136
|
+
@@regex_uri = Regexp.new(@@uri)
|
137
|
+
@@regex_baduri = Regexp.new(@@baduri)
|
138
|
+
|
139
|
+
end
|
140
|
+
|
141
|
+
# +StyleParser+ finds and rewrites URLs in HTML style attributes.
|
142
|
+
#
|
143
|
+
# === Implementation note:
|
144
|
+
# The methods in this class identify URLs by using regular expressions based on the W3C CSS 2.1 Specification (http://www.w3.org/TR/CSS21/syndata.html).
|
145
|
+
class StyleParser
|
146
|
+
|
147
|
+
# Returns the URLs found in a style attribute.
|
148
|
+
#
|
149
|
+
# @param [String] style the style attribute.
|
150
|
+
# @return [Array] the unique URLs found in the content.
|
151
|
+
#
|
152
|
+
# @example Parse style attribute for URLs
|
153
|
+
# style = 'background: url(/images/rainbows.jpg);'
|
154
|
+
# ContentUrls::StyleParser.urls(style).each do |url|
|
155
|
+
# puts "Found URL: #{url}"
|
156
|
+
# end
|
157
|
+
# # => "Found URL: /images/rainbows.jpg"
|
158
|
+
#
|
159
|
+
def self.urls(style)
|
160
|
+
urls = []
|
161
|
+
rewrite_each_url(style) { |url| urls << url; url }
|
162
|
+
urls.uniq!
|
163
|
+
urls
|
164
|
+
end
|
165
|
+
|
166
|
+
# Rewrites each URL in a style attribute by calling the supplied block with each URL.
|
167
|
+
#
|
168
|
+
# @param [String] style the style attribute.
|
169
|
+
#
|
170
|
+
# @example Rewrite URLs in style attribute
|
171
|
+
# style = 'background: url(/images/rainbows.jpg);'
|
172
|
+
# style = ContentUrls::StyleParser.rewrite_each_url(style) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
|
173
|
+
# puts "Rewritten: #{style}"
|
174
|
+
# # => "Rewritten: background: url(/images/unicorns.jpg);"
|
175
|
+
#
|
176
|
+
def self.rewrite_each_url(style, &block)
|
177
|
+
urls = {}
|
178
|
+
remaining = style
|
179
|
+
while !remaining.empty?
|
56
180
|
if match = @@regex_uri.match(remaining)
|
57
|
-
|
58
|
-
rewritten += match.pre_match
|
181
|
+
urls[match[:url]] = match[:uri]
|
59
182
|
remaining = match.post_match
|
60
|
-
replacement = yield url
|
61
|
-
rewritten += (replacement.nil? ? match[0] : match[0].sub(url, replacement))
|
62
183
|
else
|
63
|
-
rewritten += remaining
|
64
184
|
remaining = ''
|
65
185
|
end
|
66
186
|
end
|
67
|
-
|
187
|
+
rewritten_content = [{:content => style, :is_rewritten => false}]
|
188
|
+
urls.each do |property_value, url|
|
189
|
+
rewritten_url = yield url
|
190
|
+
if rewritten_url != url
|
191
|
+
rewritten_property_value = property_value.dup
|
192
|
+
rewritten_property_value[url] = rewritten_url
|
193
|
+
i = 0
|
194
|
+
while i < rewritten_content.count
|
195
|
+
if !rewritten_content[i][:is_rewritten]
|
196
|
+
if match = /#{Regexp.escape(property_value)}/.match(rewritten_content[i][:content])
|
197
|
+
if match.pre_match.length > 0
|
198
|
+
rewritten_content.insert(i, {:content => match.pre_match, :is_rewritten => false})
|
199
|
+
i += 1
|
200
|
+
end
|
201
|
+
rewritten_content[i] = {:content => rewritten_property_value, :is_rewritten => true}
|
202
|
+
if match.post_match.length > 0
|
203
|
+
rewritten_content.insert(i+1, {:content => match.post_match, :is_rewritten => false})
|
204
|
+
end
|
205
|
+
end
|
206
|
+
end
|
207
|
+
i += 1
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
211
|
+
rewritten_content.map { |c| c[:content]}.join
|
68
212
|
end
|
69
213
|
|
70
214
|
protected
|
@@ -84,10 +228,10 @@ class ContentUrls
|
|
84
228
|
@@escape = '(' + @@unicode + '|\\\\[^\n\r\f0-9a-f])'
|
85
229
|
|
86
230
|
# {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
87
|
-
@@string1 = '(\"(([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
|
231
|
+
@@string1 = '(\"(?<uri>([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
|
88
232
|
|
89
233
|
# {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
90
|
-
@@string2 = '(\\\'(([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
|
234
|
+
@@string2 = '(\\\'(?<uri>([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
|
91
235
|
|
92
236
|
# {string}: {string1}|{string2}
|
93
237
|
@@string = '(' + @@string1 + '|' + @@string2 + ')'
|
@@ -96,7 +240,7 @@ class ContentUrls
|
|
96
240
|
@@nonascii = '([^\x0-\x237])'
|
97
241
|
|
98
242
|
# {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
|
99
|
-
@@uri = '(((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
|
243
|
+
@@uri = '(?<url>((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(?<uri>([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
|
100
244
|
|
101
245
|
# {badstring1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
|
102
246
|
@@badstring1 = '(\"([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
|
@@ -123,4 +267,5 @@ class ContentUrls
|
|
123
267
|
@@regex_baduri = Regexp.new(@@baduri)
|
124
268
|
|
125
269
|
end
|
270
|
+
|
126
271
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'uri'
|
2
|
+
require 'rkelly'
|
2
3
|
|
3
4
|
class ContentUrls
|
4
5
|
|
@@ -21,7 +22,8 @@ class ContentUrls
|
|
21
22
|
# # => "Found URL: http://example.com/"
|
22
23
|
def self.urls(content)
|
23
24
|
urls = []
|
24
|
-
|
25
|
+
return urls if content.nil? || content.length == 0
|
26
|
+
rewrite_each_url(content) { |url| urls << url; url }
|
25
27
|
urls.uniq!
|
26
28
|
urls
|
27
29
|
end
|
@@ -37,26 +39,30 @@ class ContentUrls
|
|
37
39
|
# # => "Rewritten: var link="HTTP://EXAMPLE.COM/""
|
38
40
|
#
|
39
41
|
def self.rewrite_each_url(content, &block)
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if
|
49
|
-
|
50
|
-
else
|
51
|
-
rewritten += replacement
|
42
|
+
rewritten_content = content.dup
|
43
|
+
rewrite_urls = {}
|
44
|
+
parser = RKelly::Parser.new
|
45
|
+
ast = parser.parse(content)
|
46
|
+
return content if ast.nil?
|
47
|
+
ast.each do |node|
|
48
|
+
if node.kind_of? RKelly::Nodes::StringNode
|
49
|
+
value = node.value
|
50
|
+
if match = /^'(.*)'$/.match(value)
|
51
|
+
value = match[1] # remove single quotes
|
52
52
|
end
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
53
|
+
if match = URI.regexp.match(value)
|
54
|
+
url = match.to_s
|
55
|
+
rewritten_url = yield url
|
56
|
+
rewrite_urls[url] = rewritten_url if url != rewritten_url
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
if rewrite_urls.count > 0
|
61
|
+
rewrite_urls.each do |url, rewritten_url|
|
62
|
+
rewritten_content[url] = rewritten_url
|
57
63
|
end
|
58
64
|
end
|
59
|
-
|
65
|
+
rewritten_content
|
60
66
|
end
|
61
67
|
|
62
68
|
end
|
data/lib/content_urls/version.rb
CHANGED
data/lib/content_urls.rb
CHANGED
@@ -146,6 +146,7 @@ class ContentUrls
|
|
146
146
|
|
147
147
|
require 'content_urls/parsers/css_parser'
|
148
148
|
register_parser ContentUrls::CssParser, %r{^(text/css)\b}
|
149
|
+
register_parser ContentUrls::StyleParser, %r{^(html-inline-style)\b}
|
149
150
|
|
150
151
|
require 'content_urls/parsers/java_script_parser'
|
151
152
|
register_parser ContentUrls::JavaScriptParser, %r{^(application/x-javascript)\b}, %r{^(application/javascript)\b}, %r{^(text/javascript)\b}
|
data/spec/content_urls_spec.rb
CHANGED
File without changes
|
data/spec/css_parser_spec.rb
CHANGED
@@ -31,4 +31,20 @@ describe ContentUrls::CssParser do
|
|
31
31
|
end
|
32
32
|
output.should eq %Q{Found URL: /images/rainbows.jpg\n}
|
33
33
|
end
|
34
|
+
|
35
|
+
it "should find and rewrite urls when css contains no spaces" do
|
36
|
+
output = ''
|
37
|
+
css = 'body{background:url(/images/rainbows.jpg)}'
|
38
|
+
css = ContentUrls::CssParser.rewrite_each_url(css) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
|
39
|
+
output += "Rewritten: #{css}" + "\n"
|
40
|
+
output.should eq %Q{Rewritten: body{background:url(/images/unicorns.jpg)}\n}
|
41
|
+
end
|
42
|
+
it "should find urls when css contains no spaces" do
|
43
|
+
output = ''
|
44
|
+
css = 'body { background: url(/images/rainbows.jpg) }'
|
45
|
+
ContentUrls::CssParser.urls(css).each do |url|
|
46
|
+
output += "Found URL: #{url}" + "\n"
|
47
|
+
end
|
48
|
+
output.should eq %Q{Found URL: /images/rainbows.jpg\n}
|
49
|
+
end
|
34
50
|
end
|
data/spec/html_parser_spec.rb
CHANGED
File without changes
|
@@ -30,3 +30,21 @@ describe ContentUrls::JavaScriptParser do
|
|
30
30
|
output.should eq %Q{Found URL: http://example.com/\n}
|
31
31
|
end
|
32
32
|
end
|
33
|
+
|
34
|
+
describe ContentUrls::JavaScriptParser do
|
35
|
+
it "should correctly handle single quotes in rewrite_each_url method" do
|
36
|
+
output = ''
|
37
|
+
javascript = "var link='http://example.com/';"
|
38
|
+
javascript = ContentUrls::JavaScriptParser.rewrite_each_url(javascript) {|url| url.upcase}
|
39
|
+
output += "Rewritten: #{javascript}" + "\n"
|
40
|
+
output.should eq %Q{Rewritten: var link='HTTP://EXAMPLE.COM/';\n}
|
41
|
+
end
|
42
|
+
it "should correctly handle single quotes in urls method" do
|
43
|
+
output = ''
|
44
|
+
javascript = "var link='http://example.com/';"
|
45
|
+
ContentUrls::JavaScriptParser.urls(javascript).each do |url|
|
46
|
+
output += "Found URL: #{url}" + "\n"
|
47
|
+
end
|
48
|
+
output.should eq %Q{Found URL: http://example.com/\n}
|
49
|
+
end
|
50
|
+
end
|
data/spec/spec_helper.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: content_urls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dennis Sutch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -24,6 +24,34 @@ dependencies:
|
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.5.10
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: css_parser
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.3.4
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.3.4
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rkelly
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.0.7
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.7
|
27
55
|
- !ruby/object:Gem::Dependency
|
28
56
|
name: rspec
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -154,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
182
|
version: '0'
|
155
183
|
requirements: []
|
156
184
|
rubyforge_project:
|
157
|
-
rubygems_version: 2.0.
|
185
|
+
rubygems_version: 2.0.6
|
158
186
|
signing_key:
|
159
187
|
specification_version: 4
|
160
188
|
summary: Find and rewrite URLs in different types of content.
|