content_urls 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MTEwNGE1ZjIwZTdjNDA1ZGJjODM0N2Q4ZWEyNmRlMGFiMDVjOTc4Yg==
4
+ OTA3MGM5MWE0ZTY5MTA0M2E2MTQyMTI5ZWEwODA0NzZiY2M1YTI5Yw==
5
5
  data.tar.gz: !binary |-
6
- NzVlMDU0ZDRlMDBiMTNkZDBmYzg5YTJiYjEwNjc1M2U4ZmQ1MDQ2Yg==
6
+ MGY3ZjAzZTQ0NGM5NTRiZDBiMTk3NWQ4Y2EzNWNkZGRjZjE2YjIwMw==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NWEzNGVmZjFlOWVhZDdjOTNlN2IyNjgzNjA0OThhNWMwNzRjNDNhNjc4NTZi
10
- NjI0N2NjYzUwYTRkNjYzNmEzM2RiMmI3ZDZkM2NiYWUxZGNmNzc1NzU0ZjBh
11
- ZDY5ZTAxMjUzMWM3YTZiOTg3ZWVkMTE4MDRhYzY3MjI5ZTk3ZDA=
9
+ YTY2NzBlZTIzYjI1MGRlMmU1YTQyN2I2Y2E2ZDgyOGMyOTY0Y2Y0NTY2ZDMz
10
+ ZmQ0N2JlMTYyMDlmNDdlY2M4MmYwNjQ3MzkzYzZkNzllNjg5YzY0NGYwYzM1
11
+ OGM1NWQzN2JmYWQ3Yzg4ZWZmZmZlODY1ZmI1YzUzYzZjNWU5NDI=
12
12
  data.tar.gz: !binary |-
13
- MTczNGZhNDE1MDZhZWJkZTEwNjg1NDdlZDFlMzVjODRiMzg4NjE3ZTc0ZWI5
14
- NDdkNWQ2OWFhNmU3ODI0ZmM4NmEzNGM2MzIxZmVkODRmOGZiYWJlZTJkNDhl
15
- NjgzNDZjMzVhY2QwMDQ3MjIxYzk3OGZlOWVhNmFlZGM4ZGE5YmI=
13
+ Y2JhOTRhNGYzODRlNmQxNzEyYTc4NTA0YWJmZWRmYzNkOGNjYTMzMTU0MDc5
14
+ MjY1NmQ2OWQ3YjQ2ZDkyMmRlYTY2OGI2NzI0MTJhY2FlM2VmOGE5MzQyMTk0
15
+ NTYzMjc4OWI1NzljZjRiNjM1ZGI2NDg0YjcxZDZhOGNiNDZlZWI=
data/.document CHANGED
File without changes
data/.rspec CHANGED
File without changes
data/Gemfile CHANGED
@@ -1,6 +1,8 @@
1
1
  source "https://rubygems.org"
2
2
 
3
3
  gem "nokogiri", "~> 1.5.10"
4
+ gem "css_parser", "~> 1.3.4"
5
+ gem "rkelly", "~> 1.0.7"
4
6
 
5
7
  group :development do
6
8
  gem "rspec", "~> 2.14.1"
data/LICENSE.txt CHANGED
File without changes
data/README.rdoc CHANGED
@@ -43,13 +43,14 @@ Provide the HTML content, the content type, and a block to rewrite each URL's ex
43
43
 
44
44
  == Requirements
45
45
  * nokogiri
46
+ * css_parser
47
+ * rkelly
46
48
 
47
49
  == Development
48
50
  To test and develop this gem, additional requirements are:
49
51
  * bundler
50
52
  * jeweler
51
53
  * rake
52
- * rcov
53
54
  * rdoc
54
55
  * rspec
55
56
  * yard
data/Rakefile CHANGED
File without changes
data/content_urls.gemspec CHANGED
@@ -1,74 +1,80 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
-
6
- Gem::Specification.new do |s|
7
- s.name = "content_urls"
8
- s.version = "0.1.7"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Dennis Sutch"]
12
- s.date = "2013-07-18"
13
- s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
14
- s.email = "dennis@sutch.com"
15
- s.extra_rdoc_files = [
16
- "LICENSE.txt",
17
- "README.rdoc"
18
- ]
19
- s.files = [
20
- ".document",
21
- ".rspec",
22
- "Gemfile",
23
- "LICENSE.txt",
24
- "README.rdoc",
25
- "Rakefile",
26
- "content_urls.gemspec",
27
- "lib/content_urls.rb",
28
- "lib/content_urls/parsers/css_parser.rb",
29
- "lib/content_urls/parsers/html_parser.rb",
30
- "lib/content_urls/parsers/java_script_parser.rb",
31
- "lib/content_urls/version.rb",
32
- "spec/content_urls_spec.rb",
33
- "spec/css_parser_spec.rb",
34
- "spec/html_parser_spec.rb",
35
- "spec/java_script_parser_spec.rb",
36
- "spec/spec_helper.rb"
37
- ]
38
- s.homepage = "http://github.com/sutch/content_urls"
39
- s.licenses = ["MIT"]
40
- s.require_paths = ["lib"]
41
- s.rubygems_version = "2.0.4"
42
- s.summary = "Find and rewrite URLs in different types of content."
43
-
44
- if s.respond_to? :specification_version then
45
- s.specification_version = 4
46
-
47
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
- s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
49
- s.add_development_dependency(%q<rspec>, ["~> 2.14.1"])
50
- s.add_development_dependency(%q<yard>, ["~> 0.7"])
51
- s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
52
- s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
53
- s.add_development_dependency(%q<jeweler>, ["~> 1.8.6"])
54
- s.add_development_dependency(%q<rake>, ["~> 10.1.0"])
55
- else
56
- s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
57
- s.add_dependency(%q<rspec>, ["~> 2.14.1"])
58
- s.add_dependency(%q<yard>, ["~> 0.7"])
59
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
60
- s.add_dependency(%q<bundler>, ["~> 1.3.5"])
61
- s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
62
- s.add_dependency(%q<rake>, ["~> 10.1.0"])
63
- end
64
- else
65
- s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
66
- s.add_dependency(%q<rspec>, ["~> 2.14.1"])
67
- s.add_dependency(%q<yard>, ["~> 0.7"])
68
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
69
- s.add_dependency(%q<bundler>, ["~> 1.3.5"])
70
- s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
71
- s.add_dependency(%q<rake>, ["~> 10.1.0"])
72
- end
73
- end
74
-
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "content_urls"
8
+ s.version = "0.1.8"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Dennis Sutch"]
12
+ s.date = "2013-08-01"
13
+ s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
14
+ s.email = "dennis@sutch.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "LICENSE.txt",
24
+ "README.rdoc",
25
+ "Rakefile",
26
+ "content_urls.gemspec",
27
+ "lib/content_urls.rb",
28
+ "lib/content_urls/parsers/css_parser.rb",
29
+ "lib/content_urls/parsers/html_parser.rb",
30
+ "lib/content_urls/parsers/java_script_parser.rb",
31
+ "lib/content_urls/version.rb",
32
+ "spec/content_urls_spec.rb",
33
+ "spec/css_parser_spec.rb",
34
+ "spec/html_parser_spec.rb",
35
+ "spec/java_script_parser_spec.rb",
36
+ "spec/spec_helper.rb"
37
+ ]
38
+ s.homepage = "http://github.com/sutch/content_urls"
39
+ s.licenses = ["MIT"]
40
+ s.require_paths = ["lib"]
41
+ s.rubygems_version = "2.0.6"
42
+ s.summary = "Find and rewrite URLs in different types of content."
43
+
44
+ if s.respond_to? :specification_version then
45
+ s.specification_version = 4
46
+
47
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
+ s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
49
+ s.add_runtime_dependency(%q<css_parser>, ["~> 1.3.4"])
50
+ s.add_runtime_dependency(%q<rkelly>, ["~> 1.0.7"])
51
+ s.add_development_dependency(%q<rspec>, ["~> 2.14.1"])
52
+ s.add_development_dependency(%q<yard>, ["~> 0.7"])
53
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
54
+ s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
55
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.6"])
56
+ s.add_development_dependency(%q<rake>, ["~> 10.1.0"])
57
+ else
58
+ s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
59
+ s.add_dependency(%q<css_parser>, ["~> 1.3.4"])
60
+ s.add_dependency(%q<rkelly>, ["~> 1.0.7"])
61
+ s.add_dependency(%q<rspec>, ["~> 2.14.1"])
62
+ s.add_dependency(%q<yard>, ["~> 0.7"])
63
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
64
+ s.add_dependency(%q<bundler>, ["~> 1.3.5"])
65
+ s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
66
+ s.add_dependency(%q<rake>, ["~> 10.1.0"])
67
+ end
68
+ else
69
+ s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
70
+ s.add_dependency(%q<css_parser>, ["~> 1.3.4"])
71
+ s.add_dependency(%q<rkelly>, ["~> 1.0.7"])
72
+ s.add_dependency(%q<rspec>, ["~> 2.14.1"])
73
+ s.add_dependency(%q<yard>, ["~> 0.7"])
74
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
75
+ s.add_dependency(%q<bundler>, ["~> 1.3.5"])
76
+ s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
77
+ s.add_dependency(%q<rake>, ["~> 10.1.0"])
78
+ end
79
+ end
80
+
@@ -1,3 +1,5 @@
1
+ require 'css_parser'
2
+
1
3
  class ContentUrls
2
4
 
3
5
  # +CssParser+ finds and rewrites URLs in CSS content.
@@ -17,25 +19,12 @@ class ContentUrls
17
19
  # puts "Found URL: #{url}"
18
20
  # end
19
21
  # # => "Found URL: /images/rainbows.jpg"
22
+ #
20
23
  def self.urls(content)
21
24
  urls = []
22
- remaining = content
23
- while ! remaining.empty?
24
- if @@regex_uri =~ remaining
25
- match = $1
26
- url = $7 || $14 || $23
27
- #if @@regex_baduri =~ match ## bad URL
28
- # remaining = remaining[Regexp.last_match.begin(0)+1..-1] # Use last_match from regex_uri test
29
- #else
30
- remaining = Regexp.last_match.post_match
31
- urls << url
32
- #end
33
- else
34
- remaining = ''
35
- end
36
- end
25
+ rewrite_each_url(content) { |url| urls << url; url }
37
26
  urls.uniq!
38
- urls
27
+ urls
39
28
  end
40
29
 
41
30
  # Rewrites each URL in the CSS content by calling the supplied block with each URL.
@@ -49,22 +38,177 @@ class ContentUrls
49
38
  # # => "Rewritten: body { background: url(/images/unicorns.jpg) }"
50
39
  #
51
40
  def self.rewrite_each_url(content, &block)
52
- done = false
53
- remaining = content
54
- rewritten = ''
55
- while ! remaining.empty?
41
+ urls = {}
42
+ parser = ::CssParser::Parser.new
43
+ parser.load_string!(content)
44
+ parser.each_selector do |selector|
45
+ parser[selector].each do |element|
46
+ remaining = element
47
+ while !remaining.empty?
48
+ if match = @@regex_uri.match(remaining)
49
+ urls[match[:url]] = match[:uri]
50
+ remaining = match.post_match
51
+ else
52
+ remaining = ''
53
+ end
54
+ end
55
+ end
56
+ end
57
+ rewritten_content = [{:content => content, :is_rewritten => false}]
58
+ urls.each do |property_value, url|
59
+ rewritten_url = yield url
60
+ if rewritten_url != url
61
+ rewritten_property_value = property_value.dup
62
+ rewritten_property_value[url] = rewritten_url
63
+ i = 0
64
+ while i < rewritten_content.count
65
+ if !rewritten_content[i][:is_rewritten]
66
+ if match = /#{Regexp.escape(property_value)}/.match(rewritten_content[i][:content])
67
+ if match.pre_match.length > 0
68
+ rewritten_content.insert(i, {:content => match.pre_match, :is_rewritten => false})
69
+ i += 1
70
+ end
71
+ rewritten_content[i] = {:content => rewritten_property_value, :is_rewritten => true}
72
+ if match.post_match.length > 0
73
+ rewritten_content.insert(i+1, {:content => match.post_match, :is_rewritten => false})
74
+ end
75
+ end
76
+ end
77
+ i += 1
78
+ end
79
+ end
80
+ end
81
+ rewritten_content.map { |c| c[:content]}.join
82
+ end
83
+
84
+ protected
85
+
86
+ # Regular expressions based on http://www.w3.org/TR/CSS21/syndata.html
87
+
88
+ # {w}: [ \t\r\n\f]*
89
+ @@w = '([ \t\r\n\f]*)'
90
+
91
+ # {nl}: \n|\r\n|\r|\f
92
+ @@nl = '(\n|\r\n|\r|\f)'
93
+
94
+ # {unicode}: \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
95
+ @@unicode = '(\\\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?)'
96
+
97
+ # {escape}: {unicode}|\\[^\n\r\f0-9a-f]
98
+ @@escape = '(' + @@unicode + '|\\\\[^\n\r\f0-9a-f])'
99
+
100
+ # {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
101
+ @@string1 = '(\"(?<uri>([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
102
+
103
+ # {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
104
+ @@string2 = '(\\\'(?<uri>([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
105
+
106
+ # {string}: {string1}|{string2}
107
+ @@string = '(' + @@string1 + '|' + @@string2 + ')'
108
+
109
+ # {nonascii}: [^\0-\237]
110
+ @@nonascii = '([^\x0-\x237])'
111
+
112
+ # {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
113
+ @@uri = '(?<url>((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(?<uri>([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
114
+
115
+ # {badstring1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
116
+ @@badstring1 = '(\"([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
117
+
118
+ # {badstring2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\\?
119
+ @@badstring2 = '(\\\'([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
120
+
121
+ # {badstring}: {badstring1}|{badstring2}
122
+ @@badstring = '(' + @@badstring1 + '|' + @@badstring2 + ')'
123
+
124
+ # {baduri1}: url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
125
+ @@baduri1 = '(url\(' + @@w + '([!#$%&*-~]|' + @@nonascii + '|' + @@escape + ')*' + @@w + ')'
126
+
127
+ # {baduri2}: url\({w}{string}{w}
128
+ @@baduri2 = '(url\(' + @@w + @@string + @@w + ')'
129
+
130
+ # {baduri3}: url\({w}{badstring}
131
+ @@baduri3 = '(url\(' + @@w + @@badstring + ')'
132
+
133
+ # {baduri}: {baduri1}|{baduri2}|{baduri3}
134
+ @@baduri = '(' + @@baduri1 + '|' + @@baduri2 + '|' + @@baduri3 + ')'
135
+
136
+ @@regex_uri = Regexp.new(@@uri)
137
+ @@regex_baduri = Regexp.new(@@baduri)
138
+
139
+ end
140
+
141
+ # +StyleParser+ finds and rewrites URLs in HTML style attributes.
142
+ #
143
+ # === Implementation note:
144
+ # The methods in this class identify URLs by using regular expressions based on the W3C CSS 2.1 Specification (http://www.w3.org/TR/CSS21/syndata.html).
145
+ class StyleParser
146
+
147
+ # Returns the URLs found in a style attribute.
148
+ #
149
+ # @param [String] style the style attribute.
150
+ # @return [Array] the unique URLs found in the content.
151
+ #
152
+ # @example Parse style attribute for URLs
153
+ # style = 'background: url(/images/rainbows.jpg);'
154
+ # ContentUrls::StyleParser.urls(style).each do |url|
155
+ # puts "Found URL: #{url}"
156
+ # end
157
+ # # => "Found URL: /images/rainbows.jpg"
158
+ #
159
+ def self.urls(style)
160
+ urls = []
161
+ rewrite_each_url(style) { |url| urls << url; url }
162
+ urls.uniq!
163
+ urls
164
+ end
165
+
166
+ # Rewrites each URL in a style attribute by calling the supplied block with each URL.
167
+ #
168
+ # @param [String] style the style attribute.
169
+ #
170
+ # @example Rewrite URLs in style attribute
171
+ # style = 'background: url(/images/rainbows.jpg);'
172
+ # style = ContentUrls::StyleParser.rewrite_each_url(style) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
173
+ # puts "Rewritten: #{style}"
174
+ # # => "Rewritten: background: url(/images/unicorns.jpg);"
175
+ #
176
+ def self.rewrite_each_url(style, &block)
177
+ urls = {}
178
+ remaining = style
179
+ while !remaining.empty?
56
180
  if match = @@regex_uri.match(remaining)
57
- url = match[7] || match[14] || match[23]
58
- rewritten += match.pre_match
181
+ urls[match[:url]] = match[:uri]
59
182
  remaining = match.post_match
60
- replacement = yield url
61
- rewritten += (replacement.nil? ? match[0] : match[0].sub(url, replacement))
62
183
  else
63
- rewritten += remaining
64
184
  remaining = ''
65
185
  end
66
186
  end
67
- return rewritten
187
+ rewritten_content = [{:content => style, :is_rewritten => false}]
188
+ urls.each do |property_value, url|
189
+ rewritten_url = yield url
190
+ if rewritten_url != url
191
+ rewritten_property_value = property_value.dup
192
+ rewritten_property_value[url] = rewritten_url
193
+ i = 0
194
+ while i < rewritten_content.count
195
+ if !rewritten_content[i][:is_rewritten]
196
+ if match = /#{Regexp.escape(property_value)}/.match(rewritten_content[i][:content])
197
+ if match.pre_match.length > 0
198
+ rewritten_content.insert(i, {:content => match.pre_match, :is_rewritten => false})
199
+ i += 1
200
+ end
201
+ rewritten_content[i] = {:content => rewritten_property_value, :is_rewritten => true}
202
+ if match.post_match.length > 0
203
+ rewritten_content.insert(i+1, {:content => match.post_match, :is_rewritten => false})
204
+ end
205
+ end
206
+ end
207
+ i += 1
208
+ end
209
+ end
210
+ end
211
+ rewritten_content.map { |c| c[:content]}.join
68
212
  end
69
213
 
70
214
  protected
@@ -84,10 +228,10 @@ class ContentUrls
84
228
  @@escape = '(' + @@unicode + '|\\\\[^\n\r\f0-9a-f])'
85
229
 
86
230
  # {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
87
- @@string1 = '(\"(([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
231
+ @@string1 = '(\"(?<uri>([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
88
232
 
89
233
  # {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
90
- @@string2 = '(\\\'(([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
234
+ @@string2 = '(\\\'(?<uri>([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
91
235
 
92
236
  # {string}: {string1}|{string2}
93
237
  @@string = '(' + @@string1 + '|' + @@string2 + ')'
@@ -96,7 +240,7 @@ class ContentUrls
96
240
  @@nonascii = '([^\x0-\x237])'
97
241
 
98
242
  # {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
99
- @@uri = '(((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
243
+ @@uri = '(?<url>((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(?<uri>([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
100
244
 
101
245
  # {badstring1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
102
246
  @@badstring1 = '(\"([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
@@ -123,4 +267,5 @@ class ContentUrls
123
267
  @@regex_baduri = Regexp.new(@@baduri)
124
268
 
125
269
  end
270
+
126
271
  end
@@ -152,7 +152,7 @@ class ContentUrls
152
152
  style_attribute: {
153
153
  xpath: "//*[@style]",
154
154
  attribute: 'style',
155
- parser: 'text/css'
155
+ parser: 'html-inline-style'
156
156
  },
157
157
  style_tag: {
158
158
  xpath: "//style",
@@ -1,4 +1,5 @@
1
1
  require 'uri'
2
+ require 'rkelly'
2
3
 
3
4
  class ContentUrls
4
5
 
@@ -21,7 +22,8 @@ class ContentUrls
21
22
  # # => "Found URL: http://example.com/"
22
23
  def self.urls(content)
23
24
  urls = []
24
- URI.extract(content).each { |u| urls << u }
25
+ return urls if content.nil? || content.length == 0
26
+ rewrite_each_url(content) { |url| urls << url; url }
25
27
  urls.uniq!
26
28
  urls
27
29
  end
@@ -37,26 +39,30 @@ class ContentUrls
37
39
  # # => "Rewritten: var link="HTTP://EXAMPLE.COM/""
38
40
  #
39
41
  def self.rewrite_each_url(content, &block)
40
- done = false
41
- remaining = content
42
- rewritten = ''
43
- while ! remaining.empty?
44
- if match = URI.regexp.match(remaining)
45
- url = match.to_s
46
- rewritten += match.pre_match
47
- replacement = url.nil? ? nil : (yield url)
48
- if replacement.nil? or replacement == url # no change in URL
49
- rewritten += url
50
- else
51
- rewritten += replacement
42
+ rewritten_content = content.dup
43
+ rewrite_urls = {}
44
+ parser = RKelly::Parser.new
45
+ ast = parser.parse(content)
46
+ return content if ast.nil?
47
+ ast.each do |node|
48
+ if node.kind_of? RKelly::Nodes::StringNode
49
+ value = node.value
50
+ if match = /^'(.*)'$/.match(value)
51
+ value = match[1] # remove single quotes
52
52
  end
53
- remaining = match.post_match
54
- else
55
- rewritten += remaining
56
- remaining = ''
53
+ if match = URI.regexp.match(value)
54
+ url = match.to_s
55
+ rewritten_url = yield url
56
+ rewrite_urls[url] = rewritten_url if url != rewritten_url
57
+ end
58
+ end
59
+ end
60
+ if rewrite_urls.count > 0
61
+ rewrite_urls.each do |url, rewritten_url|
62
+ rewritten_content[url] = rewritten_url
57
63
  end
58
64
  end
59
- return rewritten
65
+ rewritten_content
60
66
  end
61
67
 
62
68
  end
@@ -2,7 +2,7 @@ class ContentUrls
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- PATCH = 7
5
+ PATCH = 8
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
data/lib/content_urls.rb CHANGED
@@ -146,6 +146,7 @@ class ContentUrls
146
146
 
147
147
  require 'content_urls/parsers/css_parser'
148
148
  register_parser ContentUrls::CssParser, %r{^(text/css)\b}
149
+ register_parser ContentUrls::StyleParser, %r{^(html-inline-style)\b}
149
150
 
150
151
  require 'content_urls/parsers/java_script_parser'
151
152
  register_parser ContentUrls::JavaScriptParser, %r{^(application/x-javascript)\b}, %r{^(application/javascript)\b}, %r{^(text/javascript)\b}
File without changes
@@ -31,4 +31,20 @@ describe ContentUrls::CssParser do
31
31
  end
32
32
  output.should eq %Q{Found URL: /images/rainbows.jpg\n}
33
33
  end
34
+
35
+ it "should find and rewrite urls when css contains no spaces" do
36
+ output = ''
37
+ css = 'body{background:url(/images/rainbows.jpg)}'
38
+ css = ContentUrls::CssParser.rewrite_each_url(css) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
39
+ output += "Rewritten: #{css}" + "\n"
40
+ output.should eq %Q{Rewritten: body{background:url(/images/unicorns.jpg)}\n}
41
+ end
42
+ it "should find urls when css contains no spaces" do
43
+ output = ''
44
+ css = 'body { background: url(/images/rainbows.jpg) }'
45
+ ContentUrls::CssParser.urls(css).each do |url|
46
+ output += "Found URL: #{url}" + "\n"
47
+ end
48
+ output.should eq %Q{Found URL: /images/rainbows.jpg\n}
49
+ end
34
50
  end
File without changes
@@ -30,3 +30,21 @@ describe ContentUrls::JavaScriptParser do
30
30
  output.should eq %Q{Found URL: http://example.com/\n}
31
31
  end
32
32
  end
33
+
34
+ describe ContentUrls::JavaScriptParser do
35
+ it "should correctly handle single quotes in rewrite_each_url method" do
36
+ output = ''
37
+ javascript = "var link='http://example.com/';"
38
+ javascript = ContentUrls::JavaScriptParser.rewrite_each_url(javascript) {|url| url.upcase}
39
+ output += "Rewritten: #{javascript}" + "\n"
40
+ output.should eq %Q{Rewritten: var link='HTTP://EXAMPLE.COM/';\n}
41
+ end
42
+ it "should correctly handle single quotes in urls method" do
43
+ output = ''
44
+ javascript = "var link='http://example.com/';"
45
+ ContentUrls::JavaScriptParser.urls(javascript).each do |url|
46
+ output += "Found URL: #{url}" + "\n"
47
+ end
48
+ output.should eq %Q{Found URL: http://example.com/\n}
49
+ end
50
+ end
data/spec/spec_helper.rb CHANGED
File without changes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: content_urls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dennis Sutch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-18 00:00:00.000000000 Z
11
+ date: 2013-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.5.10
27
+ - !ruby/object:Gem::Dependency
28
+ name: css_parser
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.3.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 1.3.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: rkelly
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.7
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.7
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: rspec
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -154,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
154
182
  version: '0'
155
183
  requirements: []
156
184
  rubyforge_project:
157
- rubygems_version: 2.0.4
185
+ rubygems_version: 2.0.6
158
186
  signing_key:
159
187
  specification_version: 4
160
188
  summary: Find and rewrite URLs in different types of content.