content_urls 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MTEwNGE1ZjIwZTdjNDA1ZGJjODM0N2Q4ZWEyNmRlMGFiMDVjOTc4Yg==
4
+ OTA3MGM5MWE0ZTY5MTA0M2E2MTQyMTI5ZWEwODA0NzZiY2M1YTI5Yw==
5
5
  data.tar.gz: !binary |-
6
- NzVlMDU0ZDRlMDBiMTNkZDBmYzg5YTJiYjEwNjc1M2U4ZmQ1MDQ2Yg==
6
+ MGY3ZjAzZTQ0NGM5NTRiZDBiMTk3NWQ4Y2EzNWNkZGRjZjE2YjIwMw==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NWEzNGVmZjFlOWVhZDdjOTNlN2IyNjgzNjA0OThhNWMwNzRjNDNhNjc4NTZi
10
- NjI0N2NjYzUwYTRkNjYzNmEzM2RiMmI3ZDZkM2NiYWUxZGNmNzc1NzU0ZjBh
11
- ZDY5ZTAxMjUzMWM3YTZiOTg3ZWVkMTE4MDRhYzY3MjI5ZTk3ZDA=
9
+ YTY2NzBlZTIzYjI1MGRlMmU1YTQyN2I2Y2E2ZDgyOGMyOTY0Y2Y0NTY2ZDMz
10
+ ZmQ0N2JlMTYyMDlmNDdlY2M4MmYwNjQ3MzkzYzZkNzllNjg5YzY0NGYwYzM1
11
+ OGM1NWQzN2JmYWQ3Yzg4ZWZmZmZlODY1ZmI1YzUzYzZjNWU5NDI=
12
12
  data.tar.gz: !binary |-
13
- MTczNGZhNDE1MDZhZWJkZTEwNjg1NDdlZDFlMzVjODRiMzg4NjE3ZTc0ZWI5
14
- NDdkNWQ2OWFhNmU3ODI0ZmM4NmEzNGM2MzIxZmVkODRmOGZiYWJlZTJkNDhl
15
- NjgzNDZjMzVhY2QwMDQ3MjIxYzk3OGZlOWVhNmFlZGM4ZGE5YmI=
13
+ Y2JhOTRhNGYzODRlNmQxNzEyYTc4NTA0YWJmZWRmYzNkOGNjYTMzMTU0MDc5
14
+ MjY1NmQ2OWQ3YjQ2ZDkyMmRlYTY2OGI2NzI0MTJhY2FlM2VmOGE5MzQyMTk0
15
+ NTYzMjc4OWI1NzljZjRiNjM1ZGI2NDg0YjcxZDZhOGNiNDZlZWI=
data/.document CHANGED
File without changes
data/.rspec CHANGED
File without changes
data/Gemfile CHANGED
@@ -1,6 +1,8 @@
1
1
  source "https://rubygems.org"
2
2
 
3
3
  gem "nokogiri", "~> 1.5.10"
4
+ gem "css_parser", "~> 1.3.4"
5
+ gem "rkelly", "~> 1.0.7"
4
6
 
5
7
  group :development do
6
8
  gem "rspec", "~> 2.14.1"
data/LICENSE.txt CHANGED
File without changes
data/README.rdoc CHANGED
@@ -43,13 +43,14 @@ Provide the HTML content, the content type, and a block to rewrite each URL's ex
43
43
 
44
44
  == Requirements
45
45
  * nokogiri
46
+ * css_parser
47
+ * rkelly
46
48
 
47
49
  == Development
48
50
  To test and develop this gem, additional requirements are:
49
51
  * bundler
50
52
  * jeweler
51
53
  * rake
52
- * rcov
53
54
  * rdoc
54
55
  * rspec
55
56
  * yard
data/Rakefile CHANGED
File without changes
data/content_urls.gemspec CHANGED
@@ -1,74 +1,80 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
-
6
- Gem::Specification.new do |s|
7
- s.name = "content_urls"
8
- s.version = "0.1.7"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Dennis Sutch"]
12
- s.date = "2013-07-18"
13
- s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
14
- s.email = "dennis@sutch.com"
15
- s.extra_rdoc_files = [
16
- "LICENSE.txt",
17
- "README.rdoc"
18
- ]
19
- s.files = [
20
- ".document",
21
- ".rspec",
22
- "Gemfile",
23
- "LICENSE.txt",
24
- "README.rdoc",
25
- "Rakefile",
26
- "content_urls.gemspec",
27
- "lib/content_urls.rb",
28
- "lib/content_urls/parsers/css_parser.rb",
29
- "lib/content_urls/parsers/html_parser.rb",
30
- "lib/content_urls/parsers/java_script_parser.rb",
31
- "lib/content_urls/version.rb",
32
- "spec/content_urls_spec.rb",
33
- "spec/css_parser_spec.rb",
34
- "spec/html_parser_spec.rb",
35
- "spec/java_script_parser_spec.rb",
36
- "spec/spec_helper.rb"
37
- ]
38
- s.homepage = "http://github.com/sutch/content_urls"
39
- s.licenses = ["MIT"]
40
- s.require_paths = ["lib"]
41
- s.rubygems_version = "2.0.4"
42
- s.summary = "Find and rewrite URLs in different types of content."
43
-
44
- if s.respond_to? :specification_version then
45
- s.specification_version = 4
46
-
47
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
- s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
49
- s.add_development_dependency(%q<rspec>, ["~> 2.14.1"])
50
- s.add_development_dependency(%q<yard>, ["~> 0.7"])
51
- s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
52
- s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
53
- s.add_development_dependency(%q<jeweler>, ["~> 1.8.6"])
54
- s.add_development_dependency(%q<rake>, ["~> 10.1.0"])
55
- else
56
- s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
57
- s.add_dependency(%q<rspec>, ["~> 2.14.1"])
58
- s.add_dependency(%q<yard>, ["~> 0.7"])
59
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
60
- s.add_dependency(%q<bundler>, ["~> 1.3.5"])
61
- s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
62
- s.add_dependency(%q<rake>, ["~> 10.1.0"])
63
- end
64
- else
65
- s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
66
- s.add_dependency(%q<rspec>, ["~> 2.14.1"])
67
- s.add_dependency(%q<yard>, ["~> 0.7"])
68
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
69
- s.add_dependency(%q<bundler>, ["~> 1.3.5"])
70
- s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
71
- s.add_dependency(%q<rake>, ["~> 10.1.0"])
72
- end
73
- end
74
-
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "content_urls"
8
+ s.version = "0.1.8"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Dennis Sutch"]
12
+ s.date = "2013-08-01"
13
+ s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
14
+ s.email = "dennis@sutch.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "LICENSE.txt",
24
+ "README.rdoc",
25
+ "Rakefile",
26
+ "content_urls.gemspec",
27
+ "lib/content_urls.rb",
28
+ "lib/content_urls/parsers/css_parser.rb",
29
+ "lib/content_urls/parsers/html_parser.rb",
30
+ "lib/content_urls/parsers/java_script_parser.rb",
31
+ "lib/content_urls/version.rb",
32
+ "spec/content_urls_spec.rb",
33
+ "spec/css_parser_spec.rb",
34
+ "spec/html_parser_spec.rb",
35
+ "spec/java_script_parser_spec.rb",
36
+ "spec/spec_helper.rb"
37
+ ]
38
+ s.homepage = "http://github.com/sutch/content_urls"
39
+ s.licenses = ["MIT"]
40
+ s.require_paths = ["lib"]
41
+ s.rubygems_version = "2.0.6"
42
+ s.summary = "Find and rewrite URLs in different types of content."
43
+
44
+ if s.respond_to? :specification_version then
45
+ s.specification_version = 4
46
+
47
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
+ s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
49
+ s.add_runtime_dependency(%q<css_parser>, ["~> 1.3.4"])
50
+ s.add_runtime_dependency(%q<rkelly>, ["~> 1.0.7"])
51
+ s.add_development_dependency(%q<rspec>, ["~> 2.14.1"])
52
+ s.add_development_dependency(%q<yard>, ["~> 0.7"])
53
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
54
+ s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
55
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.6"])
56
+ s.add_development_dependency(%q<rake>, ["~> 10.1.0"])
57
+ else
58
+ s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
59
+ s.add_dependency(%q<css_parser>, ["~> 1.3.4"])
60
+ s.add_dependency(%q<rkelly>, ["~> 1.0.7"])
61
+ s.add_dependency(%q<rspec>, ["~> 2.14.1"])
62
+ s.add_dependency(%q<yard>, ["~> 0.7"])
63
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
64
+ s.add_dependency(%q<bundler>, ["~> 1.3.5"])
65
+ s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
66
+ s.add_dependency(%q<rake>, ["~> 10.1.0"])
67
+ end
68
+ else
69
+ s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
70
+ s.add_dependency(%q<css_parser>, ["~> 1.3.4"])
71
+ s.add_dependency(%q<rkelly>, ["~> 1.0.7"])
72
+ s.add_dependency(%q<rspec>, ["~> 2.14.1"])
73
+ s.add_dependency(%q<yard>, ["~> 0.7"])
74
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
75
+ s.add_dependency(%q<bundler>, ["~> 1.3.5"])
76
+ s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
77
+ s.add_dependency(%q<rake>, ["~> 10.1.0"])
78
+ end
79
+ end
80
+
@@ -1,3 +1,5 @@
1
+ require 'css_parser'
2
+
1
3
  class ContentUrls
2
4
 
3
5
  # +CssParser+ finds and rewrites URLs in CSS content.
@@ -17,25 +19,12 @@ class ContentUrls
17
19
  # puts "Found URL: #{url}"
18
20
  # end
19
21
  # # => "Found URL: /images/rainbows.jpg"
22
+ #
20
23
  def self.urls(content)
21
24
  urls = []
22
- remaining = content
23
- while ! remaining.empty?
24
- if @@regex_uri =~ remaining
25
- match = $1
26
- url = $7 || $14 || $23
27
- #if @@regex_baduri =~ match ## bad URL
28
- # remaining = remaining[Regexp.last_match.begin(0)+1..-1] # Use last_match from regex_uri test
29
- #else
30
- remaining = Regexp.last_match.post_match
31
- urls << url
32
- #end
33
- else
34
- remaining = ''
35
- end
36
- end
25
+ rewrite_each_url(content) { |url| urls << url; url }
37
26
  urls.uniq!
38
- urls
27
+ urls
39
28
  end
40
29
 
41
30
  # Rewrites each URL in the CSS content by calling the supplied block with each URL.
@@ -49,22 +38,177 @@ class ContentUrls
49
38
  # # => "Rewritten: body { background: url(/images/unicorns.jpg) }"
50
39
  #
51
40
  def self.rewrite_each_url(content, &block)
52
- done = false
53
- remaining = content
54
- rewritten = ''
55
- while ! remaining.empty?
41
+ urls = {}
42
+ parser = ::CssParser::Parser.new
43
+ parser.load_string!(content)
44
+ parser.each_selector do |selector|
45
+ parser[selector].each do |element|
46
+ remaining = element
47
+ while !remaining.empty?
48
+ if match = @@regex_uri.match(remaining)
49
+ urls[match[:url]] = match[:uri]
50
+ remaining = match.post_match
51
+ else
52
+ remaining = ''
53
+ end
54
+ end
55
+ end
56
+ end
57
+ rewritten_content = [{:content => content, :is_rewritten => false}]
58
+ urls.each do |property_value, url|
59
+ rewritten_url = yield url
60
+ if rewritten_url != url
61
+ rewritten_property_value = property_value.dup
62
+ rewritten_property_value[url] = rewritten_url
63
+ i = 0
64
+ while i < rewritten_content.count
65
+ if !rewritten_content[i][:is_rewritten]
66
+ if match = /#{Regexp.escape(property_value)}/.match(rewritten_content[i][:content])
67
+ if match.pre_match.length > 0
68
+ rewritten_content.insert(i, {:content => match.pre_match, :is_rewritten => false})
69
+ i += 1
70
+ end
71
+ rewritten_content[i] = {:content => rewritten_property_value, :is_rewritten => true}
72
+ if match.post_match.length > 0
73
+ rewritten_content.insert(i+1, {:content => match.post_match, :is_rewritten => false})
74
+ end
75
+ end
76
+ end
77
+ i += 1
78
+ end
79
+ end
80
+ end
81
+ rewritten_content.map { |c| c[:content]}.join
82
+ end
83
+
84
+ protected
85
+
86
+ # Regular expressions based on http://www.w3.org/TR/CSS21/syndata.html
87
+
88
+ # {w}: [ \t\r\n\f]*
89
+ @@w = '([ \t\r\n\f]*)'
90
+
91
+ # {nl}: \n|\r\n|\r|\f
92
+ @@nl = '(\n|\r\n|\r|\f)'
93
+
94
+ # {unicode}: \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
95
+ @@unicode = '(\\\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?)'
96
+
97
+ # {escape}: {unicode}|\\[^\n\r\f0-9a-f]
98
+ @@escape = '(' + @@unicode + '|\\\\[^\n\r\f0-9a-f])'
99
+
100
+ # {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
101
+ @@string1 = '(\"(?<uri>([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
102
+
103
+ # {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
104
+ @@string2 = '(\\\'(?<uri>([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
105
+
106
+ # {string}: {string1}|{string2}
107
+ @@string = '(' + @@string1 + '|' + @@string2 + ')'
108
+
109
+ # {nonascii}: [^\0-\237]
110
+ @@nonascii = '([^\x0-\x237])'
111
+
112
+ # {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
113
+ @@uri = '(?<url>((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(?<uri>([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
114
+
115
+ # {badstring1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
116
+ @@badstring1 = '(\"([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
117
+
118
+ # {badstring2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\\?
119
+ @@badstring2 = '(\\\'([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
120
+
121
+ # {badstring}: {badstring1}|{badstring2}
122
+ @@badstring = '(' + @@badstring1 + '|' + @@badstring2 + ')'
123
+
124
+ # {baduri1}: url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
125
+ @@baduri1 = '(url\(' + @@w + '([!#$%&*-~]|' + @@nonascii + '|' + @@escape + ')*' + @@w + ')'
126
+
127
+ # {baduri2}: url\({w}{string}{w}
128
+ @@baduri2 = '(url\(' + @@w + @@string + @@w + ')'
129
+
130
+ # {baduri3}: url\({w}{badstring}
131
+ @@baduri3 = '(url\(' + @@w + @@badstring + ')'
132
+
133
+ # {baduri}: {baduri1}|{baduri2}|{baduri3}
134
+ @@baduri = '(' + @@baduri1 + '|' + @@baduri2 + '|' + @@baduri3 + ')'
135
+
136
+ @@regex_uri = Regexp.new(@@uri)
137
+ @@regex_baduri = Regexp.new(@@baduri)
138
+
139
+ end
140
+
141
+ # +StyleParser+ finds and rewrites URLs in HTML style attributes.
142
+ #
143
+ # === Implementation note:
144
+ # The methods in this class identify URLs by using regular expressions based on the W3C CSS 2.1 Specification (http://www.w3.org/TR/CSS21/syndata.html).
145
+ class StyleParser
146
+
147
+ # Returns the URLs found in a style attribute.
148
+ #
149
+ # @param [String] style the style attribute.
150
+ # @return [Array] the unique URLs found in the content.
151
+ #
152
+ # @example Parse style attribute for URLs
153
+ # style = 'background: url(/images/rainbows.jpg);'
154
+ # ContentUrls::StyleParser.urls(style).each do |url|
155
+ # puts "Found URL: #{url}"
156
+ # end
157
+ # # => "Found URL: /images/rainbows.jpg"
158
+ #
159
+ def self.urls(style)
160
+ urls = []
161
+ rewrite_each_url(style) { |url| urls << url; url }
162
+ urls.uniq!
163
+ urls
164
+ end
165
+
166
+ # Rewrites each URL in a style attribute by calling the supplied block with each URL.
167
+ #
168
+ # @param [String] style the style attribute.
169
+ #
170
+ # @example Rewrite URLs in style attribute
171
+ # style = 'background: url(/images/rainbows.jpg);'
172
+ # style = ContentUrls::StyleParser.rewrite_each_url(style) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
173
+ # puts "Rewritten: #{style}"
174
+ # # => "Rewritten: background: url(/images/unicorns.jpg);"
175
+ #
176
+ def self.rewrite_each_url(style, &block)
177
+ urls = {}
178
+ remaining = style
179
+ while !remaining.empty?
56
180
  if match = @@regex_uri.match(remaining)
57
- url = match[7] || match[14] || match[23]
58
- rewritten += match.pre_match
181
+ urls[match[:url]] = match[:uri]
59
182
  remaining = match.post_match
60
- replacement = yield url
61
- rewritten += (replacement.nil? ? match[0] : match[0].sub(url, replacement))
62
183
  else
63
- rewritten += remaining
64
184
  remaining = ''
65
185
  end
66
186
  end
67
- return rewritten
187
+ rewritten_content = [{:content => style, :is_rewritten => false}]
188
+ urls.each do |property_value, url|
189
+ rewritten_url = yield url
190
+ if rewritten_url != url
191
+ rewritten_property_value = property_value.dup
192
+ rewritten_property_value[url] = rewritten_url
193
+ i = 0
194
+ while i < rewritten_content.count
195
+ if !rewritten_content[i][:is_rewritten]
196
+ if match = /#{Regexp.escape(property_value)}/.match(rewritten_content[i][:content])
197
+ if match.pre_match.length > 0
198
+ rewritten_content.insert(i, {:content => match.pre_match, :is_rewritten => false})
199
+ i += 1
200
+ end
201
+ rewritten_content[i] = {:content => rewritten_property_value, :is_rewritten => true}
202
+ if match.post_match.length > 0
203
+ rewritten_content.insert(i+1, {:content => match.post_match, :is_rewritten => false})
204
+ end
205
+ end
206
+ end
207
+ i += 1
208
+ end
209
+ end
210
+ end
211
+ rewritten_content.map { |c| c[:content]}.join
68
212
  end
69
213
 
70
214
  protected
@@ -84,10 +228,10 @@ class ContentUrls
84
228
  @@escape = '(' + @@unicode + '|\\\\[^\n\r\f0-9a-f])'
85
229
 
86
230
  # {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
87
- @@string1 = '(\"(([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
231
+ @@string1 = '(\"(?<uri>([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
88
232
 
89
233
  # {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
90
- @@string2 = '(\\\'(([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
234
+ @@string2 = '(\\\'(?<uri>([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
91
235
 
92
236
  # {string}: {string1}|{string2}
93
237
  @@string = '(' + @@string1 + '|' + @@string2 + ')'
@@ -96,7 +240,7 @@ class ContentUrls
96
240
  @@nonascii = '([^\x0-\x237])'
97
241
 
98
242
  # {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
99
- @@uri = '(((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
243
+ @@uri = '(?<url>((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(?<uri>([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
100
244
 
101
245
  # {badstring1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
102
246
  @@badstring1 = '(\"([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
@@ -123,4 +267,5 @@ class ContentUrls
123
267
  @@regex_baduri = Regexp.new(@@baduri)
124
268
 
125
269
  end
270
+
126
271
  end
@@ -152,7 +152,7 @@ class ContentUrls
152
152
  style_attribute: {
153
153
  xpath: "//*[@style]",
154
154
  attribute: 'style',
155
- parser: 'text/css'
155
+ parser: 'html-inline-style'
156
156
  },
157
157
  style_tag: {
158
158
  xpath: "//style",
@@ -1,4 +1,5 @@
1
1
  require 'uri'
2
+ require 'rkelly'
2
3
 
3
4
  class ContentUrls
4
5
 
@@ -21,7 +22,8 @@ class ContentUrls
21
22
  # # => "Found URL: http://example.com/"
22
23
  def self.urls(content)
23
24
  urls = []
24
- URI.extract(content).each { |u| urls << u }
25
+ return urls if content.nil? || content.length == 0
26
+ rewrite_each_url(content) { |url| urls << url; url }
25
27
  urls.uniq!
26
28
  urls
27
29
  end
@@ -37,26 +39,30 @@ class ContentUrls
37
39
  # # => "Rewritten: var link="HTTP://EXAMPLE.COM/""
38
40
  #
39
41
  def self.rewrite_each_url(content, &block)
40
- done = false
41
- remaining = content
42
- rewritten = ''
43
- while ! remaining.empty?
44
- if match = URI.regexp.match(remaining)
45
- url = match.to_s
46
- rewritten += match.pre_match
47
- replacement = url.nil? ? nil : (yield url)
48
- if replacement.nil? or replacement == url # no change in URL
49
- rewritten += url
50
- else
51
- rewritten += replacement
42
+ rewritten_content = content.dup
43
+ rewrite_urls = {}
44
+ parser = RKelly::Parser.new
45
+ ast = parser.parse(content)
46
+ return content if ast.nil?
47
+ ast.each do |node|
48
+ if node.kind_of? RKelly::Nodes::StringNode
49
+ value = node.value
50
+ if match = /^'(.*)'$/.match(value)
51
+ value = match[1] # remove single quotes
52
52
  end
53
- remaining = match.post_match
54
- else
55
- rewritten += remaining
56
- remaining = ''
53
+ if match = URI.regexp.match(value)
54
+ url = match.to_s
55
+ rewritten_url = yield url
56
+ rewrite_urls[url] = rewritten_url if url != rewritten_url
57
+ end
58
+ end
59
+ end
60
+ if rewrite_urls.count > 0
61
+ rewrite_urls.each do |url, rewritten_url|
62
+ rewritten_content[url] = rewritten_url
57
63
  end
58
64
  end
59
- return rewritten
65
+ rewritten_content
60
66
  end
61
67
 
62
68
  end
@@ -2,7 +2,7 @@ class ContentUrls
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- PATCH = 7
5
+ PATCH = 8
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
data/lib/content_urls.rb CHANGED
@@ -146,6 +146,7 @@ class ContentUrls
146
146
 
147
147
  require 'content_urls/parsers/css_parser'
148
148
  register_parser ContentUrls::CssParser, %r{^(text/css)\b}
149
+ register_parser ContentUrls::StyleParser, %r{^(html-inline-style)\b}
149
150
 
150
151
  require 'content_urls/parsers/java_script_parser'
151
152
  register_parser ContentUrls::JavaScriptParser, %r{^(application/x-javascript)\b}, %r{^(application/javascript)\b}, %r{^(text/javascript)\b}
File without changes
@@ -31,4 +31,20 @@ describe ContentUrls::CssParser do
31
31
  end
32
32
  output.should eq %Q{Found URL: /images/rainbows.jpg\n}
33
33
  end
34
+
35
+ it "should find and rewrite urls when css contains no spaces" do
36
+ output = ''
37
+ css = 'body{background:url(/images/rainbows.jpg)}'
38
+ css = ContentUrls::CssParser.rewrite_each_url(css) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
39
+ output += "Rewritten: #{css}" + "\n"
40
+ output.should eq %Q{Rewritten: body{background:url(/images/unicorns.jpg)}\n}
41
+ end
42
+ it "should find urls when css contains no spaces" do
43
+ output = ''
44
+ css = 'body { background: url(/images/rainbows.jpg) }'
45
+ ContentUrls::CssParser.urls(css).each do |url|
46
+ output += "Found URL: #{url}" + "\n"
47
+ end
48
+ output.should eq %Q{Found URL: /images/rainbows.jpg\n}
49
+ end
34
50
  end
File without changes
@@ -30,3 +30,21 @@ describe ContentUrls::JavaScriptParser do
30
30
  output.should eq %Q{Found URL: http://example.com/\n}
31
31
  end
32
32
  end
33
+
34
+ describe ContentUrls::JavaScriptParser do
35
+ it "should correctly handle single quotes in rewrite_each_url method" do
36
+ output = ''
37
+ javascript = "var link='http://example.com/';"
38
+ javascript = ContentUrls::JavaScriptParser.rewrite_each_url(javascript) {|url| url.upcase}
39
+ output += "Rewritten: #{javascript}" + "\n"
40
+ output.should eq %Q{Rewritten: var link='HTTP://EXAMPLE.COM/';\n}
41
+ end
42
+ it "should correctly handle single quotes in urls method" do
43
+ output = ''
44
+ javascript = "var link='http://example.com/';"
45
+ ContentUrls::JavaScriptParser.urls(javascript).each do |url|
46
+ output += "Found URL: #{url}" + "\n"
47
+ end
48
+ output.should eq %Q{Found URL: http://example.com/\n}
49
+ end
50
+ end
data/spec/spec_helper.rb CHANGED
File without changes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: content_urls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dennis Sutch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-18 00:00:00.000000000 Z
11
+ date: 2013-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.5.10
27
+ - !ruby/object:Gem::Dependency
28
+ name: css_parser
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.3.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 1.3.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: rkelly
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.7
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.7
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: rspec
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -154,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
154
182
  version: '0'
155
183
  requirements: []
156
184
  rubyforge_project:
157
- rubygems_version: 2.0.4
185
+ rubygems_version: 2.0.6
158
186
  signing_key:
159
187
  specification_version: 4
160
188
  summary: Find and rewrite URLs in different types of content.