content_urls 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.document +0 -0
- data/.rspec +0 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +0 -0
- data/README.rdoc +2 -1
- data/Rakefile +0 -0
- data/content_urls.gemspec +80 -74
- data/lib/content_urls/parsers/css_parser.rb +174 -29
- data/lib/content_urls/parsers/html_parser.rb +1 -1
- data/lib/content_urls/parsers/java_script_parser.rb +24 -18
- data/lib/content_urls/version.rb +1 -1
- data/lib/content_urls.rb +1 -0
- data/spec/content_urls_spec.rb +0 -0
- data/spec/css_parser_spec.rb +16 -0
- data/spec/html_parser_spec.rb +0 -0
- data/spec/java_script_parser_spec.rb +18 -0
- data/spec/spec_helper.rb +0 -0
- metadata +31 -3
checksums.yaml
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
!binary "U0hBMQ==":
|
|
3
3
|
metadata.gz: !binary |-
|
|
4
|
-
|
|
4
|
+
OTA3MGM5MWE0ZTY5MTA0M2E2MTQyMTI5ZWEwODA0NzZiY2M1YTI5Yw==
|
|
5
5
|
data.tar.gz: !binary |-
|
|
6
|
-
|
|
6
|
+
MGY3ZjAzZTQ0NGM5NTRiZDBiMTk3NWQ4Y2EzNWNkZGRjZjE2YjIwMw==
|
|
7
7
|
!binary "U0hBNTEy":
|
|
8
8
|
metadata.gz: !binary |-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
YTY2NzBlZTIzYjI1MGRlMmU1YTQyN2I2Y2E2ZDgyOGMyOTY0Y2Y0NTY2ZDMz
|
|
10
|
+
ZmQ0N2JlMTYyMDlmNDdlY2M4MmYwNjQ3MzkzYzZkNzllNjg5YzY0NGYwYzM1
|
|
11
|
+
OGM1NWQzN2JmYWQ3Yzg4ZWZmZmZlODY1ZmI1YzUzYzZjNWU5NDI=
|
|
12
12
|
data.tar.gz: !binary |-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
Y2JhOTRhNGYzODRlNmQxNzEyYTc4NTA0YWJmZWRmYzNkOGNjYTMzMTU0MDc5
|
|
14
|
+
MjY1NmQ2OWQ3YjQ2ZDkyMmRlYTY2OGI2NzI0MTJhY2FlM2VmOGE5MzQyMTk0
|
|
15
|
+
NTYzMjc4OWI1NzljZjRiNjM1ZGI2NDg0YjcxZDZhOGNiNDZlZWI=
|
data/.document
CHANGED
|
File without changes
|
data/.rspec
CHANGED
|
File without changes
|
data/Gemfile
CHANGED
data/LICENSE.txt
CHANGED
|
File without changes
|
data/README.rdoc
CHANGED
|
@@ -43,13 +43,14 @@ Provide the HTML content, the content type, and a block to rewrite each URL's ex
|
|
|
43
43
|
|
|
44
44
|
== Requirements
|
|
45
45
|
* nokogiri
|
|
46
|
+
* css_parser
|
|
47
|
+
* rkelly
|
|
46
48
|
|
|
47
49
|
== Development
|
|
48
50
|
To test and develop this gem, additional requirements are:
|
|
49
51
|
* bundler
|
|
50
52
|
* jeweler
|
|
51
53
|
* rake
|
|
52
|
-
* rcov
|
|
53
54
|
* rdoc
|
|
54
55
|
* rspec
|
|
55
56
|
* yard
|
data/Rakefile
CHANGED
|
File without changes
|
data/content_urls.gemspec
CHANGED
|
@@ -1,74 +1,80 @@
|
|
|
1
|
-
# Generated by jeweler
|
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
-
# Instead, edit Jeweler::Tasks in
|
|
4
|
-
# -*- encoding: utf-8 -*-
|
|
5
|
-
|
|
6
|
-
Gem::Specification.new do |s|
|
|
7
|
-
s.name = "content_urls"
|
|
8
|
-
s.version = "0.1.
|
|
9
|
-
|
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
|
-
s.authors = ["Dennis Sutch"]
|
|
12
|
-
s.date = "2013-
|
|
13
|
-
s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
|
|
14
|
-
s.email = "dennis@sutch.com"
|
|
15
|
-
s.extra_rdoc_files = [
|
|
16
|
-
"LICENSE.txt",
|
|
17
|
-
"README.rdoc"
|
|
18
|
-
]
|
|
19
|
-
s.files = [
|
|
20
|
-
".document",
|
|
21
|
-
".rspec",
|
|
22
|
-
"Gemfile",
|
|
23
|
-
"LICENSE.txt",
|
|
24
|
-
"README.rdoc",
|
|
25
|
-
"Rakefile",
|
|
26
|
-
"content_urls.gemspec",
|
|
27
|
-
"lib/content_urls.rb",
|
|
28
|
-
"lib/content_urls/parsers/css_parser.rb",
|
|
29
|
-
"lib/content_urls/parsers/html_parser.rb",
|
|
30
|
-
"lib/content_urls/parsers/java_script_parser.rb",
|
|
31
|
-
"lib/content_urls/version.rb",
|
|
32
|
-
"spec/content_urls_spec.rb",
|
|
33
|
-
"spec/css_parser_spec.rb",
|
|
34
|
-
"spec/html_parser_spec.rb",
|
|
35
|
-
"spec/java_script_parser_spec.rb",
|
|
36
|
-
"spec/spec_helper.rb"
|
|
37
|
-
]
|
|
38
|
-
s.homepage = "http://github.com/sutch/content_urls"
|
|
39
|
-
s.licenses = ["MIT"]
|
|
40
|
-
s.require_paths = ["lib"]
|
|
41
|
-
s.rubygems_version = "2.0.
|
|
42
|
-
s.summary = "Find and rewrite URLs in different types of content."
|
|
43
|
-
|
|
44
|
-
if s.respond_to? :specification_version then
|
|
45
|
-
s.specification_version = 4
|
|
46
|
-
|
|
47
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
48
|
-
s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
|
49
|
-
s.
|
|
50
|
-
s.
|
|
51
|
-
s.add_development_dependency(%q<
|
|
52
|
-
s.add_development_dependency(%q<
|
|
53
|
-
s.add_development_dependency(%q<
|
|
54
|
-
s.add_development_dependency(%q<
|
|
55
|
-
|
|
56
|
-
s.
|
|
57
|
-
|
|
58
|
-
s.add_dependency(%q<
|
|
59
|
-
s.add_dependency(%q<
|
|
60
|
-
s.add_dependency(%q<
|
|
61
|
-
s.add_dependency(%q<
|
|
62
|
-
s.add_dependency(%q<
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
s.add_dependency(%q<
|
|
70
|
-
s.add_dependency(%q<
|
|
71
|
-
s.add_dependency(%q<
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
1
|
+
# Generated by jeweler
|
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
|
+
# -*- encoding: utf-8 -*-
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |s|
|
|
7
|
+
s.name = "content_urls"
|
|
8
|
+
s.version = "0.1.8"
|
|
9
|
+
|
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
|
+
s.authors = ["Dennis Sutch"]
|
|
12
|
+
s.date = "2013-08-01"
|
|
13
|
+
s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
|
|
14
|
+
s.email = "dennis@sutch.com"
|
|
15
|
+
s.extra_rdoc_files = [
|
|
16
|
+
"LICENSE.txt",
|
|
17
|
+
"README.rdoc"
|
|
18
|
+
]
|
|
19
|
+
s.files = [
|
|
20
|
+
".document",
|
|
21
|
+
".rspec",
|
|
22
|
+
"Gemfile",
|
|
23
|
+
"LICENSE.txt",
|
|
24
|
+
"README.rdoc",
|
|
25
|
+
"Rakefile",
|
|
26
|
+
"content_urls.gemspec",
|
|
27
|
+
"lib/content_urls.rb",
|
|
28
|
+
"lib/content_urls/parsers/css_parser.rb",
|
|
29
|
+
"lib/content_urls/parsers/html_parser.rb",
|
|
30
|
+
"lib/content_urls/parsers/java_script_parser.rb",
|
|
31
|
+
"lib/content_urls/version.rb",
|
|
32
|
+
"spec/content_urls_spec.rb",
|
|
33
|
+
"spec/css_parser_spec.rb",
|
|
34
|
+
"spec/html_parser_spec.rb",
|
|
35
|
+
"spec/java_script_parser_spec.rb",
|
|
36
|
+
"spec/spec_helper.rb"
|
|
37
|
+
]
|
|
38
|
+
s.homepage = "http://github.com/sutch/content_urls"
|
|
39
|
+
s.licenses = ["MIT"]
|
|
40
|
+
s.require_paths = ["lib"]
|
|
41
|
+
s.rubygems_version = "2.0.6"
|
|
42
|
+
s.summary = "Find and rewrite URLs in different types of content."
|
|
43
|
+
|
|
44
|
+
if s.respond_to? :specification_version then
|
|
45
|
+
s.specification_version = 4
|
|
46
|
+
|
|
47
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
48
|
+
s.add_runtime_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
|
49
|
+
s.add_runtime_dependency(%q<css_parser>, ["~> 1.3.4"])
|
|
50
|
+
s.add_runtime_dependency(%q<rkelly>, ["~> 1.0.7"])
|
|
51
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.14.1"])
|
|
52
|
+
s.add_development_dependency(%q<yard>, ["~> 0.7"])
|
|
53
|
+
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
|
54
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
|
|
55
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.6"])
|
|
56
|
+
s.add_development_dependency(%q<rake>, ["~> 10.1.0"])
|
|
57
|
+
else
|
|
58
|
+
s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
|
59
|
+
s.add_dependency(%q<css_parser>, ["~> 1.3.4"])
|
|
60
|
+
s.add_dependency(%q<rkelly>, ["~> 1.0.7"])
|
|
61
|
+
s.add_dependency(%q<rspec>, ["~> 2.14.1"])
|
|
62
|
+
s.add_dependency(%q<yard>, ["~> 0.7"])
|
|
63
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
|
64
|
+
s.add_dependency(%q<bundler>, ["~> 1.3.5"])
|
|
65
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
|
|
66
|
+
s.add_dependency(%q<rake>, ["~> 10.1.0"])
|
|
67
|
+
end
|
|
68
|
+
else
|
|
69
|
+
s.add_dependency(%q<nokogiri>, ["~> 1.5.10"])
|
|
70
|
+
s.add_dependency(%q<css_parser>, ["~> 1.3.4"])
|
|
71
|
+
s.add_dependency(%q<rkelly>, ["~> 1.0.7"])
|
|
72
|
+
s.add_dependency(%q<rspec>, ["~> 2.14.1"])
|
|
73
|
+
s.add_dependency(%q<yard>, ["~> 0.7"])
|
|
74
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
|
75
|
+
s.add_dependency(%q<bundler>, ["~> 1.3.5"])
|
|
76
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.6"])
|
|
77
|
+
s.add_dependency(%q<rake>, ["~> 10.1.0"])
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
require 'css_parser'
|
|
2
|
+
|
|
1
3
|
class ContentUrls
|
|
2
4
|
|
|
3
5
|
# +CssParser+ finds and rewrites URLs in CSS content.
|
|
@@ -17,25 +19,12 @@ class ContentUrls
|
|
|
17
19
|
# puts "Found URL: #{url}"
|
|
18
20
|
# end
|
|
19
21
|
# # => "Found URL: /images/rainbows.jpg"
|
|
22
|
+
#
|
|
20
23
|
def self.urls(content)
|
|
21
24
|
urls = []
|
|
22
|
-
|
|
23
|
-
while ! remaining.empty?
|
|
24
|
-
if @@regex_uri =~ remaining
|
|
25
|
-
match = $1
|
|
26
|
-
url = $7 || $14 || $23
|
|
27
|
-
#if @@regex_baduri =~ match ## bad URL
|
|
28
|
-
# remaining = remaining[Regexp.last_match.begin(0)+1..-1] # Use last_match from regex_uri test
|
|
29
|
-
#else
|
|
30
|
-
remaining = Regexp.last_match.post_match
|
|
31
|
-
urls << url
|
|
32
|
-
#end
|
|
33
|
-
else
|
|
34
|
-
remaining = ''
|
|
35
|
-
end
|
|
36
|
-
end
|
|
25
|
+
rewrite_each_url(content) { |url| urls << url; url }
|
|
37
26
|
urls.uniq!
|
|
38
|
-
urls
|
|
27
|
+
urls
|
|
39
28
|
end
|
|
40
29
|
|
|
41
30
|
# Rewrites each URL in the CSS content by calling the supplied block with each URL.
|
|
@@ -49,22 +38,177 @@ class ContentUrls
|
|
|
49
38
|
# # => "Rewritten: body { background: url(/images/unicorns.jpg) }"
|
|
50
39
|
#
|
|
51
40
|
def self.rewrite_each_url(content, &block)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
41
|
+
urls = {}
|
|
42
|
+
parser = ::CssParser::Parser.new
|
|
43
|
+
parser.load_string!(content)
|
|
44
|
+
parser.each_selector do |selector|
|
|
45
|
+
parser[selector].each do |element|
|
|
46
|
+
remaining = element
|
|
47
|
+
while !remaining.empty?
|
|
48
|
+
if match = @@regex_uri.match(remaining)
|
|
49
|
+
urls[match[:url]] = match[:uri]
|
|
50
|
+
remaining = match.post_match
|
|
51
|
+
else
|
|
52
|
+
remaining = ''
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
rewritten_content = [{:content => content, :is_rewritten => false}]
|
|
58
|
+
urls.each do |property_value, url|
|
|
59
|
+
rewritten_url = yield url
|
|
60
|
+
if rewritten_url != url
|
|
61
|
+
rewritten_property_value = property_value.dup
|
|
62
|
+
rewritten_property_value[url] = rewritten_url
|
|
63
|
+
i = 0
|
|
64
|
+
while i < rewritten_content.count
|
|
65
|
+
if !rewritten_content[i][:is_rewritten]
|
|
66
|
+
if match = /#{Regexp.escape(property_value)}/.match(rewritten_content[i][:content])
|
|
67
|
+
if match.pre_match.length > 0
|
|
68
|
+
rewritten_content.insert(i, {:content => match.pre_match, :is_rewritten => false})
|
|
69
|
+
i += 1
|
|
70
|
+
end
|
|
71
|
+
rewritten_content[i] = {:content => rewritten_property_value, :is_rewritten => true}
|
|
72
|
+
if match.post_match.length > 0
|
|
73
|
+
rewritten_content.insert(i+1, {:content => match.post_match, :is_rewritten => false})
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
i += 1
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
rewritten_content.map { |c| c[:content]}.join
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
protected
|
|
85
|
+
|
|
86
|
+
# Regular expressions based on http://www.w3.org/TR/CSS21/syndata.html
|
|
87
|
+
|
|
88
|
+
# {w}: [ \t\r\n\f]*
|
|
89
|
+
@@w = '([ \t\r\n\f]*)'
|
|
90
|
+
|
|
91
|
+
# {nl}: \n|\r\n|\r|\f
|
|
92
|
+
@@nl = '(\n|\r\n|\r|\f)'
|
|
93
|
+
|
|
94
|
+
# {unicode}: \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
|
|
95
|
+
@@unicode = '(\\\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?)'
|
|
96
|
+
|
|
97
|
+
# {escape}: {unicode}|\\[^\n\r\f0-9a-f]
|
|
98
|
+
@@escape = '(' + @@unicode + '|\\\\[^\n\r\f0-9a-f])'
|
|
99
|
+
|
|
100
|
+
# {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
|
101
|
+
@@string1 = '(\"(?<uri>([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
|
|
102
|
+
|
|
103
|
+
# {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
|
104
|
+
@@string2 = '(\\\'(?<uri>([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
|
|
105
|
+
|
|
106
|
+
# {string}: {string1}|{string2}
|
|
107
|
+
@@string = '(' + @@string1 + '|' + @@string2 + ')'
|
|
108
|
+
|
|
109
|
+
# {nonascii}: [^\0-\237]
|
|
110
|
+
@@nonascii = '([^\x0-\x237])'
|
|
111
|
+
|
|
112
|
+
# {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
|
|
113
|
+
@@uri = '(?<url>((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(?<uri>([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
|
|
114
|
+
|
|
115
|
+
# {badstring1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
|
|
116
|
+
@@badstring1 = '(\"([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
|
|
117
|
+
|
|
118
|
+
# {badstring2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\\?
|
|
119
|
+
@@badstring2 = '(\\\'([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
|
|
120
|
+
|
|
121
|
+
# {badstring}: {badstring1}|{badstring2}
|
|
122
|
+
@@badstring = '(' + @@badstring1 + '|' + @@badstring2 + ')'
|
|
123
|
+
|
|
124
|
+
# {baduri1}: url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
|
|
125
|
+
@@baduri1 = '(url\(' + @@w + '([!#$%&*-~]|' + @@nonascii + '|' + @@escape + ')*' + @@w + ')'
|
|
126
|
+
|
|
127
|
+
# {baduri2}: url\({w}{string}{w}
|
|
128
|
+
@@baduri2 = '(url\(' + @@w + @@string + @@w + ')'
|
|
129
|
+
|
|
130
|
+
# {baduri3}: url\({w}{badstring}
|
|
131
|
+
@@baduri3 = '(url\(' + @@w + @@badstring + ')'
|
|
132
|
+
|
|
133
|
+
# {baduri}: {baduri1}|{baduri2}|{baduri3}
|
|
134
|
+
@@baduri = '(' + @@baduri1 + '|' + @@baduri2 + '|' + @@baduri3 + ')'
|
|
135
|
+
|
|
136
|
+
@@regex_uri = Regexp.new(@@uri)
|
|
137
|
+
@@regex_baduri = Regexp.new(@@baduri)
|
|
138
|
+
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# +StyleParser+ finds and rewrites URLs in HTML style attributes.
|
|
142
|
+
#
|
|
143
|
+
# === Implementation note:
|
|
144
|
+
# The methods in this class identify URLs by using regular expressions based on the W3C CSS 2.1 Specification (http://www.w3.org/TR/CSS21/syndata.html).
|
|
145
|
+
class StyleParser
|
|
146
|
+
|
|
147
|
+
# Returns the URLs found in a style attribute.
|
|
148
|
+
#
|
|
149
|
+
# @param [String] style the style attribute.
|
|
150
|
+
# @return [Array] the unique URLs found in the content.
|
|
151
|
+
#
|
|
152
|
+
# @example Parse style attribute for URLs
|
|
153
|
+
# style = 'background: url(/images/rainbows.jpg);'
|
|
154
|
+
# ContentUrls::StyleParser.urls(style).each do |url|
|
|
155
|
+
# puts "Found URL: #{url}"
|
|
156
|
+
# end
|
|
157
|
+
# # => "Found URL: /images/rainbows.jpg"
|
|
158
|
+
#
|
|
159
|
+
def self.urls(style)
|
|
160
|
+
urls = []
|
|
161
|
+
rewrite_each_url(style) { |url| urls << url; url }
|
|
162
|
+
urls.uniq!
|
|
163
|
+
urls
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# Rewrites each URL in a style attribute by calling the supplied block with each URL.
|
|
167
|
+
#
|
|
168
|
+
# @param [String] style the style attribute.
|
|
169
|
+
#
|
|
170
|
+
# @example Rewrite URLs in style attribute
|
|
171
|
+
# style = 'background: url(/images/rainbows.jpg);'
|
|
172
|
+
# style = ContentUrls::StyleParser.rewrite_each_url(style) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
|
|
173
|
+
# puts "Rewritten: #{style}"
|
|
174
|
+
# # => "Rewritten: background: url(/images/unicorns.jpg);"
|
|
175
|
+
#
|
|
176
|
+
def self.rewrite_each_url(style, &block)
|
|
177
|
+
urls = {}
|
|
178
|
+
remaining = style
|
|
179
|
+
while !remaining.empty?
|
|
56
180
|
if match = @@regex_uri.match(remaining)
|
|
57
|
-
|
|
58
|
-
rewritten += match.pre_match
|
|
181
|
+
urls[match[:url]] = match[:uri]
|
|
59
182
|
remaining = match.post_match
|
|
60
|
-
replacement = yield url
|
|
61
|
-
rewritten += (replacement.nil? ? match[0] : match[0].sub(url, replacement))
|
|
62
183
|
else
|
|
63
|
-
rewritten += remaining
|
|
64
184
|
remaining = ''
|
|
65
185
|
end
|
|
66
186
|
end
|
|
67
|
-
|
|
187
|
+
rewritten_content = [{:content => style, :is_rewritten => false}]
|
|
188
|
+
urls.each do |property_value, url|
|
|
189
|
+
rewritten_url = yield url
|
|
190
|
+
if rewritten_url != url
|
|
191
|
+
rewritten_property_value = property_value.dup
|
|
192
|
+
rewritten_property_value[url] = rewritten_url
|
|
193
|
+
i = 0
|
|
194
|
+
while i < rewritten_content.count
|
|
195
|
+
if !rewritten_content[i][:is_rewritten]
|
|
196
|
+
if match = /#{Regexp.escape(property_value)}/.match(rewritten_content[i][:content])
|
|
197
|
+
if match.pre_match.length > 0
|
|
198
|
+
rewritten_content.insert(i, {:content => match.pre_match, :is_rewritten => false})
|
|
199
|
+
i += 1
|
|
200
|
+
end
|
|
201
|
+
rewritten_content[i] = {:content => rewritten_property_value, :is_rewritten => true}
|
|
202
|
+
if match.post_match.length > 0
|
|
203
|
+
rewritten_content.insert(i+1, {:content => match.post_match, :is_rewritten => false})
|
|
204
|
+
end
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
i += 1
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
rewritten_content.map { |c| c[:content]}.join
|
|
68
212
|
end
|
|
69
213
|
|
|
70
214
|
protected
|
|
@@ -84,10 +228,10 @@ class ContentUrls
|
|
|
84
228
|
@@escape = '(' + @@unicode + '|\\\\[^\n\r\f0-9a-f])'
|
|
85
229
|
|
|
86
230
|
# {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
|
87
|
-
@@string1 = '(\"(([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
|
|
231
|
+
@@string1 = '(\"(?<uri>([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*)\")'
|
|
88
232
|
|
|
89
233
|
# {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
|
90
|
-
@@string2 = '(\\\'(([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
|
|
234
|
+
@@string2 = '(\\\'(?<uri>([^\n\r\f\\\\\']|\\\\' + @@nl + '|' + @@escape + ')*)\\\')'
|
|
91
235
|
|
|
92
236
|
# {string}: {string1}|{string2}
|
|
93
237
|
@@string = '(' + @@string1 + '|' + @@string2 + ')'
|
|
@@ -96,7 +240,7 @@ class ContentUrls
|
|
|
96
240
|
@@nonascii = '([^\x0-\x237])'
|
|
97
241
|
|
|
98
242
|
# {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
|
|
99
|
-
@@uri = '(((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
|
|
243
|
+
@@uri = '(?<url>((url\(' + @@w + @@string + @@w + '\))|(url\(' + @@w + '(?<uri>([!#$%&*-\[\]-~]|' + @@nonascii + '|' + @@escape + ')*)' + @@w + '\))))'
|
|
100
244
|
|
|
101
245
|
# {badstring1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
|
|
102
246
|
@@badstring1 = '(\"([^\n\r\f\\\\"]|\\\\' + @@nl + '|' + @@escape + ')*\\\\?)'
|
|
@@ -123,4 +267,5 @@ class ContentUrls
|
|
|
123
267
|
@@regex_baduri = Regexp.new(@@baduri)
|
|
124
268
|
|
|
125
269
|
end
|
|
270
|
+
|
|
126
271
|
end
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require 'uri'
|
|
2
|
+
require 'rkelly'
|
|
2
3
|
|
|
3
4
|
class ContentUrls
|
|
4
5
|
|
|
@@ -21,7 +22,8 @@ class ContentUrls
|
|
|
21
22
|
# # => "Found URL: http://example.com/"
|
|
22
23
|
def self.urls(content)
|
|
23
24
|
urls = []
|
|
24
|
-
|
|
25
|
+
return urls if content.nil? || content.length == 0
|
|
26
|
+
rewrite_each_url(content) { |url| urls << url; url }
|
|
25
27
|
urls.uniq!
|
|
26
28
|
urls
|
|
27
29
|
end
|
|
@@ -37,26 +39,30 @@ class ContentUrls
|
|
|
37
39
|
# # => "Rewritten: var link="HTTP://EXAMPLE.COM/""
|
|
38
40
|
#
|
|
39
41
|
def self.rewrite_each_url(content, &block)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
if
|
|
49
|
-
|
|
50
|
-
else
|
|
51
|
-
rewritten += replacement
|
|
42
|
+
rewritten_content = content.dup
|
|
43
|
+
rewrite_urls = {}
|
|
44
|
+
parser = RKelly::Parser.new
|
|
45
|
+
ast = parser.parse(content)
|
|
46
|
+
return content if ast.nil?
|
|
47
|
+
ast.each do |node|
|
|
48
|
+
if node.kind_of? RKelly::Nodes::StringNode
|
|
49
|
+
value = node.value
|
|
50
|
+
if match = /^'(.*)'$/.match(value)
|
|
51
|
+
value = match[1] # remove single quotes
|
|
52
52
|
end
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
if match = URI.regexp.match(value)
|
|
54
|
+
url = match.to_s
|
|
55
|
+
rewritten_url = yield url
|
|
56
|
+
rewrite_urls[url] = rewritten_url if url != rewritten_url
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
if rewrite_urls.count > 0
|
|
61
|
+
rewrite_urls.each do |url, rewritten_url|
|
|
62
|
+
rewritten_content[url] = rewritten_url
|
|
57
63
|
end
|
|
58
64
|
end
|
|
59
|
-
|
|
65
|
+
rewritten_content
|
|
60
66
|
end
|
|
61
67
|
|
|
62
68
|
end
|
data/lib/content_urls/version.rb
CHANGED
data/lib/content_urls.rb
CHANGED
|
@@ -146,6 +146,7 @@ class ContentUrls
|
|
|
146
146
|
|
|
147
147
|
require 'content_urls/parsers/css_parser'
|
|
148
148
|
register_parser ContentUrls::CssParser, %r{^(text/css)\b}
|
|
149
|
+
register_parser ContentUrls::StyleParser, %r{^(html-inline-style)\b}
|
|
149
150
|
|
|
150
151
|
require 'content_urls/parsers/java_script_parser'
|
|
151
152
|
register_parser ContentUrls::JavaScriptParser, %r{^(application/x-javascript)\b}, %r{^(application/javascript)\b}, %r{^(text/javascript)\b}
|
data/spec/content_urls_spec.rb
CHANGED
|
File without changes
|
data/spec/css_parser_spec.rb
CHANGED
|
@@ -31,4 +31,20 @@ describe ContentUrls::CssParser do
|
|
|
31
31
|
end
|
|
32
32
|
output.should eq %Q{Found URL: /images/rainbows.jpg\n}
|
|
33
33
|
end
|
|
34
|
+
|
|
35
|
+
it "should find and rewrite urls when css contains no spaces" do
|
|
36
|
+
output = ''
|
|
37
|
+
css = 'body{background:url(/images/rainbows.jpg)}'
|
|
38
|
+
css = ContentUrls::CssParser.rewrite_each_url(css) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
|
|
39
|
+
output += "Rewritten: #{css}" + "\n"
|
|
40
|
+
output.should eq %Q{Rewritten: body{background:url(/images/unicorns.jpg)}\n}
|
|
41
|
+
end
|
|
42
|
+
it "should find urls when css contains no spaces" do
|
|
43
|
+
output = ''
|
|
44
|
+
css = 'body { background: url(/images/rainbows.jpg) }'
|
|
45
|
+
ContentUrls::CssParser.urls(css).each do |url|
|
|
46
|
+
output += "Found URL: #{url}" + "\n"
|
|
47
|
+
end
|
|
48
|
+
output.should eq %Q{Found URL: /images/rainbows.jpg\n}
|
|
49
|
+
end
|
|
34
50
|
end
|
data/spec/html_parser_spec.rb
CHANGED
|
File without changes
|
|
@@ -30,3 +30,21 @@ describe ContentUrls::JavaScriptParser do
|
|
|
30
30
|
output.should eq %Q{Found URL: http://example.com/\n}
|
|
31
31
|
end
|
|
32
32
|
end
|
|
33
|
+
|
|
34
|
+
describe ContentUrls::JavaScriptParser do
|
|
35
|
+
it "should correctly handle single quotes in rewrite_each_url method" do
|
|
36
|
+
output = ''
|
|
37
|
+
javascript = "var link='http://example.com/';"
|
|
38
|
+
javascript = ContentUrls::JavaScriptParser.rewrite_each_url(javascript) {|url| url.upcase}
|
|
39
|
+
output += "Rewritten: #{javascript}" + "\n"
|
|
40
|
+
output.should eq %Q{Rewritten: var link='HTTP://EXAMPLE.COM/';\n}
|
|
41
|
+
end
|
|
42
|
+
it "should correctly handle single quotes in urls method" do
|
|
43
|
+
output = ''
|
|
44
|
+
javascript = "var link='http://example.com/';"
|
|
45
|
+
ContentUrls::JavaScriptParser.urls(javascript).each do |url|
|
|
46
|
+
output += "Found URL: #{url}" + "\n"
|
|
47
|
+
end
|
|
48
|
+
output.should eq %Q{Found URL: http://example.com/\n}
|
|
49
|
+
end
|
|
50
|
+
end
|
data/spec/spec_helper.rb
CHANGED
|
File without changes
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: content_urls
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dennis Sutch
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2013-
|
|
11
|
+
date: 2013-08-01 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -24,6 +24,34 @@ dependencies:
|
|
|
24
24
|
- - ~>
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: 1.5.10
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: css_parser
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ~>
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 1.3.4
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ~>
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: 1.3.4
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rkelly
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ~>
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: 1.0.7
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ~>
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: 1.0.7
|
|
27
55
|
- !ruby/object:Gem::Dependency
|
|
28
56
|
name: rspec
|
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -154,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
154
182
|
version: '0'
|
|
155
183
|
requirements: []
|
|
156
184
|
rubyforge_project:
|
|
157
|
-
rubygems_version: 2.0.
|
|
185
|
+
rubygems_version: 2.0.6
|
|
158
186
|
signing_key:
|
|
159
187
|
specification_version: 4
|
|
160
188
|
summary: Find and rewrite URLs in different types of content.
|